Search Shortcut cmd + k | ctrl + k

DuckDB is a fast
|

database system

Query and transform your data anywhere
using DuckDB's feature-rich SQL dialect

-- Get the top-3 busiest train stations
SELECT
    station_name,
    count(*) AS num_services
FROM train_services
GROUP BY ALL
ORDER BY num_services DESC
LIMIT 3;

Live demo

DuckDB at a glance

Simple

DuckDB is easy to install and deploy. It has zero external dependencies and runs in-process in its host application or as a single binary.

Portable

DuckDB runs on Linux, macOS, Windows, Android, iOS and all popular hardware architectures. It has idiomatic client APIs for major programming languages.

Feature-rich

DuckDB offers a rich SQL dialect. It can read and write file formats such as CSV, Parquet, and JSON, to and from the local file system and remote endpoints such as S3 buckets.

Fast

DuckDB runs analytical queries at blazing speed thanks to its columnar engine, which supports parallel execution and can process larger-than-memory workloads.

Extensible

DuckDB is extensible by third-party features such as new data types, functions, file formats and new SQL syntax. User contributions are available as community extensions.

Free

DuckDB and its core extensions are open-source under the permissive MIT License. The intellectual property of the project is held by the DuckDB Foundation.

Installation

DuckDB is seamlessly integrated with major programming languages. It can be installed in less than 10 seconds on most platforms.

More installation options

curl https://install.duckdb.org | sh

Latest release: DuckDB 1.3.2 | System detected:

pip install duckdb

install.packages("duckdb")

<dependency>
	<groupId>org.duckdb</groupId>
	<artifactId>duckdb_jdbc</artifactId>
	<version>1.3.2.0</version>
</dependency>

npm install @duckdb/node-api

curl https://install.duckdb.org | sh

curl https://install.duckdb.org | sh

https://github.com/duckdb/duckdb/releases/download/v1.3.2/duckdb_cli-windows-amd64.zip

duckdb_odbc-osx-universal.zip

duckdb_odbc-linux-amd64.zip

duckdb_odbc-windows-amd64.zip

cargo add duckdb --features bundled

go get github.com/marcboeker/go-duckdb

Blog

extensions

DuckLake 0.2

2025-07-04

Mark Raasveldt

using DuckDB

Discovering DuckDB Use Cases via GitHub

2025-06-27

Petrica Leuca

All blog posts

Upcoming events

2025-09-17 meetup

DuckDB Amsterdam Meetup #3

Adyen Rokin office, Amsterdam

DuckDB meetup organizers

All events

-- Get the top-3 busiest train stations
SELECT
    station_name,
    count(*) AS num_services
FROM train_services
GROUP BY ALL
ORDER BY num_services DESC
LIMIT 3;

-- Load CSV file to a table. DuckDB auto-detects
-- the CSV's format, column name and types
CREATE TABLE stations AS
    FROM 's3://duckdb-blobs/stations.csv';

-- Directly query Parquet file in S3
SELECT
    station_name,
    count(*) AS num_services
FROM 's3://duckdb-blobs/train_services.parquet'
GROUP BY ALL
ORDER BY num_services DESC
LIMIT 10;

-- Find the top-3 longest domestic train routes
SELECT s1.name_short, s2.name_short, d.distance
FROM distances d
JOIN stations s1 ON d.station1 = s1.code
JOIN stations s2 ON d.station2 = s2.code
WHERE s1.country = s2.country
  AND s1.code < s2.code
ORDER BY distance DESC
LIMIT 3;

-- List the closest IC stations (as the crow flies)
SELECT
    s1.name_long AS station1,
    s2.name_long AS station2,
    ST_Distance(
        ST_Point(s1.geo_lng, s1.geo_lat),
        ST_Point(s2.geo_lng, s2.geo_lat)
    ) * 111_139 AS distance
FROM stations s1, stations s2
WHERE s1.type LIKE '%Intercity%'
  AND s2.type LIKE '%Intercity%'
  AND s1.id < s2.id
ORDER BY distance ASC
LIMIT 3;

# Get the top-3 busiest train stations
import duckdb
duckdb.sql("""
    SELECT station, count(*) AS num_services
    FROM train_services
    GROUP BY ALL
    ORDER BY num_services DESC
    LIMIT 3;
    """)

# Reading and writing Pandas dataframes
import pandas as pd
import duckdb

df_in = pd.DataFrame({
    'station': ['Delft', 'Delft', 'Gouda', 'Gouda'],
    'day': ['Mon', 'Tue', 'Mon', 'Tue'],
    'num_services' : [22, 20, 27, 25]})

# Run query on a dataframe and return a dataframe
df_out = duckdb.sql("""
    SELECT station, sum(num_services)
    FROM df_in
    GROUP BY station
    """).to_df()

# Create custom user-defined function
import duckdb

def plus_one(x):
    return x + 1

con = duckdb.connect()
con.create_function('plus_one', plus_one,
    ['BIGINT'], 'BIGINT', type='native')

con.sql("""
    SELECT sum(plus_one(i)) FROM range(10) tbl(i);
    """)

# Find the largest sepals/petals in the Iris data set
library(duckdb)

con <- dbConnect(duckdb())
duckdb_register(con, "iris", iris)

query <- r'(
    SELECT count(*) AS num_observations,
    max("Sepal.Width") AS max_width,
    max("Petal.Length") AS max_petal_length
    FROM iris
    WHERE "Sepal.Length" > 5
    GROUP BY ALL
    )'

dbGetQuery(con, query)

# Find the largest sepals/petals in the Iris data set
# using duckplyr
library("duckplyr")

iris |>
    filter(Sepal.Length > 5) |>
    group_by(Species) |>
    summarize(
        num_observations = n(),
        max_width = max(Sepal.Width),
        max_petal_length = max(Petal.Length),
        na.rm = TRUE) |>
    collect()

# Find the largest sepals/petals in the Iris data set
# using dplyr
library("duckdb")
library("dplyr")

con <- dbConnect(duckdb())
duckdb_register(con, "iris", iris)
tbl(con, "iris") |>
    filter(Sepal.Length > 5) |>
    group_by(Species) |>
    summarize(
        num_observations = count(),
        max_width = max(Sepal.Width),
        max_petal_length = max(Petal.Length),
        na.rm = TRUE) |>
    collect()

// Get a list of train stations by traffic
Connection conn =
    DriverManager.getConnection("jdbc:duckdb:");
Statement st = conn.createStatement();
ResultSet rs = st.executeQuery(
    "SELECT station_name,\n" +
    "       count(*) AS num_services\n" +
    "FROM train_services\n" +
    "GROUP BY ALL\n" +
    "ORDER BY num_services DESC;");

System.out.println(rs.next());

// Perform bulk inserts using the Appender API
DuckDBConnection conn = (DuckDBConnection)
    DriverManager.getConnection("jdbc:duckdb:");
Statement st = conn.createStatement();
st.execute("CREATE TABLE person " +
    "(name VARCHAR, age INT)");

var appender = conn.createAppender(
    DuckDBConnection.DEFAULT_SCHEMA, "person");

appender.beginRow();
appender.append("MC Ducky");
appender.append(49);
appender.endRow();
appender.close();

// Get the top-3 busiest train stations in May
import { DuckDBInstance } from '@duckdb/node-api';
const instance = await DuckDBInstance.create();
const connection = await instance.connect();
const reader = await connection.runAndReadAll(
  `SELECT station_name, count(*) AS num_services
   FROM
   'http://blobs.duckdb.org/train_services.parquet'
   WHERE monthname(date) = 'May'
   GROUP BY ALL
   ORDER BY num_services DESC
   LIMIT 3;`
);
console.table(reader.getRows());

// Web Service Integration:
// Create endpoint to generate numbers
import express from "express";
import { DuckDBInstance } from '@duckdb/node-api';
const app = express();
const instance = await DuckDBInstance.create();
const connection = await instance.connect();
app.get("/getnumbers", async (req, res) => {
  const reader = await connection.runAndReadAll(
    "SELECT random() AS num FROM range(10)");
  res.end(JSON.stringify(reader.getRows()));
});

app.listen(8082, () => console.log(
  "Go to: http://localhost:8082/getnumbers"));

DuckDB is a fast | database system