Run analytics
where your data lives
DuckDB is an in‑process SQL database that runs everywhere:
on your laptop, on a server and even in the browser.
-- Get the top-3 busiest train stations
SELECT
station_name,
count(*) AS num_services
FROM train_services
GROUP BY ALL
ORDER BY num_services DESC
LIMIT 3;
Query files and
cloud data directly
One SQL dialect. Any data source—Parquet, JSON, S3, data lakes. No clusters. No config.
Live DemoHow we built DuckDB
Simple
We made sure that you can install DuckDB in just a few seconds, and built it on familiar technologies so you can start using it immediately.
Read moreFeature-Rich
We support a wide range of industry standard technologies (e.g., Parquet, SQL, S3) and integrated them for a seamless user experience.
Read moreFast
We built DuckDB on a state-of-the-art columnar storage engine. We strived to enable larger-than-memory workloads so you never run out of memory.
Read moreFree
We release DuckDB, its core extensions and the DuckLake format under MIT, a permissive
Portable
We distribute DuckDB for all popular operating systems and major CPU architectures. We ship idiomatic client APIs for major programming languages.
Read moreExtensible
We created a powerful extension mechanism that enables adding new features and implemented many key DuckDB features as extensions.
Read morecurl https://install.duckdb.org | shpip install duckdbnpm install @duckdb/node-apicurl https://install.duckdb.org | shcurl https://install.duckdb.org | shcargo add duckdb --features bundledgo get github.com/duckdb/duckdb-go/v2-- Get the top-3 busiest train stations
SELECT
station_name,
count(*) AS num_services
FROM train_services
GROUP BY ALL
ORDER BY num_services DESC
LIMIT 3;-- Load CSV file to a table. DuckDB auto-detects
-- the CSV's format, column name and types
CREATE TABLE stations AS
FROM 'https://blobs.duckdb.org/stations.csv';-- Directly query Parquet file over HTTPS
SELECT
station_name,
count(*) AS num_services
FROM 'https://blobs.duckdb.org/train_services.parquet'
GROUP BY ALL
ORDER BY num_services DESC
LIMIT 10;-- Find the top-3 longest domestic train routes
SELECT s1.name_short, s2.name_short, d.distance
FROM distances d
JOIN stations s1 ON d.station1 = s1.code
JOIN stations s2 ON d.station2 = s2.code
WHERE s1.country = s2.country
AND s1.code < s2.code
ORDER BY distance DESC
LIMIT 3;-- List the closest IC stations (as the crow flies)
SELECT
s1.name_long AS station1,
s2.name_long AS station2,
ST_Distance(
ST_Point(s1.geo_lng, s1.geo_lat),
ST_Point(s2.geo_lng, s2.geo_lat)
) * 111_139 AS distance
FROM stations s1, stations s2
WHERE s1.type LIKE '%Intercity%'
AND s2.type LIKE '%Intercity%'
AND s1.id < s2.id
ORDER BY distance ASC
LIMIT 3;# Get the top-3 busiest train stations
import duckdb
duckdb.sql("""
SELECT station, count(*) AS num_services
FROM train_services
GROUP BY ALL
ORDER BY num_services DESC
LIMIT 3;
""")# Reading and writing Pandas dataframes
import pandas as pd
import duckdb
df_in = pd.DataFrame({
'station': ['Delft', 'Delft', 'Gouda', 'Gouda'],
'day': ['Mon', 'Tue', 'Mon', 'Tue'],
'num_services' : [22, 20, 27, 25]})
# Run query on a dataframe and return a dataframe
df_out = duckdb.sql("""
SELECT station, sum(num_services)
FROM df_in
GROUP BY station
""").to_df()# Create custom user-defined function
import duckdb
def plus_one(x):
return x + 1
con = duckdb.connect()
con.create_function('plus_one', plus_one,
['BIGINT'], 'BIGINT', type='native')
con.sql("""
SELECT sum(plus_one(i)) FROM range(10) tbl(i);
""")# Find the largest sepals/petals in the Iris data set
library(duckdb)
con <- dbConnect(duckdb())
duckdb_register(con, "iris", iris)
query <- r'(
SELECT count(*) AS num_observations,
max("Sepal.Width") AS max_width,
max("Petal.Length") AS max_petal_length
FROM iris
WHERE "Sepal.Length" > 5
GROUP BY ALL
)'
dbGetQuery(con, query)# Find the largest sepals/petals in the Iris data set
# using duckplyr
library("duckplyr")
iris |>
filter(Sepal.Length > 5) |>
group_by(Species) |>
summarize(
num_observations = n(),
max_width = max(Sepal.Width),
max_petal_length = max(Petal.Length),
na.rm = TRUE) |>
collect()# Find the largest sepals/petals in the Iris data set
# using dplyr
library("duckdb")
library("dplyr")
con <- dbConnect(duckdb())
duckdb_register(con, "iris", iris)
tbl(con, "iris") |>
filter(Sepal.Length > 5) |>
group_by(Species) |>
summarize(
num_observations = count(),
max_width = max(Sepal.Width),
max_petal_length = max(Petal.Length),
na.rm = TRUE) |>
collect()// Get a list of train stations by traffic
Connection conn =
DriverManager.getConnection("jdbc:duckdb:");
Statement st = conn.createStatement();
ResultSet rs = st.executeQuery(
"""
SELECT station_name,
count(*) AS num_services
FROM train_services
GROUP BY ALL
ORDER BY num_services DESC;
""");
System.out.println(rs.next());// Perform bulk inserts using the Appender API
DuckDBConnection conn = (DuckDBConnection)
DriverManager.getConnection("jdbc:duckdb:");
Statement st = conn.createStatement();
st.execute(
"CREATE TABLE person (name VARCHAR, age INT)");
var appender = conn.createAppender(
DuckDBConnection.DEFAULT_SCHEMA, "person");
appender.beginRow();
appender.append("MC Ducky");
appender.append(49);
appender.endRow();
appender.close();// Get the top-3 busiest train stations in May
import { DuckDBInstance } from '@duckdb/node-api';
const instance = await DuckDBInstance.create();
const connection = await instance.connect();
const reader = await connection.runAndReadAll(
`SELECT station_name, count(*) AS num_services
FROM
'https://blobs.duckdb.org/train_services.parquet'
WHERE monthname(date) = 'May'
GROUP BY ALL
ORDER BY num_services DESC
LIMIT 3;`
);
console.table(reader.getRows());// Web Service Integration:
// Create endpoint to generate numbers
import express from "express";
import { DuckDBInstance } from '@duckdb/node-api';
const app = express();
const instance = await DuckDBInstance.create();
const connection = await instance.connect();
app.get("/getnumbers", async (req, res) => {
const reader = await connection.runAndReadAll(
"SELECT random() AS num FROM range(10)");
res.end(JSON.stringify(reader.getRows()));
});
app.listen(8082, () => console.log(
"Go to: http://localhost:8082/getnumbers"));