Export PostgreSQL table to 10 Parquet files with chDB in 15 lines

2 min read Original article ↗

Export PostgreSQL table to 10 Parquet files with chDB in 15 lines

import chdb
# Get total rows
total_rows = int(chdb.query("""
SELECT COUNT(*)
FROM postgresql('192.168.3.135', 'sample_db', 'users', 'postgres', 'mysecretpassword')
""").bytes().strip())
# Calculate rows per file
rows_per_file = -(-total_rows // 10) # Ceiling division
# Export to 10 files
for i in range(10):
chdb.query(f"""
SELECT *
FROM postgresql('192.168.3.135', 'sample_db', 'users', 'postgres', 'mysecretpassword')
WHERE rowNumberInAllBlocks() > {i * rows_per_file}
AND rowNumberInAllBlocks() <= {(i + 1) * rows_per_file}
INTO OUTFILE 'users_part_{i+1}.parquet'
""")
print(f"Exported part {i+1}/10")
Exported part 1/10
Exported part 2/10
Exported part 3/10
Exported part 4/10
Exported part 5/10
Exported part 6/10
Exported part 7/10
Exported part 8/10
Exported part 9/10
Exported part 10/10
# ls -l
-rw-r--r-- 1 root root 1589 Nov 14 07:21 users_part_1.parquet
-rw-r--r-- 1 root root 1577 Nov 14 07:21 users_part_10.parquet
-rw-r--r-- 1 root root 1592 Nov 14 07:21 users_part_2.parquet
-rw-r--r-- 1 root root 1592 Nov 14 07:21 users_part_3.parquet
-rw-r--r-- 1 root root 1588 Nov 14 07:21 users_part_4.parquet
-rw-r--r-- 1 root root 1588 Nov 14 07:21 users_part_5.parquet
-rw-r--r-- 1 root root 1592 Nov 14 07:21 users_part_6.parquet
-rw-r--r-- 1 root root 1592 Nov 14 07:21 users_part_7.parquet
-rw-r--r-- 1 root root 1592 Nov 14 07:21 users_part_8.parquet
-rw-r--r-- 1 root root 1582 Nov 14 07:21 users_part_9.parquet
# Create a Docker container running PostgreSQL
docker run --name my-postgres \
-e POSTGRES_PASSWORD=mysecretpassword \
-e POSTGRES_DB=sample_db \
-p 5432:5432 \
-d postgres:latest
# Wait a few seconds for the container to start
sleep 5
# Create a SQL file with table creation and sample data
cat << 'EOF' > init.sql
CREATE TABLE users (
id SERIAL PRIMARY KEY,
name VARCHAR(50),
email VARCHAR(100),
age INT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
INSERT INTO users (name, email, age)
SELECT
'User ' || generate_series AS name,
'user' || generate_series || '@example.com' AS email,
(random() * 50 + 20)::int AS age
FROM generate_series(1, 100);
EOF
# Execute the SQL file
docker exec -i my-postgres psql -U postgres -d sample_db < init.sql