Export PostgreSQL table to 10 Parquet files with chDB in 15 lines
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import chdb | |
| # Get total rows | |
| total_rows = int(chdb.query(""" | |
| SELECT COUNT(*) | |
| FROM postgresql('192.168.3.135', 'sample_db', 'users', 'postgres', 'mysecretpassword') | |
| """).bytes().strip()) | |
| # Calculate rows per file | |
| rows_per_file = -(-total_rows // 10) # Ceiling division | |
| # Export to 10 files | |
| for i in range(10): | |
| chdb.query(f""" | |
| SELECT * | |
| FROM postgresql('192.168.3.135', 'sample_db', 'users', 'postgres', 'mysecretpassword') | |
| WHERE rowNumberInAllBlocks() > {i * rows_per_file} | |
| AND rowNumberInAllBlocks() <= {(i + 1) * rows_per_file} | |
| INTO OUTFILE 'users_part_{i+1}.parquet' | |
| """) | |
| print(f"Exported part {i+1}/10") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Exported part 1/10 | |
| Exported part 2/10 | |
| Exported part 3/10 | |
| Exported part 4/10 | |
| Exported part 5/10 | |
| Exported part 6/10 | |
| Exported part 7/10 | |
| Exported part 8/10 | |
| Exported part 9/10 | |
| Exported part 10/10 | |
| # ls -l | |
| -rw-r--r-- 1 root root 1589 Nov 14 07:21 users_part_1.parquet | |
| -rw-r--r-- 1 root root 1577 Nov 14 07:21 users_part_10.parquet | |
| -rw-r--r-- 1 root root 1592 Nov 14 07:21 users_part_2.parquet | |
| -rw-r--r-- 1 root root 1592 Nov 14 07:21 users_part_3.parquet | |
| -rw-r--r-- 1 root root 1588 Nov 14 07:21 users_part_4.parquet | |
| -rw-r--r-- 1 root root 1588 Nov 14 07:21 users_part_5.parquet | |
| -rw-r--r-- 1 root root 1592 Nov 14 07:21 users_part_6.parquet | |
| -rw-r--r-- 1 root root 1592 Nov 14 07:21 users_part_7.parquet | |
| -rw-r--r-- 1 root root 1592 Nov 14 07:21 users_part_8.parquet | |
| -rw-r--r-- 1 root root 1582 Nov 14 07:21 users_part_9.parquet |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Create a Docker container running PostgreSQL | |
| docker run --name my-postgres \ | |
| -e POSTGRES_PASSWORD=mysecretpassword \ | |
| -e POSTGRES_DB=sample_db \ | |
| -p 5432:5432 \ | |
| -d postgres:latest | |
| # Wait a few seconds for the container to start | |
| sleep 5 | |
| # Create a SQL file with table creation and sample data | |
| cat << 'EOF' > init.sql | |
| CREATE TABLE users ( | |
| id SERIAL PRIMARY KEY, | |
| name VARCHAR(50), | |
| email VARCHAR(100), | |
| age INT, | |
| created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP | |
| ); | |
| INSERT INTO users (name, email, age) | |
| SELECT | |
| 'User ' || generate_series AS name, | |
| 'user' || generate_series || '@example.com' AS email, | |
| (random() * 50 + 20)::int AS age | |
| FROM generate_series(1, 100); | |
| EOF | |
| # Execute the SQL file | |
| docker exec -i my-postgres psql -U postgres -d sample_db < init.sql |