Conversation
53b0caa to
b8287fa
Compare
b8287fa to
042ef56
Compare
- Add `make test-clickhouse` target
- Fix testcontainer readiness check (remove broken wait_for_logs, rely on built-in HTTP health check)
- Import container classes independently so missing driver packages don't block other containers
- Fix ClickHouse test config credentials to match container defaults
- Remove calls to nonexistent _record_connection_opened/_closed methods
- Set supports_overwrite=False since ClickHouse only supports APPEND mode
- Fix base test_batch_loading to respect supports_overwrite config
f6b056a to
3184056
Compare
incrypto32
left a comment
There was a problem hiding this comment.
LGTM! Just some minor comments that aren't really about this PR.
| def __post_init__(self): | ||
| if self.connection_params is None: | ||
| self.connection_params = {} |
There was a problem hiding this comment.
Note for a future PR: this is consistent with the other loaders, but we should probably clean it up across the board to use field(default_factory=dict) in a separate PR.
| def _get_required_config_fields(self) -> list[str]: | ||
| """Return required configuration fields""" | ||
| return ['host'] # Only host is truly required, others have defaults |
There was a problem hiding this comment.
Note for a future PR: all loaders could accept their typed config class directly instead of a raw dict. The base class already handles the conversion internally, making this a small change per loader. It would also eliminate _get_required_config_fields.
| # Declare loader capabilities | ||
| SUPPORTED_MODES = {LoadMode.APPEND} | ||
| REQUIRES_SCHEMA_MATCH = False | ||
| SUPPORTS_TRANSACTIONS = False # ClickHouse uses eventual consistency |
There was a problem hiding this comment.
Minor: SUPPORTS_TRANSACTIONS = False, but load_batch_transactional is implemented and will be called regardless, because the base class doesn't use this flag. It seems the flag is unused. Is it meant to be purely decorative/metadata?
Performance Benchmark Results
Test Summary: 19 passed, 4 deselected
Results
Raw JSON Results
{
"postgresql_large_table_loading_performance": {
"test_name": "large_table_loading_performance",
"loader_type": "postgresql",
"throughput_rows_per_sec": 142225.59807395603,
"memory_mb": 271.60546875,
"duration_seconds": 0.35155415534973145,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:13.486663",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"redis_pipeline_performance": {
"test_name": "pipeline_performance",
"loader_type": "redis",
"throughput_rows_per_sec": 35121.65581329294,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:27.903330",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"redis_data_structure_performance_hash": {
"test_name": "data_structure_performance_hash",
"loader_type": "redis",
"throughput_rows_per_sec": 35215.126663676594,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:30.960577",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"redis_data_structure_performance_string": {
"test_name": "data_structure_performance_string",
"loader_type": "redis",
"throughput_rows_per_sec": 53490.288020625296,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:30.962725",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"redis_data_structure_performance_sorted_set": {
"test_name": "data_structure_performance_sorted_set",
"loader_type": "redis",
"throughput_rows_per_sec": 81169.43636808387,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:30.964484",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"redis_memory_efficiency": {
"test_name": "memory_efficiency",
"loader_type": "redis",
"throughput_rows_per_sec": 34993.896118545454,
"memory_mb": 13.984909057617188,
"duration_seconds": 1.4288206100463867,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:32.449038",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"delta_lake_large_file_write_performance": {
"test_name": "large_file_write_performance",
"loader_type": "delta_lake",
"throughput_rows_per_sec": 284376.55941507017,
"memory_mb": 288.8671875,
"duration_seconds": 0.17582321166992188,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:32.685532",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"lmdb_large_table_loading_performance": {
"test_name": "large_table_loading_performance",
"loader_type": "lmdb",
"throughput_rows_per_sec": 40470.660704640206,
"memory_mb": 577.81640625,
"duration_seconds": 1.2354629039764404,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:35.769736",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"lmdb_key_generation_strategy_performance_pattern_based": {
"test_name": "key_generation_strategy_performance_pattern_based",
"loader_type": "lmdb",
"throughput_rows_per_sec": 39648.3145015914,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:39.598310",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"lmdb_key_generation_strategy_performance_single_column": {
"test_name": "key_generation_strategy_performance_single_column",
"loader_type": "lmdb",
"throughput_rows_per_sec": 44935.0973812472,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:39.600513",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"lmdb_key_generation_strategy_performance_composite_key": {
"test_name": "key_generation_strategy_performance_composite_key",
"loader_type": "lmdb",
"throughput_rows_per_sec": 38736.66664942699,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:39.602308",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"lmdb_writemap_performance_with": {
"test_name": "writemap_performance_with",
"loader_type": "lmdb",
"throughput_rows_per_sec": 44538.81562096881,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:47.615801",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"lmdb_writemap_performance_without": {
"test_name": "writemap_performance_without",
"loader_type": "lmdb",
"throughput_rows_per_sec": 49669.074467185616,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:47.618108",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"lmdb_memory_efficiency": {
"test_name": "memory_efficiency",
"loader_type": "lmdb",
"throughput_rows_per_sec": 44720.44815298957,
"memory_mb": 177.734375,
"duration_seconds": 1.1180567741394043,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:48.828475",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"lmdb_concurrent_read_performance": {
"test_name": "concurrent_read_performance",
"loader_type": "lmdb",
"throughput_rows_per_sec": 147332.64344220547,
"memory_mb": 0,
"duration_seconds": 0.3393681049346924,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:50.357488",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"lmdb_large_value_performance": {
"test_name": "large_value_performance",
"loader_type": "lmdb",
"throughput_rows_per_sec": 38515.188246097336,
"memory_mb": 0.0390625,
"duration_seconds": 0.025963783264160156,
"dataset_size": 1000,
"timestamp": "2026-03-12T15:58:50.873397",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"postgresql_throughput_comparison": {
"test_name": "throughput_comparison",
"loader_type": "postgresql",
"throughput_rows_per_sec": 142354.48244965007,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 10000,
"timestamp": "2026-03-12T15:58:51.491001",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"redis_throughput_comparison": {
"test_name": "throughput_comparison",
"loader_type": "redis",
"throughput_rows_per_sec": 34051.14283069999,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 10000,
"timestamp": "2026-03-12T15:58:51.493100",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"lmdb_throughput_comparison": {
"test_name": "throughput_comparison",
"loader_type": "lmdb",
"throughput_rows_per_sec": 51714.6210828459,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 10000,
"timestamp": "2026-03-12T15:58:51.494893",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"delta_lake_throughput_comparison": {
"test_name": "throughput_comparison",
"loader_type": "delta_lake",
"throughput_rows_per_sec": 708126.4878188786,
"memory_mb": 0,
"duration_seconds": 0,
"dataset_size": 10000,
"timestamp": "2026-03-12T15:58:51.496707",
"git_commit": "1a3aba47",
"environment": "github-actions"
},
"iceberg_large_file_write_performance": {
"test_name": "large_file_write_performance",
"loader_type": "iceberg",
"throughput_rows_per_sec": 201211.2094440564,
"memory_mb": 439.76953125,
"duration_seconds": 0.24849510192871094,
"dataset_size": 50000,
"timestamp": "2026-03-12T15:58:52.101516",
"git_commit": "1a3aba47",
"environment": "github-actions"
}
}
Load your data into a ClickHouse instance!
Summary
- `insert_arrow()`
- `_amp_batch_id`
- Depends on PR #32
Resolves #5