diff --git a/pg_clickhouse/README.md b/pg_clickhouse/README.md new file mode 100644 index 000000000..2b7e6181b --- /dev/null +++ b/pg_clickhouse/README.md @@ -0,0 +1,34 @@ +pg_clickhouse ClickBench +======================== + +This ClickBench configuration benchmarks [pg_clickhouse] running inside +PostgreSQL and pushing queries down to a ClickHouse database. Files: + +* `benchmark.sh`: Runs the benchmark +* `clickhouse.sh`: Installs ClickHouse and loads it with data; based on the + [ClickHouse benchmark script](../clickhouse/benchmark.sh) +* `create-postgres.sql`: Loads pg_clickhouse into Postgres and creates the + foreign table +* `create-tuned-memory.sql`, `create-tuned.sql`, and `create.sql`: Scripts + that create the ClickHouse table, copied from the [ClickHouse + Config](../clickhouse/) +* `postgres.sh`: Installs, configures, and starts PostgreSQL; based on the + [PostgreSQL benchmark script](../postgresql/benchmark.sh) +* `queries.sql`: The benchmark queries, identical to the [PostgreSQL + queries](../postgresql/queries.sql) +* `README.md`: This file +* `run.sh`: Runs the benchmark; identical to the [PostgreSQL run + script](../postgresql/run.sh) +* `template.json`: Describes this benchmark configuration + +## Environment Variables + +### TOTAL_PARTITIONS + +Set `$TOTAL_PARTITIONS` to the number of partitions to load into ClickHouse. +Defaults to 100. + +### EXPLAIN + +Set `$EXPLAIN` to any non-empty value to run each query with +`EXPLAIN (ANALYZE, VERBOSE)`.
diff --git a/pg_clickhouse/benchmark.sh b/pg_clickhouse/benchmark.sh
new file mode 100755
index 000000000..249734de8
--- /dev/null
+++ b/pg_clickhouse/benchmark.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# Runs the complete pg_clickhouse ClickBench benchmark: sets up ClickHouse and
+# PostgreSQL, runs the queries, then prints the data size and the timings.
+#
+# Usage: ./benchmark.sh [tuned|tuned-memory]
+# The arguments are passed through to clickhouse.sh unchanged.
+#
+# Example setup and invocation:
+# apt-get update -y
+# env DEBIAN_FRONTEND=noninteractive apt-get install -y wget curl sudo
+# env TOTAL_PARTITIONS=1 EXPLAIN=1 ./benchmark.sh
+
+# Install and start ClickHouse and Postgres. Abort when either script reports
+# a failure instead of benchmarking an incomplete setup.
+./clickhouse.sh "$@" || exit 1
+./postgres.sh || exit 1
+
+# Run the queries
+./run.sh 2>&1 | tee log.txt
+
+echo -n "Data size: "
+clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'"
+
+# Turn the log into one "[t1,t2,t3]," row per query: times are converted from
+# milliseconds to seconds and a psql error becomes null.
+grep -oP '^Time: \d+\.\d+ ms|psql: error' log.txt | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/; s/^.*psql: error.*$/null/' |
+    awk '{ if (i % 3 == 0) { printf "[" }; if ($1 == "null") { printf $1 } else { printf $1 / 1000 }; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }'
diff --git a/pg_clickhouse/clickhouse.sh b/pg_clickhouse/clickhouse.sh
new file mode 100755
index 000000000..40f46983f
--- /dev/null
+++ b/pg_clickhouse/clickhouse.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Installs and starts ClickHouse, creates the hits table, and loads it from
+# the partitioned Parquet dataset.
+#
+# Usage: ./clickhouse.sh [tuned|tuned-memory]
+#   (no argument)  create the table from create.sql
+#   tuned          create the table from create-tuned.sql
+#   tuned-memory   create the table from create-tuned-memory.sql
+# Environment: TOTAL_PARTITIONS - number of Parquet partitions to download and
+#   load (default 100).
+
+# Determine which set of files to use depending on the type of run. This is
+# checked first so a bad argument fails before anything is installed.
+variant="${1:-}"
+case "$variant" in
+    '' | tuned | tuned-memory) ;;
+    *)
+        echo "Error: command line argument must be one of {'', 'tuned', 'tuned-memory'}"
+        exit 1
+        ;;
+esac
+SUFFIX="${variant:+-$variant}"
+
+# Install
+
+if [ ! -x /usr/bin/clickhouse ]; then
+    cd /tmp || exit
+    curl https://clickhouse.com/ | sh
+    sudo ./clickhouse install --noninteractive
+    rm clickhouse
+    cd - || exit
+fi
+
+# Optional: if you want to use higher compression:
+if (( 0 )); then
+    echo "
+compression:
+    case:
+        method: zstd
+    " | sudo tee /etc/clickhouse-server/config.d/compression.yaml
+fi
+
+sudo clickhouse start
+
+# Poll up to 300 times, one second apart, for the server to answer a query,
+# and stop if it never does: every later step needs a running server.
+server_ready=0
+for _ in {1..300}; do
+    if clickhouse-client --query "SELECT 1"; then
+        server_ready=1
+        break
+    fi
+    sleep 1
+done
+if [ "$server_ready" -ne 1 ]; then
+    echo "Error: ClickHouse server did not become ready" >&2
+    exit 1
+fi
+
+# Load the data
+
+clickhouse-client < "create${SUFFIX}.sql"
+
+TOTAL_PARTITIONS=${TOTAL_PARTITIONS:-100}
+
+# xargs exits non-zero when any download fails; stop rather than load a
+# partial dataset.
+seq 0 "$((TOTAL_PARTITIONS-1))" | xargs -P100 -I{} wget --continue --progress=dot:giga "https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet" || exit 1
+sudo mkdir -p /var/lib/clickhouse/user_files
+# Change ownership while the files are still in the working directory: the
+# glob is expanded by this shell, not by sudo.
+sudo chown clickhouse:clickhouse hits_*.parquet
+sudo mv hits_*.parquet /var/lib/clickhouse/user_files/
+
+echo -n "Load time: "
+clickhouse-client --time --query "INSERT INTO hits SELECT * FROM file('hits_*.parquet')" --max-insert-threads "$(( $(nproc) / 4 ))"
diff --git a/pg_clickhouse/create-postgres.sql b/pg_clickhouse/create-postgres.sql new file mode 100644 index 000000000..7b5178ae9 --- /dev/null +++ b/pg_clickhouse/create-postgres.sql @@ -0,0 +1,121 @@ +BEGIN; + +CREATE EXTENSION IF NOT EXISTS pg_clickhouse; +CREATE SERVER ch_srv FOREIGN DATA WRAPPER clickhouse_fdw + OPTIONS(driver 'binary', host 'localhost', dbname 'default'); +CREATE USER MAPPING FOR CURRENT_USER SERVER ch_srv + OPTIONS (user 'default'); + +CREATE FOREIGN TABLE hits ( + "WatchID" BIGINT NOT NULL, + "JavaEnable" SMALLINT NOT NULL, + "Title" TEXT NOT NULL, + "GoodEvent" SMALLINT NOT NULL, + "EventTime" TIMESTAMP NOT NULL, + "EventDate" DATE NOT NULL, + "CounterID" INTEGER NOT NULL, + "ClientIP" INTEGER NOT NULL, + "RegionID" INTEGER NOT NULL, + "UserID" BIGINT NOT NULL, + "CounterClass" SMALLINT NOT NULL, + "OS" SMALLINT NOT NULL, + "UserAgent" SMALLINT NOT NULL, + "URL" TEXT NOT NULL, + "Referer" TEXT NOT NULL, + "IsRefresh" SMALLINT NOT NULL, + "RefererCategoryID" SMALLINT NOT NULL, + "RefererRegionID" INTEGER NOT NULL, + "URLCategoryID" SMALLINT NOT NULL, + "URLRegionID" INTEGER NOT NULL, + "ResolutionWidth" SMALLINT NOT NULL, + "ResolutionHeight" SMALLINT NOT NULL, + "ResolutionDepth" SMALLINT NOT NULL, + "FlashMajor" SMALLINT NOT NULL, + "FlashMinor" SMALLINT NOT NULL, + "FlashMinor2" TEXT NOT
NULL, + "NetMajor" SMALLINT NOT NULL, + "NetMinor" SMALLINT NOT NULL, + "UserAgentMajor" SMALLINT NOT NULL, + "UserAgentMinor" VARCHAR(255) NOT NULL, + "CookieEnable" SMALLINT NOT NULL, + "JavascriptEnable" SMALLINT NOT NULL, + "IsMobile" SMALLINT NOT NULL, + "MobilePhone" SMALLINT NOT NULL, + "MobilePhoneModel" TEXT NOT NULL, + "Params" TEXT NOT NULL, + "IPNetworkID" INTEGER NOT NULL, + "TraficSourceID" SMALLINT NOT NULL, + "SearchEngineID" SMALLINT NOT NULL, + "SearchPhrase" TEXT NOT NULL, + "AdvEngineID" SMALLINT NOT NULL, + "IsArtifical" SMALLINT NOT NULL, + "WindowClientWidth" SMALLINT NOT NULL, + "WindowClientHeight" SMALLINT NOT NULL, + "ClientTimeZone" SMALLINT NOT NULL, + "ClientEventTime" TIMESTAMP NOT NULL, + "SilverlightVersion1" SMALLINT NOT NULL, + "SilverlightVersion2" SMALLINT NOT NULL, + "SilverlightVersion3" INTEGER NOT NULL, + "SilverlightVersion4" SMALLINT NOT NULL, + "PageCharset" TEXT NOT NULL, + "CodeVersion" INTEGER NOT NULL, + "IsLink" SMALLINT NOT NULL, + "IsDownload" SMALLINT NOT NULL, + "IsNotBounce" SMALLINT NOT NULL, + "FUniqID" BIGINT NOT NULL, + "OriginalURL" TEXT NOT NULL, + "HID" INTEGER NOT NULL, + "IsOldCounter" SMALLINT NOT NULL, + "IsEvent" SMALLINT NOT NULL, + "IsParameter" SMALLINT NOT NULL, + "DontCountHits" SMALLINT NOT NULL, + "WithHash" SMALLINT NOT NULL, + "HitColor" CHAR NOT NULL, + "LocalEventTime" TIMESTAMP NOT NULL, + "Age" SMALLINT NOT NULL, + "Sex" SMALLINT NOT NULL, + "Income" SMALLINT NOT NULL, + "Interests" SMALLINT NOT NULL, + "Robotness" SMALLINT NOT NULL, + "RemoteIP" INTEGER NOT NULL, + "WindowName" INTEGER NOT NULL, + "OpenerName" INTEGER NOT NULL, + "HistoryLength" SMALLINT NOT NULL, + "BrowserLanguage" TEXT NOT NULL, + "BrowserCountry" TEXT NOT NULL, + "SocialNetwork" TEXT NOT NULL, + "SocialAction" TEXT NOT NULL, + "HTTPError" SMALLINT NOT NULL, + "SendTiming" INTEGER NOT NULL, + "DNSTiming" INTEGER NOT NULL, + "ConnectTiming" INTEGER NOT NULL, + "ResponseStartTiming" INTEGER NOT NULL, + 
"ResponseEndTiming" INTEGER NOT NULL, + "FetchTiming" INTEGER NOT NULL, + "SocialSourceNetworkID" SMALLINT NOT NULL, + "SocialSourcePage" TEXT NOT NULL, + "ParamPrice" BIGINT NOT NULL, + "ParamOrderID" TEXT NOT NULL, + "ParamCurrency" TEXT NOT NULL, + "ParamCurrencyID" SMALLINT NOT NULL, + "OpenstatServiceName" TEXT NOT NULL, + "OpenstatCampaignID" TEXT NOT NULL, + "OpenstatAdID" TEXT NOT NULL, + "OpenstatSourceID" TEXT NOT NULL, + "UTMSource" TEXT NOT NULL, + "UTMMedium" TEXT NOT NULL, + "UTMCampaign" TEXT NOT NULL, + "UTMContent" TEXT NOT NULL, + "UTMTerm" TEXT NOT NULL, + "FromTag" TEXT NOT NULL, + "HasGCLID" SMALLINT NOT NULL, + "RefererHash" BIGINT NOT NULL, + "URLHash" BIGINT NOT NULL, + "CLID" INTEGER NOT NULL +) SERVER ch_srv OPTIONS( + database 'default', + table_name 'hits', + engine 'MergeTree' +); + +COMMIT; diff --git a/pg_clickhouse/create-tuned-memory.sql b/pg_clickhouse/create-tuned-memory.sql new file mode 100644 index 000000000..763bf8da9 --- /dev/null +++ b/pg_clickhouse/create-tuned-memory.sql @@ -0,0 +1,109 @@ +CREATE OR REPLACE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT NOT NULL, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT NOT NULL, + Referer TEXT NOT NULL, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor 
VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT NOT NULL, + Params TEXT NOT NULL, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT NOT NULL, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT NOT NULL, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT NOT NULL, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT NOT NULL, + BrowserCountry TEXT NOT NULL, + SocialNetwork TEXT NOT NULL, + SocialAction TEXT NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT NOT NULL, + ParamCurrency TEXT NOT NULL, + 
ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT NOT NULL, + OpenstatCampaignID TEXT NOT NULL, + OpenstatAdID TEXT NOT NULL, + OpenstatSourceID TEXT NOT NULL, + UTMSource TEXT NOT NULL, + UTMMedium TEXT NOT NULL, + UTMCampaign TEXT NOT NULL, + UTMContent TEXT NOT NULL, + UTMTerm TEXT NOT NULL, + FromTag TEXT NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL, +) +ENGINE = Memory; diff --git a/pg_clickhouse/create-tuned.sql b/pg_clickhouse/create-tuned.sql new file mode 100644 index 000000000..e3d79850b --- /dev/null +++ b/pg_clickhouse/create-tuned.sql @@ -0,0 +1,111 @@ +CREATE OR REPLACE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT NOT NULL, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT NOT NULL, + Referer TEXT NOT NULL, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT NOT NULL, + Params TEXT NOT NULL, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT NOT NULL, + AdvEngineID SMALLINT NOT NULL, + 
IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT NOT NULL, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT NOT NULL, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT NOT NULL, + BrowserCountry TEXT NOT NULL, + SocialNetwork TEXT NOT NULL, + SocialAction TEXT NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT NOT NULL, + ParamCurrency TEXT NOT NULL, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT NOT NULL, + OpenstatCampaignID TEXT NOT NULL, + OpenstatAdID TEXT NOT NULL, + OpenstatSourceID TEXT NOT NULL, + UTMSource TEXT NOT NULL, + UTMMedium TEXT NOT NULL, + UTMCampaign TEXT NOT NULL, + UTMContent TEXT NOT NULL, + UTMTerm TEXT NOT NULL, + FromTag TEXT NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + 
URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL, + PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) +) +ENGINE = MergeTree +SETTINGS index_granularity = 1024; diff --git a/pg_clickhouse/create.sql b/pg_clickhouse/create.sql new file mode 100644 index 000000000..3567bce17 --- /dev/null +++ b/pg_clickhouse/create.sql @@ -0,0 +1,110 @@ +CREATE OR REPLACE TABLE hits +( + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title TEXT NOT NULL, + GoodEvent SMALLINT NOT NULL, + EventTime TIMESTAMP NOT NULL, + EventDate Date NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + RegionID INTEGER NOT NULL, + UserID BIGINT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL TEXT NOT NULL, + Referer TEXT NOT NULL, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INTEGER NOT NULL, + URLCategoryID SMALLINT NOT NULL, + URLRegionID INTEGER NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 TEXT NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel TEXT NOT NULL, + Params TEXT NOT NULL, + IPNetworkID INTEGER NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase TEXT NOT NULL, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime TIMESTAMP NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INTEGER NOT NULL, + 
SilverlightVersion4 SMALLINT NOT NULL, + PageCharset TEXT NOT NULL, + CodeVersion INTEGER NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL TEXT NOT NULL, + HID INTEGER NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime TIMESTAMP NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INTEGER NOT NULL, + WindowName INTEGER NOT NULL, + OpenerName INTEGER NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage TEXT NOT NULL, + BrowserCountry TEXT NOT NULL, + SocialNetwork TEXT NOT NULL, + SocialAction TEXT NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INTEGER NOT NULL, + DNSTiming INTEGER NOT NULL, + ConnectTiming INTEGER NOT NULL, + ResponseStartTiming INTEGER NOT NULL, + ResponseEndTiming INTEGER NOT NULL, + FetchTiming INTEGER NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage TEXT NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID TEXT NOT NULL, + ParamCurrency TEXT NOT NULL, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName TEXT NOT NULL, + OpenstatCampaignID TEXT NOT NULL, + OpenstatAdID TEXT NOT NULL, + OpenstatSourceID TEXT NOT NULL, + UTMSource TEXT NOT NULL, + UTMMedium TEXT NOT NULL, + UTMCampaign TEXT NOT NULL, + UTMContent TEXT NOT NULL, + UTMTerm TEXT NOT NULL, + FromTag TEXT NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INTEGER NOT NULL, + PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) +) +ENGINE = MergeTree; diff --git a/pg_clickhouse/postgres.sh b/pg_clickhouse/postgres.sh new file mode 100755 index 000000000..f3ba4a0f7 --- /dev/null +++ b/pg_clickhouse/postgres.sh @@ -0,0 
+1,78 @@
+#!/bin/bash
+
+# Installs PostgreSQL $PGVERSION from the PGDG apt repository and computes
+# memory- and CPU-based sizing values from this machine.
+
+set -eu
+
+PGVERSION=17
+
+# Source: https://wiki.postgresql.org/wiki/Apt
+sudo apt-get update -y
+sudo env DEBIAN_FRONTEND=noninteractive apt-get install -y postgresql-common
+sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -y
+
+sudo apt-get update -y
+sudo env DEBIAN_FRONTEND=noninteractive apt-get install -y "postgresql-$PGVERSION" "postgresql-server-dev-$PGVERSION"
+
+# MemTotal as printed by /proc/meminfo (the kernel reports it in kB).
+memory="$(awk '/MemTotal/ {print $2}' /proc/meminfo)"
+threads="$(nproc)"
+cpus=$((threads / 2))
+# Shared buffers is set to 25% of memory in AWS RDS by default. We do the same.
+# https://docs.aws.amazon.com/prescriptive-guidance/latest/tuning-postgresql-parameters/shared-buffers.html
+shared_buffers=$((memory / 4))
+# Effective cache size does not need to be perfect, but it should be somewhat
+# close to the total memory minus what is expected to be used for queries.
+# https://www.cybertec-postgresql.com/en/effective_cache_size-what-it-means-in-postgresql/
+effective_cache_size=$((memory - (memory / 4)))
+# By default, max_worker_processes is set to 8 in postgres. We want to be able
+# to use all the threads for parallel workers so we increase it. We also add a
+# small buffer of 15 for any other background workers that might be created.
+max_worker_processes=$((threads + 15))
+# Below we make sure to configure the rest of the parallel worker settings to
+# match the number of cpu cores:
+# https://www.crunchydata.com/blog/postgres-tuning-and-performance-for-analytics-data
+#
+# We also increase work_mem because we are doing an analytics workload to allow
+# some more memory for sorting, aggregations, etc.
+#
+# It's necessary to increase max_wal_size to make the dataload not take very
+# long. With the default value it's constantly checkpointing, and the PG logs
+# warn you about that and tell you to increase max_wal_size.
+ +sudo tee /etc/postgresql/$PGVERSION/main/conf.d/clickbench.conf < /dev/null 2>&1 ; then + sudo systemctl restart "postgresql@$PGVERSION-main" +else + sudo /etc/init.d/postgresql start +fi + +sudo env DEBIAN_FRONTEND=noninteractive apt-get install -y \ + libcurl4-openssl-dev \ + uuid-dev \ + libssl-dev \ + make \ + cmake \ + g++ \ + pgxnclient + +# Setup the database. +pgxn install pg_clickhouse + +sudo -u postgres psql -t -c 'CREATE DATABASE test' +sudo -u postgres psql test -f create-postgres.sql 2>&1 | tee pg_load_out.txt +if grep 'ERROR' pg_load_out.txt +then + exit 1 +fi diff --git a/pg_clickhouse/queries.sql b/pg_clickhouse/queries.sql new file mode 100644 index 000000000..34469a2d6 --- /dev/null +++ b/pg_clickhouse/queries.sql @@ -0,0 +1,43 @@ +SELECT COUNT(*) FROM hits; +SELECT COUNT(*) FROM hits WHERE "AdvEngineID" <> 0; +SELECT SUM("AdvEngineID"), COUNT(*), AVG("ResolutionWidth") FROM hits; +SELECT AVG("UserID") FROM hits; +SELECT COUNT(DISTINCT "UserID") FROM hits; +SELECT COUNT(DISTINCT "SearchPhrase") FROM hits; +SELECT MIN("EventDate"), MAX("EventDate") FROM hits; +SELECT "AdvEngineID", COUNT(*) FROM hits WHERE "AdvEngineID" <> 0 GROUP BY "AdvEngineID" ORDER BY COUNT(*) DESC; +SELECT "RegionID", COUNT(DISTINCT "UserID") AS u FROM hits GROUP BY "RegionID" ORDER BY u DESC LIMIT 10; +SELECT "RegionID", SUM("AdvEngineID"), COUNT(*) AS c, AVG("ResolutionWidth"), COUNT(DISTINCT "UserID") FROM hits GROUP BY "RegionID" ORDER BY c DESC LIMIT 10; +SELECT "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhoneModel" ORDER BY u DESC LIMIT 10; +SELECT "MobilePhone", "MobilePhoneModel", COUNT(DISTINCT "UserID") AS u FROM hits WHERE "MobilePhoneModel" <> '' GROUP BY "MobilePhone", "MobilePhoneModel" ORDER BY u DESC LIMIT 10; +SELECT "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; +SELECT "SearchPhrase", COUNT(DISTINCT "UserID") AS u FROM 
hits WHERE "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY u DESC LIMIT 10; +SELECT "SearchEngineID", "SearchPhrase", COUNT(*) AS c FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "SearchPhrase" ORDER BY c DESC LIMIT 10; +SELECT "UserID", COUNT(*) FROM hits GROUP BY "UserID" ORDER BY COUNT(*) DESC LIMIT 10; +SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; +SELECT "UserID", "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", "SearchPhrase" LIMIT 10; +SELECT "UserID", extract(minute FROM "EventTime") AS m, "SearchPhrase", COUNT(*) FROM hits GROUP BY "UserID", m, "SearchPhrase" ORDER BY COUNT(*) DESC LIMIT 10; +SELECT "UserID" FROM hits WHERE "UserID" = 435090932899640449; +SELECT COUNT(*) FROM hits WHERE "URL" LIKE '%google%'; +SELECT "SearchPhrase", MIN("URL"), COUNT(*) AS c FROM hits WHERE "URL" LIKE '%google%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; +SELECT "SearchPhrase", MIN("URL"), MIN("Title"), COUNT(*) AS c, COUNT(DISTINCT "UserID") FROM hits WHERE "Title" LIKE '%Google%' AND "URL" NOT LIKE '%.google.%' AND "SearchPhrase" <> '' GROUP BY "SearchPhrase" ORDER BY c DESC LIMIT 10; +SELECT * FROM hits WHERE "URL" LIKE '%google%' ORDER BY "EventTime" LIMIT 10; +SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime" LIMIT 10; +SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "SearchPhrase" LIMIT 10; +SELECT "SearchPhrase" FROM hits WHERE "SearchPhrase" <> '' ORDER BY "EventTime", "SearchPhrase" LIMIT 10; +SELECT "CounterID", AVG(length("URL")) AS l, COUNT(*) AS c FROM hits WHERE "URL" <> '' GROUP BY "CounterID" HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; +SELECT REGEXP_REPLACE("Referer", '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length("Referer")) AS l, COUNT(*) AS c, MIN("Referer") FROM hits WHERE "Referer" <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; 
+SELECT SUM("ResolutionWidth"), SUM("ResolutionWidth" + 1), SUM("ResolutionWidth" + 2), SUM("ResolutionWidth" + 3), SUM("ResolutionWidth" + 4), SUM("ResolutionWidth" + 5), SUM("ResolutionWidth" + 6), SUM("ResolutionWidth" + 7), SUM("ResolutionWidth" + 8), SUM("ResolutionWidth" + 9), SUM("ResolutionWidth" + 10), SUM("ResolutionWidth" + 11), SUM("ResolutionWidth" + 12), SUM("ResolutionWidth" + 13), SUM("ResolutionWidth" + 14), SUM("ResolutionWidth" + 15), SUM("ResolutionWidth" + 16), SUM("ResolutionWidth" + 17), SUM("ResolutionWidth" + 18), SUM("ResolutionWidth" + 19), SUM("ResolutionWidth" + 20), SUM("ResolutionWidth" + 21), SUM("ResolutionWidth" + 22), SUM("ResolutionWidth" + 23), SUM("ResolutionWidth" + 24), SUM("ResolutionWidth" + 25), SUM("ResolutionWidth" + 26), SUM("ResolutionWidth" + 27), SUM("ResolutionWidth" + 28), SUM("ResolutionWidth" + 29), SUM("ResolutionWidth" + 30), SUM("ResolutionWidth" + 31), SUM("ResolutionWidth" + 32), SUM("ResolutionWidth" + 33), SUM("ResolutionWidth" + 34), SUM("ResolutionWidth" + 35), SUM("ResolutionWidth" + 36), SUM("ResolutionWidth" + 37), SUM("ResolutionWidth" + 38), SUM("ResolutionWidth" + 39), SUM("ResolutionWidth" + 40), SUM("ResolutionWidth" + 41), SUM("ResolutionWidth" + 42), SUM("ResolutionWidth" + 43), SUM("ResolutionWidth" + 44), SUM("ResolutionWidth" + 45), SUM("ResolutionWidth" + 46), SUM("ResolutionWidth" + 47), SUM("ResolutionWidth" + 48), SUM("ResolutionWidth" + 49), SUM("ResolutionWidth" + 50), SUM("ResolutionWidth" + 51), SUM("ResolutionWidth" + 52), SUM("ResolutionWidth" + 53), SUM("ResolutionWidth" + 54), SUM("ResolutionWidth" + 55), SUM("ResolutionWidth" + 56), SUM("ResolutionWidth" + 57), SUM("ResolutionWidth" + 58), SUM("ResolutionWidth" + 59), SUM("ResolutionWidth" + 60), SUM("ResolutionWidth" + 61), SUM("ResolutionWidth" + 62), SUM("ResolutionWidth" + 63), SUM("ResolutionWidth" + 64), SUM("ResolutionWidth" + 65), SUM("ResolutionWidth" + 66), SUM("ResolutionWidth" + 67), SUM("ResolutionWidth" + 68), 
SUM("ResolutionWidth" + 69), SUM("ResolutionWidth" + 70), SUM("ResolutionWidth" + 71), SUM("ResolutionWidth" + 72), SUM("ResolutionWidth" + 73), SUM("ResolutionWidth" + 74), SUM("ResolutionWidth" + 75), SUM("ResolutionWidth" + 76), SUM("ResolutionWidth" + 77), SUM("ResolutionWidth" + 78), SUM("ResolutionWidth" + 79), SUM("ResolutionWidth" + 80), SUM("ResolutionWidth" + 81), SUM("ResolutionWidth" + 82), SUM("ResolutionWidth" + 83), SUM("ResolutionWidth" + 84), SUM("ResolutionWidth" + 85), SUM("ResolutionWidth" + 86), SUM("ResolutionWidth" + 87), SUM("ResolutionWidth" + 88), SUM("ResolutionWidth" + 89) FROM hits; +SELECT "SearchEngineID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "SearchEngineID", "ClientIP" ORDER BY c DESC LIMIT 10; +SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits WHERE "SearchPhrase" <> '' GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; +SELECT "WatchID", "ClientIP", COUNT(*) AS c, SUM("IsRefresh"), AVG("ResolutionWidth") FROM hits GROUP BY "WatchID", "ClientIP" ORDER BY c DESC LIMIT 10; +SELECT "URL", COUNT(*) AS c FROM hits GROUP BY "URL" ORDER BY c DESC LIMIT 10; +SELECT 1, "URL", COUNT(*) AS c FROM hits GROUP BY 1, "URL" ORDER BY c DESC LIMIT 10; +SELECT "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3, COUNT(*) AS c FROM hits GROUP BY "ClientIP", "ClientIP" - 1, "ClientIP" - 2, "ClientIP" - 3 ORDER BY c DESC LIMIT 10; +SELECT "URL", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "URL" <> '' GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10; +SELECT "Title", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "DontCountHits" = 0 AND "IsRefresh" = 0 AND "Title" <> '' GROUP BY "Title" ORDER BY "PageViews" DESC LIMIT 
10; +SELECT "URL", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "IsLink" <> 0 AND "IsDownload" = 0 GROUP BY "URL" ORDER BY "PageViews" DESC LIMIT 10 OFFSET 1000; +SELECT "TraficSourceID", "SearchEngineID", "AdvEngineID", CASE WHEN ("SearchEngineID" = 0 AND "AdvEngineID" = 0) THEN "Referer" ELSE '' END AS "Src", "URL" AS "Dst", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 GROUP BY "TraficSourceID", "SearchEngineID", "AdvEngineID", "Src", "Dst" ORDER BY "PageViews" DESC LIMIT 10 OFFSET 1000; +SELECT "URLHash", "EventDate", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "TraficSourceID" IN (-1, 6) AND "RefererHash" = 3594120000172545465 GROUP BY "URLHash", "EventDate" ORDER BY "PageViews" DESC LIMIT 10 OFFSET 100; +SELECT "WindowClientWidth", "WindowClientHeight", COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-01' AND "EventDate" <= '2013-07-31' AND "IsRefresh" = 0 AND "DontCountHits" = 0 AND "URLHash" = 2868770270353813622 GROUP BY "WindowClientWidth", "WindowClientHeight" ORDER BY "PageViews" DESC LIMIT 10 OFFSET 10000; +SELECT DATE_TRUNC('minute', "EventTime") AS M, COUNT(*) AS "PageViews" FROM hits WHERE "CounterID" = 62 AND "EventDate" >= '2013-07-14' AND "EventDate" <= '2013-07-15' AND "IsRefresh" = 0 AND "DontCountHits" = 0 GROUP BY DATE_TRUNC('minute', "EventTime") ORDER BY DATE_TRUNC('minute', "EventTime") LIMIT 10 OFFSET 1000; diff --git a/pg_clickhouse/results/c6a.4xlarge.json b/pg_clickhouse/results/c6a.4xlarge.json new file mode 100644 index 000000000..77c924966 --- /dev/null +++ b/pg_clickhouse/results/c6a.4xlarge.json @@ -0,0 +1,57 @@ +{ + "system": "pg_clickhouse", + "date": "2026-01-21", + "machine": "c6a.4xlarge", +
"cluster_size": 1, + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": ["C","column-oriented","PostgreSQL compatible","lukewarm-cold-run"], + "load_time": 242, + "data_size": 15262174510, + "result": [ + [0.006607,0.001705,0.001341], + [0.014535,0.007505,0.007095], + [0.035208,0.02233,0.021298], + [0.384252,0.033788,0.031596], + [0.406934,0.369646,0.358387], + [0.468169,0.43637,0.439444], + [0.016635,0.00989,0.009765], + [0.014634,0.012972,0.017182], + [0.481673,0.457925,0.452197], + [0.571931,0.529559,0.520876], + [0.146547,0.139848,0.142484], + [0.168581,0.156008,0.164324], + [0.632167,0.608047,0.603235], + [0.902,0.852523,0.855817], + [0.643766,0.572602,0.596788], + [0.449551,0.416767,0.421356], + [1.90815,1.79591,1.77585], + [1.0569,1.06624,1.06372], + [4.18495,3.23001,3.14883], + [0.023241,0.007348,0.006817], + [7.4562,0.341562,0.334343], + [0.766191,0.09725,0.099462], + [5.84716,0.692562,0.697863], + [0.551441,0.506311,0.535559], + [0.1673,0.154354,0.15241], + [0.216353,0.209134,0.212657], + [0.150158,0.153212,0.148557], + [0.08528,0.087941,0.083922], + [5.48048,5.2618,5.24145], + [0.040604,0.03773,0.032683], + [0.367266,0.335817,0.338838], + [2.21749,0.622636,0.546126], + [4.57155,4.11727,4.13586], + [3.09299,3.2854,3.30868], + [3.21993,3.28495,3.35442], + [0.970742,1.08363,0.987148], + [0.0539,0.045306,0.043795], + [0.026517,0.026791,0.026242], + [0.030499,0.021188,0.021636], + [0.089864,0.087235,0.090421], + [0.0261,0.01606,0.017285], + [0.018587,0.011619,0.025312], + [0.014538,0.009853,0.012973] +] +}
diff --git a/pg_clickhouse/run.sh b/pg_clickhouse/run.sh
new file mode 100755
index 000000000..7746d5e9f
--- /dev/null
+++ b/pg_clickhouse/run.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Runs every query in queries.sql TRIES times through psql with \timing on.
+# The kernel caches are dropped before each query when /proc/sys is writable.
+#
+# Environment: EXPLAIN - when set to a non-empty value, each query is
+#   prefixed with EXPLAIN (ANALYZE, VERBOSE).
+
+TRIES=3
+prefix=""
+if [ -n "${EXPLAIN:-}" ]; then
+    prefix="EXPLAIN (ANALYZE, VERBOSE) "
+fi
+
+# Read from a redirect rather than "cat |"; the extra test on $query keeps a
+# final line that has no trailing newline from being skipped.
+while IFS= read -r query || [ -n "$query" ]; do
+    sync
+    [ -w /proc/sys ] && echo 3 | sudo tee /proc/sys/vm/drop_caches
+
+    (
+        echo '\timing'
+        yes "$prefix$query" | head -n "$TRIES"
+    ) | sudo -u postgres psql -e --no-psqlrc --tuples-only test 2>&1 # | grep -P 'Time|psql: error'
+done < queries.sql
diff --git a/pg_clickhouse/template.json b/pg_clickhouse/template.json new file mode 100644 index 000000000..bc3914bf1 --- /dev/null +++ b/pg_clickhouse/template.json @@ -0,0 +1,12 @@ +{ + "system": "pg_clickhouse", + "proprietary": "no", + "hardware": "cpu", + "tuned": "no", + "tags": [ + "C", + "column-oriented", + "PostgreSQL compatible", + "lukewarm-cold-run" + ] +}