From c63cd34aa51941d5851dfd6d3d273415ad02a7fb Mon Sep 17 00:00:00 2001
From: Heikki Linnakangas <heikki.linnakangas@iki.fi>
Date: Thu, 8 Sep 2016 21:42:55 +0300
Subject: [PATCH 3/3] Add sorting test suite

---
 src/test/sorttestsuite/Makefile      |  31 ++++++
 src/test/sorttestsuite/correctness.c | 153 +++++++++++++++++++++++++++
 src/test/sorttestsuite/generate.c    | 198 +++++++++++++++++++++++++++++++++++
 src/test/sorttestsuite/speed.c       | 139 ++++++++++++++++++++++++
 4 files changed, 521 insertions(+)
 create mode 100644 src/test/sorttestsuite/Makefile
 create mode 100644 src/test/sorttestsuite/correctness.c
 create mode 100644 src/test/sorttestsuite/generate.c
 create mode 100644 src/test/sorttestsuite/speed.c

diff --git a/src/test/sorttestsuite/Makefile b/src/test/sorttestsuite/Makefile
new file mode 100644
index 0000000..91c8ccd
--- /dev/null
+++ b/src/test/sorttestsuite/Makefile
@@ -0,0 +1,31 @@
+CFLAGS=-g -I/home/heikki/pgsql.master/include
+LDFLAGS=-L/home/heikki/pgsql.master/lib
+# Libraries go in LDLIBS so the built-in link rule lists them after the sources.
+LDLIBS=-lpq -lm
+TESTDB=sorttest
+
+# For testing quicksort.
+SCALE_SMALL=1024	# 1 MB
+
+# For testing external sort, while the dataset still fits in OS cache.
+SCALE_MEDIUM=1048576	# 1 GB
+
+# Does not fit in memory.
+SCALE_LARGE=20971520	# 20 GB
+#SCALE_LARGE=1500000	# ~1.4 GB
+
+all: generate speed correctness
+
+generate: generate.c
+
+speed: speed.c
+
+correctness: correctness.c
+
+generate_testdata: generate
+	dropdb --if-exists $(TESTDB)
+	createdb $(TESTDB)
+	psql $(TESTDB) -c "CREATE SCHEMA small; CREATE SCHEMA medium; CREATE SCHEMA large;"
+	(echo "set search_path=small;"; ./generate all $(SCALE_SMALL)) | psql $(TESTDB)
+	(echo "set search_path=medium;"; ./generate all $(SCALE_MEDIUM)) | psql $(TESTDB)
+	(echo "set search_path=large;"; ./generate all $(SCALE_LARGE)) | psql $(TESTDB)
diff --git a/src/test/sorttestsuite/correctness.c b/src/test/sorttestsuite/correctness.c
new file mode 100644
index 0000000..b41aa2e
--- /dev/null
+++ b/src/test/sorttestsuite/correctness.c
@@ -0,0 +1,153 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/time.h>
+
+#include <libpq-fe.h>
+
+static PGconn *conn;
+
+/* Echo a SQL command to stderr and run it; exit(1) unless it succeeds. */
+static void
+execute(const char *sql)
+{
+	PGresult   *result;
+	ExecStatusType status;
+
+	fprintf(stderr, "%s\n", sql);
+	result = PQexec(conn, sql);
+	status = PQresultStatus(result);
+	if (status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK)
+	{
+		fprintf(stderr,"command failed: %s\n%s", sql, PQerrorMessage(conn));
+		PQclear(result);
+		exit(1);
+	}
+	PQclear(result);
+}
+
+/*
+ * Run "sql" in single-row mode and verify that the first column of each row
+ * is ordered according to cmp(previous, current), which must return nonzero
+ * when the pair is in order. Exits with status 1 on any error or misordering.
+ */
+static void
+check_sorted(const char *sql, int (*cmp)(const char *a, const char *b))
+{
+	PGresult   *res;
+	PGresult   *prevres = NULL;
+	int			rowno = 1;
+
+	fprintf(stderr, "running query: %s\n", sql);
+	/* No PGresult exists yet on these two error paths, so nothing to clear. */
+	if (!PQsendQuery(conn, sql))
+	{
+		fprintf(stderr,"query failed: %s\n%s", sql, PQerrorMessage(conn));
+		exit(1);
+	}
+	if (!PQsetSingleRowMode(conn))
+	{
+		fprintf(stderr,"setting single-row mode failed: %s", PQerrorMessage(conn));
+		exit(1);
+	}
+
+	while ((res = PQgetResult(conn)) != NULL)
+	{
+		if (PQresultStatus(res) == PGRES_TUPLES_OK)
+		{
+			/* zero-row result that terminates the row stream; free it */
+			PQclear(res);
+			continue;
+		}
+		if (PQresultStatus(res) != PGRES_SINGLE_TUPLE)
+		{
+			fprintf(stderr,"error while fetching: %d, %s\n%s", PQresultStatus(res), sql, PQerrorMessage(conn));
+			PQclear(res);
+			exit(1);
+		}
+
+		if (prevres && !cmp(PQgetvalue(prevres, 0, 0), PQgetvalue(res, 0, 0)))
+		{
+			fprintf(stderr,"FAIL: result not sorted, row %d: %s, prev %s\n", rowno,
+					PQgetvalue(res, 0, 0), PQgetvalue(prevres, 0, 0));	/* current, then previous */
+			PQclear(res);
+			exit(1);
+		}
+		PQclear(prevres);		/* PQclear(NULL) is a no-op */
+		prevres = res;
+		rowno++;
+	}
+	PQclear(prevres);
+}
+
+
+static int
+compare_strings(const char *a, const char *b)
+{
+	return !(strcmp(a, b) > 0);	/* 1 if a <= b bytewise, else 0 */
+}
+
+static int
+compare_ints(const char *a, const char *b)
+{
+	return !(atoi(a) > atoi(b));	/* 1 if a <= b as decimal ints, else 0 */
+}
+
+/*
+ * Connect with an empty conninfo string and check that ORDER BY returns
+ * sorted rows: the "small" tables at 4MB work_mem, then the "medium" tables
+ * at each of several larger work_mem settings.
+ */
+int
+main(int argc, char **argv)
+{
+	typedef struct
+	{
+		const char *sql;
+		int			(*cmp) (const char *a, const char *b);
+	} sort_check;
+	static const sort_check small_checks[] = {
+		{"SELECT * FROM small.ordered_ints ORDER BY i", compare_ints},
+		{"SELECT * FROM small.random_ints ORDER BY i", compare_ints},
+		{"SELECT * FROM small.ordered_text ORDER BY t", compare_strings},
+		{"SELECT * FROM small.random_text ORDER BY t", compare_strings}
+	};
+	static const sort_check medium_checks[] = {
+		{"SELECT * FROM medium.ordered_ints ORDER BY i", compare_ints},
+		{"SELECT * FROM medium.random_ints ORDER BY i", compare_ints},
+		{"SELECT * FROM medium.ordered_text ORDER BY t", compare_strings},
+		{"SELECT * FROM medium.random_text ORDER BY t", compare_strings}
+	};
+	static const char *const medium_settings[] = {
+		"set work_mem = '16MB'",
+		"set work_mem = '256MB'",
+		"set work_mem = '512MB'",
+		"set work_mem = '2048MB'"
+	};
+	size_t		s;
+	size_t		c;
+
+	conn = PQconnectdb("");
+	if (PQstatus(conn) != CONNECTION_OK)
+	{
+		fprintf(stderr, "Connection to database failed: %s",
+				PQerrorMessage(conn));
+		exit(1);
+	}
+	execute("set trace_sort=on");
+
+	execute("set work_mem = '4MB'");
+	for (c = 0; c < sizeof(small_checks) / sizeof(small_checks[0]); c++)
+		check_sorted(small_checks[c].sql, small_checks[c].cmp);
+
+	for (s = 0; s < sizeof(medium_settings) / sizeof(medium_settings[0]); s++)
+	{
+		execute(medium_settings[s]);
+		for (c = 0; c < sizeof(medium_checks) / sizeof(medium_checks[0]); c++)
+			check_sorted(medium_checks[c].sql, medium_checks[c].cmp);
+	}
+
+	PQfinish(conn);
+	return 0;
+}
diff --git a/src/test/sorttestsuite/generate.c b/src/test/sorttestsuite/generate.c
new file mode 100644
index 0000000..f481189
--- /dev/null
+++ b/src/test/sorttestsuite/generate.c
@@ -0,0 +1,198 @@
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Emit SQL on stdout that rebuilds ordered_ints with ascending values from 0. */
+static void
+generate_ordered_integers(int scale)
+{
+	const int	nrows = ((double) scale) * 28.75;
+	int			value = 0;
+
+	fputs("DROP TABLE IF EXISTS ordered_ints;\n"
+		  "BEGIN;"
+		  "CREATE TABLE ordered_ints (i int4);\n"
+		  "COPY ordered_ints FROM STDIN WITH (FREEZE);\n", stdout);
+
+	while (value < nrows)
+		printf("%d\n", value++);
+	fputs("\\.\n"
+		  "COMMIT;\n", stdout);
+}
+
+/* Emit SQL on stdout that rebuilds random_ints, one random() value per row. */
+static void
+generate_random_integers(int scale)
+{
+	int			rows = ((double) scale) * 28.75;
+	int			i;
+
+	printf("DROP TABLE IF EXISTS random_ints;\n");
+	printf("BEGIN;");
+	printf("CREATE TABLE random_ints (i int4);\n");
+	printf("COPY random_ints FROM STDIN WITH (FREEZE);\n");
+
+	for (i = 0; i < rows; i++)
+		printf("%ld\n", random());	/* random() returns long, so %ld, not %d */
+	printf("\\.\n");
+	printf("COMMIT;\n");
+}
+
+#define ALPHABET_SIZE 26	/* number of letters in alphabet[] */
+static const char alphabet[ALPHABET_SIZE + 1] = "abcdefghijklmnopqrstuvwxyz";
+
+#define TEXT_LEN 50		/* characters in each generated text value */
+
+/*
+ * Write len random lowercase letters plus a terminating NUL into buf. Each
+ * random() value supplies several letters via repeated division.
+ */
+static void
+random_string(char *buf, int len)
+{
+	long		bits;			/* unconsumed part of the last random() value */
+	long		range = 0;		/* RAND_MAX scaled down in step with bits; 0 forces the first draw */
+	int			remaining;
+
+	for (remaining = len; remaining > 0; remaining--)
+	{
+		if (range / ALPHABET_SIZE < ALPHABET_SIZE)
+		{
+			range = RAND_MAX;
+			bits = random();
+		}
+		*buf++ = alphabet[bits % ALPHABET_SIZE];
+		range /= ALPHABET_SIZE;
+		bits /= ALPHABET_SIZE;
+	}
+	*buf = '\0';
+}
+
+/* Emit SQL on stdout that rebuilds random_text with TEXT_LEN-letter random rows. */
+static void
+generate_random_text(int scale)
+{
+	const int	nrows = ((double) scale) * 12.7;
+	char		text[TEXT_LEN + 1] = { 0 };
+	int			remaining;
+
+	fputs("DROP TABLE IF EXISTS random_text;\n"
+		  "BEGIN;"
+		  "CREATE TABLE random_text (t text);\n"
+		  "COPY random_text FROM STDIN WITH (FREEZE);\n", stdout);
+
+	for (remaining = nrows; remaining > 0; remaining--)
+	{
+		random_string(text, TEXT_LEN);
+		puts(text);
+	}
+	fputs("\\.\n"
+		  "COMMIT;\n", stdout);
+}
+
+/*
+ * Emit SQL on stdout that rebuilds ordered_text with TEXT_LEN-letter strings
+ * produced by counting upwards in base ALPHABET_SIZE, so each row compares
+ * greater than the one before it. Only a leading run of characters ever
+ * changes; the remaining characters stay 'a'.
+ */
+static void
+generate_ordered_text(int scale)
+{
+	const int	nrows = ((double) scale) * 12.7;
+	char		letter_idx[TEXT_LEN] = {0};
+	char		text[TEXT_LEN + 1];
+	double		nvarying;
+	int			row;
+	int			pos;
+
+	fputs("DROP TABLE IF EXISTS ordered_text;\n"
+		  "BEGIN;"
+		  "CREATE TABLE ordered_text (t text);\n"
+		  "COPY ordered_text FROM STDIN WITH (FREEZE);\n", stdout);
+
+	/*
+	 * Strings sharing one long prefix would make comparisons very expensive,
+	 * which is not the case being measured here. So work out how many
+	 * leading characters are needed to count through nrows values in base
+	 * ALPHABET_SIZE, and vary exactly those.
+	 */
+	nvarying = ceil(log(nrows) / log((double) ALPHABET_SIZE));
+	if (nvarying > TEXT_LEN)
+		nvarying = TEXT_LEN;
+
+	text[TEXT_LEN] = '\0';
+	for (row = 0; row < nrows; row++)
+	{
+		for (pos = 0; pos < TEXT_LEN; pos++)
+			text[pos] = alphabet[letter_idx[pos]];
+		puts(text);
+
+		/* Add one at the last varying position, carrying leftwards on wrap. */
+		pos = nvarying - 1;
+		while (pos >= 0 && ++letter_idx[pos] == ALPHABET_SIZE)
+		{
+			letter_idx[pos] = 0;
+			pos--;
+		}
+	}
+
+	fputs("\\.\n"
+		  "COMMIT;\n", stdout);
+}
+
+
+struct
+{
+	char *name;						/* dataset name accepted on the command line */
+	void (*generate_func)(int scale);	/* prints the SQL for that dataset */
+} datasets[] =
+{
+ 	{ "ordered_integers", generate_ordered_integers },
+	{ "random_integers", generate_random_integers },
+	{ "ordered_text", generate_ordered_text },
+	{ "random_text", generate_random_text },
+	{ NULL, NULL }					/* end-of-list marker */
+};
+
+void							/* report a bad invocation and exit(1) */
+usage(void)
+{
+	fprintf(stderr, "Usage: generate <dataset name>|all [scale in kB]\n");	/* stdout carries the SQL */
+	exit(1);
+}
+
+/* generate <dataset name>|all [scale in kB, default 1024]: print the dataset SQL on stdout. */
+int
+main(int argc, char **argv)
+{
+	int			scale = 1024;	/* 1 MB */
+	int			i;
+	int			found = 0;
+
+	if (argc < 2)
+		usage();
+	if (argc >= 3)
+	{
+		char	   *end;
+		long		value = strtol(argv[2], &end, 10);
+
+		/* unlike atoi(), reject garbage instead of generating empty tables */
+		scale = (int) value;
+		if (end == argv[2] || *end != '\0' || value < 0 || scale != value)
+			usage();
+	}
+
+	for (i = 0; datasets[i].name != NULL; i++)
+	{
+		if (strcmp(argv[1], datasets[i].name) != 0 && strcmp(argv[1], "all") != 0)
+			continue;
+		fprintf (stderr, "Generating %s for %d kB...\n", datasets[i].name, scale);
+		datasets[i].generate_func(scale);
+		found = 1;
+	}
+	if (!found)
+		fprintf(stderr, "unrecognized test name %s\n", argv[1]);
+	exit(found ? 0 : 1);
+}
diff --git a/src/test/sorttestsuite/speed.c b/src/test/sorttestsuite/speed.c
new file mode 100644
index 0000000..3ebc57c
--- /dev/null
+++ b/src/test/sorttestsuite/speed.c
@@ -0,0 +1,139 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include <sys/time.h>
+
+#include <libpq-fe.h>
+
+#define REPETITIONS 3
+
+static PGconn *conn;
+
+/* Time one PQexec() of sql; returns elapsed wall-clock ms, exits on failure. */
+static double
+execute(const char *sql)
+{
+	struct timeval start, stop;
+	PGresult   *result;
+	ExecStatusType status;
+	gettimeofday(&start, NULL);
+	result = PQexec(conn, sql);
+	gettimeofday(&stop, NULL);
+	status = PQresultStatus(result);
+	if (status != PGRES_COMMAND_OK && status != PGRES_TUPLES_OK)
+	{
+		fprintf(stderr,"command failed: %s\n%s", sql, PQerrorMessage(conn));
+		PQclear(result);
+		exit(1);
+	}
+	PQclear(result);
+	return ((double) (stop.tv_sec - start.tv_sec)) * 1000.0 + ((double) (stop.tv_usec - start.tv_usec) / 1000.0);
+}
+
+/* Run query REPETITIONS times, printing "<testname>: <t> ms, <t> ms, ..." on one stdout line. */
+static void
+execute_test(const char *testname, const char *query)
+{
+	const char *separator = "";
+	int			run;
+
+	printf ("%s: ", testname);
+	fflush(stdout);
+
+	for (run = 0; run < REPETITIONS; run++)
+	{
+		double		elapsed_ms = execute(query);
+
+		printf("%s%.0f ms", separator, elapsed_ms);
+		fflush(stdout);
+		separator = ", ";
+	}
+	putchar('\n');
+}
+
+/*
+ * Time the sort queries under each work_mem setting, printing results to stdout.
+ */
+int
+main(int argc, char **argv)
+{
+	/* Make a connection to the database */
+	conn = PQconnectdb("");
+
+	/* Check to see that the backend connection was successfully made */
+	if (PQstatus(conn) != CONNECTION_OK)
+	{
+		fprintf(stderr, "Connection to database failed: %s",
+				PQerrorMessage(conn));
+		exit(1);
+	}
+
+	execute("set trace_sort=on");
+
+	printf("# Tests on small tables (1 MB), 4MB work_mem\n");
+	printf("# Performs a quicksort\n");
+	printf("-----\n");
+	execute("set work_mem='4MB'");
+	execute_test("ordered_ints", "SELECT COUNT(*) FROM (SELECT * FROM small.ordered_ints ORDER BY i) t");
+	execute_test("random_ints",  "SELECT COUNT(*) FROM (SELECT * FROM small.random_ints ORDER BY i) t");
+	execute_test("ordered_text", "SELECT COUNT(*) FROM (SELECT * FROM small.ordered_text ORDER BY t) t");
+	execute_test("random_text",  "SELECT COUNT(*) FROM (SELECT * FROM small.random_text ORDER BY t) t");
+	printf("\n");
+
+	printf("# Tests on medium-sized tables (1 GB), 4MB work_mem\n");
+	printf("# Performs an external sort, but the table still fits in OS cache\n");
+	printf("# Needs a multi-stage merge\n");
+	printf("-----\n");
+	execute("set work_mem='4MB'");
+	execute_test("ordered_ints", "SELECT COUNT(*) FROM (SELECT * FROM medium.ordered_ints ORDER BY i) t");
+	execute_test("random_ints",  "SELECT COUNT(*) FROM (SELECT * FROM medium.random_ints ORDER BY i) t");
+	execute_test("ordered_text", "SELECT COUNT(*) FROM (SELECT * FROM medium.ordered_text ORDER BY t) t");
+	execute_test("random_text",  "SELECT COUNT(*) FROM (SELECT * FROM medium.random_text ORDER BY t) t");
+	printf("\n");
+
+	printf("# Tests on medium-sized tables (1 GB), 16MB work_mem\n");
+	printf("# Same as previous test, but with larger work_mem\n");
+	printf("-----\n");
+	execute("set work_mem='16MB'");
+	execute_test("ordered_ints", "SELECT COUNT(*) FROM (SELECT * FROM medium.ordered_ints ORDER BY i) t");
+	execute_test("random_ints",  "SELECT COUNT(*) FROM (SELECT * FROM medium.random_ints ORDER BY i) t");
+	execute_test("ordered_text", "SELECT COUNT(*) FROM (SELECT * FROM medium.ordered_text ORDER BY t) t");
+	execute_test("random_text",  "SELECT COUNT(*) FROM (SELECT * FROM medium.random_text ORDER BY t) t");
+	printf("\n");
+
+	printf("# Tests on medium-sized tables (1 GB), 256MB work_mem\n");
+	printf("# This works with a single merge pass\n");
+	printf("-----\n");
+	execute("set work_mem='256MB'");
+	execute_test("ordered_ints", "SELECT COUNT(*) FROM (SELECT * FROM medium.ordered_ints ORDER BY i) t");
+	execute_test("random_ints",  "SELECT COUNT(*) FROM (SELECT * FROM medium.random_ints ORDER BY i) t");
+	execute_test("ordered_text", "SELECT COUNT(*) FROM (SELECT * FROM medium.ordered_text ORDER BY t) t");
+	execute_test("random_text",  "SELECT COUNT(*) FROM (SELECT * FROM medium.random_text ORDER BY t) t");
+	printf("\n");
+
+	printf("# Tests on medium-sized tables (1 GB), 512MB work_mem\n");
+	printf("# This works with a single merge pass\n");
+	printf("-----\n");
+	execute("set work_mem='512MB'");
+	execute_test("ordered_ints", "SELECT COUNT(*) FROM (SELECT * FROM medium.ordered_ints ORDER BY i) t");
+	execute_test("random_ints",  "SELECT COUNT(*) FROM (SELECT * FROM medium.random_ints ORDER BY i) t");
+	execute_test("ordered_text", "SELECT COUNT(*) FROM (SELECT * FROM medium.ordered_text ORDER BY t) t");
+	execute_test("random_text",  "SELECT COUNT(*) FROM (SELECT * FROM medium.random_text ORDER BY t) t");
+	printf("\n");
+
+	printf("# Tests on medium-sized tables (1 GB), 2GB work_mem\n");
+	printf("# I thought 2GB would be enough to do a quicksort, but because of\n");
+	printf("# SortTuple overhead (?), it doesn't fit. Performs an external sort with two runs\n");
+	printf("-----\n");
+	execute("set work_mem='2048MB'");
+	execute_test("ordered_ints", "SELECT COUNT(*) FROM (SELECT * FROM medium.ordered_ints ORDER BY i) t");
+	execute_test("random_ints",  "SELECT COUNT(*) FROM (SELECT * FROM medium.random_ints ORDER BY i) t");
+	execute_test("ordered_text", "SELECT COUNT(*) FROM (SELECT * FROM medium.ordered_text ORDER BY t) t");
+	execute_test("random_text",  "SELECT COUNT(*) FROM (SELECT * FROM medium.random_text ORDER BY t) t");
+	printf("\n");
+
+	PQfinish(conn);
+
+	return 0;
+}
-- 
2.9.3

