/*-------------------------------------------------------------------------
 *
 * pg_test_iorates --- measures I/O and prefetch distance impact
 *
 * Creates a scratch file, then reports read throughput for:
 *	 - sequential reads (buffered, then direct I/O)
 *	 - random reads with posix_fadvise(WILLNEED) issued a configurable
 *	   number of blocks ahead of the actual read
 *
 * Options:
 *	 -f, --file=NAME		scratch file to create (it is removed on exit)
 *	 -s, --size=BYTES		scratch file size
 *	 -b, --block-size=BYTES	size of each read/write request
 *	 -t, --time=SECONDS		duration of each individual test
 *
 * Win32 not tested
 *
 *-------------------------------------------------------------------------
 */

/*
 * Feature-test macros only take effect when defined before the first
 * system header is included, so this has to precede everything else.
 * (glibc-internal macros such as __USE_GNU are derived from it and must not
 * be defined by applications.)
 */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif

#include "c.h"
#include "postgres_fe.h"

#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <signal.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

#include "common/logging.h"
#include "common/pg_prng.h"
#include "getopt_long.h"

/* Parenthesized and computed in 64 bits: the product does not fit in int. */
#define DEFAULT_FILE_SIZE	((int64) 16 * 1024 * 1024 * 1024)	/* 16 GB */
#define DEFAULT_BLOCK_SIZE	8192	/* 8 KB */
#define DEFAULT_TEST_TIME	3		/* seconds */

/* Upper bound accepted for --block-size. */
#define MAX_BLOCK_SIZE		((uint64) 1024 * 1024 * 1024)

/*
 * Alignment of the I/O buffer.  Direct I/O needs the buffer aligned to the
 * device's logical block size; 4096 is assumed to cover common devices
 * (the previous value of 512 does not satisfy 4 kB-sector devices).
 */
#define IO_BUFFER_ALIGNMENT 4096

#ifndef WIN32
#define DIRECT_FLAG O_DIRECT
#else
#define DIRECT_FLAG _O_DIRECT
#endif

static const char *progname;

static const char *test_filename = "pg_test_io.tmp";
static int	needs_unlink = 0;
static int64 file_size = DEFAULT_FILE_SIZE;
static size_t block_size = DEFAULT_BLOCK_SIZE;
static unsigned test_seconds = DEFAULT_TEST_TIME;

/* Set by the SIGALRM handler to end the currently running test. */
static volatile sig_atomic_t alarm_triggered;

static void handle_args(int argc, char **argv);
static uint64 parse_positive_option(const char *optname, const char *value,
									uint64 max_value);
static void create_test_file(void);
static void cleanup(void);
static void signal_handler(SIGNAL_ARGS);
static char *alloc_io_buffer(void);
static int	open_test_file(bool use_direct);
static off_t random_block_offset(uint64 max_blocks);
static void issue_prefetch(int fd, off_t offset);
static void report_throughput(const struct timeval *start,
							  const struct timeval *end,
							  uint64 total_read);
static void test_sequential_read(int use_direct);
static void test_random_read(int use_direct, int use_advise,
							 int prefetch_distance);


int
main(int argc, char **argv)
{
	static const int test_distances[] = {1, 2, 4, 8, 16, 32, 64};
	const int	num_distances = (int) (sizeof(test_distances) / sizeof(test_distances[0]));

	progname = get_progname(argv[0]);
	pg_logging_init(progname);

	handle_args(argc, argv);

	/*
	 * Remove the scratch file on every exit path, including pg_fatal(),
	 * which leaves via exit().
	 */
	atexit(cleanup);

#ifndef WIN32

	/*
	 * Use pqsignal() rather than raw signal(): the handler-reset and
	 * syscall-restart behavior of signal() differs between platforms.
	 */
	pqsignal(SIGALRM, signal_handler);
#endif

	pg_prng_seed(&pg_global_prng_state, (uint64) time(NULL));

	printf("File size: %.2f GB, Block size: %zu bytes\n",
		   (double) file_size / (1024.0 * 1024.0 * 1024.0), block_size);

	create_test_file();

	test_sequential_read(false);
#ifdef DIRECT_FLAG
	test_sequential_read(true);
#endif

	printf("random read performance with different prefetch distances:\n");
	for (int i = 0; i < num_distances; i++)
	{
		int			distance = test_distances[i];

		printf("using prefetch distance %d: ", distance);
		fflush(stdout);
		test_random_read(false, true, distance);
	}

	return 0;
}

/*
 * Parse a strictly positive decimal integer option value.
 *
 * Exits via pg_fatal() when the value is empty, has trailing garbage, is
 * negative, zero, out of range for strtoull(), or larger than max_value.
 */
static uint64
parse_positive_option(const char *optname, const char *value, uint64 max_value)
{
	char	   *end;
	unsigned long long parsed;

	errno = 0;
	parsed = strtoull(value, &end, 10);

	/* strtoull() silently negates "-N", so reject a minus sign explicitly */
	if (end == value || *end != '\0' || errno == ERANGE ||
		strchr(value, '-') != NULL ||
		parsed == 0 || parsed > max_value)
		pg_fatal("invalid value \"%s\" for option --%s", value, optname);

	return (uint64) parsed;
}

/*
 * Process the command line, overriding the default file name, file size,
 * block size and per-test duration.
 *
 * All numeric values are validated: a zero block size or a file smaller
 * than one block would lead to division by zero in the read tests, and a
 * zero duration would disarm alarm() so that a test never ends.
 */
static void
handle_args(int argc, char **argv)
{
	static struct option long_options[] = {
		{"file", required_argument, NULL, 'f'},
		{"size", required_argument, NULL, 's'},
		{"block-size", required_argument, NULL, 'b'},
		{"time", required_argument, NULL, 't'},
		{NULL, 0, NULL, 0}
	};
	int			c;

	while ((c = getopt_long(argc, argv, "f:s:b:t:", long_options, NULL)) != -1)
	{
		switch (c)
		{
			case 'f':
				test_filename = pg_strdup(optarg);
				break;
			case 's':
				file_size = (int64) parse_positive_option("size", optarg,
														  (uint64) PG_INT64_MAX);
				break;
			case 'b':
				block_size = (size_t) parse_positive_option("block-size", optarg,
															MAX_BLOCK_SIZE);
				break;
			case 't':
				test_seconds = (unsigned) parse_positive_option("time", optarg,
																(uint64) UINT_MAX);
				break;
			default:
				pg_fatal("Unknown argument");
		}
	}

	if (file_size < (int64) block_size)
		pg_fatal("file size must be at least one block (%zu bytes)",
				 block_size);
}

/*
 * Create (or truncate) the scratch file and fill it with file_size bytes,
 * written block_size bytes at a time, then flush it to storage.
 */
static void
create_test_file(void)
{
	int			fd;
	char	   *buf;
	int64		remaining;

	buf = pg_malloc(block_size);

	/* Fill the buffer with a repeating 0..255 byte pattern */
	for (size_t i = 0; i < block_size; i++)
		buf[i] = (char) (i % 256);

	fd = open(test_filename, O_WRONLY | O_CREAT | O_TRUNC | PG_BINARY, 0600);
	if (fd < 0)
		pg_fatal("could not create test file \"%s\": %m", test_filename);
	needs_unlink = 1;

	/* Write file in blocks; the last one may be shorter */
	remaining = file_size;
	while (remaining > 0)
	{
		size_t		chunk;
		ssize_t		written;

		chunk = (remaining > (int64) block_size) ? block_size : (size_t) remaining;
		written = write(fd, buf, chunk);
		if (written < 0)
		{
			if (errno == EINTR)
				continue;
			pg_fatal("could not write test file \"%s\": %m", test_filename);
		}
		/* A zero-byte write makes no progress; bail out instead of spinning */
		if (written == 0)
			pg_fatal("could not write test file \"%s\": wrote 0 of %zu bytes",
					 test_filename, chunk);
		remaining -= written;
	}

	if (fsync(fd) != 0)
		pg_fatal("could not fsync test file \"%s\": %m", test_filename);
	if (close(fd) != 0)
		pg_fatal("could not close test file \"%s\": %m", test_filename);

	free(buf);
}

/*
 * Allocate one block_size buffer aligned to IO_BUFFER_ALIGNMENT.
 *
 * The result must be released with free().
 *
 * FIXME: search for PG variant of this
 */
static char *
alloc_io_buffer(void)
{
	void	   *ptr = NULL;
	int			rc;

	/*
	 * posix_memalign() reports failure through its return value (a positive
	 * error number) and does not set errno.
	 */
	rc = posix_memalign(&ptr, IO_BUFFER_ALIGNMENT, block_size);
	if (rc != 0)
		pg_fatal("posix_memalign failed: %s", strerror(rc));

	return (char *) ptr;
}

/*
 * Open the scratch file read-only, optionally requesting direct I/O.
 * Returns the file descriptor; exits via pg_fatal() on failure.
 */
static int
open_test_file(bool use_direct)
{
	int			fd;
	int			flags = O_RDONLY | PG_BINARY;

#ifdef DIRECT_FLAG
	if (use_direct)
		flags |= DIRECT_FLAG;
#endif

	fd = open(test_filename, flags);
	if (fd < 0)
		pg_fatal("could not open test file \"%s\": %m", test_filename);

	return fd;
}

/*
 * Pick the byte offset of a random whole block among the first max_blocks
 * blocks of the file.  max_blocks must be greater than zero.
 */
static off_t
random_block_offset(uint64 max_blocks)
{
	uint64		blkno = pg_prng_uint64(&pg_global_prng_state) % max_blocks;

	return (off_t) (blkno * block_size);
}

/*
 * Hint that one block at the given offset will be read soon.
 *
 * This is advisory only, so a failure is deliberately ignored.  On
 * platforms that do not define POSIX_FADV_WILLNEED this is a no-op.
 */
static void
issue_prefetch(int fd, off_t offset)
{
#if defined(POSIX_FADV_WILLNEED)
	(void) posix_fadvise(fd, offset, block_size, POSIX_FADV_WILLNEED);
#else
	(void) fd;
	(void) offset;
#endif
}

/*
 * Print the throughput in MB/s for total_read bytes transferred between
 * the two timestamps.
 */
static void
report_throughput(const struct timeval *start, const struct timeval *end,
				  uint64 total_read)
{
	double		elapsed;
	double		mbps;

	elapsed = (end->tv_sec - start->tv_sec) +
		(end->tv_usec - start->tv_usec) / 1000000.0;

	/* Guard against a zero or negative interval */
	mbps = (elapsed > 0.0) ? (total_read / (1024.0 * 1024.0)) / elapsed : 0.0;

	printf("%.2f MB/s\n", mbps);
}

/*
 * Read the scratch file front to back, repeatedly, for test_seconds seconds
 * and print the achieved throughput.
 *
 * use_direct: when true, open the file with DIRECT_FLAG.
 */
static void
test_sequential_read(int use_direct)
{
	int			fd;
	char	   *buf;
	uint64		total_read = 0;
	struct timeval start,
				end;

	buf = alloc_io_buffer();
	fd = open_test_file(use_direct);

	printf("%s sequential read: ", use_direct ? "direct" : "buffered");
	fflush(stdout);

	alarm_triggered = false;
	alarm(test_seconds);

	gettimeofday(&start, NULL);

	while (!alarm_triggered)
	{
		off_t		offset = 0;

		/* One pass over the file; start over from offset 0 afterwards */
		while (offset < file_size && !alarm_triggered)
		{
			ssize_t		bytes_read = pread(fd, buf, block_size, offset);

			if (bytes_read < 0)
			{
				/* interrupted by the alarm: loop condition decides */
				if (errno == EINTR)
					continue;
				pg_fatal("read error, rc=%zd: %m", bytes_read);
			}

			/* Premature end of file: restart the pass rather than spin */
			if (bytes_read == 0)
				break;

			total_read += bytes_read;
			offset += bytes_read;
		}
	}

	gettimeofday(&end, NULL);

	close(fd);
	free(buf);

	report_throughput(&start, &end, total_read);
}

/*
 * Remove the scratch file if this program created it.  Registered with
 * atexit(); safe to call more than once.
 */
static void
cleanup(void)
{
	if (needs_unlink)
	{
		needs_unlink = 0;
		unlink(test_filename);
	}
}

/*
 * SIGALRM handler: tell the running test loop that its time is up.
 */
static void
signal_handler(SIGNAL_ARGS)
{
	alarm_triggered = true;
}

/*
 * Read randomly chosen blocks of the scratch file for test_seconds seconds
 * and print the achieved throughput.
 *
 * use_direct:		  when true, open the file with DIRECT_FLAG.
 * use_advise:		  when true, announce each block with issue_prefetch()
 *					  prefetch_distance reads before it is actually read.
 * prefetch_distance: number of outstanding prefetch hints; values below 1
 *					  disable prefetching.
 */
static void
test_random_read(int use_direct, int use_advise, int prefetch_distance)
{
	int			fd;
	char	   *buf;
	uint64		total_read = 0;
	struct timeval start,
				end;
	uint64		max_blocks = (uint64) (file_size / (int64) block_size);
	off_t	   *prefetch_queue = NULL;
	int			qhead = 0;

	if (max_blocks == 0)
		pg_fatal("file size must be at least one block (%zu bytes)",
				 block_size);

	/* A ring of zero slots cannot be indexed; treat it as "no prefetch" */
	if (prefetch_distance < 1)
		use_advise = false;

	buf = alloc_io_buffer();
	fd = open_test_file(use_direct);

	/* Initialize prefetch queue if using advice */
	if (use_advise)
	{
		prefetch_queue = pg_malloc((size_t) prefetch_distance * sizeof(off_t));

		/* Pre-fill prefetch queue, issuing a hint for every entry */
		for (int i = 0; i < prefetch_distance; i++)
		{
			prefetch_queue[i] = random_block_offset(max_blocks);
			issue_prefetch(fd, prefetch_queue[i]);
		}
	}

	alarm_triggered = false;
	alarm(test_seconds);

	gettimeofday(&start, NULL);

	while (!alarm_triggered)
	{
		off_t		offset;
		ssize_t		bytes_read;

		if (use_advise)
		{
			off_t		new_offset;

			/* Oldest hinted block is the one to read now */
			offset = prefetch_queue[qhead];

			/* Reuse its slot for a freshly hinted block */
			new_offset = random_block_offset(max_blocks);
			issue_prefetch(fd, new_offset);
			prefetch_queue[qhead] = new_offset;
			qhead = (qhead + 1) % prefetch_distance;
		}
		else
		{
			/* Simple random read without prefetch */
			offset = random_block_offset(max_blocks);
		}

		bytes_read = pread(fd, buf, block_size, offset);
		if (bytes_read < 0)
		{
			/* interrupted by the alarm: loop condition decides */
			if (errno == EINTR)
				continue;
			pg_fatal("read error, rc=%zd: %m", bytes_read);
		}

		total_read += bytes_read;
	}

	gettimeofday(&end, NULL);

	close(fd);
	free(buf);
	free(prefetch_queue);

	report_throughput(&start, &end, total_read);
}