/*********************************************************************
 * Compile postgres first with the crc32c implementation under test (using
 * ARM vmull_p64 or not).
 * To keep the build simple and make it compile correctly, comment out the
 * elog-related code in pg_crc32c_armv8_choose.c.
 * $ gcc -I ../postgres/_install/include -I ../postgres/_install/include/server main.c \
 *       -L ../postgres/build/src/port -l pgport_srv -O2 -o main
 * This test was run on Neoverse-N1:
 * $ ./main.no_vmull
 * data size is 512 bytes, and compute crc cost 139 us totally, 0.135742 us per loop
 * data size is 4096 bytes, and compute crc cost 1061 us totally, 1.036133 us per loop
 * $ ./main.use_vmull
 * data size is 512 bytes, and compute crc cost 101 us totally, 0.098633 us per loop
 * data size is 4096 bytes, and compute crc cost 540 us totally, 0.527344 us per loop
 * We can see that computing crc32c without vmull_p64 costs about twice as
 * much as computing it with vmull_p64 when the data size is large, and that
 * the cost is almost the same when the data size is small.
*********************************************************************/ #include #include #include #include #include #include "c.h" #include "port/pg_crc32c.h" uint64_t GetTickCount() { struct timeval tv; gettimeofday(&tv, NULL); return tv.tv_sec * 1000000 + tv.tv_usec; } int main() { #define CASE_CNT 2 uint32_t test_size[CASE_CNT] = {512, 1024 * 4}; for (int case_cnt = 0; case_cnt < CASE_CNT; case_cnt++) { uint8_t *buf = (uint8_t *) malloc(test_size[case_cnt] * sizeof(uint8_t)); srand(0); for (int i = 0; i < test_size[case_cnt]; i++) { *(buf + i) = (uint8_t) (rand() % 256u); } static const uint32_t kLoop = 1024; uint32_t crc = 0; uint64_t start = GetTickCount(); INIT_CRC32C(crc); for (int i = 0; i < kLoop; i++) { COMP_CRC32C(crc, buf, test_size[case_cnt]); } FIN_CRC32C(crc); uint64_t stop = GetTickCount(); printf("data size is %d bytes, and compute crc cost %ld us totally, %f us per loop\n", test_size[case_cnt], stop - start, (double) (stop - start) / kLoop); free(buf); } #undef CASE_CNT return 0; }