diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c new file mode 100644 index 8dc3054..a3ace20 *** a/contrib/pg_stat_statements/pg_stat_statements.c --- b/contrib/pg_stat_statements/pg_stat_statements.c *************** static const struct config_enum_entry tr *** 143,152 **** --- 143,167 ---- {NULL, 0, false} }; + typedef enum + { + PGSS_CLEAR_NONE, /* no query cleaning at all */ + PGSS_CLEAR_BASIC, /* basic parameter value replacement */ + PGSS_CLEAR_AGGRESSIVE /* more replacements (spaces, comments) */ + } PGSSClearLevel; + + static const struct config_enum_entry clear_options[] = { + {"none", PGSS_CLEAR_NONE, false}, + {"basic", PGSS_CLEAR_BASIC, false}, + {"aggressive", PGSS_CLEAR_AGGRESSIVE, false}, + {NULL, 0, false} + }; + static int pgss_max; /* max # statements to track */ static int pgss_track; /* tracking level */ static bool pgss_track_utility; /* whether to track utility commands */ static bool pgss_save; /* whether to save stats across shutdown */ + static int pgss_clean; /* whether to clean query parameter values */ #define pgss_enabled() \ *************** static Size pgss_memsize(void); *** 183,189 **** static pgssEntry *entry_alloc(pgssHashKey *key); static void entry_dealloc(void); static void entry_reset(void); ! /* * Module load callback --- 198,204 ---- static pgssEntry *entry_alloc(pgssHashKey *key); static void entry_dealloc(void); static void entry_reset(void); ! static char * pgss_clean_query(const char * query); /* * Module load callback *************** _PG_init(void) *** 252,257 **** --- 267,284 ---- NULL, NULL); + DefineCustomEnumVariable("pg_stat_statements.clean", + "Clean the queries (remove parameter values).", + NULL, + &pgss_clean, + PGSS_CLEAR_NONE, + clear_options, + PGC_SIGHUP, + 0, + NULL, + NULL, + NULL); + EmitWarningsOnPlaceholders("pg_stat_statements"); /* *************** pgss_ExecutorFinish(QueryDesc *queryDesc *** 583,588 **** --- 610,618 ---- static void pgss_ExecutorEnd(QueryDesc *queryDesc) { + + char * query; + if (queryDesc->totaltime && pgss_enabled()) { /* *************** pgss_ExecutorEnd(QueryDesc *queryDesc) *** 590,597 **** * levels of hook all do this.) */ InstrEndLoop(queryDesc->totaltime); ! pgss_store(queryDesc->sourceText, queryDesc->totaltime->total, queryDesc->estate->es_processed, &queryDesc->totaltime->bufusage); --- 620,633 ---- * levels of hook all do this.) */ InstrEndLoop(queryDesc->totaltime); + + if (pgss_clean != PGSS_CLEAR_NONE) { + query = pgss_clean_query(queryDesc->sourceText); + } else { + query = (char*)queryDesc->sourceText; + } ! pgss_store(query, queryDesc->totaltime->total, queryDesc->estate->es_processed, &queryDesc->totaltime->bufusage); *************** entry_reset(void) *** 1040,1042 **** --- 1076,1343 ---- LWLockRelease(pgss->lock); } + + /* + * Clear the query, so that queries that differ only by parameter + * values are considered equal (and represented by a single row in + * pg_stat_statements). + * + * This parsing is just very basic and performs just these steps + * + * a) replaces string values with :s + * b) replaces numeric values with :n + * c) replaces boolean values with :b + * d) replaces multiple whitespaces (whatever isspace() considers + * a space with a single space) + * + * This surely is very simple and does not help with differently + * formatted queries - e.g. those two queries + * + * SELECT ((1)); + * SELECT ( (1) ); + * + * are considered different because the function produces this: + * + * SELECT ((:n)); + * SELECT ( (:n) ); + * + * The basic assumption is the validity of the query, but this + * is executed after successful execution so it's fine. + * + * Possible enhancements (in no special order), mostly aiming to + * normalize the queries further: + * + * a) replace multiple values in the IN clause with a single one + * b) remove comments (not sure about this) + * c) many others ... + * + */ + + /* those are just helper methods for parsing ... */ + static bool string_start(const char * query, int idx, int len); + static int string_find_end(const char * query, int idx, int len); + + static bool number_start(const char * query, int idx, int len); + static int number_find_end(const char * query, int idx, int len); + + static bool boolean_start(const char * query, int idx, int len); + static int boolean_find_end(const char * query, int idx, int len); + + /* + * Does the actual cleaning and returns the cleaned version. + */ + static + char * pgss_clean_query(const char * query) { + + int i, idx = 0; + + /* buffer for the new query */ + char * new_query = (char*)palloc(pgss->query_size+1); + + /* length of the original query */ + int len = strlen(query); + + /* end when the whole query is processed or when the output query + * is full (whichever comes first) */ + i = 0; + while ((i < len) && ((idx + 4) < pgss->query_size)) { + + /* check what we're dealing with */ + if (string_start(query, i, len)) { + + /* string - replace it with :s and find the end */ + i = string_find_end(query, i, len); + + new_query[idx++] = ':'; + new_query[idx++] = 's'; + + } else if (number_start(query, i, len)) { + + /* number - replace it with :n and find the end */ + i = number_find_end(query, i, len); + + new_query[idx++] = ':'; + new_query[idx++] = 'n'; + + } else if (boolean_start(query, i, len)) { + + /* number - replace it with :n and find the end */ + i = boolean_find_end(query, i, len); + + new_query[idx++] = ':'; + new_query[idx++] = 'b'; + + } else if (isspace(query[i]) + && (pgss_clean == PGSS_CLEAR_AGGRESSIVE)) { + + /* just a whitespace - check if there was a previous space */ + if ((idx > 0) && (new_query[idx-1] != ' ')) { + new_query[idx++] = ' '; + } + + } else if (query[i] == ';') { + + /* a semi-colon, so terminate the processing */ + break; + + } else { + + /* other string */ + new_query[idx++] = query[i]; + } + + i++; + + } + + /* terminate the query and remove the trailing spaces */ + new_query[idx] = '\0'; + while ((idx > 0) && (new_query[idx-1] == ' ')) { + new_query[--idx] = '\0'; + } + + return new_query; + + } + + /* + * Implementation of the helper methods. + */ + static + bool string_start(const char * query, int idx, int len) { + /* supports basic escape styles (plain, C-style, Unicode and dollar)*/ + return (query[idx] == '\'') || (query[idx] == '$'); + } + + static + int string_find_end(const char * query, int idx, int len) { + + bool isEscaped = false; + + /* is it C-style escape or a dollar escape? */ + if (query[idx] == '\'') { + + /* C-style escape */ + int endIdx = idx + 1; + + /* find the end of the string */ + while (endIdx < len) { + + if (query[endIdx] == '\\') { + isEscaped = !isEscaped; + } else if ((query[endIdx] == '\'') && (! isEscaped)) { + /* this is the actual end of string, so return it */ + return endIdx; + } + + endIdx++; + + } + + } else if (query[idx] == '$') { + + char * end; + char * tag; + int len; + + /* dollar escape - lets see what's the tag */ + end = strchr(&query[idx+1], '$'); + + /* copy the tag */ + len = end - &query[idx] + 2; + tag = (char*)palloc(len); + memcpy(tag, &query[idx], len-1); + tag[len-1] = '\0'; + + /* what's the next occurence of the tag */ + end = strstr(&query[idx+len], tag); + + return (idx + len - 2 + end - &query[idx]); + + } + + elog(DEBUG1, "end of string starting at %d not found", idx); + + return len; + + } + + static + bool number_start(const char * query, int idx, int len) { + /* numbers start with a digit or a dot */ + return (isdigit(query[idx]) || (query[idx] == '.')); + } + + static + int number_find_end(const char * query, int idx, int len) { + + int endIdx = idx; + + /* find the end of the string */ + while (idx < len) { + + if (! isalnum(query[endIdx])) { + /* the number actually ends at the previous character */ + return (endIdx-1); + } + + endIdx++; + + } + + elog(DEBUG1, "end of number starting at %d not found", idx); + + return len; + + } + + static bool boolean_start_true(const char * query, int idx, int len) { + + char buff[4]; + int i; + + /* true */ + if (len - idx >= 4) { + strncpy(buff, &query[idx], 4); + for (i = 0; i < 4; i++) { + buff[i] = tolower(buff[i]); + } + if (strncmp(buff, "true", 4) == 0) { + return true; + } + } + + return false; + } + + static bool boolean_start_false(const char * query, int idx, int len) { + + char buff[5]; + int i; + + /* true */ + if (len - idx >= 5) { + strncpy(buff, &query[idx], 5); + for (i = 0; i < 5; i++) { + buff[i] = tolower(buff[i]); + } + if (strncmp(buff, "false", 5) == 0) { + return true; + } + } + + return false; + } + + static bool boolean_start(const char * query, int idx, int len) { + + return (boolean_start_true(query, idx, len) + || boolean_start_false(query, idx, len)); + + } + + static int boolean_find_end(const char * query, int idx, int len) { + /* this should be called only when there's a boolean, so add + * 4 (true) or 5 (false) */ + return idx + (boolean_start_true(query, idx, len) ? 4 : 5); + } diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml new file mode 100644 index 5a0230c..d3e4842 *** a/doc/src/sgml/pgstatstatements.sgml --- b/doc/src/sgml/pgstatstatements.sgml *************** *** 264,269 **** --- 264,290 ---- + + + + pg_stat_statements.clean (enum) + + + + + pg_stat_statements.clean controls how are the + stataments preprocessed. + Specify basic to replace parameter values with common + values (:n for numbers, :s for strings and + :b for booleans), aggressive to further + clean the queries (remove line endings, replace multiple spaces + with a single one), or none to disable. + The default value is none. + Only superusers can change this setting. + + + +