diff --git a/contrib/pg_stat_statements/pg_stat_statements.c b/contrib/pg_stat_statements/pg_stat_statements.c
new file mode 100644
index 8dc3054..a3ace20
*** a/contrib/pg_stat_statements/pg_stat_statements.c
--- b/contrib/pg_stat_statements/pg_stat_statements.c
*************** static const struct config_enum_entry tr
*** 143,152 ****
--- 143,167 ----
{NULL, 0, false}
};
+ /*
+  * Query-cleaning levels selectable through the pg_stat_statements.clean
+  * setting (see clear_options, which maps the setting's names to these).
+  */
+ typedef enum
+ {
+ PGSS_CLEAR_NONE, /* no query cleaning at all */
+ PGSS_CLEAR_BASIC, /* replace literal values with :s / :n / :b */
+ PGSS_CLEAR_AGGRESSIVE /* basic, plus collapse runs of whitespace */
+ } PGSSClearLevel;
+
+ /*
+  * Accepted values of pg_stat_statements.clean and the PGSSClearLevel each
+  * one selects.  The all-NULL entry terminates the table.
+  */
+ static const struct config_enum_entry clear_options[] = {
+ {"none", PGSS_CLEAR_NONE, false},
+ {"basic", PGSS_CLEAR_BASIC, false},
+ {"aggressive", PGSS_CLEAR_AGGRESSIVE, false},
+ {NULL, 0, false}
+ };
+
static int pgss_max; /* max # statements to track */
static int pgss_track; /* tracking level */
static bool pgss_track_utility; /* whether to track utility commands */
static bool pgss_save; /* whether to save stats across shutdown */
+ static int pgss_clean; /* query cleaning level (a PGSSClearLevel value) */
#define pgss_enabled() \
*************** static Size pgss_memsize(void);
*** 183,189 ****
static pgssEntry *entry_alloc(pgssHashKey *key);
static void entry_dealloc(void);
static void entry_reset(void);
!
/*
* Module load callback
--- 198,204 ----
static pgssEntry *entry_alloc(pgssHashKey *key);
static void entry_dealloc(void);
static void entry_reset(void);
! static char * pgss_clean_query(const char * query);
/*
* Module load callback
*************** _PG_init(void)
*** 252,257 ****
--- 267,284 ----
NULL,
NULL);
+ DefineCustomEnumVariable("pg_stat_statements.clean",
+ "Clean the queries (remove parameter values).",
+ NULL,
+ &pgss_clean,
+ PGSS_CLEAR_NONE,
+ clear_options,
+ PGC_SIGHUP,
+ 0,
+ NULL,
+ NULL,
+ NULL);
+
EmitWarningsOnPlaceholders("pg_stat_statements");
/*
*************** pgss_ExecutorFinish(QueryDesc *queryDesc
*** 583,588 ****
--- 610,618 ----
static void
pgss_ExecutorEnd(QueryDesc *queryDesc)
{
+
+ char * query;
+
if (queryDesc->totaltime && pgss_enabled())
{
/*
*************** pgss_ExecutorEnd(QueryDesc *queryDesc)
*** 590,597 ****
* levels of hook all do this.)
*/
InstrEndLoop(queryDesc->totaltime);
! pgss_store(queryDesc->sourceText,
queryDesc->totaltime->total,
queryDesc->estate->es_processed,
&queryDesc->totaltime->bufusage);
--- 620,633 ----
* levels of hook all do this.)
*/
InstrEndLoop(queryDesc->totaltime);
+
+ if (pgss_clean != PGSS_CLEAR_NONE) {
+ query = pgss_clean_query(queryDesc->sourceText);
+ } else {
+ query = (char*)queryDesc->sourceText;
+ }
! pgss_store(query,
queryDesc->totaltime->total,
queryDesc->estate->es_processed,
&queryDesc->totaltime->bufusage);
*************** entry_reset(void)
*** 1040,1042 ****
--- 1076,1343 ----
LWLockRelease(pgss->lock);
}
+
+ /*
+ * Clear the query, so that queries that differ only by parameter
+ * values are considered equal (and represented by a single row in
+ * pg_stat_statements).
+ *
+ * This parsing is very basic and performs just these steps:
+ *
+ * a) replaces string values with :s
+ * b) replaces numeric values with :n
+ * c) replaces boolean values with :b
+ * d) replaces runs of whitespace (whatever isspace() considers
+ * a space) with a single space (aggressive level only)
+ *
+ * This surely is very simple and does not help with differently
+ * formatted queries - e.g. those two queries
+ *
+ * SELECT ((1));
+ * SELECT ( (1) );
+ *
+ * are considered different because the function produces this:
+ *
+ * SELECT ((:n));
+ * SELECT ( (:n) );
+ *
+ * The parsing assumes the query is syntactically valid; that holds because
+ * the cleaning runs only after the query has executed successfully.
+ *
+ * Possible enhancements (in no special order), mostly aiming to
+ * normalize the queries further:
+ *
+ * a) replace multiple values in the IN clause with a single one
+ * b) remove comments (not sure about this)
+ * c) many others ...
+ *
+ */
+
+ /*
+  * Helper functions used by pgss_clean_query() while scanning the query.
+  *
+  * Each *_start() function tests whether a literal of the given kind begins
+  * at query[idx].  The matching *_find_end() function returns the position
+  * that pgss_clean_query() assigns to its scan index before advancing by one.
+  * In all of them, len is the length of query.
+  */
+ static bool string_start(const char * query, int idx, int len);
+ static int string_find_end(const char * query, int idx, int len);
+
+ static bool number_start(const char * query, int idx, int len);
+ static int number_find_end(const char * query, int idx, int len);
+
+ static bool boolean_start(const char * query, int idx, int len);
+ static int boolean_find_end(const char * query, int idx, int len);
+
+ /*
+  * pgss_clean_query
+  *      Build a cleaned copy of a query text.
+  *
+  * String, numeric and boolean literals are replaced with the placeholders
+  * :s, :n and :b.  When pgss_clean is PGSS_CLEAR_AGGRESSIVE, every run of
+  * whitespace characters is additionally reduced to a single space and
+  * leading whitespace is dropped.  Processing stops at the first semicolon
+  * that is not inside a recognized literal, or when the output buffer is
+  * nearly full (whichever comes first).
+  *
+  * query: NUL-terminated text of the executed statement.
+  *
+  * Returns a palloc'd, NUL-terminated buffer of pgss->query_size + 1 bytes
+  * with trailing spaces removed.  This function does not free it.
+  */
+ static char *
+ pgss_clean_query(const char *query)
+ {
+     /* length of the original query */
+     int         len = strlen(query);
+
+     /* read position in query, write position in new_query */
+     int         i = 0;
+     int         idx = 0;
+
+     /* buffer for the new query */
+     char       *new_query = (char *) palloc(pgss->query_size + 1);
+
+     /*
+      * Each iteration appends at most two bytes, so the margin of four in
+      * the loop condition keeps every write inside the buffer.
+      */
+     while ((i < len) && ((idx + 4) < pgss->query_size))
+     {
+         char        marker = '\0';  /* placeholder letter, if any */
+         int         end = i;        /* where the literal scan stopped */
+
+         /* check what we're dealing with */
+         if (string_start(query, i, len))
+         {
+             end = string_find_end(query, i, len);
+             marker = 's';
+         }
+         else if (number_start(query, i, len))
+         {
+             end = number_find_end(query, i, len);
+             marker = 'n';
+         }
+         else if (boolean_start(query, i, len))
+         {
+             end = boolean_find_end(query, i, len);
+             marker = 'b';
+         }
+
+         if (marker != '\0')
+         {
+             /*
+              * Never let a helper move the scan backwards: doing so would
+              * re-read the same character forever and fill the output with
+              * placeholders.
+              */
+             if (end > i)
+                 i = end;
+
+             new_query[idx++] = ':';
+             new_query[idx++] = marker;
+         }
+         else if (isspace((unsigned char) query[i])
+                  && (pgss_clean == PGSS_CLEAR_AGGRESSIVE))
+         {
+             /* emit one space per run of whitespace, and none up front */
+             if ((idx > 0) && (new_query[idx - 1] != ' '))
+                 new_query[idx++] = ' ';
+         }
+         else if (query[i] == ';')
+         {
+             /* a semi-colon, so terminate the processing */
+             break;
+         }
+         else
+         {
+             /* anything else is copied verbatim */
+             new_query[idx++] = query[i];
+         }
+
+         i++;
+     }
+
+     /* terminate the query and remove the trailing spaces */
+     new_query[idx] = '\0';
+     while ((idx > 0) && (new_query[idx - 1] == ' '))
+         new_query[--idx] = '\0';
+
+     return new_query;
+ }
+
+ /*
+ * Implementation of the helper methods.
+ */
+ /*
+  * string_start
+  *      Report whether a string literal begins at query[idx].
+  *
+  * A literal begins at a single quote, or at a '$' that opens a dollar-quote
+  * tag: '$', an optional tag made of letters, digits and underscores that
+  * does not start with a digit, then another '$'.  A '$' followed by a digit
+  * (a positional parameter such as $1) or glued to a preceding identifier
+  * character is not a string start.
+  *
+  * query/len: the query text and its length; idx: position to test.
+  */
+ static bool
+ string_start(const char *query, int idx, int len)
+ {
+     int         pos;
+
+     if ((idx < 0) || (idx >= len))
+         return false;
+
+     if (query[idx] == '\'')
+         return true;
+
+     if (query[idx] != '$')
+         return false;
+
+     /* a '$' that continues an identifier does not open a dollar quote */
+     if (idx > 0)
+     {
+         unsigned char prev = (unsigned char) query[idx - 1];
+
+         if ((prev >= 'a' && prev <= 'z') || (prev >= 'A' && prev <= 'Z') ||
+             (prev >= '0' && prev <= '9') || (prev == '_') || (prev >= 0x80))
+             return false;
+     }
+
+     /* "$1" and friends are parameters, not dollar quotes */
+     pos = idx + 1;
+     if ((pos < len) && (query[pos] >= '0') && (query[pos] <= '9'))
+         return false;
+
+     /* skip over the (possibly empty) tag */
+     while (pos < len)
+     {
+         unsigned char c = (unsigned char) query[pos];
+
+         if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+               (c >= '0' && c <= '9') || (c == '_') || (c >= 0x80)))
+             break;
+         pos++;
+     }
+
+     /* the tag must be closed by a second '$' */
+     return (pos < len) && (query[pos] == '$');
+ }
+
+ /*
+  * string_find_end
+  *      Locate the end of the string literal that starts at query[idx].
+  *
+  * Handles quote-delimited and dollar-quoted literals.  Inside a
+  * quote-delimited literal a backslash is taken to escape the character that
+  * follows it, and a doubled quote stands for an embedded quote.
+  * NOTE(review): with standard_conforming_strings enabled, plain '...'
+  * literals do not use backslash escapes; confirm whether that case needs
+  * separate handling.
+  *
+  * Returns the index of the last character of the literal (the closing
+  * quote, or the final '$' of the closing tag).  If query[idx] is a '$' that
+  * does not open a dollar-quote tag, idx is returned so that only this one
+  * character is consumed.  If no end is found, a DEBUG1 message is logged
+  * and len is returned.
+  */
+ static int
+ string_find_end(const char *query, int idx, int len)
+ {
+     if (query[idx] == '\'')
+     {
+         int         pos = idx + 1;
+
+         while (pos < len)
+         {
+             if ((query[pos] == '\\') && ((pos + 1) < len))
+             {
+                 /* the escape covers exactly the next character */
+                 pos += 2;
+             }
+             else if (query[pos] == '\'')
+             {
+                 /* a doubled quote is an embedded quote, not the end */
+                 if (((pos + 1) < len) && (query[pos + 1] == '\''))
+                     pos += 2;
+                 else
+                     return pos;
+             }
+             else
+                 pos++;
+         }
+     }
+     else if (query[idx] == '$')
+     {
+         const char *open = &query[idx];
+         const char *p = open + 1;
+         const char *scan;
+         const char *close;
+         int         tag_len;
+
+         /* walk over the tag; it must not start with a digit */
+         if (!((*p >= '0') && (*p <= '9')))
+         {
+             for (;;)
+             {
+                 unsigned char c = (unsigned char) *p;
+
+                 if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
+                       (c >= '0' && c <= '9') || (c == '_') || (c >= 0x80)))
+                     break;
+                 p++;
+             }
+         }
+
+         /* not a dollar-quote tag (e.g. "$1"): consume only the '$' */
+         if (*p != '$')
+             return idx;
+
+         /* tag length including both '$' delimiters */
+         tag_len = (int) (p - open) + 1;
+
+         /*
+          * Look for the next occurrence of the tag, starting right after
+          * the opening one so that an empty literal ($$$$) is found too.
+          */
+         scan = open + tag_len;
+         while ((close = strchr(scan, '$')) != NULL)
+         {
+             if (strncmp(close, open, tag_len) == 0)
+                 return (int) (close - query) + tag_len - 1;
+             scan = close + 1;
+         }
+     }
+
+     elog(DEBUG1, "end of string starting at %d not found", idx);
+
+     return len;
+ }
+
+ /*
+  * number_start
+  *      Report whether a numeric literal begins at query[idx].
+  *
+  * A number begins at a digit, or at a '.' that is immediately followed by
+  * a digit.  A position directly after a letter, digit, underscore, '$' or
+  * non-ASCII byte is never a number start: there the character belongs to
+  * an identifier (col1), a parameter reference ($1) or a qualified name
+  * (t.col), none of which may be replaced.
+  *
+  * query/len: the query text and its length; idx: position to test.
+  */
+ static bool
+ number_start(const char *query, int idx, int len)
+ {
+     unsigned char cur;
+
+     if ((idx < 0) || (idx >= len))
+         return false;
+
+     if (idx > 0)
+     {
+         unsigned char prev = (unsigned char) query[idx - 1];
+
+         if (isalnum(prev) || (prev == '_') || (prev == '$') || (prev >= 0x80))
+             return false;
+     }
+
+     cur = (unsigned char) query[idx];
+     if (isdigit(cur))
+         return true;
+
+     /* a leading decimal point counts only when digits follow (".5") */
+     return (cur == '.')
+         && ((idx + 1) < len)
+         && isdigit((unsigned char) query[idx + 1]);
+ }
+
+ /*
+  * number_find_end
+  *      Locate the end of the numeric literal that starts at query[idx].
+  *
+  * The character at idx is always part of the number.  After it, letters and
+  * digits are consumed (which covers forms such as 1e5), together with at
+  * most one decimal point.
+  *
+  * Returns the index of the last character of the number.  The result is
+  * never smaller than idx, so the caller always makes progress; a number
+  * that runs to the end of the query simply ends at len - 1.
+  */
+ static int
+ number_find_end(const char *query, int idx, int len)
+ {
+     int         last = idx;
+     bool        seen_point = (query[idx] == '.');
+
+     while ((last + 1) < len)
+     {
+         unsigned char next = (unsigned char) query[last + 1];
+
+         if (isalnum(next))
+             last++;
+         else if ((next == '.') && !seen_point)
+         {
+             /* keep "1.5" together instead of splitting it at the point */
+             seen_point = true;
+             last++;
+         }
+         else
+             break;
+     }
+
+     return last;
+ }
+
+ /*
+  * boolean_start_true
+  *      Report whether the keyword "true" (in any letter case) stands at
+  *      query[idx] as a separate word.
+  *
+  * The match is rejected when the keyword is directly preceded or followed
+  * by a letter, digit, underscore, '$' or non-ASCII byte, so identifiers
+  * such as "trueness" or "is_true" are left alone.
+  *
+  * query/len: the query text and its length; idx: position to test.
+  */
+ static bool
+ boolean_start_true(const char *query, int idx, int len)
+ {
+     static const char keyword[] = "true";
+     const int   kwlen = 4;
+     int         i;
+
+     if ((idx < 0) || ((len - idx) < kwlen))
+         return false;
+
+     /* word boundary before the keyword */
+     if (idx > 0)
+     {
+         unsigned char prev = (unsigned char) query[idx - 1];
+
+         if (isalnum(prev) || (prev == '_') || (prev == '$') || (prev >= 0x80))
+             return false;
+     }
+
+     /* word boundary after the keyword */
+     if ((idx + kwlen) < len)
+     {
+         unsigned char next = (unsigned char) query[idx + kwlen];
+
+         if (isalnum(next) || (next == '_') || (next == '$') || (next >= 0x80))
+             return false;
+     }
+
+     /* case-insensitive comparison, no temporary copy needed */
+     for (i = 0; i < kwlen; i++)
+     {
+         if (tolower((unsigned char) query[idx + i]) != keyword[i])
+             return false;
+     }
+
+     return true;
+ }
+
+ /*
+  * boolean_start_false
+  *      Report whether the keyword "false" (in any letter case) stands at
+  *      query[idx] as a separate word.
+  *
+  * The match is rejected when the keyword is directly preceded or followed
+  * by a letter, digit, underscore, '$' or non-ASCII byte, so identifiers
+  * such as "falsehood" or "is_false" are left alone.
+  *
+  * query/len: the query text and its length; idx: position to test.
+  */
+ static bool
+ boolean_start_false(const char *query, int idx, int len)
+ {
+     static const char keyword[] = "false";
+     const int   kwlen = 5;
+     int         i;
+
+     if ((idx < 0) || ((len - idx) < kwlen))
+         return false;
+
+     /* word boundary before the keyword */
+     if (idx > 0)
+     {
+         unsigned char prev = (unsigned char) query[idx - 1];
+
+         if (isalnum(prev) || (prev == '_') || (prev == '$') || (prev >= 0x80))
+             return false;
+     }
+
+     /* word boundary after the keyword */
+     if ((idx + kwlen) < len)
+     {
+         unsigned char next = (unsigned char) query[idx + kwlen];
+
+         if (isalnum(next) || (next == '_') || (next == '$') || (next >= 0x80))
+             return false;
+     }
+
+     /* case-insensitive comparison, no temporary copy needed */
+     for (i = 0; i < kwlen; i++)
+     {
+         if (tolower((unsigned char) query[idx + i]) != keyword[i])
+             return false;
+     }
+
+     return true;
+ }
+
+ /*
+  * boolean_start
+  *      Report whether a boolean keyword ("true" or "false") begins at
+  *      query[idx].  "true" is tested first; "false" only if that fails.
+  */
+ static bool
+ boolean_start(const char *query, int idx, int len)
+ {
+     if (boolean_start_true(query, idx, len))
+         return true;
+
+     return boolean_start_false(query, idx, len);
+ }
+
+ /*
+  * boolean_find_end
+  *      Return the index of the last character of the boolean keyword that
+  *      starts at query[idx].
+  *
+  * Must be called only when boolean_start() reported a keyword at idx.  The
+  * caller advances one position past the returned index, so the result has
+  * to point at the keyword's final letter (idx + 3 for "true", idx + 4 for
+  * "false"); returning the position after the keyword would make the caller
+  * skip the character that follows it.
+  */
+ static int
+ boolean_find_end(const char *query, int idx, int len)
+ {
+     int         kwlen = boolean_start_true(query, idx, len) ? 4 : 5;
+
+     return idx + kwlen - 1;
+ }
diff --git a/doc/src/sgml/pgstatstatements.sgml b/doc/src/sgml/pgstatstatements.sgml
new file mode 100644
index 5a0230c..d3e4842
*** a/doc/src/sgml/pgstatstatements.sgml
--- b/doc/src/sgml/pgstatstatements.sgml
***************
*** 264,269 ****
--- 264,290 ----
+
+
+
+ pg_stat_statements.clean (enum)
+
+
+
+
+ <varname>pg_stat_statements.clean</varname> controls how
+ statements are preprocessed before they are stored.
+ Specify <literal>basic</literal> to replace parameter values with common
+ placeholders (<literal>:n</literal> for numbers, <literal>:s</literal> for strings and
+ <literal>:b</literal> for booleans), <literal>aggressive</literal> to further
+ clean the queries (remove line endings, replace multiple spaces
+ with a single one), or <literal>none</literal> to disable cleaning.
+ The default value is <literal>none</literal>.
+ Only superusers can change this setting.
+
+
+
+