INSERT/SELECT and excessive foreign key checks

From: Lodewijk Voege <lvoege(at)gmail(dot)com>
To: pgsql-hackers(at)postgresql(dot)org
Subject: INSERT/SELECT and excessive foreign key checks
Date: 2007-08-19 01:52:49
Message-ID: slrnfcf8ld.2vv9.lvoege@chucky.dimins.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-hackers

hello,

I'm working on an application that once in a while needs to shuffle some data
around with a query like:

INSERT INTO foo (some_field, bar_FK, baz_FK)
SELECT some_field,
12345 AS bar_FK,
baz_FK
FROM foo
WHERE bar_FK=123

ie. copy a block of data from a table into itself while setting one foreign
key to a constant and carrying some other foreign keys over as is.

the foreign key checks for this case appear very suboptimal: for every
inserted row, both the constant foreign key as well as the keys being carried
over are checked. some of these blocks of data being copied are large, the
largest block being (currently) 1.6M rows. the table has three such foreign
keys, which from reading the code means 4.8M trigger events get queued up,
where it only needs to do exactly one as far as I can tell.

I hacked up a patch that handles these two cases:
- for such an INSERT/SELECT, check constant FKs only once.
- for an INSERT/SELECT from/to the same table, don't check FKs that are
carried over as is at all. (it'd be nice to extend this to fields that
have the same FK constraint, rather than the current patch' strict
same-table, same-field condition)

the difference for this application is pretty dramatic: on my development
system, copying 1.6M rows in this manner with the table having 3 FKs on it:

- 8.3 from CVS with 128MB of shared memory takes just under 6 minutes. the
postgres process peaks at 552MB virtual, 527MB RSS and 135MB shared.
- 8.3 with patch and 128MB of shared memory takes 42 seconds. the process
peaks at 160MB virtual, 138MB RSS and 135MB shared.

so, is this optimization generally useful and correct? and if so, what do I
do to get a version of the patch in the tree? it's against CVS HEAD (well,
against the HEAD of the SVN mirror of CVS) and it passes the regression
tests. but I have no clue whether I'm trampling over abstraction barriers,
etc. comments appreciated.

thanks,
Lodewijk

Index: src/include/commands/trigger.h
===================================================================
--- src/include/commands/trigger.h (revision 28781)
+++ src/include/commands/trigger.h (working copy)
@@ -157,6 +157,7 @@
/*
* in utils/adt/ri_triggers.c
*/
+extern bool RI_FKey_all_attrs_covered(Trigger *trigger, Relation fk_rel, int16 *attnums, int num_attnums);
extern bool RI_FKey_keyequal_upd_pk(Trigger *trigger, Relation pk_rel,
HeapTuple old_row, HeapTuple new_row);
extern bool RI_FKey_keyequal_upd_fk(Trigger *trigger, Relation fk_rel,
Index: src/backend/executor/execMain.c
===================================================================
--- src/backend/executor/execMain.c (revision 28781)
+++ src/backend/executor/execMain.c (working copy)
@@ -39,6 +39,7 @@
#include "catalog/heap.h"
#include "catalog/namespace.h"
#include "catalog/toasting.h"
+#include "catalog/pg_trigger.h"
#include "commands/tablespace.h"
#include "commands/trigger.h"
#include "executor/execdebug.h"
@@ -65,6 +66,9 @@

/* decls for local routines only used within this module */
static void InitPlan(QueryDesc *queryDesc, int eflags);
+static void FilterFKChecks(EState *estate, int (*condition)(TargetEntry *te));
+static void FilterFKChecksForFieldsCarriedOver(EState *estate);
+static void FilterFKChecksForConstantFields(EState *estate);
static void initResultRelInfo(ResultRelInfo *resultRelInfo,
Relation resultRelationDesc,
Index resultRelationIndex,
@@ -821,6 +825,9 @@
queryDesc->tupDesc = tupType;
queryDesc->planstate = planstate;

+ if (operation == CMD_INSERT)
+ FilterFKChecksForFieldsCarriedOver(estate);
+
/*
* If doing SELECT INTO, initialize the "into" relation. We must wait
* till now so we have the "clean" result tuple type to create the new
@@ -832,7 +839,76 @@
OpenIntoRel(queryDesc);
}

+static void
+FilterFKChecks(EState *estate, int (*condition)(TargetEntry *te)) {
+ TriggerDesc *trigdesc;
+ Trigger *trigger;
+ ListCell *em;
+ Relation rel;
+ int16 *attrs;
+ int i, num_attrs;
+
+ if (estate->es_plannedstmt == NULL ||
+ estate->es_num_result_relations > 1 ||
+ estate->es_result_relations == NULL ||
+ estate->es_plannedstmt->planTree->type == T_ValuesScan)
+ return;
+ trigdesc = estate->es_result_relations[0].ri_TrigDesc;
+ if (trigdesc == NULL)
+ return;
+
+ num_attrs = 0;
+ attrs = palloc(
+ list_length(estate->es_plannedstmt->planTree->targetlist) * sizeof(int16));
+ foreach(em, estate->es_plannedstmt->planTree->targetlist) {
+ TargetEntry *te = (TargetEntry *)lfirst(em);
+ if (condition(te))
+ attrs[num_attrs++] = te->resno;
+ }
+
+ rel = estate->es_result_relations[0].ri_RelationDesc;
+ for (i = 0; i < trigdesc->n_after_row[TRIGGER_EVENT_INSERT]; ) {
+ trigger = trigdesc->triggers +
+ trigdesc->tg_after_row[TRIGGER_EVENT_INSERT][i];
+
+ if (TRIGGER_FOR_INSERT(trigger->tgtype) &&
+ RI_FKey_trigger_type(trigger->tgfoid) != RI_TRIGGER_NONE &&
+ RI_FKey_all_attrs_covered(trigger, rel, attrs, num_attrs)) {
+ trigdesc->n_after_row[TRIGGER_EVENT_INSERT]--;
+ memmove(trigdesc->tg_after_row[TRIGGER_EVENT_INSERT] + i,
+ trigdesc->tg_after_row[TRIGGER_EVENT_INSERT] + i + 1,
+ (trigdesc->n_after_row[TRIGGER_EVENT_INSERT] - i) * sizeof(trigdesc->tg_after_row[0]));
+ } else i++;
+ }
+ pfree(attrs);
+}
+
/*
+ * If the current statement is an INSERT/SELECT from some table into
+ * the same table, remove triggers on FKs for which the value comes
+ * from that same FK.
+ */
+static int
+FilterFKChecksForFieldsCarriedOverFilter(TargetEntry *te) {
+ return te->expr->type == T_Var && te->resno == ((Var *)te->expr)->varattno;
+}
+
+static void
+FilterFKChecksForFieldsCarriedOver(EState *estate) {
+ FilterFKChecks(estate, FilterFKChecksForFieldsCarriedOverFilter);
+}
+
+static int
+FilterFKChecksForConstantFieldsFilter(TargetEntry *te) {
+ return te->expr->type == T_Const;
+}
+
+static void
+FilterFKChecksForConstantFields(EState *estate) {
+ FilterFKChecks(estate, FilterFKChecksForConstantFieldsFilter);
+}
+
+/*
* Initialize ResultRelInfo data for one result relation
*/
static void
@@ -1520,6 +1596,10 @@
/* AFTER ROW INSERT Triggers */
ExecARInsertTriggers(estate, resultRelInfo, tuple);

+ if (estate->es_processed == 1) {
+ FilterFKChecksForConstantFields(estate);
+ }
+
/* Process RETURNING if present */
if (resultRelInfo->ri_projectReturning)
ExecProcessReturning(resultRelInfo->ri_projectReturning,
Index: src/backend/utils/adt/ri_triggers.c
===================================================================
--- src/backend/utils/adt/ri_triggers.c (revision 28781)
+++ src/backend/utils/adt/ri_triggers.c (working copy)
@@ -2497,7 +2497,37 @@
return PointerGetDatum(NULL);
}

+/* ----------
+ * RI_FKey_all_attrs_covered -
+ *
+ * Check if the given attributes fully cover the attributes mentioned
+ * in a foreign key check.
+ * ----------
+ */

+bool
+RI_FKey_all_attrs_covered(Trigger *trigger, Relation fk_rel,
+ int16 *attnums, int num_attrnums) {
+ RI_ConstraintInfo riinfo;
+ int i, j;
+
+ /*
+ * Get arguments.
+ */
+ ri_FetchConstraintInfo(&riinfo, trigger, fk_rel, false);
+
+ for (i = 0; i < riinfo.nkeys; i++) {
+ int16 attnum = riinfo.fk_attnums[i];
+ for (j = 0; j < num_attrnums; j++) {
+ if (attnum == attnums[j])
+ break;
+ }
+ if (j == num_attrnums)
+ return false;
+ }
+ return true;
+}
+
/* ----------
* RI_FKey_keyequal_upd_pk -
*

Responses

Browse pgsql-hackers by date

  From Date Subject
Next Message Michael Glaesemann 2007-08-19 02:31:04 Re: change name of redirect_stderr?
Previous Message Andrew Dunstan 2007-08-19 01:44:11 Re: change name of redirect_stderr?