From 71dcd1fc8acf8e54478526bd0a7feaf5e43bf2c1 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan
Date: Tue, 9 Aug 2016 14:53:49 -0700
Subject: [PATCH 3/6] Displace heap's root during tuplesort merge
Directly displace and shift down. This is significantly faster than
shifting down to compact the heap, and then shifting up to insert a new
preload tuple during tuplesort merging, especially when the merge can
naturally return at least a few tuples from each tape (return them as
sorted output to caller) before some other tape needs to return tuples.
This is fairly common in the real world; tuple attribute values are
often found to be physically clustered together on input into
tuplesort.c.
---
src/backend/utils/sort/tuplesort.c | 110 +++++++++++++++++++++++++++++++++----
1 file changed, 100 insertions(+), 10 deletions(-)
diff --git a/src/backend/utils/sort/tuplesort.c b/src/backend/utils/sort/tuplesort.c
index c80e5a1..35d7cb1 100644
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -68,9 +68,9 @@
* (or two, when replacement selection is still used), then merge the runs
* using Algorithm D.
*
- * When merging runs, we use a heap containing just the frontmost tuple from
- * each source run; we repeatedly output the smallest tuple and insert the
- * next tuple from its source tape (if any). When the heap empties, the merge
+ * When merging runs, we use a minheap containing only frontmost tuples from
+ * each source run; on output of the root tuple, it is displaced by the next
+ * tuple from its source tape (if any). When the heap empties, the merge
* is complete. The basic merge algorithm thus needs very little memory ---
* only M tuples for an M-way merge, and M is constrained to a small number.
* However, we can still make good use of our full workMem allocation by
@@ -572,6 +572,7 @@ static void sort_bounded_heap(Tuplesortstate *state);
static void tuplesort_sort_memtuples(Tuplesortstate *state);
static void tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple,
int tupleindex, bool checkIndex);
+static void tuplesort_heap_root_displace(Tuplesortstate *state, SortTuple *newtup);
static void tuplesort_heap_siftup(Tuplesortstate *state, bool checkIndex);
static void reversedirection(Tuplesortstate *state);
static unsigned int getlen(Tuplesortstate *state, int tapenum, bool eofOK);
@@ -1993,7 +1994,6 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
* more generally.
*/
*stup = state->memtuples[0];
- tuplesort_heap_siftup(state, false);
if ((tupIndex = state->mergenext[srcTape]) == 0)
{
/*
@@ -2014,18 +2014,21 @@ tuplesort_gettuple_common(Tuplesortstate *state, bool forward,
*/
if ((tupIndex = state->mergenext[srcTape]) == 0)
{
+ /* compact the heap */
+ tuplesort_heap_siftup(state, false);
/* Free tape's buffer, avoiding dangling pointer */
if (state->batchUsed)
mergebatchfreetape(state, srcTape, stup, should_free);
return true;
}
}
- /* pull next preread tuple from list, insert in heap */
+ /* pull next preread tuple from list, displace root in heap */
newtup = &state->memtuples[tupIndex];
state->mergenext[srcTape] = newtup->tupindex;
if (state->mergenext[srcTape] == 0)
state->mergelast[srcTape] = 0;
- tuplesort_heap_insert(state, newtup, srcTape, false);
+ newtup->tupindex = srcTape;
+ tuplesort_heap_root_displace(state, newtup);
/* put the now-unused memtuples entry on the freelist */
newtup->tupindex = state->mergefreelist;
state->mergefreelist = tupIndex;
@@ -2714,8 +2717,6 @@ mergeonerun(Tuplesortstate *state, int destTape, bool finalMerge)
*/
spaceFreed = state->availMem - priorAvail;
state->mergeavailmem[srcTape] += spaceFreed;
- /* compact the heap */
- tuplesort_heap_siftup(state, false);
if ((tupIndex = state->mergenext[srcTape]) == 0)
{
/* out of preloaded data on this tape, try to read more */
@@ -2759,18 +2760,21 @@ mergeonerun(Tuplesortstate *state, int destTape, bool finalMerge)
/* if still no data, we've reached end of run on this tape */
if ((tupIndex = state->mergenext[srcTape]) == 0)
{
+ /* compact the heap */
+ tuplesort_heap_siftup(state, false);
/* Free tape's buffer */
if (state->batchUsed)
mergebatchfreetape(state, srcTape, NULL, NULL);
continue;
}
}
- /* pull next preread tuple from list, insert in heap */
+ /* pull next preread tuple from list, displace root in heap */
tup = &state->memtuples[tupIndex];
state->mergenext[srcTape] = tup->tupindex;
if (state->mergenext[srcTape] == 0)
state->mergelast[srcTape] = 0;
- tuplesort_heap_insert(state, tup, srcTape, false);
+ tup->tupindex = srcTape;
+ tuplesort_heap_root_displace(state, tup);
/* put the now-unused memtuples entry on the freelist */
tup->tupindex = state->mergefreelist;
state->mergefreelist = tupIndex;
@@ -3909,6 +3913,92 @@ tuplesort_heap_insert(Tuplesortstate *state, SortTuple *tuple,
/*
* The tuple at state->memtuples[0] has been removed from the heap.
+ * Shift down to find new location for next preread tuple from the tape
+ * that old root originated from. New tuple's tupindex must be
+ * initialized to its source tape number by caller.
+ *
+ * This routine is only used during merging. It is more efficient to
+ * displace the hole at the root than to compact and then insert into
+ * the heap. We prefer to not alter the size of the heap until it's
+ * truly necessary (until a tape is exhausted), and shift down using
+ * the caller's tuple directly (not the existing heap's last tuple),
+ * avoiding a separate insertion step entirely.
+ *
+ * When the "tape at the root of the heap" does not change, then only
+ * two comparisons are required, as well as one swap. (Actually, that's
+ * the maximum cost per subheap/iteration in general.)
+ */
+static void
+tuplesort_heap_root_displace(Tuplesortstate *state, SortTuple *newtup)
+{
+ SortTuple *memtuples = state->memtuples;
+ int n,
+ imin,
+ i;
+
+ Assert(memtuples[0].tupindex == newtup->tupindex);
+
+ CHECK_FOR_INTERRUPTS();
+
+ n = state->memtupcount; /* n is heap's size, including old root */
+ imin = 0; /* start with caller's "hole" in root */
+ i = imin;
+ /* Shift down as required */
+ for (;;)
+ {
+ int j = i * 2 + 1;
+
+ if (j >= n)
+ break;
+
+ /*
+ * Test newtup as candidate for new root of the (sub)heap whose
+ * root is i. (The i position is always a hole left either by
+ * caller, or by a previous iteration here.)
+ *
+ * Determine which node among left child, right child, and
+ * newtup-as-root has the minimum value (and as such should be
+ * swapped into i's position).
+ */
+ Assert(memtuples[j].tupindex != newtup->tupindex);
+ if (COMPARETUP(state, newtup, &memtuples[j]) <= 0)
+ {
+ /*
+ * Break when new tuple "fits" as root for (sub)heap (when i
+ * is equal to imin, and should remain equal)
+ */
+ if (j + 1 < n &&
+ COMPARETUP(state, newtup, &memtuples[j + 1]) > 0)
+ imin = j + 1;
+ else
+ break;
+ }
+ else
+ {
+ if (j + 1 < n &&
+ COMPARETUP(state, &memtuples[j], &memtuples[j + 1]) > 0)
+ imin = j + 1;
+ else
+ imin = j;
+ }
+
+ /*
+ * Fill hole at root of (sub)heap by swapping. Either left
+ * child or right child of the (sub)heap considered in this
+ * iteration is swapped in as new root here. Next iteration
+ * fills the new hole this creates. Last iteration is one that
+ * finds place for caller's new tuple, leaving no hole.
+ */
+ memtuples[i] = memtuples[imin];
+ i = imin;
+ }
+
+ Assert(state->memtupcount > 1 || imin == 0);
+ memtuples[imin] = *newtup;
+}
+
+/*
+ * The tuple at state->memtuples[0] has been removed from the heap.
* Decrement memtupcount, and sift up to maintain the heap invariant.
*/
static void
--
2.7.4