diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index 41642eb..1af1b85 100644
*** a/src/backend/storage/page/bufpage.c
--- b/src/backend/storage/page/bufpage.c
***************
*** 18,23 ****
--- 18,24 ----
  #include "access/itup.h"
  #include "access/xlog.h"
  #include "storage/checksum.h"
+ #include "utils/inline_sort.h"
  #include "utils/memdebug.h"
  #include "utils/memutils.h"
  
*************** typedef struct itemIdSortData
*** 425,439 ****
  } itemIdSortData;
  typedef itemIdSortData *itemIdSort;
  
! static int
  itemoffcompare(const void *itemidp1, const void *itemidp2)
  {
- 	/* Sort in decreasing itemoff order */
  	return ((itemIdSort) itemidp2)->itemoff -
  		((itemIdSort) itemidp1)->itemoff;
  }
  
  /*
   * After removing or marking some line pointers unused, move the tuples to
   * remove the gaps caused by the removed items.
   */
--- 426,542 ----
  } itemIdSortData;
  typedef itemIdSortData *itemIdSort;
  
! /* qsort-style comparator giving decreasing itemoff order (arg 2 minus arg 1) */
! static inline int
  itemoffcompare(const void *itemidp1, const void *itemidp2)
  {
  	return ((itemIdSort) itemidp2)->itemoff -
  		((itemIdSort) itemidp1)->itemoff;
  }
  
  /*
+  * Sort an array of nitems itemIdSortData entries on itemoff, descending.
+  *
+  * This uses Shell sort (pg_shell_sort).  Given that the array is small and
+  * itemoffcompare can be inlined, it is much faster than general-purpose qsort.
+  */
+ static void
+ sort_itemIds_small(itemIdSort itemidbase, int nitems)
+ {
+ 	pg_shell_sort(itemIdSortData, itemidbase, nitems, itemoffcompare);
+ }
+ 
+ /*
+  * Sort an array of nitems itemIdSortData entries on itemoff, descending.
+  *
+  * This uses bucket sort:
+  * - one stable distribution pass into NSPLIT buckets keyed on itemoff / PREFDIV
+  * - then insertion sort within buckets, needed only if some bucket has > 1 item
+  */
+ static void
+ sort_itemIds(itemIdSort itemidbase, int nitems)
+ {
+ 	/* number of buckets to use: */
+ #define NSPLIT 256
+ 	/* divisor to scale input values into 0..NSPLIT-1: */
+ #define PREFDIV (BLCKSZ / NSPLIT)
+ 	/* per-bucket counts; two extra slots as boundaries shift up twice below */
+ 	uint16		count[NSPLIT + 2];
+ 	itemIdSortData copy[Max(MaxIndexTuplesPerPage, MaxHeapTuplesPerPage)];
+ 	int			i,
+ 				max,
+ 				total,
+ 				pos,
+ 				highbits;
+ 
+ 	Assert(nitems <= lengthof(copy));
+ 
+ 	/*
+ 	 * Count how many items in each bucket.  We assume all itemoff values are
+ 	 * less than BLCKSZ, therefore dividing by PREFDIV gives a value less than
+ 	 * NSPLIT.
+ 	 */
+ 	memset(count, 0, sizeof(count));
+ 	for (i = 0; i < nitems; i++)
+ 	{
+ 		highbits = itemidbase[i].itemoff / PREFDIV;
+ 		count[highbits]++;
+ 	}
+ 
+ 	/*
+ 	 * Now convert counts to bucket position info, placing the buckets in
+ 	 * decreasing order.  After this loop, count[k+1] is start of bucket k
+ 	 * (for 0 <= k < NSPLIT), count[k] is end+1 of bucket k, and therefore
+ 	 * count[k] - count[k+1] is length of bucket k.
+ 	 *
+ 	 * Also detect whether any bucket has more than one element: "max" is the
+ 	 * OR of all the counts (not really the max), which is > 1 iff some count is.
+ 	 */
+ 	max = total = count[NSPLIT - 1];
+ 	for (i = NSPLIT - 2; i >= 0; i--)
+ 	{
+ 		max |= count[i];
+ 		total += count[i];
+ 		count[i] = total;
+ 	}
+ 	Assert(count[0] == nitems);
+ 
+ 	/*
+ 	 * Now copy the data to be sorted into appropriate positions in the copy[]
+ 	 * array.  We increment each bucket-start pointer as we insert data into
+ 	 * its bucket; hence, after this loop count[k+1] is the end+1 of bucket k,
+ 	 * count[k+2] is the start of bucket k, and count[k+1] - count[k+2] is the
+ 	 * length of bucket k.
+ 	 */
+ 	for (i = 0; i < nitems; i++)
+ 	{
+ 		highbits = itemidbase[i].itemoff / PREFDIV;
+ 		pos = count[highbits + 1]++;
+ 		copy[pos] = itemidbase[i];
+ 	}
+ 	Assert(count[1] == nitems);
+ 
+ 	/*
+ 	 * If any buckets are larger than 1 item, we must sort them.  They should
+ 	 * be small enough to make insertion sort effective.
+ 	 */
+ 	if (max > 1)
+ 	{
+ 		/* i is bucket number plus 1, so bucket i-1 spans count[i+1]..count[i] */
+ 		for (i = NSPLIT; i > 0; i--)
+ 		{
+ 			pg_insertion_sort(itemIdSortData,
+ 							  copy + count[i + 1],
+ 							  count[i] - count[i + 1],
+ 							  itemoffcompare);
+ 		}
+ 	}
+ 
+ 	/* And transfer the sorted data back to the caller's array */
+ 	memcpy(itemidbase, copy, sizeof(itemIdSortData) * nitems);
+ }
+ 
+ /*
   * After removing or marking some line pointers unused, move the tuples to
   * remove the gaps caused by the removed items.
   */
*************** compactify_tuples(itemIdSort itemidbase,
*** 445,452 ****
  	int			i;
  
  	/* sort itemIdSortData array into decreasing itemoff order */
! 	qsort((char *) itemidbase, nitems, sizeof(itemIdSortData),
! 		  itemoffcompare);
  
  	upper = phdr->pd_special;
  	for (i = 0; i < nitems; i++)
--- 548,558 ----
  	int			i;
  
  	/* sort itemIdSortData array into decreasing itemoff order */
! 	/* empirically, bucket sort is worth the trouble above 48 items */
! 	if (nitems > 48)
! 		sort_itemIds(itemidbase, nitems);
! 	else
! 		sort_itemIds_small(itemidbase, nitems);
  
  	upper = phdr->pd_special;
  	for (i = 0; i < nitems; i++)
diff --git a/src/include/utils/inline_sort.h b/src/include/utils/inline_sort.h
index ...c97a248 .
*** a/src/include/utils/inline_sort.h
--- b/src/include/utils/inline_sort.h
***************
*** 0 ****
--- 1,88 ----
+ /*-------------------------------------------------------------------------
+  *
+  * inline_sort.h
+  *	  Macros to perform specialized types of sorts.
+  *
+  *
+  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
+  * Portions Copyright (c) 1994, Regents of the University of California
+  *
+  * src/include/utils/inline_sort.h
+  *
+  *-------------------------------------------------------------------------
+  */
+ #ifndef INLINE_SORT_H
+ #define INLINE_SORT_H
+ 
+ /*
+  * pg_shell_sort - sort for small arrays with inlinable comparator.
+  *
+  * This is best used with arrays smaller than 200 elements, and could be
+  * safely used with up to 1000 elements.  But it degrades fast after that.
+  *
+  * Since this is implemented as a macro it can be optimized together with the
+  * comparison function; using a macro or inlinable function is recommended.
+  *
+  * Arguments:
+  *	 elem_t - type of array elements (for declaring temporary variables)
+  *	 array	- pointer to elements to be sorted
+  *	 nitems - number of elements to be sorted
+  *	 cmp	- comparison function that accepts addresses of 2 elements
+  *			  (same API as qsort comparison function).
+  * cmp argument should be a function or macro name.
+  * array and nitems arguments are evaluated only once.
+  *
+  * This uses Shellsort (see e.g. Wikipedia's entry), with gaps selected as
+  * roughly "gap(i) = largest prime number below e^i".  These are close to the
+  * gaps recommended by Incerpi & Sedgewick, but look to be better on average.
+  */
+ #define pg_shell_sort(elem_t, array, nitems, cmp) \
+ 	do { \
+ 		elem_t *_arr = (array); \
+ 		int		_n = (nitems); \
+ 		static const int _offsets[] = {401, 139, 53, 19, 7, 3}; \
+ 		int		_noff; \
+ 		for (_noff = 0; _noff < lengthof(_offsets); _noff++) \
+ 		{ \
+ 			int		_off = _offsets[_noff]; \
+ 			pg_shell_sort_pass(elem_t, cmp, _off, _arr, _n); \
+ 		} \
+ 		pg_shell_sort_pass(elem_t, cmp, 1, _arr, _n); \
+ 	} while (0)
+ 
+ /*
+  * pg_insertion_sort - plain insertion sort (a single pass at gap 1).
+  * Useful for very small arrays, or when the array is already almost sorted.
+  * Same API as pg_shell_sort.
+  */
+ #define pg_insertion_sort(elem_t, array, nitems, cmp) \
+ 	do { \
+ 		elem_t *_arr = (array); \
+ 		int		_n = (nitems); \
+ 		pg_shell_sort_pass(elem_t, cmp, 1, _arr, _n); \
+ 	} while (0)
+ 
+ /*
+  * One pass of Shellsort: gap-insertion sort over every element, so that all
+  * "off" interleaved subsequences end up sorted.  Internal to the macros above.
+  */
+ #define pg_shell_sort_pass(elem_t, cmp, off, _arr, _n) \
+ 	do { \
+ 		int		_i; \
+ 		for (_i = off; _i < _n; _i++) \
+ 		{ \
+ 			if (cmp(_arr + _i - off, _arr + _i) > 0) \
+ 			{ \
+ 				elem_t	_temp = _arr[_i]; \
+ 				int		_j = _i; \
+ 				do \
+ 				{ \
+ 					_arr[_j] = _arr[_j - off]; \
+ 					_j -= off; \
+ 				} while (_j >= off && cmp(_arr + _j - off, &_temp) > 0); \
+ 				_arr[_j] = _temp; \
+ 			} \
+ 		} \
+ 	} while (0)
+ 
+ #endif							/* INLINE_SORT_H */
