Skip site navigation (1) Skip section navigation (2)

Re: rtree improvements

From: Bruce Momjian <pgman(at)candle(dot)pha(dot)pa(dot)us>
To: Kenneth Been <kennethb(at)telocity(dot)com>
Cc: pgsql-patches(at)postgresql(dot)org
Subject: Re: rtree improvements
Date: 2001-09-29 03:42:55
Message-ID: 200109290342.f8T3guB02849@candle.pha.pa.us (view raw or flat)
Thread:
Lists: pgsql-patches
Patch applied.  Thanks.

> I have made three changes to the rtree code: one bug fix and 
> two performance improvements.  I put an explanation of the 
> changes at
> 
> http://cs1.cs.nyu.edu/been/postgres-rtree.html
> 
> The performance improvements are quite significant.
> 
> All the changes are in the file src/backend/access/rtree/rtree.c
> 
> I was working with the 7.1.3 code.
> 
> I'm including the diff output as an attachment.
> 
> Ken
> 

> diff -Naur postgresql-7.1.3.old/src/backend/access/rtree/rtree.c postgresql-7.1.3/src/backend/access/rtree/rtree.c
> --- postgresql-7.1.3.old/src/backend/access/rtree/rtree.c	Wed Mar 21 22:59:16 2001
> +++ postgresql-7.1.3/src/backend/access/rtree/rtree.c	Tue Sep 25 13:56:31 2001
> @@ -55,6 +55,14 @@
>  	Datum		spl_rdatum;
>  } SPLITVEC;
>  
> +/* for sorting tuples by cost, for picking split */
> +typedef struct SPLITCOST
> +{
> +	OffsetNumber	offset_number;
> +	float			cost_differential;
> +	bool			choose_left;
> +} SPLITCOST;
> +
>  typedef struct RTSTATE
>  {
>  	FmgrInfo	unionFn;		/* union function */
> @@ -79,6 +87,7 @@
>  	   RTSTATE *rtstate);
>  static int	nospace(Page p, IndexTuple it);
>  static void initRtstate(RTSTATE *rtstate, Relation index);
> +static int qsort_comp_splitcost(const void *a, const void *b);
>  
>  
>  Datum
> @@ -427,7 +436,12 @@
>  	FunctionCall2(&rtstate->sizeFn, datum,
>  				  PointerGetDatum(&newd_size));
>  
> -	if (newd_size != old_size)
> +	/*
> +	 * If newd_size == 0 we have degenerate rectangles, so we
> +	 * don't know if there was any change, so we have to
> +	 * assume there was.
> +	 */
> +	if ((newd_size == 0) || (newd_size != old_size))
>  	{
>  		TupleDesc	td = RelationGetDescr(r);
>  
> @@ -503,6 +517,8 @@
>  	OffsetNumber *spl_left,
>  			   *spl_right;
>  	TupleDesc	tupDesc;
> +	int			n;
> +	OffsetNumber newitemoff;
>  
>  	p = (Page) BufferGetPage(buffer);
>  	opaque = (RTreePageOpaque) PageGetSpecialPointer(p);
> @@ -539,56 +555,64 @@
>  	spl_right = v.spl_right;
>  	leftoff = rightoff = FirstOffsetNumber;
>  	maxoff = PageGetMaxOffsetNumber(p);
> -	for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
> -	{
> -		itemid = PageGetItemId(p, i);
> -		item = (IndexTuple) PageGetItem(p, itemid);
> -
> -		if (i == *spl_left)
> -		{
> -			if (PageAddItem(left, (Item) item, IndexTupleSize(item),
> -							leftoff, LP_USED) == InvalidOffsetNumber)
> -				elog(ERROR, "rtdosplit: failed to copy index item in %s",
> -					 RelationGetRelationName(r));
> -			leftoff = OffsetNumberNext(leftoff);
> -			spl_left++;			/* advance in left split vector */
> -		}
> -		else
> -		{
> -			Assert(i == *spl_right);
> -			if (PageAddItem(right, (Item) item, IndexTupleSize(item),
> -							rightoff, LP_USED) == InvalidOffsetNumber)
> -				elog(ERROR, "rtdosplit: failed to copy index item in %s",
> -					 RelationGetRelationName(r));
> -			rightoff = OffsetNumberNext(rightoff);
> -			spl_right++;		/* advance in right split vector */
> -		}
> -	}
> +	newitemoff = OffsetNumberNext(maxoff);
>  
>  	/* build an InsertIndexResult for this insertion */
>  	res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
>  
> -	/* now insert the new index tuple */
> -	if (*spl_left == maxoff + 1)
> +	/*
> +	 * spl_left contains a list of the offset numbers of the 
> +	 * tuples that will go to the left page.  For each offset
> +	 * number, get the tuple item, then add the item to the
> +	 * left page.  Similarly for the right side.
> +	 */
> +
> +	/* fill left node */
> +	for (n = 0; n < v.spl_nleft; n++)
>  	{
> -		if (PageAddItem(left, (Item) itup, IndexTupleSize(itup),
> +		i = *spl_left;
> +		if (i == newitemoff)
> +			item = itup;
> +		else
> +		{
> +			itemid = PageGetItemId(p, i);
> +			item = (IndexTuple) PageGetItem(p, itemid);
> +		}
> +
> +		if (PageAddItem(left, (Item) item, IndexTupleSize(item),
>  						leftoff, LP_USED) == InvalidOffsetNumber)
>  			elog(ERROR, "rtdosplit: failed to add index item to %s",
>  				 RelationGetRelationName(r));
>  		leftoff = OffsetNumberNext(leftoff);
> -		ItemPointerSet(&(res->pointerData), lbknum, leftoff);
> -		spl_left++;
> +
> +		if (i == newitemoff)
> +			ItemPointerSet(&(res->pointerData), lbknum, leftoff);
> +
> +		spl_left++;		/* advance in left split vector */
>  	}
> -	else
> +
> +	/* fill right node */
> +	for (n = 0; n < v.spl_nright; n++)
>  	{
> -		Assert(*spl_right == maxoff + 1);
> -		if (PageAddItem(right, (Item) itup, IndexTupleSize(itup),
> +		i = *spl_right;
> +		if (i == newitemoff)
> +			item = itup;
> +		else
> +		{
> +			itemid = PageGetItemId(p, i);
> +			item = (IndexTuple) PageGetItem(p, itemid);
> +		}
> +
> +		if (PageAddItem(right, (Item) item, IndexTupleSize(item),
>  						rightoff, LP_USED) == InvalidOffsetNumber)
>  			elog(ERROR, "rtdosplit: failed to add index item to %s",
>  				 RelationGetRelationName(r));
>  		rightoff = OffsetNumberNext(rightoff);
> -		ItemPointerSet(&(res->pointerData), rbknum, rightoff);
> -		spl_right++;
> +
> +		if (i == newitemoff)
> +			ItemPointerSet(&(res->pointerData), rbknum, rightoff);
> +
> +		spl_right++;		/* advance in right split vector */
>  	}
>  
>  	/* Make sure we consumed all of the split vectors, and release 'em */
> @@ -741,8 +765,10 @@
>   * In addition, the item to be added (itup) is listed in the appropriate
>   * vector.	It is represented by item number N+1 (N = # of items on page).
>   *
> - * Both vectors appear in sequence order with a terminating sentinel value
> - * of InvalidOffsetNumber.
> + * Both vectors have a terminating sentinel value of InvalidOffsetNumber,
> + * but the sentinal value is no longer used, because the SPLITVEC
> + * vector also contains the length of each vector, and that information
> + * is now used to iterate over them in rtdosplit(). --kbb, 21 Sept 2001
>   *
>   * The bounding-box datums for the two new pages are also returned in *v.
>   *
> @@ -797,6 +823,12 @@
>  				item_2_sz,
>  				left_avail_space,
>  				right_avail_space;
> +	int			total_num_tuples,
> +				num_tuples_without_seeds,
> +				max_after_split; /* in Guttman's lingo, (M - m) */
> +	float		diff; /* diff between cost of putting tuple left or right */
> +	SPLITCOST   *cost_vector;
> +	int			n;
>  
>  	/*
>  	 * First, make sure the new item is not so large that we can't
> @@ -812,6 +844,9 @@
>  	maxoff = PageGetMaxOffsetNumber(page);
>  	newitemoff = OffsetNumberNext(maxoff);		/* phony index for new
>  												 * item */
> +	total_num_tuples = newitemoff;
> +	num_tuples_without_seeds = total_num_tuples - 2;
> +	max_after_split = total_num_tuples / 2;		/* works for m = M/2 */
>  
>  	/* Make arrays big enough for worst case, including sentinel */
>  	nbytes = (maxoff + 2) * sizeof(OffsetNumber);
> @@ -909,47 +944,111 @@
>  	right_avail_space = RTPageAvailSpace - IndexTupleTotalSize(item_2);
>  
>  	/*
> -	 * Now split up the regions between the two seeds.	An important
> -	 * property of this split algorithm is that the split vector v has the
> -	 * indices of items to be split in order in its left and right
> -	 * vectors.  We exploit this property by doing a merge in the code
> -	 * that actually splits the page.
> +	 * Now split up the regions between the two seeds.
> +	 *
> +	 * The cost_vector array will contain hints for determining where
> +	 * each tuple should go.  Each record in the array will contain
> +	 * a boolean, choose_left, that indicates which node the tuple
> +	 * prefers to be on, and the absolute difference in cost between
> +	 * putting the tuple in its favored node and in the other node.
>  	 *
> -	 * For efficiency, we also place the new index tuple in this loop. This
> -	 * is handled at the very end, when we have placed all the existing
> -	 * tuples and i == maxoff + 1.
> +	 * Later, we will sort the cost_vector in descending order by cost
> +	 * difference, and consider the tuples in that order for
> +	 * placement.  That way, the tuples that *really* want to be in
> +	 * one node or the other get to choose first, and the tuples that
> +	 * don't really care choose last.
> +	 *
> +	 * First, build the cost_vector array.  The new index tuple will
> +	 * also be handled in this loop, and represented in the array,
> +	 * with i==newitemoff.
> +	 *
> +	 * In the case of variable size tuples it is possible that we only
> +	 * have the two seeds and no other tuples, in which case we don't
> +	 * do any of this cost_vector stuff.
>  	 */
> +
> +	/* to keep compiler quiet */
> +	cost_vector = (SPLITCOST *) NULL;
> +
> +	if (num_tuples_without_seeds > 0)
> +	{
> +		cost_vector =
> +			(SPLITCOST *) palloc(num_tuples_without_seeds * sizeof(SPLITCOST));
> +		n = 0;
> +		for (i = FirstOffsetNumber; i <= newitemoff; i = OffsetNumberNext(i))
> +		{
> +			/* Compute new union datums and sizes for both choices */
> +
> +			if ((i == seed_1) || (i == seed_2))
> +				continue;
> +			else if (i == newitemoff)
> +				item_1 = itup;
> +			else
> +				item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
> +
> +			datum_alpha = IndexTupleGetDatum(item_1);
> +			union_dl = FunctionCall2(&rtstate->unionFn, datum_l, datum_alpha);
> +			union_dr = FunctionCall2(&rtstate->unionFn, datum_r, datum_alpha);
> +			FunctionCall2(&rtstate->sizeFn, union_dl,
> +						  PointerGetDatum(&size_alpha));
> +			FunctionCall2(&rtstate->sizeFn, union_dr,
> +						  PointerGetDatum(&size_beta));
> +
> +			diff = (size_alpha - size_l) - (size_beta - size_r);
> +
> +			cost_vector[n].offset_number = i;
> +			cost_vector[n].cost_differential = fabs(diff);
> +			cost_vector[n].choose_left = (diff < 0);
> +
> +			n++;
> +		}
> +
> +		/*
> +		 * Sort the array.  The function qsort_comp_splitcost is
> +		 * set up "backwards", to provided descending order.
> +		 */
> +		qsort(cost_vector, num_tuples_without_seeds, sizeof(SPLITCOST),
> +			  &qsort_comp_splitcost);
> +	}
> +
> +	/*
> +	 * Now make the final decisions about where each tuple will go,
> +	 * and build the vectors to return in the SPLITVEC record.
> +	 *
> +	 * The cost_vector array contains (descriptions of) all the
> +	 * tuples, in the order that we want to consider them, so we
> +	 * we just iterate through it and place each tuple in left
> +	 * or right nodes, according to the criteria described below.
> +	 */
> +
>  	left = v->spl_left;
>  	v->spl_nleft = 0;
>  	right = v->spl_right;
>  	v->spl_nright = 0;
>  
> -	for (i = FirstOffsetNumber; i <= newitemoff; i = OffsetNumberNext(i))
> +	/* Place the seeds first.
> +	 * left avail space, left union, right avail space, and right
> +	 * union have already been adjusted for the seeds.
> +	 */
> +
> +	*left++ = seed_1;
> +	v->spl_nleft++;
> +
> +	*right++ = seed_2;
> +	v->spl_nright++;
> +
> +	for (n = 0; n < num_tuples_without_seeds; n++)
>  	{
>  		bool		left_feasible,
>  					right_feasible,
>  					choose_left;
>  
>  		/*
> -		 * If we've already decided where to place this item, just put it
> -		 * on the correct list.  Otherwise, we need to figure out which
> -		 * page needs the least enlargement in order to store the item.
> +		 * We need to figure out which page needs the least
> +		 * enlargement in order to store the item.
>  		 */
>  
> -		if (i == seed_1)
> -		{
> -			*left++ = i;
> -			v->spl_nleft++;
> -			/* left avail_space & union already includes this one */
> -			continue;
> -		}
> -		if (i == seed_2)
> -		{
> -			*right++ = i;
> -			v->spl_nright++;
> -			/* right avail_space & union already includes this one */
> -			continue;
> -		}
> +		i = cost_vector[n].offset_number;
>  
>  		/* Compute new union datums and sizes for both possible additions */
>  		if (i == newitemoff)
> @@ -979,6 +1078,24 @@
>  		 * (We know that all the old items together can fit on one page, so
>  		 * we need not worry about any other problem than failing to fit
>  		 * the new item.)
> +		 *
> +		 * Guttman's algorithm actually has two factors to consider (in
> +		 * order):  1. if one node has so many tuples already assigned to
> +		 * it that the other needs all the rest in order to satisfy the
> +		 * condition that neither node has fewer than m tuples, then
> +		 * that is decisive; 2. otherwise, choose the page that shows
> +		 * the smaller enlargement of its union area.
> +		 *
> +		 * I have chosen m = M/2, where M is the maximum number of
> +		 * tuples on a page.  (Actually, this is only strictly
> +		 * true for fixed size tuples.  For variable size tuples,
> +		 * there still might have to be only one tuple on a page,
> +		 * if it is really big.  But even with variable size
> +		 * tuples we still try to get m as close as possible to M/2.)
> +		 *
> +		 * The question of which page shows the smaller enlargement of
> +		 * its union area has already been answered, and the answer
> +		 * stored in the choose_left field of the SPLITCOST record.
>  		 */
>  		left_feasible = (left_avail_space >= item_1_sz &&
>  						 ((left_avail_space - item_1_sz) >= newitemsz ||
> @@ -988,8 +1105,18 @@
>  						   left_avail_space >= newitemsz));
>  		if (left_feasible && right_feasible)
>  		{
> -			/* Both feasible, use Guttman's algorithm */
> -			choose_left = (size_alpha - size_l < size_beta - size_r);
> +			/*
> +			 * Both feasible, use Guttman's algorithm.
> +			 * First check the m condition described above, and if
> +			 * that doesn't apply, choose the page with the smaller
> +			 * enlargement of its union area.
> +			 */
> +			if (v->spl_nleft > max_after_split)
> +				choose_left = false;
> +			else if (v->spl_nright > max_after_split)
> +				choose_left = true;
> +			else
> +				choose_left = cost_vector[n].choose_left;
>  		}
>  		else if (left_feasible)
>  			choose_left = true;
> @@ -1023,6 +1150,11 @@
>  		}
>  	}
>  
> +	if (num_tuples_without_seeds > 0)
> +	{
> +		pfree(cost_vector);
> +	}
> +
>  	*left = *right = InvalidOffsetNumber;		/* add ending sentinels */
>  
>  	v->spl_ldatum = datum_l;
> @@ -1152,6 +1284,21 @@
>  	fmgr_info(size_proc, &rtstate->sizeFn);
>  	fmgr_info(inter_proc, &rtstate->interFn);
>  	return;
> +}
> +
> +/* for sorting SPLITCOST records in descending order */
> +static int
> +qsort_comp_splitcost(const void *a, const void *b)
> +{
> +	float diff =
> +		((SPLITCOST *)a)->cost_differential -
> +		((SPLITCOST *)b)->cost_differential;
> +	if (diff < 0)
> +		return 1;
> +	else if (diff > 0)
> +		return -1;
> +	else
> +		return 0;
>  }
>  
>  #ifdef RTDEBUG

> 
> ---------------------------(end of broadcast)---------------------------
> TIP 6: Have you searched our list archives?
> 
> http://archives.postgresql.org

-- 
  Bruce Momjian                        |  http://candle.pha.pa.us
  pgman(at)candle(dot)pha(dot)pa(dot)us               |  (610) 853-3000
  +  If your life is a hard drive,     |  830 Blythe Avenue
  +  Christ can be your backup.        |  Drexel Hill, Pennsylvania 19026

In response to

pgsql-patches by date

Next:From: Tom LaneDate: 2001-09-29 05:43:02
Subject: Re: Stray file in contrib/xml -please remove
Previous:From: Bruce MomjianDate: 2001-09-29 03:12:46
Subject: Re: pgcrypto Makefile update

Privacy Policy | About PostgreSQL
Copyright © 1996-2014 The PostgreSQL Global Development Group