Skip site navigation (1) Skip section navigation (2)

Planner speedup diffs

From: Tom Lane <tgl(at)sss(dot)pgh(dot)pa(dot)us>
To: pgsql-patches(at)postgreSQL(dot)org
Subject: Planner speedup diffs
Date: 2000-12-15 19:46:34
Message-ID: 13132.976909594@sss.pgh.pa.us (view raw or flat)
Thread:
Lists: pgsql-patches
These are already applied --- posted per Bruce's request.

			regards, tom lane


Index: backend/nodes/copyfuncs.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v
retrieving revision 1.134
retrieving revision 1.135
diff -c -r1.134 -r1.135
*** backend/nodes/copyfuncs.c	2000/12/12 23:33:32	1.134
--- backend/nodes/copyfuncs.c	2000/12/14 22:30:42	1.135
***************
*** 15,21 ****
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.134 2000/12/12 23:33:32 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 15,21 ----
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/nodes/copyfuncs.c,v 1.135 2000/12/14 22:30:42 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 1424,1430 ****
--- 1424,1435 ----
  	newnode->mergejoinoperator = from->mergejoinoperator;
  	newnode->left_sortop = from->left_sortop;
  	newnode->right_sortop = from->right_sortop;
+ 	/* Do not copy pathkeys, since they'd not be canonical in a copied query */
+ 	newnode->left_pathkey = NIL;
+ 	newnode->right_pathkey = NIL;
  	newnode->hashjoinoperator = from->hashjoinoperator;
+ 	newnode->left_dispersion = from->left_dispersion;
+ 	newnode->right_dispersion = from->right_dispersion;
  
  	return newnode;
  }
Index: backend/nodes/equalfuncs.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v
retrieving revision 1.84
retrieving revision 1.85
diff -c -r1.84 -r1.85
*** backend/nodes/equalfuncs.c	2000/12/12 23:33:33	1.84
--- backend/nodes/equalfuncs.c	2000/12/14 22:30:42	1.85
***************
*** 20,26 ****
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.84 2000/12/12 23:33:33 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 20,26 ----
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/nodes/equalfuncs.c,v 1.85 2000/12/14 22:30:42 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 515,522 ****
  	if (!equal(a->clause, b->clause))
  		return false;
  	/*
! 	 * ignore eval_cost, since it may not be set yet, and should be
! 	 * derivable from the clause anyway
  	 */
  	if (a->ispusheddown != b->ispusheddown)
  		return false;
--- 515,523 ----
  	if (!equal(a->clause, b->clause))
  		return false;
  	/*
! 	 * ignore eval_cost, left/right_pathkey, and left/right_dispersion,
! 	 * since they may not be set yet, and should be derivable from the
! 	 * clause anyway
  	 */
  	if (a->ispusheddown != b->ispusheddown)
  		return false;
Index: backend/nodes/readfuncs.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/nodes/readfuncs.c,v
retrieving revision 1.101
retrieving revision 1.102
diff -c -r1.101 -r1.102
*** backend/nodes/readfuncs.c	2000/12/12 23:33:33	1.101
--- backend/nodes/readfuncs.c	2000/12/14 22:30:42	1.102
***************
*** 8,14 ****
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.101 2000/12/12 23:33:33 tgl Exp $
   *
   * NOTES
   *	  Most of the read functions for plan nodes are tested. (In fact, they
--- 8,14 ----
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/nodes/readfuncs.c,v 1.102 2000/12/14 22:30:42 tgl Exp $
   *
   * NOTES
   *	  Most of the read functions for plan nodes are tested. (In fact, they
***************
*** 1848,1853 ****
--- 1848,1858 ----
  
  	/* eval_cost is not part of saved representation; compute on first use */
  	local_node->eval_cost = -1;
+ 	/* ditto for cached pathkeys and dispersion */
+ 	local_node->left_pathkey = NIL;
+ 	local_node->right_pathkey = NIL;
+ 	local_node->left_dispersion = -1;
+ 	local_node->right_dispersion = -1;
  
  	return local_node;
  }
Index: backend/optimizer/README
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/README,v
retrieving revision 1.20
retrieving revision 1.21
diff -c -r1.20 -r1.21
*** backend/optimizer/README	2000/11/12 00:37:02	1.20
--- backend/optimizer/README	2000/12/14 22:30:45	1.21
***************
*** 356,364 ****
  as long as the clause has been applied at some point while forming the
  join relation.  (In the current implementation, we always apply qual
  clauses as soon as possible, ie, as far down in the plan tree as possible.
! So we can always make this deduction.  If we postponed filtering by qual
! clauses then we'd not be able to assume pathkey equivalence until after
! the equality check(s) had been applied.)
  
  In short, then: when producing the pathkeys for a merge or nestloop join,
  we can keep all of the keys of the outer path, since the ordering of the
--- 356,368 ----
  as long as the clause has been applied at some point while forming the
  join relation.  (In the current implementation, we always apply qual
  clauses as soon as possible, ie, as far down in the plan tree as possible.
! So we can treat the pathkeys as equivalent everywhere.  The exception is
! when the relations A and B are joined inside the nullable side of an
! OUTER JOIN and the equijoin clause comes from above the OUTER JOIN.  In this
! case we cannot apply the qual as soon as A and B are joined, so we do not
! consider the pathkeys to be equivalent.  This could be improved if we wanted
! to go to the trouble of making pathkey equivalence be context-dependent,
! but that seems much more complex than it's worth.)
  
  In short, then: when producing the pathkeys for a merge or nestloop join,
  we can keep all of the keys of the outer path, since the ordering of the
***************
*** 415,433 ****
  equivalence set, we instantly add all the other vars equivalenced to it,
  whether they appear yet in the pathkey's relation or not.  And we also
  mandate that the pathkey sublist appear in the same order as the
! equivalence set it comes from.  (In practice, we simply return a pointer
! to the relevant equivalence set without building any new sublist at all.
! Each equivalence set becomes a "canonical pathkey" for all its members.)
! This makes comparing pathkeys very simple and fast, and saves a lot of
! work and memory space for pathkey construction as well.
! 
! Note that pathkey sublists having just one item still exist, and are
! not expected to be equal() to any equivalence set.  This occurs when
! we describe a sort order that involves a var that's not mentioned in
! any equijoin clause of the WHERE.  We could add singleton sets containing
! such vars to the query's list of equivalence sets, but there's little
! point in doing so.
  
  By the way, it's OK and even useful for us to build equivalence sets
  that mention multiple vars from the same relation.  For example, if
  we have WHERE A.X = A.Y and we are scanning A using an index on X,
--- 419,433 ----
  equivalence set, we instantly add all the other vars equivalenced to it,
  whether they appear yet in the pathkey's relation or not.  And we also
  mandate that the pathkey sublist appear in the same order as the
! equivalence set it comes from.
  
+ In fact, we can go even further, and say that the canonical representation
+ of a pathkey sublist is a pointer directly to the relevant equivalence set,
+ which is kept in a list of pathkey equivalence sets for the query.  Then
+ pathkey sublist comparison reduces to pointer-equality checking!  To do this
+ we also have to add single-element pathkey sublists to the query's list of
+ equivalence sets, but that's a small price to pay.
+ 
  By the way, it's OK and even useful for us to build equivalence sets
  that mention multiple vars from the same relation.  For example, if
  we have WHERE A.X = A.Y and we are scanning A using an index on X,
***************
*** 468,472 ****
--- 468,483 ----
  we're given WHERE int2var = int4var AND int4var = int8var, we'll fail
  while trying to create a representation of the implied clause
  int2var = int8var.
+ 
+ An additional refinement we can make is to insist that canonical pathkey
+ lists (sort orderings) do not mention the same pathkey set more than once.
+ For example, a pathkey list ((A) (B) (A)) is redundant --- the second
+ occurrence of (A) does not change the ordering, since the data must already
+ be sorted by A.  Although a user probably wouldn't write ORDER BY A,B,A
+ directly, such redundancies are more probable once equijoin equivalences
+ have been considered.  Also, the system is likely to generate redundant
+ pathkey lists when computing the sort ordering needed for a mergejoin.  By
+ eliminating the redundancy, we save time and improve planning, since the
+ planner will more easily recognize equivalent orderings as being equivalent.
  
  -- bjm & tgl
Index: backend/optimizer/path/allpaths.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v
retrieving revision 1.67
retrieving revision 1.68
diff -c -r1.67 -r1.68
*** backend/optimizer/path/allpaths.c	2000/11/12 00:36:58	1.67
--- backend/optimizer/path/allpaths.c	2000/12/14 22:30:43	1.68
***************
*** 8,14 ****
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.67 2000/11/12 00:36:58 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 8,14 ----
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.68 2000/12/14 22:30:43 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 162,170 ****
  	create_tidscan_paths(root, rel);
  
  	/* Consider index paths for both simple and OR index clauses */
! 	create_index_paths(root, rel, indices,
! 					   rel->baserestrictinfo,
! 					   rel->joininfo);
  
  	/*
  	 * Note: create_or_index_paths depends on create_index_paths to
--- 162,168 ----
  	create_tidscan_paths(root, rel);
  
  	/* Consider index paths for both simple and OR index clauses */
! 	create_index_paths(root, rel, indices);
  
  	/*
  	 * Note: create_or_index_paths depends on create_index_paths to
Index: backend/optimizer/path/indxpath.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v
retrieving revision 1.99
retrieving revision 1.100
diff -c -r1.99 -r1.100
*** backend/optimizer/path/indxpath.c	2000/11/25 20:33:51	1.99
--- backend/optimizer/path/indxpath.c	2000/12/14 22:30:43	1.100
***************
*** 9,15 ****
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.99 2000/11/25 20:33:51 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 9,15 ----
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.100 2000/12/14 22:30:43 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 87,97 ****
  					  List **clausegroups, List **outerrelids);
  static List *index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
  				List *clausegroup_list, List *outerrelids_list);
- static bool useful_for_mergejoin(RelOptInfo *rel, IndexOptInfo *index,
- 					 List *joininfo_list);
- static bool useful_for_ordering(Query *root, RelOptInfo *rel,
- 					IndexOptInfo *index,
- 					ScanDirection scandir);
  static bool match_index_to_operand(int indexkey, Var *operand,
  					   RelOptInfo *rel, IndexOptInfo *index);
  static bool function_index_operand(Expr *funcOpnd, RelOptInfo *rel,
--- 87,92 ----
***************
*** 125,155 ****
   * attributes are available and fixed during any one scan of the indexpath.
   *
   * An IndexPath is generated and submitted to add_path() for each index
!  * this routine deems potentially interesting for the current query
!  * (at most one IndexPath per index on the given relation).  An innerjoin
!  * path is also generated for each interesting combination of outer join
!  * relations.  The innerjoin paths are *not* passed to add_path(), but are
!  * appended to the "innerjoin" list of the relation for later consideration
!  * in nested-loop joins.
   *
   * 'rel' is the relation for which we want to generate index paths
   * 'indices' is a list of available indexes for 'rel'
-  * 'restrictinfo_list' is a list of restrictinfo nodes for 'rel'
-  * 'joininfo_list' is a list of joininfo nodes for 'rel'
   */
  void
  create_index_paths(Query *root,
  				   RelOptInfo *rel,
! 				   List *indices,
! 				   List *restrictinfo_list,
! 				   List *joininfo_list)
  {
  	List	   *ilist;
  
  	foreach(ilist, indices)
  	{
  		IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
  		List	   *restrictclauses;
  		List	   *joinclausegroups;
  		List	   *joinouterrelids;
  
--- 120,150 ----
   * attributes are available and fixed during any one scan of the indexpath.
   *
   * An IndexPath is generated and submitted to add_path() for each index
!  * this routine deems potentially interesting for the current query.
!  * An innerjoin path is also generated for each interesting combination of
!  * outer join relations.  The innerjoin paths are *not* passed to add_path(),
!  * but are appended to the "innerjoin" list of the relation for later
!  * consideration in nested-loop joins.
   *
   * 'rel' is the relation for which we want to generate index paths
   * 'indices' is a list of available indexes for 'rel'
   */
  void
  create_index_paths(Query *root,
  				   RelOptInfo *rel,
! 				   List *indices)
  {
+ 	List	   *restrictinfo_list = rel->baserestrictinfo;
+ 	List	   *joininfo_list = rel->joininfo;
  	List	   *ilist;
  
  	foreach(ilist, indices)
  	{
  		IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
  		List	   *restrictclauses;
+ 		List	   *index_pathkeys;
+ 		List	   *useful_pathkeys;
+ 		bool		index_is_ordered;
  		List	   *joinclausegroups;
  		List	   *joinouterrelids;
  
***************
*** 179,187 ****
  		match_index_orclauses(rel, index, restrictinfo_list);
  
  		/*
! 		 * 2. If the keys of this index match any of the available
! 		 * non-'or' restriction clauses, then create a path using those
! 		 * clauses as indexquals.
  		 */
  		restrictclauses = group_clauses_by_indexkey(rel,
  													index,
--- 174,180 ----
  		match_index_orclauses(rel, index, restrictinfo_list);
  
  		/*
! 		 * 2. Match the index against non-'or' restriction clauses.
  		 */
  		restrictclauses = group_clauses_by_indexkey(rel,
  													index,
***************
*** 189,231 ****
  													index->classlist,
  													restrictinfo_list);
  
! 		if (restrictclauses != NIL)
! 			add_path(rel, (Path *) create_index_path(root, rel, index,
! 													 restrictclauses,
! 											   NoMovementScanDirection));
! 
! 		/*
! 		 * 3. If this index can be used for a mergejoin, then create an
! 		 * index path for it even if there were no restriction clauses.
! 		 * (If there were, there is no need to make another index path.)
! 		 * This will allow the index to be considered as a base for a
! 		 * mergejoin in later processing.  Similarly, if the index matches
! 		 * the ordering that is needed for the overall query result, make
! 		 * an index path for it even if there is no other reason to do so.
  		 */
! 		if (restrictclauses == NIL)
! 		{
! 			if (useful_for_mergejoin(rel, index, joininfo_list) ||
! 			 useful_for_ordering(root, rel, index, ForwardScanDirection))
! 				add_path(rel, (Path *)
! 						 create_index_path(root, rel, index,
! 										   restrictclauses,
! 										   ForwardScanDirection));
! 		}
  
  		/*
! 		 * Currently, backwards scan is never considered except for the
! 		 * case of matching a query result ordering.  Possibly should
! 		 * consider it in other places?
  		 */
! 		if (useful_for_ordering(root, rel, index, BackwardScanDirection))
  			add_path(rel, (Path *)
  					 create_index_path(root, rel, index,
  									   restrictclauses,
! 									   BackwardScanDirection));
  
  		/*
! 		 * 4. Create an innerjoin index path for each combination of other
  		 * rels used in available join clauses.  These paths will be
  		 * considered as the inner side of nestloop joins against those
  		 * sets of other rels.	indexable_joinclauses() finds sets of
--- 182,231 ----
  													index->classlist,
  													restrictinfo_list);
  
! 		/*
! 		 * 3. Compute pathkeys describing index's ordering, if any,
! 		 * then see how many of them are actually useful for this query.
  		 */
! 		index_pathkeys = build_index_pathkeys(root, rel, index,
! 											  ForwardScanDirection);
! 		index_is_ordered = (index_pathkeys != NIL);
! 		useful_pathkeys = truncate_useless_pathkeys(root, rel,
! 													index_pathkeys);
  
  		/*
! 		 * 4. Generate an indexscan path if there are relevant restriction
! 		 * clauses OR the index ordering is potentially useful for later
! 		 * merging or final output ordering.
  		 */
! 		if (restrictclauses != NIL || useful_pathkeys != NIL)
  			add_path(rel, (Path *)
  					 create_index_path(root, rel, index,
  									   restrictclauses,
! 									   useful_pathkeys,
! 									   index_is_ordered ?
! 									   ForwardScanDirection :
! 									   NoMovementScanDirection));
! 
! 		/*
! 		 * 5. If the index is ordered, a backwards scan might be interesting.
! 		 * Currently this is only possible for a DESC query result ordering.
! 		 */
! 		if (index_is_ordered)
! 		{
! 			index_pathkeys = build_index_pathkeys(root, rel, index,
! 												  BackwardScanDirection);
! 			useful_pathkeys = truncate_useless_pathkeys(root, rel,
! 														index_pathkeys);
! 			if (useful_pathkeys != NIL)
! 				add_path(rel, (Path *)
! 						 create_index_path(root, rel, index,
! 										   restrictclauses,
! 										   useful_pathkeys,
! 										   BackwardScanDirection));
! 		}
  
  		/*
! 		 * 6. Create an innerjoin index path for each combination of other
  		 * rels used in available join clauses.  These paths will be
  		 * considered as the inner side of nestloop joins against those
  		 * sets of other rels.	indexable_joinclauses() finds sets of
***************
*** 902,989 ****
  	}
  
  	return InvalidOid;
- }
- 
- /*
-  * useful_for_mergejoin
-  *	  Determine whether the given index can support a mergejoin based
-  *	  on any available join clause.
-  *
-  *	  We look to see whether the first indexkey of the index matches the
-  *	  left or right sides of any of the mergejoinable clauses and provides
-  *	  the ordering needed for that side.  If so, the index is useful.
-  *	  Matching a second or later indexkey is not useful unless there is
-  *	  also a mergeclause for the first indexkey, so we need not consider
-  *	  secondary indexkeys at this stage.
-  *
-  * 'rel' is the relation for which 'index' is defined
-  * 'joininfo_list' is the list of JoinInfo nodes for 'rel'
-  */
- static bool
- useful_for_mergejoin(RelOptInfo *rel,
- 					 IndexOptInfo *index,
- 					 List *joininfo_list)
- {
- 	int		   *indexkeys = index->indexkeys;
- 	Oid		   *ordering = index->ordering;
- 	List	   *i;
- 
- 	if (!indexkeys || indexkeys[0] == 0 ||
- 		!ordering || ordering[0] == InvalidOid)
- 		return false;			/* unordered index is not useful */
- 
- 	foreach(i, joininfo_list)
- 	{
- 		JoinInfo   *joininfo = (JoinInfo *) lfirst(i);
- 		List	   *j;
- 
- 		foreach(j, joininfo->jinfo_restrictinfo)
- 		{
- 			RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(j);
- 
- 			if (restrictinfo->mergejoinoperator)
- 			{
- 				if (restrictinfo->left_sortop == ordering[0] &&
- 					match_index_to_operand(indexkeys[0],
- 										get_leftop(restrictinfo->clause),
- 										   rel, index))
- 					return true;
- 				if (restrictinfo->right_sortop == ordering[0] &&
- 					match_index_to_operand(indexkeys[0],
- 									   get_rightop(restrictinfo->clause),
- 										   rel, index))
- 					return true;
- 			}
- 		}
- 	}
- 	return false;
- }
- 
- /*
-  * useful_for_ordering
-  *	  Determine whether the given index can produce an ordering matching
-  *	  the order that is wanted for the query result.
-  *
-  * 'rel' is the relation for which 'index' is defined
-  * 'scandir' is the contemplated scan direction
-  */
- static bool
- useful_for_ordering(Query *root,
- 					RelOptInfo *rel,
- 					IndexOptInfo *index,
- 					ScanDirection scandir)
- {
- 	List	   *index_pathkeys;
- 
- 	if (root->query_pathkeys == NIL)
- 		return false;			/* no special ordering requested */
- 
- 	index_pathkeys = build_index_pathkeys(root, rel, index, scandir);
- 
- 	if (index_pathkeys == NIL)
- 		return false;			/* unordered index */
- 
- 	return pathkeys_contained_in(root->query_pathkeys, index_pathkeys);
  }
  
  /****************************************************************************
--- 902,907 ----
Index: backend/optimizer/path/joinpath.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v
retrieving revision 1.59
retrieving revision 1.60
diff -c -r1.59 -r1.60
*** backend/optimizer/path/joinpath.c	2000/11/23 03:57:31	1.59
--- backend/optimizer/path/joinpath.c	2000/12/14 22:30:43	1.60
***************
*** 8,14 ****
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.59 2000/11/23 03:57:31 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 8,14 ----
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.60 2000/12/14 22:30:43 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 152,157 ****
--- 152,158 ----
  					 List *mergeclause_list,
  					 JoinType jointype)
  {
+ 	List	   *all_pathkeys;
  	List	   *i;
  
  	/*
***************
*** 159,194 ****
  	 * generate a differently-sorted result path at essentially the same
  	 * cost.  We have no basis for choosing one over another at this level
  	 * of joining, but some sort orders may be more useful than others for
! 	 * higher-level mergejoins.  Generating a path here for *every*
! 	 * permutation of mergejoin clauses doesn't seem like a winning
! 	 * strategy, however; the cost in planning time is too high.
  	 *
! 	 * For now, we generate one path for each mergejoin clause, listing that
! 	 * clause first and the rest in random order.  This should allow at
  	 * least a one-clause mergejoin without re-sorting against any other
  	 * possible mergejoin partner path.  But if we've not guessed the
! 	 * right ordering of secondary clauses, we may end up evaluating
  	 * clauses as qpquals when they could have been done as mergeclauses.
  	 * We need to figure out a better way.	(Two possible approaches: look
  	 * at all the relevant index relations to suggest plausible sort
  	 * orders, or make just one output path and somehow mark it as having
  	 * a sort-order that can be rearranged freely.)
  	 */
! 	foreach(i, mergeclause_list)
  	{
! 		RestrictInfo *restrictinfo = lfirst(i);
! 		List	   *curclause_list;
  		List	   *outerkeys;
  		List	   *innerkeys;
  		List	   *merge_pathkeys;
  
! 		/* Make a mergeclause list with this guy first. */
! 		if (i != mergeclause_list)
! 			curclause_list = lcons(restrictinfo,
! 								   lremove(restrictinfo,
! 										   listCopy(mergeclause_list)));
  		else
! 			curclause_list = mergeclause_list;	/* no work at first one... */
  
  		/*
  		 * Build sort pathkeys for both sides.
--- 160,216 ----
  	 * generate a differently-sorted result path at essentially the same
  	 * cost.  We have no basis for choosing one over another at this level
  	 * of joining, but some sort orders may be more useful than others for
! 	 * higher-level mergejoins, so it's worth considering multiple orderings.
  	 *
! 	 * Actually, it's not quite true that every mergeclause ordering will
! 	 * generate a different path order, because some of the clauses may be
! 	 * redundant.  Therefore, what we do is convert the mergeclause list to
! 	 * a list of canonical pathkeys, and then consider different orderings
! 	 * of the pathkeys.
! 	 *
! 	 * Generating a path for *every* permutation of the pathkeys doesn't
! 	 * seem like a winning strategy; the cost in planning time is too high.
! 	 * For now, we generate one path for each pathkey, listing that pathkey
! 	 * first and the rest in random order.  This should allow at
  	 * least a one-clause mergejoin without re-sorting against any other
  	 * possible mergejoin partner path.  But if we've not guessed the
! 	 * right ordering of secondary keys, we may end up evaluating
  	 * clauses as qpquals when they could have been done as mergeclauses.
  	 * We need to figure out a better way.	(Two possible approaches: look
  	 * at all the relevant index relations to suggest plausible sort
  	 * orders, or make just one output path and somehow mark it as having
  	 * a sort-order that can be rearranged freely.)
  	 */
! 	all_pathkeys = make_pathkeys_for_mergeclauses(root,
! 												  mergeclause_list,
! 												  outerrel);
! 
! 	foreach(i, all_pathkeys)
  	{
! 		List	   *front_pathkey = lfirst(i);
! 		List	   *cur_pathkeys;
! 		List	   *cur_mergeclauses;
  		List	   *outerkeys;
  		List	   *innerkeys;
  		List	   *merge_pathkeys;
  
! 		/* Make a pathkey list with this guy first. */
! 		if (i != all_pathkeys)
! 			cur_pathkeys = lcons(front_pathkey,
! 								 lremove(front_pathkey,
! 										 listCopy(all_pathkeys)));
  		else
! 			cur_pathkeys = all_pathkeys;	/* no work at first one... */
! 
! 		/*
! 		 * Select mergeclause(s) that match this sort ordering.  If we had
! 		 * redundant merge clauses then we will get a subset of the original
! 		 * clause list.  There had better be some match, however...
! 		 */
! 		cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
! 														  cur_pathkeys,
! 														  mergeclause_list);
! 		Assert(cur_mergeclauses != NIL);
  
  		/*
  		 * Build sort pathkeys for both sides.
***************
*** 198,212 ****
  		 * suppress an explicit sort step, so we needn't do so here.
  		 */
  		outerkeys = make_pathkeys_for_mergeclauses(root,
! 												   curclause_list,
  												   outerrel);
  		innerkeys = make_pathkeys_for_mergeclauses(root,
! 												   curclause_list,
  												   innerrel);
  		/* Build pathkeys representing output sort order. */
! 		merge_pathkeys = build_join_pathkeys(outerkeys,
! 											 joinrel->targetlist,
! 											 root->equi_key_list);
  
  		/*
  		 * And now we can make the path.  We only consider the cheapest-
--- 220,232 ----
  		 * suppress an explicit sort step, so we needn't do so here.
  		 */
  		outerkeys = make_pathkeys_for_mergeclauses(root,
! 												   cur_mergeclauses,
  												   outerrel);
  		innerkeys = make_pathkeys_for_mergeclauses(root,
! 												   cur_mergeclauses,
  												   innerrel);
  		/* Build pathkeys representing output sort order. */
! 		merge_pathkeys = build_join_pathkeys(root, joinrel, outerkeys);
  
  		/*
  		 * And now we can make the path.  We only consider the cheapest-
***************
*** 221,227 ****
  									   innerrel->cheapest_total_path,
  									   restrictlist,
  									   merge_pathkeys,
! 									   curclause_list,
  									   outerkeys,
  									   innerkeys));
  	}
--- 241,247 ----
  									   innerrel->cheapest_total_path,
  									   restrictlist,
  									   merge_pathkeys,
! 									   cur_mergeclauses,
  									   outerkeys,
  									   innerkeys));
  	}
***************
*** 301,317 ****
  		List	   *trialsortkeys;
  		Path	   *cheapest_startup_inner;
  		Path	   *cheapest_total_inner;
! 		int			num_mergeclauses;
! 		int			clausecnt;
  
  		/*
  		 * The result will have this sort order (even if it is implemented
  		 * as a nestloop, and even if some of the mergeclauses are
  		 * implemented by qpquals rather than as true mergeclauses):
  		 */
! 		merge_pathkeys = build_join_pathkeys(outerpath->pathkeys,
! 											 joinrel->targetlist,
! 											 root->equi_key_list);
  
  		if (nestjoinOK)
  		{
--- 321,336 ----
  		List	   *trialsortkeys;
  		Path	   *cheapest_startup_inner;
  		Path	   *cheapest_total_inner;
! 		int			num_sortkeys;
! 		int			sortkeycnt;
  
  		/*
  		 * The result will have this sort order (even if it is implemented
  		 * as a nestloop, and even if some of the mergeclauses are
  		 * implemented by qpquals rather than as true mergeclauses):
  		 */
! 		merge_pathkeys = build_join_pathkeys(root, joinrel,
! 											 outerpath->pathkeys);
  
  		if (nestjoinOK)
  		{
***************
*** 347,353 ****
  		}
  
  		/* Look for useful mergeclauses (if any) */
! 		mergeclauses = find_mergeclauses_for_pathkeys(outerpath->pathkeys,
  													  mergeclause_list);
  
  		/* Done with this outer path if no chance for a mergejoin */
--- 366,373 ----
  		}
  
  		/* Look for useful mergeclauses (if any) */
! 		mergeclauses = find_mergeclauses_for_pathkeys(root,
! 													  outerpath->pathkeys,
  													  mergeclause_list);
  
  		/* Done with this outer path if no chance for a mergejoin */
***************
*** 362,368 ****
  		/*
  		 * Generate a mergejoin on the basis of sorting the cheapest
  		 * inner. Since a sort will be needed, only cheapest total cost
! 		 * matters.
  		 */
  		add_path(joinrel, (Path *)
  				 create_mergejoin_path(joinrel,
--- 382,389 ----
  		/*
  		 * Generate a mergejoin on the basis of sorting the cheapest
  		 * inner. Since a sort will be needed, only cheapest total cost
! 		 * matters.  (But create_mergejoin_path will do the right thing
! 		 * if innerrel->cheapest_total_path is already correctly sorted.)
  		 */
  		add_path(joinrel, (Path *)
  				 create_mergejoin_path(joinrel,
***************
*** 376,413 ****
  									   innersortkeys));
  
  		/*
! 		 * Look for presorted inner paths that satisfy the mergeclause
  		 * list or any truncation thereof.	Here, we consider both cheap
! 		 * startup cost and cheap total cost.
  		 */
! 		trialsortkeys = listCopy(innersortkeys);		/* modifiable copy */
  		cheapest_startup_inner = NULL;
  		cheapest_total_inner = NULL;
- 		num_mergeclauses = length(mergeclauses);
  
! 		for (clausecnt = num_mergeclauses; clausecnt > 0; clausecnt--)
  		{
  			Path	   *innerpath;
  			List	   *newclauses = NIL;
  
  			/*
! 			 * Look for an inner path ordered well enough to merge with
! 			 * the first 'clausecnt' mergeclauses.	NB: trialsortkeys list
  			 * is modified destructively, which is why we made a copy...
  			 */
! 			trialsortkeys = ltruncate(clausecnt, trialsortkeys);
  			innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
  													   trialsortkeys,
  													   TOTAL_COST);
  			if (innerpath != NULL &&
  				(cheapest_total_inner == NULL ||
  				 compare_path_costs(innerpath, cheapest_total_inner,
  									TOTAL_COST) < 0))
  			{
  				/* Found a cheap (or even-cheaper) sorted path */
! 				if (clausecnt < num_mergeclauses)
! 					newclauses = ltruncate(clausecnt,
! 										   listCopy(mergeclauses));
  				else
  					newclauses = mergeclauses;
  				add_path(joinrel, (Path *)
--- 397,445 ----
  									   innersortkeys));
  
  		/*
! 		 * Look for presorted inner paths that satisfy the innersortkey
  		 * list or any truncation thereof.	Here, we consider both cheap
! 		 * startup cost and cheap total cost.  Ignore
! 		 * innerrel->cheapest_total_path, since we already made a path with it.
  		 */
! 		num_sortkeys = length(innersortkeys);
! 		if (num_sortkeys > 1)
! 			trialsortkeys = listCopy(innersortkeys); /* need modifiable copy */
! 		else
! 			trialsortkeys = innersortkeys; /* won't really truncate */
  		cheapest_startup_inner = NULL;
  		cheapest_total_inner = NULL;
  
! 		for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
  		{
  			Path	   *innerpath;
  			List	   *newclauses = NIL;
  
  			/*
! 			 * Look for an inner path ordered well enough for the first
! 			 * 'sortkeycnt' innersortkeys.	NB: trialsortkeys list
  			 * is modified destructively, which is why we made a copy...
  			 */
! 			trialsortkeys = ltruncate(sortkeycnt, trialsortkeys);
  			innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
  													   trialsortkeys,
  													   TOTAL_COST);
  			if (innerpath != NULL &&
+ 				innerpath != innerrel->cheapest_total_path &&
  				(cheapest_total_inner == NULL ||
  				 compare_path_costs(innerpath, cheapest_total_inner,
  									TOTAL_COST) < 0))
  			{
  				/* Found a cheap (or even-cheaper) sorted path */
! 				/* Select the right mergeclauses, if we didn't already */
! 				if (sortkeycnt < num_sortkeys)
! 				{
! 					newclauses =
! 						find_mergeclauses_for_pathkeys(root,
! 													   trialsortkeys,
! 													   mergeclauses);
! 					Assert(newclauses != NIL);
! 				}
  				else
  					newclauses = mergeclauses;
  				add_path(joinrel, (Path *)
***************
*** 427,432 ****
--- 459,465 ----
  													   trialsortkeys,
  													   STARTUP_COST);
  			if (innerpath != NULL &&
+ 				innerpath != innerrel->cheapest_total_path &&
  				(cheapest_startup_inner == NULL ||
  				 compare_path_costs(innerpath, cheapest_startup_inner,
  									STARTUP_COST) < 0))
***************
*** 441,449 ****
  					 */
  					if (newclauses == NIL)
  					{
! 						if (clausecnt < num_mergeclauses)
! 							newclauses = ltruncate(clausecnt,
! 												 listCopy(mergeclauses));
  						else
  							newclauses = mergeclauses;
  					}
--- 474,487 ----
  					 */
  					if (newclauses == NIL)
  					{
! 						if (sortkeycnt < num_sortkeys)
! 						{
! 							newclauses =
! 								find_mergeclauses_for_pathkeys(root,
! 															   trialsortkeys,
! 															   mergeclauses);
! 							Assert(newclauses != NIL);
! 						}
  						else
  							newclauses = mergeclauses;
  					}
***************
*** 501,507 ****
  		Path	   *startupouterpath;
  
  		/* Look for useful mergeclauses (if any) */
! 		mergeclauses = find_mergeclauses_for_pathkeys(innerpath->pathkeys,
  													  mergeclause_list);
  		if (mergeclauses == NIL)
  			continue;
--- 539,546 ----
  		Path	   *startupouterpath;
  
  		/* Look for useful mergeclauses (if any) */
! 		mergeclauses = find_mergeclauses_for_pathkeys(root,
! 													  innerpath->pathkeys,
  													  mergeclause_list);
  		if (mergeclauses == NIL)
  			continue;
***************
*** 516,524 ****
  		 * outer. Since a sort will be needed, only cheapest total cost
  		 * matters.
  		 */
! 		merge_pathkeys = build_join_pathkeys(outersortkeys,
! 											 joinrel->targetlist,
! 											 root->equi_key_list);
  		add_path(joinrel, (Path *)
  				 create_mergejoin_path(joinrel,
  									   jointype,
--- 555,561 ----
  		 * outer. Since a sort will be needed, only cheapest total cost
  		 * matters.
  		 */
! 		merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys);
  		add_path(joinrel, (Path *)
  				 create_mergejoin_path(joinrel,
  									   jointype,
***************
*** 545,553 ****
  			continue;			/* there won't be a startup-cost path
  								 * either */
  
! 		merge_pathkeys = build_join_pathkeys(totalouterpath->pathkeys,
! 											 joinrel->targetlist,
! 											 root->equi_key_list);
  		add_path(joinrel, (Path *)
  				 create_mergejoin_path(joinrel,
  									   jointype,
--- 582,589 ----
  			continue;			/* there won't be a startup-cost path
  								 * either */
  
! 		merge_pathkeys = build_join_pathkeys(root, joinrel,
! 											 totalouterpath->pathkeys);
  		add_path(joinrel, (Path *)
  				 create_mergejoin_path(joinrel,
  									   jointype,
***************
*** 564,572 ****
  														  STARTUP_COST);
  		if (startupouterpath != NULL && startupouterpath != totalouterpath)
  		{
! 			merge_pathkeys = build_join_pathkeys(startupouterpath->pathkeys,
! 												 joinrel->targetlist,
! 												 root->equi_key_list);
  			add_path(joinrel, (Path *)
  					 create_mergejoin_path(joinrel,
  										   jointype,
--- 600,607 ----
  														  STARTUP_COST);
  		if (startupouterpath != NULL && startupouterpath != totalouterpath)
  		{
! 			merge_pathkeys = build_join_pathkeys(root, joinrel,
! 												 startupouterpath->pathkeys);
  			add_path(joinrel, (Path *)
  					 create_mergejoin_path(joinrel,
  										   jointype,
***************
*** 637,646 ****
  		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
  		Expr	   *clause;
  		Var		   *left,
! 				   *right,
! 				   *inner;
! 		List	   *hashclauses;
  		Selectivity innerdispersion;
  
  		if (restrictinfo->hashjoinoperator == InvalidOid)
  			continue;			/* not hashjoinable */
--- 672,680 ----
  		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
  		Expr	   *clause;
  		Var		   *left,
! 				   *right;
  		Selectivity innerdispersion;
+ 		List	   *hashclauses;
  
  		if (restrictinfo->hashjoinoperator == InvalidOid)
  			continue;			/* not hashjoinable */
***************
*** 657,682 ****
  		left = get_leftop(clause);
  		right = get_rightop(clause);
  
! 		/* check if clause is usable with these sub-rels, find inner var */
  		if (intMember(left->varno, outerrelids) &&
  			intMember(right->varno, innerrelids))
! 			inner = right;
  		else if (intMember(left->varno, innerrelids) &&
  				 intMember(right->varno, outerrelids))
! 			inner = left;
  		else
  			continue;			/* no good for these input relations */
  
  		/* always a one-element list of hash clauses */
  		hashclauses = makeList1(restrictinfo);
  
- 		/* estimate dispersion of inner var for costing purposes */
- 		innerdispersion = estimate_dispersion(root, inner);
- 
  		/*
  		 * We consider both the cheapest-total-cost and
  		 * cheapest-startup-cost outer paths.  There's no need to consider
! 		 * any but the cheapest- total-cost inner path, however.
  		 */
  		add_path(joinrel, (Path *)
  				 create_hashjoin_path(joinrel,
--- 691,738 ----
  		left = get_leftop(clause);
  		right = get_rightop(clause);
  
! 		/*
! 		 * Check if clause is usable with these sub-rels, find inner side,
! 		 * estimate dispersion of inner var for costing purposes.
! 		 *
! 		 * Since we tend to visit the same clauses over and over when
! 		 * planning a large query, we cache the dispersion estimates in the
! 		 * RestrictInfo node to avoid repeated lookups of statistics.
! 		 */
  		if (intMember(left->varno, outerrelids) &&
  			intMember(right->varno, innerrelids))
! 		{
! 			/* righthand side is inner */
! 			innerdispersion = restrictinfo->right_dispersion;
! 			if (innerdispersion < 0)
! 			{
! 				/* not cached yet */
! 				innerdispersion = estimate_dispersion(root, right);
! 				restrictinfo->right_dispersion = innerdispersion;
! 			}
! 		}
  		else if (intMember(left->varno, innerrelids) &&
  				 intMember(right->varno, outerrelids))
! 		{
! 			/* lefthand side is inner */
! 			innerdispersion = restrictinfo->left_dispersion;
! 			if (innerdispersion < 0)
! 			{
! 				/* not cached yet */
! 				innerdispersion = estimate_dispersion(root, left);
! 				restrictinfo->left_dispersion = innerdispersion;
! 			}
! 		}
  		else
  			continue;			/* no good for these input relations */
  
  		/* always a one-element list of hash clauses */
  		hashclauses = makeList1(restrictinfo);
  
  		/*
  		 * We consider both the cheapest-total-cost and
  		 * cheapest-startup-cost outer paths.  There's no need to consider
! 		 * any but the cheapest-total-cost inner path, however.
  		 */
  		add_path(joinrel, (Path *)
  				 create_hashjoin_path(joinrel,
Index: backend/optimizer/path/pathkeys.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v
retrieving revision 1.27
retrieving revision 1.28
diff -c -r1.27 -r1.28
*** backend/optimizer/path/pathkeys.c	2000/11/12 00:36:58	1.27
--- backend/optimizer/path/pathkeys.c	2000/12/14 22:30:43	1.28
***************
*** 11,17 ****
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.27 2000/11/12 00:36:58 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 11,17 ----
   * Portions Copyright (c) 1994, Regents of the University of California
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/path/pathkeys.c,v 1.28 2000/12/14 22:30:43 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 195,203 ****
   *	  Given a PathKeyItem, find the equi_key_list subset it is a member of,
   *	  if any.  If so, return a pointer to that sublist, which is the
   *	  canonical representation (for this query) of that PathKeyItem's
!  *	  equivalence set.	If it is not found, return a single-element list
!  *	  containing the PathKeyItem (when the item has no equivalence peers,
!  *	  we just allow it to be a standalone list).
   *
   * Note that this function must not be used until after we have completed
   * scanning the WHERE clause for equijoin operators.
--- 195,202 ----
   *	  Given a PathKeyItem, find the equi_key_list subset it is a member of,
   *	  if any.  If so, return a pointer to that sublist, which is the
   *	  canonical representation (for this query) of that PathKeyItem's
!  *	  equivalence set.	If it is not found, add a singleton "equivalence set"
!  *	  to the equi_key_list and return that --- see compare_pathkeys.
   *
   * Note that this function must not be used until after we have completed
   * scanning the WHERE clause for equijoin operators.
***************
*** 206,211 ****
--- 205,211 ----
  make_canonical_pathkey(Query *root, PathKeyItem *item)
  {
  	List	   *cursetlink;
+ 	List	   *newset;
  
  	foreach(cursetlink, root->equi_key_list)
  	{
***************
*** 214,220 ****
  		if (member(item, curset))
  			return curset;
  	}
! 	return lcons(item, NIL);
  }
  
  /*
--- 214,222 ----
  		if (member(item, curset))
  			return curset;
  	}
! 	newset = makeList1(item);
! 	root->equi_key_list = lcons(newset, root->equi_key_list);
! 	return newset;
  }
  
  /*
***************
*** 234,239 ****
--- 236,242 ----
  	{
  		List	   *pathkey = (List *) lfirst(i);
  		PathKeyItem *item;
+ 		List	   *cpathkey;
  
  		/*
  		 * It's sufficient to look at the first entry in the sublist; if
***************
*** 242,249 ****
  		 */
  		Assert(pathkey != NIL);
  		item = (PathKeyItem *) lfirst(pathkey);
! 		new_pathkeys = lappend(new_pathkeys,
! 							   make_canonical_pathkey(root, item));
  	}
  	return new_pathkeys;
  }
--- 245,259 ----
  		 */
  		Assert(pathkey != NIL);
  		item = (PathKeyItem *) lfirst(pathkey);
! 		cpathkey = make_canonical_pathkey(root, item);
! 		/*
! 		 * Eliminate redundant ordering requests --- ORDER BY A,A
! 		 * is the same as ORDER BY A.  We want to check this only
! 		 * after we have canonicalized the keys, so that equivalent-key
! 		 * knowledge is used when deciding if an item is redundant.
! 		 */
! 		if (!ptrMember(cpathkey, new_pathkeys))
! 			new_pathkeys = lappend(new_pathkeys, cpathkey);
  	}
  	return new_pathkeys;
  }
***************
*** 256,275 ****
   * compare_pathkeys
   *	  Compare two pathkeys to see if they are equivalent, and if not whether
   *	  one is "better" than the other.
-  *
-  *	  A pathkey can be considered better than another if it is a superset:
-  *	  it contains all the keys of the other plus more.	For example, either
-  *	  ((A) (B)) or ((A B)) is better than ((A)).
-  *
-  *	  Because we actually only expect to see canonicalized pathkey sublists,
-  *	  we don't have to do the full two-way-subset-inclusion test on each
-  *	  pair of sublists that is implied by the above statement.	Instead we
-  *	  just do an equal().  In the normal case where multi-element sublists
-  *	  are pointers into the root's equi_key_list, equal() will be very fast:
-  *	  it will recognize pointer equality when the sublists are the same,
-  *	  and will fail at the first sublist element when they are not.
   *
!  * Yes, this gets called enough to be worth coding it this tensely.
   */
  PathKeysComparison
  compare_pathkeys(List *keys1, List *keys2)
--- 266,275 ----
   * compare_pathkeys
   *	  Compare two pathkeys to see if they are equivalent, and if not whether
   *	  one is "better" than the other.
   *
!  *	  This function may only be applied to canonicalized pathkey lists.
!  *	  In the canonical representation, sublists can be checked for equality
!  *	  by simple pointer comparison.
   */
  PathKeysComparison
  compare_pathkeys(List *keys1, List *keys2)
***************
*** 285,294 ****
  		List	   *subkey2 = lfirst(key2);
  
  		/*
  		 * We will never have two subkeys where one is a subset of the
! 		 * other, because of the canonicalization explained above.	Either
! 		 * they are equal or they ain't.
  		 */
  		if (!equal(subkey1, subkey2))
  			return PATHKEYS_DIFFERENT;	/* no need to keep looking */
  	}
--- 285,354 ----
  		List	   *subkey2 = lfirst(key2);
  
  		/*
+ 		 * XXX would like to check that we've been given canonicalized input,
+ 		 * but query root not accessible here...
+ 		 */
+ #ifdef NOT_USED
+ 		Assert(ptrMember(subkey1, root->equi_key_list));
+ 		Assert(ptrMember(subkey2, root->equi_key_list));
+ #endif
+ 
+ 		/*
  		 * We will never have two subkeys where one is a subset of the
! 		 * other, because of the canonicalization process.  Either they
! 		 * are equal or they ain't.  Furthermore, we only need pointer
! 		 * comparison to detect equality.
  		 */
+ 		if (subkey1 != subkey2)
+ 			return PATHKEYS_DIFFERENT;	/* no need to keep looking */
+ 	}
+ 
+ 	/*
+ 	 * If we reached the end of only one list, the other is longer and
+ 	 * therefore not a subset.	(We assume the additional sublist(s) of
+ 	 * the other list are not NIL --- no pathkey list should ever have a
+ 	 * NIL sublist.)
+ 	 */
+ 	if (key1 == NIL && key2 == NIL)
+ 		return PATHKEYS_EQUAL;
+ 	if (key1 != NIL)
+ 		return PATHKEYS_BETTER1;/* key1 is longer */
+ 	return PATHKEYS_BETTER2;	/* key2 is longer */
+ }
+ 
+ /*
+  * compare_noncanonical_pathkeys
+  *	  Compare two pathkeys to see if they are equivalent, and if not whether
+  *	  one is "better" than the other.  This is used when we must compare
+  *	  non-canonicalized pathkeys.
+  *
+  *	  A pathkey can be considered better than another if it is a superset:
+  *	  it contains all the keys of the other plus more.	For example, either
+  *	  ((A) (B)) or ((A B)) is better than ((A)).
+  *
+  *	  Currently, the only user of this routine is grouping_planner(),
+  *	  and it will only pass single-element sublists (from
+  *	  make_pathkeys_for_sortclauses).  Therefore we don't have to do the
+  *	  full two-way-subset-inclusion test on each pair of sublists that is
+  *	  implied by the above statement.  Instead we just verify they are
+  *	  singleton lists and then do an equal().  This could be improved if
+  *	  necessary.
+  */
+ PathKeysComparison
+ compare_noncanonical_pathkeys(List *keys1, List *keys2)
+ {
+ 	List	   *key1,
+ 			   *key2;
+ 
+ 	for (key1 = keys1, key2 = keys2;
+ 		 key1 != NIL && key2 != NIL;
+ 		 key1 = lnext(key1), key2 = lnext(key2))
+ 	{
+ 		List	   *subkey1 = lfirst(key1);
+ 		List	   *subkey2 = lfirst(key2);
+ 
+ 		Assert(length(subkey1) == 1);
+ 		Assert(length(subkey2) == 1);
  		if (!equal(subkey1, subkey2))
  			return PATHKEYS_DIFFERENT;	/* no need to keep looking */
  	}
***************
*** 326,331 ****
--- 386,409 ----
  }
  
  /*
+  * noncanonical_pathkeys_contained_in
+  *	  The same, when we don't have canonical pathkeys.
+  */
+ bool
+ noncanonical_pathkeys_contained_in(List *keys1, List *keys2)
+ {
+ 	switch (compare_noncanonical_pathkeys(keys1, keys2))
+ 	{
+ 			case PATHKEYS_EQUAL:
+ 			case PATHKEYS_BETTER2:
+ 			return true;
+ 		default:
+ 			break;
+ 	}
+ 	return false;
+ }
+ 
+ /*
   * get_cheapest_path_for_pathkeys
   *	  Find the cheapest path (according to the specified criterion) that
   *	  satisfies the given pathkeys.  Return NULL if no such path.
***************
*** 464,469 ****
--- 542,548 ----
  		while (*indexkeys != 0 && *ordering != InvalidOid)
  		{
  			Var		   *relvar = find_indexkey_var(root, rel, *indexkeys);
+ 			List	   *cpathkey;
  
  			sortop = *ordering;
  			if (ScanDirectionIsBackward(scandir))
***************
*** 475,482 ****
  
  			/* OK, make a sublist for this sort key */
  			item = makePathKeyItem((Node *) relvar, sortop);
! 			retval = lappend(retval, make_canonical_pathkey(root, item));
! 
  			indexkeys++;
  			ordering++;
  		}
--- 554,566 ----
  
  			/* OK, make a sublist for this sort key */
  			item = makePathKeyItem((Node *) relvar, sortop);
! 			cpathkey = make_canonical_pathkey(root, item);
! 			/*
! 			 * Eliminate redundant ordering info; could happen if query
! 			 * is such that index keys are equijoined...
! 			 */
! 			if (!ptrMember(cpathkey, retval))
! 				retval = lappend(retval, cpathkey);
  			indexkeys++;
  			ordering++;
  		}
***************
*** 526,546 ****
   *	  outer path (since the join will retain the ordering of the outer path)
   *	  plus any vars of the inner path that are equijoined to the outer vars.
   *
!  *	  Per the discussion at the top of this file, equijoined inner vars
   *	  can be considered path keys of the result, just the same as the outer
   *	  vars they were joined with; furthermore, it doesn't matter what kind
   *	  of join algorithm is actually used.
   *
!  * 'outer_pathkeys' is the list of the outer path's path keys
!  * 'join_rel_tlist' is the target list of the join relation
!  * 'equi_key_list' is the query's list of pathkeyitem equivalence sets
   *
   * Returns the list of new path keys.
   */
  List *
! build_join_pathkeys(List *outer_pathkeys,
! 					List *join_rel_tlist,
! 					List *equi_key_list)
  {
  
  	/*
--- 610,629 ----
   *	  outer path (since the join will retain the ordering of the outer path)
   *	  plus any vars of the inner path that are equijoined to the outer vars.
   *
!  *	  Per the discussion in backend/optimizer/README, equijoined inner vars
   *	  can be considered path keys of the result, just the same as the outer
   *	  vars they were joined with; furthermore, it doesn't matter what kind
   *	  of join algorithm is actually used.
   *
!  * 'joinrel' is the join relation that paths are being formed for
!  * 'outer_pathkeys' is the list of the current outer path's path keys
   *
   * Returns the list of new path keys.
   */
  List *
! build_join_pathkeys(Query *root,
! 					RelOptInfo *joinrel,
! 					List *outer_pathkeys)
  {
  
  	/*
***************
*** 549,557 ****
  	 * a darn thing here!  The inner-rel vars we used to need to add are
  	 * *already* part of the outer pathkey!
  	 *
! 	 * I'd remove the routine entirely, but maybe someday we'll need it...
  	 */
! 	return outer_pathkeys;
  }
  
  /****************************************************************************
--- 632,642 ----
  	 * a darn thing here!  The inner-rel vars we used to need to add are
  	 * *already* part of the outer pathkey!
  	 *
! 	 * We do, however, need to truncate the pathkeys list, since it may
! 	 * contain pathkeys that were useful for forming this joinrel but are
! 	 * uninteresting to higher levels.
  	 */
! 	return truncate_useless_pathkeys(root, joinrel, outer_pathkeys);
  }
  
  /****************************************************************************
***************
*** 603,608 ****
--- 688,726 ----
   ****************************************************************************/
  
  /*
+  * cache_mergeclause_pathkeys
+  *		Make the cached pathkeys valid in a mergeclause restrictinfo.
+  *
+  * RestrictInfo contains fields in which we may cache the result
+  * of looking up the canonical pathkeys for the left and right sides
+  * of the mergeclause.  (Note that in normal cases they will be the
+  * same, but not if the mergeclause appears above an OUTER JOIN.)
+  * This is a worthwhile savings because these routines will be invoked
+  * many times when dealing with a many-relation query.
+  */
+ static void
+ cache_mergeclause_pathkeys(Query *root, RestrictInfo *restrictinfo)
+ {
+ 	Node	   *key;
+ 	PathKeyItem *item;
+ 
+ 	Assert(restrictinfo->mergejoinoperator != InvalidOid);
+ 
+ 	if (restrictinfo->left_pathkey == NIL)
+ 	{
+ 		key = (Node *) get_leftop(restrictinfo->clause);
+ 		item = makePathKeyItem(key, restrictinfo->left_sortop);
+ 		restrictinfo->left_pathkey = make_canonical_pathkey(root, item);
+ 	}
+ 	if (restrictinfo->right_pathkey == NIL)
+ 	{
+ 		key = (Node *) get_rightop(restrictinfo->clause);
+ 		item = makePathKeyItem(key, restrictinfo->right_sortop);
+ 		restrictinfo->right_pathkey = make_canonical_pathkey(root, item);
+ 	}
+ }
+ 
+ /*
   * find_mergeclauses_for_pathkeys
   *	  This routine attempts to find a set of mergeclauses that can be
   *	  used with a specified ordering for one of the input relations.
***************
*** 618,628 ****
   *
   * XXX Ideally we ought to be considering context, ie what path orderings
   * are available on the other side of the join, rather than just making
!  * an arbitrary choice among the mergeclause orders that will work for
!  * this side of the join.
   */
  List *
! find_mergeclauses_for_pathkeys(List *pathkeys, List *restrictinfos)
  {
  	List	   *mergeclauses = NIL;
  	List	   *i;
--- 736,748 ----
   *
   * XXX Ideally we ought to be considering context, ie what path orderings
   * are available on the other side of the join, rather than just making
!  * an arbitrary choice among the mergeclauses that will work for this side
!  * of the join.
   */
  List *
! find_mergeclauses_for_pathkeys(Query *root,
! 							   List *pathkeys,
! 							   List *restrictinfos)
  {
  	List	   *mergeclauses = NIL;
  	List	   *i;
***************
*** 634,671 ****
  		List	   *j;
  
  		/*
! 		 * We can match any of the keys in this pathkey sublist, since
! 		 * they're all equivalent.  And we can match against either left
! 		 * or right side of any mergejoin clause we haven't used yet.  For
! 		 * the moment we use a dumb "greedy" algorithm with no
! 		 * backtracking.  Is it worth being any smarter to make a longer
! 		 * list of usable mergeclauses?  Probably not.
  		 */
! 		foreach(j, pathkey)
  		{
! 			PathKeyItem *keyitem = lfirst(j);
! 			Node	   *key = keyitem->key;
! 			Oid			keyop = keyitem->sortop;
! 			List	   *k;
  
! 			foreach(k, restrictinfos)
  			{
! 				RestrictInfo *restrictinfo = lfirst(k);
! 
! 				Assert(restrictinfo->mergejoinoperator != InvalidOid);
! 
! 				if (((keyop == restrictinfo->left_sortop &&
! 					  equal(key, get_leftop(restrictinfo->clause))) ||
! 					 (keyop == restrictinfo->right_sortop &&
! 					  equal(key, get_rightop(restrictinfo->clause)))) &&
! 					!member(restrictinfo, mergeclauses))
! 				{
! 					matched_restrictinfo = restrictinfo;
! 					break;
! 				}
! 			}
! 			if (matched_restrictinfo)
  				break;
  		}
  
  		/*
--- 754,781 ----
  		List	   *j;
  
  		/*
! 		 * We can match a pathkey against either left or right side of any
! 		 * mergejoin clause we haven't used yet.  For the moment we use a
! 		 * dumb "greedy" algorithm with no backtracking.  Is it worth being
! 		 * any smarter to make a longer list of usable mergeclauses?
! 		 * Probably not.
  		 */
! 		foreach(j, restrictinfos)
  		{
! 			RestrictInfo *restrictinfo = lfirst(j);
  
! 			cache_mergeclause_pathkeys(root, restrictinfo);
! 			/*
! 			 * We can compare canonical pathkey sublists by simple
! 			 * pointer equality; see compare_pathkeys.
! 			 */
! 			if ((pathkey == restrictinfo->left_pathkey ||
! 				 pathkey == restrictinfo->right_pathkey) &&
! 				!ptrMember(restrictinfo, mergeclauses))
  			{
! 				matched_restrictinfo = restrictinfo;
  				break;
+ 			}
  		}
  
  		/*
***************
*** 715,761 ****
  	{
  		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
  		Node	   *key;
- 		Oid			sortop;
- 		PathKeyItem *item;
  		List	   *pathkey;
  
! 		Assert(restrictinfo->mergejoinoperator != InvalidOid);
  
- 		/*
- 		 * Which key and sortop is needed for this relation?
- 		 */
  		key = (Node *) get_leftop(restrictinfo->clause);
! 		sortop = restrictinfo->left_sortop;
! 		if (!IsA(key, Var) ||
! 			!intMember(((Var *) key)->varno, rel->relids))
  		{
  			key = (Node *) get_rightop(restrictinfo->clause);
! 			sortop = restrictinfo->right_sortop;
! 			if (!IsA(key, Var) ||
! 				!intMember(((Var *) key)->varno, rel->relids))
  				elog(ERROR, "make_pathkeys_for_mergeclauses: can't identify which side of mergeclause to use");
  		}
  
  		/*
! 		 * Find or create canonical pathkey sublist for this sort item.
  		 */
! 		item = makePathKeyItem(key, sortop);
! 		pathkey = make_canonical_pathkey(root, item);
  
  		/*
! 		 * Most of the time we will get back a canonical pathkey set
! 		 * including both the mergeclause's left and right sides (the only
! 		 * case where we don't is if the mergeclause appeared in an OUTER
! 		 * JOIN, which causes us not to generate an equijoin set from it).
! 		 * Therefore, most of the time the item we just made is not part
! 		 * of the returned structure, and we can free it.  This check
! 		 * saves a useful amount of storage in a big join tree.
  		 */
! 		if (item != (PathKeyItem *) lfirst(pathkey))
! 			pfree(item);
  
! 		pathkeys = lappend(pathkeys, pathkey);
  	}
  
! 	return pathkeys;
  }
--- 825,994 ----
  	{
  		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
  		Node	   *key;
  		List	   *pathkey;
  
! 		cache_mergeclause_pathkeys(root, restrictinfo);
  
  		key = (Node *) get_leftop(restrictinfo->clause);
! 		if (IsA(key, Var) && intMember(((Var *) key)->varno, rel->relids))
! 		{
! 			/* Rel is left side of mergeclause */
! 			pathkey = restrictinfo->left_pathkey;
! 		}
! 		else
  		{
  			key = (Node *) get_rightop(restrictinfo->clause);
! 			if (IsA(key, Var) && intMember(((Var *) key)->varno, rel->relids))
! 			{
! 				/* Rel is right side of mergeclause */
! 				pathkey = restrictinfo->right_pathkey;
! 			}
! 			else
! 			{
  				elog(ERROR, "make_pathkeys_for_mergeclauses: can't identify which side of mergeclause to use");
+ 				pathkey = NIL;	/* keep compiler quiet */
+ 			}
  		}
  
  		/*
! 		 * When we are given multiple merge clauses, it's possible that some
! 		 * clauses refer to the same vars as earlier clauses.  There's no
! 		 * reason for us to specify sort keys like (A,B,A) when (A,B) will
! 		 * do --- and adding redundant sort keys makes add_path think that
! 		 * this sort order is different from ones that are really the same,
! 		 * so don't do it.  Since we now have a canonicalized pathkey,
! 		 * a simple ptrMember test is sufficient to detect redundant keys.
  		 */
! 		if (!ptrMember(pathkey, pathkeys))
! 			pathkeys = lappend(pathkeys, pathkey);
! 	}
! 
! 	return pathkeys;
! }
  
+ /****************************************************************************
+  *		PATHKEY USEFULNESS CHECKS
+  *
+  * We only want to remember as many of the pathkeys of a path as have some
+  * potential use, either for subsequent mergejoins or for meeting the query's
+  * requested output ordering.  This ensures that add_path() won't consider
+  * a path to have a usefully different ordering unless it really is useful.
+  * These routines check for usefulness of given pathkeys.
+  ****************************************************************************/
+ 
+ /*
+  * pathkeys_useful_for_merging
+  *		Count the number of pathkeys that may be useful for mergejoins
+  *		above the given relation (by looking at its joininfo lists).
+  *
+  * We consider a pathkey potentially useful if it corresponds to the merge
+  * ordering of either side of any joinclause for the rel.  This might be
+  * overoptimistic, since joinclauses that appear in different join lists
+  * might never be usable at the same time, but trying to be exact is likely
+  * to be more trouble than it's worth.
+  */
+ int
+ pathkeys_useful_for_merging(Query *root, RelOptInfo *rel, List *pathkeys)
+ {
+ 	int			useful = 0;
+ 	List	   *i;
+ 
+ 	foreach(i, pathkeys)
+ 	{
+ 		List	   *pathkey = lfirst(i);
+ 		bool		matched = false;
+ 		List	   *j;
+ 
+ 		foreach(j, rel->joininfo)
+ 		{
+ 			JoinInfo   *joininfo = (JoinInfo *) lfirst(j);
+ 			List	   *k;
+ 
+ 			foreach(k, joininfo->jinfo_restrictinfo)
+ 			{
+ 				RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(k);
+ 
+ 				if (restrictinfo->mergejoinoperator == InvalidOid)
+ 					continue;
+ 				cache_mergeclause_pathkeys(root, restrictinfo);
+ 				/*
+ 				 * We can compare canonical pathkey sublists by simple
+ 				 * pointer equality; see compare_pathkeys.
+ 				 */
+ 				if (pathkey == restrictinfo->left_pathkey ||
+ 					pathkey == restrictinfo->right_pathkey)
+ 				{
+ 					matched = true;
+ 					break;
+ 				}
+ 			}
+ 
+ 			if (matched)
+ 				break;
+ 		}
+ 
  		/*
! 		 * If we didn't find a mergeclause, we're done --- any additional
! 		 * sort-key positions in the pathkeys are useless.	(But we can
! 		 * still mergejoin if we found at least one mergeclause.)
  		 */
! 		if (matched)
! 			useful++;
! 		else
! 			break;
! 	}
! 
! 	return useful;
! }
! 
! /*
!  * pathkeys_useful_for_ordering
!  *		Count the number of pathkeys that are useful for meeting the
!  *		query's requested output ordering.
!  *
!  * Unlike merge pathkeys, this is an all-or-nothing affair: it does us
!  * no good to order by just the first key(s) of the requested ordering.
!  * So the result is always either 0 or length(root->query_pathkeys).
!  */
! int
! pathkeys_useful_for_ordering(Query *root, List *pathkeys)
! {
! 	if (root->query_pathkeys == NIL)
! 		return 0;				/* no special ordering requested */
! 
! 	if (pathkeys == NIL)
! 		return 0;				/* unordered path */
  
! 	if (pathkeys_contained_in(root->query_pathkeys, pathkeys))
! 	{
! 		/* It's useful ... or at least the first N keys are */
! 		return length(root->query_pathkeys);
  	}
  
! 	return 0;					/* path ordering not useful */
! }
! 
! /*
!  * truncate_useless_pathkeys
!  *		Shorten the given pathkey list to just the useful pathkeys.
!  */
! List *
! truncate_useless_pathkeys(Query *root,
! 						  RelOptInfo *rel,
! 						  List *pathkeys)
! {
! 	int			nuseful;
! 	int			nuseful2;
! 
! 	nuseful = pathkeys_useful_for_merging(root, rel, pathkeys);
! 	nuseful2 = pathkeys_useful_for_ordering(root, pathkeys);
! 	if (nuseful2 > nuseful)
! 		nuseful = nuseful2;
! 	/* Note: not safe to modify input list destructively, but we can avoid
! 	 * copying the list if we're not actually going to change it
! 	 */
! 	if (nuseful == length(pathkeys))
! 		return pathkeys;
! 	else
! 		return ltruncate(nuseful, listCopy(pathkeys));
  }
Index: backend/optimizer/plan/initsplan.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v
retrieving revision 1.54
retrieving revision 1.55
diff -c -r1.54 -r1.55
*** backend/optimizer/plan/initsplan.c	2000/12/12 23:33:33	1.54
--- backend/optimizer/plan/initsplan.c	2000/12/14 22:30:43	1.55
***************
*** 8,14 ****
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.54 2000/12/12 23:33:33 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 8,14 ----
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/plan/initsplan.c,v 1.55 2000/12/14 22:30:43 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 343,349 ****
--- 343,353 ----
  	restrictinfo->mergejoinoperator = InvalidOid;
  	restrictinfo->left_sortop = InvalidOid;
  	restrictinfo->right_sortop = InvalidOid;
+ 	restrictinfo->left_pathkey = NIL; /* not computable yet */
+ 	restrictinfo->right_pathkey = NIL;
  	restrictinfo->hashjoinoperator = InvalidOid;
+ 	restrictinfo->left_dispersion = -1; /* not computed until needed */
+ 	restrictinfo->right_dispersion = -1;
  
  	/*
  	 * Retrieve all relids and vars contained within the clause.
Index: backend/optimizer/plan/planner.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v
retrieving revision 1.97
retrieving revision 1.98
diff -c -r1.97 -r1.98
*** backend/optimizer/plan/planner.c	2000/12/06 23:55:17	1.97
--- backend/optimizer/plan/planner.c	2000/12/14 22:30:43	1.98
***************
*** 8,14 ****
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.97 2000/12/06 23:55:17 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 8,14 ----
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.98 2000/12/14 22:30:43 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 931,943 ****
  			 * If both GROUP BY and ORDER BY are specified, we will need
  			 * two levels of sort --- and, therefore, certainly need to
  			 * read all the input tuples --- unless ORDER BY is a subset
! 			 * of GROUP BY.  (Although we are comparing non-canonicalized
! 			 * pathkeys here, it should be OK since they will both contain
! 			 * only single-element sublists at this point.	See
! 			 * pathkeys.c.)
  			 */
  			if (parse->groupClause && parse->sortClause &&
! 				!pathkeys_contained_in(sort_pathkeys, group_pathkeys))
  				tuple_fraction = 0.0;
  		}
  		else if (parse->hasAggs)
--- 931,942 ----
  			 * If both GROUP BY and ORDER BY are specified, we will need
  			 * two levels of sort --- and, therefore, certainly need to
  			 * read all the input tuples --- unless ORDER BY is a subset
! 			 * of GROUP BY.  (We have not yet canonicalized the pathkeys,
! 			 * so must use the slower noncanonical comparison method.)
  			 */
  			if (parse->groupClause && parse->sortClause &&
! 				!noncanonical_pathkeys_contained_in(sort_pathkeys,
! 													group_pathkeys))
  				tuple_fraction = 0.0;
  		}
  		else if (parse->hasAggs)
Index: backend/optimizer/prep/prepunion.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v
retrieving revision 1.57
retrieving revision 1.58
diff -c -r1.57 -r1.58
*** backend/optimizer/prep/prepunion.c	2000/12/12 23:33:34	1.57
--- backend/optimizer/prep/prepunion.c	2000/12/14 22:30:44	1.58
***************
*** 14,20 ****
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.57 2000/12/12 23:33:34 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 14,20 ----
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/prep/prepunion.c,v 1.58 2000/12/14 22:30:44 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 667,672 ****
--- 667,676 ----
  
  		newinfo->subclauseindices = NIL;
  		newinfo->eval_cost = -1; /* reset this too */
+ 		newinfo->left_pathkey = NIL; /* and these */
+ 		newinfo->right_pathkey = NIL;
+ 		newinfo->left_dispersion = -1;
+ 		newinfo->right_dispersion = -1;
  
  		return (Node *) newinfo;
  	}
Index: backend/optimizer/util/pathnode.c
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v
retrieving revision 1.68
retrieving revision 1.69
diff -c -r1.68 -r1.69
*** backend/optimizer/util/pathnode.c	2000/11/12 00:36:59	1.68
--- backend/optimizer/util/pathnode.c	2000/12/14 22:30:44	1.69
***************
*** 8,14 ****
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.68 2000/11/12 00:36:59 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 8,14 ----
   *
   *
   * IDENTIFICATION
!  *	  $Header: /home/projects/pgsql/cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.69 2000/12/14 22:30:44 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 161,171 ****
   *	  pathlist any old paths that are dominated by new_path --- that is,
   *	  new_path is both cheaper and at least as well ordered.
   *
   *	  NOTE: discarded Path objects are immediately pfree'd to reduce planner
   *	  memory consumption.  We dare not try to free the substructure of a Path,
   *	  since much of it may be shared with other Paths or the query tree itself;
   *	  but just recycling discarded Path nodes is a very useful savings in
!  *	  a large join tree.
   *
   * 'parent_rel' is the relation entry to which the path corresponds.
   * 'new_path' is a potential path for parent_rel.
--- 161,177 ----
   *	  pathlist any old paths that are dominated by new_path --- that is,
   *	  new_path is both cheaper and at least as well ordered.
   *
+  *	  The pathlist is kept sorted by TOTAL_COST metric, with cheaper paths
+  *	  at the front.  No code depends on that for correctness; it's simply
+  *	  a speed hack within this routine.  Doing it that way makes it more
+  *	  likely that we will reject an inferior path after a few comparisons,
+  *	  rather than many comparisons.
+  *
   *	  NOTE: discarded Path objects are immediately pfree'd to reduce planner
   *	  memory consumption.  We dare not try to free the substructure of a Path,
   *	  since much of it may be shared with other Paths or the query tree itself;
   *	  but just recycling discarded Path nodes is a very useful savings in
!  *	  a large join tree.  We can recycle the List nodes of pathlist, too.
   *
   * 'parent_rel' is the relation entry to which the path corresponds.
   * 'new_path' is a potential path for parent_rel.
***************
*** 177,182 ****
--- 183,189 ----
  {
  	bool		accept_new = true;		/* unless we find a superior old
  										 * path */
+ 	List	   *insert_after = NIL;		/* where to insert new item */
  	List	   *p1_prev = NIL;
  	List	   *p1;
  
***************
*** 185,191 ****
  	 * possible for more than one old path to be tossed out because
  	 * new_path dominates it.
  	 */
! 	foreach(p1, parent_rel->pathlist)
  	{
  		Path	   *old_path = (Path *) lfirst(p1);
  		bool		remove_old = false; /* unless new proves superior */
--- 192,199 ----
  	 * possible for more than one old path to be tossed out because
  	 * new_path dominates it.
  	 */
! 	p1 = parent_rel->pathlist;			/* cannot use foreach here */
! 	while (p1 != NIL)
  	{
  		Path	   *old_path = (Path *) lfirst(p1);
  		bool		remove_old = false; /* unless new proves superior */
***************
*** 197,209 ****
  		 * If the two paths compare differently for startup and total
  		 * cost, then we want to keep both, and we can skip the (much
  		 * slower) comparison of pathkeys.	If they compare the same,
! 		 * proceed with the pathkeys comparison.  Note this test relies on
! 		 * the fact that compare_path_costs will only return 0 if both
  		 * costs are equal (and, therefore, there's no need to call it
  		 * twice in that case).
  		 */
  		if (costcmp == 0 ||
! 		 costcmp == compare_path_costs(new_path, old_path, STARTUP_COST))
  		{
  			switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
  			{
--- 205,218 ----
  		 * If the two paths compare differently for startup and total
  		 * cost, then we want to keep both, and we can skip the (much
  		 * slower) comparison of pathkeys.	If they compare the same,
! 		 * proceed with the pathkeys comparison.  Note: this test relies
! 		 * on the fact that compare_path_costs will only return 0 if both
  		 * costs are equal (and, therefore, there's no need to call it
  		 * twice in that case).
  		 */
  		if (costcmp == 0 ||
! 			costcmp == compare_path_costs(new_path, old_path,
! 										  STARTUP_COST))
  		{
  			switch (compare_pathkeys(new_path->pathkeys, old_path->pathkeys))
  			{
***************
*** 234,247 ****
  		 */
  		if (remove_old && parent_rel->pruneable)
  		{
  			if (p1_prev)
! 				lnext(p1_prev) = lnext(p1);
  			else
! 				parent_rel->pathlist = lnext(p1);
  			pfree(old_path);
  		}
  		else
  			p1_prev = p1;
  
  		/*
  		 * If we found an old path that dominates new_path, we can quit
--- 243,266 ----
  		 */
  		if (remove_old && parent_rel->pruneable)
  		{
+ 			List   *p1_next = lnext(p1);
+ 
  			if (p1_prev)
! 				lnext(p1_prev) = p1_next;
  			else
! 				parent_rel->pathlist = p1_next;
  			pfree(old_path);
+ 			pfree(p1);			/* this is why we can't use foreach */
+ 			p1 = p1_next;
  		}
  		else
+ 		{
+ 			/* new belongs after this old path if it has cost >= old's */
+ 			if (costcmp >= 0)
+ 				insert_after = p1;
  			p1_prev = p1;
+ 			p1 = lnext(p1);
+ 		}
  
  		/*
  		 * If we found an old path that dominates new_path, we can quit
***************
*** 254,265 ****
  
  	if (accept_new)
  	{
! 		/* Accept the path */
! 		parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
  	}
  	else
  	{
! 		/* Reject and recycle the path */
  		pfree(new_path);
  	}
  }
--- 273,287 ----
  
  	if (accept_new)
  	{
! 		/* Accept the new path: insert it at proper place in pathlist */
! 		if (insert_after)
! 			lnext(insert_after) = lcons(new_path, lnext(insert_after));
! 		else
! 			parent_rel->pathlist = lcons(new_path, parent_rel->pathlist);
  	}
  	else
  	{
! 		/* Reject and recycle the new path */
  		pfree(new_path);
  	}
  }
***************
*** 296,304 ****
   * 'index' is an index on 'rel'
   * 'restriction_clauses' is a list of RestrictInfo nodes
   *			to be used as index qual conditions in the scan.
   * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
!  *			if the caller expects a specific scan direction,
!  *			or NoMovementScanDirection if the caller is willing to accept
   *			an unordered index.
   *
   * Returns the new path node.
--- 318,326 ----
   * 'index' is an index on 'rel'
   * 'restriction_clauses' is a list of RestrictInfo nodes
   *			to be used as index qual conditions in the scan.
+  * 'pathkeys' describes the ordering of the path.
   * 'indexscandir' is ForwardScanDirection or BackwardScanDirection
!  *			for an ordered index, or NoMovementScanDirection for
   *			an unordered index.
   *
   * Returns the new path node.
***************
*** 308,313 ****
--- 330,336 ----
  				  RelOptInfo *rel,
  				  IndexOptInfo *index,
  				  List *restriction_clauses,
+ 				  List *pathkeys,
  				  ScanDirection indexscandir)
  {
  	IndexPath  *pathnode = makeNode(IndexPath);
***************
*** 315,339 ****
  
  	pathnode->path.pathtype = T_IndexScan;
  	pathnode->path.parent = rel;
! 
! 	pathnode->path.pathkeys = build_index_pathkeys(root, rel, index,
! 												   indexscandir);
! 	if (pathnode->path.pathkeys == NIL)
! 	{
! 		/* No ordering available from index, is that OK? */
! 		if (!ScanDirectionIsNoMovement(indexscandir))
! 			elog(ERROR, "create_index_path: failed to create ordered index scan");
! 	}
! 	else
! 	{
! 
! 		/*
! 		 * The index is ordered, and build_index_pathkeys defaulted to
! 		 * forward scan, so make sure we mark the pathnode properly.
! 		 */
! 		if (ScanDirectionIsNoMovement(indexscandir))
! 			indexscandir = ForwardScanDirection;
! 	}
  
  	indexquals = get_actual_clauses(restriction_clauses);
  	/* expand special operators to indexquals the executor can handle */
--- 338,344 ----
  
  	pathnode->path.pathtype = T_IndexScan;
  	pathnode->path.parent = rel;
! 	pathnode->path.pathkeys = pathkeys;
  
  	indexquals = get_actual_clauses(restriction_clauses);
  	/* expand special operators to indexquals the executor can handle */
Index: include/nodes/relation.h
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/include/nodes/relation.h,v
retrieving revision 1.51
retrieving revision 1.52
diff -c -r1.51 -r1.52
*** include/nodes/relation.h	2000/12/12 23:33:32	1.51
--- include/nodes/relation.h	2000/12/14 22:30:44	1.52
***************
*** 7,13 ****
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
!  * $Id: relation.h,v 1.51 2000/12/12 23:33:32 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 7,13 ----
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
!  * $Id: relation.h,v 1.52 2000/12/14 22:30:44 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 504,511 ****
--- 504,519 ----
  	Oid			left_sortop;	/* leftside sortop needed for mergejoin */
  	Oid			right_sortop;	/* rightside sortop needed for mergejoin */
  
+ 	/* cache space for mergeclause processing; NIL if not yet set */
+ 	List	   *left_pathkey;	/* canonical pathkey for left side */
+ 	List	   *right_pathkey;	/* canonical pathkey for right side */
+ 
  	/* valid if clause is hashjoinable, else InvalidOid: */
  	Oid			hashjoinoperator;		/* copy of clause operator */
+ 
+ 	/* cache space for hashclause processing; -1 if not yet set */
+ 	Selectivity	left_dispersion;	/* dispersion of left side */
+ 	Selectivity	right_dispersion;	/* dispersion of right side */
  } RestrictInfo;
  
  /*
Index: include/optimizer/pathnode.h
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/include/optimizer/pathnode.h,v
retrieving revision 1.31
retrieving revision 1.32
diff -c -r1.31 -r1.32
*** include/optimizer/pathnode.h	2000/11/12 00:37:01	1.31
--- include/optimizer/pathnode.h	2000/12/14 22:30:45	1.32
***************
*** 7,13 ****
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
!  * $Id: pathnode.h,v 1.31 2000/11/12 00:37:01 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 7,13 ----
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
!  * $Id: pathnode.h,v 1.32 2000/12/14 22:30:45 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 30,35 ****
--- 30,36 ----
  extern IndexPath *create_index_path(Query *root, RelOptInfo *rel,
  				  IndexOptInfo *index,
  				  List *restriction_clauses,
+ 				  List *pathkeys,
  				  ScanDirection indexscandir);
  extern TidPath *create_tidscan_path(RelOptInfo *rel, List *tideval);
  extern AppendPath *create_append_path(RelOptInfo *rel, List *subpaths);
Index: include/optimizer/paths.h
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/include/optimizer/paths.h,v
retrieving revision 1.48
retrieving revision 1.49
diff -c -r1.48 -r1.49
*** include/optimizer/paths.h	2000/09/29 18:21:40	1.48
--- include/optimizer/paths.h	2000/12/14 22:30:45	1.49
***************
*** 8,14 ****
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
!  * $Id: paths.h,v 1.48 2000/09/29 18:21:40 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
--- 8,14 ----
   * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
   * Portions Copyright (c) 1994, Regents of the University of California
   *
!  * $Id: paths.h,v 1.49 2000/12/14 22:30:45 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
***************
*** 34,42 ****
   * indxpath.c
   *	  routines to generate index paths
   */
! extern void create_index_paths(Query *root, RelOptInfo *rel, List *indices,
! 				   List *restrictinfo_list,
! 				   List *joininfo_list);
  extern Oid indexable_operator(Expr *clause, Oid opclass, Oid relam,
  				   bool indexkey_on_left);
  extern List *extract_or_indexqual_conditions(RelOptInfo *rel,
--- 34,40 ----
   * indxpath.c
   *	  routines to generate index paths
   */
! extern void create_index_paths(Query *root, RelOptInfo *rel, List *indices);
  extern Oid indexable_operator(Expr *clause, Oid opclass, Oid relam,
  				   bool indexkey_on_left);
  extern List *extract_or_indexqual_conditions(RelOptInfo *rel,
***************
*** 97,102 ****
--- 95,103 ----
  extern List *canonicalize_pathkeys(Query *root, List *pathkeys);
  extern PathKeysComparison compare_pathkeys(List *keys1, List *keys2);
  extern bool pathkeys_contained_in(List *keys1, List *keys2);
+ extern PathKeysComparison compare_noncanonical_pathkeys(List *keys1,
+ 														List *keys2);
+ extern bool noncanonical_pathkeys_contained_in(List *keys1, List *keys2);
  extern Path *get_cheapest_path_for_pathkeys(List *paths, List *pathkeys,
  							   CostSelector cost_criterion);
  extern Path *get_cheapest_fractional_path_for_pathkeys(List *paths,
***************
*** 105,119 ****
  extern List *build_index_pathkeys(Query *root, RelOptInfo *rel,
  					 IndexOptInfo *index,
  					 ScanDirection scandir);
! extern List *build_join_pathkeys(List *outer_pathkeys,
! 					List *join_rel_tlist,
! 					List *equi_key_list);
  extern List *make_pathkeys_for_sortclauses(List *sortclauses,
  							  List *tlist);
! extern List *find_mergeclauses_for_pathkeys(List *pathkeys,
! 							   List *restrictinfos);
  extern List *make_pathkeys_for_mergeclauses(Query *root,
  											List *mergeclauses,
  											RelOptInfo *rel);
  
  #endif	 /* PATHS_H */
--- 106,128 ----
  extern List *build_index_pathkeys(Query *root, RelOptInfo *rel,
  					 IndexOptInfo *index,
  					 ScanDirection scandir);
! extern List *build_join_pathkeys(Query *root,
! 								 RelOptInfo *joinrel,
! 								 List *outer_pathkeys);
  extern List *make_pathkeys_for_sortclauses(List *sortclauses,
  							  List *tlist);
! extern List *find_mergeclauses_for_pathkeys(Query *root,
! 											List *pathkeys,
! 											List *restrictinfos);
  extern List *make_pathkeys_for_mergeclauses(Query *root,
  											List *mergeclauses,
  											RelOptInfo *rel);
+ extern int	pathkeys_useful_for_merging(Query *root,
+ 										RelOptInfo *rel,
+ 										List *pathkeys);
+ extern int	pathkeys_useful_for_ordering(Query *root, List *pathkeys);
+ extern List *truncate_useless_pathkeys(Query *root,
+ 									   RelOptInfo *rel,
+ 									   List *pathkeys);
  
  #endif	 /* PATHS_H */
Index: test/regress/expected/join.out
===================================================================
RCS file: /home/projects/pgsql/cvsroot/pgsql/src/test/regress/expected/join.out,v
retrieving revision 1.8
retrieving revision 1.9
diff -c -r1.8 -r1.9
*** test/regress/expected/join.out	2000/11/06 18:10:24	1.8
--- test/regress/expected/join.out	2000/12/14 22:30:45	1.9
***************
*** 1796,1808 ****
       | 4 | 1 | four  |   
       | 5 | 0 | five  | -5
       | 5 | 0 | five  | -5
-      |   |   |       |   
-      |   |   |       |  0
       | 6 | 6 | six   |   
       | 7 | 7 | seven |   
       | 8 | 8 | eight |   
       |   |   | null  |   
       |   | 0 | zero  |   
  (15 rows)
  
  SELECT '' AS "xxx", *
--- 1796,1808 ----
       | 4 | 1 | four  |   
       | 5 | 0 | five  | -5
       | 5 | 0 | five  | -5
       | 6 | 6 | six   |   
       | 7 | 7 | seven |   
       | 8 | 8 | eight |   
       |   |   | null  |   
       |   | 0 | zero  |   
+      |   |   |       |   
+      |   |   |       |  0
  (15 rows)
  
  SELECT '' AS "xxx", *
***************
*** 1817,1829 ****
       | 4 | 1 | four  |   
       | 5 | 0 | five  | -5
       | 5 | 0 | five  | -5
-      |   |   |       |   
-      |   |   |       |  0
       | 6 | 6 | six   |   
       | 7 | 7 | seven |   
       | 8 | 8 | eight |   
       |   |   | null  |   
       |   | 0 | zero  |   
  (15 rows)
  
  SELECT '' AS "xxx", *
--- 1817,1829 ----
       | 4 | 1 | four  |   
       | 5 | 0 | five  | -5
       | 5 | 0 | five  | -5
       | 6 | 6 | six   |   
       | 7 | 7 | seven |   
       | 8 | 8 | eight |   
       |   |   | null  |   
       |   | 0 | zero  |   
+      |   |   |       |   
+      |   |   |       |  0
  (15 rows)
  
  SELECT '' AS "xxx", *

pgsql-patches by date

Next:From: Michael DavisDate: 2000-12-16 00:05:48
Subject: Bug fix
Previous:From: Bruce MomjianDate: 2000-12-15 19:15:35
Subject: Re: 7.1: to_char() bug fix

Privacy Policy | About PostgreSQL
Copyright © 1996-2014 The PostgreSQL Global Development Group