Index: src/backend/nodes/outfuncs.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/nodes/outfuncs.c,v retrieving revision 1.355 diff -c -r1.355 outfuncs.c *** src/backend/nodes/outfuncs.c 21 Mar 2009 00:04:39 -0000 1.355 --- src/backend/nodes/outfuncs.c 26 Mar 2009 15:19:43 -0000 *************** *** 1448,1453 **** --- 1448,1454 ---- _outJoinPathInfo(str, (JoinPath *) node); WRITE_NODE_FIELD(path_hashclauses); + WRITE_INT_FIELD(num_batches); } static void Index: src/backend/optimizer/path/costsize.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v retrieving revision 1.205 diff -c -r1.205 costsize.c *** src/backend/optimizer/path/costsize.c 21 Mar 2009 00:04:39 -0000 1.205 --- src/backend/optimizer/path/costsize.c 26 Mar 2009 15:19:43 -0000 *************** *** 1880,1885 **** --- 1880,1887 ---- &numbatches, &num_skew_mcvs); virtualbuckets = (double) numbuckets *(double) numbatches; + /* mark the path with estimated # of batches */ + path->num_batches = numbatches; /* * Determine bucketsize fraction for inner relation. We use the smallest Index: src/backend/optimizer/plan/createplan.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v retrieving revision 1.256 diff -c -r1.256 createplan.c *** src/backend/optimizer/plan/createplan.c 21 Mar 2009 00:04:39 -0000 1.256 --- src/backend/optimizer/plan/createplan.c 26 Mar 2009 15:19:44 -0000 *************** *** 1910,1915 **** --- 1910,1919 ---- /* We don't want any excess columns in the hashed tuples */ disuse_physical_tlist(inner_plan, best_path->jpath.innerjoinpath); + /* If we expect batching, suppress excess columns in outer tuples too */ + if (best_path->num_batches > 1) + disuse_physical_tlist(outer_plan, best_path->jpath.outerjoinpath); + /* * If there is a single join clause and we can identify the outer * variable as a simple column reference, supply its identity for Index: src/backend/optimizer/util/pathnode.c =================================================================== RCS file: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v retrieving revision 1.150 diff -c -r1.150 pathnode.c *** src/backend/optimizer/util/pathnode.c 27 Feb 2009 00:06:27 -0000 1.150 --- src/backend/optimizer/util/pathnode.c 26 Mar 2009 15:19:44 -0000 *************** *** 1480,1488 **** pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; ! /* A hashjoin never has pathkeys, since its ordering is unpredictable */ pathnode->jpath.path.pathkeys = NIL; pathnode->path_hashclauses = hashclauses; cost_hashjoin(pathnode, root, sjinfo); --- 1480,1499 ---- pathnode->jpath.outerjoinpath = outer_path; pathnode->jpath.innerjoinpath = inner_path; pathnode->jpath.joinrestrictinfo = restrict_clauses; ! /* ! * A hashjoin never has pathkeys, since its output ordering is ! * unpredictable due to possible batching. XXX If the inner relation is ! * small enough, we could instruct the executor that it must not batch, ! * and then we could assume that the output inherits the outer relation's ! * ordering, which might save a sort step. However there is considerable ! * downside if our estimate of the inner relation size is badly off. ! * For the moment we don't risk it. (Note also that if we wanted to take ! * this seriously, joinpath.c would have to consider many more paths for ! * the outer rel than it does now.) ! */ pathnode->jpath.path.pathkeys = NIL; pathnode->path_hashclauses = hashclauses; + /* cost_hashjoin will fill in pathnode->num_batches */ cost_hashjoin(pathnode, root, sjinfo); Index: src/include/nodes/relation.h =================================================================== RCS file: /cvsroot/pgsql/src/include/nodes/relation.h,v retrieving revision 1.170 diff -c -r1.170 relation.h *** src/include/nodes/relation.h 5 Mar 2009 23:06:45 -0000 1.170 --- src/include/nodes/relation.h 26 Mar 2009 15:19:44 -0000 *************** *** 845,850 **** --- 845,851 ---- { JoinPath jpath; List *path_hashclauses; /* join clauses used for hashing */ + int num_batches; /* number of batches expected */ } HashPath; /*