diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c
index 65a34a2..345ed17 100644
*** a/src/backend/optimizer/path/allpaths.c
--- b/src/backend/optimizer/path/allpaths.c
*************** remove_unused_subquery_outputs(Query *su
*** 3306,3311 ****
--- 3306,3315 ----
  	/*
  	 * If subquery has regular DISTINCT (not DISTINCT ON), we're wasting our
  	 * time: all its output columns must be used in the distinctClause.
+ 	 * (Note: the latter is not necessarily true anymore, because planner.c
+ 	 * might have found some of the DISTINCT columns to be redundant and
+ 	 * dropped them.  But they'd still have sortgroupref markings, so unless
+ 	 * we improve the heuristic below, we would not recognize them as unused.)
  	 */
  	if (subquery->distinctClause && !subquery->hasDistinctOn)
  		return;
*************** remove_unused_subquery_outputs(Query *su
*** 3348,3358 ****
  
  		/*
  		 * If it has a sortgroupref number, it's used in some sort/group
! 		 * clause so we'd better not remove it.  Also, don't remove any
! 		 * resjunk columns, since their reason for being has nothing to do
! 		 * with anybody reading the subquery's output.  (It's likely that
! 		 * resjunk columns in a sub-SELECT would always have ressortgroupref
! 		 * set, but even if they don't, it seems imprudent to remove them.)
  		 */
  		if (tle->ressortgroupref || tle->resjunk)
  			continue;
--- 3352,3365 ----
  
  		/*
  		 * If it has a sortgroupref number, it's used in some sort/group
! 		 * clause so we'd better not remove it.  (This is a conservative
! 		 * heuristic, since it might not actually be used by any surviving
! 		 * sort/group clause; but we don't bother to expend the cycles needed
! 		 * for a more accurate test.)  Also, don't remove any resjunk columns,
! 		 * since their reason for being has nothing to do with anybody reading
! 		 * the subquery's output.  (It's likely that resjunk columns in a
! 		 * sub-SELECT would always have ressortgroupref set, but even if they
! 		 * don't, it seems imprudent to remove them.)
  		 */
  		if (tle->ressortgroupref || tle->resjunk)
  			continue;
diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c
index 008492b..a9ccfed 100644
*** a/src/backend/optimizer/plan/planner.c
--- b/src/backend/optimizer/plan/planner.c
*************** static double preprocess_limit(PlannerIn
*** 125,130 ****
--- 125,133 ----
  				 int64 *offset_est, int64 *count_est);
  static bool limit_needed(Query *parse);
  static void remove_useless_groupby_columns(PlannerInfo *root);
+ static void remove_useless_distinct_columns(PlannerInfo *root);
+ static List *remove_dependent_grouping_clauses(PlannerInfo *root,
+ 								  List *clauselist);
  static List *preprocess_groupclause(PlannerInfo *root, List *force);
  static List *extract_rollup_sets(List *groupingSets);
  static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
*************** subquery_planner(PlannerGlobal *glob, Qu
*** 965,970 ****
--- 968,976 ----
  	/* Remove any redundant GROUP BY columns */
  	remove_useless_groupby_columns(root);
  
+ 	/* Likewise for redundant DISTINCT columns */
+ 	remove_useless_distinct_columns(root);
+ 
  	/*
  	 * If we have any outer joins, try to reduce them to plain inner joins.
  	 * This step is most easily done after we've done expression
*************** static void
*** 2941,2967 ****
  remove_useless_groupby_columns(PlannerInfo *root)
  {
  	Query	   *parse = root->parse;
- 	Bitmapset **groupbyattnos;
- 	Bitmapset **surplusvars;
- 	ListCell   *lc;
- 	int			relid;
- 
- 	/* No chance to do anything if there are less than two GROUP BY items */
- 	if (list_length(parse->groupClause) < 2)
- 		return;
  
  	/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
  	if (parse->groupingSets)
  		return;
  
  	/*
! 	 * Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
! 	 * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
! 	 * that are GROUP BY items.
  	 */
! 	groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
! 										   (list_length(parse->rtable) + 1));
! 	foreach(lc, parse->groupClause)
  	{
  		SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
  		TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
--- 2947,3022 ----
  remove_useless_groupby_columns(PlannerInfo *root)
  {
  	Query	   *parse = root->parse;
  
  	/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
  	if (parse->groupingSets)
  		return;
  
+ 	parse->groupClause =
+ 		remove_dependent_grouping_clauses(root, parse->groupClause);
+ }
+ 
+ /*
+  * remove_useless_distinct_columns
+  *		Like remove_useless_groupby_columns, but for the DISTINCT clause
+  *
+  * If we have both a multi-member GROUP BY clause and a multi-member DISTINCT
+  * clause, this will do a lot of the same catalog lookup work that
+  * remove_useless_groupby_columns already did.  That seems like an unlikely
+  * case, so for now we don't worry about it, but eventually it might be good
+  * to refactor to avoid that.
+  */
+ static void
+ remove_useless_distinct_columns(PlannerInfo *root)
+ {
+ 	Query	   *parse = root->parse;
+ 
  	/*
! 	 * Don't try to remove anything from a DISTINCT ON clause.  For this case,
! 	 * the distinctClauses are closely entwined with the ORDER BY clause, so
! 	 * we'd better not meddle with them.  You might think that we can just
! 	 * apply the same optimizations to the ORDER BY too, but we can't since
! 	 * removing an item there could affect the order of the query results.
  	 */
! 	if (parse->hasDistinctOn)
! 		return;
! 
! 	parse->distinctClause =
! 		remove_dependent_grouping_clauses(root, parse->distinctClause);
! }
! 
! /*
!  * remove_dependent_grouping_clauses
!  *		Process clauselist (a list of SortGroupClause) and remove any items
!  *		that can be proven to be functionally dependent on other items.
!  *		We will have the same grouping semantics without them.
!  *
!  * If any item from the list can be removed, then a new list is built which
!  * does not contain the removed items.  If nothing can be removed then the
!  * original list is returned.
!  */
! static List *
! remove_dependent_grouping_clauses(PlannerInfo *root,
! 								  List *clauselist)
! {
! 	Query	   *parse = root->parse;
! 	Bitmapset **clauseattnos;
! 	Bitmapset **surplusvars;
! 	ListCell   *lc;
! 	int			relid;
! 
! 	/* No chance of removing anything if there are fewer than two items */
! 	if (list_length(clauselist) < 2)
! 		return clauselist;
! 
! 	/*
! 	 * Scan the clauselist to find items that are simple Vars.  Fill
! 	 * clauseattnos[k] with a bitmapset of the column attnos of RTE k that
! 	 * appear in the clauselist.
! 	 */
! 	clauseattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
! 										  (list_length(parse->rtable) + 1));
! 	foreach(lc, clauselist)
  	{
  		SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
  		TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
*************** remove_useless_groupby_columns(PlannerIn
*** 2971,2979 ****
  		 * Ignore non-Vars and Vars from other query levels.
  		 *
  		 * XXX in principle, stable expressions containing Vars could also be
! 		 * removed, if all the Vars are functionally dependent on other GROUP
! 		 * BY items.  But it's not clear that such cases occur often enough to
! 		 * be worth troubling over.
  		 */
  		if (!IsA(var, Var) ||
  			var->varlevelsup > 0)
--- 3026,3034 ----
  		 * Ignore non-Vars and Vars from other query levels.
  		 *
  		 * XXX in principle, stable expressions containing Vars could also be
! 		 * removed, if all the Vars are functionally dependent on other items
! 		 * in the clauselist.  But it's not clear that such cases occur often
! 		 * enough to be worth troubling over.
  		 */
  		if (!IsA(var, Var) ||
  			var->varlevelsup > 0)
*************** remove_useless_groupby_columns(PlannerIn
*** 2982,2996 ****
  		/* OK, remember we have this Var */
  		relid = var->varno;
  		Assert(relid <= list_length(parse->rtable));
! 		groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
! 											  var->varattno - FirstLowInvalidHeapAttributeNumber);
  	}
  
  	/*
  	 * Consider each relation and see if it is possible to remove some of its
! 	 * Vars from GROUP BY.  For simplicity and speed, we do the actual removal
! 	 * in a separate pass.  Here, we just fill surplusvars[k] with a bitmapset
! 	 * of the column attnos of RTE k that are removable GROUP BY items.
  	 */
  	surplusvars = NULL;			/* don't allocate array unless required */
  	relid = 0;
--- 3037,3055 ----
  		/* OK, remember we have this Var */
  		relid = var->varno;
  		Assert(relid <= list_length(parse->rtable));
! 		clauseattnos[relid] = bms_add_member(clauseattnos[relid],
! 											 var->varattno - FirstLowInvalidHeapAttributeNumber);
  	}
  
  	/*
  	 * Consider each relation and see if it is possible to remove some of its
! 	 * Vars from the clauselist.  We can do so if they are functionally
! 	 * dependent on other Vars from the same relation that are also in the
! 	 * clauselist (independently of any other relations that are mentioned).
! 	 *
! 	 * For simplicity and speed, we do the actual removal in a separate pass.
! 	 * Here, we just fill surplusvars[k] with a bitmapset of the column attnos
! 	 * of RTE k that are removable clauselist items.
  	 */
  	surplusvars = NULL;			/* don't allocate array unless required */
  	relid = 0;
*************** remove_useless_groupby_columns(PlannerIn
*** 3007,3014 ****
  		if (rte->rtekind != RTE_RELATION)
  			continue;
  
! 		/* Nothing to do unless this rel has multiple Vars in GROUP BY */
! 		relattnos = groupbyattnos[relid];
  		if (bms_membership(relattnos) != BMS_MULTIPLE)
  			continue;
  
--- 3066,3073 ----
  		if (rte->rtekind != RTE_RELATION)
  			continue;
  
! 		/* Nothing to do unless this rel has multiple Vars in clauselist */
! 		relattnos = clauseattnos[relid];
  		if (bms_membership(relattnos) != BMS_MULTIPLE)
  			continue;
  
*************** remove_useless_groupby_columns(PlannerIn
*** 3022,3028 ****
  
  		/*
  		 * If the primary key is a proper subset of relattnos then we have
! 		 * some items in the GROUP BY that can be removed.
  		 */
  		if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
  		{
--- 3081,3087 ----
  
  		/*
  		 * If the primary key is a proper subset of relattnos then we have
! 		 * some items in the clauselist that can be removed.
  		 */
  		if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
  		{
*************** remove_useless_groupby_columns(PlannerIn
*** 3044,3058 ****
  	}
  
  	/*
! 	 * If we found any surplus Vars, build a new GROUP BY clause without them.
  	 * (Note: this may leave some TLEs with unreferenced ressortgroupref
  	 * markings, but that's harmless.)
  	 */
  	if (surplusvars != NULL)
  	{
! 		List	   *new_groupby = NIL;
  
! 		foreach(lc, parse->groupClause)
  		{
  			SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
  			TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
--- 3103,3117 ----
  	}
  
  	/*
! 	 * If we found any surplus Vars, build a new clause list without them.
  	 * (Note: this may leave some TLEs with unreferenced ressortgroupref
  	 * markings, but that's harmless.)
  	 */
  	if (surplusvars != NULL)
  	{
! 		List	   *new_clauselist = NIL;
  
! 		foreach(lc, clauselist)
  		{
  			SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
  			TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
*************** remove_useless_groupby_columns(PlannerIn
*** 3066,3076 ****
  				var->varlevelsup > 0 ||
  				!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
  							   surplusvars[var->varno]))
! 				new_groupby = lappend(new_groupby, sgc);
  		}
  
! 		parse->groupClause = new_groupby;
  	}
  }
  
  /*
--- 3125,3138 ----
  				var->varlevelsup > 0 ||
  				!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
  							   surplusvars[var->varno]))
! 				new_clauselist = lappend(new_clauselist, sgc);
  		}
  
! 		return new_clauselist;
  	}
+ 
+ 	/* nothing to change, just return the old list */
+ 	return clauselist;
  }
  
  /*
diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out
index f85e913..2976c4b 100644
*** a/src/test/regress/expected/aggregates.out
--- b/src/test/regress/expected/aggregates.out
*************** explain (costs off) select * from t3 gro
*** 1017,1022 ****
--- 1017,1081 ----
     ->  Seq Scan on t3
  (3 rows)
  
+ --
+ -- Test removal of redundant DISTINCT columns
+ --
+ -- Non-primary-key columns can be removed from DISTINCT clause
+ explain (costs off) select distinct a,b,c,d from t1;
+       QUERY PLAN      
+ ----------------------
+  HashAggregate
+    Group Key: a, b
+    ->  Seq Scan on t1
+ (3 rows)
+ 
+ -- No removal can happen if the complete PK is not present in DISTINCT clause
+ explain (costs off) select distinct a,c,d from t1;
+       QUERY PLAN      
+ ----------------------
+  HashAggregate
+    Group Key: a, c, d
+    ->  Seq Scan on t1
+ (3 rows)
+ 
+ -- Test removal across multiple relations
+ explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z
+ from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y;
+                       QUERY PLAN                      
+ ------------------------------------------------------
+  HashAggregate
+    Group Key: t1.a, t1.b, t2.x, t2.y
+    ->  Hash Join
+          Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
+          ->  Seq Scan on t2
+          ->  Hash
+                ->  Seq Scan on t1
+ (7 rows)
+ 
+ -- Test case where t1 can be optimized but not t2
+ explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.z
+ from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y;
+                       QUERY PLAN                      
+ ------------------------------------------------------
+  HashAggregate
+    Group Key: t1.a, t1.b, t2.x, t2.z
+    ->  Hash Join
+          Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b))
+          ->  Seq Scan on t2
+          ->  Hash
+                ->  Seq Scan on t1
+ (7 rows)
+ 
+ -- Ensure we don't remove DISTINCT ON items
+ explain (costs off) select distinct on (a,b,c) d from t1 order by a,b,c,d;
+           QUERY PLAN          
+ ------------------------------
+  Unique
+    ->  Sort
+          Sort Key: a, b, c, d
+          ->  Seq Scan on t1
+ (4 rows)
+ 
  drop table t1;
  drop table t2;
  drop table t3;
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 84c6e9b..fd03559 100644
*** a/src/test/regress/expected/join.out
--- b/src/test/regress/expected/join.out
*************** select d.* from d left join (select * fr
*** 4126,4147 ****
           ->  Seq Scan on d
  (8 rows)
  
! -- similarly, but keying off a DISTINCT clause
  explain (costs off)
  select d.* from d left join (select distinct * from b) s
    on d.a = s.id;
!               QUERY PLAN              
! --------------------------------------
!  Merge Right Join
!    Merge Cond: (b.id = d.a)
!    ->  Unique
!          ->  Sort
!                Sort Key: b.id, b.c_id
!                ->  Seq Scan on b
!    ->  Sort
!          Sort Key: d.a
!          ->  Seq Scan on d
! (9 rows)
  
  -- check join removal works when uniqueness of the join condition is enforced
  -- by a UNION
--- 4126,4147 ----
           ->  Seq Scan on d
  (8 rows)
  
! -- similarly, but keying off a DISTINCT clause (again, removal of b.c_id
! -- from the DISTINCT step happens too late for join removal)
  explain (costs off)
  select d.* from d left join (select distinct * from b) s
    on d.a = s.id;
!               QUERY PLAN               
! ---------------------------------------
!  Hash Left Join
!    Hash Cond: (d.a = s.id)
!    ->  Seq Scan on d
!    ->  Hash
!          ->  Subquery Scan on s
!                ->  HashAggregate
!                      Group Key: b.id
!                      ->  Seq Scan on b
! (8 rows)
  
  -- check join removal works when uniqueness of the join condition is enforced
  -- by a UNION
diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql
index 506d044..1ee26ca 100644
*** a/src/test/regress/sql/aggregates.sql
--- b/src/test/regress/sql/aggregates.sql
*************** group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z;
*** 362,367 ****
--- 362,387 ----
  -- Cannot optimize when PK is deferrable
  explain (costs off) select * from t3 group by a,b,c;
  
+ --
+ -- Test removal of redundant DISTINCT columns
+ --
+ -- Non-primary-key columns can be removed from DISTINCT clause
+ explain (costs off) select distinct a,b,c,d from t1;
+ 
+ -- No removal can happen if the complete PK is not present in DISTINCT clause
+ explain (costs off) select distinct a,c,d from t1;
+ 
+ -- Test removal across multiple relations
+ explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z
+ from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y;
+ 
+ -- Test case where t1 can be optimized but not t2
+ explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.z
+ from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y;
+ 
+ -- Ensure we don't remove DISTINCT ON items
+ explain (costs off) select distinct on (a,b,c) d from t1 order by a,b,c,d;
+ 
  drop table t1;
  drop table t2;
  drop table t3;
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index b1e05a3..31fd397 100644
*** a/src/test/regress/sql/join.sql
--- b/src/test/regress/sql/join.sql
*************** explain (costs off)
*** 1360,1366 ****
  select d.* from d left join (select * from b group by b.id, b.c_id) s
    on d.a = s.id;
  
! -- similarly, but keying off a DISTINCT clause
  explain (costs off)
  select d.* from d left join (select distinct * from b) s
    on d.a = s.id;
--- 1360,1367 ----
  select d.* from d left join (select * from b group by b.id, b.c_id) s
    on d.a = s.id;
  
! -- similarly, but keying off a DISTINCT clause (again, removal of b.c_id
! -- from the DISTINCT step happens too late for join removal)
  explain (costs off)
  select d.* from d left join (select distinct * from b) s
    on d.a = s.id;
