Re: Any thoughts on a better approach to this query?

From: Marc Mamin <M(dot)Mamin(at)intershop(dot)de>
To: "David G(dot) Johnston" <david(dot)g(dot)johnston(at)gmail(dot)com>, "pgsql-general(at)postgresql(dot)org" <pgsql-general(at)postgresql(dot)org>
Subject: Re: Any thoughts on a better approach to this query?
Date: 2015-09-05 12:36:41
Message-ID: B6F6FD62F2624C4C9916AC0175D56D884207288E@jenmbs02.ad.intershop.net
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-general

>?Formatted query attached in addition to placing it inline. The commentary is inline with the query. Basically I've already solved this problem but was wondering if someone has a different perspective; or simply observations.
>
>TIA,
>
>David J.
>
>/*
>For a given id there are multiple linked values of differing types.
>Types "A" and "B" are important and, if present, should be explicitly assigned.
>It is possible that more than one link is associated with a given type.
>If either A or B is lacking an explicit value it is assigned a value
>from:
>1. any extra As that are present
>2. any extra Bs that are present
>3. any extra non-A/B values that are present
>
>The final result contains values for A and B and
>and array of values for whatever links went unused.
>*/
>WITH demo AS (
> -- A sample record where B needs to be assigned and ends
> -- up using the excess A
> SELECT * FROM (
> VALUES (1,1,'A'), (1,2,'A'), (1,3,'C'), (1,4,'D')
> ) vals (id, link, type)
>)
>, link_allocation_1 AS (
> SELECT id,
>
> -- Grab the first A
> (SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'A' LIMIT 1
> ) AS type_a_first_link,
>
> -- Grab the first B
> (SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'B' LIMIT 1
> ) AS type_b_first_link,
>
> -- Any additional As and Bs are placed into an array
> -- and appended to an array constructed from all of the non-A/B
> ARRAY(
> SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'A'
> ORDER BY link
> OFFSET 1) ||
> ARRAY(
> SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type = 'B'
> ORDER BY link
> OFFSET 1) ||
> ARRAY(
> SELECT link
> FROM demo
> WHERE demo.id = master.id
> AND type NOT IN ('A','B')
> ORDER BY link) AS unassigned_links
> FROM (SELECT DISTINCT id FROM demo) master
>)
>, allocate_unassigned_links AS (
> SELECT *,
> -- Determine how many allocations from the "extra" array are required
> -- so that we can trim slice them out of the final result
> CASE WHEN type_a_first_link IS NULL
> THEN 1
> ELSE 0 END +
> CASE WHEN type_b_first_link IS NULL
> THEN 1
> ELSE 0
> END AS reassign_count,
> -- A always gets the first extra if needed
> CASE WHEN type_a_first_link IS NULL
> THEN unassigned_links[1]
> ELSE type_a_first_link
> END AS actual_a_link,
> -- B gets the first extra unless A took it in which case it gets the second one
> CASE WHEN type_b_first_link IS NULL THEN
> CASE WHEN type_a_first_link IS NOT NULL
> THEN unassigned_links[1]
> ELSE unassigned_links[2]
> END
> ELSE type_b_first_link
> END AS actual_b_link
> FROM link_allocation_1
>)
>SELECT id,
>-- For A and B flag is the value was pulled from the extras
> type_a_first_link IS NULL AS a_link_is_missing,
> actual_a_link,
> type_b_first_link IS NULL AS b_link_is_missing,
> actual_b_link,
>-- Now slice off the first portion of the extras array based upon the assignment count
> unassigned_links[1+reassign_count:array_length(unassigned_links,1)] AS final_unassigned_links,
>-- output the unsliced array for visual comparison
> unassigned_links AS pre_allocation_unassigned_links
> FROM allocate_unassigned_links
>
>
>Output =>
>id
>a_link_is_missing actual_a_link
>b_link_is_missing actual_b_link
>final_unassigned_links pre_allocation_unassigned_links
>1
>False 1
>True 2
>{3,4} {2,3,4}
>

I would do it this way:

-- helper to remove nulls from arrays
-- could be merged into a array_agg_notnulls aggregate function
create function array_not_nulls (a anyarray) returns anyarray as $$
SELECT array_agg (u) from (select unnest($1) u)foo where u is not null;
$$ language sql immutable strict;

WITH demo AS (
-- A sample record where B needs to be assigned and ends
-- up using the excess A
SELECT * FROM (
VALUES
(1,1,'A'), (1,2,'A'), (1,3,'C'), (1,4,'D'),
(2,1,'e'), (2,2,'B'), (2,3,'C'), (2,4,'D'),
(3,1,'A'), (3,2,'y'), (3,3,'z'), (3,4,'B'),
(4,1,'B'), (4,2,'B'), (4,3,'z'), (4,4,'z'), (4,5,'B'),
(5,1,'x'), (5,2,'y'), (5,3,'z'), (5,4,'q')
) vals (id, link, type)
),
PREP AS (
select
id,
bool_or (type='A') as has_a,
bool_or (type='B') as has_b,
count(case when type='A' then 1 end ) as a_ct,
count(case when type='B' then 1 end ) as b_ct,
array_not_nulls(array_agg(case when type='A' then link end ))as ar_a,
array_not_nulls(array_agg(case when type='B' then link end ))as ar_b,
array_not_nulls(array_agg(case when type NOT IN ('A', 'B') then link end)) as ar_others
from demo
GROUP BY id
)

SELECT
id,
a_ct,
b_ct,
ar_a,
ar_b,
ar_others,
coalesce (ar_a[1], case when b_ct > 1 then ar_b[2] else ar_others[1] end ) as link_a,
coalesce (ar_b[1], case when a_ct > 1 then ar_a[2] when a_ct = 0 then ar_others[2] else ar_others[1] end) as link_b,

-- unused others
case when a_ct + b_ct >=2 then ar_others
else ar_others[3 - (a_ct + b_ct) : array_length(ar_others,1)]
end
||
-- unused A & B
case when has_a AND has_b then ar_a[2:a_ct] || ar_b[2:b_ct]
when a_ct > 2 then ar_a[3:a_ct]
when b_ct > 2 then ar_b[3:b_ct]
end as unused
FROM PREP
order by id

regards,

Marc Mamin

In response to

Responses

Browse pgsql-general by date

  From Date Subject
Next Message Adrian Klaver 2015-09-05 16:38:41 Re: Trouble setting up replication
Previous Message David Steele 2015-09-05 12:32:10 Re: error on online backup using pg_basebackup tool