Re: Temp tables...

From: "Greg Patnude" <gpatnude(at)hotmail(dot)com>
To: pgsql-general(at)postgresql(dot)org
Subject: Re: Temp tables...
Date: 2005-07-13 15:48:18
Message-ID: db3cn2$29g6$1@news.hub.org
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-general

I am "TOP POSTING" intentionally --

Thanks Mike... Very informative -- I never realized that child (INHERITED)
tables do NOT inherit the indexes from their parent... that might be part of
the solution.... I duplicated the parents index on the child table -- the
function still takes 4672 ms to complete... Based on your evaluation -- I
now believe that the issue is in the UPDATE clause --

-- THIS IS AN EXAMPLE OF THE ACTUAL UPDATE... WHICH GETS CALLED 50 TIMES...

EXPLAIN ANALYZE UPDATE ONLY l_store_hours SET amount = amount * 1.00 WHERE
l_store_hours.id = 14511;

-- OUTPUT...
"Nested Loop (cost=0.00..6.05 rows=1 width=52) (actual time=67.487..67.493
rows=1 loops=1)"
" -> Index Scan using l_store_hours_pkey on l_store_hours lh
(cost=0.00..3.01 rows=1 width=52) (actual time=54.674..54.675 rows=1
loops=1)"
" Index Cond: (14511 = id)"
" -> Index Scan using l_store_hours_pkey on l_store_hours
(cost=0.00..3.01 rows=1 width=4) (actual time=0.006..0.009 rows=1 loops=1)"
" Index Cond: (id = 14511)"
"Total runtime: 87.803 ms"
""
"Index Scan using l_store_hours_pkey on l_store_hours (cost=0.00..3.02
rows=1 width=58) (actual time=0.032..0.034 rows=1 loops=1)"
" Index Cond: (id = 14511)"
"Total runtime: 0.500 ms"

-- THE TABLES, CHILD TABLE, VIEW, AND FUNCTION...
CREATE TABLE l_store_hours (

id serial PRIMARY KEY,
l_store_id int4 NOT NULL DEFAULT 0,
l_activity_type int4 NOT NULL REFERENCES l_activity_type(id),
week_code int4 NOT NULL DEFAULT 0,
year_code int4 NOT NULL DEFAULT 0,
amount numeric(10,2) DEFAULT 0.00,
create_dt timestamp NOT NULL DEFAULT now(),
change_dt timestamp NOT NULL DEFAULT now(),
change_id int4 DEFAULT 0,
active_flag bool DEFAULT true

) WITH OIDS;

CREATE INDEX idx_store_hours ON l_store_hours USING btree (id, l_store_id,
year_code, week_code);
ALTER TABLE l_store_hours CLUSTER ON idx_store_hours;

-- THE CHILD (INHERITED) TABLE...
CREATE TABLE l_store_hours_history (

hist_id serial PRIMARY KEY,
hist_dt timestamp NOT NULL DEFAULT now()

) INHERITS (l_store_hours) WITH OIDS;

CREATE INDEX idx_store_hours_history ON l_store_hours_history USING btree
(id, l_store_id, year_code, week_code);
ALTER TABLE l_store_hours_history CLUSTER ON idx_store_hours_history;

-- THE UPDATE RULE ON THE PARENT TABLE...
CREATE OR REPLACE RULE l_store_hours_history_upd AS ON UPDATE TO
l_store_hours DO INSERT INTO l_store_hours_history (SELECT * FROM ONLY
l_store_hours LH WHERE LH.id = old.id);

-- THE VIEW...
CREATE OR REPLACE VIEW v_storehours AS
SELECT ls.id, ls.l_activity_type, ls.l_store_id, ls.week_code, ls.year_code,
ls.amount, ls.create_dt, ls.change_dt, ls.change_id, ls.active_flag,
COALESCE(lsh.amount, ls.amount) AS previous,
COALESCE(lsh.hist_id, 0) AS history, lsh.hist_dt
FROM ONLY l_store_hours ls
FULL JOIN l_store_hours_history lsh ON ls.id = lsh.id
ORDER BY ls.year_code, ls.week_code;

/*

This function updates the block of expected employee hours (l_store_hours)
based on the current projected sales figures

*/
CREATE OR REPLACE FUNCTION l_updatehoursonsales(int4, int4, int4)
RETURNS bool AS
$BODY$

DECLARE ROW v_storesales%ROWTYPE;
DECLARE F1 real;
DECLARE CUR t_updhours%ROWTYPE;
DECLARE STORE ALIAS FOR $1;
DECLARE WEEK ALIAS FOR $2;
DECLARE YEAR ALIAS FOR $3;
DECLARE C RECORD;
BEGIN

RAISE LOG 'STARTING: l_updatehoursonsales for store: %', $1;
RAISE LOG 'STARTING: l_updatehoursonsales for week: %', $2;
RAISE LOG 'STARTING: l_updatehoursonsales for year: %', $3;

-- RAISE LOG 'CREATING TEMP TABLE AS SELECT...';
CREATE TEMPORARY TABLE tmphours AS

SELECT LT.type_desc, 0 AS hist_id, LSH.* FROM ONLY l_store_hours LSH
FULL OUTER JOIN l_activity_type LT ON LSH.l_activity_type = LT.id
WHERE LSH.l_store_id = $1 AND LSH.week_code = $2
AND LSH.year_code = $3 ORDER BY l_activity_type;

-- UPDATE THE TEMP TABLE WITH THE LEAST HISTORY ID FROM THE HISTORY
TABLE...
-- RAISE LOG 'SETTING HISTORY IDs...';
UPDATE tmphours SET hist_id = (

SELECT A.hist_id FROM (

SELECT MIN(hist_id) AS hist_id, id FROM ONLY l_store_hours_history LSH
WHERE tmphours.l_store_id = LSH.l_store_id
AND tmphours.year_code = LSH.year_code
AND tmphours.week_code = LSH.week_code
AND tmphours.l_activity_type = LSH.l_activity_type
AND tmphours.id = LSH.id
GROUP BY 2

)

AS A);

-- UPDATE THE AMOUNTS (HOURS) WITH THE PRESERVED VALUES...
-- RAISE LOG 'UPDATING tmphours --> Setting amount...';
UPDATE tmphours SET amount = LSH.amount FROM ONLY l_store_hours_history LSH
WHERE tmphours.hist_id = LSH.hist_id;

-- GET THE CURRENT SALES FIGURES FROM THE VIEW...
FOR ROW IN SELECT VSS.* FROM v_storesales VSS
WHERE VSS.l_store_id = $1 AND VSS.week_code = $2 AND VSS.year_code = $3
ORDER BY history LIMIT 1
LOOP

-- RAISE LOG 'LOOPING FOR UPDATE... %', ROW.id;
IF (ROW.amount > ROW.previous) THEN

SELECT (1 + (((ROW.amount::float / ROW.previous::float)::float - 1) *
0.8))::NUMERIC(10, 2) INTO F1;
RAISE LOG 'USING FORMULA (a > b): %', F1;

END IF;

IF (ROW.amount < ROW.previous) THEN

SELECT (ROW.amount::float / ROW.previous::float)::NUMERIC(10, 2) INTO F1;
RAISE LOG 'USING FORMULA: (a < b) %', F1;

END IF;

IF (ROW.amount = ROW.previous) THEN

SELECT 1.00::NUMERIC(10, 2) INTO F1;
RAISE LOG 'USING FORMULA: (a = b)%', F1;

END IF;

-- LOOP THROUGH THE TEMP TABLE AND UPDATE l_store_hours...
FOR C IN SELECT * FROM tmphours LOOP

UPDATE ONLY l_store_hours SET amount = C.amount * F1 WHERE
l_store_hours.id = C.id;

END LOOP;

END LOOP;

-- CLEAN UP AFTER YOURSELF...
RAISE LOG 'LOOP COMPLETE! Dropping TEMPORARY TABLE tmphours...';
DROP TABLE tmphours;

-- AND RETURN...
RETURN TRUE;

END;

$BODY$
LANGUAGE 'plpgsql' VOLATILE;

Regards,

Greg Patnude - Manager, Dynamic Applications Group

Data Mosaics, Inc.

2406 South Dishman-Mica Road / Suite # 6

Spokane Valley, WA 99206-6429

VOICE: (866) 904-DMSF

FAX: (509) 928-4236

"Michael Fuhr" <mike(at)fuhr(dot)org> wrote in message
news:20050713005700(dot)GA87233(at)winnie(dot)fuhr(dot)org(dot)(dot)(dot)
> On Tue, Jul 12, 2005 at 10:52:24AM -0700, Greg Patnude wrote:
>>
>> Performing an update to an inherited table system from inside of a stored
>> procedure (PLPGSQL) seems to be unusually sluggish...
>
> Is the update slower when done inside a function than when doing
> it directly (e.g., from psql)? That is, is the use of a function
> relevant, or is the update equally slow in any case? Could you
> post the EXPLAIN ANALYZE output for the update?
>
> The message subject is "Temp tables." Are you using temporary
> tables, and if so, are you seeing different behavior with temporary
> tables than with "real" tables? Again, is that relevant to the
> problem?
>
>> Does anyone have a faster solution ? I am updating 50 records and it
>> takes approximately 4.375 seconds + or -....
>>
>> The inherited table has an ON INSERT DO INSTEAD and there are
>> approximately
>> 2 million rows in the inherited table structure...
>
> Could you post the table definitions, including all indexes, rules,
> etc.? Do all the child tables have indexes on the column(s) used
> to restrict the update? As the documentation states, indexes aren't
> inherited, so you might need to create additional indexes on the
> children, indexes that you'd think would be redundant. Example:
>
> CREATE TABLE parent (id serial PRIMARY KEY);
> CREATE TABLE child (x integer) INHERITS (parent);
>
> INSERT INTO child (x) SELECT * FROM generate_series(1, 100000);
>
> ANALYZE parent;
> ANALYZE child;
>
> EXPLAIN ANALYZE UPDATE child SET id = id WHERE id BETWEEN 1 AND 50;
> QUERY PLAN
> -------------------------------------------------------------------------------------------------------
> Seq Scan on child (cost=0.00..1991.00 rows=41 width=14) (actual
> time=0.059..307.234 rows=50 loops=1)
> Filter: ((id >= 1) AND (id <= 50))
> Total runtime: 309.350 ms
> (3 rows)
>
> EXPLAIN ANALYZE UPDATE parent SET id = id WHERE id BETWEEN 1 AND 50;
> QUERY PLAN
> -----------------------------------------------------------------------------------------------------------------------------
> Append (cost=0.00..2006.37 rows=52 width=14) (actual
> time=304.838..306.252 rows=50 loops=1)
> -> Index Scan using parent_pkey on parent (cost=0.00..15.37 rows=11
> width=10) (actual time=0.110..0.110 rows=0 loops=1)
> Index Cond: ((id >= 1) AND (id <= 50))
> -> Seq Scan on child parent (cost=0.00..1991.00 rows=41 width=14)
> (actual time=304.705..305.619 rows=50 loops=1)
> Filter: ((id >= 1) AND (id <= 50))
> Total runtime: 307.935 ms
> (6 rows)
>
> Notice the sequential scans on child, even though we have an index
> on parent.id, a column that child inherits. We need to create an
> index on child.id as well:
>
> CREATE INDEX child_id_idx ON child (id);
>
> EXPLAIN ANALYZE UPDATE child SET id = id WHERE id BETWEEN 1 AND 50;
> QUERY PLAN
> -----------------------------------------------------------------------------------------------------------------------
> Index Scan using child_id_idx on child (cost=0.00..3.65 rows=41 width=14)
> (actual time=0.369..1.371 rows=50 loops=1)
> Index Cond: ((id >= 1) AND (id <= 50))
> Total runtime: 6.100 ms
> (3 rows)
>
> EXPLAIN ANALYZE UPDATE parent SET id = id WHERE id BETWEEN 1 AND 50;
> QUERY PLAN
> ------------------------------------------------------------------------------------------------------------------------------------
> Append (cost=0.00..19.02 rows=52 width=14) (actual time=0.119..1.895
> rows=50 loops=1)
> -> Index Scan using parent_pkey on parent (cost=0.00..15.37 rows=11
> width=10) (actual time=0.037..0.037 rows=0 loops=1)
> Index Cond: ((id >= 1) AND (id <= 50))
> -> Index Scan using child_id_idx on child parent (cost=0.00..3.65
> rows=41 width=14) (actual time=0.066..1.320 rows=50 loops=1)
> Index Cond: ((id >= 1) AND (id <= 50))
> Total runtime: 7.820 ms
> (6 rows)
>
> If that's not the problem, then do other tables have foreign key
> references to the table(s) you're updating? If so, then you might
> need indexes on the foreign key columns in the referring tables.
>
> --
> Michael Fuhr
> http://www.fuhr.org/~mfuhr/
>
> ---------------------------(end of broadcast)---------------------------
> TIP 9: In versions below 8.0, the planner will ignore your desire to
> choose an index scan if your joining column's datatypes do not
> match
>

In response to

Browse pgsql-general by date

  From Date Subject
Next Message Karsten Hilbert 2005-07-13 15:56:03 chosing a database name
Previous Message Tom Lane 2005-07-13 15:28:28 Re: Strange memory behaviour with PGreset() ...