Slow update/insert process

From: Patrick Hatcher <PHatcher(at)macys(dot)com>
To: <pgsql-performance(at)postgresql(dot)org>
Subject: Slow update/insert process
Date: 2004-10-01 18:14:08
Message-ID: OF3AE712A1.C7740C8F-ON85256F20.0064BEE6-88256F20.006559A1@fds.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-performance

Pg: 7.4.5
RH 7.3
8g Ram
200 g drive space
RAID0+1
Tables vacuum on a nightly basis

The following process below takes 8 hours to run on 90k records and I'm
not sure where to being to look for the bottleneck. This isn't the only
updating on this database that seems to take a long time to complete. Is
there something I should be looking for in my conf settings?

TIA
Patrick

SQL:
---Bring back only selected records to run through the update process.
--Without the function the SQL takes < 10secs to return 90,000 records
SELECT count(pm.pm_delta_function_amazon(upc.keyp_upc,'amazon'))
FROM mdc_upc upc
JOIN public.mdc_products prod ON upc.keyf_products = prod.keyp_products
JOIN public.mdc_price_post_inc price ON prod.keyp_products =
price.keyf_product
JOIN public.mdc_attribute_product ap on ap.keyf_products =
prod.keyp_products and keyf_attribute=22
WHERE
upper(trim(ap.attributevalue)) NOT IN ('ESTEE LAUDER',
'CLINIQUE','ORGINS','PRESCRIPTIVES','LANC?ME','CHANEL','ARAMIS','M.A.C','TAG
HEUER')
AND keyf_producttype<>222
AND prod.action_publish = 1;

Function:

CREATE OR REPLACE FUNCTION pm.pm_delta_function_amazon(int4, "varchar")
RETURNS bool AS
'DECLARE
varkeyf_upc ALIAS FOR $1;
varPassword ALIAS FOR $2;
varRealMD5 varchar;
varDeltaMD5 varchar;
varLastTouchDate date;
varQuery text;
varQuery1 text;
varQueryMD5 text;
varQueryRecord record;
varFuncStatus boolean := false;

BEGIN

-- Check the password
IF varPassword <> \'amazon\' THEN
Return false;
END IF;

-- Get the md5 hash for this product
SELECT into varQueryRecord md5(upc.keyp_upc || prod.description ||
pm.pm_price_post_inc(prod.keyp_products)) AS md5
FROM public.mdc_upc upc
JOIN public.mdc_products prod ON upc.keyf_products =
prod.keyp_products
JOIN public.mdc_price_post_inc price ON price.keyf_product =
prod.keyp_products
WHERE upc.keyp_upc = varkeyf_upc LIMIT 1 ;


IF NOT FOUND THEN
RAISE EXCEPTION \'varRealMD5 is NULL. UPC ID is %\', varkeyf_upc;
ELSE
varRealMD5:=varQueryRecord.md5;
END IF;

-- Check that the product is in the delta table and return its hash for
comparison
SELECT into varQueryRecord md5_hash,last_touch_date
FROM pm.pm_delta_master_amazon
WHERE keyf_upc = varkeyf_upc LIMIT 1;

IF NOT FOUND THEN
-- ADD and exit
INSERT INTO pm.pm_delta_master_amazon
(keyf_upc,status,md5_hash,last_touch_date)
values (varkeyf_upc,\'add\',varRealMD5,CURRENT_DATE);
varFuncStatus:=true;
RETURN varFuncStatus;
ELSE
--Update the record
--- If the hash matches then set the record to HOLD
IF varRealMD5 = varQueryRecord.md5_hash THEN
UPDATE pm.pm_delta_master_amazon
SET status= \'hold\',
last_touch_date = CURRENT_DATE
WHERE keyf_upc = varkeyf_upc AND last_touch_date <> CURRENT_DATE;

varFuncStatus:=true;
ELSE
-- ELSE mark the item as ADD
UPDATE pm.pm_delta_master_amazon
SET status= \'add\',
last_touch_date = CURRENT_DATE
WHERE keyf_upc = varkeyf_upc;
varFuncStatus:=true;
END IF;
END IF;

RETURN varFuncStatus;
END;'
LANGUAGE 'plpgsql' IMMUTABLE;

TableDef
CREATE TABLE pm.pm_delta_master_amazon (
keyf_upc int4 ,
status varchar(6) ,
md5_hash varchar(40) ,
last_touch_date date
)
GO

CREATE INDEX status_idx
ON pm.pm_delta_master_amazon(status)
GO

CONF
--------
# WRITE AHEAD LOG
#---------------------------------------------------------------------------

# - Settings -

#fsync = true # turns forced synchronization on or off
#wal_sync_method = fsync # the default varies across platforms:
# fsync, fdatasync, open_sync, or
open_datasync
wal_buffers = 32 # min 4, 8KB each

# - Checkpoints -

checkpoint_segments = 50 # in logfile segments, min 1, 16MB each
checkpoint_timeout = 600 # range 30-3600, in seconds
#checkpoint_warning = 30 # 0 is off, in seconds
#commit_delay = 0 # range 0-100000, in microseconds
#commit_siblings = 5 # range 1-1000

Patrick Hatcher
Macys.Com

Responses

Browse pgsql-performance by date

  From Date Subject
Next Message Aaron Werman 2004-10-01 19:48:50 Re: Slow update/insert process
Previous Message Josh Berkus 2004-10-01 17:10:40 Re: Caching of Queries