---------------------------------------------------------------------------- -- Test 1 -- Highest Salary Per Department -- -- Notes: Get the name, department and salary of highest paid department -- member for each department. If the two highest paid employees -- in the department get the same salary show the one with the -- lowest id. ---------------------------------------------------------------------------- create table employees ( id INT primary key, name varchar(30) not null, department varchar(30) not null, salary int not null, check (salary >= 0) ); insert into employees values(1,'Jeff','IT',10000); insert into employees values(2,'Sam','IT',12000); insert into employees values(3,'Richard','Manager',30000); insert into employees values(4,'Ian','Manager',20000); insert into employees values(5,'John','IT',60000); insert into employees values(6,'Matthew','Director',60000); VACUUM ANALYZE employees; -- Normal way. SELECT name,department,salary FROM employees AS e WHERE id = (SELECT id FROM employees WHERE department = e.department ORDER BY salary DESC,id ASC LIMIT 1); -- above query = 498 tps -- With windowing functions. SELECT name,department,salary FROM (SELECT name, department, salary, row_number() OVER (PARTITION BY department ORDER BY salary DESC,id ASC) AS num FROM employees ) AS t WHERE num = 1; -- above query = 578 tps ---------------------------------------------------------------------------- -- Test 2 -- Split times problem -- -- Notes: Find the interval of time between 1 timestamp and the previous -- timestamp. The previous timestamp being the one with the next lowest id -- column value. If there is no previous timestamp show the value NULL. CREATE TABLE tstest ( id SERIAL NOT NULL PRIMARY KEY, timestamp TIMESTAMP NOT NULL ); INSERT INTO tstest (timestamp) VALUES(NOW()); INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; INSERT INTO tstest (timestamp) SELECT MAX(timestamp) + ((random() * 100)::TEXT || ' sec ')::interval FROM tstest; VACUUM ANALYZE tstest; -- Normal way SELECT timestamp - (SELECT timestamp FROM tstest WHERE id < t.id ORDER BY id DESC LIMIT 1) FROM tstest t; -- above query = 336 tps -- With windowing functions. SELECT timestamp - LAG(timestamp,1) OVER(ORDER BY id) FROM tstest; -- above query = 481 tps ------------------------------------------------------------------------------------------ -- Test 3 -- Meter reading problem -- -- Notes: Some testing with slightly larger tables. -- Find the day were the least units were used. This will be the smallest difference -- between that day and the previous meter reading. Note, this does not have to be -- the previous day. It's possible that the meter was not read on some days. CREATE TABLE meter_readings ( date DATE NOT NULL PRIMARY KEY, reading DOUBLE PRECISION NOT NULL ); -- Create inital meter reading of 0 for 1st Jan 2000. INSERT INTO meter_readings (date,reading) VALUES('2000-01-01',0); -- Create 10000 readings for each day after 2000-01-01. INSERT INTO meter_readings (date,reading) SELECT '2000-01-01'::date + i.a,i.a * 10000 + RANDOM() * 10000 FROM generate_series(1,10000) AS i(a); VACUUM ANALYZE meter_readings; SELECT date,reading - (SELECT reading FROM meter_readings WHERE date < p.date ORDER BY date DESC LIMIT 1) AS used FROM meter_readings p ORDER BY used ASC NULLS LAST LIMIT 1; -- above query = 1.3 tps. SELECT date,reading - LAG(reading,1) OVER (ORDER BY date) AS used FROM meter_Readings ORDER BY used ASC NULLS LAST LIMIT 1; -- above query = 8.45 tps. /* Notes: There seems to be a small issue with the above query. david=# explain SELECT date,reading - LAG(reading,1) OVER (ORDER BY date) AS used david-# FROM meter_Readings david-# ORDER BY used ASC NULLS LAST LIMIT 1; QUERY PLAN --------------------------------------------------------------------------------------------- Limit (cost=989.49..989.49 rows=1 width=12) -> Sort (cost=989.49..1014.49 rows=10001 width=12) Sort Key: ((reading - lag(reading, (1)))) -> Window (cost=814.47..939.48 rows=10001 width=12) -> Sort (cost=814.47..839.47 rows=10001 width=12) Sort Key: date -> Seq Scan on meter_readings (cost=0.00..150.01 rows=10001 width=12) The planner uses a seq scan on meter_readings then performs a sort on date. Even if I do SET enable_seqscan = off; the planner still performs a seq_scan. Why can't the exeutor use an index scan on the primary key? */ ------------------------------------------------------------------------------------------ -- Test 4 -- Marathon Results Problem -- -- Notes: Show the runnerid,name,time taken and the position of each Marathon runner. CREATE TABLE marathon_results ( runnerid SERIAL NOT NULL PRIMARY KEY, name VARCHAR(30) NOT NULL, time INTERVAL NOT NULL ); INSERT INTO marathon_results (name,time) VALUES('Samuel','02:05:24'); INSERT INTO marathon_results (name,time) VALUES('Paul','02:04:55'); INSERT INTO marathon_results (name,time) VALUES('Haile','02:03:59'); INSERT INTO marathon_results (name,time) VALUES('Martin','02:05:15'); INSERT INTO marathon_results (name,time) VALUES('Sammy','02:04:56'); INSERT INTO marathon_results (name,time) VALUES('David','03:15:56'); INSERT INTO marathon_results (name,time) VALUES('Eric','03:15:56'); INSERT INTO marathon_results (name,time) VALUES('Alan','03:24:56'); VACUUM ANALYZE marathon_results; -- the Normal way SELECT runnerid,name,time,(SELECT COUNT(DISTINCT time)+1 FROM marathon_results WHERE time < r.time) AS position FROM marathon_results r ORDER BY position; -- above query = 424 tps -- With windowing functions. SELECT runnerid,name,time,DENSE_RANK() OVER(ORDER BY time ASC) AS position FROM marathon_results ORDER BY position; -- above query = 629 tps ------------------------------------------------------------------------------------------ -- Test 5 -- Big Marathon Problem -- -- Notes: As above but bigger table. CREATE TABLE big_marathon ( runnerid SERIAL NOT NULL PRIMARY KEY, time INTERVAL NOT NULL ); -- Generate 10,000 random times from 2 hrs to 4 hrs INSERT INTO big_marathon (time) SELECT '03:00:00'::interval + (CAST(RANDOM() * 3600 AS INT) || ' secs')::INTERVAL - (CAST(RANDOM() * 3600 AS INT) || ' secs')::INTERVAL FROM generate_series(1,10000); -- Give the non-windowing one a chance. CREATE INDEX big_marathon_time_idx ON big_marathon (time); VACUUM ANALYZE big_marathon; -- Who was in Nth place (using 2nd place here but could be anything) -- Normal SELECT runnerid,time,position FROM (SELECT runnerid,time,(SELECT COUNT(DISTINCT time)+1 FROM big_marathon WHERE time < r.time) AS position FROM big_marathon r ) results WHERE position = 2; -- above query = 404851.763 ms (6 mins 44 secs) -- With windowing functions. SELECT runnerid,time,position FROM (SELECT runnerid,time,DENSE_RANK() OVER(ORDER BY time ASC) AS position FROM big_marathon ) results WHERE position = 2; -- above query = 115.932 ms (3492 times faster) -- Notes: The windowing function method did not use the index on the time column. ------------------------------------------------------------------------------------------ -- Test 6 -- Stock Problem -- -- Notes: If we have a table for stock which holds the stock levels for each product -- and we have an orders table that contains orders for products and their -- due dates. We need to know how much stock we have for each order. We -- need to allocate stock to an order with the earliest due date first. -- If there are two orders with the same due date the one with the lowest -- order id number should take the stock first. -- We need to see a list of orders with the orderid, partnum, duedate and -- the orderqty and the remaining stock after the order is completed. If the -- remaining stock is less than zero we show that rather than 0 to indicate -- how much stock needs to be bought or made. CREATE TABLE stock ( partnum VARCHAR(16) NOT NULL PRIMARY KEY, stockqty INT NOT NULL, CHECK (stockqty > 0) ); CREATE TABLE orders ( orderid SERIAL NOT NULL PRIMARY KEY, partnum VARCHAR(16) NOT NULL, duedate DATE NOT NULL, orderqty INT NOT NULL, CHECK (orderqty > 0) ); INSERT INTO stock VALUES('CPU',1000); INSERT INTO stock VALUES('HARDDRIVE',2000); INSERT INTO stock VALUES('KEYBOARD',4000); INSERT INTO stock VALUES('MOUSE',9000); INSERT INTO stock VALUES('MONITOR',100); INSERT INTO orders (partnum,duedate,orderqty) VALUES('CPU','2008-12-22', 700); INSERT INTO orders (partnum,duedate,orderqty) VALUES('CPU','2008-11-15', 600); INSERT INTO orders (partnum,duedate,orderqty) VALUES('HARDDRIVE','2008-11-05', 1200); INSERT INTO orders (partnum,duedate,orderqty) VALUES('HARDDRIVE','2008-11-17', 200); INSERT INTO orders (partnum,duedate,orderqty) VALUES('KEYBOARD','2008-11-17', 1000); INSERT INTO orders (partnum,duedate,orderqty) VALUES('KEYBOARD','2008-12-03', 3000); INSERT INTO orders (partnum,duedate,orderqty) VALUES('MONITOR','2008-12-03', 50); INSERT INTO orders (partnum,duedate,orderqty) VALUES('MONITOR','2008-12-03', 60); VACUUM ANALYZE stock; VACUUM ANALYZE orders; -- Normal SELECT t.orderid, t.partnum, t.duedate, t.orderqty, st.stockqty - t.orderqty - COALESCE((SELECT SUM(orderqty) FROM orders WHERE partnum = t.partnum AND (duedate < t.duedate OR (duedate = t.duedate AND orderid < t.orderid)) ),0) AS remaining FROM orders t INNER JOIN stock st ON t.partnum = st.partnum ORDER BY t.partnum ASC,t.duedate ASC,t.orderid ASC; -- above query = 253 tps. -- With windowing functions SELECT o.orderid, o.partnum, o.duedate, o.orderqty, s.stockqty - SUM(o.orderqty) OVER (PARTITION BY o.partnum ORDER BY o.duedate ASC,o.orderid ASC) AS remaining FROM orders o INNER JOIN stock s ON o.partnum = s.partnum ORDER BY o.partnum ASC,o.duedate ASC,o.orderid ASC; -- above query = 305 tps.