Re: Database storage bloat

From: "Tony and Bryn Reina" <reina_ga(at)hotmail(dot)com>
To: "Douglas Trainor" <trainor(at)uic(dot)edu>
Cc: <pgsql-admin(at)postgresql(dot)org>
Subject: Re: Database storage bloat
Date: 2004-04-08 13:01:26
Message-ID: BAY8-DAV37L5QALrHx00001e801@hotmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-admin

I'm using 'text' instead of char. That seemed to cut the bloat down by about
30%. There's only about 11 tables in the DB. I included them at the bottom
in case you're interested.

Perhaps there's just something fundamentally boneheaded about my design
(re-reading "Database Design for Mere Mortals" as I write).

-Tony

CREATE SEQUENCE FileIndicies_fileID_seq;
CREATE TABLE FileIndicies (
fileID integer DEFAULT nextval('FileIndicies_fileID_seq') UNIQUE NOT
NULL, --PK
szFileName text NOT NULL,
szFileType text,
CreationDate date,
CreationTime time,
dtimestampresolution float4,
dtimespan float4,
szAppName text,
szFileComment text,
PRIMARY KEY (fileID),
UNIQUE (szFileName, szFileType, CreationDate, CreationTime)
);

CREATE SEQUENCE EntityFile_dbIndex_seq;
CREATE TABLE EntityFile (
EntityID integer, --PK
fileID integer REFERENCES FileIndicies (fileID),
dbIndex integer DEFAULT nextval('EntityFile_dbIndex_seq') UNIQUE NOT
NULL,
PRIMARY KEY (fileID, EntityID)
);

-- AnalogIndex
CREATE TABLE AnalogIndicies (
dbIndex integer REFERENCES EntityFile (dbIndex),
dsamplerate float4 CHECK (dsamplerate > (0)::float4),
dminval float4,
dmaxval float4 CHECK (dmaxval >= dminval),
szunits text,
dresolution float4 CHECK (dresolution > (0)::float4),
dlocationx float4,
dlocationy float4,
dlocationz float4,
dlocationuser float4,
dhighfreqcorner float4 CHECK (dhighfreqcorner >= (0)::float4),
dwhighfreqorder float4 CHECK (dwhighfreqorder >= (0)::float4),
szhighfiltertype text,
dlowfreqcorner float4 CHECK (dlowfreqcorner >= (0)::float4),
dwlowfreqorder float4 CHECK (dwlowfreqorder >= (0)::float4),
szlowfiltertype float4,
szprobeinfo text,
PRIMARY KEY (dbIndex)
);

CREATE TABLE AnalogValues (
dbIndex integer REFERENCES EntityFile (dbIndex),
dtimestamp float4 NOT NULL,
dvalue float4,
PRIMARY KEY (dbIndex, dtimestamp)
);

CREATE TABLE EventIndicies (
dbIndex integer REFERENCES EntityFile (dbIndex),
dweventtype smallint CHECK (dweventtype >=0 AND dweventtype <= 4),
dwmindatalength smallint CHECK (dwmindatalength > (0)::float4),
dwmaxdatalength smallint CHECK (dwmaxdatalength >= dwmindatalength),
szcsvdesc text,
PRIMARY KEY (dbIndex)
);

CREATE TABLE EventValues (
dbIndex integer REFERENCES EntityFile (dbIndex),
dtimestamp float4,
dwdatabytesize smallint CHECK (dwdatabytesize > (0)::float4),
eventvalue text,
PRIMARY KEY (dbIndex, dtimestamp)
);

CREATE TABLE NeuralIndicies (
dbIndex integer REFERENCES EntityFile (dbIndex),
dwsourceentityid smallint,
dwsourceunitid smallint,
szprobeinfo text,
PRIMARY KEY (dbIndex)
);

CREATE TABLE NeuralValues (
dbIndex integer REFERENCES EntityFile (dbIndex),
dtimestamp float4 NOT NULL,
PRIMARY KEY (dbIndex, dtimestamp)
);

CREATE TABLE SegmentIndicies (
dbIndex integer REFERENCES EntityFile (dbIndex),
dwsourcecount smallint CHECK (dwsourcecount > (0)::float4),
dwminsamplecount smallint CHECK (dwminsamplecount > (0)::float4),
dwmaxsamplecount smallint CHECK (dwmaxsamplecount > (0)::float4),
dsamplerate float4 CHECK (dsamplerate > (0)::float4),
szunits text,
PRIMARY KEY (dbIndex)
);

CREATE TABLE SegmentSourceIndicies (
dbIndex integer REFERENCES EntityFile (dbIndex),
dwsourceid integer CHECK (dwsourceid >= 0),
dminval float4,
dmaxval float4 CHECK (dmaxval >= dminval),
dresolution float4 CHECK (dresolution > (0)::float4),
dsubsampleshift float4,
dlocationx float4,
dlocationy float4,
dlocationz float4,
dlocationuser float4,
dhighfreqcorner float4 CHECK (dhighfreqcorner >= (0)::float4),
dwhighfreqorder float4 CHECK (dwhighfreqorder >= (0)::float4),
szhighfiltertype text,
dlowfreqcorner float4 CHECK (dlowfreqcorner >= (0)::float4),
dwlowfreqorder float4 CHECK (dwlowfreqorder >= (0)::float4),
szlowfiltertype text,
szprobeinfo text,
PRIMARY KEY (dbIndex, dwsourceid)
);

CREATE TABLE SegmentValues (
dbIndex integer REFERENCES EntityFile (dbIndex),
dwunitid smallint,
dwsampleindex smallint,
dtimestamp float4,
dvalue float4,
PRIMARY KEY (dbIndex, dtimestamp, dwsampleindex, dwunitid)
);

CREATE VIEW SegmentData AS
SELECT a.szFileName, a.szFileType, a.creationdate, a.creationtime,
c.EntityID,
b.dwunitid, b.dwsampleindex, b.dtimestamp, b.dvalue
FROM FileIndicies AS a, SegmentValues AS b, EntityFile AS c
WHERE
b.dbIndex = c.dbIndex AND
a.fileID = c.fileID
ORDER BY a.szFileName, a.szFileType, a.creationdate, a.creationtime,
c.EntityID,
b.dwunitid, b.dwsampleindex, b.dtimestamp;

----- Original Message -----
From: "Douglas Trainor" <trainor(at)uic(dot)edu>
To: "Tony Reina" <reina_ga(at)hotmail(dot)com>
Cc: <pgsql-admin(at)postgresql(dot)org>
Sent: Thursday, April 08, 2004 1:41 PM
Subject: Re: [ADMIN] Database storage bloat

> Saying "we've set field sizes to their theoretical skinniness" makes me
> think that
> you may have the wrong data types. For example, you may have used CHAR
> and not VARCHAR.
>
> douglas
>
> Tony Reina wrote:
>
> >I'm developing a database for scientific recordings. These recordings
> >are traditionally saved as binary flat files for simplicity and
> >compact storage. Although I think ultimately having a database is
> >better than 1,000s of flat files in terms of data access, I've found
> >that the database (or at least my design) is pretty wasteful on
> >storage space compared with the binary flat files.
> >
> >In particular, I tried importing all of the data from a binary flat
> >file that is 1.35 MB into a PostgreSQL database (a very small test
> >file; average production file is probably more like 100 MB). The
> >database directory ballooned from 4.1 MB to 92 MB (a bloat of 65X the
> >original storage of the binary flat file).
> >
> >Now I know that table design and normalizing is important. As far as
> >my partner and I can tell, we've made good use of normalizing (no
> >redundancy), we've set field sizes to their theoretical skinniness,
> >[...]
> >
> >
>
>

In response to

Responses

Browse pgsql-admin by date

  From Date Subject
Next Message Tom Bakken 2004-04-08 13:10:25 Re: Out of space
Previous Message Tony and Bryn Reina 2004-04-08 12:51:14 Re: Database storage bloat