Certain query eating up all free memory (out of memory error)

From: Łukasz Dejneka <l(dot)dejneka(at)gmail(dot)com>
To: pgsql-performance(at)postgresql(dot)org
Subject: Certain query eating up all free memory (out of memory error)
Date: 2010-05-24 16:50:32
Message-ID: AANLkTingEH23V3VF3_P1eaKluVa3PgsA8RGVzFAfv0_N@mail.gmail.com
Views: Raw Message | Whole Thread | Download mbox | Resend email
Thread:
Lists: pgsql-performance

Hi group,

I could really use your help with this one. I don't have all the
details right now (I can provide more descriptions tomorrow and logs
if needed), but maybe this will be enough:

I have written a PG (8.3.8) module which uses the Flex lexical analyser.
It takes text from a database field and finds matches for the defined rules.
It returns a set of rows with two text fields (value found and value type).

When I run query like this:
SELECT * FROM flex_me(SELECT some_text FROM some_table WHERE id = 1);
It works perfectly fine. Memory usage never reaches more than 1% (usually
it's below 0.5% of system memory).

But when I run query like this:
SELECT flex_me(some_text_field) FROM some_table WHERE id = 1;
Memory usage goes through the roof, and if the result is over about
10k matches (rows) it eats up all memory and I get "out of memory"
error.

I try to free all allocated memory, and I even wrote a version that uses
a doubly linked list of results, but the same behaviour persists. I tried
to track it down on my own, and from my trials it seems that the
problem lies directly in the set-returning function "flex_me()" in
File 2, as even 40k results in a two-column array
shouldn't take more than 1MB of memory. Also, when I run it only up to
the end of the SRF_IS_FIRSTCALL() block, the memory usage doesn't go
up; it is when the subsequent SRF_PERCALL_SETUP() calls are made that
the memory usage goes through the roof.

Btw, if the following code contains some nasty errors (and I'm pretty
sure it does), please keep in mind that I'm just learning PG and C
programming. Any help or tips would be greatly appreciated.

Simplified (but still relevant) code below:

File 1 (Flex parser template which is compiled with flex):

%{
#include <stdio.h>

/* Scanner entry points; both are defined in the user-code section of this file. */
extern void *addToken(int type);
extern char ***flexme(char *ptr);

/*
 * Token kinds (T_*) handed to addToken() and the label (S_*) that is stored
 * next to every match of that kind.
 */
#define T_NUM 1
#define S_NUM "number"
#define T_FLO 2
#define S_FLO "float"
#define T_DAT 3
#define S_DAT "date"
#define T_WRD 7
#define S_WRD "word"

/* Result table filled by addToken(): vals[i][0] is the matched text, vals[i][1] its label. */
char ***vals;

/* cnt: entries currently stored in vals; mem_cnt: entries vals has room for. */
int cnt = 0, mem_cnt = 64;

%}

/* Numeric building blocks: a single digit, an optionally negative integer,
 * and a number with a fractional part separated by a dot or a comma.
 * FLOAT also accepts a bare NUMBER, so plain integers match FLOAT as well. */
DGT [0-9]
NUMBER (-)?{DGT}+
FLOAT ((-)?{DGT}+[\.,]{DGT}+)|{NUMBER}

/* Date field separators. DATE_S5 is an empty pattern, i.e. no separator. */
DATE_S1 "-"
DATE_S2 ","
DATE_S3 "."
DATE_S4 "/"
DATE_S5 ""
/* Date fields.
 * DATE_YY: one digit, two digits, or four digits in the range 0000-2499.
 * DATE_DD: 1-9, or two digits in the range 00-31.
 * DATE_MM: 1-9, 01-09, or 10-12. */
DATE_YY ([0-9]|([0-9][0-9])|([0-1][0-9][0-9][0-9])|(2[0-4][0-9][0-9]))
DATE_DD ([1-9]|(([0-2][0-9])|(3[0-1])))
DATE_MM ([1-9]|((0[1-9])|(1[0-2])))

/* Year-month-day dates: one variant per separator, then the union of all. */
DATE_YMD_S1 ({DATE_YY}{DATE_S1}{DATE_MM}{DATE_S1}{DATE_DD})
DATE_YMD_S2 ({DATE_YY}{DATE_S2}{DATE_MM}{DATE_S2}{DATE_DD})
DATE_YMD_S3 ({DATE_YY}{DATE_S3}{DATE_MM}{DATE_S3}{DATE_DD})
DATE_YMD_S4 ({DATE_YY}{DATE_S4}{DATE_MM}{DATE_S4}{DATE_DD})
DATE_YMD_S5 ({DATE_YY}{DATE_S5}{DATE_MM}{DATE_S5}{DATE_DD})
DATE_YMD ({DATE_YMD_S1}|{DATE_YMD_S2}|{DATE_YMD_S3}|{DATE_YMD_S4}|{DATE_YMD_S5})

/* A non-empty run of ASCII letters and digits. */
WORD ([a-zA-Z0-9]+)

%%

{FLOAT} addToken(T_FLO); /* integers and decimals are both recorded as T_FLO */

{DATE_YMD} addToken(T_DAT); /* year-month-day dates */

{WORD} addToken(T_WRD); /* runs of ASCII letters and digits */

.|\n /* discard any character not matched by a rule above */

%%

/*
 * addToken - append the current match (yytext / yyleng) to the global vals table.
 *
 * type must be T_FLO, T_DAT or T_WRD; any other value raises elog(ERROR).
 * Each entry is a palloc'd pair of palloc'd strings: [0] is a copy of the
 * matched text and [1] is a copy of the label for its type. The table size
 * is doubled with repalloc when it is about to fill up.
 *
 * Always returns 0.
 */
void *
addToken(int type)
{
	const char *label = NULL;
	size_t label_len;
	char **entry;

	// make room before the table fills up
	if (cnt >= mem_cnt - 1) {
		mem_cnt *= 2;
		vals = repalloc(vals, mem_cnt * sizeof(char *));
	}

	entry = palloc(2 * sizeof(char *));
	vals[cnt] = entry;

	// pick the label that goes with this token kind
	switch (type) {
	case T_FLO:
		label = S_FLO;
		break;
	case T_DAT:
		label = S_DAT;
		break;
	case T_WRD:
		label = S_WRD;
		break;
	default:
		elog(ERROR,"Unknown flexme type: %d", type);
		break;
	}

	// type label
	label_len = strlen(label);
	entry[1] = palloc((label_len + 1) * sizeof(char));
	memcpy(entry[1], label, label_len);
	entry[1][label_len] = '\0';

	// matched text
	entry[0] = palloc((yyleng + 1) * sizeof(char));
	strncpy(entry[0], yytext, yyleng);
	entry[0][yyleng] = '\0';

	cnt++;

	return 0;
}

char ***flexme(char *ptr)
{

YY_BUFFER_STATE bp;
int yyerr = 0;
cnt = 0;

//initial table size
vals = palloc(mem_cnt * sizeof(char *));

bp = yy_scan_string(ptr);
yy_switch_to_buffer(bp);
yyerr = yylex();
yy_delete_buffer(bp);

if (yyerr != 0) {
elog(ERROR, "Flex parser error code: %d", yyerr);
}

return vals;
}

File 2 (PG function, which includes flex output analyser of compiled
File 1 - lex.yy.c):

#include "postgres.h"
#include "fmgr.h"
#include "funcapi.h"

#include "lex.yy.c"

/*
 * text_to_cstring - convert a text value into a palloc'd, NUL-terminated
 * C string. (This is copied directly from the PG sources.)
 *
 * The caller owns the returned string and may pfree it.
 */
char *text_to_cstring(const text *t);

char *
text_to_cstring(const text *t)
{
	/* must cast away the const, unfortunately */
	text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
	int len = VARSIZE_ANY_EXHDR(tunpacked);
	char *result;

	result = (char *) palloc(len + 1);
	memcpy(result, VARDATA_ANY(tunpacked), len);
	result[len] = '\0';

	/* free the temporary only when the call above returned a different pointer */
	if (tunpacked != t)
		pfree(tunpacked);

	return result;
}

PG_FUNCTION_INFO_V1(flex_me);
Datum flex_me(PG_FUNCTION_ARGS);

/*
 * flex_me - set-returning function: scan the text argument with flexme()
 * and return one (value, type) row per match.
 *
 * First call: converts the argument to a C string, runs the scanner once and
 * stores the result table, the attribute metadata and the row count in the
 * function call context. Every call: builds the tuple for row call_cntr,
 * frees that row's strings and returns it; once all rows have been returned
 * the set is finished.
 *
 * Raises an error if the function is called in a context that cannot accept
 * a composite result.
 */
Datum
flex_me(PG_FUNCTION_ARGS) {
	FuncCallContext *funcctx;
	char ***values;
	int call_cntr, max_calls;

	// stuff done only on the first call of the function
	if (SRF_IS_FIRSTCALL()) {
		MemoryContext oldcontext;
		TupleDesc tupdesc;
		text *in;
		char *ptr;

		// create a function context for cross-call persistence
		funcctx = SRF_FIRSTCALL_INIT();

		// Fetch the argument only here, not on every call: PG_GETARG_TEXT_P
		// may have to detoast the value, which allocates a new copy each
		// time, and this function is called once per returned row.
		in = PG_GETARG_TEXT_P(0);
		ptr = text_to_cstring(in);

		// switch to memory context appropriate for multiple function calls;
		// the result table built by flexme() must survive until the last row
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		values = flexme(ptr);

		// the C string is only needed while scanning
		pfree(ptr);

		// Build a tuple descriptor for our result type
		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
			ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg
				("function returning record called in context "
				"that cannot accept type record")));

		// generate attribute metadata needed later to produce
		// tuples from raw C strings
		funcctx->attinmeta = TupleDescGetAttInMetadata(tupdesc);

		// result table, indexed by call_cntr on the following calls
		funcctx->user_fctx = values;

		// total number of tuples to be returned
		funcctx->max_calls = cnt;

		// go back to normal memory context
		MemoryContextSwitchTo(oldcontext);
	}

	// stuff done on every call of the function.
	funcctx = SRF_PERCALL_SETUP();
	call_cntr = funcctx->call_cntr;
	max_calls = funcctx->max_calls;
	values = (char ***) funcctx->user_fctx;

	if (call_cntr < max_calls) {
		// row[0] is the matched value, row[1] its type label; both are
		// already NUL-terminated C strings, so they are handed to the type
		// input functions directly without making another copy
		char **row = values[call_cntr];
		HeapTuple tuple;
		Datum result;

		// build a tuple and make into datum.
		tuple = BuildTupleFromCStrings(funcctx->attinmeta, row);
		result = HeapTupleGetDatum(tuple);

		// this row is never read again, release it now; the tuple itself
		// must stay allocated because the returned datum refers to it
		pfree(row[0]);
		pfree(row[1]);
		pfree(row);

		// return datum
		SRF_RETURN_NEXT(funcctx, result);
	}

	// all rows have been returned
	SRF_RETURN_DONE(funcctx);
}

Responses

Browse pgsql-performance by date

  From Date Subject
Next Message Merlin Moncure 2010-05-24 18:25:58 Re: shared_buffers advice
Previous Message Ben Chobot 2010-05-24 15:27:43 [SPAM] Re: shared_buffers advice