/********************************************************* * datacube.c * Utility to build a datacube in PostgreSQL. * * Currently does not support nested CUBE queries. * ********************************************************/ #include #include "postgres.h" #include "datacube.h" #include "utils/memutils.h" #include "tcop/tcopprot.h" #include "parser/parse_expr.h" /* ---------------- * Global variables * ---------------- */ int m, Ind = 0, numAtts = 0, **Combinations; char **c, **projectedAttrs, cubeTableName[30]; /* ----------------------------------------------- * nCr * * Function to calculate the number of ways of * selecting r objects from n objects. * * ---------------------------------------------- */ int nCr (int n, int r) { int num = 1, den = 1, i; for (i = n; i >= n-r+1; i--) num *= i; for (i = r; i >= 1; i--) den *= i; return num/den; } /* ---------------------------------------------- * getMaxCombinations * * This calls the function nCr for values * of r ranging from 0 to n. The total value * of their sum is the maximum number of * combinations possible for given values of * n and r. * * This value is required to decide the size of the * array required to store the different combinations. * * ---------------------------------------------- */ int getMaxCombinations(int n) { int r = 0; int NumCombinations = 0; for (r = 0; r <= n; r++) NumCombinations += nCr(n,r); return NumCombinations; } /* ---------------------------------------------- * Combination * * The actual function where the combinations * are built and stored into the global array * called Combinations. This array is later on * used to generate the actual query for the * DATACUBE. * * ---------------------------------------------- */ void Combination(char **arr ,int n, int x, int r) { int i, j; static int y = -1; y++; for(i = x; i < n; ++i) { c[y] = (char *) malloc( sizeof (arr[i]) ); strcpy (c[y], arr[i]); if(n == m) { for (j = 0; j < r; j++) Combinations[Ind][getIndex(c[j])] = 1; Ind++; } else Combination (arr+1, n-1, i, r); } y--; } /* ---------------------------------------------- * getIndex * * A small function to find the index of the * location of the generated combination in the * actual array. * * Never mind if you didnt understand what is * written above. * * ---------------------------------------------- */ int getIndex(char *str) { int i = 0; for( i = 0; i <= numAtts; i++) if ( strcmp(projectedAttrs[i],str) == 0 ) return i; return -1; } /* ---------------------------------------------- * generateCombinations * * A simple function which initialises the global * array Combinations * * ---------------------------------------------- */ int generateCombinations (void) { int length, i, j; int NumCombinations = getMaxCombinations(numAtts); Combinations = (int **)malloc(sizeof(int*) * NumCombinations); for(i = 0; i < NumCombinations; i++) { Combinations[i] = (int *) malloc(sizeof(int)*numAtts); for(j = 0; j < numAtts; j++) Combinations[i][j] = 0; } for(length = 0; length <= numAtts; length++) { m = numAtts-length+1; c = (char **) malloc(length*sizeof(char*)); Combination(projectedAttrs,numAtts,0,length); } return NumCombinations; } /* ---------------------------------------------- * generateQuery * * This is the function where the actual query * generation takes place. Depending on the * current combination in the Combinations array, * the query is generated taking the attribute * name if the value for that particular attribute * is 1 or using the same attribute as NULL if 0. * * ex: Suppose the attributes are type, store. Let * the current combination be 01. Then the targetlist * is NULL AS type, store. Similarly, for 10 it is * type, NULL AS store * * Returns the generated query to the main loop. * * ---------------------------------------------- */ StringInfo generateQuery(int numSelect, char *aggregateAttrsList) { StringInfo cubeQueryString = makeStringInfo(); int i, j, k, flag = 0; for (i = 0; i < numSelect; i++) { flag = 0; appendStringInfo(cubeQueryString, "SELECT "); for (j = 0; j < numAtts; j++) { if (Combinations[i][j] == 1) appendStringInfo(cubeQueryString, "%s, ", projectedAttrs[j]); else if (Combinations[i][j] == 0) appendStringInfo(cubeQueryString, "NULL AS %s, ", projectedAttrs[j]); } appendStringInfo(cubeQueryString, "%s FROM %s", aggregateAttrsList, cubeTableName); for (j = 0; j < numAtts; j++) if (Combinations[i][j] == 1) flag = 1; if (flag != 0) { appendStringInfo(cubeQueryString, " GROUP BY "); for (j = 0; j < numAtts; j++) { flag = 0; if (Combinations[i][j] == 1) { appendStringInfo(cubeQueryString, projectedAttrs[j]); for (k = j+1; k < numAtts; k++) if (Combinations[i][k] == 1) flag = 1; if (flag == 1) appendStringInfo(cubeQueryString, ", "); } } } if (i != numSelect-1) appendStringInfo(cubeQueryString, " UNION "); } return cubeQueryString; } /* ---------------------------------------------- * executeQuery * * Routine to handle the execution of the query * whose query string is passed along with the * TopMemoryContext. * * This invokes a call to pg_exec_query_string. * * ---------------------------------------------- */ void executeQuery(StringInfo buf, MemoryContext context) { MemoryContext CubeContext; CubeContext = AllocSetContextCreate(context, "CubeContext", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); pg_exec_query_string(buf, 2, CubeContext); } /* ---------------------------------------------- * getTableName * * Our function needs to create a temporary table * to generate the final result. To avoid any * clashes/errors, a table name is generated using * the function gettimeofday(). * * The name of the table so generated is returned. * * ---------------------------------------------- */ char* getTableName(void) { struct timeval tval; struct timezone tz; char *timestr; timestr = palloc(sizeof(char)*30); tz.tz_minuteswest = 0; tz.tz_dsttime = 0; if(gettimeofday(&tval, &tz) != 0) elog(ERROR, "Could not get time of day"); else sprintf(timestr, "cube_%ld_%ld",tval.tv_sec,tval.tv_usec); return timestr; } /* ------------------------------------------------------------------ * constructCube * * The main routine which handles the generation of the DATACUBE. * This function is invoked from postgres.c if it is found that the * current query has a CubeSelectStmt (defined by us) tag. * * The main processes which take place are * * a) Retrieve the actual query string from the querytree using * the function get_query_def defined in ruleutils.c * b) Create a table with the generated name with the result of * the actual query WITHOUT the CUBE part. * c) Find out the attributes in the targetlist and extract the * aggregatelist into aggregateAttrsList. * d) Generate the possible combinations of attributes possible * using the function generateCombinations(). * e) Using these generate combinations, construct the actual * query to be run on the table generated in (b). * f) Execute this query. If the original query had an 'into', * save the result of the CUBE query to that particular table. * Else display the result to the user. * * ------------------------------------------------------------------ */ void constructCube(Query *querytree, MemoryContext context, char *cubeTable) { StringInfo buf = makeStringInfo(); StringInfo createTableQuery = makeStringInfo(); StringInfo cubeQuery = makeStringInfo(); StringInfo intoCubeQuery = makeStringInfo(); List *l; char *aggregateAttrsList, intoRelation[30]; int i = 0, count = 0, numSelect = 0; numAtts = 0, Ind = 0; //Retrieve query string from querytree get_query_def(querytree, buf, NIL, NULL); strcpy (cubeTableName,cubeTable); appendStringInfo(createTableQuery, "CREATE TABLE %s AS %s", cubeTableName, buf->data); // printf("CREATE TABLE QUERY IS %s\n\n",createTableQuery->data); executeQuery(createTableQuery, context); foreach(l, querytree->targetList) { TargetEntry *tle = (TargetEntry *) lfirst(l); if(tle->resdom->ressortgroupref > 0) numAtts++; } //Allocate Memory for projectedAttrs projectedAttrs = palloc( sizeof(char *) * numAtts ); aggregateAttrsList = palloc (sizeof (char) * 1000); foreach(l, querytree->targetList) { char *funcname; TargetEntry *tle = (TargetEntry *) lfirst(l); if(tle->resdom->ressortgroupref > 0) { projectedAttrs[i] = (char *)malloc(sizeof(char) * sizeof(tle->resdom->resname)); projectedAttrs[i] = tle->resdom->resname; i++; } if(nodeTag(tle->expr) == T_Aggref ) { Aggref *aggref = (Aggref *) tle->expr; Oid argtype = exprType(aggref->target); funcname = generate_function_name(aggref->aggfnoid, 1, &argtype); if(count == 0) sprintf(aggregateAttrsList,"%s(%s)", funcname,tle->resdom->resname); else sprintf(aggregateAttrsList,"%s, %s(%s)", aggregateAttrsList, funcname,tle->resdom->resname); count++; } } numSelect = generateCombinations(); cubeQuery = generateQuery(numSelect, aggregateAttrsList); if (querytree->into != NULL) { strcpy (intoRelation, querytree->into->relname); intoCubeQuery->data = palloc(sizeof(char)); appendStringInfo(intoCubeQuery, "CREATE TABLE %s AS %s", intoRelation, cubeQuery->data); // printf("CUBE QUERY IS %s\n",intoCubeQuery->data); executeQuery(intoCubeQuery, context); } else // { // printf("CUBE QUERY IS %s\n",cubeQuery->data); executeQuery(cubeQuery, context); // } } /* ---------------------------------------------- * dropCubeTable * * This routine is used to drop the temporary table * created using getTableName(). * * ---------------------------------------------- */ void dropCubeTable (char *tableName, MemoryContext context) { StringInfo dropTableQuery = makeStringInfo(); appendStringInfo(dropTableQuery, "DROP TABLE %s", tableName); // printf("dropTableQuery is %s\n\n",dropTableQuery->data); executeQuery(dropTableQuery, context); }