/* ------------------------------------------------------------------------
 * nodeBuffering.c
 *	  Routines to handle buffering of scan
 *
 * -------------------------------------------------------------------------
 */

#include "postgres.h"

#include "executor/execdebug.h"
#include "executor/nodeBuffering.h"
#include "executor/execPara.h"
#include "access/heapam.h"
#include "parser/parsetree.h"
#include "storage/conqueue.h"
#include "postmaster/pgjet.h"

#ifdef PAR_SERVER

/*
 * Forward declarations of the file-local helpers defined below.
 */
static TupleTableSlot *BufferingNext(BufferingState *node);
static void InitScanRelation(BufferingState *node, EState *estate);

/*
 * InitScanRelation
 *
 *		Looks up the range table entry named by the plan's scanrelid,
 *		opens that relation with AccessShareLock, starts a heap scan on
 *		it using the executor snapshot, and records both the relation
 *		and the scan descriptor in the node.  The scan tuple type is
 *		then set from the relation's tuple descriptor.
 */
static void
InitScanRelation(BufferingState *node, EState *estate)
{
	Buffering	   *plan = (Buffering *) node->ps.plan;
	RangeTblEntry  *rte;
	Relation		rel;
	HeapScanDesc	scan;

	/* locate the relation to scan through the range table */
	rte = rt_fetch(plan->scanrelid, estate->es_range_table);

	/* open it and begin a key-less scan under the query snapshot */
	rel = heap_open(rte->relid, AccessShareLock);
	scan = heap_beginscan(rel, estate->es_snapshot, 0, NULL);

	/* remember both so ExecEndBuffering can release them */
	node->bs_currentRelation = rel;
	node->bs_currentScanDesc = scan;

	ExecAssignScanType(node, RelationGetDescr(rel), false);
}

/* ----------------------------------------------------------------
 *		ExecInitBuffering
 *
 *		Builds the BufferingState for a Buffering plan node: sets up
 *		the expression context, target list, qual and tuple slots,
 *		opens the scan relation, initializes the outer subtree, resets
 *		the queues, wakes the parallel executors, and allocates one
 *		buffering descriptor for each of the first parallel_degree
 *		entries of bs_bufferingDesc[] (the remaining entries are NULL).
 *
 *		Returns the newly created state node.
 * ----------------------------------------------------------------
 */
BufferingState *
ExecInitBuffering(Buffering *node, EState *estate, int eflags)
{
	BufferingState *state;
	BufferingDesc	desc;
	int				slotno;

	/*
	 * create state structure
	 */
	state = makeNode(BufferingState);
	state->ps.plan = (Plan *) node;
	state->ps.state = estate;

	ExecAssignExprContext(estate, &state->ps);

	/*
	 * initialize child expressions
	 */
	state->ps.targetlist = (List *)
		ExecInitExpr((Expr *) node->plan.targetlist, (PlanState *) state);
	state->ps.qual = (List *)
		ExecInitExpr((Expr *) node->plan.qual, (PlanState *) state);

	/*
	 * tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &state->ps);
	ExecInitScanTupleSlot(estate, state);

	/*
	 * initialize scan relation
	 */
	InitScanRelation(state, estate);

	state->ps.ps_TupFromTlist = false;

	/*
	 * Initialize result tuple type and projection info.
	 */
	ExecAssignResultTypeFromTL(&state->ps);
	ExecAssignScanProjectionInfo(state);

	/* initialize the child node */
	outerPlanState(state) = ExecInitNode(outerPlan(node), estate, eflags);

	/* reset tuple buffers */
	CQueueReset(nodeToString(estate->es_query),
				nodeToString(outerPlan(node)));

	/* wake up parallel executors */
	WakeUpPgJet();

	/*
	 * Set up one local descriptor per active parallel slot; slots at or
	 * beyond parallel_degree are marked unused with NULL.
	 */
	for (slotno = 0; slotno < MAX_PARALLEL_DEGREE; slotno++)
	{
		if (slotno >= parallel_degree)
		{
			state->bs_bufferingDesc[slotno] = NULL;
			continue;
		}

		state->bs_bufferingDesc[slotno] = palloc(sizeof(BufferingDescData));
		desc = state->bs_bufferingDesc[slotno];

		desc->bd_seqno = slotno;
		desc->bd_done = false;
		desc->bd_cindex = 0;
		desc->bd_nmembers = 0;
		desc->bd_buffer = (char *) palloc(QUE_SIZE);
		desc->bd_member = (uint16 *)
			palloc(MAX_TUPLES_PER_BUFFER * sizeof(uint16));
	}

	return state;
}

/*
 * ExecCountSlotsBuffering
 *
 *		Returns the slot count of the outer subtree plus that of the
 *		inner subtree plus two.
 */
int
ExecCountSlotsBuffering(Buffering *node)
{
	int			nslots = 2;

	nslots += ExecCountSlotsNode(outerPlan(node));
	nslots += ExecCountSlotsNode(innerPlan(node));

	return nslots;
}

/*
 * cacheOneMember
 *
 *		Examines the queue member located at 'buffer' (which must point
 *		into bdesc->bd_buffer) and updates 'bdesc' according to its type:
 *
 *		CQ_DATA  - the member's offset within bd_buffer is appended to
 *				   bd_member[], and the returned pointer is advanced by
 *				   the MAXALIGN'd member length.
 *		CQ_DONE  - bd_done is set; the pointer is returned unchanged.
 *		CQ_ELOG  - the member's payload is reported through elog() at
 *				   ERROR level.
 *
 *		An ERROR is also raised when bd_member[] is already full or when
 *		the member type is not recognized.
 *
 *		Returns the position from which the caller should continue.
 */
char *
cacheOneMember(char *buffer, BufferingDesc	bdesc)
{
	CQueueMember *qmem = (CQueueMember *)buffer;

	AssertArg(qmem != NULL);
	
	switch (qmem->type)
	{
	case CQ_DATA:
		Assert(!bdesc->bd_done);
		Assert(qmem->len <= QUE_SIZE);
		
		if (bdesc->bd_nmembers >= MAX_TUPLES_PER_BUFFER)
			elog(ERROR, "number of tuples out of range");
		
		/*
		 * Remember where this member starts, as an offset from the
		 * beginning of the local buffer.
		 *
		 * NOTE(review): the offset is stored as uint16, so this
		 * presumably relies on QUE_SIZE not exceeding 65536 -- confirm.
		 */
		bdesc->bd_member[bdesc->bd_nmembers++] 
			= buffer - bdesc->bd_buffer;
		buffer += MAXALIGN(qmem->len);
		break;
		
	case CQ_DONE:
		bdesc->bd_done = true;
		break;
		
	case CQ_ELOG:
		{
			/* TODO: need more work here ... */
			int		 elevel, msglen;
			char	*message;

			elevel = ERROR;

			msglen = CQueueMemberDataLen(qmem) + 1;
			message = (char *)palloc(msglen);
			StrNCpy(message, CQueueMemberData(qmem), msglen);

			/*
			 * The text comes from the queue, so it must never be used
			 * as the format string itself: any '%' in it would be
			 * interpreted as a conversion specification.
			 */
			elog(elevel, "%s", message);

			/*
			 * Only reached if elog() returns.
			 *
			 * NOTE(review): 'buffer' is not advanced on this path, so
			 * a level at which elog() returns would need to skip the
			 * member as well -- confirm before changing elevel.
			 */
			pfree(message);
		}
		break;
		
	default:
		elog(ERROR, 
			"unrecognized queue member type: %d", qmem->type);
	}

	return buffer;
}

/* ----------------------------------------------------------------
 *		BufferingNext
 *
 *		The work-horse of the buffering node.  It retrieves data from
 *		the queues and forms it into a tuple table slot.
 *
 *		Each pass first hands out the next locally cached tuple, if
 *		any descriptor still has one.  Otherwise every descriptor that
 *		is not yet marked done is refilled from its queue and the new
 *		contents are indexed with cacheOneMember().  If the refill
 *		produced at least one cached member the pass is repeated;
 *		if not, NULL is returned.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BufferingNext(BufferingState *node)
{
	BufferingDesc	desc;
	int				qno;
	int				nbytes;

	for (;;)
	{
		bool		havePending = false;

		/* if one tuple is already in our local cache, return it */
		for (qno = 0; qno < MAX_PARALLEL_DEGREE; qno++)
		{
			CQueueMember   *member;
			HeapTuple		tup;

			desc = node->bs_bufferingDesc[qno];
			if (desc == NULL)
				continue;
			if (desc->bd_cindex >= desc->bd_nmembers)
				continue;

			member = (CQueueMember *)
				(desc->bd_buffer + desc->bd_member[desc->bd_cindex]);
			desc->bd_cindex++;

			tup = ExecParaReadTuple(CQueueMemberData(member),
									CQueueMemberDataLen(member));

			return ExecStoreTuple(tup, node->ps.ps_ResultTupleSlot,
								  InvalidBuffer, true);
		}

		/*
		 * No cached tuple is available, so try to retrieve more from
		 * the buffering queues.
		 */
		for (qno = 0; qno < MAX_PARALLEL_DEGREE; qno++)
		{
			char	   *cursor;

			desc = node->bs_bufferingDesc[qno];
			if (desc == NULL || desc->bd_done)
				continue;

			/* retrieve data from the queue */
			CQueueGetAll(desc->bd_seqno, true, desc->bd_buffer, &nbytes);

			/* start a fresh index over the local buffer */
			desc->bd_cindex = 0;
			desc->bd_done = false;
			desc->bd_nmembers = 0;

			/* record each member's position until done or out of data */
			cursor = desc->bd_buffer;
			while (!desc->bd_done && cursor < desc->bd_buffer + nbytes)
				cursor = cacheOneMember(cursor, desc);
		}

		/* did the refill leave anything to hand out? */
		for (qno = 0; qno < MAX_PARALLEL_DEGREE; qno++)
		{
			desc = node->bs_bufferingDesc[qno];
			if (desc != NULL && desc->bd_nmembers > desc->bd_cindex)
			{
				havePending = true;
				break;
			}
		}

		/* nothing cached anywhere: report end of data */
		if (!havePending)
			return NULL;
	}
}

/*
 * ExecBuffering
 *
 *		Entry point for fetching the next tuple from the buffering
 *		node; the work is delegated to BufferingNext().
 */
TupleTableSlot *
ExecBuffering(BufferingState *node)
{
	TupleTableSlot *result;

	result = BufferingNext(node);

	return result;
}

/*
 * ExecReBuffering
 *
 *		Rescan entry point.  Nothing is implemented here: the body
 *		consists solely of an assertion that always fails, and neither
 *		parameter is used.
 *
 *		NOTE(review): presumably this becomes a silent no-op when
 *		assertions are compiled out -- confirm that is intended.
 */
void 
ExecReBuffering(BufferingState *node, ExprContext *exprCtxt)
{
	Assert(false);
}

/*
 * ExecEndBuffering
 *
 *		Releases what the buffering node holds: its expression context,
 *		the contents of its result tuple slot, every local buffering
 *		descriptor (together with its buffer and member index array),
 *		the heap scan, and the scan relation.
 *
 *		Freed descriptor slots are reset to NULL so that
 *		bs_bufferingDesc[] never holds pointers to released memory.
 */
void 
ExecEndBuffering(BufferingState *node)
{
	Relation	relation;
	HeapScanDesc scanDesc;
	int			i;
	BufferingDesc		bdesc;

	/*
	 * get information from node
	 */
	relation = node->bs_currentRelation;
	scanDesc = node->bs_currentScanDesc;

	/*
	 * Free the exprcontext
	 */
	ExecFreeExprContext(&node->ps);

	/*
	 * clean out the tuple table
	 *
	 * NOTE(review): only the result slot is cleared; the scan tuple
	 * slot set up in ExecInitBuffering is left alone -- confirm that
	 * this is intended.
	 */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);

	/*
	 * free the local buffering descriptors
	 */
	for (i = 0; i < MAX_PARALLEL_DEGREE; i++)
	{
		if (NULL == (bdesc = node->bs_bufferingDesc[i]))
			continue;

		pfree(bdesc->bd_buffer);
		pfree(bdesc->bd_member);
		pfree(bdesc);

		/* do not leave a dangling pointer behind in the node state */
		node->bs_bufferingDesc[i] = NULL;
	}
	
	/*
	 * NOTE(review): the outer subtree initialized in ExecInitBuffering
	 * is not shut down here -- confirm whether it needs to be ended.
	 */

	/*
	 * close heap scan
	 */
	heap_endscan(scanDesc);

	/*
	 * close the heap relation.
	 */
	ExecCloseScanRelation(relation);
}

#endif /* PAR_SERVER */

