/*-------------------------------------------------------------------------
 *
 * conqueue.c
 *	  A simple concurrent producer/consumer (PC) queue implementation
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "miscadmin.h"
#include "storage/lwlock.h"
#include "storage/shmem.h"
#include "storage/spin.h"
#include "storage/proc.h"
#include "postmaster/pgjet.h"
#include "storage/conqueue.h"

#ifdef PAR_SERVER

/*
 * Lower and upper bounds for CQueueStruct.get_delay, the pause a dequeuer
 * takes before it looks at its queue.  The value is handed to pg_usleep(),
 * so it is presumably in microseconds -- TODO confirm.
 */
#define MIN_GET_DELAY		0
#define MAX_GET_DELAY		1000

/*
 * Communication area for buffering process and participants.
 *
 * One of these exists per unit of parallel_degree, laid out as an array in
 * shared memory.  Enqueuers append CQueueMember records to buffer[] and the
 * dequeuer copies out everything that is queued in one go.
 *
 * NOTE(review): cur_size is a uint16, so this layout appears to assume
 * QUE_SIZE fits in 16 bits -- confirm against the definition of QUE_SIZE.
 */
typedef struct CQueueStruct
{
	uint16		seqno;			/* sequence number, from 0; index of this queue */

	slock_t		que_lock;		/* to lock access to the queue */
	int		wait_pid;		/* pid of the waiter (enq/deq) of the queue, 0 if none */

	int		get_delay;		/* dynamically adjusted wait time before a get */ 

	uint16		cur_size;		/* bytes currently used in buffer, <= QUE_SIZE */
	char		buffer[QUE_SIZE]; 	/* buffer holding the queue members */
}CQueueStruct;

/* Base of the shared-memory queue array; stays NULL until CQueueShmemInit() sets it */
static CQueueStruct *CQueueShmem = NULL;

/*
 * Prototypes for routines local to this file
 */
static Size CQueueShmemSize(void);
static void CQueuePutInternal(int seqno, const void *item, uint16 len, uint16 type);

/*
 * CQueueShmemSize
 *		Report the number of bytes of shared memory taken by the queue
 *		array: one CQueueStruct for each unit of parallel_degree.
 */
static Size
CQueueShmemSize(void)
{
	Size		total;

	total = mul_size(sizeof(CQueueStruct), parallel_degree);

	return total;
}

void
CQueueReset(const char *query, const char *plan)
{
	int		 i;
	volatile CQueueStruct *CQueue;
	
	MemSet(CQueueShmem, 0, CQueueShmemSize());
	for (i = 0; i < parallel_degree; i++)
	{
		CQueue = CQueueShmem + i;
		
		CQueue->seqno = i;
		CQueue->cur_size = 0;
		CQueue->wait_pid = 0;
		CQueue->get_delay = MIN_GET_DELAY;
		SpinLockInit(&CQueue->que_lock);
	}

	/* FIXME: now temporarily put the plan in queue */
	if (query != NULL)
	{
		Assert(plan != NULL);
		
		if (strlen(query) > QUE_SIZE)
			elog(ERROR, "query is too big");
		
		if (strlen(plan) > QUE_SIZE)
			elog(ERROR, "plan is too big");

		strcpy(CQueueShmem[0].buffer, query);
		strcpy(CQueueShmem[1].buffer, plan);
	}
}

/*
 * CQueueShmemInit
 *		Attach to (or create) the shared-memory array of concurrent queues.
 *
 * Does nothing when parallel_degree is not positive.  If the shared
 * structure did not exist yet, every queue is reset to its initial state.
 * Reports FATAL when no shared memory could be obtained.
 */
void
CQueueShmemInit(void)
{
	bool		already_there;

	/* queues are only needed when parallel execution is requested */
	if (parallel_degree > 0)
	{
		CQueueShmem = (CQueueStruct *)
			ShmemInitStruct("Concurrent Queue Data",
							CQueueShmemSize(), &already_there);

		if (CQueueShmem == NULL)
			ereport(FATAL,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("not enough shared memory for concurrent queue")));

		/* a pre-existing structure has been initialized already */
		if (!already_there)
			CQueueReset(NULL, NULL);
	}
}

/*
 * CQueuePutInternal
 *		Append one record to queue 'seqno', blocking while the queue has
 *		too little free space for it.
 *
 * A record is a CQueueMember header (type, plus len = payload length and
 * header size) followed by 'len' payload bytes copied from 'item'.  The
 * queue's fill level advances by the MAXALIGN'ed record size.
 *
 * Raises ERROR if the record could never fit, even into an empty queue.
 */
static void
CQueuePutInternal(int seqno, const void *item, uint16 len, uint16 type)
{
	volatile CQueueStruct *queue = CQueueShmem + seqno;
	volatile char *base;
	int			needed;
	int			waiter;

	Assert(queue->seqno == seqno);

	base = (char *) queue->buffer;
	needed = MAXALIGN(len + SizeOfCQueueMember);

	if (needed > QUE_SIZE)
		elog(ERROR, "queue member size is too big");

	/* loop until the record has been stored; exits with the lock held */
	for (;;)
	{
		int			used;

		SpinLockAcquire(&queue->que_lock);
		used = queue->cur_size;

		if (needed <= QUE_SIZE - used)
		{
			CQueueMember *slot = (CQueueMember *) (base + used);

			slot->type = type;
			slot->len = len + SizeOfCQueueMember;
			memcpy((char *) slot + SizeOfCQueueMember, item, len);
			queue->cur_size += needed;
			break;
		}

		/* not enough room: register as the waiter and sleep until signalled */
		queue->wait_pid = MyProcPid;
		SpinLockRelease(&queue->que_lock);
		ProcWaitForSignal();
	}

	/* pick up and clear any registered waiter before dropping the lock */
	waiter = queue->wait_pid;
	queue->wait_pid = 0;
	SpinLockRelease(&queue->que_lock);

	/* wake the waiter, if there was one, now that data is available */
	if (waiter > 0)
		ProcSendSignal(waiter);
}

void
CQueueElog(int seqno, int elevel, const char *emsg)
{
	/* system will take care of PANIC automatically */
	if (elevel > FATAL)
		return;
	
	CQueuePutInternal(seqno, (void *)emsg, (uint16)strlen(emsg), CQ_ELOG);
}

/*
 * CQueuePut
 *		Queue 'len' bytes starting at 'item' on queue 'seqno' as a CQ_DATA
 *		record.  Blocking and error behaviour are those of
 *		CQueuePutInternal().
 */
void
CQueuePut(int seqno, const void *item, uint16 len)
{
	const uint16 kind = CQ_DATA;

	CQueuePutInternal(seqno, item, len, kind);
}

void
CQueueDone(int seqno)
{
	CQueuePutInternal(seqno, "done", (uint16)strlen("done"), CQ_DONE);
}

/*
 * CQueueGetQuery
 *		Return a palloc'd copy of the query text held in the buffer of
 *		queue 0.  At most BLCKSZ bytes, terminator included, are copied.
 *		The seqno argument is not used.
 *
 * FIXME: may need several rounds to get big plan
 */
char *
CQueueGetQuery(int seqno)
{
	char	   *copy;

	copy = palloc(BLCKSZ);
	StrNCpy(copy, CQueueShmem[0].buffer, BLCKSZ);

	return copy;
}

/*
 * CQueueGetPlan
 *		Return a palloc'd copy of the plan text held in the buffer of
 *		queue 1.  At most BLCKSZ bytes, terminator included, are copied.
 *		The seqno argument is not used.
 */
char *
CQueueGetPlan(int seqno)
{
	char	   *copy;

	copy = palloc(BLCKSZ);
	StrNCpy(copy, CQueueShmem[1].buffer, BLCKSZ);

	return copy;
}

/*
 * CQueueGetAll
 *		Drain queue 'seqno': copy everything currently queued into 'buffer'
 *		and leave the queue empty.
 *
 * wait:      when the queue is empty, true means block until something has
 *            been queued; false means return at once.
 * buffer:    destination for the raw queue contents; the copy can be as
 *            large as the queue's fill level, so the caller must provide
 *            room for that.
 * real_size: receives the number of bytes copied.
 *
 * Returns true if data was copied, false if the queue was empty and 'wait'
 * was false (in which case *real_size is left untouched).
 */
bool
CQueueGetAll(int seqno, bool wait, void *buffer, int *real_size)
{
	volatile CQueueStruct *queue = CQueueShmem + seqno;
	Size		nbytes;
	int			waiter;

	Assert(queue->seqno == seqno);

	/* give the enqueuers a head start by sleeping first, if so configured */
	if (queue->get_delay)
		pg_usleep(queue->get_delay);

	/* loop until the queue holds data; exits with the lock held */
	for (;;)
	{
		SpinLockAcquire(&queue->que_lock);
		nbytes = queue->cur_size;

		if (nbytes != 0)
			break;

		/* empty queue: give up right away unless asked to wait */
		if (!wait)
		{
			SpinLockRelease(&queue->que_lock);
			return false;
		}

		/* register as the waiter and sleep until signalled */
		queue->wait_pid = MyProcPid;
		SpinLockRelease(&queue->que_lock);
		ProcWaitForSignal();
	}

	/* take everything, remember any waiter, and mark the queue empty */
	*real_size = nbytes;
	memcpy(buffer, (char *) queue->buffer, nbytes);
	waiter = queue->wait_pid;
	queue->wait_pid = 0;
	queue->cur_size = 0;
	SpinLockRelease(&queue->que_lock);

	/*
	 * Whether the get_delay trick pays off is hard to decide; see the
	 * figures below.  With it, the total execution time is longer but the
	 * resource consumption is much lower.  Without it, this function is
	 * invoked much more frequently, but the IO is thus more aggressive.
	 * Part of the reason is the coarse granularity of sleep().
	 *
	 * SeqScan: 278528 records, 2700 pages.
	 *
	 *  -- with get_delay optimization
	 *	system usage stats:
	 *  !       12.228000 elapsed 0.310446 user 0.070101 system sec
	 *  !       [0.340489 user 0.090129 sys total]
	 *
	 *   -- without get_delay optimization
	 *   system usage stats:
	 *   !       9.855000 elapsed 1.071541 user 0.250360 system sec
	 *   !       [1.101584 user 0.270388 sys total]
	 */
	if (waiter > 0)
	{
		/* somebody was waiting on this queue: wake it up */
		ProcSendSignal(waiter);

		/* dequeuer should be more aggressive */
		if (queue->get_delay > MIN_GET_DELAY)
			queue->get_delay = Max(queue->get_delay - 1, MIN_GET_DELAY);
	}
	else
	{
		/* dequeuer should wait longer */
		if (queue->get_delay < MAX_GET_DELAY)
			queue->get_delay = Min(queue->get_delay + 1, MAX_GET_DELAY);
	}

	/* currently we disable the optimization */
	queue->get_delay = 0;

	return true;
}

#endif /* PAR_SERVER */
