From af483065afd0c21a33321332abaab3823d9d4285 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Thu, 31 Aug 2017 11:40:26 -0700
Subject: [PATCH 14/16] WIP: JITed tuple deforming.

---
 src/backend/access/common/heaptuple.c  | 660 +++++++++++++++++++++++++++++++++
 src/backend/executor/execExprCompile.c |  36 ++
 src/backend/executor/execTuples.c      |   5 +
 src/backend/lib/llvmjit.c              |   2 +-
 src/backend/utils/misc/guc.c           |  12 +
 src/include/executor/executor.h        |   1 +
 src/include/executor/tuptable.h        |   2 +-
 src/include/lib/llvmjit.h              |   6 +
 8 files changed, 722 insertions(+), 2 deletions(-)

diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index f77ea477fb..0e552fb49a 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -60,7 +60,11 @@
 #include "access/sysattr.h"
 #include "access/tuptoaster.h"
 #include "executor/tuptable.h"
+#include "nodes/execnodes.h"
 #include "utils/expandeddatum.h"
+#include "utils/memutils.h"
+#include "utils/resowner.h"
+#include "lib/llvmjit.h"
 
 
 /* Does att's datatype allow packing into the 1-byte-header varlena format? */
@@ -70,6 +74,11 @@
 #define VARLENA_ATT_IS_PACKABLE(att) \
 	((att)->attstorage != 'p')
 
+#ifdef USE_LLVM
+/* GUC "jit_tuple_deforming": JIT-compile tuple deforming when set */
+bool jit_tuple_deforming = false;
+#endif /* USE_LLVM */
+
 
 /* ----------------------------------------------------------------
  *						misc support routines
@@ -1058,6 +1067,7 @@ slot_deform_tuple(TupleTableSlot *slot, int natts)
 		/* Start from the first attribute */
 		off = 0;
 		slow = false;
+		Assert(slot->tts_off == 0);
 	}
 	else
 	{
@@ -1476,3 +1486,653 @@ minimal_tuple_from_heap_tuple(HeapTuple htup)
 	result->t_len = len;
 	return result;
 }
+
+
+#ifdef USE_LLVM
+
+/* varsize_any: function form of the VARSIZE_ANY() macro, for JITed callers */
+extern size_t varsize_any(void *p);
+size_t
+varsize_any(void *p)
+{
+	return VARSIZE_ANY(p);
+}
+
+/*
+ * Return the module's declaration of varsize_any(), adding it on first use.
+ *
+ * The result type is TypeSizeT so the declaration matches the C prototype
+ * (size_t) on every platform, the same way create_strlen() declares strlen.
+ */
+static LLVMValueRef
+create_varsize_any(LLVMModuleRef mod)
+{
+	/* on the stack: nothing is leaked when the declaration already exists */
+	LLVMTypeRef param_types[1];
+	LLVMTypeRef sig;
+	LLVMValueRef fn;
+	const char *nm = "varsize_any";
+	const char *attrs[] = {"readonly", "argmemonly"};
+	int			i;
+
+	fn = LLVMGetNamedFunction(mod, nm);
+	if (fn)
+		return fn;
+
+	param_types[0] = LLVMPointerType(LLVMInt8Type(), 0);
+	sig = LLVMFunctionType(TypeSizeT, param_types, lengthof(param_types), 0);
+	fn = LLVMAddFunction(mod, nm, sig);
+
+	/* NOTE(review): string attributes; confirm LLVM honors them as intended */
+	for (i = 0; i < lengthof(attrs); i++)
+		LLVMAddAttributeAtIndex(fn, LLVMAttributeFunctionIndex,
+			LLVMCreateStringAttribute(LLVMGetGlobalContext(), attrs[i], strlen(attrs[i]), NULL, 0));
+
+	return fn;
+}
+
+/*
+ * Return the module's declaration of strlen(), adding it on first use.
+ */
+static LLVMValueRef
+create_strlen(LLVMModuleRef mod)
+{
+	LLVMTypeRef param_types[1]; /* stack array: no palloc to leak */
+	LLVMTypeRef sig;
+	const char *nm = "strlen";
+	LLVMValueRef fn = LLVMGetNamedFunction(mod, nm);
+
+	if (fn)
+		return fn;
+
+	/* declared as: size_t strlen(char *) */
+	param_types[0] = LLVMPointerType(LLVMInt8Type(), 0);
+	sig = LLVMFunctionType(TypeSizeT, param_types, lengthof(param_types), 0);
+	return LLVMAddFunction(mod, nm, sig);
+}
+
+
+/*
+ * Build an LLVM function deforming tuples that match the given descriptor.
+ *
+ * The generated function takes (TupleTableSlot *slot, int16 maxatt).  It
+ * resumes at attribute slot->tts_nvalid / byte offset slot->tts_off, fills
+ * tts_values and tts_isnull for the following attributes, and saves the new
+ * tts_nvalid and tts_off before returning.  Code is emitted for the first
+ * natts attributes of desc only.
+ *
+ * NOTE(review): the number of attributes actually present in the tuple is
+ * never checked here; confirm callers guarantee it is >= natts.
+ */
+LLVMValueRef
+slot_compile_deform(LLVMJitContext *context, TupleDesc desc, int natts)
+{
+	char *funcname;
+
+	LLVMModuleRef mod;
+	LLVMBuilderRef builder;
+
+	LLVMTypeRef deform_sig;
+	LLVMValueRef deform_fn;
+
+	LLVMBasicBlockRef entry;
+	LLVMBasicBlockRef outblock;
+	LLVMBasicBlockRef deadblock;
+	LLVMBasicBlockRef *attcheckattnoblocks;
+	LLVMBasicBlockRef *attstartblocks;
+	LLVMBasicBlockRef *attisnullblocks;
+	LLVMBasicBlockRef *attcheckalignblocks;
+	LLVMBasicBlockRef *attalignblocks;
+	LLVMBasicBlockRef *attstoreblocks;
+	LLVMBasicBlockRef *attoutblocks;
+
+	LLVMValueRef l_varsize_any;
+	LLVMValueRef l_strlen;
+
+	LLVMValueRef v_tupdata_base;
+	LLVMValueRef v_off, v_off_inc, v_off_start;
+	LLVMValueRef v_tts_values;
+	LLVMValueRef v_tts_nulls;
+	LLVMValueRef v_slotoffp;
+	LLVMValueRef v_nvalidp, v_nvalid;
+	LLVMValueRef v_maxatt;
+
+	LLVMValueRef v_slot;
+
+	LLVMValueRef v_tupleheaderp;
+	LLVMValueRef v_tuplep;
+	LLVMValueRef v_infomask1;
+	LLVMValueRef v_bits;
+
+	LLVMValueRef v_hoff;
+
+	LLVMValueRef v_hasnulls;
+
+	int attnum;
+	/* statically known offset (or just alignment) of the next column, or -1 */
+	int attcuralign = 0;
+	/* is attcuralign an exact offset, rather than merely a known alignment? */
+	bool attguaranteedalign = true;
+	bool lastcouldbenull = false;
+
+	llvm_initialize();
+
+	mod = context->module;
+	if (!mod)
+	{
+		context->compiled = false;
+		mod = context->module = LLVMModuleCreateWithName("deform");
+		LLVMSetTarget(mod, llvm_triple);
+	}
+
+	funcname = psprintf("deform%d", context->counter++);
+
+	/* Create the signature and function */
+	{
+		LLVMTypeRef param_types[] = {
+			LLVMPointerType(StructTupleTableSlot, 0),
+			LLVMInt16Type()};
+		deform_sig = LLVMFunctionType(LLVMVoidType(), param_types,
+									  lengthof(param_types), 0);
+	}
+	deform_fn = LLVMAddFunction(mod, funcname, deform_sig);
+	LLVMSetLinkage(deform_fn, LLVMInternalLinkage);
+	LLVMSetVisibility(deform_fn, LLVMDefaultVisibility);
+	LLVMSetParamAlignment(LLVMGetParam(deform_fn, 0), MAXIMUM_ALIGNOF);
+
+	entry = LLVMAppendBasicBlock(deform_fn, "entry");
+	outblock = LLVMAppendBasicBlock(deform_fn, "out");
+	deadblock = LLVMAppendBasicBlock(deform_fn, "deadblock");
+	builder = LLVMCreateBuilder();
+
+	attcheckattnoblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+	attstartblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+	attisnullblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+	attcheckalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+	attalignblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+	attstoreblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+	attoutblocks = palloc(sizeof(LLVMBasicBlockRef) * natts);
+
+	l_varsize_any = create_varsize_any(mod);
+	l_strlen = create_strlen(mod);
+
+	LLVMPositionBuilderAtEnd(builder, entry);
+
+	v_slot = LLVMGetParam(deform_fn, 0);
+
+	v_tts_values = LLVMBuildLoad(builder,
+						   LLVMBuildStructGEP(builder, v_slot, 10, ""),
+						   "tts_values");
+	v_tts_nulls = LLVMBuildLoad(builder,
+							LLVMBuildStructGEP(builder, v_slot, 11, ""),
+							"tts_isnull");
+	v_slotoffp = LLVMBuildStructGEP(builder, v_slot, 14, "");
+	v_nvalidp = LLVMBuildStructGEP(builder, v_slot, 9, "");
+
+	v_tupleheaderp = LLVMBuildLoad(builder,
+		LLVMBuildStructGEP(builder, v_slot, 5, ""),
+		"tupleheader");
+	v_tuplep = LLVMBuildLoad(builder,
+		LLVMBuildStructGEP(builder, v_tupleheaderp, 3, ""),
+		"tuple");
+	v_bits = LLVMBuildBitCast(
+		builder,
+		LLVMBuildStructGEP(builder, v_tuplep, 5, "t_bits"),
+		LLVMPointerType(LLVMInt8Type(), 0),
+		"");
+
+	v_infomask1 =
+		LLVMBuildLoad(builder,
+					  LLVMBuildStructGEP(builder, v_tuplep, 3, ""),
+					  "infomask");
+	/* (tuple)->t_data->t_infomask & HEAP_HASNULL */
+	v_hasnulls =
+		LLVMBuildICmp(builder, LLVMIntNE,
+					  LLVMBuildAnd(builder,
+								   LLVMConstInt(LLVMInt16Type(), HEAP_HASNULL, false),
+								   v_infomask1, ""),
+					  LLVMConstInt(LLVMInt16Type(), 0, false),
+					  "hasnulls");
+
+	/*
+	 * GEP indexes are interpreted as signed, while t_hoff is an unsigned
+	 * length (presumably a uint8 field); zero-extend it so that header sizes
+	 * with the top bit set do not turn into negative offsets.
+	 */
+	v_hoff = LLVMBuildZExt(
+		builder,
+		LLVMBuildLoad(builder, LLVMBuildStructGEP(builder, v_tuplep, 4, ""), ""),
+		LLVMInt32Type(), "t_hoff");
+
+	v_tupdata_base = LLVMBuildGEP(
+		builder,
+		LLVMBuildBitCast(builder, v_tuplep, LLVMPointerType(LLVMInt8Type(), 0), ""),
+		&v_hoff, 1, "v_tupdata_base");
+
+	/* offset to resume deforming at, as saved in the slot */
+	v_off_start = LLVMBuildLoad(builder, v_slotoffp, "v_slot_off");
+
+	v_off_inc = v_off = v_off_start;
+
+	v_maxatt = LLVMGetParam(deform_fn, 1);
+
+	/* build the basic block for each attribute, need them as jump target */
+	for (attnum = 0; attnum < natts; attnum++)
+	{
+		char *blockname;
+
+		blockname = psprintf("block.attr.%d.attcheckattno", attnum);
+		attcheckattnoblocks[attnum] = LLVMAppendBasicBlock(deform_fn, blockname);
+		pfree(blockname);
+		blockname = psprintf("block.attr.%d.start", attnum);
+		attstartblocks[attnum] = LLVMAppendBasicBlock(deform_fn, blockname);
+		pfree(blockname);
+		blockname = psprintf("block.attr.%d.attisnull", attnum);
+		attisnullblocks[attnum] = LLVMAppendBasicBlock(deform_fn, blockname);
+		pfree(blockname);
+		blockname = psprintf("block.attr.%d.attcheckalign", attnum);
+		attcheckalignblocks[attnum] = LLVMAppendBasicBlock(deform_fn, blockname);
+		pfree(blockname);
+		blockname = psprintf("block.attr.%d.align", attnum);
+		attalignblocks[attnum] = LLVMAppendBasicBlock(deform_fn, blockname);
+		pfree(blockname);
+		blockname = psprintf("block.attr.%d.store", attnum);
+		attstoreblocks[attnum] = LLVMAppendBasicBlock(deform_fn, blockname);
+		pfree(blockname);
+		blockname = psprintf("block.attr.%d.out", attnum);
+		attoutblocks[attnum] = LLVMAppendBasicBlock(deform_fn, blockname);
+		pfree(blockname);
+	}
+
+	v_nvalid = LLVMBuildLoad(builder, v_nvalidp, "");
+
+	/* build switch to go from nvalid to the right startblock */
+	{
+		LLVMValueRef v_switch = LLVMBuildSwitch(builder, v_nvalid,
+												deadblock, natts);
+		for (attnum = 0; attnum < natts; attnum++)
+		{
+			LLVMValueRef v_attno = LLVMConstInt(LLVMInt32Type(), attnum, false);
+			LLVMAddCase(v_switch, v_attno, attstartblocks[attnum]);
+		}
+	}
+
+	/* switch default: taken only if tts_nvalid is outside [0, natts) */
+	LLVMPositionBuilderAtEnd(builder, deadblock);
+	LLVMBuildUnreachable(builder);
+
+	for (attnum = 0; attnum < natts; attnum++)
+	{
+		Form_pg_attribute att = TupleDescAttr(desc, attnum);
+		LLVMValueRef incby;
+		int alignto;
+		LLVMValueRef l_attno = LLVMConstInt(LLVMInt32Type(), attnum, false);
+		LLVMValueRef v_attdatap;
+		LLVMValueRef v_resultp;
+		LLVMValueRef v_islast;
+
+		/* build block checking whether we did all the necessary attributes */
+		LLVMPositionBuilderAtEnd(builder, attcheckattnoblocks[attnum]);
+
+		/*
+		 * Build phi node, unless first block. This can be reached from:
+		 * - store block of last attribute
+		 * - start block of last attribute if null
+		 */
+		if (lastcouldbenull)
+		{
+			LLVMValueRef incoming_values[] =
+				{v_off, v_off_inc};
+			LLVMBasicBlockRef incoming_blocks[] =
+				{attisnullblocks[attnum - 1], attstoreblocks[attnum - 1]};
+			v_off = LLVMBuildPhi(builder, LLVMInt32Type(), "off");
+			LLVMAddIncoming(v_off,
+							incoming_values, incoming_blocks,
+							lengthof(incoming_blocks));
+		}
+		else
+		{
+			v_off = v_off_inc;
+		}
+
+		/* check if done */
+		v_islast = LLVMBuildICmp(builder, LLVMIntEQ,
+								 LLVMConstInt(LLVMInt16Type(), attnum, false),
+								 v_maxatt, "");
+		LLVMBuildCondBr(builder, v_islast,
+						attoutblocks[attnum], attstartblocks[attnum]);
+
+		/* build block to jump out */
+		LLVMPositionBuilderAtEnd(builder, attoutblocks[attnum]);
+		LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), attnum, false), v_nvalidp);
+		LLVMBuildStore(builder, v_off, v_slotoffp);
+		LLVMBuildRetVoid(builder);
+
+		LLVMPositionBuilderAtEnd(builder, attstartblocks[attnum]);
+
+		/*
+		 * This block can be reached because
+		 * - we've been directly jumped through to continue deforming
+		 * - this attribute's checkattno block
+		 * Build the appropriate phi node.
+		 */
+		{
+			LLVMValueRef incoming_values[] =
+				{v_off_start, v_off};
+			LLVMBasicBlockRef incoming_blocks[] =
+				{entry, attcheckattnoblocks[attnum]};
+
+			v_off = LLVMBuildPhi(builder, LLVMInt32Type(), "off");
+			LLVMAddIncoming(v_off,
+							incoming_values, incoming_blocks,
+							lengthof(incoming_blocks));
+		}
+
+		/* check for nulls if necessary */
+		if (!att->attnotnull)
+		{
+			LLVMBasicBlockRef blockifnotnull;
+			LLVMBasicBlockRef blockifnull;
+			LLVMBasicBlockRef blocknext;
+			LLVMValueRef attisnull;
+			LLVMValueRef v_nullbyteno;
+			LLVMValueRef v_nullbytemask;
+			LLVMValueRef v_nullbyte;
+			LLVMValueRef v_nullbit;
+
+			blockifnotnull = attcheckalignblocks[attnum];
+			blockifnull = attisnullblocks[attnum];
+
+			if (attnum + 1 == natts)
+				blocknext = outblock;
+			else
+				blocknext = attcheckattnoblocks[attnum + 1];
+
+			/* FIXME: replace with neg */
+			v_nullbyteno = LLVMConstInt(LLVMInt32Type(), attnum >> 3, false);
+			v_nullbytemask = LLVMConstInt(LLVMInt8Type(), 1 << ((attnum) & 0x07), false);
+			v_nullbyte = LLVMBuildLoad(
+				builder,
+				LLVMBuildGEP(builder, v_bits,
+							 &v_nullbyteno, 1, ""),
+				"attnullbyte");
+
+			v_nullbit = LLVMBuildICmp(
+				builder,
+				LLVMIntEQ,
+				LLVMBuildAnd(builder, v_nullbyte, v_nullbytemask, ""),
+				LLVMConstInt(LLVMInt8Type(), 0, false),
+				"attisnull");
+
+			attisnull = LLVMBuildAnd(builder, v_hasnulls, v_nullbit, "");
+
+			LLVMBuildCondBr(builder, attisnull, blockifnull, blockifnotnull);
+
+			LLVMPositionBuilderAtEnd(builder, blockifnull);
+
+			/* store null-byte */
+			LLVMBuildStore(builder,
+						   LLVMConstInt(LLVMInt8Type(), 1, false),
+						   LLVMBuildGEP(builder, v_tts_nulls, &l_attno, 1, ""));
+			/* store zero datum */
+			LLVMBuildStore(builder,
+						   LLVMConstInt(TypeSizeT, 0, false),
+						   LLVMBuildGEP(builder, v_tts_values, &l_attno, 1, ""));
+
+			LLVMBuildBr(builder, blocknext);
+
+			lastcouldbenull = true;
+		}
+		else
+		{
+			LLVMBuildBr(builder, attcheckalignblocks[attnum]);
+			lastcouldbenull = false;
+
+			/* yuck, dirty hack */
+			LLVMPositionBuilderAtEnd(builder, attisnullblocks[attnum]);
+			LLVMBuildBr(builder, attcheckalignblocks[attnum]);
+		}
+		LLVMPositionBuilderAtEnd(builder, attcheckalignblocks[attnum]);
+
+		/* perform alignment */
+		if (att->attalign == 'i')
+		{
+			alignto = ALIGNOF_INT;
+		}
+		else if (att->attalign == 'c')
+		{
+			alignto = 1;
+		}
+		else if (att->attalign == 'd')
+		{
+			alignto = ALIGNOF_DOUBLE;
+		}
+		else if (att->attalign == 's')
+		{
+			alignto = ALIGNOF_SHORT;
+		}
+		else
+		{
+			elog(ERROR, "unknown alignment");
+			alignto = 0;
+		}
+
+		if ((alignto > 1 &&
+			 (attcuralign < 0 || attcuralign != TYPEALIGN(alignto, attcuralign))))
+		{
+			LLVMValueRef v_off_aligned;
+			bool conditional_alignment;
+
+			/*
+			 * If varlena, do only alignment if not short varlena. Check if
+			 * the byte is padding for that.
+			 */
+			if (att->attlen == -1)
+			{
+				LLVMValueRef possible_padbyte;
+				LLVMValueRef ispad;
+				possible_padbyte =
+					LLVMBuildLoad(builder,
+								  LLVMBuildGEP(builder, v_tupdata_base, &v_off, 1, ""),
+								  "padbyte");
+				ispad =
+					LLVMBuildICmp(builder, LLVMIntEQ, possible_padbyte,
+								  LLVMConstInt(LLVMInt8Type(), 0, false),
+								  "ispadbyte");
+				LLVMBuildCondBr(builder, ispad,
+								attalignblocks[attnum],
+								attstoreblocks[attnum]);
+				conditional_alignment = true;
+			}
+			else
+			{
+				LLVMBuildBr(builder, attalignblocks[attnum]);
+				conditional_alignment = false;
+			}
+
+			LLVMPositionBuilderAtEnd(builder, attalignblocks[attnum]);
+
+			{
+				/* translation of alignment code (cf TYPEALIGN()) */
+
+				/* ((ALIGNVAL) - 1) */
+				LLVMValueRef alignval = LLVMConstInt(LLVMInt32Type(), alignto - 1, false);
+				/* ((uintptr_t) (LEN) + ((ALIGNVAL) - 1)) */
+				LLVMValueRef lh = LLVMBuildAdd(builder, v_off, alignval, "");
+				/* ~((uintptr_t) ((ALIGNVAL) - 1))*/
+				LLVMValueRef rh = LLVMConstInt(LLVMInt32Type(), ~(alignto - 1), false);
+
+				v_off_aligned = LLVMBuildAnd(builder, lh, rh, "aligned_offset");
+			}
+
+			/* having aligned, any statically tracked offset is aligned as well */
+			if (attcuralign >= 0)
+				attcuralign = TYPEALIGN(alignto, attcuralign);
+
+			LLVMBuildBr(builder, attstoreblocks[attnum]);
+			LLVMPositionBuilderAtEnd(builder, attstoreblocks[attnum]);
+
+			if (conditional_alignment)
+			{
+				LLVMValueRef incoming_values[] =
+					{v_off, v_off_aligned};
+				LLVMBasicBlockRef incoming_blocks[] =
+					{attcheckalignblocks[attnum], attalignblocks[attnum]};
+				v_off_inc = LLVMBuildPhi(builder, LLVMInt32Type(), "");
+				LLVMAddIncoming(v_off_inc,
+								incoming_values, incoming_blocks,
+								lengthof(incoming_values));
+			}
+			else
+			{
+				v_off_inc = v_off_aligned;
+			}
+		}
+		else
+		{
+			LLVMBuildBr(builder, attalignblocks[attnum]);
+			LLVMPositionBuilderAtEnd(builder, attalignblocks[attnum]);
+			LLVMBuildBr(builder, attstoreblocks[attnum]);
+			v_off_inc = v_off;
+		}
+		LLVMPositionBuilderAtEnd(builder, attstoreblocks[attnum]);
+
+		/* compute what following columns are aligned to */
+		if (att->attlen < 0)
+		{
+			/* can't guarantee any alignment after varlen field */
+			attcuralign = -1;
+			attguaranteedalign = false;
+		}
+		else if (att->attnotnull && attguaranteedalign && attcuralign >= 0)
+		{
+			/* exact offset known: it simply advances by the column width */
+			Assert(att->attlen > 0);
+			attcuralign += att->attlen;
+		}
+		else if (att->attnotnull && (att->attlen % alignto) == 0)
+		{
+			/*
+			 * After a NOT NULL fixed-width column whose length is a multiple
+			 * of its alignment, the next column is aligned at least as
+			 * strictly as this one; nothing more can be assumed.
+			 */
+			attcuralign = alignto;
+			attguaranteedalign = false;
+		}
+		else
+		{
+			attcuralign = -1;
+			attguaranteedalign = false;
+		}
+
+		/* compute address to load data from */
+		v_attdatap =
+			LLVMBuildGEP(builder, v_tupdata_base, &v_off_inc, 1, "");
+
+		/* compute address to store value at */
+		v_resultp = LLVMBuildGEP(builder, v_tts_values, &l_attno, 1, "");
+
+		/* store null-byte (false) */
+		LLVMBuildStore(builder,
+					   LLVMConstInt(LLVMInt8Type(), 0, false),
+					   LLVMBuildGEP(builder, v_tts_nulls, &l_attno, 1, ""));
+
+		if (att->attbyval)
+		{
+			LLVMValueRef tmp_loaddata;
+			LLVMTypeRef vartypep =
+				LLVMPointerType(LLVMIntType(att->attlen*8), 0);
+			tmp_loaddata =
+				LLVMBuildPointerCast(builder, v_attdatap, vartypep, "");
+			tmp_loaddata = LLVMBuildLoad(builder, tmp_loaddata, "attr_byval");
+			tmp_loaddata = LLVMBuildZExt(builder, tmp_loaddata, TypeSizeT, "");
+
+			LLVMBuildStore(builder, tmp_loaddata, v_resultp);
+		}
+		else
+		{
+			LLVMValueRef tmp_loaddata;
+
+			/* store pointer */
+			tmp_loaddata =
+				LLVMBuildPtrToInt(builder,
+								  v_attdatap,
+								  TypeSizeT,
+								  "attr_ptr");
+			LLVMBuildStore(builder, tmp_loaddata, v_resultp);
+		}
+
+		/* increment data pointer */
+		if (att->attlen > 0)
+		{
+			incby = LLVMConstInt(LLVMInt32Type(), att->attlen, false);
+		}
+		else if (att->attlen == -1)
+		{
+			incby =
+				LLVMBuildCall(builder, l_varsize_any,
+							  &v_attdatap, 1,
+							  "varsize_any");
+			{
+				char argname[] = "readonly";
+				LLVMAttributeRef ref =
+					LLVMCreateStringAttribute(LLVMGetGlobalContext(), argname, strlen(argname), NULL, 0);
+				LLVMAddCallSiteAttribute(incby, LLVMAttributeFunctionIndex, ref);
+			}
+			incby = LLVMBuildTrunc(builder, incby,
+								   LLVMInt32Type(), "");
+		}
+		else if (att->attlen == -2)
+		{
+			incby = LLVMBuildCall(builder, l_strlen, &v_attdatap, 1, "strlen");
+			incby = LLVMBuildTrunc(builder, incby,
+								   LLVMInt32Type(), "");
+			/* add 1 for the terminating NUL byte */
+			incby =
+				LLVMBuildAdd(builder, incby,
+							 LLVMConstInt(LLVMInt32Type(), 1, false), "");
+		}
+		else
+		{
+			Assert(false);
+			incby = NULL; /* silence compiler */
+		}
+
+		v_off_inc = LLVMBuildAdd(builder, v_off_inc, incby, "increment_offset");
+
+		/*
+		 * jump to next block, unless last possible column, or all desired
+		 * (available) attributes have been fetched.
+		 */
+		if (attnum + 1 == natts)
+		{
+			LLVMBuildBr(builder, outblock);
+		}
+		else
+		{
+			LLVMBuildBr(builder, attcheckattnoblocks[attnum + 1]);
+		}
+	}
+
+	/* jump out; the offset saved must be the one past the last attribute */
+	LLVMPositionBuilderAtEnd(builder, outblock);
+	if (lastcouldbenull)
+	{
+		LLVMValueRef incoming_values[] = {v_off, v_off_inc};
+		LLVMBasicBlockRef incoming_blocks[] =
+			{attisnullblocks[natts - 1], attstoreblocks[natts - 1]};
+		v_off = LLVMBuildPhi(builder, LLVMInt32Type(), "off");
+		LLVMAddIncoming(v_off, incoming_values, incoming_blocks,
+						lengthof(incoming_blocks));
+	}
+	else
+		v_off = v_off_inc;
+	LLVMBuildStore(builder, LLVMBuildZExt(builder, v_maxatt, LLVMInt32Type(), ""), v_nvalidp);
+	LLVMBuildStore(builder, v_off, v_slotoffp);
+	LLVMBuildRetVoid(builder);
+
+	LLVMDisposeBuilder(builder);
+
+	return deform_fn;
+}
+#endif
diff --git a/src/backend/executor/execExprCompile.c b/src/backend/executor/execExprCompile.c
index d41405b648..79b3ebd6c4 100644
--- a/src/backend/executor/execExprCompile.c
+++ b/src/backend/executor/execExprCompile.c
@@ -504,23 +504,37 @@ ExecReadyCompiledExpr(ExprState *state, PlanState *parent)
 			case EEOP_OUTER_FETCHSOME:
 			case EEOP_SCAN_FETCHSOME:
 				{
+					TupleDesc desc = NULL;
 					LLVMValueRef v_slot;
 					LLVMBasicBlockRef b_fetch = LLVMInsertBasicBlock(opblocks[i + 1], "");
 					LLVMValueRef v_nvalid;
 
 					if (op->opcode == EEOP_INNER_FETCHSOME)
 					{
+						PlanState *is = innerPlanState(parent);
 
 						v_slot = v_innerslot;
 
+						if (is &&
+							is->ps_ResultTupleSlot &&
+							is->ps_ResultTupleSlot->tts_fixedTupleDescriptor)
+							desc = is->ps_ResultTupleSlot->tts_tupleDescriptor;
 					}
 					else if (op->opcode == EEOP_OUTER_FETCHSOME)
 					{
+						PlanState *os = outerPlanState(parent);
+
 						v_slot = v_outerslot;
+
+						if (os &&
+							os->ps_ResultTupleSlot &&
+							os->ps_ResultTupleSlot->tts_fixedTupleDescriptor)
+							desc = os->ps_ResultTupleSlot->tts_tupleDescriptor;
 					}
 					else
 					{
 						v_slot = v_scanslot;
+						desc = parent ?  parent->scandesc : NULL;
 					}
 
 					/*
@@ -539,6 +553,28 @@ ExecReadyCompiledExpr(ExprState *state, PlanState *parent)
 						opblocks[i + 1], b_fetch);
 
 					LLVMPositionBuilderAtEnd(builder, b_fetch);
+
+					/*
+					 * If the tupledesc of the to-be-deformed tuple is known,
+					 * and JITing of deforming is enabled, build deform
+					 * function specific to tupledesc and the exact number of
+					 * to-be-extracted attributes.
+					 */
+					if (desc && jit_tuple_deforming)
+					{
+						LLVMValueRef params[2];
+						LLVMValueRef l_jit_deform;
+
+						l_jit_deform = slot_compile_deform(context,
+														   desc,
+														   op->d.fetch.last_var);
+						params[0] = v_slot;
+						params[1] = LLVMConstInt(LLVMInt16Type(), op->d.fetch.last_var, false);
+
+						LLVMBuildCall(builder, l_jit_deform, params, lengthof(params), "");
+
+					}
+					else
 					{
 						LLVMValueRef params[2];
 
diff --git a/src/backend/executor/execTuples.c b/src/backend/executor/execTuples.c
index 78ec871f50..e5568b922e 100644
--- a/src/backend/executor/execTuples.c
+++ b/src/backend/executor/execTuples.c
@@ -121,6 +121,7 @@ MakeTupleTableSlot(TupleDesc tupleDesc)
 	slot->tts_mcxt = CurrentMemoryContext;
 	slot->tts_buffer = InvalidBuffer;
 	slot->tts_nvalid = 0;
+	slot->tts_off = 0;
 	slot->tts_values = NULL;
 	slot->tts_isnull = NULL;
 	slot->tts_mintuple = NULL;
@@ -358,6 +359,7 @@ ExecStoreTuple(HeapTuple tuple,
 
 	/* Mark extracted state invalid */
 	slot->tts_nvalid = 0;
+	slot->tts_off = 0;
 
 	/*
 	 * If tuple is on a disk page, keep the page pinned as long as we hold a
@@ -431,6 +433,7 @@ ExecStoreMinimalTuple(MinimalTuple mtup,
 
 	/* Mark extracted state invalid */
 	slot->tts_nvalid = 0;
+	slot->tts_off = 0;
 
 	return slot;
 }
@@ -477,6 +480,7 @@ ExecClearTuple(TupleTableSlot *slot)	/* slot in which to store tuple */
 	 */
 	slot->tts_isempty = true;
 	slot->tts_nvalid = 0;
+	slot->tts_off = 0;
 
 	return slot;
 }
@@ -776,6 +780,7 @@ ExecMaterializeSlot(TupleTableSlot *slot)
 	 * that we have not pfree'd tts_mintuple, if there is one.)
 	 */
 	slot->tts_nvalid = 0;
+	slot->tts_off = 0;
 
 	/*
 	 * On the same principle of not depending on previous remote storage,
diff --git a/src/backend/lib/llvmjit.c b/src/backend/lib/llvmjit.c
index 460cb6b325..e05fe2dd72 100644
--- a/src/backend/lib/llvmjit.c
+++ b/src/backend/lib/llvmjit.c
@@ -293,7 +293,7 @@ llvm_create_types(void)
 		members[11] = LLVMPointerType(LLVMInt8Type(), 0); /* nulls */
 		members[12] = LLVMPointerType(StructMinimalTupleData, 0); /* mintuple */
 		members[13] = StructHeapTupleData; /* minhdr */
-		members[14] = LLVMInt64Type(); /* off: FIXME, deterministic type, not long */
+		members[14] = LLVMInt32Type(); /* off */
 
 		StructTupleTableSlot = LLVMStructCreateNamed(LLVMGetGlobalContext(),
 													 "struct.TupleTableSlot");
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 9a80ecedc2..4cc9f305a2 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -41,6 +41,7 @@
 #include "commands/vacuum.h"
 #include "commands/variable.h"
 #include "commands/trigger.h"
+#include "executor/executor.h"
 #include "funcapi.h"
 #include "lib/llvmjit.h"
 #include "libpq/auth.h"
@@ -1031,6 +1032,17 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		{"jit_tuple_deforming", PGC_USERSET, DEVELOPER_OPTIONS,
+			gettext_noop("just-in-time compile tuple deforming"),
+			NULL,
+			GUC_NOT_IN_SAMPLE
+		},
+		&jit_tuple_deforming,
+		false,
+		NULL, NULL, NULL
+	},
+
 #endif
 
 	{
diff --git a/src/include/executor/executor.h b/src/include/executor/executor.h
index 4de4bf4035..ab2df96ca0 100644
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -91,6 +91,7 @@ extern PGDLLIMPORT ExecutorCheckPerms_hook_type ExecutorCheckPerms_hook;
 /* GUC variables for JITing */
 #ifdef USE_LLVM
 extern bool jit_expressions;
+extern bool jit_tuple_deforming;
 #endif
 
 /*
diff --git a/src/include/executor/tuptable.h b/src/include/executor/tuptable.h
index 6c24fd334d..475b2bdcef 100644
--- a/src/include/executor/tuptable.h
+++ b/src/include/executor/tuptable.h
@@ -126,7 +126,7 @@ typedef struct TupleTableSlot
 	bool	   *tts_isnull;		/* current per-attribute isnull flags */
 	MinimalTuple tts_mintuple;	/* minimal tuple, or NULL if none */
 	HeapTupleData tts_minhdr;	/* workspace for minimal-tuple-only case */
-	long		tts_off;		/* saved state for slot_deform_tuple */
+	int32		tts_off;		/* saved state for slot_deform_tuple */
 	bool		tts_fixedTupleDescriptor;
 } TupleTableSlot;
 
diff --git a/src/include/lib/llvmjit.h b/src/include/lib/llvmjit.h
index 9711d398ca..61d7c67d6f 100644
--- a/src/include/lib/llvmjit.h
+++ b/src/include/lib/llvmjit.h
@@ -9,6 +9,7 @@
 #undef PM
 
 #include "nodes/pg_list.h"
+#include "access/tupdesc.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/Core.h>
@@ -70,6 +71,8 @@ extern void llvm_shutdown_perf_support(LLVMExecutionEngineRef EE);
 extern void llvm_perf_orc_support(LLVMOrcJITStackRef llvm_orc);
 extern void llvm_shutdown_orc_perf_support(LLVMOrcJITStackRef llvm_orc);
 
+extern LLVMValueRef slot_compile_deform(struct LLVMJitContext *context, TupleDesc desc, int natts);
+
 #else
 
 struct LLVMJitContext;
@@ -79,4 +82,7 @@ typedef struct LLVMJitContext LLVMJitContext;
 
 extern void llvm_release_handle(ResourceOwner resowner, Datum handle);
 
+
+struct LLVMJitContext;
+
 #endif /* LLVMJIT_H */
-- 
2.14.1.2.g4274c698f4.dirty

