From 80953c4d16e4c2cc694e79d23381097eb7017a35 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@anarazel.de>
Date: Mon, 31 Oct 2022 22:35:56 -0700
Subject: [PATCH v3 4/4] wip: llvmjit: Plug memory leak when performing
 inlining.

When performing inlining LLVM unfortunately "leaks" types (the types
survive and are usable, but a new round of inlining will recreate new
types that are structurally equivalent). I could not come up with a
backpatchable fix for that, so address the problem by dropping &
recreating the LLVMContextRef that contains all IR related data -
which releases all the types.

That incurs some overhead, so only do so after 10000 JITed queries.

Needs a good bit more work:
- confusing naming between different types of contexts (PG side
  LLVMJitContext, and LLVM side LLVMContextRef)
- add detailed explanation of leak issue
- consider only incrementing the counter triggering the recreation of
  the memory context during inlining.
- several FIXMEs in code

Reported-By: Justin Pryzby <pryzby@telsasoft.com>
Reported-By: Kurt Roeckx <kurt@roeckx.be>
Reported-By: Jaime Casanova <jcasanov@systemguards.com.ec>
Author: Andres Freund
Discussion: https://postgr.es/m/7acc8678-df5f-4923-9cf6-e843131ae89d@www.fastmail.com
Discussion: https://postgr.es/m/20201218235607.GC30237@telsasoft.com
---
 src/include/jit/llvmjit.h               |  8 +++
 src/backend/jit/llvm/llvmjit.c          | 76 ++++++++++++++++++++++++-
 src/backend/jit/llvm/llvmjit_inline.cpp | 12 ++++
 3 files changed, 94 insertions(+), 2 deletions(-)

diff --git a/src/include/jit/llvmjit.h b/src/include/jit/llvmjit.h
index f4c01c28672..d90d7b2e347 100644
--- a/src/include/jit/llvmjit.h
+++ b/src/include/jit/llvmjit.h
@@ -42,6 +42,13 @@ typedef struct LLVMJitContext
 	/* number of modules created */
 	size_t		module_generation;
 
+	/*
+	 * The LLVM Context used by this JIT context. An LLVM context is reused
+	 * across many compilations, but occasionally reset to prevent it using
+	 * too much memory due to more and more types accumulating.
+	 */
+	LLVMContextRef llvm_context;
+
 	/* current, "open for write", module */
 	LLVMModuleRef module;
 
@@ -99,6 +106,7 @@ extern LLVMValueRef llvm_function_reference(LLVMJitContext *context,
 						LLVMModuleRef mod,
 						FunctionCallInfo fcinfo);
 
+extern void llvm_inline_reset_caches(void);
 extern void llvm_inline(LLVMModuleRef mod);
 
 /*
diff --git a/src/backend/jit/llvm/llvmjit.c b/src/backend/jit/llvm/llvmjit.c
index 3625edceb3e..a7e19a69a8b 100644
--- a/src/backend/jit/llvm/llvmjit.c
+++ b/src/backend/jit/llvm/llvmjit.c
@@ -42,6 +42,10 @@
 #include "utils/memutils.h"
 #include "utils/resowner_private.h"
 
+
+#define LLVMJIT_LLVM_CONTEXT_REUSE_MAX 100
+
+
 /* Handle of a module emitted via ORC JIT */
 typedef struct LLVMJitHandle
 {
@@ -81,6 +85,10 @@ static LLVMModuleRef llvm_types_module = NULL;
 
 static bool llvm_session_initialized = false;
 static size_t llvm_generation = 0;
+/* number of LLVMJitContexts that currently are in use */
+static size_t llvm_jit_context_in_use_count = 0;
+/* how many times has the current LLVMContextRef been used */
+static size_t llvm_llvm_context_reuse_count = 0;
 static const char *llvm_triple = NULL;
 static const char *llvm_layout = NULL;
 static LLVMContextRef llvm_context;
@@ -141,6 +149,55 @@ llvm_create_context(int jitFlags)
 
 	llvm_session_initialize();
 
+	/*
+	 * Every now and then create a new LLVMContextRef. Unfortunately, during
+	 * every round of inlining, types may "leak" (they can still be found/used
+	 * via the context, but new types will be created the next time in
+	 * inlining is performed). To prevent that from slowly accumulating
+	 * problematic amounts of memory, recreate the LLVMContextRef we use. We
+	 * don't want to do so too often, as that implies some overhead
+	 * (particularly re-loading the module summaries / modules is fairly
+	 * expensive).
+	 *
+	 * We can only safely recreate the LLVM context if no other code is being
+	 * JITed, otherwise we'd release the types in use for that.
+	 *
+	 * FIXME: Extract into helper function.
+	 */
+	if (llvm_jit_context_in_use_count == 0 &&
+		llvm_llvm_context_reuse_count > LLVMJIT_LLVM_CONTEXT_REUSE_MAX)
+	{
+		llvm_llvm_context_reuse_count = 0;
+
+		Assert(llvm_context != NULL);
+
+		/*
+		 * Need to reset the modules that the inlining code caches before
+		 * disposing of the context. LLVM modules exist within a specific LLVM
+		 * context, therefore disposing of the context before resetting the
+		 * cache would lead to dangling pointers to modules.
+		 */
+		llvm_inline_reset_caches();
+
+		if (llvm_context != NULL)
+			LLVMContextDispose(llvm_context);
+		llvm_context = LLVMContextCreate();
+
+		/*
+		 * Re-build cached type information, so code generation code can rely
+		 * on that information to be present (also prevents the variables to
+		 * be dangling references).
+		 *
+		 * FIXME: should split the handling of llvm_triple / llvm_layout out
+		 * of llvm_create_types() - that doesn't need to be redone.
+		 */
+		llvm_create_types();
+	}
+	else
+	{
+		llvm_llvm_context_reuse_count++;
+	}
+
 	ResourceOwnerEnlargeJIT(CurrentResourceOwner);
 
 	context = MemoryContextAllocZero(TopMemoryContext,
@@ -151,6 +208,8 @@ llvm_create_context(int jitFlags)
 	context->base.resowner = CurrentResourceOwner;
 	ResourceOwnerRememberJIT(CurrentResourceOwner, PointerGetDatum(context));
 
+	llvm_jit_context_in_use_count++;
+
 	return context;
 }
 
@@ -163,6 +222,12 @@ llvm_release_context(JitContext *context)
 	LLVMJitContext *llvm_context = (LLVMJitContext *) context;
 	ListCell   *lc;
 
+	/*
+	 * Consider as cleaned up even if we skip doing so below, that way we can
+	 * verify the tracking is correct (see llvm_shutdown()).
+	 */
+	llvm_jit_context_in_use_count--;
+
 	/*
 	 * When this backend is exiting, don't clean up LLVM. As an error might
 	 * have occurred from within LLVM, we do not want to risk reentering. All
@@ -901,6 +966,10 @@ llvm_shutdown(int code, Datum arg)
 		return;
 	}
 
+	if (llvm_jit_context_in_use_count != 0)
+		elog(PANIC, "LLVMJitContext in use count not 0 at exit (is %zu)",
+			 llvm_jit_context_in_use_count);
+
 #if LLVM_VERSION_MAJOR > 11
 	{
 		if (llvm_opt3_orc)
@@ -1004,8 +1073,11 @@ llvm_create_types(void)
 	 * Load triple & layout from clang emitted file so we're guaranteed to be
 	 * compatible.
 	 */
-	llvm_triple = pstrdup(LLVMGetTarget(llvm_types_module));
-	llvm_layout = pstrdup(LLVMGetDataLayoutStr(llvm_types_module));
+	if (llvm_triple == NULL)
+	{
+		llvm_triple = pstrdup(LLVMGetTarget(llvm_types_module));
+		llvm_layout = pstrdup(LLVMGetDataLayoutStr(llvm_types_module));
+	}
 
 	TypeSizeT = llvm_pg_var_type("TypeSizeT");
 	TypeParamBool = load_return_type(llvm_types_module, "FunctionReturningBool");
diff --git a/src/backend/jit/llvm/llvmjit_inline.cpp b/src/backend/jit/llvm/llvmjit_inline.cpp
index 73fe253f2ec..6b46e141fcb 100644
--- a/src/backend/jit/llvm/llvmjit_inline.cpp
+++ b/src/backend/jit/llvm/llvmjit_inline.cpp
@@ -152,6 +152,18 @@ summaries_for_guid(const InlineSearchPath& path, llvm::GlobalValue::GUID guid);
 #define ilog(...)	(void) 0
 #endif
 
+/*
+ * Reset inlining related state. This needs to be called before the currently
+ * used LLVMContextRef is disposed (and a new one create), otherwise we would
+ * have dangling references to deleted modules.
+ */
+void
+llvm_inline_reset_caches(void)
+{
+	module_cache->clear();
+	summary_cache->clear();
+}
+
 /*
  * Perform inlining of external function references in M based on a simple
  * cost based analysis.
-- 
2.38.0

