From 123531059ea12c3b480d0987cbb48f1bf134aca7 Mon Sep 17 00:00:00 2001 From: Anton Bacaj Date: Mon, 26 Jun 2023 05:51:40 +0000 Subject: [PATCH] remove hard coded thread count --- inference.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/inference.py b/inference.py index 13b1658..9a2fa7b 100644 --- a/inference.py +++ b/inference.py @@ -1,3 +1,4 @@ +import os from ctransformers import AutoModelForCausalLM, AutoConfig @@ -31,7 +32,7 @@ def generate(llm, system_prompt, user_prompt): seed=42, reset=True, # reset history (cache) stream=True, # streaming per word/token - threads=24, # adjust for your CPU + threads=max(1, (os.cpu_count() or 2) // 2), # adjust for your CPU stop=["<|im_end|>", "|<"], )