From d3c0bdd23c14f0adffb41f7104ef7f1dd74e1a3a Mon Sep 17 00:00:00 2001 From: Dave Parr <8363743+DaveParr@users.noreply.github.com> Date: Wed, 28 Feb 2024 15:54:28 +0000 Subject: [PATCH] Add notes on Langchain logging challenges --- starpilot/main.py | 4 ++-- starpilot/notes.md | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/starpilot/main.py b/starpilot/main.py index 8c68469..197ab04 100644 --- a/starpilot/main.py +++ b/starpilot/main.py @@ -186,8 +186,8 @@ def astrologer( if not os.path.exists(VECTORSTORE_PATH): raise Exception("Please load the stars before shooting") - OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] # noqa: F841 rely on langchain to handle this - OPENAI_ORG_ID = os.environ["OPENAI_ORG_ID"] # noqa: F841 rely on langchain to handle this + OPENAI_API_KEY = os.environ["OPENAI_API_KEY"] + OPENAI_ORG_ID = os.environ["OPENAI_ORG_ID"] metadata_field_info = [ # IDEA: create valid specific values on data load for each users content diff --git a/starpilot/notes.md b/starpilot/notes.md index 87a3a64..6e9e9ec 100644 --- a/starpilot/notes.md +++ b/starpilot/notes.md @@ -128,3 +128,15 @@ which is reported to pylance as: but actually is runnable and correct? + +## Langchain has bad local logging because it wants you to use langsmith + +Langchain the company seemed like they just weren't interested in local logging. `verbose=True` seems to not actually do anything in many cases, and grabbing information out of chains during runtime either gives lots of pointless model card information which is not useful, or forces a process of function wrapping or avoiding LCEL. + +Instead they've been developing an extremely complex cloud observability tool called Langsmith. It is definitely useful, and easy to set up and currently has a generous free personal tier. However it does seem a little disingenuous to not have a local logging solution, even if the 'new paradigm' of LLM/AI does pose some challenges to traditional logging. 
+ +These challenges seem to be: + +logging run detail and then aggregating across a number of runs and parameters is more valuable than logging the detail of a single run +langchain is effectively a data orchestration tool, so the path of a run can vary significantly even with no code changes +llm/ai is inherently non-deterministic, so the same code can produce different results \ No newline at end of file