From 08123f75f72d5a5fcfbc9a2842a48cd876617c66 Mon Sep 17 00:00:00 2001 From: Matt Zhou Date: Thu, 10 Oct 2024 16:45:29 -0700 Subject: [PATCH 1/6] Add tests --- .github/workflows/test_groq.yml | 112 ++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 .github/workflows/test_groq.yml diff --git a/.github/workflows/test_groq.yml b/.github/workflows/test_groq.yml new file mode 100644 index 0000000000..e62b5e0d27 --- /dev/null +++ b/.github/workflows/test_groq.yml @@ -0,0 +1,112 @@ +name: Groq Llama 3.1 70b Capabilities Test + +env: + OPENAI_API_KEY: ${{ secrets.GROQ_API_KEY }} + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + test: + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: "Setup Python, Poetry and Dependencies" + uses: packetcoders/action-setup-cache-python-poetry@main + with: + python-version: "3.12" + poetry-version: "1.8.2" + install-args: "-E dev -E external-tools" + + - name: Test first message contains expected function call and inner monologue + id: test_first_message + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_valid_first_message + echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model sends message with keyword + id: test_keyword_message + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_keyword + echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model uses external tool correctly + id: test_external_tool + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_uses_external_tool + echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model recalls chat memory + id: test_chat_memory + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_recall_chat_memory + echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model uses 'archival_memory_search' to find secret + id: test_archival_memory + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_archival_memory_retrieval + echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test model can edit core memories + id: test_core_memory + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_edit_core_memory + echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Test embedding endpoint + id: test_embedding_endpoint + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_openai + echo "TEST_EMBEDDING_ENDPOINT_EXIT_CODE=$?" >> $GITHUB_ENV + continue-on-error: true + + - name: Summarize test results + if: always() + run: | + echo "Test Results Summary:" + echo "Test first message: $([[ $TEST_FIRST_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" + echo "Test model sends message with keyword: $([[ $TEST_KEYWORD_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" + echo "Test model uses external tool: $([[ $TEST_EXTERNAL_TOOL_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" + echo "Test model recalls chat memory: $([[ $TEST_CHAT_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" + echo "Test model uses 'archival_memory_search' to find secret: $([[ $TEST_ARCHIVAL_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" + echo "Test model can edit core memories: $([[ $TEST_CORE_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" + echo "Test embedding endpoint: $([[ $TEST_EMBEDDING_ENDPOINT_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" + + # Check if any test failed + if [[ $TEST_FIRST_MESSAGE_EXIT_CODE -ne 0 || \ + $TEST_KEYWORD_MESSAGE_EXIT_CODE -ne 0 || \ + $TEST_EXTERNAL_TOOL_EXIT_CODE -ne 0 || \ + $TEST_CHAT_MEMORY_EXIT_CODE -ne 0 || \ + $TEST_ARCHIVAL_MEMORY_EXIT_CODE -ne 0 || \ + $TEST_CORE_MEMORY_EXIT_CODE -ne 0 || \ + $TEST_EMBEDDING_ENDPOINT_EXIT_CODE -ne 0 ]]; then + echo "Some tests failed." + exit 78 + fi From cd27aa8a9184e7813a10d9d078f8cebae19faf57 Mon Sep 17 00:00:00 2001 From: Matt Zhou Date: Thu, 10 Oct 2024 16:46:50 -0700 Subject: [PATCH 2/6] Add groq key to CI --- .github/workflows/test_groq.yml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test_groq.yml b/.github/workflows/test_groq.yml index e62b5e0d27..2aa117ef4c 100644 --- a/.github/workflows/test_groq.yml +++ b/.github/workflows/test_groq.yml @@ -1,7 +1,7 @@ name: Groq Llama 3.1 70b Capabilities Test env: - OPENAI_API_KEY: ${{ secrets.GROQ_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} on: push: @@ -27,7 +27,7 @@ jobs: - name: Test first message contains expected function call and inner monologue id: test_first_message env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_valid_first_message echo "TEST_FIRST_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV @@ -36,7 +36,7 @@ jobs: - name: Test model sends message with keyword id: test_keyword_message env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_returns_keyword echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$?" >> $GITHUB_ENV @@ -45,7 +45,7 @@ jobs: - name: Test model uses external tool correctly id: test_external_tool env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_uses_external_tool echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$?" >> $GITHUB_ENV @@ -54,7 +54,7 @@ jobs: - name: Test model recalls chat memory id: test_chat_memory env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_recall_chat_memory echo "TEST_CHAT_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV @@ -63,7 +63,7 @@ jobs: - name: Test model uses 'archival_memory_search' to find secret id: test_archival_memory env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_archival_memory_retrieval echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV @@ -72,7 +72,7 @@ jobs: - name: Test model can edit core memories id: test_core_memory env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_groq_llama31_70b_edit_core_memory echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV @@ -81,7 +81,7 @@ jobs: - name: Test embedding endpoint id: test_embedding_endpoint env: - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} run: | poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_openai echo "TEST_EMBEDDING_ENDPOINT_EXIT_CODE=$?" >> $GITHUB_ENV From 9cb8d0768bdfe8a69ec67484726fb8fdc1eee21a Mon Sep 17 00:00:00 2001 From: Matt Zhou Date: Thu, 10 Oct 2024 16:48:13 -0700 Subject: [PATCH 3/6] Remove extraneous embedding test --- .github/workflows/test_groq.yml | 9 --------- 1 file changed, 9 deletions(-) diff --git a/.github/workflows/test_groq.yml b/.github/workflows/test_groq.yml index 2aa117ef4c..9aed5d89d9 100644 --- a/.github/workflows/test_groq.yml +++ b/.github/workflows/test_groq.yml @@ -78,15 +78,6 @@ jobs: echo "TEST_CORE_MEMORY_EXIT_CODE=$?" >> $GITHUB_ENV continue-on-error: true - - name: Test embedding endpoint - id: test_embedding_endpoint - env: - GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} - run: | - poetry run pytest -s -vv tests/test_endpoints.py::test_embedding_endpoint_openai - echo "TEST_EMBEDDING_ENDPOINT_EXIT_CODE=$?" >> $GITHUB_ENV - continue-on-error: true - - name: Summarize test results if: always() run: | From e2458b7671d1691a9785dc8a57039e1ec4c89bd8 Mon Sep 17 00:00:00 2001 From: Matt Zhou Date: Thu, 10 Oct 2024 16:59:58 -0700 Subject: [PATCH 4/6] Add echo debugs --- .github/workflows/test_groq.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/test_groq.yml b/.github/workflows/test_groq.yml index 9aed5d89d9..3b0b9319ea 100644 --- a/.github/workflows/test_groq.yml +++ b/.github/workflows/test_groq.yml @@ -81,6 +81,14 @@ jobs: - name: Summarize test results if: always() run: | + # Print the values of the exit codes for debugging + echo "TEST_FIRST_MESSAGE_EXIT_CODE=$TEST_FIRST_MESSAGE_EXIT_CODE" + echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$TEST_KEYWORD_MESSAGE_EXIT_CODE" + echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$TEST_EXTERNAL_TOOL_EXIT_CODE" + echo "TEST_CHAT_MEMORY_EXIT_CODE=$TEST_CHAT_MEMORY_EXIT_CODE" + echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$TEST_ARCHIVAL_MEMORY_EXIT_CODE" + echo "TEST_CORE_MEMORY_EXIT_CODE=$TEST_CORE_MEMORY_EXIT_CODE" + echo "Test Results Summary:" echo "Test first message: $([[ $TEST_FIRST_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" echo "Test model sends message with keyword: $([[ $TEST_KEYWORD_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" From 39a8784d47cd77aaf5938ad16d6d2a8539834359 Mon Sep 17 00:00:00 2001 From: Matt Zhou Date: Thu, 10 Oct 2024 17:05:41 -0700 Subject: [PATCH 5/6] Try summary again --- .github/workflows/test_groq.yml | 40 +++++++++++++-------------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/.github/workflows/test_groq.yml b/.github/workflows/test_groq.yml index 3b0b9319ea..4f26f81973 100644 --- a/.github/workflows/test_groq.yml +++ b/.github/workflows/test_groq.yml @@ -81,31 +81,23 @@ jobs: - name: Summarize test results if: always() run: | - # Print the values of the exit codes for debugging - echo "TEST_FIRST_MESSAGE_EXIT_CODE=$TEST_FIRST_MESSAGE_EXIT_CODE" - echo "TEST_KEYWORD_MESSAGE_EXIT_CODE=$TEST_KEYWORD_MESSAGE_EXIT_CODE" - echo "TEST_EXTERNAL_TOOL_EXIT_CODE=$TEST_EXTERNAL_TOOL_EXIT_CODE" - echo "TEST_CHAT_MEMORY_EXIT_CODE=$TEST_CHAT_MEMORY_EXIT_CODE" - echo "TEST_ARCHIVAL_MEMORY_EXIT_CODE=$TEST_ARCHIVAL_MEMORY_EXIT_CODE" - echo "TEST_CORE_MEMORY_EXIT_CODE=$TEST_CORE_MEMORY_EXIT_CODE" - echo "Test Results Summary:" - echo "Test first message: $([[ $TEST_FIRST_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test model sends message with keyword: $([[ $TEST_KEYWORD_MESSAGE_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test model uses external tool: $([[ $TEST_EXTERNAL_TOOL_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test model recalls chat memory: $([[ $TEST_CHAT_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test model uses 'archival_memory_search' to find secret: $([[ $TEST_ARCHIVAL_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test model can edit core memories: $([[ $TEST_CORE_MEMORY_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - echo "Test embedding endpoint: $([[ $TEST_EMBEDDING_ENDPOINT_EXIT_CODE -eq 0 ]] && echo ✅ || echo ❌)" - # Check if any test failed - if [[ $TEST_FIRST_MESSAGE_EXIT_CODE -ne 0 || \ - $TEST_KEYWORD_MESSAGE_EXIT_CODE -ne 0 || \ - $TEST_EXTERNAL_TOOL_EXIT_CODE -ne 0 || \ - $TEST_CHAT_MEMORY_EXIT_CODE -ne 0 || \ - $TEST_ARCHIVAL_MEMORY_EXIT_CODE -ne 0 || \ - $TEST_CORE_MEMORY_EXIT_CODE -ne 0 || \ - $TEST_EMBEDDING_ENDPOINT_EXIT_CODE -ne 0 ]]; then + # If the exit code is empty, treat it as a failure (❌) + echo "Test first message: $([[ -z $TEST_FIRST_MESSAGE_EXIT_CODE || $TEST_FIRST_MESSAGE_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model sends message with keyword: $([[ -z $TEST_KEYWORD_MESSAGE_EXIT_CODE || $TEST_KEYWORD_MESSAGE_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model uses external tool: $([[ -z $TEST_EXTERNAL_TOOL_EXIT_CODE || $TEST_EXTERNAL_TOOL_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model recalls chat memory: $([[ -z $TEST_CHAT_MEMORY_EXIT_CODE || $TEST_CHAT_MEMORY_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model uses 'archival_memory_search' to find secret: $([[ -z $TEST_ARCHIVAL_MEMORY_EXIT_CODE || $TEST_ARCHIVAL_MEMORY_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + echo "Test model can edit core memories: $([[ -z $TEST_CORE_MEMORY_EXIT_CODE || $TEST_CORE_MEMORY_EXIT_CODE -ne 0 ]] && echo ❌ || echo ✅)" + + # Check if any test failed (either non-zero or unset exit code) + if [[ -z $TEST_FIRST_MESSAGE_EXIT_CODE || $TEST_FIRST_MESSAGE_EXIT_CODE -ne 0 || \ + -z $TEST_KEYWORD_MESSAGE_EXIT_CODE || $TEST_KEYWORD_MESSAGE_EXIT_CODE -ne 0 || \ + -z $TEST_EXTERNAL_TOOL_EXIT_CODE || $TEST_EXTERNAL_TOOL_EXIT_CODE -ne 0 || \ + -z $TEST_CHAT_MEMORY_EXIT_CODE || $TEST_CHAT_MEMORY_EXIT_CODE -ne 0 || \ + -z $TEST_ARCHIVAL_MEMORY_EXIT_CODE || $TEST_ARCHIVAL_MEMORY_EXIT_CODE -ne 0 || \ + -z $TEST_CORE_MEMORY_EXIT_CODE || $TEST_CORE_MEMORY_EXIT_CODE -ne 0 ]]; then echo "Some tests failed." exit 78 - fi + fi` From cf1b77047f552da342da8162985894647f2c3d68 Mon Sep 17 00:00:00 2001 From: Matt Zhou Date: Thu, 10 Oct 2024 17:07:57 -0700 Subject: [PATCH 6/6] Finish tweaking summarize panel --- .github/workflows/test_groq.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_groq.yml b/.github/workflows/test_groq.yml index 4f26f81973..f14da94a82 100644 --- a/.github/workflows/test_groq.yml +++ b/.github/workflows/test_groq.yml @@ -100,4 +100,5 @@ jobs: -z $TEST_CORE_MEMORY_EXIT_CODE || $TEST_CORE_MEMORY_EXIT_CODE -ne 0 ]]; then echo "Some tests failed." exit 78 - fi` + fi + continue-on-error: true