From 38f52be1e764d1fe09088722165e4754e1f8f102 Mon Sep 17 00:00:00 2001 From: Jerry Liu Date: Sun, 8 Sep 2024 19:51:11 -0700 Subject: [PATCH 1/5] cr --- README.md | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 90a1005..96dac67 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,26 @@ # LlamaParse -LlamaParse is an API created by LlamaIndex to efficiently parse and represent files for efficient retrieval and context augmentation using LlamaIndex frameworks. +[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-parse)](https://pypi.org/project/llama-parse/) +[![GitHub contributors](https://img.shields.io/github/contributors/run-llama/llama_parse)](https://github.com/run-llama/llama_parse/graphs/contributors) +[![Discord](https://img.shields.io/discord/1059199217496772688)](https://discord.gg/dGcwcsnxhU) -LlamaParse directly integrates with [LlamaIndex](https://github.com/run-llama/llama_index). -Free plan is up to 1000 pages a day. Paid plan is free 7k pages per week + 0.3c per additional page. +LlamaParse is a **GenAI-native document parser** that can parse complex document data for any downstream LLM use case (RAG, agents). + +It is really good at the following: +✅ **Broad file type support**: Parsing a variety of unstructured file types (.pdf, .pptx, .docx, .xlsx, .html) with text, tables, visual elements, weird layouts, and more. +✅ **Table recognition**: Parsing embedded tables accurately into text and semi-structured representations. +✅ **Multimodal parsing and chunking**: Extracting visual elements (images/diagrams) into structured formats and return image chunks using the latest multimodal models. +✅ **Custom parsing**: Input custom prompt instructions to customize the output the way you want it. + +LlamaParse directly integrates with [LlamaIndex](https://github.com/run-llama/llama_index). -There is a sandbox available to test the API [**https://cloud.llamaindex.ai/parse ↗**](https://cloud.llamaindex.ai/parse). +The free plan is up to 1000 pages a day. Paid plan is free 7k pages per week + 0.3c per additional page by default. There is a sandbox available to test the API [**https://cloud.llamaindex.ai/parse ↗**](https://cloud.llamaindex.ai/parse). Read below for some quickstart information, or see the [full documentation](https://docs.cloud.llamaindex.ai/). +If you're a company interested in enterprise RAG solutions, and/or high volume/on-prem usage of LlamaParse, come [talk to us](https://www.llamaindex.ai/contact). + ## Getting Started First, login and get an api-key from [**https://cloud.llamaindex.ai/api-key ↗**](https://cloud.llamaindex.ai/api-key). @@ -126,3 +137,9 @@ Several end-to-end indexing examples can be found in the examples folder ## Terms of Service See the [Terms of Service Here](./TOS.pdf). + +## Get in Touch (LlamaCloud) + +LlamaParse is part of LlamaCloud, our e2e enterprise RAG platform that provides out-of-the-box, production-ready connectors, indexing, and retrieval over your complex data sources. We offer SaaS and VPC options. + +LlamaCloud is currently available via waitlist (join by [creating an account](https://cloud.llamaindex.ai/)). If you're interested in state-of-the-art quality and in centralizing your RAG efforts, come [get in touch with us](https://www.llamaindex.ai/contact). From b38bcf93cc11df4746cf16f1043c656daab15ff7 Mon Sep 17 00:00:00 2001 From: Jerry Liu Date: Sun, 8 Sep 2024 19:57:06 -0700 Subject: [PATCH 2/5] cr --- README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 96dac67..79584d5 100644 --- a/README.md +++ b/README.md @@ -4,14 +4,13 @@ [![GitHub contributors](https://img.shields.io/github/contributors/run-llama/llama_parse)](https://github.com/run-llama/llama_parse/graphs/contributors) [![Discord](https://img.shields.io/discord/1059199217496772688)](https://discord.gg/dGcwcsnxhU) - LlamaParse is a **GenAI-native document parser** that can parse complex document data for any downstream LLM use case (RAG, agents). It is really good at the following: -✅ **Broad file type support**: Parsing a variety of unstructured file types (.pdf, .pptx, .docx, .xlsx, .html) with text, tables, visual elements, weird layouts, and more. -✅ **Table recognition**: Parsing embedded tables accurately into text and semi-structured representations. -✅ **Multimodal parsing and chunking**: Extracting visual elements (images/diagrams) into structured formats and return image chunks using the latest multimodal models. -✅ **Custom parsing**: Input custom prompt instructions to customize the output the way you want it. +- ✅ **Broad file type support**: Parsing a variety of unstructured file types (.pdf, .pptx, .docx, .xlsx, .html) with text, tables, visual elements, weird layouts, and more. +- ✅ **Table recognition**: Parsing embedded tables accurately into text and semi-structured representations. +- ✅ **Multimodal parsing and chunking**: Extracting visual elements (images/diagrams) into structured formats and return image chunks using the latest multimodal models. +- ✅ **Custom parsing**: Input custom prompt instructions to customize the output the way you want it. LlamaParse directly integrates with [LlamaIndex](https://github.com/run-llama/llama_index). @@ -140,6 +139,6 @@ See the [Terms of Service Here](./TOS.pdf). ## Get in Touch (LlamaCloud) -LlamaParse is part of LlamaCloud, our e2e enterprise RAG platform that provides out-of-the-box, production-ready connectors, indexing, and retrieval over your complex data sources. We offer SaaS and VPC options. +LlamaParse is part of LlamaCloud, our e2e enterprise RAG platform that provides out-of-the-box, production-ready connectors, indexing, and retrieval over your complex data sources. We offer SaaS and VPC options. LlamaCloud is currently available via waitlist (join by [creating an account](https://cloud.llamaindex.ai/)). If you're interested in state-of-the-art quality and in centralizing your RAG efforts, come [get in touch with us](https://www.llamaindex.ai/contact). From 02aa3d74485fd834e31d34936f38b82d2af81524 Mon Sep 17 00:00:00 2001 From: Jerry Liu Date: Sun, 8 Sep 2024 20:39:42 -0700 Subject: [PATCH 3/5] cr --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 79584d5..3d5f187 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ LlamaParse is a **GenAI-native document parser** that can parse complex document data for any downstream LLM use case (RAG, agents). It is really good at the following: + - ✅ **Broad file type support**: Parsing a variety of unstructured file types (.pdf, .pptx, .docx, .xlsx, .html) with text, tables, visual elements, weird layouts, and more. - ✅ **Table recognition**: Parsing embedded tables accurately into text and semi-structured representations. - ✅ **Multimodal parsing and chunking**: Extracting visual elements (images/diagrams) into structured formats and return image chunks using the latest multimodal models. From 43fd3e1e1ab9fc0fd8155301071dc49210c1d304 Mon Sep 17 00:00:00 2001 From: Jerry Liu Date: Mon, 16 Sep 2024 13:58:20 -0700 Subject: [PATCH 4/5] cr --- .../multimodal_rag_slide_deck.ipynb | 516 ++++++++++-------- 1 file changed, 292 insertions(+), 224 deletions(-) diff --git a/examples/multimodal/multimodal_rag_slide_deck.ipynb b/examples/multimodal/multimodal_rag_slide_deck.ipynb index 63d5df9..f07552a 100644 --- a/examples/multimodal/multimodal_rag_slide_deck.ipynb +++ b/examples/multimodal/multimodal_rag_slide_deck.ipynb @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "70ccdd53-e68a-4199-aacb-cfe71ad1ff0b", "metadata": {}, "outputs": [], @@ -116,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "16e2071d-bbc2-4707-8ae7-cb4e1fecafd3", "metadata": {}, "outputs": [], @@ -148,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "570089e5-238a-4dcc-af65-96e7393c2b4d", "metadata": {}, "outputs": [], @@ -162,10 +162,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "ef82a985-4088-4bb7-9a21-0318e1b9207d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Parsing text...\n", + "Started parsing the file under job_id 62f157a9-9ef9-4e5b-95ac-67093fa25800\n", + "..........Parsing PDF file...\n", + "Started parsing the file under job_id 1ddd5654-062b-4e19-b488-d66efc9c509d\n" + ] + } + ], "source": [ "print(f\"Parsing text...\")\n", "docs_text = parser_text.load_data(\"data/conocophillips.pdf\")\n", @@ -176,40 +187,34 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "7506b603-c01f-45de-b354-4a0728dde03c", - "metadata": {}, - "outputs": [], - "source": [ - "print(docs_text[0].get_content())" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "5318fb7b-fe6a-4a8a-b82e-4ed7b4512c37", "metadata": {}, - "outputs": [], - "source": [ - "print(md_json_list[10][\"md\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7a46a73e-c6e2-4b0b-bd10-31b0d3e4b70f", - "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "dict_keys(['page', 'text', 'md', 'images', 'items'])\n" + "# Commitment to Disciplined Reinvestment Rate\n", + "\n", + "| Period | Description | Reinvestment Rate | WTI Average |\n", + "|--------------|--------------------------------------|-------------------|-------------|\n", + "| 2012-2016 | Industry Growth Focus | >100% | ~$75/BBL |\n", + "| 2017-2022 | ConocoPhillips Strategy Reset | <60% | ~$63/BBL |\n", + "| 2023E | | | at $80/BBL |\n", + "| 2024-2028 | Disciplined Reinvestment Rate | ~50% | at $60/BBL |\n", + "| 2029-2032 | | ~6% CFO CAGR | at $60/BBL |\n", + "\n", + "- **Historic Reinvestment Rate**: Gray bars\n", + "- **Reinvestment Rate at $60/BBL WTI**: Blue bars\n", + "- **Reinvestment Rate at $80/BBL WTI**: Dashed blue lines\n", + "\n", + "Reinvestment rate and cash from operations (CFO) are non-GAAP measures. Definitions and reconciliations are included in the Appendix.\n" ] } ], "source": [ - "print(md_json_list[1].keys())" + "print(md_json_list[10][\"md\"])" ] }, { @@ -246,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "18c24174-05ce-417f-8dd2-79c3f375db03", "metadata": {}, "outputs": [], @@ -257,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "8e331dfe-a627-4e23-8c57-70ab1d9342e4", "metadata": {}, "outputs": [], @@ -282,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "346fe5ef-171e-4a54-9084-7a7805103a13", "metadata": {}, "outputs": [], @@ -299,7 +304,7 @@ " image_files = _get_sorted_image_files(image_dir) if image_dir is not None else None\n", " md_texts = [d[\"md\"] for d in json_dicts] if json_dicts is not None else None\n", "\n", - " doc_chunks = docs[0].text.split(\"---\")\n", + " doc_chunks = [c for d in docs for c in d.text.split(\"---\")]\n", " for idx, doc_chunk in enumerate(doc_chunks):\n", " chunk_metadata = {\"page_num\": idx + 1}\n", " if image_files is not None:\n", @@ -319,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "f591669c-5a8e-491d-9cef-0b754abbf26f", "metadata": {}, "outputs": [], @@ -330,7 +335,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "32c13950-c1db-435f-b5b4-89d62b8b7744", "metadata": {}, "outputs": [ @@ -339,25 +344,23 @@ "output_type": "stream", "text": [ "page_num: 11\n", - "image_path: data_images/d9137e19-3974-4b5d-998f-dac0cf29dd9d-page-10.jpg\n", + "image_path: data_images/1ddd5654-062b-4e19-b488-d66efc9c509d-page_39.jpg\n", "parsed_text_markdown: # Commitment to Disciplined Reinvestment Rate\n", "\n", - "| Year | Reinvestment Rate | WTI Average Price | Reinvestment Rate at $60/BBL WTI | Reinvestment Rate at $80/BBL WTI |\n", - "|------------|-------------------|-------------------|----------------------------------|----------------------------------|\n", - "| 2012-2016 | >100% | ~$75/BBL | | |\n", - "| 2017-2022 | <60% | ~$63/BBL | | |\n", - "| 2023E | | | | at $80/BBL WTI |\n", - "| 2024-2028 | | | at $60/BBL WTI | at $80/BBL WTI |\n", - "| 2029-2032 | | | at $60/BBL WTI | at $80/BBL WTI |\n", - "\n", - "**Disciplined Reinvestment Rate is the Foundation for Superior Returns on and of Capital, while Driving Durable CFO Growth**\n", + "| Period | Description | Reinvestment Rate | WTI Average |\n", + "|--------------|--------------------------------------|-------------------|-------------|\n", + "| 2012-2016 | Industry Growth Focus | >100% | ~$75/BBL |\n", + "| 2017-2022 | ConocoPhillips Strategy Reset | <60% | ~$63/BBL |\n", + "| 2023E | | | at $80/BBL |\n", + "| 2024-2028 | Disciplined Reinvestment Rate | ~50% | at $60/BBL |\n", + "| 2029-2032 | | ~6% CFO CAGR | at $60/BBL |\n", "\n", - "- ~50% 10-Year Reinvestment Rate\n", - "- ~6% CFO CAGR 2024-2032 at $60/BBL WTI Mid-Cycle Planning Price\n", + "- **Historic Reinvestment Rate**: Gray bars\n", + "- **Reinvestment Rate at $60/BBL WTI**: Blue bars\n", + "- **Reinvestment Rate at $80/BBL WTI**: Dashed blue lines\n", "\n", - "**Note:** Reinvestment rate and cash from operations (CFO) are non-GAAP measures. Definitions and reconciliations are included in the Appendix.\n", - "parsed_text: \n", - "Commitment to Disciplined Reinvestment Rate\n", + "Reinvestment rate and cash from operations (CFO) are non-GAAP measures. Definitions and reconciliations are included in the Appendix.\n", + "parsed_text: Commitment to Disciplined Reinvestment Rate\n", " Industry ConocoPhillips\n", " Strategy Reset Disciplined Reinvestment Rate is the Foundation for Superior\n", " Growth Focus Returns on and of Capital, while Driving Durable CFO Growth\n", @@ -374,7 +377,7 @@ " 0%\n", " 2012-2016 2017-2022 2023E 2024-2028 2029-2032\n", " Historic Reinvestment Rate Reinvestment Rate at $60/BBL WTI Reinvestment Rate at $80/BBL WTI\n", - " Reinvestment rate andcashfrom operations (CFO) are non-GAAP measures: Definitions and reconciliations are included in the Appendix ConocoPhillips\n" + " Reinvestment rate and cash from operations (CFO) are non-GAAP measures: Definitions and reconciliations are included in the Appendix ConocoPhillips\n" ] } ], @@ -394,10 +397,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "6ea53c31-0e38-421c-8d9b-0e3adaa1677e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/jerryliu/Programming/gpt_index/.venv/lib/python3.10/site-packages/tiktoken/core.py:50: RuntimeWarning: coroutine 'LlamaParse.aload_data' was never awaited\n", + " self._core_bpe = _tiktoken.CoreBPE(mergeable_ranks, special_tokens, pat_str)\n", + "RuntimeWarning: Enable tracemalloc to get the object allocation traceback\n" + ] + } + ], "source": [ "import os\n", "from llama_index.core import (\n", @@ -432,7 +445,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "35a94be2-e289-41a6-92e4-d3cb428fb0c8", "metadata": {}, "outputs": [], @@ -517,7 +530,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "0890be59-fb12-4bb5-959b-b2d9600f7774", "metadata": {}, "outputs": [], @@ -543,7 +556,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "c0b15a48-d177-4666-aec2-98ee90664642", "metadata": {}, "outputs": [], @@ -565,7 +578,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "2065d2c6-d6ba-4ee3-8e9e-dbc83cbcec1b", "metadata": {}, "outputs": [], @@ -575,7 +588,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "bcaea1a8-26c9-4385-8f62-32855aa898b6", "metadata": {}, "outputs": [ @@ -588,7 +601,7 @@ " Under $40/BBL Cost of Supply 10-Year Plan Cumulative Production (BBOE)\n", " S50 S32/BBL Lower 48 Alaska\n", " Average Cost of Supply\n", - " 3$40 GKA GWA\n", + " 3 $40 GKA GWA\n", " GPA WNS\n", " $30 EMENA\n", " 3 Norway\n", @@ -599,7 +612,7 @@ " APLNG Montney\n", " S0\n", " 10 15 20 Bakken\n", - " Resource (BBOE) Eagle Ford Other MalaysiaChina Surmont\n", + " Resource (BBOE) Eagle Ford Other Malaysia ChinaSurmont\n", " Lower 48 Canada Alaska EMENA Asia Pacific\n", "Costs assumemid-cycle price environment of S60/BBL WTI:\n", " ConocoPhillips\n" @@ -612,7 +625,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "f6bcfbc6-4e9b-41ad-ad81-1c4245b95cd5", "metadata": {}, "outputs": [], @@ -633,7 +646,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "5b7a8c5f-39fc-4d04-8c56-3642f5718437", "metadata": {}, "outputs": [], @@ -656,7 +669,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "2b4f7eb1-d247-45fa-bb41-c02fc353a22a", "metadata": {}, "outputs": [], @@ -686,77 +699,131 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "1cdce5d8-6bb3-4cd3-929d-1cec249d9052", + "execution_count": 49, + "id": "d78e53cf-35cb-4ef8-b03e-1b47ba15ae64", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Added user message to memory: How does the Conoco Phillips capex/EUR in the delaware basin compare against other competitors?\n", + "Added user message to memory: Tell me about the diverse geographies where Conoco Phillips has a production base\n", "=== Calling Function ===\n", - "Calling function: vector_tool with args: {\"input\": \"Conoco Phillips capex/EUR in the Delaware Basin\"}\n", + "Calling function: vector_tool with args: {\"input\": \"Conoco Phillips production base geographies\"}\n", "=== Function Output ===\n", - "The ConocoPhillips capex/EUR in the Delaware Basin is $10/BOE.\n", + "ConocoPhillips' production base geographies include:\n", + "\n", + "1. **Lower 48** (Permian, Eagle Ford, Bakken, Other)\n", + "2. **Alaska** (GKA, GWA, GPA, WNS)\n", + "3. **EMENA** (Norway, Libya, Qatar)\n", + "4. **Asia Pacific** (APLNG, Malaysia, China)\n", + "5. **Canada** (Montney, Surmont)\n", + "\n", + "This information was derived from the image on page 14, which provides a detailed breakdown of the diverse production base and the regions involved. The parsed markdown and raw text also support this information, but the image provides the clearest and most comprehensive view. There are no discrepancies between the image and the parsed text in this case.\n", + "=== LLM Response ===\n", + "ConocoPhillips has a diverse production base spread across various geographies, including:\n", + "\n", + "1. **Lower 48**:\n", + " - Permian Basin\n", + " - Eagle Ford\n", + " - Bakken\n", + " - Other regions within the continental United States\n", + "\n", + "2. **Alaska**:\n", + " - Greater Kuparuk Area (GKA)\n", + " - Greater Prudhoe Area (GPA)\n", + " - Greater Willow Area (GWA)\n", + " - Western North Slope (WNS)\n", + "\n", + "3. **EMENA (Europe, Middle East, and North Africa)**:\n", + " - Norway\n", + " - Libya\n", + " - Qatar\n", + "\n", + "4. **Asia Pacific**:\n", + " - Australia Pacific LNG (APLNG)\n", + " - Malaysia\n", + " - China\n", "\n", - "I obtained this information from the image provided. The image clearly shows a bar chart under the section \"Delaware Basin Well Capex/EUR ($/BOE)\" where ConocoPhillips is listed with a capex/EUR of $10/BOE. This information is consistent with the parsed markdown text, which also lists ConocoPhillips' capex/EUR as $10/BOE in the Delaware Basin. There are no discrepancies between the image and the parsed markdown text in this case.\n", + "5. **Canada**:\n", + " - Montney\n", + " - Surmont\n", + "\n", + "These regions highlight the global reach and diverse geographical footprint of ConocoPhillips' production operations.\n", + "Added user message to memory: Tell me about the diverse geographies where Conoco Phillips has a production base\n", "=== Calling Function ===\n", - "Calling function: vector_tool with args: {\"input\": \"competitors capex/EUR in the Delaware Basin\"}\n", + "Calling function: vector_tool with args: {\"input\": \"diverse geographies where Conoco Phillips has a production base\"}\n", "=== Function Output ===\n", - "The competitors' Capex/EUR in the Delaware Basin can be found in the image on the slide titled \"Delaware: Vast Inventory with Proven Track Record of Performance.\" The relevant information is presented in a bar chart under the section \"Delaware Basin Well Capex/EUR ($/BOE)\".\n", + "ConocoPhillips has a diverse production base that includes the Lower 48 (Permian, Bakken, Eagle Ford), Alaska, Canada (Montney, Surmont), EMENA (Norway, Libya), Asia Pacific (Malaysia, China, APLNG), and Qatar.\n", + "=== LLM Response ===\n", + "ConocoPhillips has a diverse production base spanning several key geographies:\n", "\n", - "Here are the details:\n", + "1. **Lower 48 (United States)**: This includes major production areas such as the Permian Basin, Bakken Formation, and Eagle Ford Shale.\n", + "2. **Alaska**: Significant operations in the North Slope region.\n", + "3. **Canada**: Operations in the Montney Formation and the Surmont oil sands project.\n", + "4. **EMENA (Europe, Middle East, and North Africa)**: Notable operations in Norway and Libya.\n", + "5. **Asia Pacific**: Includes operations in Malaysia, China, and the Australia Pacific LNG (APLNG) project.\n", + "6. **Qatar**: Involvement in the country's energy sector.\n", "\n", - "- ConocoPhillips: $10/BOE\n", - "- Competitor 1: $15/BOE\n", - "- Competitor 2: $20/BOE\n", - "- Competitor 3: $25/BOE\n", - "- Competitor 4: $30/BOE\n", - "- Competitor 5: $35/BOE\n", - "- Competitor 6: $40/BOE\n", - "- Competitor 7: $45/BOE\n", + "These regions highlight the company's extensive and varied geographical footprint in the energy production industry.\n" + ] + } + ], + "source": [ + "query = \"Tell me about the diverse geographies where Conoco Phillips has a production base\"\n", + "response = agent.query(query)\n", + "base_response = base_agent.query(query)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "355d2aa4-c26f-480e-b512-4446acbd9227", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ConocoPhillips has a diverse production base spread across various geographies, including:\n", "\n", - "This information was obtained directly from the image, which provides a clear visual representation of the Capex/EUR values for ConocoPhillips and its competitors in the Delaware Basin. The parsed markdown text also confirms these values, ensuring consistency between the image and the text.\n", - "=== LLM Response ===\n", - "The capital expenditure per estimated ultimate recovery (capex/EUR) for ConocoPhillips in the Delaware Basin is $10 per barrel of oil equivalent (BOE). When compared to its competitors, ConocoPhillips has a significantly lower capex/EUR. Here are the capex/EUR values for ConocoPhillips and its competitors:\n", - "\n", - "- **ConocoPhillips**: $10/BOE\n", - "- **Competitor 1**: $15/BOE\n", - "- **Competitor 2**: $20/BOE\n", - "- **Competitor 3**: $25/BOE\n", - "- **Competitor 4**: $30/BOE\n", - "- **Competitor 5**: $35/BOE\n", - "- **Competitor 6**: $40/BOE\n", - "- **Competitor 7**: $45/BOE\n", - "\n", - "This data indicates that ConocoPhillips has a more cost-efficient operation in the Delaware Basin compared to its competitors.\n", - "The capital expenditure per estimated ultimate recovery (capex/EUR) for ConocoPhillips in the Delaware Basin is $10 per barrel of oil equivalent (BOE). When compared to its competitors, ConocoPhillips has a significantly lower capex/EUR. Here are the capex/EUR values for ConocoPhillips and its competitors:\n", - "\n", - "- **ConocoPhillips**: $10/BOE\n", - "- **Competitor 1**: $15/BOE\n", - "- **Competitor 2**: $20/BOE\n", - "- **Competitor 3**: $25/BOE\n", - "- **Competitor 4**: $30/BOE\n", - "- **Competitor 5**: $35/BOE\n", - "- **Competitor 6**: $40/BOE\n", - "- **Competitor 7**: $45/BOE\n", - "\n", - "This data indicates that ConocoPhillips has a more cost-efficient operation in the Delaware Basin compared to its competitors.\n" + "1. **Lower 48**:\n", + " - Permian Basin\n", + " - Eagle Ford\n", + " - Bakken\n", + " - Other regions within the continental United States\n", + "\n", + "2. **Alaska**:\n", + " - Greater Kuparuk Area (GKA)\n", + " - Greater Prudhoe Area (GPA)\n", + " - Greater Willow Area (GWA)\n", + " - Western North Slope (WNS)\n", + "\n", + "3. **EMENA (Europe, Middle East, and North Africa)**:\n", + " - Norway\n", + " - Libya\n", + " - Qatar\n", + "\n", + "4. **Asia Pacific**:\n", + " - Australia Pacific LNG (APLNG)\n", + " - Malaysia\n", + " - China\n", + "\n", + "5. **Canada**:\n", + " - Montney\n", + " - Surmont\n", + "\n", + "These regions highlight the global reach and diverse geographical footprint of ConocoPhillips' production operations.\n" ] } ], "source": [ - "# response = agent.query(\"Tell me about the different regions and subregions where Conoco Phillips has a production base.\")\n", - "response = agent.query(\n", - " \"How does the Conoco Phillips capex/EUR in the delaware basin compare against other competitors?\"\n", - ")\n", "print(str(response))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 59, "id": "d584c560-8f49-4c10-a4db-2e0d3b7085d2", "metadata": {}, "outputs": [ @@ -764,90 +831,87 @@ "name": "stdout", "output_type": "stream", "text": [ - "page_num: 38\n", - "image_path: data_images/d9137e19-3974-4b5d-998f-dac0cf29dd9d-page-37.jpg\n", - "parsed_text_markdown: # Delaware: Vast Inventory with Proven Track Record of Performance\n", - "\n", - "## Prolific Acreage Spanning Over ~659,000 Net Acres¹\n", - "\n", - "![Map of Delaware Basin](image)\n", - "\n", - "### Total 10-Year Operated Permian Inventory\n", - "\n", - "- Delaware Basin: 65%\n", - "- Midland Basin: 35%\n", - "\n", - "### High Single-Digit Production Growth\n", - "\n", - "## 12-Month Cumulative Production³ (BOE/FT)\n", - "\n", - "| Months | 2019 | 2020 | 2021 | 2022 |\n", - "|--------|------|------|------|------|\n", - "| 1 | 0 | 0 | 0 | 0 |\n", - "| 2 | 5 | 6 | 7 | 8 |\n", - "| 3 | 10 | 12 | 14 | 16 |\n", - "| 4 | 15 | 18 | 21 | 24 |\n", - "| 5 | 20 | 24 | 28 | 32 |\n", - "| 6 | 25 | 30 | 35 | 40 |\n", - "| 7 | 30 | 36 | 42 | 48 |\n", - "| 8 | 35 | 42 | 49 | 56 |\n", - "| 9 | 40 | 48 | 56 | 64 |\n", - "| 10 | 45 | 54 | 63 | 72 |\n", - "| 11 | 50 | 60 | 70 | 80 |\n", - "| 12 | 55 | 66 | 77 | 88 |\n", - "\n", - "~30% Improved Performance from 2019 to 2022\n", - "\n", - "## Delaware Basin Well Capex/EUR⁴ ($/BOE)\n", - "\n", - "| Company | Capex/EUR |\n", - "|------------------|-----------|\n", - "| ConocoPhillips | 10 |\n", - "| Competitor 1 | 15 |\n", - "| Competitor 2 | 20 |\n", - "| Competitor 3 | 25 |\n", - "| Competitor 4 | 30 |\n", - "| Competitor 5 | 35 |\n", - "| Competitor 6 | 40 |\n", - "| Competitor 7 | 45 |\n", - "\n", - "---\n", - "\n", - "¹ Unconventional acres. \n", - "² Source: Enverus and ConocoPhillips (March 2023). \n", - "³ Source: Enverus (March 2023) based on wells online year. \n", - "⁴ Source: Enverus (March 2023). Average single well capex/EUR. Top eight public operators based on wells online in years 2021-2022, greater than 50% oil weight. COP based on COP well design. Competitors include: CVX, DVN, EOG, MTDR, OXY, PR and XOM.\n", - "parsed_text: \n", - "Delaware: Vast Inventory with Proven Track Record of Performance\n", - " New Prolific Acreage Spanning Over 12-Month Cumulative Production? (BOE/FT)\n", - " Mexico 659,000 Net Acres' 40\n", - " Texas 3828\n", - " 30 2019\n", - " 20 30%\n", - " 10 Improved Performancefrom 2019 to 2022\n", - " Total\n", - " Permian Inventory\n", - " 10-Year Operated\n", - " 2 10 11 12\n", - " Months\n", - " Delaware Basin Well Capex/EUR4 (S/BOE)\n", - " 65% 25\n", - " Delaware Basin 20\n", - " Midland Basin 15\n", - " Low HighCost of Supplyz 10 ConocoPhillips\n", - " High Single-Digit Production Growth\n", - " \"Unconventional acres. 2Source: Enverus and ConocoPhillips (March 2023). 3SourceEnverus (March 2023) based on wells online year: \"Source; Enverus (March 2023). Average single well capex/EUR Top eight public operators based on\n", - "wells online in years 2021-2022, greater than 50% oil weight; COP based on COP well design: Competitors include; CVX DVN, EOG; MTDR, OXY, PR and XOM: ConocoPhillips\n" + "page_num: 14\n", + "image_path: data_images/1ddd5654-062b-4e19-b488-d66efc9c509d-page_12.jpg\n", + "parsed_text_markdown: # Our Differentiated Portfolio: Deep, Durable and Diverse\n", + "\n", + "## ~20 BBOE of Resource\n", + "Under $40/BBL Cost of Supply\n", + "\n", + "### ~ $32/BBL\n", + "Average Cost of Supply\n", + "\n", + "### WTI Cost of Supply ($/BBL)\n", + "\n", + "| Cost ($/BBL) | Resource (BBOE) |\n", + "|--------------|-----------------|\n", + "| $0 | 0 |\n", + "| $10 | |\n", + "| $20 | |\n", + "| $30 | |\n", + "| $40 | |\n", + "| $50 | |\n", + "\n", + "- **Legend:**\n", + " - Lower 48\n", + " - Canada\n", + " - Alaska\n", + " - EMENA\n", + " - Asia Pacific\n", + "\n", + "*Costs assume a mid-cycle price environment of $60/BBL WTI.*\n", + "\n", + "## Diverse Production Base\n", + "10-Year Plan Cumulative Production (BBOE)\n", + "\n", + "| Region | Sub-region |\n", + "|--------------|-----------------|\n", + "| Lower 48 | Permian |\n", + "| | Eagle Ford |\n", + "| | Bakken |\n", + "| | Other |\n", + "| Alaska | GKA |\n", + "| | GWA |\n", + "| | GPA |\n", + "| | WNS |\n", + "| EMENA | Norway |\n", + "| | Libya |\n", + "| | Qatar |\n", + "| Asia Pacific | APLNG |\n", + "| | Malaysia |\n", + "| | China |\n", + "| Canada | Montney |\n", + "| | Surmont |\n", + "parsed_text: Our Differentiated Portfolio: Deep; Durable and Diverse\n", + " 20 BBOE of Resource Diverse Production Base\n", + " Under $40/BBL Cost of Supply 10-Year Plan Cumulative Production (BBOE)\n", + " S50 S32/BBL Lower 48 Alaska\n", + " Average Cost of Supply\n", + " 3 $40 GKA GWA\n", + " GPA WNS\n", + " $30 EMENA\n", + " 3 Norway\n", + " 8 $20\n", + " E Qatar Libya\n", + " Asia Pacific Canada\n", + " $10 Permian\n", + " APLNG Montney\n", + " S0\n", + " 10 15 20 Bakken\n", + " Resource (BBOE) Eagle Ford Other Malaysia ChinaSurmont\n", + " Lower 48 Canada Alaska EMENA Asia Pacific\n", + "Costs assumemid-cycle price environment of S60/BBL WTI:\n", + " ConocoPhillips\n" ] } ], "source": [ - "print(response.source_nodes[0].get_content(metadata_mode=\"all\"))" + "print(response.source_nodes[7].get_content(metadata_mode=\"all\"))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 60, "id": "d21d694b-6618-4d04-a6f6-8b0c2625f539", "metadata": {}, "outputs": [ @@ -855,32 +919,26 @@ "name": "stdout", "output_type": "stream", "text": [ - "Added user message to memory: How does the Conoco Phillips capex/EUR in the delaware basin compare against other competitors?\n", - "=== Calling Function ===\n", - "Calling function: vector_tool with args: {\"input\": \"Conoco Phillips capex/EUR in the Delaware Basin\"}\n", - "=== Function Output ===\n", - "ConocoPhillips' capex/EUR in the Delaware Basin is approximately $20/BOE.\n", - "=== Calling Function ===\n", - "Calling function: vector_tool with args: {\"input\": \"competitors capex/EUR in the Delaware Basin\"}\n", - "=== Function Output ===\n", - "The average single well capex/EUR for competitors in the Delaware Basin is between $10 and $25 per BOE.\n", - "=== LLM Response ===\n", - "ConocoPhillips' capex/EUR in the Delaware Basin is approximately $20 per BOE. In comparison, the average capex/EUR for competitors in the Delaware Basin ranges between $10 and $25 per BOE. This places ConocoPhillips' capex/EUR towards the higher end of the competitive range.\n", - "ConocoPhillips' capex/EUR in the Delaware Basin is approximately $20 per BOE. In comparison, the average capex/EUR for competitors in the Delaware Basin ranges between $10 and $25 per BOE. This places ConocoPhillips' capex/EUR towards the higher end of the competitive range.\n" + "ConocoPhillips has a diverse production base spanning several key geographies:\n", + "\n", + "1. **Lower 48 (United States)**: This includes major production areas such as the Permian Basin, Bakken Formation, and Eagle Ford Shale.\n", + "2. **Alaska**: Significant operations in the North Slope region.\n", + "3. **Canada**: Operations in the Montney Formation and the Surmont oil sands project.\n", + "4. **EMENA (Europe, Middle East, and North Africa)**: Notable operations in Norway and Libya.\n", + "5. **Asia Pacific**: Includes operations in Malaysia, China, and the Australia Pacific LNG (APLNG) project.\n", + "6. **Qatar**: Involvement in the country's energy sector.\n", + "\n", + "These regions highlight the company's extensive and varied geographical footprint in the energy production industry.\n" ] } ], "source": [ - "# base_response = base_agent.query(\"Tell me about the different regions and subregions where Conoco Phillips has a production base.\")\n", - "base_response = base_agent.query(\n", - " \"How does the Conoco Phillips capex/EUR in the delaware basin compare against other competitors?\"\n", - ")\n", "print(str(base_response))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 65, "id": "d3afccae-ad8d-4c5d-9d93-810dba413a5d", "metadata": {}, "outputs": [ @@ -888,31 +946,40 @@ "name": "stdout", "output_type": "stream", "text": [ - "Deep, Durable and Diverse Portfolio with Significant Growth Runway\n", - " 1,2002022 Lower 48 Unconventional Production' (MBOED S50 ~S32/BBL\n", - " 000 ConocoPhillips Cost of SupplyAverage\n", - " 00 S40\n", - " 500 3\n", - " 400 1 S30\n", - " 200\n", - " 5\n", - " 15,000ConocoPhillipsNet Remaining Well Inventory? 1 S20\n", - " 12,000 S10\n", - " 000\n", - " 0o0 SO\n", - " 3,000 10\n", - " Resource (BBOE)\n", - " Delaware Basin Midland Basin Eagle Ford Bakken Other\n", - " Largest Lower 48 Unconventional Producer; Growing into the Next Decade\n", - " onshore operated inventory that achieves 15% IRR at $SO/BBL WTI, Competitors include CVX, DVN, EOG, FANG, MRO, OXY, PXD,and XOM:\n", - " Source: Wood Mackenzie Lower 48 Unconventional Plays 2022 ProductionCompetitors include CVX, DVN; EOG, FANG, MRO, OXY, PXD and XOM; greaterthan50% liquids weight: ?Source: Wood Mackenzie (March 2023), Lower 48\n", - " ConocoPhillips\n" + "Our Differentiated Portfolio: Deep; Durable and Diverse\n", + " 20 BBOE of Resource Diverse Production Base\n", + " Under $40/BBL Cost of Supply 10-Year Plan Cumulative Production (BBOE)\n", + " S50 S32/BBL Lower 48 Alaska\n", + " Average Cost of Supply\n", + " 3 $40 GKA GWA\n", + " GPA WNS\n", + " $30 EMENA\n", + " 3 Norway\n", + " 8 $20\n", + " E Qatar Libya\n", + " Asia Pacific Canada\n", + " $10 Permian\n", + " APLNG Montney\n", + " S0\n", + " 10 15 20 Bakken\n", + " Resource (BBOE) Eagle Ford Other Malaysia ChinaSurmont\n", + " Lower 48 Canada Alaska EMENA Asia Pacific\n", + "Costs assumemid-cycle price environment of S60/BBL WTI:\n", + " ConocoPhillips\n" ] } ], "source": [ - "print(base_response.source_nodes[0].get_content(metadata_mode=\"llm\"))" + "print(base_response.source_nodes[1].get_content(metadata_mode=\"all\"))" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "158667df-6721-477f-b455-a9d817112af1", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -930,7 +997,8 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" + "pygments_lexer": "ipython3", + "version": "3.10.8" } }, "nbformat": 4, From 6c7196c6f23e391cefd7c1141e223c49691fa593 Mon Sep 17 00:00:00 2001 From: Jerry Liu Date: Mon, 16 Sep 2024 13:59:39 -0700 Subject: [PATCH 5/5] cr --- .../multimodal_rag_slide_deck.ipynb | 63 +++++++++---------- 1 file changed, 28 insertions(+), 35 deletions(-) diff --git a/examples/multimodal/multimodal_rag_slide_deck.ipynb b/examples/multimodal/multimodal_rag_slide_deck.ipynb index f07552a..cbc7470 100644 --- a/examples/multimodal/multimodal_rag_slide_deck.ipynb +++ b/examples/multimodal/multimodal_rag_slide_deck.ipynb @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "70ccdd53-e68a-4199-aacb-cfe71ad1ff0b", "metadata": {}, "outputs": [], @@ -116,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "16e2071d-bbc2-4707-8ae7-cb4e1fecafd3", "metadata": {}, "outputs": [], @@ -148,7 +148,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "570089e5-238a-4dcc-af65-96e7393c2b4d", "metadata": {}, "outputs": [], @@ -162,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "ef82a985-4088-4bb7-9a21-0318e1b9207d", "metadata": {}, "outputs": [ @@ -187,7 +187,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "5318fb7b-fe6a-4a8a-b82e-4ed7b4512c37", "metadata": {}, "outputs": [ @@ -251,7 +251,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "18c24174-05ce-417f-8dd2-79c3f375db03", "metadata": {}, "outputs": [], @@ -262,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "8e331dfe-a627-4e23-8c57-70ab1d9342e4", "metadata": {}, "outputs": [], @@ -287,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "id": "346fe5ef-171e-4a54-9084-7a7805103a13", "metadata": {}, "outputs": [], @@ -324,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "f591669c-5a8e-491d-9cef-0b754abbf26f", "metadata": {}, "outputs": [], @@ -335,7 +335,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "32c13950-c1db-435f-b5b4-89d62b8b7744", "metadata": {}, "outputs": [ @@ -397,7 +397,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "6ea53c31-0e38-421c-8d9b-0e3adaa1677e", "metadata": {}, "outputs": [ @@ -445,7 +445,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "35a94be2-e289-41a6-92e4-d3cb428fb0c8", "metadata": {}, "outputs": [], @@ -530,7 +530,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "id": "0890be59-fb12-4bb5-959b-b2d9600f7774", "metadata": {}, "outputs": [], @@ -556,7 +556,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "id": "c0b15a48-d177-4666-aec2-98ee90664642", "metadata": {}, "outputs": [], @@ -578,7 +578,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "2065d2c6-d6ba-4ee3-8e9e-dbc83cbcec1b", "metadata": {}, "outputs": [], @@ -588,7 +588,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "id": "bcaea1a8-26c9-4385-8f62-32855aa898b6", "metadata": {}, "outputs": [ @@ -625,7 +625,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "id": "f6bcfbc6-4e9b-41ad-ad81-1c4245b95cd5", "metadata": {}, "outputs": [], @@ -646,7 +646,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "5b7a8c5f-39fc-4d04-8c56-3642f5718437", "metadata": {}, "outputs": [], @@ -669,7 +669,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "2b4f7eb1-d247-45fa-bb41-c02fc353a22a", "metadata": {}, "outputs": [], @@ -699,7 +699,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": null, "id": "d78e53cf-35cb-4ef8-b03e-1b47ba15ae64", "metadata": {}, "outputs": [ @@ -770,14 +770,16 @@ } ], "source": [ - "query = \"Tell me about the diverse geographies where Conoco Phillips has a production base\"\n", + "query = (\n", + " \"Tell me about the diverse geographies where Conoco Phillips has a production base\"\n", + ")\n", "response = agent.query(query)\n", "base_response = base_agent.query(query)" ] }, { "cell_type": "code", - "execution_count": 50, + "execution_count": null, "id": "355d2aa4-c26f-480e-b512-4446acbd9227", "metadata": {}, "outputs": [ @@ -823,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": null, "id": "d584c560-8f49-4c10-a4db-2e0d3b7085d2", "metadata": {}, "outputs": [ @@ -911,7 +913,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": null, "id": "d21d694b-6618-4d04-a6f6-8b0c2625f539", "metadata": {}, "outputs": [ @@ -938,7 +940,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": null, "id": "d3afccae-ad8d-4c5d-9d93-810dba413a5d", "metadata": {}, "outputs": [ @@ -972,14 +974,6 @@ "source": [ "print(base_response.source_nodes[1].get_content(metadata_mode=\"all\"))" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "158667df-6721-477f-b455-a9d817112af1", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -997,8 +991,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" + "pygments_lexer": "ipython3" } }, "nbformat": 4,