From 3b8ecace0b86d15388d102eea50c403ba61974dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zeme=C5=82ka?= Date: Sun, 22 Dec 2024 18:02:29 +0100 Subject: [PATCH 1/2] Updated prompt to extract text and format it in Markdown, including additional visual details, instead of only describing the image. --- src/markitdown/_markitdown.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index 789c1e55..ee3e59c6 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -1047,7 +1047,21 @@ def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]: def _get_llm_description(self, local_path, extension, client, model, prompt=None): if prompt is None or prompt.strip() == "": - prompt = "Write a detailed caption for this image." + prompt = ''' + Analyze the image and extract all visible text in the original language. + Reproduce the extracted text in a structured Markdown format, preserving + any formatting such as headings, bullet points, and highlights. Ensure + the output accurately reflects the structure and style of the original + document. + + Additionally, if the image includes any visual elements (e.g., diagrams, + logos, or specific layouts) that cannot be represented directly in Markdown, + describe them in plain text as part of the Markdown document under a section + titled "Visual Notes." + + Output only the converted Markdown text without any additional commentary + or explanations. + ''' data_uri = "" with open(local_path, "rb") as image_file: From ca5a25140f783485f6f6444aa8e7541af8eb6e1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Zeme=C5=82ka?= Date: Fri, 17 Jan 2025 14:29:08 +0100 Subject: [PATCH 2/2] Expanded the default image prompt with content-specific guidelines for tables, mathematical formulas, charts and graphs, and flowcharts and diagrams, as suggested in the PR comments. 
--- src/markitdown/_markitdown.py | 45 +++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py index ee3e59c6..c499d27d 100644 --- a/src/markitdown/_markitdown.py +++ b/src/markitdown/_markitdown.py @@ -1048,19 +1048,38 @@ def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]: def _get_llm_description(self, local_path, extension, client, model, prompt=None): if prompt is None or prompt.strip() == "": prompt = ''' - Analyze the image and extract all visible text in the original language. - Reproduce the extracted text in a structured Markdown format, preserving - any formatting such as headings, bullet points, and highlights. Ensure - the output accurately reflects the structure and style of the original - document. - - Additionally, if the image includes any visual elements (e.g., diagrams, - logos, or specific layouts) that cannot be represented directly in Markdown, - describe them in plain text as part of the Markdown document under a section - titled "Visual Notes." - - Output only the converted Markdown text without any additional commentary - or explanations. + Analyze the image and extract all visible text in the original language. Reproduce the extracted text in a structured Markdown format, preserving any formatting such as headings, bullet points, and highlights. Ensure the output accurately reflects the structure and style of the original document. + + Follow these additional guidelines based on the content type: + + **Tables:** + * Create exact markdown representation of the table using markdown syntax (|column1|column2|) + * Create a separator row (|---|---|) after the header + * Transcribe all values exactly as they appear in the table + + **Mathematical Formulas:** + * Use LaTeX notation within markdown delimiters, e.g., `$$ y = mx + b $$` + + **Charts and Graphs:** + * Identify the graph type (bar, line, pie, etc.) 
+ * Extract data points into a markdown table + * Include axis labels, units, and scale information + * Describe patterns (e.g., linear, exponential) under markdown headers + * Record maximums, minimums, and important values + + **Flowcharts and Diagrams:** + * Use mermaid markdown syntax where possible: + ```mermaid + graph LR + A-->B + B-->C + ``` + * For process flows, create a numbered list with clear step progression and any branching conditions + * For technical diagrams, list components and their relationships in a structured way, preserving measurements/specifications in tables + + For any visual elements that cannot be represented directly in Markdown, describe them in plain text under a section titled "Visual Notes." + + Maintain numerical precision exactly as shown, preserve all labels and annotations as markdown text, and structure the output for both human and machine readability. Output only the converted Markdown text without any additional commentary or explanations. ''' data_uri = ""