Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix LLM terminology in code #73

Merged
merged 4 commits into from
Dec 17, 2024
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 19 additions & 18 deletions src/markitdown/_markitdown.py
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:

class ImageConverter(MediaConverter):
"""
- Converts images to markdown via extraction of metadata (if `exiftool` is installed), OCR (if `easyocr` is installed), and description via a multimodal LLM (if an mlm_client is configured).
+ Converts images to markdown via extraction of metadata (if `exiftool` is installed), OCR (if `easyocr` is installed), and description via a multimodal LLM (if an llm_client is configured).
"""

def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
Expand Down Expand Up @@ -811,17 +811,17 @@ def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
md_content += f"{f}: {metadata[f]}\n"

# Try describing the image with GPTV
- mlm_client = kwargs.get("mlm_client")
- mlm_model = kwargs.get("mlm_model")
- if mlm_client is not None and mlm_model is not None:
+ llm_client = kwargs.get("llm_client")
+ llm_model = kwargs.get("llm_model")
+ if llm_client is not None and llm_model is not None:
md_content += (
"\n# Description:\n"
- + self._get_mlm_description(
+ + self._get_llm_description(
local_path,
extension,
- mlm_client,
- mlm_model,
- prompt=kwargs.get("mlm_prompt"),
+ llm_client,
+ llm_model,
+ prompt=kwargs.get("llm_prompt"),
).strip()
+ "\n"
)
Expand All @@ -831,11 +831,11 @@ def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
text_content=md_content,
)

- def _get_mlm_description(self, local_path, extension, client, model, prompt=None):
+ def _get_llm_description(self, local_path, extension, client, model, prompt=None):
if prompt is None or prompt.strip() == "":
prompt = "Write a detailed caption for this image."

- sys.stderr.write(f"MLM Prompt:\n{prompt}\n")
+ sys.stderr.write(f"llm Prompt:\n{prompt}\n")

data_uri = ""
with open(local_path, "rb") as image_file:
Expand Down Expand Up @@ -997,16 +997,16 @@ class MarkItDown:
def __init__(
self,
requests_session: Optional[requests.Session] = None,
- mlm_client: Optional[Any] = None,
- mlm_model: Optional[Any] = None,
+ llm_client: Optional[Any] = None,
+ llm_model: Optional[Any] = None,
):
if requests_session is None:
self._requests_session = requests.Session()
else:
self._requests_session = requests_session

- self._mlm_client = mlm_client
- self._mlm_model = mlm_model
+ self._llm_client = llm_client
+ self._llm_model = llm_model

self._page_converters: List[DocumentConverter] = []

Expand Down Expand Up @@ -1176,11 +1176,12 @@ def _convert(
_kwargs.update({"file_extension": ext})

# Copy any additional global options
- if "mlm_client" not in _kwargs and self._mlm_client is not None:
- _kwargs["mlm_client"] = self._mlm_client
+ if "llm_client" not in _kwargs and self._llm_client is not None:
+ _kwargs["llm_client"] = self._llm_client

- if "mlm_model" not in _kwargs and self._mlm_model is not None:
- _kwargs["mlm_model"] = self._mlm_model
+ if "llm_model" not in _kwargs and self._llm_model is not None:
+ _kwargs["llm_model"] = self._llm_model

# Add the list of converters for nested processing
_kwargs["_parent_converters"] = self._page_converters

Expand Down
Loading