Skip to content

Commit

Permalink
feat: change the display on artifacts (#238)
Browse files — browse the repository at this point in the history
* feat: change the display on artifacts

* clean up media download code

* fix lint

* fix lint
  • Loading branch information
wuyiqunLu authored Sep 18, 2024
1 parent a56927d commit 84bab5e
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 33 deletions.
23 changes: 2 additions & 21 deletions vision_agent/agent/vision_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@
VA_CODE,
)
from vision_agent.lmm import LMM, Message, OpenAILMM
from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
from vision_agent.tools import META_TOOL_DOCSTRING
from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
from vision_agent.tools.tools import extract_frames, save_video
from vision_agent.utils import CodeInterpreterFactory
from vision_agent.utils.execute import CodeInterpreter, Execution

Expand Down Expand Up @@ -223,25 +222,7 @@ def chat_with_code(
for chat_i in int_chat:
if "media" in chat_i:
for media in chat_i["media"]:
if type(media) is str and media.startswith(("http", "https")):
# TODO: Ideally we should not call VA.tools here, we should come to revisit how to better support remote image later
file_path = str(
Path(self.local_artifacts_path).parent
/ Path(media).name
)
if file_path.lower().endswith(
".mp4"
) or file_path.lower().endswith(".mov"):
video_frames = extract_frames(media)
save_video(
[frame for frame, _ in video_frames], file_path
)
else:
ndarray = load_image(media)
save_image(ndarray, file_path)
media = file_path
else:
media = cast(str, media)
media = cast(str, media)
artifacts.artifacts[Path(media).name] = open(media, "rb").read()

media_remote_path = (
Expand Down
43 changes: 35 additions & 8 deletions vision_agent/tools/meta_tools.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import difflib
import json
import os
import pickle as pkl
import re
Expand Down Expand Up @@ -70,8 +71,8 @@ def redisplay_results(execution: Execution) -> None:
display({MimeType.TEXT_LATEX: result.latex}, raw=True)
if result.json is not None:
display({MimeType.APPLICATION_JSON: result.json}, raw=True)
if result.artifact_name is not None:
display({MimeType.TEXT_ARTIFACT_NAME: result.artifact_name}, raw=True)
if result.artifact is not None:
display({MimeType.APPLICATION_ARTIFACT: result.artifact}, raw=True)
if result.extra is not None:
display(result.extra, raw=True)

Expand Down Expand Up @@ -210,7 +211,14 @@ def create_code_artifact(artifacts: Artifacts, name: str) -> str:
return_str = f"[Artifact {name} created]"
print(return_str)

display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
display(
{
MimeType.APPLICATION_ARTIFACT: json.dumps(
{"name": name, "content": artifacts[name]}
)
},
raw=True,
)
return return_str


Expand Down Expand Up @@ -294,7 +302,14 @@ def edit_code_artifact(

artifacts[name] = "".join(edited_lines)

display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
display(
{
MimeType.APPLICATION_ARTIFACT: json.dumps(
{"name": name, "content": artifacts[name]}
)
},
raw=True,
)
return open_code_artifact(artifacts, name, cur_line)


Expand Down Expand Up @@ -350,7 +365,10 @@ def detect_dogs(image_path: str):
code_lines = code.splitlines(keepends=True)
total_lines = len(code_lines)

display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
display(
{MimeType.APPLICATION_ARTIFACT: json.dumps({"name": name, "content": code})},
raw=True,
)
return view_lines(code_lines, 0, total_lines, name, total_lines)


Expand Down Expand Up @@ -415,7 +433,10 @@ def detect_dogs(image_path: str):
code_lines = code.splitlines(keepends=True)
total_lines = len(code_lines)

display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
display(
{MimeType.APPLICATION_ARTIFACT: json.dumps({"name": name, "content": code})},
raw=True,
)
return view_lines(code_lines, 0, total_lines, name, total_lines)


Expand All @@ -429,7 +450,6 @@ def write_media_artifact(artifacts: Artifacts, local_path: str) -> str:
with open(local_path, "rb") as f:
media = f.read()
artifacts[Path(local_path).name] = media
display({MimeType.TEXT_ARTIFACT_NAME: Path(local_path).name}, raw=True)
return f"[Media {Path(local_path).name} saved]"


Expand Down Expand Up @@ -596,7 +616,14 @@ def replacer(match: re.Match) -> str:
diff = get_diff_with_prompts(name, code, new_code)
print(diff)

display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
display(
{
MimeType.APPLICATION_ARTIFACT: json.dumps(
{"name": name, "content": new_code}
)
},
raw=True,
)
return diff


Expand Down
8 changes: 4 additions & 4 deletions vision_agent/utils/execute.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ class MimeType(str, Enum):
TEXT_LATEX = "text/latex"
APPLICATION_JSON = "application/json"
APPLICATION_JAVASCRIPT = "application/javascript"
TEXT_ARTIFACT_NAME = "text/artifact/name"
APPLICATION_ARTIFACT = "application/artifact"


class FileSerializer:
Expand Down Expand Up @@ -129,7 +129,7 @@ def __init__(self, is_main_result: bool, data: Dict[str, Any]):
self.latex = data.pop(MimeType.TEXT_LATEX, None)
self.json = data.pop(MimeType.APPLICATION_JSON, None)
self.javascript = data.pop(MimeType.APPLICATION_JAVASCRIPT, None)
self.artifact_name = data.pop(MimeType.TEXT_ARTIFACT_NAME, None)
self.artifact = data.pop(MimeType.APPLICATION_ARTIFACT, None)
self.extra = data
# Only keeping the PNG representation if both PNG and JPEG are present
if self.png and self.jpeg:
Expand Down Expand Up @@ -207,8 +207,8 @@ def formats(self) -> Iterable[str]:
formats.append("javascript")
if self.mp4:
formats.append("mp4")
if self.artifact_name:
formats.append("artifact_name")
if self.artifact:
formats.append("artifact")
if self.extra:
formats.extend(iter(self.extra))
return formats
Expand Down

0 comments on commit 84bab5e

Please sign in to comment.