Skip to content

Commit

Permalink
If pass multi files, e.g. from gradio, then show those files that have no handler for instead of just filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
pseudotensor committed May 22, 2023
1 parent f7d1134 commit 3b99158
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 21 deletions.
29 changes: 15 additions & 14 deletions gpt_langchain.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ def file_to_doc(file, base_path=None, verbose=False, fail_any_exception=False, c
# recurse
doc1 = path_to_docs(base_path, verbose=verbose, fail_any_exception=fail_any_exception)
else:
raise RuntimeError("No file handler for %s" % file)
raise RuntimeError("No file handler for %s" % os.path.basename(file))

# allow doc1 to be list or not. If not list, did not chunk yet, so chunk now
if not isinstance(doc1, list):
Expand Down Expand Up @@ -580,26 +580,27 @@ def path_to_docs(path_or_paths, verbose=False, fail_any_exception=False, n_jobs=
globs_image_types = []
globs_non_image_types = []
if url:
globs = [url]
globs_non_image_types = [url]
elif text:
globs = [text]
globs_non_image_types = [text]
elif isinstance(path_or_paths, str):
# single file
# single path, only consume allowed files
path = path_or_paths
# Below globs should match patterns in file_to_doc()
[globs_image_types.extend(glob.glob(os.path.join(path, "./**/*.%s" % ftype), recursive=True)) for ftype in
image_types]
[globs_non_image_types.extend(glob.glob(os.path.join(path, "./**/*.%s" % ftype), recursive=True)) for ftype in
non_image_types]
globs = globs_non_image_types + globs_image_types
[globs_image_types.extend(glob.glob(os.path.join(path, "./**/*.%s" % ftype), recursive=True))
for ftype in image_types]
[globs_non_image_types.extend(glob.glob(os.path.join(path, "./**/*.%s" % ftype), recursive=True))
for ftype in non_image_types]
else:
# list/tuple of files
# list/tuple of files (consume what can, and exception those that selected but cannot consume so user knows)
assert isinstance(path_or_paths, (list, tuple)), "Wrong type for path_or_paths: %s" % type(path_or_paths)
globs = path_or_paths
# reform out of allowed types
globs_image_types = flatten_list([[x for x in globs if x.endswith(y)] for y in image_types])
globs_non_image_types = flatten_list([[x for x in globs if x.endswith(y)] for y in non_image_types])
globs = globs_non_image_types + globs_image_types
globs_image_types = flatten_list([[x for x in path_or_paths if x.endswith(y)] for y in image_types])
# could do below:
# globs_non_image_types = flatten_list([[x for x in path_or_paths if x.endswith(y)] for y in non_image_types])
# But instead, allow fail so can collect unsupported too
set_globs_image_types = set(globs_image_types)
globs_non_image_types = [x for x in path_or_paths if x not in set_globs_image_types]
# could use generator, but messes up metadata handling in recursive case
if caption_loader and not isinstance(caption_loader, (bool, str)) and \
caption_loader.device != 'cpu' or \
Expand Down
14 changes: 7 additions & 7 deletions gradio_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,13 +1234,13 @@ def update_user_db(file, db1, x, y, *args, dbs=None, langchain_mode='UserData',


def _update_user_db(file, db1, x, y, dbs=None, db_type=None, langchain_mode='UserData', use_openai_embedding=False,
hf_embedding_model="sentence-transformers/all-MiniLM-L6-v2",
caption_loader=None,
enable_captions=True,
captions_model="Salesforce/blip-image-captioning-base",
enable_ocr=False,
verbose=False,
chunk=True, chunk_size=512, is_url=False, is_txt=False):
hf_embedding_model="sentence-transformers/all-MiniLM-L6-v2",
caption_loader=None,
enable_captions=True,
captions_model="Salesforce/blip-image-captioning-base",
enable_ocr=False,
verbose=False,
chunk=True, chunk_size=512, is_url=False, is_txt=False):
assert isinstance(dbs, dict), "Wrong type for dbs: %s" % str(type(dbs))
assert db_type in ['faiss', 'chroma'], "db_type %s not supported" % db_type
from gpt_langchain import add_to_db, get_db, path_to_docs
Expand Down

0 comments on commit 3b99158

Please sign in to comment.