diff --git a/lute/bing/routes.py b/lute/bing/routes.py index 98e4ad827..9f57d38ba 100644 --- a/lute/bing/routes.py +++ b/lute/bing/routes.py @@ -3,6 +3,8 @@ """ import os +import datetime +import hashlib import re import urllib.request from flask import ( @@ -107,7 +109,11 @@ def _get_dir_and_filename(langid, text): image_dir = os.path.join(datapath, "userimages", langid) if not os.path.exists(image_dir): os.makedirs(image_dir) - filename = re.sub(r"\s+", "_", text) + ".jpeg" + + now = datetime.datetime.now() + timestamp = now.strftime("%Y%m%d_%H%M%S%f")[:-3] + hash_part = hashlib.md5(text.encode()).hexdigest()[:8] + filename = f"{timestamp}_{hash_part}.jpeg" return [image_dir, filename] @@ -126,9 +132,11 @@ def bing_save(): with urllib.request.urlopen(src) as response, open(destfile, "wb") as out_file: out_file.write(response.read()) - # This is the format of legacy Lute v2 data. - image_url = f"/userimages/{langid}/{filename}" - return jsonify({"filename": image_url}) + ret = { + "url": f"/userimages/{langid}/{filename}", + "filename": filename, + } + return jsonify(ret) @bp.route("/manual_image_post", methods=["POST"]) @@ -152,6 +160,8 @@ def manual_image_post(): destfile = os.path.join(imgdir, filename) f.save(destfile) - # This is the format of legacy Lute v2 data. - image_url = f"/userimages/{langid}/{filename}" - return jsonify({"filename": image_url}) + ret = { + "url": f"/userimages/{langid}/{filename}", + "filename": filename, + } + return jsonify(ret) diff --git a/lute/db/data_cleanup.py b/lute/db/data_cleanup.py index 3f279e90c..65947cf6e 100644 --- a/lute/db/data_cleanup.py +++ b/lute/db/data_cleanup.py @@ -5,9 +5,10 @@ These cleanup routines will be called by the app_factory. 
""" -from sqlalchemy import text as sqltext +from sqlalchemy import select, text as sqltext from lute.models.language import Language from lute.models.book import Text, Sentence +from lute.models.term import TermImage class ProgressReporter: @@ -138,7 +139,53 @@ def _get_next_batch(batch_size): output_function("Done.") +def _update_term_images(session, output_function): + """ + Fix TermImage sources (ref https://github.com/LuteOrg/lute-v3/issues/582) + + Prior to issue 582, images were stored in the db as url-like items, + "/userimages/{language_id}/{term}.jpeg". + + e.g. wordimages.wisource = "/userimages/2/thiết_kế_nội_thất.jpeg", including + zero-width spaces. This routine removes the "/userimages/{language_id}/" + from the start of the strings. + + Also, some images didn't have ".jpeg" at the end ... this adds that. + """ + + def _fix_source(s): + "Remove the leading userimages and languageid, add .jpeg if needed." + parts = s.split("/", 3) + ret = parts[-1] + if not ret.endswith(".jpeg"): + ret = f"{ret}.jpeg" + return ret + + stmt = select(TermImage).where(TermImage.source.contains("userimages")) + recalc = session.execute(stmt).scalars().all() + if len(recalc) == 0: + # Nothing to calculate, quit. + return + + batch_size = 1000 + output_function(f"Fixing image sources for {len(recalc)} word images.") + pr = ProgressReporter(len(recalc), output_function, report_every=batch_size) + n = 0 + for ti in recalc: + pr.increment() + ti.source = _fix_source(ti.source) + session.add(ti) + n += 1 + if n % batch_size == 0: + session.commit() + + # Any remaining. + session.commit() + output_function("Done.") + + def clean_data(session, output_function): "Clean all data as required, sending messages to output_function."
_set_texts_word_count(session, output_function) _load_sentence_textlc(session, output_function) + _update_term_images(session, output_function) diff --git a/lute/models/term.py b/lute/models/term.py index ac89236b6..bfd1224f5 100644 --- a/lute/models/term.py +++ b/lute/models/term.py @@ -266,20 +266,12 @@ def add_parent(self, parent): if len(self.parents) > 1: self.sync_status = False - def get_current_image(self, strip_jpeg=True): + def get_current_image(self): "Get the current (first) image for the term." if len(self.images) == 0: return None i = self.images[0] - - src = i.source - - if not strip_jpeg: - return src - - # Ugly hack: we have to remove the .jpeg at the end because - # Flask doesn't handle params with periods. - return src.replace(".jpeg", "") + return i.source def set_current_image(self, s): "Set the current image for this term." diff --git a/lute/read/service.py b/lute/read/service.py index 8c661c5c9..a67896c41 100644 --- a/lute/read/service.py +++ b/lute/read/service.py @@ -62,9 +62,11 @@ def _get_popup_image_data(self): # it can get confusing! # ref https://github.com/LuteOrg/lute-v3/issues/355 terms = [self.term, *self.term.parents] - images = [ - (t.get_current_image(), t.text) for t in terms if t.get_current_image() - ] + + def _make_image_url(t): + return f"/userimages/{t.language.id}/{t.get_current_image()}" + + images = [(_make_image_url(t), t.text) for t in terms if t.get_current_image()] imageresult = defaultdict(list) for key, value in images: imageresult[key].append(self._clean(value)) diff --git a/lute/templates/imagesearch/index.html b/lute/templates/imagesearch/index.html index 27f47ac1f..6f1eb07bf 100644 --- a/lute/templates/imagesearch/index.html +++ b/lute/templates/imagesearch/index.html @@ -30,7 +30,7 @@