Skip to content

Commit

Permalink
Merge branch 'develop' into make-bing-search-async
Browse files Browse the repository at this point in the history
  • Loading branch information
jzohrab committed Jan 31, 2025
2 parents fa48e7d + 5042622 commit a32b8c3
Show file tree
Hide file tree
Showing 10 changed files with 91 additions and 44 deletions.
24 changes: 17 additions & 7 deletions lute/bing/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
"""

import os
import datetime
import hashlib
import re
import urllib.request
from flask import (
Expand Down Expand Up @@ -107,7 +109,11 @@ def _get_dir_and_filename(langid, text):
image_dir = os.path.join(datapath, "userimages", langid)
if not os.path.exists(image_dir):
os.makedirs(image_dir)
filename = re.sub(r"\s+", "_", text) + ".jpeg"

now = datetime.datetime.now()
timestamp = now.strftime("%Y%m%d_%H%M%S%f")[:-3]
hash_part = hashlib.md5(text.encode()).hexdigest()[:8]
filename = f"{timestamp}_{hash_part}.jpeg"
return [image_dir, filename]


Expand All @@ -126,9 +132,11 @@ def bing_save():
with urllib.request.urlopen(src) as response, open(destfile, "wb") as out_file:
out_file.write(response.read())

# This is the format of legacy Lute v2 data.
image_url = f"/userimages/{langid}/{filename}"
return jsonify({"filename": image_url})
ret = {
"url": f"/userimages/{langid}/{filename}",
"filename": filename,
}
return jsonify(ret)


@bp.route("/manual_image_post", methods=["POST"])
Expand All @@ -152,6 +160,8 @@ def manual_image_post():
destfile = os.path.join(imgdir, filename)
f.save(destfile)

# This is the format of legacy Lute v2 data.
image_url = f"/userimages/{langid}/{filename}"
return jsonify({"filename": image_url})
ret = {
"url": f"/userimages/{langid}/{filename}",
"filename": filename,
}
return jsonify(ret)
49 changes: 48 additions & 1 deletion lute/db/data_cleanup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@
These cleanup routines will be called by the app_factory.
"""

from sqlalchemy import text as sqltext
from sqlalchemy import select, text as sqltext
from lute.models.language import Language
from lute.models.book import Text, Sentence
from lute.models.term import TermImage


class ProgressReporter:
Expand Down Expand Up @@ -138,7 +139,53 @@ def _get_next_batch(batch_size):
output_function("Done.")


def _update_term_images(session, output_function):
    """
    Rewrite legacy TermImage sources as bare filenames.

    Ref https://github.com/LuteOrg/lute-v3/issues/582

    Before issue 582, image sources were stored as url-like strings,
    "/userimages/{language_id}/{term}.jpeg", e.g.
    wordimages.wisource = "/userimages/2/thiết_kế_nội_thất.jpeg"
    (possibly including zero-width spaces).  Every TermImage whose
    source contains "userimages" has the leading
    "/userimages/{language_id}/" removed, and ".jpeg" is appended
    when the result does not already end with it.

    Progress messages are sent to output_function.  Changes are
    committed every 1000 records, with a final commit at the end.
    """

    def _fix_source(s):
        "Return the last piece of s split on at most 3 slashes, ending in .jpeg."
        filename = s.split("/", 3)[-1]
        return filename if filename.endswith(".jpeg") else f"{filename}.jpeg"

    legacy_query = select(TermImage).where(TermImage.source.contains("userimages"))
    images = session.execute(legacy_query).scalars().all()
    if not images:
        # No legacy sources found, nothing to fix.
        return

    total = len(images)
    batch_size = 1000
    output_function(f"Fixing image sources for {total} word images.")
    reporter = ProgressReporter(total, output_function, report_every=batch_size)
    for count, image in enumerate(images, start=1):
        reporter.increment()
        image.source = _fix_source(image.source)
        session.add(image)
        # Commit each time a full batch has been processed.
        if count % batch_size == 0:
            session.commit()

    # Commit whatever is left over from the last partial batch.
    session.commit()
    output_function("Done.")


def clean_data(session, output_function):
    "Run every cleanup routine in order, sending messages to output_function."
    cleanup_steps = (
        _set_texts_word_count,
        _load_sentence_textlc,
        _update_term_images,
    )
    for step in cleanup_steps:
        step(session, output_function)
12 changes: 2 additions & 10 deletions lute/models/term.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,20 +266,12 @@ def add_parent(self, parent):
if len(self.parents) > 1:
self.sync_status = False

def get_current_image(self, strip_jpeg=True):
def get_current_image(self):
"Get the current (first) image for the term."
if len(self.images) == 0:
return None
i = self.images[0]

src = i.source

if not strip_jpeg:
return src

# Ugly hack: we have to remove the .jpeg at the end because
# Flask doesn't handle params with periods.
return src.replace(".jpeg", "")
return i.source

def set_current_image(self, s):
"Set the current image for this term."
Expand Down
8 changes: 5 additions & 3 deletions lute/read/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,11 @@ def _get_popup_image_data(self):
# it can get confusing!
# ref https://github.com/LuteOrg/lute-v3/issues/355
terms = [self.term, *self.term.parents]
images = [
(t.get_current_image(), t.text) for t in terms if t.get_current_image()
]

def _make_image_url(t):
return f"/userimages/{t.language.id}/{t.get_current_image()}"

images = [(_make_image_url(t), t.text) for t in terms if t.get_current_image()]
imageresult = defaultdict(list)
for key, value in images:
imageresult[key].append(self._clean(value))
Expand Down
13 changes: 4 additions & 9 deletions lute/templates/imagesearch/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
</body>

<script>
let update_term_form = function(filename) {
let _update_term_form_image = function(filename, new_url) {
// Well, this took **far** too long to figure out ...
let fr = window.parent.frames['wordframe'];
if (fr == null) {
Expand All @@ -53,13 +53,8 @@
img.style.gridColumn = "-1";
img.style.display = "block";

// Ugly hack: we have to remove the .jpeg at the end, because
// Symfony doesn't handle params with periods.
// Ref https://github.com/symfony/symfony/issues/25541.
// The src/ImageController adds the .jpeg at the end again to
// find the actual file.
var timestamp = new Date().getTime();
const newsrc = filename.replace('.jpeg', '') + `?${timestamp}`;
const newsrc = `${new_url}?${timestamp}`;
// console.log(`switching source to ${newsrc}`);
img.src = newsrc;
}
Expand All @@ -76,7 +71,7 @@
dataType: 'json',
success: function(data) {
// console.log("Saved: " + data + "\nStatus: " + status);
update_term_form(data.filename);
_update_term_form_image(data.filename, data.url);
$('.saved').removeClass('saved');
$('.highlight').addClass('saved').removeClass('highlight');
}
Expand Down Expand Up @@ -104,7 +99,7 @@
contentType: false,
success: function(data) {
// console.log("Saved: " + data + "\nStatus: " + status);
update_term_form(data.filename);
_update_term_form_image(data.filename, data.url);
},
error: function(xhr, status, error) {
console.error(`Error submitting form. ${error}; ${status}; ${xhr.responseText}`);
Expand Down
2 changes: 1 addition & 1 deletion lute/templates/term/_form.html
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@
class="zoomableTermImage"
id="term_image"
tabindex="0"
src="{{ form.current_image.object_data or '' }}"
src="/userimages/{{ term.language_id or 0 }}/{{ form.current_image.object_data or '-' }}"
onclick="clicked_zoomable_image(this);"
/>
</div>
Expand Down
2 changes: 1 addition & 1 deletion lute/templates/term/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@
if (imgsrc) {
const img = document.createElement('img');
img.className = 'term-listing-image';
img.src = imgsrc;
img.src = `/userimages/${rowData["LgID"]}/${imgsrc}`;
$(td).append(img);
}

Expand Down
2 changes: 1 addition & 1 deletion lute/term/datatables.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def get_data_tables_list(parameters, session):
base_sql = """SELECT
w.WoID as WoID, LgName, L.LgID as LgID, w.WoText as WoText, parents.parentlist as ParentText, w.WoTranslation,
w.WoRomanization,
replace(wi.WiSource, '.jpeg', '') as WiSource,
WiSource,
ifnull(tags.taglist, '') as TagList,
StText,
StID,
Expand Down
13 changes: 5 additions & 8 deletions lute/userimage/routes.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
"""
User images routes.
User images are stored in the database as /userimages/langid/term, but
with no jpeg extension. Reason: the old symfony code couldn't manage
urls with periods.
"""

import os
Expand All @@ -12,10 +8,11 @@
bp = Blueprint("userimages", __name__, url_prefix="/userimages")


@bp.route("/<int:lgid>/<term>", methods=["GET"])
def get_image(lgid, term):
@bp.route("/<int:lgid>/<path:f>", methods=["GET"])
def get_image(lgid, f):
"Serve the image from the data/userimages directory."
datapath = current_app.config["DATAPATH"]
directory = os.path.join(datapath, "userimages", str(lgid))
filename = term + ".jpeg"
return send_from_directory(directory, filename)
if not os.path.exists(os.path.join(directory, f)):
return ""
return send_from_directory(directory, f)
10 changes: 7 additions & 3 deletions tests/unit/read/test_service_popup_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,19 +114,23 @@ def test_images_combined_in_popup(spanish, app_context, service):
db.session.commit()

d = service.get_popup_data(t.id)
assert d.popup_image_data == {"gato.jpg": "gato"}
img_url_start = f"/userimages/{spanish.id}/"
assert d.popup_image_data == {img_url_start + "gato.jpg": "gato"}

p.set_current_image("perro.jpg")
db.session.add(p)
db.session.commit()
d = service.get_popup_data(t.id)
assert d.popup_image_data == {"gato.jpg": "gato", "perro.jpg": "perro"}
assert d.popup_image_data == {
img_url_start + "gato.jpg": "gato",
img_url_start + "perro.jpg": "perro",
}

p.set_current_image("gato.jpg")
db.session.add(p)
db.session.commit()
d = service.get_popup_data(t.id)
assert d.popup_image_data == {"gato.jpg": "gato, perro"}
assert d.popup_image_data == {img_url_start + "gato.jpg": "gato, perro"}


def test_single_parent_translation_can_be_promoted_to_term_if_term_translation_blank(
Expand Down

0 comments on commit a32b8c3

Please sign in to comment.