Skip to content

Commit

Permalink
typogrify: Don't try to add nbsp to weight abbreviations, due to too m…
Browse files Browse the repository at this point in the history
…any false positives; remove nbsp between words; add nbsp to names starting with 'De '
  • Loading branch information
acabal committed Nov 11, 2023
1 parent 0f3de97 commit 2043147
Showing 1 changed file with 8 additions and 1 deletion.
9 changes: 8 additions & 1 deletion se/typography.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,12 @@ def typogrify(xhtml: str, smart_quotes: bool = True) -> str:
xhtml = regex.sub(r"([Ii])\.\s+e\.", r"\1.e.", xhtml)
xhtml = regex.sub(r"([Ee])\.\s+g\.", r"\1.g.", xhtml)

# Remove nbsps between words
xhtml = regex.sub(fr"([^>…]){se.NO_BREAK_SPACE}([\p{{Letter}}\p{{Digit}}])", r"\1 \2", xhtml)

# Add nbsp between `De` and the rest of a last name, but not in Latin titles like `<i xml:lang="la">De Natura</i>`
xhtml = regex.sub(r"([^>])De ([A-Z][a-z]+?)", fr"\1De{se.NO_BREAK_SPACE}\2", xhtml)

# WARNING! This and below can remove the ending period of a sentence, if AD or BC is the last word! We need interactive S&R for this
xhtml = regex.sub(r"([\d\s])A\.\s+D\.", r"\1AD", xhtml)
xhtml = regex.sub(r"(?<!A\. )B\.\s+C\.", r"BC", xhtml)
Expand Down Expand Up @@ -277,7 +283,8 @@ def typogrify(xhtml: str, smart_quotes: bool = True) -> str:
xhtml = regex.sub(r"<p>\. …", "<p>…", xhtml)

# Add non-breaking spaces between amounts with an abbreviated unit. E.g. 8 oz., 10 lbs.
xhtml = regex.sub(r"([0-9])\s+([\p{Letter}]{1,3}\.)", fr"\1{se.NO_BREAK_SPACE}\2", xhtml, flags=regex.IGNORECASE)
# Removed because it's adding too many false positives
#xhtml = regex.sub(r"([0-9])\s+([\p{Letter}]{1,3}\.)", fr"\1{se.NO_BREAK_SPACE}\2", xhtml, flags=regex.IGNORECASE)

# Add non-breaking spaces between Arabic numbers and AM/PM
xhtml = regex.sub(r"([0-9])\s+([ap])\.m\.", fr"\1{se.NO_BREAK_SPACE}\2.m.", xhtml, flags=regex.IGNORECASE)
Expand Down

0 comments on commit 2043147

Please sign in to comment.