Skip to content

Commit

Permalink
Option to set alternative encoding, defaulting to 'utf-8', fixing #13
Browse files Browse the repository at this point in the history
  • Loading branch information
jwcarr committed Oct 2, 2024
1 parent 0f999f2 commit 374cd46
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 8 deletions.
14 changes: 14 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
bibfish 0.3.3 - 2024-10-02
==========================

Added
-----

- Option to specify the encoding of tex/bibtex files, defaulting to UTF-8.

Changed
-------

- Bibfish now defaults to UTF-8 encoding.


bibfish 0.3.2 - 2024-06-13
==========================

Expand Down
31 changes: 23 additions & 8 deletions bibfish/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,17 @@
__version__ = "???"


def extract_citekeys(manuscript_file: str, cite_commands: list) -> list:
def extract_citekeys(
manuscript_file: str, cite_commands: list, encoding: str = "utf-8"
) -> list:
"""
Search manuscript_file for any cite commands and return the citekeys they
make reference to. If the manuscript has any nested files (through input,
import, or include), these will be recursively expanded.
"""
if len(cite_commands) == 0:
return []
with open(manuscript_file, "r") as file:
with open(manuscript_file, "r", encoding=encoding) as file:
full_manuscript = file.read()

uncommented_lines = []
Expand All @@ -37,7 +39,7 @@ def extract_citekeys(manuscript_file: str, cite_commands: list) -> list:
pass
for nestfile in find_imported_files(manuscript):
try:
citekeys += extract_citekeys(nestfile, cite_commands)
citekeys += extract_citekeys(nestfile, cite_commands, encoding=encoding)
except FileNotFoundError:
pass
citations = re.findall(
Expand Down Expand Up @@ -74,14 +76,16 @@ def find_imported_files(manuscript: str) -> list:
return filenames


def parse_bibtex_entries(bib_files: list, citekeys: list) -> BibDatabase:
def parse_bibtex_entries(
bib_files: list, citekeys: list, encoding: str = "utf-8"
) -> BibDatabase:
"""
Return a bibtexparser.bibdatabase.BibDatabase which contains only the
entries in *bib_files* which match *citekeys*.
"""
out_db = BibDatabase()
for bib_file in reversed(bib_files): # give priority to earlier bib files
with open(bib_file) as file:
with open(bib_file, "r", encoding=encoding) as file:
bib_database = bibtexparser.load(
file,
parser=bibtexparser.bparser.BibTexParser(
Expand Down Expand Up @@ -168,6 +172,7 @@ def main(
local_bib_file,
cite_commands,
force_overwrite=False,
encoding="utf-8",
short_dois=False,
drop_fields=None,
):
Expand All @@ -179,10 +184,10 @@ def main(
print(f"bibfish: {local_bib_file} already exists. Use -f to force overwrite.")
return

citekeys = extract_citekeys(manuscript_file, cite_commands)
citekeys = extract_citekeys(manuscript_file, cite_commands, encoding=encoding)
if not isinstance(master_bib_files, list):
master_bib_files = [master_bib_files]
bibtex_db = parse_bibtex_entries(master_bib_files, citekeys)
bibtex_db = parse_bibtex_entries(master_bib_files, citekeys, encoding=encoding)

if short_dois:
bibtex_db = shorten_dois_in_db(bibtex_db)
Expand All @@ -197,7 +202,7 @@ def main(
if any("crossref" in entry for entry in bibtex_db.entries):
db_writer.order_entries_by = None

with open(local_bib_file, "w") as file:
with open(local_bib_file, "w", encoding=encoding) as file:
bibtexparser.dump(bibtex_db, file, db_writer)


Expand Down Expand Up @@ -250,6 +255,15 @@ def cli():
dest="force_overwrite",
help="Overwrite the local .bib file if it already exists",
)
parser.add_argument(
"-e",
"--encoding",
action="store",
type=str,
default="utf-8",
dest="encoding",
help="Character encoding of the tex and bibtex files (default: 'utf-8')",
)
parser.add_argument(
"--sdoi",
action="store_true",
Expand Down Expand Up @@ -283,6 +297,7 @@ def cli():
local_bib_file=args.local_bib_file,
cite_commands=cite_commands,
force_overwrite=args.force_overwrite,
encoding=args.encoding,
short_dois=args.short_dois,
drop_fields=drop_fields,
)
1 change: 1 addition & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def test_main():
local_bib_file=output_bib_file,
cite_commands=["textcite", "parencite", "possessivecite"],
force_overwrite=True,
encoding="utf-8",
drop_fields=["abstract"],
)

Expand Down

0 comments on commit 374cd46

Please sign in to comment.