Add changes from code review
scottbarnes committed Jun 18, 2024
1 parent 684dc2e commit ce52c90
Showing 5 changed files with 25 additions and 23 deletions.
14 changes: 7 additions & 7 deletions openlibrary/core/batch_imports.py
@@ -24,7 +24,7 @@ def generate_hash(data: bytes, length: int = 20):
This is used to help uniquely identify a batch_import. Truncating
to 20 characters gives about a 50% chance of collision after 1 billion
-community hash imports. According to ChatGPT, anyway. :)
+hash imports. According to ChatGPT, anyway. :)
"""
hash = hashlib.sha256()
hash.update(data)
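
Note: the truncation the docstring describes happens on a line the diff does not show. A minimal sketch of the whole helper, assuming the elided code simply truncates the hex digest; the return line is an inference from the docstring, not visible source:

import hashlib

def generate_hash(data: bytes, length: int = 20) -> str:
    """Hash `data` with SHA-256 and truncate the hex digest to `length` characters."""
    hash = hashlib.sha256()
    hash.update(data)
    # Assumption: the line elided by the diff truncates the hex digest to `length`.
    return hash.hexdigest()[:length]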
@@ -36,7 +36,7 @@ class BatchImportError:
"""
Represents a Batch Import error item, containing a line number and error message.
-As the JSONL in a community batch import file is parsed, it's validated and errors are
+As the JSONL in a batch import file is parsed, it's validated and errors are
recorded by line number and the error thrown, and this item represents that information,
which is returned to the uploading patron.
"""
@@ -72,7 +72,7 @@ class BatchResult:

def batch_import(raw_data: bytes) -> BatchResult:
"""
-This process raw byte data from a JSONL POST to the community batch import endpoint.
+This processes raw byte data from a JSONL POST to the batch import endpoint.
Each line in the JSONL file (i.e. import item) must pass the same validation as
required for a valid import going through load().
@@ -84,9 +84,9 @@ def batch_import(raw_data: bytes) -> BatchResult:
"""
user = accounts.get_current_user()
username = user.get_username()
batch_name = f"cb-{generate_hash(raw_data)}"
batch_name = f"batch-{generate_hash(raw_data)}"
errors: list[BatchImportError] = []
-import_records: list[dict] = []
+raw_import_records: list[dict] = []

for index, line in enumerate(raw_data.decode("utf-8").splitlines()):
# Allow comments with `#` in these import records, even though they're JSONL.
@@ -100,7 +100,7 @@ def batch_import(raw_data: bytes) -> BatchResult:
validate_record(edition_builder.get_dict())

# Add the raw_record for later processing; it still must go through load() independently.
-import_records.append(raw_record)
+raw_import_records.append(raw_record)
except (
JSONDecodeError,
PublicationYearTooOld,
@@ -130,7 +130,7 @@
'data': item,
'submitter': username,
}
-for item in import_records
+for item in raw_import_records
]

# Create the batch
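
Putting the hunks above together, the parsing loop follows the pattern sketched below: skip `#` comment lines, parse and validate each JSONL record, collect per-line errors rather than aborting the upload, and keep the raw records for a later load(). This is a simplified, self-contained illustration; validate_record(), the import error classes, and the Batch plumbing are only partially visible in the diff, so stand-ins are used here.

import json
from json import JSONDecodeError


def parse_jsonl_batch(raw_data: bytes) -> tuple[list[dict], list[tuple[int, str]]]:
    """Parse JSONL bytes into raw import records plus (line_number, error) pairs.

    A simplified stand-in for the loop in batch_import(): `#` comment lines are
    allowed, every other non-empty line must be a JSON object, and failures are
    recorded per line so one bad record never aborts the whole upload.
    """
    raw_import_records: list[dict] = []
    errors: list[tuple[int, str]] = []

    for index, line in enumerate(raw_data.decode("utf-8").splitlines()):
        if not line.strip() or line.strip().startswith("#"):
            continue
        try:
            record = json.loads(line)
            if not isinstance(record, dict):
                raise ValueError("each JSONL line must be a JSON object")
            # The real code also builds an edition dict and runs validate_record() on it.
            raw_import_records.append(record)
        except (JSONDecodeError, ValueError) as e:
            errors.append((index, str(e)))

    return raw_import_records, errors

Each surviving record is then wrapped as a batch item ({'batch_id': ..., 'data': item, 'submitter': username}) and queued, as the final hunk of this file shows.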
16 changes: 8 additions & 8 deletions openlibrary/core/imports.py
@@ -36,10 +36,10 @@ def __init__(self, mapping, *requireds, **defaults):
Initialize some statistics instance attributes yet retain web.storage's __init__ method.
"""
super().__init__(mapping, *requireds, **defaults)
-self.total_tried: int = 0
-self.total_added: int = 0
-self.total_not_added: int = 0
-self.items_not_added: set = set()
+self.total_submitted: int = 0
+self.total_queued: int = 0
+self.total_skipped: int = 0
+self.items_skipped: set = set()

@staticmethod
def find(name: str, create: bool = False) -> "Batch": # type: ignore[return]
@@ -76,10 +76,10 @@ def dedupe_items(self, items):
)

# Update batch counts
-self.total_tried = len(ia_ids)
-self.total_not_added = len(already_present)
-self.total_added = self.total_tried - self.total_not_added
-self.items_not_added = already_present
+self.total_submitted = len(ia_ids)
+self.total_skipped = len(already_present)
+self.total_queued = self.total_submitted - self.total_skipped
+self.items_skipped = already_present

# Those unique items whose ia_id's aren't already present
return [item for item in items if item.get('ia_id') not in already_present]
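
With the renaming, the dedupe bookkeeping reads as a simple identity: every record submitted is either skipped (its ia_id is already in the import queue) or newly queued. A toy illustration of the arithmetic using the new attribute names; the ia_id values are invented:

ia_ids = ["ia:item1", "ia:item2", "ia:item3", "ia:item4"]
already_present = {"ia:item2", "ia:item4"}

total_submitted = len(ia_ids)                   # every record in the upload
total_skipped = len(already_present)            # already sitting in the queue
total_queued = total_submitted - total_skipped  # newly queued this time
items_skipped = already_present                 # reported back to the patron

assert (total_submitted, total_queued, total_skipped) == (4, 2, 2)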
2 changes: 1 addition & 1 deletion openlibrary/i18n/messages.pot
@@ -161,7 +161,7 @@ msgid "Total queued:"
msgstr ""

#: batch_import.html
msgid "Total not queued:"
msgid "Total skipped:"
msgstr ""

#: batch_import.html
6 changes: 4 additions & 2 deletions openlibrary/plugins/openlibrary/code.py
@@ -483,7 +483,7 @@ def remove_high_priority(query: str) -> str:

class batch_imports(delegate.page):
"""
-The community batch import endpoint. Expects a JSONL file POSTed via multipart/form-data.
+The batch import endpoint. Expects a JSONL file POSTed with multipart/form-data.
"""

path = '/import/batch/new'
@@ -492,7 +492,9 @@ def GET(self):
return render_template("batch_import.html", batch_result=None)

def POST(self):
-if not can_write():
+
+user_key = delegate.context.user and delegate.context.user.key
+if user_key not in _get_members_of_group("/usergroup/admin"):
raise Forbidden('Permission Denied.')

# Get the upload from web.py. See the template for the <form> used.
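
The POST handler now gates on membership in /usergroup/admin rather than the earlier can_write() check; for an anonymous visitor, delegate.context.user is falsy, so user_key ends up None and fails the membership test just like a signed-in non-admin. A self-contained sketch of that pattern, with plain arguments standing in for delegate.context and _get_members_of_group():

class Forbidden(Exception):
    """Stand-in for the Forbidden response raised by the real handler."""


def require_admin(current_user_key: str | None, admin_member_keys: list[str]) -> None:
    # current_user_key is None for anonymous visitors, mirroring
    # `delegate.context.user and delegate.context.user.key`.
    if current_user_key not in admin_member_keys:
        raise Forbidden('Permission Denied.')


require_admin('/people/admin_user', ['/people/admin_user'])  # admin: allowed
# require_admin(None, ['/people/admin_user'])                # anonymous: raises Forbidden
# require_admin('/people/someone', ['/people/admin_user'])   # non-admin: raises Forbidden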
10 changes: 5 additions & 5 deletions openlibrary/templates/batch_import.html
@@ -24,14 +24,14 @@ <h1>$_("Batch Imports")</h1>

$if batch_result and batch_result.batch:
<p>$_('Import results for batch:') Batch #$batch_result.batch.id ($batch_result.batch.name)</p>
-<p>$_('Records found:') $batch_result.batch.total_tried</p>
-<p>$_('Total queued:') $batch_result.batch.total_added</p>
-<p>$_('Total not queued:') $batch_result.batch.total_not_added</p>
-$if batch_result.batch.total_not_added > 0:
+<p>$_('Records submitted:') $batch_result.batch.total_submitted</p>
+<p>$_('Total queued:') $batch_result.batch.total_queued</p>
+<p>$_('Total skipped:') $batch_result.batch.total_skipped</p>
+$if batch_result.batch.total_skipped > 0:
<details>
<summary>$_('Not queued because they are already present in the queue:')</summary>
<ol>
-$for item in batch_result.batch.items_not_added:
+$for item in batch_result.batch.items_skipped:
<li>$item</li>
</ol>
</details>
