Add changes from code review
scottbarnes committed Jun 18, 2024
1 parent 684dc2e commit ce52c90
Showing 5 changed files with 25 additions and 23 deletions.
14 changes: 7 additions & 7 deletions openlibrary/core/batch_imports.py
@@ -24,7 +24,7 @@ def generate_hash(data: bytes, length: int = 20):
This is used to help uniquely identify a batch_import. Truncating
to 20 characters gives about a 50% chance of collision after 1 billion
-community hash imports. According to ChatGPT, anyway. :)
+hash imports. According to ChatGPT, anyway. :)
"""
hash = hashlib.sha256()
hash.update(data)
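
Note: the truncation the docstring describes happens on a line the diff does not show. A minimal sketch of the whole helper, assuming the elided code simply truncates the hex digest; the return line is an inference from the docstring, not visible source:

import hashlib

def generate_hash(data: bytes, length: int = 20) -> str:
    """Hash `data` with SHA-256 and truncate the hex digest to `length` characters."""
    hash = hashlib.sha256()
    hash.update(data)
    # Assumption: the line elided by the diff truncates the hex digest to `length`.
    return hash.hexdigest()[:length]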
@@ -36,7 +36,7 @@ class BatchImportError:
"""
Represents a Batch Import error item, containing a line number and error message.
-As the JSONL in a community batch import file is parsed, it's validated and errors are
+As the JSONL in a batch import file is parsed, it's validated and errors are
recorded by line number and the error thrown, and this item represents that information,
which is returned to the uploading patron.
"""
@@ -72,7 +72,7 @@ class BatchResult:

def batch_import(raw_data: bytes) -> BatchResult:
"""
-This process raw byte data from a JSONL POST to the community batch import endpoint.
+This processes raw byte data from a JSONL POST to the batch import endpoint.
Each line in the JSONL file (i.e. import item) must pass the same validation as
required for a valid import going through load().
@@ -84,9 +84,9 @@ def batch_import(raw_data: bytes) -> BatchResult:
"""
user = accounts.get_current_user()
username = user.get_username()
batch_name = f"cb-{generate_hash(raw_data)}"
batch_name = f"batch-{generate_hash(raw_data)}"
errors: list[BatchImportError] = []
-import_records: list[dict] = []
+raw_import_records: list[dict] = []

for index, line in enumerate(raw_data.decode("utf-8").splitlines()):
# Allow comments with `#` in these import records, even though they're JSONL.
@@ -100,7 +100,7 @@ def batch_import(raw_data: bytes) -> BatchResult:
validate_record(edition_builder.get_dict())

# Add the raw_record for later processing; it still must go through load() independently.
-import_records.append(raw_record)
+raw_import_records.append(raw_record)
except (
JSONDecodeError,
PublicationYearTooOld,
@@ -130,7 +130,7 @@
'data': item,
'submitter': username,
}
-for item in import_records
+for item in raw_import_records
]

# Create the batch
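
Putting the hunks above together, the parsing loop follows the pattern sketched below: skip `#` comment lines, parse and validate each JSONL record, collect per-line errors rather than aborting the upload, and keep the raw records for a later load(). This is a simplified, self-contained illustration; validate_record(), the import error classes, and the Batch plumbing are only partially visible in the diff, so stand-ins are used here.

import json
from json import JSONDecodeError


def parse_jsonl_batch(raw_data: bytes) -> tuple[list[dict], list[tuple[int, str]]]:
    """Parse JSONL bytes into raw import records plus (line_number, error) pairs.

    A simplified stand-in for the loop in batch_import(): `#` comment lines are
    allowed, every other non-empty line must be a JSON object, and failures are
    recorded per line so one bad record never aborts the whole upload.
    """
    raw_import_records: list[dict] = []
    errors: list[tuple[int, str]] = []

    for index, line in enumerate(raw_data.decode("utf-8").splitlines()):
        if not line.strip() or line.strip().startswith("#"):
            continue
        try:
            record = json.loads(line)
            if not isinstance(record, dict):
                raise ValueError("each JSONL line must be a JSON object")
            # The real code also builds an edition dict and runs validate_record() on it.
            raw_import_records.append(record)
        except (JSONDecodeError, ValueError) as e:
            errors.append((index, str(e)))

    return raw_import_records, errors

Each surviving record is then wrapped as a batch item ({'batch_id': ..., 'data': item, 'submitter': username}) and queued, as the final hunk of this file shows.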
16 changes: 8 additions & 8 deletions openlibrary/core/imports.py
@@ -36,10 +36,10 @@ def __init__(self, mapping, *requireds, **defaults):
Initialize some statistics instance attributes yet retain web.storage's __init__ method.
"""
super().__init__(mapping, *requireds, **defaults)
-self.total_tried: int = 0
-self.total_added: int = 0
-self.total_not_added: int = 0
-self.items_not_added: set = set()
+self.total_submitted: int = 0
+self.total_queued: int = 0
+self.total_skipped: int = 0
+self.items_skipped: set = set()

@staticmethod
def find(name: str, create: bool = False) -> "Batch": # type: ignore[return]
@@ -76,10 +76,10 @@ def dedupe_items(self, items):
)

# Update batch counts
-self.total_tried = len(ia_ids)
-self.total_not_added = len(already_present)
-self.total_added = self.total_tried - self.total_not_added
-self.items_not_added = already_present
+self.total_submitted = len(ia_ids)
+self.total_skipped = len(already_present)
+self.total_queued = self.total_submitted - self.total_skipped
+self.items_skipped = already_present

# Those unique items whose ia_id's aren't already present
return [item for item in items if item.get('ia_id') not in already_present]
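
With the renaming, the dedupe bookkeeping reads as a simple identity: every record submitted is either skipped (its ia_id is already in the import queue) or newly queued. A toy illustration of the arithmetic using the new attribute names; the ia_id values are invented:

ia_ids = ["ia:item1", "ia:item2", "ia:item3", "ia:item4"]
already_present = {"ia:item2", "ia:item4"}

total_submitted = len(ia_ids)                   # every record in the upload
total_skipped = len(already_present)            # already sitting in the queue
total_queued = total_submitted - total_skipped  # newly queued this time
items_skipped = already_present                 # reported back to the patron

assert (total_submitted, total_queued, total_skipped) == (4, 2, 2)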
2 changes: 1 addition & 1 deletion openlibrary/i18n/messages.pot
@@ -161,7 +161,7 @@ msgid "Total queued:"
msgstr ""

#: batch_import.html
msgid "Total not queued:"
msgid "Total skipped:"
msgstr ""

#: batch_import.html
6 changes: 4 additions & 2 deletions openlibrary/plugins/openlibrary/code.py
@@ -483,7 +483,7 @@ def remove_high_priority(query: str) -> str:

class batch_imports(delegate.page):
"""
-The community batch import endpoint. Expects a JSONL file POSTed via multipart/form-data.
+The batch import endpoint. Expects a JSONL file POSTed with multipart/form-data.
"""

path = '/import/batch/new'
@@ -492,7 +492,9 @@ def GET(self):
return render_template("batch_import.html", batch_result=None)

def POST(self):
-if not can_write():
+
+user_key = delegate.context.user and delegate.context.user.key
+if user_key not in _get_members_of_group("/usergroup/admin"):
raise Forbidden('Permission Denied.')

# Get the upload from web.py. See the template for the <form> used.
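
The POST handler now gates on membership in /usergroup/admin rather than the earlier can_write() check; for an anonymous visitor, delegate.context.user is falsy, so user_key ends up None and fails the membership test just like a signed-in non-admin. A self-contained sketch of that pattern, with plain arguments standing in for delegate.context and _get_members_of_group():

class Forbidden(Exception):
    """Stand-in for the Forbidden response raised by the real handler."""


def require_admin(current_user_key: str | None, admin_member_keys: list[str]) -> None:
    # current_user_key is None for anonymous visitors, mirroring
    # `delegate.context.user and delegate.context.user.key`.
    if current_user_key not in admin_member_keys:
        raise Forbidden('Permission Denied.')


require_admin('/people/admin_user', ['/people/admin_user'])  # admin: allowed
# require_admin(None, ['/people/admin_user'])                # anonymous: raises Forbidden
# require_admin('/people/someone', ['/people/admin_user'])   # non-admin: raises Forbidden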
10 changes: 5 additions & 5 deletions openlibrary/templates/batch_import.html
@@ -24,14 +24,14 @@ <h1>$_("Batch Imports")</h1>

$if batch_result and batch_result.batch:
<p>$_('Import results for batch:') Batch #$batch_result.batch.id ($batch_result.batch.name)</p>
-<p>$_('Records found:') $batch_result.batch.total_tried</p>
-<p>$_('Total queued:') $batch_result.batch.total_added</p>
-<p>$_('Total not queued:') $batch_result.batch.total_not_added</p>
-$if batch_result.batch.total_not_added > 0:
+<p>$_('Records submitted:') $batch_result.batch.total_submitted</p>
+<p>$_('Total queued:') $batch_result.batch.total_queued</p>
+<p>$_('Total skipped:') $batch_result.batch.total_skipped</p>
+$if batch_result.batch.total_skipped > 0:
<details>
<summary>$_('Not queued because they are already present in the queue:')</summary>
<ol>
-$for item in batch_result.batch.items_not_added:
+$for item in batch_result.batch.items_skipped:
<li>$item</li>
</ol>
</details>
