Skip to content

Commit

Permalink
Organize and remove unused imports
Browse files (browse the repository at this point in the history)
  • Loading branch information
edsu committed Oct 20, 2023
1 parent ab3af9a commit 89d8a75
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 4 deletions.
8 changes: 4 additions & 4 deletions tests/test_warcdb.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
-from click.testing import CliRunner
-from warcdb import warcdb_cli
 import os
-import re
 import pathlib
+import re
+
 import pytest
 import sqlite_utils
-from unittest import TestCase
+from click.testing import CliRunner
+from warcdb import warcdb_cli
 
 db_file = "test_warc.db"
 tests_dir = pathlib.Path(__file__).parent
Expand Down
1 change: 1 addition & 0 deletions warcdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,6 +264,7 @@ def to_import():
if f.startswith('http'):
yield from tqdm(ArchiveIterator(req.get(f, stream=True).raw, arc2warc=True), desc=f)
elif f.endswith('.wacz'):
# TODO: can we support loading WACZ files by URL?
wacz = zipfile.ZipFile(f)
warcs = filter(lambda f: f.filename.endswith('warc.gz'), wacz.infolist())
for warc in warcs:
Expand Down

0 comments on commit 89d8a75

Please sign in to comment.