diff --git a/kingfisher_scrapy/pipelines.py b/kingfisher_scrapy/pipelines.py
index 4ebb1b466..f5a28b074 100644
--- a/kingfisher_scrapy/pipelines.py
+++ b/kingfisher_scrapy/pipelines.py
@@ -46,7 +46,7 @@ def __init__(self):
     def process_item(self, item, spider):
         if isinstance(item, (File, FileItem)):
             if item.invalid_json:
-                raise DropItem(f'Invalid JSON data')
+                raise DropItem('Invalid JSON data')
 
             validator = self.validators.get(item.__class__.__name__)
             if validator:
diff --git a/tests/pipelines/test_validate.py b/tests/pipelines/test_validate.py
index 68a868629..62098a4ac 100644
--- a/tests/pipelines/test_validate.py
+++ b/tests/pipelines/test_validate.py
@@ -159,18 +159,18 @@ def test_process_item_with_duplicate_file_item(caplog):
     assert str(excinfo.value) == "Duplicate FileItem: ('test1', 1)"
 
 
-@pytest.mark.parametrize('klass, message', [(File, "'test.json'"), (FileItem, "('test.json', 1)")])
-def test_process_item_with_invalid_json(klass, message):
-    spider = spider_with_crawler()
+@pytest.mark.parametrize('klass', [File, FileItem])
+def test_process_item_with_invalid_json(klass):
     pipeline = Validate()
+    spider = spider_with_crawler()
 
     kwargs = {'number': 1} if klass is FileItem else {}
 
     item = klass(
-        file_name='test.json',
-        url='http://test.com',
+        file_name='test',
+        url='http://example.com',
         data_type='release_package',
-        data='{"key": "value"}',
+        data='{"broken": }',
         invalid_json=True,
         **kwargs
     )
@@ -178,4 +178,4 @@
     with pytest.raises(DropItem) as excinfo:
         pipeline.process_item(item, spider)
 
-    assert str(excinfo.value) == f"Invalid {klass.__name__} data: {message}"
+    assert str(excinfo.value) == 'Invalid JSON data'
diff --git a/tests/test_spidermiddlewares.py b/tests/test_spidermiddlewares.py
index e6b89e21a..671942f03 100644
--- a/tests/test_spidermiddlewares.py
+++ b/tests/test_spidermiddlewares.py
@@ -9,13 +9,13 @@
 from kingfisher_scrapy.items import File, FileError, FileItem
 from kingfisher_scrapy.spidermiddlewares import (
     AddPackageMiddleware,
-    ValidateJSONMiddleware,
     ConcatenatedJSONMiddleware,
     LineDelimitedMiddleware,
     ReadDataMiddleware,
     ResizePackageMiddleware,
     RetryDataErrorMiddleware,
     RootPathMiddleware,
+    ValidateJSONMiddleware,
 )
 
 from tests import response_fixture, spider_with_crawler
@@ -45,13 +45,13 @@ async def alist(iterable):
         file_name='test.json',
         url='http://test.com',
         data_type='release_package',
-        data={},
+        data='{}',
     ),
     FileItem(
         file_name='test.json',
         url='http://test.com',
         data_type='release_package',
-        data={},
+        data='{}',
         number=1,
     ),
     FileError(
@@ -114,8 +114,8 @@ async def test_bytes_or_file(middleware_class, attribute, value, override, tmpdi
         'file_name': 'test.json',
         'url': 'http://test.com',
         'data_type': 'release',
-        'path': '',
         'invalid_json': False,
+        'path': '',
     }
 
     expected.update(override)
@@ -161,8 +161,8 @@ async def test_encoding(middleware_class, attribute, value, override, tmpdir):
         'file_name': 'test.json',
         'url': 'http://test.com',
         'data_type': 'release',
-        'path': '',
         'invalid_json': False,
+        'path': '',
     }
 
     expected.update(override)
@@ -203,8 +203,8 @@ async def test_add_package_middleware(data_type, data, root_path):
     expected = {
         'file_name': 'test.json',
         'url': 'http://test.com',
-        'path': '',
         'invalid_json': False,
+        'path': '',
     }
     if 'item' in root_path:
         expected['number'] = 1
@@ -295,8 +295,8 @@ async def test_json_streaming_middleware(middleware_class, attribute, separator,
             'data_type': 'release_package',
             'data': data,
             'number': i,
-            'path': '',
             'invalid_json': False,
+            'path': '',
         }
 
 
@@ -379,8 +379,8 @@ async def test_json_streaming_middleware_with_compressed_file_spider(middleware_
             'data_type': 'release_package',
             'data': data,
             'number': i,
-            'path': '',
             'invalid_json': False,
+            'path': '',
         }
 
 
@@ -543,13 +543,10 @@ async def test_validate_json_middleware(invalid, klass):
         file_name='test.json',
         url='http://test.com',
         data_type='release_package',
-        data='{"key": "value"}',
+        data='{"broken": }' if invalid else '{"key": "value"}',
         **kwargs
     )
 
-    if invalid:
-        item.data = '{"broken": }'
-
     generator = middleware.process_spider_output(None, _aiter([item]), spider)
     transformed_items = await alist(generator)
 