Skip to content
This repository has been archived by the owner on Jul 11, 2023. It is now read-only.

Commit

Permalink
Fixed processors for sample (#85) - v0.6.2
Browse files: browse the repository at this point in the history
* added test

* fixed dependencies' maximum versions

* fixed sample-processors bug

* minor refactoring

* updated version
  • Loading branch information
roll authored Sep 13, 2016
1 parent ec00d0e commit 5dd35e9
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 37 deletions.
18 changes: 9 additions & 9 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ def read(*paths):
# Prepare
PACKAGE = 'tabulator'
INSTALL_REQUIRES = [
'six>=1.9,<2.0',
'xlrd>=1.0,<2.0',
'ijson>=2.0,<3.0',
'chardet>=2.0,<3.0',
'openpyxl>=2.0,<3.0',
'requests>=2.8,<3.0',
'beautifulsoup4>=4.4,<5.0',
'linear-tsv>=0.99,<0.100',
'unicodecsv>=0.14,<0.15',
'six>=1.9,<2.0a',
'xlrd>=1.0,<2.0a',
'ijson>=2.0,<3.0a',
'chardet>=2.0,<3.0a',
'openpyxl>=2.0,<3.0a',
'requests>=2.8,<3.0a',
'beautifulsoup4>=4.4,<5.0a',
'linear-tsv>=0.99,<0.100a',
'unicodecsv>=0.14,<0.15a',
]
TESTS_REQUIRE = [
'pylama',
Expand Down
2 changes: 1 addition & 1 deletion tabulator/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.6.1
0.6.2
55 changes: 29 additions & 26 deletions tabulator/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def __init__(self,
loader_options=None,
parser_options=None,):

# Init parameters
# Defaults
if loader_options is None:
loader_options = {}
if parser_options is None:
Expand All @@ -90,7 +90,7 @@ def __init__(self,
if sample_size is None:
sample_size = helpers.DEFAULT_SAMPLE_SIZE

# Set headers
# Headers
self.__headers = None
self.__headers_row = 0
if isinstance(headers, (tuple, list)):
Expand All @@ -102,23 +102,23 @@ def __init__(self,
msg = msg % (self.__headers_row, sample_size)
raise exceptions.TabulatorException(msg)

# Set loader
# Loader
if scheme is None:
scheme = helpers.detect_scheme(source) or helpers.DEFAULT_SCHEME
if scheme not in _LOADERS:
message = 'Scheme "%s" is not supported' % scheme
raise exceptions.LoadingError(message)
self.__loader = _LOADERS[scheme](**loader_options)

# Set parser
# Parser
if format is None:
format = helpers.detect_format(source)
if format not in _PARSERS:
message = 'Format "%s" is not supported' % format
raise exceptions.ParsingError(message)
self.__parser = _PARSERS[format](**parser_options)

# Set attributes
# Attributes
self.__source = source
self.__encoding = encoding
self.__post_parse = post_parse
Expand Down Expand Up @@ -184,7 +184,9 @@ def sample(self):
"""list[]: sample of rows
"""
sample = []
for number, headers, row in self.__sample_extended_rows:
iterator = iter(self.__sample_extended_rows)
iterator = self.__apply_processors(iterator)
for number, headers, row in iterator:
sample.append(row)
return sample

Expand All @@ -199,16 +201,19 @@ def iter(self, keyed=False, extended=False):
mixed[]/mixed{}: row/keyed row/extended row
"""
extended_rows = self.__iter_exteneded_rows()
for processor in self.__post_parse:
extended_rows = processor(extended_rows)
for number, headers, row in extended_rows:
if extended:
yield (number, headers, row)
elif keyed:
yield dict(zip(headers, row))
else:
yield row
iterator = chain(
self.__sample_extended_rows,
self.__parser.extended_rows)
iterator = self.__apply_processors(iterator)
for number, headers, row in iterator:
if number > self.__number:
self.__number = number
if extended:
yield (number, headers, row)
elif keyed:
yield dict(zip(headers, row))
else:
yield row

def read(self, keyed=False, extended=False, limit=None):
"""Return table rows with count limit.
Expand Down Expand Up @@ -285,19 +290,17 @@ def __detect_html(self):
msg = 'Source has been detected as HTML (not supported)'
raise exceptions.TabulatorException(msg)

def __iter_exteneded_rows(self):

# Prepare iterator
iterator = chain(
self.__sample_extended_rows,
self.__parser.extended_rows)
def __apply_processors(self, iterator):

# Iter extended rows
for number, headers, row in iterator:
if number > self.__number:
self.__number = number
# Apply processors to iterator
def builtin_processor(extended_rows):
for number, headers, row in extended_rows:
headers = self.__headers
yield (number, headers, row)
processors = [builtin_processor] + self.__post_parse
for processor in processors:
iterator = processor(iterator)
return iterator


# Internal
Expand Down
16 changes: 15 additions & 1 deletion tests/test_topen.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,7 +524,21 @@ def cast_rows(extended_rows):

# Make assertions
assert table.headers == ['id', 'name']
assert table.read() == [[2, '中国人']]


def test_processors_sample():
    """Check that ``post_parse`` processors are applied to ``table.sample``."""

    # Processor: drop every extended row whose number is not 1
    def only_first_row(extended_rows):
        for number, header, row in extended_rows:
            if number != 1:
                continue
            yield (number, header, row)

    # Open the table with the processor attached
    table = topen('data/table.csv', post_parse=[only_first_row])

    # The sample must hold only the row the processor let through
    assert table.sample == [['id', 'name']]


# Tests [save]
Expand Down

0 comments on commit 5dd35e9

Please sign in to comment.