Merge pull request #1 from bfaludi/csv
[wip] csv dialect
bfaludi committed Oct 9, 2015
2 parents 771807d + 00f0ca5 commit 161ba69
Showing 6 changed files with 275 additions and 16 deletions.
4 changes: 2 additions & 2 deletions riwo/__init__.py
@@ -12,7 +12,7 @@ def name(self):
# str
@property
def encoding(self):
if hasattr(self.resource, 'encoding'):
if hasattr(self.resource, 'encoding') and self.resource.encoding is not None:
return str(self.resource.encoding)

return 'utf-8'
@@ -170,7 +170,7 @@ def write(self):

# void
def write_item(self, item):
raise extension.NotImplemented("{self}'s `write_item(item)` function is not implemented yet!" \
raise exceptions.NotImplemented("{self}'s `write_item(item)` function is not implemented yet!" \
.format(self=self.name))

from .dialects import *
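The extra `is not None` check above matters for resources such as HTTP responses whose `encoding` attribute exists but is set to None; without it, the property would return the literal string 'None' instead of falling back to 'utf-8'. A minimal standalone sketch of the two behaviours (the Resource stub is hypothetical, not part of riwo):

    class Resource(object):
        encoding = None  # e.g. a response whose charset could not be detected

    resource = Resource()

    # Old check: hasattr() alone passes, so str(None) -> 'None'
    old = str(resource.encoding) if hasattr(resource, 'encoding') else 'utf-8'

    # New check: the added `is not None` guard falls back to 'utf-8'
    new = (str(resource.encoding)
           if hasattr(resource, 'encoding') and resource.encoding is not None
           else 'utf-8')

    print(old)  # prints: None
    print(new)  # prints: utf-8
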
11 changes: 11 additions & 0 deletions riwo/compat.py
@@ -13,11 +13,15 @@
long = int
unicode = str

import io
StringIO=io.StringIO

import urllib.request
import urllib.parse
urlparse = urllib.parse.urlparse
urlopen = urllib.request.urlopen


else:
string_types = basestring,
integer_types = (int, long)
@@ -26,6 +30,13 @@
binary_type = str
long = long

import codecs
getencoder=codecs.getincrementalencoder
getreader=codecs.getreader

import cStringIO
StringIO=cStringIO.StringIO

import urllib
import urlparse
urlparse = urlparse.urlparse
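For reference, the new compat aliases resolve to the following standard-library helpers (a sketch of the assumed behaviour; on Python 3 only the text buffer is needed, while the codecs helpers back the Python 2 UnicodeWriter added in riwo/dialects/csv.py below):

    import codecs
    import io

    # Python 3 branch: StringIO is an in-memory unicode buffer
    buf = io.StringIO()
    buf.write(u'Űrállomás krízis')

    # Python 2 branch: what `getencoder` and `getreader` alias
    encoder = codecs.getincrementalencoder('utf-8')()
    assert encoder.encode(u'Űrállomás krízis') == u'Űrállomás krízis'.encode('utf-8')
    reader_factory = codecs.getreader('utf-8')
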
91 changes: 82 additions & 9 deletions riwo/dialects/csv.py
@@ -15,14 +15,12 @@
QUOTE_NONE
)

# TODO: Python 2.7 compatibility is missing. (encoding is failing)

class Reader(AbstractReader):
class Py3Reader(AbstractReader):
# void
def __init__(self, resource, schema, offset=0, limit=None, use_header=False, **fmtparams):
self.fmtparams = fmtparams
self.use_header = use_header
super(Reader,self).__init__(resource, schema, offset, limit)
super(Py3Reader,self).__init__(resource, schema, offset, limit)

# function
def get_mapper(self):
@@ -32,23 +30,47 @@ def get_mapper(self):

# Iterable (csv.reader or csv.DictReader)
def get_iterable_data(self):
resource_gen = (decode(r, self.encoding) for r in to_iterable(self.resource)) # csv package can't open bytestream
resource_gen = (decode(r, self.encoding) for r in to_iterable(self.resource))
return csv.reader(resource_gen, **self.fmtparams) \
if not self.use_header \
else csv.DictReader(resource_gen, **self.fmtparams)

class Writer(AbstractWriter):
class Py2Reader(AbstractReader):
# void
def __init__(self, resource, schema, offset=0, limit=None, use_header=False, **fmtparams):
self.fmtparams = fmtparams
self.use_header = use_header
super(Py2Reader,self).__init__(resource, schema, offset, limit)

# function
def get_mapper(self):
return daprot.mapper.INDEX \
if not self.use_header \
else daprot.mapper.NAME

# Iterable
def get_iterable_data(self):
resource_gen = (encode(r, self.encoding) for r in to_iterable(self.resource))
reader_gen = csv.reader(resource_gen, **self.fmtparams)
result_gen = ( [unicode(c, self.encoding) for c in r] for r in reader_gen )
if not self.use_header:
return result_gen

header = result_gen.next()
return (dict(zip(header,r)) for r in result_gen)

class Py3Writer(AbstractWriter):
# void
def __init__(self, resource, iterable_data, input_schema=None, add_header=True, **fmtparams):
self.fmtparams = fmtparams
self.add_header = add_header
super(Writer, self).__init__(resource, iterable_data, input_schema)
super(Py3Writer, self).__init__(resource, iterable_data, input_schema)

if self.is_nested():
raise exceptions.NestedSchemaNotSupported("{self} does not support nested schemas." \
.format(self=self.name))

# csv.writer
# csv.DictWriter
def init_writer(self):
return csv.DictWriter(self.resource, fieldnames=self.fieldnames, **self.fmtparams)

Expand All @@ -63,8 +85,59 @@ def unmarshal_item(self, item):
# void
def write(self):
if self.add_header: self.writer.writeheader()
super(Writer, self).write()
super(Py3Writer, self).write()

# void
def write_item(self, item):
self.writer.writerow(unmarshal(item, self.unmarshal_item))

class Py2Writer(Py3Writer):
# UnicodeWriter
def init_writer(self):
return UnicodeWriter(self.resource, **self.fmtparams)

# void
def write_header(self):
self.writer.writerow(self.fieldnames)

# void
def write(self):
if self.add_header: self.write_header()
for item in self.reader:
self.write_item(item)

# void
def write_item(self, item):
data = unmarshal(item, self.unmarshal_item)
self.writer.writerow([data[c] for c in self.fieldnames])

class UnicodeWriter:
def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
# Redirect output to a queue
self.queue = StringIO()
self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
self.stream = f
self.encoder = getencoder(encoding)()

def writerow(self, row):
self.writer.writerow([encode(s, "utf-8") for s in row])
# Fetch UTF-8 output from the queue ...
data = self.queue.getvalue()
data = decode(data, "utf-8")
# ... and reencode it into the target encoding
# data = self.encoder.encode(data)
# write to the target stream
self.stream.write(data)
# empty queue
self.queue.truncate(0)

def writerows(self, rows):
for row in rows:
self.writerow(row)

if PY3:
Reader = Py3Reader
Writer = Py3Writer
else:
Reader = Py2Reader
Writer = Py2Writer
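
With the version switch above, the module keeps one public pair of names, riwo.csv.Reader and riwo.csv.Writer, bound to the matching implementation at import time. A usage sketch in the spirit of the tests below (the schema, file names, and row values are illustrative only, and the rows are assumed to be dicts keyed by field name, as CommonReader.expected_result appears to be):

    import io
    import daprot as dp
    import riwo

    class ProductSchema(dp.SchemaFlow):
        id = dp.Field()
        name = dp.Field()
        price = dp.Field()

    rows = [
        {'id': u'P0001', 'name': u'đói', 'price': u'449'},
        {'id': u'P0002', 'name': u'배고픈', 'price': u'399'},
    ]

    # Write the rows with a header line, as in LocalWriter.test_add_header
    with io.open('output.csv', 'w', encoding='utf-8') as resource:
        writer = riwo.csv.Writer(resource, rows, ProductSchema, add_header=True)
        writer.write()

    # Read a semicolon-delimited file through index-mapped fields, as in LocalReader
    class IndexedSchema(dp.SchemaFlow):
        id = dp.Field(0)
        name = dp.Field(1)
        price = dp.Field(2)

    with io.open('test.csv', 'r', encoding='utf-8') as resource:
        reader = riwo.csv.Reader(resource, IndexedSchema, delimiter=';')
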
174 changes: 174 additions & 0 deletions riwo/tests/csv.py
@@ -0,0 +1,174 @@
# -*- coding: utf-8 -*-

from __future__ import absolute_import
import io
import os
import riwo
import daprot as dp
import string
import random
import datetime
import unittest
import requests
from riwo.compat import *
from . import CommonReader, __dir__, __remote__

class IndexSchema(dp.SchemaFlow):
id = dp.Field(0)
name = dp.Field(1, type=unicode, transforms=unicode.strip)
price = dp.Field(2)
quantity = dp.Field(3, type=long)
updated_at = dp.Field(4)

class HeaderedSchema(dp.SchemaFlow):
id = dp.Field()
name = dp.Field(type=unicode, transforms=unicode.strip)
price = dp.Field()
quantity = dp.Field(type=long)
updated_at = dp.Field()

class LocalReader(CommonReader, unittest.TestCase):
def setUp(self):
self.resource = io.open(os.path.join(__dir__, 'test.csv'), 'r', encoding='utf-8')
self.reader = riwo.csv.Reader(self.resource, IndexSchema, delimiter=';')

class LocalHeaderReader(CommonReader, unittest.TestCase):
def setUp(self):
self.resource = io.open(os.path.join(__dir__, 'test-header.csv'), 'r', encoding='utf-8')
self.reader = riwo.csv.Reader(self.resource, HeaderedSchema, use_header=True)

class RequestsReader(CommonReader, unittest.TestCase):
def setUp(self):
self.resource = requests.get(os.path.join(__remote__, 'test.csv'))
self.reader = riwo.csv.Reader(self.resource, IndexSchema, delimiter=';')

class RequestsHeaderReader(CommonReader, unittest.TestCase):
def setUp(self):
self.resource = requests.get(os.path.join(__remote__, 'test-header.csv'))
self.reader = riwo.csv.Reader(self.resource, HeaderedSchema, use_header=True)

class UrllibReader(CommonReader, unittest.TestCase):
def setUp(self):
self.resource = urlopen(os.path.join(__remote__, 'test.csv'))
self.reader = riwo.csv.Reader(self.resource, IndexSchema, delimiter=';')

class UrllibHeaderReader(CommonReader, unittest.TestCase):
def setUp(self):
self.resource = urlopen(os.path.join(__remote__, 'test-header.csv'))
self.reader = riwo.csv.Reader(self.resource, HeaderedSchema, use_header=True)

class LocalWriter(unittest.TestCase):
class Schema(dp.SchemaFlow):
id = dp.Field()
name = dp.Field()
price = dp.Field()
quantity = dp.Field()
updated_at = dp.Field()

class NestedSchema(dp.SchemaFlow):
inner_schema = dp.DictOf(IndexSchema)

iterable_input = CommonReader.expected_result
test_file_base = os.path.join(__dir__, 'output-{token}.csv')

def setUp(self):
self.test_file_path = self.test_file_base.format(token=''.join([random.choice(string.ascii_uppercase) for i in range(6)]))
self.resource = io.open(self.test_file_path, 'w', encoding='utf-8')

def test_add_header(self):
self.writer = riwo.csv.Writer(self.resource, self.iterable_input, self.Schema, add_header=True)
self.writer.write()
self.content = u'''\
id,name,price,quantity,updated_at
P0001,đói,449,1,2015.09.20 20:00
P0002,배고픈,399,1,2015.09.20 20:02
P0003,голодный,199,10,
P0004,Űrállomás krízis,"999,5",1,2015.09.20 12:47
P0005,Ovális iroda,2 399,1,2015.09.20 07:31
'''

def test_not_add_header(self):
self.writer = riwo.csv.Writer(self.resource, self.iterable_input, self.Schema, add_header=False)
self.writer.write()
self.content = u'''\
P0001,đói,449,1,2015.09.20 20:00
P0002,배고픈,399,1,2015.09.20 20:02
P0003,голодный,199,10,
P0004,Űrállomás krízis,"999,5",1,2015.09.20 12:47
P0005,Ovális iroda,2 399,1,2015.09.20 07:31
'''

def test_delimiter(self):
self.writer = riwo.csv.Writer(self.resource, self.iterable_input, self.Schema, delimiter='\001')
self.writer.write()
self.content = u'''\
id\001name\001price\001quantity\001updated_at
P0001\001đói\001449\0011\0012015.09.20 20:00
P0002\001배고픈\001399\0011\0012015.09.20 20:02
P0003\001голодный\001199\00110\001
P0004\001Űrállomás krízis\001999,5\0011\0012015.09.20 12:47
P0005\001Ovális iroda\0012 399\0011\0012015.09.20 07:31
'''

def test_quote(self):
self.writer = riwo.csv.Writer(self.resource, self.iterable_input, self.Schema, \
delimiter='\\', quotechar='`', quoting=riwo.csv.QUOTE_ALL)
self.writer.write()
self.content = u'''\
`id`\`name`\`price`\`quantity`\`updated_at`
`P0001`\`đói`\`449`\`1`\`2015.09.20 20:00`
`P0002`\`배고픈`\`399`\`1`\`2015.09.20 20:02`
`P0003`\`голодный`\`199`\`10`\``
`P0004`\`Űrállomás krízis`\`999,5`\`1`\`2015.09.20 12:47`
`P0005`\`Ovális iroda`\`2 399`\`1`\`2015.09.20 07:31`
'''

def test_defined_schema_writer(self):
self.writer = riwo.csv.Writer(self.resource, self.iterable_input, self.Schema)
self.writer.write()
self.content = u'''\
id,name,price,quantity,updated_at
P0001,đói,449,1,2015.09.20 20:00
P0002,배고픈,399,1,2015.09.20 20:02
P0003,голодный,199,10,
P0004,Űrállomás krízis,"999,5",1,2015.09.20 12:47
P0005,Ovális iroda,2 399,1,2015.09.20 07:31
'''

def test_schema_flow_writer(self):
self.writer = riwo.csv.Writer(self.resource, self.Schema(self.iterable_input, mapper=dp.mapper.NAME))
self.writer.write()
self.content = u'''\
id,name,price,quantity,updated_at
P0001,đói,449,1,2015.09.20 20:00
P0002,배고픈,399,1,2015.09.20 20:02
P0003,голодный,199,10,
P0004,Űrállomás krízis,"999,5",1,2015.09.20 12:47
P0005,Ovális iroda,2 399,1,2015.09.20 07:31
'''

def test_without_schema(self):
with self.assertRaises(riwo.exceptions.SchemaRequired):
riwo.csv.Writer(self.resource, self.iterable_input)
self.content = u''

def test_unmarshal(self):
self.writer = riwo.csv.Writer(self.resource, [], self.Schema)
self.content = u''

current_datetime = datetime.datetime.now()
self.assertEqual(self.writer.unmarshal_item(current_datetime), current_datetime.isoformat())
self.assertEqual(self.writer.unmarshal_item(4.21), u'4.21')
self.assertEqual(self.writer.unmarshal_item(u'голодный'), u'голодный')

def test_requisites(self):
with self.assertRaises(riwo.exceptions.NestedSchemaNotSupported):
riwo.csv.Writer(self.resource, [], self.NestedSchema)
self.content = u''

def tearDown(self):
self.resource.close()
with io.open(self.test_file_path, 'r', encoding='utf-8') as f:
content = f.read()
self.assertEqual(self.content, content)
os.remove(self.test_file_path)
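
The new suite relies only on the standard unittest runner (plus requests for the remote cases), so it can be run on its own, for example:

    # assumed module path, matching riwo/tests/csv.py above
    import unittest
    unittest.main(module='riwo.tests.csv', exit=False)
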
1 change: 1 addition & 0 deletions riwo/tests/source/.gitignore
@@ -0,0 +1 @@
output-*
10 changes: 5 additions & 5 deletions riwo/tests/source/test.csv
@@ -1,5 +1,5 @@
P0001,đói,449,1,2015.09.20 20:00
P0002,배고픈,399,1,2015.09.20 20:02
P0003,голодный,199,10,
P0004,Űrállomás krízis,"999,5",1,2015.09.20 12:47
P0005,Ovális iroda,2 399,1,2015.09.20 07:31
P0001;đói;449;1;2015.09.20 20:00
P0002;배고픈;399;1;2015.09.20 20:02
P0003;голодный;199;10;
P0004;Űrállomás krízis;"999,5";1;2015.09.20 12:47
P0005;Ovális iroda;2 399;1;2015.09.20 07:31
