Skip to content

Commit

Permalink
added support for readable/seekable objects as input
Browse files Browse the repository at this point in the history
 - add() and file_digest() now accept a readable/seekable object
 - added 'Fsdb._calc_digest()'
 - added 'Fsdb._copy_content()'
 - added some tests
  • Loading branch information
ael-code committed Mar 23, 2015
1 parent e8e113a commit 865bef1
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 27 deletions.
79 changes: 55 additions & 24 deletions fsdb/fsdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class Fsdb(object):
are managed using a directory tree generated from the file digest
"""

BLOCK_SIZE = 2**20
CONFIG_FILE = ".fsdb.conf"

def __init__(self, fsdbRoot, deep=None, hash_alg=None, fmode=None, dmode=None):
Expand Down Expand Up @@ -93,9 +94,37 @@ def __init__(self, fsdbRoot, deep=None, hash_alg=None, fmode=None, dmode=None):

self.logger.debug("Fsdb initialized successfully: "+self.__str__())

def _calc_digest(self, path):
"""calculate digest of the file at the given path"""
return Fsdb.file_digest(path, algorithm=self._conf['hash_alg'])
def _calc_digest(self, origin):
    """Calculate the digest of a file or of a readable/seekable object.

    The hash algorithm is the one stored in ``self._conf['hash_alg']``.

    Args:
        origin -- either the path of a file or a readable/seekable object
                  (file object, stream, StringIO...)
    Returns:
        String representing the digest for the given origin
    """
    algorithm = self._conf['hash_alg']
    if hasattr(origin, 'read') and hasattr(origin, 'seek'):
        # Hashing reads the object from its current position up to the end;
        # remember where it started so the caller gets it back unchanged.
        pos = origin.tell()
        try:
            return Fsdb.file_digest(origin, algorithm=algorithm)
        finally:
            # Restore the position even when reading or hashing fails.
            origin.seek(pos)
    with open(origin, 'rb') as f:
        return Fsdb.file_digest(f, algorithm=algorithm)

def _copy_content(self, origin, dstPath):
    """Copy the content of origin into the file at dstPath.

    Args:
        origin -- either the path of a file or a readable/seekable object;
                  an object is copied from its current position to its end
                  and is then moved back to that starting position
        dstPath -- path of the destination file (opened for binary writing)
    """
    if hasattr(origin, 'read') and hasattr(origin, 'seek'):
        pos = origin.tell()
        try:
            with open(dstPath, 'wb') as dst:
                # Chunked copy, Fsdb.BLOCK_SIZE bytes at a time, so large
                # inputs are never loaded into memory as a whole.
                shutil.copyfileobj(origin, dst, Fsdb.BLOCK_SIZE)
        finally:
            # Restore the position even when the copy fails midway.
            origin.seek(pos)
    else:
        shutil.copyfile(origin, dstPath)

def _create_empty_file(self, path):
oldmask = os.umask(0)
Expand Down Expand Up @@ -125,18 +154,16 @@ def _makedirs(self, path):
else:
raise e

def add(self, filePath):
"""Add an existing file to fsdb.
File under @filePath will be copied under fsdb directory tree
def add(self, origin):
"""Add new element to fsdb.
Args:
filePath -- path of the file to be add
origin -- could be the path of a file or a readable/seekable object ( fileobject, stream, stringIO...)
Returns:
String representing the digest of the file
"""
if not os.path.isfile(filePath):
raise Exception("fsdb can not add: not regular file received")

digest = self._calc_digest(filePath)
digest = self._calc_digest(origin)

if self.exists(digest):
self.logger.debug('Added File: ['+digest+'] ( Already exists. Skipping transfer)')
Expand All @@ -148,16 +175,16 @@ def add(self, filePath):
# make all parent directories if they do not exist
self._makedirs(absFolderPath)
self._create_empty_file(absPath)
shutil.copyfile(filePath, absPath)
self._copy_content(origin, absPath)

self.logger.debug('Added file: "'+filePath+'" -> "'+absPath+'" [ '+digest+' ]')
self.logger.debug('Added file: "'+digest+'" [ '+absPath+' ]')

return digest

def remove(self, digest):
"""Remove an existing file from fsdb.
File with the given digest will be removed from fsdb and
the directory tree will be cleaned (remove empty folders)
File with the given digest will be removed from fsdb and
the directory tree will be cleaned (remove empty folders)
Args:
digest -- digest of the file to remove
"""
Expand Down Expand Up @@ -251,6 +278,8 @@ def __contains__(self, digest):
def __getitem__(self, digest):
"""Return a read-only file object of the stored file with the given digest
Client should care about closing the file object after finished with it.
Could raise ``IOError`` according to the standard ``open()`` function.
If you need to write on file or implement some more complicated logic refer to :py:func:`get_file_path()`
"""
Expand All @@ -261,11 +290,11 @@ def __getitem__(self, digest):
return open(self.get_file_path(digest), 'rb')

@staticmethod
def file_digest(filepath, algorithm="sha1", block_size=2**20):
"""Calculate digest of the file located at @filepath
def file_digest(origin, algorithm="sha1", block_size=None):
"""Calculate digest of a readable object
Args:
filepath -- the filepath of the file from which calculate digest
Args:
origin -- a readable object for which calculate digest
algorithm -- the algorithm to use [md5,sha1,sha224,sha256,sha384,sha512]
block_size -- the size of the block to read at each iteration
"""
Expand All @@ -284,13 +313,15 @@ def file_digest(filepath, algorithm="sha1", block_size=2**20):
else:
raise ValueError('"' + algorithm + '" it is not a supported algorithm function')

if not block_size:
block_size = Fsdb.BLOCK_SIZE

hashM = algFunct()
with open(filepath, 'rb') as f:
while True:
chunk = f.read(block_size)
if not chunk:
break
hashM.update(chunk)
while True:
chunk = origin.read(block_size)
if not chunk:
break
hashM.update(chunk)
return hashM.hexdigest()

@staticmethod
Expand Down
31 changes: 28 additions & 3 deletions tests/fsdb_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ def createTestFile(self):
def test_add(self):
    """Adding a file by its path completes without raising."""
    testFilePath = self.createTestFile()
    self.fsdb.add(testFilePath)

def test_add_readable(self):
    """Adding an open binary file object completes without raising."""
    testFilePath = self.createTestFile()
    with open(testFilePath, 'rb') as stream:
        self.fsdb.add(stream)

def test_file_exists(self):
testFilePath = self.createTestFile()
digest = self.fsdb.add(testFilePath)
Expand All @@ -45,12 +49,26 @@ def test_get_file_path(self):
self.assertIsInstance(self.fsdb.get_file_path(digest),basestring)
self.assertTrue(os.path.isabs(self.fsdb.get_file_path(digest)))

def test_same_file_after_retrieval(self):
def test_same_digest_file_and_readable(self):
    """The same content yields the same digest whether added by path or by file object."""
    testFilePath = self.createTestFile()
    fileDigest = self.fsdb.add(testFilePath)
    with open(testFilePath, 'rb') as testFile:
        readableDigest = self.fsdb.add(testFile)
    # assertEqual reports both digests on failure, unlike assertTrue(a == b).
    self.assertEqual(readableDigest, fileDigest)

def test_same_file_content_after_retrieval(self):
    """A file added by path is stored with identical content."""
    source = self.createTestFile()
    stored = self.fsdb.get_file_path(self.fsdb.add(source))
    # shallow=False forces a content comparison instead of an os.stat() one.
    identical = filecmp.cmp(source, stored, shallow=False)
    self.assertTrue(identical)


def test_same_readable_content_after_retrieval(self):
    """Content added through a file object is stored identical to its source file."""
    source = self.createTestFile()
    with open(source, 'rb') as stream:
        digest = self.fsdb.add(stream)
    stored = self.fsdb.get_file_path(digest)
    # shallow=False forces a content comparison instead of an os.stat() one.
    identical = filecmp.cmp(source, stored, shallow=False)
    self.assertTrue(identical)

def test_remove_existing_file(self):
testFilePath = self.createTestFile()
digest = self.fsdb.add(testFilePath)
Expand Down Expand Up @@ -123,11 +141,18 @@ def test_contains_empty(self):
digest = self.fsdb.add(self.createTestFile())
self.assertIn(digest, self.fsdb)

def test_get_item(self):
def test_get_item_with_file_insertion(self):
    """fsdb[digest] returns the same bytes as the file that was added by path."""
    source = self.createTestFile()
    digest = self.fsdb.add(source)
    with open(source, 'rb') as original, self.fsdb[digest] as stored:
        expected = original.read()
        actual = stored.read()
        self.assertEqual(expected, actual)

def test_get_item_with_readable_insertion(self):
    """fsdb[digest] returns the same bytes as the source of an added file object."""
    source = self.createTestFile()
    with open(source, 'rb') as stream:
        digest = self.fsdb.add(stream)
    with open(source, 'rb') as original, self.fsdb[digest] as stored:
        expected = original.read()
        actual = stored.read()
        self.assertEqual(expected, actual)

def test_get_item_type_error(self):
with self.assertRaises(TypeError):
Expand Down

1 comment on commit 865bef1

@boyska
Copy link
Contributor

@boyska boyska commented on 865bef1 Mar 24, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

great! with these improvements, it should now be possible to rewrite many unit tests in order to use StringIO or cStringIO, achieving better performance and less I/O.

Please sign in to comment.