Skip to content

Commit

Permalink
timing tests
Browse files Browse the repository at this point in the history
  • Loading branch information
gdraheim committed Sep 5, 2024
1 parent b526ce5 commit 53051c7
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 1 deletion.
30 changes: 30 additions & 0 deletions tabtotext.tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
KEEP = 0
TABTO = "./tabtotext.py"
FIXME = True
BIGFILE = 100000

try:
from tabtools import currency_default
Expand Down Expand Up @@ -12385,6 +12386,33 @@ def test_9866(self) -> None:
logg.debug("\n>> %s\n<< %s", want, back)
self.assertEqual(want, back)
self.assertEqual(test, scan)
def test_9888(self) -> None:
    """Timing test for big files: generate 5 sheets of BIGFILE//10 rows
    (two numbers per row, i.e. BIGFILE numbers total), write them as xlsx
    via print_tablist (openpyxl path), then convert the xlsx to json with
    the command-line tool, logging the elapsed time of each phase."""
    tmp = self.testdir()
    tabs: List[TabSheet] = []
    for sheet in range(5):
        rows: JSONList = []
        for row in range(BIGFILE // 10):
            num = sheet * BIGFILE + row
            vals: JSONDict = {"a": num, "b": BIGFILE + num}
            rows.append(vals)
        title = "data%i" % sheet
        tabs.append(TabSheet(rows, [], title))
    filename = path.join(tmp, "bigfile.xlsx")
    output = path.join(tmp, "bigfile.json")
    starting = Time.now()
    print_tablist(filename, tabs)  # phase 1: write the xlsx file
    generated = Time.now()
    # phase 2: read the xlsx back and convert to json via the CLI tool
    # (fix: the input filename was garbled here - the tool must read the
    # xlsx that was just generated)
    sh(F"{TABTO} -^ {filename} -o {output}")
    converted = Time.now()
    logg.info("| %i numbers openpyxl write xlsx time | %s", BIGFILE, generated - starting)
    logg.info("| %i numbers read xlsx and write json | %s", BIGFILE, converted - generated)
    text = open(output).read()
    # logg.debug("=>\n%s", text)
    test = tablistscanJSON(text)
    scan = tablistfile(output)
    back = dict(tablistmap(scan))
    # logg.debug("\n<< %s", back)
    self.assertEqual(test, scan)

if __name__ == "__main__":
# unittest.main()
Expand All @@ -12393,12 +12421,14 @@ def test_9866(self) -> None:
cmdline.add_option("-v", "--verbose", action="count", default=0, help="more verbose logging")
cmdline.add_option("-^", "--quiet", action="count", default=0, help="less verbose logging")
cmdline.add_option("-k", "--keep", action="count", default=0, help="keep testdir")
cmdline.add_option("--bigfile", metavar=str(BIGFILE), default=BIGFILE)
cmdline.add_option("--failfast", action="store_true", default=False,
help="Stop the test run on the first error or failure. [%default]")
cmdline.add_option("--xmlresults", metavar="FILE", default=None,
help="capture results as a junit xml file [%default]")
opt, args = cmdline.parse_args()
logging.basicConfig(level=max(0, logging.WARNING - 10 * opt.verbose + 10 * opt.quiet))
BIGFILE = int(opt.bigfile)
KEEP = opt.keep
if not args:
args = ["test_*"]
Expand Down
9 changes: 8 additions & 1 deletion tabxlsx.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,14 @@ The original implementation in tabtoxlsx was based on openpyx1. The resulting xl
files were inspected how to write them with just Python's internal `zipfile`. The
xlsx reader is using `zipfile` and Python's internal `xml.etree`. This should be
portable to JPython and IronPython as well. And tests showed tabxlsx to be 10x
faster than openpyxl for small datasets.
faster than openpyxl for small datasets. For large datasets it is 3-4x faster.

| test_9888 (default --bigfile=100000 and --bigfile=1000000) | time |
| ---------------------------------------------------------- | ---- |
| 100000 numbers tabxlsx write xlsx time | 00'02.548209 |
| 100000 numbers openpyxl write xlsx time | 00'09.300701 |
| 1000000 numbers tabxlsx write xlsx time | 00'27.265536 |
| 1000000 numbers openpyxl write xlsx time | 01'31.813367 |

Have fun!

Expand Down
31 changes: 31 additions & 0 deletions tabxlsx.tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
KEEP = 0
TABTO = "./tabxlsx.py"
FIXME = True
BIGFILE = 100000


try:
from tabtools import currency_default
Expand Down Expand Up @@ -2635,6 +2637,33 @@ def test_9866(self) -> None:
logg.debug("\n>> %s\n<< %s", want, back)
self.assertEqual(want, back)
self.assertEqual(test, scan)
def test_9888(self) -> None:
    """Timing test for big files: generate 5 sheets of BIGFILE//10 rows
    (two numbers per row, i.e. BIGFILE numbers total), write them as xlsx
    via print_tablist (tabxlsx zipfile path), then convert the xlsx to
    json with the command-line tool, logging each phase's elapsed time."""
    tmp = self.testdir()
    tabs: List[TabSheet] = []
    for sheet in range(5):
        rows: JSONList = []
        for row in range(BIGFILE // 10):
            num = sheet * BIGFILE + row
            vals: JSONDict = {"a": num, "b": BIGFILE + num}
            rows.append(vals)
        title = "data%i" % sheet
        tabs.append(TabSheet(rows, [], title))
    filename = path.join(tmp, "bigfile.xlsx")
    output = path.join(tmp, "bigfile.json")
    starting = Time.now()
    print_tablist(filename, tabs)  # phase 1: write the xlsx file
    generated = Time.now()
    # phase 2: read the xlsx back and convert to json via the CLI tool
    # (fix: the input filename was garbled here - the tool must read the
    # xlsx that was just generated)
    sh(F"{TABTO} -^ {filename} -o {output}")
    converted = Time.now()
    logg.info("| %i numbers tabxlsx write xlsx time | %s", BIGFILE, generated - starting)
    # fix: message said "read xls" - the file is xlsx, matching the
    # wording used by the parallel test in tabtotext.tests.py
    logg.info("| %i numbers read xlsx and write json | %s", BIGFILE, converted - generated)
    text = open(output).read()
    # logg.debug("=>\n%s", text)
    test = tablistscanJSON(text)
    scan = tablistfile(output)
    back = dict(tablistmap(scan))
    # logg.debug("\n<< %s", back)
    self.assertEqual(test, scan)

if __name__ == "__main__":
# unittest.main()
Expand All @@ -2643,12 +2672,14 @@ def test_9866(self) -> None:
cmdline.add_option("-v", "--verbose", action="count", default=0, help="more verbose logging")
cmdline.add_option("-^", "--quiet", action="count", default=0, help="less verbose logging")
cmdline.add_option("-k", "--keep", action="count", default=0, help="keep testdir")
cmdline.add_option("--bigfile", metavar=str(BIGFILE), default=BIGFILE)
cmdline.add_option("--failfast", action="store_true", default=False,
help="Stop the test run on the first error or failure. [%default]")
cmdline.add_option("--xmlresults", metavar="FILE", default=None,
help="capture results as a junit xml file [%default]")
opt, args = cmdline.parse_args()
logging.basicConfig(level=max(0, logging.WARNING - 10 * opt.verbose + 10 * opt.quiet))
BIGFILE = int(opt.bigfile)
KEEP = opt.keep
if not args:
args = ["test_*"]
Expand Down

0 comments on commit 53051c7

Please sign in to comment.