Skip to content

Commit

Permalink
timing tests
Browse files Browse the repository at this point in the history
  • Loading branch information
gdraheim committed Sep 5, 2024
1 parent b526ce5 commit 53051c7
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 1 deletion.
30 changes: 30 additions & 0 deletions tabtotext.tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
KEEP = 0
TABTO = "./tabtotext.py"
FIXME = True
BIGFILE = 100000

try:
from tabtools import currency_default
Expand Down Expand Up @@ -12385,6 +12386,33 @@ def test_9866(self) -> None:
logg.debug("\n>> %s\n<< %s", want, back)
self.assertEqual(want, back)
self.assertEqual(test, scan)
def test_9888(self) -> None:
    """Timing test for big files: generate 5 sheets of BIGFILE//10 rows
    (two numbers per row, i.e. BIGFILE numbers total), write them as xlsx
    via print_tablist (openpyxl path), then convert the xlsx to json with
    the command-line tool, logging the elapsed time of each phase."""
    tmp = self.testdir()
    tabs: List[TabSheet] = []
    for sheet in range(5):
        rows: JSONList = []
        for row in range(BIGFILE // 10):
            num = sheet * BIGFILE + row
            vals: JSONDict = {"a": num, "b": BIGFILE + num}
            rows.append(vals)
        title = "data%i" % sheet
        tabs.append(TabSheet(rows, [], title))
    filename = path.join(tmp, "bigfile.xlsx")
    output = path.join(tmp, "bigfile.json")
    starting = Time.now()
    print_tablist(filename, tabs)  # phase 1: write the xlsx file
    generated = Time.now()
    # phase 2: read the xlsx back and convert to json via the CLI tool
    # (fix: the input filename was garbled here - the tool must read the
    # xlsx that was just generated)
    sh(F"{TABTO} -^ {filename} -o {output}")
    converted = Time.now()
    logg.info("| %i numbers openpyxl write xlsx time | %s", BIGFILE, generated - starting)
    logg.info("| %i numbers read xlsx and write json | %s", BIGFILE, converted - generated)
    text = open(output).read()
    # logg.debug("=>\n%s", text)
    test = tablistscanJSON(text)
    scan = tablistfile(output)
    back = dict(tablistmap(scan))
    # logg.debug("\n<< %s", back)
    self.assertEqual(test, scan)

if __name__ == "__main__":
# unittest.main()
Expand All @@ -12393,12 +12421,14 @@ def test_9866(self) -> None:
cmdline.add_option("-v", "--verbose", action="count", default=0, help="more verbose logging")
cmdline.add_option("-^", "--quiet", action="count", default=0, help="less verbose logging")
cmdline.add_option("-k", "--keep", action="count", default=0, help="keep testdir")
cmdline.add_option("--bigfile", metavar=str(BIGFILE), default=BIGFILE)
cmdline.add_option("--failfast", action="store_true", default=False,
help="Stop the test run on the first error or failure. [%default]")
cmdline.add_option("--xmlresults", metavar="FILE", default=None,
help="capture results as a junit xml file [%default]")
opt, args = cmdline.parse_args()
logging.basicConfig(level=max(0, logging.WARNING - 10 * opt.verbose + 10 * opt.quiet))
BIGFILE = int(opt.bigfile)
KEEP = opt.keep
if not args:
args = ["test_*"]
Expand Down
9 changes: 8 additions & 1 deletion tabxlsx.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,14 @@ The original implementation in tabtoxlsx was based on openpyx1. The resulting xl
files were inspected how to write them with just Python's internal `zipfile`. The
xlsx reader is using `zipfile` and Python's internal `xml.etree`. This should be
portable to JPython and IronPython as well. And tests showed tabxlsx to be 10x
faster than openpyxl for small datasets.
faster than openpyxl for small datasets. For large datasets it is 3-4x faster.

| test_9888 (default --bigfile=100000 and --bigfile=1000000) | time |
| ---------------------------------------------------------- | ---- |
| 100000 numbers tabxlsx write xlsx time | 00'02.548209 |
| 100000 numbers openpyxl write xlsx time | 00'09.300701 |
| 1000000 numbers tabxlsx write xlsx time | 00'27.265536 |
| 1000000 numbers openpyxl write xlsx time | 01'31.813367 |

Have fun!

Expand Down
31 changes: 31 additions & 0 deletions tabxlsx.tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
KEEP = 0
TABTO = "./tabxlsx.py"
FIXME = True
BIGFILE = 100000


try:
from tabtools import currency_default
Expand Down Expand Up @@ -2635,6 +2637,33 @@ def test_9866(self) -> None:
logg.debug("\n>> %s\n<< %s", want, back)
self.assertEqual(want, back)
self.assertEqual(test, scan)
def test_9888(self) -> None:
    """Timing test for big files: generate 5 sheets of BIGFILE//10 rows
    (two numbers per row, i.e. BIGFILE numbers total), write them as xlsx
    via print_tablist (tabxlsx zipfile path), then convert the xlsx to
    json with the command-line tool, logging each phase's elapsed time."""
    tmp = self.testdir()
    tabs: List[TabSheet] = []
    for sheet in range(5):
        rows: JSONList = []
        for row in range(BIGFILE // 10):
            num = sheet * BIGFILE + row
            vals: JSONDict = {"a": num, "b": BIGFILE + num}
            rows.append(vals)
        title = "data%i" % sheet
        tabs.append(TabSheet(rows, [], title))
    filename = path.join(tmp, "bigfile.xlsx")
    output = path.join(tmp, "bigfile.json")
    starting = Time.now()
    print_tablist(filename, tabs)  # phase 1: write the xlsx file
    generated = Time.now()
    # phase 2: read the xlsx back and convert to json via the CLI tool
    # (fix: the input filename was garbled here - the tool must read the
    # xlsx that was just generated)
    sh(F"{TABTO} -^ {filename} -o {output}")
    converted = Time.now()
    logg.info("| %i numbers tabxlsx write xlsx time | %s", BIGFILE, generated - starting)
    # fix: message said "read xls" - the file is xlsx, matching the
    # wording used by the parallel test in tabtotext.tests.py
    logg.info("| %i numbers read xlsx and write json | %s", BIGFILE, converted - generated)
    text = open(output).read()
    # logg.debug("=>\n%s", text)
    test = tablistscanJSON(text)
    scan = tablistfile(output)
    back = dict(tablistmap(scan))
    # logg.debug("\n<< %s", back)
    self.assertEqual(test, scan)

if __name__ == "__main__":
# unittest.main()
Expand All @@ -2643,12 +2672,14 @@ def test_9866(self) -> None:
cmdline.add_option("-v", "--verbose", action="count", default=0, help="more verbose logging")
cmdline.add_option("-^", "--quiet", action="count", default=0, help="less verbose logging")
cmdline.add_option("-k", "--keep", action="count", default=0, help="keep testdir")
cmdline.add_option("--bigfile", metavar=str(BIGFILE), default=BIGFILE)
cmdline.add_option("--failfast", action="store_true", default=False,
help="Stop the test run on the first error or failure. [%default]")
cmdline.add_option("--xmlresults", metavar="FILE", default=None,
help="capture results as a junit xml file [%default]")
opt, args = cmdline.parse_args()
logging.basicConfig(level=max(0, logging.WARNING - 10 * opt.verbose + 10 * opt.quiet))
BIGFILE = int(opt.bigfile)
KEEP = opt.keep
if not args:
args = ["test_*"]
Expand Down

0 comments on commit 53051c7

Please sign in to comment.