Skip to content

Commit

Permalink
perf(management/allocators): re-implement JSONTableAllocator with faster algorithm assuming that we read much more often than we write
Browse files Browse the repository at this point in the history
  • Loading branch information
ruancomelli committed Oct 29, 2022
1 parent 7a0bdf9 commit 68aa576
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 42 deletions.
45 changes: 33 additions & 12 deletions boiling_learning/management/allocators.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import abc
import json as _json
from pathlib import Path
from typing import Any, Callable, Generic, TypeVar
from typing import Any, Callable, Generic, List, Optional, TypeVar

from classes import AssociatedType, Supports, typeclass
from loguru import logger
from tinydb import TinyDB
from tinydb_smartcache import SmartCacheTable
from typing_extensions import final

from boiling_learning.descriptions import describe
Expand All @@ -14,7 +13,6 @@
from boiling_learning.utils.pathutils import PathLike, resolve

# Ensure that all databases/tables will now use the smart query cache
TinyDB.table_class = SmartCacheTable


class Allocator(abc.ABC):
Expand Down Expand Up @@ -93,20 +91,43 @@ def __init__(
) -> None:
root = resolve(path, dir=True)
self.path = resolve(root / 'data', dir=True)
self.db = TinyDB(str(root / 'db.json'))
self.db_path = root / 'db.json'
self._data: Optional[List[json.JSONDataType]] = None
self.describer = describer
self.suffix = suffix

@property
def data(self) -> List[json.JSONDataType]:
    """Return the cached table, loading it from disk on first access.

    The result of ``_load_db`` is stored in ``self._data`` so later reads
    are served from memory. Only ``None`` counts as "not loaded yet"; an
    empty list is a valid cached value.
    """
    if self._data is not None:
        return self._data

    loaded = self._load_db()
    self._data = loaded
    return loaded

@data.setter
def data(self, data: List[json.JSONDataType]) -> None:
    """Replace the cached table and write it to disk right away."""
    # The cache is updated before saving because `_save_db` serializes
    # `self.data`, i.e. whatever is cached at that moment.
    self._data = data
    self._save_db()

# Build the path of the file that belongs to `doc_id`: the file is named
# `<doc_id><self.suffix>` and lives under `self.path`.
# NOTE(review): two `return` statements appear back to back below. This looks
# like a diff view showing a removed line (the `resolve(..., parents=True)`
# call) followed by its replacement (the plain path join) - confirm which one
# is current before relying on either.
def _doc_path(self, doc_id: int) -> Path:
return resolve(self.path / f'{doc_id}{self.suffix}', parents=True)
return self.path / f'{doc_id}{self.suffix}'

# Return the integer id associated with `serialized`, registering it first if
# it has not been seen before.
# NOTE(review): the body below appears to interleave two implementations from
# a diff view - a loop over `self.db` ending in `self.db.insert(...)`, and a
# `try/except` working on `self.data`. Presumably the first is the removed
# version and the second its replacement; confirm against the commit.
def _provide(self, serialized: json.JSONDataType) -> int:
serialized = {'data': serialized} # ensure that is a mapping

# Variant 1: scan the documents in `self.db` for an equal one and return its
# `doc_id`; otherwise insert the new document and return what `insert` gives.
for doc in self.db:
if doc == serialized:
return doc.doc_id
return self.db.insert(serialized)
# Variant 2: the id is the position of the entry in the `self.data` list.
# `list.index` raises ValueError when the entry is absent; in that case a new
# list with the entry appended is assigned to `self.data` and the index of the
# last element is returned.
try:
return self.data.index(serialized)
except ValueError:
self.data = self.data + [serialized]
return len(self.data) - 1

def _load_db(self) -> List[json.JSONDataType]:
    """Read the table stored at ``self.db_path``.

    Returns:
        The parsed JSON content of the file, or an empty list when the
        file does not exist.
    """
    try:
        raw_text = self.db_path.read_text(encoding='utf-8')
    except FileNotFoundError:
        # Nothing has been saved yet: start from an empty table.
        return []

    return _json.loads(raw_text)

def _save_db(self) -> None:
    """Write the table (``self.data``) to ``self.db_path`` as JSON.

    The table is serialized in memory first and then written to a sibling
    ``<name>.tmp`` file that replaces the database file. Opening the
    database file directly in ``'w'`` mode would truncate it before the
    serialization is known to succeed, so a non-serializable entry or an
    interrupted write would destroy the table that was already stored.

    Raises:
        TypeError, ValueError: if the table cannot be serialized to JSON;
            the existing database file is left untouched.
        OSError: if the temporary file cannot be written or moved into
            place.
    """
    # Serialize before touching the file system so that a serialization
    # error cannot leave a half-written file behind.
    payload = _json.dumps(self.data)

    tmp_path = self.db_path.with_name(f'{self.db_path.name}.tmp')
    try:
        with tmp_path.open('w', encoding='utf-8') as file:
            file.write(payload)
        # Swap the complete file into place in a single step.
        tmp_path.replace(self.db_path)
    except BaseException:
        # Do not leave a stray temporary file next to the database.
        try:
            tmp_path.unlink()
        except FileNotFoundError:
            pass
        raise

def __call__(self, pack: Pack[Any, Any]) -> Path:
logger.debug(f'Allocating path for args {pack}')
Expand Down
32 changes: 5 additions & 27 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@ dependencies = [
"parse<2.0.0,>=1.19.0",
"Pint<1.0,>=0.18",
"scikit-image<1.0.0,>=0.19.1",
"tinydb<5.0.0,>=4.5.2",
"typing-extensions>=4.3.0",
"tensorflow>=2.9.1",
"classes<1.0.0,>=0.4.0",
"tinydb-smartcache<3.0.0,>=2.0.0",
"iteround<2.0.0,>=1.0.3",
"pyqtgraph==0.12.3",
"PyQt6<7.0.0,>=6.2.2",
Expand Down Expand Up @@ -144,7 +142,6 @@ warn_unused_ignores = true
plugins = [
"classes.contrib.mypy.classes_plugin",
"numpy.typing.mypy_plugin",
"tinydb.mypy_plugin",
]

[tool.commitizen]
Expand Down

0 comments on commit 68aa576

Please sign in to comment.