Skip to content

Commit

Permalink
FEAT: update db by default, add --overwrite option ...
Browse files Browse the repository at this point in the history
as suggested in karlicoss#20:

- drop PROMNESIA_INDEX_POLICY env-var.
- CLI options described in the 2nd case explained in karlicoss#211,
  due to simplicity.
- Function defaults are false, as suggested in
  [karlicoss#20](karlicoss#20 (comment)).
- Both index & demo updated.
- Env-var now checks its value one of (update|overwrite).
- All update/overwrite decision logic moved to __main_.
  • Loading branch information
ankostis committed Mar 6, 2021
1 parent 436ca96 commit f181127
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 25 deletions.
4 changes: 3 additions & 1 deletion doc/GUIDE.org
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,9 @@ Also see [[https://github.com/karlicoss/promnesia/issues/172][issues/172]].

** partial update

(experimental) Set env variable =PROMNESIA_INDEX_POLICY=update=.
Only index sources given in =promnesia index --sources SOURCE [SOURCE] ...=
(or all sources, if no =--sources= given), unless =--overwrite= is given,
in which case all existing visits are removed from db prior to indexing.

** exclude files from =auto= indexer

Expand Down
49 changes: 39 additions & 10 deletions src/promnesia/__main__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import argparse
import os
import logging
import inspect
import sys
Expand Down Expand Up @@ -63,7 +62,7 @@ def iter_all_visits(sources_subset: Iterable[Union[str, int]]=()) -> Iterator[Re
logger.warning("unknown --sources: %s", ", ".join(repr(i) for i in sources_subset))


def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=()) -> Iterable[Exception]:
def _do_index(dry: bool=False, sources_subset: Iterable[Union[str, int]]=(), overwrite_db=False) -> Iterable[Exception]:
# also keep & return errors for further display
errors: List[Exception] = []
def it() -> Iterable[Res[DbVisit]]:
Expand All @@ -78,7 +77,7 @@ def it() -> Iterable[Res[DbVisit]]:
for v in res:
print(v)
else:
dump_errors = visits_to_sqlite(it())
dump_errors = visits_to_sqlite(it(), overwrite_db=overwrite_db)
for e in dump_errors:
logger.exception(e)
errors.append(e)
Expand All @@ -89,10 +88,11 @@ def do_index(
config_file: Path,
dry: bool=False,
sources_subset: Iterable[Union[str, int]]=(),
overwrite_db: bool=False,
) -> None:
config.load_from(config_file) # meh.. should be cleaner
try:
errors = list(_do_index(dry=dry, sources_subset=sources_subset))
errors = list(_do_index(dry=dry, sources_subset=sources_subset, overwrite_db=overwrite_db))
finally:
config.reset()
if len(errors) > 0:
Expand Down Expand Up @@ -133,6 +133,7 @@ def do_demo(
config_file: Optional[Path],
name: str='demo',
sources_subset: Iterable[Union[str, int]]=(),
overwrite_db: bool=False,
) -> None:
from pprint import pprint
with TemporaryDirectory() as tdir:
Expand All @@ -149,7 +150,7 @@ def do_demo(
)
config.instance = cfg

errors = list(_do_index(sources_subset=sources_subset))
errors = list(_do_index(sources_subset=sources_subset, overwrite_db=overwrite_db))
if len(errors) > 0:
logger.error('%d errors during indexing (see logs above for backtraces)', len(errors))
for e in errors:
Expand Down Expand Up @@ -300,8 +301,14 @@ def main() -> None:
nargs="+",
type=_ordinal_or_name,
metavar="SOURCE",
help="Source names (or their 0-indexed position) to index."
" If missing, db is recreated empty and all sources are indexed.",
help="Source names (or their 0-indexed position) to index.",
)
ep.add_argument(
'--overwrite',
required=False,
action="store_true",
help="Empty db before populating it with newly indexed visits."
" If interrupted, db is left untouched."
)

sp = subp.add_parser('serve', help='Serve a link database', formatter_class=F) # type: ignore
Expand All @@ -323,8 +330,22 @@ def main() -> None:
default='guess',
help='Promnesia source to index as (see https://github.com/karlicoss/promnesia/tree/master/src/promnesia/sources for the full list)',
)
ap.add_argument('--sources', required=False, action="extend", nargs="+", type=_ordinal_or_name,
help="Subset of source(s) to run (name or 0-indexed position); use `promnisia --dry` to view sources")
ap.add_argument(
'--sources',
required=False,
action="extend",
nargs="+",
type=_ordinal_or_name,
metavar="SOURCE",
help="Source names (or their 0-indexed position) to index.",
)
ap.add_argument(
'--overwrite',
required=False,
action="store_true",
help="Empty db before populating it with newly indexed visits."
" If interrupted, db is left untouched."
)
ap.add_argument('params', nargs='*', help='Optional extra params for the indexer')

isp = subp.add_parser('install-server', help='Install server as a systemd service (for autostart)', formatter_class=F)
Expand Down Expand Up @@ -358,13 +379,20 @@ def main() -> None:
p.print_help(sys.stderr)
sys.exit(1)

logger.info("CLI args: %s", args)

# TODO maybe, it's better for server to compute intermediate represetnation?
# the only downside is storage. dunno.
# worst case -- could use database?

with get_tmpdir() as tdir: # TODO??
if args.mode == 'index':
do_index(config_file=args.config, dry=args.dry, sources_subset=args.sources)
do_index(
config_file=args.config,
dry=args.dry,
sources_subset=args.sources,
overwrite_db=args.overwrite,
)
elif args.mode == 'serve':
server.run(args)
elif args.mode == 'demo':
Expand All @@ -377,6 +405,7 @@ def main() -> None:
config_file=args.config,
name=args.name,
sources_subset=args.sources,
overwrite_db=args.overwrite,
)
elif args.mode == 'install-server': # todo rename to 'autostart' or something?
install_server.install(args)
Expand Down
18 changes: 4 additions & 14 deletions src/promnesia/dump.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
from pathlib import Path
import shutil
from typing import Dict, List, Tuple, Set, Iterable
Expand All @@ -14,14 +13,6 @@
from . import config


def update_policy_active() -> bool:
# NOTE: experimental.. need to make it a proper cmdline argument later
INDEX_POLICY = os.environ.get('PROMNESIA_INDEX_POLICY', 'overwrite_all')
# if 'update' is passed, will run against the existing db and only tough the sources present in the current index run
# not sue if a good name for this..
return INDEX_POLICY == 'update'


# NOTE: I guess the main performance benefit from this is not creating too many tmp lists and avoiding overhead
# since as far as sql is concerned it should all be in the same transaction. only a guess
# not sure it's the proper way to handle it
Expand All @@ -30,7 +21,7 @@ def update_policy_active() -> bool:


# returns critical warnings
def visits_to_sqlite(vit: Iterable[Res[DbVisit]]) -> List[Exception]:
def visits_to_sqlite(vit: Iterable[Res[DbVisit]], *, overwrite_db: bool) -> List[Exception]:
logger = get_logger()
db_path = config.get().db

Expand Down Expand Up @@ -58,8 +49,7 @@ def vit_ok() -> Iterable[DbVisit]:
yield ev

tpath = Path(get_tmpdir().name) / 'promnesia.tmp.sqlite'
policy_update = update_policy_active()
if not policy_update:
if overwrite_db:
engine = create_engine(f'sqlite:///{tpath}')
else:
engine = create_engine(f'sqlite:///{db_path}')
Expand All @@ -82,12 +72,12 @@ def vit_ok() -> Iterable[DbVisit]:
# pylint: disable=no-value-for-parameter
conn.execute(table.insert().values(bound))

if not policy_update:
if overwrite_db:
shutil.move(str(tpath), str(db_path))

errs = '' if errors == 0 else f', {errors} ERRORS'
total = ok + errors
what = 'updated' if policy_update else 'overwritten'
what = 'overwritten' if overwrite_db else 'updated'
logger.info('%s database "%s". %d total (%d OK%s)', what, db_path, total, ok, errs)
res: List[Exception] = []
if total == 0:
Expand Down

0 comments on commit f181127

Please sign in to comment.