Skip to content

Commit

Permalink
Rework Fastrun (#251)
Browse files Browse the repository at this point in the history
* Replace load_item() with get_item()
* Move some fastrun functions from baseentity to wbi_fastrun
* Update tests
  • Loading branch information
LeMyst authored Nov 17, 2021
1 parent 76340d0 commit 0f706a1
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 105 deletions.
22 changes: 9 additions & 13 deletions test/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from wikibaseintegrator.entities import Item
from wikibaseintegrator.wbi_config import config
from wikibaseintegrator.wbi_enums import ActionIfExists
from wikibaseintegrator.wbi_fastrun import get_fastrun_container

config['DEBUG'] = True

Expand Down Expand Up @@ -53,43 +54,38 @@ class TestFastRun(unittest.TestCase):
some basic tests for fastrun mode
"""

# Builds two ExternalID statements, asks a FastRunContainer whether a write is
# required for them, prints the outcome, and raises ValueError when a write is
# reported as required.
# NOTE(review): this span is a rendered diff without +/- markers. The paired
# lines (test_fast_run / test_fastrun, fast_run_result / fastrun_result) are
# presumably the before/after versions of a rename -- confirm against the commit.
def test_fast_run(self):
def test_fastrun(self):
statements = [
datatypes.ExternalID(value='P40095', prop_nr='P352'),
datatypes.ExternalID(value='YER158C', prop_nr='P705')
]

# The container is created directly here, with a two-element base filter.
frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P352'), datatypes.Item(prop_nr='P703', value='Q27510868')], base_data_type=datatypes.BaseDataType)

fast_run_result = frc.write_required(data=statements)
fastrun_result = frc.write_required(data=statements)

# A truthy result is treated as the failure case by the branches below.
if fast_run_result:
if fastrun_result:
message = 'fastrun failed'
else:
message = 'successful fastrun'
print(fast_run_result, message)
print(fastrun_result, message)

# here, fastrun should succeed, if not, test failed
if fast_run_result:
if fastrun_result:
raise ValueError

# Fetches item 'Q2', adds an ExternalID claim, and asserts on its English label,
# description and aliases; the label is also checked through a FastRunContainer
# via get_language_data() and check_language_data().
# NOTE(review): this span is a rendered diff without +/- markers. The lines using
# item.init_fastrun / item.fast_run_container and the lines using the `frc`
# returned by get_fastrun_container() are presumably the before/after versions
# of the same checks -- confirm against the commit.
def test_fastrun_label(self):
# tests fastrun label, description and aliases, and label in another language
fast_run_base_filter = [datatypes.Item(prop_nr='P361', value='Q18589965')]
frc = get_fastrun_container(base_filter=[datatypes.ExternalID(value='/m/02j71', prop_nr='P646')])
item = WikibaseIntegrator().item.get('Q2')
item.init_fastrun(base_filter=fast_run_base_filter)
item.init_fastrun(base_filter=fast_run_base_filter) # Test if we found the same FastRunContainer
item.claims.add(datatypes.ExternalID(value='/m/02j71', prop_nr='P646'))

# `frc` is rebound here to a directly constructed container with a different base filter.
frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P699')], base_data_type=datatypes.BaseDataType)

# Checks on the entity object itself.
assert item.labels.get(language='en') == "Earth"
descr = item.descriptions.get(language='en')
assert len(descr) > 3
assert "Terra" in item.aliases.get()

# Checks on the label as seen by the fastrun container.
assert list(item.fast_run_container.get_language_data("Q2", 'en', 'label'))[0] == "Earth"
assert item.fast_run_container.check_language_data("Q2", ['not the Earth'], 'en', 'label')
assert list(frc.get_language_data("Q2", 'en', 'label'))[0] == item.labels.get(language='en')
assert frc.check_language_data("Q2", ['not the Earth'], 'en', 'label')
assert "Terra" in item.aliases.get()
assert "planet" in item.descriptions.get()

Expand Down
47 changes: 4 additions & 43 deletions wikibaseintegrator/entities/baseentity.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from __future__ import annotations

from copy import copy
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Union

import simplejson

from wikibaseintegrator import wbi_fastrun
from wikibaseintegrator.datatypes import BaseDataType
from wikibaseintegrator.models.claims import Claim, Claims
from wikibaseintegrator.wbi_config import config
from wikibaseintegrator.wbi_enums import ActionIfExists
from wikibaseintegrator.wbi_exceptions import MWApiError, NonUniqueLabelDescriptionPairError
from wikibaseintegrator.wbi_fastrun import FastRunContainer
from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper
from wikibaseintegrator.wbi_login import Login

Expand All @@ -19,8 +19,6 @@


class BaseEntity:
fast_run_store: List[FastRunContainer] = []

ETYPE = 'base-entity'

def __init__(self, api: 'WikibaseIntegrator' = None, lastrevid: int = None, type: str = None, id: str = None, claims: Claims = None, is_bot: bool = None, login: Login = None):
Expand All @@ -38,8 +36,6 @@ def __init__(self, api: 'WikibaseIntegrator' = None, lastrevid: int = None, type
self.id = id
self.claims = claims or Claims()

self.fast_run_container: Optional[FastRunContainer] = None

self.debug = config['DEBUG']

def add_claims(self, claims: Union[Claim, list], action_if_exists: ActionIfExists = ActionIfExists.APPEND) -> BaseEntity:
Expand Down Expand Up @@ -174,43 +170,8 @@ def _write(self, data: Dict = None, summary: str = None, allow_anonymous: bool =
self.lastrevid = json_data['entity']['lastrevid']
return json_data['entity']

# Attaches a FastRunContainer to this entity as self.fast_run_container.
# A container from BaseEntity.fast_run_store is reused when its base_filter,
# use_refs, case_insensitive and sparql_endpoint_url all match the requested
# parameters and config['SPARQL_ENDPOINT_URL']; otherwise a new container is
# created and appended to the store. Returns None.
# NOTE(review): the page reports "4 additions & 43 deletions" for this file and
# lists moving fastrun functions to wbi_fastrun, so this whole method is
# presumably on the removed side of the diff -- confirm against the commit.
def init_fastrun(self, base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> None:
# Normalise the None default to an empty filter list.
if base_filter is None:
base_filter = []

if self.debug:
print('Initialize Fast Run init_fastrun')
# We search if we already have a FastRunContainer with the same parameters to re-use it
for fast_run in BaseEntity.fast_run_store:
if (fast_run.base_filter == base_filter) and (fast_run.use_refs == use_refs) and (fast_run.case_insensitive == case_insensitive) and (
fast_run.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']):
# On a match, current_qid is cleared and base_data_type is set on the reused container.
self.fast_run_container = fast_run
self.fast_run_container.current_qid = ''
self.fast_run_container.base_data_type = BaseDataType
if self.debug:
print("Found an already existing FastRunContainer")

# No container was attached: build one and register it in the class-level store.
if not self.fast_run_container:
if self.debug:
print("Create a new FastRunContainer")
self.fast_run_container = FastRunContainer(base_filter=base_filter, use_refs=use_refs, base_data_type=BaseDataType, case_insensitive=case_insensitive)
BaseEntity.fast_run_store.append(self.fast_run_container)

# def fr_search(self, **kwargs: Any) -> str:
# self.init_fastrun(**kwargs)
#
# if self.fast_run_container is None:
# raise ValueError("FastRunContainer is not initialized.")
#
# self.fast_run_container.load_item(self.claims)
#
# return self.fast_run_container.current_qid

def write_required(self, base_filter: List[BaseDataType | List[BaseDataType]] = None, **kwargs: Any) -> bool:
self.init_fastrun(base_filter=base_filter, **kwargs)

if self.fast_run_container is None:
raise ValueError("FastRunContainer is not initialized.")
fastrun_container = wbi_fastrun.get_fastrun_container(base_filter=base_filter, **kwargs)

if base_filter is None:
base_filter = []
Expand All @@ -220,7 +181,7 @@ def write_required(self, base_filter: List[BaseDataType | List[BaseDataType]] =
if claim.mainsnak.property_number in base_filter:
claims_to_check.append(claim)

return self.fast_run_container.write_required(data=claims_to_check, cqid=self.id)
return fastrun_container.write_required(data=claims_to_check, cqid=self.id)

def __repr__(self):
"""A mixin implementing a simple __repr__."""
Expand Down
66 changes: 17 additions & 49 deletions wikibaseintegrator/wbi_fastrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str =
self.base_filter: List[BaseDataType | List[BaseDataType]] = []
self.base_filter_string = ''
self.prop_dt_map: Dict[str, str] = {}
self.current_qid = ''

self.base_data_type = base_data_type
self.mediawiki_api_url = mediawiki_api_url or config['MEDIAWIKI_API_URL']
Expand Down Expand Up @@ -114,11 +113,7 @@ def reconstruct_statements(self, qid: str) -> List[BaseDataType]:
self.reconstructed_statements = reconstructed_statements
return reconstructed_statements

def get_item(self, claims: List, cqid: str = None) -> str:
self.load_item(claims=claims, cqid=cqid)
return self.current_qid

def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool:
def get_item(self, claims: Union[list, Claims], cqid: str = None) -> Optional[str]:
match_sets = []
for claim in claims:
# skip to next if statement has no value or no data type defined, e.g. for deletion objects
Expand Down Expand Up @@ -162,7 +157,7 @@ def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool:
# return True

if not match_sets:
return True
return None

if cqid:
matching_qids = {cqid}
Expand All @@ -174,11 +169,9 @@ def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool:
if not len(matching_qids) == 1:
if self.debug:
print(f"no matches ({len(matching_qids)})")
return True
return None

qid = matching_qids.pop()
self.current_qid = qid
return False
return matching_qids.pop()

def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExists = ActionIfExists.REPLACE, cqid: str = None) -> bool:
del_props = set()
Expand All @@ -190,12 +183,12 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi
for x in data:
if x.mainsnak.datavalue and x.mainsnak.datatype:
data_props.add(x.mainsnak.property_number)
self.load_item(data, cqid)
qid = self.get_item(data, cqid)

if not self.current_qid:
if not qid:
return True

reconstructed_statements = self.reconstruct_statements(self.current_qid)
reconstructed_statements = self.reconstruct_statements(qid)
tmp_rs = copy.deepcopy(reconstructed_statements)

# handle append properties
Expand Down Expand Up @@ -645,54 +638,29 @@ def __repr__(self) -> str:
)


# def fr_search(**kwargs: Any) -> str:
# FastRunContainer.init_fastrun(**kwargs)
#
# if self.fast_run_container is None:
# raise ValueError("FastRunContainer is not initialized.")
#
# self.fast_run_container.load_item(self.claims)
#
# return self.fast_run_container.current_qid


# def freezeargs(func):
# """Transform mutable dictionary arguments
# into immutable ones.
# Useful for compatibility with caches.
# """
#
# @wraps(func)
# def wrapped(*args: Any, **kwargs: Any) -> Any:
# args = tuple(frozendict(arg) if isinstance(arg, dict) else arg for arg in args)
# kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()}
# return func(*args, **kwargs)
#
# return wrapped


# Returns the FastRunContainer for the given base_filter / use_refs /
# case_insensitive combination by delegating to search_fastrun_store().
# A base_filter of None is replaced by an empty list before the lookup.
# NOTE(review): this span is a rendered diff without +/- markers. Elsewhere in
# this commit the container's current_qid attribute appears to be dropped, so
# the DEBUG print and the attribute assignments after the lookup may be on the
# removed side -- confirm against the commit.
def get_fastrun_container(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer:
if base_filter is None:
base_filter = []

if config['DEBUG']:
print('Initialize Fast Run get_fastrun_container')

# We search if we already have a FastRunContainer with the same parameters to re-use it
fastrun_container = search_fastrun_store(base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive)
fastrun_container.current_qid = ''
fastrun_container.base_data_type = BaseDataType

return fastrun_container


# @freezeargs
# @lru_cache()
# Looks through fastrun_store for a container whose base_filter, use_refs,
# case_insensitive and sparql_endpoint_url match the arguments and
# config['SPARQL_ENDPOINT_URL'], and returns it. When none matches, a new
# FastRunContainer is created, appended to fastrun_store and returned.
# NOTE(review): this span is a rendered diff without +/- markers. The two loops
# (loop variable `c` vs `fastrun`) and the two FastRunContainer(...) calls
# (same keyword arguments in a different order) are presumably the before/after
# versions of the same statements -- confirm against the commit.
def search_fastrun_store(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer:
for c in fastrun_store:
if (c.base_filter == base_filter) and (c.use_refs == use_refs) and (c.case_insensitive == case_insensitive) and (
c.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']):
return c
for fastrun in fastrun_store:
if (fastrun.base_filter == base_filter) and (fastrun.use_refs == use_refs) and (fastrun.case_insensitive == case_insensitive) and (
fastrun.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']):
return fastrun

# In case nothing was found in the fastrun_store
if config['DEBUG']:
print("Create a new FastRunContainer")
fastrun_container = FastRunContainer(base_filter=base_filter, use_refs=use_refs, base_data_type=BaseDataType, case_insensitive=case_insensitive)

fastrun_container = FastRunContainer(base_data_type=BaseDataType, base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive)
# Register the new container so later calls with the same parameters can find it.
fastrun_store.append(fastrun_container)
return fastrun_container

0 comments on commit 0f706a1

Please sign in to comment.