Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework Fastrun #251

Merged
merged 1 commit into from
Nov 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 9 additions & 13 deletions test/test_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from wikibaseintegrator.entities import Item
from wikibaseintegrator.wbi_config import config
from wikibaseintegrator.wbi_enums import ActionIfExists
from wikibaseintegrator.wbi_fastrun import get_fastrun_container

config['DEBUG'] = True

Expand Down Expand Up @@ -53,43 +54,38 @@ class TestFastRun(unittest.TestCase):
some basic tests for fastrun mode
"""

def test_fast_run(self):
def test_fastrun(self):
statements = [
datatypes.ExternalID(value='P40095', prop_nr='P352'),
datatypes.ExternalID(value='YER158C', prop_nr='P705')
]

frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P352'), datatypes.Item(prop_nr='P703', value='Q27510868')], base_data_type=datatypes.BaseDataType)

fast_run_result = frc.write_required(data=statements)
fastrun_result = frc.write_required(data=statements)

if fast_run_result:
if fastrun_result:
message = 'fastrun failed'
else:
message = 'successful fastrun'
print(fast_run_result, message)
print(fastrun_result, message)

# here, fastrun should succeed, if not, test failed
if fast_run_result:
if fastrun_result:
raise ValueError

def test_fastrun_label(self):
# tests fastrun label, description and aliases, and label in another language
fast_run_base_filter = [datatypes.Item(prop_nr='P361', value='Q18589965')]
frc = get_fastrun_container(base_filter=[datatypes.ExternalID(value='/m/02j71', prop_nr='P646')])
item = WikibaseIntegrator().item.get('Q2')
item.init_fastrun(base_filter=fast_run_base_filter)
item.init_fastrun(base_filter=fast_run_base_filter) # Test if we found the same FastRunContainer
item.claims.add(datatypes.ExternalID(value='/m/02j71', prop_nr='P646'))

frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P699')], base_data_type=datatypes.BaseDataType)

assert item.labels.get(language='en') == "Earth"
descr = item.descriptions.get(language='en')
assert len(descr) > 3
assert "Terra" in item.aliases.get()

assert list(item.fast_run_container.get_language_data("Q2", 'en', 'label'))[0] == "Earth"
assert item.fast_run_container.check_language_data("Q2", ['not the Earth'], 'en', 'label')
assert list(frc.get_language_data("Q2", 'en', 'label'))[0] == item.labels.get(language='en')
assert frc.check_language_data("Q2", ['not the Earth'], 'en', 'label')
assert "Terra" in item.aliases.get()
assert "planet" in item.descriptions.get()

Expand Down
47 changes: 4 additions & 43 deletions wikibaseintegrator/entities/baseentity.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from __future__ import annotations

from copy import copy
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
from typing import TYPE_CHECKING, Any, Dict, List, Union

import simplejson

from wikibaseintegrator import wbi_fastrun
from wikibaseintegrator.datatypes import BaseDataType
from wikibaseintegrator.models.claims import Claim, Claims
from wikibaseintegrator.wbi_config import config
from wikibaseintegrator.wbi_enums import ActionIfExists
from wikibaseintegrator.wbi_exceptions import MWApiError, NonUniqueLabelDescriptionPairError
from wikibaseintegrator.wbi_fastrun import FastRunContainer
from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper
from wikibaseintegrator.wbi_login import Login

Expand All @@ -19,8 +19,6 @@


class BaseEntity:
fast_run_store: List[FastRunContainer] = []

ETYPE = 'base-entity'

def __init__(self, api: 'WikibaseIntegrator' = None, lastrevid: int = None, type: str = None, id: str = None, claims: Claims = None, is_bot: bool = None, login: Login = None):
Expand All @@ -38,8 +36,6 @@ def __init__(self, api: 'WikibaseIntegrator' = None, lastrevid: int = None, type
self.id = id
self.claims = claims or Claims()

self.fast_run_container: Optional[FastRunContainer] = None

self.debug = config['DEBUG']

def add_claims(self, claims: Union[Claim, list], action_if_exists: ActionIfExists = ActionIfExists.APPEND) -> BaseEntity:
Expand Down Expand Up @@ -174,43 +170,8 @@ def _write(self, data: Dict = None, summary: str = None, allow_anonymous: bool =
self.lastrevid = json_data['entity']['lastrevid']
return json_data['entity']

def init_fastrun(self, base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> None:
if base_filter is None:
base_filter = []

if self.debug:
print('Initialize Fast Run init_fastrun')
# We search if we already have a FastRunContainer with the same parameters to re-use it
for fast_run in BaseEntity.fast_run_store:
if (fast_run.base_filter == base_filter) and (fast_run.use_refs == use_refs) and (fast_run.case_insensitive == case_insensitive) and (
fast_run.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']):
self.fast_run_container = fast_run
self.fast_run_container.current_qid = ''
self.fast_run_container.base_data_type = BaseDataType
if self.debug:
print("Found an already existing FastRunContainer")

if not self.fast_run_container:
if self.debug:
print("Create a new FastRunContainer")
self.fast_run_container = FastRunContainer(base_filter=base_filter, use_refs=use_refs, base_data_type=BaseDataType, case_insensitive=case_insensitive)
BaseEntity.fast_run_store.append(self.fast_run_container)

# def fr_search(self, **kwargs: Any) -> str:
# self.init_fastrun(**kwargs)
#
# if self.fast_run_container is None:
# raise ValueError("FastRunContainer is not initialized.")
#
# self.fast_run_container.load_item(self.claims)
#
# return self.fast_run_container.current_qid

def write_required(self, base_filter: List[BaseDataType | List[BaseDataType]] = None, **kwargs: Any) -> bool:
self.init_fastrun(base_filter=base_filter, **kwargs)

if self.fast_run_container is None:
raise ValueError("FastRunContainer is not initialized.")
fastrun_container = wbi_fastrun.get_fastrun_container(base_filter=base_filter, **kwargs)

if base_filter is None:
base_filter = []
Expand All @@ -220,7 +181,7 @@ def write_required(self, base_filter: List[BaseDataType | List[BaseDataType]] =
if claim.mainsnak.property_number in base_filter:
claims_to_check.append(claim)

return self.fast_run_container.write_required(data=claims_to_check, cqid=self.id)
return fastrun_container.write_required(data=claims_to_check, cqid=self.id)

def __repr__(self):
"""A mixin implementing a simple __repr__."""
Expand Down
66 changes: 17 additions & 49 deletions wikibaseintegrator/wbi_fastrun.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str =
self.base_filter: List[BaseDataType | List[BaseDataType]] = []
self.base_filter_string = ''
self.prop_dt_map: Dict[str, str] = {}
self.current_qid = ''

self.base_data_type = base_data_type
self.mediawiki_api_url = mediawiki_api_url or config['MEDIAWIKI_API_URL']
Expand Down Expand Up @@ -114,11 +113,7 @@ def reconstruct_statements(self, qid: str) -> List[BaseDataType]:
self.reconstructed_statements = reconstructed_statements
return reconstructed_statements

def get_item(self, claims: List, cqid: str = None) -> str:
self.load_item(claims=claims, cqid=cqid)
return self.current_qid

def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool:
def get_item(self, claims: Union[list, Claims], cqid: str = None) -> Optional[str]:
match_sets = []
for claim in claims:
# skip to next if statement has no value or no data type defined, e.g. for deletion objects
Expand Down Expand Up @@ -162,7 +157,7 @@ def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool:
# return True

if not match_sets:
return True
return None

if cqid:
matching_qids = {cqid}
Expand All @@ -174,11 +169,9 @@ def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool:
if not len(matching_qids) == 1:
if self.debug:
print(f"no matches ({len(matching_qids)})")
return True
return None

qid = matching_qids.pop()
self.current_qid = qid
return False
return matching_qids.pop()

def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExists = ActionIfExists.REPLACE, cqid: str = None) -> bool:
del_props = set()
Expand All @@ -190,12 +183,12 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi
for x in data:
if x.mainsnak.datavalue and x.mainsnak.datatype:
data_props.add(x.mainsnak.property_number)
self.load_item(data, cqid)
qid = self.get_item(data, cqid)

if not self.current_qid:
if not qid:
return True

reconstructed_statements = self.reconstruct_statements(self.current_qid)
reconstructed_statements = self.reconstruct_statements(qid)
tmp_rs = copy.deepcopy(reconstructed_statements)

# handle append properties
Expand Down Expand Up @@ -645,54 +638,29 @@ def __repr__(self) -> str:
)


# def fr_search(**kwargs: Any) -> str:
# FastRunContainer.init_fastrun(**kwargs)
#
# if self.fast_run_container is None:
# raise ValueError("FastRunContainer is not initialized.")
#
# self.fast_run_container.load_item(self.claims)
#
# return self.fast_run_container.current_qid


# def freezeargs(func):
# """Transform mutable dictionary arguments
# into immutable ones (frozendict).
# Useful for compatibility with caching (e.g. lru_cache)
# """
#
# @wraps(func)
# def wrapped(*args: Any, **kwargs: Any) -> Any:
# args = tuple(frozendict(arg) if isinstance(arg, dict) else arg for arg in args)
# kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()}
# return func(*args, **kwargs)
#
# return wrapped


def get_fastrun_container(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer:
if base_filter is None:
base_filter = []

if config['DEBUG']:
print('Initialize Fast Run get_fastrun_container')

# We search if we already have a FastRunContainer with the same parameters to re-use it
fastrun_container = search_fastrun_store(base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive)
fastrun_container.current_qid = ''
fastrun_container.base_data_type = BaseDataType

return fastrun_container


# @freezeargs
# @lru_cache()
def search_fastrun_store(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer:
for c in fastrun_store:
if (c.base_filter == base_filter) and (c.use_refs == use_refs) and (c.case_insensitive == case_insensitive) and (
c.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']):
return c
for fastrun in fastrun_store:
if (fastrun.base_filter == base_filter) and (fastrun.use_refs == use_refs) and (fastrun.case_insensitive == case_insensitive) and (
fastrun.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']):
return fastrun

# In case nothing was found in the fastrun_store
if config['DEBUG']:
print("Create a new FastRunContainer")
fastrun_container = FastRunContainer(base_filter=base_filter, use_refs=use_refs, base_data_type=BaseDataType, case_insensitive=case_insensitive)

fastrun_container = FastRunContainer(base_data_type=BaseDataType, base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive)
fastrun_store.append(fastrun_container)
return fastrun_container