From 7d41a7a061ba571d7c8d643aceca4d612608af51 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Tue, 11 May 2021 13:00:07 +0200 Subject: [PATCH 001/308] First step Quick and dirty search and replace --- README.md | 26 ++++++++-------- test/test_all.py | 14 ++++----- test/test_wbi_core.py | 46 ++++++++++++++-------------- test/test_wbi_fastrun.py | 24 ++++++++------- wikibaseintegrator/wbi_core.py | 14 ++++----- wikibaseintegrator/wbi_exceptions.py | 2 +- wikibaseintegrator/wbi_functions.py | 10 +++--- wikibaseintegrator/wbi_item.py | 5 +++ wikibaseintegrator/wbi_lexeme.py | 5 +++ 9 files changed, 79 insertions(+), 67 deletions(-) create mode 100644 wikibaseintegrator/wbi_item.py create mode 100644 wikibaseintegrator/wbi_lexeme.py diff --git a/README.md b/README.md index 77c6a12a..5176d69d 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ - [Installation](#installation) - [Using a Wikibase instance](#using-a-wikibase-instance) - [The Core Parts](#the-core-parts) - - [wbi_core.ItemEngine](#wbi_coreitemengine) + - [wbi_item.Item](#wbi_coreitemengine) - [wbi_functions](#wbi_functions) - [Use MediaWiki API](#use-mediawiki-api) - [wbi_login.Login](#wbi_loginlogin) @@ -60,9 +60,9 @@ To test for correct installation, start a Python console and execute the followi for ['Human'](https://www.wikidata.org/entity/Q5)): ```python -from wikibaseintegrator import wbi_core +from wikibaseintegrator import wbi_item -my_first_wikidata_item = wbi_core.ItemEngine(item_id='Q5') +my_first_wikidata_item = wbi_item.Item(item_id='Q5') # to check successful installation and retrieval of the data, you can print the json representation of the item print(my_first_wikidata_item.get_json_representation()) @@ -97,7 +97,7 @@ keeping Wikidata in sync with an external resource. wbi_core consists of a central class called ItemEngine and Login for authenticating with a MediaWiki isntance (like Wikidata). 
-## wbi_core.ItemEngine ## +## wbi_item.Item ## This is the central class which does all the heavy lifting. @@ -109,7 +109,7 @@ Features: exception) * Checks automatically if the correct item has been loaded by comparing it to the data provided * All Wikibase data types implemented -* A dedicated wbi_core.ItemEngine.write() method allows loading and consistency checks of data before any write to +* A dedicated wbi_item.Item.write() method allows loading and consistency checks of data before any write to Wikibase is performed * Full access to the whole Wikibase item as a JSON document @@ -254,7 +254,7 @@ tuple, depending on the complexity of the data type. ## Execute SPARQL queries ## -The method `wbi_core.ItemEngine.execute_sparql_query()` allows you to execute SPARQL queries without a hassle. It takes +The method `wbi_item.Item.execute_sparql_query()` allows you to execute SPARQL queries without a hassle. It takes the actual query string (query), optional prefixes (prefix) if you do not want to use the standard prefixes of Wikidata, the actual entpoint URL (endpoint), and you can also specify a user agent for the http header sent to the SPARQL server (user_agent). The latter is very useful to let the operators of the endpoint know who you are, especially if you @@ -287,7 +287,7 @@ print(wbi_functions.mediawiki_api_call_helper(query, allow_anonymous=True)) ## Wikibase search entities ## -The method `wbi_core.ItemEngine.search_entities()` allows for string search in a Wikibase instance. This means that +The method `wbi_item.Item.search_entities()` allows for string search in a Wikibase instance. This means that labels, descriptions and aliases can be searched for a string of interest. 
The method takes five arguments: The actual search string (search_string), an optional server (mediawiki_api_url, in case the Wikibase instance used is not Wikidata), an optional user_agent, an optional max_results (default 500), an optional language (default 'en'), and an @@ -315,7 +315,7 @@ In order to create a minimal bot based on wbi_core, three things are required: * A ItemEngine object which takes the data, does the checks and performs write. ```python -from wikibaseintegrator import wbi_core, wbi_login, wbi_datatype +from wikibaseintegrator import wbi_item, wbi_login, wbi_datatype # login object login_instance = wbi_login.Login(user='', pwd='') @@ -327,7 +327,7 @@ entrez_gene_id = wbi_datatype.String(value='', prop_nr='P351') data = [entrez_gene_id] # Search for and then edit/create new item -wd_item = wbi_core.ItemEngine(data=data) +wd_item = wbi_item.Item(data=data) wd_item.write(login_instance) ``` @@ -337,7 +337,7 @@ An enhanced example of the previous bot just puts two of the three things into a or modification of items. 
```python -from wikibaseintegrator import wbi_core, wbi_login, wbi_datatype +from wikibaseintegrator import wbi_item, wbi_login, wbi_datatype # login object login_instance = wbi_login.Login(user='', pwd='') @@ -366,7 +366,7 @@ for entrez_id, ensembl in raw_data.items(): data = [entrez_gene_id, ensembl_transcript_id] # Search for and then edit/create new item - wd_item = wbi_core.ItemEngine(data=data) + wd_item = wbi_item.Item(data=data) wd_item.write(login_instance) ``` @@ -395,7 +395,7 @@ fast_run_base_filter = {'P351': '', 'P703': 'Q15978631'} The full example: ```python -from wikibaseintegrator import wbi_core, wbi_login, wbi_datatype +from wikibaseintegrator import wbi_item, wbi_login, wbi_datatype # login object login_instance = wbi_login.Login(user='', pwd='') @@ -428,7 +428,7 @@ for entrez_id, ensembl in raw_data.items(): data = [entrez_gene_id, ensembl_transcript_id] # Search for and then edit/create new item - wd_item = wbi_core.ItemEngine(data=data, fast_run=fast_run, fast_run_base_filter=fast_run_base_filter) + wd_item = wbi_item.Item(data=data, fast_run=fast_run, fast_run_base_filter=fast_run_base_filter) wd_item.write(login_instance) ``` diff --git a/test/test_all.py b/test/test_all.py index a006a971..dbe0b1dc 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -4,7 +4,7 @@ import requests -from wikibaseintegrator import wbi_core, wbi_fastrun, wbi_functions, wbi_datatype +from wikibaseintegrator import wbi_fastrun, wbi_functions, wbi_datatype, wbi_item from wikibaseintegrator.wbi_core import MWApiError __author__ = 'Sebastian Burgstaller-Muehlbacher' @@ -77,7 +77,7 @@ def test_live_item(self): """ Test an item against Wikidata """ - item = wbi_core.ItemEngine(item_id='Q423111') + item = wbi_item.Item(item_id='Q423111') mass_statement = [x for x in item.statements if x.get_prop_nr() == 'P2067'].pop() pprint.pprint(mass_statement.get_json_representation()) @@ -100,7 +100,7 @@ def test_fast_run(self): ] frc = 
wbi_fastrun.FastRunContainer(base_filter={'P352': '', 'P703': 'Q27510868'}, - base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine) + base_data_type=wbi_datatype.BaseDataType, engine=wbi_item.Item) fast_run_result = frc.write_required(data=statements) @@ -118,9 +118,9 @@ def test_fastrun_label(self): # tests fastrun label, description and aliases, and label in another language data = [wbi_datatype.ExternalID('/m/02j71', 'P646')] fast_run_base_filter = {'P361': 'Q18589965'} - item = wbi_core.ItemEngine(item_id="Q2", data=data, fast_run=True, fast_run_base_filter=fast_run_base_filter) + item = wbi_item.Item(item_id="Q2", data=data, fast_run=True, fast_run_base_filter=fast_run_base_filter) - frc = wbi_core.ItemEngine.fast_run_store[0] + frc = wbi_item.Item.fast_run_store[0] frc.debug = True assert item.get_label('en') == "Earth" @@ -162,7 +162,7 @@ def test_fastrun_label(self): def test_sitelinks(): data = [wbi_datatype.ItemID(value='Q12136', prop_nr='P31')] - item = wbi_core.ItemEngine(item_id='Q622901', data=data) + item = wbi_item.Item(item_id='Q622901', data=data) item.get_sitelink("enwiki") assert "enwiki" not in item.json_representation['sitelinks'] item.set_sitelink("enwiki", "something") @@ -173,7 +173,7 @@ def test_sitelinks(): def test_nositelinks(): # this item doesn't and probably wont ever have any sitelinks (but who knows?? maybe one day..) 
data = [wbi_datatype.ItemID(value='Q5', prop_nr='P31')] - item = wbi_core.ItemEngine(item_id='Q27869338', data=data) + item = wbi_item.Item(item_id='Q27869338', data=data) item.get_sitelink("enwiki") assert "enwiki" not in item.json_representation['sitelinks'] item.set_sitelink("enwiki", "something") diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index 27301736..6e603125 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -1,28 +1,28 @@ import unittest -from wikibaseintegrator import wbi_core, wbi_functions, wbi_datatype +from wikibaseintegrator import wbi_functions, wbi_datatype, wbi_item class TestWbiCore(unittest.TestCase): - common_item = wbi_core.ItemEngine(item_id="Q2") + common_item = wbi_item.Item(item_id="Q2") def test_item_engine(self): - wbi_core.ItemEngine(debug=True) - wbi_core.ItemEngine(data=None, debug=True) - wbi_core.ItemEngine(data=wbi_datatype.String(value='test', prop_nr='P1'), debug=True) - wbi_core.ItemEngine(data=[wbi_datatype.String(value='test', prop_nr='P1')], debug=True) + wbi_item.Item(debug=True) + wbi_item.Item(data=None, debug=True) + wbi_item.Item(data=wbi_datatype.String(value='test', prop_nr='P1'), debug=True) + wbi_item.Item(data=[wbi_datatype.String(value='test', prop_nr='P1')], debug=True) with self.assertRaises(TypeError): - wbi_core.ItemEngine(data='test', debug=True) + wbi_item.Item(data='test', debug=True) with self.assertRaises(ValueError): - wbi_core.ItemEngine(fast_run_case_insensitive=True, debug=True) + wbi_item.Item(fast_run_case_insensitive=True, debug=True) with self.assertRaises(TypeError): - wbi_core.ItemEngine(ref_handler='test', debug=True) + wbi_item.Item(ref_handler='test', debug=True) with self.assertRaises(ValueError): - wbi_core.ItemEngine(global_ref_mode='CUSTOM', debug=True) - wbi_core.ItemEngine(item_id='Q2', fast_run=True, debug=True) + wbi_item.Item(global_ref_mode='CUSTOM', debug=True) + wbi_item.Item(item_id='Q2', fast_run=True, debug=True) def test_search_only(self): - item 
= wbi_core.ItemEngine(item_id="Q2", search_only=True) + item = wbi_item.Item(item_id="Q2", search_only=True) assert item.get_label('en') == "Earth" descr = item.get_description('en') @@ -39,25 +39,25 @@ def test_basedatatype_if_exists(self): instance_of_replace = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='REPLACE') instance_of_keep = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='KEEP') - item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_append, instance_of_append]) + item = wbi_item.Item(item_id="Q2", data=[instance_of_append, instance_of_append]) claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']] assert len(claims) > 1 and 'Q1234' in claims and claims.count('Q1234') == 1 - item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_forceappend, instance_of_forceappend]) + item = wbi_item.Item(item_id="Q2", data=[instance_of_forceappend, instance_of_forceappend]) claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']] assert len(claims) > 1 and 'Q1234' in claims and claims.count('Q1234') == 2 - item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_replace], debug=True) + item = wbi_item.Item(item_id="Q2", data=[instance_of_replace], debug=True) claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31'] if 'remove' not in x] removed_claims = [True for x in item.get_json_representation()['claims']['P31'] if 'remove' in x] assert len(claims) == 1 and 'Q1234' in claims and len(removed_claims) == 2 and True in removed_claims - item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_keep], debug=True) + item = wbi_item.Item(item_id="Q2", data=[instance_of_keep], debug=True) claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']] assert len(claims) == 2 and 'Q1234' not in claims def test_label(self): - item = 
wbi_core.ItemEngine(item_id="Q2") + item = wbi_item.Item(item_id="Q2") assert item.get_label('en') == "Earth" descr = item.get_description('en') @@ -155,18 +155,18 @@ def test_new_item_creation(self): core_props = set(["P{}".format(x) for x in range(20)]) for d in data: - item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=core_props) + item = wbi_item.Item(new_item=True, data=[d], core_props=core_props) assert item.get_json_representation() - item = wbi_core.ItemEngine(new_item=True, data=d, core_props=core_props) + item = wbi_item.Item(new_item=True, data=d, core_props=core_props) assert item.get_json_representation() - item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=set()) + item = wbi_item.Item(new_item=True, data=[d], core_props=set()) assert item.get_json_representation() - item = wbi_core.ItemEngine(new_item=True, data=d, core_props=set()) + item = wbi_item.Item(new_item=True, data=d, core_props=set()) assert item.get_json_representation() - item = wbi_core.ItemEngine(new_item=True, data=data, core_props=core_props) + item = wbi_item.Item(new_item=True, data=data, core_props=core_props) assert item.get_json_representation() - item = wbi_core.ItemEngine(new_item=True, data=data, core_props=set()) + item = wbi_item.Item(new_item=True, data=data, core_props=set()) assert item.get_json_representation() def test_get_property_list(self): diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py index 40bc31b0..8347ac32 100644 --- a/test/test_wbi_fastrun.py +++ b/test/test_wbi_fastrun.py @@ -1,4 +1,4 @@ -from wikibaseintegrator import wbi_core, wbi_fastrun, wbi_datatype +from wikibaseintegrator import wbi_fastrun, wbi_datatype, wbi_item wbi_fastrun.FastRunContainer.debug = True @@ -11,7 +11,7 @@ def test_query_data(): This tests that the fast run container correctly queries data from wikidata and stores it in the appropriate format without getting references """ - frc = wbi_fastrun.FastRunContainer(base_filter={'P699': ''}, 
base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine) + frc = wbi_fastrun.FastRunContainer(base_filter={'P699': ''}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_item.Item) # get a string value frc._query_data('P699') # wikidata-item value @@ -42,7 +42,7 @@ def test_query_data_unit(): test_fastrun.test_query_data_unit This hits live wikidata and may change !! """ - frc = wbi_fastrun.FastRunContainer(base_filter={'P2044': '', 'P30': 'Q46'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine, debug=True) + frc = wbi_fastrun.FastRunContainer(base_filter={'P2044': '', 'P30': 'Q46'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_item.Item, debug=True) # get a quantity value frc._query_data('P2044', use_units=True) @@ -60,7 +60,7 @@ def test_query_data_ref(): This tests that the fast run container correctly queries data from wikidata and stores it in the appropriate format WITH getting references """ - frc = wbi_fastrun.FastRunContainer(base_filter={'P699': ''}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine, use_refs=True) + frc = wbi_fastrun.FastRunContainer(base_filter={'P699': ''}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_item.Item, use_refs=True) frc._query_data('P699') # https://www.wikidata.org/wiki/Q10874 @@ -113,7 +113,7 @@ def __init__(self, *args, **kwargs): def test_fastrun_ref_ensembl(): # fastrun checks refs - frc = FastRunContainerFakeQueryDataEnsembl(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine, use_refs=True) + frc = FastRunContainerFakeQueryDataEnsembl(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_item.Item, use_refs=True) # statement has no ref frc.debug = True @@ -122,21 +122,23 @@ def test_fastrun_ref_ensembl(): # statement has the same ref statements = [wbi_datatype.ExternalID(value='ENSG00000123374', prop_nr='P594', - 
references=[[wbi_datatype.ItemID("Q29458763", "P248", is_reference=True), wbi_datatype.ExternalID("ENSG00000123374", "P594", is_reference=True)]])] + references=[[wbi_datatype.ItemID("Q29458763", "P248", is_reference=True), + wbi_datatype.ExternalID("ENSG00000123374", "P594", is_reference=True)]])] assert not frc.write_required(data=statements) # new statement has an different stated in statements = [wbi_datatype.ExternalID(value='ENSG00000123374', prop_nr='P594', - references=[[wbi_datatype.ItemID("Q99999999999", "P248", is_reference=True), wbi_datatype.ExternalID("ENSG00000123374", "P594", is_reference=True)]])] + references=[[wbi_datatype.ItemID("Q99999999999", "P248", is_reference=True), + wbi_datatype.ExternalID("ENSG00000123374", "P594", is_reference=True)]])] assert frc.write_required(data=statements) # fastrun don't check references, statement has no reference, - frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine, use_refs=False) + frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_item.Item, use_refs=False) statements = [wbi_datatype.ExternalID(value='ENSG00000123374', prop_nr='P594')] assert not frc.write_required(data=statements) # fastrun don't check references, statement has reference, - frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine, use_refs=False) + frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_item.Item, use_refs=False) statements = [wbi_datatype.ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[wbi_datatype.ItemID("Q123", "P31", is_reference=True)]])] assert not frc.write_required(data=statements) @@ -177,7 
+179,7 @@ def test_append_props(): # https://www.wikidata.org/wiki/Q3402672#P527 # don't consider refs - frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine) + frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_item.Item) # with append statements = [wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', if_exists='APPEND')] assert frc.write_required(data=statements, cqid=qid) is False @@ -189,7 +191,7 @@ def test_append_props(): assert frc.write_required(data=statements, cqid=qid) is True # if we are in append mode, and the refs are different, we should write - frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine, use_refs=True) + frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_item.Item, use_refs=True) # with append statements = [wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', if_exists='APPEND')] assert frc.write_required(data=statements, cqid=qid) is True diff --git a/wikibaseintegrator/wbi_core.py b/wikibaseintegrator/wbi_core.py index 6a406929..1aa64895 100644 --- a/wikibaseintegrator/wbi_core.py +++ b/wikibaseintegrator/wbi_core.py @@ -2,7 +2,7 @@ import json from collections import defaultdict -from wikibaseintegrator import wbi_functions +from wikibaseintegrator import wbi_functions, wbi_item from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_datatype import BaseDataType from wikibaseintegrator.wbi_exceptions import (IDMissingError, SearchError, SearchOnlyError, NonUniqueLabelDescriptionPairError, MWApiError, CorePropIntegrityException, @@ -10,7 +10,7 @@ from wikibaseintegrator.wbi_fastrun import FastRunContainer -class ItemEngine(object): +class Core(object): fast_run_store = [] 
distinct_value_props = {} @@ -136,12 +136,12 @@ def __init__(self, item_id='', new_item=False, data=None, mediawiki_api_url=None if self.global_ref_mode == 'CUSTOM' and self.ref_handler is None: raise ValueError("If using a custom ref mode, ref_handler must be set") - if (core_props is None) and (self.sparql_endpoint_url not in ItemEngine.distinct_value_props): - ItemEngine.distinct_value_props[self.sparql_endpoint_url] = wbi_functions.get_distinct_value_props(self.sparql_endpoint_url, + if (core_props is None) and (self.sparql_endpoint_url not in wbi_item.Item.distinct_value_props): + wbi_item.Item.distinct_value_props[self.sparql_endpoint_url] = wbi_functions.get_distinct_value_props(self.sparql_endpoint_url, self.wikibase_url, self.property_constraint_pid, self.distinct_values_constraint_qid) - self.core_props = core_props if core_props is not None else ItemEngine.distinct_value_props[self.sparql_endpoint_url] + self.core_props = core_props if core_props is not None else wbi_item.Item.distinct_value_props[self.sparql_endpoint_url] if self.fast_run: self.init_fastrun() @@ -195,7 +195,7 @@ def init_data_load(self): def init_fastrun(self): # We search if we already have a FastRunContainer with the same parameters to re-use it - for c in ItemEngine.fast_run_store: + for c in wbi_item.Item.fast_run_store: if (c.base_filter == self.fast_run_base_filter) and (c.use_refs == self.fast_run_use_refs) and (c.sparql_endpoint_url == self.sparql_endpoint_url): self.fast_run_container = c self.fast_run_container.ref_handler = self.ref_handler @@ -219,7 +219,7 @@ def init_fastrun(self): ref_handler=self.ref_handler, case_insensitive=self.fast_run_case_insensitive, debug=self.debug) - ItemEngine.fast_run_store.append(self.fast_run_container) + wbi_item.Item.fast_run_store.append(self.fast_run_container) if not self.search_only: self.require_write = self.fast_run_container.write_required(self.data, cqid=self.item_id) diff --git a/wikibaseintegrator/wbi_exceptions.py 
b/wikibaseintegrator/wbi_exceptions.py index 7cf3450d..a2ccdb31 100644 --- a/wikibaseintegrator/wbi_exceptions.py +++ b/wikibaseintegrator/wbi_exceptions.py @@ -84,5 +84,5 @@ def __str__(self): class SearchOnlyError(Exception): - """Raised when the ItemEngine is in search_only mode""" + """Raised when in search_only mode""" pass diff --git a/wikibaseintegrator/wbi_functions.py b/wikibaseintegrator/wbi_functions.py index 1c4d6c8f..58825246 100644 --- a/wikibaseintegrator/wbi_functions.py +++ b/wikibaseintegrator/wbi_functions.py @@ -330,23 +330,23 @@ def generate_item_instances(items, mediawiki_api_url=None, login=None, allow_ano """ A method which allows for retrieval of a list of Wikidata items or properties. The method generates a list of tuples where the first value in the tuple is the QID or property ID, whereas the second is the new instance of - ItemEngine containing all the data of the item. This is most useful for mass retrieval of items. + wbi_item.Item containing all the data of the item. This is most useful for mass retrieval of items. :param user_agent: A custom user agent :type user_agent: str :param items: A list of QIDs or property IDs :type items: list :param mediawiki_api_url: The MediaWiki url which should be used :type mediawiki_api_url: str - :return: A list of tuples, first value in the tuple is the QID or property ID string, second value is the instance of ItemEngine with the corresponding + :return: A list of tuples, first value in the tuple is the QID or property ID string, second value is the instance of wbi_item.Item with the corresponding item data. :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. 
:type allow_anonymous: bool """ - assert type(items) == list + from wikibaseintegrator import wbi_item - from wikibaseintegrator.wbi_core import ItemEngine + assert type(items) == list params = { 'action': 'wbgetentities', @@ -358,7 +358,7 @@ def generate_item_instances(items, mediawiki_api_url=None, login=None, allow_ano item_instances = [] for qid, v in reply['entities'].items(): - ii = ItemEngine(item_id=qid, item_data=v) + ii = wbi_item.Item(item_id=qid, item_data=v) ii.mediawiki_api_url = mediawiki_api_url item_instances.append((qid, ii)) diff --git a/wikibaseintegrator/wbi_item.py b/wikibaseintegrator/wbi_item.py new file mode 100644 index 00000000..a263d5c8 --- /dev/null +++ b/wikibaseintegrator/wbi_item.py @@ -0,0 +1,5 @@ +from wikibaseintegrator.wbi_core import Core + + +class Item(Core): + pass diff --git a/wikibaseintegrator/wbi_lexeme.py b/wikibaseintegrator/wbi_lexeme.py new file mode 100644 index 00000000..21ab5595 --- /dev/null +++ b/wikibaseintegrator/wbi_lexeme.py @@ -0,0 +1,5 @@ +from wikibaseintegrator.wbi_core import Core + + +class Lexeme(Core): + pass From ca651ccef88511344c2225732c93365e713c020d Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 12 May 2021 11:26:10 +0200 Subject: [PATCH 002/308] Disable GitHub actions for this branch --- {.github => .github_disabled}/dependabot.yml | 0 {.github => .github_disabled}/workflows/codeql-analysis.yml | 0 {.github => .github_disabled}/workflows/publish-to-pypi.yml | 0 {.github => .github_disabled}/workflows/python-package.yml | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename {.github => .github_disabled}/dependabot.yml (100%) rename {.github => .github_disabled}/workflows/codeql-analysis.yml (100%) rename {.github => .github_disabled}/workflows/publish-to-pypi.yml (100%) rename {.github => .github_disabled}/workflows/python-package.yml (100%) diff --git a/.github/dependabot.yml b/.github_disabled/dependabot.yml similarity index 100% rename from 
.github/dependabot.yml rename to .github_disabled/dependabot.yml diff --git a/.github/workflows/codeql-analysis.yml b/.github_disabled/workflows/codeql-analysis.yml similarity index 100% rename from .github/workflows/codeql-analysis.yml rename to .github_disabled/workflows/codeql-analysis.yml diff --git a/.github/workflows/publish-to-pypi.yml b/.github_disabled/workflows/publish-to-pypi.yml similarity index 100% rename from .github/workflows/publish-to-pypi.yml rename to .github_disabled/workflows/publish-to-pypi.yml diff --git a/.github/workflows/python-package.yml b/.github_disabled/workflows/python-package.yml similarity index 100% rename from .github/workflows/python-package.yml rename to .github_disabled/workflows/python-package.yml From 3a2e177dacf9508c5fd029b9269a3da325913a19 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 12 May 2021 11:26:40 +0200 Subject: [PATCH 003/308] Create Property stub --- wikibaseintegrator/wbi_property.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 wikibaseintegrator/wbi_property.py diff --git a/wikibaseintegrator/wbi_property.py b/wikibaseintegrator/wbi_property.py new file mode 100644 index 00000000..da338d28 --- /dev/null +++ b/wikibaseintegrator/wbi_property.py @@ -0,0 +1,5 @@ +from wikibaseintegrator.wbi_core import Core + + +class Property(Core): + pass From 097c52bfe6d155e5e96130d5a037215896b69b05 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 12 May 2021 11:26:51 +0200 Subject: [PATCH 004/308] Change datatype ItemID by Item --- wikibaseintegrator/wbi_datatype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wikibaseintegrator/wbi_datatype.py b/wikibaseintegrator/wbi_datatype.py index 7b8b93c4..52d7a7d8 100644 --- a/wikibaseintegrator/wbi_datatype.py +++ b/wikibaseintegrator/wbi_datatype.py @@ -675,7 +675,7 @@ def from_json(cls, jsn): prop_nr=jsn['property']) -class ItemID(BaseDataType): +class 
Item(BaseDataType): """ Implements the Wikibase data type 'wikibase-item' with a value being another item ID """ From a4e27e3609602f5d252bb8097293c8699606e3e2 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 12 May 2021 11:27:09 +0200 Subject: [PATCH 005/308] Create WikibaseIntegrator stub --- wikibaseintegrator/__init__.py | 1 + wikibaseintegrator/wikibaseintegrator.py | 39 ++++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 wikibaseintegrator/wikibaseintegrator.py diff --git a/wikibaseintegrator/__init__.py b/wikibaseintegrator/__init__.py index e69de29b..12569926 100644 --- a/wikibaseintegrator/__init__.py +++ b/wikibaseintegrator/__init__.py @@ -0,0 +1 @@ +from wikibaseintegrator.wikibaseintegrator import WikibaseIntegrator diff --git a/wikibaseintegrator/wikibaseintegrator.py b/wikibaseintegrator/wikibaseintegrator.py new file mode 100644 index 00000000..a09a9407 --- /dev/null +++ b/wikibaseintegrator/wikibaseintegrator.py @@ -0,0 +1,39 @@ +from wikibaseintegrator.wbi_core import Core +from wikibaseintegrator.wbi_item import Item +from wikibaseintegrator.wbi_lexeme import Lexeme +from wikibaseintegrator.wbi_property import Property + +DEFAULT_CONFIG = { + 'BACKOFF_MAX_TRIES': None, + 'BACKOFF_MAX_VALUE': 3600, + 'USER_AGENT_DEFAULT': "WikibaseIntegrator/{} (https://github.com/LeMyst/WikibaseIntegrator)".format(__version__), + 'MAXLAG': 5, + 'PROPERTY_CONSTRAINT_PID': 'P2302', + 'DISTINCT_VALUES_CONSTRAINT_QID': 'Q21502410', + 'COORDINATE_GLOBE_QID': 'http://www.wikidata.org/entity/Q2', + 'CALENDAR_MODEL_QID': 'http://www.wikidata.org/entity/Q1985727', + 'MEDIAWIKI_API_URL': 'https://www.wikidata.org/w/api.php', + 'MEDIAWIKI_INDEX_URL': 'https://www.wikidata.org/w/index.php', + 'MEDIAWIKI_REST_URL': 'https://www.wikidata.org/w/rest.php', + 'SPARQL_ENDPOINT_URL': 'https://query.wikidata.org/sparql', + 'WIKIBASE_URL': 'http://www.wikidata.org', + 'LANGUAGE': 'en' +} + + +class 
WikibaseIntegrator(object): + def __init__(self, + mediawiki_api_url=DEFAULT_CONFIG["MEDIAWIKI_API_URL"], + mediawiki_index_url=DEFAULT_CONFIG["MEDIAWIKI_INDEX_URL"], + mediawiki_rest_url=DEFAULT_CONFIG["MEDIAWIKI_REST_URL"], + sparql_endpoint_url=DEFAULT_CONFIG["MEDIAWIKI_API_URL"], + wikibase_url=DEFAULT_CONFIG["WIKIBASE_URL"], + is_bot=False, + language=DEFAULT_CONFIG["LANGUAGE"]): + core = Core() + + self.core = core + + self.item = Item(core) + self.property = Property(core) + self.lexeme = Lexeme(core) From 48261cd371e5fa3d619ff59960f9de1a7488029e Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 12 May 2021 19:39:16 +0200 Subject: [PATCH 006/308] Move things from item to core --- wikibaseintegrator/wbi_core.py | 323 +++------------------------------ wikibaseintegrator/wbi_item.py | 290 ++++++++++++++++++++++++++++- 2 files changed, 310 insertions(+), 303 deletions(-) diff --git a/wikibaseintegrator/wbi_core.py b/wikibaseintegrator/wbi_core.py index 1aa64895..51fdae1d 100644 --- a/wikibaseintegrator/wbi_core.py +++ b/wikibaseintegrator/wbi_core.py @@ -1,166 +1,21 @@ import copy import json -from collections import defaultdict -from wikibaseintegrator import wbi_functions, wbi_item -from wikibaseintegrator.wbi_config import config +from wikibaseintegrator import wbi_functions from wikibaseintegrator.wbi_datatype import BaseDataType -from wikibaseintegrator.wbi_exceptions import (IDMissingError, SearchError, SearchOnlyError, NonUniqueLabelDescriptionPairError, MWApiError, CorePropIntegrityException, - ManualInterventionReqException) -from wikibaseintegrator.wbi_fastrun import FastRunContainer +from wikibaseintegrator.wbi_exceptions import SearchOnlyError, NonUniqueLabelDescriptionPairError, MWApiError, ManualInterventionReqException, CorePropIntegrityException class Core(object): fast_run_store = [] distinct_value_props = {} - def __init__(self, item_id='', new_item=False, data=None, mediawiki_api_url=None, 
sparql_endpoint_url=None, wikibase_url=None, fast_run=False, fast_run_base_filter=None, - fast_run_use_refs=False, ref_handler=None, global_ref_mode='KEEP_GOOD', good_refs=None, keep_good_ref_statements=False, search_only=False, item_data=None, - user_agent=None, core_props=None, core_prop_match_thresh=0.66, property_constraint_pid=None, distinct_values_constraint_qid=None, fast_run_case_insensitive=False, - debug=False) -> None: - """ - constructor - :param item_id: Wikibase item id - :type item_id: str - :param new_item: This parameter lets the user indicate if a new item should be created - :type new_item: bool - :param data: a dictionary with property strings as keys and the data which should be written to a item as the property values - :type data: list[BaseDataType] or BaseDataType or None - :param mediawiki_api_url: - :type mediawiki_api_url: str - :param sparql_endpoint_url: - :type sparql_endpoint_url: str - :param wikibase_url: - :type wikibase_url: str - :param fast_run: True if this item should be run in fastrun mode, otherwise False. User setting this to True should also specify the - fast_run_base_filter for these item types - :type fast_run: bool - :param fast_run_base_filter: A property value dict determining the Wikibase property and the corresponding value which should be used as a filter for - this item type. Several filter criteria can be specified. The values can be either Wikibase item QIDs, strings or empty strings if the value should - be a variable in SPARQL. - Example: {'P352': '', 'P703': 'Q15978631'} if the basic common type of things this bot runs on is human proteins (specified by Uniprot IDs (P352) - and 'found in taxon' homo sapiens 'Q15978631'). - :type fast_run_base_filter: dict - :param fast_run_use_refs: If `True`, fastrun mode will consider references in determining if a statement should be updated and written to Wikibase. - Otherwise, only the value and qualifiers are used. 
Default: False - :type fast_run_use_refs: bool - :param ref_handler: This parameter defines a function that will manage the reference handling in a custom manner. This argument should be a function - handle that accepts two arguments, the old/current statement (first argument) and new/proposed/to be written statement (second argument), both of - type: a subclass of BaseDataType. The function should return an new item that is the item to be written. The item's values properties or qualifiers - should not be modified; only references. This function is also used in fastrun mode. This will only be used if the ref_mode is set to "CUSTOM". - :type ref_handler: function - :param global_ref_mode: sets the reference handling mode for an item. Four modes are possible, 'STRICT_KEEP' keeps all references as they are, - 'STRICT_KEEP_APPEND' keeps the references as they are and appends new ones. 'STRICT_OVERWRITE' overwrites all existing references for given. - 'KEEP_GOOD' will use the refs defined in good_refs. 'CUSTOM' will use the function defined in ref_handler - :type global_ref_mode: str - :param good_refs: This parameter lets the user define blocks of good references. It is a list of dictionaries. One block is a dictionary with Wikidata - properties as keys and potential values as the required value for a property. There can be arbitrarily many key: value pairs in one reference block. - Example: [{'P248': 'Q905695', 'P352': None, 'P407': None, 'P1476': None, 'P813': None}] This example contains one good reference block, stated in: - Uniprot, Uniprot ID, title of Uniprot entry, language of work and date when the information has been retrieved. A None type indicates that the value - varies from reference to reference. In this case, only the value for the Wikidata item for the Uniprot database stays stable over all of these - references. Key value pairs work here, as Wikidata references can hold only one value for one property. 
The number of good reference blocks is not - limited. This parameter OVERRIDES any other reference mode set!! - :type good_refs: list[dict] - :param keep_good_ref_statements: Do not delete any statement which has a good reference, either defined in the good_refs list or by any other - referencing mode. - :type keep_good_ref_statements: bool - :param search_only: If this flag is set to True, the data provided will only be used to search for the corresponding Wikibase item, but no actual data - updates will performed. This is useful, if certain states or values on the target item need to be checked before certain data is written to it. In - order to write new data to the item, the method update() will take data, modify the Wikibase item and a write() call will then perform the actual - write to the Wikibase instance. - :type search_only: bool - :param item_data: A Python JSON object corresponding to the item in item_id. This can be used in conjunction with item_id in order to provide raw data. - :type item_data: - :param user_agent: The user agent string to use when making http requests - :type user_agent: str - :param core_props: Core properties are used to retrieve an item based on `data` if a `item_id` is not given. This is a set of PIDs to use. If None, - all Wikibase properties with a distinct values constraint will be used. (see: get_core_props) - :type core_props: set - :param core_prop_match_thresh: The proportion of core props that must match during retrieval of an item when the item_id is not specified. - :type core_prop_match_thresh: float - :param property_constraint_pid: - :param distinct_values_constraint_qid: - :param fast_run_case_insensitive: - :param debug: Enable debug output. 
- :type debug: boolean - """ - - self.core_prop_match_thresh = core_prop_match_thresh - self.item_id = item_id - self.new_item = new_item - self.mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - self.sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if sparql_endpoint_url is None else sparql_endpoint_url - self.wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url - self.property_constraint_pid = config['PROPERTY_CONSTRAINT_PID'] if property_constraint_pid is None else property_constraint_pid - self.distinct_values_constraint_qid = config['DISTINCT_VALUES_CONSTRAINT_QID'] if distinct_values_constraint_qid is None else distinct_values_constraint_qid - if data is None: - self.data = [] - elif isinstance(data, list) and all(isinstance(x, BaseDataType) for x in data): - self.data = data - elif isinstance(data, BaseDataType): - self.data = [data] - else: - raise TypeError("`data` must be a list of BaseDataType or an instance of BaseDataType") - self.fast_run = fast_run - self.fast_run_base_filter = fast_run_base_filter - self.fast_run_use_refs = fast_run_use_refs - self.fast_run_case_insensitive = fast_run_case_insensitive - self.ref_handler = ref_handler - self.global_ref_mode = global_ref_mode - self.good_refs = good_refs - self.keep_good_ref_statements = keep_good_ref_statements - self.search_only = search_only - self.item_data = item_data - self.user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent - - self.create_new_item = False + def __init__(self, debug=False): self.json_representation = {} + self.debug = debug + self.create_new_item = False self.statements = [] self.original_statements = [] - self.entity_metadata = {} - self.fast_run_container = None - if self.search_only: - self.require_write = False - else: - self.require_write = True - self.sitelinks = {} - self.lastrevid = None # stores last revisionid after a write occurs - - self.debug = debug - - if 
fast_run_case_insensitive and not self.search_only: - raise ValueError("If using fast run case insensitive, search_only must be set") - - if self.ref_handler and not callable(self.ref_handler): - raise TypeError("ref_handler must be callable") - if self.global_ref_mode == 'CUSTOM' and self.ref_handler is None: - raise ValueError("If using a custom ref mode, ref_handler must be set") - - if (core_props is None) and (self.sparql_endpoint_url not in wbi_item.Item.distinct_value_props): - wbi_item.Item.distinct_value_props[self.sparql_endpoint_url] = wbi_functions.get_distinct_value_props(self.sparql_endpoint_url, - self.wikibase_url, - self.property_constraint_pid, - self.distinct_values_constraint_qid) - self.core_props = core_props if core_props is not None else wbi_item.Item.distinct_value_props[self.sparql_endpoint_url] - - if self.fast_run: - self.init_fastrun() - if self.debug: - if self.require_write: - if self.search_only: - print("Successful fastrun, search_only mode, we can't determine if data is up to date.") - else: - print("Successful fastrun, because no full data match you need to update the item.") - else: - print("Successful fastrun, no write to Wikibase instance required.") - - if self.item_id != '' and self.create_new_item: - raise IDMissingError("Cannot create a new item, when an identifier is given.") - elif self.new_item and len(self.data) > 0: - self.create_new_item = True - self.__construct_claim_json() - elif self.require_write or self.search_only: - self.init_data_load() def init_data_load(self): if self.item_id and self.item_data: @@ -193,45 +48,6 @@ def init_data_load(self): else: self.data = [] - def init_fastrun(self): - # We search if we already have a FastRunContainer with the same parameters to re-use it - for c in wbi_item.Item.fast_run_store: - if (c.base_filter == self.fast_run_base_filter) and (c.use_refs == self.fast_run_use_refs) and (c.sparql_endpoint_url == self.sparql_endpoint_url): - self.fast_run_container = c - 
self.fast_run_container.ref_handler = self.ref_handler - self.fast_run_container.current_qid = '' - self.fast_run_container.base_data_type = BaseDataType - self.fast_run_container.engine = self.__class__ - self.fast_run_container.mediawiki_api_url = self.mediawiki_api_url - self.fast_run_container.wikibase_url = self.wikibase_url - self.fast_run_container.debug = self.debug - if self.debug: - print("Found an already existing FastRunContainer") - - if not self.fast_run_container: - self.fast_run_container = FastRunContainer(base_filter=self.fast_run_base_filter, - base_data_type=BaseDataType, - engine=self.__class__, - sparql_endpoint_url=self.sparql_endpoint_url, - mediawiki_api_url=self.mediawiki_api_url, - wikibase_url=self.wikibase_url, - use_refs=self.fast_run_use_refs, - ref_handler=self.ref_handler, - case_insensitive=self.fast_run_case_insensitive, - debug=self.debug) - wbi_item.Item.fast_run_store.append(self.fast_run_container) - - if not self.search_only: - self.require_write = self.fast_run_container.write_required(self.data, cqid=self.item_id) - # set item id based on fast run data - if not self.require_write and not self.item_id: - self.item_id = self.fast_run_container.current_qid - else: - self.fast_run_container.load_item(self.data) - # set item id based on fast run data - if not self.item_id: - self.item_id = self.fast_run_container.current_qid - def parse_json(self, json_data): """ Parses an entity json and generates the datatype objects, sets self.json_representation @@ -257,75 +73,6 @@ def parse_json(self, json_data): return data - def update(self, data): - """ - This method takes data, and modifies the Wikidata item. This works together with the data already provided via the constructor or if the constructor is - being instantiated with search_only=True. In the latter case, this allows for checking the item data before deciding which new data should be written to - the Wikidata item. 
The actual write to Wikidata only happens on calling of the write() method. If data has been provided already via the constructor, - data provided via the update() method will be appended to these data. - :param data: A list of Wikidata statment items inheriting from BaseDataType - :type data: list - """ - - if self.search_only: - raise SearchOnlyError - - assert type(data) == list - - self.data.extend(data) - self.statements = copy.deepcopy(self.original_statements) - - if self.debug: - print(self.data) - - if self.fast_run: - self.init_fastrun() - - if self.require_write and self.fast_run: - self.init_data_load() - self.__construct_claim_json() - self.__check_integrity() - elif not self.fast_run: - self.__construct_claim_json() - self.__check_integrity() - - def get_entity(self): - """ - retrieve an item in json representation from the Wikibase instance - :rtype: dict - :return: python complex dictionary representation of a json - """ - - params = { - 'action': 'wbgetentities', - 'sites': 'enwiki', - 'ids': self.item_id, - 'format': 'json' - } - - json_data = wbi_functions.mediawiki_api_call_helper(data=params, allow_anonymous=True) - return self.parse_json(json_data=json_data['entities'][self.item_id]) - - def get_property_list(self): - """ - List of properties on the current item - :return: a list of property ID strings (Pxxxx). 
- """ - - property_list = set() - for x in self.statements: - property_list.add(x.get_prop_nr()) - - return list(property_list) - - def get_json_representation(self): - """ - A method to access the internal json representation of the item, mainly for testing - :return: returns a Python json representation object of the item at the current state of the instance - """ - - return self.json_representation - def get_label(self, lang=None): """ Returns the label for a certain language @@ -523,57 +270,29 @@ def set_description(self, description, lang=None, if_exists='REPLACE'): 'value': description } - def get_sitelink(self, site): + def get_entity(self): """ - A method to access the interwiki links in the json.model - :param site: The Wikipedia site the interwiki/sitelink should be returned for - :return: The interwiki/sitelink string for the specified Wikipedia will be returned. + retrieve an item in json representation from the Wikibase instance + :rtype: dict + :return: python complex dictionary representation of a json """ - if site in self.sitelinks: - return self.sitelinks[site] - else: - return None + params = { + 'action': 'wbgetentities', + 'ids': self.item_id, + 'format': 'json' + } - def set_sitelink(self, site, title, badges=()): + json_data = wbi_functions.mediawiki_api_call_helper(data=params, allow_anonymous=True) + return self.parse_json(json_data=json_data['entities'][self.item_id]) + + def get_json_representation(self): """ - Set sitelinks to corresponding Wikipedia pages - :param site: The Wikipedia page a sitelink is directed to (e.g. 'enwiki') - :param title: The title of the Wikipedia page the sitelink is directed to - :param badges: An iterable containing Wikipedia badge strings. 
- :return: + A method to access the internal json representation of the item, mainly for testing + :return: returns a Python json representation object of the item at the current state of the instance """ - if self.search_only: - raise SearchOnlyError - - sitelink = { - 'site': site, - 'title': title, - 'badges': badges - } - self.json_representation['sitelinks'][site] = sitelink - self.sitelinks[site] = sitelink - - def count_references(self, prop_id): - counts = {} - for claim in self.get_json_representation()['claims'][prop_id]: - counts[claim['id']] = len(claim['references']) - return counts - - def get_reference_properties(self, prop_id): - references = [] - statements = [x for x in self.get_json_representation()['claims'][prop_id] if 'references' in x] - for statement in statements: - for reference in statement['references']: - references.append(reference['snaks'].keys()) - return references - - def get_qualifier_properties(self, prop_id): - qualifiers = [] - for statements in self.get_json_representation()['claims'][prop_id]: - qualifiers.append(statements['qualifiers'].keys()) - return qualifiers + return self.json_representation def write(self, login, bot_account=True, edit_summary='', entity_type='item', property_datatype='string', max_retries=1000, retry_after=60, all_claims=False, allow_anonymous=False): diff --git a/wikibaseintegrator/wbi_item.py b/wikibaseintegrator/wbi_item.py index a263d5c8..177a4072 100644 --- a/wikibaseintegrator/wbi_item.py +++ b/wikibaseintegrator/wbi_item.py @@ -1,5 +1,293 @@ +import copy + +from wikibaseintegrator import wbi_functions +from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_core import Core +from wikibaseintegrator.wbi_datatype import BaseDataType +from wikibaseintegrator.wbi_exceptions import (IDMissingError, SearchOnlyError) +from wikibaseintegrator.wbi_fastrun import FastRunContainer class Item(Core): - pass + fast_run_store = [] + distinct_value_props = {} + + def __init__(self, 
item_id='', new_item=False, data=None, mediawiki_api_url=None, sparql_endpoint_url=None, wikibase_url=None, fast_run=False, fast_run_base_filter=None, + fast_run_use_refs=False, ref_handler=None, global_ref_mode='KEEP_GOOD', good_refs=None, keep_good_ref_statements=False, search_only=False, item_data=None, + user_agent=None, core_props=None, core_prop_match_thresh=0.66, property_constraint_pid=None, distinct_values_constraint_qid=None, fast_run_case_insensitive=False, + debug=False) -> None: + """ + constructor + :param item_id: Wikibase item id + :type item_id: str + :param new_item: This parameter lets the user indicate if a new item should be created + :type new_item: bool + :param data: a dictionary with property strings as keys and the data which should be written to a item as the property values + :type data: list[BaseDataType] or BaseDataType or None + :param mediawiki_api_url: + :type mediawiki_api_url: str + :param sparql_endpoint_url: + :type sparql_endpoint_url: str + :param wikibase_url: + :type wikibase_url: str + :param fast_run: True if this item should be run in fastrun mode, otherwise False. User setting this to True should also specify the + fast_run_base_filter for these item types + :type fast_run: bool + :param fast_run_base_filter: A property value dict determining the Wikibase property and the corresponding value which should be used as a filter for + this item type. Several filter criteria can be specified. The values can be either Wikibase item QIDs, strings or empty strings if the value should + be a variable in SPARQL. + Example: {'P352': '', 'P703': 'Q15978631'} if the basic common type of things this bot runs on is human proteins (specified by Uniprot IDs (P352) + and 'found in taxon' homo sapiens 'Q15978631'). + :type fast_run_base_filter: dict + :param fast_run_use_refs: If `True`, fastrun mode will consider references in determining if a statement should be updated and written to Wikibase. 
+ Otherwise, only the value and qualifiers are used. Default: False + :type fast_run_use_refs: bool + :param ref_handler: This parameter defines a function that will manage the reference handling in a custom manner. This argument should be a function + handle that accepts two arguments, the old/current statement (first argument) and new/proposed/to be written statement (second argument), both of + type: a subclass of BaseDataType. The function should return an new item that is the item to be written. The item's values properties or qualifiers + should not be modified; only references. This function is also used in fastrun mode. This will only be used if the ref_mode is set to "CUSTOM". + :type ref_handler: function + :param global_ref_mode: sets the reference handling mode for an item. Four modes are possible, 'STRICT_KEEP' keeps all references as they are, + 'STRICT_KEEP_APPEND' keeps the references as they are and appends new ones. 'STRICT_OVERWRITE' overwrites all existing references for given. + 'KEEP_GOOD' will use the refs defined in good_refs. 'CUSTOM' will use the function defined in ref_handler + :type global_ref_mode: str + :param good_refs: This parameter lets the user define blocks of good references. It is a list of dictionaries. One block is a dictionary with Wikidata + properties as keys and potential values as the required value for a property. There can be arbitrarily many key: value pairs in one reference block. + Example: [{'P248': 'Q905695', 'P352': None, 'P407': None, 'P1476': None, 'P813': None}] This example contains one good reference block, stated in: + Uniprot, Uniprot ID, title of Uniprot entry, language of work and date when the information has been retrieved. A None type indicates that the value + varies from reference to reference. In this case, only the value for the Wikidata item for the Uniprot database stays stable over all of these + references. 
Key value pairs work here, as Wikidata references can hold only one value for one property. The number of good reference blocks is not + limited. This parameter OVERRIDES any other reference mode set!! + :type good_refs: list[dict] + :param keep_good_ref_statements: Do not delete any statement which has a good reference, either defined in the good_refs list or by any other + referencing mode. + :type keep_good_ref_statements: bool + :param search_only: If this flag is set to True, the data provided will only be used to search for the corresponding Wikibase item, but no actual data + updates will performed. This is useful, if certain states or values on the target item need to be checked before certain data is written to it. In + order to write new data to the item, the method update() will take data, modify the Wikibase item and a write() call will then perform the actual + write to the Wikibase instance. + :type search_only: bool + :param item_data: A Python JSON object corresponding to the item in item_id. This can be used in conjunction with item_id in order to provide raw data. + :type item_data: + :param user_agent: The user agent string to use when making http requests + :type user_agent: str + :param core_props: Core properties are used to retrieve an item based on `data` if a `item_id` is not given. This is a set of PIDs to use. If None, + all Wikibase properties with a distinct values constraint will be used. (see: get_core_props) + :type core_props: set + :param core_prop_match_thresh: The proportion of core props that must match during retrieval of an item when the item_id is not specified. + :type core_prop_match_thresh: float + :param property_constraint_pid: + :param distinct_values_constraint_qid: + :param fast_run_case_insensitive: + :param debug: Enable debug output. 
+ :type debug: boolean + """ + + super().__init__() + self.core_prop_match_thresh = core_prop_match_thresh + self.item_id = item_id + self.new_item = new_item + self.mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url + self.sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if sparql_endpoint_url is None else sparql_endpoint_url + self.wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url + self.property_constraint_pid = config['PROPERTY_CONSTRAINT_PID'] if property_constraint_pid is None else property_constraint_pid + self.distinct_values_constraint_qid = config['DISTINCT_VALUES_CONSTRAINT_QID'] if distinct_values_constraint_qid is None else distinct_values_constraint_qid + if data is None: + self.data = [] + elif isinstance(data, list) and all(isinstance(x, BaseDataType) for x in data): + self.data = data + elif isinstance(data, BaseDataType): + self.data = [data] + else: + raise TypeError("`data` must be a list of BaseDataType or an instance of BaseDataType") + self.fast_run = fast_run + self.fast_run_base_filter = fast_run_base_filter + self.fast_run_use_refs = fast_run_use_refs + self.fast_run_case_insensitive = fast_run_case_insensitive + self.ref_handler = ref_handler + self.global_ref_mode = global_ref_mode + self.good_refs = good_refs + self.keep_good_ref_statements = keep_good_ref_statements + self.search_only = search_only + self.item_data = item_data + self.user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent + + self.statements = [] + self.original_statements = [] + self.entity_metadata = {} + self.fast_run_container = None + if self.search_only: + self.require_write = False + else: + self.require_write = True + self.sitelinks = {} + self.lastrevid = None # stores last revisionid after a write occurs + + if fast_run_case_insensitive and not self.search_only: + raise ValueError("If using fast run case insensitive, search_only must be set") + + if 
self.ref_handler and not callable(self.ref_handler): + raise TypeError("ref_handler must be callable") + if self.global_ref_mode == 'CUSTOM' and self.ref_handler is None: + raise ValueError("If using a custom ref mode, ref_handler must be set") + + if (core_props is None) and (self.sparql_endpoint_url not in Item.distinct_value_props): + Item.distinct_value_props[self.sparql_endpoint_url] = wbi_functions.get_distinct_value_props(self.sparql_endpoint_url, + self.wikibase_url, + self.property_constraint_pid, + self.distinct_values_constraint_qid) + self.core_props = core_props if core_props is not None else Item.distinct_value_props[self.sparql_endpoint_url] + + if self.fast_run: + self.init_fastrun() + if self.debug: + if self.require_write: + if self.search_only: + print("Successful fastrun, search_only mode, we can't determine if data is up to date.") + else: + print("Successful fastrun, because no full data match you need to update the item.") + else: + print("Successful fastrun, no write to Wikibase instance required.") + + if self.item_id != '' and self.create_new_item: + raise IDMissingError("Cannot create a new item, when an identifier is given.") + elif self.new_item and len(self.data) > 0: + self.create_new_item = True + self.__construct_claim_json() + elif self.require_write or self.search_only: + self.init_data_load() + + def init_fastrun(self): + # We search if we already have a FastRunContainer with the same parameters to re-use it + for c in Item.fast_run_store: + if (c.base_filter == self.fast_run_base_filter) and (c.use_refs == self.fast_run_use_refs) and (c.sparql_endpoint_url == self.sparql_endpoint_url): + self.fast_run_container = c + self.fast_run_container.ref_handler = self.ref_handler + self.fast_run_container.current_qid = '' + self.fast_run_container.base_data_type = BaseDataType + self.fast_run_container.engine = self.__class__ + self.fast_run_container.mediawiki_api_url = self.mediawiki_api_url + self.fast_run_container.wikibase_url = 
self.wikibase_url + self.fast_run_container.debug = self.debug + if self.debug: + print("Found an already existing FastRunContainer") + + if not self.fast_run_container: + self.fast_run_container = FastRunContainer(base_filter=self.fast_run_base_filter, + base_data_type=BaseDataType, + engine=self.__class__, + sparql_endpoint_url=self.sparql_endpoint_url, + mediawiki_api_url=self.mediawiki_api_url, + wikibase_url=self.wikibase_url, + use_refs=self.fast_run_use_refs, + ref_handler=self.ref_handler, + case_insensitive=self.fast_run_case_insensitive, + debug=self.debug) + Item.fast_run_store.append(self.fast_run_container) + + if not self.search_only: + self.require_write = self.fast_run_container.write_required(self.data, cqid=self.item_id) + # set item id based on fast run data + if not self.require_write and not self.item_id: + self.item_id = self.fast_run_container.current_qid + else: + self.fast_run_container.load_item(self.data) + # set item id based on fast run data + if not self.item_id: + self.item_id = self.fast_run_container.current_qid + + def update(self, data): + """ + This method takes data, and modifies the Wikidata item. This works together with the data already provided via the constructor or if the constructor is + being instantiated with search_only=True. In the latter case, this allows for checking the item data before deciding which new data should be written to + the Wikidata item. The actual write to Wikidata only happens on calling of the write() method. If data has been provided already via the constructor, + data provided via the update() method will be appended to these data. 
+ :param data: A list of Wikidata statment items inheriting from BaseDataType + :type data: list + """ + + if self.search_only: + raise SearchOnlyError + + assert type(data) == list + + self.data.extend(data) + self.statements = copy.deepcopy(self.original_statements) + + if self.debug: + print(self.data) + + if self.fast_run: + self.init_fastrun() + + if self.require_write and self.fast_run: + self.init_data_load() + self.__construct_claim_json() + self.__check_integrity() + elif not self.fast_run: + self.__construct_claim_json() + self.__check_integrity() + + def get_property_list(self): + """ + List of properties on the current item + :return: a list of property ID strings (Pxxxx). + """ + + property_list = set() + for x in self.statements: + property_list.add(x.get_prop_nr()) + + return list(property_list) + + def get_sitelink(self, site): + """ + A method to access the interwiki links in the json.model + :param site: The Wikipedia site the interwiki/sitelink should be returned for + :return: The interwiki/sitelink string for the specified Wikipedia will be returned. + """ + + if site in self.sitelinks: + return self.sitelinks[site] + else: + return None + + def set_sitelink(self, site, title, badges=()): + """ + Set sitelinks to corresponding Wikipedia pages + :param site: The Wikipedia page a sitelink is directed to (e.g. 'enwiki') + :param title: The title of the Wikipedia page the sitelink is directed to + :param badges: An iterable containing Wikipedia badge strings. 
+ :return: + """ + + if self.search_only: + raise SearchOnlyError + + sitelink = { + 'site': site, + 'title': title, + 'badges': badges + } + self.json_representation['sitelinks'][site] = sitelink + self.sitelinks[site] = sitelink + + def count_references(self, prop_id): + counts = {} + for claim in self.get_json_representation()['claims'][prop_id]: + counts[claim['id']] = len(claim['references']) + return counts + + def get_reference_properties(self, prop_id): + references = [] + statements = [x for x in self.get_json_representation()['claims'][prop_id] if 'references' in x] + for statement in statements: + for reference in statement['references']: + references.append(reference['snaks'].keys()) + return references + + def get_qualifier_properties(self, prop_id): + qualifiers = [] + for statements in self.get_json_representation()['claims'][prop_id]: + qualifiers.append(statements['qualifiers'].keys()) + return qualifiers From 05264c9d4911f49f2faf1cbe34b17093341ef350 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Mon, 24 May 2021 08:23:53 +0200 Subject: [PATCH 007/308] Lot of changes --- README.md | 115 +- requirements.txt | 2 +- test/test_all.py | 94 +- test/test_wbi_backoff.py | 4 +- ...{test_wbi_core.py => test_wbi_core.py.old} | 7 +- ...wbi_fastrun.py => test_wbi_fastrun.py.old} | 0 test/test_wbi_login.py | 7 +- wikibaseintegrator/__init__.py | 3 +- wikibaseintegrator/datatypes/__init__.py | 18 + wikibaseintegrator/datatypes/basedatatype.py | 353 ++++ wikibaseintegrator/datatypes/commonsmedia.py | 54 + wikibaseintegrator/datatypes/externalid.py | 52 + wikibaseintegrator/datatypes/form.py | 72 + wikibaseintegrator/datatypes/geoshape.py | 68 + .../datatypes/globecoordinate.py | 90 + wikibaseintegrator/datatypes/item.py | 75 + wikibaseintegrator/datatypes/lexeme.py | 75 + wikibaseintegrator/datatypes/math.py | 52 + .../datatypes/monolingualtext.py | 79 + .../datatypes/musicalnotation.py | 52 + 
wikibaseintegrator/datatypes/property.py | 75 + wikibaseintegrator/datatypes/quantity.py | 134 ++ wikibaseintegrator/datatypes/sense.py | 72 + wikibaseintegrator/datatypes/string.py | 53 + wikibaseintegrator/datatypes/tabulardata.py | 60 + wikibaseintegrator/datatypes/time.py | 116 ++ wikibaseintegrator/datatypes/url.py | 68 + wikibaseintegrator/entities/__init__.py | 3 + wikibaseintegrator/entities/baseentity.py | 320 ++++ wikibaseintegrator/entities/item.py | 51 + wikibaseintegrator/entities/lexeme.py | 52 + wikibaseintegrator/entities/property.py | 37 + wikibaseintegrator/models/__init__.py | 0 wikibaseintegrator/models/aliases.py | 47 + wikibaseintegrator/models/claims.py | 61 + wikibaseintegrator/models/descriptions.py | 5 + wikibaseintegrator/models/forms.py | 57 + wikibaseintegrator/models/labels.py | 5 + wikibaseintegrator/models/language_values.py | 65 + wikibaseintegrator/models/lemmas.py | 5 + wikibaseintegrator/models/qualifiers.py | 23 + wikibaseintegrator/models/senses.py | 67 + wikibaseintegrator/models/sitelinks.py | 46 + wikibaseintegrator/wbi_api.py | 449 +++++ wikibaseintegrator/wbi_backoff.py | 18 +- wikibaseintegrator/wbi_config.py | 5 +- wikibaseintegrator/wbi_core.py | 648 ------- wikibaseintegrator/wbi_datatype.py | 1548 ----------------- wikibaseintegrator/wbi_fastrun.py | 24 +- wikibaseintegrator/wbi_functions.py | 394 ----- wikibaseintegrator/wbi_item.py | 293 ---- wikibaseintegrator/wbi_jsonparser.py | 2 +- wikibaseintegrator/wbi_lexeme.py | 5 - wikibaseintegrator/wbi_property.py | 5 - wikibaseintegrator/wikibaseintegrator.py | 62 +- 55 files changed, 3082 insertions(+), 3065 deletions(-) rename test/{test_wbi_core.py => test_wbi_core.py.old} (95%) rename test/{test_wbi_fastrun.py => test_wbi_fastrun.py.old} (100%) create mode 100644 wikibaseintegrator/datatypes/__init__.py create mode 100644 wikibaseintegrator/datatypes/basedatatype.py create mode 100644 wikibaseintegrator/datatypes/commonsmedia.py create mode 100644 
wikibaseintegrator/datatypes/externalid.py create mode 100644 wikibaseintegrator/datatypes/form.py create mode 100644 wikibaseintegrator/datatypes/geoshape.py create mode 100644 wikibaseintegrator/datatypes/globecoordinate.py create mode 100644 wikibaseintegrator/datatypes/item.py create mode 100644 wikibaseintegrator/datatypes/lexeme.py create mode 100644 wikibaseintegrator/datatypes/math.py create mode 100644 wikibaseintegrator/datatypes/monolingualtext.py create mode 100644 wikibaseintegrator/datatypes/musicalnotation.py create mode 100644 wikibaseintegrator/datatypes/property.py create mode 100644 wikibaseintegrator/datatypes/quantity.py create mode 100644 wikibaseintegrator/datatypes/sense.py create mode 100644 wikibaseintegrator/datatypes/string.py create mode 100644 wikibaseintegrator/datatypes/tabulardata.py create mode 100644 wikibaseintegrator/datatypes/time.py create mode 100644 wikibaseintegrator/datatypes/url.py create mode 100644 wikibaseintegrator/entities/__init__.py create mode 100644 wikibaseintegrator/entities/baseentity.py create mode 100644 wikibaseintegrator/entities/item.py create mode 100644 wikibaseintegrator/entities/lexeme.py create mode 100644 wikibaseintegrator/entities/property.py create mode 100644 wikibaseintegrator/models/__init__.py create mode 100644 wikibaseintegrator/models/aliases.py create mode 100644 wikibaseintegrator/models/claims.py create mode 100644 wikibaseintegrator/models/descriptions.py create mode 100644 wikibaseintegrator/models/forms.py create mode 100644 wikibaseintegrator/models/labels.py create mode 100644 wikibaseintegrator/models/language_values.py create mode 100644 wikibaseintegrator/models/lemmas.py create mode 100644 wikibaseintegrator/models/qualifiers.py create mode 100644 wikibaseintegrator/models/senses.py create mode 100644 wikibaseintegrator/models/sitelinks.py create mode 100644 wikibaseintegrator/wbi_api.py delete mode 100644 wikibaseintegrator/wbi_core.py delete mode 100644 
wikibaseintegrator/wbi_datatype.py delete mode 100644 wikibaseintegrator/wbi_functions.py delete mode 100644 wikibaseintegrator/wbi_item.py delete mode 100644 wikibaseintegrator/wbi_lexeme.py delete mode 100644 wikibaseintegrator/wbi_property.py diff --git a/README.md b/README.md index 5176d69d..048383f9 100644 --- a/README.md +++ b/README.md @@ -60,9 +60,10 @@ To test for correct installation, start a Python console and execute the followi for ['Human'](https://www.wikidata.org/entity/Q5)): ```python -from wikibaseintegrator import wbi_item -my_first_wikidata_item = wbi_item.Item(item_id='Q5') +from wikibaseintegrator.entities import item + +my_first_wikidata_item = item.Item(item_id='Q5') # to check successful installation and retrieval of the data, you can print the json representation of the item print(my_first_wikidata_item.get_json_representation()) @@ -109,8 +110,8 @@ Features: exception) * Checks automatically if the correct item has been loaded by comparing it to the data provided * All Wikibase data types implemented -* A dedicated wbi_item.Item.write() method allows loading and consistency checks of data before any write to - Wikibase is performed +* A dedicated wbi_item.Item.write() method allows loading and consistency checks of data before any write to Wikibase is + performed * Full access to the whole Wikibase item as a JSON document There are two ways of working with Wikibase items: @@ -254,12 +255,12 @@ tuple, depending on the complexity of the data type. ## Execute SPARQL queries ## -The method `wbi_item.Item.execute_sparql_query()` allows you to execute SPARQL queries without a hassle. It takes -the actual query string (query), optional prefixes (prefix) if you do not want to use the standard prefixes of Wikidata, -the actual entpoint URL (endpoint), and you can also specify a user agent for the http header sent to the SPARQL -server (user_agent). 
The latter is very useful to let the operators of the endpoint know who you are, especially if you -execute many queries on the endpoint. This allows the operators of the endpoint to contact you (e.g. specify an email -address, or the URL to your bot code repository.) +The method `wbi_item.Item.execute_sparql_query()` allows you to execute SPARQL queries without a hassle. It takes the +actual query string (query), optional prefixes (prefix) if you do not want to use the standard prefixes of Wikidata, the +actual entpoint URL (endpoint), and you can also specify a user agent for the http header sent to the SPARQL server ( +user_agent). The latter is very useful to let the operators of the endpoint know who you are, especially if you execute +many queries on the endpoint. This allows the operators of the endpoint to contact you (e.g. specify an email address, +or the URL to your bot code repository.) ## Use Mediawiki API ## @@ -287,11 +288,11 @@ print(wbi_functions.mediawiki_api_call_helper(query, allow_anonymous=True)) ## Wikibase search entities ## -The method `wbi_item.Item.search_entities()` allows for string search in a Wikibase instance. This means that -labels, descriptions and aliases can be searched for a string of interest. The method takes five arguments: The actual -search string (search_string), an optional server (mediawiki_api_url, in case the Wikibase instance used is not -Wikidata), an optional user_agent, an optional max_results (default 500), an optional language (default 'en'), and an -option dict_id_label to return a dict of item id and label as a result. +The method `wbi_item.Item.search_entities()` allows for string search in a Wikibase instance. This means that labels, +descriptions and aliases can be searched for a string of interest. 
The method takes five arguments: The actual search +string (search_string), an optional server (mediawiki_api_url, in case the Wikibase instance used is not Wikidata), an +optional user_agent, an optional max_results (default 500), an optional language (default 'en'), and an option +dict_id_label to return a dict of item id and label as a result. ## Merge Wikibase items ## @@ -315,19 +316,21 @@ In order to create a minimal bot based on wbi_core, three things are required: * A ItemEngine object which takes the data, does the checks and performs write. ```python -from wikibaseintegrator import wbi_item, wbi_login, wbi_datatype +from wikibaseintegrator import wbi_login +from wikibaseintegrator.entities import item +from wikibaseintegrator.datatypes import basedatatype # login object login_instance = wbi_login.Login(user='', pwd='') # data type object, e.g. for a NCBI gene entrez ID -entrez_gene_id = wbi_datatype.String(value='', prop_nr='P351') +entrez_gene_id = basedatatype.String(value='', prop_nr='P351') # data goes into a list, because many data objects can be provided to data = [entrez_gene_id] # Search for and then edit/create new item -wd_item = wbi_item.Item(data=data) +wd_item = item.Item(data=data) wd_item.write(login_instance) ``` @@ -337,37 +340,39 @@ An enhanced example of the previous bot just puts two of the three things into a or modification of items. 
```python -from wikibaseintegrator import wbi_item, wbi_login, wbi_datatype +from wikibaseintegrator import wbi_login +from wikibaseintegrator.entities import item +from wikibaseintegrator.datatypes import basedatatype # login object login_instance = wbi_login.Login(user='', pwd='') # We have raw data, which should be written to Wikidata, namely two human NCBI entrez gene IDs mapped to two Ensembl Gene IDs raw_data = { - '50943': 'ENST00000376197', - '1029': 'ENST00000498124' + '50943': 'ENST00000376197', + '1029': 'ENST00000498124' } for entrez_id, ensembl in raw_data.items(): - # add some references - references = [ - [ - wbi_datatype.ItemID(value='Q20641742', prop_nr='P248', is_reference=True), - wbi_datatype.Time(time='+2020-02-08T00:00:00Z', prop_nr='P813', is_reference=True), - wbi_datatype.ExternalID(value='1017', prop_nr='P351', is_reference=True) - ] + # add some references + references = [ + [ + basedatatype.ItemID(value='Q20641742', prop_nr='P248', is_reference=True), + basedatatype.Time(time='+2020-02-08T00:00:00Z', prop_nr='P813', is_reference=True), + basedatatype.ExternalID(value='1017', prop_nr='P351', is_reference=True) ] + ] - # data type object - entrez_gene_id = wbi_datatype.String(value=entrez_id, prop_nr='P351', references=references) - ensembl_transcript_id = wbi_datatype.String(value=ensembl, prop_nr='P704', references=references) + # data type object + entrez_gene_id = basedatatype.String(value=entrez_id, prop_nr='P351', references=references) + ensembl_transcript_id = basedatatype.String(value=ensembl, prop_nr='P704', references=references) - # data goes into a list, because many data objects can be provided to - data = [entrez_gene_id, ensembl_transcript_id] + # data goes into a list, because many data objects can be provided to + data = [entrez_gene_id, ensembl_transcript_id] - # Search for and then edit/create new item - wd_item = wbi_item.Item(data=data) - wd_item.write(login_instance) + # Search for and then edit/create new item + 
wd_item = item.Item(data=data) + wd_item.write(login_instance) ``` # Examples (in "fast run" mode) # @@ -395,7 +400,9 @@ fast_run_base_filter = {'P351': '', 'P703': 'Q15978631'} The full example: ```python -from wikibaseintegrator import wbi_item, wbi_login, wbi_datatype +from wikibaseintegrator import wbi_login +from wikibaseintegrator.entities import item +from wikibaseintegrator.datatypes import basedatatype # login object login_instance = wbi_login.Login(user='', pwd='') @@ -406,30 +413,30 @@ fast_run = True # We have raw data, which should be written to Wikidata, namely two human NCBI entrez gene IDs mapped to two Ensembl Gene IDs # You can iterate over any data source as long as you can map the values to Wikidata properties. raw_data = { - '50943': 'ENST00000376197', - '1029': 'ENST00000498124' + '50943': 'ENST00000376197', + '1029': 'ENST00000498124' } for entrez_id, ensembl in raw_data.items(): - # add some references - references = [ - [ - wbi_datatype.ItemID(value='Q20641742', prop_nr='P248', is_reference=True), - wbi_datatype.Time(time='+2020-02-08T00:00:00Z', prop_nr='P813', is_reference=True), - wbi_datatype.ExternalID(value='1017', prop_nr='P351', is_reference=True) - ] + # add some references + references = [ + [ + basedatatype.ItemID(value='Q20641742', prop_nr='P248', is_reference=True), + basedatatype.Time(time='+2020-02-08T00:00:00Z', prop_nr='P813', is_reference=True), + basedatatype.ExternalID(value='1017', prop_nr='P351', is_reference=True) ] + ] - # data type object - entrez_gene_id = wbi_datatype.String(value=entrez_id, prop_nr='P351', references=references) - ensembl_transcript_id = wbi_datatype.String(value=ensembl, prop_nr='P704', references=references) + # data type object + entrez_gene_id = basedatatype.String(value=entrez_id, prop_nr='P351', references=references) + ensembl_transcript_id = basedatatype.String(value=ensembl, prop_nr='P704', references=references) - # data goes into a list, because many data objects can be provided to - 
data = [entrez_gene_id, ensembl_transcript_id] + # data goes into a list, because many data objects can be provided to + data = [entrez_gene_id, ensembl_transcript_id] - # Search for and then edit/create new item - wd_item = wbi_item.Item(data=data, fast_run=fast_run, fast_run_base_filter=fast_run_base_filter) - wd_item.write(login_instance) + # Search for and then edit/create new item + wd_item = item.Item(data=data, fast_run=fast_run, fast_run_base_filter=fast_run_base_filter) + wd_item.write(login_instance) ``` Note: Fastrun mode checks for equality of property/value pairs, qualifers (not including qualifier attributes), labels, diff --git a/requirements.txt b/requirements.txt index ea2658e6..3b7d95c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ requests~=2.25.1 mwoauth~=0.3.7 backoff~=1.10.0 pytest~=6.2.4 -setuptools~=56.2.0 +setuptools==57.0.0 oauthlib~=3.1.0 diff --git a/test/test_all.py b/test/test_all.py index dbe0b1dc..78797cf8 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -1,32 +1,33 @@ import copy -import pprint import unittest +from pprint import pprint import requests -from wikibaseintegrator import wbi_fastrun, wbi_functions, wbi_datatype, wbi_item -from wikibaseintegrator.wbi_core import MWApiError +from wikibaseintegrator import wbi_fastrun, WikibaseIntegrator, datatypes +from wikibaseintegrator.entities.baseentity import MWApiError +from wikibaseintegrator.entities.item import Item +from wikibaseintegrator.wbi_api import Api -__author__ = 'Sebastian Burgstaller-Muehlbacher' -__license__ = 'AGPLv3' +wbi = WikibaseIntegrator(fast_run=True, fast_run_base_filter={'P361': 'Q18589965'}) class TestMediawikiApiCall(unittest.TestCase): def test_all(self): with self.assertRaises(MWApiError): - wbi_functions.mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, mediawiki_api_url="https://www.wikidataaaaaaa.org", - max_retries=3, retry_after=1, allow_anonymous=True) + 
Api.mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, mediawiki_api_url="https://www.wikidataaaaaaa.org", max_retries=3, + retry_after=1, allow_anonymous=True) with self.assertRaises(requests.HTTPError): - wbi_functions.mediawiki_api_call_helper(data=None, mediawiki_api_url="https://httpbin.org/status/400", max_retries=3, retry_after=1, allow_anonymous=True) + Api.mediawiki_api_call_helper(data=None, mediawiki_api_url="https://httpbin.org/status/400", max_retries=3, retry_after=1, allow_anonymous=True) - test = wbi_functions.mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, - allow_anonymous=True) + test = Api.mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, + allow_anonymous=True) print(test) class TestDataType(unittest.TestCase): def test_quantity(self): - dt = wbi_datatype.Quantity(quantity='34.5', prop_nr='P43') + dt = datatypes.Quantity(quantity='34.5', prop_nr='P43') dt_json = dt.get_json_representation() @@ -41,7 +42,7 @@ def test_quantity(self): if not value['value']['unit'] == '1': raise - dt2 = wbi_datatype.Quantity(quantity='34.5', prop_nr='P43', upper_bound='35.3', lower_bound='33.7', unit="Q11573") + dt2 = datatypes.Quantity(quantity='34.5', prop_nr='P43', upper_bound='35.3', lower_bound='33.7', unit="Q11573") value = dt2.get_json_representation()['mainsnak']['datavalue'] @@ -58,7 +59,7 @@ def test_quantity(self): raise def test_geoshape(self): - dt = wbi_datatype.GeoShape(value='Data:Inner_West_Light_Rail_stops.map', prop_nr='P43') + dt = datatypes.GeoShape(value='Data:Inner_West_Light_Rail_stops.map', prop_nr='P43') dt_json = dt.get_json_representation() @@ -77,16 +78,17 @@ def test_live_item(self): """ Test an item against Wikidata """ - item = wbi_item.Item(item_id='Q423111') + item = wbi.item.get('Q423111') - mass_statement = [x for x in item.statements if 
x.get_prop_nr() == 'P2067'].pop() - pprint.pprint(mass_statement.get_json_representation()) + mass_statements = item.claims.get('P2067') + + mass_statement = mass_statements[next(iter(mass_statements))] + pprint(mass_statement) + pprint(mass_statement.get_json_representation()) if not mass_statement: raise - # TODO: get json directly from the API and compare part to ItemEngine - class TestFastRun(unittest.TestCase): """ @@ -95,12 +97,11 @@ class TestFastRun(unittest.TestCase): def test_fast_run(self): statements = [ - wbi_datatype.ExternalID(value='P40095', prop_nr='P352'), - wbi_datatype.ExternalID(value='YER158C', prop_nr='P705') + datatypes.ExternalID(value='P40095', prop_nr='P352'), + datatypes.ExternalID(value='YER158C', prop_nr='P705') ] - frc = wbi_fastrun.FastRunContainer(base_filter={'P352': '', 'P703': 'Q27510868'}, - base_data_type=wbi_datatype.BaseDataType, engine=wbi_item.Item) + frc = wbi_fastrun.FastRunContainer(api=wbi.api, base_filter={'P352': '', 'P703': 'Q27510868'}, base_data_type=datatypes.BaseDataType) fast_run_result = frc.write_required(data=statements) @@ -116,14 +117,15 @@ def test_fast_run(self): def test_fastrun_label(self): # tests fastrun label, description and aliases, and label in another language - data = [wbi_datatype.ExternalID('/m/02j71', 'P646')] fast_run_base_filter = {'P361': 'Q18589965'} - item = wbi_item.Item(item_id="Q2", data=data, fast_run=True, fast_run_base_filter=fast_run_base_filter) + wbi_fr = WikibaseIntegrator(fast_run=True, fast_run_base_filter=fast_run_base_filter, debug=True) + item = wbi_fr.item.new() + item.claims.add(datatypes.ExternalID('/m/02j71', 'P646')) - frc = wbi_item.Item.fast_run_store[0] + frc = Api.fast_run_store[0] frc.debug = True - assert item.get_label('en') == "Earth" + assert item.labels.get('en') == "Earth" descr = item.get_description('en') assert len(descr) > 3 aliases = item.get_aliases() @@ -161,24 +163,22 @@ def test_fastrun_label(self): def test_sitelinks(): - data = 
[wbi_datatype.ItemID(value='Q12136', prop_nr='P31')] - item = wbi_item.Item(item_id='Q622901', data=data) - item.get_sitelink("enwiki") - assert "enwiki" not in item.json_representation['sitelinks'] - item.set_sitelink("enwiki", "something") - assert item.get_sitelink("enwiki")['title'] == "something" - assert "enwiki" in item.json_representation['sitelinks'] + item = wbi.item.get('Q622901') + item.claims.add(datatypes.Item(value='Q12136', prop_nr='P31')) + assert item.sitelinks.get('enwiki') is not None + item.sitelinks.set(site="enwiki", title="something") + assert item.sitelinks.get('enwiki').title == "something" + assert item.sitelinks.get('enwiki') is not None def test_nositelinks(): # this item doesn't and probably wont ever have any sitelinks (but who knows?? maybe one day..) - data = [wbi_datatype.ItemID(value='Q5', prop_nr='P31')] - item = wbi_item.Item(item_id='Q27869338', data=data) - item.get_sitelink("enwiki") - assert "enwiki" not in item.json_representation['sitelinks'] - item.set_sitelink("enwiki", "something") - assert item.get_sitelink("enwiki")['title'] == "something" - assert "enwiki" in item.json_representation['sitelinks'] + item = wbi.item.get('Q27869338') + item.claims.add(datatypes.Item(value='Q5', prop_nr='P31')) + assert item.sitelinks.get('enwiki') is None + item.sitelinks.set(site="enwiki", title="something") + assert item.sitelinks.get('enwiki').title == "something" + assert item.sitelinks.get('enwiki') is not None #### @@ -186,24 +186,24 @@ def test_nositelinks(): #### def test_ref_equals(): # statements are identical - oldref = [wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_reference=True), - wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', is_reference=True), - wbi_datatype.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_reference=True)] - olditem = wbi_datatype.ItemID("Q123", "P123", references=[oldref]) + oldref = [datatypes.ExternalID(value='P58742', prop_nr='P352', is_reference=True), + 
datatypes.Item(value='Q24784025', prop_nr='P527', is_reference=True), + datatypes.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_reference=True)] + olditem = datatypes.Item("Q123", "P123", references=[oldref]) newitem = copy.deepcopy(olditem) assert olditem.equals(newitem, include_ref=False) assert olditem.equals(newitem, include_ref=True) # dates are a month apart newitem = copy.deepcopy(olditem) - newitem.references[0][2] = wbi_datatype.Time(time='+2002-01-31T12:01:13Z', prop_nr='P813') + newitem.references[0][2] = datatypes.Time(time='+2002-01-31T12:01:13Z', prop_nr='P813') assert olditem.equals(newitem, include_ref=False) assert not olditem.equals(newitem, include_ref=True) # multiple refs newitem = copy.deepcopy(olditem) - newitem.references.append([wbi_datatype.ExternalID(value='99999', prop_nr='P352')]) + newitem.references.append([datatypes.ExternalID(value='99999', prop_nr='P352')]) assert olditem.equals(newitem, include_ref=False) assert not olditem.equals(newitem, include_ref=True) - olditem.references.append([wbi_datatype.ExternalID(value='99999', prop_nr='P352')]) + olditem.references.append([datatypes.ExternalID(value='99999', prop_nr='P352')]) assert olditem.equals(newitem, include_ref=True) diff --git a/test/test_wbi_backoff.py b/test/test_wbi_backoff.py index 1411778f..e3a29f7e 100644 --- a/test/test_wbi_backoff.py +++ b/test/test_wbi_backoff.py @@ -10,7 +10,7 @@ class TestMethods(unittest.TestCase): def test_all(self): - config['BACKOFF_MAX_TRIES'] = 2 + config['BACKOFF_MAX_TRIES'] = 1 config['BACKOFF_MAX_VALUE'] = 2 with self.assertRaises(requests.RequestException): bad_http_code() @@ -25,6 +25,8 @@ def test_all(self): bad_json() +# @backoff.on_exception(backoff.expo, (requests.exceptions.Timeout, requests.exceptions.ConnectionError, requests.HTTPError, JSONDecodeError), max_time=60) + @wbi_backoff() def bad_http_code(): r = requests.get("https://httpbin.org/status/400") diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py.old 
similarity index 95% rename from test/test_wbi_core.py rename to test/test_wbi_core.py.old index 6e603125..18fb3ffb 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py.old @@ -1,6 +1,7 @@ import unittest -from wikibaseintegrator import wbi_functions, wbi_datatype, wbi_item +from wikibaseintegrator import wbi_datatype, wbi_item +from wikibaseintegrator.wbi_api import Api class TestWbiCore(unittest.TestCase): @@ -105,14 +106,14 @@ def test_label(self): assert item.get_aliases('ak') == ['c'] def test_wd_search(self): - t = wbi_functions.search_entities('rivaroxaban') + t = Api.search_entities('rivaroxaban') print('Number of results: ', len(t)) self.assertIsNot(len(t), 0) def test_item_generator(self): items = ['Q408883', 'P715', 'Q18046452'] - item_instances = wbi_functions.generate_item_instances(items=items) + item_instances = Api.generate_item_instances(items=items) for qid, item in item_instances: self.assertIn(qid, items) diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py.old similarity index 100% rename from test/test_wbi_fastrun.py rename to test/test_wbi_fastrun.py.old diff --git a/test/test_wbi_login.py b/test/test_wbi_login.py index c7894ac3..7c369d47 100644 --- a/test/test_wbi_login.py +++ b/test/test_wbi_login.py @@ -1,11 +1,10 @@ -from __future__ import print_function - import os import sys import pytest -from wikibaseintegrator import wbi_login, wbi_functions +from wikibaseintegrator import wbi_login +from wikibaseintegrator.wbi_api import Api # look for environment variables. 
if none set, don't do anything WDUSER = os.getenv("WDUSER") @@ -23,4 +22,4 @@ def test_write(): if WDUSER and WDPASS: login = wbi_login.Login(WDUSER, WDPASS) with pytest.raises(ValueError): - wbi_functions.mediawiki_api_call_helper(data=None, login=login, mediawiki_api_url='https://unsdfdskfjljzkerezr.org/w/api.php') + Api.mediawiki_api_call_helper(data=None, login=login, mediawiki_api_url='https://unsdfdskfjljzkerezr.org/w/api.php') diff --git a/wikibaseintegrator/__init__.py b/wikibaseintegrator/__init__.py index 12569926..6b52479c 100644 --- a/wikibaseintegrator/__init__.py +++ b/wikibaseintegrator/__init__.py @@ -1 +1,2 @@ -from wikibaseintegrator.wikibaseintegrator import WikibaseIntegrator +from .wbi_api import Api +from .wikibaseintegrator import WikibaseIntegrator diff --git a/wikibaseintegrator/datatypes/__init__.py b/wikibaseintegrator/datatypes/__init__.py new file mode 100644 index 00000000..03dfebef --- /dev/null +++ b/wikibaseintegrator/datatypes/__init__.py @@ -0,0 +1,18 @@ +from .basedatatype import BaseDataType +from .commonsmedia import CommonsMedia +from .externalid import ExternalID +from .form import Form +from .geoshape import GeoShape +from .globecoordinate import GlobeCoordinate +from .item import Item +from .lexeme import Lexeme +from .math import Math +from .monolingualtext import MonolingualText +from .musicalnotation import MusicalNotation +from .property import Property +from .quantity import Quantity +from .sense import Sense +from .string import String +from .tabulardata import TabularData +from .time import Time +from .url import URL diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py new file mode 100644 index 00000000..df3d1ba9 --- /dev/null +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -0,0 +1,353 @@ +import copy +import re + +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class BaseDataType(object): + """ + The base class for all Wikibase data types, they 
inherit from it + """ + DTYPE = 'base-data-type' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> '{value}' . + }} + ''' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, will be called by all data types. + :param value: Data value of the Wikibase data snak + :type value: str or int or tuple + :param prop_nr: The property number a Wikibase snak belongs to + :type prop_nr: A string with a prefixed 'P' and several digits e.g. 'P715' (Drugbank ID) or an int + :param data_type: The Wikibase data type declaration of this snak + :type data_type: str + :param snak_type: The snak type of the Wikibase data snak, three values possible, depending if the value is a known (value), not existent (novalue) or + unknown (somevalue). See Wikibase documentation. + :type snak_type: a str of either 'value', 'novalue' or 'somevalue' + :param references: A one level nested list with reference Wikibase snaks of base type BaseDataType, + e.g. references=[[, ], []] + This will create two references, the first one with two statements, the second with one + :type references: A one level nested list with instances of BaseDataType or children of it. + :param qualifiers: A list of qualifiers for the Wikibase mainsnak + :type qualifiers: A list with instances of BaseDataType or children of it. + :param is_reference: States if the snak is a reference, mutually exclusive with qualifier + :type is_reference: boolean + :param is_qualifier: States if the snak is a qualifier, mutually exlcusive with reference + :type is_qualifier: boolean + :param rank: The rank of a Wikibase mainsnak, should determine the status of a value + :type rank: A string of one of three allowed values: 'normal', 'deprecated', 'preferred' + :param check_qualifier_equality: When comparing two objects, test if qualifiers are equals between them. Default to true. 
+ :type check_qualifier_equality: boolean + :param if_exists: Replace or append the statement. You can force an append if the statement already exists. + :type if_exists: A string of one of three allowed values: 'REPLACE', 'APPEND', 'FORCE_APPEND', 'KEEP' + :return: + """ + + self.value = value + self.data_type = kwargs.pop('data_type', self.DTYPE) + self.snak_type = kwargs.pop('snak_type', 'value') + self.references = kwargs.pop('references', None) + self.qualifiers = kwargs.pop('qualifiers', None) + self.is_reference = kwargs.pop('is_reference', None) + self.is_qualifier = kwargs.pop('is_qualifier', None) + self.rank = kwargs.pop('rank', 'normal') + self.check_qualifier_equality = kwargs.pop('check_qualifier_equality', True) + self.if_exists = kwargs.pop('if_exists', 'REPLACE') + + self._statement_ref_mode = 'KEEP_GOOD' + + if not self.references: + self.references = [] + else: + for ref_list in self.references: + for reference in ref_list: + if reference.is_reference is False: + raise ValueError('A reference can\'t be declared as is_reference=False') + elif reference.is_reference is None: + reference.is_reference = True + + if not self.qualifiers: + self.qualifiers = [] + else: + for qualifier in self.qualifiers: + if qualifier.is_qualifier is False: + raise ValueError('A qualifier can\'t be declared as is_qualifier=False') + elif qualifier.is_qualifier is None: + qualifier.is_qualifier = True + + if isinstance(prop_nr, int): + self.prop_nr = 'P' + str(prop_nr) + else: + pattern = re.compile(r'^P?([0-9]+)$') + matches = pattern.match(prop_nr) + + if not matches: + raise ValueError('Invalid prop_nr, format must be "P[0-9]+"') + else: + self.prop_nr = 'P' + str(matches.group(1)) + + # Internal ID and hash are issued by the Wikibase instance + self.id = '' + self.hash = '' + + self.json_representation = { + 'snaktype': self.snak_type, + 'property': self.prop_nr, + 'datavalue': {}, + 'datatype': self.data_type + } + + if self.snak_type not in ['value', 'novalue', 
'somevalue']: + raise ValueError('{} is not a valid snak type'.format(self.snak_type)) + + if self.if_exists not in ['REPLACE', 'APPEND', 'FORCE_APPEND', 'KEEP']: + raise ValueError('{} is not a valid if_exists value'.format(self.if_exists)) + + if self.value is None and self.snak_type == 'value': + raise ValueError('Parameter \'value\' can\'t be \'None\' if \'snak_type\' is \'value\'') + + if self.is_qualifier and self.is_reference: + raise ValueError('A claim cannot be a reference and a qualifer at the same time') + if (len(self.references) > 0 or len(self.qualifiers) > 0) and (self.is_qualifier or self.is_reference): + raise ValueError('Qualifiers or references cannot have references or qualifiers') + + def has_equal_qualifiers(self, other): + # check if the qualifiers are equal with the 'other' object + equal_qualifiers = True + self_qualifiers = copy.deepcopy(self.get_qualifiers()) + other_qualifiers = copy.deepcopy(other.get_qualifiers()) + + if len(self_qualifiers) != len(other_qualifiers): + equal_qualifiers = False + else: + flg = [False for _ in range(len(self_qualifiers))] + for count, i in enumerate(self_qualifiers): + for q in other_qualifiers: + if i == q: + flg[count] = True + if not all(flg): + equal_qualifiers = False + + return equal_qualifiers + + def __eq__(self, other): + equal_qualifiers = self.has_equal_qualifiers(other) + equal_values = self.get_value() == other.get_value() and self.get_prop_nr() == other.get_prop_nr() + + if not (self.check_qualifier_equality and other.check_qualifier_equality) and equal_values: + return True + elif equal_values and equal_qualifiers: + return True + else: + return False + + @property + def statement_ref_mode(self): + return self._statement_ref_mode + + @statement_ref_mode.setter + def statement_ref_mode(self, value): + """Set the reference mode for a statement, always overrides the global reference state.""" + valid_values = ['STRICT_KEEP', 'STRICT_KEEP_APPEND', 'STRICT_OVERWRITE', 'KEEP_GOOD', 'CUSTOM'] + 
if value not in valid_values: + raise ValueError('Not an allowed reference mode, allowed values {}'.format(' '.join(valid_values))) + + self._statement_ref_mode = value + + def get_value(self): + return self.value + + def get_sparql_value(self): + return self.value + + def set_value(self, value): + if value is None and self.snak_type not in {'novalue', 'somevalue'}: + raise ValueError("If 'value' is None, snak_type must be novalue or somevalue") + if self.snak_type in {'novalue', 'somevalue'}: + del self.json_representation['datavalue'] + elif 'datavalue' not in self.json_representation: + self.json_representation['datavalue'] = {} + + self.value = value + + def get_references(self): + return self.references + + def set_references(self, references): + if len(references) > 0 and (self.is_qualifier or self.is_reference): + raise ValueError("Qualifiers or references cannot have references") + + # Force clean duplicate references + temp_references = [] + for reference in references: + if reference not in temp_references: + temp_references.append(reference) + references = temp_references + + self.references = references + + def get_qualifiers(self): + return self.qualifiers + + def set_qualifiers(self, qualifiers): + # TODO: introduce a check to prevent duplicate qualifiers, those are not allowed in Wikibase + if len(qualifiers) > 0 and (self.is_qualifier or self.is_reference): + raise ValueError("Qualifiers or references cannot have qualifiers") + + self.qualifiers = qualifiers + + def get_rank(self): + if self.is_qualifier or self.is_reference: + return '' + else: + return self.rank + + def set_rank(self, rank): + if self.is_qualifier or self.is_reference: + raise ValueError("References or qualifiers do not have ranks") + + valid_ranks = ['normal', 'deprecated', 'preferred'] + + if rank not in valid_ranks: + raise ValueError("{} not a valid rank".format(rank)) + + self.rank = rank + + def get_id(self): + return self.id + + def set_id(self, claim_id): + self.id = 
claim_id + + def set_hash(self, claim_hash): + self.hash = claim_hash + + def get_hash(self): + return self.hash + + def get_prop_nr(self): + return self.prop_nr + + def set_prop_nr(self, prop_nr): + if prop_nr[0] != 'P': + raise ValueError("Invalid property number") + + self.prop_nr = prop_nr + + def get_json_representation(self): + if self.is_qualifier or self.is_reference: + tmp_json = { + self.prop_nr: [self.json_representation] + } + if self.hash != '' and self.is_qualifier: + self.json_representation.update({'hash': self.hash}) + + return tmp_json + else: + ref_json = [] + for count, ref in enumerate(self.references): + snaks_order = [] + snaks = {} + ref_json.append({ + 'snaks': snaks, + 'snaks-order': snaks_order + }) + for sub_ref in ref: + prop_nr = sub_ref.get_prop_nr() + # set the hash for the reference block + if sub_ref.get_hash() != '': + ref_json[count].update({'hash': sub_ref.get_hash()}) + tmp_json = sub_ref.get_json_representation() + + # if more reference values with the same property number, append to its specific property list. 
+ if prop_nr in snaks: + snaks[prop_nr].append(tmp_json[prop_nr][0]) + else: + snaks.update(tmp_json) + snaks_order.append(prop_nr) + + qual_json = {} + qualifiers_order = [] + for qual in self.qualifiers: + prop_nr = qual.get_prop_nr() + if prop_nr in qual_json: + qual_json[prop_nr].append(qual.get_json_representation()[prop_nr][0]) + else: + qual_json.update(qual.get_json_representation()) + qualifiers_order.append(qual.get_prop_nr()) + + if hasattr(self, 'remove'): + statement = { + 'remove': '' + } + else: + statement = { + 'mainsnak': self.json_representation, + 'type': 'statement', + 'rank': self.rank + } + if qual_json: + statement['qualifiers'] = qual_json + if qualifiers_order: + statement['qualifiers-order'] = qualifiers_order + if ref_json: + statement['references'] = ref_json + if self.id != '': + statement.update({'id': self.id}) + + return statement + + @classmethod + @JsonParser + def from_json(cls, json_representation): + pass + + def equals(self, that, include_ref=False, fref=None): + """ + Tests for equality of two statements. + If comparing references, the order of the arguments matters!!! + self is the current statement, the next argument is the new statement. + Allows passing in a function to use to compare the references 'fref'. Default is equality. 
+ fref accepts two arguments 'oldrefs' and 'newrefs', each of which are a list of references, + where each reference is a list of statements + """ + + if not include_ref: + # return the result of BaseDataType.__eq__, which is testing for equality of value and qualifiers + return self == that + else: + if self != that: + return False + if fref is None: + return BaseDataType.refs_equal(self, that) + else: + return fref(self, that) + + @staticmethod + def refs_equal(olditem, newitem): + """ + tests for exactly identical references + """ + + oldrefs = olditem.references + newrefs = newitem.references + + def ref_equal(oldref, newref): + return True if (len(oldref) == len(newref)) and all(x in oldref for x in newref) else False + + if len(oldrefs) == len(newrefs) and all(any(ref_equal(oldref, newref) for oldref in oldrefs) for newref in newrefs): + return True + else: + return False + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) diff --git a/wikibaseintegrator/datatypes/commonsmedia.py b/wikibaseintegrator/datatypes/commonsmedia.py new file mode 100644 index 00000000..1e69754e --- /dev/null +++ b/wikibaseintegrator/datatypes/commonsmedia.py @@ -0,0 +1,54 @@ +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class CommonsMedia(BaseDataType): + """ + Implements the Wikibase data type for Wikimedia commons media files + """ + DTYPE = 'commonsMedia' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: The media file name from Wikimedia commons to be used as the value + :type value: str or None + :param prop_nr: The item ID for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: 
Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + self.value = None + + super(CommonsMedia, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + self.value = value + + self.json_representation['datavalue'] = { + 'value': self.value, + 'type': 'string' + } + + super(CommonsMedia, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/externalid.py b/wikibaseintegrator/datatypes/externalid.py new file mode 100644 index 00000000..5dbc9d2d --- /dev/null +++ b/wikibaseintegrator/datatypes/externalid.py @@ -0,0 +1,52 @@ +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class ExternalID(BaseDataType): + """ + Implements the Wikibase data type 'external-id' + """ + DTYPE = 'external-id' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: The string to be used as the value + :type value: str or None + :param prop_nr: The item ID for this claim + :type 
prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(ExternalID, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + self.value = value + + self.json_representation['datavalue'] = { + 'value': self.value, + 'type': 'string' + } + + super(ExternalID, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py new file mode 100644 index 00000000..ec66a9fe --- /dev/null +++ b/wikibaseintegrator/datatypes/form.py @@ -0,0 +1,72 @@ +import re + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class Form(BaseDataType): + """ + Implements the Wikibase data type 'wikibase-form' + """ + DTYPE = 'wikibase-form' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> . 
+ }} + ''' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: The form number to serve as a value using the format "L-F
" (example: L252248-F2) + :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix + :param prop_nr: The property number for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(Form, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + if value is None: + self.value = value + else: + pattern = re.compile(r'^L[0-9]+-F[0-9]+$') + matches = pattern.match(value) + + if not matches: + raise ValueError("Invalid form ID ({}), format must be 'L[0-9]+-F[0-9]+'".format(value)) + + self.value = value + + self.json_representation['datavalue'] = { + 'value': { + 'entity-type': 'form', + 'id': self.value + }, + 'type': 'wikibase-entityid' + } + + super(Form, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=jsn['datavalue']['value']['id'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/geoshape.py b/wikibaseintegrator/datatypes/geoshape.py new file mode 100644 index 00000000..6e7eebe5 --- /dev/null +++ 
b/wikibaseintegrator/datatypes/geoshape.py @@ -0,0 +1,68 @@ +import re + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class GeoShape(BaseDataType): + """ + Implements the Wikibase data type 'geo-shape' + """ + DTYPE = 'geo-shape' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{value}> . + }} + ''' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: The GeoShape map file name in Wikimedia Commons to be linked + :type value: str or None + :param prop_nr: The item ID for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(GeoShape, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + if value is None: + self.value = value + else: + # TODO: Need to check if the value is a full URl like http://commons.wikimedia.org/data/main/Data:Paris.map + pattern = re.compile(r'^Data:((?![:|#]).)+\.map$') + matches = pattern.match(value) + if not matches: + raise ValueError("Value must start with Data: and end with .map. 
In addition title should not contain characters like colon, hash or pipe.") + self.value = value + + self.json_representation['datavalue'] = { + 'value': self.value, + 'type': 'string' + } + + super(GeoShape, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py new file mode 100644 index 00000000..400e3b43 --- /dev/null +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -0,0 +1,90 @@ +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class GlobeCoordinate(BaseDataType): + """ + Implements the Wikibase data type for globe coordinates + """ + DTYPE = 'globe-coordinate' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> '{value}'^^geo:wktLiteral . 
+    }}
+    '''
+
+    def __init__(self, latitude, longitude, precision, prop_nr, globe=None, wikibase_url=None, **kwargs):
+        """
+        Constructor, calls the superclass BaseDataType
+        :param latitude: Latitude in decimal format
+        :type latitude: float or None
+        :param longitude: Longitude in decimal format
+        :type longitude: float or None
+        :param precision: Precision of the position measurement
+        :type precision: float or None
+        :param prop_nr: The item ID for this claim
+        :type prop_nr: str with a 'P' prefix followed by digits
+        :param is_reference: Whether this snak is a reference
+        :type is_reference: boolean
+        :param is_qualifier: Whether this snak is a qualifier
+        :type is_qualifier: boolean
+        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
+        :type snak_type: str
+        :param references: List with reference objects
+        :type references: A data type with subclass of BaseDataType
+        :param qualifiers: List with qualifier objects
+        :type qualifiers: A data type with subclass of BaseDataType
+        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
+        :type rank: str
+        """
+
+        globe = config['COORDINATE_GLOBE_QID'] if globe is None else globe
+        wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url
+
+        self.latitude = None
+        self.longitude = None
+        self.precision = None
+        self.globe = None
+
+        if globe.startswith('Q'):
+            globe = wikibase_url + '/entity/' + globe
+
+        value = (latitude, longitude, precision, globe)
+
+        super(GlobeCoordinate, self).__init__(value=value, prop_nr=prop_nr, **kwargs)
+
+        self.set_value(value)
+
+    def set_value(self, value):
+        # TODO: Introduce validity checks for coordinates, etc.
+ # TODO: Add check if latitude/longitude/precision is None + self.latitude, self.longitude, self.precision, self.globe = value + + self.json_representation['datavalue'] = { + 'value': { + 'latitude': self.latitude, + 'longitude': self.longitude, + 'precision': self.precision, + 'globe': self.globe + }, + 'type': 'globecoordinate' + } + + self.value = (self.latitude, self.longitude, self.precision, self.globe) + super(GlobeCoordinate, self).set_value(value=self.value) + + def get_sparql_value(self): + return 'Point(' + str(self.latitude) + ', ' + str(self.longitude) + ')' + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(latitude=None, longitude=None, precision=None, prop_nr=jsn['property'], + snak_type=jsn['snaktype']) + + value = jsn['datavalue']['value'] + return cls(latitude=value['latitude'], longitude=value['longitude'], precision=value['precision'], + prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py new file mode 100644 index 00000000..2c59bbaf --- /dev/null +++ b/wikibaseintegrator/datatypes/item.py @@ -0,0 +1,75 @@ +import re + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class Item(BaseDataType): + """ + Implements the Wikibase data type 'wikibase-item' with a value being another item ID + """ + DTYPE = 'wikibase-item' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/Q{value}> . 
+ }} + ''' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: The item ID to serve as the value + :type value: str with a 'Q' prefix, followed by several digits or only the digits without the 'Q' prefix + :param prop_nr: The item ID for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(Item, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, (str, int)) or value is None, 'Expected str or int, found {} ({})'.format(type(value), value) + if value is None: + self.value = value + elif isinstance(value, int): + self.value = value + else: + pattern = re.compile(r'^Q?([0-9]+)$') + matches = pattern.match(value) + + if not matches: + raise ValueError("Invalid item ID ({}), format must be 'Q[0-9]+'".format(value)) + else: + self.value = int(matches.group(1)) + + self.json_representation['datavalue'] = { + 'value': { + 'entity-type': 'item', + 'numeric-id': self.value, + 'id': 'Q{}'.format(self.value) + }, + 'type': 'wikibase-entityid' + } + + super(Item, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return 
cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py new file mode 100644 index 00000000..182ace6c --- /dev/null +++ b/wikibaseintegrator/datatypes/lexeme.py @@ -0,0 +1,75 @@ +import re + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class Lexeme(BaseDataType): + """ + Implements the Wikibase data type 'wikibase-lexeme' + """ + DTYPE = 'wikibase-lexeme' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/L{value}> . + }} + ''' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: The lexeme number to serve as a value + :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix + :param prop_nr: The property number for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(Lexeme, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, (str, int)) or value is None, "Expected str or int, found {} ({})".format(type(value), value) + if value is None: + self.value = value + elif 
isinstance(value, int): + self.value = value + else: + pattern = re.compile(r'^L?([0-9]+)$') + matches = pattern.match(value) + + if not matches: + raise ValueError("Invalid lexeme ID ({}), format must be 'L[0-9]+'".format(value)) + else: + self.value = int(matches.group(1)) + + self.json_representation['datavalue'] = { + 'value': { + 'entity-type': 'lexeme', + 'numeric-id': self.value, + 'id': 'L{}'.format(self.value) + }, + 'type': 'wikibase-entityid' + } + + super(Lexeme, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/math.py b/wikibaseintegrator/datatypes/math.py new file mode 100644 index 00000000..2f586aeb --- /dev/null +++ b/wikibaseintegrator/datatypes/math.py @@ -0,0 +1,52 @@ +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class Math(BaseDataType): + """ + Implements the Wikibase data type 'math' for mathematical formula in TEX format + """ + DTYPE = 'math' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: The string to be used as the value + :type value: str or None + :param prop_nr: The item ID for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + 
:type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(Math, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + self.value = value + + self.json_representation['datavalue'] = { + 'value': self.value, + 'type': 'string' + } + + super(Math, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py new file mode 100644 index 00000000..bcdc3e68 --- /dev/null +++ b/wikibaseintegrator/datatypes/monolingualtext.py @@ -0,0 +1,79 @@ +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class MonolingualText(BaseDataType): + """ + Implements the Wikibase data type for Monolingual Text strings + """ + DTYPE = 'monolingualtext' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> {value} . 
+ }} + ''' + + def __init__(self, text, prop_nr, language=None, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param text: The language specific string to be used as the value + :type text: str or None + :param prop_nr: The item ID for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param language: Specifies the language the value belongs to + :type language: str + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + self.text = None + self.language = config['DEFAULT_LANGUAGE'] if language is None else language + + value = (text, self.language) + + super(MonolingualText, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + self.text, self.language = value + if self.text is not None: + assert isinstance(self.text, str) or self.text is None, "Expected str, found {} ({})".format(type(self.text), self.text) + elif self.snak_type == 'value': + raise ValueError("Parameter 'text' can't be 'None' if 'snak_type' is 'value'") + assert isinstance(self.language, str), "Expected str, found {} ({})".format(type(self.language), self.language) + + self.json_representation['datavalue'] = { + 'value': { + 'text': self.text, + 'language': self.language + }, + 'type': 'monolingualtext' + } + + self.value = (self.text, self.language) + super(MonolingualText, self).set_value(value=self.value) + + def get_sparql_value(self): + return '"' + 
self.text.replace('"', r'\"') + '"@' + self.language + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(text=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + + value = jsn['datavalue']['value'] + return cls(text=value['text'], prop_nr=jsn['property'], language=value['language']) diff --git a/wikibaseintegrator/datatypes/musicalnotation.py b/wikibaseintegrator/datatypes/musicalnotation.py new file mode 100644 index 00000000..e8a4a380 --- /dev/null +++ b/wikibaseintegrator/datatypes/musicalnotation.py @@ -0,0 +1,52 @@ +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class MusicalNotation(BaseDataType): + """ + Implements the Wikibase data type 'musical-notation' + """ + DTYPE = 'musical-notation' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: Values for that data type are strings describing music following LilyPond syntax. 
+ :type value: str or None + :param prop_nr: The item ID for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(MusicalNotation, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + self.value = value + + self.json_representation['datavalue'] = { + 'value': self.value, + 'type': 'string' + } + + super(MusicalNotation, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py new file mode 100644 index 00000000..aad1de1b --- /dev/null +++ b/wikibaseintegrator/datatypes/property.py @@ -0,0 +1,75 @@ +import re + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class Property(BaseDataType): + """ + Implements the Wikibase data type 'property' + """ + DTYPE = 'wikibase-property' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . 
+ ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/P{value}> . + }} + ''' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: The property number to serve as a value + :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix + :param prop_nr: The property number for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(Property, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, (str, int)) or value is None, "Expected str or int, found {} ({})".format(type(value), value) + if value is None: + self.value = value + elif isinstance(value, int): + self.value = value + else: + pattern = re.compile(r'^P?([0-9]+)$') + matches = pattern.match(value) + + if not matches: + raise ValueError("Invalid property ID ({}), format must be 'P[0-9]+'".format(value)) + else: + self.value = int(matches.group(1)) + + self.json_representation['datavalue'] = { + 'value': { + 'entity-type': 'property', + 'numeric-id': self.value, + 'id': 'P{}'.format(self.value) + }, + 'type': 'wikibase-entityid' + } + + super(Property, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 
'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py new file mode 100644 index 00000000..400af310 --- /dev/null +++ b/wikibaseintegrator/datatypes/quantity.py @@ -0,0 +1,134 @@ +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class Quantity(BaseDataType): + """ + Implements the Wikibase data type for quantities + """ + DTYPE = 'quantity' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> '{value}'^^xsd:decimal . + }} + ''' + + def __init__(self, quantity, prop_nr, upper_bound=None, lower_bound=None, unit='1', wikibase_url=None, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param quantity: The quantity value + :type quantity: float, str or None + :param prop_nr: The item ID for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param upper_bound: Upper bound of the value if it exists, e.g. for standard deviations + :type upper_bound: float, str + :param lower_bound: Lower bound of the value if it exists, e.g. for standard deviations + :type lower_bound: float, str + :param unit: The unit item URL or the QID a certain quantity has been measured in (https://www.wikidata.org/wiki/Wikidata:Units). 
+ The default is dimensionless, represented by a '1' + :type unit: str + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url + + if unit.startswith('Q'): + unit = wikibase_url + '/entity/' + unit + + self.quantity = None + self.unit = None + self.upper_bound = None + self.lower_bound = None + + value = (quantity, unit, upper_bound, lower_bound) + + super(Quantity, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + self.quantity, self.unit, self.upper_bound, self.lower_bound = value + + if self.quantity is not None: + self.quantity = self.format_amount(self.quantity) + self.unit = str(self.unit) + if self.upper_bound: + self.upper_bound = self.format_amount(self.upper_bound) + if self.lower_bound: + self.lower_bound = self.format_amount(self.lower_bound) + + # Integrity checks for value and bounds + try: + for i in [self.quantity, self.upper_bound, self.lower_bound]: + if i: + float(i) + except ValueError: + raise ValueError("Value, bounds and units must parse as integers or float") + + if (self.lower_bound and self.upper_bound) and (float(self.lower_bound) > float(self.upper_bound) + or float(self.lower_bound) > float(self.quantity)): + raise ValueError("Lower bound too large") + + if self.upper_bound and float(self.upper_bound) < float(self.quantity): + raise ValueError("Upper bound too small") + elif self.snak_type == 'value': + raise 
ValueError("Parameter 'quantity' can't be 'None' if 'snak_type' is 'value'") + + self.json_representation['datavalue'] = { + 'value': { + 'amount': self.quantity, + 'unit': self.unit, + 'upperBound': self.upper_bound, + 'lowerBound': self.lower_bound + }, + 'type': 'quantity' + } + + # remove bounds from json if they are undefined + if not self.upper_bound: + del self.json_representation['datavalue']['value']['upperBound'] + + if not self.lower_bound: + del self.json_representation['datavalue']['value']['lowerBound'] + + self.value = (self.quantity, self.unit, self.upper_bound, self.lower_bound) + super(Quantity, self).set_value(value=self.value) + + def get_sparql_value(self): + return self.quantity + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(quantity=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + + value = jsn['datavalue']['value'] + upper_bound = value['upperBound'] if 'upperBound' in value else None + lower_bound = value['lowerBound'] if 'lowerBound' in value else None + return cls(quantity=value['amount'], prop_nr=jsn['property'], upper_bound=upper_bound, lower_bound=lower_bound, unit=value['unit']) + + @staticmethod + def format_amount(amount): + # Remove .0 by casting to int + if float(amount) % 1 == 0: + amount = int(float(amount)) + + # Adding prefix + for positive number and 0 + if not str(amount).startswith('+') and float(amount) >= 0: + amount = str('+{}'.format(amount)) + + # return as string + return str(amount) diff --git a/wikibaseintegrator/datatypes/sense.py b/wikibaseintegrator/datatypes/sense.py new file mode 100644 index 00000000..a1633bc7 --- /dev/null +++ b/wikibaseintegrator/datatypes/sense.py @@ -0,0 +1,72 @@ +import re + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class Sense(BaseDataType): + """ + Implements the Wikibase data type 
'wikibase-sense' + """ + DTYPE = 'wikibase-sense' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> . + }} + ''' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: Value using the format "L-S" (example: L252248-S123) + :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix + :param prop_nr: The property number for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(Sense, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + if value is None: + self.value = value + else: + pattern = re.compile(r'^L[0-9]+-S[0-9]+$') + matches = pattern.match(value) + + if not matches: + raise ValueError("Invalid sense ID ({}), format must be 'L[0-9]+-S[0-9]+'".format(value)) + + self.value = value + + self.json_representation['datavalue'] = { + 'value': { + 'entity-type': 'sense', + 'id': self.value + }, + 'type': 'wikibase-entityid' + } + + super(Sense, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 
'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=jsn['datavalue']['value']['id'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py new file mode 100644 index 00000000..6ec2bd4f --- /dev/null +++ b/wikibaseintegrator/datatypes/string.py @@ -0,0 +1,53 @@ +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class String(BaseDataType): + """ + Implements the Wikibase data type 'string' + """ + + DTYPE = 'string' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: The string to be used as the value + :type value: str or None + :param prop_nr: The item ID for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(String, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + self.value = value + + self.json_representation['datavalue'] = { + 'value': self.value, + 'type': 'string' + } + + super(String, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or 
jsn['snaktype'] == 'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/tabulardata.py b/wikibaseintegrator/datatypes/tabulardata.py new file mode 100644 index 00000000..91d4b58f --- /dev/null +++ b/wikibaseintegrator/datatypes/tabulardata.py @@ -0,0 +1,60 @@ +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class TabularData(BaseDataType): + """ + Implements the Wikibase data type 'tabular-data' + """ + DTYPE = 'tabular-data' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: Reference to tabular data file on Wikimedia Commons. + :type value: str or None + :param prop_nr: The item ID for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(TabularData, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + if value is None: + self.value = value + else: + # TODO: Need to check if the value is a full URl like http://commons.wikimedia.org/data/main/Data:Taipei+Population.tab + pattern = 
re.compile(r'^Data:((?![:|#]).)+\.tab$') + matches = pattern.match(value) + if not matches: + raise ValueError("Value must start with Data: and end with .tab. In addition title should not contain characters like colon, hash or pipe.") + self.value = value + + self.json_representation['datavalue'] = { + 'value': self.value, + 'type': 'string' + } + + super(TabularData, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py new file mode 100644 index 00000000..c08e1c87 --- /dev/null +++ b/wikibaseintegrator/datatypes/time.py @@ -0,0 +1,116 @@ +import re + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_jsonparser import JsonParser + + +class Time(BaseDataType): + """ + Implements the Wikibase data type with date and time values + """ + DTYPE = 'time' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> '{value}'^^xsd:dateTime . + }} + ''' + + def __init__(self, time, prop_nr, before=0, after=0, precision=11, timezone=0, calendarmodel=None, wikibase_url=None, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param time: Explicit value for point in time, represented as a timestamp resembling ISO 8601 + :type time: str in the format '+%Y-%m-%dT%H:%M:%SZ', e.g. '+2001-12-31T12:01:13Z' + :param prop_nr: The property number for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param before: explicit integer value for how many units after the given time it could be. + The unit is given by the precision. 
+ :type before: int + :param after: explicit integer value for how many units before the given time it could be. + The unit is given by the precision. + :type after: int + :param precision: Precision value for dates and time as specified in the Wikibase data model + (https://www.wikidata.org/wiki/Special:ListDatatypes#time) + :type precision: int + :param timezone: The timezone which applies to the date and time as specified in the Wikibase data model + :type timezone: int + :param calendarmodel: The calendar model used for the date. URL to the Wikibase calendar model item or the QID. + :type calendarmodel: str + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + calendarmodel = config['CALENDAR_MODEL_QID'] if calendarmodel is None else calendarmodel + wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url + + self.time = None + self.before = None + self.after = None + self.precision = None + self.timezone = None + self.calendarmodel = None + + if calendarmodel.startswith('Q'): + calendarmodel = wikibase_url + '/entity/' + calendarmodel + + value = (time, before, after, precision, timezone, calendarmodel) + + super(Time, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + self.time, self.before, self.after, self.precision, self.timezone, self.calendarmodel = value + assert isinstance(self.time, str) or self.time is None, "Expected str, found 
{} ({})".format(type(self.time), self.time) + + if self.time is not None: + if not (self.time.startswith("+") or self.time.startswith("-")): + self.time = "+" + self.time + pattern = re.compile(r'^[+-][0-9]*-(?:1[0-2]|0[0-9])-(?:3[01]|0[0-9]|[12][0-9])T(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]Z$') + matches = pattern.match(self.time) + if not matches: + raise ValueError("Time time must be a string in the following format: '+%Y-%m-%dT%H:%M:%SZ'") + self.value = value + if self.precision < 0 or self.precision > 15: + raise ValueError("Invalid value for time precision, see https://www.mediawiki.org/wiki/Wikibase/DataModel/JSON#time") + elif self.snak_type == 'value': + raise ValueError("Parameter 'time' can't be 'None' if 'snak_type' is 'value'") + + self.json_representation['datavalue'] = { + 'value': { + 'time': self.time, + 'before': self.before, + 'after': self.after, + 'precision': self.precision, + 'timezone': self.timezone, + 'calendarmodel': self.calendarmodel + }, + 'type': 'time' + } + + self.value = (self.time, self.before, self.after, self.precision, self.timezone, self.calendarmodel) + super(Time, self).set_value(value=self.value) + + def get_sparql_value(self): + return self.time + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(time=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + + value = jsn['datavalue']['value'] + return cls(time=value['time'], prop_nr=jsn['property'], before=value['before'], after=value['after'], precision=value['precision'], timezone=value['timezone'], + calendarmodel=value['calendarmodel']) diff --git a/wikibaseintegrator/datatypes/url.py b/wikibaseintegrator/datatypes/url.py new file mode 100644 index 00000000..f6df7a6d --- /dev/null +++ b/wikibaseintegrator/datatypes/url.py @@ -0,0 +1,68 @@ +import re + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_jsonparser import JsonParser 
+ + +class URL(BaseDataType): + """ + Implements the Wikibase data type for URL strings + """ + DTYPE = 'url' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{value}> . + }} + ''' + + def __init__(self, value, prop_nr, **kwargs): + """ + Constructor, calls the superclass BaseDataType + :param value: The URL to be used as the value + :type value: str or None + :param prop_nr: The item ID for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param is_reference: Whether this snak is a reference + :type is_reference: boolean + :param is_qualifier: Whether this snak is a qualifier + :type is_qualifier: boolean + :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' + :type snak_type: str + :param references: List with reference objects + :type references: A data type with subclass of BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A data type with subclass of BaseDataType + :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super(URL, self).__init__(value=value, prop_nr=prop_nr, **kwargs) + + self.set_value(value) + + def set_value(self, value): + assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + if value is None: + self.value = value + else: + pattern = re.compile(r'^([a-z][a-z\d+.-]*):([^][<>\"\x00-\x20\x7F])+$') + matches = pattern.match(value) + + if not matches: + raise ValueError("Invalid URL {}".format(value)) + self.value = value + + self.json_representation['datavalue'] = { + 'value': self.value, + 'type': 'string' + } + + super(URL, self).set_value(value=self.value) + + @classmethod + @JsonParser + def from_json(cls, jsn): + if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': + return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return 
cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/entities/__init__.py b/wikibaseintegrator/entities/__init__.py new file mode 100644 index 00000000..15bf3a41 --- /dev/null +++ b/wikibaseintegrator/entities/__init__.py @@ -0,0 +1,3 @@ +from .item import Item +from .lexeme import Lexeme +from .property import Property diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py new file mode 100644 index 00000000..7cd7801c --- /dev/null +++ b/wikibaseintegrator/entities/baseentity.py @@ -0,0 +1,320 @@ +import copy +from pprint import pprint + +import simplejson + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.models.claims import Claims +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_exceptions import SearchOnlyError, NonUniqueLabelDescriptionPairError, MWApiError + + +class BaseEntity(object): + distinct_value_props = {} + + def __init__(self, api, **kwargs): + self.api = api + + self.lastrevid = kwargs.pop('lastrevid', None) + self.type = kwargs.pop('type', None) + self.id = kwargs.pop('id', None) + self.claims = kwargs.pop('claims', Claims()) + + # WikibaseIntegrator specific + self.entity_type = kwargs.pop('entity_type', None) + self.new_item = kwargs.pop('new_item', False) + if self.id and self.new_item: + raise ValueError("Cannot create a new item, when an identifier is given.") + elif not self.id: + self.new_item = True + + self.json = {} + self.statements = [] + + if self.api.search_only: + self.require_write = False + else: + self.require_write = True + + def get_json(self) -> {}: + return { + 'type': self.type, + 'id': self.id, + 'claims': self.claims.get_json() + } + + def from_json(self, json_data): + self.json = json_data + + self.lastrevid = json_data['lastrevid'] + self.type = json_data['type'] + self.id = json_data['id'] + self.claims = Claims().from_json(json_data['claims']) + + self.new_item 
= False + + def get(self, entity_id): + """ + retrieve an item in json representation from the Wikibase instance + :rtype: dict + :return: python complex dictionary representation of a json + """ + + params = { + 'action': 'wbgetentities', + 'ids': entity_id, + 'format': 'json' + } + + json_data = self.api.mediawiki_api_call_helper(data=params, mediawiki_api_url=self.api.mediawiki_api_url, allow_anonymous=True) + return json_data + + def _write(self, data=None, summary=''): + """ + Writes the item Json to the Wikibase instance and after successful write, updates the object with new ids and hashes generated by the Wikibase instance. + For new items, also returns the new QIDs. + :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. + :param bot_account: Tell the Wikidata API whether the script should be run as part of a bot account or not. + :type bot_account: bool + :param summary: A short (max 250 characters) summary of the purpose of the edit. This will be displayed as the revision summary of the item. + :type summary: str + :param entity_type: Decides wether the object will become a 'form', 'item' (default), 'lexeme', 'property' or 'sense' + :type entity_type: str + :param property_datatype: When payload_type is 'property' then this parameter set the datatype for the property + :type property_datatype: str + :param max_retries: If api request fails due to rate limiting, maxlag, or readonly mode, retry up to `max_retries` times + :type max_retries: int + :param retry_after: Number of seconds to wait before retrying request (see max_retries) + :type retry_after: int + :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. 
+ :type allow_anonymous: bool + :return: the entity ID on successful write + """ + + if self.api.search_only: + raise SearchOnlyError + + if data is None: + raise ValueError + + # if all_claims: + # data = json.JSONEncoder().encode(self.json_representation) + # else: + # new_json_repr = {k: self.json_representation[k] for k in set(list(self.json_representation.keys())) - {'claims'}} + # new_json_repr['claims'] = {} + # for claim in self.json_representation['claims']: + # if [True for x in self.json_representation['claims'][claim] if 'id' not in x or 'remove' in x]: + # new_json_repr['claims'][claim] = copy.deepcopy(self.json_representation['claims'][claim]) + # for statement in new_json_repr['claims'][claim]: + # if 'id' in statement and 'remove' not in statement: + # new_json_repr['claims'][claim].remove(statement) + # if not new_json_repr['claims'][claim]: + # new_json_repr['claims'].pop(claim) + # data = json.JSONEncoder().encode(new_json_repr) + + data = simplejson.JSONEncoder().encode(data) + + payload = { + 'action': 'wbeditentity', + 'data': data, + 'format': 'json', + 'token': self.api.login.get_edit_token(), + 'summary': summary + } + + if config['MAXLAG'] > 0: + payload.update({'maxlag': config['MAXLAG']}) + + if self.api.is_bot: + payload.update({'bot': ''}) + + if self.new_item: + payload.update({u'new': self.entity_type}) + else: + payload.update({u'id': self.id}) + + if self.api.debug: + print(payload) + + try: + json_data = self.api.mediawiki_api_call_helper(data=payload, login=self.api.login, mediawiki_api_url=self.api.mediawiki_api_url, allow_anonymous=False) + + if 'error' in json_data and 'messages' in json_data['error']: + error_msg_names = set(x.get('name') for x in json_data['error']['messages']) + if 'wikibase-validator-label-with-description-conflict' in error_msg_names: + raise NonUniqueLabelDescriptionPairError(json_data) + else: + raise MWApiError(json_data) + elif 'error' in json_data.keys(): + raise MWApiError(json_data) + except 
Exception: + print('Error while writing to the Wikibase instance') + raise + + # after successful write, update this object with latest json, QID and parsed data types. + self.id = json_data['entity']['id'] + if 'success' in json_data and 'entity' in json_data and 'lastrevid' in json_data['entity']: + self.lastrevid = json_data['entity']['lastrevid'] + pprint(json_data) + return json_data['entity'] + + def __construct_claim_json(self): + """ + Writes the properties from self.data to a new or existing json in self.json_representation + :return: None + """ + + def handle_qualifiers(old_item, new_item): + if not new_item.check_qualifier_equality: + old_item.set_qualifiers(new_item.get_qualifiers()) + + def is_good_ref(ref_block): + prop_nrs = [x.get_prop_nr() for x in ref_block] + values = [x.get_value() for x in ref_block] + good_ref = True + prop_value_map = dict(zip(prop_nrs, values)) + + # if self.good_refs has content, use these to determine good references + if self.good_refs and len(self.good_refs) > 0: + found_good = True + for rblock in self.good_refs: + + if not all([k in prop_value_map for k, v in rblock.items()]): + found_good = False + + if not all([v in prop_value_map[k] for k, v in rblock.items() if v]): + found_good = False + + if found_good: + return True + + return False + + return good_ref + + def handle_references(old_item, new_item): + """ + Local function to handle references + :param old_item: An item containing the data as currently in the Wikibase instance + :type old_item: A child of BaseDataType + :param new_item: An item containing the new data which should be written to the Wikibase instance + :type new_item: A child of BaseDataType + """ + + old_references = old_item.get_references() + new_references = new_item.get_references() + + if sum(map(lambda z: len(z), old_references)) == 0 or self.global_ref_mode == 'STRICT_OVERWRITE': + old_item.set_references(new_references) + + elif self.global_ref_mode == 'STRICT_KEEP' or 
new_item.statement_ref_mode == 'STRICT_KEEP': + pass + + elif self.global_ref_mode == 'STRICT_KEEP_APPEND' or new_item.statement_ref_mode == 'STRICT_KEEP_APPEND': + old_references.extend(new_references) + old_item.set_references(old_references) + + elif self.global_ref_mode == 'KEEP_GOOD' or new_item.statement_ref_mode == 'KEEP_GOOD': + # Copy only good_ref + refs = [x for x in old_references if is_good_ref(x)] + + # Don't add already existing references + for new_ref in new_references: + if new_ref not in old_references: + refs.append(new_ref) + + # Set the references + old_item.set_references(refs) + + # sort the incoming data according to the property number + self.data.sort(key=lambda z: z.get_prop_nr().lower()) + + # collect all statements which should be deleted because of an empty value + statements_for_deletion = [] + for item in self.data: + if isinstance(item, BaseDataType) and item.get_value() == '': + statements_for_deletion.append(item.get_prop_nr()) + + if self.create_new_item: + self.statements = copy.copy(self.data) + else: + for stat in self.data: + prop_nr = stat.get_prop_nr() + + prop_data = [x for x in self.statements if x.get_prop_nr() == prop_nr] + if prop_data and stat.if_exists == 'KEEP': + continue + prop_pos = [x.get_prop_nr() == prop_nr for x in self.statements] + prop_pos.reverse() + insert_pos = len(prop_pos) - (prop_pos.index(True) if any(prop_pos) else 0) + + # If value should be appended, check if values exists, if not, append + if 'APPEND' in stat.if_exists: + equal_items = [stat == x for x in prop_data] + if True not in equal_items or stat.if_exists == 'FORCE_APPEND': + self.statements.insert(insert_pos + 1, stat) + else: + # if item exists, modify rank + current_item = prop_data[equal_items.index(True)] + current_item.set_rank(stat.get_rank()) + handle_references(old_item=current_item, new_item=stat) + handle_qualifiers(old_item=current_item, new_item=stat) + continue + + # set all existing values of a property for removal + for x 
in prop_data: + # for deletion of single statements, do not set all others to delete + if hasattr(stat, 'remove'): + break + elif x.get_id() and not hasattr(x, 'retain'): + # keep statements with good references if keep_good_ref_statements is True + if self.keep_good_ref_statements: + if any([is_good_ref(r) for r in x.get_references()]): + setattr(x, 'retain', '') + else: + setattr(x, 'remove', '') + + match = [] + for i in prop_data: + if stat == i and hasattr(stat, 'remove'): + match.append(True) + setattr(i, 'remove', '') + elif stat == i: + match.append(True) + setattr(i, 'retain', '') + if hasattr(i, 'remove'): + delattr(i, 'remove') + handle_references(old_item=i, new_item=stat) + handle_qualifiers(old_item=i, new_item=stat) + + i.set_rank(rank=stat.get_rank()) + # if there is no value, do not add an element, this is also used to delete whole properties. + elif i.get_value(): + match.append(False) + + if True not in match and not hasattr(stat, 'remove'): + self.statements.insert(insert_pos + 1, stat) + + # For whole property deletions, add remove flag to all statements which should be deleted + for item in copy.deepcopy(self.statements): + if item.get_prop_nr() in statements_for_deletion: + if item.get_id() != '': + setattr(item, 'remove', '') + else: + self.statements.remove(item) + + # regenerate claim json + self.json_representation['claims'] = {} + for stat in self.statements: + prop_nr = stat.get_prop_nr() + if prop_nr not in self.json_representation['claims']: + self.json_representation['claims'][prop_nr] = [] + self.json_representation['claims'][prop_nr].append(stat.get_json_representation()) + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs="\r\n\t ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) diff --git a/wikibaseintegrator/entities/item.py b/wikibaseintegrator/entities/item.py new file mode 
100644 index 00000000..173c4115 --- /dev/null +++ b/wikibaseintegrator/entities/item.py @@ -0,0 +1,51 @@ +from __future__ import annotations + +from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models.aliases import Aliases +from wikibaseintegrator.models.descriptions import Descriptions +from wikibaseintegrator.models.labels import Labels +from wikibaseintegrator.models.sitelinks import Sitelinks + + +class Item(BaseEntity): + def __init__(self, api, labels=None, descriptions=None, aliases=None, sitelinks=None, **kwargs) -> None: + self.api = api + + super(Item, self).__init__(api=self.api, entity_type='item', **kwargs) + + # Item and property specific + self.labels = labels or Labels() + self.descriptions = descriptions or Descriptions() + self.aliases = aliases or Aliases() + + # Item specific + self.sitelinks = sitelinks or Sitelinks() + + def new(self) -> Item: + return Item(self.api) + + def get(self, entity_id) -> Item: + json_data = super(Item, self).get(entity_id=entity_id) + return Item(self.api).from_json(json_data=json_data['entities'][entity_id]) + + def get_json(self) -> {}: + return { + 'labels': self.labels.get_json(), + 'descriptions': self.descriptions.get_json(), + 'aliases': self.aliases.get_json(), + **super(Item, self).get_json() + } + + def from_json(self, json_data) -> Item: + super(Item, self).from_json(json_data=json_data) + + self.labels = Labels().from_json(json_data['labels']) + self.descriptions = Descriptions().from_json(json_data['descriptions']) + self.aliases = Aliases().from_json(json_data['aliases']) + self.sitelinks = Sitelinks().from_json(json_data['sitelinks']) + + return self + + def write(self): + json_data = super(Item, self)._write(data=self.get_json()) + return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/lexeme.py b/wikibaseintegrator/entities/lexeme.py new file mode 100644 index 00000000..0b5a178a --- /dev/null +++ 
b/wikibaseintegrator/entities/lexeme.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models.forms import Forms +from wikibaseintegrator.models.lemmas import Lemmas +from wikibaseintegrator.models.senses import Senses + + +class Lexeme(BaseEntity): + def __init__(self, api, lemmas=None, lexical_category=None, language=None, forms=None, senses=None, **kwargs): + self.api = api + + super().__init__(api=self.api, entity_type='lexeme', **kwargs) + + self.lemmas = lemmas or Lemmas() + self.lexicalCategory = lexical_category + self.language = language or self.api.language + self.forms = forms or Forms() + self.senses = senses or Senses() + + def get(self, entity_id) -> Lexeme: + json_data = super(Lexeme, self).get(entity_id=entity_id) + return Lexeme(self.api).from_json(json_data=json_data['entities'][entity_id]) + + def set(self, **kwargs) -> Lexeme: + self.__init__(self.api, **kwargs) + return self + + def get_json(self) -> {}: + return { + 'lemmas': self.lemmas.get_json(), + 'lexicalCategory': self.lexicalCategory, + 'language': self.language, + 'forms': self.forms.get_json(), + 'senses': self.senses.get_json(), + **super(Lexeme, self).get_json() + } + + def from_json(self, json_data) -> Lexeme: + super(Lexeme, self).from_json(json_data=json_data) + + self.lemmas = Lemmas().from_json(json_data['lemmas']) + self.lexicalCategory = json_data['lexicalCategory'] + self.language = json_data['language'] + self.forms = Forms().from_json(json_data['forms']) + self.senses = Senses().from_json(json_data['senses']) + + return self + + def write(self): + json_data = super(Lexeme, self)._write(data=self.get_json()) + return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/property.py b/wikibaseintegrator/entities/property.py new file mode 100644 index 00000000..4a14dd71 --- /dev/null +++ b/wikibaseintegrator/entities/property.py @@ -0,0 +1,37 @@ +from 
__future__ import annotations + +from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models.aliases import Aliases +from wikibaseintegrator.models.descriptions import Descriptions +from wikibaseintegrator.models.labels import Labels + + +class Property(BaseEntity): + def __init__(self, api, datatype=None, labels=None, descriptions=None, aliases=None, **kwargs): + self.api = api + + super().__init__(api=api, entity_type='property', **kwargs) + + self.json = None + + # Property specific + self.datatype = datatype + + # Items and property specific + self.labels = labels or Labels() + self.descriptions = descriptions or Descriptions() + self.aliases = aliases or Aliases() + + def get(self, entity_id) -> Property: + json_data = super(Property, self).get(entity_id=entity_id) + return Property(self.api).from_json(json_data=json_data['entities'][entity_id]) + + def from_json(self, json_data) -> Property: + super(Property, self).from_json(json_data=json_data) + + self.datatype = json_data['datatype'] + self.labels = Labels().from_json(json_data['labels']) + self.descriptions = Descriptions().from_json(json_data['descriptions']) + self.aliases = Aliases().from_json(json_data['aliases']) + + return self diff --git a/wikibaseintegrator/models/__init__.py b/wikibaseintegrator/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/wikibaseintegrator/models/aliases.py b/wikibaseintegrator/models/aliases.py new file mode 100644 index 00000000..7f974ede --- /dev/null +++ b/wikibaseintegrator/models/aliases.py @@ -0,0 +1,47 @@ +from wikibaseintegrator.models.language_values import LanguageValue + + +class Aliases: + def __init__(self, language=None, value=None): + self.aliases = {} + + if language is not None: + self.set(language=language, value=value) + + def get(self, language=None): + return self.aliases[language] + + def set(self, language=None, value=None): + if language not in self.aliases: + self.aliases[language] = [] 
+ alias = Alias(language, value) + self.aliases[language].append(alias) + return alias + + def get_json(self) -> []: + json_data = {} + for language in self.aliases: + if language not in json_data: + json_data[language] = [] + for alias in self.aliases[language]: + json_data[language].append(alias.get_json()) + return json_data + + def from_json(self, json_data): + for language in json_data: + for alias in json_data[language]: + self.set(alias['language'], alias['value']) + + return self + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) + + +class Alias(LanguageValue): + pass diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py new file mode 100644 index 00000000..57337689 --- /dev/null +++ b/wikibaseintegrator/models/claims.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType + + +class Claims: + def __init__(self, api=None): + self.claims = {} + self.api = api + + def get(self, property=None) -> dict: + return self.claims[property] + + def add(self, claims=None) -> Claims: + if isinstance(claims, BaseDataType): + claims = [claims] + elif not isinstance(claims, list): + raise ValueError + + for claim in claims: + property = claim.prop_nr + if claim is not None: + assert isinstance(claim, BaseDataType) + + if property is None: + property = claim.prop_nr + + if property not in self.claims: + self.claims[property] = {} + + self.claims[property][claim.id] = claim + + return self + + def get_json(self) -> {}: + json_data = {} + for property in self.claims: + if property not in json_data: + json_data[property] = [] + for claim in self.claims[property]: + json_data[property].append(self.claims[property][claim].get_json_representation()) + return json_data + + def 
from_json(self, json_data) -> Claims: + for property in json_data: + for alias in json_data[property]: + data_type = [x for x in BaseDataType.__subclasses__() if x.DTYPE == alias['mainsnak']['datatype']][0] + self.add(data_type.from_json(alias)) + + return self + + def __repr__(self) -> str: + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) + + def __iter__(self): + return iter(self.claims.values()) diff --git a/wikibaseintegrator/models/descriptions.py b/wikibaseintegrator/models/descriptions.py new file mode 100644 index 00000000..e3d03641 --- /dev/null +++ b/wikibaseintegrator/models/descriptions.py @@ -0,0 +1,5 @@ +from wikibaseintegrator.models.language_values import LanguageValues + + +class Descriptions(LanguageValues): + pass diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py new file mode 100644 index 00000000..eb80f8aa --- /dev/null +++ b/wikibaseintegrator/models/forms.py @@ -0,0 +1,57 @@ +from wikibaseintegrator.models.claims import Claims + + +class Forms: + def __init__(self): + self.forms = {} + + def get(self, id): + return self.forms[id] + + def add(self, form): + self.forms[form.id] = form + + def get_json(self) -> []: + json_data = [] + for form in self.forms: + json_data.append(self.forms[form].get_json()) + return json_data + + def from_json(self, json_data): + for form in json_data: + self.add(Form(form_id=form['id'], representations=form['representations'], grammatical_features=form['grammaticalFeatures'], + claims=Claims().from_json(form['claims']))) + + return self + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) + + +class 
Form: + def __init__(self, form_id=None, representations=None, grammatical_features=None, claims=None): + self.id = form_id + self.representations = representations + self.grammatical_features = grammatical_features + self.claims = claims + + def get_json(self) -> {}: + return { + 'id': self.id, + 'representations': self.representations.get_json(), + 'grammaticalFeatures': self.grammatical_features.get_json(), + 'claims': self.claims.get_json() + } + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) diff --git a/wikibaseintegrator/models/labels.py b/wikibaseintegrator/models/labels.py new file mode 100644 index 00000000..9d092364 --- /dev/null +++ b/wikibaseintegrator/models/labels.py @@ -0,0 +1,5 @@ +from wikibaseintegrator.models.language_values import LanguageValues + + +class Labels(LanguageValues): + pass diff --git a/wikibaseintegrator/models/language_values.py b/wikibaseintegrator/models/language_values.py new file mode 100644 index 00000000..29e6c368 --- /dev/null +++ b/wikibaseintegrator/models/language_values.py @@ -0,0 +1,65 @@ +class LanguageValues: + def __init__(self): + self.values = {} + + def get(self, language=None): + return self.values[language] + + def set(self, language=None, value=None): + language_value = LanguageValue(language, value) + self.values[language] = language_value + return language_value + + def get_json(self) -> {}: + json_data = {} + for value in self.values: + json_data[value] = self.values[value].get_json() + return json_data + + def from_json(self, json_data): + for language_value in json_data: + self.set(language=json_data[language_value]['language'], value=json_data[language_value]['value']) + + return self + + def __iter__(self): + return iter(self.values.values()) + + def __repr__(self): + """A mixin implementing a 
simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) + + +class LanguageValue: + def __init__(self, language=None, value=None): + self.language = language + self.value = value + self.removed = False + + def remove(self): + self.removed = True + return self + + def get_json(self) -> {}: + json_data = { + 'language': self.language, + 'value': self.value + } + if self.removed: + json_data['remove'] = '' + return json_data + + def __str__(self): + return self.value + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) diff --git a/wikibaseintegrator/models/lemmas.py b/wikibaseintegrator/models/lemmas.py new file mode 100644 index 00000000..3ad03ebf --- /dev/null +++ b/wikibaseintegrator/models/lemmas.py @@ -0,0 +1,5 @@ +from wikibaseintegrator.models.language_values import LanguageValues + + +class Lemmas(LanguageValues): + pass diff --git a/wikibaseintegrator/models/qualifiers.py b/wikibaseintegrator/models/qualifiers.py new file mode 100644 index 00000000..3d711d2c --- /dev/null +++ b/wikibaseintegrator/models/qualifiers.py @@ -0,0 +1,23 @@ +from wikibaseintegrator.datatypes.basedatatype import BaseDataType + + +class Qualifiers: + def __init__(self): + self.qualifiers = {} + + def get(self, property=None): + return self.qualifiers[property] + + def add(self, property=None, qualifier=None): + if qualifier is not None: + assert isinstance(qualifier, BaseDataType) + + if property is None: + property = qualifier.prop_nr + + if property not in self.qualifiers: + self.qualifiers[property] = {} + + self.qualifiers[property] = qualifier + + return self diff --git a/wikibaseintegrator/models/senses.py 
b/wikibaseintegrator/models/senses.py new file mode 100644 index 00000000..3df856ec --- /dev/null +++ b/wikibaseintegrator/models/senses.py @@ -0,0 +1,67 @@ +from wikibaseintegrator.models.claims import Claims +from wikibaseintegrator.models.language_values import LanguageValues + + +class Senses: + def __init__(self): + self.senses = {} + + def get(self, id): + return self.senses[id] + + def add(self, form): + self.senses[form.id] = form + + def get_json(self) -> []: + json_data = [] + for sense in self.senses: + json_data.append(self.senses[sense].get_json()) + return json_data + + def from_json(self, json_data): + for sense in json_data: + self.add(Sense(form_id=sense['id'], glosses=Glosses().from_json(sense['glosses']), claims=Claims().from_json(sense['claims']))) + + return self + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) + + +class Sense: + def __init__(self, form_id=None, glosses=None, claims=None): + self.id = form_id + self.glosses = glosses or Glosses() + self.claims = claims or Claims() + self.removed = False + + def get_json(self) -> {}: + json_data = { + 'id': self.id, + 'glosses': self.glosses.get_json(), + 'claims': self.claims.get_json() + } + if self.removed: + json_data['remove'] = '' + return json_data + + def remove(self): + self.removed = True + return self + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) + + +class Glosses(LanguageValues): + pass diff --git a/wikibaseintegrator/models/sitelinks.py b/wikibaseintegrator/models/sitelinks.py new file mode 100644 index 00000000..eaaa76cb --- /dev/null +++ 
b/wikibaseintegrator/models/sitelinks.py @@ -0,0 +1,46 @@ +class Sitelinks: + def __init__(self): + self.sitelinks = {} + + def get(self, site=None): + if site in self.sitelinks: + return self.sitelinks[site] + else: + return None + + def set(self, site=None, title=None, badges=None): + sitelink = Sitelink(site, title, badges) + self.sitelinks[site] = sitelink + return sitelink + + def from_json(self, json_data): + for sitelink in json_data: + self.set(site=json_data[sitelink]['site'], title=json_data[sitelink]['title'], badges=json_data[sitelink]['badges']) + + return self + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) + + +class Sitelink: + def __init__(self, site=None, title=None, badges=None): + self.site = site + self.title = title + self.badges = badges + + def __str__(self): + return self.title + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) diff --git a/wikibaseintegrator/wbi_api.py b/wikibaseintegrator/wbi_api.py new file mode 100644 index 00000000..175f511e --- /dev/null +++ b/wikibaseintegrator/wbi_api.py @@ -0,0 +1,449 @@ +import datetime +from time import sleep + +import requests + +from wikibaseintegrator.datatypes import BaseDataType +from wikibaseintegrator.wbi_backoff import wbi_backoff +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_exceptions import MWApiError, SearchError +from wikibaseintegrator.wbi_fastrun import FastRunContainer + + +class Api(object): + fast_run_store = [] + + def __init__(self, mediawiki_api_url, mediawiki_index_url, mediawiki_rest_url, sparql_endpoint_url, wikibase_url, 
property_constraint_pid, distinct_values_constraint_qid, + search_only, fast_run, fast_run_base_filter, fast_run_use_refs, fast_run_case_insensitive, is_bot, language, login, debug=None): + self.mediawiki_api_url = mediawiki_api_url + self.mediawiki_index_url = mediawiki_index_url + self.mediawiki_rest_url = mediawiki_rest_url + self.sparql_endpoint_url = sparql_endpoint_url + self.wikibase_url = wikibase_url + self.property_constraint_pid = property_constraint_pid + self.distinct_values_constraint_qid = distinct_values_constraint_qid + self.search_only = search_only + + # Fast Run + self.fast_run = fast_run + self.fast_run_base_filter = fast_run_base_filter + self.fast_run_use_refs = fast_run_use_refs + self.fast_run_case_insensitive = fast_run_case_insensitive + + self.is_bot = is_bot + self.language = language + self.login = login + self.debug = debug or config['DEBUG'] + + self.fast_run_container = None + + if self.fast_run_case_insensitive and not self.search_only: + raise ValueError("If using fast run case insensitive, search_only must be set") + + if self.fast_run: + self.init_fastrun() + # if self.debug: + # if self.require_write: + # if self.search_only: + # print("Successful fastrun, search_only mode, we can't determine if data is up to date.") + # else: + # print("Successful fastrun, because no full data match you need to update the item.") + # else: + # print("Successful fastrun, no write to Wikibase instance required.") + + def init_fastrun(self): + print('Initialize Fast Run') + # We search if we already have a FastRunContainer with the same parameters to re-use it + for c in Api.fast_run_store: + if (c.base_filter == self.fast_run_base_filter) and (c.use_refs == self.fast_run_use_refs) and (c.sparql_endpoint_url == self.sparql_endpoint_url): + self.fast_run_container = c + self.fast_run_container.current_qid = '' + self.fast_run_container.base_data_type = BaseDataType + self.fast_run_container.mediawiki_api_url = self.mediawiki_api_url + 
self.fast_run_container.wikibase_url = self.wikibase_url + if self.debug: + print("Found an already existing FastRunContainer") + + if not self.fast_run_container: + if self.debug: + print("Create a new FastRunContainer") + self.fast_run_container = FastRunContainer(api=self, + base_filter=self.fast_run_base_filter, + use_refs=self.fast_run_use_refs, + sparql_endpoint_url=self.sparql_endpoint_url, + base_data_type=BaseDataType, + mediawiki_api_url=self.mediawiki_api_url, + wikibase_url=self.wikibase_url, + case_insensitive=self.fast_run_case_insensitive) + Api.fast_run_store.append(self.fast_run_container) + + # TODO: Do something here + # if not self.search_only: + # self.require_write = self.fast_run_container.write_required(self.data, cqid=self.id) + # # set item id based on fast run data + # if not self.require_write and not self.id: + # self.id = self.fast_run_container.current_qid + # else: + # self.fast_run_container.load_item(self.data) + # # set item id based on fast run data + # if not self.id: + # self.id = self.fast_run_container.current_qid + + @staticmethod + @wbi_backoff() + def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries=1000, retry_after=60, **kwargs): + """ + :param method: 'GET' or 'POST' + :param mediawiki_api_url: + :param session: If a session is passed, it will be used. 
Otherwise a new requests session is created + :param max_retries: If api request fails due to rate limiting, maxlag, or readonly mode, retry up to + `max_retries` times + :type max_retries: int + :param retry_after: Number of seconds to wait before retrying request (see max_retries) + :type retry_after: int + :param kwargs: Passed to requests.request + :return: + """ + + mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url + + # TODO: Add support for 'multipart/form-data' when using POST (https://www.mediawiki.org/wiki/API:Edit#Large_edits) + + if 'data' in kwargs and kwargs['data']: + if 'format' not in kwargs['data']: + kwargs['data'].update({'format': 'json'}) + elif kwargs['data']['format'] != 'json': + raise ValueError("'format' can only be 'json' when using mediawiki_api_call()") + + response = None + session = session if session else requests.session() + for n in range(max_retries): + try: + response = session.request(method, mediawiki_api_url, **kwargs) + except requests.exceptions.ConnectionError as e: + print("Connection error: {}. Sleeping for {} seconds.".format(e, retry_after)) + sleep(retry_after) + continue + if response.status_code == 503: + print("service unavailable. sleeping for {} seconds".format(retry_after)) + sleep(retry_after) + continue + + response.raise_for_status() + json_data = response.json() + """ + Mediawiki api response has code = 200 even if there are errors. + rate limit doesn't return HTTP 429 either. may in the future + https://phabricator.wikimedia.org/T172293 + """ + if 'error' in json_data: + # rate limiting + error_msg_names = set() + if 'messages' in json_data['error']: + error_msg_names = set(x.get('name') for x in json_data['error']['messages']) + if 'actionthrottledtext' in error_msg_names: + sleep_sec = int(response.headers.get('retry-after', retry_after)) + print("{}: rate limited. 
sleeping for {} seconds".format(datetime.datetime.utcnow(), sleep_sec))
+                        sleep(sleep_sec)
+                        continue
+
+                # maxlag
+                if 'code' in json_data['error'] and json_data['error']['code'] == 'maxlag':
+                    sleep_sec = json_data['error'].get('lag', retry_after)
+                    print("{}: maxlag. sleeping for {} seconds".format(datetime.datetime.utcnow(), sleep_sec))
+                    sleep(sleep_sec)
+                    continue
+
+                # readonly
+                if 'code' in json_data['error'] and json_data['error']['code'] == 'readonly':
+                    print('The Wikibase instance is currently in readonly mode, waiting for {} seconds'.format(retry_after))
+                    sleep(retry_after)
+                    continue
+
+                # other cases
+                raise MWApiError(response.json() if response else {})
+
+            # there is no error or waiting. break out of this loop and parse response
+            break
+        else:
+            # the first time I've ever used for - else!!
+            # else executes if the for loop completes normally. i.e. does not encounter a `break`
+            # in this case, that means it tried this api call `max_retries` times
+            raise MWApiError(response.json() if response else {})
+
+        return json_data
+
+    @staticmethod
+    def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_agent=None, allow_anonymous=False, max_retries=1000, retry_after=60):
+        mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url
+        user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent
+
+        if not allow_anonymous:
+            if login is None:
+                # Force allow_anonymous as False by default to ask for a login object
+                raise ValueError("allow_anonymous can't be False and login is None at the same time.")
+            elif mediawiki_api_url != login.mediawiki_api_url:
+                raise ValueError("mediawiki_api_url can't be different with the one in the login object.")
+
+        headers = {
+            'User-Agent': user_agent
+        }
+
+        if data is not None:
+            if login is not None and 'token' not in data:
+                data.update({'token': login.get_edit_token()})
+
+            if not allow_anonymous:
+                # Always assert user if allow_anonymous is 
False
+                if 'assert' not in data:
+                    data.update({'assert': 'user'})
+                if 'token' in data and data['token'] == '+\\':
+                    raise Exception("Anonymous edit are not allowed by default. Set allow_anonymous to True to edit mediawiki anonymously.")
+            elif 'assert' not in data:
+                # Always assert anon if allow_anonymous is True
+                data.update({'assert': 'anon'})
+
+        login_session = login.get_session() if login is not None else None
+
+        return Api.mediawiki_api_call('POST', mediawiki_api_url, login_session, data=data, headers=headers, max_retries=max_retries, retry_after=retry_after)
+
+    @staticmethod
+    @wbi_backoff()
+    def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max_retries=1000, retry_after=60, debug=False):
+        """
+        Static method which can be used to execute any SPARQL query
+        :param prefix: The URI prefixes required for an endpoint, default is the Wikidata specific prefixes
+        :param query: The actual SPARQL query string
+        :param endpoint: The URL string for the SPARQL endpoint. Default is the URL for the Wikidata SPARQL endpoint
+        :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are.
+        :type user_agent: str
+        :param max_retries: The number of times this function should retry in case of errors or rate limiting.
+        :param retry_after: The number of seconds to wait upon receiving an error code or when the Query Service is not reachable.
+        :param debug: Enable debug output.
+ :type debug: boolean + :return: The results of the query are returned in JSON format + """ + + sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if endpoint is None else endpoint + user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent + + if prefix: + query = prefix + '\n' + query + + params = { + 'query': '#Tool: WikibaseIntegrator wbi_functions.execute_sparql_query\n' + query, + 'format': 'json' + } + + headers = { + 'Accept': 'application/sparql-results+json', + 'User-Agent': user_agent, + 'Content-Type': 'multipart/form-data' + } + + if debug: + print(params['query']) + + for n in range(max_retries): + try: + response = requests.post(sparql_endpoint_url, params=params, headers=headers) + except requests.exceptions.ConnectionError as e: + print("Connection error: {}. Sleeping for {} seconds.".format(e, retry_after)) + sleep(retry_after) + continue + if response.status_code == 503: + print("Service unavailable (503). Sleeping for {} seconds".format(retry_after)) + sleep(retry_after) + continue + if response.status_code == 429: + if 'retry-after' in response.headers.keys(): + retry_after = response.headers['retry-after'] + print("Too Many Requests (429). 
Sleeping for {} seconds".format(retry_after)) + sleep(retry_after) + continue + response.raise_for_status() + results = response.json() + + return results + + @staticmethod + def merge_items(from_id, to_id, ignore_conflicts='', mediawiki_api_url=None, login=None, allow_anonymous=False, user_agent=None): + """ + A static method to merge two items + :param from_id: The QID which should be merged into another item + :type from_id: string with 'Q' prefix + :param to_id: The QID into which another item should be merged + :type to_id: string with 'Q' prefix + :param mediawiki_api_url: The MediaWiki url which should be used + :type mediawiki_api_url: str + :param ignore_conflicts: A string with the values 'description', 'statement' or 'sitelink', separated by a pipe ('|') if using more than one of those. + :type ignore_conflicts: str + :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. + :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. + :type allow_anonymous: bool + :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. + :type user_agent: str + """ + + params = { + 'action': 'wbmergeitems', + 'fromid': from_id, + 'toid': to_id, + 'format': 'json', + 'bot': '', + 'ignoreconflicts': ignore_conflicts + } + + if config['MAXLAG'] > 0: + params.update({'maxlag': config['MAXLAG']}) + + return Api.mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, allow_anonymous=allow_anonymous) + + @staticmethod + def remove_claims(claim_id, summary=None, revision=None, mediawiki_api_url=None, login=None, allow_anonymous=False, user_agent=None): + """ + Delete an item + :param claim_id: One GUID or several (pipe-separated) GUIDs identifying the claims to be removed. All claims must belong to the same entity. + :type claim_id: string + :param summary: Summary for the edit. 
Will be prepended by an automatically generated comment. + :type summary: str + :param revision: The numeric identifier for the revision to base the modification on. This is used for detecting conflicts during save. + :type revision: str + :param mediawiki_api_url: The MediaWiki url which should be used + :type mediawiki_api_url: str + :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. + :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. + :type allow_anonymous: bool + :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. + :type user_agent: str + """ + + params = { + 'action': 'wbremoveclaims', + 'claim': claim_id, + 'summary': summary, + 'baserevid': revision, + 'bot': True, + 'format': 'json' + } + + if config['MAXLAG'] > 0: + params.update({'maxlag': config['MAXLAG']}) + + return Api.mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, allow_anonymous=allow_anonymous) + + @staticmethod + def search_entities(search_string, language=None, strict_language=True, search_type='item', mediawiki_api_url=None, max_results=500, dict_result=False, login=None, + allow_anonymous=True, user_agent=None): + """ + Performs a search for entities in the Wikibase instance using labels and aliases. + :param search_string: a string which should be searched for in the Wikibase instance (labels and aliases) + :type search_string: str + :param language: The language in which to perform the search. + :type language: str + :param strict_language: Whether to disable language fallback + :type strict_language: bool + :param search_type: Search for this type of entity. One of the following values: form, item, lexeme, property, sense + :type search_type: str + :param mediawiki_api_url: Specify the mediawiki_api_url. 
+ :type mediawiki_api_url: str + :param max_results: The maximum number of search results returned. Default 500 + :type max_results: int + :param dict_result: + :type dict_result: boolean + :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. + :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. + :type allow_anonymous: bool + :param user_agent: The user agent string transmitted in the http header + :type user_agent: str + :return: list + """ + + language = config['DEFAULT_LANGUAGE'] if language is None else language + + params = { + 'action': 'wbsearchentities', + 'search': search_string, + 'language': language, + 'strict_language': strict_language, + 'type': search_type, + 'limit': 50, + 'format': 'json' + } + + cont_count = 0 + results = [] + + while True: + params.update({'continue': cont_count}) + + search_results = Api.mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, + allow_anonymous=allow_anonymous) + + if search_results['success'] != 1: + raise SearchError('Wikibase API wbsearchentities failed') + else: + for i in search_results['search']: + if dict_result: + description = i['description'] if 'description' in i else None + aliases = i['aliases'] if 'aliases' in i else None + results.append({ + 'id': i['id'], + 'label': i['label'], + 'match': i['match'], + 'description': description, + 'aliases': aliases + }) + else: + results.append(i['id']) + + if 'search-continue' not in search_results: + break + else: + cont_count = search_results['search-continue'] + + if cont_count >= max_results: + break + + return results + + @staticmethod + def generate_item_instances(items, mediawiki_api_url=None, login=None, allow_anonymous=True, user_agent=None): + """ + A method which allows for retrieval of a list of Wikidata items or properties. 
The method generates a list of + tuples where the first value in the tuple is the QID or property ID, whereas the second is the new instance of + wbi_item.Item containing all the data of the item. This is most useful for mass retrieval of items. + :param user_agent: A custom user agent + :type user_agent: str + :param items: A list of QIDs or property IDs + :type items: list + :param mediawiki_api_url: The MediaWiki url which should be used + :type mediawiki_api_url: str + :return: A list of tuples, first value in the tuple is the QID or property ID string, second value is the instance of wbi_item.Item with the corresponding + item data. + :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. + :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. + :type allow_anonymous: bool + """ + + from wikibaseintegrator.entities import item + + assert type(items) == list + + params = { + 'action': 'wbgetentities', + 'ids': '|'.join(items), + 'format': 'json' + } + + reply = Api.mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, allow_anonymous=allow_anonymous) + + item_instances = [] + for qid, v in reply['entities'].items(): + from wikibaseintegrator import WikibaseIntegrator + wbi = WikibaseIntegrator(mediawiki_api_url=mediawiki_api_url) + ii = item.Item(wbi.api).from_json(v) + ii.mediawiki_api_url = mediawiki_api_url + item_instances.append((qid, ii)) + + return item_instances diff --git a/wikibaseintegrator/wbi_backoff.py b/wikibaseintegrator/wbi_backoff.py index 9e52cde4..4836fc62 100644 --- a/wikibaseintegrator/wbi_backoff.py +++ b/wikibaseintegrator/wbi_backoff.py @@ -3,25 +3,19 @@ import backoff import requests -import simplejson as json +from simplejson import JSONDecodeError from wikibaseintegrator.wbi_config import config -JSONDecodeError = json.JSONDecodeError - -def get_config(name): - return partial(config.get, name) - 
- -def backoff_hdlr(details): +def wbi_backoff_backoff_hdlr(details): exc_type, exc_value, _ = sys.exc_info() if exc_type == JSONDecodeError: print(exc_value.doc) # pragma: no cover print("Backing off {wait:0.1f} seconds afters {tries} tries calling function with args {args} and kwargs {kwargs}".format(**details)) -def check_json_decode_error(e): +def wbi_backoff_check_json_decode_error(e): """ Check if the error message is "Expecting value: line 1 column 1 (char 0)" if not, its a real error and we shouldn't retry @@ -31,7 +25,7 @@ def check_json_decode_error(e): return type(e) == JSONDecodeError and str(e) != "Expecting value: line 1 column 1 (char 0)" -exceptions = (requests.exceptions.Timeout, requests.exceptions.ConnectionError, requests.HTTPError, JSONDecodeError) +wbi_backoff_exceptions = (requests.exceptions.Timeout, requests.exceptions.ConnectionError, requests.HTTPError, JSONDecodeError) -wbi_backoff = partial(backoff.on_exception, backoff.expo, exceptions, max_value=get_config("BACKOFF_MAX_VALUE"), giveup=check_json_decode_error, on_backoff=backoff_hdlr, - jitter=None, max_tries=get_config("BACKOFF_MAX_TRIES")) +wbi_backoff = partial(backoff.on_exception, backoff.expo, wbi_backoff_exceptions, max_value=partial(config.get, 'BACKOFF_MAX_VALUE'), giveup=wbi_backoff_check_json_decode_error, + on_backoff=wbi_backoff_backoff_hdlr, jitter=None, max_tries=partial(config.get, 'BACKOFF_MAX_TRIES')) diff --git a/wikibaseintegrator/wbi_config.py b/wikibaseintegrator/wbi_config.py index fe68553c..d04dd1f9 100644 --- a/wikibaseintegrator/wbi_config.py +++ b/wikibaseintegrator/wbi_config.py @@ -20,7 +20,7 @@ """ config = { - 'BACKOFF_MAX_TRIES': None, + 'BACKOFF_MAX_TRIES': 5, 'BACKOFF_MAX_VALUE': 3600, 'USER_AGENT_DEFAULT': "WikibaseIntegrator/{} (https://github.com/LeMyst/WikibaseIntegrator)".format(__version__), 'MAXLAG': 5, @@ -33,5 +33,6 @@ 'MEDIAWIKI_REST_URL': 'https://www.wikidata.org/w/rest.php', 'SPARQL_ENDPOINT_URL': 'https://query.wikidata.org/sparql', 
'WIKIBASE_URL': 'http://www.wikidata.org', - 'DEFAULT_LANGUAGE': 'en' + 'DEFAULT_LANGUAGE': 'en', + 'DEBUG': False } diff --git a/wikibaseintegrator/wbi_core.py b/wikibaseintegrator/wbi_core.py deleted file mode 100644 index 51fdae1d..00000000 --- a/wikibaseintegrator/wbi_core.py +++ /dev/null @@ -1,648 +0,0 @@ -import copy -import json - -from wikibaseintegrator import wbi_functions -from wikibaseintegrator.wbi_datatype import BaseDataType -from wikibaseintegrator.wbi_exceptions import SearchOnlyError, NonUniqueLabelDescriptionPairError, MWApiError, ManualInterventionReqException, CorePropIntegrityException - - -class Core(object): - fast_run_store = [] - distinct_value_props = {} - - def __init__(self, debug=False): - self.json_representation = {} - self.debug = debug - self.create_new_item = False - self.statements = [] - self.original_statements = [] - - def init_data_load(self): - if self.item_id and self.item_data: - if self.debug: - print("Load item " + self.item_id + " from item_data") - self.json_representation = self.parse_json(self.item_data) - elif self.item_id: - if self.debug: - print("Load item " + self.item_id + " from MW API from item_id") - self.json_representation = self.get_entity() - else: - if self.debug: - print("Try to guess item QID from props") - qids_by_props = '' - try: - qids_by_props = self.__select_item() - except SearchError as e: - print("ERROR init_data_load: " + str(e)) - - if qids_by_props: - self.item_id = qids_by_props - if self.debug: - print("Item ID guessed is " + self.item_id) - print("Load item " + self.item_id + " from MW API") - self.json_representation = self.get_entity() - self.__check_integrity() - - if not self.search_only: - self.__construct_claim_json() - else: - self.data = [] - - def parse_json(self, json_data): - """ - Parses an entity json and generates the datatype objects, sets self.json_representation - :param json_data: the json of an entity - :type json_data: A Python Json representation of an item - 
:return: returns the json representation containing 'labels', 'descriptions', 'claims', 'aliases', 'sitelinks'. - """ - - data = {x: json_data[x] for x in ('labels', 'descriptions', 'claims', 'aliases') if x in json_data} - data['sitelinks'] = {} - self.entity_metadata = {x: json_data[x] for x in json_data if x not in ('labels', 'descriptions', 'claims', 'aliases', 'sitelinks')} - self.sitelinks = json_data.get('sitelinks', {}) - - self.statements = [] - for prop in data['claims']: - for z in data['claims'][prop]: - data_type = [x for x in BaseDataType.__subclasses__() if x.DTYPE == z['mainsnak']['datatype']][0] - statement = data_type.from_json(z) - self.statements.append(statement) - - self.json_representation = data - self.original_statements = copy.deepcopy(self.statements) - - return data - - def get_label(self, lang=None): - """ - Returns the label for a certain language - :param lang: - :type lang: str - :return: returns the label in the specified language, an empty string if the label does not exist - """ - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if self.fast_run: - return list(self.fast_run_container.get_language_data(self.item_id, lang, 'label'))[0] - try: - return self.json_representation['labels'][lang]['value'] - except KeyError: - return '' - - def set_label(self, label, lang=None, if_exists='REPLACE'): - """ - Set the label for an item in a certain language - :param label: The label of the item in a certain language or None to remove the label in that language - :type label: str or None - :param lang: The language a label should be set for. 
- :type lang: str - :param if_exists: If a label already exist, 'REPLACE' it or 'KEEP' it - :return: None - """ - - if self.search_only: - raise SearchOnlyError - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if if_exists not in ('KEEP', 'REPLACE'): - raise ValueError("{} is not a valid value for if_exists (REPLACE or KEEP)".format(if_exists)) - - # Skip set_label if the item already have one and if_exists is at 'KEEP' - if if_exists == 'KEEP': - if lang in self.json_representation['labels']: - return - - if self.fast_run_container and self.fast_run_container.get_language_data(self.item_id, lang, 'label') != ['']: - return - - if self.fast_run and not self.require_write: - self.require_write = self.fast_run_container.check_language_data(qid=self.item_id, lang_data=[label], lang=lang, lang_data_type='label') - if self.require_write: - self.init_data_load() - else: - return - - if 'labels' not in self.json_representation or not self.json_representation['labels']: - self.json_representation['labels'] = {} - - if label is None: - self.json_representation['labels'][lang] = { - 'language': lang, - 'remove': '' - } - else: - self.json_representation['labels'][lang] = { - 'language': lang, - 'value': label - } - - def get_aliases(self, lang=None): - """ - Retrieve the aliases in a certain language - :param lang: The language the description should be retrieved for - :return: Returns a list of aliases, an empty list if none exist for the specified language - """ - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if self.fast_run: - return list(self.fast_run_container.get_language_data(self.item_id, lang, 'aliases')) - - alias_list = [] - if 'aliases' in self.json_representation and lang in self.json_representation['aliases']: - for alias in self.json_representation['aliases'][lang]: - alias_list.append(alias['value']) - - return alias_list - - def set_aliases(self, aliases, lang=None, if_exists='APPEND'): - """ - set the aliases for an 
item - :param aliases: a string or a list of strings representing the aliases of an item - :param lang: The language a description should be set for - :param if_exists: If aliases already exist, APPEND or REPLACE - :return: None - """ - - if self.search_only: - raise SearchOnlyError - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if isinstance(aliases, str): - aliases = [aliases] - if not isinstance(aliases, list): - raise TypeError("aliases must be a list or a string") - - if if_exists != 'APPEND' and if_exists != 'REPLACE': - raise ValueError("{} is not a valid value for if_exists (REPLACE or APPEND)".format(if_exists)) - - if self.fast_run and not self.require_write: - self.require_write = self.fast_run_container.check_language_data(qid=self.item_id, lang_data=aliases, lang=lang, lang_data_type='aliases', if_exists=if_exists) - if self.require_write: - self.init_data_load() - else: - return - - if 'aliases' not in self.json_representation: - self.json_representation['aliases'] = {} - - if if_exists == 'REPLACE' or lang not in self.json_representation['aliases']: - self.json_representation['aliases'][lang] = [] - for alias in aliases: - self.json_representation['aliases'][lang].append({ - 'language': lang, - 'value': alias - }) - else: - for alias in aliases: - found = False - for current_aliases in self.json_representation['aliases'][lang]: - if alias.strip().casefold() != current_aliases['value'].strip().casefold(): - continue - else: - found = True - break - - if not found: - self.json_representation['aliases'][lang].append({ - 'language': lang, - 'value': alias - }) - - def get_description(self, lang=None): - """ - Retrieve the description in a certain language - :param lang: The language the description should be retrieved for - :return: Returns the description string - """ - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if self.fast_run: - return list(self.fast_run_container.get_language_data(self.item_id, lang, 
'description'))[0] - if 'descriptions' not in self.json_representation or lang not in self.json_representation['descriptions']: - return '' - else: - return self.json_representation['descriptions'][lang]['value'] - - def set_description(self, description, lang=None, if_exists='REPLACE'): - """ - Set the description for an item in a certain language - :param description: The description of the item in a certain language - :type description: str - :param lang: The language a description should be set for. - :type lang: str - :param if_exists: If a description already exist, REPLACE it or KEEP it. - :return: None - """ - - if self.search_only: - raise SearchOnlyError - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if if_exists != 'KEEP' and if_exists != 'REPLACE': - raise ValueError("{} is not a valid value for if_exists (REPLACE or KEEP)".format(if_exists)) - - # Skip set_description if the item already have one and if_exists is at 'KEEP' - if if_exists == 'KEEP': - if self.get_description(lang): - return - - if self.fast_run_container and self.fast_run_container.get_language_data(self.item_id, lang, 'description') != ['']: - return - - if self.fast_run and not self.require_write: - self.require_write = self.fast_run_container.check_language_data(qid=self.item_id, lang_data=[description], lang=lang, lang_data_type='description') - if self.require_write: - self.init_data_load() - else: - return - - if 'descriptions' not in self.json_representation or not self.json_representation['descriptions'] or if_exists == 'REPLACE': - self.json_representation['descriptions'] = {} - - self.json_representation['descriptions'][lang] = { - 'language': lang, - 'value': description - } - - def get_entity(self): - """ - retrieve an item in json representation from the Wikibase instance - :rtype: dict - :return: python complex dictionary representation of a json - """ - - params = { - 'action': 'wbgetentities', - 'ids': self.item_id, - 'format': 'json' - } - - 
json_data = wbi_functions.mediawiki_api_call_helper(data=params, allow_anonymous=True) - return self.parse_json(json_data=json_data['entities'][self.item_id]) - - def get_json_representation(self): - """ - A method to access the internal json representation of the item, mainly for testing - :return: returns a Python json representation object of the item at the current state of the instance - """ - - return self.json_representation - - def write(self, login, bot_account=True, edit_summary='', entity_type='item', property_datatype='string', max_retries=1000, retry_after=60, all_claims=False, - allow_anonymous=False): - """ - Writes the item Json to the Wikibase instance and after successful write, updates the object with new ids and hashes generated by the Wikibase instance. - For new items, also returns the new QIDs. - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param bot_account: Tell the Wikidata API whether the script should be run as part of a bot account or not. - :type bot_account: bool - :param edit_summary: A short (max 250 characters) summary of the purpose of the edit. This will be displayed as the revision summary of the item. - :type edit_summary: str - :param entity_type: Decides wether the object will become a 'form', 'item' (default), 'lexeme', 'property' or 'sense' - :type entity_type: str - :param property_datatype: When payload_type is 'property' then this parameter set the datatype for the property - :type property_datatype: str - :param max_retries: If api request fails due to rate limiting, maxlag, or readonly mode, retry up to `max_retries` times - :type max_retries: int - :param retry_after: Number of seconds to wait before retrying request (see max_retries) - :type retry_after: int - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. 
- :type allow_anonymous: bool - :return: the entity ID on successful write - """ - - if self.search_only: - raise SearchOnlyError - - if not self.require_write: - return self.item_id - - if entity_type == 'property': - self.json_representation['datatype'] = property_datatype - if 'sitelinks' in self.json_representation: - del self.json_representation['sitelinks'] - - if all_claims: - data = json.JSONEncoder().encode(self.json_representation) - else: - new_json_repr = {k: self.json_representation[k] for k in set(list(self.json_representation.keys())) - {'claims'}} - new_json_repr['claims'] = {} - for claim in self.json_representation['claims']: - if [True for x in self.json_representation['claims'][claim] if 'id' not in x or 'remove' in x]: - new_json_repr['claims'][claim] = copy.deepcopy(self.json_representation['claims'][claim]) - for statement in new_json_repr['claims'][claim]: - if 'id' in statement and 'remove' not in statement: - new_json_repr['claims'][claim].remove(statement) - if not new_json_repr['claims'][claim]: - new_json_repr['claims'].pop(claim) - data = json.JSONEncoder().encode(new_json_repr) - - payload = { - 'action': 'wbeditentity', - 'data': data, - 'format': 'json', - 'token': login.get_edit_token(), - 'summary': edit_summary - } - - if config['MAXLAG'] > 0: - payload.update({'maxlag': config['MAXLAG']}) - - if bot_account: - payload.update({'bot': ''}) - - if self.create_new_item: - payload.update({u'new': entity_type}) - else: - payload.update({u'id': self.item_id}) - - if self.debug: - print(payload) - - try: - json_data = wbi_functions.mediawiki_api_call_helper(data=payload, login=login, max_retries=max_retries, retry_after=retry_after, allow_anonymous=allow_anonymous) - - if 'error' in json_data and 'messages' in json_data['error']: - error_msg_names = set(x.get('name') for x in json_data['error']['messages']) - if 'wikibase-validator-label-with-description-conflict' in error_msg_names: - raise NonUniqueLabelDescriptionPairError(json_data) 
- else: - raise MWApiError(json_data) - elif 'error' in json_data.keys(): - raise MWApiError(json_data) - except Exception: - print('Error while writing to the Wikibase instance') - raise - - # after successful write, update this object with latest json, QID and parsed data types. - self.create_new_item = False - self.item_id = json_data['entity']['id'] - self.parse_json(json_data=json_data['entity']) - self.data = [] - if 'success' in json_data and 'entity' in json_data and 'lastrevid' in json_data['entity']: - self.lastrevid = json_data['entity']['lastrevid'] - return self.item_id - - def __check_integrity(self): - """ - A method to check if when invoking __select_item() and the item does not exist yet, but another item - has a property of the current domain with a value like submitted in the data dict, this item does not get - selected but a ManualInterventionReqException() is raised. This check is dependent on the core identifiers - of a certain domain. - :return: boolean True if test passed - """ - - # all core props - wbi_core_props = self.core_props - # core prop statements that exist on the item - cp_statements = [x for x in self.statements if x.get_prop_nr() in wbi_core_props] - item_core_props = set(x.get_prop_nr() for x in cp_statements) - # core prop statements we are loading - cp_data = [x for x in self.data if x.get_prop_nr() in wbi_core_props] - - # compare the claim values of the currently loaded QIDs to the data provided in self.data - # this is the number of core_ids in self.data that are also on the item - count_existing_ids = len([x for x in self.data if x.get_prop_nr() in item_core_props]) - - core_prop_match_count = 0 - for new_stat in self.data: - for stat in self.statements: - if (new_stat.get_prop_nr() == stat.get_prop_nr()) and (new_stat.get_value() == stat.get_value()) and ( - new_stat.get_prop_nr() in item_core_props): - core_prop_match_count += 1 - - if core_prop_match_count < count_existing_ids * self.core_prop_match_thresh: - 
existing_core_pv = defaultdict(set) - for s in cp_statements: - existing_core_pv[s.get_prop_nr()].add(s.get_value()) - new_core_pv = defaultdict(set) - for s in cp_data: - new_core_pv[s.get_prop_nr()].add(s.get_value()) - nomatch_existing = {k: v - new_core_pv[k] for k, v in existing_core_pv.items()} - nomatch_existing = {k: v for k, v in nomatch_existing.items() if v} - nomatch_new = {k: v - existing_core_pv[k] for k, v in new_core_pv.items()} - nomatch_new = {k: v for k, v in nomatch_new.items() if v} - raise CorePropIntegrityException("Retrieved item ({}) does not match provided core IDs. " - "Matching count {}, non-matching count {}. " - .format(self.item_id, core_prop_match_count, - count_existing_ids - core_prop_match_count) + - "existing unmatched core props: {}. ".format(nomatch_existing) + - "statement unmatched core props: {}.".format(nomatch_new)) - else: - return True - - def __select_item(self): - """ - The most likely item QID should be returned, after querying the Wikibase instance for all values in core_id properties - :return: Either a single QID is returned, or an empty string if no suitable item in the Wikibase instance - """ - - qid_list = set() - conflict_source = {} - - for statement in self.data: - property_nr = statement.get_prop_nr() - - core_props = self.core_props - if property_nr in core_props: - tmp_qids = set() - query = statement.sparql_query.format(wb_url=self.wikibase_url, pid=property_nr, value=statement.get_sparql_value().replace("'", r"\'")) - results = wbi_functions.execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url, debug=self.debug) - - for i in results['results']['bindings']: - qid = i['item_id']['value'].split('/')[-1] - tmp_qids.add(qid) - - qid_list.update(tmp_qids) - - # Protocol in what property the conflict arises - if property_nr in conflict_source: - conflict_source[property_nr].append(tmp_qids) - else: - conflict_source[property_nr] = [tmp_qids] - - if len(tmp_qids) > 1: - raise 
ManualInterventionReqException("More than one item has the same property value", property_nr, tmp_qids) - - if len(qid_list) == 0: - self.create_new_item = True - return '' - - if self.debug: - print(qid_list) - - unique_qids = set(qid_list) - if len(unique_qids) > 1: - raise ManualInterventionReqException("More than one item has the same property value", conflict_source, unique_qids) - elif len(unique_qids) == 1: - return list(unique_qids)[0] - - def __construct_claim_json(self): - """ - Writes the properties from self.data to a new or existing json in self.json_representation - :return: None - """ - - def handle_qualifiers(old_item, new_item): - if not new_item.check_qualifier_equality: - old_item.set_qualifiers(new_item.get_qualifiers()) - - def is_good_ref(ref_block): - prop_nrs = [x.get_prop_nr() for x in ref_block] - values = [x.get_value() for x in ref_block] - good_ref = True - prop_value_map = dict(zip(prop_nrs, values)) - - # if self.good_refs has content, use these to determine good references - if self.good_refs and len(self.good_refs) > 0: - found_good = True - for rblock in self.good_refs: - - if not all([k in prop_value_map for k, v in rblock.items()]): - found_good = False - - if not all([v in prop_value_map[k] for k, v in rblock.items() if v]): - found_good = False - - if found_good: - return True - - return False - - return good_ref - - def handle_references(old_item, new_item): - """ - Local function to handle references - :param old_item: An item containing the data as currently in the Wikibase instance - :type old_item: A child of BaseDataType - :param new_item: An item containing the new data which should be written to the Wikibase instance - :type new_item: A child of BaseDataType - """ - - old_references = old_item.get_references() - new_references = new_item.get_references() - - if sum(map(lambda z: len(z), old_references)) == 0 or self.global_ref_mode == 'STRICT_OVERWRITE': - old_item.set_references(new_references) - - elif 
self.global_ref_mode == 'STRICT_KEEP' or new_item.statement_ref_mode == 'STRICT_KEEP': - pass - - elif self.global_ref_mode == 'STRICT_KEEP_APPEND' or new_item.statement_ref_mode == 'STRICT_KEEP_APPEND': - old_references.extend(new_references) - old_item.set_references(old_references) - - elif self.global_ref_mode == 'CUSTOM' or new_item.statement_ref_mode == 'CUSTOM' and self.ref_handler and callable(self.ref_handler): - self.ref_handler(old_item, new_item) - - elif self.global_ref_mode == 'KEEP_GOOD' or new_item.statement_ref_mode == 'KEEP_GOOD': - # Copy only good_ref - refs = [x for x in old_references if is_good_ref(x)] - - # Don't add already existing references - for new_ref in new_references: - if new_ref not in old_references: - refs.append(new_ref) - - # Set the references - old_item.set_references(refs) - - # sort the incoming data according to the property number - self.data.sort(key=lambda z: z.get_prop_nr().lower()) - - # collect all statements which should be deleted because of an empty value - statements_for_deletion = [] - for item in self.data: - if isinstance(item, BaseDataType) and item.get_value() == '': - statements_for_deletion.append(item.get_prop_nr()) - - if self.create_new_item: - self.statements = copy.copy(self.data) - else: - for stat in self.data: - prop_nr = stat.get_prop_nr() - - prop_data = [x for x in self.statements if x.get_prop_nr() == prop_nr] - if prop_data and stat.if_exists == 'KEEP': - continue - prop_pos = [x.get_prop_nr() == prop_nr for x in self.statements] - prop_pos.reverse() - insert_pos = len(prop_pos) - (prop_pos.index(True) if any(prop_pos) else 0) - - # If value should be appended, check if values exists, if not, append - if 'APPEND' in stat.if_exists: - equal_items = [stat == x for x in prop_data] - if True not in equal_items or stat.if_exists == 'FORCE_APPEND': - self.statements.insert(insert_pos + 1, stat) - else: - # if item exists, modify rank - current_item = prop_data[equal_items.index(True)] - 
current_item.set_rank(stat.get_rank()) - handle_references(old_item=current_item, new_item=stat) - handle_qualifiers(old_item=current_item, new_item=stat) - continue - - # set all existing values of a property for removal - for x in prop_data: - # for deletion of single statements, do not set all others to delete - if hasattr(stat, 'remove'): - break - elif x.get_id() and not hasattr(x, 'retain'): - # keep statements with good references if keep_good_ref_statements is True - if self.keep_good_ref_statements: - if any([is_good_ref(r) for r in x.get_references()]): - setattr(x, 'retain', '') - else: - setattr(x, 'remove', '') - - match = [] - for i in prop_data: - if stat == i and hasattr(stat, 'remove'): - match.append(True) - setattr(i, 'remove', '') - elif stat == i: - match.append(True) - setattr(i, 'retain', '') - if hasattr(i, 'remove'): - delattr(i, 'remove') - handle_references(old_item=i, new_item=stat) - handle_qualifiers(old_item=i, new_item=stat) - - i.set_rank(rank=stat.get_rank()) - # if there is no value, do not add an element, this is also used to delete whole properties. 
- elif i.get_value(): - match.append(False) - - if True not in match and not hasattr(stat, 'remove'): - self.statements.insert(insert_pos + 1, stat) - - # For whole property deletions, add remove flag to all statements which should be deleted - for item in copy.deepcopy(self.statements): - if item.get_prop_nr() in statements_for_deletion: - if item.get_id() != '': - setattr(item, 'remove', '') - else: - self.statements.remove(item) - - # regenerate claim json - self.json_representation['claims'] = {} - for stat in self.statements: - prop_nr = stat.get_prop_nr() - if prop_nr not in self.json_representation['claims']: - self.json_representation['claims'][prop_nr] = [] - self.json_representation['claims'][prop_nr].append(stat.get_json_representation()) - - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs="\r\n\t ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), - ) diff --git a/wikibaseintegrator/wbi_datatype.py b/wikibaseintegrator/wbi_datatype.py deleted file mode 100644 index 52d7a7d8..00000000 --- a/wikibaseintegrator/wbi_datatype.py +++ /dev/null @@ -1,1548 +0,0 @@ -import copy -import re - -from wikibaseintegrator.wbi_config import config -from wikibaseintegrator.wbi_jsonparser import JsonParser - - -class BaseDataType(object): - """ - The base class for all Wikibase data types, they inherit from it - """ - DTYPE = 'base-data-type' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> '{value}' . - }} - ''' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, will be called by all data types. - :param value: Data value of the Wikibase data snak - :type value: str or int or tuple - :param prop_nr: The property number a Wikibase snak belongs to - :type prop_nr: A string with a prefixed 'P' and several digits e.g. 
'P715' (Drugbank ID) or an int - :param data_type: The Wikibase data type declaration of this snak - :type data_type: str - :param snak_type: The snak type of the Wikibase data snak, three values possible, depending if the value is a known (value), not existent (novalue) or - unknown (somevalue). See Wikibase documentation. - :type snak_type: a str of either 'value', 'novalue' or 'somevalue' - :param references: A one level nested list with reference Wikibase snaks of base type BaseDataType, - e.g. references=[[, ], []] - This will create two references, the first one with two statements, the second with one - :type references: A one level nested list with instances of BaseDataType or children of it. - :param qualifiers: A list of qualifiers for the Wikibase mainsnak - :type qualifiers: A list with instances of BaseDataType or children of it. - :param is_reference: States if the snak is a reference, mutually exclusive with qualifier - :type is_reference: boolean - :param is_qualifier: States if the snak is a qualifier, mutually exlcusive with reference - :type is_qualifier: boolean - :param rank: The rank of a Wikibase mainsnak, should determine the status of a value - :type rank: A string of one of three allowed values: 'normal', 'deprecated', 'preferred' - :param check_qualifier_equality: When comparing two objects, test if qualifiers are equals between them. Default to true. - :type check_qualifier_equality: boolean - :param if_exists: Replace or append the statement. You can force an append if the statement already exists. 
- :type if_exists: A string of one of three allowed values: 'REPLACE', 'APPEND', 'FORCE_APPEND', 'KEEP' - :return: - """ - - self.value = value - self.data_type = kwargs.pop('data_type', self.DTYPE) - self.snak_type = kwargs.pop('snak_type', 'value') - self.references = kwargs.pop('references', None) - self.qualifiers = kwargs.pop('qualifiers', None) - self.is_reference = kwargs.pop('is_reference', None) - self.is_qualifier = kwargs.pop('is_qualifier', None) - self.rank = kwargs.pop('rank', 'normal') - self.check_qualifier_equality = kwargs.pop('check_qualifier_equality', True) - self.if_exists = kwargs.pop('if_exists', 'REPLACE') - - self._statement_ref_mode = 'KEEP_GOOD' - - if not self.references: - self.references = [] - else: - for ref_list in self.references: - for reference in ref_list: - if reference.is_reference is False: - raise ValueError('A reference can\'t be declared as is_reference=False') - elif reference.is_reference is None: - reference.is_reference = True - - if not self.qualifiers: - self.qualifiers = [] - else: - for qualifier in self.qualifiers: - if qualifier.is_qualifier is False: - raise ValueError('A qualifier can\'t be declared as is_qualifier=False') - elif qualifier.is_qualifier is None: - qualifier.is_qualifier = True - - if isinstance(prop_nr, int): - self.prop_nr = 'P' + str(prop_nr) - else: - pattern = re.compile(r'^P?([0-9]+)$') - matches = pattern.match(prop_nr) - - if not matches: - raise ValueError('Invalid prop_nr, format must be "P[0-9]+"') - else: - self.prop_nr = 'P' + str(matches.group(1)) - - # Internal ID and hash are issued by the Wikibase instance - self.id = '' - self.hash = '' - - self.json_representation = { - 'snaktype': self.snak_type, - 'property': self.prop_nr, - 'datavalue': {}, - 'datatype': self.data_type - } - - if self.snak_type not in ['value', 'novalue', 'somevalue']: - raise ValueError('{} is not a valid snak type'.format(self.snak_type)) - - if self.if_exists not in ['REPLACE', 'APPEND', 'FORCE_APPEND', 
'KEEP']: - raise ValueError('{} is not a valid if_exists value'.format(self.if_exists)) - - if self.value is None and self.snak_type == 'value': - raise ValueError('Parameter \'value\' can\'t be \'None\' if \'snak_type\' is \'value\'') - - if self.is_qualifier and self.is_reference: - raise ValueError('A claim cannot be a reference and a qualifer at the same time') - if (len(self.references) > 0 or len(self.qualifiers) > 0) and (self.is_qualifier or self.is_reference): - raise ValueError('Qualifiers or references cannot have references or qualifiers') - - def has_equal_qualifiers(self, other): - # check if the qualifiers are equal with the 'other' object - equal_qualifiers = True - self_qualifiers = copy.deepcopy(self.get_qualifiers()) - other_qualifiers = copy.deepcopy(other.get_qualifiers()) - - if len(self_qualifiers) != len(other_qualifiers): - equal_qualifiers = False - else: - flg = [False for _ in range(len(self_qualifiers))] - for count, i in enumerate(self_qualifiers): - for q in other_qualifiers: - if i == q: - flg[count] = True - if not all(flg): - equal_qualifiers = False - - return equal_qualifiers - - def __eq__(self, other): - equal_qualifiers = self.has_equal_qualifiers(other) - equal_values = self.get_value() == other.get_value() and self.get_prop_nr() == other.get_prop_nr() - - if not (self.check_qualifier_equality and other.check_qualifier_equality) and equal_values: - return True - elif equal_values and equal_qualifiers: - return True - else: - return False - - @property - def statement_ref_mode(self): - return self._statement_ref_mode - - @statement_ref_mode.setter - def statement_ref_mode(self, value): - """Set the reference mode for a statement, always overrides the global reference state.""" - valid_values = ['STRICT_KEEP', 'STRICT_KEEP_APPEND', 'STRICT_OVERWRITE', 'KEEP_GOOD', 'CUSTOM'] - if value not in valid_values: - raise ValueError('Not an allowed reference mode, allowed values {}'.format(' '.join(valid_values))) - - 
self._statement_ref_mode = value - - def get_value(self): - return self.value - - def get_sparql_value(self): - return self.value - - def set_value(self, value): - if value is None and self.snak_type not in {'novalue', 'somevalue'}: - raise ValueError("If 'value' is None, snak_type must be novalue or somevalue") - if self.snak_type in {'novalue', 'somevalue'}: - del self.json_representation['datavalue'] - elif 'datavalue' not in self.json_representation: - self.json_representation['datavalue'] = {} - - self.value = value - - def get_references(self): - return self.references - - def set_references(self, references): - if len(references) > 0 and (self.is_qualifier or self.is_reference): - raise ValueError("Qualifiers or references cannot have references") - - # Force clean duplicate references - temp_references = [] - for reference in references: - if reference not in temp_references: - temp_references.append(reference) - references = temp_references - - self.references = references - - def get_qualifiers(self): - return self.qualifiers - - def set_qualifiers(self, qualifiers): - # TODO: introduce a check to prevent duplicate qualifiers, those are not allowed in Wikibase - if len(qualifiers) > 0 and (self.is_qualifier or self.is_reference): - raise ValueError("Qualifiers or references cannot have qualifiers") - - self.qualifiers = qualifiers - - def get_rank(self): - if self.is_qualifier or self.is_reference: - return '' - else: - return self.rank - - def set_rank(self, rank): - if self.is_qualifier or self.is_reference: - raise ValueError("References or qualifiers do not have ranks") - - valid_ranks = ['normal', 'deprecated', 'preferred'] - - if rank not in valid_ranks: - raise ValueError("{} not a valid rank".format(rank)) - - self.rank = rank - - def get_id(self): - return self.id - - def set_id(self, claim_id): - self.id = claim_id - - def set_hash(self, claim_hash): - self.hash = claim_hash - - def get_hash(self): - return self.hash - - def get_prop_nr(self): - 
return self.prop_nr - - def set_prop_nr(self, prop_nr): - if prop_nr[0] != 'P': - raise ValueError("Invalid property number") - - self.prop_nr = prop_nr - - def get_json_representation(self): - if self.is_qualifier or self.is_reference: - tmp_json = { - self.prop_nr: [self.json_representation] - } - if self.hash != '' and self.is_qualifier: - self.json_representation.update({'hash': self.hash}) - - return tmp_json - else: - ref_json = [] - for count, ref in enumerate(self.references): - snaks_order = [] - snaks = {} - ref_json.append({ - 'snaks': snaks, - 'snaks-order': snaks_order - }) - for sub_ref in ref: - prop_nr = sub_ref.get_prop_nr() - # set the hash for the reference block - if sub_ref.get_hash() != '': - ref_json[count].update({'hash': sub_ref.get_hash()}) - tmp_json = sub_ref.get_json_representation() - - # if more reference values with the same property number, append to its specific property list. - if prop_nr in snaks: - snaks[prop_nr].append(tmp_json[prop_nr][0]) - else: - snaks.update(tmp_json) - snaks_order.append(prop_nr) - - qual_json = {} - qualifiers_order = [] - for qual in self.qualifiers: - prop_nr = qual.get_prop_nr() - if prop_nr in qual_json: - qual_json[prop_nr].append(qual.get_json_representation()[prop_nr][0]) - else: - qual_json.update(qual.get_json_representation()) - qualifiers_order.append(qual.get_prop_nr()) - - if hasattr(self, 'remove'): - statement = { - 'remove': '' - } - else: - statement = { - 'mainsnak': self.json_representation, - 'type': 'statement', - 'rank': self.rank - } - if qual_json: - statement['qualifiers'] = qual_json - if qualifiers_order: - statement['qualifiers-order'] = qualifiers_order - if ref_json: - statement['references'] = ref_json - if self.id != '': - statement.update({'id': self.id}) - - return statement - - @classmethod - @JsonParser - def from_json(cls, json_representation): - pass - - def equals(self, that, include_ref=False, fref=None): - """ - Tests for equality of two statements. 
- If comparing references, the order of the arguments matters!!! - self is the current statement, the next argument is the new statement. - Allows passing in a function to use to compare the references 'fref'. Default is equality. - fref accepts two arguments 'oldrefs' and 'newrefs', each of which are a list of references, - where each reference is a list of statements - """ - - if not include_ref: - # return the result of BaseDataType.__eq__, which is testing for equality of value and qualifiers - return self == that - else: - if self != that: - return False - if fref is None: - return BaseDataType.refs_equal(self, that) - else: - return fref(self, that) - - @staticmethod - def refs_equal(olditem, newitem): - """ - tests for exactly identical references - """ - - oldrefs = olditem.references - newrefs = newitem.references - - def ref_equal(oldref, newref): - return True if (len(oldref) == len(newref)) and all(x in oldref for x in newref) else False - - if len(oldrefs) == len(newrefs) and all(any(ref_equal(oldref, newref) for oldref in oldrefs) for newref in newrefs): - return True - else: - return False - - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), - ) - - -class CommonsMedia(BaseDataType): - """ - Implements the Wikibase data type for Wikimedia commons media files - """ - DTYPE = 'commonsMedia' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The media file name from Wikimedia commons to be used as the value - :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type 
is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - self.value = None - - super(CommonsMedia, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - self.value = value - - self.json_representation['datavalue'] = { - 'value': self.value, - 'type': 'string' - } - - super(CommonsMedia, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) - - -class ExternalID(BaseDataType): - """ - Implements the Wikibase data type 'external-id' - """ - DTYPE = 'external-id' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The string to be used as the value - :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - 
:type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(ExternalID, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - self.value = value - - self.json_representation['datavalue'] = { - 'value': self.value, - 'type': 'string' - } - - super(ExternalID, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) - - -class Form(BaseDataType): - """ - Implements the Wikibase data type 'wikibase-form' - """ - DTYPE = 'wikibase-form' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> . 
- }} - ''' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The form number to serve as a value using the format "L-F" (example: L252248-F2) - :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix - :param prop_nr: The property number for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(Form, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - if value is None: - self.value = value - else: - pattern = re.compile(r'^L[0-9]+-F[0-9]+$') - matches = pattern.match(value) - - if not matches: - raise ValueError("Invalid form ID ({}), format must be 'L[0-9]+-F[0-9]+'".format(value)) - - self.value = value - - self.json_representation['datavalue'] = { - 'value': { - 'entity-type': 'form', - 'id': self.value - }, - 'type': 'wikibase-entityid' - } - - super(Form, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value']['id'], prop_nr=jsn['property']) - 
- -class GeoShape(BaseDataType): - """ - Implements the Wikibase data type 'geo-shape' - """ - DTYPE = 'geo-shape' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> <{value}> . - }} - ''' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The GeoShape map file name in Wikimedia Commons to be linked - :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(GeoShape, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - if value is None: - self.value = value - else: - # TODO: Need to check if the value is a full URl like http://commons.wikimedia.org/data/main/Data:Paris.map - pattern = re.compile(r'^Data:((?![:|#]).)+\.map$') - matches = pattern.match(value) - if not matches: - raise ValueError("Value must start with Data: and end with .map. 
In addition title should not contain characters like colon, hash or pipe.") - self.value = value - - self.json_representation['datavalue'] = { - 'value': self.value, - 'type': 'string' - } - - super(GeoShape, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) - - -class GlobeCoordinate(BaseDataType): - """ - Implements the Wikibase data type for globe coordinates - """ - DTYPE = 'globe-coordinate' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> '{value}'^^geo:wktLiteral . - }} - ''' - - def __init__(self, latitude, longitude, precision, prop_nr, globe=None, wikibase_url=None, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param latitude: Latitute in decimal format - :type latitude: float or None - :param longitude: Longitude in decimal format - :type longitude: float or None - :param precision: Precision of the position measurement - :type precision: float or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - globe = config['COORDINATE_GLOBE_QID'] if globe is None else globe - 
wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url - - self.latitude = None - self.longitude = None - self.precision = None - self.globe = None - - if globe.startswith('Q'): - globe = wikibase_url + '/entity/' + globe - - value = (latitude, longitude, precision, globe) - - super(GlobeCoordinate, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - # TODO: Introduce validity checks for coordinates, etc. - # TODO: Add check if latitude/longitude/precision is None - self.latitude, self.longitude, self.precision, self.globe = value - - self.json_representation['datavalue'] = { - 'value': { - 'latitude': self.latitude, - 'longitude': self.longitude, - 'precision': self.precision, - 'globe': self.globe - }, - 'type': 'globecoordinate' - } - - self.value = (self.latitude, self.longitude, self.precision, self.globe) - super(GlobeCoordinate, self).set_value(value=self.value) - - def get_sparql_value(self): - return 'Point(' + str(self.latitude) + ', ' + str(self.longitude) + ')' - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(latitude=None, longitude=None, precision=None, prop_nr=jsn['property'], - snak_type=jsn['snaktype']) - - value = jsn['datavalue']['value'] - return cls(latitude=value['latitude'], longitude=value['longitude'], precision=value['precision'], - prop_nr=jsn['property']) - - -class Item(BaseDataType): - """ - Implements the Wikibase data type 'wikibase-item' with a value being another item ID - """ - DTYPE = 'wikibase-item' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/Q{value}> . 
- }} - ''' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The item ID to serve as the value - :type value: str with a 'Q' prefix, followed by several digits or only the digits without the 'Q' prefix - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(ItemID, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, (str, int)) or value is None, 'Expected str or int, found {} ({})'.format(type(value), value) - if value is None: - self.value = value - elif isinstance(value, int): - self.value = value - else: - pattern = re.compile(r'^Q?([0-9]+)$') - matches = pattern.match(value) - - if not matches: - raise ValueError("Invalid item ID ({}), format must be 'Q[0-9]+'".format(value)) - else: - self.value = int(matches.group(1)) - - self.json_representation['datavalue'] = { - 'value': { - 'entity-type': 'item', - 'numeric-id': self.value, - 'id': 'Q{}'.format(self.value) - }, - 'type': 'wikibase-entityid' - } - - super(ItemID, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return 
cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property']) - - -class Lexeme(BaseDataType): - """ - Implements the Wikibase data type 'wikibase-lexeme' - """ - DTYPE = 'wikibase-lexeme' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/L{value}> . - }} - ''' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The lexeme number to serve as a value - :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix - :param prop_nr: The property number for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(Lexeme, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, (str, int)) or value is None, "Expected str or int, found {} ({})".format(type(value), value) - if value is None: - self.value = value - elif isinstance(value, int): - self.value = value - else: - pattern = re.compile(r'^L?([0-9]+)$') - matches = pattern.match(value) - - if not matches: - raise ValueError("Invalid lexeme ID ({}), format must be 'L[0-9]+'".format(value)) - else: - self.value = int(matches.group(1)) - - self.json_representation['datavalue'] = { - 'value': { - 'entity-type': 'lexeme', - 
'numeric-id': self.value, - 'id': 'L{}'.format(self.value) - }, - 'type': 'wikibase-entityid' - } - - super(Lexeme, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property']) - - -class Math(BaseDataType): - """ - Implements the Wikibase data type 'math' for mathematical formula in TEX format - """ - DTYPE = 'math' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The string to be used as the value - :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(Math, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - self.value = value - - self.json_representation['datavalue'] = { - 'value': self.value, - 'type': 'string' - } - - super(Math, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return 
cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) - - -class MonolingualText(BaseDataType): - """ - Implements the Wikibase data type for Monolingual Text strings - """ - DTYPE = 'monolingualtext' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> {value} . - }} - ''' - - def __init__(self, text, prop_nr, language=None, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param text: The language specific string to be used as the value - :type text: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param language: Specifies the language the value belongs to - :type language: str - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - self.text = None - self.language = config['DEFAULT_LANGUAGE'] if language is None else language - - value = (text, self.language) - - super(MonolingualText, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - self.text, self.language = value - if self.text is not None: - assert isinstance(self.text, str) or self.text is None, "Expected str, found {} ({})".format(type(self.text), self.text) - elif self.snak_type == 'value': - raise ValueError("Parameter 'text' can't be 'None' if 'snak_type' is 'value'") - 
assert isinstance(self.language, str), "Expected str, found {} ({})".format(type(self.language), self.language) - - self.json_representation['datavalue'] = { - 'value': { - 'text': self.text, - 'language': self.language - }, - 'type': 'monolingualtext' - } - - self.value = (self.text, self.language) - super(MonolingualText, self).set_value(value=self.value) - - def get_sparql_value(self): - return '"' + self.text.replace('"', r'\"') + '"@' + self.language - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(text=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - - value = jsn['datavalue']['value'] - return cls(text=value['text'], prop_nr=jsn['property'], language=value['language']) - - -class MusicalNotation(BaseDataType): - """ - Implements the Wikibase data type 'string' - """ - DTYPE = 'musical-notation' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: Values for that data type are strings describing music following LilyPond syntax. 
- :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(MusicalNotation, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - self.value = value - - self.json_representation['datavalue'] = { - 'value': self.value, - 'type': 'string' - } - - super(MusicalNotation, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) - - -class Property(BaseDataType): - """ - Implements the Wikibase data type 'property' - """ - DTYPE = 'wikibase-property' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/P{value}> . 
- }} - ''' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The property number to serve as a value - :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix - :param prop_nr: The property number for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(Property, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, (str, int)) or value is None, "Expected str or int, found {} ({})".format(type(value), value) - if value is None: - self.value = value - elif isinstance(value, int): - self.value = value - else: - pattern = re.compile(r'^P?([0-9]+)$') - matches = pattern.match(value) - - if not matches: - raise ValueError("Invalid property ID ({}), format must be 'P[0-9]+'".format(value)) - else: - self.value = int(matches.group(1)) - - self.json_representation['datavalue'] = { - 'value': { - 'entity-type': 'property', - 'numeric-id': self.value, - 'id': 'P{}'.format(self.value) - }, - 'type': 'wikibase-entityid' - } - - super(Property, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], 
snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property']) - - -class Quantity(BaseDataType): - """ - Implements the Wikibase data type for quantities - """ - DTYPE = 'quantity' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> '{value}'^^xsd:decimal . - }} - ''' - - def __init__(self, quantity, prop_nr, upper_bound=None, lower_bound=None, unit='1', wikibase_url=None, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param quantity: The quantity value - :type quantity: float, str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param upper_bound: Upper bound of the value if it exists, e.g. for standard deviations - :type upper_bound: float, str - :param lower_bound: Lower bound of the value if it exists, e.g. for standard deviations - :type lower_bound: float, str - :param unit: The unit item URL or the QID a certain quantity has been measured in (https://www.wikidata.org/wiki/Wikidata:Units). 
- The default is dimensionless, represented by a '1' - :type unit: str - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url - - if unit.startswith('Q'): - unit = wikibase_url + '/entity/' + unit - - self.quantity = None - self.unit = None - self.upper_bound = None - self.lower_bound = None - - value = (quantity, unit, upper_bound, lower_bound) - - super(Quantity, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - self.quantity, self.unit, self.upper_bound, self.lower_bound = value - - if self.quantity is not None: - self.quantity = self.format_amount(self.quantity) - self.unit = str(self.unit) - if self.upper_bound: - self.upper_bound = self.format_amount(self.upper_bound) - if self.lower_bound: - self.lower_bound = self.format_amount(self.lower_bound) - - # Integrity checks for value and bounds - try: - for i in [self.quantity, self.upper_bound, self.lower_bound]: - if i: - float(i) - except ValueError: - raise ValueError("Value, bounds and units must parse as integers or float") - - if (self.lower_bound and self.upper_bound) and (float(self.lower_bound) > float(self.upper_bound) - or float(self.lower_bound) > float(self.quantity)): - raise ValueError("Lower bound too large") - - if self.upper_bound and float(self.upper_bound) < float(self.quantity): - raise ValueError("Upper bound too small") - elif self.snak_type == 'value': - raise 
ValueError("Parameter 'quantity' can't be 'None' if 'snak_type' is 'value'") - - self.json_representation['datavalue'] = { - 'value': { - 'amount': self.quantity, - 'unit': self.unit, - 'upperBound': self.upper_bound, - 'lowerBound': self.lower_bound - }, - 'type': 'quantity' - } - - # remove bounds from json if they are undefined - if not self.upper_bound: - del self.json_representation['datavalue']['value']['upperBound'] - - if not self.lower_bound: - del self.json_representation['datavalue']['value']['lowerBound'] - - self.value = (self.quantity, self.unit, self.upper_bound, self.lower_bound) - super(Quantity, self).set_value(value=self.value) - - def get_sparql_value(self): - return self.quantity - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(quantity=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - - value = jsn['datavalue']['value'] - upper_bound = value['upperBound'] if 'upperBound' in value else None - lower_bound = value['lowerBound'] if 'lowerBound' in value else None - return cls(quantity=value['amount'], prop_nr=jsn['property'], upper_bound=upper_bound, lower_bound=lower_bound, - unit=value['unit']) - - @staticmethod - def format_amount(amount): - # Remove .0 by casting to int - if float(amount) % 1 == 0: - amount = int(float(amount)) - - # Adding prefix + for positive number and 0 - if not str(amount).startswith('+') and float(amount) >= 0: - amount = str('+{}'.format(amount)) - - # return as string - return str(amount) - - -class Sense(BaseDataType): - """ - Implements the Wikibase data type 'wikibase-sense' - """ - DTYPE = 'wikibase-sense' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> . 
- }} - ''' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: Value using the format "L-S" (example: L252248-S123) - :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix - :param prop_nr: The property number for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(Sense, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - if value is None: - self.value = value - else: - pattern = re.compile(r'^L[0-9]+-S[0-9]+$') - matches = pattern.match(value) - - if not matches: - raise ValueError("Invalid sense ID ({}), format must be 'L[0-9]+-S[0-9]+'".format(value)) - - self.value = value - - self.json_representation['datavalue'] = { - 'value': { - 'entity-type': 'sense', - 'id': self.value - }, - 'type': 'wikibase-entityid' - } - - super(Sense, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value']['id'], prop_nr=jsn['property']) - - -class 
String(BaseDataType): - """ - Implements the Wikibase data type 'string' - """ - - DTYPE = 'string' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The string to be used as the value - :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(String, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - self.value = value - - self.json_representation['datavalue'] = { - 'value': self.value, - 'type': 'string' - } - - super(String, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) - - -class TabularData(BaseDataType): - """ - Implements the Wikibase data type 'tabular-data' - """ - DTYPE = 'tabular-data' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: Reference to tabular data file on Wikimedia Commons. 
- :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(TabularData, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - if value is None: - self.value = value - else: - # TODO: Need to check if the value is a full URl like http://commons.wikimedia.org/data/main/Data:Taipei+Population.tab - pattern = re.compile(r'^Data:((?![:|#]).)+\.tab$') - matches = pattern.match(value) - if not matches: - raise ValueError("Value must start with Data: and end with .tab. 
In addition title should not contain characters like colon, hash or pipe.") - self.value = value - - self.json_representation['datavalue'] = { - 'value': self.value, - 'type': 'string' - } - - super(TabularData, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) - - -class Time(BaseDataType): - """ - Implements the Wikibase data type with date and time values - """ - DTYPE = 'time' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> '{value}'^^xsd:dateTime . - }} - ''' - - def __init__(self, time, prop_nr, before=0, after=0, precision=11, timezone=0, calendarmodel=None, wikibase_url=None, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param time: Explicit value for point in time, represented as a timestamp resembling ISO 8601 - :type time: str in the format '+%Y-%m-%dT%H:%M:%SZ', e.g. '+2001-12-31T12:01:13Z' - :param prop_nr: The property number for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param before: explicit integer value for how many units after the given time it could be. - The unit is given by the precision. - :type before: int - :param after: explicit integer value for how many units before the given time it could be. - The unit is given by the precision. - :type after: int - :param precision: Precision value for dates and time as specified in the Wikibase data model - (https://www.wikidata.org/wiki/Special:ListDatatypes#time) - :type precision: int - :param timezone: The timezone which applies to the date and time as specified in the Wikibase data model - :type timezone: int - :param calendarmodel: The calendar model used for the date. URL to the Wikibase calendar model item or the QID. 
- :type calendarmodel: str - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - calendarmodel = config['CALENDAR_MODEL_QID'] if calendarmodel is None else calendarmodel - wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url - - self.time = None - self.before = None - self.after = None - self.precision = None - self.timezone = None - self.calendarmodel = None - - if calendarmodel.startswith('Q'): - calendarmodel = wikibase_url + '/entity/' + calendarmodel - - value = (time, before, after, precision, timezone, calendarmodel) - - super(Time, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - self.time, self.before, self.after, self.precision, self.timezone, self.calendarmodel = value - assert isinstance(self.time, str) or self.time is None, "Expected str, found {} ({})".format(type(self.time), self.time) - - if self.time is not None: - if not (self.time.startswith("+") or self.time.startswith("-")): - self.time = "+" + self.time - pattern = re.compile(r'^[+-][0-9]*-(?:1[0-2]|0[0-9])-(?:3[01]|0[0-9]|[12][0-9])T(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]Z$') - matches = pattern.match(self.time) - if not matches: - raise ValueError("Time time must be a string in the following format: '+%Y-%m-%dT%H:%M:%SZ'") - self.value = value - if self.precision < 0 or self.precision > 15: - raise ValueError("Invalid value for time precision, see 
https://www.mediawiki.org/wiki/Wikibase/DataModel/JSON#time") - elif self.snak_type == 'value': - raise ValueError("Parameter 'time' can't be 'None' if 'snak_type' is 'value'") - - self.json_representation['datavalue'] = { - 'value': { - 'time': self.time, - 'before': self.before, - 'after': self.after, - 'precision': self.precision, - 'timezone': self.timezone, - 'calendarmodel': self.calendarmodel - }, - 'type': 'time' - } - - self.value = (self.time, self.before, self.after, self.precision, self.timezone, self.calendarmodel) - super(Time, self).set_value(value=self.value) - - def get_sparql_value(self): - return self.time - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(time=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - - value = jsn['datavalue']['value'] - return cls(time=value['time'], prop_nr=jsn['property'], before=value['before'], after=value['after'], precision=value['precision'], timezone=value['timezone'], - calendarmodel=value['calendarmodel']) - - -class Url(BaseDataType): - """ - Implements the Wikibase data type for URL strings - """ - DTYPE = 'url' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> <{value}> . 
- }} - ''' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The URL to be used as the value - :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(Url, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - if value is None: - self.value = value - else: - pattern = re.compile(r'^([a-z][a-z\d+.-]*):([^][<>\"\x00-\x20\x7F])+$') - matches = pattern.match(value) - - if not matches: - raise ValueError("Invalid URL {}".format(value)) - self.value = value - - self.json_representation['datavalue'] = { - 'value': self.value, - 'type': 'string' - } - - super(Url, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 23a222df..6c5ee6f9 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -4,13 +4,13 
@@ from functools import lru_cache from itertools import chain -from wikibaseintegrator import wbi_functions from wikibaseintegrator.wbi_config import config class FastRunContainer(object): - def __init__(self, base_data_type, engine, mediawiki_api_url=None, sparql_endpoint_url=None, wikibase_url=None, base_filter=None, use_refs=False, ref_handler=None, - case_insensitive=False, debug=False): + def __init__(self, api, base_data_type, mediawiki_api_url=None, sparql_endpoint_url=None, wikibase_url=None, base_filter=None, use_refs=False, case_insensitive=False): + self.api = api + self.reconstructed_statements = [] self.rev_lookup = defaultdict(set) self.rev_lookup_ci = defaultdict(set) @@ -23,14 +23,11 @@ def __init__(self, base_data_type, engine, mediawiki_api_url=None, sparql_endpoi self.current_qid = '' self.base_data_type = base_data_type - self.engine = engine self.mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url self.sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if sparql_endpoint_url is None else sparql_endpoint_url self.wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url self.use_refs = use_refs - self.ref_handler = ref_handler self.case_insensitive = case_insensitive - self.debug = debug if base_filter and any(base_filter): self.base_filter = base_filter @@ -188,7 +185,7 @@ def write_required(self, data: list, cqid=None) -> bool: # comp = [True for x in app_data for y in rec_app_data if x.equals(y, include_ref=self.use_refs)] if len(comp) != len(app_data): - if self.debug: + if self.api.debug: print("failed append: {}".format(p)) return True @@ -463,7 +460,7 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: if self.debug: print(query) - r = wbi_functions.execute_sparql_query(query, endpoint=self.sparql_endpoint_url)['results']['bindings'] + r = self.api.execute_sparql_query(query, endpoint=self.sparql_endpoint_url)['results']['bindings'] count = 
int(r[0]['c']['value']) print("Count: {}".format(count)) num_pages = (int(count) // page_size) + 1 @@ -546,7 +543,7 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: if self.debug: print(query) - results = wbi_functions.execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] + results = self.api.execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] self.format_query_results(results, prop_nr) self.update_frc_from_query(results, prop_nr) page_count += 1 @@ -582,7 +579,7 @@ def _query_lang(self, lang: str, lang_data_type: str): if self.debug: print(query) - return wbi_functions.execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] + return self.api.execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] @staticmethod def _process_lang(result: list): @@ -595,9 +592,10 @@ def _process_lang(result: list): @lru_cache(maxsize=100000) def get_prop_datatype(self, prop_nr: str) -> str: - item = self.engine(item_id=prop_nr, sparql_endpoint_url=self.sparql_endpoint_url, mediawiki_api_url=self.mediawiki_api_url, wikibase_url=self.wikibase_url, - debug=self.debug) - return item.entity_metadata['datatype'] + from wikibaseintegrator import WikibaseIntegrator + wbi = WikibaseIntegrator(sparql_endpoint_url=self.sparql_endpoint_url, mediawiki_api_url=self.mediawiki_api_url, wikibase_url=self.wikibase_url, debug=self.debug) + property = wbi.property.get(prop_nr) + return property.datatype def clear(self) -> None: """ diff --git a/wikibaseintegrator/wbi_functions.py b/wikibaseintegrator/wbi_functions.py deleted file mode 100644 index 58825246..00000000 --- a/wikibaseintegrator/wbi_functions.py +++ /dev/null @@ -1,394 +0,0 @@ -import datetime -from time import sleep -from warnings import warn - -import requests - -from wikibaseintegrator import wbi_login -from wikibaseintegrator.wbi_backoff import wbi_backoff -from 
wikibaseintegrator.wbi_config import config -from wikibaseintegrator.wbi_exceptions import MWApiError, SearchError - - -def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries=1000, retry_after=60, **kwargs): - """ - :param method: 'GET' or 'POST' - :param mediawiki_api_url: - :param session: If a session is passed, it will be used. Otherwise a new requests session is created - :param max_retries: If api request fails due to rate limiting, maxlag, or readonly mode, retry up to - `max_retries` times - :type max_retries: int - :param retry_after: Number of seconds to wait before retrying request (see max_retries) - :type retry_after: int - :param kwargs: Passed to requests.request - :return: - """ - - mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - - # TODO: Add support for 'multipart/form-data' when using POST (https://www.mediawiki.org/wiki/API:Edit#Large_edits) - - if 'data' in kwargs and kwargs['data']: - if 'format' not in kwargs['data']: - kwargs['data'].update({'format': 'json'}) - elif kwargs['data']['format'] != 'json': - raise ValueError("'format' can only be 'json' when using mediawiki_api_call()") - - response = None - session = session if session else requests.session() - for n in range(max_retries): - try: - response = session.request(method, mediawiki_api_url, **kwargs) - except requests.exceptions.ConnectionError as e: - print("Connection error: {}. Sleeping for {} seconds.".format(e, retry_after)) - sleep(retry_after) - continue - if response.status_code == 503: - print("service unavailable. sleeping for {} seconds".format(retry_after)) - sleep(retry_after) - continue - - response.raise_for_status() - json_data = response.json() - """ - Mediawiki api response has code = 200 even if there are errors. - rate limit doesn't return HTTP 429 either. 
may in the future - https://phabricator.wikimedia.org/T172293 - """ - if 'error' in json_data: - # rate limiting - error_msg_names = set() - if 'messages' in json_data['error']: - error_msg_names = set(x.get('name') for x in json_data['error']['messages']) - if 'actionthrottledtext' in error_msg_names: - sleep_sec = int(response.headers.get('retry-after', retry_after)) - print("{}: rate limited. sleeping for {} seconds".format(datetime.datetime.utcnow(), sleep_sec)) - sleep(sleep_sec) - continue - - # maxlag - if 'code' in json_data['error'] and json_data['error']['code'] == 'maxlag': - sleep_sec = json_data['error'].get('lag', retry_after) - print("{}: maxlag. sleeping for {} seconds".format(datetime.datetime.utcnow(), sleep_sec)) - sleep(sleep_sec) - continue - - # readonly - if 'code' in json_data['error'] and json_data['error']['code'] == 'readonly': - print('The Wikibase instance is currently in readonly mode, waiting for {} seconds'.format(retry_after)) - sleep(retry_after) - continue - - # others case - raise MWApiError(response.json() if response else {}) - - # there is no error or waiting. break out of this loop and parse response - break - else: - # the first time I've ever used for - else!! - # else executes if the for loop completes normally. i.e. 
does not encouter a `break` - # in this case, that means it tried this api call 10 times - raise MWApiError(response.json() if response else {}) - - return json_data - - -def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_agent=None, allow_anonymous=False, max_retries=1000, retry_after=60): - mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent - - if not allow_anonymous: - if login is None: - # Force allow_anonymous as False by default to ask for a login object - raise ValueError("allow_anonymous can't be False and login is None at the same time.") - elif mediawiki_api_url != login.mediawiki_api_url: - raise ValueError("mediawiki_api_url can't be different with the one in the login object.") - - headers = { - 'User-Agent': user_agent - } - - if data is not None: - if login is not None and 'token' not in data: - data.update({'token': login.get_edit_token()}) - - if not allow_anonymous: - # Always assert user if allow_anonymous is False - if 'assert' not in data: - data.update({'assert': 'user'}) - if 'token' in data and data['token'] == '+\\': - raise wbi_login.LoginError("Anonymous edit are not allowed by default. 
Set allow_anonymous to True to edit mediawiki anonymously.") - elif 'assert' not in data: - # Always assert anon if allow_anonymous is True - data.update({'assert': 'anon'}) - - login_session = login.get_session() if login is not None else None - - return mediawiki_api_call('POST', mediawiki_api_url, login_session, data=data, headers=headers, max_retries=max_retries, retry_after=retry_after) - - -@wbi_backoff() -def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max_retries=1000, retry_after=60, debug=False): - """ - Static method which can be used to execute any SPARQL query - :param prefix: The URI prefixes required for an endpoint, default is the Wikidata specific prefixes - :param query: The actual SPARQL query string - :param endpoint: The URL string for the SPARQL endpoint. Default is the URL for the Wikidata SPARQL endpoint - :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. - :type user_agent: str - :param max_retries: The number time this function should retry in case of header reports. - :param retry_after: the number of seconds should wait upon receiving either an error code or the Query Service is not reachable. - :param debug: Enable debug output. 
- :type debug: boolean - :return: The results of the query are returned in JSON format - """ - - sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if endpoint is None else endpoint - user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent - - if prefix: - query = prefix + '\n' + query - - params = { - 'query': '#Tool: WikibaseIntegrator wbi_functions.execute_sparql_query\n' + query, - 'format': 'json' - } - - headers = { - 'Accept': 'application/sparql-results+json', - 'User-Agent': user_agent - } - - if debug: - print(params['query']) - - for n in range(max_retries): - try: - response = requests.post(sparql_endpoint_url, params=params, headers=headers) - except requests.exceptions.ConnectionError as e: - print("Connection error: {}. Sleeping for {} seconds.".format(e, retry_after)) - sleep(retry_after) - continue - if response.status_code == 503: - print("Service unavailable (503). Sleeping for {} seconds".format(retry_after)) - sleep(retry_after) - continue - if response.status_code == 429: - if 'retry-after' in response.headers.keys(): - retry_after = response.headers['retry-after'] - print("Too Many Requests (429). Sleeping for {} seconds".format(retry_after)) - sleep(retry_after) - continue - response.raise_for_status() - results = response.json() - - return results - - -def merge_items(from_id, to_id, ignore_conflicts='', mediawiki_api_url=None, login=None, allow_anonymous=False, user_agent=None): - """ - A static method to merge two items - :param from_id: The QID which should be merged into another item - :type from_id: string with 'Q' prefix - :param to_id: The QID into which another item should be merged - :type to_id: string with 'Q' prefix - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :param ignore_conflicts: A string with the values 'description', 'statement' or 'sitelink', separated by a pipe ('|') if using more than one of those. 
- :type ignore_conflicts: str - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. - :type user_agent: str - """ - - params = { - 'action': 'wbmergeitems', - 'fromid': from_id, - 'toid': to_id, - 'format': 'json', - 'bot': '', - 'ignoreconflicts': ignore_conflicts - } - - if config['MAXLAG'] > 0: - params.update({'maxlag': config['MAXLAG']}) - - return mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, allow_anonymous=allow_anonymous) - - -def remove_claims(claim_id, summary=None, revision=None, mediawiki_api_url=None, login=None, allow_anonymous=False, user_agent=None): - """ - Delete an item - :param claim_id: One GUID or several (pipe-separated) GUIDs identifying the claims to be removed. All claims must belong to the same entity. - :type claim_id: string - :param summary: Summary for the edit. Will be prepended by an automatically generated comment. - :type summary: str - :param revision: The numeric identifier for the revision to base the modification on. This is used for detecting conflicts during save. - :type revision: str - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. 
- :type user_agent: str - """ - - params = { - 'action': 'wbremoveclaims', - 'claim': claim_id, - 'summary': summary, - 'baserevid': revision, - 'bot': True, - 'format': 'json' - } - - if config['MAXLAG'] > 0: - params.update({'maxlag': config['MAXLAG']}) - - return mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, allow_anonymous=allow_anonymous) - - -def search_entities(search_string, language=None, strict_language=True, search_type='item', mediawiki_api_url=None, max_results=500, dict_result=False, login=None, - allow_anonymous=True, user_agent=None): - """ - Performs a search for entities in the Wikibase instance using labels and aliases. - :param search_string: a string which should be searched for in the Wikibase instance (labels and aliases) - :type search_string: str - :param language: The language in which to perform the search. - :type language: str - :param strict_language: Whether to disable language fallback - :type strict_language: bool - :param search_type: Search for this type of entity. One of the following values: form, item, lexeme, property, sense - :type search_type: str - :param mediawiki_api_url: Specify the mediawiki_api_url. - :type mediawiki_api_url: str - :param max_results: The maximum number of search results returned. Default 500 - :type max_results: int - :param dict_result: - :type dict_result: boolean - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. 
- :type allow_anonymous: bool - :param user_agent: The user agent string transmitted in the http header - :type user_agent: str - :return: list - """ - - language = config['DEFAULT_LANGUAGE'] if language is None else language - - params = { - 'action': 'wbsearchentities', - 'search': search_string, - 'language': language, - 'strict_language': strict_language, - 'type': search_type, - 'limit': 50, - 'format': 'json' - } - - cont_count = 0 - results = [] - - while True: - params.update({'continue': cont_count}) - - search_results = mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, - allow_anonymous=allow_anonymous) - - if search_results['success'] != 1: - raise SearchError('Wikibase API wbsearchentities failed') - else: - for i in search_results['search']: - if dict_result: - description = i['description'] if 'description' in i else None - aliases = i['aliases'] if 'aliases' in i else None - results.append({ - 'id': i['id'], - 'label': i['label'], - 'match': i['match'], - 'description': description, - 'aliases': aliases - }) - else: - results.append(i['id']) - - if 'search-continue' not in search_results: - break - else: - cont_count = search_results['search-continue'] - - if cont_count >= max_results: - break - - return results - - -def generate_item_instances(items, mediawiki_api_url=None, login=None, allow_anonymous=True, user_agent=None): - """ - A method which allows for retrieval of a list of Wikidata items or properties. The method generates a list of - tuples where the first value in the tuple is the QID or property ID, whereas the second is the new instance of - wbi_item.Item containing all the data of the item. This is most useful for mass retrieval of items. 
- :param user_agent: A custom user agent - :type user_agent: str - :param items: A list of QIDs or property IDs - :type items: list - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :return: A list of tuples, first value in the tuple is the QID or property ID string, second value is the instance of wbi_item.Item with the corresponding - item data. - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - """ - - from wikibaseintegrator import wbi_item - - assert type(items) == list - - params = { - 'action': 'wbgetentities', - 'ids': '|'.join(items), - 'format': 'json' - } - - reply = mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, allow_anonymous=allow_anonymous) - - item_instances = [] - for qid, v in reply['entities'].items(): - ii = wbi_item.Item(item_id=qid, item_data=v) - ii.mediawiki_api_url = mediawiki_api_url - item_instances.append((qid, ii)) - - return item_instances - - -def get_distinct_value_props(sparql_endpoint_url=None, wikibase_url=None, property_constraint_pid=None, distinct_values_constraint_qid=None): - """ - On wikidata, the default core IDs will be the properties with a distinct values constraint select ?p where {?p wdt:P2302 wd:Q21502410} - See: https://www.wikidata.org/wiki/Help:Property_constraints_portal - https://www.wikidata.org/wiki/Help:Property_constraints_portal/Unique_value - """ - - wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url - property_constraint_pid = config['PROPERTY_CONSTRAINT_PID'] if property_constraint_pid is None else property_constraint_pid - distinct_values_constraint_qid = config['DISTINCT_VALUES_CONSTRAINT_QID'] if distinct_values_constraint_qid is None else distinct_values_constraint_qid - - pcpid = 
property_constraint_pid - dvcqid = distinct_values_constraint_qid - - query = ''' - SELECT ?p WHERE {{ - ?p <{wb_url}/prop/direct/{prop_nr}> <{wb_url}/entity/{entity}> - }} - '''.format(wb_url=wikibase_url, prop_nr=pcpid, entity=dvcqid) - results = execute_sparql_query(query, endpoint=sparql_endpoint_url)['results']['bindings'] - if not results: - warn("Warning: No distinct value properties found\n" + - "Please set P2302 and Q21502410 in your Wikibase or set `core_props` manually.\n" + - "Continuing with no core_props") - return set() - else: - return set(map(lambda x: x['p']['value'].rsplit('/', 1)[-1], results)) diff --git a/wikibaseintegrator/wbi_item.py b/wikibaseintegrator/wbi_item.py deleted file mode 100644 index 177a4072..00000000 --- a/wikibaseintegrator/wbi_item.py +++ /dev/null @@ -1,293 +0,0 @@ -import copy - -from wikibaseintegrator import wbi_functions -from wikibaseintegrator.wbi_config import config -from wikibaseintegrator.wbi_core import Core -from wikibaseintegrator.wbi_datatype import BaseDataType -from wikibaseintegrator.wbi_exceptions import (IDMissingError, SearchOnlyError) -from wikibaseintegrator.wbi_fastrun import FastRunContainer - - -class Item(Core): - fast_run_store = [] - distinct_value_props = {} - - def __init__(self, item_id='', new_item=False, data=None, mediawiki_api_url=None, sparql_endpoint_url=None, wikibase_url=None, fast_run=False, fast_run_base_filter=None, - fast_run_use_refs=False, ref_handler=None, global_ref_mode='KEEP_GOOD', good_refs=None, keep_good_ref_statements=False, search_only=False, item_data=None, - user_agent=None, core_props=None, core_prop_match_thresh=0.66, property_constraint_pid=None, distinct_values_constraint_qid=None, fast_run_case_insensitive=False, - debug=False) -> None: - """ - constructor - :param item_id: Wikibase item id - :type item_id: str - :param new_item: This parameter lets the user indicate if a new item should be created - :type new_item: bool - :param data: a dictionary with property 
strings as keys and the data which should be written to a item as the property values - :type data: list[BaseDataType] or BaseDataType or None - :param mediawiki_api_url: - :type mediawiki_api_url: str - :param sparql_endpoint_url: - :type sparql_endpoint_url: str - :param wikibase_url: - :type wikibase_url: str - :param fast_run: True if this item should be run in fastrun mode, otherwise False. User setting this to True should also specify the - fast_run_base_filter for these item types - :type fast_run: bool - :param fast_run_base_filter: A property value dict determining the Wikibase property and the corresponding value which should be used as a filter for - this item type. Several filter criteria can be specified. The values can be either Wikibase item QIDs, strings or empty strings if the value should - be a variable in SPARQL. - Example: {'P352': '', 'P703': 'Q15978631'} if the basic common type of things this bot runs on is human proteins (specified by Uniprot IDs (P352) - and 'found in taxon' homo sapiens 'Q15978631'). - :type fast_run_base_filter: dict - :param fast_run_use_refs: If `True`, fastrun mode will consider references in determining if a statement should be updated and written to Wikibase. - Otherwise, only the value and qualifiers are used. Default: False - :type fast_run_use_refs: bool - :param ref_handler: This parameter defines a function that will manage the reference handling in a custom manner. This argument should be a function - handle that accepts two arguments, the old/current statement (first argument) and new/proposed/to be written statement (second argument), both of - type: a subclass of BaseDataType. The function should return an new item that is the item to be written. The item's values properties or qualifiers - should not be modified; only references. This function is also used in fastrun mode. This will only be used if the ref_mode is set to "CUSTOM". 
- :type ref_handler: function - :param global_ref_mode: sets the reference handling mode for an item. Four modes are possible, 'STRICT_KEEP' keeps all references as they are, - 'STRICT_KEEP_APPEND' keeps the references as they are and appends new ones. 'STRICT_OVERWRITE' overwrites all existing references for given. - 'KEEP_GOOD' will use the refs defined in good_refs. 'CUSTOM' will use the function defined in ref_handler - :type global_ref_mode: str - :param good_refs: This parameter lets the user define blocks of good references. It is a list of dictionaries. One block is a dictionary with Wikidata - properties as keys and potential values as the required value for a property. There can be arbitrarily many key: value pairs in one reference block. - Example: [{'P248': 'Q905695', 'P352': None, 'P407': None, 'P1476': None, 'P813': None}] This example contains one good reference block, stated in: - Uniprot, Uniprot ID, title of Uniprot entry, language of work and date when the information has been retrieved. A None type indicates that the value - varies from reference to reference. In this case, only the value for the Wikidata item for the Uniprot database stays stable over all of these - references. Key value pairs work here, as Wikidata references can hold only one value for one property. The number of good reference blocks is not - limited. This parameter OVERRIDES any other reference mode set!! - :type good_refs: list[dict] - :param keep_good_ref_statements: Do not delete any statement which has a good reference, either defined in the good_refs list or by any other - referencing mode. - :type keep_good_ref_statements: bool - :param search_only: If this flag is set to True, the data provided will only be used to search for the corresponding Wikibase item, but no actual data - updates will performed. This is useful, if certain states or values on the target item need to be checked before certain data is written to it. 
In - order to write new data to the item, the method update() will take data, modify the Wikibase item and a write() call will then perform the actual - write to the Wikibase instance. - :type search_only: bool - :param item_data: A Python JSON object corresponding to the item in item_id. This can be used in conjunction with item_id in order to provide raw data. - :type item_data: - :param user_agent: The user agent string to use when making http requests - :type user_agent: str - :param core_props: Core properties are used to retrieve an item based on `data` if a `item_id` is not given. This is a set of PIDs to use. If None, - all Wikibase properties with a distinct values constraint will be used. (see: get_core_props) - :type core_props: set - :param core_prop_match_thresh: The proportion of core props that must match during retrieval of an item when the item_id is not specified. - :type core_prop_match_thresh: float - :param property_constraint_pid: - :param distinct_values_constraint_qid: - :param fast_run_case_insensitive: - :param debug: Enable debug output. 
- :type debug: boolean - """ - - super().__init__() - self.core_prop_match_thresh = core_prop_match_thresh - self.item_id = item_id - self.new_item = new_item - self.mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - self.sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if sparql_endpoint_url is None else sparql_endpoint_url - self.wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url - self.property_constraint_pid = config['PROPERTY_CONSTRAINT_PID'] if property_constraint_pid is None else property_constraint_pid - self.distinct_values_constraint_qid = config['DISTINCT_VALUES_CONSTRAINT_QID'] if distinct_values_constraint_qid is None else distinct_values_constraint_qid - if data is None: - self.data = [] - elif isinstance(data, list) and all(isinstance(x, BaseDataType) for x in data): - self.data = data - elif isinstance(data, BaseDataType): - self.data = [data] - else: - raise TypeError("`data` must be a list of BaseDataType or an instance of BaseDataType") - self.fast_run = fast_run - self.fast_run_base_filter = fast_run_base_filter - self.fast_run_use_refs = fast_run_use_refs - self.fast_run_case_insensitive = fast_run_case_insensitive - self.ref_handler = ref_handler - self.global_ref_mode = global_ref_mode - self.good_refs = good_refs - self.keep_good_ref_statements = keep_good_ref_statements - self.search_only = search_only - self.item_data = item_data - self.user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent - - self.statements = [] - self.original_statements = [] - self.entity_metadata = {} - self.fast_run_container = None - if self.search_only: - self.require_write = False - else: - self.require_write = True - self.sitelinks = {} - self.lastrevid = None # stores last revisionid after a write occurs - - if fast_run_case_insensitive and not self.search_only: - raise ValueError("If using fast run case insensitive, search_only must be set") - - if 
self.ref_handler and not callable(self.ref_handler): - raise TypeError("ref_handler must be callable") - if self.global_ref_mode == 'CUSTOM' and self.ref_handler is None: - raise ValueError("If using a custom ref mode, ref_handler must be set") - - if (core_props is None) and (self.sparql_endpoint_url not in Item.distinct_value_props): - Item.distinct_value_props[self.sparql_endpoint_url] = wbi_functions.get_distinct_value_props(self.sparql_endpoint_url, - self.wikibase_url, - self.property_constraint_pid, - self.distinct_values_constraint_qid) - self.core_props = core_props if core_props is not None else Item.distinct_value_props[self.sparql_endpoint_url] - - if self.fast_run: - self.init_fastrun() - if self.debug: - if self.require_write: - if self.search_only: - print("Successful fastrun, search_only mode, we can't determine if data is up to date.") - else: - print("Successful fastrun, because no full data match you need to update the item.") - else: - print("Successful fastrun, no write to Wikibase instance required.") - - if self.item_id != '' and self.create_new_item: - raise IDMissingError("Cannot create a new item, when an identifier is given.") - elif self.new_item and len(self.data) > 0: - self.create_new_item = True - self.__construct_claim_json() - elif self.require_write or self.search_only: - self.init_data_load() - - def init_fastrun(self): - # We search if we already have a FastRunContainer with the same parameters to re-use it - for c in Item.fast_run_store: - if (c.base_filter == self.fast_run_base_filter) and (c.use_refs == self.fast_run_use_refs) and (c.sparql_endpoint_url == self.sparql_endpoint_url): - self.fast_run_container = c - self.fast_run_container.ref_handler = self.ref_handler - self.fast_run_container.current_qid = '' - self.fast_run_container.base_data_type = BaseDataType - self.fast_run_container.engine = self.__class__ - self.fast_run_container.mediawiki_api_url = self.mediawiki_api_url - self.fast_run_container.wikibase_url = 
self.wikibase_url - self.fast_run_container.debug = self.debug - if self.debug: - print("Found an already existing FastRunContainer") - - if not self.fast_run_container: - self.fast_run_container = FastRunContainer(base_filter=self.fast_run_base_filter, - base_data_type=BaseDataType, - engine=self.__class__, - sparql_endpoint_url=self.sparql_endpoint_url, - mediawiki_api_url=self.mediawiki_api_url, - wikibase_url=self.wikibase_url, - use_refs=self.fast_run_use_refs, - ref_handler=self.ref_handler, - case_insensitive=self.fast_run_case_insensitive, - debug=self.debug) - Item.fast_run_store.append(self.fast_run_container) - - if not self.search_only: - self.require_write = self.fast_run_container.write_required(self.data, cqid=self.item_id) - # set item id based on fast run data - if not self.require_write and not self.item_id: - self.item_id = self.fast_run_container.current_qid - else: - self.fast_run_container.load_item(self.data) - # set item id based on fast run data - if not self.item_id: - self.item_id = self.fast_run_container.current_qid - - def update(self, data): - """ - This method takes data, and modifies the Wikidata item. This works together with the data already provided via the constructor or if the constructor is - being instantiated with search_only=True. In the latter case, this allows for checking the item data before deciding which new data should be written to - the Wikidata item. The actual write to Wikidata only happens on calling of the write() method. If data has been provided already via the constructor, - data provided via the update() method will be appended to these data. 
- :param data: A list of Wikidata statment items inheriting from BaseDataType - :type data: list - """ - - if self.search_only: - raise SearchOnlyError - - assert type(data) == list - - self.data.extend(data) - self.statements = copy.deepcopy(self.original_statements) - - if self.debug: - print(self.data) - - if self.fast_run: - self.init_fastrun() - - if self.require_write and self.fast_run: - self.init_data_load() - self.__construct_claim_json() - self.__check_integrity() - elif not self.fast_run: - self.__construct_claim_json() - self.__check_integrity() - - def get_property_list(self): - """ - List of properties on the current item - :return: a list of property ID strings (Pxxxx). - """ - - property_list = set() - for x in self.statements: - property_list.add(x.get_prop_nr()) - - return list(property_list) - - def get_sitelink(self, site): - """ - A method to access the interwiki links in the json.model - :param site: The Wikipedia site the interwiki/sitelink should be returned for - :return: The interwiki/sitelink string for the specified Wikipedia will be returned. - """ - - if site in self.sitelinks: - return self.sitelinks[site] - else: - return None - - def set_sitelink(self, site, title, badges=()): - """ - Set sitelinks to corresponding Wikipedia pages - :param site: The Wikipedia page a sitelink is directed to (e.g. 'enwiki') - :param title: The title of the Wikipedia page the sitelink is directed to - :param badges: An iterable containing Wikipedia badge strings. 
- :return: - """ - - if self.search_only: - raise SearchOnlyError - - sitelink = { - 'site': site, - 'title': title, - 'badges': badges - } - self.json_representation['sitelinks'][site] = sitelink - self.sitelinks[site] = sitelink - - def count_references(self, prop_id): - counts = {} - for claim in self.get_json_representation()['claims'][prop_id]: - counts[claim['id']] = len(claim['references']) - return counts - - def get_reference_properties(self, prop_id): - references = [] - statements = [x for x in self.get_json_representation()['claims'][prop_id] if 'references' in x] - for statement in statements: - for reference in statement['references']: - references.append(reference['snaks'].keys()) - return references - - def get_qualifier_properties(self, prop_id): - qualifiers = [] - for statements in self.get_json_representation()['claims'][prop_id]: - qualifiers.append(statements['qualifiers'].keys()) - return qualifiers diff --git a/wikibaseintegrator/wbi_jsonparser.py b/wikibaseintegrator/wbi_jsonparser.py index c586aa1a..8a0a81bb 100644 --- a/wikibaseintegrator/wbi_jsonparser.py +++ b/wikibaseintegrator/wbi_jsonparser.py @@ -70,7 +70,7 @@ def __call__(self, *args): return self.get_class_representation(jsn=self.json_representation) def get_class_representation(self, jsn): - from wikibaseintegrator.wbi_datatype import BaseDataType + from wikibaseintegrator.datatypes.basedatatype import BaseDataType data_type = [x for x in BaseDataType.__subclasses__() if x.DTYPE == jsn['datatype']][0] self.final = True self.current_type = data_type diff --git a/wikibaseintegrator/wbi_lexeme.py b/wikibaseintegrator/wbi_lexeme.py deleted file mode 100644 index 21ab5595..00000000 --- a/wikibaseintegrator/wbi_lexeme.py +++ /dev/null @@ -1,5 +0,0 @@ -from wikibaseintegrator.wbi_core import Core - - -class Lexeme(Core): - pass diff --git a/wikibaseintegrator/wbi_property.py b/wikibaseintegrator/wbi_property.py deleted file mode 100644 index da338d28..00000000 --- 
a/wikibaseintegrator/wbi_property.py +++ /dev/null @@ -1,5 +0,0 @@ -from wikibaseintegrator.wbi_core import Core - - -class Property(Core): - pass diff --git a/wikibaseintegrator/wikibaseintegrator.py b/wikibaseintegrator/wikibaseintegrator.py index a09a9407..7a5c0b17 100644 --- a/wikibaseintegrator/wikibaseintegrator.py +++ b/wikibaseintegrator/wikibaseintegrator.py @@ -1,39 +1,35 @@ -from wikibaseintegrator.wbi_core import Core -from wikibaseintegrator.wbi_item import Item -from wikibaseintegrator.wbi_lexeme import Lexeme -from wikibaseintegrator.wbi_property import Property - -DEFAULT_CONFIG = { - 'BACKOFF_MAX_TRIES': None, - 'BACKOFF_MAX_VALUE': 3600, - 'USER_AGENT_DEFAULT': "WikibaseIntegrator/{} (https://github.com/LeMyst/WikibaseIntegrator)".format(__version__), - 'MAXLAG': 5, - 'PROPERTY_CONSTRAINT_PID': 'P2302', - 'DISTINCT_VALUES_CONSTRAINT_QID': 'Q21502410', - 'COORDINATE_GLOBE_QID': 'http://www.wikidata.org/entity/Q2', - 'CALENDAR_MODEL_QID': 'http://www.wikidata.org/entity/Q1985727', - 'MEDIAWIKI_API_URL': 'https://www.wikidata.org/w/api.php', - 'MEDIAWIKI_INDEX_URL': 'https://www.wikidata.org/w/index.php', - 'MEDIAWIKI_REST_URL': 'https://www.wikidata.org/w/rest.php', - 'SPARQL_ENDPOINT_URL': 'https://query.wikidata.org/sparql', - 'WIKIBASE_URL': 'http://www.wikidata.org', - 'LANGUAGE': 'en' -} +from wikibaseintegrator.wbi_api import Api +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.entities.item import Item +from wikibaseintegrator.entities.lexeme import Lexeme +from wikibaseintegrator.entities.property import Property class WikibaseIntegrator(object): def __init__(self, - mediawiki_api_url=DEFAULT_CONFIG["MEDIAWIKI_API_URL"], - mediawiki_index_url=DEFAULT_CONFIG["MEDIAWIKI_INDEX_URL"], - mediawiki_rest_url=DEFAULT_CONFIG["MEDIAWIKI_REST_URL"], - sparql_endpoint_url=DEFAULT_CONFIG["MEDIAWIKI_API_URL"], - wikibase_url=DEFAULT_CONFIG["WIKIBASE_URL"], + mediawiki_api_url=config["MEDIAWIKI_API_URL"], + 
mediawiki_index_url=config["MEDIAWIKI_INDEX_URL"], + mediawiki_rest_url=config["MEDIAWIKI_REST_URL"], + sparql_endpoint_url=config["SPARQL_ENDPOINT_URL"], + wikibase_url=config["WIKIBASE_URL"], + property_constraint_pid=config["PROPERTY_CONSTRAINT_PID"], + distinct_values_constraint_qid=config["DISTINCT_VALUES_CONSTRAINT_QID"], + search_only=False, + fast_run=False, + fast_run_base_filter=None, + fast_run_use_refs=False, + fast_run_case_insensitive=False, is_bot=False, - language=DEFAULT_CONFIG["LANGUAGE"]): - core = Core() - - self.core = core + language=config["DEFAULT_LANGUAGE"], + login=None, + debug=False): + self.debug = debug + self.api = Api(mediawiki_api_url=mediawiki_api_url, mediawiki_index_url=mediawiki_index_url, mediawiki_rest_url=mediawiki_rest_url, sparql_endpoint_url=sparql_endpoint_url, + wikibase_url=wikibase_url, property_constraint_pid=property_constraint_pid, distinct_values_constraint_qid=distinct_values_constraint_qid, + search_only=search_only, fast_run=fast_run, fast_run_base_filter=fast_run_base_filter, fast_run_use_refs=fast_run_use_refs, + fast_run_case_insensitive=fast_run_case_insensitive, is_bot=is_bot, language=language, login=login, debug=self.debug) - self.item = Item(core) - self.property = Property(core) - self.lexeme = Lexeme(core) + self.item = Item(api=self.api) + self.property = Property(api=self.api) + self.lexeme = Lexeme(api=self.api) + # self.functions = Functions() From 5dc40c4f4228714d3dbb2b326fa35f407b1e87e9 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 26 May 2021 18:41:01 +0200 Subject: [PATCH 008/308] Change to datatype and snaktype --- wikibaseintegrator/datatypes/basedatatype.py | 30 +++++++++---------- wikibaseintegrator/datatypes/commonsmedia.py | 6 ++-- wikibaseintegrator/datatypes/externalid.py | 6 ++-- wikibaseintegrator/datatypes/form.py | 6 ++-- wikibaseintegrator/datatypes/geoshape.py | 6 ++-- .../datatypes/globecoordinate.py | 6 ++-- 
wikibaseintegrator/datatypes/item.py | 6 ++-- wikibaseintegrator/datatypes/lexeme.py | 6 ++-- wikibaseintegrator/datatypes/math.py | 6 ++-- .../datatypes/monolingualtext.py | 10 +++---- .../datatypes/musicalnotation.py | 6 ++-- wikibaseintegrator/datatypes/property.py | 6 ++-- wikibaseintegrator/datatypes/quantity.py | 10 +++---- wikibaseintegrator/datatypes/sense.py | 6 ++-- wikibaseintegrator/datatypes/string.py | 6 ++-- wikibaseintegrator/datatypes/tabulardata.py | 6 ++-- wikibaseintegrator/datatypes/time.py | 10 +++---- wikibaseintegrator/datatypes/url.py | 6 ++-- wikibaseintegrator/wbi_jsonparser.py | 6 ++-- 19 files changed, 75 insertions(+), 75 deletions(-) diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index df3d1ba9..75d8bf85 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -23,11 +23,11 @@ def __init__(self, value, prop_nr, **kwargs): :type value: str or int or tuple :param prop_nr: The property number a Wikibase snak belongs to :type prop_nr: A string with a prefixed 'P' and several digits e.g. 'P715' (Drugbank ID) or an int - :param data_type: The Wikibase data type declaration of this snak - :type data_type: str - :param snak_type: The snak type of the Wikibase data snak, three values possible, depending if the value is a known (value), not existent (novalue) or + :param datatype: The Wikibase data type declaration of this snak + :type datatype: str + :param snaktype: The snak type of the Wikibase data snak, three values possible, depending if the value is a known (value), not existent (novalue) or unknown (somevalue). See Wikibase documentation. - :type snak_type: a str of either 'value', 'novalue' or 'somevalue' + :type snaktype: a str of either 'value', 'novalue' or 'somevalue' :param references: A one level nested list with reference Wikibase snaks of base type BaseDataType, e.g. 
references=[[, ], []] This will create two references, the first one with two statements, the second with one @@ -48,8 +48,8 @@ def __init__(self, value, prop_nr, **kwargs): """ self.value = value - self.data_type = kwargs.pop('data_type', self.DTYPE) - self.snak_type = kwargs.pop('snak_type', 'value') + self.datatype = kwargs.pop('datatype', self.DTYPE) + self.snaktype = kwargs.pop('snaktype', 'value') self.references = kwargs.pop('references', None) self.qualifiers = kwargs.pop('qualifiers', None) self.is_reference = kwargs.pop('is_reference', None) @@ -95,20 +95,20 @@ def __init__(self, value, prop_nr, **kwargs): self.hash = '' self.json_representation = { - 'snaktype': self.snak_type, + 'snaktype': self.snaktype, 'property': self.prop_nr, 'datavalue': {}, - 'datatype': self.data_type + 'datatype': self.datatype } - if self.snak_type not in ['value', 'novalue', 'somevalue']: - raise ValueError('{} is not a valid snak type'.format(self.snak_type)) + if self.snaktype not in ['value', 'novalue', 'somevalue']: + raise ValueError('{} is not a valid snak type'.format(self.snaktype)) if self.if_exists not in ['REPLACE', 'APPEND', 'FORCE_APPEND', 'KEEP']: raise ValueError('{} is not a valid if_exists value'.format(self.if_exists)) - if self.value is None and self.snak_type == 'value': - raise ValueError('Parameter \'value\' can\'t be \'None\' if \'snak_type\' is \'value\'') + if self.value is None and self.snaktype == 'value': + raise ValueError('Parameter \'value\' can\'t be \'None\' if \'snaktype\' is \'value\'') if self.is_qualifier and self.is_reference: raise ValueError('A claim cannot be a reference and a qualifer at the same time') @@ -165,9 +165,9 @@ def get_sparql_value(self): return self.value def set_value(self, value): - if value is None and self.snak_type not in {'novalue', 'somevalue'}: - raise ValueError("If 'value' is None, snak_type must be novalue or somevalue") - if self.snak_type in {'novalue', 'somevalue'}: + if value is None and self.snaktype not 
in {'novalue', 'somevalue'}: + raise ValueError("If 'value' is None, snaktype must be novalue or somevalue") + if self.snaktype in {'novalue', 'somevalue'}: del self.json_representation['datavalue'] elif 'datavalue' not in self.json_representation: self.json_representation['datavalue'] = {} diff --git a/wikibaseintegrator/datatypes/commonsmedia.py b/wikibaseintegrator/datatypes/commonsmedia.py index 1e69754e..361a029a 100644 --- a/wikibaseintegrator/datatypes/commonsmedia.py +++ b/wikibaseintegrator/datatypes/commonsmedia.py @@ -19,8 +19,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -50,5 +50,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/externalid.py b/wikibaseintegrator/datatypes/externalid.py index 5dbc9d2d..bc4f2d8a 100644 --- a/wikibaseintegrator/datatypes/externalid.py +++ b/wikibaseintegrator/datatypes/externalid.py @@ -19,8 +19,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 
'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -48,5 +48,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py index ec66a9fe..7bd52523 100644 --- a/wikibaseintegrator/datatypes/form.py +++ b/wikibaseintegrator/datatypes/form.py @@ -27,8 +27,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -68,5 +68,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value']['id'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/geoshape.py b/wikibaseintegrator/datatypes/geoshape.py index 6e7eebe5..90acaa03 100644 --- a/wikibaseintegrator/datatypes/geoshape.py +++ b/wikibaseintegrator/datatypes/geoshape.py @@ -27,8 +27,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param 
is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -64,5 +64,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py index 400e3b43..b89b7248 100644 --- a/wikibaseintegrator/datatypes/globecoordinate.py +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -30,8 +30,8 @@ def __init__(self, latitude, longitude, precision, prop_nr, globe=None, wikibase :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -83,7 +83,7 @@ def get_sparql_value(self): def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': return cls(latitude=None, longitude=None, precision=None, prop_nr=jsn['property'], - snak_type=jsn['snaktype']) + snaktype=jsn['snaktype']) value = jsn['datavalue']['value'] return cls(latitude=value['latitude'], longitude=value['longitude'], 
precision=value['precision'], diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py index 2c59bbaf..8de4a88a 100644 --- a/wikibaseintegrator/datatypes/item.py +++ b/wikibaseintegrator/datatypes/item.py @@ -27,8 +27,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -71,5 +71,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py index 182ace6c..454c880b 100644 --- a/wikibaseintegrator/datatypes/lexeme.py +++ b/wikibaseintegrator/datatypes/lexeme.py @@ -27,8 +27,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -71,5 +71,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 
'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/math.py b/wikibaseintegrator/datatypes/math.py index 2f586aeb..7dfd9a7f 100644 --- a/wikibaseintegrator/datatypes/math.py +++ b/wikibaseintegrator/datatypes/math.py @@ -19,8 +19,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -48,5 +48,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py index bcdc3e68..69a21846 100644 --- a/wikibaseintegrator/datatypes/monolingualtext.py +++ b/wikibaseintegrator/datatypes/monolingualtext.py @@ -28,8 +28,8 @@ def __init__(self, text, prop_nr, language=None, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + 
:type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -51,8 +51,8 @@ def set_value(self, value): self.text, self.language = value if self.text is not None: assert isinstance(self.text, str) or self.text is None, "Expected str, found {} ({})".format(type(self.text), self.text) - elif self.snak_type == 'value': - raise ValueError("Parameter 'text' can't be 'None' if 'snak_type' is 'value'") + elif self.snaktype == 'value': + raise ValueError("Parameter 'text' can't be 'None' if 'snaktype' is 'value'") assert isinstance(self.language, str), "Expected str, found {} ({})".format(type(self.language), self.language) self.json_representation['datavalue'] = { @@ -73,7 +73,7 @@ def get_sparql_value(self): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(text=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(text=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) value = jsn['datavalue']['value'] return cls(text=value['text'], prop_nr=jsn['property'], language=value['language']) diff --git a/wikibaseintegrator/datatypes/musicalnotation.py b/wikibaseintegrator/datatypes/musicalnotation.py index e8a4a380..6d3ff2c0 100644 --- a/wikibaseintegrator/datatypes/musicalnotation.py +++ b/wikibaseintegrator/datatypes/musicalnotation.py @@ -19,8 +19,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -48,5 +48,5 @@ def 
set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py index aad1de1b..6ecfff39 100644 --- a/wikibaseintegrator/datatypes/property.py +++ b/wikibaseintegrator/datatypes/property.py @@ -27,8 +27,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -71,5 +71,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py index 400af310..caac7d5d 100644 --- a/wikibaseintegrator/datatypes/quantity.py +++ b/wikibaseintegrator/datatypes/quantity.py @@ -32,8 +32,8 @@ def __init__(self, quantity, prop_nr, upper_bound=None, lower_bound=None, unit=' :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type 
snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -83,8 +83,8 @@ def set_value(self, value): if self.upper_bound and float(self.upper_bound) < float(self.quantity): raise ValueError("Upper bound too small") - elif self.snak_type == 'value': - raise ValueError("Parameter 'quantity' can't be 'None' if 'snak_type' is 'value'") + elif self.snaktype == 'value': + raise ValueError("Parameter 'quantity' can't be 'None' if 'snaktype' is 'value'") self.json_representation['datavalue'] = { 'value': { @@ -113,7 +113,7 @@ def get_sparql_value(self): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(quantity=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(quantity=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) value = jsn['datavalue']['value'] upper_bound = value['upperBound'] if 'upperBound' in value else None diff --git a/wikibaseintegrator/datatypes/sense.py b/wikibaseintegrator/datatypes/sense.py index a1633bc7..3c7ce86b 100644 --- a/wikibaseintegrator/datatypes/sense.py +++ b/wikibaseintegrator/datatypes/sense.py @@ -27,8 +27,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -68,5 +68,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] 
== 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value']['id'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py index 6ec2bd4f..08af62b6 100644 --- a/wikibaseintegrator/datatypes/string.py +++ b/wikibaseintegrator/datatypes/string.py @@ -20,8 +20,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -49,5 +49,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/tabulardata.py b/wikibaseintegrator/datatypes/tabulardata.py index 91d4b58f..182fbc9a 100644 --- a/wikibaseintegrator/datatypes/tabulardata.py +++ b/wikibaseintegrator/datatypes/tabulardata.py @@ -19,8 +19,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference 
objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -56,5 +56,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py index c08e1c87..acbd12e1 100644 --- a/wikibaseintegrator/datatypes/time.py +++ b/wikibaseintegrator/datatypes/time.py @@ -41,8 +41,8 @@ def __init__(self, time, prop_nr, before=0, after=0, precision=11, timezone=0, c :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -84,8 +84,8 @@ def set_value(self, value): self.value = value if self.precision < 0 or self.precision > 15: raise ValueError("Invalid value for time precision, see https://www.mediawiki.org/wiki/Wikibase/DataModel/JSON#time") - elif self.snak_type == 'value': - raise ValueError("Parameter 'time' can't be 'None' if 'snak_type' is 'value'") + elif self.snaktype == 'value': + raise ValueError("Parameter 'time' can't be 'None' if 'snaktype' is 'value'") self.json_representation['datavalue'] = { 'value': { @@ -109,7 +109,7 @@ def get_sparql_value(self): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(time=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return 
cls(time=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) value = jsn['datavalue']['value'] return cls(time=value['time'], prop_nr=jsn['property'], before=value['before'], after=value['after'], precision=value['precision'], timezone=value['timezone'], diff --git a/wikibaseintegrator/datatypes/url.py b/wikibaseintegrator/datatypes/url.py index f6df7a6d..cc129774 100644 --- a/wikibaseintegrator/datatypes/url.py +++ b/wikibaseintegrator/datatypes/url.py @@ -27,8 +27,8 @@ def __init__(self, value, prop_nr, **kwargs): :type is_reference: boolean :param is_qualifier: Whether this snak is a qualifier :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str :param references: List with reference objects :type references: A data type with subclass of BaseDataType :param qualifiers: List with qualifier objects @@ -64,5 +64,5 @@ def set_value(self, value): @JsonParser def from_json(cls, jsn): if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) + return cls(value=None, prop_nr=jsn['property'], snaktype=jsn['snaktype']) return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) diff --git a/wikibaseintegrator/wbi_jsonparser.py b/wikibaseintegrator/wbi_jsonparser.py index 8a0a81bb..b9caae12 100644 --- a/wikibaseintegrator/wbi_jsonparser.py +++ b/wikibaseintegrator/wbi_jsonparser.py @@ -35,7 +35,7 @@ def __call__(self, *args): for prop_ref in jsn: ref_class = self.get_class_representation(prop_ref) ref_class.is_reference = True - ref_class.snak_type = prop_ref['snaktype'] + ref_class.snaktype = prop_ref['snaktype'] ref_class.set_hash(ref_hash) self.references[count].append(copy.deepcopy(ref_class)) @@ -50,7 +50,7 @@ def __call__(self, *args): qual_class = self.get_class_representation(qual) 
qual_class.is_qualifier = True - qual_class.snak_type = qual['snaktype'] + qual_class.snaktype = qual['snaktype'] qual_class.set_hash(qual_hash) self.qualifiers.append(qual_class) @@ -62,7 +62,7 @@ def __call__(self, *args): mainsnak.set_id(json_representation['id']) if 'rank' in json_representation: mainsnak.set_rank(json_representation['rank']) - mainsnak.snak_type = json_representation['mainsnak']['snaktype'] + mainsnak.snaktype = json_representation['mainsnak']['snaktype'] return mainsnak From 5532e41e0101e3ea539643854d0577995dc2c878 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 20 Jun 2021 22:40:11 +0200 Subject: [PATCH 009/308] Lot of changes 2 --- .gitignore | 3 +- .../inspectionProfiles/WikibaseIntegrator.xml | 9 + setup.cfg | 2 +- test/test_all.py | 30 +-- test/test_wbi_core.py | 181 +++++++++++++ test/test_wbi_core.py.old | 184 ------------- ...wbi_fastrun.py.old => test_wbi_fastrun.py} | 49 ++-- wikibaseintegrator/__init__.py | 1 + wikibaseintegrator/datatypes/basedatatype.py | 151 +++-------- wikibaseintegrator/datatypes/quantity.py | 20 +- wikibaseintegrator/datatypes/tabulardata.py | 2 + wikibaseintegrator/entities/baseentity.py | 251 +++++++----------- wikibaseintegrator/entities/item.py | 15 +- wikibaseintegrator/entities/lexeme.py | 2 + wikibaseintegrator/entities/property.py | 2 + wikibaseintegrator/models/__init__.py | 11 + wikibaseintegrator/models/aliases.py | 16 +- wikibaseintegrator/models/claims.py | 232 ++++++++++++++-- wikibaseintegrator/models/language_values.py | 39 ++- wikibaseintegrator/models/qualifiers.py | 23 -- wikibaseintegrator/models/qualifiers.py.old | 32 +++ wikibaseintegrator/models/references.py | 102 +++++++ wikibaseintegrator/models/snaks.py | 92 +++++++ wikibaseintegrator/wbi_api.py | 98 ++----- wikibaseintegrator/wbi_fastrun.py | 80 +++--- wikibaseintegrator/wbi_helpers.py | 14 + wikibaseintegrator/wikibaseintegrator.py | 41 +-- 27 files changed, 960 insertions(+), 722 
deletions(-) create mode 100644 test/test_wbi_core.py delete mode 100644 test/test_wbi_core.py.old rename test/{test_wbi_fastrun.py.old => test_wbi_fastrun.py} (72%) delete mode 100644 wikibaseintegrator/models/qualifiers.py create mode 100644 wikibaseintegrator/models/qualifiers.py.old create mode 100644 wikibaseintegrator/models/references.py create mode 100644 wikibaseintegrator/models/snaks.py create mode 100644 wikibaseintegrator/wbi_helpers.py diff --git a/.gitignore b/.gitignore index 11780056..1713957b 100644 --- a/.gitignore +++ b/.gitignore @@ -155,4 +155,5 @@ fabric.properties .idea/caches/build_file_checksums.ser # Other stuff -test*.py +/test*.py +config.py diff --git a/.idea/inspectionProfiles/WikibaseIntegrator.xml b/.idea/inspectionProfiles/WikibaseIntegrator.xml index c122a9d0..ecbc972c 100644 --- a/.idea/inspectionProfiles/WikibaseIntegrator.xml +++ b/.idea/inspectionProfiles/WikibaseIntegrator.xml @@ -41,6 +41,15 @@ + + + From 04796a7e2250db78269525ec7e120ca15a62311a Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 12 Aug 2021 17:28:47 +0200 Subject: [PATCH 045/308] Add MediaInfo support (Wikimedia Commons) (#175) * Add MediaInfo support (Wikimedia Commons) Tests are not OK because I use * Fix tests --- test/test_all.py | 8 +++ wikibaseintegrator/entities/baseentity.py | 11 +++- wikibaseintegrator/entities/mediainfo.py | 73 +++++++++++++++++++++++ wikibaseintegrator/models/snaks.py | 3 +- wikibaseintegrator/wikibaseintegrator.py | 2 + 5 files changed, 93 insertions(+), 4 deletions(-) create mode 100644 wikibaseintegrator/entities/mediainfo.py diff --git a/test/test_all.py b/test/test_all.py index 73444b7a..79f2c7d9 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -195,3 +195,11 @@ def test_ref_equals(): assert not olditem.equals(newitem, include_ref=True) olditem.references.add(datatypes.ExternalID(value='99999', prop_nr='P352')) assert olditem.equals(newitem, include_ref=True) + + +def 
test_mediainfo(): + mediainfo_item_by_title = wbi.mediainfo.get_by_title(title='File:2018-07-05-budapest-buda-hill.jpg', mediawiki_api_url='https://commons.wikimedia.org/w/api.php') + assert mediainfo_item_by_title.id == 'M75908279' + + mediainfo_item_by_id = wbi.mediainfo.get(entity_id='M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php') + assert mediainfo_item_by_id.id == 'M75908279' diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 45fc08d5..2e98f36c 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -47,6 +47,8 @@ def get_json(self) -> {}: 'id': self.id, 'claims': self.claims.get_json() } + if self.type == 'mediainfo': + json_data['statements'] = json_data.pop('claims') if not self.id: del json_data['id'] @@ -61,9 +63,12 @@ def from_json(self, json_data): self.lastrevid = json_data['lastrevid'] self.type = json_data['type'] self.id = json_data['id'] - self.claims = Claims().from_json(json_data['claims']) + if self.type == 'mediainfo': # 'claims' is named 'statements' in Wikimedia Commons MediaInfo + self.claims = Claims().from_json(json_data['statements']) + else: + self.claims = Claims().from_json(json_data['claims']) - def get(self, entity_id): + def get(self, entity_id, **kwargs): """ retrieve an item in json representation from the Wikibase instance :rtype: dict @@ -76,7 +81,7 @@ def get(self, entity_id): 'format': 'json' } - return self.api.helpers.mediawiki_api_call_helper(data=params, allow_anonymous=True) + return self.api.helpers.mediawiki_api_call_helper(data=params, allow_anonymous=True, **kwargs) def _write(self, data=None, summary='', allow_anonymous=False): """ diff --git a/wikibaseintegrator/entities/mediainfo.py b/wikibaseintegrator/entities/mediainfo.py new file mode 100644 index 00000000..49c3d130 --- /dev/null +++ b/wikibaseintegrator/entities/mediainfo.py @@ -0,0 +1,73 @@ +from __future__ import annotations + 
+from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models.aliases import Aliases +from wikibaseintegrator.models.descriptions import Descriptions +from wikibaseintegrator.models.labels import Labels + + +class MediaInfo(BaseEntity): + ETYPE = 'mediainfo' + + def __init__(self, api, labels=None, descriptions=None, aliases=None, sitelinks=None, **kwargs) -> None: + """ + + :param api: + :param labels: + :param descriptions: + :param aliases: + :param sitelinks: + :param kwargs: + """ + self.api = api + + super(MediaInfo, self).__init__(api=self.api, **kwargs) + + # Item and property specific + self.labels = labels or Labels() + self.descriptions = descriptions or Descriptions() + self.aliases = aliases or Aliases() + + def new(self, **kwargs) -> MediaInfo: + return MediaInfo(self.api, **kwargs) + + def get(self, entity_id, **kwargs) -> MediaInfo: + json_data = super(MediaInfo, self).get(entity_id=entity_id, **kwargs) + return MediaInfo(self.api).from_json(json_data=json_data['entities'][entity_id]) + + def get_by_title(self, title, sites='commonswiki', **kwargs) -> MediaInfo: + params = { + 'action': 'wbgetentities', + 'sites': sites, + 'titles': title, + 'format': 'json' + } + + json_data = self.api.helpers.mediawiki_api_call_helper(data=params, allow_anonymous=True, **kwargs) + + if len(json_data['entities'].keys()) == 0: + raise Exception('Title not found') + if len(json_data['entities'].keys()) > 1: + raise Exception('More than one element for this title') + + return MediaInfo(self.api).from_json(json_data=json_data['entities'][list(json_data['entities'].keys())[0]]) + + def get_json(self) -> {}: + return { + 'labels': self.labels.get_json(), + 'descriptions': self.descriptions.get_json(), + 'aliases': self.aliases.get_json(), + **super(MediaInfo, self).get_json() + } + + def from_json(self, json_data) -> MediaInfo: + super(MediaInfo, self).from_json(json_data=json_data) + + self.labels = 
Labels().from_json(json_data['labels']) + self.descriptions = Descriptions().from_json(json_data['descriptions']) + + return self + + def write(self, allow_anonymous=False): + json_data = super(MediaInfo, self)._write(data=self.get_json(), allow_anonymous=allow_anonymous) + return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/models/snaks.py b/wikibaseintegrator/models/snaks.py index cd73453e..60f6e4c5 100644 --- a/wikibaseintegrator/models/snaks.py +++ b/wikibaseintegrator/models/snaks.py @@ -124,7 +124,8 @@ def from_json(self, json_data) -> Snak: self.hash = json_data['hash'] if 'datavalue' in json_data: self.datavalue = json_data['datavalue'] - self.datatype = json_data['datatype'] + if 'datatype' in json_data: # datatype can be null with MediaInfo + self.datatype = json_data['datatype'] return self def get_json(self) -> {}: diff --git a/wikibaseintegrator/wikibaseintegrator.py b/wikibaseintegrator/wikibaseintegrator.py index 017d818e..2df2a0db 100644 --- a/wikibaseintegrator/wikibaseintegrator.py +++ b/wikibaseintegrator/wikibaseintegrator.py @@ -1,5 +1,6 @@ from wikibaseintegrator.entities.item import Item from wikibaseintegrator.entities.lexeme import Lexeme +from wikibaseintegrator.entities.mediainfo import MediaInfo from wikibaseintegrator.entities.property import Property from wikibaseintegrator.wbi_helpers import Helpers @@ -18,6 +19,7 @@ def __init__(self, self.item = Item(api=self) self.property = Property(api=self) self.lexeme = Lexeme(api=self) + self.mediainfo = MediaInfo(api=self) # Helpers self.helpers = Helpers() From 6b5cd060a4d64ac2d9e49ce8582df69800a44db1 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 12 Aug 2021 22:58:27 +0200 Subject: [PATCH 046/308] Update baseentity.py Add information --- wikibaseintegrator/entities/baseentity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wikibaseintegrator/entities/baseentity.py 
b/wikibaseintegrator/entities/baseentity.py index 2e98f36c..cec3eb58 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -47,7 +47,7 @@ def get_json(self) -> {}: 'id': self.id, 'claims': self.claims.get_json() } - if self.type == 'mediainfo': + if self.type == 'mediainfo': # MediaInfo change name of 'claims' to 'statements' json_data['statements'] = json_data.pop('claims') if not self.id: del json_data['id'] From 8e90997a05318c9713db3f48e798806f7ea069f6 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 15 Aug 2021 22:32:33 +0200 Subject: [PATCH 047/308] Update claims.py Add little doc --- wikibaseintegrator/models/claims.py | 1 + 1 file changed, 1 insertion(+) diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 04ee6705..f4b2adca 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -124,6 +124,7 @@ def __init__(self, **kwargs): self.references = kwargs.pop('references', References()) self.removed = False + # Allow registration of subclasses of Claim into Claim.subclasses def __init_subclass__(cls, **kwargs): super().__init_subclass__(**kwargs) cls.subclasses.append(cls) From e82a2d139b16ab5d24c6ccd9ca7107874999b77f Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 15 Aug 2021 22:33:13 +0200 Subject: [PATCH 048/308] Update README.md Quick update of one example --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index e684b2e1..629be354 100644 --- a/README.md +++ b/README.md @@ -67,12 +67,13 @@ for ['Human'](https://www.wikidata.org/entity/Q5)): ```python -from wikibaseintegrator.entities import item +from wikibaseintegrator import WikibaseIntegrator -my_first_wikidata_item = item.Item(item_id='Q5') +wbi = WikibaseIntegrator() +my_first_wikidata_item = wbi.item.get(entity_id='Q5') # to check 
successful installation and retrieval of the data, you can print the json representation of the item -print(my_first_wikidata_item.get_json_representation()) +print(my_first_wikidata_item.get_json()) ``` # Using a Wikibase instance # From 450adfb6b5dfb13826e3cace70f5f0eeaafcbc6c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 21 Aug 2021 22:56:27 +0200 Subject: [PATCH 049/308] Update technical files --- .idea/WikibaseIntegrator.iml | 1 - LICENSE.txt => LICENSE | 0 2 files changed, 1 deletion(-) rename LICENSE.txt => LICENSE (100%) diff --git a/.idea/WikibaseIntegrator.iml b/.idea/WikibaseIntegrator.iml index 63580114..d19b962d 100644 --- a/.idea/WikibaseIntegrator.iml +++ b/.idea/WikibaseIntegrator.iml @@ -6,7 +6,6 @@ - diff --git a/LICENSE.txt b/LICENSE similarity index 100% rename from LICENSE.txt rename to LICENSE From a624135eb515742ef60d67160fa97536a22a9f2a Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 21 Aug 2021 22:59:29 +0200 Subject: [PATCH 050/308] Update wbi_login.py Allow warnings support for clientlogin/login --- wikibaseintegrator/wbi_login.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index f381f08a..32e0a45c 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -160,10 +160,10 @@ def __init__(self, user=None, pwd=None, mediawiki_api_url=None, mediawiki_index_ elif debug: print("Successfully logged in as", login_result['login']['lgusername']) - if 'warnings' in login_result: - print("MediaWiki login warnings messages:") - for message in login_result['warnings']: - print("* {}: {}".format(message, login_result['warnings'][message]['*'])) + if 'warnings' in login_result: + print("MediaWiki login warnings messages:") + for message in login_result['warnings']: + print("* {}: {}".format(message, login_result['warnings'][message]['*'])) 
self.generate_edit_credentials() From f45b0a387f92d2e99237c14dbdaa24f6b9ee1c90 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 21 Aug 2021 22:59:58 +0200 Subject: [PATCH 051/308] Update qualifiers.py --- wikibaseintegrator/models/qualifiers.py | 26 +++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/wikibaseintegrator/models/qualifiers.py b/wikibaseintegrator/models/qualifiers.py index f67376d3..7061b5a1 100644 --- a/wikibaseintegrator/models/qualifiers.py +++ b/wikibaseintegrator/models/qualifiers.py @@ -7,6 +7,26 @@ class Qualifiers: def __init__(self): self.qualifiers = {} + @property + def qualifiers(self): + return self.__qualifiers + + @qualifiers.setter + def qualifiers(self, value): + assert isinstance(value, dict) + self.__qualifiers = value + + def set(self, qualifiers): + if isinstance(qualifiers, list): + for qualifier in qualifiers: + self.add(qualifier) + elif qualifiers is None: + self.qualifiers = {} + else: + self.qualifiers = qualifiers + + return self + def get(self, property=None): return self.qualifiers[property] @@ -44,5 +64,11 @@ def get_json(self) -> {}: json_data[property].append(qualifier.get_json()) return json_data + def __iter__(self): + iterate = [] + for qualifier in self.qualifiers.values(): + iterate.extend(qualifier) + return iter(iterate) + def __len__(self): return len(self.qualifiers) From 1e20bf9632c2aa0b24a8b45a41de6b72dd59d39e Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 21 Aug 2021 23:04:18 +0200 Subject: [PATCH 052/308] Update README.md --- README.md | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index db81eb4f..47ca2e48 100644 --- a/README.md +++ b/README.md @@ -5,14 +5,6 @@ [![Pyversions](https://img.shields.io/pypi/pyversions/wikibaseintegrator.svg)](https://pypi.python.org/pypi/wikibaseintegrator) 
[![PyPi](https://img.shields.io/pypi/v/wikibaseintegrator.svg)](https://pypi.python.org/pypi/wikibaseintegrator) -# Breaking changes in v0.12 # - -I am currently doing a big rewrite of this library.
-You can track the progress and ask questions in the Pull Request [#152](https://github.com/LeMyst/WikibaseIntegrator/pull/152).
-It's a work in progress and can heavily change during development..
-This change will break the compatibility with all existing scripts.
-I will continue to maintain the current version (v0.11) even after the release of V0.12+ (if the merge is performed). -
@@ -69,7 +61,6 @@ To test for correct installation, start a Python console and execute the followi for ['Human'](https://www.wikidata.org/entity/Q5)): ```python - from wikibaseintegrator import WikibaseIntegrator wbi = WikibaseIntegrator() @@ -359,8 +350,8 @@ login_instance = wbi_login.Login(user='', pwd='') # We have raw data, which should be written to Wikidata, namely two human NCBI entrez gene IDs mapped to two Ensembl Gene IDs raw_data = { - '50943': 'ENST00000376197', - '1029': 'ENST00000498124' + '50943': 'ENST00000376197', + '1029': 'ENST00000498124' } for entrez_id, ensembl in raw_data.items(): From d8df441332e9e0da1c28ac552b4b459bcf2edda1 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 21 Aug 2021 23:05:47 +0200 Subject: [PATCH 053/308] Update README.md --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 47ca2e48..00bc6ef1 100644 --- a/README.md +++ b/README.md @@ -72,17 +72,17 @@ print(my_first_wikidata_item.get_json()) # Using a Wikibase instance # -WikibaseIntegrator use Wikidata as default endpoint. To use a Wikibase instance instead, you can overload the +WikibaseIntegrator use Wikidata as default endpoint. To use another Wikibase instance instead, you can overload the wbi_config. 
-An example for a Wikibase instance installed with [wikibase-docker](https://github.com/wmde/wikibase-docker), add this -to the top of your script: +An example for a Wikibase instance installed with [Wikibase Docker](https://www.mediawiki.org/wiki/Wikibase/Docker), add +this to the top of your script: ```python from wikibaseintegrator.wbi_config import config as wbi_config -wbi_config['MEDIAWIKI_API_URL'] = 'http://localhost:8181/api.php' -wbi_config['SPARQL_ENDPOINT_URL'] = 'http://localhost:8989/bigdata/sparql' +wbi_config['MEDIAWIKI_API_URL'] = 'http://localhost/api.php' +wbi_config['SPARQL_ENDPOINT_URL'] = 'http://localhost/bigdata/sparql' wbi_config['WIKIBASE_URL'] = 'http://wikibase.svc' ``` From aa4a52aba2a4c8742b08de5b73cd28301f1ee275 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 21 Aug 2021 23:09:06 +0200 Subject: [PATCH 054/308] Update basedatatype.py Redundant __repr__() --- wikibaseintegrator/datatypes/basedatatype.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index f9224f98..79dc2cfc 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -103,11 +103,3 @@ def ref_equal(oldref, newref): return True else: return False - - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), - ) From 2c396f22b667372e39658d3b74f3d758eaa5c3aa Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 21 Aug 2021 23:14:46 +0200 Subject: [PATCH 055/308] Update aliases.py Change test order to correctly check None --- wikibaseintegrator/models/aliases.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git 
a/wikibaseintegrator/models/aliases.py b/wikibaseintegrator/models/aliases.py index 7c45b148..05f7e732 100644 --- a/wikibaseintegrator/models/aliases.py +++ b/wikibaseintegrator/models/aliases.py @@ -33,20 +33,20 @@ def set(self, language=None, values=None, if_exists='APPEND'): assert language is not None - if isinstance(values, str): - values = [values] - elif not isinstance(values, list) and values is not None: - raise TypeError("value must be a str or list") - if language not in self.aliases: self.aliases[language] = [] - if values is None: + if values is None or values == '': if if_exists != 'KEEP': for alias in self.aliases[language]: alias.remove() return self.aliases[language] + if isinstance(values, str): + values = [values] + elif not isinstance(values, list) and values is not None: + raise TypeError("value must be a str or list") + if if_exists == 'REPLACE': aliases = [] for value in values: From 8539d191005e55b847697042c53c1c752878250c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 21 Aug 2021 23:17:59 +0200 Subject: [PATCH 056/308] Update item.py Allow to pass parameters to BaseEntity.get() --- wikibaseintegrator/entities/item.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wikibaseintegrator/entities/item.py b/wikibaseintegrator/entities/item.py index c3408c81..0bcbad5b 100644 --- a/wikibaseintegrator/entities/item.py +++ b/wikibaseintegrator/entities/item.py @@ -35,8 +35,8 @@ def __init__(self, api, labels=None, descriptions=None, aliases=None, sitelinks= def new(self, **kwargs) -> Item: return Item(self.api, **kwargs) - def get(self, entity_id) -> Item: - json_data = super(Item, self).get(entity_id=entity_id) + def get(self, entity_id, **kwargs) -> Item: + json_data = super(Item, self).get(entity_id=entity_id, **kwargs) return Item(self.api).from_json(json_data=json_data['entities'][entity_id]) def get_json(self) -> {}: From 0188d1acc2c232ecc8e8f20f113b605cba6809c6 Mon Sep 17 00:00:00 
2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 21 Aug 2021 23:55:01 +0200 Subject: [PATCH 057/308] Update claims.py --- wikibaseintegrator/models/claims.py | 40 +++++++++++++++++------------ 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index f4b2adca..10832839 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -3,8 +3,9 @@ import copy from typing import Union +from wikibaseintegrator.models.qualifiers import Qualifiers from wikibaseintegrator.models.references import References -from wikibaseintegrator.models.snaks import Snak, Snaks +from wikibaseintegrator.models.snaks import Snak class Claims: @@ -39,6 +40,7 @@ def add(self, claims: Union[list, Claim, None] = None, if_exists='REPLACE') -> C elif not isinstance(claims, list): raise ValueError + # TODO: Don't replace if claim is the same if if_exists == 'REPLACE': for claim in claims: if claim is not None: @@ -46,7 +48,8 @@ def add(self, claims: Union[list, Claim, None] = None, if_exists='REPLACE') -> C property = claim.mainsnak.property_number if property in self.claims: for claim_to_remove in self.claims[property]: - claim_to_remove.remove() + if claim_to_remove not in claims: + claim_to_remove.remove() for claim in claims: if claim is not None: @@ -65,12 +68,18 @@ def add(self, claims: Union[list, Claim, None] = None, if_exists='REPLACE') -> C if claim not in self.claims[property]: self.claims[property].append(claim) elif if_exists == 'REPLACE': - self.claims[property].append(claim) + if claim not in self.claims[property]: + self.claims[property].append(claim) return self - def clear(self): - self.claims = {} + def from_json(self, json_data) -> Claims: + for property in json_data: + for claim in json_data[property]: + # data_type = [x for x in BaseDataType.__subclasses__() if x.DTYPE == alias['mainsnak']['datatype']][0] + 
self.add(claims=Claim().from_json(claim), if_exists='FORCE_APPEND') + + return self def get_json(self) -> {}: json_data = {} @@ -81,13 +90,9 @@ def get_json(self) -> {}: json_data[property].append(claim.get_json()) return json_data - def from_json(self, json_data) -> Claims: - for property in json_data: - for claim in json_data[property]: - # data_type = [x for x in BaseDataType.__subclasses__() if x.DTYPE == alias['mainsnak']['datatype']][0] - self.add(claims=Claim().from_json(claim), if_exists='FORCE_APPEND') - return self + def clear(self): + self.claims = {} def __len__(self): return len(self.claims) @@ -114,13 +119,10 @@ class Claim: def __init__(self, **kwargs): self.mainsnak = Snak(datatype=self.DTYPE) self.type = 'statement' - # self.qualifiers = Snaks() - self.qualifiers = kwargs.pop('qualifiers', Snaks()) + self.qualifiers = kwargs.pop('qualifiers', Qualifiers()) self.qualifiers_order = [] self.id = None - # self.rank = None self.rank = kwargs.pop('rank', 'normal') - # self.references = References() self.references = kwargs.pop('references', References()) self.removed = False @@ -151,7 +153,11 @@ def qualifiers(self): @qualifiers.setter def qualifiers(self, value): - self.__qualifiers = value + assert isinstance(value, (Qualifiers, list)) + if isinstance(value, list): + self.__qualifiers = Qualifiers().set(value) + else: + self.__qualifiers = value @property def qualifiers_order(self): @@ -203,7 +209,7 @@ def from_json(self, json_data) -> Claim: self.mainsnak = Snak().from_json(json_data['mainsnak']) self.type = json_data['type'] if 'qualifiers' in json_data: - self.qualifiers = Snaks().from_json(json_data['qualifiers']) + self.qualifiers = Qualifiers().from_json(json_data['qualifiers']) if 'qualifiers-order' in json_data: self.qualifiers_order = json_data['qualifiers-order'] self.id = json_data['id'] From e1081ac218b51236fa2349e627cab19c300dc1c6 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 22 Aug 2021 01:32:20 
+0200 Subject: [PATCH 058/308] Update baseentity.py --- wikibaseintegrator/entities/baseentity.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index cec3eb58..68b5a222 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -12,13 +12,13 @@ class BaseEntity(object): ETYPE = 'base-entity' - def __init__(self, api, **kwargs): + def __init__(self, api, lastrevid=None, type=None, id=None, claims=None): self.api = api - self.lastrevid = kwargs.pop('lastrevid', None) - self.type = kwargs.pop('type', self.ETYPE) - self.id = kwargs.pop('id', None) - self.claims = kwargs.pop('claims', Claims()) + self.lastrevid = lastrevid + self.type = type or self.ETYPE + self.id = id + self.claims = claims or Claims() self.json = {} @@ -161,7 +161,7 @@ def init_fastrun(self, base_filter=None, use_refs=False, case_insensitive=False, if base_filter is None: base_filter = {} - print('Initialize Fast Run') + print('Initialize Fast Run init_fastrun') # We search if we already have a FastRunContainer with the same parameters to re-use it for c in BaseEntity.fast_run_store: if (c.base_filter == base_filter) and (c.use_refs == use_refs) and (c.case_insensitive == case_insensitive) and ( From 390bee3f486c5cfcc444e79cefe95af4f8f2bd3a Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 22 Aug 2021 01:38:42 +0200 Subject: [PATCH 059/308] Move Helpers functions --- test/test_all.py | 12 +- test/test_wbi_core.py | 6 +- test/test_wbi_login.py | 4 +- wikibaseintegrator/__init__.py | 1 - wikibaseintegrator/datatypes/quantity.py | 10 +- wikibaseintegrator/entities/baseentity.py | 3 +- wikibaseintegrator/wbi_fastrun.py | 13 +- wikibaseintegrator/wbi_helpers.py | 725 +++++++++++----------- wikibaseintegrator/wikibaseintegrator.py | 5 +- 9 files changed, 390 insertions(+), 389 deletions(-) diff --git 
a/test/test_all.py b/test/test_all.py index 79f2c7d9..d47a16ed 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -7,7 +7,7 @@ from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.entities.baseentity import MWApiError from wikibaseintegrator.wbi_config import config -from wikibaseintegrator.wbi_helpers import Helpers +from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper config['DEBUG'] = True @@ -17,13 +17,13 @@ class TestMediawikiApiCall(unittest.TestCase): def test_all(self): with self.assertRaises(MWApiError): - Helpers.mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, mediawiki_api_url="https://www.wikidataaaaaaa.org", max_retries=3, - retry_after=1, allow_anonymous=True) + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, mediawiki_api_url="https://www.wikidataaaaaaa.org", max_retries=3, + retry_after=1, allow_anonymous=True) with self.assertRaises(requests.HTTPError): - Helpers.mediawiki_api_call_helper(data=None, mediawiki_api_url="https://httpbin.org/status/400", max_retries=3, retry_after=1, allow_anonymous=True) + mediawiki_api_call_helper(data=None, mediawiki_api_url="https://httpbin.org/status/400", max_retries=3, retry_after=1, allow_anonymous=True) - test = Helpers.mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, - allow_anonymous=True) + test = mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, + allow_anonymous=True) print(test) diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index c6a948cd..82fba532 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -6,7 +6,7 @@ MusicalNotation, Lexeme, Form, Sense from wikibaseintegrator.entities import Item from wikibaseintegrator.models import LanguageValues -from wikibaseintegrator.wbi_helpers import Helpers +from 
wikibaseintegrator.wbi_helpers import search_entities, generate_entity_instances wbi = WikibaseIntegrator() @@ -135,14 +135,14 @@ def test_label(self): assert 'remove' in item.get_json()['aliases']['ak'][0] def test_wd_search(self): - t = Helpers.search_entities('rivaroxaban') + t = search_entities('rivaroxaban') print('Number of results: ', len(t)) self.assertIsNot(len(t), 0) def test_entity_generator(self): entities = ['Q408883', 'P715', 'Q18046452'] - entity_instances = Helpers.generate_entity_instances(entities=entities) + entity_instances = generate_entity_instances(entities=entities) for qid, entity in entity_instances: self.assertIn(qid, entities) diff --git a/test/test_wbi_login.py b/test/test_wbi_login.py index ccffeeeb..6a03cc6e 100644 --- a/test/test_wbi_login.py +++ b/test/test_wbi_login.py @@ -4,7 +4,7 @@ import pytest from wikibaseintegrator import wbi_login -from wikibaseintegrator.wbi_helpers import Helpers +from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper # look for environment variables. 
if none set, don't do anything WDUSER = os.getenv("WDUSER") @@ -22,4 +22,4 @@ def test_write(): if WDUSER and WDPASS: login = wbi_login.Login(WDUSER, WDPASS) with pytest.raises(ValueError): - Helpers.mediawiki_api_call_helper(data=None, login=login, mediawiki_api_url='https://unsdfdskfjljzkerezr.org/w/api.php') + mediawiki_api_call_helper(data=None, login=login, mediawiki_api_url='https://unsdfdskfjljzkerezr.org/w/api.php') diff --git a/wikibaseintegrator/__init__.py b/wikibaseintegrator/__init__.py index 37da1637..8d1c82c7 100644 --- a/wikibaseintegrator/__init__.py +++ b/wikibaseintegrator/__init__.py @@ -1,2 +1 @@ -from .wbi_helpers import Helpers from .wikibaseintegrator import WikibaseIntegrator diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py index 34c53ae2..964832db 100644 --- a/wikibaseintegrator/datatypes/quantity.py +++ b/wikibaseintegrator/datatypes/quantity.py @@ -1,6 +1,6 @@ from wikibaseintegrator.datatypes.basedatatype import BaseDataType from wikibaseintegrator.wbi_config import config -from wikibaseintegrator.wbi_helpers import Helpers +from wikibaseintegrator.wbi_helpers import format_amount class Quantity(BaseDataType): @@ -56,12 +56,12 @@ def __init__(self, quantity, upper_bound=None, lower_bound=None, unit='1', wikib self.quantity, self.unit, self.upper_bound, self.lower_bound = value if self.quantity is not None: - self.quantity = Helpers.format_amount(self.quantity) + self.quantity = format_amount(self.quantity) self.unit = str(self.unit) if self.upper_bound: - self.upper_bound = Helpers.format_amount(self.upper_bound) + self.upper_bound = format_amount(self.upper_bound) if self.lower_bound: - self.lower_bound = Helpers.format_amount(self.lower_bound) + self.lower_bound = format_amount(self.lower_bound) # Integrity checks for value and bounds try: @@ -98,4 +98,4 @@ def __init__(self, quantity, upper_bound=None, lower_bound=None, unit='1', wikib self.value = (self.quantity, self.unit, 
self.upper_bound, self.lower_bound) def get_sparql_value(self): - return Helpers.format_amount(self.quantity) + return format_amount(self.quantity) diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 68b5a222..5274a475 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -5,6 +5,7 @@ from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_exceptions import SearchOnlyError, NonUniqueLabelDescriptionPairError, MWApiError from wikibaseintegrator.wbi_fastrun import FastRunContainer +from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper class BaseEntity(object): @@ -137,7 +138,7 @@ def _write(self, data=None, summary='', allow_anonymous=False): print(payload) try: - json_data = self.api.helpers.mediawiki_api_call_helper(data=payload, login=self.api.login, allow_anonymous=allow_anonymous) + json_data = mediawiki_api_call_helper(data=payload, login=self.api.login, allow_anonymous=allow_anonymous, is_bot=self.api.is_bot) if 'error' in json_data and 'messages' in json_data['error']: error_msg_names = set(x.get('name') for x in json_data['error']['messages']) diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 9de9271e..e5cb270f 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -6,6 +6,7 @@ from wikibaseintegrator import Helpers from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_helpers import format_amount, execute_sparql_query class FastRunContainer(object): @@ -360,7 +361,7 @@ def format_query_results(self, r: list, prop_nr: str) -> None: if i['v']['type'] == 'uri' and prop_dt == 'wikibase-item': i['v'] = i['v']['value'].split('/')[-1] elif i['v']['type'] == 'literal' and prop_dt == 'quantity': - i['v'] = Helpers.format_amount(i['v']['value']) + i['v'] = format_amount(i['v']['value']) else: i['v'] = i['v']['value'] @@ -377,7 
+378,7 @@ def format_query_results(self, r: list, prop_nr: str) -> None: if i['qval']['type'] == 'uri' and qual_prop_dt == 'wikibase-item': i['qval'] = i['qval']['value'].split('/')[-1] elif i['qval']['type'] == 'literal' and qual_prop_dt == 'quantity': - i['qval'] = Helpers.format_amount(i['qval']['value']) + i['qval'] = format_amount(i['qval']['value']) else: i['qval'] = i['qval']['value'] @@ -387,7 +388,7 @@ def format_query_results(self, r: list, prop_nr: str) -> None: if i['rval']['type'] == 'uri' and ref_prop_dt == 'wikibase-item': i['rval'] = i['rval']['value'].split('/')[-1] elif i['rval']['type'] == 'literal' and ref_prop_dt == 'quantity': - i['rval'] = Helpers.format_amount(i['rval']['value']) + i['rval'] = format_amount(i['rval']['value']) else: i['rval'] = i['rval']['value'] @@ -441,7 +442,7 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: if self.debug: print(query) - r = Helpers.execute_sparql_query(query, endpoint=self.sparql_endpoint_url)['results']['bindings'] + r = execute_sparql_query(query, endpoint=self.sparql_endpoint_url)['results']['bindings'] count = int(r[0]['c']['value']) print("Count: {}".format(count)) num_pages = (int(count) // page_size) + 1 @@ -524,7 +525,7 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: if self.debug: print(query) - results = Helpers.execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] + results = execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] self.format_query_results(results, prop_nr) self.update_frc_from_query(results, prop_nr) page_count += 1 @@ -560,7 +561,7 @@ def _query_lang(self, lang: str, lang_data_type: str): if self.debug: print(query) - return Helpers.execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] + return execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] @staticmethod def _process_lang(result: list): 
diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index 38ffafbc..c6d0cdfc 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -8,400 +8,399 @@ from wikibaseintegrator.wbi_exceptions import MWApiError, SearchError -class Helpers(object): - - @staticmethod - @wbi_backoff() - def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries=1000, retry_after=60, **kwargs): +def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries=1000, retry_after=60, **kwargs): + """ + :param method: 'GET' or 'POST' + :param mediawiki_api_url: + :param session: If a session is passed, it will be used. Otherwise a new requests session is created + :param max_retries: If api request fails due to rate limiting, maxlag, or readonly mode, retry up to `max_retries` times + :type max_retries: int + :param retry_after: Number of seconds to wait before retrying request (see max_retries) + :type retry_after: int + :param kwargs: Passed to requests.request + :return: + """ + + mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url + + # TODO: Add support for 'multipart/form-data' when using POST (https://www.mediawiki.org/wiki/API:Edit#Large_edits) + + if 'data' in kwargs and kwargs['data']: + if 'format' not in kwargs['data']: + kwargs['data'].update({'format': 'json'}) + elif kwargs['data']['format'] != 'json': + raise ValueError("'format' can only be 'json' when using mediawiki_api_call()") + + response = None + session = session if session else requests.Session() + for n in range(max_retries): + try: + response = session.request(method, mediawiki_api_url, **kwargs) + except requests.exceptions.ConnectionError as e: + print("Connection error: {}. Sleeping for {} seconds.".format(e, retry_after)) + sleep(retry_after) + continue + if response.status_code == 503: + print("service unavailable. 
sleeping for {} seconds".format(retry_after)) + sleep(retry_after) + continue + + response.raise_for_status() + json_data = response.json() """ - :param method: 'GET' or 'POST' - :param mediawiki_api_url: - :param session: If a session is passed, it will be used. Otherwise a new requests session is created - :param max_retries: If api request fails due to rate limiting, maxlag, or readonly mode, retry up to `max_retries` times - :type max_retries: int - :param retry_after: Number of seconds to wait before retrying request (see max_retries) - :type retry_after: int - :param kwargs: Passed to requests.request - :return: + Mediawiki api response has code = 200 even if there are errors. + rate limit doesn't return HTTP 429 either. may in the future + https://phabricator.wikimedia.org/T172293 """ - - mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - - # TODO: Add support for 'multipart/form-data' when using POST (https://www.mediawiki.org/wiki/API:Edit#Large_edits) - - if 'data' in kwargs and kwargs['data']: - if 'format' not in kwargs['data']: - kwargs['data'].update({'format': 'json'}) - elif kwargs['data']['format'] != 'json': - raise ValueError("'format' can only be 'json' when using mediawiki_api_call()") - - response = None - session = session if session else requests.session() - for n in range(max_retries): - try: - response = session.request(method, mediawiki_api_url, **kwargs) - except requests.exceptions.ConnectionError as e: - print("Connection error: {}. Sleeping for {} seconds.".format(e, retry_after)) - sleep(retry_after) + if 'error' in json_data: + # rate limiting + error_msg_names = set() + if 'messages' in json_data['error']: + error_msg_names = set(x.get('name') for x in json_data['error']['messages']) + if 'actionthrottledtext' in error_msg_names: + sleep_sec = int(response.headers.get('retry-after', retry_after)) + print("{}: rate limited. 
sleeping for {} seconds".format(datetime.datetime.utcnow(), sleep_sec)) + sleep(sleep_sec) continue - if response.status_code == 503: - print("service unavailable. sleeping for {} seconds".format(retry_after)) - sleep(retry_after) - continue - - response.raise_for_status() - json_data = response.json() - """ - Mediawiki api response has code = 200 even if there are errors. - rate limit doesn't return HTTP 429 either. may in the future - https://phabricator.wikimedia.org/T172293 - """ - if 'error' in json_data: - # rate limiting - error_msg_names = set() - if 'messages' in json_data['error']: - error_msg_names = set(x.get('name') for x in json_data['error']['messages']) - if 'actionthrottledtext' in error_msg_names: - sleep_sec = int(response.headers.get('retry-after', retry_after)) - print("{}: rate limited. sleeping for {} seconds".format(datetime.datetime.utcnow(), sleep_sec)) - sleep(sleep_sec) - continue - - # maxlag - if 'code' in json_data['error'] and json_data['error']['code'] == 'maxlag': - sleep_sec = json_data['error'].get('lag', retry_after) - print("{}: maxlag. sleeping for {} seconds".format(datetime.datetime.utcnow(), sleep_sec)) - sleep(sleep_sec) - continue - - # readonly - if 'code' in json_data['error'] and json_data['error']['code'] == 'readonly': - print('The Wikibase instance is currently in readonly mode, waiting for {} seconds'.format(retry_after)) - sleep(retry_after) - continue - - # others case - raise MWApiError(response.json() if response else {}) - - # there is no error or waiting. break out of this loop and parse response - break - else: - # the first time I've ever used for - else!! - # else executes if the for loop completes normally. i.e. 
does not encouter a `break` - # in this case, that means it tried this api call 10 times - raise MWApiError(response.json() if response else {}) - - return json_data - - @staticmethod - def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_agent=None, allow_anonymous=False, max_retries=1000, retry_after=60): - mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent - - if not allow_anonymous: - if login is None: - # Force allow_anonymous as False by default to ask for a login object - raise ValueError("allow_anonymous can't be False and login is None at the same time.") - elif mediawiki_api_url != login.mediawiki_api_url: - raise ValueError("mediawiki_api_url can't be different with the one in the login object.") - - headers = { - 'User-Agent': user_agent - } - - if data is not None: - if login is not None and 'token' not in data: - data.update({'token': login.get_edit_token()}) - elif 'token' not in data: - data.update({'token': '+\\'}) - - if not allow_anonymous: - # Always assert user if allow_anonymous is False - if 'assert' not in data: - data.update({'assert': 'user'}) - if 'token' in data and data['token'] == '+\\': - raise Exception("Anonymous edit are not allowed by default. 
Set allow_anonymous to True to edit mediawiki anonymously.") - elif 'assert' not in data: - # Always assert anon if allow_anonymous is True - data.update({'assert': 'anon'}) - if config['MAXLAG'] > 0: - data.update({'maxlag': config['MAXLAG']}) - - login_session = login.get_session() if login is not None else None - - return Helpers.mediawiki_api_call('POST', mediawiki_api_url, login_session, data=data, headers=headers, max_retries=max_retries, retry_after=retry_after) - - @staticmethod - @wbi_backoff() - def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max_retries=1000, retry_after=60, debug=False): - """ - Static method which can be used to execute any SPARQL query - :param prefix: The URI prefixes required for an endpoint, default is the Wikidata specific prefixes - :param query: The actual SPARQL query string - :param endpoint: The URL string for the SPARQL endpoint. Default is the URL for the Wikidata SPARQL endpoint - :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. - :type user_agent: str - :param max_retries: The number time this function should retry in case of header reports. - :param retry_after: the number of seconds should wait upon receiving either an error code or the Query Service is not reachable. - :param debug: Enable debug output. 
- :type debug: boolean - :return: The results of the query are returned in JSON format - """ - - sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if endpoint is None else endpoint - user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent - - if prefix: - query = prefix + '\n' + query - - params = { - 'query': '#Tool: WikibaseIntegrator wbi_functions.execute_sparql_query\n' + query, - 'format': 'json' - } - - headers = { - 'Accept': 'application/sparql-results+json', - 'User-Agent': user_agent, - 'Content-Type': 'multipart/form-data' - } - if debug: - print(params['query']) - - for n in range(max_retries): - try: - response = requests.post(sparql_endpoint_url, params=params, headers=headers) - except requests.exceptions.ConnectionError as e: - print("Connection error: {}. Sleeping for {} seconds.".format(e, retry_after)) - sleep(retry_after) - continue - if response.status_code == 503: - print("Service unavailable (503). Sleeping for {} seconds".format(retry_after)) - sleep(retry_after) + # maxlag + if 'code' in json_data['error'] and json_data['error']['code'] == 'maxlag': + sleep_sec = json_data['error'].get('lag', retry_after) + print("{}: maxlag. sleeping for {} seconds".format(datetime.datetime.utcnow(), sleep_sec)) + sleep(sleep_sec) continue - if response.status_code == 429: - if 'retry-after' in response.headers.keys(): - retry_after = response.headers['retry-after'] - print("Too Many Requests (429). 
Sleeping for {} seconds".format(retry_after)) + + # readonly + if 'code' in json_data['error'] and json_data['error']['code'] == 'readonly': + print('The Wikibase instance is currently in readonly mode, waiting for {} seconds'.format(retry_after)) sleep(retry_after) continue - response.raise_for_status() - results = response.json() - - return results - @staticmethod - def merge_items(from_id, to_id, ignore_conflicts='', **kwargs): - """ - A static method to merge two items - :param from_id: The QID which should be merged into another item - :type from_id: string with 'Q' prefix - :param to_id: The QID into which another item should be merged - :type to_id: string with 'Q' prefix - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :param ignore_conflicts: A string with the values 'description', 'statement' or 'sitelink', separated by a pipe ('|') if using more than one of those. - :type ignore_conflicts: str - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. - :type user_agent: str - """ + # others case + raise MWApiError(response.json() if response else {}) - params = { - 'action': 'wbmergeitems', - 'fromid': from_id, - 'toid': to_id, - 'format': 'json', - 'bot': '', - 'ignoreconflicts': ignore_conflicts - } + # there is no error or waiting. break out of this loop and parse response + break + else: + # the first time I've ever used for - else!! + # else executes if the for loop completes normally. i.e. 
does not encouter a `break` + # in this case, that means it tried this api call 10 times + raise MWApiError(response.json() if response else {}) - return Helpers.mediawiki_api_call_helper(data=params, **kwargs) + return json_data - @staticmethod - def merge_lexemes(source, target, summary=None, **kwargs): - """ - A static method to merge two items - :param source: The QID which should be merged into another item - :type source: string with 'Q' prefix - :param target: The QID into which another item should be merged - :type target: string with 'Q' prefix - """ +def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_agent=None, allow_anonymous=False, max_retries=1000, retry_after=60, is_bot=False): + mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url + user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent - params = { - 'action': 'wblmergelexemes', - 'fromid': source, - 'toid': target, - 'format': 'json', - 'bot': '' - } + if not allow_anonymous: + if login is None: + # Force allow_anonymous as False by default to ask for a login object + raise ValueError("allow_anonymous can't be False and login is None at the same time.") + elif mediawiki_api_url != login.mediawiki_api_url: + raise ValueError("mediawiki_api_url can't be different with the one in the login object.") - if summary: - params.update({'summary': summary}) + headers = { + 'User-Agent': user_agent + } - return Helpers.mediawiki_api_call_helper(data=params, **kwargs) + if data is not None: + if login is not None and 'token' not in data: + data.update({'token': login.get_edit_token()}) + elif 'token' not in data: + data.update({'token': '+\\'}) - @staticmethod - def remove_claims(claim_id, summary=None, revision=None, **kwargs): - """ - Delete an item - :param claim_id: One GUID or several (pipe-separated) GUIDs identifying the claims to be removed. All claims must belong to the same entity. 
- :type claim_id: string - :param summary: Summary for the edit. Will be prepended by an automatically generated comment. - :type summary: str - :param revision: The numeric identifier for the revision to base the modification on. This is used for detecting conflicts during save. - :type revision: str - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. - :type user_agent: str - """ + if not allow_anonymous: + # Always assert user if allow_anonymous is False + if 'assert' not in data: + if is_bot: + data.update({'assert': 'bot'}) + else: + data.update({'assert': 'user'}) + if 'token' in data and data['token'] == '+\\': + raise Exception("Anonymous edit are not allowed by default. Set allow_anonymous to True to edit mediawiki anonymously.") + elif 'assert' not in data: + # Always assert anon if allow_anonymous is True + data.update({'assert': 'anon'}) + if config['MAXLAG'] > 0: + data.update({'maxlag': config['MAXLAG']}) + + login_session = login.get_session() if login is not None else None + + return mediawiki_api_call('POST', mediawiki_api_url, login_session, data=data, headers=headers, max_retries=max_retries, retry_after=retry_after) + + +@wbi_backoff() +def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max_retries=1000, retry_after=60, debug=False): + """ + Static method which can be used to execute any SPARQL query + :param prefix: The URI prefixes required for an endpoint, default is the Wikidata specific prefixes + :param query: The actual SPARQL query string + :param endpoint: The URL string for the SPARQL endpoint. 
Default is the URL for the Wikidata SPARQL endpoint + :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. + :type user_agent: str + :param max_retries: The number time this function should retry in case of header reports. + :param retry_after: the number of seconds should wait upon receiving either an error code or the Query Service is not reachable. + :param debug: Enable debug output. + :type debug: boolean + :return: The results of the query are returned in JSON format + """ + + sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if endpoint is None else endpoint + user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent + + if prefix: + query = prefix + '\n' + query + + params = { + 'query': '#Tool: WikibaseIntegrator wbi_functions.execute_sparql_query\n' + query, + 'format': 'json' + } + + headers = { + 'Accept': 'application/sparql-results+json', + 'User-Agent': user_agent, + 'Content-Type': 'multipart/form-data' + } + + if debug: + print(params['query']) + + for n in range(max_retries): + try: + response = requests.post(sparql_endpoint_url, params=params, headers=headers) + except requests.exceptions.ConnectionError as e: + print("Connection error: {}. Sleeping for {} seconds.".format(e, retry_after)) + sleep(retry_after) + continue + if response.status_code == 503: + print("Service unavailable (503). Sleeping for {} seconds".format(retry_after)) + sleep(retry_after) + continue + if response.status_code == 429: + if 'retry-after' in response.headers.keys(): + retry_after = response.headers['retry-after'] + print("Too Many Requests (429). 
Sleeping for {} seconds".format(retry_after)) + sleep(retry_after) + continue + response.raise_for_status() + results = response.json() - params = { - 'action': 'wbremoveclaims', - 'claim': claim_id, - 'bot': '', - 'format': 'json' - } + return results - if summary: - params.update({'summary': summary}) - if revision: - params.update({'revision': revision}) +def merge_items(from_id, to_id, ignore_conflicts='', **kwargs): + """ + A static method to merge two items + :param from_id: The QID which should be merged into another item + :type from_id: string with 'Q' prefix + :param to_id: The QID into which another item should be merged + :type to_id: string with 'Q' prefix + :param mediawiki_api_url: The MediaWiki url which should be used + :type mediawiki_api_url: str + :param ignore_conflicts: A string with the values 'description', 'statement' or 'sitelink', separated by a pipe ('|') if using more than one of those. + :type ignore_conflicts: str + :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. + :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. + :type allow_anonymous: bool + :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. 
+ :type user_agent: str + """ + + params = { + 'action': 'wbmergeitems', + 'fromid': from_id, + 'toid': to_id, + 'format': 'json', + 'bot': '', + 'ignoreconflicts': ignore_conflicts + } + + return mediawiki_api_call_helper(data=params, **kwargs) + + +def merge_lexemes(source, target, summary=None, **kwargs): + """ + A static method to merge two items + + :param source: The QID which should be merged into another item + :type source: string with 'Q' prefix + :param target: The QID into which another item should be merged + :type target: string with 'Q' prefix + """ + + params = { + 'action': 'wblmergelexemes', + 'fromid': source, + 'toid': target, + 'format': 'json', + 'bot': '' + } + + if summary: + params.update({'summary': summary}) + + return mediawiki_api_call_helper(data=params, **kwargs) + + +def remove_claims(claim_id, summary=None, revision=None, **kwargs): + """ + Delete an item + :param claim_id: One GUID or several (pipe-separated) GUIDs identifying the claims to be removed. All claims must belong to the same entity. + :type claim_id: string + :param summary: Summary for the edit. Will be prepended by an automatically generated comment. + :type summary: str + :param revision: The numeric identifier for the revision to base the modification on. This is used for detecting conflicts during save. + :type revision: str + :param mediawiki_api_url: The MediaWiki url which should be used + :type mediawiki_api_url: str + :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. + :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. + :type allow_anonymous: bool + :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. 
+ :type user_agent: str + """ + + params = { + 'action': 'wbremoveclaims', + 'claim': claim_id, + 'bot': '', + 'format': 'json' + } + + if summary: + params.update({'summary': summary}) + + if revision: + params.update({'revision': revision}) + + return mediawiki_api_call_helper(data=params, **kwargs) + + +def search_entities(search_string, language=None, strict_language=True, search_type='item', max_results=500, dict_result=False, allow_anonymous=True, **kwargs): + """ + Performs a search for entities in the Wikibase instance using labels and aliases. + :param search_string: a string which should be searched for in the Wikibase instance (labels and aliases) + :type search_string: str + :param language: The language in which to perform the search. + :type language: str + :param strict_language: Whether to disable language fallback + :type strict_language: bool + :param search_type: Search for this type of entity. One of the following values: form, item, lexeme, property, sense + :type search_type: str + :param mediawiki_api_url: Specify the mediawiki_api_url. + :type mediawiki_api_url: str + :param max_results: The maximum number of search results returned. Default 500 + :type max_results: int + :param dict_result: + :type dict_result: boolean + :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. + :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. 
+ :type allow_anonymous: bool + :param user_agent: The user agent string transmitted in the http header + :type user_agent: str + :return: list + """ + + language = config['DEFAULT_LANGUAGE'] if language is None else language + + params = { + 'action': 'wbsearchentities', + 'search': search_string, + 'language': language, + 'strict_language': strict_language, + 'type': search_type, + 'limit': 50, + 'format': 'json' + } + + cont_count = 0 + results = [] + + while True: + params.update({'continue': cont_count}) + + search_results = mediawiki_api_call_helper(data=params, allow_anonymous=allow_anonymous, **kwargs) + + if search_results['success'] != 1: + raise SearchError('Wikibase API wbsearchentities failed') + else: + for i in search_results['search']: + if dict_result: + description = i['description'] if 'description' in i else None + aliases = i['aliases'] if 'aliases' in i else None + results.append({ + 'id': i['id'], + 'label': i['label'], + 'match': i['match'], + 'description': description, + 'aliases': aliases + }) + else: + results.append(i['id']) + + if 'search-continue' not in search_results: + break + else: + cont_count = search_results['search-continue'] - return Helpers.mediawiki_api_call_helper(data=params, **kwargs) + if cont_count >= max_results: + break - @staticmethod - def search_entities(search_string, language=None, strict_language=True, search_type='item', max_results=500, dict_result=False, allow_anonymous=True, **kwargs): - """ - Performs a search for entities in the Wikibase instance using labels and aliases. - :param search_string: a string which should be searched for in the Wikibase instance (labels and aliases) - :type search_string: str - :param language: The language in which to perform the search. - :type language: str - :param strict_language: Whether to disable language fallback - :type strict_language: bool - :param search_type: Search for this type of entity. 
One of the following values: form, item, lexeme, property, sense - :type search_type: str - :param mediawiki_api_url: Specify the mediawiki_api_url. - :type mediawiki_api_url: str - :param max_results: The maximum number of search results returned. Default 500 - :type max_results: int - :param dict_result: - :type dict_result: boolean - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :param user_agent: The user agent string transmitted in the http header - :type user_agent: str - :return: list - """ + return results - language = config['DEFAULT_LANGUAGE'] if language is None else language - - params = { - 'action': 'wbsearchentities', - 'search': search_string, - 'language': language, - 'strict_language': strict_language, - 'type': search_type, - 'limit': 50, - 'format': 'json' - } - - cont_count = 0 - results = [] - - while True: - params.update({'continue': cont_count}) - - search_results = Helpers.mediawiki_api_call_helper(data=params, allow_anonymous=allow_anonymous, **kwargs) - - if search_results['success'] != 1: - raise SearchError('Wikibase API wbsearchentities failed') - else: - for i in search_results['search']: - if dict_result: - description = i['description'] if 'description' in i else None - aliases = i['aliases'] if 'aliases' in i else None - results.append({ - 'id': i['id'], - 'label': i['label'], - 'match': i['match'], - 'description': description, - 'aliases': aliases - }) - else: - results.append(i['id']) - - if 'search-continue' not in search_results: - break - else: - cont_count = search_results['search-continue'] - - if cont_count >= max_results: - break - return results +def generate_entity_instances(entities, allow_anonymous=True, **kwargs): + """ + A method which allows for retrieval of a list of Wikidata entities. 
The method generates a list of tuples where the first value in the tuple is the entity's ID, whereas the + second is the new instance of a subclass of BaseEntity containing all the data of the entity. This is most useful for mass retrieval of entities. + :param user_agent: A custom user agent + :type user_agent: str + :param entities: A list of IDs. Item, Property or Lexeme. + :type entities: list + :param mediawiki_api_url: The MediaWiki url which should be used + :type mediawiki_api_url: str + :return: A list of tuples, first value in the tuple is the entity's ID, second value is the instance of a subclass of BaseEntity with the corresponding entity data. + :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. + :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. + :type allow_anonymous: bool + """ - @staticmethod - def generate_entity_instances(entities, allow_anonymous=True, **kwargs): - """ - A method which allows for retrieval of a list of Wikidata entities. The method generates a list of tuples where the first value in the tuple is the entity's ID, whereas the - second is the new instance of a subclass of BaseEntity containing all the data of the entity. This is most useful for mass retrieval of entities. - :param user_agent: A custom user agent - :type user_agent: str - :param entities: A list of IDs. Item, Property or Lexeme. - :type entities: list - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :return: A list of tuples, first value in the tuple is the entity's ID, second value is the instance of a subclass of BaseEntity with the corresponding entity data. - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. 
- :type allow_anonymous: bool - """ + from wikibaseintegrator.entities.baseentity import BaseEntity - from wikibaseintegrator.entities.baseentity import BaseEntity + if isinstance(entities, str): + entities = [entities] - if isinstance(entities, str): - entities = [entities] + assert type(entities) == list - assert type(entities) == list + params = { + 'action': 'wbgetentities', + 'ids': '|'.join(entities), + 'format': 'json' + } - params = { - 'action': 'wbgetentities', - 'ids': '|'.join(entities), - 'format': 'json' - } + reply = mediawiki_api_call_helper(data=params, allow_anonymous=allow_anonymous, **kwargs) - reply = Helpers.mediawiki_api_call_helper(data=params, allow_anonymous=allow_anonymous, **kwargs) + entity_instances = [] + for qid, v in reply['entities'].items(): + from wikibaseintegrator import WikibaseIntegrator + wbi = WikibaseIntegrator() + f = [x for x in BaseEntity.__subclasses__() if x.ETYPE == v['type']][0] + ii = f(api=wbi).from_json(v) + entity_instances.append((qid, ii)) - entity_instances = [] - for qid, v in reply['entities'].items(): - from wikibaseintegrator import WikibaseIntegrator - wbi = WikibaseIntegrator() - f = [x for x in BaseEntity.__subclasses__() if x.ETYPE == v['type']][0] - ii = f(api=wbi).from_json(v) - entity_instances.append((qid, ii)) + return entity_instances - return entity_instances - @staticmethod - def format_amount(amount) -> str: - # Remove .0 by casting to int - if float(amount) % 1 == 0: - amount = int(float(amount)) +def format_amount(amount) -> str: + # Remove .0 by casting to int + if float(amount) % 1 == 0: + amount = int(float(amount)) - # Adding prefix + for positive number and 0 - if not str(amount).startswith('+') and float(amount) >= 0: - amount = str('+{}'.format(amount)) + # Adding prefix + for positive number and 0 + if not str(amount).startswith('+') and float(amount) >= 0: + amount = str('+{}'.format(amount)) - # return as string - return str(amount) + # return as string + return str(amount) diff 
--git a/wikibaseintegrator/wikibaseintegrator.py b/wikibaseintegrator/wikibaseintegrator.py index 2df2a0db..c70055d4 100644 --- a/wikibaseintegrator/wikibaseintegrator.py +++ b/wikibaseintegrator/wikibaseintegrator.py @@ -1,11 +1,12 @@ +from wikibaseintegrator import wbi_helpers from wikibaseintegrator.entities.item import Item from wikibaseintegrator.entities.lexeme import Lexeme from wikibaseintegrator.entities.mediainfo import MediaInfo from wikibaseintegrator.entities.property import Property -from wikibaseintegrator.wbi_helpers import Helpers class WikibaseIntegrator(object): + def __init__(self, search_only=False, is_bot=False, @@ -22,4 +23,4 @@ def __init__(self, self.mediainfo = MediaInfo(api=self) # Helpers - self.helpers = Helpers() + self.helpers = wbi_helpers From 0f3d9cc7796e7d3ab348f85fc1e11cc3bb9e9454 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 22 Aug 2021 01:41:55 +0200 Subject: [PATCH 060/308] Update wbi_fastrun.py Missing import remove --- wikibaseintegrator/wbi_fastrun.py | 1 - 1 file changed, 1 deletion(-) diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index e5cb270f..a129a559 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -4,7 +4,6 @@ from functools import lru_cache from itertools import chain -from wikibaseintegrator import Helpers from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_helpers import format_amount, execute_sparql_query From 929a222812968156a7713edca098aea86800f0c9 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 22 Aug 2021 10:48:24 +0200 Subject: [PATCH 061/308] Solve the deepcopy issue And fix if_exists tests --- test/test_wbi_core.py | 8 ++++++-- wikibaseintegrator/wbi_helpers.py | 8 ++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index 82fba532..d9e8d3fe 100644 --- 
a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -44,23 +44,27 @@ def test_basedatatype_if_exists(self): item = deepcopy(item_original) item.add_claims(instances, if_exists='APPEND') claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')] + # Append claims to item, only one unique added assert len(claims) == len_claims_original + 1 and 'Q1234' in claims and claims.count('Q1234') == 1 item = deepcopy(item_original) item.add_claims(instances, if_exists='FORCE_APPEND') claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')] + # Append claims to item, force two to be added assert len(claims) == len_claims_original + 2 and 'Q1234' in claims and claims.count('Q1234') == 2 item = deepcopy(item_original) item.add_claims(instances, if_exists='KEEP') claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')] + # Append claims to item, there is already claims, so nothing added assert len(claims) == len_claims_original and 'Q1234' not in claims item = deepcopy(item_original) item.add_claims(instances, if_exists='REPLACE') - claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')] + claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31') if not x.removed] removed_claims = [True for x in item.claims.get('P31') if x.removed] - assert len(claims) == len_claims_original + 2 and 'Q1234' in claims and len(removed_claims) == 2 and True in removed_claims + # Append claims to item, replace already existing claims with new ones, only one if it's the same property number + assert len(claims) == 1 and 'Q1234' in claims and len(removed_claims) == 2 and True in removed_claims and claims.count('Q1234') == 1 def test_description(self): item = wbi.item.get('Q2') diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index c6d0cdfc..46a10e89 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -404,3 +404,11 @@ def 
format_amount(amount) -> str: # return as string return str(amount) + + +def __deepcopy__(memo): + # Don't return a copy of the module + # Deepcopy don't allow copy of modules (https://bugs.python.org/issue43093) + # It's really the good way to solve this? + from wikibaseintegrator import wikibaseintegrator + return wikibaseintegrator.wbi_helpers From 1cb8667d8e1608150427269b87ed5f7469e4277a Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 22 Aug 2021 22:08:42 +0200 Subject: [PATCH 062/308] Update wbi_helpers.py Format --- wikibaseintegrator/wbi_helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index 96f6c25d..f53c7aea 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -119,7 +119,8 @@ def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_age else: data.update({'assert': 'user'}) if 'token' in data and data['token'] == '+\\': - raise Exception("Anonymous edit are not allowed by default. Set allow_anonymous to True to edit mediawiki anonymously or set the login parameter with a valid Login object.") + raise Exception( + "Anonymous edit are not allowed by default. 
Set allow_anonymous to True to edit mediawiki anonymously or set the login parameter with a valid Login object.") elif 'assert' not in data: # Always assert anon if allow_anonymous is True data.update({'assert': 'anon'}) From 74626d4992899b5335f721bdcc15407f913a7aee Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Mon, 23 Aug 2021 09:08:12 +0200 Subject: [PATCH 063/308] Update wbi_helpers.py Format --- wikibaseintegrator/wbi_helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index f53c7aea..f757b69b 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -119,8 +119,8 @@ def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_age else: data.update({'assert': 'user'}) if 'token' in data and data['token'] == '+\\': - raise Exception( - "Anonymous edit are not allowed by default. Set allow_anonymous to True to edit mediawiki anonymously or set the login parameter with a valid Login object.") + raise Exception("Anonymous edit are not allowed by default. 
" + "Set allow_anonymous to True to edit mediawiki anonymously or set the login parameter with a valid Login object.") elif 'assert' not in data: # Always assert anon if allow_anonymous is True data.update({'assert': 'anon'}) From 0e92089bf4d8ec3abd6527312c59335c141eae1a Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Tue, 24 Aug 2021 18:20:51 +0200 Subject: [PATCH 064/308] Update property.py Fix Property().get() --- wikibaseintegrator/entities/property.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wikibaseintegrator/entities/property.py b/wikibaseintegrator/entities/property.py index 8f0fdd2b..9249e652 100644 --- a/wikibaseintegrator/entities/property.py +++ b/wikibaseintegrator/entities/property.py @@ -27,7 +27,7 @@ def __init__(self, api, datatype=None, labels=None, descriptions=None, aliases=N def new(self, **kwargs) -> Property: return Property(self.api, **kwargs) - def get(self, entity_id) -> Property: + def get(self, entity_id, **kwargs) -> Property: json_data = super(Property, self).get(entity_id=entity_id) return Property(self.api).from_json(json_data=json_data['entities'][entity_id]) From d4656e95e84e38aded3da5d971cbf3855a9adab8 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Tue, 24 Aug 2021 23:52:41 +0200 Subject: [PATCH 065/308] Improve useragent (#189) * Update wbi_config.py Add script name to USERAGENT * added section on user-agent-policy (#187) * added section on user-agent-policy linked to an example and updated the TOC fixes #181 * Update README.md Reword and add example Co-authored-by: Myst <1592048+LeMyst@users.noreply.github.com> * Add wbi_helpers.get_user_agent * Update README.md * Update wbi_helpers.py Add warning message * Update test_all.py Add user agent tests * Update test_all.py More tests Co-authored-by: Dennis Priskorn --- README.md | 18 +++++++++++++++++ test/test_all.py | 24 +++++++++++++++++++---- wikibaseintegrator/__init__.py | 7 +++++++ 
wikibaseintegrator/wbi_config.py | 13 +++---------- wikibaseintegrator/wbi_helpers.py | 32 +++++++++++++++++++++++++++---- wikibaseintegrator/wbi_login.py | 12 +++++------- 6 files changed, 81 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 00bc6ef1..1b664665 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ - [WikibaseIntegrator / WikidataIntegrator](#wikibaseintegrator--wikidataintegrator) - [Installation](#installation) - [Using a Wikibase instance](#using-a-wikibase-instance) + - [Wikimedia Foundation User-Agent policy](#wikimedia-foundation-user-agent-policy) - [The Core Parts](#the-core-parts) - [wbi_item.Item](#wbi_coreitemengine) - [wbi_functions](#wbi_functions) @@ -88,6 +89,23 @@ wbi_config['WIKIBASE_URL'] = 'http://wikibase.svc' You can find more default parameters in the file wbi_config.py +## Wikimedia Foundation User-Agent policy ## + +If you interact with a Wikibase instance hosted by the Wikimedia Foundation (like Wikidata, Mediawiki Commons, etc.), +it's highly advised to follow the User-Agent policy that you can find on the +page [User-Agent policy](https://meta.wikimedia.org/wiki/User-Agent_policy) +of the Wikimedia Meta-Wiki. + +You can set a complementary User-Agent by modifying the variable `wbi_config['USER_AGENT']` in wbi_config. 
+ +For example, with your library name and contact information: + +```python +from wikibaseintegrator.wbi_config import config as wbi_config + +wbi_config['USER_AGENT'] = 'MyWikibaseBot/1.0 (https://www.wikidata.org/wiki/User:MyUsername)' +``` + # The Core Parts # wbi_core supports two modes it can be operated in, a normal mode, updating each item at a time and, a fast run mode, diff --git a/test/test_all.py b/test/test_all.py index d47a16ed..c413f6e0 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -7,7 +7,7 @@ from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.entities.baseentity import MWApiError from wikibaseintegrator.wbi_config import config -from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper +from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper, get_user_agent config['DEBUG'] = True @@ -22,9 +22,7 @@ def test_all(self): with self.assertRaises(requests.HTTPError): mediawiki_api_call_helper(data=None, mediawiki_api_url="https://httpbin.org/status/400", max_retries=3, retry_after=1, allow_anonymous=True) - test = mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, - allow_anonymous=True) - print(test) + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True) class TestDataType(unittest.TestCase): @@ -203,3 +201,21 @@ def test_mediainfo(): mediainfo_item_by_id = wbi.mediainfo.get(entity_id='M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php') assert mediainfo_item_by_id.id == 'M75908279' + + +def test_user_agent(capfd): + # Test there is a warning + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True) + out, err = capfd.readouterr() + assert out + + # Test there is no warning because of the user agent + 
mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True, user_agent='MyWikibaseBot/0.5') + out, err = capfd.readouterr() + assert not out + + # Test if the user agent is correctly added + new_user_agent = get_user_agent(user_agent='MyWikibaseBot/0.5', username='Wikibot') + assert new_user_agent.startswith('MyWikibaseBot/0.5') + assert 'Wikibot' in new_user_agent + assert 'WikibaseIntegrator' in new_user_agent diff --git a/wikibaseintegrator/__init__.py b/wikibaseintegrator/__init__.py index 8d1c82c7..d3f39e8d 100644 --- a/wikibaseintegrator/__init__.py +++ b/wikibaseintegrator/__init__.py @@ -1 +1,8 @@ +import pkg_resources + from .wikibaseintegrator import WikibaseIntegrator + +try: + __version__ = pkg_resources.get_distribution('wikibaseintegrator').version +except pkg_resources.DistributionNotFound as e: # pragma: no cover + __version__ = 'dev' diff --git a/wikibaseintegrator/wbi_config.py b/wikibaseintegrator/wbi_config.py index cb5d368c..7265a5c6 100644 --- a/wikibaseintegrator/wbi_config.py +++ b/wikibaseintegrator/wbi_config.py @@ -1,10 +1,3 @@ -import pkg_resources - -try: - __version__ = pkg_resources.get_distribution('wikibaseintegrator').version -except pkg_resources.DistributionNotFound as e: # pragma: no cover - __version__ = 'dev' - """ Config global options Options can be changed at run time. See tests/test_backoff.py for usage example @@ -15,14 +8,14 @@ To disable retry, set value to 1 BACKOFF_MAX_VALUE: maximum number of seconds to wait before retrying. wait time will increase to this number Default: 3600 (one hour) -USER_AGENT_DEFAULT: default user agent string used for http requests. Both to Wikibase api, query service and others. - See: https://meta.wikimedia.org/wiki/User-Agent_policy +USER_AGENT: Complementary user agent string used for http requests. Both to Wikibase api, query service and others. 
+ See: https://meta.wikimedia.org/wiki/User-Agent_policy """ config = { 'BACKOFF_MAX_TRIES': 5, 'BACKOFF_MAX_VALUE': 3600, - 'USER_AGENT_DEFAULT': "WikibaseIntegrator/{} (https://github.com/LeMyst/WikibaseIntegrator)".format(__version__), + 'USER_AGENT': None, 'MAXLAG': 5, 'PROPERTY_CONSTRAINT_PID': 'P2302', 'DISTINCT_VALUES_CONSTRAINT_QID': 'Q21502410', diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index f757b69b..e13eab3f 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -1,5 +1,6 @@ import datetime from time import sleep +from urllib.parse import urlparse import requests @@ -92,7 +93,11 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_agent=None, allow_anonymous=False, max_retries=1000, retry_after=60, is_bot=False): mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent + user_agent = config['USER_AGENT'] if user_agent is None else user_agent + + if urlparse(mediawiki_api_url).hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None: + print('WARNING: Please set an user agent if you interact with a Wikibase instance from the Wikimedia Foundation.') + print('More information in the README.md and https://meta.wikimedia.org/wiki/User-Agent_policy') if not allow_anonymous: if login is None: @@ -102,7 +107,7 @@ def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_age raise ValueError("mediawiki_api_url can't be different with the one in the login object.") headers = { - 'User-Agent': user_agent + 'User-Agent': get_user_agent(user_agent, login.user if login else None) } if data is not None: @@ -149,7 +154,11 @@ def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max """ 
sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if endpoint is None else endpoint - user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent + user_agent = (config['USER_AGENT'] if user_agent is None else user_agent) + + if urlparse(endpoint).hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None: + print('WARNING: Please set an user agent if you interact with a Wikibase instance from the Wikimedia Foundation.') + print('More information in the README.md and https://meta.wikimedia.org/wiki/User-Agent_policy') if prefix: query = prefix + '\n' + query @@ -161,7 +170,7 @@ def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max headers = { 'Accept': 'application/sparql-results+json', - 'User-Agent': user_agent, + 'User-Agent': get_user_agent(user_agent), 'Content-Type': 'multipart/form-data' } @@ -407,6 +416,21 @@ def format_amount(amount) -> str: return str(amount) +def get_user_agent(user_agent, username=None): + from wikibaseintegrator import __version__ + wbi_user_agent = "WikibaseIntegrator/{}".format(__version__) + + if user_agent is None: + return_user_agent = wbi_user_agent + else: + return_user_agent = user_agent + ' ' + wbi_user_agent + + if username: + return_user_agent += " (User:{})".format(username) + + return return_user_agent + + def __deepcopy__(memo): # Don't return a copy of the module # Deepcopy don't allow copy of modules (https://bugs.python.org/issue43093) diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index 32e0a45c..44f6454e 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -8,6 +8,7 @@ from wikibaseintegrator.wbi_backoff import wbi_backoff from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_helpers import get_user_agent """ Login class for Wikidata. Takes username and password and stores the session cookies and edit tokens. 
@@ -50,6 +51,8 @@ def __init__(self, user=None, pwd=None, mediawiki_api_url=None, mediawiki_index_ :return: None """ + self.user = user + self.mediawiki_api_url = mediawiki_api_url or config['MEDIAWIKI_API_URL'] self.mediawiki_index_url = mediawiki_index_url or config['MEDIAWIKI_INDEX_URL'] self.mediawiki_rest_url = mediawiki_rest_url or config['MEDIAWIKI_REST_URL'] @@ -71,13 +74,8 @@ def __init__(self, user=None, pwd=None, mediawiki_api_url=None, mediawiki_index_ self.response_qs = None self.callback_url = callback_url - if user_agent: - self.user_agent = user_agent - else: - # if a user is given append " (User:USER)" to the UA string and update that value in CONFIG - if user and user.casefold() not in config['USER_AGENT_DEFAULT'].casefold(): - config['USER_AGENT_DEFAULT'] += " (User:{})".format(user) - self.user_agent = config['USER_AGENT_DEFAULT'] + self.user_agent = get_user_agent(user_agent if user_agent else config['USER_AGENT'], self.user) + self.session.headers.update({ 'User-Agent': self.user_agent }) From c417a60e64f273e61361cb90778a2f1b7a601081 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 25 Aug 2021 00:08:45 +0200 Subject: [PATCH 066/308] Update setup.cfg v0.12 will be compatible with 3.10 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index bd2a91d7..4d98d3a7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,7 +36,7 @@ install_requires = mwoauth backoff oauthlib -python_requires = >=3.7, <3.10 +python_requires = >=3.7, <3.11 [options.extras_require] dev = From 489b0fdcc5ae6a981e3a3108062771fa37ab805e Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 26 Aug 2021 23:10:25 +0200 Subject: [PATCH 067/308] 0.12.0.dev1 Fix packaging issues --- requirements.txt | 2 +- setup.cfg | 7 ++++--- wikibaseintegrator/__init__.py | 2 ++ 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 
8c5f88b9..1da53e8f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,6 @@ simplejson~=3.17.5 requests~=2.26.0 mwoauth~=0.3.7 backoff~=1.11.1 +oauthlib~=3.1.1 pytest~=6.2.4 setuptools~=57.4.0 -oauthlib~=3.1.1 diff --git a/setup.cfg b/setup.cfg index 4d98d3a7..8480f66d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,9 +1,9 @@ [metadata] name = wikibaseintegrator -version = 0.12.0.dev0 +version = 0.12.0.dev1 author = Myst and WikidataIntegrator authors license = MIT -license_files = LICENSE.txt +license_files = LICENSE description = Python package for reading from and writing to a Wikibase instance keywords = wikibase, wikidata, mediawiki, sparql home_page = https://github.com/LeMyst/WikibaseIntegrator @@ -29,7 +29,8 @@ classifiers = Topic :: Software Development :: Libraries :: Python Modules [options] -packages = wikibaseintegrator +packages = find: +namespace_packages = wikibaseintegrator install_requires = simplejson requests diff --git a/wikibaseintegrator/__init__.py b/wikibaseintegrator/__init__.py index d3f39e8d..dc35e5fe 100644 --- a/wikibaseintegrator/__init__.py +++ b/wikibaseintegrator/__init__.py @@ -2,6 +2,8 @@ from .wikibaseintegrator import WikibaseIntegrator +__import__("pkg_resources").declare_namespace(__name__) + try: __version__ = pkg_resources.get_distribution('wikibaseintegrator').version except pkg_resources.DistributionNotFound as e: # pragma: no cover From b4ebb363ce963bc58e918ab661e80776ca7e939c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 26 Aug 2021 23:33:04 +0200 Subject: [PATCH 068/308] v0.12.0.dev2 Fix packaging issues --- setup.cfg | 3 +-- wikibaseintegrator/__init__.py | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/setup.cfg b/setup.cfg index 8480f66d..50aef5ea 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = wikibaseintegrator -version = 0.12.0.dev1 +version = 0.12.0.dev2 author = Myst and WikidataIntegrator authors license = MIT 
license_files = LICENSE @@ -30,7 +30,6 @@ classifiers = [options] packages = find: -namespace_packages = wikibaseintegrator install_requires = simplejson requests diff --git a/wikibaseintegrator/__init__.py b/wikibaseintegrator/__init__.py index dc35e5fe..d3f39e8d 100644 --- a/wikibaseintegrator/__init__.py +++ b/wikibaseintegrator/__init__.py @@ -2,8 +2,6 @@ from .wikibaseintegrator import WikibaseIntegrator -__import__("pkg_resources").declare_namespace(__name__) - try: __version__ = pkg_resources.get_distribution('wikibaseintegrator').version except pkg_resources.DistributionNotFound as e: # pragma: no cover From fb0a27459fe36bb36b4f74528538aacb987a5b2c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 27 Aug 2021 12:38:58 +0200 Subject: [PATCH 069/308] Update WikibaseIntegrator.iml --- .idea/WikibaseIntegrator.iml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.idea/WikibaseIntegrator.iml b/.idea/WikibaseIntegrator.iml index d19b962d..7cce02b4 100644 --- a/.idea/WikibaseIntegrator.iml +++ b/.idea/WikibaseIntegrator.iml @@ -6,6 +6,10 @@ + + + + From 99817de55f4fba58fbdc7bfdde14539eccdee7e3 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 27 Aug 2021 12:40:08 +0200 Subject: [PATCH 070/308] Bump to v0.12.0.dev3 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 50aef5ea..c86fc461 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = wikibaseintegrator -version = 0.12.0.dev2 +version = 0.12.0.dev3 author = Myst and WikidataIntegrator authors license = MIT license_files = LICENSE From 25bb69177247dac6d33b30901a590b78a95f9368 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 27 Aug 2021 19:47:49 +0200 Subject: [PATCH 071/308] use the correct class when import from json --- test/test_all.py | 29 +++----- test/test_wbi_core.py | 4 +- 
wikibaseintegrator/datatypes/basedatatype.py | 10 +++ .../datatypes/extra/__init__.py | 1 + wikibaseintegrator/datatypes/extra/edtf.py | 9 +++ wikibaseintegrator/datatypes/form.py | 17 ++--- wikibaseintegrator/datatypes/geoshape.py | 11 ++-- .../datatypes/globecoordinate.py | 33 ++++++---- wikibaseintegrator/datatypes/item.py | 19 +++--- wikibaseintegrator/datatypes/lexeme.py | 19 +++--- .../datatypes/monolingualtext.py | 17 ++--- wikibaseintegrator/datatypes/property.py | 19 +++--- wikibaseintegrator/datatypes/quantity.py | 66 ++++++++++--------- wikibaseintegrator/datatypes/sense.py | 17 ++--- wikibaseintegrator/datatypes/string.py | 11 ++-- wikibaseintegrator/datatypes/tabulardata.py | 11 ++-- wikibaseintegrator/datatypes/time.py | 30 +++++---- wikibaseintegrator/datatypes/url.py | 11 ++-- wikibaseintegrator/models/claims.py | 9 ++- wikibaseintegrator/models/snaks.py | 2 + 20 files changed, 191 insertions(+), 154 deletions(-) create mode 100644 wikibaseintegrator/datatypes/extra/__init__.py create mode 100644 wikibaseintegrator/datatypes/extra/edtf.py diff --git a/test/test_all.py b/test/test_all.py index c413f6e0..510c1d8c 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -27,36 +27,25 @@ def test_all(self): class TestDataType(unittest.TestCase): def test_quantity(self): - dt = datatypes.Quantity(quantity='34.5', prop_nr='P43') + dt = datatypes.Quantity(amount='34.5', prop_nr='P43') dt_json = dt.get_json() - if not dt_json['mainsnak']['datatype'] == 'quantity': - raise + assert dt_json['mainsnak']['datatype'] == 'quantity' value = dt_json['mainsnak']['datavalue'] - if not value['value']['amount'] == '+34.5': - raise - - if not value['value']['unit'] == '1': - raise + assert value['value']['amount'] == '+34.5' + assert value['value']['unit'] == '1' - dt2 = datatypes.Quantity(quantity='34.5', prop_nr='P43', upper_bound='35.3', lower_bound='33.7', unit="Q11573") + dt2 = datatypes.Quantity(amount='34.5', prop_nr='P43', upper_bound='35.3', lower_bound='33.7', 
unit="Q11573") value = dt2.get_json()['mainsnak']['datavalue'] - if not value['value']['amount'] == '+34.5': - raise - - if not value['value']['unit'] == 'http://www.wikidata.org/entity/Q11573': - raise - - if not value['value']['upperBound'] == '+35.3': - raise - - if not value['value']['lowerBound'] == '+33.7': - raise + assert value['value']['amount'] == '+34.5' + assert value['value']['unit'] == 'http://www.wikidata.org/entity/Q11573' + assert value['value']['upperBound'] == '+35.3' + assert value['value']['lowerBound'] == '+33.7' def test_geoshape(self): dt = datatypes.GeoShape(value='Data:Inner_West_Light_Rail_stops.map', prop_nr='P43') diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index d9e8d3fe..7af03659 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -170,8 +170,8 @@ def test_new_item_creation(self): URL(value="ssh://user@server/project.git", prop_nr="P6"), URL(value="svn+ssh://user@server:8888/path", prop_nr="P6"), MonolingualText(text="xxx", language="fr", prop_nr="P7"), - Quantity(quantity=-5.04, prop_nr="P8"), - Quantity(quantity=5.06, upper_bound=9.99, lower_bound=-2.22, unit="Q11573", prop_nr="P8"), + Quantity(amount=-5.04, prop_nr="P8"), + Quantity(amount=5.06, upper_bound=9.99, lower_bound=-2.22, unit="Q11573", prop_nr="P8"), CommonsMedia(value="xxx", prop_nr="P9"), GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr="P10"), GeoShape(value="Data:xxx.map", prop_nr="P11"), diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index 79dc2cfc..51bc979f 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -63,6 +63,16 @@ def __init__(self, **kwargs): self.value = None self.mainsnak.property_number = kwargs.pop('prop_nr', None) + @property + def value(self): + return self.__value + + @value.setter + def value(self, value): + if not value: + self.mainsnak.snaktype = 'novalue' + self.__value = 
value + def get_sparql_value(self): return self.value diff --git a/wikibaseintegrator/datatypes/extra/__init__.py b/wikibaseintegrator/datatypes/extra/__init__.py new file mode 100644 index 00000000..33a13071 --- /dev/null +++ b/wikibaseintegrator/datatypes/extra/__init__.py @@ -0,0 +1 @@ +from .edtf import EDTF diff --git a/wikibaseintegrator/datatypes/extra/edtf.py b/wikibaseintegrator/datatypes/extra/edtf.py new file mode 100644 index 00000000..e6fa5ce2 --- /dev/null +++ b/wikibaseintegrator/datatypes/extra/edtf.py @@ -0,0 +1,9 @@ +from wikibaseintegrator.datatypes import String + + +class EDTF(String): + """ + Implements the Wikibase data type for Wikibase Extended Date/Time Format extension. + More info at https://www.mediawiki.org/wiki/Extension:Wikibase_EDTF + """ + DTYPE = 'edtf' diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py index 12871b0a..2ed6726e 100644 --- a/wikibaseintegrator/datatypes/form.py +++ b/wikibaseintegrator/datatypes/form.py @@ -15,7 +15,7 @@ class Form(BaseDataType): }} ''' - def __init__(self, value, **kwargs): + def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType :param value: The form number to serve as a value using the format "L-F" (example: L252248-F2) @@ -44,10 +44,11 @@ def __init__(self, value, **kwargs): self.value = value - self.mainsnak.datavalue = { - 'value': { - 'entity-type': 'form', - 'id': self.value - }, - 'type': 'wikibase-entityid' - } + if self.value: + self.mainsnak.datavalue = { + 'value': { + 'entity-type': 'form', + 'id': self.value + }, + 'type': 'wikibase-entityid' + } diff --git a/wikibaseintegrator/datatypes/geoshape.py b/wikibaseintegrator/datatypes/geoshape.py index 1177913e..9a838a79 100644 --- a/wikibaseintegrator/datatypes/geoshape.py +++ b/wikibaseintegrator/datatypes/geoshape.py @@ -15,7 +15,7 @@ class GeoShape(BaseDataType): }} ''' - def __init__(self, value, **kwargs): + def __init__(self, value=None, **kwargs): """ 
Constructor, calls the superclass BaseDataType :param value: The GeoShape map file name in Wikimedia Commons to be linked @@ -44,7 +44,8 @@ def __init__(self, value, **kwargs): self.value = value - self.mainsnak.datavalue = { - 'value': self.value, - 'type': 'string' - } + if self.value: + self.mainsnak.datavalue = { + 'value': self.value, + 'type': 'string' + } diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py index 4b411ed1..085d3d2f 100644 --- a/wikibaseintegrator/datatypes/globecoordinate.py +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -14,7 +14,7 @@ class GlobeCoordinate(BaseDataType): }} ''' - def __init__(self, latitude, longitude, precision, globe=None, wikibase_url=None, **kwargs): + def __init__(self, latitude=None, longitude=None, precision=None, globe=None, wikibase_url=None, **kwargs): """ Constructor, calls the superclass BaseDataType :param latitude: Latitute in decimal format @@ -48,23 +48,28 @@ def __init__(self, latitude, longitude, precision, globe=None, wikibase_url=None if globe.startswith('Q'): globe = wikibase_url + '/entity/' + globe - value = (latitude, longitude, precision, globe) - # TODO: Introduce validity checks for coordinates, etc. 
# TODO: Add check if latitude/longitude/precision is None - self.latitude, self.longitude, self.precision, self.globe = value + self.latitude = latitude + self.longitude = longitude + self.precision = precision + self.globe = globe - self.mainsnak.datavalue = { - 'value': { - 'latitude': self.latitude, - 'longitude': self.longitude, - 'precision': self.precision, - 'globe': self.globe - }, - 'type': 'globecoordinate' - } + if self.latitude and self.longitude and self.precision: + self.value = (self.latitude, self.longitude, self.precision, self.globe) + else: + self.value = None - self.value = (self.latitude, self.longitude, self.precision, self.globe) + if self.value: + self.mainsnak.datavalue = { + 'value': { + 'latitude': self.latitude, + 'longitude': self.longitude, + 'precision': self.precision, + 'globe': self.globe + }, + 'type': 'globecoordinate' + } def get_sparql_value(self): return 'Point(' + str(self.latitude) + ', ' + str(self.longitude) + ')' diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py index 952483af..fa751e91 100644 --- a/wikibaseintegrator/datatypes/item.py +++ b/wikibaseintegrator/datatypes/item.py @@ -15,7 +15,7 @@ class Item(BaseDataType): }} ''' - def __init__(self, value, **kwargs): + def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType :param value: The item ID to serve as the value @@ -48,11 +48,12 @@ def __init__(self, value, **kwargs): else: self.value = int(matches.group(1)) - self.mainsnak.datavalue = { - 'value': { - 'entity-type': 'item', - 'numeric-id': self.value, - 'id': 'Q{}'.format(self.value) - }, - 'type': 'wikibase-entityid' - } + if self.value: + self.mainsnak.datavalue = { + 'value': { + 'entity-type': 'item', + 'numeric-id': self.value, + 'id': 'Q{}'.format(self.value) + }, + 'type': 'wikibase-entityid' + } diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py index 72375d4d..851afdd2 100644 --- 
a/wikibaseintegrator/datatypes/lexeme.py +++ b/wikibaseintegrator/datatypes/lexeme.py @@ -15,7 +15,7 @@ class Lexeme(BaseDataType): }} ''' - def __init__(self, value, **kwargs): + def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType :param value: The lexeme number to serve as a value @@ -48,11 +48,12 @@ def __init__(self, value, **kwargs): else: self.value = int(matches.group(1)) - self.mainsnak.datavalue = { - 'value': { - 'entity-type': 'lexeme', - 'numeric-id': self.value, - 'id': 'L{}'.format(self.value) - }, - 'type': 'wikibase-entityid' - } + if self.value: + self.mainsnak.datavalue = { + 'value': { + 'entity-type': 'lexeme', + 'numeric-id': self.value, + 'id': 'L{}'.format(self.value) + }, + 'type': 'wikibase-entityid' + } diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py index cf4d6462..5395471d 100644 --- a/wikibaseintegrator/datatypes/monolingualtext.py +++ b/wikibaseintegrator/datatypes/monolingualtext.py @@ -14,7 +14,7 @@ class MonolingualText(BaseDataType): }} ''' - def __init__(self, text, language=None, **kwargs): + def __init__(self, text=None, language=None, **kwargs): """ Constructor, calls the superclass BaseDataType :param text: The language specific string to be used as the value @@ -47,13 +47,14 @@ def __init__(self, text, language=None, **kwargs): raise ValueError("Parameter 'text' can't be 'None' if 'snaktype' is 'value'") assert isinstance(self.language, str), "Expected str, found {} ({})".format(type(self.language), self.language) - self.mainsnak.datavalue = { - 'value': { - 'text': self.text, - 'language': self.language - }, - 'type': 'monolingualtext' - } + if self.value: + self.mainsnak.datavalue = { + 'value': { + 'text': self.text, + 'language': self.language + }, + 'type': 'monolingualtext' + } self.value = (self.text, self.language) diff --git a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py index 
d4a56c42..9817599a 100644 --- a/wikibaseintegrator/datatypes/property.py +++ b/wikibaseintegrator/datatypes/property.py @@ -15,7 +15,7 @@ class Property(BaseDataType): }} ''' - def __init__(self, value, **kwargs): + def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType :param value: The property number to serve as a value @@ -48,11 +48,12 @@ def __init__(self, value, **kwargs): else: self.value = int(matches.group(1)) - self.mainsnak.datavalue = { - 'value': { - 'entity-type': 'property', - 'numeric-id': self.value, - 'id': 'P{}'.format(self.value) - }, - 'type': 'wikibase-entityid' - } + if self.value: + self.mainsnak.datavalue = { + 'value': { + 'entity-type': 'property', + 'numeric-id': self.value, + 'id': 'P{}'.format(self.value) + }, + 'type': 'wikibase-entityid' + } diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py index 964832db..3294bd0f 100644 --- a/wikibaseintegrator/datatypes/quantity.py +++ b/wikibaseintegrator/datatypes/quantity.py @@ -15,18 +15,18 @@ class Quantity(BaseDataType): }} ''' - def __init__(self, quantity, upper_bound=None, lower_bound=None, unit='1', wikibase_url=None, **kwargs): + def __init__(self, amount=None, upper_bound=None, lower_bound=None, unit='1', wikibase_url=None, **kwargs): """ Constructor, calls the superclass BaseDataType - :param quantity: The quantity value - :type quantity: float, str or None + :param amount: The amount value + :type amount: float, str or None :param prop_nr: The item ID for this claim :type prop_nr: str with a 'P' prefix followed by digits :param upper_bound: Upper bound of the value if it exists, e.g. for standard deviations :type upper_bound: float, str :param lower_bound: Lower bound of the value if it exists, e.g. for standard deviations :type lower_bound: float, str - :param unit: The unit item URL or the QID a certain quantity has been measured in (https://www.wikidata.org/wiki/Wikidata:Units). 
+ :param unit: The unit item URL or the QID a certain amount has been measured in (https://www.wikidata.org/wiki/Wikidata:Units). The default is dimensionless, represented by a '1' :type unit: str :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' @@ -46,17 +46,17 @@ def __init__(self, quantity, upper_bound=None, lower_bound=None, unit='1', wikib if unit.startswith('Q'): unit = wikibase_url + '/entity/' + unit - self.quantity = None + self.amount = None self.unit = None self.upper_bound = None self.lower_bound = None - value = (quantity, unit, upper_bound, lower_bound) + value = (amount, unit, upper_bound, lower_bound) - self.quantity, self.unit, self.upper_bound, self.lower_bound = value + self.amount, self.unit, self.upper_bound, self.lower_bound = value - if self.quantity is not None: - self.quantity = format_amount(self.quantity) + if self.amount is not None: + self.amount = format_amount(self.amount) self.unit = str(self.unit) if self.upper_bound: self.upper_bound = format_amount(self.upper_bound) @@ -65,37 +65,41 @@ def __init__(self, quantity, upper_bound=None, lower_bound=None, unit='1', wikib # Integrity checks for value and bounds try: - for i in [self.quantity, self.upper_bound, self.lower_bound]: + for i in [self.amount, self.upper_bound, self.lower_bound]: if i: float(i) except ValueError: raise ValueError("Value, bounds and units must parse as integers or float") if (self.lower_bound and self.upper_bound) and (float(self.lower_bound) > float(self.upper_bound) - or float(self.lower_bound) > float(self.quantity)): + or float(self.lower_bound) > float(self.amount)): raise ValueError("Lower bound too large") - if self.upper_bound and float(self.upper_bound) < float(self.quantity): + if self.upper_bound and float(self.upper_bound) < float(self.amount): raise ValueError("Upper bound too small") - self.mainsnak.datavalue = { - 'value': { - 'amount': self.quantity, - 'unit': self.unit, - 'upperBound': self.upper_bound, - 'lowerBound': 
self.lower_bound - }, - 'type': 'quantity' - } - - # remove bounds from json if they are undefined - if not self.upper_bound: - del self.mainsnak.datavalue['value']['upperBound'] - - if not self.lower_bound: - del self.mainsnak.datavalue['value']['lowerBound'] - - self.value = (self.quantity, self.unit, self.upper_bound, self.lower_bound) + if self.amount: + self.value = (self.amount, self.unit, self.upper_bound, self.lower_bound) + else: + self.value = None + + if self.value: + self.mainsnak.datavalue = { + 'value': { + 'amount': self.amount, + 'unit': self.unit, + 'upperBound': self.upper_bound, + 'lowerBound': self.lower_bound + }, + 'type': 'quantity' + } + + # remove bounds from json if they are undefined + if not self.upper_bound: + del self.mainsnak.datavalue['value']['upperBound'] + + if not self.lower_bound: + del self.mainsnak.datavalue['value']['lowerBound'] def get_sparql_value(self): - return format_amount(self.quantity) + return format_amount(self.amount) diff --git a/wikibaseintegrator/datatypes/sense.py b/wikibaseintegrator/datatypes/sense.py index 6a8c9101..7ef79fe3 100644 --- a/wikibaseintegrator/datatypes/sense.py +++ b/wikibaseintegrator/datatypes/sense.py @@ -15,7 +15,7 @@ class Sense(BaseDataType): }} ''' - def __init__(self, value, **kwargs): + def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType :param value: Value using the format "L-S" (example: L252248-S123) @@ -44,10 +44,11 @@ def __init__(self, value, **kwargs): self.value = value - self.mainsnak.datavalue = { - 'value': { - 'entity-type': 'sense', - 'id': self.value - }, - 'type': 'wikibase-entityid' - } + if self.value: + self.mainsnak.datavalue = { + 'value': { + 'entity-type': 'sense', + 'id': self.value + }, + 'type': 'wikibase-entityid' + } diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py index 01aa75fe..2af74779 100644 --- a/wikibaseintegrator/datatypes/string.py +++ 
b/wikibaseintegrator/datatypes/string.py @@ -8,7 +8,7 @@ class String(BaseDataType): DTYPE = 'string' - def __init__(self, value, **kwargs): + def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType :param value: The string to be used as the value @@ -30,7 +30,8 @@ def __init__(self, value, **kwargs): assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) self.value = value - self.mainsnak.datavalue = { - 'value': self.value, - 'type': 'string' - } + if self.value: + self.mainsnak.datavalue = { + 'value': self.value, + 'type': 'string' + } diff --git a/wikibaseintegrator/datatypes/tabulardata.py b/wikibaseintegrator/datatypes/tabulardata.py index 09195af7..8ad90b9e 100644 --- a/wikibaseintegrator/datatypes/tabulardata.py +++ b/wikibaseintegrator/datatypes/tabulardata.py @@ -9,7 +9,7 @@ class TabularData(BaseDataType): """ DTYPE = 'tabular-data' - def __init__(self, value, **kwargs): + def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType :param value: Reference to tabular data file on Wikimedia Commons. 
@@ -42,7 +42,8 @@ def __init__(self, value, **kwargs): self.value = value - self.mainsnak.datavalue = { - 'value': self.value, - 'type': 'string' - } + if self.value: + self.mainsnak.datavalue = { + 'value': self.value, + 'type': 'string' + } diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py index 6e93ff3a..b32f9add 100644 --- a/wikibaseintegrator/datatypes/time.py +++ b/wikibaseintegrator/datatypes/time.py @@ -16,7 +16,7 @@ class Time(BaseDataType): }} ''' - def __init__(self, time, before=0, after=0, precision=11, timezone=0, calendarmodel=None, wikibase_url=None, **kwargs): + def __init__(self, time=None, before=0, after=0, precision=11, timezone=0, calendarmodel=None, wikibase_url=None, **kwargs): """ Constructor, calls the superclass BaseDataType :param time: Explicit value for point in time, represented as a timestamp resembling ISO 8601 @@ -77,19 +77,23 @@ def __init__(self, time, before=0, after=0, precision=11, timezone=0, calendarmo if self.precision < 0 or self.precision > 15: raise ValueError("Invalid value for time precision, see https://www.mediawiki.org/wiki/Wikibase/DataModel/JSON#time") - self.mainsnak.datavalue = { - 'value': { - 'time': self.time, - 'before': self.before, - 'after': self.after, - 'precision': self.precision, - 'timezone': self.timezone, - 'calendarmodel': self.calendarmodel - }, - 'type': 'time' - } + if self.time: + self.value = (self.time, self.before, self.after, self.precision, self.timezone, self.calendarmodel) + else: + self.value = None - self.value = (self.time, self.before, self.after, self.precision, self.timezone, self.calendarmodel) + if self.value: + self.mainsnak.datavalue = { + 'value': { + 'time': self.time, + 'before': self.before, + 'after': self.after, + 'precision': self.precision, + 'timezone': self.timezone, + 'calendarmodel': self.calendarmodel + }, + 'type': 'time' + } def get_sparql_value(self): return self.time diff --git a/wikibaseintegrator/datatypes/url.py 
b/wikibaseintegrator/datatypes/url.py index 0e9a2ab6..33ba8354 100644 --- a/wikibaseintegrator/datatypes/url.py +++ b/wikibaseintegrator/datatypes/url.py @@ -15,7 +15,7 @@ class URL(BaseDataType): }} ''' - def __init__(self, value, **kwargs): + def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType :param value: The URL to be used as the value @@ -44,7 +44,8 @@ def __init__(self, value, **kwargs): self.value = value - self.mainsnak.datavalue = { - 'value': self.value, - 'type': 'string' - } + if self.value: + self.mainsnak.datavalue = { + 'value': self.value, + 'type': 'string' + } diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 10832839..6af6539f 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -76,8 +76,12 @@ def add(self, claims: Union[list, Claim, None] = None, if_exists='REPLACE') -> C def from_json(self, json_data) -> Claims: for property in json_data: for claim in json_data[property]: - # data_type = [x for x in BaseDataType.__subclasses__() if x.DTYPE == alias['mainsnak']['datatype']][0] - self.add(claims=Claim().from_json(claim), if_exists='FORCE_APPEND') + from wikibaseintegrator.datatypes import BaseDataType + if 'datatype' in 'mainsnak': + data_type = [x for x in BaseDataType.subclasses if x.DTYPE == claim['mainsnak']['datatype']][0] + else: + data_type = Claim + self.add(claims=data_type().from_json(claim), if_exists='FORCE_APPEND') return self @@ -90,7 +94,6 @@ def get_json(self) -> {}: json_data[property].append(claim.get_json()) return json_data - def clear(self): self.claims = {} diff --git a/wikibaseintegrator/models/snaks.py b/wikibaseintegrator/models/snaks.py index 60f6e4c5..b788fe63 100644 --- a/wikibaseintegrator/models/snaks.py +++ b/wikibaseintegrator/models/snaks.py @@ -107,6 +107,8 @@ def datavalue(self): @datavalue.setter def datavalue(self, value): + if value is not None: + self.snaktype = 'value' 
self.__datavalue = value @property From d2b70951c36ccc5f0d5a8cb958c4609b5329421c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 27 Aug 2021 20:05:10 +0200 Subject: [PATCH 072/308] Add extra datatype LocalMedia Add extra datatypes unit tests --- test/test_wbi_core.py | 16 ++++++++++++++++ wikibaseintegrator/datatypes/extra/__init__.py | 1 + wikibaseintegrator/datatypes/extra/localmedia.py | 9 +++++++++ 3 files changed, 26 insertions(+) create mode 100644 wikibaseintegrator/datatypes/extra/localmedia.py diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index 7af03659..9faeac52 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -4,6 +4,7 @@ from wikibaseintegrator import datatypes, WikibaseIntegrator from wikibaseintegrator.datatypes import String, Math, ExternalID, Time, URL, MonolingualText, Quantity, CommonsMedia, GlobeCoordinate, GeoShape, Property, TabularData, \ MusicalNotation, Lexeme, Form, Sense +from wikibaseintegrator.datatypes.extra import EDTF, LocalMedia from wikibaseintegrator.entities import Item from wikibaseintegrator.models import LanguageValues from wikibaseintegrator.wbi_helpers import search_entities, generate_entity_instances @@ -196,6 +197,21 @@ def test_new_item_creation(self): item = wbi.item.new().add_claims(data) assert item.get_json() + def test_new_extra_item_creation(self): + data = [ + EDTF(value='test1', prop_nr='P1'), + LocalMedia(value='test2', prop_nr='P2') + ] + + for d in data: + item = wbi.item.new().add_claims([d]) + assert item.get_json() + item = wbi.item.new().add_claims(d) + assert item.get_json() + + item = wbi.item.new().add_claims(data) + assert item.get_json() + def test_get_property_list(self): self.assertTrue(len(self.common_item.claims)) diff --git a/wikibaseintegrator/datatypes/extra/__init__.py b/wikibaseintegrator/datatypes/extra/__init__.py index 33a13071..c14a6c63 100644 --- a/wikibaseintegrator/datatypes/extra/__init__.py +++ 
b/wikibaseintegrator/datatypes/extra/__init__.py @@ -1 +1,2 @@ from .edtf import EDTF +from .localmedia import LocalMedia diff --git a/wikibaseintegrator/datatypes/extra/localmedia.py b/wikibaseintegrator/datatypes/extra/localmedia.py new file mode 100644 index 00000000..bd89a57e --- /dev/null +++ b/wikibaseintegrator/datatypes/extra/localmedia.py @@ -0,0 +1,9 @@ +from wikibaseintegrator.datatypes import String + + +class LocalMedia(String): + """ + Implements the Wikibase data type for Wikibase Local Media extension. + More info at https://www.mediawiki.org/wiki/Extension:Wikibase_Local_Media + """ + DTYPE = 'localMedia' From 11a971ec950433005efe3c1faedb87788fe195c1 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 27 Aug 2021 23:40:58 +0200 Subject: [PATCH 073/308] Remove self.value in BaseDataType classes (#200) --- test/test_wbi_fastrun.py | 1 - wikibaseintegrator/datatypes/basedatatype.py | 2 +- wikibaseintegrator/datatypes/form.py | 11 ++-- wikibaseintegrator/datatypes/geoshape.py | 8 ++- wikibaseintegrator/datatypes/item.py | 34 ++++++------ wikibaseintegrator/datatypes/lexeme.py | 34 ++++++------ .../datatypes/monolingualtext.py | 23 +++----- wikibaseintegrator/datatypes/property.py | 32 ++++++------ wikibaseintegrator/datatypes/quantity.py | 52 +++++++------------ wikibaseintegrator/datatypes/sense.py | 11 ++-- wikibaseintegrator/datatypes/string.py | 5 +- wikibaseintegrator/datatypes/tabulardata.py | 8 ++- wikibaseintegrator/datatypes/time.py | 44 +++++----------- wikibaseintegrator/datatypes/url.py | 8 ++- wikibaseintegrator/wbi_fastrun.py | 6 +-- 15 files changed, 117 insertions(+), 162 deletions(-) diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py index 5dcd79e0..7883201b 100644 --- a/test/test_wbi_fastrun.py +++ b/test/test_wbi_fastrun.py @@ -182,7 +182,6 @@ def test_append_props(): # https://www.wikidata.org/wiki/Q3402672#P527 # don't consider refs - wbi.debug = True frc = 
FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType) # with append statements = [Item(value='Q24784025', prop_nr='P527')] diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index 51bc979f..5aee50b1 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -74,7 +74,7 @@ def value(self, value): self.__value = value def get_sparql_value(self): - return self.value + return self.mainsnak.datavalue['value'] def equals(self, that, include_ref=False, fref=None): """ diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py index 2ed6726e..5d983519 100644 --- a/wikibaseintegrator/datatypes/form.py +++ b/wikibaseintegrator/datatypes/form.py @@ -35,20 +35,21 @@ def __init__(self, value=None, **kwargs): super(Form, self).__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - if value is not None: + + if value: pattern = re.compile(r'^L[0-9]+-F[0-9]+$') matches = pattern.match(value) if not matches: raise ValueError("Invalid form ID ({}), format must be 'L[0-9]+-F[0-9]+'".format(value)) - self.value = value - - if self.value: self.mainsnak.datavalue = { 'value': { 'entity-type': 'form', - 'id': self.value + 'id': value }, 'type': 'wikibase-entityid' } + + def get_sparql_value(self): + return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/geoshape.py b/wikibaseintegrator/datatypes/geoshape.py index 9a838a79..143cb6fd 100644 --- a/wikibaseintegrator/datatypes/geoshape.py +++ b/wikibaseintegrator/datatypes/geoshape.py @@ -35,17 +35,15 @@ def __init__(self, value=None, **kwargs): super(GeoShape, self).__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - if value is not None: + + if value: # TODO: Need to check if the value 
is a full URl like http://commons.wikimedia.org/data/main/Data:Paris.map pattern = re.compile(r'^Data:((?![:|#]).)+\.map$') matches = pattern.match(value) if not matches: raise ValueError("Value must start with Data: and end with .map. In addition title should not contain characters like colon, hash or pipe.") - self.value = value - - if self.value: self.mainsnak.datavalue = { - 'value': self.value, + 'value': value, 'type': 'string' } diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py index fa751e91..90d924b7 100644 --- a/wikibaseintegrator/datatypes/item.py +++ b/wikibaseintegrator/datatypes/item.py @@ -11,7 +11,7 @@ class Item(BaseDataType): sparql_query = ''' SELECT * WHERE {{ ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/Q{value}> . + ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> . }} ''' @@ -35,25 +35,25 @@ def __init__(self, value=None, **kwargs): super(Item, self).__init__(**kwargs) assert isinstance(value, (str, int)) or value is None, 'Expected str or int, found {} ({})'.format(type(value), value) - if value is None: - self.value = value - elif isinstance(value, int): - self.value = value - else: - pattern = re.compile(r'^Q?([0-9]+)$') - matches = pattern.match(value) - - if not matches: - raise ValueError("Invalid item ID ({}), format must be 'Q[0-9]+'".format(value)) - else: - self.value = int(matches.group(1)) - - if self.value: + + if value: + if isinstance(value, str): + pattern = re.compile(r'^Q?([0-9]+)$') + matches = pattern.match(value) + + if not matches: + raise ValueError("Invalid item ID ({}), format must be 'Q[0-9]+'".format(value)) + else: + value = int(matches.group(1)) + self.mainsnak.datavalue = { 'value': { 'entity-type': 'item', - 'numeric-id': self.value, - 'id': 'Q{}'.format(self.value) + 'numeric-id': value, + 'id': 'Q{}'.format(value) }, 'type': 'wikibase-entityid' } + + def get_sparql_value(self): + return 
self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py index 851afdd2..bcacc25f 100644 --- a/wikibaseintegrator/datatypes/lexeme.py +++ b/wikibaseintegrator/datatypes/lexeme.py @@ -11,7 +11,7 @@ class Lexeme(BaseDataType): sparql_query = ''' SELECT * WHERE {{ ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/L{value}> . + ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> . }} ''' @@ -35,25 +35,25 @@ def __init__(self, value=None, **kwargs): super(Lexeme, self).__init__(**kwargs) assert isinstance(value, (str, int)) or value is None, "Expected str or int, found {} ({})".format(type(value), value) - if value is None: - self.value = value - elif isinstance(value, int): - self.value = value - else: - pattern = re.compile(r'^L?([0-9]+)$') - matches = pattern.match(value) - - if not matches: - raise ValueError("Invalid lexeme ID ({}), format must be 'L[0-9]+'".format(value)) - else: - self.value = int(matches.group(1)) - - if self.value: + + if value: + if isinstance(value, str): + pattern = re.compile(r'^L?([0-9]+)$') + matches = pattern.match(value) + + if not matches: + raise ValueError("Invalid lexeme ID ({}), format must be 'L[0-9]+'".format(value)) + else: + value = int(matches.group(1)) + self.mainsnak.datavalue = { 'value': { 'entity-type': 'lexeme', - 'numeric-id': self.value, - 'id': 'L{}'.format(self.value) + 'numeric-id': value, + 'id': 'L{}'.format(value) }, 'type': 'wikibase-entityid' } + + def get_sparql_value(self): + return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py index 5395471d..61313be5 100644 --- a/wikibaseintegrator/datatypes/monolingualtext.py +++ b/wikibaseintegrator/datatypes/monolingualtext.py @@ -35,28 +35,19 @@ def __init__(self, text=None, language=None, **kwargs): super(MonolingualText, 
self).__init__(**kwargs) - self.text = None - self.language = language or config['DEFAULT_LANGUAGE'] + language = language or config['DEFAULT_LANGUAGE'] - value = (text, self.language) + assert isinstance(text, str) or text is None, "Expected str, found {} ({})".format(type(text), text) + assert isinstance(language, str), "Expected str, found {} ({})".format(type(language), language) - self.text, self.language = value - if self.text is not None: - assert isinstance(self.text, str) or self.text is None, "Expected str, found {} ({})".format(type(self.text), self.text) - elif self.mainsnak.snaktype == 'value': - raise ValueError("Parameter 'text' can't be 'None' if 'snaktype' is 'value'") - assert isinstance(self.language, str), "Expected str, found {} ({})".format(type(self.language), self.language) - - if self.value: + if text and language: self.mainsnak.datavalue = { 'value': { - 'text': self.text, - 'language': self.language + 'text': text, + 'language': language }, 'type': 'monolingualtext' } - self.value = (self.text, self.language) - def get_sparql_value(self): - return '"' + self.text.replace('"', r'\"') + '"@' + self.language + return '"' + self.mainsnak.datavalue['value']['text'].replace('"', r'\"') + '"@' + self.mainsnak.datavalue['value']['language'] diff --git a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py index 9817599a..a5f8e82e 100644 --- a/wikibaseintegrator/datatypes/property.py +++ b/wikibaseintegrator/datatypes/property.py @@ -35,25 +35,25 @@ def __init__(self, value=None, **kwargs): super(Property, self).__init__(**kwargs) assert isinstance(value, (str, int)) or value is None, "Expected str or int, found {} ({})".format(type(value), value) - if value is None: - self.value = value - elif isinstance(value, int): - self.value = value - else: - pattern = re.compile(r'^P?([0-9]+)$') - matches = pattern.match(value) - - if not matches: - raise ValueError("Invalid property ID ({}), format must be 
'P[0-9]+'".format(value)) - else: - self.value = int(matches.group(1)) - - if self.value: + + if value: + if isinstance(value, str): + pattern = re.compile(r'^P?([0-9]+)$') + matches = pattern.match(value) + + if not matches: + raise ValueError("Invalid property ID ({}), format must be 'P[0-9]+'".format(value)) + else: + value = int(matches.group(1)) + self.mainsnak.datavalue = { 'value': { 'entity-type': 'property', - 'numeric-id': self.value, - 'id': 'P{}'.format(self.value) + 'numeric-id': value, + 'id': 'P{}'.format(value) }, 'type': 'wikibase-entityid' } + + def get_sparql_value(self): + return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py index 3294bd0f..04580ae3 100644 --- a/wikibaseintegrator/datatypes/quantity.py +++ b/wikibaseintegrator/datatypes/quantity.py @@ -43,63 +43,49 @@ def __init__(self, amount=None, upper_bound=None, lower_bound=None, unit='1', wi wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url + unit = unit or '1' + if unit.startswith('Q'): unit = wikibase_url + '/entity/' + unit - self.amount = None - self.unit = None - self.upper_bound = None - self.lower_bound = None - - value = (amount, unit, upper_bound, lower_bound) - - self.amount, self.unit, self.upper_bound, self.lower_bound = value - - if self.amount is not None: - self.amount = format_amount(self.amount) - self.unit = str(self.unit) - if self.upper_bound: - self.upper_bound = format_amount(self.upper_bound) - if self.lower_bound: - self.lower_bound = format_amount(self.lower_bound) + if amount: + amount = format_amount(amount) + unit = str(unit) + if upper_bound: + upper_bound = format_amount(upper_bound) + if lower_bound: + lower_bound = format_amount(lower_bound) # Integrity checks for value and bounds try: - for i in [self.amount, self.upper_bound, self.lower_bound]: + for i in [amount, upper_bound, lower_bound]: if i: float(i) except ValueError: raise 
ValueError("Value, bounds and units must parse as integers or float") - if (self.lower_bound and self.upper_bound) and (float(self.lower_bound) > float(self.upper_bound) - or float(self.lower_bound) > float(self.amount)): + if (lower_bound and upper_bound) and (float(lower_bound) > float(upper_bound) or float(lower_bound) > float(amount)): raise ValueError("Lower bound too large") - if self.upper_bound and float(self.upper_bound) < float(self.amount): + if upper_bound and float(upper_bound) < float(amount): raise ValueError("Upper bound too small") - if self.amount: - self.value = (self.amount, self.unit, self.upper_bound, self.lower_bound) - else: - self.value = None - - if self.value: self.mainsnak.datavalue = { 'value': { - 'amount': self.amount, - 'unit': self.unit, - 'upperBound': self.upper_bound, - 'lowerBound': self.lower_bound + 'amount': amount, + 'unit': unit, + 'upperBound': upper_bound, + 'lowerBound': lower_bound }, 'type': 'quantity' } # remove bounds from json if they are undefined - if not self.upper_bound: + if not upper_bound: del self.mainsnak.datavalue['value']['upperBound'] - if not self.lower_bound: + if not lower_bound: del self.mainsnak.datavalue['value']['lowerBound'] def get_sparql_value(self): - return format_amount(self.amount) + return format_amount(self.mainsnak.datavalue['value']['amount']) diff --git a/wikibaseintegrator/datatypes/sense.py b/wikibaseintegrator/datatypes/sense.py index 7ef79fe3..52ea21b3 100644 --- a/wikibaseintegrator/datatypes/sense.py +++ b/wikibaseintegrator/datatypes/sense.py @@ -35,20 +35,21 @@ def __init__(self, value=None, **kwargs): super(Sense, self).__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - if value is not None: + + if value: pattern = re.compile(r'^L[0-9]+-S[0-9]+$') matches = pattern.match(value) if not matches: raise ValueError("Invalid sense ID ({}), format must be 'L[0-9]+-S[0-9]+'".format(value)) - self.value = value 
- - if self.value: self.mainsnak.datavalue = { 'value': { 'entity-type': 'sense', - 'id': self.value + 'id': value }, 'type': 'wikibase-entityid' } + + def get_sparql_value(self): + return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py index 2af74779..e955c6c3 100644 --- a/wikibaseintegrator/datatypes/string.py +++ b/wikibaseintegrator/datatypes/string.py @@ -28,10 +28,9 @@ def __init__(self, value=None, **kwargs): super(String, self).__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - self.value = value - if self.value: + if value: self.mainsnak.datavalue = { - 'value': self.value, + 'value': value, 'type': 'string' } diff --git a/wikibaseintegrator/datatypes/tabulardata.py b/wikibaseintegrator/datatypes/tabulardata.py index 8ad90b9e..6c6283e2 100644 --- a/wikibaseintegrator/datatypes/tabulardata.py +++ b/wikibaseintegrator/datatypes/tabulardata.py @@ -33,17 +33,15 @@ def __init__(self, value=None, **kwargs): super(TabularData, self).__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - if value is not None: + + if value: # TODO: Need to check if the value is a full URl like http://commons.wikimedia.org/data/main/Data:Taipei+Population.tab pattern = re.compile(r'^Data:((?![:|#]).)+\.tab$') matches = pattern.match(value) if not matches: raise ValueError("Value must start with Data: and end with .tab. 
In addition title should not contain characters like colon, hash or pipe.") - self.value = value - - if self.value: self.mainsnak.datavalue = { - 'value': self.value, + 'value': value, 'type': 'string' } diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py index b32f9add..fb8c430d 100644 --- a/wikibaseintegrator/datatypes/time.py +++ b/wikibaseintegrator/datatypes/time.py @@ -51,49 +51,33 @@ def __init__(self, time=None, before=0, after=0, precision=11, timezone=0, calen calendarmodel = calendarmodel or config['CALENDAR_MODEL_QID'] wikibase_url = wikibase_url or config['WIKIBASE_URL'] - self.time = None - self.before = None - self.after = None - self.precision = None - self.timezone = None - self.calendarmodel = None - if calendarmodel.startswith('Q'): calendarmodel = wikibase_url + '/entity/' + calendarmodel - value = (time, before, after, precision, timezone, calendarmodel) - - self.time, self.before, self.after, self.precision, self.timezone, self.calendarmodel = value - assert isinstance(self.time, str) or self.time is None, "Expected str, found {} ({})".format(type(self.time), self.time) + assert isinstance(time, str) or time is None, "Expected str, found {} ({})".format(type(time), time) - if self.time is not None: - if not (self.time.startswith("+") or self.time.startswith("-")): - self.time = "+" + self.time + if time: + if not (time.startswith("+") or time.startswith("-")): + time = "+" + time pattern = re.compile(r'^[+-][0-9]*-(?:1[0-2]|0[0-9])-(?:3[01]|0[0-9]|[12][0-9])T(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]Z$') - matches = pattern.match(self.time) + matches = pattern.match(time) if not matches: raise ValueError("Time time must be a string in the following format: '+%Y-%m-%dT%H:%M:%SZ'") - self.value = value - if self.precision < 0 or self.precision > 15: - raise ValueError("Invalid value for time precision, see https://www.mediawiki.org/wiki/Wikibase/DataModel/JSON#time") - if self.time: - self.value = (self.time, 
self.before, self.after, self.precision, self.timezone, self.calendarmodel) - else: - self.value = None + if precision < 0 or precision > 15: + raise ValueError("Invalid value for time precision, see https://www.mediawiki.org/wiki/Wikibase/DataModel/JSON#time") - if self.value: self.mainsnak.datavalue = { 'value': { - 'time': self.time, - 'before': self.before, - 'after': self.after, - 'precision': self.precision, - 'timezone': self.timezone, - 'calendarmodel': self.calendarmodel + 'time': time, + 'before': before, + 'after': after, + 'precision': precision, + 'timezone': timezone, + 'calendarmodel': calendarmodel }, 'type': 'time' } def get_sparql_value(self): - return self.time + return self.mainsnak.datavalue['value']['time'] diff --git a/wikibaseintegrator/datatypes/url.py b/wikibaseintegrator/datatypes/url.py index 33ba8354..eac13c22 100644 --- a/wikibaseintegrator/datatypes/url.py +++ b/wikibaseintegrator/datatypes/url.py @@ -35,17 +35,15 @@ def __init__(self, value=None, **kwargs): super(URL, self).__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - if value is not None: + + if value: pattern = re.compile(r'^([a-z][a-z\d+.-]*):([^][<>\"\x00-\x20\x7F])+$') matches = pattern.match(value) if not matches: raise ValueError("Invalid URL {}".format(value)) - self.value = value - - if self.value: self.mainsnak.datavalue = { - 'value': self.value, + 'value': value, 'type': 'string' } diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index a129a559..65e3e298 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -161,7 +161,7 @@ def write_required(self, data: list, if_exists='REPLACE', cqid=None) -> bool: append_props = [x.mainsnak.property_number for x in data] for x in data: - if x.value and x.mainsnak.datatype: + if x.mainsnak.datavalue and x.mainsnak.datatype: data_props.add(x.mainsnak.property_number) write_required = False 
self.load_item(data, cqid) @@ -191,11 +191,11 @@ def write_required(self, data: list, if_exists='REPLACE', cqid=None) -> bool: for date in data: # ensure that statements meant for deletion get handled properly reconst_props = set([x.mainsnak.property_number for x in tmp_rs]) - if (not date.value or not date.mainsnak.datatype) and date.mainsnak.property_number in reconst_props: + if not date.mainsnak.datatype and date.mainsnak.property_number in reconst_props: if self.debug: print("returned from delete prop handling") return True - elif not date.value or not date.mainsnak.datatype: + elif not date.mainsnak.datavalue or not date.mainsnak.datatype: # Ignore the deletion statements which are not in the reconstructed statements. continue From a1b7ba0cc7681bfa544a202cff33493e6320dfa2 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 28 Aug 2021 11:23:08 +0200 Subject: [PATCH 074/308] Clean parameters --- wikibaseintegrator/entities/lexeme.py | 4 ++-- wikibaseintegrator/entities/mediainfo.py | 2 +- wikibaseintegrator/entities/property.py | 2 +- wikibaseintegrator/models/claims.py | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/wikibaseintegrator/entities/lexeme.py b/wikibaseintegrator/entities/lexeme.py index 0e921840..52e6a483 100644 --- a/wikibaseintegrator/entities/lexeme.py +++ b/wikibaseintegrator/entities/lexeme.py @@ -24,8 +24,8 @@ def __init__(self, api, lemmas=None, lexical_category=None, language=None, forms def new(self, **kwargs) -> Lexeme: return Lexeme(self.api, **kwargs) - def get(self, entity_id) -> Lexeme: - json_data = super(Lexeme, self).get(entity_id=entity_id) + def get(self, entity_id, **kwargs) -> Lexeme: + json_data = super(Lexeme, self).get(entity_id=entity_id, **kwargs) return Lexeme(self.api).from_json(json_data=json_data['entities'][entity_id]) def get_json(self) -> {}: diff --git a/wikibaseintegrator/entities/mediainfo.py b/wikibaseintegrator/entities/mediainfo.py index 
49c3d130..d07afb33 100644 --- a/wikibaseintegrator/entities/mediainfo.py +++ b/wikibaseintegrator/entities/mediainfo.py @@ -9,7 +9,7 @@ class MediaInfo(BaseEntity): ETYPE = 'mediainfo' - def __init__(self, api, labels=None, descriptions=None, aliases=None, sitelinks=None, **kwargs) -> None: + def __init__(self, api, labels=None, descriptions=None, aliases=None, **kwargs) -> None: """ :param api: diff --git a/wikibaseintegrator/entities/property.py b/wikibaseintegrator/entities/property.py index 9249e652..a6dee0ba 100644 --- a/wikibaseintegrator/entities/property.py +++ b/wikibaseintegrator/entities/property.py @@ -28,7 +28,7 @@ def new(self, **kwargs) -> Property: return Property(self.api, **kwargs) def get(self, entity_id, **kwargs) -> Property: - json_data = super(Property, self).get(entity_id=entity_id) + json_data = super(Property, self).get(entity_id=entity_id, **kwargs) return Property(self.api).from_json(json_data=json_data['entities'][entity_id]) def get_json(self) -> {}: diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 6af6539f..9fc3a9db 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -119,14 +119,14 @@ class Claim: DTYPE = 'claim' subclasses = [] - def __init__(self, **kwargs): + def __init__(self, qualifiers=None, rank=None, references=None): self.mainsnak = Snak(datatype=self.DTYPE) self.type = 'statement' - self.qualifiers = kwargs.pop('qualifiers', Qualifiers()) + self.qualifiers = qualifiers or Qualifiers() self.qualifiers_order = [] self.id = None - self.rank = kwargs.pop('rank', 'normal') - self.references = kwargs.pop('references', References()) + self.rank = rank or 'normal' + self.references = references or References self.removed = False # Allow registration of subclasses of Claim into Claim.subclasses From 5f2f37f9cc056e22af11397cf524f60aeeebed29 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 28 Aug 2021 13:41:37 
+0200 Subject: [PATCH 075/308] Fix small issues --- test/test_wbi_fastrun.py | 4 ++-- wikibaseintegrator/datatypes/basedatatype.py | 4 ++-- wikibaseintegrator/models/claims.py | 2 +- wikibaseintegrator/wbi_fastrun.py | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py index 7883201b..47ebe64f 100644 --- a/test/test_wbi_fastrun.py +++ b/test/test_wbi_fastrun.py @@ -196,8 +196,8 @@ def test_append_props(): # if we are in append mode, and the refs are different, we should write frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType, use_refs=True) # with append - statements = [Item(value='Q24784025', prop_nr='P527', if_exists='APPEND')] - assert frc.write_required(data=statements, cqid=qid) is True + statements = [Item(value='Q24784025', prop_nr='P527')] + assert frc.write_required(data=statements, cqid=qid, if_exists='APPEND') is True # without append statements = [Item(value='Q24784025', prop_nr='P527')] assert frc.write_required(data=statements, cqid=qid) is True diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index 5aee50b1..a3c2b6ea 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -13,7 +13,7 @@ class BaseDataType(Claim): }} ''' - def __init__(self, **kwargs): + def __init__(self, prop_nr=None, **kwargs): """ Constructor, will be called by all data types. 
:param value: Data value of the Wikibase data snak @@ -61,7 +61,7 @@ def __init__(self, **kwargs): self.references = references self.value = None - self.mainsnak.property_number = kwargs.pop('prop_nr', None) + self.mainsnak.property_number = prop_nr or None @property def value(self): diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 9fc3a9db..bc0d7bdc 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -126,7 +126,7 @@ def __init__(self, qualifiers=None, rank=None, references=None): self.qualifiers_order = [] self.id = None self.rank = rank or 'normal' - self.references = references or References + self.references = references or References() self.removed = False # Allow registration of subclasses of Claim into Claim.subclasses diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 65e3e298..5f49b21a 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -81,7 +81,7 @@ def reconstruct_statements(self, qid: str) -> list: this_ref = [] for ref in refs: f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[ref[0]]][0] - this_ref.append(f(ref[1], prop_nr=ref[0], is_reference=True)) + this_ref.append(f(ref[1], prop_nr=ref[0])) references.append(this_ref) f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[prop_nr]][0] From cf3fac0084ba17acff22e3c8d1db3e1654949183 Mon Sep 17 00:00:00 2001 From: Dennis Priskorn Date: Sat, 28 Aug 2021 21:00:25 +0200 Subject: [PATCH 076/308] Implement enums (#188) * add ActionIfExists Enum 2 values for now: APPEND and REPLACE * New file wbi_enums.py with 3 new enums: ActionIfExists, WikibaseSnakValueType, WikibaseRank Replace all string arguments with the new enums. I did not update all the comments in the type classes. If you like this, I'll clean up the comments. 
:) All 17 tests = OK * snaks.py: Import WikibaseSnakValueType * Update basedatatype.py Fix comment * Update baseentity.py Consistency * Update wbi_enums.py * Fix snak * Improve Add unit tests for rank and snaktype Rename WikibaseSnakValueType to WikibaseSnakType Solve FIXMEs Co-authored-by: Myst <1592048+LeMyst@users.noreply.github.com> --- test/test_all.py | 5 +- test/test_wbi_core.py | 63 ++- test/test_wbi_fastrun.py | 7 +- wikibaseintegrator/datatypes/basedatatype.py | 9 +- wikibaseintegrator/entities/baseentity.py | 3 +- wikibaseintegrator/models/aliases.py | 13 +- wikibaseintegrator/models/claims.py | 561 +++++++++---------- wikibaseintegrator/models/language_values.py | 7 +- wikibaseintegrator/models/qualifiers.py | 3 +- wikibaseintegrator/models/references.py | 5 +- wikibaseintegrator/models/senses.py | 3 +- wikibaseintegrator/models/snaks.py | 22 +- wikibaseintegrator/wbi_enums.py | 28 + wikibaseintegrator/wbi_fastrun.py | 13 +- 14 files changed, 406 insertions(+), 336 deletions(-) create mode 100644 wikibaseintegrator/wbi_enums.py diff --git a/test/test_all.py b/test/test_all.py index 510c1d8c..db4af320 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -7,6 +7,7 @@ from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.entities.baseentity import MWApiError from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_enums import ActionIfExists from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper, get_user_agent config['DEBUG'] = True @@ -117,7 +118,7 @@ def test_fastrun_label(self): item.labels.set(value="Earth") item.labels.set(value="xfgfdsg") assert item.get_json()['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'} - item.aliases.set(values=["fake alias"], if_exists='APPEND') + item.aliases.set(values=["fake alias"], if_exists=ActionIfExists.APPEND) assert {'language': 'en', 'value': 'fake alias'} in item.get_json()['aliases']['en'] # something thats empty (for now.., can change, so 
this just makes sure no exception is thrown) @@ -132,7 +133,7 @@ def test_fastrun_label(self): item.aliases.get(language="ak") item.labels.set(value="label", language="ak") item.descriptions.set(value="d", language="ak") - item.aliases.set(values=["a"], language="ak", if_exists='APPEND') + item.aliases.set(values=["a"], language="ak", if_exists=ActionIfExists.APPEND) def test_sitelinks(): diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index 9faeac52..775091df 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -7,6 +7,7 @@ from wikibaseintegrator.datatypes.extra import EDTF, LocalMedia from wikibaseintegrator.entities import Item from wikibaseintegrator.models import LanguageValues +from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseRank, WikibaseSnakType from wikibaseintegrator.wbi_helpers import search_entities, generate_entity_instances wbi = WikibaseIntegrator() @@ -43,25 +44,25 @@ def test_basedatatype_if_exists(self): len_claims_original = len([x.mainsnak.datavalue['value']['id'] for x in item_original.claims.get('P31')]) item = deepcopy(item_original) - item.add_claims(instances, if_exists='APPEND') + item.add_claims(instances, if_exists=ActionIfExists.APPEND) claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')] # Append claims to item, only one unique added assert len(claims) == len_claims_original + 1 and 'Q1234' in claims and claims.count('Q1234') == 1 item = deepcopy(item_original) - item.add_claims(instances, if_exists='FORCE_APPEND') + item.add_claims(instances, if_exists=ActionIfExists.FORCE_APPEND) claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')] # Append claims to item, force two to be added assert len(claims) == len_claims_original + 2 and 'Q1234' in claims and claims.count('Q1234') == 2 item = deepcopy(item_original) - item.add_claims(instances, if_exists='KEEP') + item.add_claims(instances, if_exists=ActionIfExists.KEEP) claims = 
[x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')] # Append claims to item, there is already claims, so nothing added assert len(claims) == len_claims_original and 'Q1234' not in claims item = deepcopy(item_original) - item.add_claims(instances, if_exists='REPLACE') + item.add_claims(instances, if_exists=ActionIfExists.REPLACE) claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31') if not x.removed] removed_claims = [True for x in item.claims.get('P31') if x.removed] # Append claims to item, replace already existing claims with new ones, only one if it's the same property number @@ -82,17 +83,17 @@ def test_description(self): assert item.descriptions.get() == "lorem" item.descriptions.set(language='es', value="lorem ipsum") assert item.descriptions.get('es') == "lorem ipsum" - item.descriptions.set(language='en', value="lorem ipsum", if_exists='KEEP') + item.descriptions.set(language='en', value="lorem ipsum", if_exists=ActionIfExists.KEEP) assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'lorem'} # set_description on empty desription item.descriptions = LanguageValues() item.descriptions.set(value='') - item.descriptions.set(language='en', value="lorem ipsum", if_exists='KEEP') + item.descriptions.set(language='en', value="lorem ipsum", if_exists=ActionIfExists.KEEP) assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'lorem ipsum'} - item.descriptions.set(language='fr', value="lorem", if_exists='KEEP') - item.descriptions.set(language='fr', value="lorem ipsum", if_exists='REPLACE') - item.descriptions.set(language='en', value="lorem", if_exists='KEEP') + item.descriptions.set(language='fr', value="lorem", if_exists=ActionIfExists.KEEP) + item.descriptions.set(language='fr', value="lorem ipsum", if_exists=ActionIfExists.REPLACE) + item.descriptions.set(language='en', value="lorem", if_exists=ActionIfExists.KEEP) assert item.get_json()['descriptions']['en'] == {'language': 
'en', 'value': 'lorem ipsum'} assert item.get_json()['descriptions']['fr'] == {'language': 'fr', 'value': 'lorem ipsum'} @@ -109,10 +110,10 @@ def test_label(self): item.labels.set(value='Earth') item.labels.set(value='xfgfdsg') - item.labels.set(language='en', value='xfgfdsgtest', if_exists='KEEP') + item.labels.set(language='en', value='xfgfdsgtest', if_exists=ActionIfExists.KEEP) assert item.get_json()['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'} assert item.get_json()['labels']['fr'] == {'language': 'fr', 'value': 'Terre'} - item.aliases.set(values=["fake alias"], if_exists='APPEND') + item.aliases.set(values=["fake alias"], if_exists=ActionIfExists.APPEND) assert {'language': 'en', 'value': 'fake alias'} in item.get_json()['aliases']['en'] item.labels.set(language='fr', value=None) @@ -124,17 +125,17 @@ def test_label(self): item.aliases.set(language='ak') item.labels.set(value='label', language='ak') item.descriptions.set(value='d', language='ak') - item.aliases.set(values=['a'], language='ak', if_exists='APPEND') + item.aliases.set(values=['a'], language='ak', if_exists=ActionIfExists.APPEND) assert item.aliases.get('ak') == ['a'] item.aliases.set(values='b', language='ak') assert item.aliases.get('ak') == ['a', 'b'] - item.aliases.set(values='b', language='ak', if_exists='REPLACE') + item.aliases.set(values='b', language='ak', if_exists=ActionIfExists.REPLACE) assert item.aliases.get('ak') == ['b'] - item.aliases.set(values=['c'], language='ak', if_exists='REPLACE') + item.aliases.set(values=['c'], language='ak', if_exists=ActionIfExists.REPLACE) assert item.aliases.get('ak') == ['c'] - item.aliases.set(values=['d'], language='ak', if_exists='KEEP') + item.aliases.set(values=['d'], language='ak', if_exists=ActionIfExists.KEEP) assert 'd' not in item.aliases.get('ak') - item.aliases.set(language='ak', if_exists='KEEP') + item.aliases.set(language='ak', if_exists=ActionIfExists.KEEP) assert 'remove' not in item.get_json()['aliases']['ak'][0] 
item.aliases.set(language='ak') assert 'remove' in item.get_json()['aliases']['ak'][0] @@ -152,6 +153,36 @@ def test_entity_generator(self): for qid, entity in entity_instances: self.assertIn(qid, entities) + def test_rank(self): + t1 = String(value='test1', prop_nr='P1', rank='preferred') + assert t1.rank == WikibaseRank.PREFERRED + + t2 = String(value='test1', prop_nr='P1', rank=WikibaseRank.NORMAL) + assert t2.rank == WikibaseRank.NORMAL + + t2 = String(value='test1', prop_nr='P1', rank=WikibaseRank.DEPRECATED) + assert t2.get_json()['rank'] == WikibaseRank.DEPRECATED.value + + with self.assertRaises(ValueError): + String(value='test1', prop_nr='P1', rank='invalid_rank') + + def test_snaktype(self): + t1 = String(value='test1', prop_nr='P1') + t1.mainsnak.snaktype = 'novalue' + assert t1.mainsnak.snaktype == WikibaseSnakType.NO_VALUE + + t2 = String(value='test1', prop_nr='P1') + t2.mainsnak.snaktype = WikibaseSnakType.UNKNOWN_VALUE + assert t2.mainsnak.snaktype == WikibaseSnakType.UNKNOWN_VALUE + + t3 = String(value='test1', prop_nr='P1') + t3.mainsnak.snaktype = WikibaseSnakType.KNOWN_VALUE + assert t3.mainsnak.get_json()['snaktype'] == WikibaseSnakType.KNOWN_VALUE.value + + t4 = String(value='test1', prop_nr='P1') + with self.assertRaises(ValueError): + t4.mainsnak.snaktype = 'invalid_value' + def test_new_item_creation(self): data = [ String(value='test1', prop_nr='P1'), diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py index 47ebe64f..a63be99d 100644 --- a/test/test_wbi_fastrun.py +++ b/test/test_wbi_fastrun.py @@ -1,6 +1,7 @@ from wikibaseintegrator import wbi_fastrun, WikibaseIntegrator from wikibaseintegrator.datatypes import BaseDataType, Item, ExternalID from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_enums import ActionIfExists config['DEBUG'] = True @@ -185,10 +186,10 @@ def test_append_props(): frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType) # 
with append statements = [Item(value='Q24784025', prop_nr='P527')] - assert frc.write_required(data=statements, if_exists='APPEND', cqid=qid) is False + assert frc.write_required(data=statements, if_exists=ActionIfExists.APPEND, cqid=qid) is False # with force append statements = [Item(value='Q24784025', prop_nr='P527')] - assert frc.write_required(data=statements, if_exists='FORCE_APPEND', cqid=qid) is True + assert frc.write_required(data=statements, if_exists=ActionIfExists.FORCE_APPEND, cqid=qid) is True # without append statements = [Item(value='Q24784025', prop_nr='P527')] assert frc.write_required(data=statements, cqid=qid) is True @@ -197,7 +198,7 @@ def test_append_props(): frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType, use_refs=True) # with append statements = [Item(value='Q24784025', prop_nr='P527')] - assert frc.write_required(data=statements, cqid=qid, if_exists='APPEND') is True + assert frc.write_required(data=statements, cqid=qid, if_exists=ActionIfExists.APPEND) is True # without append statements = [Item(value='Q24784025', prop_nr='P527')] assert frc.write_required(data=statements, cqid=qid) is True diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index a3c2b6ea..a5b6e21b 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -1,4 +1,5 @@ from wikibaseintegrator.models import Claim, Snak, Snaks, References, Reference +from wikibaseintegrator.wbi_enums import WikibaseSnakType class BaseDataType(Claim): @@ -22,9 +23,9 @@ def __init__(self, prop_nr=None, **kwargs): :type prop_nr: A string with a prefixed 'P' and several digits e.g. 
'P715' (Drugbank ID) or an int :param datatype: The Wikibase data type declaration of this snak :type datatype: str - :param snaktype: The snak type of the Wikibase data snak, three values possible, depending if the value is a known (value), not existent (novalue) or - unknown (somevalue). See Wikibase documentation. - :type snaktype: a str of either 'value', 'novalue' or 'somevalue' + :param snaktype: One of the values in the enum WikibaseSnakType denoting the state of the value: + KNOWN_VALUE, NO_VALUE or UNKNOWN_VALUE + :type snaktype: WikibaseSnakType :param references: A one level nested list with reference Wikibase snaks of base type BaseDataType, e.g. references=[[, ], []] This will create two references, the first one with two statements, the second with one @@ -70,7 +71,7 @@ def value(self): @value.setter def value(self, value): if not value: - self.mainsnak.snaktype = 'novalue' + self.mainsnak.snaktype = WikibaseSnakType.NO_VALUE self.__value = value def get_sparql_value(self): diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 5274a475..a100fcfc 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -3,6 +3,7 @@ from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.models.claims import Claims, Claim from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_enums import ActionIfExists from wikibaseintegrator.wbi_exceptions import SearchOnlyError, NonUniqueLabelDescriptionPairError, MWApiError from wikibaseintegrator.wbi_fastrun import FastRunContainer from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper @@ -32,7 +33,7 @@ def __init__(self, api, lastrevid=None, type=None, id=None, claims=None): self.debug = config['DEBUG'] - def add_claims(self, claims, if_exists='APPEND'): + def add_claims(self, claims, if_exists=ActionIfExists.APPEND): if isinstance(claims, Claim): claims = [claims] elif 
not isinstance(claims, list): diff --git a/wikibaseintegrator/models/aliases.py b/wikibaseintegrator/models/aliases.py index 05f7e732..b4f5aa7a 100644 --- a/wikibaseintegrator/models/aliases.py +++ b/wikibaseintegrator/models/aliases.py @@ -1,5 +1,6 @@ from wikibaseintegrator.models.language_values import LanguageValue from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_enums import ActionIfExists class Aliases: @@ -27,9 +28,9 @@ def get(self, language=None): else: return None - def set(self, language=None, values=None, if_exists='APPEND'): + def set(self, language=None, values=None, if_exists=ActionIfExists.APPEND): language = language or config['DEFAULT_LANGUAGE'] - assert if_exists in ['REPLACE', 'APPEND', 'KEEP'] + assert if_exists in ActionIfExists assert language is not None @@ -37,7 +38,7 @@ def set(self, language=None, values=None, if_exists='APPEND'): self.aliases[language] = [] if values is None or values == '': - if if_exists != 'KEEP': + if if_exists != ActionIfExists.KEEP: for alias in self.aliases[language]: alias.remove() return self.aliases[language] @@ -47,7 +48,7 @@ def set(self, language=None, values=None, if_exists='APPEND'): elif not isinstance(values, list) and values is not None: raise TypeError("value must be a str or list") - if if_exists == 'REPLACE': + if if_exists == ActionIfExists.REPLACE: aliases = [] for value in values: alias = Alias(language, value) @@ -57,10 +58,10 @@ def set(self, language=None, values=None, if_exists='APPEND'): for value in values: alias = Alias(language, value) - if if_exists == 'APPEND': + if if_exists == ActionIfExists.APPEND: if alias not in self.aliases[language]: self.aliases[language].append(alias) - elif if_exists == 'KEEP': + elif if_exists == ActionIfExists.KEEP: if not self.aliases[language]: self.aliases[language].append(alias) diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index bc0d7bdc..e130f629 100644 --- 
a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -1,281 +1,280 @@ -from __future__ import annotations - -import copy -from typing import Union - -from wikibaseintegrator.models.qualifiers import Qualifiers -from wikibaseintegrator.models.references import References -from wikibaseintegrator.models.snaks import Snak - - -class Claims: - def __init__(self): - self.claims = {} - - @property - def claims(self): - return self.__claims - - @claims.setter - def claims(self, claims): - self.__claims = claims - - def get(self, property=None) -> list: - return self.claims[property] - - def add(self, claims: Union[list, Claim, None] = None, if_exists='REPLACE') -> Claims: - """ - - :param claims: - :param if_exists: Replace or append the statement. You can force an append if the statement already exists. - :type if_exists: A string of one of three allowed values: 'REPLACE', 'APPEND', 'FORCE_APPEND', 'KEEP' - :return: Claims - """ - - if if_exists not in ['REPLACE', 'APPEND', 'FORCE_APPEND', 'KEEP']: - raise ValueError('{} is not a valid if_exists value'.format(if_exists)) - - if isinstance(claims, Claim): - claims = [claims] - elif not isinstance(claims, list): - raise ValueError - - # TODO: Don't replace if claim is the same - if if_exists == 'REPLACE': - for claim in claims: - if claim is not None: - assert isinstance(claim, Claim) - property = claim.mainsnak.property_number - if property in self.claims: - for claim_to_remove in self.claims[property]: - if claim_to_remove not in claims: - claim_to_remove.remove() - - for claim in claims: - if claim is not None: - assert isinstance(claim, Claim) - property = claim.mainsnak.property_number - - if property not in self.claims: - self.claims[property] = [] - - if if_exists == 'KEEP': - if len(self.claims[property]) == 0: - self.claims[property].append(claim) - elif if_exists == 'FORCE_APPEND': - self.claims[property].append(claim) - elif if_exists == 'APPEND': - if claim not in 
self.claims[property]: - self.claims[property].append(claim) - elif if_exists == 'REPLACE': - if claim not in self.claims[property]: - self.claims[property].append(claim) - - return self - - def from_json(self, json_data) -> Claims: - for property in json_data: - for claim in json_data[property]: - from wikibaseintegrator.datatypes import BaseDataType - if 'datatype' in 'mainsnak': - data_type = [x for x in BaseDataType.subclasses if x.DTYPE == claim['mainsnak']['datatype']][0] - else: - data_type = Claim - self.add(claims=data_type().from_json(claim), if_exists='FORCE_APPEND') - - return self - - def get_json(self) -> {}: - json_data = {} - for property in self.claims: - if property not in json_data: - json_data[property] = [] - for claim in self.claims[property]: - json_data[property].append(claim.get_json()) - return json_data - - def clear(self): - self.claims = {} - - def __len__(self): - return len(self.claims) - - def __iter__(self): - iterate = [] - for claim in self.claims.values(): - iterate.extend(claim) - return iter(iterate) - - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), - ) - - -class Claim: - DTYPE = 'claim' - subclasses = [] - - def __init__(self, qualifiers=None, rank=None, references=None): - self.mainsnak = Snak(datatype=self.DTYPE) - self.type = 'statement' - self.qualifiers = qualifiers or Qualifiers() - self.qualifiers_order = [] - self.id = None - self.rank = rank or 'normal' - self.references = references or References() - self.removed = False - - # Allow registration of subclasses of Claim into Claim.subclasses - def __init_subclass__(cls, **kwargs): - super().__init_subclass__(**kwargs) - cls.subclasses.append(cls) - - @property - def mainsnak(self): - return self.__mainsnak - - @mainsnak.setter - def mainsnak(self, value): - 
self.__mainsnak = value - - @property - def type(self): - return self.__type - - @type.setter - def type(self, value): - self.__type = value - - @property - def qualifiers(self): - return self.__qualifiers - - @qualifiers.setter - def qualifiers(self, value): - assert isinstance(value, (Qualifiers, list)) - if isinstance(value, list): - self.__qualifiers = Qualifiers().set(value) - else: - self.__qualifiers = value - - @property - def qualifiers_order(self): - return self.__qualifiers_order - - @qualifiers_order.setter - def qualifiers_order(self, value): - self.__qualifiers_order = value - - @property - def id(self): - return self.__id - - @id.setter - def id(self, value): - self.__id = value - - @property - def rank(self): - return self.__rank - - @rank.setter - def rank(self, value): - if value not in ['normal', 'deprecated', 'preferred']: - raise ValueError("{} not a valid rank".format(value)) - - self.__rank = value - - @property - def references(self): - return self.__references - - @references.setter - def references(self, value): - self.__references = value - - @property - def removed(self): - return self.__removed - - @removed.setter - def removed(self, value): - self.__removed = value - - def remove(self, remove=True): - self.removed = remove - - def from_json(self, json_data) -> Claim: - self.mainsnak = Snak().from_json(json_data['mainsnak']) - self.type = json_data['type'] - if 'qualifiers' in json_data: - self.qualifiers = Qualifiers().from_json(json_data['qualifiers']) - if 'qualifiers-order' in json_data: - self.qualifiers_order = json_data['qualifiers-order'] - self.id = json_data['id'] - self.rank = json_data['rank'] - if 'references' in json_data: - self.references = References().from_json(json_data['references']) - - return self - - def get_json(self) -> {}: - json_data = { - 'mainsnak': self.mainsnak.get_json(), - 'type': self.type, - 'id': self.id, - 'rank': self.rank - } - # Remove id if it's a temporary one - if not self.id: - del 
json_data['id'] - if len(self.qualifiers) > 0: - json_data['qualifiers'] = self.qualifiers.get_json() - json_data['qualifiers-order'] = self.qualifiers_order - if len(self.references) > 0: - json_data['references'] = self.references.get_json() - if self.removed: - json_data['remove'] = '' - return json_data - - def has_equal_qualifiers(self, other): - # check if the qualifiers are equal with the 'other' object - equal_qualifiers = True - self_qualifiers = copy.deepcopy(self.qualifiers) - other_qualifiers = copy.deepcopy(other.qualifiers) - - if len(self_qualifiers) != len(other_qualifiers): - equal_qualifiers = False - else: - flg = [False for _ in range(len(self_qualifiers))] - for count, i in enumerate(self_qualifiers): - for q in other_qualifiers: - if i == q: - flg[count] = True - if not all(flg): - equal_qualifiers = False - - return equal_qualifiers - - def __contains__(self, item): - if isinstance(item, Claim): - return self == item - elif isinstance(item, str): - return self.mainsnak.datavalue == item - raise TypeError - - def __eq__(self, other): - if isinstance(other, Claim): - return self.mainsnak.datavalue == other.mainsnak.datavalue and self.mainsnak.property_number == other.mainsnak.property_number and self.has_equal_qualifiers(other) - raise TypeError - - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), - ) +from __future__ import annotations + +import copy +from typing import Union + +from wikibaseintegrator.models.qualifiers import Qualifiers +from wikibaseintegrator.models.references import References +from wikibaseintegrator.models.snaks import Snak +from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseRank + + +class Claims: + def __init__(self): + self.claims = {} + + @property + def claims(self): + return self.__claims + + 
@claims.setter + def claims(self, claims): + self.__claims = claims + + def get(self, property=None) -> list: + return self.claims[property] + + def add(self, claims: Union[list, Claim, None] = None, if_exists=ActionIfExists.REPLACE) -> Claims: + """ + + :param claims: + :param if_exists: Replace or append the statement. You can force an append if the statement already exists. + :type if_exists: One of the values of the enum ActionIfExists: REPLACE, APPEND, FORCE_APPEND, KEEP + :return: Claims + """ + + if if_exists not in ActionIfExists: + raise ValueError('{} is not a valid if_exists value. Use the enum ActionIfExists'.format(if_exists)) + + if isinstance(claims, Claim): + claims = [claims] + elif not isinstance(claims, list): + raise ValueError + + # TODO: Don't replace if claim is the same + if if_exists == ActionIfExists.REPLACE: + for claim in claims: + if claim is not None: + assert isinstance(claim, Claim) + property = claim.mainsnak.property_number + if property in self.claims: + for claim_to_remove in self.claims[property]: + if claim_to_remove not in claims: + claim_to_remove.remove() + + for claim in claims: + if claim is not None: + assert isinstance(claim, Claim) + property = claim.mainsnak.property_number + + if property not in self.claims: + self.claims[property] = [] + + if if_exists == ActionIfExists.KEEP: + if len(self.claims[property]) == 0: + self.claims[property].append(claim) + elif if_exists == ActionIfExists.FORCE_APPEND: + self.claims[property].append(claim) + elif if_exists == ActionIfExists.APPEND: + if claim not in self.claims[property]: + self.claims[property].append(claim) + elif if_exists == ActionIfExists.REPLACE: + if claim not in self.claims[property]: + self.claims[property].append(claim) + + return self + + def from_json(self, json_data) -> Claims: + for property in json_data: + for claim in json_data[property]: + from wikibaseintegrator.datatypes import BaseDataType + if 'datatype' in 'mainsnak': + data_type = [x for x in 
BaseDataType.subclasses if x.DTYPE == claim['mainsnak']['datatype']][0] + else: + data_type = Claim + self.add(claims=data_type().from_json(claim), if_exists=ActionIfExists.FORCE_APPEND) + + return self + + def get_json(self) -> {}: + json_data = {} + for property in self.claims: + if property not in json_data: + json_data[property] = [] + for claim in self.claims[property]: + json_data[property].append(claim.get_json()) + return json_data + + def clear(self): + self.claims = {} + + def __len__(self): + return len(self.claims) + + def __iter__(self): + iterate = [] + for claim in self.claims.values(): + iterate.extend(claim) + return iter(iterate) + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) + + +class Claim: + DTYPE = 'claim' + subclasses = [] + + def __init__(self, qualifiers=None, rank=None, references=None): + self.mainsnak = Snak(datatype=self.DTYPE) + self.type = 'statement' + self.qualifiers = qualifiers or Qualifiers() + self.qualifiers_order = [] + self.id = None + self.rank: WikibaseRank = rank or WikibaseRank.NORMAL + self.references = references or References() + self.removed = False + + # Allow registration of subclasses of Claim into Claim.subclasses + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + cls.subclasses.append(cls) + + @property + def mainsnak(self): + return self.__mainsnak + + @mainsnak.setter + def mainsnak(self, value): + self.__mainsnak = value + + @property + def type(self): + return self.__type + + @type.setter + def type(self, value): + self.__type = value + + @property + def qualifiers(self): + return self.__qualifiers + + @qualifiers.setter + def qualifiers(self, value): + assert isinstance(value, (Qualifiers, list)) + if isinstance(value, list): + self.__qualifiers = Qualifiers().set(value) + 
else: + self.__qualifiers = value + + @property + def qualifiers_order(self): + return self.__qualifiers_order + + @qualifiers_order.setter + def qualifiers_order(self, value): + self.__qualifiers_order = value + + @property + def id(self): + return self.__id + + @id.setter + def id(self, value): + self.__id = value + + @property + def rank(self): + return self.__rank + + @rank.setter + def rank(self, value): + """Parse the rank. The enum throws an error if it is not one of the recognized values""" + self.__rank = WikibaseRank(value) + + @property + def references(self): + return self.__references + + @references.setter + def references(self, value): + self.__references = value + + @property + def removed(self): + return self.__removed + + @removed.setter + def removed(self, value): + self.__removed = value + + def remove(self, remove=True): + self.removed = remove + + def from_json(self, json_data) -> Claim: + self.mainsnak = Snak().from_json(json_data['mainsnak']) + self.type = json_data['type'] + if 'qualifiers' in json_data: + self.qualifiers = Qualifiers().from_json(json_data['qualifiers']) + if 'qualifiers-order' in json_data: + self.qualifiers_order = json_data['qualifiers-order'] + self.id = json_data['id'] + self.rank: WikibaseRank = WikibaseRank(json_data['rank']) + if 'references' in json_data: + self.references = References().from_json(json_data['references']) + + return self + + def get_json(self) -> {}: + json_data = { + 'mainsnak': self.mainsnak.get_json(), + 'type': self.type, + 'id': self.id, + 'rank': self.rank.value + } + # Remove id if it's a temporary one + if not self.id: + del json_data['id'] + if len(self.qualifiers) > 0: + json_data['qualifiers'] = self.qualifiers.get_json() + json_data['qualifiers-order'] = self.qualifiers_order + if len(self.references) > 0: + json_data['references'] = self.references.get_json() + if self.removed: + json_data['remove'] = '' + return json_data + + def has_equal_qualifiers(self, other): + # check if the 
qualifiers are equal with the 'other' object + equal_qualifiers = True + self_qualifiers = copy.deepcopy(self.qualifiers) + other_qualifiers = copy.deepcopy(other.qualifiers) + + if len(self_qualifiers) != len(other_qualifiers): + equal_qualifiers = False + else: + flg = [False for _ in range(len(self_qualifiers))] + for count, i in enumerate(self_qualifiers): + for q in other_qualifiers: + if i == q: + flg[count] = True + if not all(flg): + equal_qualifiers = False + + return equal_qualifiers + + def __contains__(self, item): + if isinstance(item, Claim): + return self == item + elif isinstance(item, str): + return self.mainsnak.datavalue == item + raise TypeError + + def __eq__(self, other): + if isinstance(other, Claim): + return self.mainsnak.datavalue == other.mainsnak.datavalue and self.mainsnak.property_number == other.mainsnak.property_number and self.has_equal_qualifiers(other) + raise TypeError + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) diff --git a/wikibaseintegrator/models/language_values.py b/wikibaseintegrator/models/language_values.py index d853efcb..f4b070ac 100644 --- a/wikibaseintegrator/models/language_values.py +++ b/wikibaseintegrator/models/language_values.py @@ -1,4 +1,5 @@ from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_enums import ActionIfExists class LanguageValues: @@ -28,16 +29,16 @@ def get(self, language=None): else: return None - def set(self, language=None, value=None, if_exists='REPLACE'): + def set(self, language=None, value=None, if_exists=ActionIfExists.REPLACE): language = language or config['DEFAULT_LANGUAGE'] - assert if_exists in ['REPLACE', 'KEEP'] + assert if_exists in [ActionIfExists.REPLACE, ActionIfExists.KEEP] # Remove value if None if value is None and language in self.values: 
self.values[language].remove() return None - if if_exists == 'REPLACE' or self.get(language=language) is None: + if if_exists == ActionIfExists.REPLACE or self.get(language=language) is None: language_value = LanguageValue(language, value) self.add(language_value) return language_value diff --git a/wikibaseintegrator/models/qualifiers.py b/wikibaseintegrator/models/qualifiers.py index 7061b5a1..db49c21e 100644 --- a/wikibaseintegrator/models/qualifiers.py +++ b/wikibaseintegrator/models/qualifiers.py @@ -1,6 +1,7 @@ from __future__ import annotations from wikibaseintegrator.models.snaks import Snak +from wikibaseintegrator.wbi_enums import ActionIfExists class Qualifiers: @@ -31,7 +32,7 @@ def get(self, property=None): return self.qualifiers[property] # TODO: implement if_exists - def add(self, qualifier=None, if_exists='REPLACE'): + def add(self, qualifier=None, if_exists=ActionIfExists.REPLACE): from wikibaseintegrator.models.claims import Claim if isinstance(qualifier, Claim): qualifier = Snak().from_json(qualifier.get_json()['mainsnak']) diff --git a/wikibaseintegrator/models/references.py b/wikibaseintegrator/models/references.py index 042fd718..10caa183 100644 --- a/wikibaseintegrator/models/references.py +++ b/wikibaseintegrator/models/references.py @@ -1,6 +1,7 @@ from __future__ import annotations from wikibaseintegrator.models.snaks import Snaks, Snak +from wikibaseintegrator.wbi_enums import ActionIfExists class References: @@ -14,7 +15,7 @@ def get(self, hash=None): return None # TODO: implement if_exists - def add(self, reference=None, if_exists='REPLACE'): + def add(self, reference=None, if_exists=ActionIfExists.REPLACE): from wikibaseintegrator.models.claims import Claim if isinstance(reference, Claim): reference = Reference(snaks=Snaks().add(Snak().from_json(reference.get_json()['mainsnak']))) @@ -99,7 +100,7 @@ def snaks_order(self, value): self.__snaks_order = value # TODO: implement if_exists - def add(self, snak=None, if_exists='REPLACE'): + def 
add(self, snak=None, if_exists=ActionIfExists.REPLACE): from wikibaseintegrator.models.claims import Claim if isinstance(snak, Claim): snak = Snak().from_json(snak.get_json()['mainsnak']) diff --git a/wikibaseintegrator/models/senses.py b/wikibaseintegrator/models/senses.py index d6dcd986..8d654596 100644 --- a/wikibaseintegrator/models/senses.py +++ b/wikibaseintegrator/models/senses.py @@ -1,5 +1,6 @@ from wikibaseintegrator.models.claims import Claims from wikibaseintegrator.models.language_values import LanguageValues +from wikibaseintegrator.wbi_enums import ActionIfExists class Senses: @@ -13,7 +14,7 @@ def get(self, id): return None # TODO: implement if_exists - def add(self, sense, if_exists='REPLACE'): + def add(self, sense, if_exists=ActionIfExists.REPLACE): self.senses.append(sense) return self diff --git a/wikibaseintegrator/models/snaks.py b/wikibaseintegrator/models/snaks.py index b788fe63..17231ca3 100644 --- a/wikibaseintegrator/models/snaks.py +++ b/wikibaseintegrator/models/snaks.py @@ -3,6 +3,8 @@ import re from typing import Optional +from wikibaseintegrator.wbi_enums import WikibaseSnakType + class Snaks: def __init__(self): @@ -56,8 +58,8 @@ def __repr__(self): class Snak: - def __init__(self, snaktype=None, property_number=None, hash=None, datavalue=None, datatype=None): - self.snaktype = snaktype or 'value' + def __init__(self, snaktype: WikibaseSnakType = WikibaseSnakType.KNOWN_VALUE, property_number=None, hash=None, datavalue=None, datatype=None): + self.snaktype = snaktype self.property_number = property_number self.hash = hash self.datavalue = datavalue or {} @@ -68,11 +70,9 @@ def snaktype(self): return self.__snaktype @snaktype.setter - def snaktype(self, value): - if value not in ['value', 'novalue', 'somevalue']: - raise ValueError('{} is not a valid snak type'.format(value)) - - self.__snaktype = value + def snaktype(self, value: WikibaseSnakType): + """Parse the snaktype. 
The enum throws an error if it is not one of the recognized values""" + self.__snaktype = WikibaseSnakType(value) @property def property_number(self): @@ -108,7 +108,7 @@ def datavalue(self): @datavalue.setter def datavalue(self, value): if value is not None: - self.snaktype = 'value' + self.snaktype = WikibaseSnakType.KNOWN_VALUE self.__datavalue = value @property @@ -120,7 +120,7 @@ def datatype(self, value): self.__datatype = value def from_json(self, json_data) -> Snak: - self.snaktype = json_data['snaktype'] + self.snaktype: WikibaseSnakType = WikibaseSnakType(json_data['snaktype']) self.property_number = json_data['property'] if 'hash' in json_data: self.hash = json_data['hash'] @@ -132,13 +132,13 @@ def get_json(self) -> {}: json_data = { - 'snaktype': self.snaktype, + 'snaktype': self.snaktype.value, 'property': self.property_number, 'datatype': self.datatype, 'datavalue': self.datavalue } - if self.snaktype in {'novalue', 'somevalue'}: + if self.snaktype in [WikibaseSnakType.NO_VALUE, WikibaseSnakType.UNKNOWN_VALUE]: del json_data['datavalue'] return json_data diff --git a/wikibaseintegrator/wbi_enums.py b/wikibaseintegrator/wbi_enums.py new file mode 100644 index 00000000..ebd0d306 --- /dev/null +++ b/wikibaseintegrator/wbi_enums.py @@ -0,0 +1,28 @@ +from enum import Enum, auto + + +class ActionIfExists(Enum): + """ + Action to take if a statement with a property already exists on the item or lexeme. + """ + APPEND = auto() + FORCE_APPEND = auto() + KEEP = auto() + REPLACE = auto() + + +class WikibaseRank(Enum): + DEPRECATED = "deprecated" + NORMAL = "normal" + PREFERRED = "preferred" + + +class WikibaseSnakType(Enum): + """ + The snak type of the Wikibase data snak, three values possible, + depending if the value is a known (value), not existent (novalue) or + unknown (somevalue). See Wikibase documentation. 
+ """ + KNOWN_VALUE = "value" + NO_VALUE = "novalue" + UNKNOWN_VALUE = "somevalue" diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 5f49b21a..96d8b7c6 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -5,6 +5,7 @@ from itertools import chain from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_enums import ActionIfExists from wikibaseintegrator.wbi_helpers import format_amount, execute_sparql_query @@ -153,11 +154,11 @@ def load_item(self, claims: list, cqid=None) -> bool: print(qid) self.current_qid = qid - def write_required(self, data: list, if_exists='REPLACE', cqid=None) -> bool: + def write_required(self, data: list, if_exists=ActionIfExists.REPLACE, cqid=None) -> bool: del_props = set() data_props = set() append_props = [] - if if_exists == 'APPEND': + if if_exists == ActionIfExists.APPEND: append_props = [x.mainsnak.property_number for x in data] for x in data: @@ -177,7 +178,7 @@ def write_required(self, data: list, if_exists='REPLACE', cqid=None) -> bool: for x in app_data: for y in rec_app_data: if x.mainsnak.datavalue == y.mainsnak.datavalue: - if y.equals(x, include_ref=self.use_refs) and if_exists != 'FORCE_APPEND': + if y.equals(x, include_ref=self.use_refs) and if_exists != ActionIfExists.FORCE_APPEND: comp.append(True) # comp = [True for x in app_data for y in rec_app_data if x.equals(y, include_ref=self.use_refs)] @@ -289,7 +290,9 @@ def get_language_data(self, qid: str, lang: str, lang_data_type: str) -> list: all_lang_strings = [''] return all_lang_strings - def check_language_data(self, qid: str, lang_data: list, lang: str, lang_data_type: str, if_exists: str = 'APPEND') -> bool: + def check_language_data(self, qid: str, lang_data: list, lang: str, lang_data_type: str, + # Default to append + if_exists: ActionIfExists = ActionIfExists.APPEND) -> bool: """ Method to check if certain language data exists as a label, description or aliases 
:param qid: Wikibase item id @@ -301,7 +304,7 @@ def check_language_data(self, qid: str, lang_data: list, lang: str, lang_data_ty """ all_lang_strings = set(x.strip().casefold() for x in self.get_language_data(qid, lang, lang_data_type)) - if if_exists == 'REPLACE': + if if_exists == ActionIfExists.REPLACE: return not collections.Counter(all_lang_strings) == collections.Counter(map(lambda x: x.casefold(), lang_data)) else: for s in lang_data: From dac07a4d76ffbfde3f7c84fd427984c859936e06 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 29 Aug 2021 10:05:12 +0200 Subject: [PATCH 077/308] Some changes Change if_exists to action_if_exists (#190) Add unit tests for entities Allow more parameters in entities write() method Simplify exceptions and add unit tests --- test/test_all.py | 4 +- test/test_entity_item.py | 37 +++++++++++++++ test/test_entity_lexeme.py | 31 ++++++++++++ test/test_entity_mediainfo.py | 31 ++++++++++++ test/test_entity_property.py | 32 +++++++++++++ test/test_wbi_core.py | 34 ++++++------- test/test_wbi_exceptions.py | 50 ++++++++++++++++++++ test/test_wbi_fastrun.py | 6 +-- wikibaseintegrator/entities/baseentity.py | 8 ++-- wikibaseintegrator/entities/item.py | 19 +++++++- wikibaseintegrator/entities/lexeme.py | 19 +++++++- wikibaseintegrator/entities/mediainfo.py | 19 +++++++- wikibaseintegrator/entities/property.py | 19 +++++++- wikibaseintegrator/models/aliases.py | 12 ++--- wikibaseintegrator/models/claims.py | 22 ++++----- wikibaseintegrator/models/forms.py | 2 +- wikibaseintegrator/models/language_values.py | 6 +-- wikibaseintegrator/models/qualifiers.py | 4 +- wikibaseintegrator/models/references.py | 8 ++-- wikibaseintegrator/models/senses.py | 4 +- wikibaseintegrator/wbi_exceptions.py | 29 ++---------- wikibaseintegrator/wbi_fastrun.py | 12 ++--- 22 files changed, 315 insertions(+), 93 deletions(-) create mode 100644 test/test_entity_item.py create mode 100644 test/test_entity_lexeme.py create mode 
100644 test/test_entity_mediainfo.py create mode 100644 test/test_entity_property.py create mode 100644 test/test_wbi_exceptions.py diff --git a/test/test_all.py b/test/test_all.py index db4af320..51799ccb 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -118,7 +118,7 @@ def test_fastrun_label(self): item.labels.set(value="Earth") item.labels.set(value="xfgfdsg") assert item.get_json()['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'} - item.aliases.set(values=["fake alias"], if_exists=ActionIfExists.APPEND) + item.aliases.set(values=["fake alias"], action_if_exists=ActionIfExists.APPEND) assert {'language': 'en', 'value': 'fake alias'} in item.get_json()['aliases']['en'] # something thats empty (for now.., can change, so this just makes sure no exception is thrown) @@ -133,7 +133,7 @@ def test_fastrun_label(self): item.aliases.get(language="ak") item.labels.set(value="label", language="ak") item.descriptions.set(value="d", language="ak") - item.aliases.set(values=["a"], language="ak", if_exists=ActionIfExists.APPEND) + item.aliases.set(values=["a"], language="ak", action_if_exists=ActionIfExists.APPEND) def test_sitelinks(): diff --git a/test/test_entity_item.py b/test/test_entity_item.py new file mode 100644 index 00000000..07a52f0b --- /dev/null +++ b/test/test_entity_item.py @@ -0,0 +1,37 @@ +import unittest + +from simplejson import JSONDecodeError + +from wikibaseintegrator import WikibaseIntegrator + +wbi = WikibaseIntegrator() + + +class TestEntityItem(unittest.TestCase): + + def test_get(self): + # Test with complete id + assert wbi.item.get('Q582').id == 'Q582' + # Test with numeric id as string + assert wbi.item.get('582').id == 'Q582' + # Test with numeric id as int + assert wbi.item.get(582).id == 'Q582' + + # Test with invalid id + with self.assertRaises(ValueError): + wbi.item.get('L5') + + # Test with zero id + with self.assertRaises(ValueError): + wbi.item.get(0) + + # Test with negative id + with self.assertRaises(ValueError): + 
wbi.item.get(-1) + + def test_get_json(self): + assert wbi.item.get('Q582').get_json()['labels']['fr']['value'] == 'Villeurbanne' + + def test_write(self): + with self.assertRaises(JSONDecodeError): + wbi.item.get('Q582').write(allow_anonymous=True, mediawiki_api_url='https://httpstat.us/200') diff --git a/test/test_entity_lexeme.py b/test/test_entity_lexeme.py new file mode 100644 index 00000000..ed65fd77 --- /dev/null +++ b/test/test_entity_lexeme.py @@ -0,0 +1,31 @@ +import unittest + +from wikibaseintegrator import WikibaseIntegrator + +wbi = WikibaseIntegrator() + + +class TestEntityLexeme(unittest.TestCase): + + def test_get(self): + # Test with complete id + assert wbi.lexeme.get('L5').id == 'L5' + # Test with numeric id as string + assert wbi.lexeme.get('5').id == 'L5' + # Test with numeric id as int + assert wbi.lexeme.get(5).id == 'L5' + + # Test with invalid id + with self.assertRaises(ValueError): + wbi.lexeme.get('Q5') + + # Test with zero id + with self.assertRaises(ValueError): + wbi.lexeme.get(0) + + # Test with negative id + with self.assertRaises(ValueError): + wbi.lexeme.get(-1) + + def test_get_json(self): + assert wbi.lexeme.get('L5').get_json()['forms'][0]['representations']['es']['value'] == 'pinos' diff --git a/test/test_entity_mediainfo.py b/test/test_entity_mediainfo.py new file mode 100644 index 00000000..966e862f --- /dev/null +++ b/test/test_entity_mediainfo.py @@ -0,0 +1,31 @@ +import unittest + +from wikibaseintegrator import WikibaseIntegrator + +wbi = WikibaseIntegrator() + + +class TestEntityMediaInfo(unittest.TestCase): + + def test_get(self): + # Test with complete id + assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279' + # Test with numeric id as string + assert wbi.mediainfo.get('75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279' + # Test with numeric id as int + assert wbi.mediainfo.get(75908279, 
mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279' + + # Test with invalid id + with self.assertRaises(ValueError): + wbi.mediainfo.get('L5') + + # Test with zero id + with self.assertRaises(ValueError): + wbi.mediainfo.get(0) + + # Test with negative id + with self.assertRaises(ValueError): + wbi.mediainfo.get(-1) + + def test_get_json(self): + assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').get_json() diff --git a/test/test_entity_property.py b/test/test_entity_property.py new file mode 100644 index 00000000..24fd0f23 --- /dev/null +++ b/test/test_entity_property.py @@ -0,0 +1,32 @@ +import unittest +from pprint import pprint + +from wikibaseintegrator import WikibaseIntegrator + +wbi = WikibaseIntegrator() + + +class TestEntityProperty(unittest.TestCase): + + def test_get(self): + # Test with complete id + assert wbi.property.get('P50').id == 'P50' + # Test with numeric id as string + assert wbi.property.get('50').id == 'P50' + # Test with numeric id as int + assert wbi.property.get(50).id == 'P50' + + # Test with invalid id + with self.assertRaises(ValueError): + wbi.property.get('L5') + + # Test with zero id + with self.assertRaises(ValueError): + wbi.property.get(0) + + # Test with negative id + with self.assertRaises(ValueError): + wbi.property.get(-1) + + def test_get_json(self): + assert wbi.property.get('P50', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').get_json()['labels']['fr']['value'] == 'auteur' diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index 775091df..d4db7ddb 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -38,31 +38,31 @@ def test_search_only(self): assert item.labels.get('es') == "Tierra" - def test_basedatatype_if_exists(self): + def test_basedatatype_action_if_exists(self): instances = [datatypes.Item(prop_nr='P31', value='Q1234'), datatypes.Item(prop_nr='P31', value='Q1234')] item_original = wbi.item.get('Q2') 
len_claims_original = len([x.mainsnak.datavalue['value']['id'] for x in item_original.claims.get('P31')]) item = deepcopy(item_original) - item.add_claims(instances, if_exists=ActionIfExists.APPEND) + item.add_claims(instances, action_if_exists=ActionIfExists.APPEND) claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')] # Append claims to item, only one unique added assert len(claims) == len_claims_original + 1 and 'Q1234' in claims and claims.count('Q1234') == 1 item = deepcopy(item_original) - item.add_claims(instances, if_exists=ActionIfExists.FORCE_APPEND) + item.add_claims(instances, action_if_exists=ActionIfExists.FORCE_APPEND) claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')] # Append claims to item, force two to be added assert len(claims) == len_claims_original + 2 and 'Q1234' in claims and claims.count('Q1234') == 2 item = deepcopy(item_original) - item.add_claims(instances, if_exists=ActionIfExists.KEEP) + item.add_claims(instances, action_if_exists=ActionIfExists.KEEP) claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')] # Append claims to item, there is already claims, so nothing added assert len(claims) == len_claims_original and 'Q1234' not in claims item = deepcopy(item_original) - item.add_claims(instances, if_exists=ActionIfExists.REPLACE) + item.add_claims(instances, action_if_exists=ActionIfExists.REPLACE) claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31') if not x.removed] removed_claims = [True for x in item.claims.get('P31') if x.removed] # Append claims to item, replace already existing claims with new ones, only one if it's the same property number @@ -83,17 +83,17 @@ def test_description(self): assert item.descriptions.get() == "lorem" item.descriptions.set(language='es', value="lorem ipsum") assert item.descriptions.get('es') == "lorem ipsum" - item.descriptions.set(language='en', value="lorem ipsum", if_exists=ActionIfExists.KEEP) + 
item.descriptions.set(language='en', value="lorem ipsum", action_if_exists=ActionIfExists.KEEP) assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'lorem'} # set_description on empty desription item.descriptions = LanguageValues() item.descriptions.set(value='') - item.descriptions.set(language='en', value="lorem ipsum", if_exists=ActionIfExists.KEEP) + item.descriptions.set(language='en', value="lorem ipsum", action_if_exists=ActionIfExists.KEEP) assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'lorem ipsum'} - item.descriptions.set(language='fr', value="lorem", if_exists=ActionIfExists.KEEP) - item.descriptions.set(language='fr', value="lorem ipsum", if_exists=ActionIfExists.REPLACE) - item.descriptions.set(language='en', value="lorem", if_exists=ActionIfExists.KEEP) + item.descriptions.set(language='fr', value="lorem", action_if_exists=ActionIfExists.KEEP) + item.descriptions.set(language='fr', value="lorem ipsum", action_if_exists=ActionIfExists.REPLACE) + item.descriptions.set(language='en', value="lorem", action_if_exists=ActionIfExists.KEEP) assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'lorem ipsum'} assert item.get_json()['descriptions']['fr'] == {'language': 'fr', 'value': 'lorem ipsum'} @@ -110,10 +110,10 @@ def test_label(self): item.labels.set(value='Earth') item.labels.set(value='xfgfdsg') - item.labels.set(language='en', value='xfgfdsgtest', if_exists=ActionIfExists.KEEP) + item.labels.set(language='en', value='xfgfdsgtest', action_if_exists=ActionIfExists.KEEP) assert item.get_json()['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'} assert item.get_json()['labels']['fr'] == {'language': 'fr', 'value': 'Terre'} - item.aliases.set(values=["fake alias"], if_exists=ActionIfExists.APPEND) + item.aliases.set(values=["fake alias"], action_if_exists=ActionIfExists.APPEND) assert {'language': 'en', 'value': 'fake alias'} in item.get_json()['aliases']['en'] 
item.labels.set(language='fr', value=None) @@ -125,17 +125,17 @@ def test_label(self): item.aliases.set(language='ak') item.labels.set(value='label', language='ak') item.descriptions.set(value='d', language='ak') - item.aliases.set(values=['a'], language='ak', if_exists=ActionIfExists.APPEND) + item.aliases.set(values=['a'], language='ak', action_if_exists=ActionIfExists.APPEND) assert item.aliases.get('ak') == ['a'] item.aliases.set(values='b', language='ak') assert item.aliases.get('ak') == ['a', 'b'] - item.aliases.set(values='b', language='ak', if_exists=ActionIfExists.REPLACE) + item.aliases.set(values='b', language='ak', action_if_exists=ActionIfExists.REPLACE) assert item.aliases.get('ak') == ['b'] - item.aliases.set(values=['c'], language='ak', if_exists=ActionIfExists.REPLACE) + item.aliases.set(values=['c'], language='ak', action_if_exists=ActionIfExists.REPLACE) assert item.aliases.get('ak') == ['c'] - item.aliases.set(values=['d'], language='ak', if_exists=ActionIfExists.KEEP) + item.aliases.set(values=['d'], language='ak', action_if_exists=ActionIfExists.KEEP) assert 'd' not in item.aliases.get('ak') - item.aliases.set(language='ak', if_exists=ActionIfExists.KEEP) + item.aliases.set(language='ak', action_if_exists=ActionIfExists.KEEP) assert 'remove' not in item.get_json()['aliases']['ak'][0] item.aliases.set(language='ak') assert 'remove' in item.get_json()['aliases']['ak'][0] diff --git a/test/test_wbi_exceptions.py b/test/test_wbi_exceptions.py new file mode 100644 index 00000000..807b5ac3 --- /dev/null +++ b/test/test_wbi_exceptions.py @@ -0,0 +1,50 @@ +from wikibaseintegrator.wbi_exceptions import NonUniqueLabelDescriptionPairError, IDMissingError, SearchError, ManualInterventionReqException, CorePropIntegrityException, \ + MergeError, SearchOnlyError, MWApiError + + +def test_mwapierror(): + assert str(MWApiError('MWApiError')) == 'MWApiError' + + +def test_nonuniquelabeldescriptionpairerror(): + json_data = { + 'error': { + 'messages': [ + { + 
'parameters': [ + 'first', + 'second', + 'third|test' + ] + } + ] + } + } + + assert NonUniqueLabelDescriptionPairError(json_data).get_language() == 'second' + assert NonUniqueLabelDescriptionPairError(json_data).get_conflicting_item_qid() == 'ird' + + +def test_idmissingerror(): + assert str(IDMissingError('IDMissingError')) == 'IDMissingError' + + +def test_searcherror(): + assert str(SearchError('SearchError')) == 'SearchError' + + +def test_manualinterventionreqexception(): + assert ManualInterventionReqException(value='value', property_string='property_string', + item_list='item_list').value == 'value Property: property_string, items affected: item_list' + + +def test_corepropintegrityexception(): + assert str(CorePropIntegrityException('CorePropIntegrityException')) == 'CorePropIntegrityException' + + +def test_mergeerror(): + assert str(MergeError('MergeError')) == 'MergeError' + + +def test_searchonlyerror(): + assert str(SearchOnlyError('SearchOnlyError')) == 'SearchOnlyError' diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py index a63be99d..98e00893 100644 --- a/test/test_wbi_fastrun.py +++ b/test/test_wbi_fastrun.py @@ -186,10 +186,10 @@ def test_append_props(): frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType) # with append statements = [Item(value='Q24784025', prop_nr='P527')] - assert frc.write_required(data=statements, if_exists=ActionIfExists.APPEND, cqid=qid) is False + assert frc.write_required(data=statements, action_if_exists=ActionIfExists.APPEND, cqid=qid) is False # with force append statements = [Item(value='Q24784025', prop_nr='P527')] - assert frc.write_required(data=statements, if_exists=ActionIfExists.FORCE_APPEND, cqid=qid) is True + assert frc.write_required(data=statements, action_if_exists=ActionIfExists.FORCE_APPEND, cqid=qid) is True # without append statements = [Item(value='Q24784025', prop_nr='P527')] assert frc.write_required(data=statements, cqid=qid) is 
True @@ -198,7 +198,7 @@ def test_append_props(): frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType, use_refs=True) # with append statements = [Item(value='Q24784025', prop_nr='P527')] - assert frc.write_required(data=statements, cqid=qid, if_exists=ActionIfExists.APPEND) is True + assert frc.write_required(data=statements, cqid=qid, action_if_exists=ActionIfExists.APPEND) is True # without append statements = [Item(value='Q24784025', prop_nr='P527')] assert frc.write_required(data=statements, cqid=qid) is True diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index a100fcfc..81aa7356 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -33,13 +33,13 @@ def __init__(self, api, lastrevid=None, type=None, id=None, claims=None): self.debug = config['DEBUG'] - def add_claims(self, claims, if_exists=ActionIfExists.APPEND): + def add_claims(self, claims, action_if_exists=ActionIfExists.APPEND): if isinstance(claims, Claim): claims = [claims] elif not isinstance(claims, list): raise TypeError() - self.claims.add(claims=claims, if_exists=if_exists) + self.claims.add(claims=claims, action_if_exists=action_if_exists) return self @@ -85,7 +85,7 @@ def get(self, entity_id, **kwargs): return self.api.helpers.mediawiki_api_call_helper(data=params, allow_anonymous=True, **kwargs) - def _write(self, data=None, summary='', allow_anonymous=False): + def _write(self, data=None, summary='', allow_anonymous=False, **kwargs): """ Writes the item Json to the Wikibase instance and after successful write, updates the object with new ids and hashes generated by the Wikibase instance. For new items, also returns the new QIDs. 
@@ -139,7 +139,7 @@ def _write(self, data=None, summary='', allow_anonymous=False): print(payload) try: - json_data = mediawiki_api_call_helper(data=payload, login=self.api.login, allow_anonymous=allow_anonymous, is_bot=self.api.is_bot) + json_data = mediawiki_api_call_helper(data=payload, login=self.api.login, allow_anonymous=allow_anonymous, is_bot=self.api.is_bot, **kwargs) if 'error' in json_data and 'messages' in json_data['error']: error_msg_names = set(x.get('name') for x in json_data['error']['messages']) diff --git a/wikibaseintegrator/entities/item.py b/wikibaseintegrator/entities/item.py index 0bcbad5b..7e9bd10c 100644 --- a/wikibaseintegrator/entities/item.py +++ b/wikibaseintegrator/entities/item.py @@ -1,5 +1,7 @@ from __future__ import annotations +import re + from wikibaseintegrator.entities.baseentity import BaseEntity from wikibaseintegrator.models.aliases import Aliases from wikibaseintegrator.models.descriptions import Descriptions @@ -36,6 +38,19 @@ def new(self, **kwargs) -> Item: return Item(self.api, **kwargs) def get(self, entity_id, **kwargs) -> Item: + if isinstance(entity_id, str): + pattern = re.compile(r'^Q?([0-9]+)$') + matches = pattern.match(entity_id) + + if not matches: + raise ValueError("Invalid item ID ({}), format must be 'Q[0-9]+'".format(entity_id)) + else: + entity_id = int(matches.group(1)) + + if entity_id < 1: + raise ValueError("Item ID must be greater than 0") + + entity_id = 'Q{}'.format(entity_id) json_data = super(Item, self).get(entity_id=entity_id, **kwargs) return Item(self.api).from_json(json_data=json_data['entities'][entity_id]) @@ -57,6 +72,6 @@ def from_json(self, json_data) -> Item: return self - def write(self, allow_anonymous=False): - json_data = super(Item, self)._write(data=self.get_json(), allow_anonymous=allow_anonymous) + def write(self, **kwargs): + json_data = super(Item, self)._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git 
a/wikibaseintegrator/entities/lexeme.py b/wikibaseintegrator/entities/lexeme.py index 52e6a483..a0459501 100644 --- a/wikibaseintegrator/entities/lexeme.py +++ b/wikibaseintegrator/entities/lexeme.py @@ -1,5 +1,7 @@ from __future__ import annotations +import re + from wikibaseintegrator.entities.baseentity import BaseEntity from wikibaseintegrator.models.forms import Forms from wikibaseintegrator.models.lemmas import Lemmas @@ -25,6 +27,19 @@ def new(self, **kwargs) -> Lexeme: return Lexeme(self.api, **kwargs) def get(self, entity_id, **kwargs) -> Lexeme: + if isinstance(entity_id, str): + pattern = re.compile(r'^L?([0-9]+)$') + matches = pattern.match(entity_id) + + if not matches: + raise ValueError("Invalid lexeme ID ({}), format must be 'L[0-9]+'".format(entity_id)) + else: + entity_id = int(matches.group(1)) + + if entity_id < 1: + raise ValueError("Lexeme ID must be greater than 0") + + entity_id = 'L{}'.format(entity_id) json_data = super(Lexeme, self).get(entity_id=entity_id, **kwargs) return Lexeme(self.api).from_json(json_data=json_data['entities'][entity_id]) @@ -54,9 +69,9 @@ def from_json(self, json_data) -> Lexeme: return self - def write(self): + def write(self, **kwargs): if self.lexical_category is None: raise ValueError("lexical_category can't be None") - json_data = super(Lexeme, self)._write(data=self.get_json()) + json_data = super(Lexeme, self)._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/mediainfo.py b/wikibaseintegrator/entities/mediainfo.py index d07afb33..e79771fe 100644 --- a/wikibaseintegrator/entities/mediainfo.py +++ b/wikibaseintegrator/entities/mediainfo.py @@ -1,5 +1,7 @@ from __future__ import annotations +import re + from wikibaseintegrator.entities.baseentity import BaseEntity from wikibaseintegrator.models.aliases import Aliases from wikibaseintegrator.models.descriptions import Descriptions @@ -32,6 +34,19 @@ def new(self, **kwargs) -> MediaInfo: 
return MediaInfo(self.api, **kwargs) def get(self, entity_id, **kwargs) -> MediaInfo: + if isinstance(entity_id, str): + pattern = re.compile(r'^M?([0-9]+)$') + matches = pattern.match(entity_id) + + if not matches: + raise ValueError("Invalid MediaInfo ID ({}), format must be 'M[0-9]+'".format(entity_id)) + else: + entity_id = int(matches.group(1)) + + if entity_id < 1: + raise ValueError("MediaInfo ID must be greater than 0") + + entity_id = 'M{}'.format(entity_id) json_data = super(MediaInfo, self).get(entity_id=entity_id, **kwargs) return MediaInfo(self.api).from_json(json_data=json_data['entities'][entity_id]) @@ -68,6 +83,6 @@ def from_json(self, json_data) -> MediaInfo: return self - def write(self, allow_anonymous=False): - json_data = super(MediaInfo, self)._write(data=self.get_json(), allow_anonymous=allow_anonymous) + def write(self, **kwargs): + json_data = super(MediaInfo, self)._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/property.py b/wikibaseintegrator/entities/property.py index a6dee0ba..37855ab7 100644 --- a/wikibaseintegrator/entities/property.py +++ b/wikibaseintegrator/entities/property.py @@ -1,5 +1,7 @@ from __future__ import annotations +import re + from wikibaseintegrator.entities.baseentity import BaseEntity from wikibaseintegrator.models.aliases import Aliases from wikibaseintegrator.models.descriptions import Descriptions @@ -28,6 +30,19 @@ def new(self, **kwargs) -> Property: return Property(self.api, **kwargs) def get(self, entity_id, **kwargs) -> Property: + if isinstance(entity_id, str): + pattern = re.compile(r'^P?([0-9]+)$') + matches = pattern.match(entity_id) + + if not matches: + raise ValueError("Invalid property ID ({}), format must be 'P[0-9]+'".format(entity_id)) + else: + entity_id = int(matches.group(1)) + + if entity_id < 1: + raise ValueError("Property ID must be greater than 0") + + entity_id = 'P{}'.format(entity_id) json_data = 
super(Property, self).get(entity_id=entity_id, **kwargs) return Property(self.api).from_json(json_data=json_data['entities'][entity_id]) @@ -50,6 +65,6 @@ def from_json(self, json_data) -> Property: return self - def write(self): - json_data = super(Property, self)._write(data=self.get_json()) + def write(self, **kwargs): + json_data = super(Property, self)._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/models/aliases.py b/wikibaseintegrator/models/aliases.py index b4f5aa7a..ea7b180f 100644 --- a/wikibaseintegrator/models/aliases.py +++ b/wikibaseintegrator/models/aliases.py @@ -28,9 +28,9 @@ def get(self, language=None): else: return None - def set(self, language=None, values=None, if_exists=ActionIfExists.APPEND): + def set(self, language=None, values=None, action_if_exists=ActionIfExists.APPEND): language = language or config['DEFAULT_LANGUAGE'] - assert if_exists in ActionIfExists + assert action_if_exists in ActionIfExists assert language is not None @@ -38,7 +38,7 @@ def set(self, language=None, values=None, if_exists=ActionIfExists.APPEND): self.aliases[language] = [] if values is None or values == '': - if if_exists != ActionIfExists.KEEP: + if action_if_exists != ActionIfExists.KEEP: for alias in self.aliases[language]: alias.remove() return self.aliases[language] @@ -48,7 +48,7 @@ def set(self, language=None, values=None, if_exists=ActionIfExists.APPEND): elif not isinstance(values, list) and values is not None: raise TypeError("value must be a str or list") - if if_exists == ActionIfExists.REPLACE: + if action_if_exists == ActionIfExists.REPLACE: aliases = [] for value in values: alias = Alias(language, value) @@ -58,10 +58,10 @@ def set(self, language=None, values=None, if_exists=ActionIfExists.APPEND): for value in values: alias = Alias(language, value) - if if_exists == ActionIfExists.APPEND: + if action_if_exists == ActionIfExists.APPEND: if alias not in self.aliases[language]: 
self.aliases[language].append(alias) - elif if_exists == ActionIfExists.KEEP: + elif action_if_exists == ActionIfExists.KEEP: if not self.aliases[language]: self.aliases[language].append(alias) diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index e130f629..1bdad004 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -24,17 +24,17 @@ def claims(self, claims): def get(self, property=None) -> list: return self.claims[property] - def add(self, claims: Union[list, Claim, None] = None, if_exists=ActionIfExists.REPLACE) -> Claims: + def add(self, claims: Union[list, Claim, None] = None, action_if_exists=ActionIfExists.REPLACE) -> Claims: """ :param claims: - :param if_exists: Replace or append the statement. You can force an append if the statement already exists. - :type if_exists: One of the values of the enum ActionIfExists: REPLACE, APPEND, FORCE_APPEND, KEEP + :param action_if_exists: Replace or append the statement. You can force an append if the statement already exists. + :type action_if_exists: One of the values of the enum ActionIfExists: REPLACE, APPEND, FORCE_APPEND, KEEP :return: Claims """ - if if_exists not in ActionIfExists: - raise ValueError('{} is not a valid if_exists value. Use the enum ActionIfExists'.format(if_exists)) + if action_if_exists not in ActionIfExists: + raise ValueError('{} is not a valid action_if_exists value. Use the enum ActionIfExists'.format(action_if_exists)) if isinstance(claims, Claim): claims = [claims] @@ -42,7 +42,7 @@ def add(self, claims: Union[list, Claim, None] = None, if_exists=ActionIfExists. raise ValueError # TODO: Don't replace if claim is the same - if if_exists == ActionIfExists.REPLACE: + if action_if_exists == ActionIfExists.REPLACE: for claim in claims: if claim is not None: assert isinstance(claim, Claim) @@ -60,15 +60,15 @@ def add(self, claims: Union[list, Claim, None] = None, if_exists=ActionIfExists. 
if property not in self.claims: self.claims[property] = [] - if if_exists == ActionIfExists.KEEP: + if action_if_exists == ActionIfExists.KEEP: if len(self.claims[property]) == 0: self.claims[property].append(claim) - elif if_exists == ActionIfExists.FORCE_APPEND: + elif action_if_exists == ActionIfExists.FORCE_APPEND: self.claims[property].append(claim) - elif if_exists == ActionIfExists.APPEND: + elif action_if_exists == ActionIfExists.APPEND: if claim not in self.claims[property]: self.claims[property].append(claim) - elif if_exists == ActionIfExists.REPLACE: + elif action_if_exists == ActionIfExists.REPLACE: if claim not in self.claims[property]: self.claims[property].append(claim) @@ -82,7 +82,7 @@ def from_json(self, json_data) -> Claims: data_type = [x for x in BaseDataType.subclasses if x.DTYPE == claim['mainsnak']['datatype']][0] else: data_type = Claim - self.add(claims=data_type().from_json(claim), if_exists=ActionIfExists.FORCE_APPEND) + self.add(claims=data_type().from_json(claim), action_if_exists=ActionIfExists.FORCE_APPEND) return self diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py index cbcc3753..9544ea7f 100644 --- a/wikibaseintegrator/models/forms.py +++ b/wikibaseintegrator/models/forms.py @@ -30,7 +30,7 @@ def get_json(self) -> []: def from_json(self, json_data): for form in json_data: - self.add(Form(form_id=form['id'], representations=form['representations'], grammatical_features=form['grammaticalFeatures'], + self.add(Form(form_id=form['id'], representations=LanguageValues().from_json(form['representations']), grammatical_features=form['grammaticalFeatures'], claims=Claims().from_json(form['claims']))) return self diff --git a/wikibaseintegrator/models/language_values.py b/wikibaseintegrator/models/language_values.py index f4b070ac..bf7ebebf 100644 --- a/wikibaseintegrator/models/language_values.py +++ b/wikibaseintegrator/models/language_values.py @@ -29,16 +29,16 @@ def get(self, language=None): else: 
return None - def set(self, language=None, value=None, if_exists=ActionIfExists.REPLACE): + def set(self, language=None, value=None, action_if_exists=ActionIfExists.REPLACE): language = language or config['DEFAULT_LANGUAGE'] - assert if_exists in [ActionIfExists.REPLACE, ActionIfExists.KEEP] + assert action_if_exists in [ActionIfExists.REPLACE, ActionIfExists.KEEP] # Remove value if None if value is None and language in self.values: self.values[language].remove() return None - if if_exists == ActionIfExists.REPLACE or self.get(language=language) is None: + if action_if_exists == ActionIfExists.REPLACE or self.get(language=language) is None: language_value = LanguageValue(language, value) self.add(language_value) return language_value diff --git a/wikibaseintegrator/models/qualifiers.py b/wikibaseintegrator/models/qualifiers.py index db49c21e..2b6e5d7d 100644 --- a/wikibaseintegrator/models/qualifiers.py +++ b/wikibaseintegrator/models/qualifiers.py @@ -31,8 +31,8 @@ def set(self, qualifiers): def get(self, property=None): return self.qualifiers[property] - # TODO: implement if_exists - def add(self, qualifier=None, if_exists=ActionIfExists.REPLACE): + # TODO: implement action_if_exists + def add(self, qualifier=None, action_if_exists=ActionIfExists.REPLACE): from wikibaseintegrator.models.claims import Claim if isinstance(qualifier, Claim): qualifier = Snak().from_json(qualifier.get_json()['mainsnak']) diff --git a/wikibaseintegrator/models/references.py b/wikibaseintegrator/models/references.py index 10caa183..91db961b 100644 --- a/wikibaseintegrator/models/references.py +++ b/wikibaseintegrator/models/references.py @@ -14,8 +14,8 @@ def get(self, hash=None): return reference return None - # TODO: implement if_exists - def add(self, reference=None, if_exists=ActionIfExists.REPLACE): + # TODO: implement action_if_exists + def add(self, reference=None, action_if_exists=ActionIfExists.REPLACE): from wikibaseintegrator.models.claims import Claim if 
isinstance(reference, Claim): reference = Reference(snaks=Snaks().add(Snak().from_json(reference.get_json()['mainsnak']))) @@ -99,8 +99,8 @@ def snaks_order(self): def snaks_order(self, value): self.__snaks_order = value - # TODO: implement if_exists - def add(self, snak=None, if_exists=ActionIfExists.REPLACE): + # TODO: implement action_if_exists + def add(self, snak=None, action_if_exists=ActionIfExists.REPLACE): from wikibaseintegrator.models.claims import Claim if isinstance(snak, Claim): snak = Snak().from_json(snak.get_json()['mainsnak']) diff --git a/wikibaseintegrator/models/senses.py b/wikibaseintegrator/models/senses.py index 8d654596..95789fd2 100644 --- a/wikibaseintegrator/models/senses.py +++ b/wikibaseintegrator/models/senses.py @@ -13,8 +13,8 @@ def get(self, id): return sense return None - # TODO: implement if_exists - def add(self, sense, if_exists=ActionIfExists.REPLACE): + # TODO: implement action_if_exists + def add(self, sense, action_if_exists=ActionIfExists.REPLACE): self.senses.append(sense) return self diff --git a/wikibaseintegrator/wbi_exceptions.py b/wikibaseintegrator/wbi_exceptions.py index a2ccdb31..3b09508f 100644 --- a/wikibaseintegrator/wbi_exceptions.py +++ b/wikibaseintegrator/wbi_exceptions.py @@ -6,10 +6,7 @@ def __init__(self, error_message): :type error_message: A Python json representation dictionary of the error message :return: """ - self.error_msg = error_message - - def __str__(self): - return repr(self.error_msg) + pass class NonUniqueLabelDescriptionPairError(MWApiError): @@ -44,19 +41,11 @@ def __str__(self): class IDMissingError(Exception): - def __init__(self, value): - self.value = value - - def __str__(self): - return repr(self.value) + pass class SearchError(Exception): - def __init__(self, value): - self.value = value - - def __str__(self): - return repr(self.value) + pass class ManualInterventionReqException(Exception): @@ -68,19 +57,11 @@ def __str__(self): class CorePropIntegrityException(Exception): - def 
__init__(self, value): - self.value = value - - def __str__(self): - return repr(self.value) + pass class MergeError(Exception): - def __init__(self, value): - self.value = value - - def __str__(self): - return repr(self.value) + pass class SearchOnlyError(Exception): diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 96d8b7c6..23118ffd 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -154,11 +154,11 @@ def load_item(self, claims: list, cqid=None) -> bool: print(qid) self.current_qid = qid - def write_required(self, data: list, if_exists=ActionIfExists.REPLACE, cqid=None) -> bool: + def write_required(self, data: list, action_if_exists=ActionIfExists.REPLACE, cqid=None) -> bool: del_props = set() data_props = set() append_props = [] - if if_exists == ActionIfExists.APPEND: + if action_if_exists == ActionIfExists.APPEND: append_props = [x.mainsnak.property_number for x in data] for x in data: @@ -178,7 +178,7 @@ def write_required(self, data: list, if_exists=ActionIfExists.REPLACE, cqid=None for x in app_data: for y in rec_app_data: if x.mainsnak.datavalue == y.mainsnak.datavalue: - if y.equals(x, include_ref=self.use_refs) and if_exists != ActionIfExists.FORCE_APPEND: + if y.equals(x, include_ref=self.use_refs) and action_if_exists != ActionIfExists.FORCE_APPEND: comp.append(True) # comp = [True for x in app_data for y in rec_app_data if x.equals(y, include_ref=self.use_refs)] @@ -292,19 +292,19 @@ def get_language_data(self, qid: str, lang: str, lang_data_type: str) -> list: def check_language_data(self, qid: str, lang_data: list, lang: str, lang_data_type: str, # Default to append - if_exists: ActionIfExists = ActionIfExists.APPEND) -> bool: + action_if_exists: ActionIfExists = ActionIfExists.APPEND) -> bool: """ Method to check if certain language data exists as a label, description or aliases :param qid: Wikibase item id :param lang_data: list of string values to check :param lang: 
language code :param lang_data_type: What kind of data is it? 'label', 'description' or 'aliases'? - :param if_exists: If aliases already exist, APPEND or REPLACE + :param action_if_exists: If aliases already exist, APPEND or REPLACE :return: boolean """ all_lang_strings = set(x.strip().casefold() for x in self.get_language_data(qid, lang, lang_data_type)) - if if_exists == ActionIfExists.REPLACE: + if action_if_exists == ActionIfExists.REPLACE: return not collections.Counter(all_lang_strings) == collections.Counter(map(lambda x: x.casefold(), lang_data)) else: for s in lang_data: From 771941ff7337d174f023c70a6ab5c967eefd2df9 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 29 Aug 2021 14:36:12 +0200 Subject: [PATCH 078/308] Improve coverage configuration --- .coveragerc | 23 +++++++++++++++++++++++ wikibaseintegrator/models/qualifiers.py | 8 ++++++++ 2 files changed, 31 insertions(+) create mode 100644 .coveragerc diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..5599afe2 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,23 @@ +# .coveragerc to control coverage.py +[run] +branch = True + +[report] +# Regexes for lines to exclude from consideration +exclude_lines = +# Have to re-enable the standard pragma + pragma: no cover + +# Don't complain about missing debug-only code: + def __repr__ + if self\.debug + +# Don't complain if tests don't hit defensive assertion code: + raise AssertionError + raise NotImplementedError + +# Don't complain if non-runnable code isn't run: + if 0: + if __name__ == .__main__.: + +ignore_errors = True diff --git a/wikibaseintegrator/models/qualifiers.py b/wikibaseintegrator/models/qualifiers.py index 2b6e5d7d..6a4f4c5c 100644 --- a/wikibaseintegrator/models/qualifiers.py +++ b/wikibaseintegrator/models/qualifiers.py @@ -73,3 +73,11 @@ def __iter__(self): def __len__(self): return len(self.qualifiers) + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + 
return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + ) From ae0c2e5e7136eb7ef5a7d6a6312305b350a44c1c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 29 Aug 2021 15:16:49 +0200 Subject: [PATCH 079/308] Improve entity generator tests --- test/test_wbi_core.py | 29 ++++++++++++++++++++++++++--- wikibaseintegrator/wbi_helpers.py | 2 +- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index d4db7ddb..0a9ff856 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -146,12 +146,35 @@ def test_wd_search(self): self.assertIsNot(len(t), 0) def test_entity_generator(self): - entities = ['Q408883', 'P715', 'Q18046452'] - - entity_instances = generate_entity_instances(entities=entities) + entities = { + 'Q408883': { + 'etype': 'item', + 'ctype': 'Item' + }, 'P715': { + 'etype': 'property', + 'ctype': 'Property' + }, 'Q18046452': { + 'etype': 'item', + 'ctype': 'Item' + }, 'L5': { + 'etype': 'lexeme', + 'ctype': 'Lexeme' + } + } + + entity_instances = generate_entity_instances(entities=list(entities.keys())) for qid, entity in entity_instances: self.assertIn(qid, entities) + assert entity.ETYPE == entities[qid]['etype'] + assert type(entity).__name__ == entities[qid]['ctype'] + + entity_instances = generate_entity_instances(entities='Q408883') + + for qid, entity in entity_instances: + assert qid == 'Q408883' + assert entity.ETYPE == 'item' + assert type(entity).__name__ == 'Item' def test_rank(self): t1 = String(value='test1', prop_nr='P1', rank='preferred') diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index e13eab3f..a36c2c9a 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -368,7 +368,7 @@ def generate_entity_instances(entities, allow_anonymous=True, **kwargs): 
:param user_agent: A custom user agent :type user_agent: str :param entities: A list of IDs. Item, Property or Lexeme. - :type entities: list + :type entities: list, str :param mediawiki_api_url: The MediaWiki url which should be used :type mediawiki_api_url: str :return: A list of tuples, first value in the tuple is the entity's ID, second value is the instance of a subclass of BaseEntity with the corresponding entity data. From d88a37099e07e45b7b6c56a555316bd6a4366a01 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 29 Aug 2021 17:46:05 +0200 Subject: [PATCH 080/308] Fix issue with from_json() --- wikibaseintegrator/models/claims.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 1bdad004..664fd6e9 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -78,7 +78,7 @@ def from_json(self, json_data) -> Claims: for property in json_data: for claim in json_data[property]: from wikibaseintegrator.datatypes import BaseDataType - if 'datatype' in 'mainsnak': + if 'datatype' in claim['mainsnak']: data_type = [x for x in BaseDataType.subclasses if x.DTYPE == claim['mainsnak']['datatype']][0] else: data_type = Claim From 4e418da4add08e11705b83ff44d2b73424930e95 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 29 Aug 2021 20:07:20 +0200 Subject: [PATCH 081/308] Add clear support (#202) * Update baseentity.py Add clear support Close #125 * Data can be none * Add clear() method * Don't send summary field if None/empty * Add support for basrevid --- wikibaseintegrator/entities/baseentity.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 81aa7356..050c8389 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ 
b/wikibaseintegrator/entities/baseentity.py @@ -85,7 +85,10 @@ def get(self, entity_id, **kwargs): return self.api.helpers.mediawiki_api_call_helper(data=params, allow_anonymous=True, **kwargs) - def _write(self, data=None, summary='', allow_anonymous=False, **kwargs): + def clear(self, **kwargs): + self._write(clear=True, **kwargs) + + def _write(self, data=None, summary=None, allow_anonymous=False, clear=False, **kwargs): """ Writes the item Json to the Wikibase instance and after successful write, updates the object with new ids and hashes generated by the Wikibase instance. For new items, also returns the new QIDs. @@ -97,8 +100,7 @@ def _write(self, data=None, summary='', allow_anonymous=False, **kwargs): if self.api.search_only: raise SearchOnlyError - if data is None: - raise ValueError + data = data or {} # if all_claims: # data = json.JSONEncoder().encode(self.json_representation) @@ -124,17 +126,26 @@ def _write(self, data=None, summary='', allow_anonymous=False, **kwargs): 'summary': summary } + if not summary: + payload.pop('summary') + if config['MAXLAG'] > 0: payload.update({'maxlag': config['MAXLAG']}) if self.api.is_bot: payload.update({'bot': ''}) + if clear: + payload.update({'clear': True}) + if self.id: payload.update({u'id': self.id}) else: payload.update({u'new': self.type}) + if self.lastrevid: + payload.update({u'baserevid': self.lastrevid}) + if self.debug: print(payload) From affd03a8b39a65a52db2b6aefda56506bece3a83 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 29 Aug 2021 22:21:02 +0200 Subject: [PATCH 082/308] Improve maxlag (#204) --- wikibaseintegrator/entities/baseentity.py | 3 --- wikibaseintegrator/wbi_helpers.py | 7 +++++++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 050c8389..0788b2c1 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ 
b/wikibaseintegrator/entities/baseentity.py @@ -129,9 +129,6 @@ def _write(self, data=None, summary=None, allow_anonymous=False, clear=False, ** if not summary: payload.pop('summary') - if config['MAXLAG'] > 0: - payload.update({'maxlag': config['MAXLAG']}) - if self.api.is_bot: payload.update({'bot': ''}) diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index a36c2c9a..60aa44fa 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -67,6 +67,12 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries # maxlag if 'code' in json_data['error'] and json_data['error']['code'] == 'maxlag': sleep_sec = json_data['error'].get('lag', retry_after) + # We multiply the number of second by the number of tries + sleep_sec *= n + 1 + # The number of second can't be less than 5 + sleep_sec = max(sleep_sec, 5) + # The number of second can't be more than retry_after + sleep_sec = min(sleep_sec, retry_after) print("{}: maxlag. 
sleeping for {} seconds".format(datetime.datetime.utcnow(), sleep_sec)) sleep(sleep_sec) continue @@ -129,6 +135,7 @@ def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_age elif 'assert' not in data: # Always assert anon if allow_anonymous is True data.update({'assert': 'anon'}) + if config['MAXLAG'] > 0: data.update({'maxlag': config['MAXLAG']}) From dae1700e3d167809a9367d6e820d79da732fd652 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 1 Sep 2021 19:35:19 +0200 Subject: [PATCH 083/308] Update test_entity_property.py --- test/test_entity_property.py | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_entity_property.py b/test/test_entity_property.py index 24fd0f23..0c7a9a6b 100644 --- a/test/test_entity_property.py +++ b/test/test_entity_property.py @@ -1,5 +1,4 @@ import unittest -from pprint import pprint from wikibaseintegrator import WikibaseIntegrator From 35ff314732b65199533fbab2d3b6b0e8751c8d5a Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 1 Sep 2021 19:44:28 +0200 Subject: [PATCH 084/308] Rework authentication method (#203) * Rework authentication method * Update wbi_login.py --- test/test_wbi_backoff.py | 2 +- test/test_wbi_login.py | 58 ++++++++++++-- wikibaseintegrator/wbi_helpers.py | 5 +- wikibaseintegrator/wbi_login.py | 124 ++++++++++++++---------------- 4 files changed, 116 insertions(+), 73 deletions(-) diff --git a/test/test_wbi_backoff.py b/test/test_wbi_backoff.py index e3a29f7e..0d360737 100644 --- a/test/test_wbi_backoff.py +++ b/test/test_wbi_backoff.py @@ -52,4 +52,4 @@ def bad_request(): def bad_login(): - wbi_login.Login("name", "pass", mediawiki_api_url="www.wikidataaaaaaaaa.org") + wbi_login.Login(auth_method='clientlogin', user='name', password='pass', mediawiki_api_url="www.wikidataaaaaaaaa.org") diff --git a/test/test_wbi_login.py b/test/test_wbi_login.py index 6a03cc6e..7de95025 100644 --- a/test/test_wbi_login.py 
+++ b/test/test_wbi_login.py @@ -1,25 +1,73 @@ import os import sys +import unittest import pytest from wikibaseintegrator import wbi_login from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper - # look for environment variables. if none set, don't do anything +from wikibaseintegrator.wbi_login import LoginError + WDUSER = os.getenv("WDUSER") WDPASS = os.getenv("WDPASS") +OAUTH1_CONSUMER_TOKEN_NOT_OWNER_ONLY = os.getenv("OAUTH1_CONSUMER_TOKEN_NOT_OWNER_ONLY") +OAUTH1_CONSUMER_SECRET_NOT_OWNER_ONLY = os.getenv("OAUTH1_CONSUMER_SECRET_NOT_OWNER_ONLY") +OAUTH1_CONSUMER_TOKEN = os.getenv("OAUTH1_CONSUMER_TOKEN") +OAUTH1_CONSUMER_SECRET = os.getenv("OAUTH1_CONSUMER_SECRET") +OAUTH1_ACCESS_TOKEN = os.getenv("OAUTH1_ACCESS_TOKEN") +OAUTH1_ACCESS_SECRET = os.getenv("OAUTH1_ACCESS_SECRET") +OAUTH2_CONSUMER_TOKEN = os.getenv("OAUTH2_CONSUMER_TOKEN") +OAUTH2_CONSUMER_SECRET = os.getenv("OAUTH2_CONSUMER_SECRET") def test_login(): + with unittest.TestCase().assertRaises(LoginError): + wbi_login.Login(auth_method='clientlogin', user='wrong', password='wrong') + + with unittest.TestCase().assertRaises(LoginError): + wbi_login.Login(auth_method='login', user='wrong', password='wrong') + if WDUSER and WDPASS: - wbi_login.Login(WDUSER, WDPASS) + assert wbi_login.Login(auth_method='clientlogin', user=WDUSER, password=WDPASS) + assert wbi_login.Login(auth_method='login', user=WDUSER, password=WDPASS) else: print("no WDUSER or WDPASS found in environment variables", file=sys.stderr) -def test_write(): +def test_oauth1(): + with unittest.TestCase().assertRaises(LoginError): + wbi_login.Login(auth_method='oauth1', consumer_token='wrong', consumer_secret='wrong') + + if OAUTH1_CONSUMER_TOKEN_NOT_OWNER_ONLY and OAUTH1_CONSUMER_SECRET_NOT_OWNER_ONLY: + wbi_login.Login(auth_method='oauth1', consumer_token=OAUTH1_CONSUMER_TOKEN_NOT_OWNER_ONLY, consumer_secret=OAUTH1_CONSUMER_SECRET_NOT_OWNER_ONLY) + else: + print("no OAUTH1_CONSUMER_TOKEN_NOT_OWNER_ONLY or 
OAUTH1_CONSUMER_SECRET_NOT_OWNER_ONLY found in environment variables", file=sys.stderr) + + +def test_oauth1_access(): + with unittest.TestCase().assertRaises(LoginError): + wbi_login.Login(auth_method='oauth1', consumer_token='wrong', consumer_secret='wrong', access_token='wrong', access_secret='wrong') + + if OAUTH1_CONSUMER_TOKEN and OAUTH1_CONSUMER_SECRET and OAUTH1_ACCESS_TOKEN and OAUTH1_ACCESS_SECRET: + wbi_login.Login(auth_method='oauth1', consumer_token=OAUTH1_CONSUMER_TOKEN, consumer_secret=OAUTH1_CONSUMER_SECRET, access_token=OAUTH1_ACCESS_TOKEN, + access_secret=OAUTH1_ACCESS_SECRET) + else: + print("no OAUTH1_CONSUMER_TOKEN or OAUTH1_CONSUMER_SECRET or OAUTH1_ACCESS_TOKEN or OAUTH1_ACCESS_SECRET found in environment variables", file=sys.stderr) + + +def test_oauth2(): + with unittest.TestCase().assertRaises(LoginError): + wbi_login.Login(consumer_token='wrong', consumer_secret='wrong') + + if OAUTH2_CONSUMER_TOKEN and OAUTH2_CONSUMER_SECRET: + wbi_login.Login(consumer_token=OAUTH2_CONSUMER_TOKEN, consumer_secret=OAUTH2_CONSUMER_SECRET) + else: + print("no OAUTH2_CONSUMER_TOKEN or CLIENT_SECRET found in environment variables", file=sys.stderr) + + +def test_mismatch_api_url(): if WDUSER and WDPASS: - login = wbi_login.Login(WDUSER, WDPASS) + login = wbi_login.Login(auth_method='login', user=WDUSER, password=WDPASS) with pytest.raises(ValueError): - mediawiki_api_call_helper(data=None, login=login, mediawiki_api_url='https://unsdfdskfjljzkerezr.org/w/api.php') + mediawiki_api_call_helper(login=login, mediawiki_api_url='https://unsdfdskfjljzkerezr.org/w/api.php') diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index 60aa44fa..f9fdbb25 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -97,7 +97,7 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries return json_data -def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_agent=None, 
allow_anonymous=False, max_retries=1000, retry_after=60, is_bot=False): +def mediawiki_api_call_helper(data=None, login=None, mediawiki_api_url=None, user_agent=None, allow_anonymous=False, max_retries=1000, retry_after=60, is_bot=False, **kwargs): mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url user_agent = config['USER_AGENT'] if user_agent is None else user_agent @@ -141,7 +141,8 @@ def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_age login_session = login.get_session() if login is not None else None - return mediawiki_api_call('POST', mediawiki_api_url, login_session, data=data, headers=headers, max_retries=max_retries, retry_after=retry_after) + return mediawiki_api_call('POST', mediawiki_api_url=mediawiki_api_url, session=login_session, data=data, headers=headers, max_retries=max_retries, + retry_after=retry_after, **kwargs) @wbi_backoff() diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index 44f6454e..a78416f3 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -2,8 +2,8 @@ import webbrowser import requests -from mwoauth import ConsumerToken, Handshaker -from oauthlib.oauth2 import BackendApplicationClient +from mwoauth import ConsumerToken, Handshaker, OAuthException +from oauthlib.oauth2 import BackendApplicationClient, InvalidClientError from requests_oauthlib import OAuth1, OAuth2Session, OAuth2 from wikibaseintegrator.wbi_backoff import wbi_backoff @@ -21,23 +21,19 @@ class Login(object): """ @wbi_backoff() - def __init__(self, user=None, pwd=None, mediawiki_api_url=None, mediawiki_index_url=None, mediawiki_rest_url=None, token_renew_period=1800, use_clientlogin=False, - consumer_key=None, consumer_secret=None, access_token=None, access_secret=None, client_id=None, client_secret=None, callback_url='oob', user_agent=None, - debug=False): + def __init__(self, auth_method='oauth2', user=None, password=None, 
mediawiki_api_url=None, mediawiki_index_url=None, mediawiki_rest_url=None, token_renew_period=1800, + consumer_token=None, consumer_secret=None, access_token=None, access_secret=None, callback_url='oob', user_agent=None, debug=False): """ This class handles several types of login procedures. Either use user and pwd authentication or OAuth. Wikidata clientlogin can also be used. If using one method, do NOT pass parameters for another method. :param user: the username which should be used for the login :type user: str - :param pwd: the password which should be used for the login - :type pwd: str + :param password: the password which should be used for the login + :type password: str :param token_renew_period: Seconds after which a new token should be requested from the Wikidata server :type token_renew_period: int - :param use_clientlogin: use authmanager based login method instead of standard login. - For 3rd party data consumer, e.g. web clients - :type use_clientlogin: bool - :param consumer_key: The consumer key for OAuth - :type consumer_key: str + :param consumer_token: The consumer key for OAuth + :type consumer_token: str :param consumer_secret: The consumer secret for OAuth :type consumer_secret: str :param access_token: The access token for OAuth @@ -51,59 +47,57 @@ def __init__(self, user=None, pwd=None, mediawiki_api_url=None, mediawiki_index_ :return: None """ - self.user = user - + self.auth_method = auth_method + self.consumer_token = consumer_token self.mediawiki_api_url = mediawiki_api_url or config['MEDIAWIKI_API_URL'] self.mediawiki_index_url = mediawiki_index_url or config['MEDIAWIKI_INDEX_URL'] self.mediawiki_rest_url = mediawiki_rest_url or config['MEDIAWIKI_REST_URL'] + self.token_renew_period = token_renew_period + self.callback_url = callback_url + self.user_agent = get_user_agent(user_agent if user_agent else config['USER_AGENT'], user) - if debug: - print(self.mediawiki_api_url) + if self.auth_method not in ['login', 'clientlogin', 
'oauth1', 'oauth2']: + raise ValueError("The auth_method must be 'login', 'clientlogin', 'oauth1' or 'oauth2'") self.session = requests.Session() - self.edit_token = '' + self.edit_token = None self.instantiation_time = time.time() - self.token_renew_period = token_renew_period - - self.consumer_key = consumer_key - self.consumer_secret = consumer_secret - self.access_token = access_token - self.access_secret = access_secret - self.client_id = client_id - self.client_secret = client_secret self.response_qs = None - self.callback_url = callback_url - - self.user_agent = get_user_agent(user_agent if user_agent else config['USER_AGENT'], self.user) self.session.headers.update({ 'User-Agent': self.user_agent }) - if self.consumer_key and self.consumer_secret: - if self.access_token and self.access_secret: + if auth_method == 'oauth2': + oauth = OAuth2Session(client=BackendApplicationClient(client_id=self.consumer_token)) + try: + token = oauth.fetch_token(token_url=self.mediawiki_rest_url + '/oauth2/access_token', client_id=self.consumer_token, client_secret=consumer_secret) + except InvalidClientError as err: + raise LoginError(err) + auth = OAuth2(token=token) + self.session.auth = auth + self.generate_edit_credentials() + elif auth_method == 'oauth1': + if access_token and access_secret: # OAuth procedure, based on https://www.mediawiki.org/wiki/OAuth/Owner-only_consumers#Python - auth = OAuth1(self.consumer_key, client_secret=self.consumer_secret, resource_owner_key=self.access_token, resource_owner_secret=self.access_secret) + auth = OAuth1(self.consumer_token, client_secret=consumer_secret, resource_owner_key=access_token, resource_owner_secret=access_secret) self.session.auth = auth self.generate_edit_credentials() else: # Oauth procedure, based on https://www.mediawiki.org/wiki/OAuth/For_Developers # Consruct a "consumer" from the key/secret provided by MediaWiki - self.consumer_token = ConsumerToken(self.consumer_key, self.consumer_secret) + 
self.consumer_token = ConsumerToken(self.consumer_token, consumer_secret) # Construct handshaker with wiki URI and consumer self.handshaker = Handshaker(self.mediawiki_index_url, self.consumer_token, callback=self.callback_url, user_agent=self.user_agent) # Step 1: Initialize -- ask MediaWiki for a temp key/secret for user # redirect -> authorization -> callback url - self.redirect, self.request_token = self.handshaker.initiate(callback=self.callback_url) - elif self.client_id and self.client_secret: - oauth = OAuth2Session(client=BackendApplicationClient(client_id=self.client_id)) - token = oauth.fetch_token(token_url=self.mediawiki_rest_url + '/oauth2/access_token', client_id=self.client_id, client_secret=self.client_secret) - auth = OAuth2(token=token) - self.session.auth = auth - self.generate_edit_credentials() - else: + try: + self.redirect, self.request_token = self.handshaker.initiate(callback=self.callback_url) + except OAuthException as err: + raise LoginError(err) + elif auth_method == 'login' or auth_method == 'clientlogin': params_login = { 'action': 'query', 'meta': 'tokens', @@ -114,13 +108,12 @@ def __init__(self, user=None, pwd=None, mediawiki_api_url=None, mediawiki_index_ # get login token login_token = self.session.post(self.mediawiki_api_url, data=params_login).json()['query']['tokens']['logintoken'] - if use_clientlogin: + if auth_method == 'login': params = { - 'action': 'clientlogin', - 'username': user, - 'password': pwd, - 'logintoken': login_token, - 'loginreturnurl': 'https://example.org/', + 'action': 'login', + 'lgname': user, + 'lgpassword': password, + 'lgtoken': login_token, 'format': 'json' } @@ -129,22 +122,17 @@ def __init__(self, user=None, pwd=None, mediawiki_api_url=None, mediawiki_index_ if debug: print(login_result) - if 'clientlogin' in login_result: - if login_result['clientlogin']['status'] != 'PASS': - clientlogin = login_result['clientlogin'] - raise LoginError("Login failed ({}). 
Message: '{}'".format(clientlogin['messagecode'], clientlogin['message'])) - elif debug: - print("Successfully logged in as", login_result['clientlogin']['username']) + if 'login' in login_result and login_result['login']['result'] == 'Success': + print("Successfully logged in as", login_result['login']['lgusername']) else: - error = login_result['error'] - raise LoginError("Login failed ({}). Message: '{}'".format(error['code'], error['info'])) - + raise LoginError("Login failed. Reason: '{}'".format(login_result['login']['reason'])) else: params = { - 'action': 'login', - 'lgname': user, - 'lgpassword': pwd, - 'lgtoken': login_token, + 'action': 'clientlogin', + 'username': user, + 'password': password, + 'logintoken': login_token, + 'loginreturnurl': 'https://example.org/', 'format': 'json' } @@ -153,10 +141,14 @@ def __init__(self, user=None, pwd=None, mediawiki_api_url=None, mediawiki_index_ if debug: print(login_result) - if login_result['login']['result'] != 'Success': - raise LoginError("Login failed. Reason: '{}'".format(login_result['login']['result'])) - elif debug: - print("Successfully logged in as", login_result['login']['lgusername']) + if 'clientlogin' in login_result: + clientlogin = login_result['clientlogin'] + if clientlogin['status'] != 'PASS': + raise LoginError("Login failed ({}). Message: '{}'".format(clientlogin['messagecode'], clientlogin['message'])) + elif debug: + print("Successfully logged in as", clientlogin['username']) + else: + raise LoginError("Login failed ({}). 
Message: '{}'".format(login_result['error']['code'], login_result['error']['info'])) if 'warnings' in login_result: print("MediaWiki login warnings messages:") @@ -176,8 +168,10 @@ def generate_edit_credentials(self): 'type': 'csrf', 'format': 'json' } - response = self.session.get(self.mediawiki_api_url, params=params) - self.edit_token = response.json()['query']['tokens']['csrftoken'] + response = self.session.get(self.mediawiki_api_url, params=params).json() + if 'error' in response: + raise LoginError("Login failed ({}). Message: '{}'".format(response['error']['code'], response['error']['info'])) + self.edit_token = response['query']['tokens']['csrftoken'] return self.session.cookies From 6ae4498dee38f083b0d67258343c11d68505a4ab Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 3 Sep 2021 17:50:15 +0200 Subject: [PATCH 085/308] Rewrite all the require write part (#205) * Initial commit * Add Claim.write_required() Clean things Add unit tests * Try to fix lru_cache 3.7 error * Remove search_only * Update test_wbi_exceptions.py --- requirements.txt | 1 + test/test_entity_item.py | 8 ++ test/test_wbi_core.py | 2 +- test/test_wbi_exceptions.py | 6 +- wikibaseintegrator/entities/baseentity.py | 41 ++++---- wikibaseintegrator/wbi_exceptions.py | 5 - wikibaseintegrator/wbi_fastrun.py | 110 +++++++++++++++++----- wikibaseintegrator/wbi_helpers.py | 16 +++- wikibaseintegrator/wikibaseintegrator.py | 2 - 9 files changed, 133 insertions(+), 58 deletions(-) diff --git a/requirements.txt b/requirements.txt index 04c69877..3aef6e85 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ backoff~=1.11.1 oauthlib~=3.1.1 pytest~=6.2.5 setuptools~=57.4.0 +frozendict~=2.0.6 diff --git a/test/test_entity_item.py b/test/test_entity_item.py index 07a52f0b..a60f2cbc 100644 --- a/test/test_entity_item.py +++ b/test/test_entity_item.py @@ -3,6 +3,7 @@ from simplejson import JSONDecodeError from wikibaseintegrator import 
WikibaseIntegrator +from wikibaseintegrator.datatypes import Item wbi = WikibaseIntegrator() @@ -35,3 +36,10 @@ def test_get_json(self): def test_write(self): with self.assertRaises(JSONDecodeError): wbi.item.get('Q582').write(allow_anonymous=True, mediawiki_api_url='https://httpstat.us/200') + + def test_write_required(self): + assert not wbi.item.get('Q582').write_required(base_filter={'P1791': ''}) + + item = wbi.item.get('Q582') + item.claims.add(Item(prop_nr='P1791', value='Q42')) + assert item.write_required(base_filter={'P1791': ''}) diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index 0a9ff856..ca7b9a4f 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -25,7 +25,7 @@ def test_item_engine(self): with self.assertRaises(TypeError): Item(api=wbi).add_claims('test') - def test_search_only(self): + def test_get(self): item = wbi.item.new().get(entity_id='Q2') assert item.labels.get('en').value == "Earth" diff --git a/test/test_wbi_exceptions.py b/test/test_wbi_exceptions.py index 807b5ac3..7617ab43 100644 --- a/test/test_wbi_exceptions.py +++ b/test/test_wbi_exceptions.py @@ -1,5 +1,5 @@ from wikibaseintegrator.wbi_exceptions import NonUniqueLabelDescriptionPairError, IDMissingError, SearchError, ManualInterventionReqException, CorePropIntegrityException, \ - MergeError, SearchOnlyError, MWApiError + MergeError, MWApiError def test_mwapierror(): @@ -44,7 +44,3 @@ def test_corepropintegrityexception(): def test_mergeerror(): assert str(MergeError('MergeError')) == 'MergeError' - - -def test_searchonlyerror(): - assert str(SearchOnlyError('SearchOnlyError')) == 'SearchOnlyError' diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 0788b2c1..1fd5c826 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -4,7 +4,7 @@ from wikibaseintegrator.models.claims import Claims, Claim from wikibaseintegrator.wbi_config import config from 
wikibaseintegrator.wbi_enums import ActionIfExists -from wikibaseintegrator.wbi_exceptions import SearchOnlyError, NonUniqueLabelDescriptionPairError, MWApiError +from wikibaseintegrator.wbi_exceptions import NonUniqueLabelDescriptionPairError, MWApiError from wikibaseintegrator.wbi_fastrun import FastRunContainer from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper @@ -24,11 +24,6 @@ def __init__(self, api, lastrevid=None, type=None, id=None, claims=None): self.json = {} - if self.api.search_only: - self.require_write = False - else: - self.require_write = True - self.fast_run_container = None self.debug = config['DEBUG'] @@ -97,9 +92,6 @@ def _write(self, data=None, summary=None, allow_anonymous=False, clear=False, ** :return: the entity ID on successful write """ - if self.api.search_only: - raise SearchOnlyError - data = data or {} # if all_claims: @@ -167,7 +159,7 @@ def _write(self, data=None, summary=None, allow_anonymous=False, clear=False, ** self.lastrevid = json_data['entity']['lastrevid'] return json_data['entity'] - def init_fastrun(self, base_filter=None, use_refs=False, case_insensitive=False, ): + def init_fastrun(self, base_filter=None, use_refs=False, case_insensitive=False): if base_filter is None: base_filter = {} @@ -191,17 +183,24 @@ def init_fastrun(self, base_filter=None, use_refs=False, case_insensitive=False, case_insensitive=case_insensitive) BaseEntity.fast_run_store.append(self.fast_run_container) - # TODO: Do something here - # if not self.search_only: - # self.require_write = self.fast_run_container.write_required(self.data, cqid=self.id) - # # set item id based on fast run data - # if not self.require_write and not self.id: - # self.id = self.fast_run_container.current_qid - # else: - # self.fast_run_container.load_item(self.data) - # # set item id based on fast run data - # if not self.id: - # self.id = self.fast_run_container.current_qid + def fr_search(self, **kwargs): + self.init_fastrun(**kwargs) + 
self.fast_run_container.load_item(self.claims) + + return self.fast_run_container.current_qid + + def write_required(self, base_filter=None, **kwargs): + self.init_fastrun(base_filter=base_filter, **kwargs) + + if base_filter is None: + base_filter = {} + + claims_to_check = [] + for claim in self.claims: + if claim.mainsnak.property_number in base_filter: + claims_to_check.append(claim) + + return self.fast_run_container.write_required(data=claims_to_check, cqid=self.id) def __repr__(self): """A mixin implementing a simple __repr__.""" diff --git a/wikibaseintegrator/wbi_exceptions.py b/wikibaseintegrator/wbi_exceptions.py index 3b09508f..91a6138e 100644 --- a/wikibaseintegrator/wbi_exceptions.py +++ b/wikibaseintegrator/wbi_exceptions.py @@ -62,8 +62,3 @@ class CorePropIntegrityException(Exception): class MergeError(Exception): pass - - -class SearchOnlyError(Exception): - """Raised when in search_only mode""" - pass diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 23118ffd..7cfe399c 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -1,13 +1,19 @@ import collections import copy from collections import defaultdict -from functools import lru_cache +from functools import lru_cache, wraps from itertools import chain +from pprint import pprint +from frozendict import frozendict + +from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists from wikibaseintegrator.wbi_helpers import format_amount, execute_sparql_query +fastrun_store = [] + class FastRunContainer(object): def __init__(self, base_data_type, mediawiki_api_url=None, sparql_endpoint_url=None, wikibase_url=None, base_filter=None, use_refs=False, case_insensitive=False, debug=None): @@ -90,12 +96,18 @@ def reconstruct_statements(self, qid: str) -> list: if self.prop_dt_map[prop_nr] == 'quantity': 
reconstructed_statements.append(f(d['v'], prop_nr=prop_nr, qualifiers=qualifiers, references=references, unit=d['unit'])) else: + print('aaa') + pprint(qualifiers) reconstructed_statements.append(f(d['v'], prop_nr=prop_nr, qualifiers=qualifiers, references=references)) # this isn't used. done for debugging purposes self.reconstructed_statements = reconstructed_statements return reconstructed_statements + def get_item(self, claims: list, cqid=None): + self.load_item(claims=claims, cqid=cqid) + return self.current_qid + def load_item(self, claims: list, cqid=None) -> bool: match_sets = [] for claim in claims: @@ -151,7 +163,6 @@ def load_item(self, claims: list, cqid=None) -> bool: return True qid = matching_qids.pop() - print(qid) self.current_qid = qid def write_required(self, data: list, action_if_exists=ActionIfExists.REPLACE, cqid=None) -> bool: @@ -444,7 +455,7 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: if self.debug: print(query) - r = execute_sparql_query(query, endpoint=self.sparql_endpoint_url)['results']['bindings'] + r = execute_sparql_query(query, endpoint=self.sparql_endpoint_url, debug=self.debug)['results']['bindings'] count = int(r[0]['c']['value']) print("Count: {}".format(count)) num_pages = (int(count) // page_size) + 1 @@ -486,25 +497,38 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: ''' # Qualifiers - query += ''' - # Get qualifiers - OPTIONAL - {{ - {{ - # Get simple values for qualifiers which are not of type quantity - ?sid ?propQualifier ?qval . - ?pq wikibase:qualifier ?propQualifier . - ?pq wikibase:propertyType ?qualifer_property_type . - FILTER (?qualifer_property_type != wikibase:Quantity) - }} - UNION - {{ - # Get amount and unit for qualifiers of type quantity - ?sid ?pqv [wikibase:quantityAmount ?qval; wikibase:quantityUnit ?qunit] . - ?pq wikibase:qualifierValue ?pqv . 
- }} - }} - ''' + # Amount and unit + if use_units: + query += ''' + # Get qualifiers + OPTIONAL + {{ + {{ + # Get simple values for qualifiers which are not of type quantity + ?sid ?propQualifier ?qval . + ?pq wikibase:qualifier ?propQualifier . + ?pq wikibase:propertyType ?qualifer_property_type . + FILTER (?qualifer_property_type != wikibase:Quantity) + }} + UNION + {{ + # Get amount and unit for qualifiers of type quantity + ?sid ?pqv [wikibase:quantityAmount ?qval; wikibase:quantityUnit ?qunit] . + ?pq wikibase:qualifierValue ?pqv . + }} + }} + ''' + else: + query += ''' + # Get qualifiers + OPTIONAL + {{ + # Get simple values for qualifiers + ?sid ?propQualifier ?qval . + ?pq wikibase:qualifier ?propQualifier . + ?pq wikibase:propertyType ?qualifer_property_type . + }} + ''' # References if self.use_refs: @@ -597,3 +621,45 @@ def __repr__(self) -> str: id=id(self) & 0xFFFFFF, attrs="\r\n\t ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), ) + + +def freezeargs(func): + """Transform mutable dictionnary + Into immutable + Useful to be compatible with cache + """ + + @wraps(func) + def wrapped(*args, **kwargs): + args = tuple([frozendict(arg) if isinstance(arg, dict) else arg for arg in args]) + kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()} + return func(*args, **kwargs) + + return wrapped + + +def get_fastrun_container(base_filter=None, use_refs=False, case_insensitive=False): + if base_filter is None: + base_filter = {} + + fastrun_container = search_fastrun_store(base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive) + fastrun_container.current_qid = '' + fastrun_container.base_data_type = BaseDataType + + return fastrun_container + + +@freezeargs +@lru_cache() +def search_fastrun_store(base_filter=None, use_refs=False, case_insensitive=False): + for c in fastrun_store: + if (c.base_filter == base_filter) and (c.use_refs == use_refs) and (c.case_insensitive == case_insensitive) and 
( + c.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']): + return c + + # In case nothing was found in the fastrun_store + if config['DEBUG']: + print("Create a new FastRunContainer") + fastrun_container = FastRunContainer(base_filter=base_filter, use_refs=use_refs, base_data_type=BaseDataType, case_insensitive=case_insensitive) + fastrun_store.append(fastrun_container) + return fastrun_container diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index f9fdbb25..beb2cb3a 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -9,6 +9,18 @@ from wikibaseintegrator.wbi_exceptions import MWApiError, SearchError +class BColors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + + def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries=1000, retry_after=60, **kwargs): """ :param method: 'GET' or 'POST' @@ -182,8 +194,8 @@ def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max 'Content-Type': 'multipart/form-data' } - if debug: - print(params['query']) + if debug or config['DEBUG']: + print(BColors.WARNING + params['query'] + BColors.ENDC) for n in range(max_retries): try: diff --git a/wikibaseintegrator/wikibaseintegrator.py b/wikibaseintegrator/wikibaseintegrator.py index c70055d4..81c7e29a 100644 --- a/wikibaseintegrator/wikibaseintegrator.py +++ b/wikibaseintegrator/wikibaseintegrator.py @@ -8,13 +8,11 @@ class WikibaseIntegrator(object): def __init__(self, - search_only=False, is_bot=False, login=None): # Runtime variables self.is_bot = is_bot or False self.login = login - self.search_only = search_only or False # Quick access to entities self.item = Item(api=self) From d7b8723d20ecf0c10254ac0b2c061501e017871b Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 3 Sep 
2021 20:49:00 +0200 Subject: [PATCH 086/308] Update wbi_fastrun.py --- wikibaseintegrator/wbi_fastrun.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 7cfe399c..3682f513 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -3,7 +3,6 @@ from collections import defaultdict from functools import lru_cache, wraps from itertools import chain -from pprint import pprint from frozendict import frozendict @@ -96,8 +95,6 @@ def reconstruct_statements(self, qid: str) -> list: if self.prop_dt_map[prop_nr] == 'quantity': reconstructed_statements.append(f(d['v'], prop_nr=prop_nr, qualifiers=qualifiers, references=references, unit=d['unit'])) else: - print('aaa') - pprint(qualifiers) reconstructed_statements.append(f(d['v'], prop_nr=prop_nr, qualifiers=qualifiers, references=references)) # this isn't used. done for debugging purposes From 4c48f0ca5a609e1d6479654f2cc10e97b64c8249 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 3 Sep 2021 22:09:25 +0200 Subject: [PATCH 087/308] Add more tests for write_required() --- test/test_entity_item.py | 12 +++++++++++- wikibaseintegrator/models/references.py | 12 ++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/test/test_entity_item.py b/test/test_entity_item.py index a60f2cbc..6149d68d 100644 --- a/test/test_entity_item.py +++ b/test/test_entity_item.py @@ -1,4 +1,5 @@ import unittest +from pprint import pprint from simplejson import JSONDecodeError @@ -37,9 +38,18 @@ def test_write(self): with self.assertRaises(JSONDecodeError): wbi.item.get('Q582').write(allow_anonymous=True, mediawiki_api_url='https://httpstat.us/200') - def test_write_required(self): + def test_write_not_required(self): assert not wbi.item.get('Q582').write_required(base_filter={'P1791': ''}) + def test_write_required(self): item = wbi.item.get('Q582') 
item.claims.add(Item(prop_nr='P1791', value='Q42')) assert item.write_required(base_filter={'P1791': ''}) + + def test_write_not_required_ref(self): + assert not wbi.item.get('Q582').write_required(base_filter={'P2581': ''}, use_refs=True) + + def test_write_required_ref(self): + item = wbi.item.get('Q582') + item.claims.get('P2581')[0].references.references.pop() + assert item.write_required(base_filter={'P2581': ''}, use_refs=True) diff --git a/wikibaseintegrator/models/references.py b/wikibaseintegrator/models/references.py index 91db961b..c3b67cb8 100644 --- a/wikibaseintegrator/models/references.py +++ b/wikibaseintegrator/models/references.py @@ -8,6 +8,14 @@ class References: def __init__(self): self.references = [] + @property + def references(self): + return self.__references + + @references.setter + def references(self, value): + self.__references = value + def get(self, hash=None): for reference in self.references: if reference.hash == hash: @@ -54,6 +62,10 @@ def remove(self, reference_to_remove): return False + def clear(self): + self.references = [] + return self + def __iter__(self): return iter(self.references) From afccd0bcca0b98986444ef15e59a6ce405cfa549 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 4 Sep 2021 09:09:33 +0200 Subject: [PATCH 088/308] Remove Username in User-Agent There is no reliable method to get the username when using oauth method. 
--- test/test_all.py | 3 +-- wikibaseintegrator/wbi_helpers.py | 7 ++----- wikibaseintegrator/wbi_login.py | 2 +- 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/test/test_all.py b/test/test_all.py index 51799ccb..aeb4e010 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -205,7 +205,6 @@ def test_user_agent(capfd): assert not out # Test if the user agent is correctly added - new_user_agent = get_user_agent(user_agent='MyWikibaseBot/0.5', username='Wikibot') + new_user_agent = get_user_agent(user_agent='MyWikibaseBot/0.5') assert new_user_agent.startswith('MyWikibaseBot/0.5') - assert 'Wikibot' in new_user_agent assert 'WikibaseIntegrator' in new_user_agent diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index beb2cb3a..e9da2fbf 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -125,7 +125,7 @@ def mediawiki_api_call_helper(data=None, login=None, mediawiki_api_url=None, use raise ValueError("mediawiki_api_url can't be different with the one in the login object.") headers = { - 'User-Agent': get_user_agent(user_agent, login.user if login else None) + 'User-Agent': get_user_agent(user_agent) } if data is not None: @@ -436,7 +436,7 @@ def format_amount(amount) -> str: return str(amount) -def get_user_agent(user_agent, username=None): +def get_user_agent(user_agent): from wikibaseintegrator import __version__ wbi_user_agent = "WikibaseIntegrator/{}".format(__version__) @@ -445,9 +445,6 @@ def get_user_agent(user_agent, username=None): else: return_user_agent = user_agent + ' ' + wbi_user_agent - if username: - return_user_agent += " (User:{})".format(username) - return return_user_agent diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index a78416f3..7cb6dafc 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -54,7 +54,7 @@ def __init__(self, auth_method='oauth2', user=None, password=None, mediawiki_api 
self.mediawiki_rest_url = mediawiki_rest_url or config['MEDIAWIKI_REST_URL'] self.token_renew_period = token_renew_period self.callback_url = callback_url - self.user_agent = get_user_agent(user_agent if user_agent else config['USER_AGENT'], user) + self.user_agent = get_user_agent(user_agent if user_agent else config['USER_AGENT']) if self.auth_method not in ['login', 'clientlogin', 'oauth1', 'oauth2']: raise ValueError("The auth_method must be 'login', 'clientlogin', 'oauth1' or 'oauth2'") From 7f2c527f13af13d7a240c02b1f7114f0c5514c89 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 4 Sep 2021 09:13:49 +0200 Subject: [PATCH 089/308] Update notebooks --- notebooks/item_get.ipynb | 34 +++++++++++++++++++++++++++++----- notebooks/lexeme_write.ipynb | 16 +++++++++------- 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/notebooks/item_get.ipynb b/notebooks/item_get.ipynb index da65042a..ef28523b 100644 --- a/notebooks/item_get.ipynb +++ b/notebooks/item_get.ipynb @@ -23,9 +23,33 @@ "metadata": {}, "outputs": [], "source": [ - "from wikibaseintegrator import WikibaseIntegrator" + "from wikibaseintegrator import WikibaseIntegrator\n", + "from wikibaseintegrator.wbi_config import config" ] }, + { + "cell_type": "markdown", + "source": [ + "Set default variables" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "config['USER_AGENT'] = 'Item Get Notebook'" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, { "cell_type": "markdown", "id": "ee386bbb", @@ -36,7 +60,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "f5131e52", "metadata": {}, "outputs": [], @@ -46,7 +70,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "5ce0df1e", "metadata": {}, "outputs": [], @@ -56,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 4, + 
"execution_count": 5, "id": "29c22ef2", "metadata": { "pycharm": { @@ -68,7 +92,7 @@ "data": { "text/plain": "'Douglas Adams'" }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } diff --git a/notebooks/lexeme_write.ipynb b/notebooks/lexeme_write.ipynb index 88219b63..3eff937a 100644 --- a/notebooks/lexeme_write.ipynb +++ b/notebooks/lexeme_write.ipynb @@ -46,7 +46,8 @@ "execution_count": 3, "outputs": [], "source": [ - "config['MEDIAWIKI_API_URL'] = 'https://test.wikidata.org/w/api.php'" + "config['MEDIAWIKI_API_URL'] = 'https://test.wikidata.org/w/api.php'\n", + "config['USER_AGENT'] = 'Lexeme Write Notebook'" ], "metadata": { "collapsed": false, @@ -72,7 +73,8 @@ "execution_count": 4, "outputs": [], "source": [ - "login = wbi_login.Login(user=WDUSER, pwd=WDPASS, mediawiki_api_url='https://test.wikidata.org/w/api.php')\n", + "login = wbi_login.Login(auth_method='clientlogin', user=WDUSER, password=WDPASS,\n", + " mediawiki_api_url='https://test.wikidata.org/w/api.php')\n", "wbi = WikibaseIntegrator(login=login)" ], "metadata": { @@ -126,7 +128,7 @@ "outputs": [ { "data": { - "text/plain": "" + "text/plain": "" }, "execution_count": 6, "metadata": {}, @@ -206,7 +208,7 @@ "outputs": [ { "data": { - "text/plain": " _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank='normal' _Claim__references=]}> _Reference__snaks_order=[]>, ]}> _Reference__snaks_order=[]>]> _Claim__removed=False value='Create a string claim for claim'>]}>" + "text/plain": " _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__references= 
_Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]> _Claim__removed=False _BaseDataType__value=None>]}>" }, "execution_count": 8, "metadata": {}, @@ -241,7 +243,7 @@ "outputs": [ { "data": { - "text/plain": ", 'fr': }> claims= _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank='normal' _Claim__references=]}> _Reference__snaks_order=[]>, ]}> _Reference__snaks_order=[]>]> _Claim__removed=False value='Create a string claim for sense'>]}> removed=False>]>" + "text/plain": ", 'fr': }> claims= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for sense', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Sense qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]> _Claim__removed=False _BaseDataType__value=None>]}> removed=False>]>" }, "execution_count": 9, "metadata": {}, @@ -297,7 +299,7 @@ "outputs": [ { "data": { - "text/plain": ", 'fr': }> _Form__grammatical_features=['Q146786'] _Form__claims= _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank='normal' 
_Claim__references=]}> _Reference__snaks_order=[]>, ]}> _Reference__snaks_order=[]>]> _Claim__removed=False value='Create a string claim for form'>]}>>}>" + "text/plain": ", 'fr': }> _Form__grammatical_features=['Q146786'] _Form__claims= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for form', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Form qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]> _Claim__removed=False _BaseDataType__value=None>]}>>}>" }, "execution_count": 10, "metadata": {}, @@ -361,7 +363,7 @@ "outputs": [ { "data": { - "text/plain": "\n\t lastrevid=543182\n\t type='lexeme'\n\t id='L1580'\n\t claims= _Claim__type='statement' _Claim__qualifiers=]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1580$B31E8D37-791A-49C8-B687-EBB134AA8CBA' _Claim__rank='normal' _Claim__references=]}> _Reference__snaks_order=['P828']>, ]}> _Reference__snaks_order=['P828']>]> _Claim__removed=False>]}>\n\t json={'type': 'lexeme', 'id': 'L1580', 'lemmas': {'en': {'language': 'en', 'value': 'English lemma'}, 'fr': {'language': 'fr', 'value': 'French lemma'}}, 'lexicalCategory': 'Q1244', 'language': 'Q1860', 'claims': {'P828': [{'mainsnak': {'snaktype': 'value', 'property': 'P828', 'hash': 'dc920cec98f0e830c30011cd496108be8d50afab', 'datavalue': {'value': 'Create a string claim for claim', 'type': 'string'}, 'datatype': 'string'}, 'type': 'statement', 'qualifiers': {'P828': 
[{'snaktype': 'value', 'property': 'P828', 'hash': '351f871bfe166697d3270cc0df7df8d09603efb0', 'datavalue': {'value': 'Claim qualifier', 'type': 'string'}, 'datatype': 'string'}]}, 'qualifiers-order': ['P828'], 'id': 'L1580$B31E8D37-791A-49C8-B687-EBB134AA8CBA', 'rank': 'normal', 'references': [{'hash': 'ef1ebde859c902590dfbe5a3bd7a2f7af01f4a4f', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': 'bae62d7b26cff18d5a9d277e04475fcb6bd9bcfb', 'datavalue': {'value': 'Claim string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}, {'hash': '77df7e5db38ec15b7abac0755c4dc8e781ba9369', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': 'dcd0b956c352f2036bb7da153c4db941e74a803f', 'datavalue': {'value': 'Another claim string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}]}]}, 'forms': [{'id': 'L1580-F1', 'representations': {'en': {'language': 'en', 'value': 'English form representation'}, 'fr': {'language': 'fr', 'value': 'French form representation'}}, 'grammaticalFeatures': ['Q146786'], 'claims': {'P828': [{'mainsnak': {'snaktype': 'value', 'property': 'P828', 'hash': '288a8a8f1e12b9bacb056319c4ed0f3e6bafdd00', 'datavalue': {'value': 'Create a string claim for form', 'type': 'string'}, 'datatype': 'string'}, 'type': 'statement', 'qualifiers': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '6c157568b379f4c2722f30a9fee95d3c5f99dfe9', 'datavalue': {'value': 'Form qualifier', 'type': 'string'}, 'datatype': 'string'}]}, 'qualifiers-order': ['P828'], 'id': 'L1580-F1$81D618F4-EA1A-4526-A563-DDBBB558F82E', 'rank': 'normal', 'references': [{'hash': '32f599c26d4251d72272b3a65294d6d5517d2445', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '1cbb07e7eba6906acf68f427a3f87fefc0a53283', 'datavalue': {'value': 'Form string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}, {'hash': 'a0f20d048c3df03c1bfb25c63869ba37b32c9e01', 
'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '67bef049d400b9d7e2e2695320d85012c9122df5', 'datavalue': {'value': 'Another form string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}]}]}}], 'senses': [{'id': 'L1580-S1', 'glosses': {'en': {'language': 'en', 'value': 'English gloss'}, 'fr': {'language': 'fr', 'value': 'French gloss'}}, 'claims': {'P828': [{'mainsnak': {'snaktype': 'value', 'property': 'P828', 'hash': '9781442191b38e26c55b1dfde6f6203c9127c4f3', 'datavalue': {'value': 'Create a string claim for sense', 'type': 'string'}, 'datatype': 'string'}, 'type': 'statement', 'qualifiers': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': 'c1afe7627d9711627e1e48b8e015ade998d6d434', 'datavalue': {'value': 'Sense qualifier', 'type': 'string'}, 'datatype': 'string'}]}, 'qualifiers-order': ['P828'], 'id': 'L1580-S1$487E4CD1-99F1-4868-A2AF-FB523869642B', 'rank': 'normal', 'references': [{'hash': '87e05c6c4fc2d74529d7801340c18955516b6d96', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '617bd3516c2003df28ab90fd6ee0bd8237f1f8e6', 'datavalue': {'value': 'Sense string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}, {'hash': '989b65b201e4b2fbd9d5c2c8d7bd7b7e9d2ce5c7', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '1afe472d8815b3cbf50d2e5b1c497456a82f055f', 'datavalue': {'value': 'Another sense string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}]}]}}], 'lastrevid': 543182}\n\t require_write=True\n\t fast_run_container=None\n\t debug=False\n\t lemmas=, 'fr': }>\n\t lexical_category='Q1244'\n\t language='Q1860'\n\t forms= _Claim__type='statement' _Claim__qualifiers=]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1580-F1$81D618F4-EA1A-4526-A563-DDBBB558F82E' _Claim__rank='normal' _Claim__references=]}> _Reference__snaks_order=['P828']>, ]}> _Reference__snaks_order=['P828']>]> 
_Claim__removed=False>]}>>}>\n\t senses=, 'fr': }> claims= _Claim__type='statement' _Claim__qualifiers=]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1580-S1$487E4CD1-99F1-4868-A2AF-FB523869642B' _Claim__rank='normal' _Claim__references=]}> _Reference__snaks_order=['P828']>, ]}> _Reference__snaks_order=['P828']>]> _Claim__removed=False>]}> removed=False>]>>" + "text/plain": "\n\t lastrevid=551031\n\t type='lexeme'\n\t id='L1660'\n\t claims= _Snak__property_number='P828' _Snak__hash='dc920cec98f0e830c30011cd496108be8d50afab' _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='351f871bfe166697d3270cc0df7df8d09603efb0' _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1660$D3E30A15-AC21-4E07-B953-B13D6025A861' _Claim__rank= _Claim__references= _Snak__property_number='P828' _Snak__hash='bae62d7b26cff18d5a9d277e04475fcb6bd9bcfb' _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='dcd0b956c352f2036bb7da153c4db941e74a803f' _Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]> _Claim__removed=False _BaseDataType__value=None>]}>\n\t json={'type': 'lexeme', 'id': 'L1660', 'lemmas': {'en': {'language': 'en', 'value': 'English lemma'}, 'fr': {'language': 'fr', 'value': 'French lemma'}}, 'lexicalCategory': 'Q1244', 'language': 'Q1860', 'claims': {'P828': [{'mainsnak': {'snaktype': 'value', 'property': 'P828', 'hash': 'dc920cec98f0e830c30011cd496108be8d50afab', 'datavalue': {'value': 'Create a string claim for claim', 'type': 'string'}, 'datatype': 'string'}, 'type': 'statement', 'qualifiers': {'P828': [{'snaktype': 'value', 
'property': 'P828', 'hash': '351f871bfe166697d3270cc0df7df8d09603efb0', 'datavalue': {'value': 'Claim qualifier', 'type': 'string'}, 'datatype': 'string'}]}, 'qualifiers-order': ['P828'], 'id': 'L1660$D3E30A15-AC21-4E07-B953-B13D6025A861', 'rank': 'normal', 'references': [{'hash': 'ef1ebde859c902590dfbe5a3bd7a2f7af01f4a4f', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': 'bae62d7b26cff18d5a9d277e04475fcb6bd9bcfb', 'datavalue': {'value': 'Claim string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}, {'hash': '77df7e5db38ec15b7abac0755c4dc8e781ba9369', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': 'dcd0b956c352f2036bb7da153c4db941e74a803f', 'datavalue': {'value': 'Another claim string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}]}]}, 'forms': [{'id': 'L1660-F1', 'representations': {'en': {'language': 'en', 'value': 'English form representation'}, 'fr': {'language': 'fr', 'value': 'French form representation'}}, 'grammaticalFeatures': ['Q146786'], 'claims': {'P828': [{'mainsnak': {'snaktype': 'value', 'property': 'P828', 'hash': '288a8a8f1e12b9bacb056319c4ed0f3e6bafdd00', 'datavalue': {'value': 'Create a string claim for form', 'type': 'string'}, 'datatype': 'string'}, 'type': 'statement', 'qualifiers': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '6c157568b379f4c2722f30a9fee95d3c5f99dfe9', 'datavalue': {'value': 'Form qualifier', 'type': 'string'}, 'datatype': 'string'}]}, 'qualifiers-order': ['P828'], 'id': 'L1660-F1$42E574AD-9956-4427-A89D-68A7127E2410', 'rank': 'normal', 'references': [{'hash': '32f599c26d4251d72272b3a65294d6d5517d2445', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '1cbb07e7eba6906acf68f427a3f87fefc0a53283', 'datavalue': {'value': 'Form string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}, {'hash': 'a0f20d048c3df03c1bfb25c63869ba37b32c9e01', 'snaks': {'P828': 
[{'snaktype': 'value', 'property': 'P828', 'hash': '67bef049d400b9d7e2e2695320d85012c9122df5', 'datavalue': {'value': 'Another form string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}]}]}}], 'senses': [{'id': 'L1660-S1', 'glosses': {'en': {'language': 'en', 'value': 'English gloss'}, 'fr': {'language': 'fr', 'value': 'French gloss'}}, 'claims': {'P828': [{'mainsnak': {'snaktype': 'value', 'property': 'P828', 'hash': '9781442191b38e26c55b1dfde6f6203c9127c4f3', 'datavalue': {'value': 'Create a string claim for sense', 'type': 'string'}, 'datatype': 'string'}, 'type': 'statement', 'qualifiers': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': 'c1afe7627d9711627e1e48b8e015ade998d6d434', 'datavalue': {'value': 'Sense qualifier', 'type': 'string'}, 'datatype': 'string'}]}, 'qualifiers-order': ['P828'], 'id': 'L1660-S1$DAEABAC7-3780-4E51-AED7-3FAE7230975E', 'rank': 'normal', 'references': [{'hash': '87e05c6c4fc2d74529d7801340c18955516b6d96', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '617bd3516c2003df28ab90fd6ee0bd8237f1f8e6', 'datavalue': {'value': 'Sense string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}, {'hash': '989b65b201e4b2fbd9d5c2c8d7bd7b7e9d2ce5c7', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '1afe472d8815b3cbf50d2e5b1c497456a82f055f', 'datavalue': {'value': 'Another sense string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}]}]}}], 'lastrevid': 551031}\n\t fast_run_container=None\n\t debug=False\n\t lemmas=, 'fr': }>\n\t lexical_category='Q1244'\n\t language='Q1860'\n\t forms=, 'fr': }> _Form__grammatical_features=['Q146786'] _Form__claims= _Snak__property_number='P828' _Snak__hash='288a8a8f1e12b9bacb056319c4ed0f3e6bafdd00' _Snak__datavalue={'value': 'Create a string claim for form', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= 
_Snak__property_number='P828' _Snak__hash='6c157568b379f4c2722f30a9fee95d3c5f99dfe9' _Snak__datavalue={'value': 'Form qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1660-F1$42E574AD-9956-4427-A89D-68A7127E2410' _Claim__rank= _Claim__references= _Snak__property_number='P828' _Snak__hash='1cbb07e7eba6906acf68f427a3f87fefc0a53283' _Snak__datavalue={'value': 'Form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='67bef049d400b9d7e2e2695320d85012c9122df5' _Snak__datavalue={'value': 'Another form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]> _Claim__removed=False _BaseDataType__value=None>]}>>}>\n\t senses=, 'fr': }> claims= _Snak__property_number='P828' _Snak__hash='9781442191b38e26c55b1dfde6f6203c9127c4f3' _Snak__datavalue={'value': 'Create a string claim for sense', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='c1afe7627d9711627e1e48b8e015ade998d6d434' _Snak__datavalue={'value': 'Sense qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1660-S1$DAEABAC7-3780-4E51-AED7-3FAE7230975E' _Claim__rank= _Claim__references= _Snak__property_number='P828' _Snak__hash='617bd3516c2003df28ab90fd6ee0bd8237f1f8e6' _Snak__datavalue={'value': 'Sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='1afe472d8815b3cbf50d2e5b1c497456a82f055f' _Snak__datavalue={'value': 'Another sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]> _Claim__removed=False _BaseDataType__value=None>]}> removed=False>]>>" }, "execution_count": 11, "metadata": {}, From dd54fb7c5d132371e6caa4b0aa059ebda20ccf82 Mon Sep 17 
00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 4 Sep 2021 10:22:36 +0200 Subject: [PATCH 090/308] Update wbi_login.py Add parameter key --- wikibaseintegrator/wbi_login.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index 7cb6dafc..b46c042c 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -80,7 +80,7 @@ def __init__(self, auth_method='oauth2', user=None, password=None, mediawiki_api elif auth_method == 'oauth1': if access_token and access_secret: # OAuth procedure, based on https://www.mediawiki.org/wiki/OAuth/Owner-only_consumers#Python - auth = OAuth1(self.consumer_token, client_secret=consumer_secret, resource_owner_key=access_token, resource_owner_secret=access_secret) + auth = OAuth1(client_key=self.consumer_token, client_secret=consumer_secret, resource_owner_key=access_token, resource_owner_secret=access_secret) self.session.auth = auth self.generate_edit_credentials() else: @@ -89,7 +89,7 @@ def __init__(self, auth_method='oauth2', user=None, password=None, mediawiki_api self.consumer_token = ConsumerToken(self.consumer_token, consumer_secret) # Construct handshaker with wiki URI and consumer - self.handshaker = Handshaker(self.mediawiki_index_url, self.consumer_token, callback=self.callback_url, user_agent=self.user_agent) + self.handshaker = Handshaker(mw_uri=self.mediawiki_index_url, consumer_token=self.consumer_token, callback=self.callback_url, user_agent=self.user_agent) # Step 1: Initialize -- ask MediaWiki for a temp key/secret for user # redirect -> authorization -> callback url From 80cea8c1d388da9f4f4c04af64e86302bfdcd547 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 4 Sep 2021 10:22:45 +0200 Subject: [PATCH 091/308] Update baseentity.py Hide debug message --- wikibaseintegrator/entities/baseentity.py | 3 ++- 1 file changed, 2 insertions(+), 1 
deletion(-) diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 1fd5c826..5255e32d 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -163,7 +163,8 @@ def init_fastrun(self, base_filter=None, use_refs=False, case_insensitive=False) if base_filter is None: base_filter = {} - print('Initialize Fast Run init_fastrun') + if self.debug: + print('Initialize Fast Run init_fastrun') # We search if we already have a FastRunContainer with the same parameters to re-use it for c in BaseEntity.fast_run_store: if (c.base_filter == base_filter) and (c.use_refs == use_refs) and (c.case_insensitive == case_insensitive) and ( From 29798ca2c1de2af79bddb5d9e58433c40248049b Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 4 Sep 2021 10:28:52 +0200 Subject: [PATCH 092/308] Update README.md --- README.md | 312 +++++++++++++++++++++++++++--------------------------- 1 file changed, 154 insertions(+), 158 deletions(-) diff --git a/README.md b/README.md index 1b664665..1036c18a 100644 --- a/README.md +++ b/README.md @@ -13,16 +13,15 @@ - [Using a Wikibase instance](#using-a-wikibase-instance) - [Wikimedia Foundation User-Agent policy](#wikimedia-foundation-user-agent-policy) - [The Core Parts](#the-core-parts) - - [wbi_item.Item](#wbi_coreitemengine) - - [wbi_functions](#wbi_functions) - - [Use MediaWiki API](#use-mediawiki-api) + - [Entity manipulation](#entity-manipulation) - [wbi_login.Login](#wbi_loginlogin) - [Login using OAuth1 or OAuth2](#login-using-oauth1-or-oauth2) + - [Login with a bot password](#login-with-a-bot-password) - [Login with a username and a password](#login-with-a-username-and-a-password) - [Wikibase Data Types](#wikibase-data-types) - [Helper Methods](#helper-methods) - - [Execute SPARQL queries](#execute-sparql-queries) - [Use Mediawiki API](#use-mediawiki-api) + - [Execute SPARQL queries](#execute-sparql-queries) - 
[Wikibase search entities](#wikibase-search-entities) - [Merge Wikibase items](#merge-wikibase-items) - [Examples (in "normal" mode)](#examples-in-normal-mode) @@ -108,71 +107,45 @@ wbi_config['USER_AGENT'] = 'MyWikibaseBot/1.0 (https://www.wikidata.org/wiki/Use # The Core Parts # -wbi_core supports two modes it can be operated in, a normal mode, updating each item at a time and, a fast run mode, -which is pre-loading data locally and then just updating items if the new data provided is differing from what is in -Wikidata. The latter mode allows for great speedups (measured up to 9x) when tens of thousand of Wikidata items need to -be checked if they require updates but only a small number will finally be updated, a situation usually encountered when -keeping Wikidata in sync with an external resource. +WikibaseIntegrator supports two modes it can be operated in, a normal mode, updating each item at a time and, a fast run +mode, which is pre-loading data locally and then just updating items if the new data provided is differing from what is +in Wikidata. The latter mode allows for great speedups when tens of thousand of Wikidata items need to be checked if +they require updates but only a small number will finally be updated, a situation usually encountered when keeping +Wikidata in sync with an external resource. -wbi_core consists of a central class called ItemEngine and Login for authenticating with a MediaWiki isntance (like -Wikidata). +## Entity manipulation ## -## wbi_item.Item ## +WikibaseIntegrator supports Item, Property, Lexeme and MediaInfo manipulation through these classes : -This is the central class which does all the heavy lifting. +* wikibaseintegrator.entities.item.Item +* wikibaseintegrator.entities.property.Property +* wikibaseintegrator.entities.lexeme.Lexeme +* wikibaseintegrator.entities.mediainfo.MediaInfo Features: -* Load a Wikibase item based on data to be written (e.g. 
a unique central identifier) -* Load a Wikibase item based on its Wikibase item id (aka QID) -* Checks for conflicts automatically (e.g. multiple items carrying a unique central identifier will trigger an +* [ ] Load a Wikibase entity based on data to be written (e.g. a unique central identifier) +* [x] Load a Wikibase entity based on its Wikibase entity id +* [ ] Checks for conflicts automatically (e.g. multiple items carrying a unique central identifier will trigger an exception) -* Checks automatically if the correct item has been loaded by comparing it to the data provided -* All Wikibase data types implemented -* A dedicated wbi_item.Item.write() method allows loading and consistency checks of data before any write to Wikibase is - performed -* Full access to the whole Wikibase item as a JSON document +* [ ] Checks automatically if the correct item has been loaded by comparing it to the data provided +* [x] All Wikibase data types implemented +* [ ] A dedicated write() method allows loading and consistency checks of data before any write to Wikibase is performed +* [x] Full access to the whole Wikibase item as a JSON document -There are two ways of working with Wikibase items: +There are two ways of working with Wikibase entities: * A user can provide data, and ItemEngine will search for and load/modify an existing item or create a new one, solely based on the data provided (preferred). This also performs consistency checks based on a set of SPARQL queries. * A user can work with a selected QID to specifically modify the data on the item. This requires that the user knows what he/she is doing and should only be used with great care, as this does not perform consistency checks. -## wbi_functions ## - -wbi_functions provides a set of static functions to request or manipulate data from MediaWiki API or SPARQL Service. 
- -Features: - -* Minimize the number of HTTP requests for reads and writes to improve performance -* Method to easily execute [SPARQL](https://query.wikidata.org) queries on the Wikibase SPARQL endpoint. - -### Use MediaWiki API ### - -WikibaseIntegrator don't have functions to make API call to non-wikibase actions. You can -use `wbi_functions.mediawiki_api_call_helper()` to make a custom call. - -Example to get the last two revisions of entity Q42 : - -```python -from wikibaseintegrator import wbi_functions - -data = { - 'action': 'query', - 'prop': 'revisions', - 'titles': 'Q42', - 'rvlimit': 2, - 'rvprop': 'ids|timestamp|comment|user', - 'rvslots': 'main' -} - -print(wbi_functions.mediawiki_api_call_helper(data, allow_anonymous=True)) -``` - ## wbi_login.Login ## +`wbi_login.Login` provides the login functionality and also stores the cookies and edit tokens required (For security +reasons, every Mediawiki edit requires an edit token). The constructor takes multiple parameters like the server ( +default wikidata.org), and the token renewal periods can be specified. + ### Login using OAuth1 or OAuth2 ### OAuth is the authentication method recommended by the Mediawiki developpers. It can be used for authenticating a bot or @@ -184,40 +157,44 @@ If you want to use WBI with a bot account, you should use OAuth as an [Owner-only consumer](https://www.mediawiki.org/wiki/OAuth/Owner-only_consumers). This allows to use the authentication without the "continue oauth" step. -The first step is to request a new OAuth consumer on your Mediawiki instance on the page "Special: -OAuthConsumerRegistration", the "Owner-only" (or "This consumer is for use only by ...") has to be checked. You will get -a consumer key, consumer secret, access token and access secret. +The first step is to request a new OAuth consumer on your Mediawiki instance on the page +"Special:OAuthConsumerRegistration", the "Owner-only" (or "This consumer is for use only by ...") has to be checked. 
You +will get a consumer token, consumer secret, access token and access secret. -Example if you use OAuth 1.0a: +Example if you use OAuth 2.0: ```python from wikibaseintegrator import wbi_login -login_instance = wbi_login.Login(consumer_key='', consumer_secret='', - access_token='', access_secret='') +login_instance = wbi_login.Login(auth_method='oauth2', consumer_token='', + consumer_secret='') ``` -Example if you use OAuth 2.0: +Example if you use OAuth 1.0a: ```python from wikibaseintegrator import wbi_login -login_instance = wbi_login.Login(client_id='', client_secret='') +login_instance = wbi_login.Login(auth_method='oauth1', consumer_token='', + consumer_secret='', access_token='', + access_secret='') ``` #### To impersonate a user (OAuth 1.0a) #### -If WBI should be used as a backend for a webapp, the script should use OAuth for authentication, WBI supports this, you -just need to specify consumer key and consumer secret when instantiating `wbi_login.Login`. In contrast to username and -password login, OAuth is a 2 steps process as manual user confirmation for OAuth login is required. This means that the -method `wbi_login.Login.continue_oauth()` needs to be called after creating the `wbi_login.Login` instance. +If WBI should be used as a backend for a web application, the script should use OAuth for authentication, WBI supports +this, you just need to specify consumer key and consumer secret when instantiating `wbi_login.Login`. In contrast to +username and password login, OAuth is a 2 steps process as manual user confirmation for OAuth login is required. This +means that the method `wbi_login.Login.continue_oauth()` needs to be called after creating the `wbi_login.Login` +instance. 
Example: ```python from wikibaseintegrator import wbi_login -login_instance = wbi_login.Login(consumer_key='', consumer_secret='') +login_instance = wbi_login.Login(auth_method='oauth1', consumer_token='', + consumer_secret='') login_instance.continue_oauth() ``` @@ -225,89 +202,101 @@ The method `wbi_login.Login.continue_oauth()` will either prompt the user for a will take a parameter so in the case of WBI being used as a backend for e.g. a web app, where the callback will provide the authentication information directly to the backend and so no copy and paste of the callback URL is required. +### Login with a bot password ### + +It's a good practice to use [Bot password](https://www.mediawiki.org/wiki/Manual:Bot_passwords) instead of simple +username and password, this allows limiting the permissions given to the bot. + +```python +from wikibaseintegrator import wbi_login + +login_instance = wbi_login.Login(auth_method='login', user='', password='') +``` + ### Login with a username and a password ### -`wbi_login.Login` provides the login functionality and also stores the cookies and edit tokens required (For security -reasons, every Mediawiki edit requires an edit token). The constructor takes two essential parameters, username and -password. Additionally, the server (default wikidata.org), and the token renewal periods can be specified. It's a good -practice to use [Bot password](https://www.mediawiki.org/wiki/Manual:Bot_passwords) instead of simple username and -password, this allows limiting the permissions given to the bot. +If you want to log in with your main account, you can use the "clientlogin" authentication method. ```python from wikibaseintegrator import wbi_login -login_instance = wbi_login.Login(user='', pwd='') +login_instance = wbi_login.Login(auth_method='clientlogin', user='', password='') ``` ## Wikibase Data Types ## Currently, Wikibase supports 17 different data types. The data types are represented as their own classes in -wbi_datatype. 
Each data types has its specialties, which means that some of them require special parameters (e.g. Globe -Coordinates). +wikibaseintegrator.datatypes. Each data types has its specialities, which means that some of them require special +parameters (e.g. Globe Coordinates). They are available under the namespace `wikibase.datatypes`. The data types currently implemented: -* wbi_datatype.CommonsMedia -* wbi_datatype.ExternalID -* wbi_datatype.Form -* wbi_datatype.GeoShape -* wbi_datatype.GlobeCoordinate -* wbi_datatype.ItemID -* wbi_datatype.Lexeme -* wbi_datatype.Math -* wbi_datatype.MonolingualText -* wbi_datatype.MusicalNotation -* wbi_datatype.Property -* wbi_datatype.Quantity -* wbi_datatype.Sense -* wbi_datatype.String -* wbi_datatype.TabularData -* wbi_datatype.Time -* wbi_datatype.Url +* CommonsMedia +* ExternalID +* Form +* GeoShape +* GlobeCoordinate +* Item +* Lexeme +* Math +* MonolingualText +* MusicalNotation +* Property +* Quantity +* Sense +* String +* TabularData +* Time +* URL + +There is also two extra data types implemented but need Mediawiki extension installed to work properly: + +* extra.EDTF ([Wikibase EDTF](https://www.mediawiki.org/wiki/Extension:Wikibase_EDTF)) +* extra.LocalMedia ([Wikibase Local Media](https://www.mediawiki.org/wiki/Extension:Wikibase_Local_Media)) For details of how to create values (=instances) with these data types, please (for now) consult the docstrings in the source code. Of note, these data type instances hold the values and, if specified, data type instances for references -and qualifiers. Furthermore, calling the get_value() method of an instance returns either an integer, a string or a -tuple, depending on the complexity of the data type. +and qualifiers. Furthermore, calling the `value()` method of an instance returns either an integer, a string or a tuple, +depending on the complexity of the data type. 
# Helper Methods # -## Execute SPARQL queries ## - -The method `wbi_item.Item.execute_sparql_query()` allows you to execute SPARQL queries without a hassle. It takes the -actual query string (query), optional prefixes (prefix) if you do not want to use the standard prefixes of Wikidata, the -actual entpoint URL (endpoint), and you can also specify a user agent for the http header sent to the SPARQL server ( -user_agent). The latter is very useful to let the operators of the endpoint know who you are, especially if you execute -many queries on the endpoint. This allows the operators of the endpoint to contact you (e.g. specify an email address, -or the URL to your bot code repository.) - ## Use Mediawiki API ## -The method `wbi_functions.mediawiki_api_call_helper()` allows you to execute MediaWiki API POST call. It takes a -mandatory data array (data) and multiple optionals parameters like a login object of type wbi_login.Login, a -mediawiki_api_url string if the Mediawiki is not Wikidata, a user_agent string to set a custom HTTP User Agent header, -and an allow_anonymous boolean to force authentication. +The method `wbi_helpers.mediawiki_api_call_helper()` allows you to execute MediaWiki API POST call. It takes a mandatory +data array (data) and multiple optionals parameters like a login object of type wbi_login.Login, a mediawiki_api_url +string if the Mediawiki is not Wikidata, a user_agent string to set a custom HTTP User Agent header, and an +allow_anonymous boolean to force authentication. 
Example: Retrieve last 10 revisions from Wikidata element Q2 (Earth): ```python -from wikibaseintegrator import wbi_functions +from wikibaseintegrator import wbi_helpers -query = { +data = { 'action': 'query', 'prop': 'revisions', 'titles': 'Q2', 'rvlimit': 10 } -print(wbi_functions.mediawiki_api_call_helper(query, allow_anonymous=True)) +print(wbi_helpers.mediawiki_api_call_helper(data=data, allow_anonymous=True)) ``` +## Execute SPARQL queries ## + +The method `wbi_helpers.execute_sparql_query()` allows you to execute SPARQL queries without a hassle. It takes the +actual query string (query), optional prefixes (prefix) if you do not want to use the standard prefixes of Wikidata, the +actual entpoint URL (endpoint), and you can also specify a user agent for the http header sent to the SPARQL server ( +user_agent). The latter is very useful to let the operators of the endpoint know who you are, especially if you execute +many queries on the endpoint. This allows the operators of the endpoint to contact you (e.g. specify an email address, +or the URL to your bot code repository.) + ## Wikibase search entities ## -The method `wbi_item.Item.search_entities()` allows for string search in a Wikibase instance. This means that labels, +The method `wbi_helpers.search_entities()` allows for string search in a Wikibase instance. This means that labels, descriptions and aliases can be searched for a string of interest. The method takes five arguments: The actual search string (search_string), an optional server (mediawiki_api_url, in case the Wikibase instance used is not Wikidata), an optional user_agent, an optional max_results (default 500), an optional language (default 'en'), and an option @@ -316,7 +305,7 @@ dict_id_label to return a dict of item id and label as a result. ## Merge Wikibase items ## Sometimes, Wikibase items need to be merged. An API call exists for that, and wbi_core implements a method accordingly. 
-`wbi_functions.merge_items()` takes five arguments: +`wbi_helpers.merge_items()` takes five arguments: the QID of the item which should be merged into another item (from_id), the QID of the item the first item should be merged into (to_id), a login object of type wbi_login.Login to provide the API call with the required authentication information, a server (mediawiki_api_url) if the Wikibase instance is not Wikidata and a flag for ignoring merge @@ -335,22 +324,24 @@ In order to create a minimal bot based on wbi_core, three things are required: * A ItemEngine object which takes the data, does the checks and performs write. ```python -from wikibaseintegrator import wbi_login -from wikibaseintegrator.entities import item -from wikibaseintegrator.datatypes import basedatatype +from wikibaseintegrator import WikibaseIntegrator, wbi_login +from wikibaseintegrator.datatypes import ExternalID # login object login_instance = wbi_login.Login(user='', pwd='') +wbi = WikibaseIntegrator(login=login_instance) + # data type object, e.g. for a NCBI gene entrez ID -entrez_gene_id = basedatatype.String(value='', prop_nr='P351') +entrez_gene_id = ExternalID(value='', prop_nr='P351') # data goes into a list, because many data objects can be provided to data = [entrez_gene_id] # Search for and then edit/create new item -wd_item = item.Item(data=data) -wd_item.write(login_instance) +item = wbi.item.new() +item.claims.add(data) +item.write() ``` ## A Minimal Bot for Mass Import ## @@ -359,9 +350,8 @@ An enhanced example of the previous bot just puts two of the three things into a or modification of items. 
```python -from wikibaseintegrator import wbi_login -from wikibaseintegrator.entities import item -from wikibaseintegrator.datatypes import basedatatype +from wikibaseintegrator import WikibaseIntegrator, wbi_login +from wikibaseintegrator.datatypes import ExternalID, Item, Time, String # login object login_instance = wbi_login.Login(user='', pwd='') @@ -372,26 +362,29 @@ raw_data = { '1029': 'ENST00000498124' } +wbi = WikibaseIntegrator(login=login_instance) + for entrez_id, ensembl in raw_data.items(): - # add some references - references = [ - [ - basedatatype.ItemID(value='Q20641742', prop_nr='P248', is_reference=True), - basedatatype.Time(time='+2020-02-08T00:00:00Z', prop_nr='P813', is_reference=True), - basedatatype.ExternalID(value='1017', prop_nr='P351', is_reference=True) + # add some references + references = [ + [ + Item(value='Q20641742', prop_nr='P248'), + Time(time='+2020-02-08T00:00:00Z', prop_nr='P813'), + ExternalID(value='1017', prop_nr='P351') + ] ] - ] - # data type object - entrez_gene_id = basedatatype.String(value=entrez_id, prop_nr='P351', references=references) - ensembl_transcript_id = basedatatype.String(value=ensembl, prop_nr='P704', references=references) + # data type object + entrez_gene_id = String(value=entrez_id, prop_nr='P351', references=references) + ensembl_transcript_id = String(value=ensembl, prop_nr='P704', references=references) - # data goes into a list, because many data objects can be provided to - data = [entrez_gene_id, ensembl_transcript_id] + # data goes into a list, because many data objects can be provided to + data = [entrez_gene_id, ensembl_transcript_id] - # Search for and then edit/create new item - wd_item = item.Item(data=data) - wd_item.write(login_instance) + # Search for and then edit/create new item + item = wbi.item.new() + item.claims.add(data) + item.write() ``` # Examples (in "fast run" mode) # @@ -400,7 +393,7 @@ In order to use the fast run mode, you need to know the property/value combinati would 
like to operate on. E.g. for operating on human genes, you need to know that [P351](https://www.wikidata.org/entity/P351) is the NCBI entrez gene ID and you also need to know that you are dealing with humans, best represented by the [found in taxon property (P703)](https://www.wikidata.org/entity/P703) with -the value [Q15978631](https://www.wikidata.org/entity/Q15978631) for homo sapiens. +the value [Q15978631](https://www.wikidata.org/entity/Q15978631) for Homo sapiens. IMPORTANT: In order for the fast run mode to work, the data you provide in the constructor must contain at least one unique value/id only present on one Wikidata item, e.g. an NCBI entrez gene ID, Uniprot ID, etc. Usually, these would be @@ -419,12 +412,11 @@ fast_run_base_filter = {'P351': '', 'P703': 'Q15978631'} The full example: ```python -from wikibaseintegrator import wbi_login -from wikibaseintegrator.entities import item -from wikibaseintegrator.datatypes import basedatatype +from wikibaseintegrator import WikibaseIntegrator, wbi_login +from wikibaseintegrator.datatypes import Item, Time, ExternalID, String # login object -login_instance = wbi_login.Login(user='', pwd='') +login = wbi_login.Login(user='', pwd='') fast_run_base_filter = {'P351': '', 'P703': 'Q15978631'} fast_run = True @@ -432,32 +424,36 @@ fast_run = True # We have raw data, which should be written to Wikidata, namely two human NCBI entrez gene IDs mapped to two Ensembl Gene IDs # You can iterate over any data source as long as you can map the values to Wikidata properties. 
raw_data = { - '50943': 'ENST00000376197', - '1029': 'ENST00000498124' + '50943': 'ENST00000376197', + '1029': 'ENST00000498124' } for entrez_id, ensembl in raw_data.items(): - # add some references - references = [ - [ - basedatatype.ItemID(value='Q20641742', prop_nr='P248', is_reference=True), - basedatatype.Time(time='+2020-02-08T00:00:00Z', prop_nr='P813', is_reference=True), - basedatatype.ExternalID(value='1017', prop_nr='P351', is_reference=True) + # add some references + references = [ + [ + Item(value='Q20641742', prop_nr='P248') + ], + [ + Time(time='+2020-02-08T00:00:00Z', prop_nr='P813'), + ExternalID(value='1017', prop_nr='P351') + ] ] - ] - # data type object - entrez_gene_id = basedatatype.String(value=entrez_id, prop_nr='P351', references=references) - ensembl_transcript_id = basedatatype.String(value=ensembl, prop_nr='P704', references=references) + # data type object + entrez_gene_id = String(value=entrez_id, prop_nr='P351', references=references) + ensembl_transcript_id = String(value=ensembl, prop_nr='P704', references=references) - # data goes into a list, because many data objects can be provided to - data = [entrez_gene_id, ensembl_transcript_id] + # data goes into a list, because many data objects can be provided to + data = [entrez_gene_id, ensembl_transcript_id] - # Search for and then edit/create new item - wd_item = item.Item(data=data, fast_run=fast_run, fast_run_base_filter=fast_run_base_filter) - wd_item.write(login_instance) + # Search for and then edit/create new item + wb_item = WikibaseIntegrator(login=login).item.new() + wb_item.add_claims(claims=data) + wb_item.init_fastrun(base_filter=fast_run_base_filter) + wb_item.write() ``` Note: Fastrun mode checks for equality of property/value pairs, qualifers (not including qualifier attributes), labels, aliases and description, but it ignores references by default! -References can be checked in fast run mode by setting `fast_run_use_refs` to `True`. 
+References can be checked in fast run mode by setting `use_refs` to `True`. From 838652999a4966a6c6649aacdad6130e2344bab1 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 5 Sep 2021 11:13:23 +0200 Subject: [PATCH 093/308] Add the keyword "now" to Time datatype to set the value to the current UTC time Fix #210 Add unit test --- test/test_wbi_core.py | 1 + wikibaseintegrator/datatypes/time.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index ca7b9a4f..3b8c14be 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -219,6 +219,7 @@ def test_new_item_creation(self): Time(time='-0458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"), Time(time='458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"), Time(time='+2021-01-01T15:15:15Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"), + Time(time='now', before=1, after=2, precision=3, timezone=4, prop_nr="P5"), URL(value="http://www.wikidata.org", prop_nr="P6"), URL(value="https://www.wikidata.org", prop_nr="P6"), URL(value="ftp://example.com", prop_nr="P6"), diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py index fb8c430d..9e260965 100644 --- a/wikibaseintegrator/datatypes/time.py +++ b/wikibaseintegrator/datatypes/time.py @@ -1,3 +1,4 @@ +import datetime import re from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -20,7 +21,7 @@ def __init__(self, time=None, before=0, after=0, precision=11, timezone=0, calen """ Constructor, calls the superclass BaseDataType :param time: Explicit value for point in time, represented as a timestamp resembling ISO 8601 - :type time: str in the format '+%Y-%m-%dT%H:%M:%SZ', e.g. '+2001-12-31T12:01:13Z' + :type time: str in the format '+%Y-%m-%dT%H:%M:%SZ', e.g. 
'+2001-12-31T12:01:13Z' or now :param prop_nr: The property number for this claim :type prop_nr: str with a 'P' prefix followed by digits :param before: explicit integer value for how many units after the given time it could be. @@ -57,6 +58,9 @@ def __init__(self, time=None, before=0, after=0, precision=11, timezone=0, calen assert isinstance(time, str) or time is None, "Expected str, found {} ({})".format(type(time), time) if time: + if time == "now": + time = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ") + if not (time.startswith("+") or time.startswith("-")): time = "+" + time pattern = re.compile(r'^[+-][0-9]*-(?:1[0-2]|0[0-9])-(?:3[01]|0[0-9]|[12][0-9])T(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]Z$') From 1b3d8f182dbe4533d1cebf074b121a0c4cf3776c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 5 Sep 2021 19:50:17 +0200 Subject: [PATCH 094/308] Add dummy docstring --- wikibaseintegrator/datatypes/basedatatype.py | 1 + wikibaseintegrator/datatypes/form.py | 1 + wikibaseintegrator/datatypes/geoshape.py | 7 +++++++ wikibaseintegrator/datatypes/globecoordinate.py | 1 + wikibaseintegrator/datatypes/item.py | 1 + wikibaseintegrator/datatypes/lexeme.py | 1 + wikibaseintegrator/datatypes/monolingualtext.py | 1 + wikibaseintegrator/datatypes/property.py | 1 + wikibaseintegrator/datatypes/quantity.py | 1 + wikibaseintegrator/datatypes/sense.py | 1 + wikibaseintegrator/datatypes/string.py | 1 + wikibaseintegrator/datatypes/tabulardata.py | 2 ++ wikibaseintegrator/datatypes/time.py | 1 + wikibaseintegrator/wbi_exceptions.py | 10 ++++++---- wikibaseintegrator/wbi_fastrun.py | 1 + 15 files changed, 27 insertions(+), 4 deletions(-) diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index a5b6e21b..737a4d21 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -17,6 +17,7 @@ class BaseDataType(Claim): def 
__init__(self, prop_nr=None, **kwargs): """ Constructor, will be called by all data types. + :param value: Data value of the Wikibase data snak :type value: str or int or tuple :param prop_nr: The property number a Wikibase snak belongs to diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py index 5d983519..c8f4b47d 100644 --- a/wikibaseintegrator/datatypes/form.py +++ b/wikibaseintegrator/datatypes/form.py @@ -18,6 +18,7 @@ class Form(BaseDataType): def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType + :param value: The form number to serve as a value using the format "L-F" (example: L252248-F2) :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix :param prop_nr: The property number for this claim diff --git a/wikibaseintegrator/datatypes/geoshape.py b/wikibaseintegrator/datatypes/geoshape.py index 143cb6fd..7e0f98af 100644 --- a/wikibaseintegrator/datatypes/geoshape.py +++ b/wikibaseintegrator/datatypes/geoshape.py @@ -18,6 +18,7 @@ class GeoShape(BaseDataType): def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType + :param value: The GeoShape map file name in Wikimedia Commons to be linked :type value: str or None :param prop_nr: The item ID for this claim @@ -30,6 +31,12 @@ def __init__(self, value=None, **kwargs): :type qualifiers: A data type with subclass of BaseDataType :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' :type rank: str + + :Keyword Arguments: + * *extra* (``list``) -- + Extra stuff + * *supplement* (``dict``) -- + Additional content """ super(GeoShape, self).__init__(**kwargs) diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py index 085d3d2f..f931eaaa 100644 --- a/wikibaseintegrator/datatypes/globecoordinate.py +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -17,6 +17,7 @@ class 
GlobeCoordinate(BaseDataType): def __init__(self, latitude=None, longitude=None, precision=None, globe=None, wikibase_url=None, **kwargs): """ Constructor, calls the superclass BaseDataType + :param latitude: Latitute in decimal format :type latitude: float or None :param longitude: Longitude in decimal format diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py index 90d924b7..3f62aac3 100644 --- a/wikibaseintegrator/datatypes/item.py +++ b/wikibaseintegrator/datatypes/item.py @@ -18,6 +18,7 @@ class Item(BaseDataType): def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType + :param value: The item ID to serve as the value :type value: str with a 'Q' prefix, followed by several digits or only the digits without the 'Q' prefix :param prop_nr: The item ID for this claim diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py index bcacc25f..940b473d 100644 --- a/wikibaseintegrator/datatypes/lexeme.py +++ b/wikibaseintegrator/datatypes/lexeme.py @@ -18,6 +18,7 @@ class Lexeme(BaseDataType): def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType + :param value: The lexeme number to serve as a value :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix :param prop_nr: The property number for this claim diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py index 61313be5..b5287f08 100644 --- a/wikibaseintegrator/datatypes/monolingualtext.py +++ b/wikibaseintegrator/datatypes/monolingualtext.py @@ -17,6 +17,7 @@ class MonolingualText(BaseDataType): def __init__(self, text=None, language=None, **kwargs): """ Constructor, calls the superclass BaseDataType + :param text: The language specific string to be used as the value :type text: str or None :param prop_nr: The item ID for this claim diff --git 
a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py index a5f8e82e..2ca64a66 100644 --- a/wikibaseintegrator/datatypes/property.py +++ b/wikibaseintegrator/datatypes/property.py @@ -18,6 +18,7 @@ class Property(BaseDataType): def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType + :param value: The property number to serve as a value :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix :param prop_nr: The property number for this claim diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py index 04580ae3..ad903d34 100644 --- a/wikibaseintegrator/datatypes/quantity.py +++ b/wikibaseintegrator/datatypes/quantity.py @@ -18,6 +18,7 @@ class Quantity(BaseDataType): def __init__(self, amount=None, upper_bound=None, lower_bound=None, unit='1', wikibase_url=None, **kwargs): """ Constructor, calls the superclass BaseDataType + :param amount: The amount value :type amount: float, str or None :param prop_nr: The item ID for this claim diff --git a/wikibaseintegrator/datatypes/sense.py b/wikibaseintegrator/datatypes/sense.py index 52ea21b3..9156a848 100644 --- a/wikibaseintegrator/datatypes/sense.py +++ b/wikibaseintegrator/datatypes/sense.py @@ -18,6 +18,7 @@ class Sense(BaseDataType): def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType + :param value: Value using the format "L-S" (example: L252248-S123) :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix :param prop_nr: The property number for this claim diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py index e955c6c3..46fd97eb 100644 --- a/wikibaseintegrator/datatypes/string.py +++ b/wikibaseintegrator/datatypes/string.py @@ -11,6 +11,7 @@ class String(BaseDataType): def __init__(self, value=None, **kwargs): """ Constructor, 
calls the superclass BaseDataType + :param value: The string to be used as the value :type value: str or None :param prop_nr: The item ID for this claim diff --git a/wikibaseintegrator/datatypes/tabulardata.py b/wikibaseintegrator/datatypes/tabulardata.py index 6c6283e2..a3ecd49c 100644 --- a/wikibaseintegrator/datatypes/tabulardata.py +++ b/wikibaseintegrator/datatypes/tabulardata.py @@ -12,6 +12,7 @@ class TabularData(BaseDataType): def __init__(self, value=None, **kwargs): """ Constructor, calls the superclass BaseDataType + :param value: Reference to tabular data file on Wikimedia Commons. :type value: str or None :param prop_nr: The item ID for this claim @@ -28,6 +29,7 @@ def __init__(self, value=None, **kwargs): :type qualifiers: A data type with subclass of BaseDataType :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' :type rank: str + :raises ValueError: Raise a ValueError Exception if the data in value parameter is not valid. """ super(TabularData, self).__init__(**kwargs) diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py index 9e260965..ddf74d93 100644 --- a/wikibaseintegrator/datatypes/time.py +++ b/wikibaseintegrator/datatypes/time.py @@ -20,6 +20,7 @@ class Time(BaseDataType): def __init__(self, time=None, before=0, after=0, precision=11, timezone=0, calendarmodel=None, wikibase_url=None, **kwargs): """ Constructor, calls the superclass BaseDataType + :param time: Explicit value for point in time, represented as a timestamp resembling ISO 8601 :type time: str in the format '+%Y-%m-%dT%H:%M:%SZ', e.g. 
'+2001-12-31T12:01:13Z' or now :param prop_nr: The property number for this claim diff --git a/wikibaseintegrator/wbi_exceptions.py b/wikibaseintegrator/wbi_exceptions.py index 91a6138e..2bee9e43 100644 --- a/wikibaseintegrator/wbi_exceptions.py +++ b/wikibaseintegrator/wbi_exceptions.py @@ -2,6 +2,7 @@ class MWApiError(Exception): def __init__(self, error_message): """ Base class for Mediawiki API error handling + :param error_message: The error message returned by the Mediawiki API :type error_message: A Python json representation dictionary of the error message :return: @@ -12,8 +13,8 @@ def __init__(self, error_message): class NonUniqueLabelDescriptionPairError(MWApiError): def __init__(self, error_message): """ - This class handles errors returned from the API due to an attempt to create an item which has the same - label and description as an existing item in a certain language. + This class handles errors returned from the API due to an attempt to create an item which has the same label and description as an existing item in a certain language. + :param error_message: An API error message containing 'wikibase-validator-label-with-description-conflict' as the message name. :type error_message: A Python json representation dictionary of the error message @@ -24,13 +25,14 @@ def __init__(self, error_message): def get_language(self): """ :return: Returns a 2 letter language string, indicating the language which triggered the error + :rtype: string """ return self.error_msg['error']['messages'][0]['parameters'][1] def get_conflicting_item_qid(self): """ - :return: Returns the QID string of the item which has the same label and description as the one which should - be set. + :return: Returns the QID string of the item which has the same label and description as the one which should be set. 
+ :rtype: string """ qid_string = self.error_msg['error']['messages'][0]['parameters'][2] diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 3682f513..9913fbd9 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -281,6 +281,7 @@ def init_language_data(self, lang: str, lang_data_type: str) -> None: def get_language_data(self, qid: str, lang: str, lang_data_type: str) -> list: """ get language data for specified qid + :param qid: Wikibase item id :param lang: language code :param lang_data_type: 'label', 'description' or 'aliases' From cd46102dd15f70e59877067003982a949a231f0d Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Mon, 6 Sep 2021 20:13:17 +0200 Subject: [PATCH 095/308] Delete test_wbi_datatype.py --- test/test_wbi_datatype.py | 48 --------------------------------------- 1 file changed, 48 deletions(-) delete mode 100644 test/test_wbi_datatype.py diff --git a/test/test_wbi_datatype.py b/test/test_wbi_datatype.py deleted file mode 100644 index 6330668f..00000000 --- a/test/test_wbi_datatype.py +++ /dev/null @@ -1,48 +0,0 @@ -import unittest - -from wikibaseintegrator import wbi_datatype - - -class TestWbiDataType(unittest.TestCase): - def test_qualifier(self): - # Good - qualifiers = [wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_qualifier=True), - wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', is_qualifier=True), - wbi_datatype.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_qualifier=True)] - wbi_datatype.ItemID("Q123", "P123", qualifiers=qualifiers) - - qualifiers = wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_qualifier=True) - wbi_datatype.ItemID("Q123", "P123", qualifiers=qualifiers) - - # Bad - qualifiers = wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_qualifier=False) - with self.assertRaises(ValueError): - wbi_datatype.ItemID("Q123", "P123", qualifiers=qualifiers) - - bad_qualifiers = 
["not a good qualifier", - wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', is_qualifier=True), - wbi_datatype.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_qualifier=True)] - with self.assertRaises(ValueError): - wbi_datatype.ItemID("Q123", "P123", qualifiers=bad_qualifiers) - - def test_references(self): - # Good - references = [wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_reference=True), - wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', is_reference=True), - wbi_datatype.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_reference=True)] - wbi_datatype.ItemID("Q123", "P123", references=[references]) - wbi_datatype.ItemID("Q123", "P123", references=references) - - references = wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_reference=True) - wbi_datatype.ItemID("Q123", "P123", references=references) - - # Bad - references = wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_reference=False) - with self.assertRaises(ValueError): - wbi_datatype.ItemID("Q123", "P123", references=references) - - bad_references = ["not a good reference", - wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', is_reference=True), - wbi_datatype.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_reference=True)] - with self.assertRaises(ValueError): - wbi_datatype.ItemID("Q123", "P123", qualifiers=bad_references) From ab27bf4d1683727193715c9f332192890d1012ad Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 9 Sep 2021 20:34:33 +0200 Subject: [PATCH 096/308] Fix #218 --- test/test_all.py | 13 ++++++++++++- wikibaseintegrator/wbi_helpers.py | 2 +- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/test/test_all.py b/test/test_all.py index aeb4e010..42461aa1 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -8,7 +8,7 @@ from wikibaseintegrator.entities.baseentity import MWApiError from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import 
ActionIfExists -from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper, get_user_agent +from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper, get_user_agent, execute_sparql_query config['DEBUG'] = True @@ -208,3 +208,14 @@ def test_user_agent(capfd): new_user_agent = get_user_agent(user_agent='MyWikibaseBot/0.5') assert new_user_agent.startswith('MyWikibaseBot/0.5') assert 'WikibaseIntegrator' in new_user_agent + + +def test_sparql(): + results = execute_sparql_query('''SELECT ?child ?childLabel +WHERE +{ +# ?child father Bach + ?child wdt:P22 wd:Q1339. + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". } +}''') + assert len(results['results']['bindings']) > 1 diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index e9da2fbf..fac506bc 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -176,7 +176,7 @@ def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if endpoint is None else endpoint user_agent = (config['USER_AGENT'] if user_agent is None else user_agent) - if urlparse(endpoint).hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None: + if urlparse(sparql_endpoint_url).hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None: print('WARNING: Please set an user agent if you interact with a Wikibase instance from the Wikimedia Foundation.') print('More information in the README.md and https://meta.wikimedia.org/wiki/User-Agent_policy') From babde2bcebf60f8e93c5a9a40a901298a17c81dc Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 9 Sep 2021 20:48:37 +0200 Subject: [PATCH 097/308] Prepare v0.12.0.dev4 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index c86fc461..b75290a1 100644 --- a/setup.cfg 
+++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = wikibaseintegrator -version = 0.12.0.dev3 +version = 0.12.0.dev4 author = Myst and WikidataIntegrator authors license = MIT license_files = LICENSE From a1f06faad1866d761ba0865857dd807bb513f827 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 11 Sep 2021 22:18:18 +0200 Subject: [PATCH 098/308] Some linting --- .idea/WikibaseIntegrator.iml | 1 + wikibaseintegrator/datatypes/basedatatype.py | 22 ++++++------ wikibaseintegrator/datatypes/form.py | 2 +- wikibaseintegrator/datatypes/geoshape.py | 2 +- .../datatypes/globecoordinate.py | 2 +- wikibaseintegrator/datatypes/item.py | 6 ++-- wikibaseintegrator/datatypes/lexeme.py | 6 ++-- .../datatypes/monolingualtext.py | 2 +- wikibaseintegrator/datatypes/property.py | 6 ++-- wikibaseintegrator/datatypes/quantity.py | 6 ++-- wikibaseintegrator/datatypes/sense.py | 2 +- wikibaseintegrator/datatypes/string.py | 2 +- wikibaseintegrator/datatypes/tabulardata.py | 2 +- wikibaseintegrator/datatypes/time.py | 2 +- wikibaseintegrator/datatypes/url.py | 2 +- wikibaseintegrator/entities/baseentity.py | 23 ++++++------ wikibaseintegrator/entities/item.py | 14 ++++---- wikibaseintegrator/entities/lexeme.py | 12 +++---- wikibaseintegrator/entities/mediainfo.py | 14 ++++---- wikibaseintegrator/entities/property.py | 12 +++---- wikibaseintegrator/models/aliases.py | 10 +++--- wikibaseintegrator/models/claims.py | 4 ++- wikibaseintegrator/models/forms.py | 1 + wikibaseintegrator/models/language_values.py | 12 +++---- wikibaseintegrator/models/sitelinks.py | 4 +-- wikibaseintegrator/models/snaks.py | 4 +-- wikibaseintegrator/wbi_enums.py | 2 +- wikibaseintegrator/wbi_exceptions.py | 18 +++++----- wikibaseintegrator/wbi_fastrun.py | 19 +++++----- wikibaseintegrator/wbi_helpers.py | 35 ++++++++++--------- wikibaseintegrator/wbi_login.py | 12 +++---- wikibaseintegrator/wikibaseintegrator.py | 2 +- 32 files changed, 135 insertions(+), 128 deletions(-) diff 
--git a/.idea/WikibaseIntegrator.iml b/.idea/WikibaseIntegrator.iml index 7cce02b4..2210e94f 100644 --- a/.idea/WikibaseIntegrator.iml +++ b/.idea/WikibaseIntegrator.iml @@ -10,6 +10,7 @@ + diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index 737a4d21..e188bcce 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -91,13 +91,14 @@ def equals(self, that, include_ref=False, fref=None): if not include_ref: # return the result of BaseDataType.__eq__, which is testing for equality of value and qualifiers return self == that - else: - if self != that: - return False - if fref is None: - return BaseDataType.refs_equal(self, that) - else: - return fref(self, that) + + if self != that: + return False + + if fref is None: + return BaseDataType.refs_equal(self, that) + + return fref(self, that) @staticmethod def refs_equal(olditem, newitem): @@ -109,9 +110,6 @@ def refs_equal(olditem, newitem): newrefs = newitem.references def ref_equal(oldref, newref): - return True if (len(oldref) == len(newref)) and all(x in oldref for x in newref) else False + return (len(oldref) == len(newref)) and all(x in oldref for x in newref) - if len(oldrefs) == len(newrefs) and all(any(ref_equal(oldref, newref) for oldref in oldrefs) for newref in newrefs): - return True - else: - return False + return len(oldrefs) == len(newrefs) and all(any(ref_equal(oldref, newref) for oldref in oldrefs) for newref in newrefs) diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py index c8f4b47d..bfedefce 100644 --- a/wikibaseintegrator/datatypes/form.py +++ b/wikibaseintegrator/datatypes/form.py @@ -33,7 +33,7 @@ def __init__(self, value=None, **kwargs): :type rank: str """ - super(Form, self).__init__(**kwargs) + super().__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) diff --git 
a/wikibaseintegrator/datatypes/geoshape.py b/wikibaseintegrator/datatypes/geoshape.py index 7e0f98af..d489e3eb 100644 --- a/wikibaseintegrator/datatypes/geoshape.py +++ b/wikibaseintegrator/datatypes/geoshape.py @@ -39,7 +39,7 @@ def __init__(self, value=None, **kwargs): Additional content """ - super(GeoShape, self).__init__(**kwargs) + super().__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py index f931eaaa..4ba2a649 100644 --- a/wikibaseintegrator/datatypes/globecoordinate.py +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -36,7 +36,7 @@ def __init__(self, latitude=None, longitude=None, precision=None, globe=None, wi :type rank: str """ - super(GlobeCoordinate, self).__init__(**kwargs) + super().__init__(**kwargs) globe = globe or config['COORDINATE_GLOBE_QID'] wikibase_url = wikibase_url or config['WIKIBASE_URL'] diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py index 3f62aac3..9d8a8c14 100644 --- a/wikibaseintegrator/datatypes/item.py +++ b/wikibaseintegrator/datatypes/item.py @@ -33,7 +33,7 @@ def __init__(self, value=None, **kwargs): :type rank: str """ - super(Item, self).__init__(**kwargs) + super().__init__(**kwargs) assert isinstance(value, (str, int)) or value is None, 'Expected str or int, found {} ({})'.format(type(value), value) @@ -44,8 +44,8 @@ def __init__(self, value=None, **kwargs): if not matches: raise ValueError("Invalid item ID ({}), format must be 'Q[0-9]+'".format(value)) - else: - value = int(matches.group(1)) + + value = int(matches.group(1)) self.mainsnak.datavalue = { 'value': { diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py index 940b473d..f8c5f9ac 100644 --- a/wikibaseintegrator/datatypes/lexeme.py +++ b/wikibaseintegrator/datatypes/lexeme.py @@ -33,7 +33,7 @@ 
def __init__(self, value=None, **kwargs): :type rank: str """ - super(Lexeme, self).__init__(**kwargs) + super().__init__(**kwargs) assert isinstance(value, (str, int)) or value is None, "Expected str or int, found {} ({})".format(type(value), value) @@ -44,8 +44,8 @@ def __init__(self, value=None, **kwargs): if not matches: raise ValueError("Invalid lexeme ID ({}), format must be 'L[0-9]+'".format(value)) - else: - value = int(matches.group(1)) + + value = int(matches.group(1)) self.mainsnak.datavalue = { 'value': { diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py index b5287f08..0a4c7a31 100644 --- a/wikibaseintegrator/datatypes/monolingualtext.py +++ b/wikibaseintegrator/datatypes/monolingualtext.py @@ -34,7 +34,7 @@ def __init__(self, text=None, language=None, **kwargs): :type rank: str """ - super(MonolingualText, self).__init__(**kwargs) + super().__init__(**kwargs) language = language or config['DEFAULT_LANGUAGE'] diff --git a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py index 2ca64a66..a207650d 100644 --- a/wikibaseintegrator/datatypes/property.py +++ b/wikibaseintegrator/datatypes/property.py @@ -33,7 +33,7 @@ def __init__(self, value=None, **kwargs): :type rank: str """ - super(Property, self).__init__(**kwargs) + super().__init__(**kwargs) assert isinstance(value, (str, int)) or value is None, "Expected str or int, found {} ({})".format(type(value), value) @@ -44,8 +44,8 @@ def __init__(self, value=None, **kwargs): if not matches: raise ValueError("Invalid property ID ({}), format must be 'P[0-9]+'".format(value)) - else: - value = int(matches.group(1)) + + value = int(matches.group(1)) self.mainsnak.datavalue = { 'value': { diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py index ad903d34..09924853 100644 --- a/wikibaseintegrator/datatypes/quantity.py +++ b/wikibaseintegrator/datatypes/quantity.py @@ -40,7 
+40,7 @@ def __init__(self, amount=None, upper_bound=None, lower_bound=None, unit='1', wi :type rank: str """ - super(Quantity, self).__init__(**kwargs) + super().__init__(**kwargs) wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url @@ -62,8 +62,8 @@ def __init__(self, amount=None, upper_bound=None, lower_bound=None, unit='1', wi for i in [amount, upper_bound, lower_bound]: if i: float(i) - except ValueError: - raise ValueError("Value, bounds and units must parse as integers or float") + except ValueError as error: + raise ValueError("Value, bounds and units must parse as integers or float") from error if (lower_bound and upper_bound) and (float(lower_bound) > float(upper_bound) or float(lower_bound) > float(amount)): raise ValueError("Lower bound too large") diff --git a/wikibaseintegrator/datatypes/sense.py b/wikibaseintegrator/datatypes/sense.py index 9156a848..edbd5dd2 100644 --- a/wikibaseintegrator/datatypes/sense.py +++ b/wikibaseintegrator/datatypes/sense.py @@ -33,7 +33,7 @@ def __init__(self, value=None, **kwargs): :type rank: str """ - super(Sense, self).__init__(**kwargs) + super().__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py index 46fd97eb..c14a27b4 100644 --- a/wikibaseintegrator/datatypes/string.py +++ b/wikibaseintegrator/datatypes/string.py @@ -26,7 +26,7 @@ def __init__(self, value=None, **kwargs): :type rank: str """ - super(String, self).__init__(**kwargs) + super().__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) diff --git a/wikibaseintegrator/datatypes/tabulardata.py b/wikibaseintegrator/datatypes/tabulardata.py index a3ecd49c..af8a12e0 100644 --- a/wikibaseintegrator/datatypes/tabulardata.py +++ b/wikibaseintegrator/datatypes/tabulardata.py @@ -32,7 +32,7 @@ def 
__init__(self, value=None, **kwargs): :raises ValueError: Raise a ValueError Exception if the data in value parameter is not valid. """ - super(TabularData, self).__init__(**kwargs) + super().__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py index ddf74d93..38fad935 100644 --- a/wikibaseintegrator/datatypes/time.py +++ b/wikibaseintegrator/datatypes/time.py @@ -48,7 +48,7 @@ def __init__(self, time=None, before=0, after=0, precision=11, timezone=0, calen :type rank: str """ - super(Time, self).__init__(**kwargs) + super().__init__(**kwargs) calendarmodel = calendarmodel or config['CALENDAR_MODEL_QID'] wikibase_url = wikibase_url or config['WIKIBASE_URL'] diff --git a/wikibaseintegrator/datatypes/url.py b/wikibaseintegrator/datatypes/url.py index eac13c22..ab9d191c 100644 --- a/wikibaseintegrator/datatypes/url.py +++ b/wikibaseintegrator/datatypes/url.py @@ -32,7 +32,7 @@ def __init__(self, value=None, **kwargs): :type rank: str """ - super(URL, self).__init__(**kwargs) + super().__init__(**kwargs) assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 5255e32d..f4e46c7f 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -9,7 +9,7 @@ from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper -class BaseEntity(object): +class BaseEntity: fast_run_store = [] ETYPE = 'base-entity' @@ -128,12 +128,12 @@ def _write(self, data=None, summary=None, allow_anonymous=False, clear=False, ** payload.update({'clear': True}) if self.id: - payload.update({u'id': self.id}) + payload.update({'id': self.id}) else: - payload.update({u'new': self.type}) + payload.update({'new': self.type}) if self.lastrevid: - 
payload.update({u'baserevid': self.lastrevid}) + payload.update({'baserevid': self.lastrevid}) if self.debug: print(payload) @@ -145,9 +145,10 @@ def _write(self, data=None, summary=None, allow_anonymous=False, clear=False, ** error_msg_names = set(x.get('name') for x in json_data['error']['messages']) if 'wikibase-validator-label-with-description-conflict' in error_msg_names: raise NonUniqueLabelDescriptionPairError(json_data) - else: - raise MWApiError(json_data) - elif 'error' in json_data.keys(): + + raise MWApiError(json_data) + + if 'error' in json_data.keys(): raise MWApiError(json_data) except Exception: print('Error while writing to the Wikibase instance') @@ -166,10 +167,10 @@ def init_fastrun(self, base_filter=None, use_refs=False, case_insensitive=False) if self.debug: print('Initialize Fast Run init_fastrun') # We search if we already have a FastRunContainer with the same parameters to re-use it - for c in BaseEntity.fast_run_store: - if (c.base_filter == base_filter) and (c.use_refs == use_refs) and (c.case_insensitive == case_insensitive) and ( - c.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']): - self.fast_run_container = c + for fast_run in BaseEntity.fast_run_store: + if (fast_run.base_filter == base_filter) and (fast_run.use_refs == use_refs) and (fast_run.case_insensitive == case_insensitive) and ( + fast_run.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']): + self.fast_run_container = fast_run self.fast_run_container.current_qid = '' self.fast_run_container.base_data_type = BaseDataType if self.debug: diff --git a/wikibaseintegrator/entities/item.py b/wikibaseintegrator/entities/item.py index 7e9bd10c..21241397 100644 --- a/wikibaseintegrator/entities/item.py +++ b/wikibaseintegrator/entities/item.py @@ -24,7 +24,7 @@ def __init__(self, api, labels=None, descriptions=None, aliases=None, sitelinks= """ self.api = api - super(Item, self).__init__(api=self.api, **kwargs) + super().__init__(api=self.api, **kwargs) # Item and property 
specific self.labels = labels or Labels() @@ -44,14 +44,14 @@ def get(self, entity_id, **kwargs) -> Item: if not matches: raise ValueError("Invalid item ID ({}), format must be 'Q[0-9]+'".format(entity_id)) - else: - entity_id = int(matches.group(1)) + + entity_id = int(matches.group(1)) if entity_id < 1: raise ValueError("Item ID must be greater than 0") entity_id = 'Q{}'.format(entity_id) - json_data = super(Item, self).get(entity_id=entity_id, **kwargs) + json_data = super().get(entity_id=entity_id, **kwargs) return Item(self.api).from_json(json_data=json_data['entities'][entity_id]) def get_json(self) -> {}: @@ -59,11 +59,11 @@ def get_json(self) -> {}: 'labels': self.labels.get_json(), 'descriptions': self.descriptions.get_json(), 'aliases': self.aliases.get_json(), - **super(Item, self).get_json() + **super().get_json() } def from_json(self, json_data) -> Item: - super(Item, self).from_json(json_data=json_data) + super().from_json(json_data=json_data) self.labels = Labels().from_json(json_data['labels']) self.descriptions = Descriptions().from_json(json_data['descriptions']) @@ -73,5 +73,5 @@ def from_json(self, json_data) -> Item: return self def write(self, **kwargs): - json_data = super(Item, self)._write(data=self.get_json(), **kwargs) + json_data = super()._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/lexeme.py b/wikibaseintegrator/entities/lexeme.py index a0459501..5b4f37c0 100644 --- a/wikibaseintegrator/entities/lexeme.py +++ b/wikibaseintegrator/entities/lexeme.py @@ -33,14 +33,14 @@ def get(self, entity_id, **kwargs) -> Lexeme: if not matches: raise ValueError("Invalid lexeme ID ({}), format must be 'L[0-9]+'".format(entity_id)) - else: - entity_id = int(matches.group(1)) + + entity_id = int(matches.group(1)) if entity_id < 1: raise ValueError("Lexeme ID must be greater than 0") entity_id = 'L{}'.format(entity_id) - json_data = super(Lexeme, self).get(entity_id=entity_id, 
**kwargs) + json_data = super().get(entity_id=entity_id, **kwargs) return Lexeme(self.api).from_json(json_data=json_data['entities'][entity_id]) def get_json(self) -> {}: @@ -50,7 +50,7 @@ def get_json(self) -> {}: 'language': self.language, 'forms': self.forms.get_json(), 'senses': self.senses.get_json(), - **super(Lexeme, self).get_json() + **super().get_json() } if self.lexical_category is None: @@ -59,7 +59,7 @@ def get_json(self) -> {}: return json_data def from_json(self, json_data) -> Lexeme: - super(Lexeme, self).from_json(json_data=json_data) + super().from_json(json_data=json_data) self.lemmas = Lemmas().from_json(json_data['lemmas']) self.lexical_category = json_data['lexicalCategory'] @@ -73,5 +73,5 @@ def write(self, **kwargs): if self.lexical_category is None: raise ValueError("lexical_category can't be None") - json_data = super(Lexeme, self)._write(data=self.get_json(), **kwargs) + json_data = super()._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/mediainfo.py b/wikibaseintegrator/entities/mediainfo.py index e79771fe..af8a834d 100644 --- a/wikibaseintegrator/entities/mediainfo.py +++ b/wikibaseintegrator/entities/mediainfo.py @@ -23,7 +23,7 @@ def __init__(self, api, labels=None, descriptions=None, aliases=None, **kwargs) """ self.api = api - super(MediaInfo, self).__init__(api=self.api, **kwargs) + super().__init__(api=self.api, **kwargs) # Item and property specific self.labels = labels or Labels() @@ -40,14 +40,14 @@ def get(self, entity_id, **kwargs) -> MediaInfo: if not matches: raise ValueError("Invalid MediaInfo ID ({}), format must be 'M[0-9]+'".format(entity_id)) - else: - entity_id = int(matches.group(1)) + + entity_id = int(matches.group(1)) if entity_id < 1: raise ValueError("MediaInfo ID must be greater than 0") entity_id = 'M{}'.format(entity_id) - json_data = super(MediaInfo, self).get(entity_id=entity_id, **kwargs) + json_data = 
super().get(entity_id=entity_id, **kwargs) return MediaInfo(self.api).from_json(json_data=json_data['entities'][entity_id]) def get_by_title(self, title, sites='commonswiki', **kwargs) -> MediaInfo: @@ -72,11 +72,11 @@ def get_json(self) -> {}: 'labels': self.labels.get_json(), 'descriptions': self.descriptions.get_json(), 'aliases': self.aliases.get_json(), - **super(MediaInfo, self).get_json() + **super().get_json() } def from_json(self, json_data) -> MediaInfo: - super(MediaInfo, self).from_json(json_data=json_data) + super().from_json(json_data=json_data) self.labels = Labels().from_json(json_data['labels']) self.descriptions = Descriptions().from_json(json_data['descriptions']) @@ -84,5 +84,5 @@ def from_json(self, json_data) -> MediaInfo: return self def write(self, **kwargs): - json_data = super(MediaInfo, self)._write(data=self.get_json(), **kwargs) + json_data = super()._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/property.py b/wikibaseintegrator/entities/property.py index 37855ab7..829446a1 100644 --- a/wikibaseintegrator/entities/property.py +++ b/wikibaseintegrator/entities/property.py @@ -36,14 +36,14 @@ def get(self, entity_id, **kwargs) -> Property: if not matches: raise ValueError("Invalid property ID ({}), format must be 'P[0-9]+'".format(entity_id)) - else: - entity_id = int(matches.group(1)) + + entity_id = int(matches.group(1)) if entity_id < 1: raise ValueError("Property ID must be greater than 0") entity_id = 'P{}'.format(entity_id) - json_data = super(Property, self).get(entity_id=entity_id, **kwargs) + json_data = super().get(entity_id=entity_id, **kwargs) return Property(self.api).from_json(json_data=json_data['entities'][entity_id]) def get_json(self) -> {}: @@ -52,11 +52,11 @@ def get_json(self) -> {}: 'labels': self.labels.get_json(), 'descriptions': self.descriptions.get_json(), 'aliases': self.aliases.get_json(), - **super(Property, self).get_json() + 
**super().get_json() } def from_json(self, json_data) -> Property: - super(Property, self).from_json(json_data=json_data) + super().from_json(json_data=json_data) self.datatype = json_data['datatype'] self.labels = Labels().from_json(json_data['labels']) @@ -66,5 +66,5 @@ def from_json(self, json_data) -> Property: return self def write(self, **kwargs): - json_data = super(Property, self)._write(data=self.get_json(), **kwargs) + json_data = super()._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/models/aliases.py b/wikibaseintegrator/models/aliases.py index ea7b180f..85807b24 100644 --- a/wikibaseintegrator/models/aliases.py +++ b/wikibaseintegrator/models/aliases.py @@ -22,11 +22,11 @@ def get(self, language=None): if language is None: # TODO: Don't return a list of list, just a list return [item for sublist in self.aliases.values() for item in sublist] - else: - if language in self.aliases: - return self.aliases[language] - else: - return None + + if language in self.aliases: + return self.aliases[language] + + return None def set(self, language=None, values=None, action_if_exists=ActionIfExists.APPEND): language = language or config['DEFAULT_LANGUAGE'] diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 664fd6e9..7d34016b 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -262,8 +262,10 @@ def has_equal_qualifiers(self, other): def __contains__(self, item): if isinstance(item, Claim): return self == item - elif isinstance(item, str): + + if isinstance(item, str): return self.mainsnak.datavalue == item + raise TypeError def __eq__(self, other): diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py index 9544ea7f..7e409414 100644 --- a/wikibaseintegrator/models/forms.py +++ b/wikibaseintegrator/models/forms.py @@ -73,6 +73,7 @@ def grammatical_features(self): 
@grammatical_features.setter def grammatical_features(self, value): + # TODO: Access to member before its definition if isinstance(value, int): self.__grammatical_features.append('Q' + str(value)) elif isinstance(value, str): diff --git a/wikibaseintegrator/models/language_values.py b/wikibaseintegrator/models/language_values.py index bf7ebebf..263e656c 100644 --- a/wikibaseintegrator/models/language_values.py +++ b/wikibaseintegrator/models/language_values.py @@ -26,8 +26,8 @@ def get(self, language=None): language = language or config['DEFAULT_LANGUAGE'] if language in self.values: return self.values[language] - else: - return None + + return None def set(self, language=None, value=None, action_if_exists=ActionIfExists.REPLACE): language = language or config['DEFAULT_LANGUAGE'] @@ -42,8 +42,8 @@ def set(self, language=None, value=None, action_if_exists=ActionIfExists.REPLACE language_value = LanguageValue(language, value) self.add(language_value) return language_value - else: - return self.get(language=language) + + return self.get(language=language) def get_json(self) -> {}: json_data = {} @@ -127,8 +127,8 @@ def __contains__(self, item): def __eq__(self, other): if isinstance(other, LanguageValue): return self.value == other.value and self.language == other.language - else: - return self.value == other + + return self.value == other def __len__(self): return len(self.value) diff --git a/wikibaseintegrator/models/sitelinks.py b/wikibaseintegrator/models/sitelinks.py index eaaa76cb..2c8aaa6d 100644 --- a/wikibaseintegrator/models/sitelinks.py +++ b/wikibaseintegrator/models/sitelinks.py @@ -5,8 +5,8 @@ def __init__(self): def get(self, site=None): if site in self.sitelinks: return self.sitelinks[site] - else: - return None + + return None def set(self, site=None, title=None, badges=None): sitelink = Sitelink(site, title, badges) diff --git a/wikibaseintegrator/models/snaks.py b/wikibaseintegrator/models/snaks.py index 17231ca3..186f139c 100644 --- 
a/wikibaseintegrator/models/snaks.py +++ b/wikibaseintegrator/models/snaks.py @@ -88,8 +88,8 @@ def property_number(self, value): if not matches: raise ValueError('Invalid property_number, format must be "P[0-9]+"') - else: - self.__property_number = 'P' + str(matches.group(1)) + + self.__property_number = 'P' + str(matches.group(1)) self.__property_number = value diff --git a/wikibaseintegrator/wbi_enums.py b/wikibaseintegrator/wbi_enums.py index ebd0d306..0e10dc7b 100644 --- a/wikibaseintegrator/wbi_enums.py +++ b/wikibaseintegrator/wbi_enums.py @@ -19,7 +19,7 @@ class WikibaseRank(Enum): class WikibaseSnakType(Enum): """ - The snak type of the Wikibase data snak, three values possible, + The snak type of the Wikibase data snak, three values possible, depending if the value is a known (value), not existent (novalue) or unknown (somevalue). See Wikibase documentation. """ diff --git a/wikibaseintegrator/wbi_exceptions.py b/wikibaseintegrator/wbi_exceptions.py index 2bee9e43..c3dfe498 100644 --- a/wikibaseintegrator/wbi_exceptions.py +++ b/wikibaseintegrator/wbi_exceptions.py @@ -1,13 +1,11 @@ class MWApiError(Exception): - def __init__(self, error_message): - """ - Base class for Mediawiki API error handling + """ + Base class for Mediawiki API error handling - :param error_message: The error message returned by the Mediawiki API - :type error_message: A Python json representation dictionary of the error message - :return: - """ - pass + :param error_message: The error message returned by the Mediawiki API + :type error_message: A Python json representation dictionary of the error message + :return: + """ class NonUniqueLabelDescriptionPairError(MWApiError): @@ -20,6 +18,8 @@ def __init__(self, error_message): :type error_message: A Python json representation dictionary of the error message :return: """ + super().__init__(error_message) + self.error_msg = error_message def get_language(self): @@ -52,6 +52,8 @@ class SearchError(Exception): class 
ManualInterventionReqException(Exception): def __init__(self, value, property_string, item_list): + super().__init__() + self.value = value + ' Property: {}, items affected: {}'.format(property_string, item_list) def __str__(self): diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 9913fbd9..b2a05f30 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -14,7 +14,7 @@ fastrun_store = [] -class FastRunContainer(object): +class FastRunContainer: def __init__(self, base_data_type, mediawiki_api_url=None, sparql_endpoint_url=None, wikibase_url=None, base_filter=None, use_refs=False, case_insensitive=False, debug=None): self.reconstructed_statements = [] self.rev_lookup = defaultdict(set) @@ -199,12 +199,13 @@ def write_required(self, data: list, action_if_exists=ActionIfExists.REPLACE, cq for date in data: # ensure that statements meant for deletion get handled properly - reconst_props = set([x.mainsnak.property_number for x in tmp_rs]) + reconst_props = {x.mainsnak.property_number for x in tmp_rs} if not date.mainsnak.datatype and date.mainsnak.property_number in reconst_props: if self.debug: print("returned from delete prop handling") return True - elif not date.mainsnak.datavalue or not date.mainsnak.datatype: + + if not date.mainsnak.datavalue or not date.mainsnak.datatype: # Ignore the deletion statements which are not in the reconstructed statements. 
continue @@ -315,12 +316,12 @@ def check_language_data(self, qid: str, lang_data: list, lang: str, lang_data_ty if action_if_exists == ActionIfExists.REPLACE: return not collections.Counter(all_lang_strings) == collections.Counter(map(lambda x: x.casefold(), lang_data)) - else: - for s in lang_data: - if s.strip().casefold() not in all_lang_strings: - if self.debug: - print("fastrun failed at: {}, string: {}".format(lang_data_type, s)) - return True + + for s in lang_data: + if s.strip().casefold() not in all_lang_strings: + if self.debug: + print("fastrun failed at: {}, string: {}".format(lang_data_type, s)) + return True return False diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index fac506bc..1270dd7f 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -121,7 +121,8 @@ def mediawiki_api_call_helper(data=None, login=None, mediawiki_api_url=None, use if login is None: # Force allow_anonymous as False by default to ask for a login object raise ValueError("allow_anonymous can't be False and login is None at the same time.") - elif mediawiki_api_url != login.mediawiki_api_url: + + if mediawiki_api_url != login.mediawiki_api_url: raise ValueError("mediawiki_api_url can't be different with the one in the login object.") headers = { @@ -355,25 +356,25 @@ def search_entities(search_string, language=None, strict_language=True, search_t if search_results['success'] != 1: raise SearchError('Wikibase API wbsearchentities failed') - else: - for i in search_results['search']: - if dict_result: - description = i['description'] if 'description' in i else None - aliases = i['aliases'] if 'aliases' in i else None - results.append({ - 'id': i['id'], - 'label': i['label'], - 'match': i['match'], - 'description': description, - 'aliases': aliases - }) - else: - results.append(i['id']) + + for i in search_results['search']: + if dict_result: + description = i['description'] if 'description' in i else None + 
aliases = i['aliases'] if 'aliases' in i else None + results.append({ + 'id': i['id'], + 'label': i['label'], + 'match': i['match'], + 'description': description, + 'aliases': aliases + }) + else: + results.append(i['id']) if 'search-continue' not in search_results: break - else: - cont_count = search_results['search-continue'] + + cont_count = search_results['search-continue'] if cont_count >= max_results: break diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index b46c042c..7fe1fc63 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -15,7 +15,7 @@ """ -class Login(object): +class Login: """ A class which handles the login to Wikidata and the generation of edit-tokens """ @@ -73,7 +73,7 @@ def __init__(self, auth_method='oauth2', user=None, password=None, mediawiki_api try: token = oauth.fetch_token(token_url=self.mediawiki_rest_url + '/oauth2/access_token', client_id=self.consumer_token, client_secret=consumer_secret) except InvalidClientError as err: - raise LoginError(err) + raise LoginError(err) from err auth = OAuth2(token=token) self.session.auth = auth self.generate_edit_credentials() @@ -96,8 +96,8 @@ def __init__(self, auth_method='oauth2', user=None, password=None, mediawiki_api try: self.redirect, self.request_token = self.handshaker.initiate(callback=self.callback_url) except OAuthException as err: - raise LoginError(err) - elif auth_method == 'login' or auth_method == 'clientlogin': + raise LoginError(err) from err + elif auth_method in ('login', 'clientlogin'): params_login = { 'action': 'query', 'meta': 'tokens', @@ -145,7 +145,8 @@ def __init__(self, auth_method='oauth2', user=None, password=None, mediawiki_api clientlogin = login_result['clientlogin'] if clientlogin['status'] != 'PASS': raise LoginError("Login failed ({}). 
Message: '{}'".format(clientlogin['messagecode'], clientlogin['message'])) - elif debug: + + if debug: print("Successfully logged in as", clientlogin['username']) else: raise LoginError("Login failed ({}). Message: '{}'".format(login_result['error']['code'], login_result['error']['info'])) @@ -232,4 +233,3 @@ def continue_oauth(self, oauth_callback_data=None): class LoginError(Exception): """Raised when there is an issue with the login""" - pass diff --git a/wikibaseintegrator/wikibaseintegrator.py b/wikibaseintegrator/wikibaseintegrator.py index 81c7e29a..bdb75e43 100644 --- a/wikibaseintegrator/wikibaseintegrator.py +++ b/wikibaseintegrator/wikibaseintegrator.py @@ -5,7 +5,7 @@ from wikibaseintegrator.entities.property import Property -class WikibaseIntegrator(object): +class WikibaseIntegrator: def __init__(self, is_bot=False, From 06cfbd13e2e23b25aa4357dd39fafc59f61c8e95 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 12 Sep 2021 10:06:32 +0200 Subject: [PATCH 099/308] Stop using requirements.txt --- .github/workflows/python-package.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 5d733871..d27484fd 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -30,8 +30,7 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + pip install .[dev] - name: Test with pytest run: | pytest From d4175f5a68a970109c636468816f6c948298180a Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 12 Sep 2021 10:13:18 +0200 Subject: [PATCH 100/308] Add missing frozendict in setup.cfg --- .github/workflows/python-package.yml | 2 +- setup.cfg | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml 
b/.github/workflows/python-package.yml index d27484fd..8eec6e45 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -29,7 +29,7 @@ jobs: ${{ runner.os }}- - name: Install dependencies run: | - python -m pip install --upgrade pip + python -m pip install --upgrade pip setuptools pip install .[dev] - name: Test with pytest run: | diff --git a/setup.cfg b/setup.cfg index f2694b3b..8c58b747 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,6 +32,7 @@ classifiers = packages = find: install_requires = backoff~=1.11.0 + frozendict~=2.0.6 mwoauth~=0.3.6 oauthlib~=3.1.1 requests~=2.26.0 From 046f0e1d4ef6a91b95c1d2fab844b919505b0cb6 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 12 Sep 2021 10:26:19 +0200 Subject: [PATCH 101/308] Improve github actions --- .github/workflows/python-package.yml | 4 +++- setup.cfg | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 8eec6e45..694474b7 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -27,9 +27,11 @@ jobs: restore-keys: | ${{ runner.os }}-pip- ${{ runner.os }}- - - name: Install dependencies + - name: Upgrade setup tools run: | python -m pip install --upgrade pip setuptools + - name: Install dependencies + run: | pip install .[dev] - name: Test with pytest run: | diff --git a/setup.cfg b/setup.cfg index 8c58b747..be366297 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,4 +42,5 @@ python_requires = >=3.7, <3.11 [options.extras_require] dev = pytest +coverage = pytest-cov From 45d168a829f75e0dc80fe9605784b32aef42fcdb Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 12 Sep 2021 21:49:40 +0200 Subject: [PATCH 102/308] Update requirements.txt --- requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 5ae23ab3..9f11034f 100644 
--- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,6 @@ backoff~=1.11.1 frozendict~=2.0.6 mwoauth~=0.3.7 oauthlib~=3.1.1 -pytest~=6.2.5 requests~=2.26.0 setuptools~=58.0.4 simplejson~=3.17.5 From ed7bc23b5d15d7e3a476ca7a994fcbd6d9db4e32 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 12 Sep 2021 21:49:55 +0200 Subject: [PATCH 103/308] Update .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index e4cd9b76..c22045f1 100644 --- a/.gitignore +++ b/.gitignore @@ -154,5 +154,8 @@ fabric.properties # Android studio 3.1+ serialized cache file .idea/caches/build_file_checksums.ser +# Idea pylint plugin configuration file +.idea/pylint.xml + # Other stuff /drafts/ From 57fd784b6c7b337ef5c9f4d6c1f569ab36b0f3d5 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Tue, 14 Sep 2021 00:03:11 +0200 Subject: [PATCH 104/308] Prepare v0.12.0.dev5 --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 076c52b3..ec85261b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,7 +1,7 @@ [metadata] name = wikibaseintegrator -version = 0.12.0.dev4 -author = Myst and WikidataIntegrator authors +version = 0.12.0.dev5 +author = Myst, WikibaseIntegrator authors and WikidataIntegrator authors license = MIT license_files = LICENSE description = Python package for reading from and writing to a Wikibase instance From 3a6016b7195acc933cfe6d2b87b6d2a9a15588b9 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 15 Sep 2021 13:02:38 +0200 Subject: [PATCH 105/308] Update forms.py --- wikibaseintegrator/models/forms.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py index 7e409414..ddf0be94 100644 --- a/wikibaseintegrator/models/forms.py +++ b/wikibaseintegrator/models/forms.py @@ -78,9 +78,11 @@ def 
grammatical_features(self, value): self.__grammatical_features.append('Q' + str(value)) elif isinstance(value, str): self.__grammatical_features.append(value) - else: + elif isinstance(value, list): self.__grammatical_features = value + raise TypeError("value must be an int, a str or a list of strings") + @property def claims(self): return self.__claims From d71e47c36f28ddf4c2b5777b50e6f48f06a8bf88 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 15 Sep 2021 13:06:34 +0200 Subject: [PATCH 106/308] Update aliases.py Close #227 --- wikibaseintegrator/models/aliases.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wikibaseintegrator/models/aliases.py b/wikibaseintegrator/models/aliases.py index 85807b24..77cd700e 100644 --- a/wikibaseintegrator/models/aliases.py +++ b/wikibaseintegrator/models/aliases.py @@ -46,7 +46,7 @@ def set(self, language=None, values=None, action_if_exists=ActionIfExists.APPEND if isinstance(values, str): values = [values] elif not isinstance(values, list) and values is not None: - raise TypeError("value must be a str or list") + raise TypeError("value must be a str or list of strings, got '{}'".format(type(values))) if action_if_exists == ActionIfExists.REPLACE: aliases = [] From 1954ee07018071664ce903d676a8ce1ecb782c1a Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 15 Sep 2021 20:07:18 +0200 Subject: [PATCH 107/308] Update forms.py --- wikibaseintegrator/models/forms.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py index ddf0be94..0fb7bb15 100644 --- a/wikibaseintegrator/models/forms.py +++ b/wikibaseintegrator/models/forms.py @@ -78,10 +78,10 @@ def grammatical_features(self, value): self.__grammatical_features.append('Q' + str(value)) elif isinstance(value, str): self.__grammatical_features.append(value) - elif isinstance(value, list): + elif 
isinstance(value, list) or value is None: self.__grammatical_features = value - - raise TypeError("value must be an int, a str or a list of strings") + else: + raise TypeError("value must be an int, a str or a list of strings, got ''".format(type(value))) @property def claims(self): From 113e9e2430592eb0b37cb97b8448536e02c9d6eb Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 17 Sep 2021 12:10:02 +0200 Subject: [PATCH 108/308] Add better badge --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1036c18a..acea073f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,7 @@ # Wikibase Integrator # -[![Python package](https://github.com/LeMyst/WikibaseIntegrator/actions/workflows/python-package.yml/badge.svg)](https://github.com/LeMyst/WikibaseIntegrator/actions/workflows/python-package.yml) + +[![Python package](https://github.com/LeMyst/WikibaseIntegrator/actions/workflows/python-package.yml/badge.svg?branch=rewrite-wbi)](https://github.com/LeMyst/WikibaseIntegrator/actions/workflows/python-package.yml) [![CodeQL](https://github.com/LeMyst/WikibaseIntegrator/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/LeMyst/WikibaseIntegrator/actions/workflows/codeql-analysis.yml) [![Pyversions](https://img.shields.io/pypi/pyversions/wikibaseintegrator.svg)](https://pypi.python.org/pypi/wikibaseintegrator) [![PyPi](https://img.shields.io/pypi/v/wikibaseintegrator.svg)](https://pypi.python.org/pypi/wikibaseintegrator) From f9c7329445b20e31a5b82866534ef92730d78dc5 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 17 Sep 2021 12:39:07 +0200 Subject: [PATCH 109/308] Update globecoordinate.py Update with better code --- .../datatypes/globecoordinate.py | 32 +++++++------------ 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/wikibaseintegrator/datatypes/globecoordinate.py 
b/wikibaseintegrator/datatypes/globecoordinate.py index 4ba2a649..c24effe6 100644 --- a/wikibaseintegrator/datatypes/globecoordinate.py +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -22,7 +22,7 @@ def __init__(self, latitude=None, longitude=None, precision=None, globe=None, wi :type latitude: float or None :param longitude: Longitude in decimal format :type longitude: float or None - :param precision: Precision of the position measurement + :param precision: Precision of the position measurement, default 1 / 3600 :type precision: float or None :param prop_nr: The item ID for this claim :type prop_nr: str with a 'P' prefix followed by digits @@ -38,39 +38,31 @@ def __init__(self, latitude=None, longitude=None, precision=None, globe=None, wi super().__init__(**kwargs) + precision = precision or 1 / 3600 # https://github.com/wikimedia/Wikibase/blob/174450de8fdeabcf97287604dbbf04d07bb5000c/repo/includes/Rdf/Values/GlobeCoordinateRdfBuilder.php#L120 globe = globe or config['COORDINATE_GLOBE_QID'] wikibase_url = wikibase_url or config['WIKIBASE_URL'] - self.latitude = None - self.longitude = None - self.precision = None - self.globe = None - if globe.startswith('Q'): globe = wikibase_url + '/entity/' + globe # TODO: Introduce validity checks for coordinates, etc. 
# TODO: Add check if latitude/longitude/precision is None - self.latitude = latitude - self.longitude = longitude - self.precision = precision - self.globe = globe - if self.latitude and self.longitude and self.precision: - self.value = (self.latitude, self.longitude, self.precision, self.globe) - else: - self.value = None + if latitude and longitude: + if latitude < -90 or latitude > 90: + raise ValueError("latitude must be between -90 and 90, got '{}'".format(latitude)) + if longitude < -180 or longitude > 180: + raise ValueError("longitude must be between -180 and 180, got '{}'".format(longitude)) - if self.value: self.mainsnak.datavalue = { 'value': { - 'latitude': self.latitude, - 'longitude': self.longitude, - 'precision': self.precision, - 'globe': self.globe + 'latitude': latitude, + 'longitude': longitude, + 'precision': precision, + 'globe': globe }, 'type': 'globecoordinate' } def get_sparql_value(self): - return 'Point(' + str(self.latitude) + ', ' + str(self.longitude) + ')' + return 'Point(' + str(self.mainsnak.datavalue['value']['latitude']) + ', ' + str(self.mainsnak.datavalue['value']['longitude']) + ')' From 3422e7c7a408a16de0f685d41403fa419ee6e717 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Fri, 17 Sep 2021 18:11:31 +0200 Subject: [PATCH 110/308] Update basedatatype.py Removve self.valiue in BaseDataType --- wikibaseintegrator/datatypes/basedatatype.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index e188bcce..6e6add0c 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -62,19 +62,8 @@ def __init__(self, prop_nr=None, **kwargs): references.add(reference=reference) self.references = references - self.value = None self.mainsnak.property_number = prop_nr or None - @property - def value(self): - return self.__value - - @value.setter - def 
value(self, value): - if not value: - self.mainsnak.snaktype = WikibaseSnakType.NO_VALUE - self.__value = value - def get_sparql_value(self): return self.mainsnak.datavalue['value'] From 2669b59f86075f1de6cfa7a8e58d2a69fc3abf66 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 18 Sep 2021 12:04:07 +0200 Subject: [PATCH 111/308] Update with --py37-plus --keep-runtime-typing (#230) --- test/test_wbi_fastrun.py | 8 +++--- wikibaseintegrator/datatypes/form.py | 4 +-- wikibaseintegrator/datatypes/geoshape.py | 2 +- .../datatypes/globecoordinate.py | 4 +-- wikibaseintegrator/datatypes/item.py | 6 ++-- wikibaseintegrator/datatypes/lexeme.py | 6 ++-- .../datatypes/monolingualtext.py | 4 +-- wikibaseintegrator/datatypes/property.py | 6 ++-- wikibaseintegrator/datatypes/sense.py | 4 +-- wikibaseintegrator/datatypes/string.py | 2 +- wikibaseintegrator/datatypes/tabulardata.py | 2 +- wikibaseintegrator/datatypes/time.py | 2 +- wikibaseintegrator/datatypes/url.py | 4 +-- wikibaseintegrator/entities/baseentity.py | 4 +-- wikibaseintegrator/entities/item.py | 4 +-- wikibaseintegrator/entities/lexeme.py | 4 +-- wikibaseintegrator/entities/mediainfo.py | 4 +-- wikibaseintegrator/entities/property.py | 4 +-- wikibaseintegrator/models/aliases.py | 4 +-- wikibaseintegrator/models/claims.py | 6 ++-- wikibaseintegrator/models/forms.py | 6 ++-- wikibaseintegrator/models/language_values.py | 4 +-- wikibaseintegrator/models/qualifiers.py | 2 +- wikibaseintegrator/models/references.py | 4 +-- wikibaseintegrator/models/senses.py | 4 +-- wikibaseintegrator/models/sitelinks.py | 4 +-- wikibaseintegrator/models/snaks.py | 4 +-- wikibaseintegrator/wbi_exceptions.py | 2 +- wikibaseintegrator/wbi_fastrun.py | 28 +++++++++---------- wikibaseintegrator/wbi_helpers.py | 22 +++++++-------- 30 files changed, 82 insertions(+), 82 deletions(-) diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py index 98e00893..b7e7e36e 100644 --- 
a/test/test_wbi_fastrun.py +++ b/test/test_wbi_fastrun.py @@ -38,7 +38,7 @@ def test_query_data(): assert list(frc.prop_data['Q10874']['P828'].values())[0]['v'] == "Q18228398" # uri - v = set([x['v'] for x in frc.prop_data['Q10874']['P2888'].values()]) + v = {x['v'] for x in frc.prop_data['Q10874']['P2888'].values()} assert all(y.startswith("http") for y in v) @@ -93,7 +93,7 @@ def test_query_data_ref(): class FastRunContainerFakeQueryDataEnsembl(wbi_fastrun.FastRunContainer): def __init__(self, *args, **kwargs): - super(FastRunContainerFakeQueryDataEnsembl, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.prop_dt_map = {'P248': 'wikibase-item', 'P594': 'external-id'} self.prop_data['Q14911732'] = {'P594': { 'fake statement id': { @@ -107,7 +107,7 @@ def __init__(self, *args, **kwargs): class FastRunContainerFakeQueryDataEnsemblNoRef(wbi_fastrun.FastRunContainer): def __init__(self, *args, **kwargs): - super(FastRunContainerFakeQueryDataEnsemblNoRef, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.prop_dt_map = {'P248': 'wikibase-item', 'P594': 'external-id'} self.prop_data['Q14911732'] = {'P594': { 'fake statement id': { @@ -150,7 +150,7 @@ def test_fastrun_ref_ensembl(): class FakeQueryDataAppendProps(wbi_fastrun.FastRunContainer): # an item with three values for the same property def __init__(self, *args, **kwargs): - super(FakeQueryDataAppendProps, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.debug = True self.prop_dt_map = {'P527': 'wikibase-item', 'P248': 'wikibase-item', 'P594': 'external-id'} self.rev_lookup = { diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py index bfedefce..ab4f9884 100644 --- a/wikibaseintegrator/datatypes/form.py +++ b/wikibaseintegrator/datatypes/form.py @@ -35,14 +35,14 @@ def __init__(self, value=None, **kwargs): super().__init__(**kwargs) - assert isinstance(value, str) or value is None, "Expected str, found {} 
({})".format(type(value), value) + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: pattern = re.compile(r'^L[0-9]+-F[0-9]+$') matches = pattern.match(value) if not matches: - raise ValueError("Invalid form ID ({}), format must be 'L[0-9]+-F[0-9]+'".format(value)) + raise ValueError(f"Invalid form ID ({value}), format must be 'L[0-9]+-F[0-9]+'") self.mainsnak.datavalue = { 'value': { diff --git a/wikibaseintegrator/datatypes/geoshape.py b/wikibaseintegrator/datatypes/geoshape.py index d489e3eb..3aff16fe 100644 --- a/wikibaseintegrator/datatypes/geoshape.py +++ b/wikibaseintegrator/datatypes/geoshape.py @@ -41,7 +41,7 @@ def __init__(self, value=None, **kwargs): super().__init__(**kwargs) - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: # TODO: Need to check if the value is a full URl like http://commons.wikimedia.org/data/main/Data:Paris.map diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py index c24effe6..cc02f219 100644 --- a/wikibaseintegrator/datatypes/globecoordinate.py +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -50,9 +50,9 @@ def __init__(self, latitude=None, longitude=None, precision=None, globe=None, wi if latitude and longitude: if latitude < -90 or latitude > 90: - raise ValueError("latitude must be between -90 and 90, got '{}'".format(latitude)) + raise ValueError(f"latitude must be between -90 and 90, got '{latitude}'") if longitude < -180 or longitude > 180: - raise ValueError("longitude must be between -180 and 180, got '{}'".format(longitude)) + raise ValueError(f"longitude must be between -180 and 180, got '{longitude}'") self.mainsnak.datavalue = { 'value': { diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py index 
9d8a8c14..6459d837 100644 --- a/wikibaseintegrator/datatypes/item.py +++ b/wikibaseintegrator/datatypes/item.py @@ -35,7 +35,7 @@ def __init__(self, value=None, **kwargs): super().__init__(**kwargs) - assert isinstance(value, (str, int)) or value is None, 'Expected str or int, found {} ({})'.format(type(value), value) + assert isinstance(value, (str, int)) or value is None, f'Expected str or int, found {type(value)} ({value})' if value: if isinstance(value, str): @@ -43,7 +43,7 @@ def __init__(self, value=None, **kwargs): matches = pattern.match(value) if not matches: - raise ValueError("Invalid item ID ({}), format must be 'Q[0-9]+'".format(value)) + raise ValueError(f"Invalid item ID ({value}), format must be 'Q[0-9]+'") value = int(matches.group(1)) @@ -51,7 +51,7 @@ def __init__(self, value=None, **kwargs): 'value': { 'entity-type': 'item', 'numeric-id': value, - 'id': 'Q{}'.format(value) + 'id': f'Q{value}' }, 'type': 'wikibase-entityid' } diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py index f8c5f9ac..09e66968 100644 --- a/wikibaseintegrator/datatypes/lexeme.py +++ b/wikibaseintegrator/datatypes/lexeme.py @@ -35,7 +35,7 @@ def __init__(self, value=None, **kwargs): super().__init__(**kwargs) - assert isinstance(value, (str, int)) or value is None, "Expected str or int, found {} ({})".format(type(value), value) + assert isinstance(value, (str, int)) or value is None, f"Expected str or int, found {type(value)} ({value})" if value: if isinstance(value, str): @@ -43,7 +43,7 @@ def __init__(self, value=None, **kwargs): matches = pattern.match(value) if not matches: - raise ValueError("Invalid lexeme ID ({}), format must be 'L[0-9]+'".format(value)) + raise ValueError(f"Invalid lexeme ID ({value}), format must be 'L[0-9]+'") value = int(matches.group(1)) @@ -51,7 +51,7 @@ def __init__(self, value=None, **kwargs): 'value': { 'entity-type': 'lexeme', 'numeric-id': value, - 'id': 'L{}'.format(value) + 'id': f'L{value}' }, 
'type': 'wikibase-entityid' } diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py index 0a4c7a31..14cb4280 100644 --- a/wikibaseintegrator/datatypes/monolingualtext.py +++ b/wikibaseintegrator/datatypes/monolingualtext.py @@ -38,8 +38,8 @@ def __init__(self, text=None, language=None, **kwargs): language = language or config['DEFAULT_LANGUAGE'] - assert isinstance(text, str) or text is None, "Expected str, found {} ({})".format(type(text), text) - assert isinstance(language, str), "Expected str, found {} ({})".format(type(language), language) + assert isinstance(text, str) or text is None, f"Expected str, found {type(text)} ({text})" + assert isinstance(language, str), f"Expected str, found {type(language)} ({language})" if text and language: self.mainsnak.datavalue = { diff --git a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py index a207650d..184aa04f 100644 --- a/wikibaseintegrator/datatypes/property.py +++ b/wikibaseintegrator/datatypes/property.py @@ -35,7 +35,7 @@ def __init__(self, value=None, **kwargs): super().__init__(**kwargs) - assert isinstance(value, (str, int)) or value is None, "Expected str or int, found {} ({})".format(type(value), value) + assert isinstance(value, (str, int)) or value is None, f"Expected str or int, found {type(value)} ({value})" if value: if isinstance(value, str): @@ -43,7 +43,7 @@ def __init__(self, value=None, **kwargs): matches = pattern.match(value) if not matches: - raise ValueError("Invalid property ID ({}), format must be 'P[0-9]+'".format(value)) + raise ValueError(f"Invalid property ID ({value}), format must be 'P[0-9]+'") value = int(matches.group(1)) @@ -51,7 +51,7 @@ def __init__(self, value=None, **kwargs): 'value': { 'entity-type': 'property', 'numeric-id': value, - 'id': 'P{}'.format(value) + 'id': f'P{value}' }, 'type': 'wikibase-entityid' } diff --git a/wikibaseintegrator/datatypes/sense.py 
b/wikibaseintegrator/datatypes/sense.py index edbd5dd2..78cbddcc 100644 --- a/wikibaseintegrator/datatypes/sense.py +++ b/wikibaseintegrator/datatypes/sense.py @@ -35,14 +35,14 @@ def __init__(self, value=None, **kwargs): super().__init__(**kwargs) - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: pattern = re.compile(r'^L[0-9]+-S[0-9]+$') matches = pattern.match(value) if not matches: - raise ValueError("Invalid sense ID ({}), format must be 'L[0-9]+-S[0-9]+'".format(value)) + raise ValueError(f"Invalid sense ID ({value}), format must be 'L[0-9]+-S[0-9]+'") self.mainsnak.datavalue = { 'value': { diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py index c14a27b4..2b721b53 100644 --- a/wikibaseintegrator/datatypes/string.py +++ b/wikibaseintegrator/datatypes/string.py @@ -28,7 +28,7 @@ def __init__(self, value=None, **kwargs): super().__init__(**kwargs) - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: self.mainsnak.datavalue = { diff --git a/wikibaseintegrator/datatypes/tabulardata.py b/wikibaseintegrator/datatypes/tabulardata.py index af8a12e0..decdba17 100644 --- a/wikibaseintegrator/datatypes/tabulardata.py +++ b/wikibaseintegrator/datatypes/tabulardata.py @@ -34,7 +34,7 @@ def __init__(self, value=None, **kwargs): super().__init__(**kwargs) - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: # TODO: Need to check if the value is a full URl like http://commons.wikimedia.org/data/main/Data:Taipei+Population.tab diff --git 
a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py index 38fad935..8178e872 100644 --- a/wikibaseintegrator/datatypes/time.py +++ b/wikibaseintegrator/datatypes/time.py @@ -56,7 +56,7 @@ def __init__(self, time=None, before=0, after=0, precision=11, timezone=0, calen if calendarmodel.startswith('Q'): calendarmodel = wikibase_url + '/entity/' + calendarmodel - assert isinstance(time, str) or time is None, "Expected str, found {} ({})".format(type(time), time) + assert isinstance(time, str) or time is None, f"Expected str, found {type(time)} ({time})" if time: if time == "now": diff --git a/wikibaseintegrator/datatypes/url.py b/wikibaseintegrator/datatypes/url.py index ab9d191c..ec9e7fd5 100644 --- a/wikibaseintegrator/datatypes/url.py +++ b/wikibaseintegrator/datatypes/url.py @@ -34,14 +34,14 @@ def __init__(self, value=None, **kwargs): super().__init__(**kwargs) - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: pattern = re.compile(r'^([a-z][a-z\d+.-]*):([^][<>\"\x00-\x20\x7F])+$') matches = pattern.match(value) if not matches: - raise ValueError("Invalid URL {}".format(value)) + raise ValueError(f"Invalid URL {value}") self.mainsnak.datavalue = { 'value': value, diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index f4e46c7f..ca1e77be 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -142,7 +142,7 @@ def _write(self, data=None, summary=None, allow_anonymous=False, clear=False, ** json_data = mediawiki_api_call_helper(data=payload, login=self.api.login, allow_anonymous=allow_anonymous, is_bot=self.api.is_bot, **kwargs) if 'error' in json_data and 'messages' in json_data['error']: - error_msg_names = set(x.get('name') for x in json_data['error']['messages']) + 
error_msg_names = {x.get('name') for x in json_data['error']['messages']} if 'wikibase-validator-label-with-description-conflict' in error_msg_names: raise NonUniqueLabelDescriptionPairError(json_data) @@ -209,5 +209,5 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs="\r\n\t ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs="\r\n\t ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) diff --git a/wikibaseintegrator/entities/item.py b/wikibaseintegrator/entities/item.py index 21241397..0feb6bee 100644 --- a/wikibaseintegrator/entities/item.py +++ b/wikibaseintegrator/entities/item.py @@ -43,14 +43,14 @@ def get(self, entity_id, **kwargs) -> Item: matches = pattern.match(entity_id) if not matches: - raise ValueError("Invalid item ID ({}), format must be 'Q[0-9]+'".format(entity_id)) + raise ValueError(f"Invalid item ID ({entity_id}), format must be 'Q[0-9]+'") entity_id = int(matches.group(1)) if entity_id < 1: raise ValueError("Item ID must be greater than 0") - entity_id = 'Q{}'.format(entity_id) + entity_id = f'Q{entity_id}' json_data = super().get(entity_id=entity_id, **kwargs) return Item(self.api).from_json(json_data=json_data['entities'][entity_id]) diff --git a/wikibaseintegrator/entities/lexeme.py b/wikibaseintegrator/entities/lexeme.py index 5b4f37c0..11fe5717 100644 --- a/wikibaseintegrator/entities/lexeme.py +++ b/wikibaseintegrator/entities/lexeme.py @@ -32,14 +32,14 @@ def get(self, entity_id, **kwargs) -> Lexeme: matches = pattern.match(entity_id) if not matches: - raise ValueError("Invalid lexeme ID ({}), format must be 'L[0-9]+'".format(entity_id)) + raise ValueError(f"Invalid lexeme ID ({entity_id}), format must be 'L[0-9]+'") entity_id = int(matches.group(1)) if entity_id < 1: raise ValueError("Lexeme ID must be greater than 0") - entity_id = 'L{}'.format(entity_id) + entity_id = f'L{entity_id}' json_data = super().get(entity_id=entity_id, 
**kwargs) return Lexeme(self.api).from_json(json_data=json_data['entities'][entity_id]) diff --git a/wikibaseintegrator/entities/mediainfo.py b/wikibaseintegrator/entities/mediainfo.py index af8a834d..a6e94879 100644 --- a/wikibaseintegrator/entities/mediainfo.py +++ b/wikibaseintegrator/entities/mediainfo.py @@ -39,14 +39,14 @@ def get(self, entity_id, **kwargs) -> MediaInfo: matches = pattern.match(entity_id) if not matches: - raise ValueError("Invalid MediaInfo ID ({}), format must be 'M[0-9]+'".format(entity_id)) + raise ValueError(f"Invalid MediaInfo ID ({entity_id}), format must be 'M[0-9]+'") entity_id = int(matches.group(1)) if entity_id < 1: raise ValueError("MediaInfo ID must be greater than 0") - entity_id = 'M{}'.format(entity_id) + entity_id = f'M{entity_id}' json_data = super().get(entity_id=entity_id, **kwargs) return MediaInfo(self.api).from_json(json_data=json_data['entities'][entity_id]) diff --git a/wikibaseintegrator/entities/property.py b/wikibaseintegrator/entities/property.py index 829446a1..8c2233c5 100644 --- a/wikibaseintegrator/entities/property.py +++ b/wikibaseintegrator/entities/property.py @@ -35,14 +35,14 @@ def get(self, entity_id, **kwargs) -> Property: matches = pattern.match(entity_id) if not matches: - raise ValueError("Invalid property ID ({}), format must be 'P[0-9]+'".format(entity_id)) + raise ValueError(f"Invalid property ID ({entity_id}), format must be 'P[0-9]+'") entity_id = int(matches.group(1)) if entity_id < 1: raise ValueError("Property ID must be greater than 0") - entity_id = 'P{}'.format(entity_id) + entity_id = f'P{entity_id}' json_data = super().get(entity_id=entity_id, **kwargs) return Property(self.api).from_json(json_data=json_data['entities'][entity_id]) diff --git a/wikibaseintegrator/models/aliases.py b/wikibaseintegrator/models/aliases.py index 77cd700e..c96f13c3 100644 --- a/wikibaseintegrator/models/aliases.py +++ b/wikibaseintegrator/models/aliases.py @@ -46,7 +46,7 @@ def set(self, language=None, 
values=None, action_if_exists=ActionIfExists.APPEND if isinstance(values, str): values = [values] elif not isinstance(values, list) and values is not None: - raise TypeError("value must be a str or list of strings, got '{}'".format(type(values))) + raise TypeError(f"value must be a str or list of strings, got '{type(values)}'") if action_if_exists == ActionIfExists.REPLACE: aliases = [] @@ -92,7 +92,7 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 7d34016b..09609d8a 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -34,7 +34,7 @@ def add(self, claims: Union[list, Claim, None] = None, action_if_exists=ActionIf """ if action_if_exists not in ActionIfExists: - raise ValueError('{} is not a valid action_if_exists value. Use the enum ActionIfExists'.format(action_if_exists)) + raise ValueError(f'{action_if_exists} is not a valid action_if_exists value. 
Use the enum ActionIfExists') if isinstance(claims, Claim): claims = [claims] @@ -112,7 +112,7 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) @@ -278,5 +278,5 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py index 0fb7bb15..de6907c2 100644 --- a/wikibaseintegrator/models/forms.py +++ b/wikibaseintegrator/models/forms.py @@ -40,7 +40,7 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) @@ -81,7 +81,7 @@ def grammatical_features(self, value): elif isinstance(value, list) or value is None: self.__grammatical_features = value else: - raise TypeError("value must be an int, a str or a list of strings, got ''".format(type(value))) + raise TypeError(f"value must be an int, a str or a list of strings, got ''") @property def claims(self): @@ -110,5 +110,5 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) diff --git a/wikibaseintegrator/models/language_values.py b/wikibaseintegrator/models/language_values.py index 263e656c..ec57ef3d 100644 --- a/wikibaseintegrator/models/language_values.py +++ b/wikibaseintegrator/models/language_values.py @@ 
-65,7 +65,7 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) @@ -141,5 +141,5 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) diff --git a/wikibaseintegrator/models/qualifiers.py b/wikibaseintegrator/models/qualifiers.py index 6a4f4c5c..f6379c63 100644 --- a/wikibaseintegrator/models/qualifiers.py +++ b/wikibaseintegrator/models/qualifiers.py @@ -79,5 +79,5 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) diff --git a/wikibaseintegrator/models/references.py b/wikibaseintegrator/models/references.py index c3b67cb8..ba7aa1af 100644 --- a/wikibaseintegrator/models/references.py +++ b/wikibaseintegrator/models/references.py @@ -77,7 +77,7 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) @@ -149,5 +149,5 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) diff --git a/wikibaseintegrator/models/senses.py b/wikibaseintegrator/models/senses.py index 95789fd2..ebc0cdc3 100644 --- a/wikibaseintegrator/models/senses.py 
+++ b/wikibaseintegrator/models/senses.py @@ -36,7 +36,7 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) @@ -72,7 +72,7 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) diff --git a/wikibaseintegrator/models/sitelinks.py b/wikibaseintegrator/models/sitelinks.py index 2c8aaa6d..4b4fd87c 100644 --- a/wikibaseintegrator/models/sitelinks.py +++ b/wikibaseintegrator/models/sitelinks.py @@ -24,7 +24,7 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) @@ -42,5 +42,5 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) diff --git a/wikibaseintegrator/models/snaks.py b/wikibaseintegrator/models/snaks.py index 186f139c..cc5148f2 100644 --- a/wikibaseintegrator/models/snaks.py +++ b/wikibaseintegrator/models/snaks.py @@ -53,7 +53,7 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) @@ -151,5 +151,5 @@ def __repr__(self): return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - 
attrs=" ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) diff --git a/wikibaseintegrator/wbi_exceptions.py b/wikibaseintegrator/wbi_exceptions.py index c3dfe498..4d28cf99 100644 --- a/wikibaseintegrator/wbi_exceptions.py +++ b/wikibaseintegrator/wbi_exceptions.py @@ -54,7 +54,7 @@ class ManualInterventionReqException(Exception): def __init__(self, value, property_string, item_list): super().__init__() - self.value = value + ' Property: {}, items affected: {}'.format(property_string, item_list) + self.value = value + f' Property: {property_string}, items affected: {item_list}' def __str__(self): return repr(self.value) diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index b2a05f30..5b42a4ba 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -65,8 +65,8 @@ def reconstruct_statements(self, qid: str) -> list: for prop_nr, dt in self.prop_data[qid].items(): # get datatypes for qualifier props - q_props = set(chain(*[[x[0] for x in d['qual']] for d in dt.values()])) - r_props = set(chain(*[set(chain(*[[y[0] for y in x] for x in d['ref'].values()])) for d in dt.values()])) + q_props = set(chain(*([x[0] for x in d['qual']] for d in dt.values()))) + r_props = set(chain(*(set(chain(*([y[0] for y in x] for x in d['ref'].values()))) for d in dt.values()))) props = q_props | r_props for prop in props: if prop not in self.prop_dt_map: @@ -116,7 +116,7 @@ def load_item(self, claims: list, cqid=None) -> bool: if prop_nr not in self.prop_dt_map: if self.debug: - print("{} not found in fastrun".format(prop_nr)) + print(f"{prop_nr} not found in fastrun") self.prop_dt_map.update({prop_nr: self.get_prop_datatype(prop_nr)}) self._query_data(prop_nr=prop_nr, use_units=claim.mainsnak.datatype == 'quantity') @@ -124,7 +124,7 @@ def load_item(self, claims: list, cqid=None) -> bool: if self.prop_dt_map[prop_nr] == 'wikibase-item': if 
not str(current_value).startswith('Q'): - current_value = 'Q{}'.format(current_value) + current_value = f'Q{current_value}' if self.debug: print(current_value) @@ -156,7 +156,7 @@ def load_item(self, claims: list, cqid=None) -> bool: # if not, a write is required no matter what if not len(matching_qids) == 1: if self.debug: - print("no matches ({})".format(len(matching_qids))) + print(f"no matches ({len(matching_qids)})") return True qid = matching_qids.pop() @@ -192,7 +192,7 @@ def write_required(self, data: list, action_if_exists=ActionIfExists.REPLACE, cq # comp = [True for x in app_data for y in rec_app_data if x.equals(y, include_ref=self.use_refs)] if len(comp) != len(app_data): if self.debug: - print("failed append: {}".format(p)) + print(f"failed append: {p}") return True tmp_rs = [x for x in tmp_rs if x.mainsnak.property_number not in append_props and x.mainsnak.property_number in data_props] @@ -235,7 +235,7 @@ def write_required(self, data: list, action_if_exists=ActionIfExists.REPLACE, cq """ if self.debug: - print("bool_vec: {}".format(bool_vec)) + print(f"bool_vec: {bool_vec}") print("-----------------------------------") for x in tmp_rs: if date == x and x.mainsnak.property_number not in del_props: @@ -312,7 +312,7 @@ def check_language_data(self, qid: str, lang_data: list, lang: str, lang_data_ty :param action_if_exists: If aliases already exist, APPEND or REPLACE :return: boolean """ - all_lang_strings = set(x.strip().casefold() for x in self.get_language_data(qid, lang, lang_data_type)) + all_lang_strings = {x.strip().casefold() for x in self.get_language_data(qid, lang, lang_data_type)} if action_if_exists == ActionIfExists.REPLACE: return not collections.Counter(all_lang_strings) == collections.Counter(map(lambda x: x.casefold(), lang_data)) @@ -320,7 +320,7 @@ def check_language_data(self, qid: str, lang_data: list, lang: str, lang_data_ty for s in lang_data: if s.strip().casefold() not in all_lang_strings: if self.debug: - print("fastrun 
failed at: {}, string: {}".format(lang_data_type, s)) + print(f"fastrun failed at: {lang_data_type}, string: {s}") return True return False @@ -456,9 +456,9 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: r = execute_sparql_query(query, endpoint=self.sparql_endpoint_url, debug=self.debug)['results']['bindings'] count = int(r[0]['c']['value']) - print("Count: {}".format(count)) + print(f"Count: {count}") num_pages = (int(count) // page_size) + 1 - print("Query {}: {}/{}".format(prop_nr, page_count, num_pages)) + print(f"Query {prop_nr}: {page_count}/{num_pages}") while True: # Query header query = ''' @@ -555,7 +555,7 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: self.update_frc_from_query(results, prop_nr) page_count += 1 if num_pages: - print("Query {}: {}/{}".format(prop_nr, page_count, num_pages)) + print(f"Query {prop_nr}: {page_count}/{num_pages}") if len(results) == 0 or len(results) < page_size: break @@ -618,7 +618,7 @@ def __repr__(self) -> str: return "<{klass} @{id:x} {attrs}>".format( klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, - attrs="\r\n\t ".join("{}={!r}".format(k, v) for k, v in self.__dict__.items()), + attrs="\r\n\t ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) @@ -630,7 +630,7 @@ def freezeargs(func): @wraps(func) def wrapped(*args, **kwargs): - args = tuple([frozendict(arg) if isinstance(arg, dict) else arg for arg in args]) + args = tuple(frozendict(arg) if isinstance(arg, dict) else arg for arg in args) kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()} return func(*args, **kwargs) diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index 1270dd7f..57d733cf 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -50,11 +50,11 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries try: response = session.request(method, mediawiki_api_url, **kwargs) except 
requests.exceptions.ConnectionError as e: - print("Connection error: {}. Sleeping for {} seconds.".format(e, retry_after)) + print(f"Connection error: {e}. Sleeping for {retry_after} seconds.") sleep(retry_after) continue if response.status_code == 503: - print("service unavailable. sleeping for {} seconds".format(retry_after)) + print(f"service unavailable. sleeping for {retry_after} seconds") sleep(retry_after) continue @@ -69,10 +69,10 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries # rate limiting error_msg_names = set() if 'messages' in json_data['error']: - error_msg_names = set(x.get('name') for x in json_data['error']['messages']) + error_msg_names = {x.get('name') for x in json_data['error']['messages']} if 'actionthrottledtext' in error_msg_names: sleep_sec = int(response.headers.get('retry-after', retry_after)) - print("{}: rate limited. sleeping for {} seconds".format(datetime.datetime.utcnow(), sleep_sec)) + print(f"{datetime.datetime.utcnow()}: rate limited. sleeping for {sleep_sec} seconds") sleep(sleep_sec) continue @@ -85,13 +85,13 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries sleep_sec = max(sleep_sec, 5) # The number of second can't be more than retry_after sleep_sec = min(sleep_sec, retry_after) - print("{}: maxlag. sleeping for {} seconds".format(datetime.datetime.utcnow(), sleep_sec)) + print(f"{datetime.datetime.utcnow()}: maxlag. 
sleeping for {sleep_sec} seconds") sleep(sleep_sec) continue # readonly if 'code' in json_data['error'] and json_data['error']['code'] == 'readonly': - print('The Wikibase instance is currently in readonly mode, waiting for {} seconds'.format(retry_after)) + print(f'The Wikibase instance is currently in readonly mode, waiting for {retry_after} seconds') sleep(retry_after) continue @@ -202,17 +202,17 @@ def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max try: response = requests.post(sparql_endpoint_url, params=params, headers=headers) except requests.exceptions.ConnectionError as e: - print("Connection error: {}. Sleeping for {} seconds.".format(e, retry_after)) + print(f"Connection error: {e}. Sleeping for {retry_after} seconds.") sleep(retry_after) continue if response.status_code == 503: - print("Service unavailable (503). Sleeping for {} seconds".format(retry_after)) + print(f"Service unavailable (503). Sleeping for {retry_after} seconds") sleep(retry_after) continue if response.status_code == 429: if 'retry-after' in response.headers.keys(): retry_after = response.headers['retry-after'] - print("Too Many Requests (429). Sleeping for {} seconds".format(retry_after)) + print(f"Too Many Requests (429). 
Sleeping for {retry_after} seconds") sleep(retry_after) continue response.raise_for_status() @@ -431,7 +431,7 @@ def format_amount(amount) -> str: # Adding prefix + for positive number and 0 if not str(amount).startswith('+') and float(amount) >= 0: - amount = str('+{}'.format(amount)) + amount = str(f'+{amount}') # return as string return str(amount) @@ -439,7 +439,7 @@ def format_amount(amount) -> str: def get_user_agent(user_agent): from wikibaseintegrator import __version__ - wbi_user_agent = "WikibaseIntegrator/{}".format(__version__) + wbi_user_agent = f"WikibaseIntegrator/{__version__}" if user_agent is None: return_user_agent = wbi_user_agent From b5f0797b1edf2ae074a8183f0bb3916bedc00607 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 18 Sep 2021 21:41:44 +0200 Subject: [PATCH 112/308] Fix missing format value --- wikibaseintegrator/models/forms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py index de6907c2..7136bd3f 100644 --- a/wikibaseintegrator/models/forms.py +++ b/wikibaseintegrator/models/forms.py @@ -81,7 +81,7 @@ def grammatical_features(self, value): elif isinstance(value, list) or value is None: self.__grammatical_features = value else: - raise TypeError(f"value must be an int, a str or a list of strings, got ''") + raise TypeError(f"value must be an int, a str or a list of strings, got '{type(value)}'") @property def claims(self): From fe87363dc4f310216367e4da723f99160fe4133d Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 19 Sep 2021 23:28:15 +0200 Subject: [PATCH 113/308] Change support for entities api parameter It's now optional and pass better between multiple objects Add unit tests --- test/test_all.py | 15 +++++++++++++-- wikibaseintegrator/entities/baseentity.py | 19 ++++++++++++++----- wikibaseintegrator/entities/item.py | 9 ++++----- 
wikibaseintegrator/entities/lexeme.py | 10 ++++------ wikibaseintegrator/entities/mediainfo.py | 15 +++++++-------- wikibaseintegrator/entities/property.py | 10 ++++------ 6 files changed, 46 insertions(+), 32 deletions(-) diff --git a/test/test_all.py b/test/test_all.py index 42461aa1..e5fc9c11 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -3,12 +3,13 @@ import requests -from wikibaseintegrator import wbi_fastrun, WikibaseIntegrator, datatypes +from wikibaseintegrator import WikibaseIntegrator, datatypes, wbi_fastrun from wikibaseintegrator.datatypes import BaseDataType +from wikibaseintegrator.entities import Item from wikibaseintegrator.entities.baseentity import MWApiError from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists -from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper, get_user_agent, execute_sparql_query +from wikibaseintegrator.wbi_helpers import execute_sparql_query, get_user_agent, mediawiki_api_call_helper config['DEBUG'] = True @@ -219,3 +220,13 @@ def test_sparql(): SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". 
} }''') assert len(results['results']['bindings']) > 1 + + +def test_wikibaseintegrator(): + nwbi = WikibaseIntegrator(is_bot=False) + assert nwbi.item.api.is_bot is False + assert Item(api=nwbi, is_bot=True).api.is_bot is True + assert Item(api=nwbi).api.is_bot is False + assert Item().api.is_bot is False + assert nwbi.item.get('Q582').api.is_bot is False + assert Item(api=nwbi, is_bot=True).get('Q582').api.is_bot is True diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index ca1e77be..67ad7362 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -1,10 +1,12 @@ +from copy import copy + import simplejson from wikibaseintegrator.datatypes import BaseDataType -from wikibaseintegrator.models.claims import Claims, Claim +from wikibaseintegrator.models.claims import Claim, Claims from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists -from wikibaseintegrator.wbi_exceptions import NonUniqueLabelDescriptionPairError, MWApiError +from wikibaseintegrator.wbi_exceptions import MWApiError, NonUniqueLabelDescriptionPairError from wikibaseintegrator.wbi_fastrun import FastRunContainer from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper @@ -14,8 +16,15 @@ class BaseEntity: ETYPE = 'base-entity' - def __init__(self, api, lastrevid=None, type=None, id=None, claims=None): - self.api = api + def __init__(self, api=None, lastrevid=None, type=None, id=None, claims=None, is_bot=None, login=None): + if not api: + from wikibaseintegrator import WikibaseIntegrator + self.api = WikibaseIntegrator() + else: + self.api = copy(api) + + self.api.is_bot = is_bot or self.api.is_bot + self.api.login = login or self.api.login self.lastrevid = lastrevid self.type = type or self.ETYPE @@ -78,7 +87,7 @@ def get(self, entity_id, **kwargs): 'format': 'json' } - return self.api.helpers.mediawiki_api_call_helper(data=params, 
allow_anonymous=True, **kwargs) + return mediawiki_api_call_helper(data=params, allow_anonymous=True, **kwargs) def clear(self, **kwargs): self._write(clear=True, **kwargs) diff --git a/wikibaseintegrator/entities/item.py b/wikibaseintegrator/entities/item.py index 0feb6bee..0baa3b47 100644 --- a/wikibaseintegrator/entities/item.py +++ b/wikibaseintegrator/entities/item.py @@ -12,7 +12,7 @@ class Item(BaseEntity): ETYPE = 'item' - def __init__(self, api, labels=None, descriptions=None, aliases=None, sitelinks=None, **kwargs) -> None: + def __init__(self, labels=None, descriptions=None, aliases=None, sitelinks=None, **kwargs) -> None: """ :param api: @@ -22,9 +22,8 @@ def __init__(self, api, labels=None, descriptions=None, aliases=None, sitelinks= :param sitelinks: :param kwargs: """ - self.api = api - super().__init__(api=self.api, **kwargs) + super().__init__(**kwargs) # Item and property specific self.labels = labels or Labels() @@ -35,7 +34,7 @@ def __init__(self, api, labels=None, descriptions=None, aliases=None, sitelinks= self.sitelinks = sitelinks or Sitelinks() def new(self, **kwargs) -> Item: - return Item(self.api, **kwargs) + return Item(api=self.api, **kwargs) def get(self, entity_id, **kwargs) -> Item: if isinstance(entity_id, str): @@ -52,7 +51,7 @@ def get(self, entity_id, **kwargs) -> Item: entity_id = f'Q{entity_id}' json_data = super().get(entity_id=entity_id, **kwargs) - return Item(self.api).from_json(json_data=json_data['entities'][entity_id]) + return Item(api=self.api).from_json(json_data=json_data['entities'][entity_id]) def get_json(self) -> {}: return { diff --git a/wikibaseintegrator/entities/lexeme.py b/wikibaseintegrator/entities/lexeme.py index 11fe5717..2cd4b6aa 100644 --- a/wikibaseintegrator/entities/lexeme.py +++ b/wikibaseintegrator/entities/lexeme.py @@ -12,10 +12,8 @@ class Lexeme(BaseEntity): ETYPE = 'lexeme' - def __init__(self, api, lemmas=None, lexical_category=None, language=None, forms=None, senses=None, **kwargs): - 
self.api = api - - super().__init__(api=self.api, **kwargs) + def __init__(self, lemmas=None, lexical_category=None, language=None, forms=None, senses=None, **kwargs): + super().__init__(**kwargs) self.lemmas = lemmas or Lemmas() self.lexical_category = lexical_category @@ -24,7 +22,7 @@ def __init__(self, api, lemmas=None, lexical_category=None, language=None, forms self.senses = senses or Senses() def new(self, **kwargs) -> Lexeme: - return Lexeme(self.api, **kwargs) + return Lexeme(api=self.api, **kwargs) def get(self, entity_id, **kwargs) -> Lexeme: if isinstance(entity_id, str): @@ -41,7 +39,7 @@ def get(self, entity_id, **kwargs) -> Lexeme: entity_id = f'L{entity_id}' json_data = super().get(entity_id=entity_id, **kwargs) - return Lexeme(self.api).from_json(json_data=json_data['entities'][entity_id]) + return Lexeme(api=self.api).from_json(json_data=json_data['entities'][entity_id]) def get_json(self) -> {}: json_data = { diff --git a/wikibaseintegrator/entities/mediainfo.py b/wikibaseintegrator/entities/mediainfo.py index a6e94879..03be3352 100644 --- a/wikibaseintegrator/entities/mediainfo.py +++ b/wikibaseintegrator/entities/mediainfo.py @@ -6,12 +6,13 @@ from wikibaseintegrator.models.aliases import Aliases from wikibaseintegrator.models.descriptions import Descriptions from wikibaseintegrator.models.labels import Labels +from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper class MediaInfo(BaseEntity): ETYPE = 'mediainfo' - def __init__(self, api, labels=None, descriptions=None, aliases=None, **kwargs) -> None: + def __init__(self, labels=None, descriptions=None, aliases=None, **kwargs) -> None: """ :param api: @@ -21,9 +22,7 @@ def __init__(self, api, labels=None, descriptions=None, aliases=None, **kwargs) :param sitelinks: :param kwargs: """ - self.api = api - - super().__init__(api=self.api, **kwargs) + super().__init__(**kwargs) # Item and property specific self.labels = labels or Labels() @@ -31,7 +30,7 @@ def __init__(self, api, 
labels=None, descriptions=None, aliases=None, **kwargs) self.aliases = aliases or Aliases() def new(self, **kwargs) -> MediaInfo: - return MediaInfo(self.api, **kwargs) + return MediaInfo(api=self.api, **kwargs) def get(self, entity_id, **kwargs) -> MediaInfo: if isinstance(entity_id, str): @@ -48,7 +47,7 @@ def get(self, entity_id, **kwargs) -> MediaInfo: entity_id = f'M{entity_id}' json_data = super().get(entity_id=entity_id, **kwargs) - return MediaInfo(self.api).from_json(json_data=json_data['entities'][entity_id]) + return MediaInfo(api=self.api).from_json(json_data=json_data['entities'][entity_id]) def get_by_title(self, title, sites='commonswiki', **kwargs) -> MediaInfo: params = { @@ -58,14 +57,14 @@ def get_by_title(self, title, sites='commonswiki', **kwargs) -> MediaInfo: 'format': 'json' } - json_data = self.api.helpers.mediawiki_api_call_helper(data=params, allow_anonymous=True, **kwargs) + json_data = mediawiki_api_call_helper(data=params, allow_anonymous=True, **kwargs) if len(json_data['entities'].keys()) == 0: raise Exception('Title not found') if len(json_data['entities'].keys()) > 1: raise Exception('More than one element for this title') - return MediaInfo(self.api).from_json(json_data=json_data['entities'][list(json_data['entities'].keys())[0]]) + return MediaInfo(api=self.api).from_json(json_data=json_data['entities'][list(json_data['entities'].keys())[0]]) def get_json(self) -> {}: return { diff --git a/wikibaseintegrator/entities/property.py b/wikibaseintegrator/entities/property.py index 8c2233c5..b5b659a1 100644 --- a/wikibaseintegrator/entities/property.py +++ b/wikibaseintegrator/entities/property.py @@ -11,10 +11,8 @@ class Property(BaseEntity): ETYPE = 'property' - def __init__(self, api, datatype=None, labels=None, descriptions=None, aliases=None, **kwargs): - self.api = api - - super().__init__(api=api, **kwargs) + def __init__(self, datatype=None, labels=None, descriptions=None, aliases=None, **kwargs): + super().__init__(**kwargs) 
self.json = None @@ -27,7 +25,7 @@ def __init__(self, api, datatype=None, labels=None, descriptions=None, aliases=N self.aliases = aliases or Aliases() def new(self, **kwargs) -> Property: - return Property(self.api, **kwargs) + return Property(api=self.api, **kwargs) def get(self, entity_id, **kwargs) -> Property: if isinstance(entity_id, str): @@ -44,7 +42,7 @@ def get(self, entity_id, **kwargs) -> Property: entity_id = f'P{entity_id}' json_data = super().get(entity_id=entity_id, **kwargs) - return Property(self.api).from_json(json_data=json_data['entities'][entity_id]) + return Property(api=self.api).from_json(json_data=json_data['entities'][entity_id]) def get_json(self) -> {}: return { From 682fea4e3bc2e11662657a7ca5f743b5aca58d14 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 19 Sep 2021 23:42:19 +0200 Subject: [PATCH 114/308] Remove wikibaseintegrator.wbi_helpers Directly call functions from wbi_helpers.py --- wikibaseintegrator/wikibaseintegrator.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/wikibaseintegrator/wikibaseintegrator.py b/wikibaseintegrator/wikibaseintegrator.py index bdb75e43..781d7e7c 100644 --- a/wikibaseintegrator/wikibaseintegrator.py +++ b/wikibaseintegrator/wikibaseintegrator.py @@ -1,4 +1,3 @@ -from wikibaseintegrator import wbi_helpers from wikibaseintegrator.entities.item import Item from wikibaseintegrator.entities.lexeme import Lexeme from wikibaseintegrator.entities.mediainfo import MediaInfo @@ -19,6 +18,3 @@ def __init__(self, self.property = Property(api=self) self.lexeme = Lexeme(api=self) self.mediainfo = MediaInfo(api=self) - - # Helpers - self.helpers = wbi_helpers From f4c9f915a6a5abdc55b05760841e79f4b81be8ff Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 19 Sep 2021 23:42:38 +0200 Subject: [PATCH 115/308] Add isort --- setup.cfg | 3 +++ test/test_entity_item.py | 1 - test/test_wbi_core.py | 8 ++++---- test/test_wbi_exceptions.py | 
4 ++-- test/test_wbi_fastrun.py | 4 ++-- wikibaseintegrator/datatypes/basedatatype.py | 2 +- wikibaseintegrator/models/__init__.py | 16 ++++++++-------- wikibaseintegrator/models/references.py | 2 +- wikibaseintegrator/wbi_fastrun.py | 2 +- wikibaseintegrator/wbi_login.py | 2 +- 10 files changed, 23 insertions(+), 21 deletions(-) diff --git a/setup.cfg b/setup.cfg index ec85261b..288f1869 100644 --- a/setup.cfg +++ b/setup.cfg @@ -44,3 +44,6 @@ dev = pytest coverage = pytest-cov + +[isort] +line_length = 179 diff --git a/test/test_entity_item.py b/test/test_entity_item.py index 6149d68d..33c2f410 100644 --- a/test/test_entity_item.py +++ b/test/test_entity_item.py @@ -1,5 +1,4 @@ import unittest -from pprint import pprint from simplejson import JSONDecodeError diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index 3b8c14be..c0ddea30 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -1,14 +1,14 @@ import unittest from copy import deepcopy -from wikibaseintegrator import datatypes, WikibaseIntegrator -from wikibaseintegrator.datatypes import String, Math, ExternalID, Time, URL, MonolingualText, Quantity, CommonsMedia, GlobeCoordinate, GeoShape, Property, TabularData, \ - MusicalNotation, Lexeme, Form, Sense +from wikibaseintegrator import WikibaseIntegrator, datatypes +from wikibaseintegrator.datatypes import (URL, CommonsMedia, ExternalID, Form, GeoShape, GlobeCoordinate, Lexeme, Math, MonolingualText, MusicalNotation, Property, Quantity, + Sense, String, TabularData, Time) from wikibaseintegrator.datatypes.extra import EDTF, LocalMedia from wikibaseintegrator.entities import Item from wikibaseintegrator.models import LanguageValues from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseRank, WikibaseSnakType -from wikibaseintegrator.wbi_helpers import search_entities, generate_entity_instances +from wikibaseintegrator.wbi_helpers import generate_entity_instances, search_entities wbi = WikibaseIntegrator() diff --git 
a/test/test_wbi_exceptions.py b/test/test_wbi_exceptions.py index 7617ab43..85c1f5c4 100644 --- a/test/test_wbi_exceptions.py +++ b/test/test_wbi_exceptions.py @@ -1,5 +1,5 @@ -from wikibaseintegrator.wbi_exceptions import NonUniqueLabelDescriptionPairError, IDMissingError, SearchError, ManualInterventionReqException, CorePropIntegrityException, \ - MergeError, MWApiError +from wikibaseintegrator.wbi_exceptions import (CorePropIntegrityException, IDMissingError, ManualInterventionReqException, MergeError, MWApiError, + NonUniqueLabelDescriptionPairError, SearchError) def test_mwapierror(): diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py index b7e7e36e..84639e4e 100644 --- a/test/test_wbi_fastrun.py +++ b/test/test_wbi_fastrun.py @@ -1,5 +1,5 @@ -from wikibaseintegrator import wbi_fastrun, WikibaseIntegrator -from wikibaseintegrator.datatypes import BaseDataType, Item, ExternalID +from wikibaseintegrator import WikibaseIntegrator, wbi_fastrun +from wikibaseintegrator.datatypes import BaseDataType, ExternalID, Item from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index 6e6add0c..6804b1eb 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -1,4 +1,4 @@ -from wikibaseintegrator.models import Claim, Snak, Snaks, References, Reference +from wikibaseintegrator.models import Claim, Reference, References, Snak, Snaks from wikibaseintegrator.wbi_enums import WikibaseSnakType diff --git a/wikibaseintegrator/models/__init__.py b/wikibaseintegrator/models/__init__.py index baadbeb0..9fdb50da 100644 --- a/wikibaseintegrator/models/__init__.py +++ b/wikibaseintegrator/models/__init__.py @@ -1,11 +1,11 @@ -from .aliases import Aliases, Alias -from .claims import Claims, Claim +from .aliases import Alias, Aliases +from .claims import Claim, Claims 
from .descriptions import Descriptions -from .forms import Forms, Form +from .forms import Form, Forms from .labels import Labels -from .language_values import LanguageValues, LanguageValue +from .language_values import LanguageValue, LanguageValues from .lemmas import Lemmas -from .references import References, Reference -from .senses import Senses, Sense, Glosses -from .sitelinks import Sitelinks, Sitelink -from .snaks import Snaks, Snak +from .references import Reference, References +from .senses import Glosses, Sense, Senses +from .sitelinks import Sitelink, Sitelinks +from .snaks import Snak, Snaks diff --git a/wikibaseintegrator/models/references.py b/wikibaseintegrator/models/references.py index ba7aa1af..9a320654 100644 --- a/wikibaseintegrator/models/references.py +++ b/wikibaseintegrator/models/references.py @@ -1,6 +1,6 @@ from __future__ import annotations -from wikibaseintegrator.models.snaks import Snaks, Snak +from wikibaseintegrator.models.snaks import Snak, Snaks from wikibaseintegrator.wbi_enums import ActionIfExists diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 5b42a4ba..877fbbbf 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -9,7 +9,7 @@ from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists -from wikibaseintegrator.wbi_helpers import format_amount, execute_sparql_query +from wikibaseintegrator.wbi_helpers import execute_sparql_query, format_amount fastrun_store = [] diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index 7fe1fc63..b65db775 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -4,7 +4,7 @@ import requests from mwoauth import ConsumerToken, Handshaker, OAuthException from oauthlib.oauth2 import BackendApplicationClient, InvalidClientError -from requests_oauthlib import OAuth1, 
OAuth2Session, OAuth2 +from requests_oauthlib import OAuth1, OAuth2, OAuth2Session from wikibaseintegrator.wbi_backoff import wbi_backoff from wikibaseintegrator.wbi_config import config From 837bd0e2aea55fbdd94925e810327a96461a5700 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Mon, 20 Sep 2021 00:00:59 +0200 Subject: [PATCH 116/308] Remove useless BaseEntity.json --- wikibaseintegrator/entities/baseentity.py | 4 ---- wikibaseintegrator/entities/property.py | 2 -- 2 files changed, 6 deletions(-) diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 67ad7362..78ec174a 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -31,8 +31,6 @@ def __init__(self, api=None, lastrevid=None, type=None, id=None, claims=None, is self.id = id self.claims = claims or Claims() - self.json = {} - self.fast_run_container = None self.debug = config['DEBUG'] @@ -61,8 +59,6 @@ def get_json(self) -> {}: return json_data def from_json(self, json_data): - self.json = json_data - if 'missing' in json_data: raise ValueError('Entity is nonexistent') diff --git a/wikibaseintegrator/entities/property.py b/wikibaseintegrator/entities/property.py index b5b659a1..776c55f1 100644 --- a/wikibaseintegrator/entities/property.py +++ b/wikibaseintegrator/entities/property.py @@ -14,8 +14,6 @@ class Property(BaseEntity): def __init__(self, datatype=None, labels=None, descriptions=None, aliases=None, **kwargs): super().__init__(**kwargs) - self.json = None - # Property specific self.datatype = datatype From dd488675cef12cb77233114efae95d44ebc1e8a9 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Tue, 21 Sep 2021 21:00:57 +0200 Subject: [PATCH 117/308] Improve unit tests for wbi_helpers --- test/test_all.py | 43 -------------------------------- test/test_wbi_helpers.py | 54 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 
insertions(+), 43 deletions(-) create mode 100644 test/test_wbi_helpers.py diff --git a/test/test_all.py b/test/test_all.py index e5fc9c11..c8a43050 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -1,32 +1,17 @@ import copy import unittest -import requests - from wikibaseintegrator import WikibaseIntegrator, datatypes, wbi_fastrun from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.entities import Item -from wikibaseintegrator.entities.baseentity import MWApiError from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists -from wikibaseintegrator.wbi_helpers import execute_sparql_query, get_user_agent, mediawiki_api_call_helper config['DEBUG'] = True wbi = WikibaseIntegrator() -class TestMediawikiApiCall(unittest.TestCase): - def test_all(self): - with self.assertRaises(MWApiError): - mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, mediawiki_api_url="https://www.wikidataaaaaaa.org", max_retries=3, - retry_after=1, allow_anonymous=True) - with self.assertRaises(requests.HTTPError): - mediawiki_api_call_helper(data=None, mediawiki_api_url="https://httpbin.org/status/400", max_retries=3, retry_after=1, allow_anonymous=True) - - mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True) - - class TestDataType(unittest.TestCase): def test_quantity(self): dt = datatypes.Quantity(amount='34.5', prop_nr='P43') @@ -194,34 +179,6 @@ def test_mediainfo(): assert mediainfo_item_by_id.id == 'M75908279' -def test_user_agent(capfd): - # Test there is a warning - mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True) - out, err = capfd.readouterr() - assert out - - # Test there is no warning because of the user agent - mediawiki_api_call_helper(data={'format': 'json', 'action': 
'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True, user_agent='MyWikibaseBot/0.5') - out, err = capfd.readouterr() - assert not out - - # Test if the user agent is correctly added - new_user_agent = get_user_agent(user_agent='MyWikibaseBot/0.5') - assert new_user_agent.startswith('MyWikibaseBot/0.5') - assert 'WikibaseIntegrator' in new_user_agent - - -def test_sparql(): - results = execute_sparql_query('''SELECT ?child ?childLabel -WHERE -{ -# ?child father Bach - ?child wdt:P22 wd:Q1339. - SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". } -}''') - assert len(results['results']['bindings']) > 1 - - def test_wikibaseintegrator(): nwbi = WikibaseIntegrator(is_bot=False) assert nwbi.item.api.is_bot is False diff --git a/test/test_wbi_helpers.py b/test/test_wbi_helpers.py new file mode 100644 index 00000000..d361164b --- /dev/null +++ b/test/test_wbi_helpers.py @@ -0,0 +1,54 @@ +import unittest + +import requests + +from wikibaseintegrator.wbi_exceptions import MWApiError +from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper, get_user_agent, execute_sparql_query + + +def test_connection(): + with unittest.TestCase().assertRaises(MWApiError): + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, mediawiki_api_url="https://www.wikidataaaaaaa.org", max_retries=3, + retry_after=1, allow_anonymous=True) + with unittest.TestCase().assertRaises(requests.HTTPError): + mediawiki_api_call_helper(data=None, mediawiki_api_url="https://httpbin.org/status/400", max_retries=3, retry_after=1, allow_anonymous=True) + + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True) + + +def test_user_agent(capfd): + # Test there is a warning + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True) + 
out, err = capfd.readouterr() + assert out + + # Test there is no warning because of the user agent + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True, user_agent='MyWikibaseBot/0.5') + out, err = capfd.readouterr() + assert not out + + # Test if the user agent is correctly added + new_user_agent = get_user_agent(user_agent='MyWikibaseBot/0.5') + assert new_user_agent.startswith('MyWikibaseBot/0.5') + assert 'WikibaseIntegrator' in new_user_agent + + +def test_allow_anonymous(): + # Test there is a warning because of allow_anonymous + with unittest.TestCase().assertRaises(ValueError): + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, user_agent='MyWikibaseBot/0.5') + + # Test there is no warning because of allow_anonymous + assert mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True, + user_agent='MyWikibaseBot/0.5') + + +def test_sparql(): + results = execute_sparql_query('''SELECT ?child ?childLabel +WHERE +{ +# ?child father Bach + ?child wdt:P22 wd:Q1339. + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". 
} +}''') + assert len(results['results']['bindings']) > 1 From 6036f281d760c6452618040f02590ced927e55ad Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Tue, 21 Sep 2021 21:01:11 +0200 Subject: [PATCH 118/308] Add no coverage for untestable code --- wikibaseintegrator/wbi_helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index 57d733cf..ac0c990d 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -53,7 +53,7 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries print(f"Connection error: {e}. Sleeping for {retry_after} seconds.") sleep(retry_after) continue - if response.status_code == 503: + if response.status_code == 503: # pragma: no cover print(f"service unavailable. sleeping for {retry_after} seconds") sleep(retry_after) continue @@ -70,7 +70,7 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries error_msg_names = set() if 'messages' in json_data['error']: error_msg_names = {x.get('name') for x in json_data['error']['messages']} - if 'actionthrottledtext' in error_msg_names: + if 'actionthrottledtext' in error_msg_names: # pragma: no cover sleep_sec = int(response.headers.get('retry-after', retry_after)) print(f"{datetime.datetime.utcnow()}: rate limited. 
sleeping for {sleep_sec} seconds") sleep(sleep_sec) @@ -90,7 +90,7 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries continue # readonly - if 'code' in json_data['error'] and json_data['error']['code'] == 'readonly': + if 'code' in json_data['error'] and json_data['error']['code'] == 'readonly': # pragma: no cover print(f'The Wikibase instance is currently in readonly mode, waiting for {retry_after} seconds') sleep(retry_after) continue From 4764ce2c44b219643f64a05475c3bf2d01a4ba0d Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 25 Sep 2021 10:38:55 +0200 Subject: [PATCH 119/308] Clean docstring --- wikibaseintegrator/datatypes/basedatatype.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index 6804b1eb..4f0393cc 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -1,5 +1,4 @@ from wikibaseintegrator.models import Claim, Reference, References, Snak, Snaks -from wikibaseintegrator.wbi_enums import WikibaseSnakType class BaseDataType(Claim): @@ -18,24 +17,7 @@ def __init__(self, prop_nr=None, **kwargs): """ Constructor, will be called by all data types. - :param value: Data value of the Wikibase data snak - :type value: str or int or tuple :param prop_nr: The property number a Wikibase snak belongs to - :type prop_nr: A string with a prefixed 'P' and several digits e.g. 'P715' (Drugbank ID) or an int - :param datatype: The Wikibase data type declaration of this snak - :type datatype: str - :param snaktype: One of the values in the enum WikibaseSnakValueType denoting the state of the value: - KNOWN_VALUE, NO_VALUE or UNKNOWN_VALUE - :type snaktype: WikibaseSnakType - :param references: A one level nested list with reference Wikibase snaks of base type BaseDataType, - e.g. 
references=[[, ], []] - This will create two references, the first one with two statements, the second with one - :type references: A one level nested list with instances of BaseDataType or children of it. - :param qualifiers: A list of qualifiers for the Wikibase mainsnak - :type qualifiers: A list with instances of BaseDataType or children of it. - :param rank: The rank of a Wikibase mainsnak, should determine the status of a value - :type rank: A string of one of three allowed values: 'normal', 'deprecated', 'preferred' - :return: """ super().__init__(**kwargs) From 0c77d514a26443f5442de545f3718a50c9e0eabd Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 26 Sep 2021 01:23:01 +0200 Subject: [PATCH 120/308] Add retry for HTTP Code 500, 502 and 504 (#233) * Add retry for HTTP Code 500, 502 and 504 Add unit tests Fix #232 * Add the same for sparql requests --- test/test_wbi_helpers.py | 24 +++++++++++++++++++----- wikibaseintegrator/wbi_helpers.py | 10 +++++----- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/test/test_wbi_helpers.py b/test/test_wbi_helpers.py index d361164b..6a804314 100644 --- a/test/test_wbi_helpers.py +++ b/test/test_wbi_helpers.py @@ -7,13 +7,27 @@ def test_connection(): + data = {'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'} + + mediawiki_api_call_helper(data=data, max_retries=2, retry_after=1, allow_anonymous=True) + with unittest.TestCase().assertRaises(MWApiError): - mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, mediawiki_api_url="https://www.wikidataaaaaaa.org", max_retries=3, - retry_after=1, allow_anonymous=True) - with unittest.TestCase().assertRaises(requests.HTTPError): - mediawiki_api_call_helper(data=None, mediawiki_api_url="https://httpbin.org/status/400", max_retries=3, retry_after=1, allow_anonymous=True) + mediawiki_api_call_helper(data=data, mediawiki_api_url="https://www.wikidataaaaaaa.org", max_retries=2, 
retry_after=1, allow_anonymous=True) - mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True) + with unittest.TestCase().assertRaises(MWApiError): + mediawiki_api_call_helper(data=data, mediawiki_api_url="https://httpbin.org/status/500", max_retries=2, retry_after=1, allow_anonymous=True) + + with unittest.TestCase().assertRaises(MWApiError): + mediawiki_api_call_helper(data=data, mediawiki_api_url="https://httpbin.org/status/502", max_retries=2, retry_after=1, allow_anonymous=True) + + with unittest.TestCase().assertRaises(MWApiError): + mediawiki_api_call_helper(data=data, mediawiki_api_url="https://httpbin.org/status/503", max_retries=2, retry_after=1, allow_anonymous=True) + + with unittest.TestCase().assertRaises(MWApiError): + mediawiki_api_call_helper(data=data, mediawiki_api_url="https://httpbin.org/status/504", max_retries=2, retry_after=1, allow_anonymous=True) + + with unittest.TestCase().assertRaises(requests.HTTPError): + mediawiki_api_call_helper(data=data, mediawiki_api_url="https://httpbin.org/status/400", max_retries=2, retry_after=1, allow_anonymous=True) def test_user_agent(capfd): diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index ac0c990d..9b3e1a89 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -21,7 +21,7 @@ class BColors: UNDERLINE = '\033[4m' -def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries=1000, retry_after=60, **kwargs): +def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries=100, retry_after=60, **kwargs): """ :param method: 'GET' or 'POST' :param mediawiki_api_url: @@ -53,8 +53,8 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries print(f"Connection error: {e}. 
Sleeping for {retry_after} seconds.") sleep(retry_after) continue - if response.status_code == 503: # pragma: no cover - print(f"service unavailable. sleeping for {retry_after} seconds") + if response.status_code in (500, 502, 503, 504): + print(f"Service unavailable (HTTP Code {response.status_code}). Sleeping for {retry_after} seconds.") sleep(retry_after) continue @@ -205,8 +205,8 @@ def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max print(f"Connection error: {e}. Sleeping for {retry_after} seconds.") sleep(retry_after) continue - if response.status_code == 503: - print(f"Service unavailable (503). Sleeping for {retry_after} seconds") + if response.status_code in (500, 502, 503, 504): + print(f"Service unavailable (HTTP Code {response.status_code}). Sleeping for {retry_after} seconds.") sleep(retry_after) continue if response.status_code == 429: From 39c485cbe7fbb8792d6c7881ca755f2defd3cb59 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 26 Sep 2021 10:13:31 +0200 Subject: [PATCH 121/308] Use mypy to setup static typing (#141) * Add static typing --- .coveragerc | 3 + .github/workflows/python-lint.yml | 38 ++++ .../{python-package.yml => python-pytest.yml} | 78 ++++----- .idea/WikibaseIntegrator.iml | 1 + pyproject.toml | 10 ++ setup.cfg | 6 +- test/test_all.py | 11 +- test/test_wbi_fastrun.py | 34 ++-- test/test_wbi_helpers.py | 2 +- wikibaseintegrator/datatypes/basedatatype.py | 77 ++------ wikibaseintegrator/datatypes/form.py | 7 +- wikibaseintegrator/datatypes/geoshape.py | 3 +- .../datatypes/globecoordinate.py | 28 ++- wikibaseintegrator/datatypes/item.py | 16 +- wikibaseintegrator/datatypes/lexeme.py | 16 +- .../datatypes/monolingualtext.py | 24 +-- wikibaseintegrator/datatypes/property.py | 15 +- wikibaseintegrator/datatypes/quantity.py | 26 +-- wikibaseintegrator/datatypes/sense.py | 16 +- wikibaseintegrator/datatypes/string.py | 15 +- wikibaseintegrator/datatypes/tabulardata.py | 19 
+- wikibaseintegrator/datatypes/time.py | 25 +-- wikibaseintegrator/datatypes/url.py | 15 +- wikibaseintegrator/entities/baseentity.py | 67 ++++--- wikibaseintegrator/entities/item.py | 21 +-- wikibaseintegrator/entities/lexeme.py | 34 ++-- wikibaseintegrator/entities/mediainfo.py | 27 +-- wikibaseintegrator/entities/property.py | 22 +-- wikibaseintegrator/models/aliases.py | 28 +-- wikibaseintegrator/models/claims.py | 136 ++++++++++----- wikibaseintegrator/models/forms.py | 56 ++++-- wikibaseintegrator/models/language_values.py | 44 +++-- wikibaseintegrator/models/qualifiers.py | 17 +- wikibaseintegrator/models/references.py | 29 +-- wikibaseintegrator/models/senses.py | 44 +++-- wikibaseintegrator/models/sitelinks.py | 17 +- wikibaseintegrator/models/snaks.py | 18 +- wikibaseintegrator/wbi_config.py | 5 +- wikibaseintegrator/wbi_exceptions.py | 10 +- wikibaseintegrator/wbi_fastrun.py | 85 ++++----- wikibaseintegrator/wbi_helpers.py | 165 ++++++++---------- wikibaseintegrator/wbi_login.py | 52 +++--- wikibaseintegrator/wikibaseintegrator.py | 5 +- 43 files changed, 688 insertions(+), 679 deletions(-) create mode 100644 .github/workflows/python-lint.yml rename .github/workflows/{python-package.yml => python-pytest.yml} (94%) diff --git a/.coveragerc b/.coveragerc index 5599afe2..11f0f2ae 100644 --- a/.coveragerc +++ b/.coveragerc @@ -20,4 +20,7 @@ exclude_lines = if 0: if __name__ == .__main__.: +# Don't complain about TYPE_CHECKING specific imports: + if TYPE_CHECKING: + ignore_errors = True diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml new file mode 100644 index 00000000..3031d17d --- /dev/null +++ b/.github/workflows/python-lint.yml @@ -0,0 +1,38 @@ +name: Python Code Quality and Lint + +on: + push: + branches: [ master ] + pull_request: + branches: [ '**' ] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 
+ with: + python-version: '3.9' + - name: Cache pip + uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + ${{ runner.os }}- + - name: Upgrade setup tools + run: python -m pip install --upgrade pip setuptools + - name: Install dependencies + run: | + pip install . .[dev] + mypy --install-types --non-interactive + - name: isort imports check + run: isort --check --diff wikibaseintegrator test + - name: mypy typing check + run: mypy + - name: pylint code linting + run: pylint wikibaseintegrator test || pylint-exit $? diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-pytest.yml similarity index 94% rename from .github/workflows/python-package.yml rename to .github/workflows/python-pytest.yml index 8cc77686..dca4ef08 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-pytest.yml @@ -1,39 +1,39 @@ -name: Python package - -on: - push: - branches: [ master ] - pull_request: - branches: [ '**' ] - -jobs: - build: - name: pytest ${{ matrix.python-version }} - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [ '3.7', '3.8', '3.9', '3.10-dev' ] - - steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - name: Cache pip - uses: actions/cache@v2 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - ${{ runner.os }}- - - name: Upgrade setup tools - run: | - python -m pip install --upgrade pip setuptools - - name: Install dependencies - run: | - python -m pip install .[dev] - - name: Test with pytest - run: | - pytest +name: Python pytest + +on: + push: + branches: [ master ] + pull_request: + branches: [ '**' ] + +jobs: + build: + name: pytest ${{ matrix.python-version }} + runs-on: ubuntu-latest + 
strategy: + matrix: + python-version: [ '3.7', '3.8', '3.9', '3.10-dev' ] + + steps: + - uses: actions/checkout@v2 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Cache pip + uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + ${{ runner.os }}- + - name: Upgrade setup tools + run: | + python -m pip install --upgrade pip setuptools + - name: Install dependencies + run: | + python -m pip install .[dev] + - name: Test with pytest + run: | + pytest diff --git a/.idea/WikibaseIntegrator.iml b/.idea/WikibaseIntegrator.iml index 2210e94f..47cb73f0 100644 --- a/.idea/WikibaseIntegrator.iml +++ b/.idea/WikibaseIntegrator.iml @@ -11,6 +11,7 @@ + diff --git a/pyproject.toml b/pyproject.toml index fa2a8b22..4913d438 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,3 +4,13 @@ requires = [ "wheel" ] build-backend = "setuptools.build_meta" + +[tool.isort] +line_length = 179 + +[tool.mypy] +ignore_missing_imports = true +files = "wikibaseintegrator/**/*.py,test/*.py" + +[tool.pylint.format] +max-line-length = 179 diff --git a/setup.cfg b/setup.cfg index 288f1869..4c68650c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,8 +42,8 @@ python_requires = >=3.7, <3.11 [options.extras_require] dev = pytest + pylint + pylint-exit + mypy coverage = pytest-cov - -[isort] -line_length = 179 diff --git a/test/test_all.py b/test/test_all.py index c8a43050..07cb85b8 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -39,16 +39,13 @@ def test_geoshape(self): dt_json = dt.get_json() - if not dt_json['mainsnak']['datatype'] == 'geo-shape': - raise + assert dt_json['mainsnak']['datatype'] == 'geo-shape' value = dt_json['mainsnak']['datavalue'] - if not value['value'] == 'Data:Inner_West_Light_Rail_stops.map': - raise + assert value['value'] == 'Data:Inner_West_Light_Rail_stops.map' - 
if not value['type'] == 'string': - raise + assert value['type'] == 'string' class TestFastRun(unittest.TestCase): @@ -172,7 +169,7 @@ def test_ref_equals(): def test_mediainfo(): - mediainfo_item_by_title = wbi.mediainfo.get_by_title(title='File:2018-07-05-budapest-buda-hill.jpg', mediawiki_api_url='https://commons.wikimedia.org/w/api.php') + mediainfo_item_by_title = wbi.mediainfo.get_by_title(titles='File:2018-07-05-budapest-buda-hill.jpg', mediawiki_api_url='https://commons.wikimedia.org/w/api.php') assert mediainfo_item_by_title.id == 'M75908279' mediainfo_item_by_id = wbi.mediainfo.get(entity_id='M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php') diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py index 84639e4e..fa4305d7 100644 --- a/test/test_wbi_fastrun.py +++ b/test/test_wbi_fastrun.py @@ -1,3 +1,6 @@ +from collections import defaultdict +from typing import Any + from wikibaseintegrator import WikibaseIntegrator, wbi_fastrun from wikibaseintegrator.datatypes import BaseDataType, ExternalID, Item from wikibaseintegrator.wbi_config import config @@ -92,7 +95,7 @@ def test_query_data_ref(): class FastRunContainerFakeQueryDataEnsembl(wbi_fastrun.FastRunContainer): - def __init__(self, *args, **kwargs): + def __init__(self, *args: Any, **kwargs: Any): super().__init__(*args, **kwargs) self.prop_dt_map = {'P248': 'wikibase-item', 'P594': 'external-id'} self.prop_data['Q14911732'] = {'P594': { @@ -102,11 +105,12 @@ def __init__(self, *args, **kwargs): ('P248', 'Q29458763'), # stated in ensembl Release 88 ('P594', 'ENSG00000123374')}}, 'v': 'ENSG00000123374'}}} - self.rev_lookup = {'ENSG00000123374': {'Q14911732'}} + self.rev_lookup = defaultdict(set) + self.rev_lookup['ENSG00000123374'].add('Q14911732') class FastRunContainerFakeQueryDataEnsemblNoRef(wbi_fastrun.FastRunContainer): - def __init__(self, *args, **kwargs): + def __init__(self, *args: Any, **kwargs: Any): super().__init__(*args, **kwargs) self.prop_dt_map = {'P248': 
'wikibase-item', 'P594': 'external-id'} self.prop_data['Q14911732'] = {'P594': { @@ -114,26 +118,25 @@ def __init__(self, *args, **kwargs): 'qual': set(), 'ref': dict(), 'v': 'ENSG00000123374'}}} - self.rev_lookup = {'ENSG00000123374': {'Q14911732'}} + self.rev_lookup = defaultdict(set) + self.rev_lookup['ENSG00000123374'].add('Q14911732') def test_fastrun_ref_ensembl(): # fastrun checks refs frc = FastRunContainerFakeQueryDataEnsembl(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType, use_refs=True) + frc.debug = True # statement has no ref - frc.debug = True statements = [ExternalID(value='ENSG00000123374', prop_nr='P594')] assert frc.write_required(data=statements) # statement has the same ref - statements = [ - ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[Item("Q29458763", prop_nr="P248"), ExternalID("ENSG00000123374", prop_nr="P594")]])] + statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[Item("Q29458763", prop_nr="P248"), ExternalID("ENSG00000123374", prop_nr="P594")]])] assert not frc.write_required(data=statements) # new statement has an different stated in - statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', - references=[[Item("Q99999999999", prop_nr="P248"), ExternalID("ENSG00000123374", prop_nr="P594", )]])] + statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[Item("Q99999999999", prop_nr="P248"), ExternalID("ENSG00000123374", prop_nr="P594", )]])] assert frc.write_required(data=statements) # fastrun don't check references, statement has no reference, @@ -149,15 +152,16 @@ def test_fastrun_ref_ensembl(): class FakeQueryDataAppendProps(wbi_fastrun.FastRunContainer): # an item with three values for the same property - def __init__(self, *args, **kwargs): + def __init__(self, *args: Any, **kwargs: Any): super().__init__(*args, **kwargs) self.debug = True self.prop_dt_map = {'P527': 'wikibase-item', 'P248': 'wikibase-item', 'P594': 
'external-id'} - self.rev_lookup = { - 'Q24784025': {'Q3402672'}, - 'Q24743729': {'Q3402672'}, - 'Q24782625': {'Q3402672'}, - } + + self.rev_lookup = defaultdict(set) + self.rev_lookup['Q24784025'].add('Q3402672') + self.rev_lookup['Q24743729'].add('Q3402672') + self.rev_lookup['Q24782625'].add('Q3402672') + self.prop_data['Q3402672'] = {'P527': { 'Q3402672-11BA231B-857B-498B-AC4F-91D71EE007FD': {'qual': set(), 'ref': { diff --git a/test/test_wbi_helpers.py b/test/test_wbi_helpers.py index 6a804314..f29c15b8 100644 --- a/test/test_wbi_helpers.py +++ b/test/test_wbi_helpers.py @@ -3,7 +3,7 @@ import requests from wikibaseintegrator.wbi_exceptions import MWApiError -from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper, get_user_agent, execute_sparql_query +from wikibaseintegrator.wbi_helpers import execute_sparql_query, get_user_agent, mediawiki_api_call_helper def test_connection(): diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index 4f0393cc..8e7f3bce 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -1,4 +1,8 @@ -from wikibaseintegrator.models import Claim, Reference, References, Snak, Snaks +from __future__ import annotations + +from typing import Any, List, Type, Union + +from wikibaseintegrator.models import Claim class BaseDataType(Claim): @@ -6,14 +10,15 @@ class BaseDataType(Claim): The base class for all Wikibase data types, they inherit from it """ DTYPE = 'base-data-type' - sparql_query = ''' + subclasses: List[Type[BaseDataType]] = [] + sparql_query: str = ''' SELECT * WHERE {{ ?item_id <{wb_url}/prop/{pid}> ?s . ?s <{wb_url}/prop/statement/{pid}> '{value}' . }} ''' - def __init__(self, prop_nr=None, **kwargs): + def __init__(self, prop_nr: Union[int, str] = None, **kwargs: Any): """ Constructor, will be called by all data types. 
@@ -22,65 +27,13 @@ def __init__(self, prop_nr=None, **kwargs): super().__init__(**kwargs) - if isinstance(self.references, Reference): - self.references = References().add(self.references) - elif isinstance(self.references, list): - references = References() - for ref_list in self.references: - reference = Reference() - if isinstance(ref_list, list): - snaks = Snaks() - for ref_claim in ref_list: - if isinstance(ref_claim, Claim): - snaks.add(Snak().from_json(ref_claim.get_json()['mainsnak'])) - references.add(reference=reference) - else: - raise ValueError - reference.snaks = snaks - elif isinstance(ref_list, Claim): - reference.snaks = Snaks().add(Snak().from_json(ref_list.get_json()['mainsnak'])) - elif isinstance(ref_list, Reference): - reference = ref_list - references.add(reference=reference) - self.references = references - self.mainsnak.property_number = prop_nr or None + # self.subclasses.append(self) - def get_sparql_value(self): - return self.mainsnak.datavalue['value'] - - def equals(self, that, include_ref=False, fref=None): - """ - Tests for equality of two statements. - If comparing references, the order of the arguments matters!!! - self is the current statement, the next argument is the new statement. - Allows passing in a function to use to compare the references 'fref'. Default is equality. 
- fref accepts two arguments 'oldrefs' and 'newrefs', each of which are a list of references, - where each reference is a list of statements - """ - - if not include_ref: - # return the result of BaseDataType.__eq__, which is testing for equality of value and qualifiers - return self == that - - if self != that: - return False + # Allow registration of subclasses of BaseDataType into BaseDataType.subclasses + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + cls.subclasses.append(cls) - if fref is None: - return BaseDataType.refs_equal(self, that) - - return fref(self, that) - - @staticmethod - def refs_equal(olditem, newitem): - """ - tests for exactly identical references - """ - - oldrefs = olditem.references - newrefs = newitem.references - - def ref_equal(oldref, newref): - return (len(oldref) == len(newref)) and all(x in oldref for x in newref) - - return len(oldrefs) == len(newrefs) and all(any(ref_equal(oldref, newref) for oldref in oldrefs) for newref in newrefs) + def _get_sparql_value(self) -> str: + return self.mainsnak.datavalue['value'] diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py index ab4f9884..7af179bc 100644 --- a/wikibaseintegrator/datatypes/form.py +++ b/wikibaseintegrator/datatypes/form.py @@ -1,4 +1,5 @@ import re +from typing import Any from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -15,12 +16,12 @@ class Form(BaseDataType): }} ''' - def __init__(self, value=None, **kwargs): + def __init__(self, value: str = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType :param value: The form number to serve as a value using the format "L-F" (example: L252248-F2) - :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix + :type value: str with the format "L-F" :param prop_nr: The property number for this claim :type prop_nr: str with a 'P' prefix followed by digits :param snaktype: The snak 
type, either 'value', 'somevalue' or 'novalue' @@ -52,5 +53,5 @@ def __init__(self, value=None, **kwargs): 'type': 'wikibase-entityid' } - def get_sparql_value(self): + def _get_sparql_value(self) -> str: return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/geoshape.py b/wikibaseintegrator/datatypes/geoshape.py index 3aff16fe..34ded741 100644 --- a/wikibaseintegrator/datatypes/geoshape.py +++ b/wikibaseintegrator/datatypes/geoshape.py @@ -1,4 +1,5 @@ import re +from typing import Any from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -15,7 +16,7 @@ class GeoShape(BaseDataType): }} ''' - def __init__(self, value=None, **kwargs): + def __init__(self, value: str = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py index cc02f219..35483e09 100644 --- a/wikibaseintegrator/datatypes/globecoordinate.py +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -1,3 +1,5 @@ +from typing import Any + from wikibaseintegrator.datatypes.basedatatype import BaseDataType from wikibaseintegrator.wbi_config import config @@ -14,33 +16,23 @@ class GlobeCoordinate(BaseDataType): }} ''' - def __init__(self, latitude=None, longitude=None, precision=None, globe=None, wikibase_url=None, **kwargs): + def __init__(self, latitude: float = None, longitude: float = None, precision: float = None, globe: str = None, wikibase_url: str = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType :param latitude: Latitute in decimal format - :type latitude: float or None :param longitude: Longitude in decimal format - :type longitude: float or None :param precision: Precision of the position measurement, default 1 / 3600 - :type precision: float or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param snaktype: The snak type, either 
'value', 'somevalue' or 'novalue' - :type snaktype: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str + :param globe: The globe entity concept URI (ex: http://www.wikidata.org/entity/Q2) or 'Q2' + :param wikibase_url: The default wikibase URL, used when the globe is only an ID like 'Q2'. Use wbi_config['WIKIBASE_URL'] by default. """ super().__init__(**kwargs) - precision = precision or 1 / 3600 # https://github.com/wikimedia/Wikibase/blob/174450de8fdeabcf97287604dbbf04d07bb5000c/repo/includes/Rdf/Values/GlobeCoordinateRdfBuilder.php#L120 - globe = globe or config['COORDINATE_GLOBE_QID'] - wikibase_url = wikibase_url or config['WIKIBASE_URL'] + # https://github.com/wikimedia/Wikibase/blob/174450de8fdeabcf97287604dbbf04d07bb5000c/repo/includes/Rdf/Values/GlobeCoordinateRdfBuilder.php#L120 + precision = precision or 1 / 3600 + globe = globe or str(config['COORDINATE_GLOBE_QID']) + wikibase_url = wikibase_url or str(config['WIKIBASE_URL']) if globe.startswith('Q'): globe = wikibase_url + '/entity/' + globe @@ -64,5 +56,5 @@ def __init__(self, latitude=None, longitude=None, precision=None, globe=None, wi 'type': 'globecoordinate' } - def get_sparql_value(self): + def _get_sparql_value(self) -> str: return 'Point(' + str(self.mainsnak.datavalue['value']['latitude']) + ', ' + str(self.mainsnak.datavalue['value']['longitude']) + ')' diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py index 6459d837..c9690446 100644 --- a/wikibaseintegrator/datatypes/item.py +++ b/wikibaseintegrator/datatypes/item.py @@ -1,4 +1,5 @@ import re +from typing import Any, Union from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -15,22 +16,11 @@ class Item(BaseDataType): 
}} ''' - def __init__(self, value=None, **kwargs): + def __init__(self, value: Union[str, int] = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType :param value: The item ID to serve as the value - :type value: str with a 'Q' prefix, followed by several digits or only the digits without the 'Q' prefix - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' - :type snaktype: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str """ super().__init__(**kwargs) @@ -56,5 +46,5 @@ def __init__(self, value=None, **kwargs): 'type': 'wikibase-entityid' } - def get_sparql_value(self): + def _get_sparql_value(self) -> str: return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py index 09e66968..bb8eb622 100644 --- a/wikibaseintegrator/datatypes/lexeme.py +++ b/wikibaseintegrator/datatypes/lexeme.py @@ -1,4 +1,5 @@ import re +from typing import Any, Union from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -15,22 +16,11 @@ class Lexeme(BaseDataType): }} ''' - def __init__(self, value=None, **kwargs): + def __init__(self, value: Union[str, int] = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType :param value: The lexeme number to serve as a value - :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix - :param prop_nr: The property number for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' - :type snaktype: 
str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str """ super().__init__(**kwargs) @@ -56,5 +46,5 @@ def __init__(self, value=None, **kwargs): 'type': 'wikibase-entityid' } - def get_sparql_value(self): + def _get_sparql_value(self) -> str: return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py index 14cb4280..a5e9447b 100644 --- a/wikibaseintegrator/datatypes/monolingualtext.py +++ b/wikibaseintegrator/datatypes/monolingualtext.py @@ -1,3 +1,5 @@ +from typing import Any + from wikibaseintegrator.datatypes.basedatatype import BaseDataType from wikibaseintegrator.wbi_config import config @@ -14,29 +16,17 @@ class MonolingualText(BaseDataType): }} ''' - def __init__(self, text=None, language=None, **kwargs): + def __init__(self, text: str = None, language: str = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType - :param text: The language specific string to be used as the value - :type text: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param language: Specifies the language the value belongs to - :type language: str - :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' - :type snaktype: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str + :param text: The language specific string to be used as the value. 
+ :param language: Specifies the language the value belongs to. """ super().__init__(**kwargs) - language = language or config['DEFAULT_LANGUAGE'] + language = language or str(config['DEFAULT_LANGUAGE']) assert isinstance(text, str) or text is None, f"Expected str, found {type(text)} ({text})" assert isinstance(language, str), f"Expected str, found {type(language)} ({language})" @@ -50,5 +40,5 @@ def __init__(self, text=None, language=None, **kwargs): 'type': 'monolingualtext' } - def get_sparql_value(self): + def _get_sparql_value(self) -> str: return '"' + self.mainsnak.datavalue['value']['text'].replace('"', r'\"') + '"@' + self.mainsnak.datavalue['value']['language'] diff --git a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py index 184aa04f..6069459a 100644 --- a/wikibaseintegrator/datatypes/property.py +++ b/wikibaseintegrator/datatypes/property.py @@ -1,4 +1,5 @@ import re +from typing import Any, Union from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -15,22 +16,12 @@ class Property(BaseDataType): }} ''' - def __init__(self, value=None, **kwargs): + def __init__(self, value: Union[str, int] = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType :param value: The property number to serve as a value :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix - :param prop_nr: The property number for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' - :type snaktype: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str """ super().__init__(**kwargs) @@ -56,5 +47,5 @@ def 
__init__(self, value=None, **kwargs): 'type': 'wikibase-entityid' } - def get_sparql_value(self): + def _get_sparql_value(self) -> str: return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py index 09924853..014c8c3c 100644 --- a/wikibaseintegrator/datatypes/quantity.py +++ b/wikibaseintegrator/datatypes/quantity.py @@ -1,3 +1,5 @@ +from typing import Any, Union + from wikibaseintegrator.datatypes.basedatatype import BaseDataType from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_helpers import format_amount @@ -15,36 +17,24 @@ class Quantity(BaseDataType): }} ''' - def __init__(self, amount=None, upper_bound=None, lower_bound=None, unit='1', wikibase_url=None, **kwargs): + def __init__(self, amount: Union[str, int, float] = None, upper_bound: Union[str, int, float] = None, lower_bound: Union[str, int, float] = None, unit: Union[str, int] = '1', + wikibase_url: str = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType :param amount: The amount value - :type amount: float, str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits :param upper_bound: Upper bound of the value if it exists, e.g. for standard deviations - :type upper_bound: float, str :param lower_bound: Lower bound of the value if it exists, e.g. for standard deviations - :type lower_bound: float, str :param unit: The unit item URL or the QID a certain amount has been measured in (https://www.wikidata.org/wiki/Wikidata:Units). 
The default is dimensionless, represented by a '1' - :type unit: str - :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' - :type snaktype: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str + :param wikibase_url: The default wikibase URL, used when the unit is only an ID like 'Q2'. Use wbi_config['WIKIBASE_URL'] by default. """ super().__init__(**kwargs) - wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url + wikibase_url = wikibase_url or str(config['WIKIBASE_URL']) - unit = unit or '1' + unit = str(unit or '1') if unit.startswith('Q'): unit = wikibase_url + '/entity/' + unit @@ -88,5 +78,5 @@ def __init__(self, amount=None, upper_bound=None, lower_bound=None, unit='1', wi if not lower_bound: del self.mainsnak.datavalue['value']['lowerBound'] - def get_sparql_value(self): + def _get_sparql_value(self) -> str: return format_amount(self.mainsnak.datavalue['value']['amount']) diff --git a/wikibaseintegrator/datatypes/sense.py b/wikibaseintegrator/datatypes/sense.py index 78cbddcc..7570325b 100644 --- a/wikibaseintegrator/datatypes/sense.py +++ b/wikibaseintegrator/datatypes/sense.py @@ -1,4 +1,5 @@ import re +from typing import Any, Union from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -15,22 +16,11 @@ class Sense(BaseDataType): }} ''' - def __init__(self, value=None, **kwargs): + def __init__(self, value: Union[str, int] = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType :param value: Value using the format "L-S" (example: L252248-S123) - :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix - :param prop_nr: The property number for this claim - :type 
prop_nr: str with a 'P' prefix followed by digits - :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' - :type snaktype: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str """ super().__init__(**kwargs) @@ -52,5 +42,5 @@ def __init__(self, value=None, **kwargs): 'type': 'wikibase-entityid' } - def get_sparql_value(self): + def _get_sparql_value(self) -> str: return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py index 2b721b53..ee5a760d 100644 --- a/wikibaseintegrator/datatypes/string.py +++ b/wikibaseintegrator/datatypes/string.py @@ -1,3 +1,5 @@ +from typing import Any + from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -8,22 +10,11 @@ class String(BaseDataType): DTYPE = 'string' - def __init__(self, value=None, **kwargs): + def __init__(self, value: str = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType :param value: The string to be used as the value - :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' - :type snaktype: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str """ super().__init__(**kwargs) diff --git a/wikibaseintegrator/datatypes/tabulardata.py b/wikibaseintegrator/datatypes/tabulardata.py index decdba17..efe5eb1d 
100644 --- a/wikibaseintegrator/datatypes/tabulardata.py +++ b/wikibaseintegrator/datatypes/tabulardata.py @@ -1,4 +1,5 @@ import re +from typing import Any from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -9,27 +10,11 @@ class TabularData(BaseDataType): """ DTYPE = 'tabular-data' - def __init__(self, value=None, **kwargs): + def __init__(self, value: str = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType :param value: Reference to tabular data file on Wikimedia Commons. - :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' - :type snaktype: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - :raises ValueError: Raise a ValueError Exception if the data in value parameter is not valid. 
""" super().__init__(**kwargs) diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py index 8178e872..52c4ed3c 100644 --- a/wikibaseintegrator/datatypes/time.py +++ b/wikibaseintegrator/datatypes/time.py @@ -1,5 +1,6 @@ import datetime import re +from typing import Any from wikibaseintegrator.datatypes.basedatatype import BaseDataType from wikibaseintegrator.wbi_config import config @@ -17,41 +18,27 @@ class Time(BaseDataType): }} ''' - def __init__(self, time=None, before=0, after=0, precision=11, timezone=0, calendarmodel=None, wikibase_url=None, **kwargs): + def __init__(self, time: str = None, before: int = 0, after: int = 0, precision: int = 11, timezone: int = 0, calendarmodel: str = None, wikibase_url: str = None, + **kwargs: Any): """ Constructor, calls the superclass BaseDataType :param time: Explicit value for point in time, represented as a timestamp resembling ISO 8601 - :type time: str in the format '+%Y-%m-%dT%H:%M:%SZ', e.g. '+2001-12-31T12:01:13Z' or now :param prop_nr: The property number for this claim - :type prop_nr: str with a 'P' prefix followed by digits :param before: explicit integer value for how many units after the given time it could be. The unit is given by the precision. - :type before: int :param after: explicit integer value for how many units before the given time it could be. The unit is given by the precision. - :type after: int :param precision: Precision value for dates and time as specified in the Wikibase data model (https://www.wikidata.org/wiki/Special:ListDatatypes#time) - :type precision: int :param timezone: The timezone which applies to the date and time as specified in the Wikibase data model - :type timezone: int :param calendarmodel: The calendar model used for the date. URL to the Wikibase calendar model item or the QID. 
- :type calendarmodel: str - :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' - :type snaktype: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str """ super().__init__(**kwargs) - calendarmodel = calendarmodel or config['CALENDAR_MODEL_QID'] - wikibase_url = wikibase_url or config['WIKIBASE_URL'] + calendarmodel = calendarmodel or str(config['CALENDAR_MODEL_QID']) + wikibase_url = wikibase_url or str(config['WIKIBASE_URL']) if calendarmodel.startswith('Q'): calendarmodel = wikibase_url + '/entity/' + calendarmodel @@ -84,5 +71,5 @@ def __init__(self, time=None, before=0, after=0, precision=11, timezone=0, calen 'type': 'time' } - def get_sparql_value(self): + def _get_sparql_value(self) -> str: return self.mainsnak.datavalue['value']['time'] diff --git a/wikibaseintegrator/datatypes/url.py b/wikibaseintegrator/datatypes/url.py index ec9e7fd5..f6ceeb95 100644 --- a/wikibaseintegrator/datatypes/url.py +++ b/wikibaseintegrator/datatypes/url.py @@ -1,4 +1,5 @@ import re +from typing import Any from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -15,21 +16,11 @@ class URL(BaseDataType): }} ''' - def __init__(self, value=None, **kwargs): + def __init__(self, value: str = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType + :param value: The URL to be used as the value - :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' - :type snaktype: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier 
objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str """ super().__init__(**kwargs) diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 78ec174a..213f4288 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -1,4 +1,7 @@ +from __future__ import annotations + from copy import copy +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union import simplejson @@ -9,14 +12,18 @@ from wikibaseintegrator.wbi_exceptions import MWApiError, NonUniqueLabelDescriptionPairError from wikibaseintegrator.wbi_fastrun import FastRunContainer from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper +from wikibaseintegrator.wbi_login import Login + +if TYPE_CHECKING: + from wikibaseintegrator import WikibaseIntegrator class BaseEntity: - fast_run_store = [] + fast_run_store: List[FastRunContainer] = [] ETYPE = 'base-entity' - def __init__(self, api=None, lastrevid=None, type=None, id=None, claims=None, is_bot=None, login=None): + def __init__(self, api: 'WikibaseIntegrator' = None, lastrevid: int = None, type: str = None, id: str = None, claims: Claims = None, is_bot: bool = None, login: Login = None): if not api: from wikibaseintegrator import WikibaseIntegrator self.api = WikibaseIntegrator() @@ -27,15 +34,15 @@ def __init__(self, api=None, lastrevid=None, type=None, id=None, claims=None, is self.api.login = login or self.api.login self.lastrevid = lastrevid - self.type = type or self.ETYPE + self.type = str(type or self.ETYPE) self.id = id self.claims = claims or Claims() - self.fast_run_container = None + self.fast_run_container: Optional[FastRunContainer] = None self.debug = config['DEBUG'] - def add_claims(self, claims, action_if_exists=ActionIfExists.APPEND): + def add_claims(self, claims: Union[Claim, list], action_if_exists: ActionIfExists 
= ActionIfExists.APPEND) -> BaseEntity: if isinstance(claims, Claim): claims = [claims] elif not isinstance(claims, list): @@ -45,35 +52,37 @@ def add_claims(self, claims, action_if_exists=ActionIfExists.APPEND): return self - def get_json(self) -> {}: - json_data = { + def get_json(self) -> Dict[str, Union[str, Dict[str, list]]]: + json_data: Dict = { 'type': self.type, - 'id': self.id, 'claims': self.claims.get_json() } + if self.id: + json_data['id'] = self.id if self.type == 'mediainfo': # MediaInfo change name of 'claims' to 'statements' json_data['statements'] = json_data.pop('claims') - if not self.id: - del json_data['id'] return json_data - def from_json(self, json_data): + def from_json(self, json_data: Dict[str, Any]) -> BaseEntity: if 'missing' in json_data: raise ValueError('Entity is nonexistent') - self.lastrevid = json_data['lastrevid'] - self.type = json_data['type'] - self.id = json_data['id'] + self.lastrevid = int(json_data['lastrevid']) + self.type = str(json_data['type']) + self.id = str(json_data['id']) if self.type == 'mediainfo': # 'claims' is named 'statements' in Wikimedia Commons MediaInfo self.claims = Claims().from_json(json_data['statements']) else: self.claims = Claims().from_json(json_data['claims']) - def get(self, entity_id, **kwargs): + return self + + # noinspection PyMethodMayBeStatic + def _get(self, entity_id: str, **kwargs: Any) -> Dict: """ retrieve an item in json representation from the Wikibase instance - :rtype: dict + :return: python complex dictionary representation of a json """ @@ -85,15 +94,15 @@ def get(self, entity_id, **kwargs): return mediawiki_api_call_helper(data=params, allow_anonymous=True, **kwargs) - def clear(self, **kwargs): + def clear(self, **kwargs: Any) -> None: self._write(clear=True, **kwargs) - def _write(self, data=None, summary=None, allow_anonymous=False, clear=False, **kwargs): + def _write(self, data: Dict = None, summary: str = None, allow_anonymous: bool = False, clear: bool = False, 
**kwargs: Any) -> Dict[str, Any]: """ Writes the item Json to the Wikibase instance and after successful write, updates the object with new ids and hashes generated by the Wikibase instance. For new items, also returns the new QIDs. + :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool :return: the entity ID on successful write """ @@ -116,7 +125,7 @@ def _write(self, data=None, summary=None, allow_anonymous=False, clear=False, ** data = simplejson.JSONEncoder().encode(data) - payload = { + payload: Dict[str, Any] = { 'action': 'wbeditentity', 'data': data, 'format': 'json', @@ -165,7 +174,7 @@ def _write(self, data=None, summary=None, allow_anonymous=False, clear=False, ** self.lastrevid = json_data['entity']['lastrevid'] return json_data['entity'] - def init_fastrun(self, base_filter=None, use_refs=False, case_insensitive=False): + def init_fastrun(self, base_filter: Dict[str, str] = None, use_refs: bool = False, case_insensitive: bool = False) -> None: if base_filter is None: base_filter = {} @@ -184,21 +193,25 @@ def init_fastrun(self, base_filter=None, use_refs=False, case_insensitive=False) if not self.fast_run_container: if self.debug: print("Create a new FastRunContainer") - self.fast_run_container = FastRunContainer(base_filter=base_filter, - use_refs=use_refs, - base_data_type=BaseDataType, - case_insensitive=case_insensitive) + self.fast_run_container = FastRunContainer(base_filter=base_filter, use_refs=use_refs, base_data_type=BaseDataType, case_insensitive=case_insensitive) BaseEntity.fast_run_store.append(self.fast_run_container) - def fr_search(self, **kwargs): + def fr_search(self, **kwargs: Any) -> str: self.init_fastrun(**kwargs) + + if self.fast_run_container is None: + raise ValueError("FastRunContainer is not initialized.") + self.fast_run_container.load_item(self.claims) return self.fast_run_container.current_qid - def write_required(self, base_filter=None, **kwargs): + def 
write_required(self, base_filter: Dict[str, str] = None, **kwargs: Any) -> bool: self.init_fastrun(base_filter=base_filter, **kwargs) + if self.fast_run_container is None: + raise ValueError("FastRunContainer is not initialized.") + if base_filter is None: base_filter = {} diff --git a/wikibaseintegrator/entities/item.py b/wikibaseintegrator/entities/item.py index 0baa3b47..09317b5a 100644 --- a/wikibaseintegrator/entities/item.py +++ b/wikibaseintegrator/entities/item.py @@ -1,8 +1,10 @@ from __future__ import annotations import re +from typing import Any, Dict, Union from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models import LanguageValues from wikibaseintegrator.models.aliases import Aliases from wikibaseintegrator.models.descriptions import Descriptions from wikibaseintegrator.models.labels import Labels @@ -12,7 +14,7 @@ class Item(BaseEntity): ETYPE = 'item' - def __init__(self, labels=None, descriptions=None, aliases=None, sitelinks=None, **kwargs) -> None: + def __init__(self, labels: Labels = None, descriptions: Descriptions = None, aliases: Aliases = None, sitelinks: Sitelinks = None, **kwargs: Any) -> None: """ :param api: @@ -22,21 +24,20 @@ def __init__(self, labels=None, descriptions=None, aliases=None, sitelinks=None, :param sitelinks: :param kwargs: """ - super().__init__(**kwargs) # Item and property specific - self.labels = labels or Labels() - self.descriptions = descriptions or Descriptions() + self.labels: LanguageValues = labels or Labels() + self.descriptions: LanguageValues = descriptions or Descriptions() self.aliases = aliases or Aliases() # Item specific self.sitelinks = sitelinks or Sitelinks() - def new(self, **kwargs) -> Item: + def new(self, **kwargs: Any) -> Item: return Item(api=self.api, **kwargs) - def get(self, entity_id, **kwargs) -> Item: + def get(self, entity_id: Union[str, int], **kwargs: Any) -> Item: if isinstance(entity_id, str): pattern = re.compile(r'^Q?([0-9]+)$') matches = 
pattern.match(entity_id) @@ -50,10 +51,10 @@ def get(self, entity_id, **kwargs) -> Item: raise ValueError("Item ID must be greater than 0") entity_id = f'Q{entity_id}' - json_data = super().get(entity_id=entity_id, **kwargs) + json_data = super()._get(entity_id=entity_id, **kwargs) return Item(api=self.api).from_json(json_data=json_data['entities'][entity_id]) - def get_json(self) -> {}: + def get_json(self) -> Dict[str, Union[str, Dict]]: return { 'labels': self.labels.get_json(), 'descriptions': self.descriptions.get_json(), @@ -61,7 +62,7 @@ def get_json(self) -> {}: **super().get_json() } - def from_json(self, json_data) -> Item: + def from_json(self, json_data: Dict[str, Any]) -> Item: super().from_json(json_data=json_data) self.labels = Labels().from_json(json_data['labels']) @@ -71,6 +72,6 @@ def from_json(self, json_data) -> Item: return self - def write(self, **kwargs): + def write(self, **kwargs: Any) -> Item: json_data = super()._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/lexeme.py b/wikibaseintegrator/entities/lexeme.py index 2cd4b6aa..59b3abaa 100644 --- a/wikibaseintegrator/entities/lexeme.py +++ b/wikibaseintegrator/entities/lexeme.py @@ -1,8 +1,10 @@ from __future__ import annotations import re +from typing import Any, Dict, Union from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models import LanguageValues from wikibaseintegrator.models.forms import Forms from wikibaseintegrator.models.lemmas import Lemmas from wikibaseintegrator.models.senses import Senses @@ -12,19 +14,19 @@ class Lexeme(BaseEntity): ETYPE = 'lexeme' - def __init__(self, lemmas=None, lexical_category=None, language=None, forms=None, senses=None, **kwargs): + def __init__(self, lemmas: Lemmas = None, lexical_category: str = None, language: str = None, forms: Forms = None, senses: Senses = None, **kwargs: Any): super().__init__(**kwargs) - self.lemmas = lemmas or 
Lemmas() + self.lemmas: LanguageValues = lemmas or Lemmas() self.lexical_category = lexical_category - self.language = language or config['DEFAULT_LEXEME_LANGUAGE'] + self.language = str(language or config['DEFAULT_LEXEME_LANGUAGE']) self.forms = forms or Forms() self.senses = senses or Senses() - def new(self, **kwargs) -> Lexeme: + def new(self, **kwargs: Any) -> Lexeme: return Lexeme(api=self.api, **kwargs) - def get(self, entity_id, **kwargs) -> Lexeme: + def get(self, entity_id: Union[str, int], **kwargs: Any) -> Lexeme: if isinstance(entity_id, str): pattern = re.compile(r'^L?([0-9]+)$') matches = pattern.match(entity_id) @@ -38,38 +40,34 @@ def get(self, entity_id, **kwargs) -> Lexeme: raise ValueError("Lexeme ID must be greater than 0") entity_id = f'L{entity_id}' - json_data = super().get(entity_id=entity_id, **kwargs) + json_data = super()._get(entity_id=entity_id, **kwargs) return Lexeme(api=self.api).from_json(json_data=json_data['entities'][entity_id]) - def get_json(self) -> {}: - json_data = { + def get_json(self) -> Dict[str, Union[str, dict]]: + json_data: Dict = { 'lemmas': self.lemmas.get_json(), - 'lexicalCategory': self.lexical_category, 'language': self.language, 'forms': self.forms.get_json(), 'senses': self.senses.get_json(), **super().get_json() } - if self.lexical_category is None: - del json_data['lexicalCategory'] + if self.lexical_category: + json_data['lexicalCategory'] = self.lexical_category return json_data - def from_json(self, json_data) -> Lexeme: + def from_json(self, json_data: Dict[str, Any]) -> Lexeme: super().from_json(json_data=json_data) self.lemmas = Lemmas().from_json(json_data['lemmas']) - self.lexical_category = json_data['lexicalCategory'] - self.language = json_data['language'] + self.lexical_category = str(json_data['lexicalCategory']) + self.language = str(json_data['language']) self.forms = Forms().from_json(json_data['forms']) self.senses = Senses().from_json(json_data['senses']) return self - def write(self, 
**kwargs): - if self.lexical_category is None: - raise ValueError("lexical_category can't be None") - + def write(self, **kwargs: Any) -> Lexeme: json_data = super()._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/mediainfo.py b/wikibaseintegrator/entities/mediainfo.py index 03be3352..24786e68 100644 --- a/wikibaseintegrator/entities/mediainfo.py +++ b/wikibaseintegrator/entities/mediainfo.py @@ -1,8 +1,10 @@ from __future__ import annotations import re +from typing import Any, Dict, List, Union from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models import LanguageValues from wikibaseintegrator.models.aliases import Aliases from wikibaseintegrator.models.descriptions import Descriptions from wikibaseintegrator.models.labels import Labels @@ -12,7 +14,7 @@ class MediaInfo(BaseEntity): ETYPE = 'mediainfo' - def __init__(self, labels=None, descriptions=None, aliases=None, **kwargs) -> None: + def __init__(self, labels: Labels = None, descriptions: Descriptions = None, aliases: Aliases = None, **kwargs: Any) -> None: """ :param api: @@ -25,14 +27,14 @@ def __init__(self, labels=None, descriptions=None, aliases=None, **kwargs) -> No super().__init__(**kwargs) # Item and property specific - self.labels = labels or Labels() - self.descriptions = descriptions or Descriptions() + self.labels: LanguageValues = labels or Labels() + self.descriptions: LanguageValues = descriptions or Descriptions() self.aliases = aliases or Aliases() - def new(self, **kwargs) -> MediaInfo: + def new(self, **kwargs: Any) -> MediaInfo: return MediaInfo(api=self.api, **kwargs) - def get(self, entity_id, **kwargs) -> MediaInfo: + def get(self, entity_id: Union[str, int], **kwargs: Any) -> MediaInfo: if isinstance(entity_id, str): pattern = re.compile(r'^M?([0-9]+)$') matches = pattern.match(entity_id) @@ -46,14 +48,17 @@ def get(self, entity_id, **kwargs) -> MediaInfo: raise 
ValueError("MediaInfo ID must be greater than 0") entity_id = f'M{entity_id}' - json_data = super().get(entity_id=entity_id, **kwargs) + json_data = super()._get(entity_id=entity_id, **kwargs) return MediaInfo(api=self.api).from_json(json_data=json_data['entities'][entity_id]) - def get_by_title(self, title, sites='commonswiki', **kwargs) -> MediaInfo: + def get_by_title(self, titles: Union[List[str], str], sites: str = 'commonswiki', **kwargs: Any) -> MediaInfo: + if isinstance(titles, list): + titles = '|'.join(titles) + params = { 'action': 'wbgetentities', 'sites': sites, - 'titles': title, + 'titles': titles, 'format': 'json' } @@ -66,7 +71,7 @@ def get_by_title(self, title, sites='commonswiki', **kwargs) -> MediaInfo: return MediaInfo(api=self.api).from_json(json_data=json_data['entities'][list(json_data['entities'].keys())[0]]) - def get_json(self) -> {}: + def get_json(self) -> Dict[str, Union[str, Dict]]: return { 'labels': self.labels.get_json(), 'descriptions': self.descriptions.get_json(), @@ -74,7 +79,7 @@ def get_json(self) -> {}: **super().get_json() } - def from_json(self, json_data) -> MediaInfo: + def from_json(self, json_data: Dict[str, Any]) -> MediaInfo: super().from_json(json_data=json_data) self.labels = Labels().from_json(json_data['labels']) @@ -82,6 +87,6 @@ def from_json(self, json_data) -> MediaInfo: return self - def write(self, **kwargs): + def write(self, **kwargs: Any) -> MediaInfo: json_data = super()._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/property.py b/wikibaseintegrator/entities/property.py index 776c55f1..ebbb1013 100644 --- a/wikibaseintegrator/entities/property.py +++ b/wikibaseintegrator/entities/property.py @@ -1,8 +1,10 @@ from __future__ import annotations import re +from typing import Any, Dict, Union from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models import LanguageValues from 
wikibaseintegrator.models.aliases import Aliases from wikibaseintegrator.models.descriptions import Descriptions from wikibaseintegrator.models.labels import Labels @@ -11,21 +13,21 @@ class Property(BaseEntity): ETYPE = 'property' - def __init__(self, datatype=None, labels=None, descriptions=None, aliases=None, **kwargs): + def __init__(self, datatype: str = None, labels: Labels = None, descriptions: Descriptions = None, aliases: Aliases = None, **kwargs: Any): super().__init__(**kwargs) # Property specific self.datatype = datatype # Items and property specific - self.labels = labels or Labels() - self.descriptions = descriptions or Descriptions() + self.labels: LanguageValues = labels or Labels() + self.descriptions: LanguageValues = descriptions or Descriptions() self.aliases = aliases or Aliases() - def new(self, **kwargs) -> Property: + def new(self, **kwargs: Any) -> Property: return Property(api=self.api, **kwargs) - def get(self, entity_id, **kwargs) -> Property: + def get(self, entity_id: Union[str, int], **kwargs: Any) -> Property: if isinstance(entity_id, str): pattern = re.compile(r'^P?([0-9]+)$') matches = pattern.match(entity_id) @@ -39,19 +41,19 @@ def get(self, entity_id, **kwargs) -> Property: raise ValueError("Property ID must be greater than 0") entity_id = f'P{entity_id}' - json_data = super().get(entity_id=entity_id, **kwargs) + json_data = super()._get(entity_id=entity_id, **kwargs) return Property(api=self.api).from_json(json_data=json_data['entities'][entity_id]) - def get_json(self) -> {}: + def get_json(self) -> Dict[str, Union[str, dict]]: return { - 'datatype': self.datatype, + 'datatype': str(self.datatype), 'labels': self.labels.get_json(), 'descriptions': self.descriptions.get_json(), 'aliases': self.aliases.get_json(), **super().get_json() } - def from_json(self, json_data) -> Property: + def from_json(self, json_data: Dict[str, Any]) -> Property: super().from_json(json_data=json_data) self.datatype = json_data['datatype'] @@ -61,6 
+63,6 @@ def from_json(self, json_data) -> Property: return self - def write(self, **kwargs): + def write(self, **kwargs: Any) -> Property: json_data = super()._write(data=self.get_json(), **kwargs) return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/models/aliases.py b/wikibaseintegrator/models/aliases.py index c96f13c3..cc9f168e 100644 --- a/wikibaseintegrator/models/aliases.py +++ b/wikibaseintegrator/models/aliases.py @@ -1,24 +1,28 @@ +from __future__ import annotations + +from typing import Dict, List, Optional, Union + from wikibaseintegrator.models.language_values import LanguageValue from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists class Aliases: - def __init__(self, language=None, value=None): - self.__aliases = {} + def __init__(self, language: str = None, value: str = None): + self.aliases: Dict[str, str] = {} if language is not None: self.set(language=language, values=value) @property - def aliases(self): + def aliases(self) -> Dict[str, List[Alias]]: return self.__aliases @aliases.setter - def aliases(self, value): + def aliases(self, value: Dict[str, List[Alias]]): self.__aliases = value - def get(self, language=None): + def get(self, language: str = None) -> Optional[List[Alias]]: if language is None: # TODO: Don't return a list of list, just a list return [item for sublist in self.aliases.values() for item in sublist] @@ -28,8 +32,8 @@ def get(self, language=None): return None - def set(self, language=None, values=None, action_if_exists=ActionIfExists.APPEND): - language = language or config['DEFAULT_LANGUAGE'] + def set(self, language: str = None, values: Union[str, list] = None, action_if_exists: ActionIfExists = ActionIfExists.APPEND) -> Aliases: + language = str(language or config['DEFAULT_LANGUAGE']) assert action_if_exists in ActionIfExists assert language is not None @@ -41,11 +45,11 @@ def set(self, language=None, values=None, 
action_if_exists=ActionIfExists.APPEND if action_if_exists != ActionIfExists.KEEP: for alias in self.aliases[language]: alias.remove() - return self.aliases[language] + return self if isinstance(values, str): values = [values] - elif not isinstance(values, list) and values is not None: + elif values is not None and not isinstance(values, list): raise TypeError(f"value must be a str or list of strings, got '{type(values)}'") if action_if_exists == ActionIfExists.REPLACE: @@ -67,8 +71,8 @@ def set(self, language=None, values=None, action_if_exists=ActionIfExists.APPEND return self - def get_json(self) -> {}: - json_data = {} + def get_json(self) -> Dict[str, list]: + json_data: Dict[str, list] = {} for language in self.aliases: if language not in json_data: json_data[language] = [] @@ -76,7 +80,7 @@ def get_json(self) -> {}: json_data[language].append(alias.get_json()) return json_data - def from_json(self, json_data): + def from_json(self, json_data: Dict[str, list]) -> Aliases: for language in json_data: for alias in json_data[language]: self.set(alias['language'], alias['value']) diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 09609d8a..8650a8a4 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -1,11 +1,11 @@ from __future__ import annotations import copy -from typing import Union +from typing import Any, Callable, Dict, List, Optional, Union from wikibaseintegrator.models.qualifiers import Qualifiers -from wikibaseintegrator.models.references import References -from wikibaseintegrator.models.snaks import Snak +from wikibaseintegrator.models.references import Reference, References +from wikibaseintegrator.models.snaks import Snak, Snaks from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseRank @@ -21,10 +21,10 @@ def claims(self): def claims(self, claims): self.__claims = claims - def get(self, property=None) -> list: + def get(self, property: str = None) -> List: 
return self.claims[property] - def add(self, claims: Union[list, Claim, None] = None, action_if_exists=ActionIfExists.REPLACE) -> Claims: + def add(self, claims: Union[list, Claim, None] = None, action_if_exists: ActionIfExists = ActionIfExists.REPLACE) -> Claims: """ :param claims: @@ -74,20 +74,20 @@ def add(self, claims: Union[list, Claim, None] = None, action_if_exists=ActionIf return self - def from_json(self, json_data) -> Claims: + def from_json(self, json_data: Dict[str, Any]) -> Claims: for property in json_data: for claim in json_data[property]: from wikibaseintegrator.datatypes import BaseDataType if 'datatype' in claim['mainsnak']: data_type = [x for x in BaseDataType.subclasses if x.DTYPE == claim['mainsnak']['datatype']][0] else: - data_type = Claim + data_type = BaseDataType self.add(claims=data_type().from_json(claim), action_if_exists=ActionIfExists.FORCE_APPEND) return self - def get_json(self) -> {}: - json_data = {} + def get_json(self) -> Dict[str, list]: + json_data: Dict[str, list] = {} for property in self.claims: if property not in json_data: json_data[property] = [] @@ -95,7 +95,7 @@ def get_json(self) -> {}: json_data[property].append(claim.get_json()) return json_data - def clear(self): + def clear(self) -> None: self.claims = {} def __len__(self): @@ -118,45 +118,59 @@ def __repr__(self): class Claim: DTYPE = 'claim' - subclasses = [] - def __init__(self, qualifiers=None, rank=None, references=None): + def __init__(self, qualifiers: Qualifiers = None, rank: WikibaseRank = None, references: Union[References, List[Union[Claim, List[Claim]]]] = None) -> None: self.mainsnak = Snak(datatype=self.DTYPE) self.type = 'statement' self.qualifiers = qualifiers or Qualifiers() self.qualifiers_order = [] self.id = None - self.rank: WikibaseRank = rank or WikibaseRank.NORMAL - self.references = references or References() + self.rank = rank or WikibaseRank.NORMAL self.removed = False - # Allow registration of subclasses of Claim into Claim.subclasses 
- def __init_subclass__(cls, **kwargs): - super().__init_subclass__(**kwargs) - cls.subclasses.append(cls) + self.references = References() + + if isinstance(references, References): + self.references = references + elif isinstance(references, list): + for ref_list in references: + ref = Reference() + if isinstance(ref_list, list): + snaks = Snaks() + for ref_claim in ref_list: + if isinstance(ref_claim, Claim): + snaks.add(Snak().from_json(ref_claim.get_json()['mainsnak'])) + else: + raise ValueError + ref.snaks = snaks + elif isinstance(ref_list, Claim): + ref.snaks = Snaks().add(Snak().from_json(ref_list.get_json()['mainsnak'])) + elif isinstance(ref_list, Reference): + ref = ref_list + self.references.add(reference=ref) @property - def mainsnak(self): + def mainsnak(self) -> Snak: return self.__mainsnak @mainsnak.setter - def mainsnak(self, value): + def mainsnak(self, value: Snak): self.__mainsnak = value @property - def type(self): + def type(self) -> Union[str, Dict]: return self.__type @type.setter - def type(self, value): + def type(self, value: Union[str, Dict]): self.__type = value @property - def qualifiers(self): + def qualifiers(self) -> Qualifiers: return self.__qualifiers @qualifiers.setter - def qualifiers(self, value): + def qualifiers(self, value: Qualifiers) -> None: assert isinstance(value, (Qualifiers, list)) if isinstance(value, list): self.__qualifiers = Qualifiers().set(value) @@ -164,65 +178,65 @@ def qualifiers(self, value): self.__qualifiers = value @property - def qualifiers_order(self): + def qualifiers_order(self) -> List[str]: return self.__qualifiers_order @qualifiers_order.setter - def qualifiers_order(self, value): + def qualifiers_order(self, value: List[str]): self.__qualifiers_order = value @property - def id(self): + def id(self) -> Optional[str]: return self.__id @id.setter - def id(self, value): + def id(self, value: Optional[str]): self.__id = value @property - def rank(self): + def rank(self) -> WikibaseRank: return 
self.__rank @rank.setter - def rank(self, value): + def rank(self, value: WikibaseRank): """Parse the rank. The enum thows an error if it is not one of the recognized values""" self.__rank = WikibaseRank(value) @property - def references(self): + def references(self) -> References: return self.__references @references.setter - def references(self, value): + def references(self, value: References): self.__references = value @property - def removed(self): + def removed(self) -> bool: return self.__removed @removed.setter - def removed(self, value): + def removed(self, value: bool): self.__removed = value - def remove(self, remove=True): + def remove(self, remove=True) -> None: self.removed = remove - def from_json(self, json_data) -> Claim: + def from_json(self, json_data: Dict[str, Any]) -> Claim: self.mainsnak = Snak().from_json(json_data['mainsnak']) - self.type = json_data['type'] + self.type = str(json_data['type']) if 'qualifiers' in json_data: self.qualifiers = Qualifiers().from_json(json_data['qualifiers']) if 'qualifiers-order' in json_data: - self.qualifiers_order = json_data['qualifiers-order'] - self.id = json_data['id'] + self.qualifiers_order = list(json_data['qualifiers-order']) + self.id = str(json_data['id']) self.rank: WikibaseRank = WikibaseRank(json_data['rank']) if 'references' in json_data: self.references = References().from_json(json_data['references']) return self - def get_json(self) -> {}: - json_data = { + def get_json(self) -> Dict[str, Any]: + json_data: Dict[str, Union[str, list, dict, None]] = { 'mainsnak': self.mainsnak.get_json(), 'type': self.type, 'id': self.id, @@ -233,14 +247,14 @@ def get_json(self) -> {}: del json_data['id'] if len(self.qualifiers) > 0: json_data['qualifiers'] = self.qualifiers.get_json() - json_data['qualifiers-order'] = self.qualifiers_order + json_data['qualifiers-order'] = list(self.qualifiers_order) if len(self.references) > 0: json_data['references'] = self.references.get_json() if self.removed: 
json_data['remove'] = '' return json_data - def has_equal_qualifiers(self, other): + def has_equal_qualifiers(self, other: Claim) -> bool: # check if the qualifiers are equal with the 'other' object equal_qualifiers = True self_qualifiers = copy.deepcopy(self.qualifiers) @@ -280,3 +294,39 @@ def __repr__(self): id=id(self) & 0xFFFFFF, attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) + + def equals(self, that: Claim, include_ref: bool = False, fref: Callable = None) -> bool: + """ + Tests for equality of two statements. + If comparing references, the order of the arguments matters!!! + self is the current statement, the next argument is the new statement. + Allows passing in a function to use to compare the references 'fref'. Default is equality. + fref accepts two arguments 'oldrefs' and 'newrefs', each of which are a list of references, + where each reference is a list of statements + """ + + if not include_ref: + # return the result of BaseDataType.__eq__, which is testing for equality of value and qualifiers + return self == that + + if self != that: + return False + + if fref is None: + return Claim.refs_equal(self, that) + + return fref(self, that) + + @staticmethod + def refs_equal(olditem: Claim, newitem: Claim) -> bool: + """ + tests for exactly identical references + """ + + oldrefs = olditem.references + newrefs = newitem.references + + def ref_equal(oldref: References, newref: References) -> bool: + return (len(oldref) == len(newref)) and all(x in oldref for x in newref) + + return len(oldrefs) == len(newrefs) and all(any(ref_equal(oldref, newref) for oldref in oldrefs) for newref in newrefs) diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py index 7136bd3f..60beccca 100644 --- a/wikibaseintegrator/models/forms.py +++ b/wikibaseintegrator/models/forms.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Union + from wikibaseintegrator.models.claims import Claims 
from wikibaseintegrator.models.language_values import LanguageValues @@ -14,27 +18,27 @@ def forms(self): def forms(self, value): self.__forms = value - def get(self, id): + def get(self, id: str) -> Form: return self.forms[id] - def add(self, form): + def add(self, form: Form) -> Forms: self.forms[form.id] = form return self - def get_json(self) -> []: - json_data = [] - for form in self.forms: - json_data.append(self.forms[form].get_json()) - return json_data - - def from_json(self, json_data): + def from_json(self, json_data: List[Dict]) -> Forms: for form in json_data: - self.add(Form(form_id=form['id'], representations=LanguageValues().from_json(form['representations']), grammatical_features=form['grammaticalFeatures'], - claims=Claims().from_json(form['claims']))) + self.add(form=Form().from_json(form)) return self + def get_json(self) -> List[Dict]: + json_data: List[Dict] = [] + for form in self.forms: + json_data.append(self.forms[form].get_json()) + + return json_data + def __repr__(self): """A mixin implementing a simple __repr__.""" return "<{klass} @{id:x} {attrs}>".format( @@ -45,9 +49,9 @@ def __repr__(self): class Form: - def __init__(self, form_id=None, representations=None, grammatical_features=None, claims=None): + def __init__(self, form_id: str = None, representations: Representations = None, grammatical_features: Union[str, int, List[str]] = None, claims: Claims = None): self.id = form_id - self.representations = representations or LanguageValues() + self.representations: Representations = representations or LanguageValues() self.grammatical_features = grammatical_features or [] self.claims = claims or Claims() @@ -72,16 +76,18 @@ def grammatical_features(self): return self.__grammatical_features @grammatical_features.setter - def grammatical_features(self, value): - # TODO: Access to member before its definition + def grammatical_features(self, value: Union[str, int, List[str]]): + if not hasattr(self, '__grammatical_features') or value is 
None: + self.__grammatical_features = [] + if isinstance(value, int): self.__grammatical_features.append('Q' + str(value)) elif isinstance(value, str): self.__grammatical_features.append(value) - elif isinstance(value, list) or value is None: + elif isinstance(value, list): self.__grammatical_features = value else: - raise TypeError(f"value must be an int, a str or a list of strings, got '{type(value)}'") + raise TypeError(f"value must be a str, an int or a list of strings, got '{type(value)}'") @property def claims(self): @@ -91,8 +97,16 @@ def claims(self): def claims(self, value): self.__claims = value - def get_json(self) -> {}: - json_data = { + def from_json(self, json_data: Dict[str, Any]) -> Form: + self.id = json_data['id'] + self.representations = Representations().from_json(json_data['representations']) + self.grammatical_features = json_data['grammaticalFeatures'] + self.claims = Claims().from_json(json_data['claims']) + + return self + + def get_json(self) -> Dict[str, Union[str, Dict, List]]: + json_data: Dict[str, Union[str, Dict, list]] = { 'id': self.id, 'representations': self.representations.get_json(), 'grammaticalFeatures': self.grammatical_features, @@ -112,3 +126,7 @@ def __repr__(self): id=id(self) & 0xFFFFFF, attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) + + +class Representations(LanguageValues): + pass diff --git a/wikibaseintegrator/models/language_values.py b/wikibaseintegrator/models/language_values.py index ec57ef3d..556afe7b 100644 --- a/wikibaseintegrator/models/language_values.py +++ b/wikibaseintegrator/models/language_values.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from typing import Dict, Optional + from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists @@ -14,7 +18,7 @@ def values(self): def values(self, value): self.__values = value - def add(self, language_value): + def add(self, language_value: LanguageValue) -> LanguageValues: assert 
isinstance(language_value, LanguageValue) if language_value.value: @@ -22,15 +26,15 @@ def add(self, language_value): return self - def get(self, language=None): - language = language or config['DEFAULT_LANGUAGE'] + def get(self, language: str = None) -> Optional[LanguageValue]: + language = str(language or config['DEFAULT_LANGUAGE']) if language in self.values: return self.values[language] return None - def set(self, language=None, value=None, action_if_exists=ActionIfExists.REPLACE): - language = language or config['DEFAULT_LANGUAGE'] + def set(self, language: str = None, value: str = None, action_if_exists: ActionIfExists = ActionIfExists.REPLACE) -> Optional[LanguageValue]: + language = str(language or config['DEFAULT_LANGUAGE']) assert action_if_exists in [ActionIfExists.REPLACE, ActionIfExists.KEEP] # Remove value if None @@ -45,18 +49,19 @@ def set(self, language=None, value=None, action_if_exists=ActionIfExists.REPLACE return self.get(language=language) - def get_json(self) -> {}: - json_data = {} - for value in self.values: - json_data[value] = self.values[value].get_json() - return json_data - - def from_json(self, json_data): + def from_json(self, json_data: Dict[str, Dict]) -> LanguageValues: for language_value in json_data: - self.set(language=json_data[language_value]['language'], value=json_data[language_value]['value']) + self.add(language_value=LanguageValue(language=json_data[language_value]['language']).from_json(json_data=json_data[language_value])) return self + def get_json(self) -> Dict[str, Dict]: + json_data: Dict[str, Dict] = {} + for value in self.values: + json_data[value] = self.values[value].get_json() + + return json_data + def __iter__(self): return iter(self.values.values()) @@ -70,7 +75,7 @@ def __repr__(self): class LanguageValue: - def __init__(self, language, value=None): + def __init__(self, language: str, value: str = None): self.language = language self.value = value self.removed = False @@ -108,11 +113,18 @@ def 
removed(self): def removed(self, value): self.__removed = value - def remove(self): + def remove(self) -> LanguageValue: self.removed = True + + return self + + def from_json(self, json_data: Dict[str, str]) -> LanguageValue: + self.language = json_data['language'] + self.value = json_data['value'] + return self - def get_json(self) -> {}: + def get_json(self) -> Dict[str, str]: json_data = { 'language': self.language, 'value': self.value diff --git a/wikibaseintegrator/models/qualifiers.py b/wikibaseintegrator/models/qualifiers.py index f6379c63..7bc45596 100644 --- a/wikibaseintegrator/models/qualifiers.py +++ b/wikibaseintegrator/models/qualifiers.py @@ -1,8 +1,13 @@ from __future__ import annotations +from typing import TYPE_CHECKING, Dict, List, Union + from wikibaseintegrator.models.snaks import Snak from wikibaseintegrator.wbi_enums import ActionIfExists +if TYPE_CHECKING: + from wikibaseintegrator.models.claims import Claim + class Qualifiers: def __init__(self): @@ -17,7 +22,7 @@ def qualifiers(self, value): assert isinstance(value, dict) self.__qualifiers = value - def set(self, qualifiers): + def set(self, qualifiers: Union[Qualifiers, List, None]) -> Qualifiers: if isinstance(qualifiers, list): for qualifier in qualifiers: self.add(qualifier) @@ -28,11 +33,11 @@ def set(self, qualifiers): return self - def get(self, property=None): + def get(self, property: str = None) -> Snak: return self.qualifiers[property] # TODO: implement action_if_exists - def add(self, qualifier=None, action_if_exists=ActionIfExists.REPLACE): + def add(self, qualifier: Union[Snak, Claim], action_if_exists: ActionIfExists = ActionIfExists.REPLACE) -> Qualifiers: from wikibaseintegrator.models.claims import Claim if isinstance(qualifier, Claim): qualifier = Snak().from_json(qualifier.get_json()['mainsnak']) @@ -49,14 +54,14 @@ def add(self, qualifier=None, action_if_exists=ActionIfExists.REPLACE): return self - def from_json(self, json_data) -> Qualifiers: + def from_json(self, 
json_data: Dict[str, List]) -> Qualifiers: for property in json_data: for snak in json_data[property]: self.add(qualifier=Snak().from_json(snak)) return self - def get_json(self) -> {}: - json_data = {} + def get_json(self) -> Dict[str, List]: + json_data: Dict[str, list] = {} for property in self.qualifiers: if property not in json_data: json_data[property] = [] diff --git a/wikibaseintegrator/models/references.py b/wikibaseintegrator/models/references.py index 9a320654..1b82ee84 100644 --- a/wikibaseintegrator/models/references.py +++ b/wikibaseintegrator/models/references.py @@ -1,8 +1,13 @@ from __future__ import annotations +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union + from wikibaseintegrator.models.snaks import Snak, Snaks from wikibaseintegrator.wbi_enums import ActionIfExists +if TYPE_CHECKING: + from wikibaseintegrator.models.claims import Claim + class References: def __init__(self): @@ -16,14 +21,14 @@ def references(self): def references(self, value): self.__references = value - def get(self, hash=None): + def get(self, hash: str = None) -> Optional[Reference]: for reference in self.references: if reference.hash == hash: return reference return None # TODO: implement action_if_exists - def add(self, reference=None, action_if_exists=ActionIfExists.REPLACE): + def add(self, reference: Union[Reference, Claim] = None, action_if_exists: ActionIfExists = ActionIfExists.REPLACE) -> References: from wikibaseintegrator.models.claims import Claim if isinstance(reference, Claim): reference = Reference(snaks=Snaks().add(Snak().from_json(reference.get_json()['mainsnak']))) @@ -36,19 +41,19 @@ def add(self, reference=None, action_if_exists=ActionIfExists.REPLACE): return self - def from_json(self, json_data) -> References: + def from_json(self, json_data: List[Dict]) -> References: for reference in json_data: self.add(reference=Reference().from_json(reference)) return self - def get_json(self) -> []: - json_data = [] + def get_json(self) -> 
List[Dict]: + json_data: List[Dict] = [] for reference in self.references: json_data.append(reference.get_json()) return json_data - def remove(self, reference_to_remove): + def remove(self, reference_to_remove: Union[Claim, Reference]) -> bool: from wikibaseintegrator.models.claims import Claim if isinstance(reference_to_remove, Claim): reference_to_remove = Reference(snaks=Snaks().add(Snak().from_json(reference_to_remove.get_json()['mainsnak']))) @@ -62,7 +67,7 @@ def remove(self, reference_to_remove): return False - def clear(self): + def clear(self) -> References: self.references = [] return self @@ -82,7 +87,7 @@ def __repr__(self): class Reference: - def __init__(self, snaks=None, snaks_order=None): + def __init__(self, snaks: Snaks = None, snaks_order: List = None): self.hash = None self.snaks = snaks or Snaks() self.snaks_order = snaks_order or [] @@ -112,7 +117,7 @@ def snaks_order(self, value): self.__snaks_order = value # TODO: implement action_if_exists - def add(self, snak=None, action_if_exists=ActionIfExists.REPLACE): + def add(self, snak: Union[Snak, Claim] = None, action_if_exists: ActionIfExists = ActionIfExists.REPLACE) -> Reference: from wikibaseintegrator.models.claims import Claim if isinstance(snak, Claim): snak = Snak().from_json(snak.get_json()['mainsnak']) @@ -124,15 +129,15 @@ def add(self, snak=None, action_if_exists=ActionIfExists.REPLACE): return self - def from_json(self, json_data) -> Reference: + def from_json(self, json_data: Dict[str, Any]) -> Reference: self.hash = json_data['hash'] self.snaks = Snaks().from_json(json_data['snaks']) self.snaks_order = json_data['snaks-order'] return self - def get_json(self) -> {}: - json_data = { + def get_json(self) -> Dict[str, Union[Dict, list]]: + json_data: Dict[str, Union[Dict, list]] = { 'snaks': self.snaks.get_json(), 'snaks-order': self.snaks_order } diff --git a/wikibaseintegrator/models/senses.py b/wikibaseintegrator/models/senses.py index ebc0cdc3..73285a96 100644 --- 
a/wikibaseintegrator/models/senses.py +++ b/wikibaseintegrator/models/senses.py @@ -1,3 +1,7 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Union + from wikibaseintegrator.models.claims import Claims from wikibaseintegrator.models.language_values import LanguageValues from wikibaseintegrator.wbi_enums import ActionIfExists @@ -7,30 +11,31 @@ class Senses: def __init__(self): self.senses = [] - def get(self, id): + def get(self, id: str) -> Optional[Sense]: for sense in self.senses: if sense.id == id: return sense return None # TODO: implement action_if_exists - def add(self, sense, action_if_exists=ActionIfExists.REPLACE): + def add(self, sense: Sense, action_if_exists: ActionIfExists = ActionIfExists.REPLACE) -> Senses: self.senses.append(sense) return self - def get_json(self) -> []: - json_data = [] - for sense in self.senses: - json_data.append(sense.get_json()) - return json_data - - def from_json(self, json_data): + def from_json(self, json_data: List[Dict]) -> Senses: for sense in json_data: - self.add(Sense(sense_id=sense['id'], glosses=Glosses().from_json(sense['glosses']), claims=Claims().from_json(sense['claims']))) + self.add(sense=Sense().from_json(sense)) return self + def get_json(self) -> List[Dict]: + json_data: List[Dict] = [] + for sense in self.senses: + json_data.append(sense.get_json()) + + return json_data + def __repr__(self): """A mixin implementing a simple __repr__.""" return "<{klass} @{id:x} {attrs}>".format( @@ -41,15 +46,22 @@ def __repr__(self): class Sense: - def __init__(self, sense_id=None, glosses=None, claims=None): + def __init__(self, sense_id: str = None, glosses: Glosses = None, claims: Claims = None): self.id = sense_id - self.glosses = glosses or Glosses() + self.glosses: LanguageValues = glosses or Glosses() self.claims = claims or Claims() self.removed = False - def get_json(self) -> {}: - json_data = { - 'id': self.id, + def from_json(self, json_data: Dict[str, Any]) -> Sense: + 
self.id = json_data['id'] + self.glosses = Glosses().from_json(json_data['glosses']) + self.claims = Claims().from_json(json_data['claims']) + + return self + + def get_json(self) -> Dict[str, Union[str, Dict]]: + json_data: Dict[str, Union[str, Dict]] = { + 'id': str(self.id), 'glosses': self.glosses.get_json(), 'claims': self.claims.get_json() } @@ -63,7 +75,7 @@ def get_json(self) -> {}: return json_data - def remove(self): + def remove(self) -> Sense: self.removed = True return self diff --git a/wikibaseintegrator/models/sitelinks.py b/wikibaseintegrator/models/sitelinks.py index 4b4fd87c..ac95fc54 100644 --- a/wikibaseintegrator/models/sitelinks.py +++ b/wikibaseintegrator/models/sitelinks.py @@ -1,19 +1,24 @@ +from __future__ import annotations + +from typing import Dict, List, Optional + + class Sitelinks: def __init__(self): - self.sitelinks = {} + self.sitelinks: Dict[str, Sitelink] = {} - def get(self, site=None): + def get(self, site: str = None) -> Optional[Sitelink]: if site in self.sitelinks: return self.sitelinks[site] return None - def set(self, site=None, title=None, badges=None): + def set(self, site: str, title: str = None, badges: List[str] = None) -> Sitelink: sitelink = Sitelink(site, title, badges) self.sitelinks[site] = sitelink return sitelink - def from_json(self, json_data): + def from_json(self, json_data: Dict[str, Dict]) -> Sitelinks: for sitelink in json_data: self.set(site=json_data[sitelink]['site'], title=json_data[sitelink]['title'], badges=json_data[sitelink]['badges']) @@ -29,10 +34,10 @@ def __repr__(self): class Sitelink: - def __init__(self, site=None, title=None, badges=None): + def __init__(self, site: str = None, title: str = None, badges: List[str] = None): self.site = site self.title = title - self.badges = badges + self.badges: List[str] = badges or [] def __str__(self): return self.title diff --git a/wikibaseintegrator/models/snaks.py b/wikibaseintegrator/models/snaks.py index cc5148f2..6dfb1dd9 100644 --- 
a/wikibaseintegrator/models/snaks.py +++ b/wikibaseintegrator/models/snaks.py @@ -1,7 +1,7 @@ from __future__ import annotations import re -from typing import Optional +from typing import Any, Dict from wikibaseintegrator.wbi_enums import WikibaseSnakType @@ -10,10 +10,10 @@ class Snaks: def __init__(self): self.snaks = {} - def get(self, property=None): + def get(self, property: str = None) -> Snak: return self.snaks[property] - def add(self, snak: Optional[Snak] = None): + def add(self, snak: Snak) -> Snaks: property = snak.property_number if property not in self.snaks: @@ -23,15 +23,15 @@ def add(self, snak: Optional[Snak] = None): return self - def from_json(self, json_data) -> Snaks: + def from_json(self, json_data: Dict[str, list]) -> Snaks: for property in json_data: for snak in json_data[property]: self.add(snak=Snak().from_json(snak)) return self - def get_json(self) -> {}: - json_data = {} + def get_json(self) -> Dict[str, list]: + json_data: Dict[str, list] = {} for property in self.snaks: if property not in json_data: json_data[property] = [] @@ -58,7 +58,7 @@ def __repr__(self): class Snak: - def __init__(self, snaktype: WikibaseSnakType = WikibaseSnakType.KNOWN_VALUE, property_number=None, hash=None, datavalue=None, datatype=None): + def __init__(self, snaktype: WikibaseSnakType = WikibaseSnakType.KNOWN_VALUE, property_number: str = None, hash: str = None, datavalue: Dict = None, datatype: str = None): self.snaktype = snaktype self.property_number = property_number self.hash = hash @@ -119,7 +119,7 @@ def datatype(self): def datatype(self, value): self.__datatype = value - def from_json(self, json_data) -> Snak: + def from_json(self, json_data: Dict[str, Any]) -> Snak: self.snaktype: WikibaseSnakType = WikibaseSnakType(json_data['snaktype']) self.property_number = json_data['property'] if 'hash' in json_data: @@ -130,7 +130,7 @@ def from_json(self, json_data) -> Snak: self.datatype = json_data['datatype'] return self - def get_json(self) -> {}: + def 
get_json(self) -> Dict[str, str]: json_data = { 'snaktype': self.snaktype.value, 'property': self.property_number, diff --git a/wikibaseintegrator/wbi_config.py b/wikibaseintegrator/wbi_config.py index 7265a5c6..42f4327f 100644 --- a/wikibaseintegrator/wbi_config.py +++ b/wikibaseintegrator/wbi_config.py @@ -1,3 +1,5 @@ +from typing import Dict, Union + """ Config global options Options can be changed at run time. See tests/test_backoff.py for usage example @@ -12,11 +14,10 @@ See: https://meta.wikimedia.org/wiki/User-Agent_policy """ -config = { +config: Dict[str, Union[str, int, None, bool]] = { 'BACKOFF_MAX_TRIES': 5, 'BACKOFF_MAX_VALUE': 3600, 'USER_AGENT': None, - 'MAXLAG': 5, 'PROPERTY_CONSTRAINT_PID': 'P2302', 'DISTINCT_VALUES_CONSTRAINT_QID': 'Q21502410', 'COORDINATE_GLOBE_QID': 'http://www.wikidata.org/entity/Q2', diff --git a/wikibaseintegrator/wbi_exceptions.py b/wikibaseintegrator/wbi_exceptions.py index 4d28cf99..29986476 100644 --- a/wikibaseintegrator/wbi_exceptions.py +++ b/wikibaseintegrator/wbi_exceptions.py @@ -1,10 +1,6 @@ class MWApiError(Exception): """ Base class for Mediawiki API error handling - - :param error_message: The error message returned by the Mediawiki API - :type error_message: A Python json representation dictionary of the error message - :return: """ @@ -22,17 +18,15 @@ def __init__(self, error_message): self.error_msg = error_message - def get_language(self): + def get_language(self) -> str: """ :return: Returns a 2 letter language string, indicating the language which triggered the error - :rtype: string """ return self.error_msg['error']['messages'][0]['parameters'][1] - def get_conflicting_item_qid(self): + def get_conflicting_item_qid(self) -> str: """ :return: Returns the QID string of the item which has the same label and description as the one which should be set. 
- :rtype: string """ qid_string = self.error_msg['error']['messages'][0]['parameters'][2] diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 877fbbbf..8582c589 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -1,8 +1,11 @@ +from __future__ import annotations + import collections import copy from collections import defaultdict from functools import lru_cache, wraps from itertools import chain +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Type, Union from frozendict import frozendict @@ -11,20 +14,24 @@ from wikibaseintegrator.wbi_enums import ActionIfExists from wikibaseintegrator.wbi_helpers import execute_sparql_query, format_amount -fastrun_store = [] +if TYPE_CHECKING: + from wikibaseintegrator.models import Claims + +fastrun_store: List[FastRunContainer] = [] class FastRunContainer: - def __init__(self, base_data_type, mediawiki_api_url=None, sparql_endpoint_url=None, wikibase_url=None, base_filter=None, use_refs=False, case_insensitive=False, debug=None): - self.reconstructed_statements = [] - self.rev_lookup = defaultdict(set) - self.rev_lookup_ci = defaultdict(set) - self.prop_data = {} - self.loaded_langs = {} - self.statements = [] + def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = None, sparql_endpoint_url: str = None, wikibase_url: str = None, + base_filter: Dict[str, str] = None, + use_refs: bool = False, case_insensitive: bool = False, debug: bool = None): + self.reconstructed_statements: List[BaseDataType] = [] + self.rev_lookup: defaultdict[str, Set[str]] = defaultdict(set) + self.rev_lookup_ci: defaultdict[str, Set[str]] = defaultdict(set) + self.prop_data: Dict[str, dict] = {} + self.loaded_langs: Dict[str, dict] = {} self.base_filter = {} self.base_filter_string = '' - self.prop_dt_map = {} + self.prop_dt_map: Dict[str, str] = {} self.current_qid = '' self.base_data_type = base_data_type @@ -56,8 +63,8 @@ def 
__init__(self, base_data_type, mediawiki_api_url=None, sparql_endpoint_url=N else: self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> ?zz{prop_nr} .\n'.format(wb_url=self.wikibase_url, prop_nr=k) - def reconstruct_statements(self, qid: str) -> list: - reconstructed_statements = [] + def reconstruct_statements(self, qid: str) -> List[BaseDataType]: + reconstructed_statements: List[BaseDataType] = [] if qid not in self.prop_data: self.reconstructed_statements = reconstructed_statements @@ -78,34 +85,34 @@ def reconstruct_statements(self, qid: str) -> list: f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[q[0]]][0] # TODO: Add support for more data type (Time, MonolingualText, GlobeCoordinate) if self.prop_dt_map[q[0]] == 'quantity': - qualifiers.append(f(q[1], prop_nr=q[0], is_qualifier=True, unit=q[2])) + qualifiers.append(f(value=q[1], prop_nr=q[0], is_qualifier=True, unit=q[2])) else: - qualifiers.append(f(q[1], prop_nr=q[0], is_qualifier=True)) + qualifiers.append(f(value=q[1], prop_nr=q[0], is_qualifier=True)) references = [] for ref_id, refs in d['ref'].items(): this_ref = [] for ref in refs: f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[ref[0]]][0] - this_ref.append(f(ref[1], prop_nr=ref[0])) + this_ref.append(f(value=ref[1], prop_nr=ref[0])) references.append(this_ref) f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[prop_nr]][0] # TODO: Add support for more data type if self.prop_dt_map[prop_nr] == 'quantity': - reconstructed_statements.append(f(d['v'], prop_nr=prop_nr, qualifiers=qualifiers, references=references, unit=d['unit'])) + reconstructed_statements.append(f(value=d['v'], prop_nr=prop_nr, qualifiers=qualifiers, references=references, unit=d['unit'])) else: - reconstructed_statements.append(f(d['v'], prop_nr=prop_nr, qualifiers=qualifiers, references=references)) + reconstructed_statements.append(f(value=d['v'], prop_nr=prop_nr, 
qualifiers=qualifiers, references=references)) # this isn't used. done for debugging purposes self.reconstructed_statements = reconstructed_statements return reconstructed_statements - def get_item(self, claims: list, cqid=None): + def get_item(self, claims: List, cqid: str = None) -> str: self.load_item(claims=claims, cqid=cqid) return self.current_qid - def load_item(self, claims: list, cqid=None) -> bool: + def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool: match_sets = [] for claim in claims: # skip to next if statement has no value or no data type defined, e.g. for deletion objects @@ -120,7 +127,8 @@ def load_item(self, claims: list, cqid=None) -> bool: self.prop_dt_map.update({prop_nr: self.get_prop_datatype(prop_nr)}) self._query_data(prop_nr=prop_nr, use_units=claim.mainsnak.datatype == 'quantity') - current_value = claim.get_sparql_value() + # noinspection PyProtectedMember + current_value = claim._get_sparql_value() if self.prop_dt_map[prop_nr] == 'wikibase-item': if not str(current_value).startswith('Q'): @@ -161,8 +169,9 @@ def load_item(self, claims: list, cqid=None) -> bool: qid = matching_qids.pop() self.current_qid = qid + return False - def write_required(self, data: list, action_if_exists=ActionIfExists.REPLACE, cqid=None) -> bool: + def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExists = ActionIfExists.REPLACE, cqid: str = None) -> bool: del_props = set() data_props = set() append_props = [] @@ -223,10 +232,7 @@ def write_required(self, data: list, action_if_exists=ActionIfExists.REPLACE, cq for x in tmp_rs: if (x.mainsnak.datavalue == date.mainsnak.datavalue or ( self.case_insensitive and x.mainsnak.datavalue.casefold() == date.mainsnak.datavalue.casefold())) and x.mainsnak.property_number not in del_props: - if x.equals(date, include_ref=self.use_refs): - bool_vec.append(True) - else: - bool_vec.append(False) + bool_vec.append(x.equals(date, include_ref=self.use_refs)) else: 
bool_vec.append(False) """ @@ -276,10 +282,11 @@ def init_language_data(self, lang: str, lang_data_type: str) -> None: if lang_data_type not in self.loaded_langs[lang]: result = self._query_lang(lang=lang, lang_data_type=lang_data_type) - data = self._process_lang(result) - self.loaded_langs[lang].update({lang_data_type: data}) + if result is not None: + data = self._process_lang(result=result) + self.loaded_langs[lang].update({lang_data_type: data}) - def get_language_data(self, qid: str, lang: str, lang_data_type: str) -> list: + def get_language_data(self, qid: str, lang: str, lang_data_type: str) -> List[str]: """ get language data for specified qid @@ -300,9 +307,7 @@ def get_language_data(self, qid: str, lang: str, lang_data_type: str) -> list: all_lang_strings = [''] return all_lang_strings - def check_language_data(self, qid: str, lang_data: list, lang: str, lang_data_type: str, - # Default to append - action_if_exists: ActionIfExists = ActionIfExists.APPEND) -> bool: + def check_language_data(self, qid: str, lang_data: List, lang: str, lang_data_type: str, action_if_exists: ActionIfExists = ActionIfExists.APPEND) -> bool: """ Method to check if certain language data exists as a label, description or aliases :param qid: Wikibase item id @@ -325,10 +330,10 @@ def check_language_data(self, qid: str, lang_data: list, lang: str, lang_data_ty return False - def get_all_data(self) -> dict: + def get_all_data(self) -> Dict[str, dict]: return self.prop_data - def format_query_results(self, r: list, prop_nr: str) -> None: + def format_query_results(self, r: List, prop_nr: str) -> None: """ `r` is the results of the sparql query in _query_data and is modified in place `prop_nr` is needed to get the property datatype to determine how to format the value @@ -404,7 +409,7 @@ def format_query_results(self, r: list, prop_nr: str) -> None: else: i['rval'] = i['rval']['value'] - def update_frc_from_query(self, r: list, prop_nr: str) -> None: + def 
update_frc_from_query(self, r: List, prop_nr: str) -> None: # r is the output of format_query_results # this updates the frc from the query (result of _query_data) for i in r: @@ -439,7 +444,7 @@ def update_frc_from_query(self, r: list, prop_nr: str) -> None: if 'unit' in i: self.prop_data[qid][prop_nr][i['sid']]['unit'] = i['unit'] - def _query_data(self, prop_nr: str, use_units=False) -> None: + def _query_data(self, prop_nr: str, use_units: bool = False) -> None: page_size = 10000 page_count = 0 num_pages = None @@ -559,7 +564,7 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: if len(results) == 0 or len(results) < page_size: break - def _query_lang(self, lang: str, lang_data_type: str): + def _query_lang(self, lang: str, lang_data_type: str) -> Optional[List[Dict[str, dict]]]: """ :param lang: @@ -589,7 +594,7 @@ def _query_lang(self, lang: str, lang_data_type: str): return execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] @staticmethod - def _process_lang(result: list): + def _process_lang(result: List) -> defaultdict[str, set]: data = defaultdict(set) for r in result: qid = r['item']['value'].split("/")[-1] @@ -598,7 +603,7 @@ def _process_lang(result: list): return data @lru_cache(maxsize=100000) - def get_prop_datatype(self, prop_nr: str) -> str: + def get_prop_datatype(self, prop_nr: str) -> Optional[str]: from wikibaseintegrator import WikibaseIntegrator wbi = WikibaseIntegrator() property = wbi.property.get(prop_nr) @@ -629,7 +634,7 @@ def freezeargs(func): """ @wraps(func) - def wrapped(*args, **kwargs): + def wrapped(*args: Any, **kwargs: Any) -> Any: args = tuple(frozendict(arg) if isinstance(arg, dict) else arg for arg in args) kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()} return func(*args, **kwargs) @@ -637,7 +642,7 @@ def wrapped(*args, **kwargs): return wrapped -def get_fastrun_container(base_filter=None, use_refs=False, case_insensitive=False): +def 
get_fastrun_container(base_filter: Dict[str, str] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer: if base_filter is None: base_filter = {} @@ -650,7 +655,7 @@ def get_fastrun_container(base_filter=None, use_refs=False, case_insensitive=Fal @freezeargs @lru_cache() -def search_fastrun_store(base_filter=None, use_refs=False, case_insensitive=False): +def search_fastrun_store(base_filter: Dict[str, str] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer: for c in fastrun_store: if (c.base_filter == base_filter) and (c.use_refs == use_refs) and (c.case_insensitive == case_insensitive) and ( c.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']): diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index 9b3e1a89..f71c6357 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -1,13 +1,21 @@ +from __future__ import annotations + import datetime from time import sleep +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union from urllib.parse import urlparse import requests +from requests import Session from wikibaseintegrator.wbi_backoff import wbi_backoff from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_exceptions import MWApiError, SearchError +if TYPE_CHECKING: + from wikibaseintegrator.entities.baseentity import BaseEntity + from wikibaseintegrator.wbi_login import Login + class BColors: HEADER = '\033[95m' @@ -21,7 +29,7 @@ class BColors: UNDERLINE = '\033[4m' -def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries=100, retry_after=60, **kwargs): +def mediawiki_api_call(method: str, mediawiki_api_url: str = None, session: Session = None, max_retries: int = 100, retry_after: int = 60, **kwargs: Any) -> Dict: """ :param method: 'GET' or 'POST' :param mediawiki_api_url: @@ -34,7 +42,7 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, 
max_retries :return: """ - mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url + mediawiki_api_url = str(mediawiki_api_url or config['MEDIAWIKI_API_URL']) # TODO: Add support for 'multipart/form-data' when using POST (https://www.mediawiki.org/wiki/API:Edit#Large_edits) @@ -48,7 +56,7 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries session = session if session else requests.Session() for n in range(max_retries): try: - response = session.request(method, mediawiki_api_url, **kwargs) + response = session.request(method=method, url=mediawiki_api_url, **kwargs) except requests.exceptions.ConnectionError as e: print(f"Connection error: {e}. Sleeping for {retry_after} seconds.") sleep(retry_after) @@ -109,11 +117,13 @@ def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries return json_data -def mediawiki_api_call_helper(data=None, login=None, mediawiki_api_url=None, user_agent=None, allow_anonymous=False, max_retries=1000, retry_after=60, is_bot=False, **kwargs): - mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - user_agent = config['USER_AGENT'] if user_agent is None else user_agent +def mediawiki_api_call_helper(data: Dict[str, Any] = None, login: Login = None, mediawiki_api_url: str = None, user_agent: str = None, allow_anonymous: bool = False, + max_retries: int = 1000, retry_after: int = 60, maxlag: int = 5, is_bot: bool = False, **kwargs: Any) -> Dict: + mediawiki_api_url = str(mediawiki_api_url or config['MEDIAWIKI_API_URL']) + user_agent = user_agent or (str(config['USER_AGENT']) if config['USER_AGENT'] is not None else None) - if urlparse(mediawiki_api_url).hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None: + hostname = urlparse(mediawiki_api_url).hostname + if hostname is not None and hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and 
user_agent is None: print('WARNING: Please set an user agent if you interact with a Wikibase instance from the Wikimedia Foundation.') print('More information in the README.md and https://meta.wikimedia.org/wiki/User-Agent_policy') @@ -149,8 +159,8 @@ def mediawiki_api_call_helper(data=None, login=None, mediawiki_api_url=None, use # Always assert anon if allow_anonymous is True data.update({'assert': 'anon'}) - if config['MAXLAG'] > 0: - data.update({'maxlag': config['MAXLAG']}) + if maxlag > 0: + data.update({'maxlag': maxlag}) login_session = login.get_session() if login is not None else None @@ -159,14 +169,14 @@ def mediawiki_api_call_helper(data=None, login=None, mediawiki_api_url=None, use @wbi_backoff() -def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max_retries=1000, retry_after=60, debug=False): +def execute_sparql_query(query: str, prefix: str = None, endpoint: str = None, user_agent: str = None, max_retries: int = 1000, retry_after: int = 60, + debug: bool = False) -> Optional[Dict[str, dict]]: """ Static method which can be used to execute any SPARQL query :param prefix: The URI prefixes required for an endpoint, default is the Wikidata specific prefixes :param query: The actual SPARQL query string :param endpoint: The URL string for the SPARQL endpoint. Default is the URL for the Wikidata SPARQL endpoint :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. - :type user_agent: str :param max_retries: The number time this function should retry in case of header reports. :param retry_after: the number of seconds should wait upon receiving either an error code or the Query Service is not reachable. :param debug: Enable debug output. 
@@ -174,10 +184,11 @@ def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max :return: The results of the query are returned in JSON format """ - sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if endpoint is None else endpoint - user_agent = (config['USER_AGENT'] if user_agent is None else user_agent) + sparql_endpoint_url = str(endpoint or config['SPARQL_ENDPOINT_URL']) + user_agent = user_agent or (str(config['USER_AGENT']) if config['USER_AGENT'] is not None else None) - if urlparse(sparql_endpoint_url).hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None: + hostname = urlparse(sparql_endpoint_url).hostname + if hostname is not None and hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None: print('WARNING: Please set an user agent if you interact with a Wikibase instance from the Wikimedia Foundation.') print('More information in the README.md and https://meta.wikimedia.org/wiki/User-Agent_policy') @@ -211,7 +222,7 @@ def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max continue if response.status_code == 429: if 'retry-after' in response.headers.keys(): - retry_after = response.headers['retry-after'] + retry_after = int(response.headers['retry-after']) print(f"Too Many Requests (429). 
Sleeping for {retry_after} seconds") sleep(retry_after) continue @@ -220,121 +231,104 @@ def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max return results + return None -def merge_items(from_id, to_id, ignore_conflicts='', **kwargs): + +def merge_items(from_id: str, to_id: str, ignore_conflicts: List[str] = None, is_bot: bool = False, **kwargs: Any) -> Dict: """ A static method to merge two items - :param from_id: The QID which should be merged into another item - :type from_id: string with 'Q' prefix - :param to_id: The QID into which another item should be merged - :type to_id: string with 'Q' prefix - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :param ignore_conflicts: A string with the values 'description', 'statement' or 'sitelink', separated by a pipe ('|') if using more than one of those. - :type ignore_conflicts: str - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. - :type user_agent: str + + :param from_id: The ID to merge from. This parameter is required. + :param to_id: The ID to merge to. This parameter is required. + :param ignore_conflicts: List of elements of the item to ignore conflicts for. Can only contain values of "description", "sitelink" and "statement" + :param is_bot: Mark this edit as bot. 
""" params = { 'action': 'wbmergeitems', 'fromid': from_id, 'toid': to_id, - 'format': 'json', - 'bot': '', - 'ignoreconflicts': ignore_conflicts + 'format': 'json' } + if ignore_conflicts is not None: + params.update({'ignoreconflicts': '|'.join(ignore_conflicts)}) + + if is_bot: + params.update({'bot': ''}) + return mediawiki_api_call_helper(data=params, **kwargs) -def merge_lexemes(source, target, summary=None, **kwargs): +def merge_lexemes(source: str, target: str, summary: str = None, is_bot: bool = False, **kwargs: Any) -> Dict: """ A static method to merge two items - :param source: The QID which should be merged into another item - :type source: string with 'Q' prefix - :param target: The QID into which another item should be merged - :type target: string with 'Q' prefix + :param source: The ID to merge from. This parameter is required. + :param target: The ID to merge to. This parameter is required. + :param summary: Summary for the edit. + :param is_bot: Mark this edit as bot. """ params = { 'action': 'wblmergelexemes', 'fromid': source, 'toid': target, - 'format': 'json', - 'bot': '' + 'format': 'json' } if summary: params.update({'summary': summary}) - return mediawiki_api_call_helper(data=params, **kwargs) + if is_bot: + params.update({'bot': ''}) + return mediawiki_api_call_helper(data=params, is_bot=is_bot, **kwargs) -def remove_claims(claim_id, summary=None, revision=None, **kwargs): + +def remove_claims(claim_id: str, summary: str = None, baserevid: int = None, is_bot: bool = False, **kwargs: Any) -> Dict: """ Delete an item + :param claim_id: One GUID or several (pipe-separated) GUIDs identifying the claims to be removed. All claims must belong to the same entity. - :type claim_id: string :param summary: Summary for the edit. Will be prepended by an automatically generated comment. - :type summary: str - :param revision: The numeric identifier for the revision to base the modification on. This is used for detecting conflicts during save. 
- :type revision: str - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. - :type user_agent: str + :param baserevid: The numeric identifier for the revision to base the modification on. This is used for detecting conflicts during save. + :param is_bot: Mark this edit as bot. """ - params = { + params: Dict[str, Union[str, int]] = { 'action': 'wbremoveclaims', 'claim': claim_id, - 'bot': '', 'format': 'json' } if summary: params.update({'summary': summary}) - if revision: - params.update({'revision': revision}) + if baserevid: + params.update({'baserevid': baserevid}) + + if is_bot: + params.update({'bot': ''}) return mediawiki_api_call_helper(data=params, **kwargs) -def search_entities(search_string, language=None, strict_language=True, search_type='item', max_results=500, dict_result=False, allow_anonymous=True, **kwargs): +def search_entities(search_string: str, language: str = None, strict_language: bool = True, search_type: str = 'item', max_results: int = 500, dict_result: bool = False, + allow_anonymous: bool = True, **kwargs: Any) -> List[Dict[str, Any]]: """ Performs a search for entities in the Wikibase instance using labels and aliases. + :param search_string: a string which should be searched for in the Wikibase instance (labels and aliases) - :type search_string: str :param language: The language in which to perform the search. - :type language: str :param strict_language: Whether to disable language fallback - :type strict_language: bool :param search_type: Search for this type of entity. 
One of the following values: form, item, lexeme, property, sense - :type search_type: str - :param mediawiki_api_url: Specify the mediawiki_api_url. - :type mediawiki_api_url: str :param max_results: The maximum number of search results returned. Default 500 - :type max_results: int :param dict_result: - :type dict_result: boolean - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :param user_agent: The user agent string transmitted in the http header - :type user_agent: str - :return: list """ - language = config['DEFAULT_LANGUAGE'] if language is None else language + language = str(language or config['DEFAULT_LANGUAGE']) params = { 'action': 'wbsearchentities', @@ -382,20 +376,14 @@ def search_entities(search_string, language=None, strict_language=True, search_t return results -def generate_entity_instances(entities, allow_anonymous=True, **kwargs): +def generate_entity_instances(entities: Union[str, List[str]], allow_anonymous: bool = True, **kwargs: Any) -> List[Tuple[str, BaseEntity]]: """ A method which allows for retrieval of a list of Wikidata entities. The method generates a list of tuples where the first value in the tuple is the entity's ID, whereas the second is the new instance of a subclass of BaseEntity containing all the data of the entity. This is most useful for mass retrieval of entities. - :param user_agent: A custom user agent - :type user_agent: str + :param entities: A list of IDs. Item, Property or Lexeme. - :type entities: list, str - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :return: A list of tuples, first value in the tuple is the entity's ID, second value is the instance of a subclass of BaseEntity with the corresponding entity data. - :param login: The object containing the login credentials and cookies. 
An instance of wbi_login.Login. :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool + :return: A list of tuples, first value in the tuple is the entity's ID, second value is the instance of a subclass of BaseEntity with the corresponding entity data. """ from wikibaseintegrator.entities.baseentity import BaseEntity @@ -424,7 +412,7 @@ def generate_entity_instances(entities, allow_anonymous=True, **kwargs): return entity_instances -def format_amount(amount) -> str: +def format_amount(amount: Union[int, str, float]) -> str: # Remove .0 by casting to int if float(amount) % 1 == 0: amount = int(float(amount)) @@ -437,7 +425,7 @@ def format_amount(amount) -> str: return str(amount) -def get_user_agent(user_agent): +def get_user_agent(user_agent: Optional[str]) -> str: from wikibaseintegrator import __version__ wbi_user_agent = f"WikibaseIntegrator/{__version__}" @@ -448,10 +436,9 @@ def get_user_agent(user_agent): return return_user_agent - -def __deepcopy__(memo): - # Don't return a copy of the module - # Deepcopy don't allow copy of modules (https://bugs.python.org/issue43093) - # It's really the good way to solve this? - from wikibaseintegrator import wikibaseintegrator - return wikibaseintegrator.wbi_helpers +# def __deepcopy__(memo): +# # Don't return a copy of the module +# # Deepcopy don't allow copy of modules (https://bugs.python.org/issue43093) +# # It's really the good way to solve this? 
+# from wikibaseintegrator import wikibaseintegrator +# return wikibaseintegrator.wbi_helpers diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index b65db775..afe72965 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -1,9 +1,12 @@ import time import webbrowser +from typing import Optional import requests from mwoauth import ConsumerToken, Handshaker, OAuthException from oauthlib.oauth2 import BackendApplicationClient, InvalidClientError +from requests import Session +from requests.cookies import RequestsCookieJar from requests_oauthlib import OAuth1, OAuth2, OAuth2Session from wikibaseintegrator.wbi_backoff import wbi_backoff @@ -21,48 +24,39 @@ class Login: """ @wbi_backoff() - def __init__(self, auth_method='oauth2', user=None, password=None, mediawiki_api_url=None, mediawiki_index_url=None, mediawiki_rest_url=None, token_renew_period=1800, - consumer_token=None, consumer_secret=None, access_token=None, access_secret=None, callback_url='oob', user_agent=None, debug=False): + def __init__(self, auth_method: str = 'oauth2', user: str = None, password: str = None, mediawiki_api_url: str = None, mediawiki_index_url: str = None, + mediawiki_rest_url: str = None, token_renew_period: int = 1800, consumer_token: str = None, consumer_secret: str = None, + access_token: str = None, access_secret: str = None, callback_url: str = 'oob', user_agent: str = None, debug: bool = False): """ This class handles several types of login procedures. Either use user and pwd authentication or OAuth. Wikidata clientlogin can also be used. If using one method, do NOT pass parameters for another method. 
:param user: the username which should be used for the login - :type user: str :param password: the password which should be used for the login - :type password: str :param token_renew_period: Seconds after which a new token should be requested from the Wikidata server - :type token_renew_period: int :param consumer_token: The consumer key for OAuth - :type consumer_token: str :param consumer_secret: The consumer secret for OAuth - :type consumer_secret: str :param access_token: The access token for OAuth - :type access_token: str :param access_secret: The access secret for OAuth - :type access_secret: str :param callback_url: URL which should be used as the callback URL - :type callback_url: str :param user_agent: UA string to use for API requests. - :type user_agent: str - :return: None """ self.auth_method = auth_method self.consumer_token = consumer_token - self.mediawiki_api_url = mediawiki_api_url or config['MEDIAWIKI_API_URL'] - self.mediawiki_index_url = mediawiki_index_url or config['MEDIAWIKI_INDEX_URL'] - self.mediawiki_rest_url = mediawiki_rest_url or config['MEDIAWIKI_REST_URL'] + self.mediawiki_api_url = str(mediawiki_api_url or config['MEDIAWIKI_API_URL']) + self.mediawiki_index_url = str(mediawiki_index_url or config['MEDIAWIKI_INDEX_URL']) + self.mediawiki_rest_url = str(mediawiki_rest_url or config['MEDIAWIKI_REST_URL']) self.token_renew_period = token_renew_period self.callback_url = callback_url - self.user_agent = get_user_agent(user_agent if user_agent else config['USER_AGENT']) + self.user_agent = get_user_agent(user_agent or (str(config['USER_AGENT']) if config['USER_AGENT'] is not None else None)) if self.auth_method not in ['login', 'clientlogin', 'oauth1', 'oauth2']: raise ValueError("The auth_method must be 'login', 'clientlogin', 'oauth1' or 'oauth2'") self.session = requests.Session() - self.edit_token = None + self.edit_token: Optional[str] = None self.instantiation_time = time.time() - self.response_qs = None + self.response_qs: 
Optional[str] = None self.session.headers.update({ 'User-Agent': self.user_agent @@ -86,10 +80,10 @@ def __init__(self, auth_method='oauth2', user=None, password=None, mediawiki_api else: # Oauth procedure, based on https://www.mediawiki.org/wiki/OAuth/For_Developers # Consruct a "consumer" from the key/secret provided by MediaWiki - self.consumer_token = ConsumerToken(self.consumer_token, consumer_secret) + self.oauth1_consumer_token = ConsumerToken(self.consumer_token, consumer_secret) # Construct handshaker with wiki URI and consumer - self.handshaker = Handshaker(mw_uri=self.mediawiki_index_url, consumer_token=self.consumer_token, callback=self.callback_url, user_agent=self.user_agent) + self.handshaker = Handshaker(mw_uri=self.mediawiki_index_url, consumer_token=self.oauth1_consumer_token, callback=self.callback_url, user_agent=self.user_agent) # Step 1: Initialize -- ask MediaWiki for a temp key/secret for user # redirect -> authorization -> callback url @@ -158,7 +152,7 @@ def __init__(self, auth_method='oauth2', user=None, password=None, mediawiki_api self.generate_edit_credentials() - def generate_edit_credentials(self): + def generate_edit_credentials(self) -> RequestsCookieJar: """ request an edit token and update the cookie_jar in order to add the session cookie :return: Returns a json with all relevant cookies, aka cookie jar @@ -176,7 +170,7 @@ def generate_edit_credentials(self): return self.session.cookies - def get_edit_cookie(self): + def get_edit_cookie(self) -> RequestsCookieJar: """ Can be called in order to retrieve the cookies from an instance of wbi_login.Login :return: Returns a json with all relevant cookies, aka cookie jar @@ -187,7 +181,7 @@ def get_edit_cookie(self): return self.session.cookies - def get_edit_token(self): + def get_edit_token(self) -> Optional[str]: """ Can be called in order to retrieve the edit token from an instance of wbi_login.Login :return: returns the edit token @@ -198,14 +192,14 @@ def get_edit_token(self): 
return self.edit_token - def get_session(self): + def get_session(self) -> Session: """ returns the requests session object used for the login. :return: Object of type requests.Session() """ return self.session - def continue_oauth(self, oauth_callback_data=None): + def continue_oauth(self, oauth_callback_data: str = None) -> None: """ Continuation of OAuth procedure. Method must be explicitly called in order to complete OAuth. This allows external entities, e.g. websites, to provide tokens through callback URLs directly. @@ -220,13 +214,17 @@ def continue_oauth(self, oauth_callback_data=None): self.response_qs = input("Callback URL: ") # input the url from redirect after authorization - response_qs = self.response_qs.split(b'?')[-1] + response_qs = self.response_qs.split('?')[-1] # Step 3: Complete -- obtain authorized key/secret for "resource owner" access_token = self.handshaker.complete(self.request_token, response_qs) + if self.oauth1_consumer_token is None: + raise ValueError("consumer_token can't be None") + # input the access token to return a csrf (edit) token - auth = OAuth1(self.consumer_token.key, client_secret=self.consumer_token.secret, resource_owner_key=access_token.key, resource_owner_secret=access_token.secret) + auth = OAuth1(client_key=self.oauth1_consumer_token.key, client_secret=self.oauth1_consumer_token.secret, resource_owner_key=access_token.key, + resource_owner_secret=access_token.secret) self.session.auth = auth self.generate_edit_credentials() diff --git a/wikibaseintegrator/wikibaseintegrator.py b/wikibaseintegrator/wikibaseintegrator.py index 781d7e7c..d42bf388 100644 --- a/wikibaseintegrator/wikibaseintegrator.py +++ b/wikibaseintegrator/wikibaseintegrator.py @@ -2,13 +2,12 @@ from wikibaseintegrator.entities.lexeme import Lexeme from wikibaseintegrator.entities.mediainfo import MediaInfo from wikibaseintegrator.entities.property import Property +from wikibaseintegrator.wbi_login import Login class WikibaseIntegrator: - def 
__init__(self, - is_bot=False, - login=None): + def __init__(self, is_bot: bool = False, login: Login = None): # Runtime variables self.is_bot = is_bot or False self.login = login From b118ff42f79896b7852cf6daa142d8339a7e97fb Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 26 Sep 2021 20:15:54 +0200 Subject: [PATCH 122/308] Fix some typos --- wikibaseintegrator/wbi_fastrun.py | 3 +-- wikibaseintegrator/wikibaseintegrator.py | 8 +++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 8582c589..cd3b5135 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -22,8 +22,7 @@ class FastRunContainer: def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = None, sparql_endpoint_url: str = None, wikibase_url: str = None, - base_filter: Dict[str, str] = None, - use_refs: bool = False, case_insensitive: bool = False, debug: bool = None): + base_filter: Dict[str, str] = None, use_refs: bool = False, case_insensitive: bool = False, debug: bool = None): self.reconstructed_statements: List[BaseDataType] = [] self.rev_lookup: defaultdict[str, Set[str]] = defaultdict(set) self.rev_lookup_ci: defaultdict[str, Set[str]] = defaultdict(set) diff --git a/wikibaseintegrator/wikibaseintegrator.py b/wikibaseintegrator/wikibaseintegrator.py index d42bf388..05deca34 100644 --- a/wikibaseintegrator/wikibaseintegrator.py +++ b/wikibaseintegrator/wikibaseintegrator.py @@ -1,8 +1,14 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + from wikibaseintegrator.entities.item import Item from wikibaseintegrator.entities.lexeme import Lexeme from wikibaseintegrator.entities.mediainfo import MediaInfo from wikibaseintegrator.entities.property import Property -from wikibaseintegrator.wbi_login import Login + +if TYPE_CHECKING: + from wikibaseintegrator.wbi_login import Login class 
WikibaseIntegrator: From a8aee02e41b58f2c9e645f4268cf2639a302867c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Mon, 27 Sep 2021 21:59:05 +0200 Subject: [PATCH 123/308] Prepare v0.12.0.dev6 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 4c68650c..6e4e5d65 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = wikibaseintegrator -version = 0.12.0.dev5 +version = 0.12.0.dev6 author = Myst, WikibaseIntegrator authors and WikidataIntegrator authors license = MIT license_files = LICENSE From 831198213284ac019d9e8b6f4e7fbc3781ccc708 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 13 Oct 2021 18:21:45 +0200 Subject: [PATCH 124/308] Fix test --- test/test_wbi_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index c0ddea30..042c52e9 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -66,7 +66,7 @@ def test_basedatatype_action_if_exists(self): claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31') if not x.removed] removed_claims = [True for x in item.claims.get('P31') if x.removed] # Append claims to item, replace already existing claims with new ones, only one if it's the same property number - assert len(claims) == 1 and 'Q1234' in claims and len(removed_claims) == 2 and True in removed_claims and claims.count('Q1234') == 1 + assert len(claims) == 1 and 'Q1234' in claims and len(removed_claims) == len_claims_original and True in removed_claims and claims.count('Q1234') == 1 def test_description(self): item = wbi.item.get('Q2') From 880a6722378e423405ce44106404877665a7d27e Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 13 Oct 2021 18:22:21 +0200 Subject: [PATCH 125/308] Add 3.11-dev to workflow --- .github/workflows/python-pytest.yml | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-pytest.yml b/.github/workflows/python-pytest.yml index dca4ef08..80c8f81a 100644 --- a/.github/workflows/python-pytest.yml +++ b/.github/workflows/python-pytest.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.7', '3.8', '3.9', '3.10-dev' ] + python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11-dev' ] steps: - uses: actions/checkout@v2 From 5fae7985a2af9b364f58899ada151cdb4131c5a5 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 13 Oct 2021 18:39:06 +0200 Subject: [PATCH 126/308] Fix missing version --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 6e4e5d65..b55e3f2e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ install_requires = oauthlib~=3.1.1 requests~=2.26.0 simplejson~=3.17.5 -python_requires = >=3.7, <3.11 +python_requires = >=3.7, <=3.11 [options.extras_require] dev = From 3f21e7903066d1c539c171a511ea6d5a31742979 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 16 Oct 2021 18:53:35 +0200 Subject: [PATCH 127/308] Update IDEA project to 3.10 --- .idea/WikibaseIntegrator.iml | 2 +- .idea/misc.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.idea/WikibaseIntegrator.iml b/.idea/WikibaseIntegrator.iml index 47cb73f0..c3051748 100644 --- a/.idea/WikibaseIntegrator.iml +++ b/.idea/WikibaseIntegrator.iml @@ -13,7 +13,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index c430bbcb..80b44c9f 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,6 +1,6 @@ - + From de49240f45b8e0a193d658deaa8e2751752480f2 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 16 Oct 2021 18:53:57 +0200 Subject: [PATCH 128/308] Remove 3.11-dev Fail with pytest --- .github/workflows/python-pytest.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/python-pytest.yml b/.github/workflows/python-pytest.yml index 80c8f81a..4e1ccecd 100644 --- a/.github/workflows/python-pytest.yml +++ b/.github/workflows/python-pytest.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11-dev' ] + python-version: [ '3.7', '3.8', '3.9', '3.10' ] # '3.11-dev' pytest failing steps: - uses: actions/checkout@v2 From 4ae604ff30494711853ab98a672387fe4e46ff4d Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 21 Oct 2021 18:40:09 +0200 Subject: [PATCH 129/308] Add item creation notebook --- notebooks/item_create_new.ipynb | 310 ++++++++++++++++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 notebooks/item_create_new.ipynb diff --git a/notebooks/item_create_new.ipynb b/notebooks/item_create_new.ipynb new file mode 100644 index 00000000..e30c88e0 --- /dev/null +++ b/notebooks/item_create_new.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3bc67e37", + "metadata": {}, + "source": [ + "# Create a new Item" + ] + }, + { + "cell_type": "markdown", + "id": "3646845d", + "metadata": {}, + "source": [ + "Load wikibaseintegrator" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "498b50c2", + "metadata": {}, + "outputs": [], + "source": [ + "from wikibaseintegrator import WikibaseIntegrator\n", + "from wikibaseintegrator import wbi_login\n", + "from wikibaseintegrator.datatypes import String\n", + "from wikibaseintegrator.wbi_config import config" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "WDUSER = ''\n", + "WDPASS = ''" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Set default variables" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + 
"source": [ + "config['MEDIAWIKI_API_URL'] = 'https://test.wikidata.org/w/api.php'\n", + "config['USER_AGENT'] = 'Lexeme Write Notebook'" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Create login and WikibaseIntegrator object" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Successfully logged in as MystBot\n" + ] + } + ], + "source": [ + "login = wbi_login.Login(auth_method='login', user=WDUSER, password=WDPASS,\n", + " mediawiki_api_url='https://test.wikidata.org/w/api.php')\n", + "wbi = WikibaseIntegrator(login=login)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Create a new Item object" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5ce0df1e", + "metadata": {}, + "outputs": [], + "source": [ + "new_item = wbi.item.new()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Set labels" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_item.labels.set('en', 'New item')\n", + "new_item.labels.set('fr', 'Nouvel élément')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Set aliases" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [ + { + "data": { + "text/plain": "], 'fr': []}>" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_item.aliases.set('en', 'Item')\n", + 
"new_item.aliases.set('fr', 'Élément')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Set descriptions" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_item.descriptions.set('en', 'A freshly created element')\n", + "new_item.descriptions.set('fr', 'Un élément fraichement créé')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Create a claim and add it to the new item" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "data": { + "text/plain": " _Snak__property_number='P31533' _Snak__hash=None _Snak__datavalue={'value': 'A String property', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__removed=False _Claim__references=>]}>" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_claim = String(prop_nr='P31533', value='A String property')\n", + "\n", + "new_item.claims.add(new_claim)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Write the new item to the Wikibase instance" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "29c22ef2", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "\n\t lastrevid=551951\n\t type='item'\n\t id='Q222825'\n\t claims= _Snak__property_number='P31533' _Snak__hash='112d32b098a091cc1398c779e76c763a523d4ffc' 
_Snak__datavalue={'value': 'A String property', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id='Q222825$FE2928B1-3353-49D1-B6E2-C87ACCDCAB0D' _Claim__rank= _Claim__removed=False _Claim__references=>]}>\n\t fast_run_container=None\n\t debug=False\n\t labels=, 'fr': }>\n\t descriptions=, 'fr': }>\n\t aliases=], 'fr': []}>\n\t sitelinks=>" + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_item.write()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file From d221031fdb1b0d1376eedb234027d0a929ed7247 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 21 Oct 2021 18:56:51 +0200 Subject: [PATCH 130/308] Update lexeme_write.ipynb --- notebooks/lexeme_write.ipynb | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/notebooks/lexeme_write.ipynb b/notebooks/lexeme_write.ipynb index 3eff937a..509fd43a 100644 --- a/notebooks/lexeme_write.ipynb +++ b/notebooks/lexeme_write.ipynb @@ -1,5 +1,14 @@ { "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Write a new Lexeme" + ], + "metadata": { + "collapsed": false + } + }, { "cell_type": "code", "execution_count": 1, From c70536ec80c6c8ed9b850d10196fc343be31af19 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 21 Oct 2021 20:33:40 +0200 Subject: [PATCH 131/308] Update WikibaseIntegrator.iml --- .idea/WikibaseIntegrator.iml | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/.idea/WikibaseIntegrator.iml b/.idea/WikibaseIntegrator.iml index 461f0a35..a4941bcb 100644 --- a/.idea/WikibaseIntegrator.iml +++ b/.idea/WikibaseIntegrator.iml @@ -7,6 +7,9 @@ + + + From c0a49b17df7766d4a771727629fb73eced991ab2 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 24 Oct 2021 10:37:35 +0200 Subject: [PATCH 132/308] Fix versions in GitHub workflows --- .github/workflows/python-lint.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index 3031d17d..dfdd6b37 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -11,13 +11,13 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + - uses: actions/checkout@v2.3.5 + - name: Set up Python 3.9 + uses: actions/setup-python@v2.2.2 with: python-version: '3.9' - name: Cache pip - uses: actions/cache@v2 + uses: actions/cache@v2.1.6 with: path: ~/.cache/pip key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} From 0e7aef4dfe0d6583d2985c203fe72873f405d6bd Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 24 Oct 2021 10:38:29 +0200 Subject: [PATCH 133/308] Code Quality and Lint with Python 3.10 --- .github/workflows/python-lint.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index dfdd6b37..1f9a2d65 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -12,10 +12,10 @@ jobs: steps: - uses: actions/checkout@v2.3.5 - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v2.2.2 with: - python-version: '3.9' + python-version: '3.10' - name: Cache pip uses: actions/cache@v2.1.6 with: From 0a8a0411745831068d366291117a514eb98cee2d Mon Sep 17 
00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 24 Oct 2021 10:50:27 +0200 Subject: [PATCH 134/308] Remove redundant code --- wikibaseintegrator/models/aliases.py | 11 ++-------- wikibaseintegrator/models/basemodel.py | 8 +++++++ wikibaseintegrator/models/claims.py | 21 +++---------------- wikibaseintegrator/models/forms.py | 21 +++---------------- wikibaseintegrator/models/language_values.py | 21 +++---------------- wikibaseintegrator/models/qualifiers.py | 11 ++-------- wikibaseintegrator/models/references.py | 21 +++---------------- wikibaseintegrator/models/senses.py | 21 +++---------------- wikibaseintegrator/models/sitelinks.py | 22 ++++---------------- wikibaseintegrator/models/snaks.py | 21 +++---------------- 10 files changed, 34 insertions(+), 144 deletions(-) create mode 100644 wikibaseintegrator/models/basemodel.py diff --git a/wikibaseintegrator/models/aliases.py b/wikibaseintegrator/models/aliases.py index cc9f168e..a97edae7 100644 --- a/wikibaseintegrator/models/aliases.py +++ b/wikibaseintegrator/models/aliases.py @@ -2,12 +2,13 @@ from typing import Dict, List, Optional, Union +from wikibaseintegrator.models.basemodel import BaseModel from wikibaseintegrator.models.language_values import LanguageValue from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists -class Aliases: +class Aliases(BaseModel): def __init__(self, language: str = None, value: str = None): self.aliases: Dict[str, str] = {} @@ -91,14 +92,6 @@ def from_json(self, json_data: Dict[str, list]) -> Aliases: # all_aliases = [item for sublist in list(self.aliases.values()) for item in sublist] # return item in list(map(lambda x: x.value, all_aliases)) - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - class 
Alias(LanguageValue): pass diff --git a/wikibaseintegrator/models/basemodel.py b/wikibaseintegrator/models/basemodel.py new file mode 100644 index 00000000..dbcbdf2e --- /dev/null +++ b/wikibaseintegrator/models/basemodel.py @@ -0,0 +1,8 @@ +class BaseModel: + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), + ) diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 8650a8a4..409bfe8f 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -3,13 +3,14 @@ import copy from typing import Any, Callable, Dict, List, Optional, Union +from wikibaseintegrator.models.basemodel import BaseModel from wikibaseintegrator.models.qualifiers import Qualifiers from wikibaseintegrator.models.references import Reference, References from wikibaseintegrator.models.snaks import Snak, Snaks from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseRank -class Claims: +class Claims(BaseModel): def __init__(self): self.claims = {} @@ -107,16 +108,8 @@ def __iter__(self): iterate.extend(claim) return iter(iterate) - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - -class Claim: +class Claim(BaseModel): DTYPE = 'claim' def __init__(self, qualifiers: Qualifiers = None, rank: WikibaseRank = None, references: Union[References, List[Union[Claim, List[Claim]]]] = None) -> None: @@ -287,14 +280,6 @@ def __eq__(self, other): return self.mainsnak.datavalue == other.mainsnak.datavalue and self.mainsnak.property_number == other.mainsnak.property_number and self.has_equal_qualifiers(other) raise TypeError - def __repr__(self): - """A mixin 
implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - def equals(self, that: Claim, include_ref: bool = False, fref: Callable = None) -> bool: """ Tests for equality of two statements. diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py index 60beccca..72a3ec0b 100644 --- a/wikibaseintegrator/models/forms.py +++ b/wikibaseintegrator/models/forms.py @@ -2,11 +2,12 @@ from typing import Any, Dict, List, Union +from wikibaseintegrator.models.basemodel import BaseModel from wikibaseintegrator.models.claims import Claims from wikibaseintegrator.models.language_values import LanguageValues -class Forms: +class Forms(BaseModel): def __init__(self): self.forms = {} @@ -39,16 +40,8 @@ def get_json(self) -> List[Dict]: return json_data - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - -class Form: +class Form(BaseModel): def __init__(self, form_id: str = None, representations: Representations = None, grammatical_features: Union[str, int, List[str]] = None, claims: Claims = None): self.id = form_id self.representations: Representations = representations or LanguageValues() @@ -119,14 +112,6 @@ def get_json(self) -> Dict[str, Union[str, Dict, List]]: return json_data - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - class Representations(LanguageValues): pass diff --git a/wikibaseintegrator/models/language_values.py b/wikibaseintegrator/models/language_values.py index 556afe7b..d5d8487d 100644 --- 
a/wikibaseintegrator/models/language_values.py +++ b/wikibaseintegrator/models/language_values.py @@ -2,11 +2,12 @@ from typing import Dict, Optional +from wikibaseintegrator.models.basemodel import BaseModel from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists -class LanguageValues: +class LanguageValues(BaseModel): def __init__(self): self.values = {} @@ -65,16 +66,8 @@ def get_json(self) -> Dict[str, Dict]: def __iter__(self): return iter(self.values.values()) - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - -class LanguageValue: +class LanguageValue(BaseModel): def __init__(self, language: str, value: str = None): self.language = language self.value = value @@ -147,11 +140,3 @@ def __len__(self): def __str__(self): return self.value - - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) diff --git a/wikibaseintegrator/models/qualifiers.py b/wikibaseintegrator/models/qualifiers.py index 7bc45596..31c6cd4c 100644 --- a/wikibaseintegrator/models/qualifiers.py +++ b/wikibaseintegrator/models/qualifiers.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Dict, List, Union +from wikibaseintegrator.models.basemodel import BaseModel from wikibaseintegrator.models.snaks import Snak from wikibaseintegrator.wbi_enums import ActionIfExists @@ -9,7 +10,7 @@ from wikibaseintegrator.models.claims import Claim -class Qualifiers: +class Qualifiers(BaseModel): def __init__(self): self.qualifiers = {} @@ -78,11 +79,3 @@ def __iter__(self): def __len__(self): return len(self.qualifiers) - - def __repr__(self): - """A mixin implementing a simple 
__repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) diff --git a/wikibaseintegrator/models/references.py b/wikibaseintegrator/models/references.py index 1b82ee84..c0c1a1f3 100644 --- a/wikibaseintegrator/models/references.py +++ b/wikibaseintegrator/models/references.py @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union +from wikibaseintegrator.models.basemodel import BaseModel from wikibaseintegrator.models.snaks import Snak, Snaks from wikibaseintegrator.wbi_enums import ActionIfExists @@ -9,7 +10,7 @@ from wikibaseintegrator.models.claims import Claim -class References: +class References(BaseModel): def __init__(self): self.references = [] @@ -77,16 +78,8 @@ def __iter__(self): def __len__(self): return len(self.references) - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - -class Reference: +class Reference(BaseModel): def __init__(self, snaks: Snaks = None, snaks_order: List = None): self.hash = None self.snaks = snaks or Snaks() @@ -148,11 +141,3 @@ def __iter__(self): def __len__(self): return len(self.snaks) - - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) diff --git a/wikibaseintegrator/models/senses.py b/wikibaseintegrator/models/senses.py index 73285a96..42896ecc 100644 --- a/wikibaseintegrator/models/senses.py +++ b/wikibaseintegrator/models/senses.py @@ -2,12 +2,13 @@ from typing import Any, Dict, List, Optional, Union +from wikibaseintegrator.models.basemodel import BaseModel from 
wikibaseintegrator.models.claims import Claims from wikibaseintegrator.models.language_values import LanguageValues from wikibaseintegrator.wbi_enums import ActionIfExists -class Senses: +class Senses(BaseModel): def __init__(self): self.senses = [] @@ -36,16 +37,8 @@ def get_json(self) -> List[Dict]: return json_data - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - -class Sense: +class Sense(BaseModel): def __init__(self, sense_id: str = None, glosses: Glosses = None, claims: Claims = None): self.id = sense_id self.glosses: LanguageValues = glosses or Glosses() @@ -79,14 +72,6 @@ def remove(self) -> Sense: self.removed = True return self - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - class Glosses(LanguageValues): pass diff --git a/wikibaseintegrator/models/sitelinks.py b/wikibaseintegrator/models/sitelinks.py index ac95fc54..e833c44c 100644 --- a/wikibaseintegrator/models/sitelinks.py +++ b/wikibaseintegrator/models/sitelinks.py @@ -2,8 +2,10 @@ from typing import Dict, List, Optional +from wikibaseintegrator.models.basemodel import BaseModel -class Sitelinks: + +class Sitelinks(BaseModel): def __init__(self): self.sitelinks: Dict[str, Sitelink] = {} @@ -24,16 +26,8 @@ def from_json(self, json_data: Dict[str, Dict]) -> Sitelinks: return self - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - -class Sitelink: +class Sitelink(BaseModel): def __init__(self, site: str = None, title: str 
= None, badges: List[str] = None): self.site = site self.title = title @@ -41,11 +35,3 @@ def __init__(self, site: str = None, title: str = None, badges: List[str] = None def __str__(self): return self.title - - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) diff --git a/wikibaseintegrator/models/snaks.py b/wikibaseintegrator/models/snaks.py index 6dfb1dd9..4358137a 100644 --- a/wikibaseintegrator/models/snaks.py +++ b/wikibaseintegrator/models/snaks.py @@ -3,10 +3,11 @@ import re from typing import Any, Dict +from wikibaseintegrator.models.basemodel import BaseModel from wikibaseintegrator.wbi_enums import WikibaseSnakType -class Snaks: +class Snaks(BaseModel): def __init__(self): self.snaks = {} @@ -48,16 +49,8 @@ def __iter__(self): def __len__(self): return len(self.snaks) - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - -class Snak: +class Snak(BaseModel): def __init__(self, snaktype: WikibaseSnakType = WikibaseSnakType.KNOWN_VALUE, property_number: str = None, hash: str = None, datavalue: Dict = None, datatype: str = None): self.snaktype = snaktype self.property_number = property_number @@ -145,11 +138,3 @@ def get_json(self) -> Dict[str, str]: def __eq__(self, other): return self.snaktype == other.snaktype and self.property_number == other.property_number and self.datatype == other.datatype and self.datavalue == other.datavalue - - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) From 
307e69793a5c5cb5761b6c38f8333b9c635fe39c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 24 Oct 2021 11:22:59 +0200 Subject: [PATCH 135/308] Fix consider-using-dict-items pylint issue --- wikibaseintegrator/models/aliases.py | 4 ++-- wikibaseintegrator/models/claims.py | 4 ++-- wikibaseintegrator/models/forms.py | 6 +++--- wikibaseintegrator/models/language_values.py | 4 ++-- wikibaseintegrator/models/snaks.py | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/wikibaseintegrator/models/aliases.py b/wikibaseintegrator/models/aliases.py index a97edae7..72cd15b8 100644 --- a/wikibaseintegrator/models/aliases.py +++ b/wikibaseintegrator/models/aliases.py @@ -74,10 +74,10 @@ def set(self, language: str = None, values: Union[str, list] = None, action_if_e def get_json(self) -> Dict[str, list]: json_data: Dict[str, list] = {} - for language in self.aliases: + for language, aliases in self.aliases.items(): if language not in json_data: json_data[language] = [] - for alias in self.aliases[language]: + for alias in aliases: json_data[language].append(alias.get_json()) return json_data diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 409bfe8f..2378fdd3 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -89,10 +89,10 @@ def from_json(self, json_data: Dict[str, Any]) -> Claims: def get_json(self) -> Dict[str, list]: json_data: Dict[str, list] = {} - for property in self.claims: + for property, claims in self.claims.items(): if property not in json_data: json_data[property] = [] - for claim in self.claims[property]: + for claim in claims: json_data[property].append(claim.get_json()) return json_data diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py index 72a3ec0b..e13ff8de 100644 --- a/wikibaseintegrator/models/forms.py +++ b/wikibaseintegrator/models/forms.py @@ -12,7 +12,7 @@ def __init__(self): 
self.forms = {} @property - def forms(self): + def forms(self) -> Dict: return self.__forms @forms.setter @@ -35,8 +35,8 @@ def from_json(self, json_data: List[Dict]) -> Forms: def get_json(self) -> List[Dict]: json_data: List[Dict] = [] - for form in self.forms: - json_data.append(self.forms[form].get_json()) + for key, form in self.forms.items(): + json_data.append(form.get_json()) return json_data diff --git a/wikibaseintegrator/models/language_values.py b/wikibaseintegrator/models/language_values.py index d5d8487d..aa2ba814 100644 --- a/wikibaseintegrator/models/language_values.py +++ b/wikibaseintegrator/models/language_values.py @@ -58,8 +58,8 @@ def from_json(self, json_data: Dict[str, Dict]) -> LanguageValues: def get_json(self) -> Dict[str, Dict]: json_data: Dict[str, Dict] = {} - for value in self.values: - json_data[value] = self.values[value].get_json() + for language, language_value in self.values.items(): + json_data[language] = language_value.get_json() return json_data diff --git a/wikibaseintegrator/models/snaks.py b/wikibaseintegrator/models/snaks.py index 4358137a..ffea3d9e 100644 --- a/wikibaseintegrator/models/snaks.py +++ b/wikibaseintegrator/models/snaks.py @@ -33,10 +33,10 @@ def from_json(self, json_data: Dict[str, list]) -> Snaks: def get_json(self) -> Dict[str, list]: json_data: Dict[str, list] = {} - for property in self.snaks: + for property, snaks in self.snaks.items(): if property not in json_data: json_data[property] = [] - for snak in self.snaks[property]: + for snak in snaks: json_data[property].append(snak.get_json()) return json_data From ac0d11371dfd932ec49fc55f105eba19503c6b7b Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 24 Oct 2021 11:23:25 +0200 Subject: [PATCH 136/308] Fix use-dict-literal pylint issue --- test/test_wbi_fastrun.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py index fa4305d7..40ce8a9b 100644 --- 
a/test/test_wbi_fastrun.py +++ b/test/test_wbi_fastrun.py @@ -116,7 +116,7 @@ def __init__(self, *args: Any, **kwargs: Any): self.prop_data['Q14911732'] = {'P594': { 'fake statement id': { 'qual': set(), - 'ref': dict(), + 'ref': {}, 'v': 'ENSG00000123374'}}} self.rev_lookup = defaultdict(set) self.rev_lookup['ENSG00000123374'].add('Q14911732') From 96792291ffd085220d7cfbe2b0c89bed5c3b297c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 24 Oct 2021 11:24:38 +0200 Subject: [PATCH 137/308] Fix unneeded-not pylint issue --- wikibaseintegrator/wbi_fastrun.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index cd3b5135..c4742eb0 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -319,7 +319,7 @@ def check_language_data(self, qid: str, lang_data: List, lang: str, lang_data_ty all_lang_strings = {x.strip().casefold() for x in self.get_language_data(qid, lang, lang_data_type)} if action_if_exists == ActionIfExists.REPLACE: - return not collections.Counter(all_lang_strings) == collections.Counter(map(lambda x: x.casefold(), lang_data)) + return collections.Counter(all_lang_strings) != collections.Counter(map(lambda x: x.casefold(), lang_data)) for s in lang_data: if s.strip().casefold() not in all_lang_strings: From a9767dc38c7183810b4ef51d438246bd70ff5d4f Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 24 Oct 2021 12:12:57 +0200 Subject: [PATCH 138/308] Pylint improvements --- pyproject.toml | 16 ++++++++++++- wikibaseintegrator/entities/baseentity.py | 4 ++-- wikibaseintegrator/models/basemodel.py | 2 +- wikibaseintegrator/models/forms.py | 2 +- wikibaseintegrator/wbi_backoff.py | 4 ++-- wikibaseintegrator/wbi_config.py | 4 ++-- wikibaseintegrator/wbi_fastrun.py | 28 +++++++++++------------ wikibaseintegrator/wbi_helpers.py | 10 ++++---- 
wikibaseintegrator/wbi_login.py | 18 +++++++-------- 9 files changed, 49 insertions(+), 39 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4913d438..8606b543 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,5 +12,19 @@ line_length = 179 ignore_missing_imports = true files = "wikibaseintegrator/**/*.py,test/*.py" -[tool.pylint.format] +[tool.pylint.messages_control] max-line-length = 179 +disable = [ + "fixme", + "missing-docstring", + "redefined-builtin", + "invalid-name", # To remove later + "too-few-public-methods", + "too-many-arguments", + "too-many-statements", + "too-many-locals", + "too-many-branches", + "too-many-instance-attributes", + "import-outside-toplevel" # To remove later +] + diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 213f4288..31350901 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -79,7 +79,7 @@ def from_json(self, json_data: Dict[str, Any]) -> BaseEntity: return self # noinspection PyMethodMayBeStatic - def _get(self, entity_id: str, **kwargs: Any) -> Dict: + def _get(self, entity_id: str, **kwargs: Any) -> Dict: # pylint: disable=no-self-use """ retrieve an item in json representation from the Wikibase instance @@ -224,7 +224,7 @@ def write_required(self, base_filter: Dict[str, str] = None, **kwargs: Any) -> b def __repr__(self): """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( + return "<{klass} @{id:x} {attrs}>".format( # pylint: disable=consider-using-f-string klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, attrs="\r\n\t ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), diff --git a/wikibaseintegrator/models/basemodel.py b/wikibaseintegrator/models/basemodel.py index dbcbdf2e..abbb5d60 100644 --- a/wikibaseintegrator/models/basemodel.py +++ b/wikibaseintegrator/models/basemodel.py @@ -1,7 +1,7 @@ class BaseModel: def __repr__(self): """A mixin 
implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( + return "<{klass} @{id:x} {attrs}>".format( # pylint: disable=consider-using-f-string klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py index e13ff8de..8a613315 100644 --- a/wikibaseintegrator/models/forms.py +++ b/wikibaseintegrator/models/forms.py @@ -35,7 +35,7 @@ def from_json(self, json_data: List[Dict]) -> Forms: def get_json(self) -> List[Dict]: json_data: List[Dict] = [] - for key, form in self.forms.items(): + for _, form in self.forms.items(): json_data.append(form.get_json()) return json_data diff --git a/wikibaseintegrator/wbi_backoff.py b/wikibaseintegrator/wbi_backoff.py index 4836fc62..77347daf 100644 --- a/wikibaseintegrator/wbi_backoff.py +++ b/wikibaseintegrator/wbi_backoff.py @@ -12,7 +12,7 @@ def wbi_backoff_backoff_hdlr(details): exc_type, exc_value, _ = sys.exc_info() if exc_type == JSONDecodeError: print(exc_value.doc) # pragma: no cover - print("Backing off {wait:0.1f} seconds afters {tries} tries calling function with args {args} and kwargs {kwargs}".format(**details)) + print("Backing off {wait:0.1f} seconds afters {tries} tries calling function with args {args} and kwargs {kwargs}".format(**details)) # pylint: disable=consider-using-f-string def wbi_backoff_check_json_decode_error(e): @@ -22,7 +22,7 @@ def wbi_backoff_check_json_decode_error(e): :param e: :return: """ - return type(e) == JSONDecodeError and str(e) != "Expecting value: line 1 column 1 (char 0)" + return isinstance(e, JSONDecodeError) and str(e) != "Expecting value: line 1 column 1 (char 0)" wbi_backoff_exceptions = (requests.exceptions.Timeout, requests.exceptions.ConnectionError, requests.HTTPError, JSONDecodeError) diff --git a/wikibaseintegrator/wbi_config.py b/wikibaseintegrator/wbi_config.py index 42f4327f..92d4fac1 100644 --- 
a/wikibaseintegrator/wbi_config.py +++ b/wikibaseintegrator/wbi_config.py @@ -1,5 +1,3 @@ -from typing import Dict, Union - """ Config global options Options can be changed at run time. See tests/test_backoff.py for usage example @@ -14,6 +12,8 @@ See: https://meta.wikimedia.org/wiki/User-Agent_policy """ +from typing import Dict, Union + config: Dict[str, Union[str, int, None, bool]] = { 'BACKOFF_MAX_TRIES': 5, 'BACKOFF_MAX_VALUE': 3600, diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index c4742eb0..e8b35fd0 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -234,10 +234,8 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi bool_vec.append(x.equals(date, include_ref=self.use_refs)) else: bool_vec.append(False) - """ - bool_vec = [x.equals(date, include_ref=self.use_refs, fref=self.ref_comparison_f) and - x.mainsnak.property_number not in del_props for x in tmp_rs] - """ + # bool_vec = [x.equals(date, include_ref=self.use_refs, fref=self.ref_comparison_f) and + # x.mainsnak.property_number not in del_props for x in tmp_rs] if self.debug: print(f"bool_vec: {bool_vec}") @@ -383,7 +381,7 @@ def format_query_results(self, r: List, prop_nr: str) -> None: # Note: no-value and some-value don't actually show up in the results here # see for example: select * where { wd:Q7207 p:P40 ?c . ?c ?d ?e } - if type(i['v']) is not dict: + if not isinstance(i['v'], dict): self.rev_lookup[i['v']].add(i['item']) if self.case_insensitive: self.rev_lookup_ci[i['v'].casefold()].add(i['item']) @@ -449,11 +447,11 @@ def _query_data(self, prop_nr: str, use_units: bool = False) -> None: num_pages = None if self.debug: # get the number of pages/queries so we can show a progress bar - query = """ + query = f""" SELECT (COUNT(?item) as ?c) where {{ - {base_filter} - ?item <{wb_url}/prop/{prop_nr}> ?sid . 
- }}""".format(wb_url=self.wikibase_url, base_filter=self.base_filter_string, prop_nr=prop_nr) + {self.base_filter_string} + ?item <{self.wikibase_url}/prop/{prop_nr}> ?sid . + }}""" if self.debug: print(query) @@ -576,16 +574,16 @@ def _query_lang(self, lang: str, lang_data_type: str) -> Optional[List[Dict[str, 'aliases': 'skos:altLabel' } - query = ''' + query = f''' #Tool: WikibaseIntegrator wbi_fastrun._query_lang SELECT ?item ?label WHERE {{ - {base_filter} + {self.base_filter_string} OPTIONAL {{ - ?item {lang_data_type} ?label FILTER (lang(?label) = "{lang}") . + ?item {lang_data_type_dict[lang_data_type]} ?label FILTER (lang(?label) = "{lang}") . }} }} - '''.format(base_filter=self.base_filter_string, lang_data_type=lang_data_type_dict[lang_data_type], lang=lang) + ''' if self.debug: print(query) @@ -602,7 +600,7 @@ def _process_lang(result: List) -> defaultdict[str, set]: return data @lru_cache(maxsize=100000) - def get_prop_datatype(self, prop_nr: str) -> Optional[str]: + def get_prop_datatype(self, prop_nr: str) -> Optional[str]: # pylint: disable=no-self-use from wikibaseintegrator import WikibaseIntegrator wbi = WikibaseIntegrator() property = wbi.property.get(prop_nr) @@ -619,7 +617,7 @@ def clear(self) -> None: def __repr__(self) -> str: """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( + return "<{klass} @{id:x} {attrs}>".format( # pylint: disable=consider-using-f-string klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, attrs="\r\n\t ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index f71c6357..3b5b6b5e 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -68,11 +68,9 @@ def mediawiki_api_call(method: str, mediawiki_api_url: str = None, session: Sess response.raise_for_status() json_data = response.json() - """ - Mediawiki api response has code = 200 even if there are errors. 
- rate limit doesn't return HTTP 429 either. may in the future - https://phabricator.wikimedia.org/T172293 - """ + # Mediawiki api response has code = 200 even if there are errors. + # Rate limit doesn't return HTTP 429 either, may in the future. + # https://phabricator.wikimedia.org/T172293 if 'error' in json_data: # rate limiting error_msg_names = set() @@ -391,7 +389,7 @@ def generate_entity_instances(entities: Union[str, List[str]], allow_anonymous: if isinstance(entities, str): entities = [entities] - assert type(entities) == list + assert isinstance(entities, list) params = { 'action': 'wbgetentities', diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index afe72965..34f00118 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -1,3 +1,7 @@ +""" +Login class for Wikidata. Takes username and password and stores the session cookies and edit tokens. +""" + import time import webbrowser from typing import Optional @@ -13,10 +17,6 @@ from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_helpers import get_user_agent -""" -Login class for Wikidata. Takes username and password and stores the session cookies and edit tokens. -""" - class Login: """ @@ -119,7 +119,7 @@ def __init__(self, auth_method: str = 'oauth2', user: str = None, password: str if 'login' in login_result and login_result['login']['result'] == 'Success': print("Successfully logged in as", login_result['login']['lgusername']) else: - raise LoginError("Login failed. Reason: '{}'".format(login_result['login']['reason'])) + raise LoginError(f"Login failed. Reason: '{login_result['login']['reason']}'") else: params = { 'action': 'clientlogin', @@ -138,17 +138,17 @@ def __init__(self, auth_method: str = 'oauth2', user: str = None, password: str if 'clientlogin' in login_result: clientlogin = login_result['clientlogin'] if clientlogin['status'] != 'PASS': - raise LoginError("Login failed ({}). 
Message: '{}'".format(clientlogin['messagecode'], clientlogin['message'])) + raise LoginError(f"Login failed ({clientlogin['messagecode']}). Message: '{clientlogin['message']}'") if debug: print("Successfully logged in as", clientlogin['username']) else: - raise LoginError("Login failed ({}). Message: '{}'".format(login_result['error']['code'], login_result['error']['info'])) + raise LoginError(f"Login failed ({login_result['error']['code']}). Message: '{login_result['error']['info']}'") if 'warnings' in login_result: print("MediaWiki login warnings messages:") for message in login_result['warnings']: - print("* {}: {}".format(message, login_result['warnings'][message]['*'])) + print(f"* {message}: {login_result['warnings'][message]['*']}") self.generate_edit_credentials() @@ -165,7 +165,7 @@ def generate_edit_credentials(self) -> RequestsCookieJar: } response = self.session.get(self.mediawiki_api_url, params=params).json() if 'error' in response: - raise LoginError("Login failed ({}). Message: '{}'".format(response['error']['code'], response['error']['info'])) + raise LoginError(f"Login failed ({response['error']['code']}). 
Message: '{response['error']['info']}'") self.edit_token = response['query']['tokens']['csrftoken'] return self.session.cookies From a80cb71ae229acb5b5b47751ae1edd343571430c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 17 Nov 2021 13:18:03 +0100 Subject: [PATCH 139/308] Add missing extra_require --- setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 38d83ab1..18d9caaa 100644 --- a/setup.py +++ b/setup.py @@ -12,7 +12,12 @@ "simplejson ~= 3.17.5" ], extras_require={ - "dev": ["pytest"], + "dev": [ + "pytest", + "pylint", + "pylint-exit", + "mypy" + ], "coverage": ["pytest-cov"], }, ) From d0f208a610af094533707568c5bd51d1d493d2b6 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 17 Nov 2021 13:23:26 +0100 Subject: [PATCH 140/308] Add missing install_requires and update version --- requirements.txt | 2 +- setup.cfg | 4 ++-- setup.py | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 089ff763..c859d7be 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ backoff~=1.11.1 -frozendict~=2.0.6 +frozendict~=2.0.7 mwoauth~=0.3.7 oauthlib~=3.1.1 requests~=2.26.0 diff --git a/setup.cfg b/setup.cfg index 58bfdea8..14c49e86 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,11 +33,11 @@ classifiers = packages = find: install_requires = backoff~=1.11.1 - frozendict~=2.0.6 + frozendict~=2.0.7 mwoauth~=0.3.7 oauthlib~=3.1.1 requests~=2.26.0 - simplejson~=3.17.5 + simplejson~=3.17.6 python_requires = >=3.7, <=3.11 [options.extras_require] diff --git a/setup.py b/setup.py index 18d9caaa..b14a34a8 100644 --- a/setup.py +++ b/setup.py @@ -6,10 +6,11 @@ name="wikibaseintegrator", install_requires=[ "backoff ~= 1.11.1", + "frozendict ~= 2.0.7", "mwoauth ~= 0.3.7", "oauthlib ~= 3.1.1", "requests ~= 2.26.0", - "simplejson ~= 3.17.5" + "simplejson ~= 3.17.6" ], extras_require={ "dev": 
[ From 76340d051321c5b11fec10e7b98d766137931ac1 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 17 Nov 2021 14:45:04 +0100 Subject: [PATCH 141/308] Rework FastRun for using WBI DataModel (#249) * Rework FastRun for using WBI DataModel * base_filter use a list of BaseDataType * Add a set_value() function in BaseDataType classes * Fix multiple _get_sparql_value() * Comment fr_search() * Dont query datatype if already available * Add support for subclasses is FastRunContainer * Remove unwanted require --- requirements.txt | 1 - setup.cfg | 1 - setup.py | 1 - test/test_all.py | 6 +- test/test_entity_item.py | 10 +- test/test_wbi_fastrun.py | 37 +++-- wikibaseintegrator/datatypes/basedatatype.py | 26 ++- wikibaseintegrator/datatypes/commonsmedia.py | 15 ++ wikibaseintegrator/datatypes/form.py | 2 + wikibaseintegrator/datatypes/geoshape.py | 2 + .../datatypes/globecoordinate.py | 39 ++++- wikibaseintegrator/datatypes/item.py | 4 +- wikibaseintegrator/datatypes/lexeme.py | 2 + .../datatypes/monolingualtext.py | 12 ++ wikibaseintegrator/datatypes/property.py | 2 + wikibaseintegrator/datatypes/quantity.py | 9 +- wikibaseintegrator/datatypes/sense.py | 6 +- wikibaseintegrator/datatypes/string.py | 2 + wikibaseintegrator/datatypes/tabulardata.py | 2 + wikibaseintegrator/datatypes/time.py | 2 + wikibaseintegrator/datatypes/url.py | 15 ++ wikibaseintegrator/entities/baseentity.py | 28 ++-- wikibaseintegrator/models/claims.py | 9 +- wikibaseintegrator/wbi_fastrun.py | 156 +++++++++++------- 24 files changed, 276 insertions(+), 113 deletions(-) diff --git a/requirements.txt b/requirements.txt index c859d7be..c6a13f5c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,4 @@ backoff~=1.11.1 -frozendict~=2.0.7 mwoauth~=0.3.7 oauthlib~=3.1.1 requests~=2.26.0 diff --git a/setup.cfg b/setup.cfg index 14c49e86..d39a2c2f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -33,7 +33,6 @@ classifiers = packages = find: install_requires = 
backoff~=1.11.1 - frozendict~=2.0.7 mwoauth~=0.3.7 oauthlib~=3.1.1 requests~=2.26.0 diff --git a/setup.py b/setup.py index b14a34a8..fbd8e466 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,6 @@ name="wikibaseintegrator", install_requires=[ "backoff ~= 1.11.1", - "frozendict ~= 2.0.7", "mwoauth ~= 0.3.7", "oauthlib ~= 3.1.1", "requests ~= 2.26.0", diff --git a/test/test_all.py b/test/test_all.py index 07cb85b8..db179555 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -59,7 +59,7 @@ def test_fast_run(self): datatypes.ExternalID(value='YER158C', prop_nr='P705') ] - frc = wbi_fastrun.FastRunContainer(base_filter={'P352': '', 'P703': 'Q27510868'}, base_data_type=datatypes.BaseDataType) + frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P352'), datatypes.Item(prop_nr='P703', value='Q27510868')], base_data_type=datatypes.BaseDataType) fast_run_result = frc.write_required(data=statements) @@ -75,13 +75,13 @@ def test_fast_run(self): def test_fastrun_label(self): # tests fastrun label, description and aliases, and label in another language - fast_run_base_filter = {'P361': 'Q18589965'} + fast_run_base_filter = [datatypes.Item(prop_nr='P361', value='Q18589965')] item = WikibaseIntegrator().item.get('Q2') item.init_fastrun(base_filter=fast_run_base_filter) item.init_fastrun(base_filter=fast_run_base_filter) # Test if we found the same FastRunContainer item.claims.add(datatypes.ExternalID(value='/m/02j71', prop_nr='P646')) - frc = wbi_fastrun.FastRunContainer(base_filter={'P699': ''}, base_data_type=BaseDataType) + frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P699')], base_data_type=datatypes.BaseDataType) assert item.labels.get(language='en') == "Earth" descr = item.descriptions.get(language='en') diff --git a/test/test_entity_item.py b/test/test_entity_item.py index 33c2f410..1cc38677 100644 --- a/test/test_entity_item.py +++ b/test/test_entity_item.py @@ -3,7 +3,7 @@ from simplejson import JSONDecodeError from 
wikibaseintegrator import WikibaseIntegrator -from wikibaseintegrator.datatypes import Item +from wikibaseintegrator.datatypes import BaseDataType, Item wbi = WikibaseIntegrator() @@ -38,17 +38,17 @@ def test_write(self): wbi.item.get('Q582').write(allow_anonymous=True, mediawiki_api_url='https://httpstat.us/200') def test_write_not_required(self): - assert not wbi.item.get('Q582').write_required(base_filter={'P1791': ''}) + assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P1791')]) def test_write_required(self): item = wbi.item.get('Q582') item.claims.add(Item(prop_nr='P1791', value='Q42')) - assert item.write_required(base_filter={'P1791': ''}) + assert item.write_required([BaseDataType(prop_nr='P1791')]) def test_write_not_required_ref(self): - assert not wbi.item.get('Q582').write_required(base_filter={'P2581': ''}, use_refs=True) + assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True) def test_write_required_ref(self): item = wbi.item.get('Q582') item.claims.get('P2581')[0].references.references.pop() - assert item.write_required(base_filter={'P2581': ''}, use_refs=True) + assert item.write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True) diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py index 40ce8a9b..e37cd829 100644 --- a/test/test_wbi_fastrun.py +++ b/test/test_wbi_fastrun.py @@ -19,7 +19,7 @@ def test_query_data(): This tests that the fast run container correctly queries data from wikidata and stores it in the appropriate format without getting references """ - frc = wbi_fastrun.FastRunContainer(base_filter={'P699': ''}, base_data_type=BaseDataType) + frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P699')], base_data_type=BaseDataType) # get a string value frc._query_data('P699') # wikidata-item value @@ -35,14 +35,14 @@ def test_query_data(): d = frc.prop_data['Q10874']['P699'][statement_id] # d looks like: 
{'qual': set(), 'ref': {}, 'v': 'DOID:1432'} assert all(x in d for x in {'qual', 'ref', 'v'}) - assert frc.prop_data['Q10874']['P699'][statement_id]['v'].startswith('DOID:') + assert frc.prop_data['Q10874']['P699'][statement_id]['v'].startswith('"DOID:') # item assert list(frc.prop_data['Q10874']['P828'].values())[0]['v'] == "Q18228398" # uri v = {x['v'] for x in frc.prop_data['Q10874']['P2888'].values()} - assert all(y.startswith("http") for y in v) + assert all(y.startswith(" 0 ref_id = list(d['ref'].keys())[0] ref = d['ref'][ref_id] @@ -102,11 +102,14 @@ def __init__(self, *args: Any, **kwargs: Any): 'fake statement id': { 'qual': set(), 'ref': {'fake ref id': { - ('P248', 'Q29458763'), # stated in ensembl Release 88 - ('P594', 'ENSG00000123374')}}, - 'v': 'ENSG00000123374'}}} + ('P248', + 'Q106833387'), + ('P594', + 'ENSG00000123374')}}, + 'unit': '1', + 'v': '"ENSG00000123374"'}}} self.rev_lookup = defaultdict(set) - self.rev_lookup['ENSG00000123374'].add('Q14911732') + self.rev_lookup['"ENSG00000123374"'].add('Q14911732') class FastRunContainerFakeQueryDataEnsemblNoRef(wbi_fastrun.FastRunContainer): @@ -119,12 +122,12 @@ def __init__(self, *args: Any, **kwargs: Any): 'ref': {}, 'v': 'ENSG00000123374'}}} self.rev_lookup = defaultdict(set) - self.rev_lookup['ENSG00000123374'].add('Q14911732') + self.rev_lookup['"ENSG00000123374"'].add('Q14911732') def test_fastrun_ref_ensembl(): # fastrun checks refs - frc = FastRunContainerFakeQueryDataEnsembl(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType, use_refs=True) + frc = FastRunContainerFakeQueryDataEnsembl(base_filter=[BaseDataType(prop_nr='P594'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType, use_refs=True) frc.debug = True # statement has no ref @@ -132,7 +135,7 @@ def test_fastrun_ref_ensembl(): assert frc.write_required(data=statements) # statement has the same ref - statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', 
references=[[Item("Q29458763", prop_nr="P248"), ExternalID("ENSG00000123374", prop_nr="P594")]])] + statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[Item("Q106833387", prop_nr="P248"), ExternalID("ENSG00000123374", prop_nr="P594")]])] assert not frc.write_required(data=statements) # new statement has an different stated in @@ -140,12 +143,14 @@ def test_fastrun_ref_ensembl(): assert frc.write_required(data=statements) # fastrun don't check references, statement has no reference, - frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType, use_refs=False) + frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter=[BaseDataType(prop_nr='P594'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType, + use_refs=False) statements = [ExternalID(value='ENSG00000123374', prop_nr='P594')] assert not frc.write_required(data=statements) # fastrun don't check references, statement has reference, - frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType, use_refs=False) + frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter=[BaseDataType(prop_nr='P594'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType, + use_refs=False) statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[Item("Q123", prop_nr="P31")]])] assert not frc.write_required(data=statements) @@ -187,7 +192,7 @@ def test_append_props(): # https://www.wikidata.org/wiki/Q3402672#P527 # don't consider refs - frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType) + frc = FakeQueryDataAppendProps(base_filter=[BaseDataType(prop_nr='P352'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType) # with append statements = [Item(value='Q24784025', prop_nr='P527')] assert frc.write_required(data=statements, 
action_if_exists=ActionIfExists.APPEND, cqid=qid) is False @@ -199,7 +204,7 @@ def test_append_props(): assert frc.write_required(data=statements, cqid=qid) is True # if we are in append mode, and the refs are different, we should write - frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=BaseDataType, use_refs=True) + frc = FakeQueryDataAppendProps(base_filter=[BaseDataType(prop_nr='P352'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType, use_refs=True) # with append statements = [Item(value='Q24784025', prop_nr='P527')] assert frc.write_required(data=statements, cqid=qid, action_if_exists=ActionIfExists.APPEND) is True diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index 8e7f3bce..33518525 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from typing import Any, List, Type, Union from wikibaseintegrator.models import Claim @@ -35,5 +36,28 @@ def __init_subclass__(cls, **kwargs): super().__init_subclass__(**kwargs) cls.subclasses.append(cls) + def set_value(self, value: Any = None): + pass + def _get_sparql_value(self) -> str: - return self.mainsnak.datavalue['value'] + return '"' + self.mainsnak.datavalue['value'] + '"' + + def _parse_sparql_value(self, value, type='literal', unit='1') -> bool: + if type == 'uri': + pattern = re.compile(r'^?$') + matches = pattern.match(value) + if not matches: + return False + + self.set_value(value=matches.group(1)) + elif type == 'literal': + pattern = re.compile(r'^"?(.*?)"?$') + matches = pattern.match(value) + if not matches: + return False + + self.set_value(value=matches.group(1)) + else: + raise ValueError + + return True diff --git a/wikibaseintegrator/datatypes/commonsmedia.py b/wikibaseintegrator/datatypes/commonsmedia.py index 177b9802..caa4cd58 100644 --- 
a/wikibaseintegrator/datatypes/commonsmedia.py +++ b/wikibaseintegrator/datatypes/commonsmedia.py @@ -1,3 +1,6 @@ +import re +import urllib.parse + from wikibaseintegrator.datatypes.string import String @@ -6,3 +9,15 @@ class CommonsMedia(String): Implements the Wikibase data type for Wikimedia commons media files """ DTYPE = 'commonsMedia' + + def _get_sparql_value(self) -> str: + return '<' + self.mainsnak.datavalue['value'] + '>' + + def _parse_sparql_value(self, value, type='literal', unit='1') -> bool: + pattern = re.compile(r'^?$') + matches = pattern.match(value) + if not matches: + return False + + self.set_value(value=urllib.parse.unquote(matches.group(1))) + return True diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py index 7af179bc..105c01b0 100644 --- a/wikibaseintegrator/datatypes/form.py +++ b/wikibaseintegrator/datatypes/form.py @@ -35,7 +35,9 @@ def __init__(self, value: str = None, **kwargs: Any): """ super().__init__(**kwargs) + self.set_value(value=value) + def set_value(self, value: str = None): assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: diff --git a/wikibaseintegrator/datatypes/geoshape.py b/wikibaseintegrator/datatypes/geoshape.py index 34ded741..3e013387 100644 --- a/wikibaseintegrator/datatypes/geoshape.py +++ b/wikibaseintegrator/datatypes/geoshape.py @@ -41,7 +41,9 @@ def __init__(self, value: str = None, **kwargs: Any): """ super().__init__(**kwargs) + self.set_value(value=value) + def set_value(self, value: str = None): assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py index 35483e09..b866e487 100644 --- a/wikibaseintegrator/datatypes/globecoordinate.py +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -1,6 +1,8 @@ +import re from typing import Any from 
wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.models import Claim from wikibaseintegrator.wbi_config import config @@ -16,19 +18,23 @@ class GlobeCoordinate(BaseDataType): }} ''' - def __init__(self, latitude: float = None, longitude: float = None, precision: float = None, globe: str = None, wikibase_url: str = None, **kwargs: Any): + def __init__(self, latitude: float = None, longitude: float = None, altitude: float = None, precision: float = None, globe: str = None, wikibase_url: str = None, + **kwargs: Any): """ Constructor, calls the superclass BaseDataType :param latitude: Latitute in decimal format :param longitude: Longitude in decimal format + :param altitude: Altitude (in decimal format?) (Always None at this moment) :param precision: Precision of the position measurement, default 1 / 3600 :param globe: The globe entity concept URI (ex: http://www.wikidata.org/entity/Q2) or 'Q2' :param wikibase_url: The default wikibase URL, used when the globe is only an ID like 'Q2'. Use wbi_config['WIKIBASE_URL'] by default. """ super().__init__(**kwargs) + self.set_value(latitude=latitude, longitude=longitude, altitude=altitude, precision=precision, globe=globe, wikibase_url=wikibase_url) + def set_value(self, latitude: float = None, longitude: float = None, altitude: float = None, precision: float = None, globe: str = None, wikibase_url: str = None): # https://github.com/wikimedia/Wikibase/blob/174450de8fdeabcf97287604dbbf04d07bb5000c/repo/includes/Rdf/Values/GlobeCoordinateRdfBuilder.php#L120 precision = precision or 1 / 3600 globe = globe or str(config['COORDINATE_GLOBE_QID']) @@ -40,7 +46,7 @@ def __init__(self, latitude: float = None, longitude: float = None, precision: f # TODO: Introduce validity checks for coordinates, etc. 
# TODO: Add check if latitude/longitude/precision is None - if latitude and longitude: + if latitude is not None and longitude is not None: if latitude < -90 or latitude > 90: raise ValueError(f"latitude must be between -90 and 90, got '{latitude}'") if longitude < -180 or longitude > 180: @@ -50,11 +56,38 @@ def __init__(self, latitude: float = None, longitude: float = None, precision: f 'value': { 'latitude': latitude, 'longitude': longitude, + 'altitude': altitude, 'precision': precision, 'globe': globe }, 'type': 'globecoordinate' } + def __eq__(self, other): + if isinstance(other, Claim) and other.mainsnak.datavalue['type'] == 'globecoordinate': + tmp_datavalue_self = self.mainsnak.datavalue + tmp_datavalue_other = other.mainsnak.datavalue + + tmp_datavalue_self['value']['latitude'] = round(tmp_datavalue_self['value']['latitude'], 6) + tmp_datavalue_self['value']['longitude'] = round(tmp_datavalue_self['value']['longitude'], 6) + tmp_datavalue_self['value']['precision'] = round(tmp_datavalue_self['value']['precision'], 17) + + tmp_datavalue_other['value']['latitude'] = round(tmp_datavalue_other['value']['latitude'], 6) + tmp_datavalue_other['value']['longitude'] = round(tmp_datavalue_other['value']['longitude'], 6) + tmp_datavalue_other['value']['precision'] = round(tmp_datavalue_other['value']['precision'], 17) + + return tmp_datavalue_self == tmp_datavalue_other and self.mainsnak.property_number == other.mainsnak.property_number and self.has_equal_qualifiers(other) + + return super().__eq__(other) + def _get_sparql_value(self) -> str: - return 'Point(' + str(self.mainsnak.datavalue['value']['latitude']) + ', ' + str(self.mainsnak.datavalue['value']['longitude']) + ')' + return '"Point(' + str(self.mainsnak.datavalue['value']['longitude']) + ' ' + str(self.mainsnak.datavalue['value']['latitude']) + ')"' + + def _parse_sparql_value(self, value, type='literal', unit='1') -> bool: + pattern = re.compile(r'^"?Point\((.*) (.*)\)"?(?:\^\^geo:wktLiteral)?$') + 
matches = pattern.match(value) + if not matches: + return False + + self.set_value(longitude=float(matches.group(1)), latitude=float(matches.group(2))) + return True diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py index c9690446..294f0524 100644 --- a/wikibaseintegrator/datatypes/item.py +++ b/wikibaseintegrator/datatypes/item.py @@ -24,7 +24,9 @@ def __init__(self, value: Union[str, int] = None, **kwargs: Any): """ super().__init__(**kwargs) + self.set_value(value=value) + def set_value(self, value: Union[str, int] = None): assert isinstance(value, (str, int)) or value is None, f'Expected str or int, found {type(value)} ({value})' if value: @@ -47,4 +49,4 @@ def __init__(self, value: Union[str, int] = None, **kwargs: Any): } def _get_sparql_value(self) -> str: - return self.mainsnak.datavalue['value']['id'] + return '<{wb_url}/entity/' + self.mainsnak.datavalue['value']['id'] + '>' diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py index bb8eb622..71728e87 100644 --- a/wikibaseintegrator/datatypes/lexeme.py +++ b/wikibaseintegrator/datatypes/lexeme.py @@ -24,7 +24,9 @@ def __init__(self, value: Union[str, int] = None, **kwargs: Any): """ super().__init__(**kwargs) + self.set_value(value=value) + def set_value(self, value: Union[str, int] = None): assert isinstance(value, (str, int)) or value is None, f"Expected str or int, found {type(value)} ({value})" if value: diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py index a5e9447b..5c4eb776 100644 --- a/wikibaseintegrator/datatypes/monolingualtext.py +++ b/wikibaseintegrator/datatypes/monolingualtext.py @@ -1,3 +1,4 @@ +import re from typing import Any from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -25,7 +26,9 @@ def __init__(self, text: str = None, language: str = None, **kwargs: Any): """ super().__init__(**kwargs) + self.set_value(text=text, 
language=language) + def set_value(self, text: str = None, language: str = None): language = language or str(config['DEFAULT_LANGUAGE']) assert isinstance(text, str) or text is None, f"Expected str, found {type(text)} ({text})" @@ -42,3 +45,12 @@ def __init__(self, text: str = None, language: str = None, **kwargs: Any): def _get_sparql_value(self) -> str: return '"' + self.mainsnak.datavalue['value']['text'].replace('"', r'\"') + '"@' + self.mainsnak.datavalue['value']['language'] + + def _parse_sparql_value(self, value, type='literal', unit='1') -> bool: + pattern = re.compile(r'^"(.*?)"@([a-z\-]*)$') + matches = pattern.match(value) + if not matches: + return False + + self.set_value(text=matches.group(1), language=matches.group(2)) + return True diff --git a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py index 6069459a..a2241b25 100644 --- a/wikibaseintegrator/datatypes/property.py +++ b/wikibaseintegrator/datatypes/property.py @@ -25,7 +25,9 @@ def __init__(self, value: Union[str, int] = None, **kwargs: Any): """ super().__init__(**kwargs) + self.set_value(value=value) + def set_value(self, value: Union[str, int] = None): assert isinstance(value, (str, int)) or value is None, f"Expected str or int, found {type(value)} ({value})" if value: diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py index 014c8c3c..a7264181 100644 --- a/wikibaseintegrator/datatypes/quantity.py +++ b/wikibaseintegrator/datatypes/quantity.py @@ -31,7 +31,10 @@ def __init__(self, amount: Union[str, int, float] = None, upper_bound: Union[str """ super().__init__(**kwargs) + self.set_value(amount=amount, upper_bound=upper_bound, lower_bound=lower_bound, unit=unit, wikibase_url=wikibase_url) + def set_value(self, amount: Union[str, int, float] = None, upper_bound: Union[str, int, float] = None, lower_bound: Union[str, int, float] = None, unit: Union[str, int] = '1', + wikibase_url: str = None): wikibase_url = 
wikibase_url or str(config['WIKIBASE_URL']) unit = str(unit or '1') @@ -79,4 +82,8 @@ def __init__(self, amount: Union[str, int, float] = None, upper_bound: Union[str del self.mainsnak.datavalue['value']['lowerBound'] def _get_sparql_value(self) -> str: - return format_amount(self.mainsnak.datavalue['value']['amount']) + return '"' + format_amount(self.mainsnak.datavalue['value']['amount']) + '"^^xsd:decimal' + + def _parse_sparql_value(self, value, type='literal', unit='1') -> bool: + self.set_value(amount=value, unit=unit) + return True diff --git a/wikibaseintegrator/datatypes/sense.py b/wikibaseintegrator/datatypes/sense.py index 7570325b..b7f83dcd 100644 --- a/wikibaseintegrator/datatypes/sense.py +++ b/wikibaseintegrator/datatypes/sense.py @@ -1,5 +1,5 @@ import re -from typing import Any, Union +from typing import Any from wikibaseintegrator.datatypes.basedatatype import BaseDataType @@ -16,7 +16,7 @@ class Sense(BaseDataType): }} ''' - def __init__(self, value: Union[str, int] = None, **kwargs: Any): + def __init__(self, value: str = None, **kwargs: Any): """ Constructor, calls the superclass BaseDataType @@ -24,7 +24,9 @@ def __init__(self, value: Union[str, int] = None, **kwargs: Any): """ super().__init__(**kwargs) + self.set_value(value=value) + def set_value(self, value: str = None): assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py index ee5a760d..a6afd761 100644 --- a/wikibaseintegrator/datatypes/string.py +++ b/wikibaseintegrator/datatypes/string.py @@ -18,7 +18,9 @@ def __init__(self, value: str = None, **kwargs: Any): """ super().__init__(**kwargs) + self.set_value(value=value) + def set_value(self, value: str = None): assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: diff --git a/wikibaseintegrator/datatypes/tabulardata.py 
b/wikibaseintegrator/datatypes/tabulardata.py index efe5eb1d..118e75ca 100644 --- a/wikibaseintegrator/datatypes/tabulardata.py +++ b/wikibaseintegrator/datatypes/tabulardata.py @@ -18,7 +18,9 @@ def __init__(self, value: str = None, **kwargs: Any): """ super().__init__(**kwargs) + self.set_value(value=value) + def set_value(self, value: str = None): assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py index 52c4ed3c..97a12519 100644 --- a/wikibaseintegrator/datatypes/time.py +++ b/wikibaseintegrator/datatypes/time.py @@ -36,7 +36,9 @@ def __init__(self, time: str = None, before: int = 0, after: int = 0, precision: """ super().__init__(**kwargs) + self.set_value(time=time, before=before, after=after, precision=precision, timezone=timezone, calendarmodel=calendarmodel, wikibase_url=wikibase_url) + def set_value(self, time: str = None, before: int = 0, after: int = 0, precision: int = 11, timezone: int = 0, calendarmodel: str = None, wikibase_url: str = None): calendarmodel = calendarmodel or str(config['CALENDAR_MODEL_QID']) wikibase_url = wikibase_url or str(config['WIKIBASE_URL']) diff --git a/wikibaseintegrator/datatypes/url.py b/wikibaseintegrator/datatypes/url.py index f6ceeb95..c56705cb 100644 --- a/wikibaseintegrator/datatypes/url.py +++ b/wikibaseintegrator/datatypes/url.py @@ -24,7 +24,9 @@ def __init__(self, value: str = None, **kwargs: Any): """ super().__init__(**kwargs) + self.set_value(value=value) + def set_value(self, value: str = None): assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" if value: @@ -38,3 +40,16 @@ def __init__(self, value: str = None, **kwargs: Any): 'value': value, 'type': 'string' } + + def _get_sparql_value(self) -> str: + return '<' + self.mainsnak.datavalue['value'] + '>' + + def _parse_sparql_value(self, value, type='literal', unit='1') -> 
bool: + pattern = re.compile(r'^?$') + matches = pattern.match(value) + if not matches: + return False + + self.set_value(value=matches.group(1)) + + return True diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 31350901..b0c65739 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -174,9 +174,9 @@ def _write(self, data: Dict = None, summary: str = None, allow_anonymous: bool = self.lastrevid = json_data['entity']['lastrevid'] return json_data['entity'] - def init_fastrun(self, base_filter: Dict[str, str] = None, use_refs: bool = False, case_insensitive: bool = False) -> None: + def init_fastrun(self, base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> None: if base_filter is None: - base_filter = {} + base_filter = [] if self.debug: print('Initialize Fast Run init_fastrun') @@ -196,24 +196,24 @@ def init_fastrun(self, base_filter: Dict[str, str] = None, use_refs: bool = Fals self.fast_run_container = FastRunContainer(base_filter=base_filter, use_refs=use_refs, base_data_type=BaseDataType, case_insensitive=case_insensitive) BaseEntity.fast_run_store.append(self.fast_run_container) - def fr_search(self, **kwargs: Any) -> str: - self.init_fastrun(**kwargs) - - if self.fast_run_container is None: - raise ValueError("FastRunContainer is not initialized.") - - self.fast_run_container.load_item(self.claims) - - return self.fast_run_container.current_qid - - def write_required(self, base_filter: Dict[str, str] = None, **kwargs: Any) -> bool: + # def fr_search(self, **kwargs: Any) -> str: + # self.init_fastrun(**kwargs) + # + # if self.fast_run_container is None: + # raise ValueError("FastRunContainer is not initialized.") + # + # self.fast_run_container.load_item(self.claims) + # + # return self.fast_run_container.current_qid + + def write_required(self, base_filter: List[BaseDataType | 
List[BaseDataType]] = None, **kwargs: Any) -> bool: self.init_fastrun(base_filter=base_filter, **kwargs) if self.fast_run_container is None: raise ValueError("FastRunContainer is not initialized.") if base_filter is None: - base_filter = {} + base_filter = [] claims_to_check = [] for claim in self.claims: diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 2378fdd3..66c78365 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -266,6 +266,7 @@ def has_equal_qualifiers(self, other: Claim) -> bool: return equal_qualifiers + # TODO: rewrite this? def __contains__(self, item): if isinstance(item, Claim): return self == item @@ -273,12 +274,16 @@ def __contains__(self, item): if isinstance(item, str): return self.mainsnak.datavalue == item - raise TypeError + return super().__contains__(item) def __eq__(self, other): if isinstance(other, Claim): return self.mainsnak.datavalue == other.mainsnak.datavalue and self.mainsnak.property_number == other.mainsnak.property_number and self.has_equal_qualifiers(other) - raise TypeError + + if isinstance(other, str): + return self.mainsnak.property_number == other + + raise super().__eq__(other) def equals(self, that: Claim, include_ref: bool = False, fref: Callable = None) -> bool: """ diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index e8b35fd0..f99e19d6 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -3,11 +3,9 @@ import collections import copy from collections import defaultdict -from functools import lru_cache, wraps +from functools import lru_cache from itertools import chain -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Type, Union - -from frozendict import frozendict +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Type, Union from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.wbi_config import config @@ 
-22,13 +20,13 @@ class FastRunContainer: def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = None, sparql_endpoint_url: str = None, wikibase_url: str = None, - base_filter: Dict[str, str] = None, use_refs: bool = False, case_insensitive: bool = False, debug: bool = None): + base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False, debug: bool = None): self.reconstructed_statements: List[BaseDataType] = [] self.rev_lookup: defaultdict[str, Set[str]] = defaultdict(set) self.rev_lookup_ci: defaultdict[str, Set[str]] = defaultdict(set) self.prop_data: Dict[str, dict] = {} self.loaded_langs: Dict[str, dict] = {} - self.base_filter = {} + self.base_filter: List[BaseDataType | List[BaseDataType]] = [] self.base_filter_string = '' self.prop_dt_map: Dict[str, str] = {} self.current_qid = '' @@ -44,23 +42,29 @@ def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = if base_filter and any(base_filter): self.base_filter = base_filter - for k, v in self.base_filter.items(): - ks = [] - if k.count('/') == 1: - ks = k.split('/') - if v: - if ks: - self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr1}>/<{wb_url}/prop/direct/{prop_nr2}>* ' \ - '<{wb_url}/entity/{entity}> .\n'.format(wb_url=self.wikibase_url, prop_nr1=ks[0], prop_nr2=ks[1], entity=v) + for k in self.base_filter: + # TODO: Reimplement "subclasses of" support + # ks = False + if isinstance(k, BaseDataType): + if k.mainsnak.datavalue: + self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> {entity} .\n'.format( + wb_url=self.wikibase_url, prop_nr=k.mainsnak.property_number, entity=k._get_sparql_value().format(wb_url=self.wikibase_url)) else: - self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> <{wb_url}/entity/{entity}> .\n'.format(wb_url=self.wikibase_url, - prop_nr=k, entity=v) - else: - if ks: - self.base_filter_string += '?item 
<{wb_url}/prop/direct/{prop_nr1}>/<{wb_url}/prop/direct/{prop_nr2}>* ' \ - '?zz{prop_nr1}{prop_nr2} .\n'.format(wb_url=self.wikibase_url, prop_nr1=ks[0], prop_nr2=ks[1]) + self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> ?zz{prop_nr} .\n'.format( + wb_url=self.wikibase_url, prop_nr=k.mainsnak.property_number) + elif isinstance(k, list) and len(k) == 2 and isinstance(k[0], BaseDataType) and isinstance(k[1], BaseDataType): + if k[0].mainsnak.datavalue: + self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}>/<{wb_url}/prop/direct/{prop_nr2}>* {entity} .\n'.format( + wb_url=self.wikibase_url, prop_nr=k[0].mainsnak.property_number, prop_nr2=k[1].mainsnak.property_number, + entity=k[0]._get_sparql_value().format(wb_url=self.wikibase_url)) else: - self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> ?zz{prop_nr} .\n'.format(wb_url=self.wikibase_url, prop_nr=k) + self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr1}>/<{wb_url}/prop/direct/{prop_nr2}>* ?zz{prop_nr1}{prop_nr2} .\n'.format( + wb_url=self.wikibase_url, prop_nr1=k[0].mainsnak.property_number, prop_nr2=k[1].mainsnak.property_number) + + else: + raise ValueError + + self.__initialized = True def reconstruct_statements(self, qid: str) -> List[BaseDataType]: reconstructed_statements: List[BaseDataType] = [] @@ -84,9 +88,9 @@ def reconstruct_statements(self, qid: str) -> List[BaseDataType]: f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[q[0]]][0] # TODO: Add support for more data type (Time, MonolingualText, GlobeCoordinate) if self.prop_dt_map[q[0]] == 'quantity': - qualifiers.append(f(value=q[1], prop_nr=q[0], is_qualifier=True, unit=q[2])) + qualifiers.append(f(value=q[1], prop_nr=q[0], unit=q[2])) else: - qualifiers.append(f(value=q[1], prop_nr=q[0], is_qualifier=True)) + qualifiers.append(f(value=q[1], prop_nr=q[0])) references = [] for ref_id, refs in d['ref'].items(): @@ -99,9 +103,12 @@ def reconstruct_statements(self, 
qid: str) -> List[BaseDataType]: f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[prop_nr]][0] # TODO: Add support for more data type if self.prop_dt_map[prop_nr] == 'quantity': - reconstructed_statements.append(f(value=d['v'], prop_nr=prop_nr, qualifiers=qualifiers, references=references, unit=d['unit'])) + datatype = f(prop_nr=prop_nr, qualifiers=qualifiers, references=references, unit=d['unit']) + datatype._parse_sparql_value(value=d['v'], unit=d['unit']) else: - reconstructed_statements.append(f(value=d['v'], prop_nr=prop_nr, qualifiers=qualifiers, references=references)) + datatype = f(prop_nr=prop_nr, qualifiers=qualifiers, references=references) + datatype._parse_sparql_value(value=d['v']) + reconstructed_statements.append(datatype) # this isn't used. done for debugging purposes self.reconstructed_statements = reconstructed_statements @@ -123,15 +130,18 @@ def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool: if prop_nr not in self.prop_dt_map: if self.debug: print(f"{prop_nr} not found in fastrun") - self.prop_dt_map.update({prop_nr: self.get_prop_datatype(prop_nr)}) - self._query_data(prop_nr=prop_nr, use_units=claim.mainsnak.datatype == 'quantity') + + if isinstance(claim, BaseDataType) and type(claim) != BaseDataType: + self.prop_dt_map.update({prop_nr: claim.DTYPE}) + else: + self.prop_dt_map.update({prop_nr: self.get_prop_datatype(prop_nr)}) + self._query_data(prop_nr=prop_nr, use_units=self.prop_dt_map[prop_nr] == 'quantity') # noinspection PyProtectedMember current_value = claim._get_sparql_value() if self.prop_dt_map[prop_nr] == 'wikibase-item': - if not str(current_value).startswith('Q'): - current_value = f'Q{current_value}' + current_value = claim.mainsnak.datavalue['value']['id'] if self.debug: print(current_value) @@ -148,7 +158,7 @@ def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool: match_sets.append(set(self.rev_lookup_ci[current_value.casefold()])) else: if 
self.debug: - print("no matches for rev lookup") + print(f"no matches for rev lookup for {current_value}") # return True if not match_sets: @@ -180,9 +190,11 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi for x in data: if x.mainsnak.datavalue and x.mainsnak.datatype: data_props.add(x.mainsnak.property_number) - write_required = False self.load_item(data, cqid) + if not self.current_qid: + return True + reconstructed_statements = self.reconstruct_statements(self.current_qid) tmp_rs = copy.deepcopy(reconstructed_statements) @@ -229,8 +241,7 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi # tmp_rs are the reconstructed statements == current state of the item bool_vec = [] for x in tmp_rs: - if (x.mainsnak.datavalue == date.mainsnak.datavalue or ( - self.case_insensitive and x.mainsnak.datavalue.casefold() == date.mainsnak.datavalue.casefold())) and x.mainsnak.property_number not in del_props: + if (x == date or (self.case_insensitive and x.mainsnak.datavalue.casefold() == date.mainsnak.datavalue.casefold())) and x.mainsnak.property_number not in del_props: bool_vec.append(x.equals(date, include_ref=self.use_refs)) else: bool_vec.append(False) @@ -241,18 +252,18 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi print(f"bool_vec: {bool_vec}") print("-----------------------------------") for x in tmp_rs: - if date == x and x.mainsnak.property_number not in del_props: - print(x.mainsnak.property_number, x.mainsnak.datavalue, [z.mainsnak.datavalue for z in x.qualifiers]) - print(date.mainsnak.property_number, date.mainsnak.datavalue, [z.mainsnak.datavalue for z in date.qualifiers]) + if x == date and x.mainsnak.property_number not in del_props: + print(x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]) + print(date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]) elif 
x.mainsnak.property_number == date.mainsnak.property_number: - print(x.mainsnak.property_number, x.mainsnak.datavalue, [z.mainsnak.datavalue for z in x.qualifiers]) - print(date.mainsnak.property_number, date.mainsnak.datavalue, [z.mainsnak.datavalue for z in date.qualifiers]) + print(x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]) + print(date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]) if not any(bool_vec): if self.debug: print(len(bool_vec)) print("fast run failed at", date.mainsnak.property_number) - write_required = True + return True else: if self.debug: print("fast run success") @@ -264,8 +275,9 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi for x in tmp_rs: print("xxx", x.mainsnak.property_number, x.mainsnak.datavalue, [z.mainsnak.datavalue for z in x.qualifiers]) print("failed because not zero--END") - write_required = True - return write_required + return True + + return False def init_language_data(self, lang: str, lang_data_type: str) -> None: """ @@ -376,8 +388,17 @@ def format_query_results(self, r: List, prop_nr: str) -> None: i['v'] = i['v']['value'].split('/')[-1] elif i['v']['type'] == 'literal' and prop_dt == 'quantity': i['v'] = format_amount(i['v']['value']) + elif i['v']['type'] == 'literal' and prop_dt == 'monolingualtext': + f = [x for x in self.base_data_type.subclasses if x.DTYPE == prop_dt][0](prop_nr=prop_nr, text=i['v']['value'], language=i['v']['xml:lang']) + # noinspection PyProtectedMember + i['v'] = f._get_sparql_value() else: - i['v'] = i['v']['value'] + f = [x for x in self.base_data_type.subclasses if x.DTYPE == prop_dt][0](prop_nr=prop_nr) + # noinspection PyProtectedMember + if not f._parse_sparql_value(value=i['v']['value'], type=i['v']['type']): + raise ValueError + # noinspection PyProtectedMember + i['v'] = f._get_sparql_value() # Note: no-value and some-value don't actually show up in the results here 
# see for example: select * where { wd:Q7207 p:P40 ?c . ?c ?d ?e } @@ -624,24 +645,35 @@ def __repr__(self) -> str: ) -def freezeargs(func): - """Transform mutable dictionnary - Into immutable - Useful to be compatible with cache - """ - - @wraps(func) - def wrapped(*args: Any, **kwargs: Any) -> Any: - args = tuple(frozendict(arg) if isinstance(arg, dict) else arg for arg in args) - kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()} - return func(*args, **kwargs) - - return wrapped - - -def get_fastrun_container(base_filter: Dict[str, str] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer: +# def fr_search(**kwargs: Any) -> str: +# FastRunContainer.init_fastrun(**kwargs) +# +# if self.fast_run_container is None: +# raise ValueError("FastRunContainer is not initialized.") +# +# self.fast_run_container.load_item(self.claims) +# +# return self.fast_run_container.current_qid + + +# def freezeargs(func): +# """Transform mutable dictionnary +# Into immutable +# Useful to be compatible with cache +# """ +# +# @wraps(func) +# def wrapped(*args: Any, **kwargs: Any) -> Any: +# args = tuple(frozendict(arg) if isinstance(arg, dict) else arg for arg in args) +# kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()} +# return func(*args, **kwargs) +# +# return wrapped + + +def get_fastrun_container(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer: if base_filter is None: - base_filter = {} + base_filter = [] fastrun_container = search_fastrun_store(base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive) fastrun_container.current_qid = '' @@ -650,9 +682,9 @@ def get_fastrun_container(base_filter: Dict[str, str] = None, use_refs: bool = F return fastrun_container -@freezeargs -@lru_cache() -def search_fastrun_store(base_filter: Dict[str, str] = None, use_refs: bool = False, 
case_insensitive: bool = False) -> FastRunContainer: +# @freezeargs +# @lru_cache() +def search_fastrun_store(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer: for c in fastrun_store: if (c.base_filter == base_filter) and (c.use_refs == use_refs) and (c.case_insensitive == case_insensitive) and ( c.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']): From 0f706a14b91b35dcfde84f034d22aa69f9bf21cb Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Wed, 17 Nov 2021 21:11:45 +0100 Subject: [PATCH 142/308] Rework Fastrun (#251) * Replace load_item() with get_item() * Move some fastrun function from baseentity to wbi_fastrun * Update tests --- test/test_all.py | 22 ++++---- wikibaseintegrator/entities/baseentity.py | 47 ++-------------- wikibaseintegrator/wbi_fastrun.py | 66 ++++++----------------- 3 files changed, 30 insertions(+), 105 deletions(-) diff --git a/test/test_all.py b/test/test_all.py index db179555..37f09309 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -6,6 +6,7 @@ from wikibaseintegrator.entities import Item from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists +from wikibaseintegrator.wbi_fastrun import get_fastrun_container config['DEBUG'] = True @@ -53,7 +54,7 @@ class TestFastRun(unittest.TestCase): some basic tests for fastrun mode """ - def test_fast_run(self): + def test_fastrun(self): statements = [ datatypes.ExternalID(value='P40095', prop_nr='P352'), datatypes.ExternalID(value='YER158C', prop_nr='P705') @@ -61,35 +62,30 @@ def test_fast_run(self): frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P352'), datatypes.Item(prop_nr='P703', value='Q27510868')], base_data_type=datatypes.BaseDataType) - fast_run_result = frc.write_required(data=statements) + fastrun_result = frc.write_required(data=statements) - if fast_run_result: + if fastrun_result: 
message = 'fastrun failed' else: message = 'successful fastrun' - print(fast_run_result, message) + print(fastrun_result, message) # here, fastrun should succeed, if not, test failed - if fast_run_result: + if fastrun_result: raise ValueError def test_fastrun_label(self): # tests fastrun label, description and aliases, and label in another language - fast_run_base_filter = [datatypes.Item(prop_nr='P361', value='Q18589965')] + frc = get_fastrun_container(base_filter=[datatypes.ExternalID(value='/m/02j71', prop_nr='P646')]) item = WikibaseIntegrator().item.get('Q2') - item.init_fastrun(base_filter=fast_run_base_filter) - item.init_fastrun(base_filter=fast_run_base_filter) # Test if we found the same FastRunContainer - item.claims.add(datatypes.ExternalID(value='/m/02j71', prop_nr='P646')) - - frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P699')], base_data_type=datatypes.BaseDataType) assert item.labels.get(language='en') == "Earth" descr = item.descriptions.get(language='en') assert len(descr) > 3 assert "Terra" in item.aliases.get() - assert list(item.fast_run_container.get_language_data("Q2", 'en', 'label'))[0] == "Earth" - assert item.fast_run_container.check_language_data("Q2", ['not the Earth'], 'en', 'label') + assert list(frc.get_language_data("Q2", 'en', 'label'))[0] == item.labels.get(language='en') + assert frc.check_language_data("Q2", ['not the Earth'], 'en', 'label') assert "Terra" in item.aliases.get() assert "planet" in item.descriptions.get() diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index b0c65739..646dc171 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -1,16 +1,16 @@ from __future__ import annotations from copy import copy -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, List, Union import simplejson +from wikibaseintegrator import wbi_fastrun 
from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.models.claims import Claim, Claims from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists from wikibaseintegrator.wbi_exceptions import MWApiError, NonUniqueLabelDescriptionPairError -from wikibaseintegrator.wbi_fastrun import FastRunContainer from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper from wikibaseintegrator.wbi_login import Login @@ -19,8 +19,6 @@ class BaseEntity: - fast_run_store: List[FastRunContainer] = [] - ETYPE = 'base-entity' def __init__(self, api: 'WikibaseIntegrator' = None, lastrevid: int = None, type: str = None, id: str = None, claims: Claims = None, is_bot: bool = None, login: Login = None): @@ -38,8 +36,6 @@ def __init__(self, api: 'WikibaseIntegrator' = None, lastrevid: int = None, type self.id = id self.claims = claims or Claims() - self.fast_run_container: Optional[FastRunContainer] = None - self.debug = config['DEBUG'] def add_claims(self, claims: Union[Claim, list], action_if_exists: ActionIfExists = ActionIfExists.APPEND) -> BaseEntity: @@ -174,43 +170,8 @@ def _write(self, data: Dict = None, summary: str = None, allow_anonymous: bool = self.lastrevid = json_data['entity']['lastrevid'] return json_data['entity'] - def init_fastrun(self, base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> None: - if base_filter is None: - base_filter = [] - - if self.debug: - print('Initialize Fast Run init_fastrun') - # We search if we already have a FastRunContainer with the same parameters to re-use it - for fast_run in BaseEntity.fast_run_store: - if (fast_run.base_filter == base_filter) and (fast_run.use_refs == use_refs) and (fast_run.case_insensitive == case_insensitive) and ( - fast_run.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']): - self.fast_run_container = fast_run - self.fast_run_container.current_qid = '' - 
self.fast_run_container.base_data_type = BaseDataType - if self.debug: - print("Found an already existing FastRunContainer") - - if not self.fast_run_container: - if self.debug: - print("Create a new FastRunContainer") - self.fast_run_container = FastRunContainer(base_filter=base_filter, use_refs=use_refs, base_data_type=BaseDataType, case_insensitive=case_insensitive) - BaseEntity.fast_run_store.append(self.fast_run_container) - - # def fr_search(self, **kwargs: Any) -> str: - # self.init_fastrun(**kwargs) - # - # if self.fast_run_container is None: - # raise ValueError("FastRunContainer is not initialized.") - # - # self.fast_run_container.load_item(self.claims) - # - # return self.fast_run_container.current_qid - def write_required(self, base_filter: List[BaseDataType | List[BaseDataType]] = None, **kwargs: Any) -> bool: - self.init_fastrun(base_filter=base_filter, **kwargs) - - if self.fast_run_container is None: - raise ValueError("FastRunContainer is not initialized.") + fastrun_container = wbi_fastrun.get_fastrun_container(base_filter=base_filter, **kwargs) if base_filter is None: base_filter = [] @@ -220,7 +181,7 @@ def write_required(self, base_filter: List[BaseDataType | List[BaseDataType]] = if claim.mainsnak.property_number in base_filter: claims_to_check.append(claim) - return self.fast_run_container.write_required(data=claims_to_check, cqid=self.id) + return fastrun_container.write_required(data=claims_to_check, cqid=self.id) def __repr__(self): """A mixin implementing a simple __repr__.""" diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index f99e19d6..ac9155db 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -29,7 +29,6 @@ def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = self.base_filter: List[BaseDataType | List[BaseDataType]] = [] self.base_filter_string = '' self.prop_dt_map: Dict[str, str] = {} - self.current_qid = '' self.base_data_type = 
base_data_type self.mediawiki_api_url = mediawiki_api_url or config['MEDIAWIKI_API_URL'] @@ -114,11 +113,7 @@ def reconstruct_statements(self, qid: str) -> List[BaseDataType]: self.reconstructed_statements = reconstructed_statements return reconstructed_statements - def get_item(self, claims: List, cqid: str = None) -> str: - self.load_item(claims=claims, cqid=cqid) - return self.current_qid - - def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool: + def get_item(self, claims: Union[list, Claims], cqid: str = None) -> Optional[str]: match_sets = [] for claim in claims: # skip to next if statement has no value or no data type defined, e.g. for deletion objects @@ -162,7 +157,7 @@ def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool: # return True if not match_sets: - return True + return None if cqid: matching_qids = {cqid} @@ -174,11 +169,9 @@ def load_item(self, claims: Union[list, Claims], cqid: str = None) -> bool: if not len(matching_qids) == 1: if self.debug: print(f"no matches ({len(matching_qids)})") - return True + return None - qid = matching_qids.pop() - self.current_qid = qid - return False + return matching_qids.pop() def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExists = ActionIfExists.REPLACE, cqid: str = None) -> bool: del_props = set() @@ -190,12 +183,12 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi for x in data: if x.mainsnak.datavalue and x.mainsnak.datatype: data_props.add(x.mainsnak.property_number) - self.load_item(data, cqid) + qid = self.get_item(data, cqid) - if not self.current_qid: + if not qid: return True - reconstructed_statements = self.reconstruct_statements(self.current_qid) + reconstructed_statements = self.reconstruct_statements(qid) tmp_rs = copy.deepcopy(reconstructed_statements) # handle append properties @@ -645,54 +638,29 @@ def __repr__(self) -> str: ) -# def fr_search(**kwargs: Any) -> str: -# 
FastRunContainer.init_fastrun(**kwargs) -# -# if self.fast_run_container is None: -# raise ValueError("FastRunContainer is not initialized.") -# -# self.fast_run_container.load_item(self.claims) -# -# return self.fast_run_container.current_qid - - -# def freezeargs(func): -# """Transform mutable dictionnary -# Into immutable -# Useful to be compatible with cache -# """ -# -# @wraps(func) -# def wrapped(*args: Any, **kwargs: Any) -> Any: -# args = tuple(frozendict(arg) if isinstance(arg, dict) else arg for arg in args) -# kwargs = {k: frozendict(v) if isinstance(v, dict) else v for k, v in kwargs.items()} -# return func(*args, **kwargs) -# -# return wrapped - - def get_fastrun_container(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer: if base_filter is None: base_filter = [] + if config['DEBUG']: + print('Initialize Fast Run get_fastrun_container') + + # We search if we already have a FastRunContainer with the same parameters to re-use it fastrun_container = search_fastrun_store(base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive) - fastrun_container.current_qid = '' - fastrun_container.base_data_type = BaseDataType return fastrun_container -# @freezeargs -# @lru_cache() def search_fastrun_store(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer: - for c in fastrun_store: - if (c.base_filter == base_filter) and (c.use_refs == use_refs) and (c.case_insensitive == case_insensitive) and ( - c.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']): - return c + for fastrun in fastrun_store: + if (fastrun.base_filter == base_filter) and (fastrun.use_refs == use_refs) and (fastrun.case_insensitive == case_insensitive) and ( + fastrun.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']): + return fastrun # In case nothing was found in the fastrun_store if config['DEBUG']: 
print("Create a new FastRunContainer") - fastrun_container = FastRunContainer(base_filter=base_filter, use_refs=use_refs, base_data_type=BaseDataType, case_insensitive=case_insensitive) + + fastrun_container = FastRunContainer(base_data_type=BaseDataType, base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive) fastrun_store.append(fastrun_container) return fastrun_container From 65e7147eab41a11deb921dbf6030a96ef211eca8 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 28 Nov 2021 19:24:10 +0100 Subject: [PATCH 143/308] Prepare v0.12.0.dev7 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index d39a2c2f..cad321c1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = wikibaseintegrator -version = 0.12.0.dev6 +version = 0.12.0.dev7 author = Myst, Wikibase Integrator authors and Wikidata Integrator authors license = MIT license_files = LICENSE From a081e1ffdf0a84ba99b907751cd0e2ca70e8c174 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 28 Nov 2021 19:33:12 +0100 Subject: [PATCH 144/308] Update wbi_helpers.py (#255) Add a default session instead of recreating a new one everytime. 
--- wikibaseintegrator/wbi_helpers.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index 3b5b6b5e..3956e1e0 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -29,6 +29,10 @@ class BColors: UNDERLINE = '\033[4m' +# Session used for anonymous requests +default_session = requests.Session() + + def mediawiki_api_call(method: str, mediawiki_api_url: str = None, session: Session = None, max_retries: int = 100, retry_after: int = 60, **kwargs: Any) -> Dict: """ :param method: 'GET' or 'POST' @@ -53,7 +57,7 @@ def mediawiki_api_call(method: str, mediawiki_api_url: str = None, session: Sess raise ValueError("'format' can only be 'json' when using mediawiki_api_call()") response = None - session = session if session else requests.Session() + session = session if session else default_session for n in range(max_retries): try: response = session.request(method=method, url=mediawiki_api_url, **kwargs) @@ -138,7 +142,7 @@ def mediawiki_api_call_helper(data: Dict[str, Any] = None, login: Login = None, } if data is not None: - if login is not None and 'token' not in data: + if not allow_anonymous and login is not None and 'token' not in data: data.update({'token': login.get_edit_token()}) elif 'token' not in data: data.update({'token': '+\\'}) From bd620eb88f5b923ad11ff5d6f7aee3e22407e15a Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 28 Nov 2021 19:34:22 +0100 Subject: [PATCH 145/308] Implement logging instead of config['DEBUG'] (#259) --- .coveragerc | 1 - pyproject.toml | 2 + test/test_all.py | 3 - test/test_wbi_fastrun.py | 5 - test/test_wbi_helpers.py | 19 ++-- wikibaseintegrator/entities/baseentity.py | 9 +- wikibaseintegrator/wbi_config.py | 3 +- wikibaseintegrator/wbi_fastrun.py | 110 ++++++++-------------- wikibaseintegrator/wbi_helpers.py | 19 ++-- wikibaseintegrator/wbi_login.py | 17 ++-- 10 
files changed, 73 insertions(+), 115 deletions(-) diff --git a/.coveragerc b/.coveragerc index 11f0f2ae..f55fa189 100644 --- a/.coveragerc +++ b/.coveragerc @@ -10,7 +10,6 @@ exclude_lines = # Don't complain about missing debug-only code: def __repr__ - if self\.debug # Don't complain if tests don't hit defensive assertion code: raise AssertionError diff --git a/pyproject.toml b/pyproject.toml index 8606b543..32cf7568 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,3 +28,5 @@ disable = [ "import-outside-toplevel" # To remove later ] +[tool.pytest.ini_options] +log_cli = true diff --git a/test/test_all.py b/test/test_all.py index 918abd8c..90930a36 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -4,12 +4,9 @@ from wikibaseintegrator import WikibaseIntegrator, datatypes, wbi_fastrun from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.entities import Item -from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists from wikibaseintegrator.wbi_fastrun import get_fastrun_container -config['DEBUG'] = True - wbi = WikibaseIntegrator() diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py index e37cd829..3379194f 100644 --- a/test/test_wbi_fastrun.py +++ b/test/test_wbi_fastrun.py @@ -3,11 +3,8 @@ from wikibaseintegrator import WikibaseIntegrator, wbi_fastrun from wikibaseintegrator.datatypes import BaseDataType, ExternalID, Item -from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists -config['DEBUG'] = True - wbi = WikibaseIntegrator() @@ -128,7 +125,6 @@ def __init__(self, *args: Any, **kwargs: Any): def test_fastrun_ref_ensembl(): # fastrun checks refs frc = FastRunContainerFakeQueryDataEnsembl(base_filter=[BaseDataType(prop_nr='P594'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType, use_refs=True) - frc.debug = True # statement has no ref statements = [ExternalID(value='ENSG00000123374', 
prop_nr='P594')] @@ -159,7 +155,6 @@ class FakeQueryDataAppendProps(wbi_fastrun.FastRunContainer): # an item with three values for the same property def __init__(self, *args: Any, **kwargs: Any): super().__init__(*args, **kwargs) - self.debug = True self.prop_dt_map = {'P527': 'wikibase-item', 'P248': 'wikibase-item', 'P594': 'external-id'} self.rev_lookup = defaultdict(set) diff --git a/test/test_wbi_helpers.py b/test/test_wbi_helpers.py index f29c15b8..6587e1aa 100644 --- a/test/test_wbi_helpers.py +++ b/test/test_wbi_helpers.py @@ -1,3 +1,4 @@ +import logging import unittest import requests @@ -30,16 +31,16 @@ def test_connection(): mediawiki_api_call_helper(data=data, mediawiki_api_url="https://httpbin.org/status/400", max_retries=2, retry_after=1, allow_anonymous=True) -def test_user_agent(capfd): - # Test there is a warning - mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True) - out, err = capfd.readouterr() - assert out - +def test_user_agent(caplog): # Test there is no warning because of the user agent - mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True, user_agent='MyWikibaseBot/0.5') - out, err = capfd.readouterr() - assert not out + with caplog.at_level(logging.WARNING): + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True, user_agent='MyWikibaseBot/0.5') + assert 'WARNING' not in caplog.text + + # Test there is a warning + with caplog.at_level(logging.WARNING): + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True) + assert 'Please set an user agent' in caplog.text # Test if the user agent is correctly added new_user_agent = get_user_agent(user_agent='MyWikibaseBot/0.5') diff --git 
a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index 646dc171..ed80733e 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -1,5 +1,6 @@ from __future__ import annotations +import logging from copy import copy from typing import TYPE_CHECKING, Any, Dict, List, Union @@ -8,7 +9,6 @@ from wikibaseintegrator import wbi_fastrun from wikibaseintegrator.datatypes import BaseDataType from wikibaseintegrator.models.claims import Claim, Claims -from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_enums import ActionIfExists from wikibaseintegrator.wbi_exceptions import MWApiError, NonUniqueLabelDescriptionPairError from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper @@ -17,6 +17,8 @@ if TYPE_CHECKING: from wikibaseintegrator import WikibaseIntegrator +log = logging.getLogger(__name__) + class BaseEntity: ETYPE = 'base-entity' @@ -36,8 +38,6 @@ def __init__(self, api: 'WikibaseIntegrator' = None, lastrevid: int = None, type self.id = id self.claims = claims or Claims() - self.debug = config['DEBUG'] - def add_claims(self, claims: Union[Claim, list], action_if_exists: ActionIfExists = ActionIfExists.APPEND) -> BaseEntity: if isinstance(claims, Claim): claims = [claims] @@ -145,8 +145,7 @@ def _write(self, data: Dict = None, summary: str = None, allow_anonymous: bool = if self.lastrevid: payload.update({'baserevid': self.lastrevid}) - if self.debug: - print(payload) + log.debug(payload) try: json_data = mediawiki_api_call_helper(data=payload, login=self.api.login, allow_anonymous=allow_anonymous, is_bot=self.api.is_bot, **kwargs) diff --git a/wikibaseintegrator/wbi_config.py b/wikibaseintegrator/wbi_config.py index 92d4fac1..cbfd458b 100644 --- a/wikibaseintegrator/wbi_config.py +++ b/wikibaseintegrator/wbi_config.py @@ -28,6 +28,5 @@ 'SPARQL_ENDPOINT_URL': 'https://query.wikidata.org/sparql', 'WIKIBASE_URL': 'http://www.wikidata.org', 
'DEFAULT_LANGUAGE': 'en', - 'DEFAULT_LEXEME_LANGUAGE': 'Q1860', - 'DEBUG': False + 'DEFAULT_LEXEME_LANGUAGE': 'Q1860' } diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index ac9155db..efd7c21a 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -2,6 +2,7 @@ import collections import copy +import logging from collections import defaultdict from functools import lru_cache from itertools import chain @@ -15,12 +16,14 @@ if TYPE_CHECKING: from wikibaseintegrator.models import Claims +log = logging.getLogger(__name__) + fastrun_store: List[FastRunContainer] = [] class FastRunContainer: def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = None, sparql_endpoint_url: str = None, wikibase_url: str = None, - base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False, debug: bool = None): + base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False): self.reconstructed_statements: List[BaseDataType] = [] self.rev_lookup: defaultdict[str, Set[str]] = defaultdict(set) self.rev_lookup_ci: defaultdict[str, Set[str]] = defaultdict(set) @@ -37,8 +40,6 @@ def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = self.use_refs = use_refs self.case_insensitive = case_insensitive - self.debug = debug or config['DEBUG'] - if base_filter and any(base_filter): self.base_filter = base_filter for k in self.base_filter: @@ -123,8 +124,7 @@ def get_item(self, claims: Union[list, Claims], cqid: str = None) -> Optional[st prop_nr = claim.mainsnak.property_number if prop_nr not in self.prop_dt_map: - if self.debug: - print(f"{prop_nr} not found in fastrun") + log.debug(f"{prop_nr} not found in fastrun") if isinstance(claim, BaseDataType) and type(claim) != BaseDataType: self.prop_dt_map.update({prop_nr: claim.DTYPE}) @@ -138,13 +138,12 @@ def get_item(self, 
claims: Union[list, Claims], cqid: str = None) -> Optional[st if self.prop_dt_map[prop_nr] == 'wikibase-item': current_value = claim.mainsnak.datavalue['value']['id'] - if self.debug: - print(current_value) - if self.case_insensitive: - print("case insensitive enabled") - print(self.rev_lookup_ci) - else: - print(self.rev_lookup) + log.debug(current_value) + if self.case_insensitive: + log.debug("case insensitive enabled") + log.debug(self.rev_lookup_ci) + else: + log.debug(self.rev_lookup) if current_value in self.rev_lookup: # quick check for if the value has ever been seen before, if not, write required @@ -152,8 +151,7 @@ def get_item(self, claims: Union[list, Claims], cqid: str = None) -> Optional[st elif self.case_insensitive and current_value.casefold() in self.rev_lookup_ci: match_sets.append(set(self.rev_lookup_ci[current_value.casefold()])) else: - if self.debug: - print(f"no matches for rev lookup for {current_value}") + log.debug(f"no matches for rev lookup for {current_value}") # return True if not match_sets: @@ -167,8 +165,7 @@ def get_item(self, claims: Union[list, Claims], cqid: str = None) -> Optional[st # check if there are any items that have all of these values # if not, a write is required no matter what if not len(matching_qids) == 1: - if self.debug: - print(f"no matches ({len(matching_qids)})") + log.debug(f"no matches ({len(matching_qids)})") return None return matching_qids.pop() @@ -204,8 +201,7 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi # comp = [True for x in app_data for y in rec_app_data if x.equals(y, include_ref=self.use_refs)] if len(comp) != len(app_data): - if self.debug: - print(f"failed append: {p}") + log.debug(f"failed append: {p}") return True tmp_rs = [x for x in tmp_rs if x.mainsnak.property_number not in append_props and x.mainsnak.property_number in data_props] @@ -214,8 +210,7 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi # ensure that 
statements meant for deletion get handled properly reconst_props = {x.mainsnak.property_number for x in tmp_rs} if not date.mainsnak.datatype and date.mainsnak.property_number in reconst_props: - if self.debug: - print("returned from delete prop handling") + log.debug("returned from delete prop handling") return True if not date.mainsnak.datavalue or not date.mainsnak.datatype: @@ -241,33 +236,29 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi # bool_vec = [x.equals(date, include_ref=self.use_refs, fref=self.ref_comparison_f) and # x.mainsnak.property_number not in del_props for x in tmp_rs] - if self.debug: - print(f"bool_vec: {bool_vec}") - print("-----------------------------------") - for x in tmp_rs: - if x == date and x.mainsnak.property_number not in del_props: - print(x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]) - print(date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]) - elif x.mainsnak.property_number == date.mainsnak.property_number: - print(x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]) - print(date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]) + log.debug(f"bool_vec: {bool_vec}") + log.debug("-----------------------------------") + for x in tmp_rs: + if x == date and x.mainsnak.property_number not in del_props: + log.debug(x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]) + log.debug(date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]) + elif x.mainsnak.property_number == date.mainsnak.property_number: + log.debug(x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]) + log.debug(date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]) if not any(bool_vec): - if self.debug: - print(len(bool_vec)) - 
print("fast run failed at", date.mainsnak.property_number) + log.debug(len(bool_vec)) + log.debug("fast run failed at", date.mainsnak.property_number) return True else: - if self.debug: - print("fast run success") + log.debug("fast run success") tmp_rs.pop(bool_vec.index(True)) if len(tmp_rs) > 0: - if self.debug: - print("failed because not zero") - for x in tmp_rs: - print("xxx", x.mainsnak.property_number, x.mainsnak.datavalue, [z.mainsnak.datavalue for z in x.qualifiers]) - print("failed because not zero--END") + log.debug("failed because not zero") + for x in tmp_rs: + log.debug("xxx", x.mainsnak.property_number, x.mainsnak.datavalue, [z.mainsnak.datavalue for z in x.qualifiers]) + log.debug("failed because not zero--END") return True return False @@ -326,8 +317,7 @@ def check_language_data(self, qid: str, lang_data: List, lang: str, lang_data_ty for s in lang_data: if s.strip().casefold() not in all_lang_strings: - if self.debug: - print(f"fastrun failed at: {lang_data_type}, string: {s}") + log.debug(f"fastrun failed at: {lang_data_type}, string: {s}") return True return False @@ -455,26 +445,9 @@ def update_frc_from_query(self, r: List, prop_nr: str) -> None: if 'unit' in i: self.prop_data[qid][prop_nr][i['sid']]['unit'] = i['unit'] - def _query_data(self, prop_nr: str, use_units: bool = False) -> None: - page_size = 10000 + def _query_data(self, prop_nr: str, use_units: bool = False, page_size: int = 10000) -> None: page_count = 0 - num_pages = None - if self.debug: - # get the number of pages/queries so we can show a progress bar - query = f""" - SELECT (COUNT(?item) as ?c) where {{ - {self.base_filter_string} - ?item <{self.wikibase_url}/prop/{prop_nr}> ?sid . 
- }}""" - - if self.debug: - print(query) - - r = execute_sparql_query(query, endpoint=self.sparql_endpoint_url, debug=self.debug)['results']['bindings'] - count = int(r[0]['c']['value']) - print(f"Count: {count}") - num_pages = (int(count) // page_size) + 1 - print(f"Query {prop_nr}: {page_count}/{num_pages}") + while True: # Query header query = ''' @@ -563,15 +536,13 @@ def _query_data(self, prop_nr: str, use_units: bool = False) -> None: # Format the query query = query.format(wb_url=self.wikibase_url, base_filter=self.base_filter_string, prop_nr=prop_nr, offset=str(page_count * page_size), page_size=str(page_size)) - if self.debug: - print(query) + log.debug(query) results = execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] self.format_query_results(results, prop_nr) self.update_frc_from_query(results, prop_nr) page_count += 1 - if num_pages: - print(f"Query {prop_nr}: {page_count}/{num_pages}") + if len(results) == 0 or len(results) < page_size: break @@ -599,8 +570,7 @@ def _query_lang(self, lang: str, lang_data_type: str) -> Optional[List[Dict[str, }} ''' - if self.debug: - print(query) + log.debug(query) return execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] @@ -642,8 +612,7 @@ def get_fastrun_container(base_filter: List[BaseDataType | List[BaseDataType]] = if base_filter is None: base_filter = [] - if config['DEBUG']: - print('Initialize Fast Run get_fastrun_container') + log.debug('Initialize Fast Run get_fastrun_container') # We search if we already have a FastRunContainer with the same parameters to re-use it fastrun_container = search_fastrun_store(base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive) @@ -658,8 +627,7 @@ def search_fastrun_store(base_filter: List[BaseDataType | List[BaseDataType]] = return fastrun # In case nothing was found in the fastrun_store - if config['DEBUG']: - print("Create a new FastRunContainer") + 
log.debug("Create a new FastRunContainer") fastrun_container = FastRunContainer(base_data_type=BaseDataType, base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive) fastrun_store.append(fastrun_container) diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index 3956e1e0..2bb09d44 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -1,6 +1,7 @@ from __future__ import annotations import datetime +import logging from time import sleep from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union from urllib.parse import urlparse @@ -16,6 +17,8 @@ from wikibaseintegrator.entities.baseentity import BaseEntity from wikibaseintegrator.wbi_login import Login +log = logging.getLogger(__name__) + class BColors: HEADER = '\033[95m' @@ -126,8 +129,8 @@ def mediawiki_api_call_helper(data: Dict[str, Any] = None, login: Login = None, hostname = urlparse(mediawiki_api_url).hostname if hostname is not None and hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None: - print('WARNING: Please set an user agent if you interact with a Wikibase instance from the Wikimedia Foundation.') - print('More information in the README.md and https://meta.wikimedia.org/wiki/User-Agent_policy') + log.warning('WARNING: Please set an user agent if you interact with a Wikibase instance from the Wikimedia Foundation.') + log.warning('More information in the README.md and https://meta.wikimedia.org/wiki/User-Agent_policy') if not allow_anonymous: if login is None: @@ -171,8 +174,7 @@ def mediawiki_api_call_helper(data: Dict[str, Any] = None, login: Login = None, @wbi_backoff() -def execute_sparql_query(query: str, prefix: str = None, endpoint: str = None, user_agent: str = None, max_retries: int = 1000, retry_after: int = 60, - debug: bool = False) -> Optional[Dict[str, dict]]: +def execute_sparql_query(query: str, prefix: str = None, endpoint: str = None, 
user_agent: str = None, max_retries: int = 1000, retry_after: int = 60) -> Optional[Dict[str, dict]]: """ Static method which can be used to execute any SPARQL query :param prefix: The URI prefixes required for an endpoint, default is the Wikidata specific prefixes @@ -181,8 +183,6 @@ def execute_sparql_query(query: str, prefix: str = None, endpoint: str = None, u :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. :param max_retries: The number time this function should retry in case of header reports. :param retry_after: the number of seconds should wait upon receiving either an error code or the Query Service is not reachable. - :param debug: Enable debug output. - :type debug: boolean :return: The results of the query are returned in JSON format """ @@ -191,8 +191,8 @@ def execute_sparql_query(query: str, prefix: str = None, endpoint: str = None, u hostname = urlparse(sparql_endpoint_url).hostname if hostname is not None and hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None: - print('WARNING: Please set an user agent if you interact with a Wikibase instance from the Wikimedia Foundation.') - print('More information in the README.md and https://meta.wikimedia.org/wiki/User-Agent_policy') + log.warning('WARNING: Please set an user agent if you interact with a Wikibase instance from the Wikimedia Foundation.') + log.warning('More information in the README.md and https://meta.wikimedia.org/wiki/User-Agent_policy') if prefix: query = prefix + '\n' + query @@ -208,8 +208,7 @@ def execute_sparql_query(query: str, prefix: str = None, endpoint: str = None, u 'Content-Type': 'multipart/form-data' } - if debug or config['DEBUG']: - print(BColors.WARNING + params['query'] + BColors.ENDC) + log.debug(BColors.WARNING + params['query'] + BColors.ENDC) for n in range(max_retries): try: diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index 
34f00118..6b5f050a 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -1,7 +1,7 @@ """ Login class for Wikidata. Takes username and password and stores the session cookies and edit tokens. """ - +import logging import time import webbrowser from typing import Optional @@ -17,6 +17,8 @@ from wikibaseintegrator.wbi_config import config from wikibaseintegrator.wbi_helpers import get_user_agent +log = logging.getLogger(__name__) + class Login: """ @@ -25,8 +27,8 @@ class Login: @wbi_backoff() def __init__(self, auth_method: str = 'oauth2', user: str = None, password: str = None, mediawiki_api_url: str = None, mediawiki_index_url: str = None, - mediawiki_rest_url: str = None, token_renew_period: int = 1800, consumer_token: str = None, consumer_secret: str = None, - access_token: str = None, access_secret: str = None, callback_url: str = 'oob', user_agent: str = None, debug: bool = False): + mediawiki_rest_url: str = None, token_renew_period: int = 1800, consumer_token: str = None, consumer_secret: str = None, access_token: str = None, + access_secret: str = None, callback_url: str = 'oob', user_agent: str = None): """ This class handles several types of login procedures. Either use user and pwd authentication or OAuth. Wikidata clientlogin can also be used. If using one method, do NOT pass parameters for another method. 
@@ -113,8 +115,7 @@ def __init__(self, auth_method: str = 'oauth2', user: str = None, password: str login_result = self.session.post(self.mediawiki_api_url, data=params).json() - if debug: - print(login_result) + log.debug(login_result) if 'login' in login_result and login_result['login']['result'] == 'Success': print("Successfully logged in as", login_result['login']['lgusername']) @@ -132,16 +133,14 @@ def __init__(self, auth_method: str = 'oauth2', user: str = None, password: str login_result = self.session.post(self.mediawiki_api_url, data=params).json() - if debug: - print(login_result) + log.debug(login_result) if 'clientlogin' in login_result: clientlogin = login_result['clientlogin'] if clientlogin['status'] != 'PASS': raise LoginError(f"Login failed ({clientlogin['messagecode']}). Message: '{clientlogin['message']}'") - if debug: - print("Successfully logged in as", clientlogin['username']) + log.debug("Successfully logged in as", clientlogin['username']) else: raise LoginError(f"Login failed ({login_result['error']['code']}). 
Message: '{login_result['error']['info']}'") From 4eb0ec95876ad92013472aaf50d0af290a09c665 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 28 Nov 2021 19:52:50 +0100 Subject: [PATCH 146/308] Amount can be zero (#257) --- wikibaseintegrator/datatypes/quantity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py index a7264181..2870a919 100644 --- a/wikibaseintegrator/datatypes/quantity.py +++ b/wikibaseintegrator/datatypes/quantity.py @@ -42,7 +42,7 @@ def set_value(self, amount: Union[str, int, float] = None, upper_bound: Union[st if unit.startswith('Q'): unit = wikibase_url + '/entity/' + unit - if amount: + if amount is not None: amount = format_amount(amount) unit = str(unit) if upper_bound: From 396f9fa90edf503e994274f9d1128d89b7b2df66 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 28 Nov 2021 19:57:15 +0100 Subject: [PATCH 147/308] Empty claim (#258) * Avoid empty claim list in Claims() * Don't return claim if removed and don't have an wikibase ID * Remove unnecessary part * Improve Claims().remove() Add remove flag for identified claims and simply delete others. Remove empty list * Remove Claims.clear() Far from perfect. Can be easily done outside. 
--- wikibaseintegrator/models/claims.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py index 66c78365..12366851 100644 --- a/wikibaseintegrator/models/claims.py +++ b/wikibaseintegrator/models/claims.py @@ -25,6 +25,16 @@ def claims(self, claims): def get(self, property: str = None) -> List: return self.claims[property] + def remove(self, property: str = None) -> None: + if property in self.claims: + for prop in self.claims[property]: + if prop.id: + prop.remove() + else: + self.claims[property].remove(prop) + if len(self.claims[property]) == 0: + del self.claims[property] + def add(self, claims: Union[list, Claim, None] = None, action_if_exists: ActionIfExists = ActionIfExists.REPLACE) -> Claims: """ @@ -93,12 +103,12 @@ def get_json(self) -> Dict[str, list]: if property not in json_data: json_data[property] = [] for claim in claims: - json_data[property].append(claim.get_json()) + if not claim.removed or claim.id: + json_data[property].append(claim.get_json()) + if len(json_data[property]) == 0: + del json_data[property] return json_data - def clear(self) -> None: - self.claims = {} - def __len__(self): return len(self.claims) @@ -244,7 +254,8 @@ def get_json(self) -> Dict[str, Any]: if len(self.references) > 0: json_data['references'] = self.references.get_json() if self.removed: - json_data['remove'] = '' + if self.id: + json_data['remove'] = '' return json_data def has_equal_qualifiers(self, other: Claim) -> bool: From 31098e6839386f2a5d2e445ecd8ce3292fb46ee1 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 28 Nov 2021 20:05:34 +0100 Subject: [PATCH 148/308] Fix wrongly formatted debug messages --- wikibaseintegrator/wbi_fastrun.py | 12 ++++++------ wikibaseintegrator/wbi_login.py | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/wikibaseintegrator/wbi_fastrun.py 
b/wikibaseintegrator/wbi_fastrun.py index efd7c21a..fb1de037 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -240,15 +240,15 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi log.debug("-----------------------------------") for x in tmp_rs: if x == date and x.mainsnak.property_number not in del_props: - log.debug(x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]) - log.debug(date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]) + log.debug([x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]]) + log.debug([date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]]) elif x.mainsnak.property_number == date.mainsnak.property_number: - log.debug(x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]) - log.debug(date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]) + log.debug([x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]]) + log.debug([date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]]) if not any(bool_vec): log.debug(len(bool_vec)) - log.debug("fast run failed at", date.mainsnak.property_number) + log.debug(f"fast run failed at {date.mainsnak.property_number}") return True else: log.debug("fast run success") @@ -257,7 +257,7 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi if len(tmp_rs) > 0: log.debug("failed because not zero") for x in tmp_rs: - log.debug("xxx", x.mainsnak.property_number, x.mainsnak.datavalue, [z.mainsnak.datavalue for z in x.qualifiers]) + log.debug(["xxx", x.mainsnak.property_number, x.mainsnak.datavalue, [z.mainsnak.datavalue for z in x.qualifiers]]) log.debug("failed because not zero--END") return True diff --git 
a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index 6b5f050a..c17c5580 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -140,7 +140,7 @@ def __init__(self, auth_method: str = 'oauth2', user: str = None, password: str if clientlogin['status'] != 'PASS': raise LoginError(f"Login failed ({clientlogin['messagecode']}). Message: '{clientlogin['message']}'") - log.debug("Successfully logged in as", clientlogin['username']) + log.debug(f"Successfully logged in as {clientlogin['username']}") else: raise LoginError(f"Login failed ({login_result['error']['code']}). Message: '{login_result['error']['info']}'") From ded696a5c2c05e6689d710a7585ffe1bfd3ddce3 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 28 Nov 2021 21:37:39 +0100 Subject: [PATCH 149/308] Remove duplicated debug --- wikibaseintegrator/wbi_fastrun.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index fb1de037..7231e330 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -536,8 +536,6 @@ def _query_data(self, prop_nr: str, use_units: bool = False, page_size: int = 10 # Format the query query = query.format(wb_url=self.wikibase_url, base_filter=self.base_filter_string, prop_nr=prop_nr, offset=str(page_count * page_size), page_size=str(page_size)) - log.debug(query) - results = execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] self.format_query_results(results, prop_nr) self.update_frc_from_query(results, prop_nr) From 97771d0563e87d8a754a41ac54aa919c32735783 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 28 Nov 2021 21:52:05 +0100 Subject: [PATCH 150/308] Change default success login message to INFO --- wikibaseintegrator/wbi_login.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index c17c5580..50e4576f 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -118,7 +118,7 @@ def __init__(self, auth_method: str = 'oauth2', user: str = None, password: str log.debug(login_result) if 'login' in login_result and login_result['login']['result'] == 'Success': - print("Successfully logged in as", login_result['login']['lgusername']) + log.info(f"Successfully logged in as {login_result['login']['lgusername']}") else: raise LoginError(f"Login failed. Reason: '{login_result['login']['reason']}'") else: @@ -140,7 +140,7 @@ def __init__(self, auth_method: str = 'oauth2', user: str = None, password: str if clientlogin['status'] != 'PASS': raise LoginError(f"Login failed ({clientlogin['messagecode']}). Message: '{clientlogin['message']}'") - log.debug(f"Successfully logged in as {clientlogin['username']}") + log.info(f"Successfully logged in as {clientlogin['username']}") else: raise LoginError(f"Login failed ({login_result['error']['code']}). 
Message: '{login_result['error']['info']}'") From 967e1f18c4e8db031cd77a92719556f74b4cad8a Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 28 Nov 2021 21:58:00 +0100 Subject: [PATCH 151/308] Update notebooks --- notebooks/item_create_new.ipynb | 20 ++++++-------------- notebooks/lexeme_write.ipynb | 12 ++++++------ 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/notebooks/item_create_new.ipynb b/notebooks/item_create_new.ipynb index e30c88e0..ff8c2a90 100644 --- a/notebooks/item_create_new.ipynb +++ b/notebooks/item_create_new.ipynb @@ -80,15 +80,7 @@ { "cell_type": "code", "execution_count": 4, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Successfully logged in as MystBot\n" - ] - } - ], + "outputs": [], "source": [ "login = wbi_login.Login(auth_method='login', user=WDUSER, password=WDPASS,\n", " mediawiki_api_url='https://test.wikidata.org/w/api.php')\n", @@ -135,7 +127,7 @@ "outputs": [ { "data": { - "text/plain": "" + "text/plain": "" }, "execution_count": 6, "metadata": {}, @@ -168,7 +160,7 @@ "outputs": [ { "data": { - "text/plain": "], 'fr': []}>" + "text/plain": "], 'fr': []}>" }, "execution_count": 7, "metadata": {}, @@ -201,7 +193,7 @@ "outputs": [ { "data": { - "text/plain": "" + "text/plain": "" }, "execution_count": 8, "metadata": {}, @@ -234,7 +226,7 @@ "outputs": [ { "data": { - "text/plain": " _Snak__property_number='P31533' _Snak__hash=None _Snak__datavalue={'value': 'A String property', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__removed=False _Claim__references=>]}>" + "text/plain": " _Snak__property_number='P31533' _Snak__hash=None _Snak__datavalue={'value': 'A String property', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__removed=False 
_Claim__references=>]}>" }, "execution_count": 9, "metadata": {}, @@ -274,7 +266,7 @@ "outputs": [ { "data": { - "text/plain": "\n\t lastrevid=551951\n\t type='item'\n\t id='Q222825'\n\t claims= _Snak__property_number='P31533' _Snak__hash='112d32b098a091cc1398c779e76c763a523d4ffc' _Snak__datavalue={'value': 'A String property', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id='Q222825$FE2928B1-3353-49D1-B6E2-C87ACCDCAB0D' _Claim__rank= _Claim__removed=False _Claim__references=>]}>\n\t fast_run_container=None\n\t debug=False\n\t labels=, 'fr': }>\n\t descriptions=, 'fr': }>\n\t aliases=], 'fr': []}>\n\t sitelinks=>" + "text/plain": "\n\t lastrevid=553075\n\t type='item'\n\t id='Q223584'\n\t claims= _Snak__property_number='P31533' _Snak__hash='112d32b098a091cc1398c779e76c763a523d4ffc' _Snak__datavalue={'value': 'A String property', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id='Q223584$458446DC-F46F-4CC5-A491-2D6DB1C10400' _Claim__rank= _Claim__removed=False _Claim__references=>]}>\n\t labels=, 'fr': }>\n\t descriptions=, 'fr': }>\n\t aliases=], 'fr': []}>\n\t sitelinks=>" }, "execution_count": 10, "metadata": {}, diff --git a/notebooks/lexeme_write.ipynb b/notebooks/lexeme_write.ipynb index 509fd43a..12a27247 100644 --- a/notebooks/lexeme_write.ipynb +++ b/notebooks/lexeme_write.ipynb @@ -82,7 +82,7 @@ "execution_count": 4, "outputs": [], "source": [ - "login = wbi_login.Login(auth_method='clientlogin', user=WDUSER, password=WDPASS,\n", + "login = wbi_login.Login(auth_method='login', user=WDUSER, password=WDPASS,\n", " mediawiki_api_url='https://test.wikidata.org/w/api.php')\n", "wbi = WikibaseIntegrator(login=login)" ], @@ -137,7 +137,7 @@ "outputs": [ { "data": { - "text/plain": "" + "text/plain": "" }, "execution_count": 6, "metadata": {}, @@ -217,7 +217,7 @@ "outputs": [ { "data": { - "text/plain": " 
_Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]> _Claim__removed=False _BaseDataType__value=None>]}>" + "text/plain": " _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]>>]}>" }, "execution_count": 8, "metadata": {}, @@ -252,7 +252,7 @@ "outputs": [ { "data": { - "text/plain": ", 'fr': }> claims= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for sense', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= 
_Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Sense qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]> _Claim__removed=False _BaseDataType__value=None>]}> removed=False>]>" + "text/plain": ", 'fr': }> claims= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for sense', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Sense qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]>>]}> removed=False>]>" }, "execution_count": 9, "metadata": {}, @@ -308,7 +308,7 @@ "outputs": [ { "data": { - "text/plain": ", 'fr': }> _Form__grammatical_features=['Q146786'] _Form__claims= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for form', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Form qualifier', 'type': 'string'} 
_Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]> _Claim__removed=False _BaseDataType__value=None>]}>>}>" + "text/plain": ", 'fr': }> _Form__grammatical_features=['Q146786'] _Form__claims= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for form', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Form qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]>>]}>>}>" }, "execution_count": 10, "metadata": {}, @@ -372,7 +372,7 @@ "outputs": [ { "data": { - "text/plain": "\n\t lastrevid=551031\n\t type='lexeme'\n\t id='L1660'\n\t claims= _Snak__property_number='P828' _Snak__hash='dc920cec98f0e830c30011cd496108be8d50afab' _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='351f871bfe166697d3270cc0df7df8d09603efb0' _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> 
_Claim__qualifiers_order=['P828'] _Claim__id='L1660$D3E30A15-AC21-4E07-B953-B13D6025A861' _Claim__rank= _Claim__references= _Snak__property_number='P828' _Snak__hash='bae62d7b26cff18d5a9d277e04475fcb6bd9bcfb' _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='dcd0b956c352f2036bb7da153c4db941e74a803f' _Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]> _Claim__removed=False _BaseDataType__value=None>]}>\n\t json={'type': 'lexeme', 'id': 'L1660', 'lemmas': {'en': {'language': 'en', 'value': 'English lemma'}, 'fr': {'language': 'fr', 'value': 'French lemma'}}, 'lexicalCategory': 'Q1244', 'language': 'Q1860', 'claims': {'P828': [{'mainsnak': {'snaktype': 'value', 'property': 'P828', 'hash': 'dc920cec98f0e830c30011cd496108be8d50afab', 'datavalue': {'value': 'Create a string claim for claim', 'type': 'string'}, 'datatype': 'string'}, 'type': 'statement', 'qualifiers': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '351f871bfe166697d3270cc0df7df8d09603efb0', 'datavalue': {'value': 'Claim qualifier', 'type': 'string'}, 'datatype': 'string'}]}, 'qualifiers-order': ['P828'], 'id': 'L1660$D3E30A15-AC21-4E07-B953-B13D6025A861', 'rank': 'normal', 'references': [{'hash': 'ef1ebde859c902590dfbe5a3bd7a2f7af01f4a4f', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': 'bae62d7b26cff18d5a9d277e04475fcb6bd9bcfb', 'datavalue': {'value': 'Claim string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}, {'hash': '77df7e5db38ec15b7abac0755c4dc8e781ba9369', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': 'dcd0b956c352f2036bb7da153c4db941e74a803f', 'datavalue': {'value': 'Another claim string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}]}]}, 'forms': [{'id': 
'L1660-F1', 'representations': {'en': {'language': 'en', 'value': 'English form representation'}, 'fr': {'language': 'fr', 'value': 'French form representation'}}, 'grammaticalFeatures': ['Q146786'], 'claims': {'P828': [{'mainsnak': {'snaktype': 'value', 'property': 'P828', 'hash': '288a8a8f1e12b9bacb056319c4ed0f3e6bafdd00', 'datavalue': {'value': 'Create a string claim for form', 'type': 'string'}, 'datatype': 'string'}, 'type': 'statement', 'qualifiers': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '6c157568b379f4c2722f30a9fee95d3c5f99dfe9', 'datavalue': {'value': 'Form qualifier', 'type': 'string'}, 'datatype': 'string'}]}, 'qualifiers-order': ['P828'], 'id': 'L1660-F1$42E574AD-9956-4427-A89D-68A7127E2410', 'rank': 'normal', 'references': [{'hash': '32f599c26d4251d72272b3a65294d6d5517d2445', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '1cbb07e7eba6906acf68f427a3f87fefc0a53283', 'datavalue': {'value': 'Form string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}, {'hash': 'a0f20d048c3df03c1bfb25c63869ba37b32c9e01', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '67bef049d400b9d7e2e2695320d85012c9122df5', 'datavalue': {'value': 'Another form string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}]}]}}], 'senses': [{'id': 'L1660-S1', 'glosses': {'en': {'language': 'en', 'value': 'English gloss'}, 'fr': {'language': 'fr', 'value': 'French gloss'}}, 'claims': {'P828': [{'mainsnak': {'snaktype': 'value', 'property': 'P828', 'hash': '9781442191b38e26c55b1dfde6f6203c9127c4f3', 'datavalue': {'value': 'Create a string claim for sense', 'type': 'string'}, 'datatype': 'string'}, 'type': 'statement', 'qualifiers': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': 'c1afe7627d9711627e1e48b8e015ade998d6d434', 'datavalue': {'value': 'Sense qualifier', 'type': 'string'}, 'datatype': 'string'}]}, 'qualifiers-order': ['P828'], 'id': 
'L1660-S1$DAEABAC7-3780-4E51-AED7-3FAE7230975E', 'rank': 'normal', 'references': [{'hash': '87e05c6c4fc2d74529d7801340c18955516b6d96', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '617bd3516c2003df28ab90fd6ee0bd8237f1f8e6', 'datavalue': {'value': 'Sense string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}, {'hash': '989b65b201e4b2fbd9d5c2c8d7bd7b7e9d2ce5c7', 'snaks': {'P828': [{'snaktype': 'value', 'property': 'P828', 'hash': '1afe472d8815b3cbf50d2e5b1c497456a82f055f', 'datavalue': {'value': 'Another sense string reference', 'type': 'string'}, 'datatype': 'string'}]}, 'snaks-order': ['P828']}]}]}}], 'lastrevid': 551031}\n\t fast_run_container=None\n\t debug=False\n\t lemmas=, 'fr': }>\n\t lexical_category='Q1244'\n\t language='Q1860'\n\t forms=, 'fr': }> _Form__grammatical_features=['Q146786'] _Form__claims= _Snak__property_number='P828' _Snak__hash='288a8a8f1e12b9bacb056319c4ed0f3e6bafdd00' _Snak__datavalue={'value': 'Create a string claim for form', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='6c157568b379f4c2722f30a9fee95d3c5f99dfe9' _Snak__datavalue={'value': 'Form qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1660-F1$42E574AD-9956-4427-A89D-68A7127E2410' _Claim__rank= _Claim__references= _Snak__property_number='P828' _Snak__hash='1cbb07e7eba6906acf68f427a3f87fefc0a53283' _Snak__datavalue={'value': 'Form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='67bef049d400b9d7e2e2695320d85012c9122df5' _Snak__datavalue={'value': 'Another form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]> _Claim__removed=False _BaseDataType__value=None>]}>>}>\n\t senses=, 'fr': }> claims= _Snak__property_number='P828' 
_Snak__hash='9781442191b38e26c55b1dfde6f6203c9127c4f3' _Snak__datavalue={'value': 'Create a string claim for sense', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='c1afe7627d9711627e1e48b8e015ade998d6d434' _Snak__datavalue={'value': 'Sense qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1660-S1$DAEABAC7-3780-4E51-AED7-3FAE7230975E' _Claim__rank= _Claim__references= _Snak__property_number='P828' _Snak__hash='617bd3516c2003df28ab90fd6ee0bd8237f1f8e6' _Snak__datavalue={'value': 'Sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='1afe472d8815b3cbf50d2e5b1c497456a82f055f' _Snak__datavalue={'value': 'Another sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]> _Claim__removed=False _BaseDataType__value=None>]}> removed=False>]>>" + "text/plain": "\n\t lastrevid=553076\n\t type='lexeme'\n\t id='L1734'\n\t claims= _Snak__property_number='P828' _Snak__hash='dc920cec98f0e830c30011cd496108be8d50afab' _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='351f871bfe166697d3270cc0df7df8d09603efb0' _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1734$692E2FA0-0970-4280-8A59-B9A59B3DC86E' _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash='bae62d7b26cff18d5a9d277e04475fcb6bd9bcfb' _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='dcd0b956c352f2036bb7da153c4db941e74a803f' 
_Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]>>]}>\n\t lemmas=, 'fr': }>\n\t lexical_category='Q1244'\n\t language='Q1860'\n\t forms=, 'fr': }> _Form__grammatical_features=['Q146786'] _Form__claims= _Snak__property_number='P828' _Snak__hash='288a8a8f1e12b9bacb056319c4ed0f3e6bafdd00' _Snak__datavalue={'value': 'Create a string claim for form', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='6c157568b379f4c2722f30a9fee95d3c5f99dfe9' _Snak__datavalue={'value': 'Form qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1734-F1$36902198-7926-41E1-BAC9-5E8601F4A2A7' _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash='1cbb07e7eba6906acf68f427a3f87fefc0a53283' _Snak__datavalue={'value': 'Form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='67bef049d400b9d7e2e2695320d85012c9122df5' _Snak__datavalue={'value': 'Another form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]>>]}>>}>\n\t senses=, 'fr': }> claims= _Snak__property_number='P828' _Snak__hash='9781442191b38e26c55b1dfde6f6203c9127c4f3' _Snak__datavalue={'value': 'Create a string claim for sense', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='c1afe7627d9711627e1e48b8e015ade998d6d434' _Snak__datavalue={'value': 'Sense qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1734-S1$37E31B12-1BB8-454A-8ADE-84AAED4A49EA' _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash='617bd3516c2003df28ab90fd6ee0bd8237f1f8e6' 
_Snak__datavalue={'value': 'Sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='1afe472d8815b3cbf50d2e5b1c497456a82f055f' _Snak__datavalue={'value': 'Another sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]>>]}> removed=False>]>>" }, "execution_count": 11, "metadata": {}, From efa426798a32910923a31b6fcd534cef4612a0f4 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 2 Dec 2021 11:42:06 +0100 Subject: [PATCH 152/308] Update globecoordinate.py --- wikibaseintegrator/datatypes/globecoordinate.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py index b866e487..04971d9c 100644 --- a/wikibaseintegrator/datatypes/globecoordinate.py +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -43,9 +43,6 @@ def set_value(self, latitude: float = None, longitude: float = None, altitude: f if globe.startswith('Q'): globe = wikibase_url + '/entity/' + globe - # TODO: Introduce validity checks for coordinates, etc. 
- # TODO: Add check if latitude/longitude/precision is None - if latitude is not None and longitude is not None: if latitude < -90 or latitude > 90: raise ValueError(f"latitude must be between -90 and 90, got '{latitude}'") @@ -62,6 +59,8 @@ def set_value(self, latitude: float = None, longitude: float = None, altitude: f }, 'type': 'globecoordinate' } + else: + raise ValueError(f"latitude or longitude can't be None") def __eq__(self, other): if isinstance(other, Claim) and other.mainsnak.datavalue['type'] == 'globecoordinate': From f985b528a08d006c677c53d944c6df5e0f89d852 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 2 Dec 2021 11:42:27 +0100 Subject: [PATCH 153/308] Cleanup fastrun --- wikibaseintegrator/wbi_fastrun.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 7231e330..b4bbf908 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -43,8 +43,6 @@ def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = if base_filter and any(base_filter): self.base_filter = base_filter for k in self.base_filter: - # TODO: Reimplement "subclasses of" support - # ks = False if isinstance(k, BaseDataType): if k.mainsnak.datavalue: self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> {entity} .\n'.format( @@ -60,7 +58,6 @@ def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = else: self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr1}>/<{wb_url}/prop/direct/{prop_nr2}>* ?zz{prop_nr1}{prop_nr2} .\n'.format( wb_url=self.wikibase_url, prop_nr1=k[0].mainsnak.property_number, prop_nr2=k[1].mainsnak.property_number) - else: raise ValueError @@ -257,7 +254,7 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi if len(tmp_rs) > 0: log.debug("failed because not zero") for x in tmp_rs: - log.debug(["xxx", 
x.mainsnak.property_number, x.mainsnak.datavalue, [z.mainsnak.datavalue for z in x.qualifiers]]) + log.debug([x.mainsnak.property_number, x.mainsnak.datavalue, [z.mainsnak.datavalue for z in x.qualifiers]]) log.debug("failed because not zero--END") return True @@ -610,8 +607,6 @@ def get_fastrun_container(base_filter: List[BaseDataType | List[BaseDataType]] = if base_filter is None: base_filter = [] - log.debug('Initialize Fast Run get_fastrun_container') - # We search if we already have a FastRunContainer with the same parameters to re-use it fastrun_container = search_fastrun_store(base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive) @@ -625,7 +620,7 @@ def search_fastrun_store(base_filter: List[BaseDataType | List[BaseDataType]] = return fastrun # In case nothing was found in the fastrun_store - log.debug("Create a new FastRunContainer") + log.info("Create a new FastRunContainer") fastrun_container = FastRunContainer(base_data_type=BaseDataType, base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive) fastrun_store.append(fastrun_container) From 239ed644d49d65edad5777f5e7a890847b01f3cb Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 5 Dec 2021 12:09:38 +0100 Subject: [PATCH 154/308] Add TODO --- wikibaseintegrator/entities/baseentity.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index ed80733e..bedca230 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -180,6 +180,8 @@ def write_required(self, base_filter: List[BaseDataType | List[BaseDataType]] = if claim.mainsnak.property_number in base_filter: claims_to_check.append(claim) + # TODO: Add check_language_data + return fastrun_container.write_required(data=claims_to_check, cqid=self.id) def __repr__(self): From a933710ebddd2d2965be5e6b008b2ff6e119eb71 Mon Sep 17 00:00:00 2001 
From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sun, 5 Dec 2021 16:11:56 +0100 Subject: [PATCH 155/308] Update globecoordinate.py --- wikibaseintegrator/datatypes/globecoordinate.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py index 04971d9c..62ac6667 100644 --- a/wikibaseintegrator/datatypes/globecoordinate.py +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -59,8 +59,6 @@ def set_value(self, latitude: float = None, longitude: float = None, altitude: f }, 'type': 'globecoordinate' } - else: - raise ValueError(f"latitude or longitude can't be None") def __eq__(self, other): if isinstance(other, Claim) and other.mainsnak.datavalue['type'] == 'globecoordinate': From b7b04b4f482e15550c9d86ce9598536f099f7fa5 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Tue, 14 Dec 2021 19:47:48 +0100 Subject: [PATCH 156/308] Update lexeme_write.ipynb --- notebooks/lexeme_write.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/notebooks/lexeme_write.ipynb b/notebooks/lexeme_write.ipynb index 12a27247..8d80cafd 100644 --- a/notebooks/lexeme_write.ipynb +++ b/notebooks/lexeme_write.ipynb @@ -380,7 +380,7 @@ } ], "source": [ - "lexeme.write()" + "lexeme.write()\n" ], "metadata": { "collapsed": false, From cac53e8e9e90161159cd98197835214de5198ab7 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Thu, 16 Dec 2021 14:16:23 +0100 Subject: [PATCH 157/308] Replace simplejson by ujson (#264) * Replace simplejson by json * Ignore missing dependency simplejson * Cleanup * Whitelist ujson in pylint * Use ujson instead of default json * Add ujson imports * use json default error * Remove ignore simplejson import * Fix variable reuse * Update test_wbi_backoff.py --- pyproject.toml | 5 ++++ requirements.txt | 2 +- setup.cfg | 2 +- setup.py | 2 +- test/test_entity_item.py | 5 ++-- 
test/test_wbi_backoff.py | 6 ++--- wikibaseintegrator/entities/baseentity.py | 28 +++++++++++------------ wikibaseintegrator/wbi_backoff.py | 2 +- 8 files changed, 28 insertions(+), 24 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 32cf7568..6088a68e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,11 @@ line_length = 179 ignore_missing_imports = true files = "wikibaseintegrator/**/*.py,test/*.py" +[tool.pylint.MASTER] +extension-pkg-allow-list = [ + "ujson" +] + [tool.pylint.messages_control] max-line-length = 179 disable = [ diff --git a/requirements.txt b/requirements.txt index c6a13f5c..2eb06bb4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ backoff~=1.11.1 mwoauth~=0.3.7 oauthlib~=3.1.1 requests~=2.26.0 -simplejson~=3.17.6 +ujson~=4.3.0 diff --git a/setup.cfg b/setup.cfg index cad321c1..6be3d80f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,7 +36,7 @@ install_requires = mwoauth~=0.3.7 oauthlib~=3.1.1 requests~=2.26.0 - simplejson~=3.17.6 + ujson~=4.3.0 python_requires = >=3.7, <=3.11 [options.extras_require] diff --git a/setup.py b/setup.py index fbd8e466..0152d8b5 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ "mwoauth ~= 0.3.7", "oauthlib ~= 3.1.1", "requests ~= 2.26.0", - "simplejson ~= 3.17.6" + "ujson ~= 4.3.0" ], extras_require={ "dev": [ diff --git a/test/test_entity_item.py b/test/test_entity_item.py index 1cc38677..31c914ab 100644 --- a/test/test_entity_item.py +++ b/test/test_entity_item.py @@ -1,7 +1,6 @@ +import json import unittest -from simplejson import JSONDecodeError - from wikibaseintegrator import WikibaseIntegrator from wikibaseintegrator.datatypes import BaseDataType, Item @@ -34,7 +33,7 @@ def test_get_json(self): assert wbi.item.get('Q582').get_json()['labels']['fr']['value'] == 'Villeurbanne' def test_write(self): - with self.assertRaises(JSONDecodeError): + with self.assertRaises(json.JSONDecodeError): wbi.item.get('Q582').write(allow_anonymous=True, 
mediawiki_api_url='https://httpstat.us/200') def test_write_not_required(self): diff --git a/test/test_wbi_backoff.py b/test/test_wbi_backoff.py index 0d360737..5eeaac70 100644 --- a/test/test_wbi_backoff.py +++ b/test/test_wbi_backoff.py @@ -1,7 +1,7 @@ -import json import unittest import requests +import ujson from wikibaseintegrator import wbi_login from wikibaseintegrator.wbi_backoff import wbi_backoff @@ -21,7 +21,7 @@ def test_all(self): assert good_http_code() == 200 - with self.assertRaises(json.JSONDecodeError): + with self.assertRaises(ValueError): bad_json() @@ -43,7 +43,7 @@ def good_http_code(): @wbi_backoff() def bad_json(): - json.loads("I failed :(") + ujson.loads("I failed :(") @wbi_backoff() diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py index bedca230..06c7f11b 100644 --- a/wikibaseintegrator/entities/baseentity.py +++ b/wikibaseintegrator/entities/baseentity.py @@ -4,7 +4,7 @@ from copy import copy from typing import TYPE_CHECKING, Any, Dict, List, Union -import simplejson +import ujson from wikibaseintegrator import wbi_fastrun from wikibaseintegrator.datatypes import BaseDataType @@ -119,11 +119,11 @@ def _write(self, data: Dict = None, summary: str = None, allow_anonymous: bool = # new_json_repr['claims'].pop(claim) # data = json.JSONEncoder().encode(new_json_repr) - data = simplejson.JSONEncoder().encode(data) + json_data: str = ujson.dumps(data) payload: Dict[str, Any] = { 'action': 'wbeditentity', - 'data': data, + 'data': json_data, 'format': 'json', 'summary': summary } @@ -148,26 +148,26 @@ def _write(self, data: Dict = None, summary: str = None, allow_anonymous: bool = log.debug(payload) try: - json_data = mediawiki_api_call_helper(data=payload, login=self.api.login, allow_anonymous=allow_anonymous, is_bot=self.api.is_bot, **kwargs) + json_result: dict = mediawiki_api_call_helper(data=payload, login=self.api.login, allow_anonymous=allow_anonymous, is_bot=self.api.is_bot, **kwargs) - 
if 'error' in json_data and 'messages' in json_data['error']: - error_msg_names = {x.get('name') for x in json_data['error']['messages']} + if 'error' in json_result and 'messages' in json_result['error']: + error_msg_names = {x.get('name') for x in json_result['error']['messages']} if 'wikibase-validator-label-with-description-conflict' in error_msg_names: - raise NonUniqueLabelDescriptionPairError(json_data) + raise NonUniqueLabelDescriptionPairError(json_result) - raise MWApiError(json_data) + raise MWApiError(json_result) - if 'error' in json_data.keys(): - raise MWApiError(json_data) + if 'error' in json_result.keys(): + raise MWApiError(json_result) except Exception: print('Error while writing to the Wikibase instance') raise # after successful write, update this object with latest json, QID and parsed data types. - self.id = json_data['entity']['id'] - if 'success' in json_data and 'entity' in json_data and 'lastrevid' in json_data['entity']: - self.lastrevid = json_data['entity']['lastrevid'] - return json_data['entity'] + self.id = json_result['entity']['id'] + if 'success' in json_result and 'entity' in json_result and 'lastrevid' in json_result['entity']: + self.lastrevid = json_result['entity']['lastrevid'] + return json_result['entity'] def write_required(self, base_filter: List[BaseDataType | List[BaseDataType]] = None, **kwargs: Any) -> bool: fastrun_container = wbi_fastrun.get_fastrun_container(base_filter=base_filter, **kwargs) diff --git a/wikibaseintegrator/wbi_backoff.py b/wikibaseintegrator/wbi_backoff.py index 77347daf..93b15b9f 100644 --- a/wikibaseintegrator/wbi_backoff.py +++ b/wikibaseintegrator/wbi_backoff.py @@ -1,9 +1,9 @@ import sys from functools import partial +from json import JSONDecodeError import backoff import requests -from simplejson import JSONDecodeError from wikibaseintegrator.wbi_config import config From b3051059b92f55d568c805ff5777544b77f2f882 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> 
Date: Fri, 17 Dec 2021 10:20:50 +0100 Subject: [PATCH 158/308] Create lexeme_update.ipynb (#263) --- notebooks/lexeme_update.ipynb | 293 ++++++++++++++++++++++++++++++++++ 1 file changed, 293 insertions(+) create mode 100644 notebooks/lexeme_update.ipynb diff --git a/notebooks/lexeme_update.ipynb b/notebooks/lexeme_update.ipynb new file mode 100644 index 00000000..980bae66 --- /dev/null +++ b/notebooks/lexeme_update.ipynb @@ -0,0 +1,293 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Update an existing Lexeme" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from wikibaseintegrator import WikibaseIntegrator, wbi_login, datatypes\n", + "from wikibaseintegrator.models import Reference, References\n", + "from wikibaseintegrator.models.qualifiers import Qualifiers\n", + "from wikibaseintegrator.wbi_config import config" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "WDUSER = ''\n", + "WDPASS = ''" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Set default variables" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "config['MEDIAWIKI_API_URL'] = 'https://test.wikidata.org/w/api.php'\n", + "config['USER_AGENT'] = 'Lexeme Write Notebook'" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Create login and WikibaseIntegrator object" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "login = wbi_login.Login(auth_method='login', user=WDUSER, 
password=WDPASS,\n", + " mediawiki_api_url='https://test.wikidata.org/w/api.php')\n", + "wbi = WikibaseIntegrator(login=login)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Get existing lexeme entity" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "lexeme = wbi.lexeme.get(entity_id='L42')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Set french lemmas" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "source": [ + "# Lemmas\n", + "lexeme.lemmas.set(language='fr', value='réponse')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "execution_count": 6, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Create a new claim" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "# Claims\n", + "claim_references = References() # Create a group of references\n", + "\n", + "claim_reference1 = Reference()\n", + "claim_reference1.add(datatypes.String(prop_nr='P828', value='Claim string reference'))\n", + "\n", + "claim_reference2 = Reference()\n", + "claim_reference2.add(datatypes.String(prop_nr='P828', value='Another claim string reference'))\n", + "\n", + "# Add each reference to the group of references\n", + "claim_references.add(claim_reference1)\n", + "claim_references.add(claim_reference2)\n", + "\n", + "claim_qualifiers = Qualifiers()\n", + 
"claim_qualifiers.add(datatypes.String(prop_nr='P828', value='Claim qualifier'))\n", + "\n", + "claim = datatypes.String(prop_nr='P828', value=\"Create a string claim for claim\", references=claim_references,\n", + " qualifiers=claim_qualifiers)\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Add claim to lexeme" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "data": { + "text/plain": " _Snak__property_number='P77771' _Snak__hash='5362eb3a0f7cba043a1b2afbc0352fc16463b0de' _Snak__datavalue={'value': {'entity-type': 'form', 'id': 'L41-F1'}, 'type': 'wikibase-entityid'} _Snak__datatype='wikibase-form'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id='L42$7bd50233-4080-3f02-5fc3-c01b905a58ed' _Claim__rank= _Claim__removed=False _Claim__references=>, _Snak__property_number='P77771' _Snak__hash='1ad1672f2ebc9e96628b20091d1c0f7f17f2ec65' _Snak__datavalue={'value': {'entity-type': 'form', 'id': 'L103-F1'}, 'type': 'wikibase-entityid'} _Snak__datatype='wikibase-form'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id='L42$e6a6d9c6-4874-05b9-b7e6-433e7904e624' _Claim__rank= _Claim__removed=False _Claim__references=>], 'P828': [ _Snak__property_number='P828' _Snak__hash='dc920cec98f0e830c30011cd496108be8d50afab' _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='351f871bfe166697d3270cc0df7df8d09603efb0' _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L42$1B581F6D-71B6-4861-8300-6C5CFEE80337' _Claim__rank= _Claim__removed=False _Claim__references= 
_Snak__property_number='P828' _Snak__hash='bae62d7b26cff18d5a9d277e04475fcb6bd9bcfb' _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='dcd0b956c352f2036bb7da153c4db941e74a803f' _Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]>>]}>" + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lexeme.claims.add(claim)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Write the lexeme to the Wikibase" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "data": { + "text/plain": "\n\t lastrevid=553469\n\t type='lexeme'\n\t id='L42'\n\t claims= _Snak__property_number='P77771' _Snak__hash='5362eb3a0f7cba043a1b2afbc0352fc16463b0de' _Snak__datavalue={'value': {'entity-type': 'form', 'id': 'L41-F1'}, 'type': 'wikibase-entityid'} _Snak__datatype='wikibase-form'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id='L42$7bd50233-4080-3f02-5fc3-c01b905a58ed' _Claim__rank= _Claim__removed=False _Claim__references=>, _Snak__property_number='P77771' _Snak__hash='1ad1672f2ebc9e96628b20091d1c0f7f17f2ec65' _Snak__datavalue={'value': {'entity-type': 'form', 'id': 'L103-F1'}, 'type': 'wikibase-entityid'} _Snak__datatype='wikibase-form'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id='L42$e6a6d9c6-4874-05b9-b7e6-433e7904e624' _Claim__rank= _Claim__removed=False _Claim__references=>], 'P828': [ _Snak__property_number='P828' _Snak__hash='dc920cec98f0e830c30011cd496108be8d50afab' _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} 
_Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='351f871bfe166697d3270cc0df7df8d09603efb0' _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L42$1B581F6D-71B6-4861-8300-6C5CFEE80337' _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash='bae62d7b26cff18d5a9d277e04475fcb6bd9bcfb' _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='dcd0b956c352f2036bb7da153c4db941e74a803f' _Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]>>]}>\n\t lemmas=, 'fr': }>\n\t lexical_category='Q1084'\n\t language='Q1860'\n\t forms=\n\t senses=>" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lexeme.write()\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From a37bb93f810bf797a8411326cc99a7009ed51670 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 18 Dec 2021 18:53:38 +0100 Subject: [PATCH 159/308] Linting --- wikibaseintegrator/datatypes/basedatatype.py | 4 +- wikibaseintegrator/datatypes/commonsmedia.py | 4 +- wikibaseintegrator/datatypes/form.py | 2 +- .../datatypes/globecoordinate.py | 4 +- 
wikibaseintegrator/datatypes/item.py | 2 +- wikibaseintegrator/datatypes/lexeme.py | 2 +- .../datatypes/monolingualtext.py | 4 +- wikibaseintegrator/datatypes/property.py | 2 +- wikibaseintegrator/datatypes/quantity.py | 4 +- wikibaseintegrator/datatypes/sense.py | 2 +- wikibaseintegrator/datatypes/time.py | 2 +- wikibaseintegrator/datatypes/url.py | 4 +- wikibaseintegrator/wbi_fastrun.py | 49 +++++++++---------- wikibaseintegrator/wbi_helpers.py | 4 +- wikibaseintegrator/wbi_login.py | 4 +- 15 files changed, 45 insertions(+), 48 deletions(-) diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index 33518525..bdb85d7d 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -39,10 +39,10 @@ def __init_subclass__(cls, **kwargs): def set_value(self, value: Any = None): pass - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return '"' + self.mainsnak.datavalue['value'] + '"' - def _parse_sparql_value(self, value, type='literal', unit='1') -> bool: + def parse_sparql_value(self, value, type='literal') -> bool: if type == 'uri': pattern = re.compile(r'^?$') matches = pattern.match(value) diff --git a/wikibaseintegrator/datatypes/commonsmedia.py b/wikibaseintegrator/datatypes/commonsmedia.py index caa4cd58..c444437d 100644 --- a/wikibaseintegrator/datatypes/commonsmedia.py +++ b/wikibaseintegrator/datatypes/commonsmedia.py @@ -10,10 +10,10 @@ class CommonsMedia(String): """ DTYPE = 'commonsMedia' - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return '<' + self.mainsnak.datavalue['value'] + '>' - def _parse_sparql_value(self, value, type='literal', unit='1') -> bool: + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: pattern = re.compile(r'^?$') matches = pattern.match(value) if not matches: diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py index 
105c01b0..c0237154 100644 --- a/wikibaseintegrator/datatypes/form.py +++ b/wikibaseintegrator/datatypes/form.py @@ -55,5 +55,5 @@ def set_value(self, value: str = None): 'type': 'wikibase-entityid' } - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py index 62ac6667..d1b3b320 100644 --- a/wikibaseintegrator/datatypes/globecoordinate.py +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -77,10 +77,10 @@ def __eq__(self, other): return super().__eq__(other) - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return '"Point(' + str(self.mainsnak.datavalue['value']['longitude']) + ' ' + str(self.mainsnak.datavalue['value']['latitude']) + ')"' - def _parse_sparql_value(self, value, type='literal', unit='1') -> bool: + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: pattern = re.compile(r'^"?Point\((.*) (.*)\)"?(?:\^\^geo:wktLiteral)?$') matches = pattern.match(value) if not matches: diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py index 294f0524..205079e7 100644 --- a/wikibaseintegrator/datatypes/item.py +++ b/wikibaseintegrator/datatypes/item.py @@ -48,5 +48,5 @@ def set_value(self, value: Union[str, int] = None): 'type': 'wikibase-entityid' } - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return '<{wb_url}/entity/' + self.mainsnak.datavalue['value']['id'] + '>' diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py index 71728e87..01a3c9c0 100644 --- a/wikibaseintegrator/datatypes/lexeme.py +++ b/wikibaseintegrator/datatypes/lexeme.py @@ -48,5 +48,5 @@ def set_value(self, value: Union[str, int] = None): 'type': 'wikibase-entityid' } - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return 
self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py index 5c4eb776..c7044e4e 100644 --- a/wikibaseintegrator/datatypes/monolingualtext.py +++ b/wikibaseintegrator/datatypes/monolingualtext.py @@ -43,10 +43,10 @@ def set_value(self, text: str = None, language: str = None): 'type': 'monolingualtext' } - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return '"' + self.mainsnak.datavalue['value']['text'].replace('"', r'\"') + '"@' + self.mainsnak.datavalue['value']['language'] - def _parse_sparql_value(self, value, type='literal', unit='1') -> bool: + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: pattern = re.compile(r'^"(.*?)"@([a-z\-]*)$') matches = pattern.match(value) if not matches: diff --git a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py index a2241b25..96003da4 100644 --- a/wikibaseintegrator/datatypes/property.py +++ b/wikibaseintegrator/datatypes/property.py @@ -49,5 +49,5 @@ def set_value(self, value: Union[str, int] = None): 'type': 'wikibase-entityid' } - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py index 2870a919..a9a95209 100644 --- a/wikibaseintegrator/datatypes/quantity.py +++ b/wikibaseintegrator/datatypes/quantity.py @@ -81,9 +81,9 @@ def set_value(self, amount: Union[str, int, float] = None, upper_bound: Union[st if not lower_bound: del self.mainsnak.datavalue['value']['lowerBound'] - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return '"' + format_amount(self.mainsnak.datavalue['value']['amount']) + '"^^xsd:decimal' - def _parse_sparql_value(self, value, type='literal', unit='1') -> bool: + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: 
self.set_value(amount=value, unit=unit) return True diff --git a/wikibaseintegrator/datatypes/sense.py b/wikibaseintegrator/datatypes/sense.py index b7f83dcd..6dcbd826 100644 --- a/wikibaseintegrator/datatypes/sense.py +++ b/wikibaseintegrator/datatypes/sense.py @@ -44,5 +44,5 @@ def set_value(self, value: str = None): 'type': 'wikibase-entityid' } - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py index 97a12519..7dc1e5b3 100644 --- a/wikibaseintegrator/datatypes/time.py +++ b/wikibaseintegrator/datatypes/time.py @@ -73,5 +73,5 @@ def set_value(self, time: str = None, before: int = 0, after: int = 0, precision 'type': 'time' } - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return self.mainsnak.datavalue['value']['time'] diff --git a/wikibaseintegrator/datatypes/url.py b/wikibaseintegrator/datatypes/url.py index c56705cb..fc0184dc 100644 --- a/wikibaseintegrator/datatypes/url.py +++ b/wikibaseintegrator/datatypes/url.py @@ -41,10 +41,10 @@ def set_value(self, value: str = None): 'type': 'string' } - def _get_sparql_value(self) -> str: + def get_sparql_value(self) -> str: return '<' + self.mainsnak.datavalue['value'] + '>' - def _parse_sparql_value(self, value, type='literal', unit='1') -> bool: + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: pattern = re.compile(r'^?$') matches = pattern.match(value) if not matches: diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index b4bbf908..f38cae04 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -46,7 +46,7 @@ def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = if isinstance(k, BaseDataType): if k.mainsnak.datavalue: self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> {entity} .\n'.format( - 
wb_url=self.wikibase_url, prop_nr=k.mainsnak.property_number, entity=k._get_sparql_value().format(wb_url=self.wikibase_url)) + wb_url=self.wikibase_url, prop_nr=k.mainsnak.property_number, entity=k.get_sparql_value().format(wb_url=self.wikibase_url)) else: self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> ?zz{prop_nr} .\n'.format( wb_url=self.wikibase_url, prop_nr=k.mainsnak.property_number) @@ -54,15 +54,13 @@ def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = if k[0].mainsnak.datavalue: self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}>/<{wb_url}/prop/direct/{prop_nr2}>* {entity} .\n'.format( wb_url=self.wikibase_url, prop_nr=k[0].mainsnak.property_number, prop_nr2=k[1].mainsnak.property_number, - entity=k[0]._get_sparql_value().format(wb_url=self.wikibase_url)) + entity=k[0].get_sparql_value().format(wb_url=self.wikibase_url)) else: self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr1}>/<{wb_url}/prop/direct/{prop_nr2}>* ?zz{prop_nr1}{prop_nr2} .\n'.format( wb_url=self.wikibase_url, prop_nr1=k[0].mainsnak.property_number, prop_nr2=k[1].mainsnak.property_number) else: raise ValueError - self.__initialized = True - def reconstruct_statements(self, qid: str) -> List[BaseDataType]: reconstructed_statements: List[BaseDataType] = [] @@ -79,7 +77,7 @@ def reconstruct_statements(self, qid: str) -> List[BaseDataType]: if prop not in self.prop_dt_map: self.prop_dt_map.update({prop: self.get_prop_datatype(prop)}) # reconstruct statements from frc (including unit, qualifiers, and refs) - for uid, d in dt.items(): + for _, d in dt.items(): qualifiers = [] for q in d['qual']: f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[q[0]]][0] @@ -90,7 +88,7 @@ def reconstruct_statements(self, qid: str) -> List[BaseDataType]: qualifiers.append(f(value=q[1], prop_nr=q[0])) references = [] - for ref_id, refs in d['ref'].items(): + for _, refs in d['ref'].items(): this_ref = [] for ref 
in refs: f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[ref[0]]][0] @@ -101,10 +99,10 @@ def reconstruct_statements(self, qid: str) -> List[BaseDataType]: # TODO: Add support for more data type if self.prop_dt_map[prop_nr] == 'quantity': datatype = f(prop_nr=prop_nr, qualifiers=qualifiers, references=references, unit=d['unit']) - datatype._parse_sparql_value(value=d['v'], unit=d['unit']) + datatype.parse_sparql_value(value=d['v'], unit=d['unit']) else: datatype = f(prop_nr=prop_nr, qualifiers=qualifiers, references=references) - datatype._parse_sparql_value(value=d['v']) + datatype.parse_sparql_value(value=d['v']) reconstructed_statements.append(datatype) # this isn't used. done for debugging purposes @@ -121,16 +119,16 @@ def get_item(self, claims: Union[list, Claims], cqid: str = None) -> Optional[st prop_nr = claim.mainsnak.property_number if prop_nr not in self.prop_dt_map: - log.debug(f"{prop_nr} not found in fastrun") + log.debug("%s not found in fastrun", prop_nr) - if isinstance(claim, BaseDataType) and type(claim) != BaseDataType: + if isinstance(claim, BaseDataType) and type(claim) != BaseDataType: # pylint: disable=unidiomatic-typecheck self.prop_dt_map.update({prop_nr: claim.DTYPE}) else: self.prop_dt_map.update({prop_nr: self.get_prop_datatype(prop_nr)}) self._query_data(prop_nr=prop_nr, use_units=self.prop_dt_map[prop_nr] == 'quantity') # noinspection PyProtectedMember - current_value = claim._get_sparql_value() + current_value = claim.get_sparql_value() if self.prop_dt_map[prop_nr] == 'wikibase-item': current_value = claim.mainsnak.datavalue['value']['id'] @@ -148,8 +146,7 @@ def get_item(self, claims: Union[list, Claims], cqid: str = None) -> Optional[st elif self.case_insensitive and current_value.casefold() in self.rev_lookup_ci: match_sets.append(set(self.rev_lookup_ci[current_value.casefold()])) else: - log.debug(f"no matches for rev lookup for {current_value}") - # return True + log.debug("no matches for rev 
lookup for %s", current_value) if not match_sets: return None @@ -162,7 +159,7 @@ def get_item(self, claims: Union[list, Claims], cqid: str = None) -> Optional[st # check if there are any items that have all of these values # if not, a write is required no matter what if not len(matching_qids) == 1: - log.debug(f"no matches ({len(matching_qids)})") + log.debug("no matches (%s)", len(matching_qids)) return None return matching_qids.pop() @@ -198,7 +195,7 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi # comp = [True for x in app_data for y in rec_app_data if x.equals(y, include_ref=self.use_refs)] if len(comp) != len(app_data): - log.debug(f"failed append: {p}") + log.debug("failed append: %s", p) return True tmp_rs = [x for x in tmp_rs if x.mainsnak.property_number not in append_props and x.mainsnak.property_number in data_props] @@ -233,7 +230,7 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi # bool_vec = [x.equals(date, include_ref=self.use_refs, fref=self.ref_comparison_f) and # x.mainsnak.property_number not in del_props for x in tmp_rs] - log.debug(f"bool_vec: {bool_vec}") + log.debug("bool_vec: %s", bool_vec) log.debug("-----------------------------------") for x in tmp_rs: if x == date and x.mainsnak.property_number not in del_props: @@ -245,11 +242,11 @@ def write_required(self, data: List[BaseDataType], action_if_exists: ActionIfExi if not any(bool_vec): log.debug(len(bool_vec)) - log.debug(f"fast run failed at {date.mainsnak.property_number}") + log.debug("fast run failed at %s", date.mainsnak.property_number) return True - else: - log.debug("fast run success") - tmp_rs.pop(bool_vec.index(True)) + + log.debug("fast run success") + tmp_rs.pop(bool_vec.index(True)) if len(tmp_rs) > 0: log.debug("failed because not zero") @@ -314,7 +311,7 @@ def check_language_data(self, qid: str, lang_data: List, lang: str, lang_data_ty for s in lang_data: if s.strip().casefold() not in 
all_lang_strings: - log.debug(f"fastrun failed at: {lang_data_type}, string: {s}") + log.debug("fastrun failed at: %s, string: %s", lang_data_type, s) return True return False @@ -341,7 +338,7 @@ def format_query_results(self, r: List, prop_nr: str) -> None: """ prop_dt = self.get_prop_datatype(prop_nr) for i in r: - for value in {'item', 'sid', 'pq', 'pr', 'ref', 'unit', 'qunit'}: + for value in ['item', 'sid', 'pq', 'pr', 'ref', 'unit', 'qunit']: if value in i: if i[value]['value'].startswith(self.wikibase_url): i[value] = i[value]['value'].split('/')[-1] @@ -355,7 +352,7 @@ def format_query_results(self, r: List, prop_nr: str) -> None: # make sure datetimes are formatted correctly. # the correct format is '+%Y-%m-%dT%H:%M:%SZ', but is sometimes missing the plus?? # some difference between RDF and xsd:dateTime that I don't understand - for value in {'v', 'qval', 'rval'}: + for value in ['v', 'qval', 'rval']: if value in i: if i[value].get("datatype") == 'http://www.w3.org/2001/XMLSchema#dateTime' and not i[value]['value'][0] in '+-': # if it is a dateTime and doesn't start with plus or minus, add a plus @@ -371,14 +368,14 @@ def format_query_results(self, r: List, prop_nr: str) -> None: elif i['v']['type'] == 'literal' and prop_dt == 'monolingualtext': f = [x for x in self.base_data_type.subclasses if x.DTYPE == prop_dt][0](prop_nr=prop_nr, text=i['v']['value'], language=i['v']['xml:lang']) # noinspection PyProtectedMember - i['v'] = f._get_sparql_value() + i['v'] = f.get_sparql_value() else: f = [x for x in self.base_data_type.subclasses if x.DTYPE == prop_dt][0](prop_nr=prop_nr) # noinspection PyProtectedMember - if not f._parse_sparql_value(value=i['v']['value'], type=i['v']['type']): + if not f.parse_sparql_value(value=i['v']['value'], type=i['v']['type']): raise ValueError # noinspection PyProtectedMember - i['v'] = f._get_sparql_value() + i['v'] = f.get_sparql_value() # Note: no-value and some-value don't actually show up in the results here # see for 
example: select * where { wd:Q7207 p:P40 ?c . ?c ?d ?e } diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py index 2bb09d44..c7256249 100644 --- a/wikibaseintegrator/wbi_helpers.py +++ b/wikibaseintegrator/wbi_helpers.py @@ -208,9 +208,9 @@ def execute_sparql_query(query: str, prefix: str = None, endpoint: str = None, u 'Content-Type': 'multipart/form-data' } - log.debug(BColors.WARNING + params['query'] + BColors.ENDC) + log.debug("%s%s%s", BColors.WARNING, params['query'], BColors.ENDC) - for n in range(max_retries): + for _ in range(max_retries): try: response = requests.post(sparql_endpoint_url, params=params, headers=headers) except requests.exceptions.ConnectionError as e: diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index 50e4576f..281026ca 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -118,7 +118,7 @@ def __init__(self, auth_method: str = 'oauth2', user: str = None, password: str log.debug(login_result) if 'login' in login_result and login_result['login']['result'] == 'Success': - log.info(f"Successfully logged in as {login_result['login']['lgusername']}") + log.info("Successfully logged in as %s", login_result['login']['lgusername']) else: raise LoginError(f"Login failed. Reason: '{login_result['login']['reason']}'") else: @@ -140,7 +140,7 @@ def __init__(self, auth_method: str = 'oauth2', user: str = None, password: str if clientlogin['status'] != 'PASS': raise LoginError(f"Login failed ({clientlogin['messagecode']}). Message: '{clientlogin['message']}'") - log.info(f"Successfully logged in as {clientlogin['username']}") + log.info("Successfully logged in as %s", clientlogin['username']) else: raise LoginError(f"Login failed ({login_result['error']['code']}). 
Message: '{login_result['error']['info']}'") From 5adea1d4d88749dbca8c89eef50475ecc7151b3f Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 18 Dec 2021 22:54:28 +0100 Subject: [PATCH 160/308] rollback changes --- wikibaseintegrator/datatypes/basedatatype.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py index bdb85d7d..94c62b7b 100644 --- a/wikibaseintegrator/datatypes/basedatatype.py +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -42,7 +42,7 @@ def set_value(self, value: Any = None): def get_sparql_value(self) -> str: return '"' + self.mainsnak.datavalue['value'] + '"' - def parse_sparql_value(self, value, type='literal') -> bool: + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: if type == 'uri': pattern = re.compile(r'^?$') matches = pattern.match(value) From bfbf061a2e062d5a83017854a6126552e292657c Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Sat, 18 Dec 2021 22:54:44 +0100 Subject: [PATCH 161/308] Improve lint workflow --- .github/workflows/python-lint.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml index 1f9a2d65..46ce8a55 100644 --- a/.github/workflows/python-lint.yml +++ b/.github/workflows/python-lint.yml @@ -27,12 +27,10 @@ jobs: - name: Upgrade setup tools run: python -m pip install --upgrade pip setuptools - name: Install dependencies - run: | - pip install . .[dev] - mypy --install-types --non-interactive + run: pip install .[dev] - name: isort imports check run: isort --check --diff wikibaseintegrator test - name: mypy typing check - run: mypy + run: mypy --install-types --non-interactive - name: pylint code linting run: pylint wikibaseintegrator test || pylint-exit $? 
From db1542ae4bd065ec500ff16e728d0cb1e578fef5 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Mon, 20 Dec 2021 15:59:36 +0100 Subject: [PATCH 162/308] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6088a68e..8b387b68 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ extension-pkg-allow-list = [ ] [tool.pylint.messages_control] -max-line-length = 179 +max-line-length = 180 disable = [ "fixme", "missing-docstring", From bba5fcd2923e01b11994d4c2044665e8ddb1a706 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Tue, 21 Dec 2021 15:36:33 +0100 Subject: [PATCH 163/308] Update ujson to 5.1.0 --- requirements.txt | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2eb06bb4..283ecf5b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,4 @@ backoff~=1.11.1 mwoauth~=0.3.7 oauthlib~=3.1.1 requests~=2.26.0 -ujson~=4.3.0 +ujson~=5.1.0 diff --git a/setup.cfg b/setup.cfg index 6be3d80f..7e481acb 100644 --- a/setup.cfg +++ b/setup.cfg @@ -36,7 +36,7 @@ install_requires = mwoauth~=0.3.7 oauthlib~=3.1.1 requests~=2.26.0 - ujson~=4.3.0 + ujson~=5.1.0 python_requires = >=3.7, <=3.11 [options.extras_require] diff --git a/setup.py b/setup.py index 0152d8b5..7f34b072 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ "mwoauth ~= 0.3.7", "oauthlib ~= 3.1.1", "requests ~= 2.26.0", - "ujson ~= 4.3.0" + "ujson ~= 5.1.0" ], extras_require={ "dev": [ From db8a7f3d2929f22333ac7d439bbc7867cb9f8753 Mon Sep 17 00:00:00 2001 From: Myst <1592048+LeMyst@users.noreply.github.com> Date: Tue, 21 Dec 2021 15:36:48 +0100 Subject: [PATCH 164/308] Fix typos --- .idea/codeStyles/codeStyleConfig.xml | 2 +- .idea/inspectionProfiles/WikibaseIntegrator.xml | 2 +- .idea/inspectionProfiles/profiles_settings.xml | 2 +- 
.idea/misc.xml | 2 +- .idea/modules.xml | 2 +- .idea/vagrant.xml | 2 +- .idea/vcs.xml | 2 +- README.md | 2 +- notebooks/item_create_new.ipynb | 2 +- notebooks/item_get.ipynb | 2 +- notebooks/lexeme_update.ipynb | 2 +- notebooks/lexeme_write.ipynb | 2 +- wikibaseintegrator/wbi_fastrun.py | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml index df5f35dc..f0fda952 100644 --- a/.idea/codeStyles/codeStyleConfig.xml +++ b/.idea/codeStyles/codeStyleConfig.xml @@ -2,4 +2,4 @@ - \ No newline at end of file + diff --git a/.idea/inspectionProfiles/WikibaseIntegrator.xml b/.idea/inspectionProfiles/WikibaseIntegrator.xml index 9a537699..ff9e317a 100644 --- a/.idea/inspectionProfiles/WikibaseIntegrator.xml +++ b/.idea/inspectionProfiles/WikibaseIntegrator.xml @@ -57,4 +57,4 @@ \ No newline at end of file + diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml index 25c0d868..8e251f6f 100644 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -3,4 +3,4 @@