diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 00000000..f55fa189 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,25 @@ +# .coveragerc to control coverage.py +[run] +branch = True + +[report] +# Regexes for lines to exclude from consideration +exclude_lines = +# Have to re-enable the standard pragma + pragma: no cover + +# Don't complain about missing debug-only code: + def __repr__ + +# Don't complain if tests don't hit defensive assertion code: + raise AssertionError + raise NotImplementedError + +# Don't complain if non-runnable code isn't run: + if 0: + if __name__ == .__main__.: + +# Don't complain about TYPE_CHECKING specific imports: + if TYPE_CHECKING: + +ignore_errors = True diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 7422f7c8..4fb1a267 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,6 +1,15 @@ version: 2 updates: + # Maintain dependencies for pip + - package-ecosystem: "pip" + directory: "/" + schedule: + interval: "daily" + assignees: + - "LeMyst" + open-pull-requests-limit: 10 + # Maintain dependencies for GitHub Actions - package-ecosystem: "github-actions" directory: "/" @@ -8,11 +17,13 @@ updates: interval: "daily" assignees: - "LeMyst" + open-pull-requests-limit: 10 - # Maintain dependencies for pip + # Maintain dependencies for documentation - package-ecosystem: "pip" - directory: "/" + directory: "/docs/" schedule: interval: "daily" assignees: - "LeMyst" + open-pull-requests-limit: 10 diff --git a/.github/workflows/publish-to-pypi.yml b/.github/workflows/publish-to-pypi.yml index 58e22ce9..73102b63 100644 --- a/.github/workflows/publish-to-pypi.yml +++ b/.github/workflows/publish-to-pypi.yml @@ -11,10 +11,10 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python 3.9 + - name: Set up Python 3.10 uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: '3.10' - name: Install pypa/build run: >- diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml new file mode 100644 index 00000000..f415cee4 --- /dev/null +++ b/.github/workflows/python-lint.yml @@ -0,0 +1,62 @@ +name: Python Code Quality and Lint + +on: + push: + branches: [ master ] + paths: + - 'wikibaseintegrator/**.py' + - 'test/**.py' + pull_request: + branches: [ '**' ] + paths: + - 'wikibaseintegrator/**.py' + - 'test/**.py' + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2.4.0 + + - name: Set up Python 3.10 + uses: actions/setup-python@v2.3.1 + with: + python-version: '3.10' + + - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + ${{ runner.os }}- + + - name: Upgrade setup tools + run: | + python -m pip install --upgrade pip setuptools + + - name: Install dependencies + run: | + python -m pip install .[dev] + + - name: isort imports check + run: | + python -m isort --check --diff wikibaseintegrator test + + - name: mypy typing check + run: | + python -m mypy --install-types --non-interactive + + - name: pylint code linting + run: | + python -m pylint wikibaseintegrator test || pylint-exit $? 
+ + - name: codespell spell checking + run: | + codespell wikibaseintegrator test + + - name: flynt string formatter converting + run: | + python -m flynt -f wikibaseintegrator test diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-pytest.yml similarity index 69% rename from .github/workflows/python-package.yml rename to .github/workflows/python-pytest.yml index a1f0cb94..92a3b704 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-pytest.yml @@ -1,45 +1,59 @@ -name: Python package - -on: - push: - branches: [ master ] - pull_request: - branches: [ '**' ] - -jobs: - build: - name: pytest ${{ matrix.python-version }} - runs-on: ubuntu-latest - - strategy: - matrix: - python-version: [ '3.7', '3.8', '3.9', '3.10', '3.11-dev' ] - - steps: - - uses: actions/checkout@v3 - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - - name: Cache pip - uses: actions/cache@v3 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - ${{ runner.os }}- - - - name: Upgrade setup tools - run: | - python -m pip install --upgrade pip setuptools - - - name: Install dependencies - run: | - python -m pip install .[dev] - - - name: Test with pytest - run: | - python -m pytest +name: Python pytest + +on: + push: + branches: [ master ] + paths: + - 'wikibaseintegrator/**.py' + - 'test/**.py' + - 'setup.cfg' + - 'setup.py' + - 'requirements.txt' + - 'pyproject.toml' + pull_request: + branches: [ '**' ] + paths: + - 'wikibaseintegrator/**.py' + - 'test/**.py' + - 'setup.cfg' + - 'setup.py' + - 'requirements.txt' + - 'pyproject.toml' + +jobs: + build: + name: pytest ${{ matrix.python-version }} + runs-on: ubuntu-latest + + strategy: + matrix: + python-version: [ '3.7', '3.8', '3.9', '3.10' ] # '3.11-dev' + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + + - name: Cache pip + uses: actions/cache@v3 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + ${{ runner.os }}- + + - name: Upgrade setup tools + run: | + python -m pip install --upgrade pip setuptools + + - name: Install dependencies + run: | + python -m pip install .[dev] + + - name: Test with pytest + run: | + python -m pytest diff --git a/.gitignore b/.gitignore index c22045f1..db4a00d4 100644 --- a/.gitignore +++ b/.gitignore @@ -157,5 +157,8 @@ fabric.properties # Idea pylint plugin configuration file .idea/pylint.xml +# Completely remove .idea folder +.idea + # Other stuff /drafts/ diff --git a/.idea/WikibaseIntegrator.iml b/.idea/WikibaseIntegrator.iml deleted file mode 100644 index 0f6053e6..00000000 --- a/.idea/WikibaseIntegrator.iml +++ /dev/null @@ -1,24 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/.idea/codeStyles/codeStyleConfig.xml b/.idea/codeStyles/codeStyleConfig.xml deleted file mode 100644 index df5f35dc..00000000 --- a/.idea/codeStyles/codeStyleConfig.xml +++ /dev/null @@ -1,5 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/WikibaseIntegrator.xml b/.idea/inspectionProfiles/WikibaseIntegrator.xml deleted file mode 100644 index c122a9d0..00000000 --- a/.idea/inspectionProfiles/WikibaseIntegrator.xml +++ /dev/null @@ -1,50 +0,0 @@ 
- - - - \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml deleted file mode 100644 index 25c0d868..00000000 --- a/.idea/inspectionProfiles/profiles_settings.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 80b44c9f..00000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index 7044df13..00000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vagrant.xml b/.idea/vagrant.xml deleted file mode 100644 index 9b406386..00000000 --- a/.idea/vagrant.xml +++ /dev/null @@ -1,7 +0,0 @@ - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index b156bfce..00000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 00000000..9da2e983 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,23 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/source/conf.py + +# Optionally build your docs in additional formats such as PDF +formats: + - pdf + +# Optionally set the version of Python and requirements required to build your docs +python: + version: "3.8" + install: + - method: pip + path: . + extra_requirements: + - docs diff --git a/LICENSE.txt b/LICENSE similarity index 94% rename from LICENSE.txt rename to LICENSE index 3c42ddc4..09efaf17 100644 --- a/LICENSE.txt +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2017 The Su Lab, The Scripps Research Institute +Copyright (c) 2017 Wikibase Integrator contributors Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/README.md b/README.md index 20fdb8fd..65b275bf 100644 --- a/README.md +++ b/README.md @@ -1,102 +1,110 @@ # Wikibase Integrator # +[![PyPi](https://img.shields.io/pypi/v/wikibaseintegrator.svg)](https://pypi.python.org/pypi/wikibaseintegrator) [![Python package](https://github.com/LeMyst/WikibaseIntegrator/actions/workflows/python-package.yml/badge.svg)](https://github.com/LeMyst/WikibaseIntegrator/actions/workflows/python-package.yml) [![CodeQL](https://github.com/LeMyst/WikibaseIntegrator/actions/workflows/codeql-analysis.yml/badge.svg)](https://github.com/LeMyst/WikibaseIntegrator/actions/workflows/codeql-analysis.yml) -[![Pyversions](https://img.shields.io/pypi/pyversions/wikibaseintegrator.svg)](https://pypi.python.org/pypi/wikibaseintegrator) -[![PyPi](https://img.shields.io/pypi/v/wikibaseintegrator.svg)](https://pypi.python.org/pypi/wikibaseintegrator) - -# Breaking changes in future major version # +[![Pyversions](https://img.shields.io/pypi/implementation/wikibaseintegrator.svg)](https://pypi.python.org/pypi/wikibaseintegrator) +[![Read the Docs](https://readthedocs.org/projects/pip/badge/?version=latest&style=flat)](https://wikibaseintegrator.readthedocs.io) -A complete rewrite of the core of WikibaseIntegrator is in progress. 
You can track the evolution and ask questions in -the related Pull Request [#152](https://github.com/LeMyst/WikibaseIntegrator/pull/152). The changes will break -compatibility with existing scripts. +Wikibase Integrator is a Python package whose purpose is to manipulate data present on a Wikibase instance (like +Wikidata). -It offers a new object-oriented approach, a better readability and a support of Property, Lexeme and MediaInfo entities. +# Breaking changes in v0.12 # -The new version is currently in "beta" state, but I invite people to start using it. If you want to install it, you can -use this command in your project to get the latest pre-release: +A complete rewrite of the WikibaseIntegrator core was done in v0.12, which has led to some important changes. -```bash -python -m pip install --pre wikibaseintegrator -``` +It offers a new object-oriented approach, better code readability and support for Property, Lexeme and MediaInfo +entities (in addition to Item). -If you want to avoid an unwanted upgrade to the v0.12, you can put this line in your requirements.txt: +If you want to stay on v0.11.x, you can put this line in your requirements.txt: ``` wikibaseintegrator~=0.11.3 ``` -I will continue to bug-fixes the current version (v0.11) even after the release of v0.12+. -
+--- - [WikibaseIntegrator / WikidataIntegrator](#wikibaseintegrator--wikidataintegrator) +- [Documentation](#documentation) - [Installation](#installation) - [Using a Wikibase instance](#using-a-wikibase-instance) + - [Wikimedia Foundation User-Agent policy](#wikimedia-foundation-user-agent-policy) - [The Core Parts](#the-core-parts) - - [wbi_core.ItemEngine](#wbi_coreitemengine) - - [wbi_functions](#wbi_functions) - - [Use MediaWiki API](#use-mediawiki-api) - - [wbi_login.Login](#wbi_loginlogin) + - [Entity manipulation](#entity-manipulation) + - [wbi_login](#wbi_login) - [Login using OAuth1 or OAuth2](#login-using-oauth1-or-oauth2) + - [As a bot](#as-a-bot) + - [To impersonate a user (OAuth 1.0a)](#to-impersonate-a-user-oauth-10a) + - [Login with a bot password](#login-with-a-bot-password) - [Login with a username and a password](#login-with-a-username-and-a-password) - [Wikibase Data Types](#wikibase-data-types) + - [Structured Data on Commons](#structured-data-on-commons) + - [Retrieve data](#retrieve-data) + - [Write data](#write-data) - [Helper Methods](#helper-methods) + - [Use MediaWiki API](#use-mediawiki-api) - [Execute SPARQL queries](#execute-sparql-queries) - - [Use Mediawiki API](#use-mediawiki-api) - [Wikibase search entities](#wikibase-search-entities) - [Merge Wikibase items](#merge-wikibase-items) - [Examples (in "normal" mode)](#examples-in-normal-mode) - - [A Minimal Bot](#a-minimal-bot) - - [A Minimal Bot for Mass Import](#a-minimal-bot-for-mass-import) + - [Create a new Item](#create-a-new-item) + - [Modify an existing item](#modify-an-existing-item) + - [A bot for Mass Import](#a-bot-for-mass-import) - [Examples (in "fast run" mode)](#examples-in-fast-run-mode) +- [Debugging](#debugging) # WikibaseIntegrator / WikidataIntegrator # -WikibaseIntegrator (wbi) is a fork from [WikidataIntegrator](https://github.com/SuLab/WikidataIntegrator) (wdi) whose -purpose is to be focused on Wikibase compatibility. There have been many improvements that have led to breaking changes -in the code. Refer to the [release notes](https://github.com/LeMyst/WikibaseIntegrator/releases) to find out what has +WikibaseIntegrator (wbi) is a fork of [WikidataIntegrator](https://github.com/SuLab/WikidataIntegrator) (wdi) whose +purpose is to focus on compatibility with Wikibase. There have been many improvements which have led to radical changes +in the code. See the [release notes](https://github.com/LeMyst/WikibaseIntegrator/releases) to find out what has changed. # Documentation # +(Basic) documentation generated from the Python source code is available on +the [Read the Docs website](https://wikibaseintegrator.readthedocs.io/). + # Installation # -The easiest way to install WikibaseIntegrator is using `pip`. WikibaseIntegrator supports Python 3.7 and higher. If -Python 2 is installed `pip` will lead to an error indicating missing dependencies. +The easiest way to install WikibaseIntegrator is to use the `pip` package manager. WikibaseIntegrator supports Python +3.7 and above. If Python 2 is installed, `pip` will lead to an error indicating missing dependencies. ```bash python -m pip install wikibaseintegrator ``` -You can also clone the repo and execute with administrator rights or install into a virtualenv. +You can also clone the repo and run it with administrator rights or install it in a virtualenv.
```bash git clone https://github.com/LeMyst/WikibaseIntegrator.git cd WikibaseIntegrator -python -m pip install pip setuptools +python -m pip install --upgrade pip setuptools python -m pip install . ``` -To test for correct installation, start a Python console and execute the following (Will retrieve the Wikidata item -for ['Human'](https://www.wikidata.org/entity/Q5)): +To check that the installation is correct, launch a Python console and run the following code (which will retrieve the +Wikidata item for [Human](https://www.wikidata.org/entity/Q5)): ```python -from wikibaseintegrator import wbi_core +from wikibaseintegrator import WikibaseIntegrator -my_first_wikidata_item = wbi_core.ItemEngine(item_id='Q5') +wbi = WikibaseIntegrator() +my_first_wikidata_item = wbi.item.get(entity_id='Q5') # to check successful installation and retrieval of the data, you can print the json representation of the item -print(my_first_wikidata_item.get_json_representation()) +print(my_first_wikidata_item.get_json()) ``` # Using a Wikibase instance # -WikibaseIntegrator use Wikidata as default endpoint. To use a Wikibase instance instead, you can overload the -wbi_config. +WikibaseIntegrator uses Wikidata as its default endpoint. To use another instance of Wikibase instead, you can override the +wbi_config module. An example for a Wikibase instance installed with [wikibase-docker](https://github.com/wmde/wikibase-release-pipeline/tree/main/example), add this to the top of your @@ -110,79 +118,65 @@ wbi_config['SPARQL_ENDPOINT_URL'] = 'http://localhost:8834/proxy/wdqs/bigdata/na wbi_config['WIKIBASE_URL'] = 'http://wikibase.svc' ``` -You can find more default parameters in the file wbi_config.py +You can find more default settings in the file wbi_config.py -# The Core Parts # ## Wikimedia Foundation User-Agent policy ## -wbi_core supports two modes it can be operated in, a normal mode, updating each item at a time and, a fast run mode, -which is pre-loading data locally and then just updating items if the new data provided is differing from what is in -Wikidata. The latter mode allows for great speedups (measured up to 9x) when tens of thousand of Wikidata items need to -be checked if they require updates but only a small number will finally be updated, a situation usually encountered when -keeping Wikidata in sync with an external resource. +If you interact with a Wikibase instance hosted by the Wikimedia Foundation (like Wikidata, Wikimedia Commons, etc.), +it's highly advised to follow the User-Agent policy that you can find on the +page [User-Agent policy](https://meta.wikimedia.org/wiki/User-Agent_policy) +of the Wikimedia Meta-Wiki. -wbi_core consists of a central class called ItemEngine and Login for authenticating with a MediaWiki instance (like -Wikidata). +You can set a complementary User-Agent by modifying the variable `wbi_config['USER_AGENT']` in wbi_config. -## wbi_core.ItemEngine ## +For example, with your library name and contact information: -This is the central class which does all the heavy lifting. +```python -Features: +from wikibaseintegrator.wbi_config import config as wbi_config -* Load a Wikibase item based on data to be written (e.g. a unique central identifier) +wbi_config['USER_AGENT'] = 'MyWikibaseBot/1.0 (https://www.wikidata.org/wiki/User:MyUsername)' -* Load a Wikibase item based on its Wikibase item id (aka QID) +``` -* Checks for conflicts automatically (e.g.
multiple items carrying a unique central identifier will trigger an - exception) -* Checks automatically if the correct item has been loaded by comparing it to the data provided -* All Wikibase data types implemented -* A dedicated wbi_core.ItemEngine.write() method allows loading and consistency checks of data before any write to - Wikibase is performed -* Full access to the whole Wikibase item as a JSON document +# The Core Parts # -There are two ways of working with Wikibase items: +WikibaseIntegrator supports two modes in which it can be used: a normal mode, updating each item at a time, and a fast +run mode, which preloads some data locally and then just updates items if the new data provided differs from Wikidata. +The latter mode allows for great speedups when tens of thousands of Wikidata items need to be checked for updates, +but only a small number will eventually be updated, a situation typically encountered when synchronising Wikidata with +an external resource. -* A user can provide data, and ItemEngine will search for and load/modify an existing item or create a new one, solely - based on the data provided (preferred). This also performs consistency checks based on a set of SPARQL queries. -* A user can work with a selected QID to specifically modify the data on the item. This requires that the user knows - what he/she is doing and should only be used with great care, as this does not perform consistency checks. +## Entity manipulation ## -## wbi_functions ## +WikibaseIntegrator supports the manipulation of Item, Property, Lexeme and MediaInfo entities through these classes: -wbi_functions provides a set of static functions to request or manipulate data from MediaWiki API or SPARQL Service. +* wikibaseintegrator.entities.item.Item +* wikibaseintegrator.entities.property.Property +* wikibaseintegrator.entities.lexeme.Lexeme +* wikibaseintegrator.entities.mediainfo.MediaInfo Features: -* Minimize the number of HTTP requests for reads and writes to improve performance -* Method to easily execute [SPARQL](https://query.wikidata.org) queries on the Wikibase SPARQL endpoint. +* Loading a Wikibase entity based on its Wikibase entity ID. +* All Wikibase data types are implemented (and some data types implemented by extensions). +* Full access to the entire Wikibase entity in the form of a JSON dict representation. ### Use MediaWiki API ### -WikibaseIntegrator don't have functions to make API call to non-wikibase actions. You can -use `wbi_functions.mediawiki_api_call_helper()` to make a custom call. ## wbi_login ## -Example to get the last two revisions of entity Q42 : +`wbi_login` provides the login functionality and also stores the cookies and edit tokens required (For security reasons, +every MediaWiki edit requires an edit token). There are multiple methods to log in: -```python -from wikibaseintegrator import wbi_functions +* `wbi_login.OAuth2(consumer_token, consumer_secret)` (recommended) +* `wbi_login.OAuth1(consumer_token, consumer_secret, access_token, access_secret)` +* `wbi_login.Clientlogin(user, password)` +* `wbi_login.Login(user, password)` -data = { - 'action': 'query', - 'prop': 'revisions', - 'titles': 'Q42', - 'rvlimit': 2, - 'rvprop': 'ids|timestamp|comment|user', - 'rvslots': 'main' -} +More parameters are available.
If you want to authenticate on an instance other than Wikidata, you can set the +mediawiki_api_url, mediawiki_rest_url or mediawiki_index_url. Read the documentation for more information. ### Login using OAuth1 or OAuth2 ### -OAuth is the authentication method recommended by the Mediawiki developers. It can be used for authenticating a bot or -to use WBI as a backend for an application. +OAuth is the authentication method recommended by the MediaWiki developers. It can be used to authenticate a bot or to +use WBI as a backend for an application. #### As a bot #### If you want to use WBI with a bot account, you should use OAuth as an [Owner-only consumer](https://www.mediawiki.org/wiki/OAuth/Owner-only_consumers). This allows to use the authentication without the "continue oauth" step. -The first step is to request a new OAuth consumer on your Mediawiki instance on the page "Special: -OAuthConsumerRegistration", the "Owner-only" (or "This consumer is for use only by ...") has to be checked. You will get -a consumer key, consumer secret, access token and access secret. +The first step is to request a new OAuth consumer on your MediaWiki instance on the page +"Special:OAuthConsumerRegistration"; the "Owner-only" option (or "This consumer is for use only by ...") has to be checked and +the correct version of the OAuth protocol must be set (OAuth 2.0). You will get a consumer token and consumer secret +(and an access token and access secret if you chose OAuth 1.0a). For a Wikimedia instance (like Wikidata), you need to +use the [Meta-Wiki website](https://meta.wikimedia.org/wiki/Special:OAuthConsumerRegistration). -Example if you use OAuth 1.0a: +Example if you use OAuth 2.0: ```python from wikibaseintegrator import wbi_login -login_instance = wbi_login.Login(consumer_key='', consumer_secret='', - access_token='', access_secret='') +login_instance = wbi_login.OAuth2(consumer_token='', consumer_secret='') ``` -Example if you use OAuth 2.0: +Example if you use OAuth 1.0a: ```python from wikibaseintegrator import wbi_login -login_instance = wbi_login.Login(client_id='', client_secret='') +login_instance = wbi_login.OAuth1(consumer_token='', consumer_secret='', + access_token='', access_secret='') ``` #### To impersonate a user (OAuth 1.0a) #### -If WBI should be used as a backend for a webapp, the script should use OAuth for authentication, WBI supports this, you -just need to specify consumer key and consumer secret when instantiating `wbi_login.Login`. In contrast to username and -password login, OAuth is a 2 steps process as manual user confirmation for OAuth login is required. This means that the -method `wbi_login.Login.continue_oauth()` needs to be called after creating the `wbi_login.Login` instance. +If WBI is to be used as a backend for a web application, the script must use OAuth for authentication. WBI supports +this; you just need to specify the consumer token and consumer secret when instantiating `wbi_login.OAuth1`. Unlike login by +username and password, OAuth is a 2-step process, as manual confirmation of the user for the OAuth login is required. +This means that the `wbi_login.OAuth1.continue_oauth()` method must be called after creating the `wbi_login.OAuth1` +instance.
Example: ```python from wikibaseintegrator import wbi_login -login_instance = wbi_login.Login(consumer_key='', consumer_secret='') -login_instance.continue_oauth() +login_instance = wbi_login.OAuth1(consumer_token='', consumer_secret='') +login_instance.continue_oauth(oauth_callback_data='') ``` -The method `wbi_login.Login.continue_oauth()` will either prompt the user for a callback URL (normal bot runs), or it -will take a parameter so in the case of WBI being used as a backend for e.g. a web app, where the callback will provide -the authentication information directly to the backend and so no copy and paste of the callback URL is required. +The `wbi_login.OAuth1.continue_oauth()` method will either ask the user for a callback URL (normal bot execution) or +take a parameter. In the case where WBI is used as a backend for a web application, for example, the callback will +provide the authentication information directly to the backend, so no copy and paste of the callback URL is needed. + +### Login with a bot password ### + +It's good practice to use a [Bot password](https://www.mediawiki.org/wiki/Manual:Bot_passwords) instead of a simple +username and password, as this allows limiting the permissions given to the bot. + +```python +from wikibaseintegrator import wbi_login + +login_instance = wbi_login.Login(user='', password='') +``` ### Login with a username and a password ### -`wbi_login.Login` provides the login functionality and also stores the cookies and edit tokens required (For security -reasons, every Mediawiki edit requires an edit token). The constructor takes two essential parameters, username and -password. Additionally, the server (default wikidata.org), and the token renewal periods can be specified. It's a good -practice to use [Bot password](https://www.mediawiki.org/wiki/Manual:Bot_passwords) instead of simple username and -password, this allows limiting the permissions given to the bot. +If you want to log in with your user account, you can use the "clientlogin" authentication method. This method is not +recommended. ```python from wikibaseintegrator import wbi_login -login_instance = wbi_login.Login(user='', pwd='') +login_instance = wbi_login.Clientlogin(user='', password='') ``` ## Wikibase Data Types ## Currently, Wikibase supports 17 different data types. The data types are represented as their own classes in -wbi_datatype. Each data types has its specialties, which means that some of them require special parameters (e.g. Globe -Coordinates). +wikibaseintegrator.datatypes. Each datatype has its own peculiarities, which means that some of them require special +parameters (e.g. Globe Coordinates). They are available under the namespace `wikibaseintegrator.datatypes`.
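As a quick illustration, here is a minimal sketch of instantiating two data type objects; it simply reuses the `ExternalID` and `Time` constructors and the properties (P351, P813) that appear in the examples further below:

```python
from wikibaseintegrator.datatypes import ExternalID, Time
from wikibaseintegrator.wbi_enums import WikibaseDatePrecision

# an external identifier statement (NCBI Entrez Gene ID)
entrez_gene_id = ExternalID(value='1017', prop_nr='P351')

# a point in time statement with day-level precision
retrieved = Time(time='+2020-02-08T00:00:00Z', prop_nr='P813', precision=WikibaseDatePrecision.DAY)
```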
The data types currently implemented: -* wbi_datatype.CommonsMedia -* wbi_datatype.EDTF -* wbi_datatype.ExternalID -* wbi_datatype.Form -* wbi_datatype.GeoShape -* wbi_datatype.GlobeCoordinate -* wbi_datatype.ItemID -* wbi_datatype.Lexeme -* wbi_datatype.LocalMedia -* wbi_datatype.Math -* wbi_datatype.MonolingualText -* wbi_datatype.MusicalNotation -* wbi_datatype.Property -* wbi_datatype.Quantity -* wbi_datatype.Sense -* wbi_datatype.String -* wbi_datatype.TabularData -* wbi_datatype.Time -* wbi_datatype.Url +* CommonsMedia +* ExternalID +* Form +* GeoShape +* GlobeCoordinate +* Item +* Lexeme +* Math +* MonolingualText +* MusicalNotation +* Property +* Quantity +* Sense +* String +* TabularData +* Time +* URL + +Two additional data types are also implemented but require the installation of the MediaWiki extension to work properly: + +* extra.EDTF ([Wikibase EDTF](https://www.mediawiki.org/wiki/Extension:Wikibase_EDTF)) +* extra.LocalMedia ([Wikibase Local Media](https://www.mediawiki.org/wiki/Extension:Wikibase_Local_Media)) For details of how to create values (=instances) with these data types, please (for now) consult the docstrings in the -source code. Of note, these data type instances hold the values and, if specified, data type instances for references -and qualifiers. Furthermore, calling the get_value() method of an instance returns either an integer, a string or a -tuple, depending on the complexity of the data type. +source code or the documentation website. Of note, these data type instances hold the values and, if specified, data +type instances for references and qualifiers. -# Helper Methods # +## Structured Data on Commons ## -## Execute SPARQL queries ## +WikibaseIntegrator supports SDC (Structured Data on Commons) to update a media file hosted on Wikimedia Commons. + +### Retrieve data ### -The method `wbi_core.ItemEngine.execute_sparql_query()` allows you to execute SPARQL queries without a hassle. It takes -the actual query string (query), optional prefixes (prefix) if you do not want to use the standard prefixes of Wikidata, -the actual entpoint URL (endpoint), and you can also specify a user agent for the http header sent to the SPARQL -server (user_agent). The latter is very useful to let the operators of the endpoint know who you are, especially if you -execute many queries on the endpoint. This allows the operators of the endpoint to contact you (e.g. specify an email -address, or the URL to your bot code repository.) +```python +from wikibaseintegrator import WikibaseIntegrator + +wbi = WikibaseIntegrator() +media = wbi.mediainfo.get('M16431477') -## Use Mediawiki API ## +# Retrieve the first "depicts" (P180) claim +print(media.claims.get('P180')[0].mainsnak.datavalue['value']['id']) +``` -The method `wbi_functions.mediawiki_api_call_helper()` allows you to execute MediaWiki API POST call. It takes a -mandatory data array (data) and multiple optionals parameters like a login object of type wbi_login.Login, a -mediawiki_api_url string if the Mediawiki is not Wikidata, a user_agent string to set a custom HTTP User Agent header, -and an allow_anonymous boolean to force authentication. 
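As a small follow-up to the retrieval example above, the same accessors can be used to walk through every "depicts" (P180) claim instead of only the first one (a sketch reusing the `media` object from the snippet above):

```python
# print the target item of every "depicts" (P180) claim on the media file
for claim in media.claims.get('P180'):
    print(claim.mainsnak.datavalue['value']['id'])
```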
+ +### Write data ### + +```python +from wikibaseintegrator import WikibaseIntegrator +from wikibaseintegrator.datatypes import Item + +wbi = WikibaseIntegrator() +media = wbi.mediainfo.get('M16431477') + +# Add the "depicts" (P180) claim +media.claims.add(Item(prop_nr='P180', value='Q3146211')) + +media.write() +``` + +# Helper Methods # + +## Use MediaWiki API ## + +The method `wbi_helpers.mediawiki_api_call_helper()` allows you to execute MediaWiki API POST calls. It takes a mandatory +data array (data) and multiple optional parameters like a login object of type wbi_login.Login, a mediawiki_api_url +string if the MediaWiki is not Wikidata, a user_agent string to set a custom HTTP User Agent header, and an +allow_anonymous boolean to allow unauthenticated requests. Example: Retrieve the last 10 revisions of the Wikidata item Q2 (Earth): ```python -from wikibaseintegrator import wbi_functions +from wikibaseintegrator import wbi_helpers -query = { +data = { 'action': 'query', 'prop': 'revisions', 'titles': 'Q2', 'rvlimit': 10 } -print(wbi_functions.mediawiki_api_call_helper(query, allow_anonymous=True)) +print(wbi_helpers.mediawiki_api_call_helper(data=data, allow_anonymous=True)) ``` +## Execute SPARQL queries ## + +The method `wbi_helpers.execute_sparql_query()` allows you to execute SPARQL queries without a hassle. It takes the +actual query string (query), optional prefixes (prefix) if you do not want to use the standard prefixes of Wikidata, the +actual endpoint URL (endpoint), and you can also specify a user agent for the HTTP header sent to the SPARQL server +(user_agent). The latter is very useful to let the operators of the endpoint know who you are, especially if you execute +many queries on the endpoint. This allows the operators of the endpoint to contact you (e.g. specify an email address, +or the URL to your bot code repository.) + ## Wikibase search entities ## -The method `wbi_core.ItemEngine.search_entities()` allows for string search in a Wikibase instance. This means that -labels, descriptions and aliases can be searched for a string of interest. The method takes five arguments: The actual -search string (search_string), an optional server (mediawiki_api_url, in case the Wikibase instance used is not -Wikidata), an optional user_agent, an optional max_results (default 500), an optional language (default 'en'), and an -option dict_id_label to return a dict of item id and label as a result. +The method `wbi_helpers.search_entities()` allows for string search in a Wikibase instance. This means that labels, +descriptions and aliases can be searched for a string of interest. The method takes several arguments: the actual search +string (search_string), an optional server (mediawiki_api_url, in case the Wikibase instance used is not Wikidata), an +optional user_agent, an optional max_results (default 500), an optional language (default 'en'), and an optional +dict_id_label to return a dict of item id and label as a result.
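Two short sketches of these helpers, using only the parameter names listed above (the result parsing assumes the standard SPARQL JSON layout, so treat those lines as an assumption):

```python
from wikibaseintegrator import wbi_helpers

# count the items that are instances of (P31) human (Q5)
query = 'SELECT (COUNT(?item) AS ?count) WHERE { ?item wdt:P31 wd:Q5 . }'
results = wbi_helpers.execute_sparql_query(query=query)
print(results['results']['bindings'][0]['count']['value'])

# full-text search for entities matching 'Earth'
print(wbi_helpers.search_entities(search_string='Earth', language='en', max_results=5))
```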
## Merge Wikibase items ## Sometimes, Wikibase items need to be merged. An API call exists for that, and wbi_core implements a method accordingly. -`wbi_functions.merge_items()` takes five arguments: -the QID of the item which should be merged into another item (from_id), the QID of the item the first item should be -merged into (to_id), a login object of type wbi_login.Login to provide the API call with the required authentication -information, a server (mediawiki_api_url) if the Wikibase instance is not Wikidata and a flag for ignoring merge -conflicts (ignore_conflicts). The last parameter will do a partial merge for all statements which do not conflict. This -should generally be avoided because it leaves a crippled item in Wikibase. Before a merge, any potential conflicts -should be resolved first. +`wbi_helpers.merge_items()` takes five arguments: + +* the QID of the item which should be merged into another item (from_id) +* the QID of the item the first item should be merged into (to_id) +* a login object of type wbi_login.Login to provide the API call with the required authentication information +* a boolean if the changes need to be marked as made by a bot (is_bot) +* a flag for ignoring merge conflicts (ignore_conflicts), which will do a partial merge for all statements which do not + conflict. This should generally be avoided because it leaves a crippled item in Wikibase. Before a merge, any + potential conflicts should be resolved first. # Examples (in "normal" mode) # -## A Minimal Bot ## +In order to create a minimal bot based on WikibaseIntegrator, two things are required: + +* A datatype object containing a value. +* An entity object (Item/Property/Lexeme/...) which takes the data, does the checks and performs the write. + +An optional Login object can be used to authenticate on the Wikibase instance. + +## Create a new Item ## + +```python +from wikibaseintegrator import wbi_login, WikibaseIntegrator +from wikibaseintegrator.datatypes import ExternalID +from wikibaseintegrator.wbi_config import config as wbi_config + +wbi_config['USER_AGENT'] = 'MyWikibaseBot/1.0 (https://www.wikidata.org/wiki/User:MyUsername)' + +# login object +login_instance = wbi_login.OAuth2(consumer_token='', consumer_secret='') + +wbi = WikibaseIntegrator(login=login_instance) + +# data type object, e.g. for a NCBI gene entrez ID +entrez_gene_id = ExternalID(value='', prop_nr='P351') + +# data goes into a list, because many data objects can be provided to one item +data = [entrez_gene_id] + +# Create a new item +item = wbi.item.new() -In order to create a minimal bot based on wbi_core, three things are required: +# Set an English label +item.labels.set(language='en', value='Newly created item') -* A login object, as described above. -* A data type object containing a value. -* A ItemEngine object which takes the data, does the checks and performs write. +# Set a French description +item.descriptions.set(language='fr', value='Une description un peu longue') + +item.claims.add(data) +item.write() ``` ## Modify an existing item ## ```python -from wikibaseintegrator import wbi_core, wbi_login, wbi_datatype +from wikibaseintegrator import wbi_login, WikibaseIntegrator +from wikibaseintegrator.datatypes import ExternalID +from wikibaseintegrator.wbi_enums import ActionIfExists +from wikibaseintegrator.wbi_config import config as wbi_config + +wbi_config['USER_AGENT'] = 'MyWikibaseBot/1.0 (https://www.wikidata.org/wiki/User:MyUsername)' # login object -login_instance = wbi_login.Login(user='', pwd='') +login_instance = wbi_login.OAuth2(consumer_token='', consumer_secret='') + +wbi = WikibaseIntegrator(login=login_instance) # data type object, e.g.
for a NCBI gene entrez ID -entrez_gene_id = wbi_datatype.String(value='', prop_nr='P351') +entrez_gene_id = ExternalID(value='', prop_nr='P351') # data goes into a list, because many data objects can be provided to data = [entrez_gene_id] -# Search for and then edit/create new item -wd_item = wbi_core.ItemEngine(data=data) -wd_item.write(login_instance) +# Search and then edit an Item +item = wbi.item.get(entity_id='Q141806') + +# Set an english label but don't modify it if there is already an entry +item.labels.set(language='en', value='An updated item', action_if_exists=ActionIfExists.KEEP) + +# Set a French description and replace the existing one +item.descriptions.set(language='fr', value='Une description un peu longue', action_if_exists=ActionIfExists.REPLACE_ALL) + +item.claims.add(data) +item.write() ``` -## A Minimal Bot for Mass Import ## +## A bot for Mass Import ## An enhanced example of the previous bot just puts two of the three things into a 'for loop' and so allows mass creation, or modification of items. ```python -from wikibaseintegrator import wbi_core, wbi_login, wbi_datatype +from wikibaseintegrator import WikibaseIntegrator, wbi_login +from wikibaseintegrator.datatypes import ExternalID, Item, String, Time +from wikibaseintegrator.wbi_config import config as wbi_config +from wikibaseintegrator.wbi_enums import WikibaseDatePrecision + +wbi_config['USER_AGENT'] = 'MyWikibaseBot/1.0 (https://www.wikidata.org/wiki/User:MyUsername)' # login object -login_instance = wbi_login.Login(user='', pwd='') +login_instance = wbi_login.OAuth2(consumer_token='', consumer_secret='') # We have raw data, which should be written to Wikidata, namely two human NCBI entrez gene IDs mapped to two Ensembl Gene IDs raw_data = { @@ -376,39 +470,42 @@ raw_data = { '1029': 'ENST00000498124' } +wbi = WikibaseIntegrator(login=login_instance) + for entrez_id, ensembl in raw_data.items(): # add some references references = [ [ - wbi_datatype.ItemID(value='Q20641742', prop_nr='P248', is_reference=True), - wbi_datatype.Time(time='+2020-02-08T00:00:00Z', prop_nr='P813', is_reference=True), - wbi_datatype.ExternalID(value='1017', prop_nr='P351', is_reference=True) + Item(value='Q20641742', prop_nr='P248'), + Time(time='+2020-02-08T00:00:00Z', prop_nr='P813', precision=WikibaseDatePrecision.DAY), + ExternalID(value='1017', prop_nr='P351') ] ] # data type object - entrez_gene_id = wbi_datatype.String(value=entrez_id, prop_nr='P351', references=references) - ensembl_transcript_id = wbi_datatype.String(value=ensembl, prop_nr='P704', references=references) + entrez_gene_id = String(value=entrez_id, prop_nr='P351', references=references) + ensembl_transcript_id = String(value=ensembl, prop_nr='P704', references=references) # data goes into a list, because many data objects can be provided to data = [entrez_gene_id, ensembl_transcript_id] # Search for and then edit/create new item - wd_item = wbi_core.ItemEngine(data=data) - wd_item.write(login_instance) + item = wbi.item.new() + item.claims.add(data) + item.write() ``` # Examples (in "fast run" mode) # In order to use the fast run mode, you need to know the property/value combination which determines the data corpus you would like to operate on. E.g. 
for operating on human genes, you need to know -that [P351](https://www.wikidata.org/entity/P351) is the NCBI entrez gene ID and you also need to know that you are +that [P351](https://www.wikidata.org/entity/P351) is the NCBI Entrez Gene ID and you also need to know that you are dealing with humans, best represented by the [found in taxon property (P703)](https://www.wikidata.org/entity/P703) with the value [Q15978631](https://www.wikidata.org/entity/Q15978631) for Homo sapiens. IMPORTANT: In order for the fast run mode to work, the data you provide in the constructor must contain at least one -unique value/id only present on one Wikidata item, e.g. an NCBI entrez gene ID, Uniprot ID, etc. Usually, these would be -the same unique core properties used for defining domains in wbi_core, e.g. for genes, proteins, drugs or your custom +unique value/id only present on one Wikidata element, e.g. an NCBI entrez gene ID, Uniprot ID, etc. Usually, these would +be the same unique core properties used for defining domains in wbi_core, e.g. for genes, proteins, drugs or your custom domains. Below, the normal mode run example from above, slightly modified, to meet the requirements for the fast run mode. To @@ -417,18 +514,22 @@ holding the properties to filter for as keys, and the item QIDs as dict values. just provide an empty string. For the above example, the dictionary looks like this: ```python -fast_run_base_filter = {'P351': '', 'P703': 'Q15978631'} +from wikibaseintegrator.datatypes import ExternalID, Item + +fast_run_base_filter = [ExternalID(prop_nr='P351'), Item(prop_nr='P703', value='Q15978631')] ``` The full example: ```python -from wikibaseintegrator import wbi_core, wbi_login, wbi_datatype +from wikibaseintegrator import WikibaseIntegrator, wbi_login +from wikibaseintegrator.datatypes import ExternalID, Item, String, Time +from wikibaseintegrator.wbi_enums import WikibaseDatePrecision # login object -login_instance = wbi_login.Login(user='', pwd='') +login = wbi_login.OAuth2(consumer_token='', consumer_secret='') -fast_run_base_filter = {'P351': '', 'P703': 'Q15978631'} +fast_run_base_filter = [ExternalID(prop_nr='P351'), Item(prop_nr='P703', value='Q15978631')] fast_run = True # We have raw data, which should be written to Wikidata, namely two human NCBI entrez gene IDs mapped to two Ensembl Gene IDs @@ -442,24 +543,38 @@ for entrez_id, ensembl in raw_data.items(): # add some references references = [ [ - wbi_datatype.ItemID(value='Q20641742', prop_nr='P248', is_reference=True), - wbi_datatype.Time(time='+2020-02-08T00:00:00Z', prop_nr='P813', is_reference=True), - wbi_datatype.ExternalID(value='1017', prop_nr='P351', is_reference=True) + Item(value='Q20641742', prop_nr='P248') + ], + [ + Time(time='+2020-02-08T00:00:00Z', prop_nr='P813', precision=WikibaseDatePrecision.DAY), + ExternalID(value='1017', prop_nr='P351') ] ] # data type object - entrez_gene_id = wbi_datatype.String(value=entrez_id, prop_nr='P351', references=references) - ensembl_transcript_id = wbi_datatype.String(value=ensembl, prop_nr='P704', references=references) + entrez_gene_id = String(value=entrez_id, prop_nr='P351', references=references) + ensembl_transcript_id = String(value=ensembl, prop_nr='P704', references=references) # data goes into a list, because many data objects can be provided to data = [entrez_gene_id, ensembl_transcript_id] # Search for and then edit/create new item - wd_item = wbi_core.ItemEngine(data=data, fast_run=fast_run, fast_run_base_filter=fast_run_base_filter) - 
wd_item.write(login_instance) + wb_item = WikibaseIntegrator(login=login).item.new() + wb_item.add_claims(claims=data) + wb_item.init_fastrun(base_filter=fast_run_base_filter) + wb_item.write() ``` -Note: Fastrun mode checks for equality of property/value pairs, qualifers (not including qualifier attributes), labels, +Note: Fastrun mode checks for equality of property/value pairs, qualifiers (not including qualifier attributes), labels, aliases and description, but it ignores references by default! -References can be checked in fast run mode by setting `fast_run_use_refs` to `True`. +References can be checked in fast run mode by setting `use_refs` to `True`. + +# Debugging # + +You can enable debugging by adding this piece of code to the top of your project: + +```python +import logging + +logging.basicConfig(level=logging.DEBUG) +``` diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 00000000..ba501f6f --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,19 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line. +SPHINXOPTS = +SPHINXBUILD = sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..216bff58 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,5 @@ +# Sphinx-apidoc + +```shell +sphinx-apidoc.exe -e -f -o docs\source .\wikibaseintegrator\ -t docs\source\_templates +``` diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 00000000..6247f7e2 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..43dea041 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,6 @@ +Sphinx~=4.5.0 +readthedocs-sphinx-ext~=2.1.5 +sphinx-rtd-theme~=1.0.0 +sphinx_github_changelog~=1.2.0 +m2r2~=0.3.2 +sphinx-autodoc-typehints==1.18.1 diff --git a/docs/source/_templates/module.rst_t b/docs/source/_templates/module.rst_t new file mode 100644 index 00000000..d886dcfb --- /dev/null +++ b/docs/source/_templates/module.rst_t @@ -0,0 +1,9 @@ +{%- if show_headings %} +{{- [basename] | join(' ') | e | heading }} + +{% endif -%} +.. 
automodule:: {{ qualname }} +{%- for option in automodule_options %} + :{{ option }}: +{%- endfor %} + diff --git a/docs/source/_templates/package.rst_t b/docs/source/_templates/package.rst_t new file mode 100644 index 00000000..2eb85377 --- /dev/null +++ b/docs/source/_templates/package.rst_t @@ -0,0 +1,48 @@ +{%- macro automodule(modname, options) -%} +.. automodule:: {{ modname }} +{%- for option in options %} + :{{ option }}: +{%- endfor %} +{%- endmacro %} + +{%- macro toctree(docnames) -%} +.. toctree:: + :maxdepth: {{ maxdepth }} +{% for docname in docnames %} + {{ docname }} +{%- endfor %} +{%- endmacro %} + +{%- if is_namespace %} +{{- [pkgname, "namespace"] | join(" ") | e | heading }} +{% else %} +{{- [pkgname] | join(" ") | e | heading }} +{% endif %} + +{%- if is_namespace %} +.. py:module:: {{ pkgname }} +{% endif %} + +{%- if subpackages %} +Subpackages +----------- + +{{ toctree(subpackages) }} +{% endif %} + +{%- if submodules %} +{%- if subpackages %} +Submodules +---------- +{%- endif %} +{% if separatemodules %} +{{ toctree(submodules) }} +{% else %} +{%- for submodule in submodules %} +{% if show_headings %} +{{- [submodule, "module"] | join(" ") | e | heading(2) }} +{% endif %} +{{ automodule(submodule, automodule_options) }} +{% endfor %} +{%- endif %} +{%- endif %} diff --git a/docs/source/_templates/toc.rst_t b/docs/source/_templates/toc.rst_t new file mode 100644 index 00000000..846c6b16 --- /dev/null +++ b/docs/source/_templates/toc.rst_t @@ -0,0 +1,7 @@ +{{ header | heading }} + +.. toctree:: + :maxdepth: {{ maxdepth }} +{% for docname in docnames %} + {{ docname }} +{%- endfor %} diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst new file mode 100644 index 00000000..85fb4e01 --- /dev/null +++ b/docs/source/changelog.rst @@ -0,0 +1,9 @@ +.. _changelog: + +Changelog +********* + +.. changelog:: + :changelog-url: https://wikibaseintegrator.readthedocs.io/en/stable/#changelog + :github: https://github.com/LeMyst/WikibaseIntegrator/releases/ + :pypi: https://pypi.org/project/wikibaseintegrator/ diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 00000000..a1ddd9dd --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,119 @@ +# -*- coding: utf-8 -*- +# +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +from datetime import datetime + +sys.path.insert(0, os.path.abspath('../..')) + +# -- General configuration ------------------------------------------------ + +# If your documentation needs a minimal Sphinx version, state it here. +# +# needs_sphinx = '1.0' + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.autodoc', + 'sphinx.ext.mathjax', + 'sphinx.ext.viewcode', + 'sphinx_rtd_theme', + 'sphinx_github_changelog', + 'm2r2', + 'sphinx_autodoc_typehints'] + +# Provide a GitHub API token: +# Pass the SPHINX_GITHUB_CHANGELOG_TOKEN environment variable to your build +# OR +# sphinx_github_changelog_token = "" + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The suffix(es) of source filenames. 
+# You can specify multiple suffix as a list of string: +# +source_suffix = {'.rst': 'restructuredtext'} + +# The master toctree document. +master_doc = 'index' + +# General information about the project. +project = u'WikibaseIntegrator' +copyright = u'%d, LeMyst' % datetime.now().year +author = u'LeMyst and WikibaseIntegrator contributors' + +# The version info for the project you're documenting, acts as replacement for +# |version| and |release|, also used in various other places throughout the +# built documents. +# +# The short X.Y version. +version = u'0.12.0' +# The full version, including alpha/beta/rc tags. +release = u'0.12.0' + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = 'en' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This patterns also effect to html_static_path and html_extra_path +exclude_patterns = [] + +# The name of the Pygments (syntax highlighting) style to use. +pygments_style = 'sphinx' + +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = False + +autodoc_typehints = 'both' +autodoc_default_options = { + 'special-members': '__init__', + 'members': True, + 'undoc-members': True, + 'inherited-members': True, + 'show-inheritance': True, + 'exclude-members': 'subclasses' +} + +# -- Options for HTML output ---------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' + +html_theme_options = { + 'style_external_links': False, + # Toc options + 'collapse_navigation': False +} + +html_context = { + 'display_github': True, + 'github_user': 'LeMyst', + 'github_repo': 'WikibaseIntegrator', + 'github_version': 'master', + "conf_py_path": "/docs/" +} + + +def skip(app, what, name, obj, would_skip, options): + if name == "__init__": + return False + if name == "sparql_query": + return True + return would_skip + + +def setup(app): + app.connect("autodoc-skip-member", skip) diff --git a/docs/source/index.rst b/docs/source/index.rst new file mode 100644 index 00000000..5774be85 --- /dev/null +++ b/docs/source/index.rst @@ -0,0 +1,33 @@ +.. _index: + +WikibaseIntegrator +================== + +.. toctree:: + :maxdepth: 5 + + wikibaseintegrator + +.. + README + ====== + + .. mdinclude:: ../../README.md + + +Changelog +========= + +.. toctree:: + :maxdepth: 5 + :caption: Miscellaneous + + changelog + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/source/modules.rst b/docs/source/modules.rst new file mode 100644 index 00000000..c8c8f23d --- /dev/null +++ b/docs/source/modules.rst @@ -0,0 +1,7 @@ +wikibaseintegrator +================== + +.. toctree:: + :maxdepth: 4 + + wikibaseintegrator \ No newline at end of file diff --git a/docs/source/wikibaseintegrator.datatypes.basedatatype.rst b/docs/source/wikibaseintegrator.datatypes.basedatatype.rst new file mode 100644 index 00000000..db7f462f --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.basedatatype.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.basedatatype +========================================= + +.. 
automodule:: wikibaseintegrator.datatypes.basedatatype + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.commonsmedia.rst b/docs/source/wikibaseintegrator.datatypes.commonsmedia.rst new file mode 100644 index 00000000..b210ec2a --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.commonsmedia.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.commonsmedia +========================================= + +.. automodule:: wikibaseintegrator.datatypes.commonsmedia + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.externalid.rst b/docs/source/wikibaseintegrator.datatypes.externalid.rst new file mode 100644 index 00000000..0c5ffb0d --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.externalid.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.externalid +======================================= + +.. automodule:: wikibaseintegrator.datatypes.externalid + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.extra.edtf.rst b/docs/source/wikibaseintegrator.datatypes.extra.edtf.rst new file mode 100644 index 00000000..82db2eb9 --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.extra.edtf.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.extra.edtf +======================================= + +.. automodule:: wikibaseintegrator.datatypes.extra.edtf + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.extra.localmedia.rst b/docs/source/wikibaseintegrator.datatypes.extra.localmedia.rst new file mode 100644 index 00000000..b8a40e8c --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.extra.localmedia.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.extra.localmedia +============================================= + +.. automodule:: wikibaseintegrator.datatypes.extra.localmedia + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.extra.rst b/docs/source/wikibaseintegrator.datatypes.extra.rst new file mode 100644 index 00000000..13426ded --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.extra.rst @@ -0,0 +1,9 @@ +wikibaseintegrator.datatypes.extra +================================== + + +.. toctree:: + :maxdepth: 4 + + wikibaseintegrator.datatypes.extra.edtf + wikibaseintegrator.datatypes.extra.localmedia diff --git a/docs/source/wikibaseintegrator.datatypes.form.rst b/docs/source/wikibaseintegrator.datatypes.form.rst new file mode 100644 index 00000000..56ee30d2 --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.form.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.form +================================= + +.. automodule:: wikibaseintegrator.datatypes.form + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.geoshape.rst b/docs/source/wikibaseintegrator.datatypes.geoshape.rst new file mode 100644 index 00000000..dbf46e2b --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.geoshape.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.geoshape +===================================== + +.. 
automodule:: wikibaseintegrator.datatypes.geoshape + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.globecoordinate.rst b/docs/source/wikibaseintegrator.datatypes.globecoordinate.rst new file mode 100644 index 00000000..4b5f8df8 --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.globecoordinate.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.globecoordinate +============================================ + +.. automodule:: wikibaseintegrator.datatypes.globecoordinate + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.item.rst b/docs/source/wikibaseintegrator.datatypes.item.rst new file mode 100644 index 00000000..8113deb9 --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.item.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.item +================================= + +.. automodule:: wikibaseintegrator.datatypes.item + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.lexeme.rst b/docs/source/wikibaseintegrator.datatypes.lexeme.rst new file mode 100644 index 00000000..8f9141ee --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.lexeme.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.lexeme +=================================== + +.. automodule:: wikibaseintegrator.datatypes.lexeme + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.math.rst b/docs/source/wikibaseintegrator.datatypes.math.rst new file mode 100644 index 00000000..10ff4221 --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.math.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.math +================================= + +.. automodule:: wikibaseintegrator.datatypes.math + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.monolingualtext.rst b/docs/source/wikibaseintegrator.datatypes.monolingualtext.rst new file mode 100644 index 00000000..95257695 --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.monolingualtext.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.monolingualtext +============================================ + +.. automodule:: wikibaseintegrator.datatypes.monolingualtext + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.musicalnotation.rst b/docs/source/wikibaseintegrator.datatypes.musicalnotation.rst new file mode 100644 index 00000000..c941619f --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.musicalnotation.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.musicalnotation +============================================ + +.. automodule:: wikibaseintegrator.datatypes.musicalnotation + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.property.rst b/docs/source/wikibaseintegrator.datatypes.property.rst new file mode 100644 index 00000000..f6845640 --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.property.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.property +===================================== + +.. 
automodule:: wikibaseintegrator.datatypes.property + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.quantity.rst b/docs/source/wikibaseintegrator.datatypes.quantity.rst new file mode 100644 index 00000000..5d9ecc3f --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.quantity.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.quantity +===================================== + +.. automodule:: wikibaseintegrator.datatypes.quantity + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.rst b/docs/source/wikibaseintegrator.datatypes.rst new file mode 100644 index 00000000..fb450673 --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.rst @@ -0,0 +1,35 @@ +wikibaseintegrator.datatypes +============================ + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + wikibaseintegrator.datatypes.extra + +Submodules +---------- + +.. toctree:: + :maxdepth: 4 + + wikibaseintegrator.datatypes.basedatatype + wikibaseintegrator.datatypes.commonsmedia + wikibaseintegrator.datatypes.externalid + wikibaseintegrator.datatypes.form + wikibaseintegrator.datatypes.geoshape + wikibaseintegrator.datatypes.globecoordinate + wikibaseintegrator.datatypes.item + wikibaseintegrator.datatypes.lexeme + wikibaseintegrator.datatypes.math + wikibaseintegrator.datatypes.monolingualtext + wikibaseintegrator.datatypes.musicalnotation + wikibaseintegrator.datatypes.property + wikibaseintegrator.datatypes.quantity + wikibaseintegrator.datatypes.sense + wikibaseintegrator.datatypes.string + wikibaseintegrator.datatypes.tabulardata + wikibaseintegrator.datatypes.time + wikibaseintegrator.datatypes.url diff --git a/docs/source/wikibaseintegrator.datatypes.sense.rst b/docs/source/wikibaseintegrator.datatypes.sense.rst new file mode 100644 index 00000000..601d91a7 --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.sense.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.sense +================================== + +.. automodule:: wikibaseintegrator.datatypes.sense + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.string.rst b/docs/source/wikibaseintegrator.datatypes.string.rst new file mode 100644 index 00000000..d5feba7b --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.string.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.string +=================================== + +.. automodule:: wikibaseintegrator.datatypes.string + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.tabulardata.rst b/docs/source/wikibaseintegrator.datatypes.tabulardata.rst new file mode 100644 index 00000000..e363421d --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.tabulardata.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.tabulardata +======================================== + +.. automodule:: wikibaseintegrator.datatypes.tabulardata + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.time.rst b/docs/source/wikibaseintegrator.datatypes.time.rst new file mode 100644 index 00000000..5a26f7fc --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.time.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.time +================================= + +.. 
automodule:: wikibaseintegrator.datatypes.time + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.datatypes.url.rst b/docs/source/wikibaseintegrator.datatypes.url.rst new file mode 100644 index 00000000..feda6427 --- /dev/null +++ b/docs/source/wikibaseintegrator.datatypes.url.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.datatypes.url +================================ + +.. automodule:: wikibaseintegrator.datatypes.url + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.entities.baseentity.rst b/docs/source/wikibaseintegrator.entities.baseentity.rst new file mode 100644 index 00000000..03f52769 --- /dev/null +++ b/docs/source/wikibaseintegrator.entities.baseentity.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.entities.baseentity +====================================== + +.. automodule:: wikibaseintegrator.entities.baseentity + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.entities.item.rst b/docs/source/wikibaseintegrator.entities.item.rst new file mode 100644 index 00000000..c8943e29 --- /dev/null +++ b/docs/source/wikibaseintegrator.entities.item.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.entities.item +================================ + +.. automodule:: wikibaseintegrator.entities.item + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.entities.lexeme.rst b/docs/source/wikibaseintegrator.entities.lexeme.rst new file mode 100644 index 00000000..c4d1e683 --- /dev/null +++ b/docs/source/wikibaseintegrator.entities.lexeme.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.entities.lexeme +================================== + +.. automodule:: wikibaseintegrator.entities.lexeme + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.entities.mediainfo.rst b/docs/source/wikibaseintegrator.entities.mediainfo.rst new file mode 100644 index 00000000..a3283d0f --- /dev/null +++ b/docs/source/wikibaseintegrator.entities.mediainfo.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.entities.mediainfo +===================================== + +.. automodule:: wikibaseintegrator.entities.mediainfo + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.entities.property.rst b/docs/source/wikibaseintegrator.entities.property.rst new file mode 100644 index 00000000..183603b4 --- /dev/null +++ b/docs/source/wikibaseintegrator.entities.property.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.entities.property +==================================== + +.. automodule:: wikibaseintegrator.entities.property + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.entities.rst b/docs/source/wikibaseintegrator.entities.rst new file mode 100644 index 00000000..905ae27b --- /dev/null +++ b/docs/source/wikibaseintegrator.entities.rst @@ -0,0 +1,12 @@ +wikibaseintegrator.entities +=========================== + + +.. toctree:: + :maxdepth: 4 + + wikibaseintegrator.entities.baseentity + wikibaseintegrator.entities.item + wikibaseintegrator.entities.lexeme + wikibaseintegrator.entities.mediainfo + wikibaseintegrator.entities.property diff --git a/docs/source/wikibaseintegrator.models.aliases.rst b/docs/source/wikibaseintegrator.models.aliases.rst new file mode 100644 index 00000000..57bb0c0f --- /dev/null +++ b/docs/source/wikibaseintegrator.models.aliases.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.aliases +================================= + +.. 
automodule:: wikibaseintegrator.models.aliases + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.basemodel.rst b/docs/source/wikibaseintegrator.models.basemodel.rst new file mode 100644 index 00000000..697e03cd --- /dev/null +++ b/docs/source/wikibaseintegrator.models.basemodel.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.basemodel +=================================== + +.. automodule:: wikibaseintegrator.models.basemodel + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.claims.rst b/docs/source/wikibaseintegrator.models.claims.rst new file mode 100644 index 00000000..16385a5b --- /dev/null +++ b/docs/source/wikibaseintegrator.models.claims.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.claims +================================ + +.. automodule:: wikibaseintegrator.models.claims + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.descriptions.rst b/docs/source/wikibaseintegrator.models.descriptions.rst new file mode 100644 index 00000000..40822474 --- /dev/null +++ b/docs/source/wikibaseintegrator.models.descriptions.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.descriptions +====================================== + +.. automodule:: wikibaseintegrator.models.descriptions + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.forms.rst b/docs/source/wikibaseintegrator.models.forms.rst new file mode 100644 index 00000000..c21b0da0 --- /dev/null +++ b/docs/source/wikibaseintegrator.models.forms.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.forms +=============================== + +.. automodule:: wikibaseintegrator.models.forms + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.labels.rst b/docs/source/wikibaseintegrator.models.labels.rst new file mode 100644 index 00000000..e5532e78 --- /dev/null +++ b/docs/source/wikibaseintegrator.models.labels.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.labels +================================ + +.. automodule:: wikibaseintegrator.models.labels + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.language_values.rst b/docs/source/wikibaseintegrator.models.language_values.rst new file mode 100644 index 00000000..ae17c258 --- /dev/null +++ b/docs/source/wikibaseintegrator.models.language_values.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.language\_values +========================================== + +.. automodule:: wikibaseintegrator.models.language_values + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.lemmas.rst b/docs/source/wikibaseintegrator.models.lemmas.rst new file mode 100644 index 00000000..c39994e0 --- /dev/null +++ b/docs/source/wikibaseintegrator.models.lemmas.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.lemmas +================================ + +.. automodule:: wikibaseintegrator.models.lemmas + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.qualifiers.rst b/docs/source/wikibaseintegrator.models.qualifiers.rst new file mode 100644 index 00000000..eea12fb5 --- /dev/null +++ b/docs/source/wikibaseintegrator.models.qualifiers.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.qualifiers +==================================== + +.. 
automodule:: wikibaseintegrator.models.qualifiers + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.references.rst b/docs/source/wikibaseintegrator.models.references.rst new file mode 100644 index 00000000..138e7212 --- /dev/null +++ b/docs/source/wikibaseintegrator.models.references.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.references +==================================== + +.. automodule:: wikibaseintegrator.models.references + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.rst b/docs/source/wikibaseintegrator.models.rst new file mode 100644 index 00000000..833ef137 --- /dev/null +++ b/docs/source/wikibaseintegrator.models.rst @@ -0,0 +1,20 @@ +wikibaseintegrator.models +========================= + + +.. toctree:: + :maxdepth: 4 + + wikibaseintegrator.models.aliases + wikibaseintegrator.models.basemodel + wikibaseintegrator.models.claims + wikibaseintegrator.models.descriptions + wikibaseintegrator.models.forms + wikibaseintegrator.models.labels + wikibaseintegrator.models.language_values + wikibaseintegrator.models.lemmas + wikibaseintegrator.models.qualifiers + wikibaseintegrator.models.references + wikibaseintegrator.models.senses + wikibaseintegrator.models.sitelinks + wikibaseintegrator.models.snaks diff --git a/docs/source/wikibaseintegrator.models.senses.rst b/docs/source/wikibaseintegrator.models.senses.rst new file mode 100644 index 00000000..6e0c99ca --- /dev/null +++ b/docs/source/wikibaseintegrator.models.senses.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.senses +================================ + +.. automodule:: wikibaseintegrator.models.senses + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.sitelinks.rst b/docs/source/wikibaseintegrator.models.sitelinks.rst new file mode 100644 index 00000000..b7ac4ab0 --- /dev/null +++ b/docs/source/wikibaseintegrator.models.sitelinks.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.sitelinks +=================================== + +.. automodule:: wikibaseintegrator.models.sitelinks + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.models.snaks.rst b/docs/source/wikibaseintegrator.models.snaks.rst new file mode 100644 index 00000000..eece442f --- /dev/null +++ b/docs/source/wikibaseintegrator.models.snaks.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.models.snaks +=============================== + +.. automodule:: wikibaseintegrator.models.snaks + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.rst b/docs/source/wikibaseintegrator.rst new file mode 100644 index 00000000..802df089 --- /dev/null +++ b/docs/source/wikibaseintegrator.rst @@ -0,0 +1,27 @@ +wikibaseintegrator +================== + +Subpackages +----------- + +.. toctree:: + :maxdepth: 4 + + wikibaseintegrator.datatypes + wikibaseintegrator.entities + wikibaseintegrator.models + +Submodules +---------- + +.. 
toctree:: + :maxdepth: 4 + + wikibaseintegrator.wbi_backoff + wikibaseintegrator.wbi_config + wikibaseintegrator.wbi_enums + wikibaseintegrator.wbi_exceptions + wikibaseintegrator.wbi_fastrun + wikibaseintegrator.wbi_helpers + wikibaseintegrator.wbi_login + wikibaseintegrator.wikibaseintegrator diff --git a/docs/source/wikibaseintegrator.wbi_backoff.rst b/docs/source/wikibaseintegrator.wbi_backoff.rst new file mode 100644 index 00000000..fe4665a4 --- /dev/null +++ b/docs/source/wikibaseintegrator.wbi_backoff.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.wbi\_backoff +=============================== + +.. automodule:: wikibaseintegrator.wbi_backoff + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.wbi_config.rst b/docs/source/wikibaseintegrator.wbi_config.rst new file mode 100644 index 00000000..95c816c3 --- /dev/null +++ b/docs/source/wikibaseintegrator.wbi_config.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.wbi\_config +============================== + +.. automodule:: wikibaseintegrator.wbi_config + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.wbi_enums.rst b/docs/source/wikibaseintegrator.wbi_enums.rst new file mode 100644 index 00000000..a2161406 --- /dev/null +++ b/docs/source/wikibaseintegrator.wbi_enums.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.wbi\_enums +============================= + +.. automodule:: wikibaseintegrator.wbi_enums + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.wbi_exceptions.rst b/docs/source/wikibaseintegrator.wbi_exceptions.rst new file mode 100644 index 00000000..a9175027 --- /dev/null +++ b/docs/source/wikibaseintegrator.wbi_exceptions.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.wbi\_exceptions +================================== + +.. automodule:: wikibaseintegrator.wbi_exceptions + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.wbi_fastrun.rst b/docs/source/wikibaseintegrator.wbi_fastrun.rst new file mode 100644 index 00000000..15e436ed --- /dev/null +++ b/docs/source/wikibaseintegrator.wbi_fastrun.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.wbi\_fastrun +=============================== + +.. automodule:: wikibaseintegrator.wbi_fastrun + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.wbi_helpers.rst b/docs/source/wikibaseintegrator.wbi_helpers.rst new file mode 100644 index 00000000..4a36b544 --- /dev/null +++ b/docs/source/wikibaseintegrator.wbi_helpers.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.wbi\_helpers +=============================== + +.. automodule:: wikibaseintegrator.wbi_helpers + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.wbi_login.rst b/docs/source/wikibaseintegrator.wbi_login.rst new file mode 100644 index 00000000..f287a9f5 --- /dev/null +++ b/docs/source/wikibaseintegrator.wbi_login.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.wbi\_login +============================= + +.. automodule:: wikibaseintegrator.wbi_login + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/source/wikibaseintegrator.wikibaseintegrator.rst b/docs/source/wikibaseintegrator.wikibaseintegrator.rst new file mode 100644 index 00000000..cee861b9 --- /dev/null +++ b/docs/source/wikibaseintegrator.wikibaseintegrator.rst @@ -0,0 +1,7 @@ +wikibaseintegrator.wikibaseintegrator +===================================== + +.. 
automodule:: wikibaseintegrator.wikibaseintegrator + :members: + :undoc-members: + :show-inheritance: diff --git a/notebooks/item_create_new.ipynb b/notebooks/item_create_new.ipynb new file mode 100644 index 00000000..d498f68e --- /dev/null +++ b/notebooks/item_create_new.ipynb @@ -0,0 +1,356 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3bc67e37", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "# Create a new Item" + ] + }, + { + "cell_type": "markdown", + "id": "3646845d", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Load wikibaseintegrator" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "498b50c2", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from wikibaseintegrator.models import Qualifiers, References, Reference\n", + "\n", + "from wikibaseintegrator import WikibaseIntegrator\n", + "from wikibaseintegrator import wbi_login\n", + "from wikibaseintegrator import datatypes\n", + "from wikibaseintegrator.wbi_config import config" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "90b179a6", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "WDUSER = ''\n", + "WDPASS = ''" + ] + }, + { + "cell_type": "markdown", + "id": "bb9948a9", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Set default variables" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2ffe3908", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "config['MEDIAWIKI_API_URL'] = 'https://test.wikidata.org/w/api.php'\n", + "config['USER_AGENT'] = 'Lexeme Write Notebook'" + ] + }, + { + "cell_type": "markdown", + "id": "27e3932f", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Create login and WikibaseIntegrator object" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4963fc82", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "login = wbi_login.Login(user=WDUSER, password=WDPASS, mediawiki_api_url='https://test.wikidata.org/w/api.php')\n", + "wbi = WikibaseIntegrator(login=login)" + ] + }, + { + "cell_type": "markdown", + "id": "c8b2ce02", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Create a new Item object" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "5ce0df1e", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_item = wbi.item.new()" + ] + }, + { + "cell_type": "markdown", + "id": "00f46d7d", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Set labels" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "71cd2848", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_item.labels.set('en', 'New item')\n", + "new_item.labels.set('fr', 'Nouvel élément')" + ] + }, + { + "cell_type": "markdown", + "id": "cef7ce93", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Set aliases" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7ada4b6a", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "], 'fr': []}>" + }, + "execution_count": 7, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "new_item.aliases.set('en', 'Item')\n", + "new_item.aliases.set('fr', 'Élément')" + ] + }, + { + "cell_type": "markdown", + "id": "7185bec3", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Set descriptions" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "a2a3b081", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_item.descriptions.set('en', 'A freshly created element')\n", + "new_item.descriptions.set('fr', 'Un élément fraichement créé')" + ] + }, + { + "cell_type": "markdown", + "id": "078c3b4b", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Create a claim, with qualifiers and references, and add it to the new item entity" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ac4b06df", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " _Snak__property_number='P31533' _Snak__hash=None _Snak__datavalue={'value': 'A String property', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Item qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Item string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another item string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]>>]}>" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_qualifiers = Qualifiers()\n", + "new_qualifiers.add(datatypes.String(prop_nr='P828', value='Item qualifier'))\n", + "\n", + "new_references = References()\n", + "new_reference1 = Reference()\n", + "new_reference1.add(datatypes.String(prop_nr='P828', value='Item string reference'))\n", + "\n", + "new_reference2 = Reference()\n", + "new_reference2.add(datatypes.String(prop_nr='P828', value='Another item string reference'))\n", + "\n", + "new_references.add(new_reference1)\n", + "new_references.add(new_reference2)\n", + "\n", + "new_claim = datatypes.String(prop_nr='P31533', value='A String property', qualifiers=new_qualifiers, references=new_references)\n", + "\n", + "new_item.claims.add(new_claim)" + ] + }, + { + "cell_type": "markdown", + "id": "b2e30176", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "Write the new item to the Wikibase instance" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "29c22ef2", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "\n\t _BaseEntity__title=None\n\t _BaseEntity__pageid=None\n\t _BaseEntity__lastrevid=579081\n\t _BaseEntity__type='item'\n\t _BaseEntity__id='Q225256'\n\t _BaseEntity__claims= _Snak__property_number='P31533' _Snak__hash='112d32b098a091cc1398c779e76c763a523d4ffc' _Snak__datavalue={'value': 'A String property', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' 
_Snak__hash='8d721edd0365e35ed006822601a4837b35e68fd6' _Snak__datavalue={'value': 'Item qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='Q225256$A1CB5069-5FF4-4EE4-BE99-D1607BFFB705' _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash='811577f0f42a7059f39bd6b169366bb1fb2f9af3' _Snak__datavalue={'value': 'Item string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='774c2b3d70f072fb26d05a95d24445fbc8b2534e' _Snak__datavalue={'value': 'Another item string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]>>]}>\n\t _ItemEntity__labels=, 'fr': }>\n\t _ItemEntity__descriptions=, 'fr': }>\n\t _ItemEntity__aliases=], 'fr': []}>\n\t _ItemEntity__sitelinks=>" + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_item.write()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/notebooks/item_get.ipynb b/notebooks/item_get.ipynb new file mode 100644 index 00000000..02ee483e --- /dev/null +++ b/notebooks/item_get.ipynb @@ -0,0 +1,165 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3bc67e37", + "metadata": {}, + "source": [ + "# Get english label and height for entity Q42 (Douglas Adams)" + ] + }, + { + "cell_type": "markdown", + "id": "3646845d", + "metadata": {}, + "source": [ + "Load wikibaseintegrator" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "498b50c2", + "metadata": {}, + "outputs": [], + "source": [ + "from wikibaseintegrator import WikibaseIntegrator\n", + "from wikibaseintegrator.wbi_config import config" + ] + }, + { + "cell_type": "markdown", + "source": [ + "Set default variables" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "config['USER_AGENT'] = 'Item Get Notebook'" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "id": "ee386bbb", + "metadata": {}, + "source": [ + "Create WikibaseIntegrator object" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f5131e52", + "metadata": {}, + "outputs": [], + "source": [ + "wbi = WikibaseIntegrator()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "5ce0df1e", + "metadata": {}, + "outputs": [], + "source": [ + "q42 = wbi.item.get('Q42')" + ] + }, + { + "cell_type": "markdown", + "id": "b642a951", + "metadata": {}, + "source": [ + "Get english label" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "29c22ef2", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "'Douglas Adams'" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q42.labels.get('en').value" + ] + }, + { + "cell_type": "markdown", + "id": "b3f25818", + "metadata": {}, + "source": [ + "Get height" + ] + }, + { + 
"cell_type": "code", + "execution_count": 6, + "id": "e3639082", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "'+1.96'" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "q42.claims.get('P2048')[0].mainsnak.datavalue['value']['amount']\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.1" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/notebooks/lexeme_update.ipynb b/notebooks/lexeme_update.ipynb new file mode 100644 index 00000000..85f76ff4 --- /dev/null +++ b/notebooks/lexeme_update.ipynb @@ -0,0 +1,292 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Update an existing Lexeme" + ], + "metadata": { + "collapsed": false + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from wikibaseintegrator import WikibaseIntegrator, wbi_login, datatypes\n", + "from wikibaseintegrator.models import Reference, References\n", + "from wikibaseintegrator.models.qualifiers import Qualifiers\n", + "from wikibaseintegrator.wbi_config import config" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "WDUSER = ''\n", + "WDPASS = ''" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Set default variables" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "config['MEDIAWIKI_API_URL'] = 'https://test.wikidata.org/w/api.php'\n", + "config['USER_AGENT'] = 'Lexeme Write Notebook'" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Create login and WikibaseIntegrator object" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "login = wbi_login.Login(user=WDUSER, password=WDPASS, mediawiki_api_url='https://test.wikidata.org/w/api.php')\n", + "wbi = WikibaseIntegrator(login=login)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Get existing lexeme entity" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "lexeme = wbi.lexeme.get(entity_id='L42')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Set french lemmas" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "source": [ + "# Lemmas\n", + "lexeme.lemmas.set(language='fr', value='réponse')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + }, + "execution_count": 6, + "outputs": [ + { + "data": { + 
"text/plain": "" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Create a new claim" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "# Claims\n", + "claim_references = References() # Create a group of references\n", + "\n", + "claim_reference1 = Reference()\n", + "claim_reference1.add(datatypes.String(prop_nr='P828', value='Claim string reference'))\n", + "\n", + "claim_reference2 = Reference()\n", + "claim_reference2.add(datatypes.String(prop_nr='P828', value='Another claim string reference'))\n", + "\n", + "# Add each reference to the group of references\n", + "claim_references.add(claim_reference1)\n", + "claim_references.add(claim_reference2)\n", + "\n", + "claim_qualifiers = Qualifiers()\n", + "claim_qualifiers.add(datatypes.String(prop_nr='P828', value='Claim qualifier'))\n", + "\n", + "claim = datatypes.String(prop_nr='P828', value=\"Create a string claim for claim\", references=claim_references,\n", + " qualifiers=claim_qualifiers)\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Add claim to lexeme" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "data": { + "text/plain": " _Snak__property_number='P77771' _Snak__hash='5362eb3a0f7cba043a1b2afbc0352fc16463b0de' _Snak__datavalue={'value': {'entity-type': 'form', 'id': 'L41-F1'}, 'type': 'wikibase-entityid'} _Snak__datatype='wikibase-form'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id='L42$7bd50233-4080-3f02-5fc3-c01b905a58ed' _Claim__rank= _Claim__removed=False _Claim__references=>,
_Snak__property_number='P77771' _Snak__hash='1ad1672f2ebc9e96628b20091d1c0f7f17f2ec65' _Snak__datavalue={'value': {'entity-type': 'form', 'id': 'L103-F1'}, 'type': 'wikibase-entityid'} _Snak__datatype='wikibase-form'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id='L42$e6a6d9c6-4874-05b9-b7e6-433e7904e624' _Claim__rank= _Claim__removed=False _Claim__references=>], 'P828': [ _Snak__property_number='P828' _Snak__hash='dc920cec98f0e830c30011cd496108be8d50afab' _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='351f871bfe166697d3270cc0df7df8d09603efb0' _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L42$1B581F6D-71B6-4861-8300-6C5CFEE80337' _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash='bae62d7b26cff18d5a9d277e04475fcb6bd9bcfb' _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='dcd0b956c352f2036bb7da153c4db941e74a803f' _Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]>>]}>" + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lexeme.claims.add(claim)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Write the lexeme to the Wikibase" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "data": { + "text/plain": "\n\t lastrevid=553469\n\t type='lexeme'\n\t id='L42'\n\t claims= _Snak__property_number='P77771' _Snak__hash='5362eb3a0f7cba043a1b2afbc0352fc16463b0de' _Snak__datavalue={'value': {'entity-type': 'form', 'id': 'L41-F1'}, 'type': 'wikibase-entityid'} _Snak__datatype='wikibase-form'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id='L42$7bd50233-4080-3f02-5fc3-c01b905a58ed' _Claim__rank= _Claim__removed=False _Claim__references=>, _Snak__property_number='P77771' _Snak__hash='1ad1672f2ebc9e96628b20091d1c0f7f17f2ec65' _Snak__datavalue={'value': {'entity-type': 'form', 'id': 'L103-F1'}, 'type': 'wikibase-entityid'} _Snak__datatype='wikibase-form'> _Claim__type='statement' _Claim__qualifiers= _Claim__qualifiers_order=[] _Claim__id='L42$e6a6d9c6-4874-05b9-b7e6-433e7904e624' _Claim__rank= _Claim__removed=False _Claim__references=>], 'P828': [ _Snak__property_number='P828' _Snak__hash='dc920cec98f0e830c30011cd496108be8d50afab' _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='351f871bfe166697d3270cc0df7df8d09603efb0' _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L42$1B581F6D-71B6-4861-8300-6C5CFEE80337' _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash='bae62d7b26cff18d5a9d277e04475fcb6bd9bcfb' _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> 
_Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='dcd0b956c352f2036bb7da153c4db941e74a803f' _Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]>>]}>\n\t lemmas=, 'fr': }>\n\t lexical_category='Q1084'\n\t language='Q1860'\n\t forms=\n\t senses=>" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lexeme.write()\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/notebooks/lexeme_write.ipynb b/notebooks/lexeme_write.ipynb new file mode 100644 index 00000000..c4a4147a --- /dev/null +++ b/notebooks/lexeme_write.ipynb @@ -0,0 +1,422 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Write a new Lexeme" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from wikibaseintegrator import WikibaseIntegrator, wbi_login, datatypes\n", + "from wikibaseintegrator.models import Reference, References, Form, Sense\n", + "from wikibaseintegrator.models.qualifiers import Qualifiers\n", + "from wikibaseintegrator.wbi_config import config" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "WDUSER = ''\n", + "WDPASS = ''" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Set default variables" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "config['MEDIAWIKI_API_URL'] = 'https://test.wikidata.org/w/api.php'\n", + "config['USER_AGENT'] = 'Lexeme Write Notebook'" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Create login and WikibaseIntegrator object" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "login = wbi_login.Login(user=WDUSER, password=WDPASS, mediawiki_api_url='https://test.wikidata.org/w/api.php')\n", + "wbi = WikibaseIntegrator(login=login)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "# Create a new verb lexeme" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "lexeme = wbi.lexeme.new(lexical_category='Q1244')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Set english and French lemmas" + ], + "metadata": { + "collapsed": false, + "pycharm": { + 
"name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [ + { + "data": { + "text/plain": "" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Lemmas\n", + "lexeme.lemmas.set(language='en', value='English lemma')\n", + "lexeme.lemmas.set(language='fr', value='French lemma')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Create and add a claim" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "# Claims\n", + "claim_references = References() # Create a group of references\n", + "\n", + "claim_reference1 = Reference()\n", + "claim_reference1.add(datatypes.String(prop_nr='P828', value='Claim string reference'))\n", + "\n", + "claim_reference2 = Reference()\n", + "claim_reference2.add(datatypes.String(prop_nr='P828', value='Another claim string reference'))\n", + "\n", + "# Add each reference to the group of references\n", + "claim_references.add(claim_reference1)\n", + "claim_references.add(claim_reference2)\n", + "\n", + "claim_qualifiers = Qualifiers()\n", + "claim_qualifiers.add(datatypes.String(prop_nr='P828', value='Claim qualifier'))\n", + "\n", + "claim = datatypes.String(prop_nr='P828', value=\"Create a string claim for claim\", references=claim_references,\n", + " qualifiers=claim_qualifiers)\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "Add claim to lexeme" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "data": { + "text/plain": " _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]>>]}>" + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lexeme.claims.add(claim)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Create and add a sense" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "data": { + "text/plain": ", 'fr': }> claims= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for sense', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Sense qualifier', 'type': 'string'} _Snak__datatype='string'>]}> 
_Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]>>]}> removed=False>]>" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Senses\n", + "sense = Sense()\n", + "sense.glosses.set(language='en', value='English gloss')\n", + "sense.glosses.set(language='fr', value='French gloss')\n", + "\n", + "# Senses claims\n", + "sense_references = References()\n", + "sense_reference1 = Reference()\n", + "sense_reference1.add(datatypes.String(prop_nr='P828', value='Sense string reference'))\n", + "\n", + "sense_reference2 = Reference()\n", + "sense_reference2.add(datatypes.String(prop_nr='P828', value='Another sense string reference'))\n", + "\n", + "sense_references.add(sense_reference1)\n", + "sense_references.add(sense_reference2)\n", + "\n", + "sense_qualifiers = Qualifiers()\n", + "sense_qualifiers.add(datatypes.String(prop_nr='P828', value='Sense qualifier'))\n", + "\n", + "claim = datatypes.String(prop_nr='P828', value=\"Create a string claim for sense\", references=sense_references,\n", + " qualifiers=sense_qualifiers)\n", + "\n", + "sense.claims.add(claim)\n", + "\n", + "lexeme.senses.add(sense)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Create and add a form" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "data": { + "text/plain": ", 'fr': }> _Form__grammatical_features=['Q146786'] _Form__claims= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Create a string claim for form', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Form qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=[] _Claim__id=None _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>, _Snak__property_number='P828' _Snak__hash=None _Snak__datavalue={'value': 'Another form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=[]>]>>]}>>}>" + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Forms\n", + "form = Form()\n", + "\n", + "# Form representations\n", + "form.representations.set(language='en', value='English form representation')\n", + "form.representations.set(language='fr', value='French form representation')\n", + "\n", + "# Add a list of grammatical features to the form\n", + "form.grammatical_features = ['Q146786']\n", + "\n", + "# Form claims\n", + "form_references = References() # Create a group of references\n", + "form_reference1 = Reference()\n", + "form_reference1.add(datatypes.String(prop_nr='P828', value='Form string reference'))\n", + "\n", + "form_reference2 = Reference()\n", + 
"form_reference2.add(datatypes.String(prop_nr='P828', value='Another form string reference'))\n", + "\n", + "form_references.add(form_reference1)\n", + "form_references.add(form_reference2)\n", + "\n", + "form_qualifiers = Qualifiers()\n", + "form_qualifiers.add(datatypes.String(prop_nr='P828', value='Form qualifier'))\n", + "\n", + "claim = datatypes.String(prop_nr='P828', value=\"Create a string claim for form\", references=form_references,\n", + " qualifiers=form_qualifiers)\n", + "\n", + "form.claims.add(claim)\n", + "\n", + "lexeme.forms.add(form)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## Write the lexeme to the Wikibase" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [ + { + "data": { + "text/plain": "\n\t lastrevid=553076\n\t type='lexeme'\n\t id='L1734'\n\t claims= _Snak__property_number='P828' _Snak__hash='dc920cec98f0e830c30011cd496108be8d50afab' _Snak__datavalue={'value': 'Create a string claim for claim', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='351f871bfe166697d3270cc0df7df8d09603efb0' _Snak__datavalue={'value': 'Claim qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1734$692E2FA0-0970-4280-8A59-B9A59B3DC86E' _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash='bae62d7b26cff18d5a9d277e04475fcb6bd9bcfb' _Snak__datavalue={'value': 'Claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='dcd0b956c352f2036bb7da153c4db941e74a803f' _Snak__datavalue={'value': 'Another claim string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]>>]}>\n\t lemmas=, 'fr': }>\n\t lexical_category='Q1244'\n\t language='Q1860'\n\t forms=, 'fr': }> _Form__grammatical_features=['Q146786'] _Form__claims= _Snak__property_number='P828' _Snak__hash='288a8a8f1e12b9bacb056319c4ed0f3e6bafdd00' _Snak__datavalue={'value': 'Create a string claim for form', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='6c157568b379f4c2722f30a9fee95d3c5f99dfe9' _Snak__datavalue={'value': 'Form qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1734-F1$36902198-7926-41E1-BAC9-5E8601F4A2A7' _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash='1cbb07e7eba6906acf68f427a3f87fefc0a53283' _Snak__datavalue={'value': 'Form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='67bef049d400b9d7e2e2695320d85012c9122df5' _Snak__datavalue={'value': 'Another form string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]>>]}>>}>\n\t senses=, 'fr': }> claims= _Snak__property_number='P828' _Snak__hash='9781442191b38e26c55b1dfde6f6203c9127c4f3' _Snak__datavalue={'value': 'Create a string claim for sense', 'type': 'string'} _Snak__datatype='string'> _Claim__type='statement' _Claim__qualifiers= _Snak__property_number='P828' _Snak__hash='c1afe7627d9711627e1e48b8e015ade998d6d434' _Snak__datavalue={'value': 'Sense 
qualifier', 'type': 'string'} _Snak__datatype='string'>]}> _Claim__qualifiers_order=['P828'] _Claim__id='L1734-S1$37E31B12-1BB8-454A-8ADE-84AAED4A49EA' _Claim__rank= _Claim__removed=False _Claim__references= _Snak__property_number='P828' _Snak__hash='617bd3516c2003df28ab90fd6ee0bd8237f1f8e6' _Snak__datavalue={'value': 'Sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>, _Snak__property_number='P828' _Snak__hash='1afe472d8815b3cbf50d2e5b1c497456a82f055f' _Snak__datavalue={'value': 'Another sense string reference', 'type': 'string'} _Snak__datatype='string'>]}> _Reference__snaks_order=['P828']>]>>]}> removed=False>]>>" + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "lexeme.write()\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index fa2a8b22..80143fed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,3 +4,33 @@ requires = [ "wheel" ] build-backend = "setuptools.build_meta" + +[tool.isort] +line_length = 179 + +[tool.mypy] +ignore_missing_imports = true +files = "wikibaseintegrator/**/*.py,test/*.py" + +[tool.pylint.MASTER] +extension-pkg-allow-list = [ + "ujson" +] + +[tool.pylint.messages_control] +max-line-length = 180 +disable = [ + "fixme", + "redefined-builtin", + "invalid-name", # To remove later + "too-few-public-methods", + "too-many-arguments", + "too-many-statements", + "too-many-locals", + "too-many-branches", + "too-many-instance-attributes", + "import-outside-toplevel" # To remove later +] + +[tool.pytest.ini_options] +log_cli = true diff --git a/requirements.txt b/requirements.txt index 0a70b0de..0d8a9867 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ backoff~=2.1.2 mwoauth~=0.3.8 oauthlib~=3.2.0 requests~=2.28.1 -simplejson~=3.17.6 +requests-oauthlib~=1.3.1 +ujson~=5.4.0 diff --git a/setup.cfg b/setup.cfg index 95b8f6b3..00c311f7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,14 +1,16 @@ [metadata] name = wikibaseintegrator -version = 0.11.3 +version = 0.12.0 author = Myst, Wikibase Integrator authors and Wikidata Integrator authors license = MIT -license_files = LICENSE.txt +license_files = LICENSE description = Python package for reading from and writing to a Wikibase instance keywords = wikibase, wikidata, mediawiki, sparql home_page = https://github.com/LeMyst/WikibaseIntegrator project_urls = + Documentation = https://wikibaseintegrator.readthedocs.io/ Bug Tracker = https://github.com/LeMyst/WikibaseIntegrator/issues + Changelog = https://github.com/LeMyst/WikibaseIntegrator/releases long_description = file: README.md long_description_content_type = text/markdown platform = any @@ -30,17 +32,32 @@ classifiers = Topic :: Software Development :: Libraries :: Python Modules [options] -packages = wikibaseintegrator +packages = find: install_requires = backoff>=1.11.1,<2.2.0 - mwoauth~=0.3.7 + mwoauth~=0.3.8 oauthlib~=3.2.0 requests>=2.27.1,<2.29.0 - simplejson~=3.17.5 + requests-oauthlib~=1.3.1 + ujson~=5.4.0 
python_requires = >=3.7, <=3.11 [options.extras_require] dev = pytest + pylint + pylint-exit + mypy + codespell + flynt +docs = + Sphinx~=4.5.0 + readthedocs-sphinx-ext~=2.1.5 + sphinx-rtd-theme~=1.0.0 + sphinx_github_changelog~=1.2.0 + m2r2~=0.3.2 + sphinx-autodoc-typehints~=1.18.1 +notebooks = + jupyter coverage = pytest-cov diff --git a/setup.py b/setup.py index b604cead..62a7a637 100644 --- a/setup.py +++ b/setup.py @@ -6,13 +6,30 @@ name="wikibaseintegrator", install_requires=[ "backoff >= 1.11.1,< 2.2.0", - "mwoauth ~= 0.3.7", + "mwoauth ~= 0.3.8", "oauthlib ~= 3.2.0", "requests >= 2.27.1,< 2.29.0", - "simplejson ~= 3.17.5" + "requests-oauthlib ~= 1.3.1", + "ujson ~= 5.4.0" ], extras_require={ - "dev": ["pytest"], + "dev": [ + "pytest", + "pylint", + "pylint-exit", + "mypy", + "codespell", + "flynt" + ], + "docs": [ + "Sphinx ~= 4.5.0", + "readthedocs-sphinx-ext ~= 2.1.5", + "sphinx-rtd-theme ~= 1.0.0", + "sphinx_github_changelog ~= 1.2.0", + "m2r2 ~= 0.3.2", + "sphinx-autodoc-typehints ~= 1.18.1" + ], + "notebooks": ["jupyter"], "coverage": ["pytest-cov"], }, ) diff --git a/test/test_all.py b/test/test_all.py index 22fdc120..0eb2ba57 100644 --- a/test/test_all.py +++ b/test/test_all.py @@ -1,92 +1,52 @@ import copy -import pprint import unittest -import requests +from wikibaseintegrator import WikibaseIntegrator, datatypes, wbi_fastrun +from wikibaseintegrator.datatypes import BaseDataType, Item +from wikibaseintegrator.entities import ItemEntity +from wikibaseintegrator.wbi_config import config as wbi_config +from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseDatatype +from wikibaseintegrator.wbi_fastrun import get_fastrun_container -from wikibaseintegrator import wbi_core, wbi_fastrun, wbi_functions, wbi_datatype -from wikibaseintegrator.wbi_core import MWApiError +wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_all.py)' -__author__ = 'Sebastian Burgstaller-Muehlbacher' -__license__ = 'AGPLv3' - - -class TestMediawikiApiCall(unittest.TestCase): - def test_all(self): - with self.assertRaises(MWApiError): - wbi_functions.mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, mediawiki_api_url="https://www.wikidataaaaaaa.org", - max_retries=3, retry_after=1, allow_anonymous=True) - with self.assertRaises(requests.HTTPError): - wbi_functions.mediawiki_api_call_helper(data=None, mediawiki_api_url="https://httpbin.org/status/400", max_retries=3, retry_after=1, allow_anonymous=True) - - wbi_functions.mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True) - - with self.assertRaises(MWApiError): - wbi_functions.mediawiki_api_call_helper(data=None, mediawiki_api_url="https://httpbin.org/status/502", max_retries=3, retry_after=1, allow_anonymous=True) +wbi = WikibaseIntegrator() class TestDataType(unittest.TestCase): def test_quantity(self): - dt = wbi_datatype.Quantity(quantity='34.5', prop_nr='P43') + dt = datatypes.Quantity(amount='34.5', prop_nr='P43') - dt_json = dt.get_json_representation() + dt_json = dt.get_json() - if not dt_json['mainsnak']['datatype'] == 'quantity': - raise + assert dt_json['mainsnak']['datatype'] == WikibaseDatatype.QUANTITY.value value = dt_json['mainsnak']['datavalue'] - if not value['value']['amount'] == '+34.5': - raise - - if not value['value']['unit'] == '1': - raise - - dt2 = wbi_datatype.Quantity(quantity='34.5', prop_nr='P43', upper_bound='35.3', lower_bound='33.7', unit="Q11573") + assert 
value['value']['amount'] == '+34.5' + assert value['value']['unit'] == '1' - value = dt2.get_json_representation()['mainsnak']['datavalue'] + dt2 = datatypes.Quantity(amount='34.5', prop_nr='P43', upper_bound='35.3', lower_bound='33.7', unit="Q11573") - if not value['value']['amount'] == '+34.5': - raise + value = dt2.get_json()['mainsnak']['datavalue'] - if not value['value']['unit'] == 'http://www.wikidata.org/entity/Q11573': - raise - - if not value['value']['upperBound'] == '+35.3': - raise - - if not value['value']['lowerBound'] == '+33.7': - raise + assert value['value']['amount'] == '+34.5' + assert value['value']['unit'] == 'http://www.wikidata.org/entity/Q11573' + assert value['value']['upperBound'] == '+35.3' + assert value['value']['lowerBound'] == '+33.7' def test_geoshape(self): - dt = wbi_datatype.GeoShape(value='Data:Inner_West_Light_Rail_stops.map', prop_nr='P43') + dt = datatypes.GeoShape(value='Data:Inner_West_Light_Rail_stops.map', prop_nr='P43') - dt_json = dt.get_json_representation() + dt_json = dt.get_json() - if not dt_json['mainsnak']['datatype'] == 'geo-shape': - raise + assert dt_json['mainsnak']['datatype'] == WikibaseDatatype.GEOSHAPE.value value = dt_json['mainsnak']['datavalue'] - if not value['value'] == 'Data:Inner_West_Light_Rail_stops.map': - raise - - if not value['type'] == 'string': - raise - - def test_live_item(self): - """ - Test an item against Wikidata - """ - item = wbi_core.ItemEngine(item_id='Q423111') - - mass_statement = [x for x in item.statements if x.get_prop_nr() == 'P2067'].pop() - pprint.pprint(mass_statement.get_json_representation()) + assert value['value'] == 'Data:Inner_West_Light_Rail_stops.map' - if not mass_statement: - raise - - # TODO: get json directly from the API and compare part to ItemEngine + assert value['type'] == 'string' class TestFastRun(unittest.TestCase): @@ -94,57 +54,50 @@ class TestFastRun(unittest.TestCase): some basic tests for fastrun mode """ - def test_fast_run(self): + def test_fastrun(self): statements = [ - wbi_datatype.ExternalID(value='P40095', prop_nr='P352'), - wbi_datatype.ExternalID(value='YER158C', prop_nr='P705') + datatypes.ExternalID(value='P40095', prop_nr='P352'), + datatypes.ExternalID(value='YER158C', prop_nr='P705') ] - frc = wbi_fastrun.FastRunContainer(base_filter={'P352': '', 'P703': 'Q27510868'}, - base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine) + frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P352'), datatypes.Item(prop_nr='P703', value='Q27510868')], base_data_type=datatypes.BaseDataType) - fast_run_result = frc.write_required(data=statements) + fastrun_result = frc.write_required(data=statements) - if fast_run_result: + if fastrun_result: message = 'fastrun failed' else: message = 'successful fastrun' - print(fast_run_result, message) # here, fastrun should succeed, if not, test failed - # if fast_run_result: - # raise ValueError + if fastrun_result: + raise ValueError def test_fastrun_label(self): # tests fastrun label, description and aliases, and label in another language - data = [wbi_datatype.ExternalID('/m/02j71', 'P646')] - fast_run_base_filter = {'P361': 'Q18589965'} - item = wbi_core.ItemEngine(item_id="Q2", data=data, fast_run=True, fast_run_base_filter=fast_run_base_filter) - - frc = wbi_core.ItemEngine.fast_run_store[0] - frc.debug = True + frc = get_fastrun_container(base_filter=[datatypes.ExternalID(value='/m/02j71', prop_nr='P646')]) + item = WikibaseIntegrator().item.get('Q2') - assert item.get_label('en') == "Earth" - 
descr = item.get_description('en') + assert item.labels.get(language='en') == "Earth" + descr = item.descriptions.get(language='en') assert len(descr) > 3 - aliases = item.get_aliases() - assert "the Earth" in aliases + assert "the Earth" in item.aliases.get() - assert list(item.fast_run_container.get_language_data("Q2", 'en', 'label'))[0] == "Earth" - assert item.fast_run_container.check_language_data("Q2", ['not the Earth'], 'en', 'label') - assert "the Earth" in item.get_aliases() - assert "planet" in item.get_description() + assert list(frc.get_language_data("Q2", 'en', 'label'))[0] == item.labels.get(language='en') + assert frc.check_language_data("Q2", ['not the Earth'], 'en', 'label') + assert "the Earth" in item.aliases.get() + assert "planet" in item.descriptions.get() - assert item.get_label("es") == "Tierra" + assert item.labels.get('es') == "Tierra" - item.set_description(descr) - item.set_description("fghjkl") - assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'fghjkl'} - item.set_label("Earth") - item.set_label("xfgfdsg") - assert item.json_representation['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'} - item.set_aliases(["fake alias"], if_exists='APPEND') - assert {'language': 'en', 'value': 'fake alias'} in item.json_representation['aliases']['en'] + item.descriptions.set(value=descr) + item.descriptions.set(value="fghjkl") + assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'fghjkl'} + item.labels.set(value="Earth") + item.labels.set(value="xfgfdsg") + assert item.get_json()['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'} + item.aliases.set(values=["fake alias"], action_if_exists=ActionIfExists.APPEND_OR_REPLACE) + assert {'language': 'en', 'value': 'fake alias'} in item.get_json()['aliases']['en'] # something that's empty (for now.., can change, so this just makes sure no exception is thrown) frc.check_language_data("Q2", ['Ewiase'], 'ak', 'label') @@ -153,33 +106,31 @@ def test_fastrun_label(self): frc.check_language_data("Q2", [], 'ak', 'aliases') frc.check_language_data("Q2", ['sdf', 'sdd'], 'ak', 'aliases') - item.get_label("ak") - item.get_description("ak") - item.get_aliases("ak") - item.set_label("label", lang="ak") - item.set_description("d", lang="ak") - item.set_aliases(["a"], lang="ak", if_exists='APPEND') + item.labels.get(language="ak") + item.descriptions.get(language='ak') + item.aliases.get(language="ak") + item.labels.set(value="label", language="ak") + item.descriptions.set(value="d", language="ak") + item.aliases.set(values=["a"], language="ak", action_if_exists=ActionIfExists.APPEND_OR_REPLACE) def test_sitelinks(): - data = [wbi_datatype.ItemID(value='Q12136', prop_nr='P31')] - item = wbi_core.ItemEngine(item_id='Q622901', data=data) - item.get_sitelink("enwiki") - assert "enwiki" not in item.json_representation['sitelinks'] - item.set_sitelink("enwiki", "something") - assert item.get_sitelink("enwiki")['title'] == "something" - assert "enwiki" in item.json_representation['sitelinks'] + item = wbi.item.get('Q622901') + item.claims.add(datatypes.Item(value='Q12136', prop_nr='P31')) + assert item.sitelinks.get('enwiki') is not None + item.sitelinks.set(site="enwiki", title="something") + assert item.sitelinks.get('enwiki').title == "something" + assert item.sitelinks.get('enwiki') is not None def test_nositelinks(): # this item doesn't and probably won't ever have any sitelinks (but who knows?? maybe one day..) 
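# The test rewrites above migrate from the 0.11 ItemEngine helpers to the 0.12
# entity models, where labels, descriptions, aliases and sitelinks live on
# dedicated objects. A hedged sketch of the new call pattern exercised by these
# tests (Q2 and ActionIfExists come straight from the hunks; the surrounding
# setup is illustrative only):
from wikibaseintegrator import WikibaseIntegrator
from wikibaseintegrator.wbi_enums import ActionIfExists

wbi = WikibaseIntegrator()
item = wbi.item.get('Q2')  # anonymous read access is enough for the getters

print(item.labels.get(language='en'))  # "Earth"
item.descriptions.set(value='lorem ipsum', language='en')
item.aliases.set(values=['fake alias'], language='en',
                 action_if_exists=ActionIfExists.APPEND_OR_REPLACE)
print(item.sitelinks.get('enwiki'))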
- data = [wbi_datatype.ItemID(value='Q5', prop_nr='P31')] - item = wbi_core.ItemEngine(item_id='Q27869338', data=data) - item.get_sitelink("enwiki") - assert "enwiki" not in item.json_representation['sitelinks'] - item.set_sitelink("enwiki", "something") - assert item.get_sitelink("enwiki")['title'] == "something" - assert "enwiki" in item.json_representation['sitelinks'] + item = wbi.item.get('Q27869338') + item.claims.add(datatypes.Item(value='Q5', prop_nr='P31')) + assert item.sitelinks.get('enwiki') is None + item.sitelinks.set(site="enwiki", title="something") + assert item.sitelinks.get('enwiki').title == "something" + assert item.sitelinks.get('enwiki') is not None #### @@ -187,24 +138,59 @@ def test_nositelinks(): #### def test_ref_equals(): # statements are identical - oldref = [wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_reference=True), - wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', is_reference=True), - wbi_datatype.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_reference=True)] - olditem = wbi_datatype.ItemID("Q123", "P123", references=[oldref]) + oldref = [datatypes.ExternalID(value='P58742', prop_nr='P352'), + datatypes.Item(value='Q24784025', prop_nr='P527'), + datatypes.Time(time='+2001-12-31T00:00:00Z', prop_nr='P813')] + olditem = datatypes.Item(value='Q123', prop_nr='P123', references=[oldref]) newitem = copy.deepcopy(olditem) + assert olditem.equals(newitem, include_ref=False) assert olditem.equals(newitem, include_ref=True) # dates are a month apart newitem = copy.deepcopy(olditem) - newitem.references[0][2] = wbi_datatype.Time(time='+2002-01-31T12:01:13Z', prop_nr='P813') + newitem.references.remove(datatypes.Time(time='+2001-12-31T00:00:00Z', prop_nr='P813')) + newitem.references.add(datatypes.Time(time='+2002-01-31T00:00:00Z', prop_nr='P813')) assert olditem.equals(newitem, include_ref=False) assert not olditem.equals(newitem, include_ref=True) # multiple refs newitem = copy.deepcopy(olditem) - newitem.references.append([wbi_datatype.ExternalID(value='99999', prop_nr='P352')]) + newitem.references.add(datatypes.ExternalID(value='99999', prop_nr='P352')) assert olditem.equals(newitem, include_ref=False) assert not olditem.equals(newitem, include_ref=True) - olditem.references.append([wbi_datatype.ExternalID(value='99999', prop_nr='P352')]) + olditem.references.add(datatypes.ExternalID(value='99999', prop_nr='P352')) assert olditem.equals(newitem, include_ref=True) + + +def test_equal_qualifiers(): + claim1 = Item(prop_nr='P1') + claim1.qualifiers.set([Item(prop_nr='P2', value='Q1'), Item(prop_nr='P2', value='Q2')]) + claim2 = Item(prop_nr='P4') + claim2.qualifiers.set([Item(prop_nr='P2', value='Q1')]) + claim3 = Item(prop_nr='P4') + claim3.qualifiers.set([Item(prop_nr='P2', value='Q1'), Item(prop_nr='P2', value='Q2')]) + claim4 = Item(prop_nr='P4') + claim4.qualifiers.set([Item(prop_nr='P2', value='Q1'), Item(prop_nr='P2', value='Q3')]) + + assert claim1.has_equal_qualifiers(claim2) is False + assert claim1.has_equal_qualifiers(claim3) is True + assert claim1.has_equal_qualifiers(claim4) is False + + +def test_mediainfo(): + mediainfo_item_by_title = wbi.mediainfo.get_by_title(titles='File:2018-07-05-budapest-buda-hill.jpg', mediawiki_api_url='https://commons.wikimedia.org/w/api.php') + assert mediainfo_item_by_title.id == 'M75908279' + + mediainfo_item_by_id = wbi.mediainfo.get(entity_id='M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php') + assert mediainfo_item_by_id.id == 'M75908279' + + +def 
test_wikibaseintegrator():
+    nwbi = WikibaseIntegrator(is_bot=False)
+    assert nwbi.item.api.is_bot is False
+    assert ItemEntity(api=nwbi, is_bot=True).api.is_bot is True
+    assert ItemEntity(api=nwbi).api.is_bot is False
+    assert ItemEntity().api.is_bot is False
+    assert nwbi.item.get('Q582').api.is_bot is False
+    assert ItemEntity(api=nwbi, is_bot=True).get('Q582').api.is_bot is True
diff --git a/test/test_entity_item.py b/test/test_entity_item.py
new file mode 100644
index 00000000..a2dd4588
--- /dev/null
+++ b/test/test_entity_item.py
@@ -0,0 +1,65 @@
+import unittest
+
+import requests
+
+from wikibaseintegrator import WikibaseIntegrator
+from wikibaseintegrator.datatypes import BaseDataType, Item
+from wikibaseintegrator.wbi_config import config as wbi_config
+from wikibaseintegrator.wbi_exceptions import NonExistentEntityError
+
+wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_item.py)'
+
+wbi = WikibaseIntegrator()
+
+
+class TestEntityItem(unittest.TestCase):
+
+    def test_get(self):
+        # Test with complete id
+        assert wbi.item.get('Q582').id == 'Q582'
+        # Test with numeric id as string
+        assert wbi.item.get('582').id == 'Q582'
+        # Test with numeric id as int
+        assert wbi.item.get(582).id == 'Q582'
+
+        # Test with invalid id
+        with self.assertRaises(ValueError):
+            wbi.item.get('L5')
+
+        # Test with zero id
+        with self.assertRaises(ValueError):
+            wbi.item.get(0)
+
+        # Test with negative id
+        with self.assertRaises(ValueError):
+            wbi.item.get(-1)
+
+        # Test with a non-existent entity id
+        with self.assertRaises(NonExistentEntityError):
+            wbi.item.get("Q99999999999999")
+
+    def test_get_json(self):
+        assert wbi.item.get('Q582').get_json()['labels']['fr']['value'] == 'Villeurbanne'
+
+    def test_write(self):
+        with self.assertRaises(requests.exceptions.JSONDecodeError):
+            wbi.item.get('Q582').write(allow_anonymous=True, mediawiki_api_url='https://httpstat.us/200')
+
+    def test_write_not_required(self):
+        assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P1791')])
+
+    def test_write_required(self):
+        item = wbi.item.get('Q582')
+        item.claims.add(Item(prop_nr='P1791', value='Q42'))
+        assert item.write_required([BaseDataType(prop_nr='P1791')])
+
+    def test_write_not_required_ref(self):
+        assert not wbi.item.get('Q582').write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True)
+
+    def test_write_required_ref(self):
+        item = wbi.item.get('Q582')
+        item.claims.get('P2581')[0].references.references.pop()
+        assert item.write_required(base_filter=[BaseDataType(prop_nr='P2581')], use_refs=True)
+
+    def test_long_item_id(self):
+        assert wbi.item.get('Item:Q582').id == 'Q582'
diff --git a/test/test_entity_lexeme.py b/test/test_entity_lexeme.py
new file mode 100644
index 00000000..c9006883
--- /dev/null
+++ b/test/test_entity_lexeme.py
@@ -0,0 +1,37 @@
+import unittest
+
+from wikibaseintegrator import WikibaseIntegrator
+from wikibaseintegrator.wbi_config import config as wbi_config
+
+wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_lexeme.py)'
+
+wbi = WikibaseIntegrator()
+
+
+class TestEntityLexeme(unittest.TestCase):
+
+    def test_get(self):
+        # Test with complete id
+        assert wbi.lexeme.get('L5').id == 'L5'
+        # Test with numeric id as string
+        assert wbi.lexeme.get('5').id == 'L5'
+        # Test with numeric id as int
+        assert wbi.lexeme.get(5).id == 'L5'
+
+        # Test with invalid id
+        with self.assertRaises(ValueError):
+            wbi.lexeme.get('Q5')
+
+        # Test with zero id
+        with self.assertRaises(ValueError):
+            wbi.lexeme.get(0)
+
+ # Test with negative id + with self.assertRaises(ValueError): + wbi.lexeme.get(-1) + + def test_get_json(self): + assert wbi.lexeme.get('L5').get_json()['forms'][0]['representations']['es']['value'] == 'pinos' + + def test_long_item_id(self): + assert wbi.lexeme.get('Lexeme:L582').id == 'L582' diff --git a/test/test_entity_mediainfo.py b/test/test_entity_mediainfo.py new file mode 100644 index 00000000..9590ed4a --- /dev/null +++ b/test/test_entity_mediainfo.py @@ -0,0 +1,34 @@ +import unittest + +from wikibaseintegrator import WikibaseIntegrator +from wikibaseintegrator.wbi_config import config as wbi_config + +wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_mediainfo.py)' + +wbi = WikibaseIntegrator() + + +class TestEntityMediaInfo(unittest.TestCase): + + def test_get(self): + # Test with complete id + assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279' + # Test with numeric id as string + assert wbi.mediainfo.get('75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279' + # Test with numeric id as int + assert wbi.mediainfo.get(75908279, mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279' + + # Test with invalid id + with self.assertRaises(ValueError): + wbi.mediainfo.get('L5') + + # Test with zero id + with self.assertRaises(ValueError): + wbi.mediainfo.get(0) + + # Test with negative id + with self.assertRaises(ValueError): + wbi.mediainfo.get(-1) + + def test_get_json(self): + assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').get_json() diff --git a/test/test_entity_property.py b/test/test_entity_property.py new file mode 100644 index 00000000..2b948e3b --- /dev/null +++ b/test/test_entity_property.py @@ -0,0 +1,40 @@ +import unittest + +from wikibaseintegrator import WikibaseIntegrator +from wikibaseintegrator.wbi_config import config as wbi_config + +wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_property.py)' + +wbi = WikibaseIntegrator() + + +class TestEntityProperty(unittest.TestCase): + + def test_get(self): + # Test with complete id + assert wbi.property.get('P50').id == 'P50' + # Test with numeric id as string + assert wbi.property.get('50').id == 'P50' + # Test with numeric id as int + assert wbi.property.get(50).id == 'P50' + + # Test with invalid id + with self.assertRaises(ValueError): + wbi.property.get('L5') + + # Test with zero id + with self.assertRaises(ValueError): + wbi.property.get(0) + + # Test with negative id + with self.assertRaises(ValueError): + wbi.property.get(-1) + + def test_get_json(self): + assert wbi.property.get('P50', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').get_json()['labels']['fr']['value'] == 'auteur ou autrice' + + def test_create_property(self): + wbi.property.new(datatype='wikibase-item') + + def test_long_item_id(self): + assert wbi.property.get('Property:P582').id == 'P582' diff --git a/test/test_wbi_backoff.py b/test/test_wbi_backoff.py index 1411778f..9d77d484 100644 --- a/test/test_wbi_backoff.py +++ b/test/test_wbi_backoff.py @@ -1,7 +1,7 @@ -import json import unittest import requests +import ujson from wikibaseintegrator import wbi_login from wikibaseintegrator.wbi_backoff import wbi_backoff @@ -11,7 +11,7 @@ class TestMethods(unittest.TestCase): def test_all(self): config['BACKOFF_MAX_TRIES'] = 2 - config['BACKOFF_MAX_VALUE'] = 2 + config['BACKOFF_MAX_VALUE'] = 3 with 
self.assertRaises(requests.RequestException): bad_http_code() with self.assertRaises(requests.RequestException): @@ -21,10 +21,12 @@ def test_all(self): assert good_http_code() == 200 - with self.assertRaises(json.JSONDecodeError): + with self.assertRaises(ValueError): bad_json() +# @backoff.on_exception(backoff.expo, (requests.exceptions.Timeout, requests.exceptions.ConnectionError, requests.HTTPError, JSONDecodeError), max_time=60) + @wbi_backoff() def bad_http_code(): r = requests.get("https://httpbin.org/status/400") @@ -41,7 +43,7 @@ def good_http_code(): @wbi_backoff() def bad_json(): - json.loads("I failed :(") + ujson.loads("I failed :(") @wbi_backoff() @@ -50,4 +52,4 @@ def bad_request(): def bad_login(): - wbi_login.Login("name", "pass", mediawiki_api_url="www.wikidataaaaaaaaa.org") + wbi_login.Clientlogin(user='name', password='pass', mediawiki_api_url="www.wikidataaaaaaaaa.org") diff --git a/test/test_wbi_core.py b/test/test_wbi_core.py index 0a3523f7..4bb39419 100644 --- a/test/test_wbi_core.py +++ b/test/test_wbi_core.py @@ -1,195 +1,284 @@ import unittest +from copy import deepcopy -from wikibaseintegrator import wbi_core, wbi_functions, wbi_datatype +from wikibaseintegrator import WikibaseIntegrator +from wikibaseintegrator.datatypes import (URL, CommonsMedia, ExternalID, Form, GeoShape, GlobeCoordinate, Item, Lexeme, Math, MonolingualText, MusicalNotation, Property, Quantity, + Sense, String, TabularData, Time) +from wikibaseintegrator.datatypes.extra import EDTF, LocalMedia +from wikibaseintegrator.entities import ItemEntity +from wikibaseintegrator.models import Descriptions +from wikibaseintegrator.wbi_config import config as wbi_config +from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseDatePrecision, WikibaseRank, WikibaseSnakType +from wikibaseintegrator.wbi_helpers import generate_entity_instances, search_entities + +wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_wbi_core.py)' + +wbi = WikibaseIntegrator() class TestWbiCore(unittest.TestCase): - common_item = wbi_core.ItemEngine(item_id="Q2") + common_item = wbi.item.new().get('Q2') def test_item_engine(self): - wbi_core.ItemEngine(debug=True) - wbi_core.ItemEngine(data=None, debug=True) - wbi_core.ItemEngine(data=wbi_datatype.String(value='test', prop_nr='P1'), debug=True) - wbi_core.ItemEngine(data=[wbi_datatype.String(value='test', prop_nr='P1')], debug=True) - with self.assertRaises(TypeError): - wbi_core.ItemEngine(data='test', debug=True) - with self.assertRaises(ValueError): - wbi_core.ItemEngine(fast_run_case_insensitive=True, debug=True) + ItemEntity(api=wbi) + wbi.item.new() + ItemEntity(api=wbi).add_claims(String(value='test', prop_nr='P1')) + ItemEntity(api=wbi).add_claims([String(value='test', prop_nr='P1')]) + ItemEntity(api=wbi, id='Q2') with self.assertRaises(TypeError): - wbi_core.ItemEngine(ref_handler='test', debug=True) - with self.assertRaises(ValueError): - wbi_core.ItemEngine(global_ref_mode='CUSTOM', debug=True) - wbi_core.ItemEngine(item_id='Q2', fast_run=True, debug=True) - - def test_search_only(self): - item = wbi_core.ItemEngine(item_id="Q2", search_only=True) + ItemEntity(api=wbi).add_claims('test') - assert item.get_label('en') == "Earth" - descr = item.get_description('en') - assert len(descr) > 3 + def test_get(self): + item = wbi.item.new().get(entity_id='Q2') - assert "the Earth" in item.get_aliases() - assert "planet" in item.get_description() + assert item.labels.get('en').value == "Earth" - assert item.get_label("es") == "Tierra" - - def 
test_basedatatype_if_exists(self):
-        instance_of_append = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='APPEND')
-        instance_of_forceappend = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='FORCE_APPEND')
-        instance_of_replace = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='REPLACE')
-        instance_of_keep = wbi_datatype.ItemID(prop_nr='P31', value='Q1234', if_exists='KEEP')
-
-        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_append, instance_of_append])
-        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']]
-        assert len(claims) > 1 and 'Q1234' in claims and claims.count('Q1234') == 1
-
-        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_forceappend, instance_of_forceappend])
-        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']]
-        assert len(claims) > 1 and 'Q1234' in claims and claims.count('Q1234') == 2
-
-        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_replace], debug=True)
-        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31'] if 'remove' not in x]
-        removed_claims = [True for x in item.get_json_representation()['claims']['P31'] if 'remove' in x]
-        assert len(claims) == 1 and 'Q1234' in claims and len(removed_claims) == 3 and True in removed_claims
-
-        item = wbi_core.ItemEngine(item_id="Q2", data=[instance_of_keep], debug=True)
-        claims = [x['mainsnak']['datavalue']['value']['id'] for x in item.get_json_representation()['claims']['P31']]
-        assert len(claims) == 3 and 'Q1234' not in claims
+    def test_basedatatype_action_if_exists(self):
+        instances = [Item(prop_nr='P31', value='Q1234'), Item(prop_nr='P31', value='Q1234')]
+        item_original = wbi.item.get('Q2')
+        len_claims_original = len([x.mainsnak.datavalue['value']['id'] for x in item_original.claims.get('P31')])
+
+        item = deepcopy(item_original)
+        item.add_claims(instances, action_if_exists=ActionIfExists.APPEND_OR_REPLACE)
+        claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')]
+        # Append the claims to the item; only one unique claim is added
+        assert len(claims) == len_claims_original + 1 and 'Q1234' in claims and claims.count('Q1234') == 1
+
+        item = deepcopy(item_original)
+        item.add_claims(instances, action_if_exists=ActionIfExists.FORCE_APPEND)
+        claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')]
+        # Append the claims to the item, forcing both to be added
+        assert len(claims) == len_claims_original + 2 and 'Q1234' in claims and claims.count('Q1234') == 2
+
+        item = deepcopy(item_original)
+        item.add_claims(instances, action_if_exists=ActionIfExists.KEEP)
+        claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31')]
+        # There are already claims for this property, so nothing is added
+        assert len(claims) == len_claims_original and 'Q1234' not in claims
+
+        item = deepcopy(item_original)
+        item.add_claims(instances, action_if_exists=ActionIfExists.REPLACE_ALL)
+        item.add_claims(instances, action_if_exists=ActionIfExists.REPLACE_ALL)  # We add the instances a second time, in case everything is marked as removed.
+        claims = [x.mainsnak.datavalue['value']['id'] for x in item.claims.get('P31') if not x.removed]
+        removed_claims = [True for x in item.claims.get('P31') if x.removed]
+        # Replace all existing claims with the new ones; only one remains because both new claims share the same property number and value
+        assert len(claims) == 1 and 'Q1234' in claims and len(removed_claims) == len_claims_original and True in removed_claims and claims.count('Q1234') == 1

     def test_description(self):
-        item = wbi_core.ItemEngine(item_id="Q2")
-        descr = item.get_description('en')
+        item = wbi.item.get('Q2')
+
+        descr = item.descriptions.get('en').value
         assert len(descr) > 3
-        assert "planet" in item.get_description()
+        assert "planet" in item.descriptions.get('en')

         # set_description on already existing description
-        item.set_description(descr)
-        item.set_description("lorem")
-        item.set_description("lorem ipsum", lang='en', if_exists='KEEP')
-        assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'lorem'}
-        # set_description on empty desription
-        item.set_description("")
-        item.set_description("lorem ipsum", lang='en', if_exists='KEEP')
-        assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'lorem ipsum'}
-
-        item.set_description("lorem", lang='fr', if_exists='KEEP')
-        item.set_description("lorem ipsum", lang='fr', if_exists='REPLACE')
-        item.set_description("lorem", lang='en', if_exists='KEEP')
-        assert item.json_representation['descriptions']['en'] == {'language': 'en', 'value': 'lorem ipsum'}
-        assert item.json_representation['descriptions']['fr'] == {'language': 'fr', 'value': 'lorem ipsum'}
+        item.descriptions.set(value=descr)
+        assert item.descriptions.get() == descr
+        item.descriptions.set(value="lorem")
+        assert item.descriptions.get() == "lorem"
+        item.descriptions.set(language='es', value="lorem ipsum")
+        assert item.descriptions.get('es') == "lorem ipsum"
+        item.descriptions.set(language='en', value="lorem ipsum", action_if_exists=ActionIfExists.KEEP)
+        assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'lorem'}
+        # set_description on empty description
+        item.descriptions = Descriptions()
+        item.descriptions.set(value='')
+        item.descriptions.set(language='en', value="lorem ipsum", action_if_exists=ActionIfExists.KEEP)
+        assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'lorem ipsum'}
+
+        item.descriptions.set(language='fr', value="lorem", action_if_exists=ActionIfExists.KEEP)
+        item.descriptions.set(language='fr', value="lorem ipsum", action_if_exists=ActionIfExists.REPLACE_ALL)
+        item.descriptions.set(language='en', value="lorem", action_if_exists=ActionIfExists.KEEP)
+        assert item.get_json()['descriptions']['en'] == {'language': 'en', 'value': 'lorem ipsum'}
+        assert item.get_json()['descriptions']['fr'] == {'language': 'fr', 'value': 'lorem ipsum'}
+
+        # TODO: Test deletion of description?
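# The four ActionIfExists modes exercised above differ only in how they treat
# claims that already exist for the same property. A compact summary written
# against the calls from the test (the pre-existing P31 claims on Q2 are
# assumed purely for illustration):
from wikibaseintegrator import WikibaseIntegrator
from wikibaseintegrator.datatypes import Item
from wikibaseintegrator.wbi_enums import ActionIfExists

wbi = WikibaseIntegrator()
item = wbi.item.get('Q2')
claim = Item(prop_nr='P31', value='Q1234')

# APPEND_OR_REPLACE: add the claim unless an identical one is already present
item.add_claims(claim, action_if_exists=ActionIfExists.APPEND_OR_REPLACE)
# FORCE_APPEND: always add, even if it duplicates an existing claim
item.add_claims(claim, action_if_exists=ActionIfExists.FORCE_APPEND)
# KEEP: leave the existing P31 claims untouched and drop the new one
item.add_claims(claim, action_if_exists=ActionIfExists.KEEP)
# REPLACE_ALL: mark every existing P31 claim as removed, then add the new one
item.add_claims(claim, action_if_exists=ActionIfExists.REPLACE_ALL)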
def test_label(self): - item = wbi_core.ItemEngine(item_id="Q2") - - assert item.get_label('en') == "Earth" - - assert "the Earth" in item.get_aliases() - - assert item.get_label("es") == "Tierra" - - item.set_label("Earth") - item.set_label("lorem") - item.set_label("lorem ipsum", lang='en', if_exists='KEEP') - assert item.json_representation['labels']['en'] == {'language': 'en', 'value': 'lorem'} - assert item.json_representation['labels']['fr'] == {'language': 'fr', 'value': 'Terre'} - item.set_aliases(["fake alias"], if_exists='APPEND') - assert {'language': 'en', 'value': 'fake alias'} in item.json_representation['aliases']['en'] - - item.set_label(label=None, lang='fr') - item.set_label(label=None, lang='non-exist-key') - assert 'remove' in item.json_representation['labels']['fr'] - - item.get_label("ak") - item.get_description("ak") - item.get_aliases("ak") - item.set_label("label", lang='ak') - item.set_description("d", lang='ak') - item.set_aliases(["a"], lang='ak', if_exists='APPEND') - assert 'a' in item.get_aliases('ak') - item.set_aliases("b", lang='ak') - assert 'a' in item.get_aliases('ak') and 'b' in item.get_aliases('ak') and len(item.get_aliases('ak')) > 2 - item.set_aliases("b", lang='ak', if_exists='REPLACE') - assert item.get_aliases('ak') == ['b'] - item.set_aliases(["c"], lang='ak', if_exists='REPLACE') - assert item.get_aliases('ak') == ['c'] + item = wbi.item.get('Q2') + + assert item.labels.get('en') == "Earth" + + assert "la Terre" in item.aliases.get('fr') + + assert item.labels.get("es") == "Tierra" + + item.labels.set(value='Earth') + item.labels.set(value='xfgfdsg') + item.labels.set(language='en', value='xfgfdsgtest', action_if_exists=ActionIfExists.KEEP) + assert item.get_json()['labels']['en'] == {'language': 'en', 'value': 'xfgfdsg'} + assert item.get_json()['labels']['fr'] == {'language': 'fr', 'value': 'Terre'} + item.aliases.set(values=["fake alias"], action_if_exists=ActionIfExists.APPEND_OR_REPLACE) + assert {'language': 'en', 'value': 'fake alias'} in item.get_json()['aliases']['en'] + + item.labels.set(language='fr', value=None) + item.labels.set(language='non-exist-key', value=None) + assert 'remove' in item.get_json()['labels']['fr'] + + item.labels.set(language='ak') + item.descriptions.set(language='ak') + item.aliases.set(language='ak') + item.labels.set(value='label', language='ak') + item.descriptions.set(value='d', language='ak') + item.aliases.set(values=['a'], language='ak', action_if_exists=ActionIfExists.APPEND_OR_REPLACE) + assert 'a' in item.aliases.get('ak') + item.aliases.set(values='b', language='ak') + assert all(i in item.aliases.get('ak') for i in ['a', 'b']) and len(item.aliases.get('ak')) >= 2 + item.aliases.set(values='b', language='ak', action_if_exists=ActionIfExists.REPLACE_ALL) + assert item.aliases.get('ak') == ['b'] + item.aliases.set(values=['c'], language='ak', action_if_exists=ActionIfExists.REPLACE_ALL) + assert item.aliases.get('ak') == ['c'] + item.aliases.set(values=['d'], language='ak', action_if_exists=ActionIfExists.KEEP) + assert 'd' not in item.aliases.get('ak') + item.aliases.set(language='ak', action_if_exists=ActionIfExists.KEEP) + assert 'remove' not in item.get_json()['aliases']['ak'][0] + item.aliases.set(language='ak') + assert 'remove' in item.get_json()['aliases']['ak'][0] def test_wd_search(self): - t = wbi_functions.search_entities('rivaroxaban') + t = search_entities('rivaroxaban') print('Number of results: ', len(t)) self.assertIsNot(len(t), 0) - def test_item_generator(self): - items = 
['Q408883', 'P715', 'Q18046452'] + def test_entity_generator(self): + entities = { + 'Q408883': { + 'etype': 'item', + 'ctype': 'ItemEntity' + }, 'P715': { + 'etype': 'property', + 'ctype': 'PropertyEntity' + }, 'Q18046452': { + 'etype': 'item', + 'ctype': 'ItemEntity' + }, 'L5': { + 'etype': 'lexeme', + 'ctype': 'LexemeEntity' + } + } + + entity_instances = generate_entity_instances(entities=list(entities.keys())) + + for qid, entity in entity_instances: + self.assertIn(qid, entities) + assert entity.ETYPE == entities[qid]['etype'] + assert type(entity).__name__ == entities[qid]['ctype'] + + entity_instances = generate_entity_instances(entities='Q408883') + + for qid, entity in entity_instances: + assert qid == 'Q408883' + assert entity.ETYPE == 'item' + assert type(entity).__name__ == 'ItemEntity' + + def test_rank(self): + t1 = String(value='test1', prop_nr='P1', rank='preferred') + assert t1.rank == WikibaseRank.PREFERRED + + t2 = String(value='test1', prop_nr='P1', rank=WikibaseRank.NORMAL) + assert t2.rank == WikibaseRank.NORMAL + + t2 = String(value='test1', prop_nr='P1', rank=WikibaseRank.DEPRECATED) + assert t2.get_json()['rank'] == WikibaseRank.DEPRECATED.value - item_instances = wbi_functions.generate_item_instances(items=items) + with self.assertRaises(ValueError): + String(value='test1', prop_nr='P1', rank='invalid_rank') + + def test_snaktype(self): + t1 = String(value='test1', prop_nr='P1') + t1.mainsnak.snaktype = 'novalue' + assert t1.mainsnak.snaktype == WikibaseSnakType.NO_VALUE - for qid, item in item_instances: - self.assertIn(qid, items) + t2 = String(value='test1', prop_nr='P1') + t2.mainsnak.snaktype = WikibaseSnakType.UNKNOWN_VALUE + assert t2.mainsnak.snaktype == WikibaseSnakType.UNKNOWN_VALUE + + t3 = String(value='test1', prop_nr='P1') + t3.mainsnak.snaktype = WikibaseSnakType.KNOWN_VALUE + assert t3.mainsnak.get_json()['snaktype'] == WikibaseSnakType.KNOWN_VALUE.value + + t4 = String(value='test1', prop_nr='P1') + with self.assertRaises(ValueError): + t4.mainsnak.snaktype = 'invalid_value' def test_new_item_creation(self): data = [ - wbi_datatype.String(value='test1', prop_nr='P1'), - wbi_datatype.String(value='test2', prop_nr='1'), - wbi_datatype.String(value='test3', prop_nr=1), - wbi_datatype.Math("xxx", prop_nr="P2"), - wbi_datatype.ExternalID("xxx", prop_nr="P3"), - wbi_datatype.ItemID("Q123", prop_nr="P4"), - wbi_datatype.ItemID("123", prop_nr="P4"), - wbi_datatype.ItemID(123, prop_nr="P4"), - wbi_datatype.Time(time='-0458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"), - wbi_datatype.Time(time='458-00-00T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"), - wbi_datatype.Time(time='+2021-01-01T15:15:15Z', before=1, after=2, precision=3, timezone=4, prop_nr="P5"), - wbi_datatype.Url("http://www.wikidata.org", prop_nr="P6"), - wbi_datatype.Url("https://www.wikidata.org", prop_nr="P6"), - wbi_datatype.Url("ftp://example.com", prop_nr="P6"), - wbi_datatype.Url("ssh://user@server/project.git", prop_nr="P6"), - wbi_datatype.Url("svn+ssh://user@server:8888/path", prop_nr="P6"), - wbi_datatype.MonolingualText(text="xxx", language="fr", prop_nr="P7"), - wbi_datatype.Quantity(quantity=-5.04, prop_nr="P8"), - wbi_datatype.Quantity(quantity=5.06, upper_bound=9.99, lower_bound=-2.22, unit="Q11573", prop_nr="P8"), - wbi_datatype.CommonsMedia("xxx", prop_nr="P9"), - wbi_datatype.GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr="P10"), - wbi_datatype.GeoShape("Data:xxx.map", prop_nr="P11"), - 
wbi_datatype.Property("P123", prop_nr="P12"), - wbi_datatype.Property("123", prop_nr="P12"), - wbi_datatype.Property(123, prop_nr="P12"), - wbi_datatype.TabularData("Data:Taipei+Population.tab", prop_nr="P13"), - wbi_datatype.MusicalNotation("\relative c' { c d e f | g2 g | a4 a a a | g1 |}", prop_nr="P14"), - wbi_datatype.Lexeme("L123", prop_nr="P15"), - wbi_datatype.Lexeme("123", prop_nr="P15"), - wbi_datatype.Lexeme(123, prop_nr="P15"), - wbi_datatype.Form("L123-F123", prop_nr="P16"), - wbi_datatype.Sense("L123-S123", prop_nr="P17"), - wbi_datatype.EDTF("2004-06-~01/2004-06-~20", prop_nr="P18"), - wbi_datatype.LocalMedia("DemoCat 2.png", prop_nr="P19") + String(value='test1', prop_nr='P1'), + String(value='test2', prop_nr='1'), + String(value='test3', prop_nr=1), + Math(value='xxx', prop_nr='P2'), + ExternalID(value='xxx', prop_nr='P3'), + Item(value='Q123', prop_nr='P4'), + Item(value='123', prop_nr='P4'), + Item(value=123, prop_nr='P4'), + Item(value='Item:Q123', prop_nr='P4'), + Time(time='-0458-01-01T00:00:00Z', before=1, after=2, precision=WikibaseDatePrecision.MILLION_YEARS, timezone=4, prop_nr='P5'), + Time(time='+458-01-01T00:00:00Z', before=1, after=2, precision=WikibaseDatePrecision.MILLION_YEARS, timezone=4, prop_nr='P5'), + Time(time='+2021-01-01T00:00:00Z', before=1, after=2, precision=3, timezone=4, prop_nr='P5'), + Time(time='now', before=1, after=2, precision=WikibaseDatePrecision.MILLION_YEARS, timezone=4, prop_nr='P5'), + URL(value="http://www.wikidata.org", prop_nr='P6'), + URL(value="https://www.wikidata.org", prop_nr='P6'), + URL(value="ftp://example.com", prop_nr='P6'), + URL(value="ssh://user@server/project.git", prop_nr='P6'), + URL(value="svn+ssh://user@server:8888/path", prop_nr='P6'), + MonolingualText(text='xxx', language='fr', prop_nr='P7'), + Quantity(amount=-5.04, prop_nr='P8'), + Quantity(amount=5.06, upper_bound=9.99, lower_bound=-2.22, unit='Q11573', prop_nr='P8'), + CommonsMedia(value='xxx', prop_nr='P9'), + GlobeCoordinate(latitude=1.2345, longitude=-1.2345, precision=12, prop_nr='P10'), + GeoShape(value='Data:xxx.map', prop_nr='P11'), + Property(value='P123', prop_nr='P12'), + Property(value='123', prop_nr='P12'), + Property(value=123, prop_nr='P12'), + Property(value='Property:P123', prop_nr='P12'), + TabularData(value="Data:Taipei+Population.tab", prop_nr='P13'), + MusicalNotation(value="\relative c' { c d e f | g2 g | a4 a a a | g1 |}", prop_nr='P14'), + Lexeme(value='L123', prop_nr='P15'), + Lexeme(value='123', prop_nr='P15'), + Lexeme(value=123, prop_nr='P15'), + Lexeme(value='Lexeme:L123', prop_nr='P15'), + Form(value='L123-F123', prop_nr='P16'), + Sense(value='L123-S123', prop_nr='P17') ] - core_props = {f"P{x}" for x in range(20)} for d in data: - item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=core_props) - assert item.get_json_representation() - item = wbi_core.ItemEngine(new_item=True, data=d, core_props=core_props) - assert item.get_json_representation() - item = wbi_core.ItemEngine(new_item=True, data=[d], core_props=set()) - assert item.get_json_representation() - item = wbi_core.ItemEngine(new_item=True, data=d, core_props=set()) - assert item.get_json_representation() - - item = wbi_core.ItemEngine(new_item=True, data=data, core_props=core_props) - assert item.get_json_representation() - item = wbi_core.ItemEngine(new_item=True, data=data, core_props=set()) - assert item.get_json_representation() + item = wbi.item.new().add_claims([d]) + assert item.get_json() + item = wbi.item.new().add_claims(d) + assert 
item.get_json() + + item = wbi.item.new().add_claims(data) + assert item.get_json() + + def test_new_extra_item_creation(self): + data = [ + EDTF(value='test1', prop_nr='P1'), + LocalMedia(value='test2', prop_nr='P2') + ] + + for d in data: + item = wbi.item.new().add_claims([d]) + assert item.get_json() + item = wbi.item.new().add_claims(d) + assert item.get_json() + + item = wbi.item.new().add_claims(data) + assert item.get_json() def test_get_property_list(self): - self.assertTrue(len(self.common_item.get_property_list())) + self.assertTrue(len(self.common_item.claims)) def test_count_references(self): - self.assertTrue(len(self.common_item.count_references(prop_id='P2067'))) - - def test_get_reference_properties(self): - self.assertTrue(len(self.common_item.get_reference_properties(prop_id='P2067'))) + self.assertTrue(len(self.common_item.claims.get('P2067')[0].references)) def test_get_qualifier_properties(self): - print(self.common_item.get_qualifier_properties(prop_id='P170')) - self.assertTrue(len(self.common_item.get_qualifier_properties(prop_id='P2067'))) + self.assertTrue(len(self.common_item.claims.get(property='P2067'))) diff --git a/test/test_wbi_datatype.py b/test/test_wbi_datatype.py deleted file mode 100644 index 6330668f..00000000 --- a/test/test_wbi_datatype.py +++ /dev/null @@ -1,48 +0,0 @@ -import unittest - -from wikibaseintegrator import wbi_datatype - - -class TestWbiDataType(unittest.TestCase): - def test_qualifier(self): - # Good - qualifiers = [wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_qualifier=True), - wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', is_qualifier=True), - wbi_datatype.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_qualifier=True)] - wbi_datatype.ItemID("Q123", "P123", qualifiers=qualifiers) - - qualifiers = wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_qualifier=True) - wbi_datatype.ItemID("Q123", "P123", qualifiers=qualifiers) - - # Bad - qualifiers = wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_qualifier=False) - with self.assertRaises(ValueError): - wbi_datatype.ItemID("Q123", "P123", qualifiers=qualifiers) - - bad_qualifiers = ["not a good qualifier", - wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', is_qualifier=True), - wbi_datatype.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_qualifier=True)] - with self.assertRaises(ValueError): - wbi_datatype.ItemID("Q123", "P123", qualifiers=bad_qualifiers) - - def test_references(self): - # Good - references = [wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_reference=True), - wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', is_reference=True), - wbi_datatype.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_reference=True)] - wbi_datatype.ItemID("Q123", "P123", references=[references]) - wbi_datatype.ItemID("Q123", "P123", references=references) - - references = wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_reference=True) - wbi_datatype.ItemID("Q123", "P123", references=references) - - # Bad - references = wbi_datatype.ExternalID(value='P58742', prop_nr='P352', is_reference=False) - with self.assertRaises(ValueError): - wbi_datatype.ItemID("Q123", "P123", references=references) - - bad_references = ["not a good reference", - wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', is_reference=True), - wbi_datatype.Time(time='+2001-12-31T12:01:13Z', prop_nr='P813', is_reference=True)] - with self.assertRaises(ValueError): - wbi_datatype.ItemID("Q123", "P123", qualifiers=bad_references) diff 
--git a/test/test_wbi_exceptions.py b/test/test_wbi_exceptions.py new file mode 100644 index 00000000..d535529d --- /dev/null +++ b/test/test_wbi_exceptions.py @@ -0,0 +1,136 @@ +from unittest import TestCase + +from wikibaseintegrator.wbi_exceptions import ModificationFailed, SaveFailed, SearchError + + +class TestWbiExceptions(TestCase): + @staticmethod + def test_modification_failed(): + error_dict = {'error': {'*': 'See https://test.wikidata.org/w/api.php for API usage. ' + 'Subscribe to the mediawiki-api-announce mailing list at ' + '<https://lists.wikimedia.org/postorius/lists/mediawiki-api-announce.lists.wikimedia.org/> ' + 'for notice of API deprecations and breaking changes.', + 'code': 'modification-failed', + 'info': 'Item [[Q582|Q582]] already has label "MODIFIED LABEL" ' + 'associated with language code en, using the same ' + 'description text.', + 'messages': [{'html': {'*': 'Item Q582 already has ' + 'label "MODIFIED LABEL" associated with ' + 'language code en, using the same ' + 'description text.'}, + 'name': 'wikibase-validator-label-with-description-conflict', + 'parameters': ['MODIFIED LABEL', + 'en', + '[[Q582|Q582]]']}]}, + 'servedby': 'mw1375'} + + modification_failed = ModificationFailed(error_dict['error']) + + assert str(modification_failed) == "'Item [[Q582|Q582]] already has label \"MODIFIED LABEL\" associated with language code en, using the same description text.'" + assert modification_failed.code == 'modification-failed' + assert modification_failed.info == 'Item [[Q582|Q582]] already has label "MODIFIED LABEL" associated with language code en, using the same description text.' + assert 'wikibase-validator-label-with-description-conflict' in modification_failed.messages_names + assert 'Q582' in modification_failed.get_conflicting_entity_ids + assert 'en' in modification_failed.get_languages + + def test_modification_failed_no_dict(self): + error_dict = {} + with self.assertRaises(KeyError): + ModificationFailed(error_dict['error']) + + def test_modification_failed_no_message(self): + error_dict = {'error': {'*': 'See https://test.wikidata.org/w/api.php for API usage. ' + 'Subscribe to the mediawiki-api-announce mailing list at ' + '<https://lists.wikimedia.org/postorius/lists/mediawiki-api-announce.lists.wikimedia.org/> ' + 'for notice of API deprecations and breaking changes.', + 'code': 'modification-failed', + 'info': 'Item [[Q582|Q582]] already has label "MODIFIED LABEL" ' + 'associated with language code en, using the same ' + 'description text.' + }, + 'servedby': 'mw1375'} + + with self.assertRaises(KeyError): + ModificationFailed(error_dict['error']) + + def test_failed_save_no_conflict(self): + error_dict = {'error': {'*': 'See https://test.wikidata.org/w/api.php for API usage. ' + 'Subscribe to the mediawiki-api-announce mailing list at ' + '<https://lists.wikimedia.org/postorius/lists/mediawiki-api-announce.lists.wikimedia.org/> ' + 'for notice of API deprecations and breaking changes.', + 'code': 'failed-save', + 'info': 'The save has failed.', + 'messages': [{'html': {'*': 'The save has failed.'}, + 'name': 'wikibase-api-failed-save', + 'parameters': []}]}, + 'servedby': 'mw1425'} + + failed_save = SaveFailed(error_dict['error']) + + assert failed_save.get_conflicting_entity_ids == [] + + def test_modification_failed_no_parameters(self): + error_dict = {'error': {'*': 'See https://test.wikidata.org/w/api.php for API usage. 
' + 'Subscribe to the mediawiki-api-announce mailing list at ' + '<https://lists.wikimedia.org/postorius/lists/mediawiki-api-announce.lists.wikimedia.org/> ' + 'for notice of API deprecations and breaking changes.', + 'code': 'modification-failed', + 'info': 'Item [[Q582|Q582]] already has label "MODIFIED LABEL" ' + 'associated with language code en, using the same ' + 'description text.', + 'messages': [{'html': {'*': 'Item Q582 already has ' + 'label "MODIFIED LABEL" associated with ' + 'language code en, using the same ' + 'description text.'}, + 'name': 'wikibase-validator-label-with-description-conflict', + }]}, + 'servedby': 'mw1375'} + + modification_failed = ModificationFailed(error_dict['error']) + with self.assertRaises(KeyError): + _ = modification_failed.get_languages + + @staticmethod + def test_failed_save(): + error_dict = {'error': {'*': 'See https://test.wikidata.org/w/api.php for API usage. ' + 'Subscribe to the mediawiki-api-announce mailing list at ' + '<https://lists.wikimedia.org/postorius/lists/mediawiki-api-announce.lists.wikimedia.org/> ' + 'for notice of API deprecations and breaking changes.', + 'code': 'failed-save', + 'info': 'The save has failed.', + 'messages': [{'html': {'*': 'The save has failed.'}, + 'name': 'wikibase-api-failed-save', + 'parameters': []}, + {'html': {'*': 'Property P50 already ' + 'has label "Depiction" associated with ' + 'language code en.'}, + 'name': 'wikibase-validator-label-conflict', + 'parameters': ['Depiction', + 'en', + '[[Property:P50|P50]]']}, + {'html': {'*': 'Property P50 already ' + 'has label "representación" associated ' + 'with language code es.'}, + 'name': 'wikibase-validator-label-conflict', + 'parameters': ['representación', + 'es', + '[[Property:P50|P50]]']}]}, + 'servedby': 'mw1425'} + + failed_save = SaveFailed(error_dict['error']) + + assert str(failed_save) == "'The save has failed.'" + assert failed_save.code == 'failed-save' + assert failed_save.info == 'The save has failed.' 
+        assert 'wikibase-api-failed-save' in failed_save.messages_names
+        assert 'P50' in failed_save.get_conflicting_entity_ids
+        assert len(failed_save.get_conflicting_entity_ids) == 1
+        assert 'en' in failed_save.get_languages
+
+    @staticmethod
+    def test_searcherror():
+        assert str(SearchError('SearchError')) == 'SearchError'
diff --git a/test/test_wbi_fastrun.py b/test/test_wbi_fastrun.py
index bac3d865..92c379a5 100644
--- a/test/test_wbi_fastrun.py
+++ b/test/test_wbi_fastrun.py
@@ -1,6 +1,14 @@
-from wikibaseintegrator import wbi_core, wbi_fastrun, wbi_datatype
+from collections import defaultdict
+from typing import Any

-wbi_fastrun.FastRunContainer.debug = True
+from wikibaseintegrator import WikibaseIntegrator, wbi_fastrun
+from wikibaseintegrator.datatypes import BaseDataType, ExternalID, Item
+from wikibaseintegrator.wbi_config import config as wbi_config
+from wikibaseintegrator.wbi_enums import ActionIfExists
+
+wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_wbi_fastrun.py)'
+
+wbi = WikibaseIntegrator()


 def test_query_data():
@@ -11,7 +19,7 @@
     This tests that the fast run container correctly queries data from wikidata and stores it in the appropriate format without getting references
     """
-    frc = wbi_fastrun.FastRunContainer(base_filter={'P699': ''}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine)
+    frc = wbi_fastrun.FastRunContainer(base_filter=[BaseDataType(prop_nr='P699')], base_data_type=BaseDataType)
     # get a string value
     frc._query_data('P699')
     # wikidata-item value
@@ -27,14 +35,14 @@
     d = frc.prop_data['Q10874']['P699'][statement_id]
     # d looks like: {'qual': set(), 'ref': {}, 'v': 'DOID:1432'}
     assert all(x in d for x in {'qual', 'ref', 'v'})
-    assert frc.prop_data['Q10874']['P699'][statement_id]['v'].startswith('DOID:')
+    assert frc.prop_data['Q10874']['P699'][statement_id]['v'].startswith('"DOID:')

     # item
     assert list(frc.prop_data['Q10874']['P828'].values())[0]['v'] == "Q18228398"

     # uri
     v = {x['v'] for x in frc.prop_data['Q10874']['P2888'].values()}
-    assert all(y.startswith("http") for y in v)
+    assert all(y.startswith("<http") for y in v)
[...]
     assert len(d['ref']) > 0
     ref_id = list(d['ref'].keys())[0]
     ref = d['ref'][ref_id]
@@ -87,74 +95,76 @@ def test_query_data_ref():


 class FastRunContainerFakeQueryDataEnsembl(wbi_fastrun.FastRunContainer):
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args: Any, **kwargs: Any):
         super().__init__(*args, **kwargs)
         self.prop_dt_map = {'P248': 'wikibase-item', 'P594': 'external-id'}
         self.prop_data['Q14911732'] = {'P594': {
             'fake statement id': {
                 'qual': set(),
                 'ref': {'fake ref id': {
-                    ('P248', 'Q29458763'),  # stated in ensembl Release 88
-                    ('P594', 'ENSG00000123374')}},
-                'v': 'ENSG00000123374'}}}
-        self.rev_lookup = {'ENSG00000123374': {'Q14911732'}}
+                    ('P248',
+                     'Q106833387'),
+                    ('P594',
+                     'ENSG00000123374')}},
+                'unit': '1',
+                'v': '"ENSG00000123374"'}}}
+        self.rev_lookup = defaultdict(set)
+        self.rev_lookup['"ENSG00000123374"'].add('Q14911732')


 class FastRunContainerFakeQueryDataEnsemblNoRef(wbi_fastrun.FastRunContainer):
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args: Any, **kwargs: Any):
         super().__init__(*args, **kwargs)
         self.prop_dt_map = {'P248': 'wikibase-item', 'P594': 'external-id'}
         self.prop_data['Q14911732'] = {'P594': {
             'fake statement id': {
                 'qual': set(),
-                'ref': dict(),
+                'ref': {},
                 'v': 'ENSG00000123374'}}}
-        self.rev_lookup = {'ENSG00000123374': {'Q14911732'}}
+        self.rev_lookup = defaultdict(set)
+        self.rev_lookup['"ENSG00000123374"'].add('Q14911732')


 def test_fastrun_ref_ensembl():
     # fastrun checks refs
-    frc = FastRunContainerFakeQueryDataEnsembl(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine, use_refs=True)
+    frc = FastRunContainerFakeQueryDataEnsembl(base_filter=[BaseDataType(prop_nr='P594'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType, use_refs=True)

     # statement has no ref
-    frc.debug = True
-    statements = [wbi_datatype.ExternalID(value='ENSG00000123374', prop_nr='P594')]
+    statements = [ExternalID(value='ENSG00000123374', prop_nr='P594')]
     assert frc.write_required(data=statements)

     # statement has the same ref
-    statements = [wbi_datatype.ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[wbi_datatype.ItemID("Q29458763", "P248", is_reference=True),
-                                                                                               wbi_datatype.ExternalID("ENSG00000123374", "P594", is_reference=True)]])]
+    statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[Item("Q106833387", prop_nr="P248"), ExternalID("ENSG00000123374", prop_nr="P594")]])]
     assert not frc.write_required(data=statements)

     # new statement has a different "stated in" reference
-    statements = [wbi_datatype.ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[wbi_datatype.ItemID("Q99999999999", "P248", is_reference=True),
-                                                                                               wbi_datatype.ExternalID("ENSG00000123374", "P594", is_reference=True)]])]
+    statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[Item("Q99999999999", prop_nr="P248"), ExternalID("ENSG00000123374", prop_nr="P594", )]])]
    assert frc.write_required(data=statements)

     # fastrun doesn't check references; the statement has no reference
-    frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine,
+    frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter=[BaseDataType(prop_nr='P594'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType,
                                                     use_refs=False)
-    statements = [wbi_datatype.ExternalID(value='ENSG00000123374', prop_nr='P594')]
+    statements = [ExternalID(value='ENSG00000123374', prop_nr='P594')]
     assert not frc.write_required(data=statements)

     # fastrun doesn't check references; the statement has a reference
-    frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter={'P594': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine,
+    frc = FastRunContainerFakeQueryDataEnsemblNoRef(base_filter=[BaseDataType(prop_nr='P594'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType,
                                                     use_refs=False)
-    statements = [wbi_datatype.ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[wbi_datatype.ItemID("Q123", "P31", is_reference=True)]])]
+    statements = [ExternalID(value='ENSG00000123374', prop_nr='P594', references=[[Item("Q123", prop_nr="P31")]])]
     assert not frc.write_required(data=statements)


 class FakeQueryDataAppendProps(wbi_fastrun.FastRunContainer):
     # an item with three values for the same property
-    def __init__(self, *args, **kwargs):
+    def __init__(self, *args: Any, **kwargs: Any):
         super().__init__(*args, **kwargs)
-        self.debug = True
         self.prop_dt_map = {'P527': 'wikibase-item', 'P248': 'wikibase-item', 'P594': 'external-id'}
-        self.rev_lookup = {
-            'Q24784025': {'Q3402672'},
-            'Q24743729': {'Q3402672'},
-            'Q24782625': {'Q3402672'},
-        }
+
+        self.rev_lookup = defaultdict(set)
+        self.rev_lookup['Q24784025'].add('Q3402672')
+
self.rev_lookup['Q24743729'].add('Q3402672') + self.rev_lookup['Q24782625'].add('Q3402672') + self.prop_data['Q3402672'] = {'P527': { 'Q3402672-11BA231B-857B-498B-AC4F-91D71EE007FD': {'qual': set(), 'ref': { @@ -180,22 +190,22 @@ def test_append_props(): # https://www.wikidata.org/wiki/Q3402672#P527 # don't consider refs - frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine) + frc = FakeQueryDataAppendProps(base_filter=[BaseDataType(prop_nr='P352'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType) # with append - statements = [wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', if_exists='APPEND')] - assert frc.write_required(data=statements, cqid=qid) is False + statements = [Item(value='Q24784025', prop_nr='P527')] + assert frc.write_required(data=statements, action_if_exists=ActionIfExists.APPEND_OR_REPLACE, cqid=qid) is False # with force append - statements = [wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', if_exists='FORCE_APPEND')] - assert frc.write_required(data=statements, cqid=qid) is True + statements = [Item(value='Q24784025', prop_nr='P527')] + assert frc.write_required(data=statements, action_if_exists=ActionIfExists.FORCE_APPEND, cqid=qid) is True # without append - statements = [wbi_datatype.ItemID(value='Q24784025', prop_nr='P527')] + statements = [Item(value='Q24784025', prop_nr='P527')] assert frc.write_required(data=statements, cqid=qid) is True # if we are in append mode, and the refs are different, we should write - frc = FakeQueryDataAppendProps(base_filter={'P352': '', 'P703': 'Q15978631'}, base_data_type=wbi_datatype.BaseDataType, engine=wbi_core.ItemEngine, use_refs=True) + frc = FakeQueryDataAppendProps(base_filter=[BaseDataType(prop_nr='P352'), Item(prop_nr='P703', value='Q15978631')], base_data_type=BaseDataType, use_refs=True) # with append - statements = [wbi_datatype.ItemID(value='Q24784025', prop_nr='P527', if_exists='APPEND')] - assert frc.write_required(data=statements, cqid=qid) is True + statements = [Item(value='Q24784025', prop_nr='P527')] + assert frc.write_required(data=statements, cqid=qid, action_if_exists=ActionIfExists.APPEND_OR_REPLACE) is True # without append - statements = [wbi_datatype.ItemID(value='Q24784025', prop_nr='P527')] + statements = [Item(value='Q24784025', prop_nr='P527')] assert frc.write_required(data=statements, cqid=qid) is True diff --git a/test/test_wbi_helpers.py b/test/test_wbi_helpers.py new file mode 100644 index 00000000..db6b382b --- /dev/null +++ b/test/test_wbi_helpers.py @@ -0,0 +1,75 @@ +import logging +import unittest + +import requests + +from wikibaseintegrator.wbi_config import config as wbi_config +from wikibaseintegrator.wbi_exceptions import MaxRetriesReachedException +from wikibaseintegrator.wbi_helpers import execute_sparql_query, get_user_agent, mediawiki_api_call_helper + + +def test_connection(): + wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_wbi_helpers.py)' + data = {'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'} + + mediawiki_api_call_helper(data=data, max_retries=2, retry_after=1, allow_anonymous=True) + + with unittest.TestCase().assertRaises(MaxRetriesReachedException): + mediawiki_api_call_helper(data=data, mediawiki_api_url="https://www.wikidataaaaaaa.org", max_retries=2, retry_after=1, allow_anonymous=True) + + with unittest.TestCase().assertRaises(MaxRetriesReachedException): + mediawiki_api_call_helper(data=data, 
mediawiki_api_url="https://httpbin.org/status/500", max_retries=2, retry_after=1, allow_anonymous=True) + + with unittest.TestCase().assertRaises(MaxRetriesReachedException): + mediawiki_api_call_helper(data=data, mediawiki_api_url="https://httpbin.org/status/502", max_retries=2, retry_after=1, allow_anonymous=True) + + with unittest.TestCase().assertRaises(MaxRetriesReachedException): + mediawiki_api_call_helper(data=data, mediawiki_api_url="https://httpbin.org/status/503", max_retries=2, retry_after=1, allow_anonymous=True) + + with unittest.TestCase().assertRaises(MaxRetriesReachedException): + mediawiki_api_call_helper(data=data, mediawiki_api_url="https://httpbin.org/status/504", max_retries=2, retry_after=1, allow_anonymous=True) + + with unittest.TestCase().assertRaises(requests.HTTPError): + mediawiki_api_call_helper(data=data, mediawiki_api_url="https://httpbin.org/status/400", max_retries=2, retry_after=1, allow_anonymous=True) + + +def test_user_agent(caplog): + wbi_config['USER_AGENT'] = None # Reset user agent + # Test there is no warning because of the user agent + with caplog.at_level(logging.WARNING): + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True, + user_agent='MyWikibaseBot/0.5') + assert 'WARNING' not in caplog.text + + # Test there is a warning + with caplog.at_level(logging.WARNING): + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True) + assert 'Please set an user agent' in caplog.text + + # Test if the user agent is correctly added + new_user_agent = get_user_agent(user_agent='MyWikibaseBot/0.5') + assert new_user_agent.startswith('MyWikibaseBot/0.5') + assert 'WikibaseIntegrator' in new_user_agent + + +def test_allow_anonymous(): + wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_wbi_helpers.py)' + # Test there is a warning because of allow_anonymous + with unittest.TestCase().assertRaises(ValueError): + mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, user_agent='MyWikibaseBot/0.5') + + # Test there is no warning because of allow_anonymous + assert mediawiki_api_call_helper(data={'format': 'json', 'action': 'wbgetentities', 'ids': 'Q42'}, max_retries=3, retry_after=1, allow_anonymous=True, + user_agent='MyWikibaseBot/0.5') + + +def test_sparql(): + wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_wbi_helpers.py)' + results = execute_sparql_query('''SELECT ?child ?childLabel +WHERE +{ +# ?child father Bach + ?child wdt:P22 wd:Q1339. + SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". } +}''') + assert len(results['results']['bindings']) > 1 diff --git a/test/test_wbi_login.py b/test/test_wbi_login.py index 6ac69ed5..dda8906d 100644 --- a/test/test_wbi_login.py +++ b/test/test_wbi_login.py @@ -1,24 +1,80 @@ import os import sys +import unittest import pytest -from wikibaseintegrator import wbi_login, wbi_functions - +from wikibaseintegrator import wbi_login +from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper # look for environment variables. 
if none set, don't do anything +from wikibaseintegrator.wbi_login import LoginError + WDUSER = os.getenv("WDUSER") WDPASS = os.getenv("WDPASS") +OAUTH1_CONSUMER_TOKEN_NOT_OWNER_ONLY = os.getenv("OAUTH1_CONSUMER_TOKEN_NOT_OWNER_ONLY") +OAUTH1_CONSUMER_SECRET_NOT_OWNER_ONLY = os.getenv("OAUTH1_CONSUMER_SECRET_NOT_OWNER_ONLY") +OAUTH1_CONSUMER_TOKEN = os.getenv("OAUTH1_CONSUMER_TOKEN") +OAUTH1_CONSUMER_SECRET = os.getenv("OAUTH1_CONSUMER_SECRET") +OAUTH1_ACCESS_TOKEN = os.getenv("OAUTH1_ACCESS_TOKEN") +OAUTH1_ACCESS_SECRET = os.getenv("OAUTH1_ACCESS_SECRET") +OAUTH2_CONSUMER_TOKEN = os.getenv("OAUTH2_CONSUMER_TOKEN") +OAUTH2_CONSUMER_SECRET = os.getenv("OAUTH2_CONSUMER_SECRET") def test_login(): + with unittest.TestCase().assertRaises(LoginError): + login = wbi_login.Clientlogin(user='wrong', password='wrong') + login.generate_edit_credentials() + + with unittest.TestCase().assertRaises(LoginError): + login = wbi_login.Login(user='wrong', password='wrong') + login.generate_edit_credentials() + if WDUSER and WDPASS: - wbi_login.Login(WDUSER, WDPASS) + assert wbi_login.Clientlogin(user=WDUSER, password=WDPASS) + assert wbi_login.Login(user=WDUSER, password=WDPASS) else: print("no WDUSER or WDPASS found in environment variables", file=sys.stderr) -def test_write(): +def test_oauth1(): + with unittest.TestCase().assertRaises(LoginError): + login = wbi_login.OAuth1(consumer_token='wrong', consumer_secret='wrong') + login.generate_edit_credentials() + + if OAUTH1_CONSUMER_TOKEN_NOT_OWNER_ONLY and OAUTH1_CONSUMER_SECRET_NOT_OWNER_ONLY: + wbi_login.OAuth1(consumer_token=OAUTH1_CONSUMER_TOKEN_NOT_OWNER_ONLY, consumer_secret=OAUTH1_CONSUMER_SECRET_NOT_OWNER_ONLY) + else: + print("no OAUTH1_CONSUMER_TOKEN_NOT_OWNER_ONLY or OAUTH1_CONSUMER_SECRET_NOT_OWNER_ONLY found in environment variables", file=sys.stderr) + + +def test_oauth1_access(): + with unittest.TestCase().assertRaises(LoginError): + login = wbi_login.OAuth1(consumer_token='wrong', consumer_secret='wrong', access_token='wrong', access_secret='wrong') + login.generate_edit_credentials() + + if OAUTH1_CONSUMER_TOKEN and OAUTH1_CONSUMER_SECRET and OAUTH1_ACCESS_TOKEN and OAUTH1_ACCESS_SECRET: + login = wbi_login.OAuth1(consumer_token=OAUTH1_CONSUMER_TOKEN, consumer_secret=OAUTH1_CONSUMER_SECRET, access_token=OAUTH1_ACCESS_TOKEN, access_secret=OAUTH1_ACCESS_SECRET) + login.generate_edit_credentials() + else: + print("no OAUTH1_CONSUMER_TOKEN or OAUTH1_CONSUMER_SECRET or OAUTH1_ACCESS_TOKEN or OAUTH1_ACCESS_SECRET found in environment variables", file=sys.stderr) + + +def test_oauth2(): + with unittest.TestCase().assertRaises(LoginError): + login = wbi_login.OAuth2(consumer_token='wrong', consumer_secret='wrong') + login.generate_edit_credentials() + + if OAUTH2_CONSUMER_TOKEN and OAUTH2_CONSUMER_SECRET: + login = wbi_login.OAuth2(consumer_token=OAUTH2_CONSUMER_TOKEN, consumer_secret=OAUTH2_CONSUMER_SECRET) + login.generate_edit_credentials() + else: + print("no OAUTH2_CONSUMER_TOKEN or OAUTH2_CONSUMER_SECRET found in environment variables", file=sys.stderr) + + +def test_mismatch_api_url(): if WDUSER and WDPASS: - login = wbi_login.Login(WDUSER, WDPASS) + login = wbi_login.Login(user=WDUSER, password=WDPASS) + login.generate_edit_credentials() with pytest.raises(ValueError): - wbi_functions.mediawiki_api_call_helper(data=None, login=login, mediawiki_api_url='https://unsdfdskfjljzkerezr.org/w/api.php') + mediawiki_api_call_helper(data={}, login=login, mediawiki_api_url='https://unsdfdskfjljzkerezr.org/w/api.php') diff --git a/wikibaseintegrator/__init__.py
b/wikibaseintegrator/__init__.py index e69de29b..7e2e06e9 100644 --- a/wikibaseintegrator/__init__.py +++ b/wikibaseintegrator/__init__.py @@ -0,0 +1,25 @@ +""" +WikibaseIntegrator Library +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +WikibaseIntegrator is a Wikibase library, written in Python, for human beings. +Basic read usage: + + >>> from wikibaseintegrator import WikibaseIntegrator + >>> from wikibaseintegrator.wbi_config import config + >>> config['USER_AGENT'] = 'Item Get Notebook' + >>> wbi = WikibaseIntegrator() + >>> q42 = wbi.item.get('Q42') + >>> q42.labels.get('en').value 'Douglas Adams' + +Full documentation is available at <https://wikibaseintegrator.readthedocs.io/>. +""" +import pkg_resources + +from .wikibaseintegrator import WikibaseIntegrator + +try: + __version__ = pkg_resources.get_distribution('wikibaseintegrator').version +except pkg_resources.DistributionNotFound: # pragma: no cover + __version__ = 'dev' diff --git a/wikibaseintegrator/datatypes/__init__.py b/wikibaseintegrator/datatypes/__init__.py new file mode 100644 index 00000000..03dfebef --- /dev/null +++ b/wikibaseintegrator/datatypes/__init__.py @@ -0,0 +1,18 @@ +from .basedatatype import BaseDataType +from .commonsmedia import CommonsMedia +from .externalid import ExternalID +from .form import Form +from .geoshape import GeoShape +from .globecoordinate import GlobeCoordinate +from .item import Item +from .lexeme import Lexeme +from .math import Math +from .monolingualtext import MonolingualText +from .musicalnotation import MusicalNotation +from .property import Property +from .quantity import Quantity +from .sense import Sense +from .string import String +from .tabulardata import TabularData +from .time import Time +from .url import URL diff --git a/wikibaseintegrator/datatypes/basedatatype.py b/wikibaseintegrator/datatypes/basedatatype.py new file mode 100644 index 00000000..94c62b7b --- /dev/null +++ b/wikibaseintegrator/datatypes/basedatatype.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import re +from typing import Any, List, Type, Union + +from wikibaseintegrator.models import Claim + + +class BaseDataType(Claim): + """ + The base class for all Wikibase data types; all data types inherit from it + """ + DTYPE = 'base-data-type' + subclasses: List[Type[BaseDataType]] = [] + sparql_query: str = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> '{value}' . + }} + ''' + + def __init__(self, prop_nr: Union[int, str] = None, **kwargs: Any): + """ + Constructor, will be called by all data types.
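+ + Subclasses normally call this constructor through super().__init__() and then fill self.mainsnak.datavalue in their own set_value() implementation.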
+ + :param prop_nr: The property number a Wikibase snak belongs to + """ + + super().__init__(**kwargs) + + self.mainsnak.property_number = prop_nr or None + # self.subclasses.append(self) + + # Allow registration of subclasses of BaseDataType into BaseDataType.subclasses + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + cls.subclasses.append(cls) + + def set_value(self, value: Any = None): + pass + + def get_sparql_value(self) -> str: + return '"' + self.mainsnak.datavalue['value'] + '"' + + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: + if type == 'uri': + pattern = re.compile(r'^<?(.*?)>?$') + matches = pattern.match(value) + if not matches: + return False + + self.set_value(value=matches.group(1)) + elif type == 'literal': + pattern = re.compile(r'^"?(.*?)"?$') + matches = pattern.match(value) + if not matches: + return False + + self.set_value(value=matches.group(1)) + else: + raise ValueError + + return True diff --git a/wikibaseintegrator/datatypes/commonsmedia.py b/wikibaseintegrator/datatypes/commonsmedia.py new file mode 100644 index 00000000..c444437d --- /dev/null +++ b/wikibaseintegrator/datatypes/commonsmedia.py @@ -0,0 +1,23 @@ +import re +import urllib.parse + +from wikibaseintegrator.datatypes.string import String + + +class CommonsMedia(String): + """ + Implements the Wikibase data type for Wikimedia Commons media files + """ + DTYPE = 'commonsMedia' + + def get_sparql_value(self) -> str: + return '<' + self.mainsnak.datavalue['value'] + '>' + + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: + pattern = re.compile(r'^<?(.*?)>?$') + matches = pattern.match(value) + if not matches: + return False + + self.set_value(value=urllib.parse.unquote(matches.group(1))) + return True diff --git a/wikibaseintegrator/datatypes/externalid.py b/wikibaseintegrator/datatypes/externalid.py new file mode 100644 index 00000000..c4838138 --- /dev/null +++ b/wikibaseintegrator/datatypes/externalid.py @@ -0,0 +1,8 @@ +from wikibaseintegrator.datatypes.string import String + + +class ExternalID(String): + """ + Implements the Wikibase data type 'external-id' + """ + DTYPE = 'external-id' diff --git a/wikibaseintegrator/datatypes/extra/__init__.py b/wikibaseintegrator/datatypes/extra/__init__.py new file mode 100644 index 00000000..c14a6c63 --- /dev/null +++ b/wikibaseintegrator/datatypes/extra/__init__.py @@ -0,0 +1,2 @@ +from .edtf import EDTF +from .localmedia import LocalMedia diff --git a/wikibaseintegrator/datatypes/extra/edtf.py b/wikibaseintegrator/datatypes/extra/edtf.py new file mode 100644 index 00000000..e6fa5ce2 --- /dev/null +++ b/wikibaseintegrator/datatypes/extra/edtf.py @@ -0,0 +1,9 @@ +from wikibaseintegrator.datatypes import String + + +class EDTF(String): + """ + Implements the Wikibase data type for the Wikibase Extended Date/Time Format extension. + More info at https://www.mediawiki.org/wiki/Extension:Wikibase_EDTF + """ + DTYPE = 'edtf' diff --git a/wikibaseintegrator/datatypes/extra/localmedia.py b/wikibaseintegrator/datatypes/extra/localmedia.py new file mode 100644 index 00000000..bd89a57e --- /dev/null +++ b/wikibaseintegrator/datatypes/extra/localmedia.py @@ -0,0 +1,9 @@ +from wikibaseintegrator.datatypes import String + + +class LocalMedia(String): + """ + Implements the Wikibase data type for the Wikibase Local Media extension.
+ More info at https://www.mediawiki.org/wiki/Extension:Wikibase_Local_Media + """ + DTYPE = 'localMedia' diff --git a/wikibaseintegrator/datatypes/form.py b/wikibaseintegrator/datatypes/form.py new file mode 100644 index 00000000..c0237154 --- /dev/null +++ b/wikibaseintegrator/datatypes/form.py @@ -0,0 +1,59 @@ +import re +from typing import Any + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType + + +class Form(BaseDataType): + """ + Implements the Wikibase data type 'wikibase-form' + """ + DTYPE = 'wikibase-form' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> . + }} + ''' + + def __init__(self, value: str = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param value: The form number to serve as a value using the format "L<Lexeme ID>-F<Form ID>" (example: L252248-F2) + :type value: str with the format "L<Lexeme ID>-F<Form ID>" + :param prop_nr: The property number for this claim + :type prop_nr: str with a 'P' prefix followed by digits + :param snaktype: The snak type, either 'value', 'somevalue' or 'novalue' + :type snaktype: str + :param references: List with reference objects + :type references: A list of data types subclassing BaseDataType + :param qualifiers: List with qualifier objects + :type qualifiers: A list of data types subclassing BaseDataType + :param rank: The rank of a snak with value 'preferred', 'normal' or 'deprecated' + :type rank: str + """ + + super().__init__(**kwargs) + self.set_value(value=value) + + def set_value(self, value: str = None): + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" + + if value: + pattern = re.compile(r'^L[0-9]+-F[0-9]+$') + matches = pattern.match(value) + + if not matches: + raise ValueError(f"Invalid form ID ({value}), format must be 'L[0-9]+-F[0-9]+'") + + self.mainsnak.datavalue = { + 'value': { + 'entity-type': 'form', + 'id': value + }, + 'type': 'wikibase-entityid' + } + + def get_sparql_value(self) -> str: + return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/geoshape.py b/wikibaseintegrator/datatypes/geoshape.py new file mode 100644 index 00000000..7a7210bc --- /dev/null +++ b/wikibaseintegrator/datatypes/geoshape.py @@ -0,0 +1,55 @@ +import re +from typing import Any + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType + + +class GeoShape(BaseDataType): + """ + Implements the Wikibase data type 'geo-shape' + """ + DTYPE = 'geo-shape' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{value}> .
+ }} + ''' + + def __init__(self, value: str = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param value: The GeoShape map file name in Wikimedia Commons to be linked + :param kwargs: + + :Keyword Arguments: + * *prop_nr* (``str``) -- + The property number for this claim + * *snaktype* (``str``) -- + The snak type, either 'value', 'somevalue' or 'novalue' + * *references* (``References`` or list of ``Claim``) -- + List with reference objects + * *qualifiers* (``Qualifiers``) -- + List with qualifier objects + * *rank* (``WikibaseRank``) -- + The rank of the claim, either 'preferred', 'normal' or 'deprecated' + """ + + super().__init__(**kwargs) + self.set_value(value=value) + + def set_value(self, value: str = None): + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" + + if value: + # TODO: Need to check if the value is a full URL like http://commons.wikimedia.org/data/main/Data:Paris.map + pattern = re.compile(r'^Data:((?![:|#]).)+\.map$') + matches = pattern.match(value) + if not matches: + raise ValueError("Value must start with Data: and end with .map. In addition, the title should not contain characters like colon, hash or pipe.") + + self.mainsnak.datavalue = { + 'value': value, + 'type': 'string' + } diff --git a/wikibaseintegrator/datatypes/globecoordinate.py b/wikibaseintegrator/datatypes/globecoordinate.py new file mode 100644 index 00000000..30e206ec --- /dev/null +++ b/wikibaseintegrator/datatypes/globecoordinate.py @@ -0,0 +1,90 @@ +import re +from typing import Any + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.models import Claim +from wikibaseintegrator.wbi_config import config + + +class GlobeCoordinate(BaseDataType): + """ + Implements the Wikibase data type for globe coordinates + """ + DTYPE = 'globe-coordinate' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> '{value}'^^geo:wktLiteral . + }} + ''' + + def __init__(self, latitude: float = None, longitude: float = None, altitude: float = None, precision: float = None, globe: str = None, wikibase_url: str = None, + **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param latitude: Latitude in decimal format + :param longitude: Longitude in decimal format + :param altitude: Altitude in decimal format (currently always None) + :param precision: Precision of the position measurement, default 1 / 3600 + :param globe: The globe entity concept URI (e.g. http://www.wikidata.org/entity/Q2) or 'Q2' + :param wikibase_url: The default wikibase URL, used when the globe is only an ID like 'Q2'. Defaults to wbi_config['WIKIBASE_URL'].
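+ + A minimal usage sketch (the values are illustrative; on Wikidata, P625 is 'coordinate location'): + + >>> coords = GlobeCoordinate(latitude=48.8588897, longitude=2.3200410, precision=0.0001, prop_nr='P625')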
+ """ + + super().__init__(**kwargs) + self.set_value(latitude=latitude, longitude=longitude, altitude=altitude, precision=precision, globe=globe, wikibase_url=wikibase_url) + + def set_value(self, latitude: float = None, longitude: float = None, altitude: float = None, precision: float = None, globe: str = None, wikibase_url: str = None): + # https://github.com/wikimedia/Wikibase/blob/174450de8fdeabcf97287604dbbf04d07bb5000c/repo/includes/Rdf/Values/GlobeCoordinateRdfBuilder.php#L120 + precision = precision or 1 / 3600 + globe = globe or str(config['COORDINATE_GLOBE_QID']) + wikibase_url = wikibase_url or str(config['WIKIBASE_URL']) + + if globe.startswith('Q'): + globe = wikibase_url + '/entity/' + globe + + if latitude is not None and longitude is not None: + if latitude < -90 or latitude > 90: + raise ValueError(f"latitude must be between -90 and 90, got '{latitude}'") + if longitude < -180 or longitude > 180: + raise ValueError(f"longitude must be between -180 and 180, got '{longitude}'") + + self.mainsnak.datavalue = { + 'value': { + 'latitude': latitude, + 'longitude': longitude, + 'altitude': altitude, + 'precision': precision, + 'globe': globe + }, + 'type': 'globecoordinate' + } + + def __eq__(self, other): + if isinstance(other, Claim) and other.mainsnak.datavalue['type'] == 'globecoordinate': + tmp_datavalue_self = self.mainsnak.datavalue + tmp_datavalue_other = other.mainsnak.datavalue + + tmp_datavalue_self['value']['latitude'] = round(tmp_datavalue_self['value']['latitude'], 6) + tmp_datavalue_self['value']['longitude'] = round(tmp_datavalue_self['value']['longitude'], 6) + tmp_datavalue_self['value']['precision'] = round(tmp_datavalue_self['value']['precision'], 17) + + tmp_datavalue_other['value']['latitude'] = round(tmp_datavalue_other['value']['latitude'], 6) + tmp_datavalue_other['value']['longitude'] = round(tmp_datavalue_other['value']['longitude'], 6) + tmp_datavalue_other['value']['precision'] = round(tmp_datavalue_other['value']['precision'], 17) + + return tmp_datavalue_self == tmp_datavalue_other and self.mainsnak.property_number == other.mainsnak.property_number and self.has_equal_qualifiers(other) + + return super().__eq__(other) + + def get_sparql_value(self) -> str: + return '"Point(' + str(self.mainsnak.datavalue['value']['longitude']) + ' ' + str(self.mainsnak.datavalue['value']['latitude']) + ')"' + + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: + pattern = re.compile(r'^"?Point\((.*) (.*)\)"?(?:\^\^geo:wktLiteral)?$') + matches = pattern.match(value) + if not matches: + return False + + self.set_value(longitude=float(matches.group(1)), latitude=float(matches.group(2))) + return True diff --git a/wikibaseintegrator/datatypes/item.py b/wikibaseintegrator/datatypes/item.py new file mode 100644 index 00000000..1e8e3e52 --- /dev/null +++ b/wikibaseintegrator/datatypes/item.py @@ -0,0 +1,52 @@ +import re +from typing import Any, Union + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType + + +class Item(BaseDataType): + """ + Implements the Wikibase data type 'wikibase-item' with a value being another item ID + """ + DTYPE = 'wikibase-item' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> . 
+ }} + ''' + + def __init__(self, value: Union[str, int] = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param value: The item ID to serve as the value + """ + + super().__init__(**kwargs) + self.set_value(value=value) + + def set_value(self, value: Union[str, int] = None): + assert isinstance(value, (str, int)) or value is None, f'Expected str or int, found {type(value)} ({value})' + + if value: + if isinstance(value, str): + pattern = re.compile(r'^(?:[a-zA-Z]+:)?Q?([0-9]+)$') + matches = pattern.match(value) + + if not matches: + raise ValueError(f"Invalid item ID ({value}), format must be 'Q[0-9]+'") + + value = int(matches.group(1)) + + self.mainsnak.datavalue = { + 'value': { + 'entity-type': 'item', + 'numeric-id': value, + 'id': f'Q{value}' + }, + 'type': 'wikibase-entityid' + } + + def get_sparql_value(self) -> str: + return '<{wb_url}/entity/' + self.mainsnak.datavalue['value']['id'] + '>' diff --git a/wikibaseintegrator/datatypes/lexeme.py b/wikibaseintegrator/datatypes/lexeme.py new file mode 100644 index 00000000..109da96f --- /dev/null +++ b/wikibaseintegrator/datatypes/lexeme.py @@ -0,0 +1,52 @@ +import re +from typing import Any, Union + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType + + +class Lexeme(BaseDataType): + """ + Implements the Wikibase data type 'wikibase-lexeme' + """ + DTYPE = 'wikibase-lexeme' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> . + }} + ''' + + def __init__(self, value: Union[str, int] = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param value: The lexeme number to serve as a value + """ + + super().__init__(**kwargs) + self.set_value(value=value) + + def set_value(self, value: Union[str, int] = None): + assert isinstance(value, (str, int)) or value is None, f"Expected str or int, found {type(value)} ({value})" + + if value: + if isinstance(value, str): + pattern = re.compile(r'^(?:[a-zA-Z]+:)?L?([0-9]+)$') + matches = pattern.match(value) + + if not matches: + raise ValueError(f"Invalid lexeme ID ({value}), format must be 'L[0-9]+'") + + value = int(matches.group(1)) + + self.mainsnak.datavalue = { + 'value': { + 'entity-type': 'lexeme', + 'numeric-id': value, + 'id': f'L{value}' + }, + 'type': 'wikibase-entityid' + } + + def get_sparql_value(self) -> str: + return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/math.py b/wikibaseintegrator/datatypes/math.py new file mode 100644 index 00000000..7ad3f3cc --- /dev/null +++ b/wikibaseintegrator/datatypes/math.py @@ -0,0 +1,8 @@ +from wikibaseintegrator.datatypes.string import String + + +class Math(String): + """ + Implements the Wikibase data type 'math' for mathematical formula in TEX format + """ + DTYPE = 'math' diff --git a/wikibaseintegrator/datatypes/monolingualtext.py b/wikibaseintegrator/datatypes/monolingualtext.py new file mode 100644 index 00000000..c7044e4e --- /dev/null +++ b/wikibaseintegrator/datatypes/monolingualtext.py @@ -0,0 +1,56 @@ +import re +from typing import Any + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_config import config + + +class MonolingualText(BaseDataType): + """ + Implements the Wikibase data type for Monolingual Text strings + """ + DTYPE = 'monolingualtext' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> {value} . 
+ }} + ''' + + def __init__(self, text: str = None, language: str = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param text: The language specific string to be used as the value. + :param language: Specifies the language the value belongs to. + """ + + super().__init__(**kwargs) + self.set_value(text=text, language=language) + + def set_value(self, text: str = None, language: str = None): + language = language or str(config['DEFAULT_LANGUAGE']) + + assert isinstance(text, str) or text is None, f"Expected str, found {type(text)} ({text})" + assert isinstance(language, str), f"Expected str, found {type(language)} ({language})" + + if text and language: + self.mainsnak.datavalue = { + 'value': { + 'text': text, + 'language': language + }, + 'type': 'monolingualtext' + } + + def get_sparql_value(self) -> str: + return '"' + self.mainsnak.datavalue['value']['text'].replace('"', r'\"') + '"@' + self.mainsnak.datavalue['value']['language'] + + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: + pattern = re.compile(r'^"(.*?)"@([a-z\-]*)$') + matches = pattern.match(value) + if not matches: + return False + + self.set_value(text=matches.group(1), language=matches.group(2)) + return True diff --git a/wikibaseintegrator/datatypes/musicalnotation.py b/wikibaseintegrator/datatypes/musicalnotation.py new file mode 100644 index 00000000..25d1de76 --- /dev/null +++ b/wikibaseintegrator/datatypes/musicalnotation.py @@ -0,0 +1,8 @@ +from wikibaseintegrator.datatypes.string import String + + +class MusicalNotation(String): + """ + Implements the Wikibase data type 'musical-notation' + """ + DTYPE = 'musical-notation' diff --git a/wikibaseintegrator/datatypes/property.py b/wikibaseintegrator/datatypes/property.py new file mode 100644 index 00000000..a76e4263 --- /dev/null +++ b/wikibaseintegrator/datatypes/property.py @@ -0,0 +1,53 @@ +import re +from typing import Any, Union + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType + + +class Property(BaseDataType): + """ + Implements the Wikibase data type 'property' + """ + DTYPE = 'wikibase-property' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/P{value}> . 
+ }} + ''' + + def __init__(self, value: Union[str, int] = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param value: The property number to serve as a value + :type value: str with a 'P' prefix followed by digits, or the digits alone without the 'P' prefix + """ + + super().__init__(**kwargs) + self.set_value(value=value) + + def set_value(self, value: Union[str, int] = None): + assert isinstance(value, (str, int)) or value is None, f"Expected str or int, found {type(value)} ({value})" + + if value: + if isinstance(value, str): + pattern = re.compile(r'^(?:[a-zA-Z]+:)?P?([0-9]+)$') + matches = pattern.match(value) + + if not matches: + raise ValueError(f"Invalid property ID ({value}), format must be 'P[0-9]+'") + + value = int(matches.group(1)) + + self.mainsnak.datavalue = { + 'value': { + 'entity-type': 'property', + 'numeric-id': value, + 'id': f'P{value}' + }, + 'type': 'wikibase-entityid' + } + + def get_sparql_value(self) -> str: + return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/quantity.py b/wikibaseintegrator/datatypes/quantity.py new file mode 100644 index 00000000..a9a95209 --- /dev/null +++ b/wikibaseintegrator/datatypes/quantity.py @@ -0,0 +1,89 @@ +from typing import Any, Union + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_helpers import format_amount + + +class Quantity(BaseDataType): + """ + Implements the Wikibase data type for quantities + """ + DTYPE = 'quantity' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> '{value}'^^xsd:decimal . + }} + ''' + + def __init__(self, amount: Union[str, int, float] = None, upper_bound: Union[str, int, float] = None, lower_bound: Union[str, int, float] = None, unit: Union[str, int] = '1', + wikibase_url: str = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param amount: The amount value + :param upper_bound: Upper bound of the value if it exists, e.g. for standard deviations + :param lower_bound: Lower bound of the value if it exists, e.g. for standard deviations + :param unit: The unit item URL or QID in which the amount has been measured (https://www.wikidata.org/wiki/Wikidata:Units). + The default is dimensionless, represented by '1' + :param wikibase_url: The default wikibase URL, used when the unit is only an ID like 'Q2'. Defaults to wbi_config['WIKIBASE_URL'].
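+ + A minimal usage sketch (the identifiers are illustrative; on Wikidata, P2067 is 'mass' and Q11570 is 'kilogram'): + + >>> mass = Quantity(amount=42.5, unit='Q11570', prop_nr='P2067')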
+ """ + + super().__init__(**kwargs) + self.set_value(amount=amount, upper_bound=upper_bound, lower_bound=lower_bound, unit=unit, wikibase_url=wikibase_url) + + def set_value(self, amount: Union[str, int, float] = None, upper_bound: Union[str, int, float] = None, lower_bound: Union[str, int, float] = None, unit: Union[str, int] = '1', + wikibase_url: str = None): + wikibase_url = wikibase_url or str(config['WIKIBASE_URL']) + + unit = str(unit or '1') + + if unit.startswith('Q'): + unit = wikibase_url + '/entity/' + unit + + if amount is not None: + amount = format_amount(amount) + unit = str(unit) + if upper_bound: + upper_bound = format_amount(upper_bound) + if lower_bound: + lower_bound = format_amount(lower_bound) + + # Integrity checks for value and bounds + try: + for i in [amount, upper_bound, lower_bound]: + if i: + float(i) + except ValueError as error: + raise ValueError("Value, bounds and units must parse as integers or float") from error + + if (lower_bound and upper_bound) and (float(lower_bound) > float(upper_bound) or float(lower_bound) > float(amount)): + raise ValueError("Lower bound too large") + + if upper_bound and float(upper_bound) < float(amount): + raise ValueError("Upper bound too small") + + self.mainsnak.datavalue = { + 'value': { + 'amount': amount, + 'unit': unit, + 'upperBound': upper_bound, + 'lowerBound': lower_bound + }, + 'type': 'quantity' + } + + # remove bounds from json if they are undefined + if not upper_bound: + del self.mainsnak.datavalue['value']['upperBound'] + + if not lower_bound: + del self.mainsnak.datavalue['value']['lowerBound'] + + def get_sparql_value(self) -> str: + return '"' + format_amount(self.mainsnak.datavalue['value']['amount']) + '"^^xsd:decimal' + + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: + self.set_value(amount=value, unit=unit) + return True diff --git a/wikibaseintegrator/datatypes/sense.py b/wikibaseintegrator/datatypes/sense.py new file mode 100644 index 00000000..6dcbd826 --- /dev/null +++ b/wikibaseintegrator/datatypes/sense.py @@ -0,0 +1,48 @@ +import re +from typing import Any + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType + + +class Sense(BaseDataType): + """ + Implements the Wikibase data type 'wikibase-sense' + """ + DTYPE = 'wikibase-sense' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> . 
+ }} + ''' + + def __init__(self, value: str = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param value: Value using the format "L<Lexeme ID>-S<Sense ID>" (example: L252248-S123) + """ + + super().__init__(**kwargs) + self.set_value(value=value) + + def set_value(self, value: str = None): + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" + + if value: + pattern = re.compile(r'^L[0-9]+-S[0-9]+$') + matches = pattern.match(value) + + if not matches: + raise ValueError(f"Invalid sense ID ({value}), format must be 'L[0-9]+-S[0-9]+'") + + self.mainsnak.datavalue = { + 'value': { + 'entity-type': 'sense', + 'id': value + }, + 'type': 'wikibase-entityid' + } + + def get_sparql_value(self) -> str: + return self.mainsnak.datavalue['value']['id'] diff --git a/wikibaseintegrator/datatypes/string.py b/wikibaseintegrator/datatypes/string.py new file mode 100644 index 00000000..a6afd761 --- /dev/null +++ b/wikibaseintegrator/datatypes/string.py @@ -0,0 +1,30 @@ +from typing import Any + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType + + +class String(BaseDataType): + """ + Implements the Wikibase data type 'string' + """ + + DTYPE = 'string' + + def __init__(self, value: str = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param value: The string to be used as the value + """ + + super().__init__(**kwargs) + self.set_value(value=value) + + def set_value(self, value: str = None): + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" + + if value: + self.mainsnak.datavalue = { + 'value': value, + 'type': 'string' + } diff --git a/wikibaseintegrator/datatypes/tabulardata.py b/wikibaseintegrator/datatypes/tabulardata.py new file mode 100644 index 00000000..118e75ca --- /dev/null +++ b/wikibaseintegrator/datatypes/tabulardata.py @@ -0,0 +1,36 @@ +import re +from typing import Any + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType + + +class TabularData(BaseDataType): + """ + Implements the Wikibase data type 'tabular-data' + """ + DTYPE = 'tabular-data' + + def __init__(self, value: str = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param value: Reference to a tabular data file on Wikimedia Commons. + """ + + super().__init__(**kwargs) + self.set_value(value=value) + + def set_value(self, value: str = None): + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" + + if value: + # TODO: Need to check if the value is a full URL like http://commons.wikimedia.org/data/main/Data:Taipei+Population.tab + pattern = re.compile(r'^Data:((?![:|#]).)+\.tab$') + matches = pattern.match(value) + if not matches: + raise ValueError("Value must start with Data: and end with .tab.
In addition, the title should not contain characters like colon, hash or pipe.") + + self.mainsnak.datavalue = { + 'value': value, + 'type': 'string' + } diff --git a/wikibaseintegrator/datatypes/time.py b/wikibaseintegrator/datatypes/time.py new file mode 100644 index 00000000..6c9a480d --- /dev/null +++ b/wikibaseintegrator/datatypes/time.py @@ -0,0 +1,84 @@ +import datetime +import re +from typing import Any, Union + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_enums import WikibaseDatePrecision + + +class Time(BaseDataType): + """ + Implements the Wikibase data type with date and time values + """ + DTYPE = 'time' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> '{value}'^^xsd:dateTime . + }} + ''' + + def __init__(self, time: str = None, before: int = 0, after: int = 0, precision: Union[int, WikibaseDatePrecision] = WikibaseDatePrecision.DAY, timezone: int = 0, + calendarmodel: str = None, wikibase_url: str = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param time: Explicit value for point in time, represented as a timestamp resembling ISO 8601 + :param prop_nr: The property number for this claim + :param before: Explicit integer value for how many units before the given time it could be. + The unit is given by the precision. + :param after: Explicit integer value for how many units after the given time it could be. + The unit is given by the precision. + :param precision: Precision value for dates and time as specified in the Wikibase data model + (https://www.wikidata.org/wiki/Special:ListDatatypes#time) + :param timezone: The timezone which applies to the date and time as specified in the Wikibase data model + :param calendarmodel: The calendar model used for the date. URL to the Wikibase calendar model item or the QID. + :param wikibase_url: The default wikibase URL, used when the calendarmodel is only an ID like 'Q1985727'. Defaults to wbi_config['WIKIBASE_URL'].
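+ + A minimal usage sketch (the property is illustrative; on Wikidata, P577 is 'publication date'): + + >>> from wikibaseintegrator.wbi_enums import WikibaseDatePrecision + >>> pubdate = Time(time='+1979-10-12T00:00:00Z', precision=WikibaseDatePrecision.DAY, prop_nr='P577')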
+ """ + + super().__init__(**kwargs) + self.set_value(time=time, before=before, after=after, precision=precision, timezone=timezone, calendarmodel=calendarmodel, wikibase_url=wikibase_url) + + def set_value(self, time: str = None, before: int = 0, after: int = 0, precision: Union[int, WikibaseDatePrecision] = WikibaseDatePrecision.DAY, timezone: int = 0, + calendarmodel: str = None, wikibase_url: str = None): + calendarmodel = calendarmodel or str(config['CALENDAR_MODEL_QID']) + wikibase_url = wikibase_url or str(config['WIKIBASE_URL']) + + if calendarmodel.startswith('Q'): + calendarmodel = wikibase_url + '/entity/' + calendarmodel + + assert isinstance(time, str) or time is None, f"Expected str, found {type(time)} ({time})" + + if time: + if time == "now": + time = datetime.datetime.utcnow().strftime("+%Y-%m-%dT00:00:00Z") + + if not (time.startswith("+") or time.startswith("-")): + time = "+" + time + # Pattern with precision lower than day supported + # pattern = re.compile(r'^[+-][0-9]{1,16}-(?:1[0-2]|0[1-9])-(?:3[01]|0[1-9]|[12][0-9])T(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]Z$') + pattern = re.compile(r'^[+-][0-9]{1,16}-(?:1[0-2]|0[1-9])-(?:3[01]|0[1-9]|[12][0-9])T00:00:00Z$') + matches = pattern.match(time) + if not matches: + raise ValueError(f"Time value ({time}) must be a string in the following format: '+%Y-%m-%dT00:00:00Z'") + + if isinstance(precision, int): + precision = WikibaseDatePrecision(precision) + + if precision not in WikibaseDatePrecision: + raise ValueError("Invalid value for time precision, see https://www.mediawiki.org/wiki/Wikibase/DataModel/JSON#time") + + self.mainsnak.datavalue = { + 'value': { + 'time': time, + 'before': before, + 'after': after, + 'precision': precision.value, + 'timezone': timezone, + 'calendarmodel': calendarmodel + }, + 'type': 'time' + } + + def get_sparql_value(self) -> str: + return self.mainsnak.datavalue['value']['time'] diff --git a/wikibaseintegrator/datatypes/url.py b/wikibaseintegrator/datatypes/url.py new file mode 100644 index 00000000..fc0184dc --- /dev/null +++ b/wikibaseintegrator/datatypes/url.py @@ -0,0 +1,55 @@ +import re +from typing import Any + +from wikibaseintegrator.datatypes.basedatatype import BaseDataType + + +class URL(BaseDataType): + """ + Implements the Wikibase data type for URL strings + """ + DTYPE = 'url' + sparql_query = ''' + SELECT * WHERE {{ + ?item_id <{wb_url}/prop/{pid}> ?s . + ?s <{wb_url}/prop/statement/{pid}> <{value}> . 
+ }} + ''' + + def __init__(self, value: str = None, **kwargs: Any): + """ + Constructor, calls the superclass BaseDataType + + :param value: The URL to be used as the value + """ + + super().__init__(**kwargs) + self.set_value(value=value) + + def set_value(self, value: str = None): + assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" + + if value: + pattern = re.compile(r'^([a-z][a-z\d+.-]*):([^][<>\"\x00-\x20\x7F])+$') + matches = pattern.match(value) + + if not matches: + raise ValueError(f"Invalid URL {value}") + + self.mainsnak.datavalue = { + 'value': value, + 'type': 'string' + } + + def get_sparql_value(self) -> str: + return '<' + self.mainsnak.datavalue['value'] + '>' + + def parse_sparql_value(self, value, type='literal', unit='1') -> bool: + pattern = re.compile(r'^<?(.*?)>?$') + matches = pattern.match(value) + if not matches: + return False + + self.set_value(value=matches.group(1)) + + return True diff --git a/wikibaseintegrator/entities/__init__.py b/wikibaseintegrator/entities/__init__.py new file mode 100644 index 00000000..d5b9110c --- /dev/null +++ b/wikibaseintegrator/entities/__init__.py @@ -0,0 +1,5 @@ +from .baseentity import BaseEntity +from .item import ItemEntity +from .lexeme import LexemeEntity +from .mediainfo import MediaInfoEntity +from .property import PropertyEntity diff --git a/wikibaseintegrator/entities/baseentity.py b/wikibaseintegrator/entities/baseentity.py new file mode 100644 index 00000000..ffd8543f --- /dev/null +++ b/wikibaseintegrator/entities/baseentity.py @@ -0,0 +1,316 @@ +from __future__ import annotations + +import logging +from copy import copy +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union + +import ujson + +from wikibaseintegrator import wbi_fastrun +from wikibaseintegrator.datatypes import BaseDataType +from wikibaseintegrator.models.claims import Claim, Claims +from wikibaseintegrator.wbi_enums import ActionIfExists +from wikibaseintegrator.wbi_exceptions import MissingEntityException, ModificationFailed, MWApiError +from wikibaseintegrator.wbi_helpers import delete_page, mediawiki_api_call_helper +from wikibaseintegrator.wbi_login import _Login + +if TYPE_CHECKING: + from wikibaseintegrator import WikibaseIntegrator + +log = logging.getLogger(__name__) + + +class BaseEntity: + ETYPE = 'base-entity' + + def __init__(self, api: 'WikibaseIntegrator' = None, title: str = None, pageid: int = None, lastrevid: int = None, type: str = None, id: str = None, claims: Claims = None, + is_bot: bool = None, login: _Login = None): + if not api: + from wikibaseintegrator import WikibaseIntegrator + self.api = WikibaseIntegrator() + else: + self.api = copy(api) + + self.api.is_bot = is_bot or self.api.is_bot + self.api.login = login or self.api.login + + self.title = title + self.pageid = pageid + self.lastrevid = lastrevid + self.type = str(type or self.ETYPE) + self.id = id + self.claims = claims or Claims() + + @property + def api(self) -> WikibaseIntegrator: + return self.__api + + @api.setter + def api(self, value: WikibaseIntegrator): + from wikibaseintegrator import WikibaseIntegrator + if not isinstance(value, WikibaseIntegrator): + raise TypeError + self.__api = value + + @property + def title(self) -> Optional[str]: + return self.__title + + @title.setter + def title(self, value: Optional[str]): + self.__title = value + + @property + def pageid(self) -> Union[str, int, None]: + return self.__pageid + + @pageid.setter + def pageid(self, value: Union[str, int, None]): + if
isinstance(value, str): + self.__pageid: Union[str, int, None] = int(value) + else: + self.__pageid = value + + @property + def lastrevid(self) -> Optional[int]: + return self.__lastrevid + + @lastrevid.setter + def lastrevid(self, value: Optional[int]): + self.__lastrevid = value + + @property + def type(self) -> str: + return self.__type + + @type.setter + def type(self, value: str): + self.__type = value + + @property + def id(self) -> Optional[str]: + return self.__id + + @id.setter + def id(self, value: Optional[str]): + self.__id = value + + @property + def claims(self) -> Claims: + return self.__claims + + @claims.setter + def claims(self, value: Claims): + if not isinstance(value, Claims): + raise TypeError + self.__claims = value + + def add_claims(self, claims: Union[Claim, List[Claim], Claims], action_if_exists: ActionIfExists = ActionIfExists.APPEND_OR_REPLACE) -> BaseEntity: + """ + + :param claims: A Claim, list of Claim or just a Claims object to add to this Claims object. + :param action_if_exists: Replace or append the statement. You can force appending even if an equivalent statement already exists. + KEEP: The original claim will be kept and the new one will not be added (because there is already one with this property number) + APPEND_OR_REPLACE: The new claim will be added only if the new one is different (by comparing values) + FORCE_APPEND: The new claim will be added even if it already exists + REPLACE_ALL: The new claim will replace the old one + :return: The updated entity object. + """ + + self.claims.add(claims=claims, action_if_exists=action_if_exists) + + return self + + def get_json(self) -> Dict[str, Union[str, Dict[str, List]]]: + """ + To get the dict equivalent of the JSON representation of the entity. + + :return: A dict representation of the entity. + """ + json_data: Dict = { + 'type': self.type, + 'claims': self.claims.get_json() + } + if self.id: + json_data['id'] = self.id + + return json_data + + def from_json(self, json_data: Dict[str, Any]) -> BaseEntity: + """ + Import a dictionary into BaseEntity attributes.
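+ Only the generic fields (title, pageid, lastrevid, type, id and claims) are handled here; entity subclasses such as ItemEntity or LexemeEntity extend this method with their own fields (labels, lemmas, forms, ...).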
+ + :param json_data: An entity dictionary as returned by the MediaWiki API + :return: The updated BaseEntity object. + """ + if 'missing' in json_data: # TODO: 1.35 compatibility + raise MissingEntityException('The MW API returned that the entity was missing.') + + if 'title' in json_data: # TODO: 1.35 compatibility + self.title = str(json_data['title']) + if 'pageid' in json_data: # TODO: 1.35 compatibility + self.pageid = int(json_data['pageid']) + self.lastrevid = int(json_data['lastrevid']) + self.type = str(json_data['type']) + self.id = str(json_data['id']) + if 'claims' in json_data: # 'claims' is named 'statements' in Wikimedia Commons MediaInfo + self.claims = Claims().from_json(json_data['claims']) + + return self + + # noinspection PyMethodMayBeStatic + def _get(self, entity_id: str, login: _Login = None, allow_anonymous: bool = True, is_bot: bool = None, **kwargs: Any) -> Dict: # pylint: disable=no-self-use + """ + Retrieve an entity in JSON representation from the Wikibase instance + + :param entity_id: The ID of the entity to retrieve + :param login: A login instance + :param allow_anonymous: Force a check if the query can be anonymous or not + :param is_bot: Add the bot flag to the query + :param kwargs: More arguments for Python requests + :return: A Python dictionary representation of the JSON response + """ + + params = { + 'action': 'wbgetentities', + 'ids': entity_id, + 'format': 'json' + } + + login = login or self.api.login + is_bot = is_bot if is_bot is not None else self.api.is_bot + + return mediawiki_api_call_helper(data=params, login=login, allow_anonymous=allow_anonymous, is_bot=is_bot, **kwargs) + + def clear(self, **kwargs: Any) -> Dict[str, Any]: + """ + Use the `clear` parameter of the `wbeditentity` API call to clear the content of the entity. + The entity will be updated with an empty dictionary. + + :param kwargs: More arguments for _write() and Python requests + :return: A dictionary representation of the edited Entity + """ + return self._write(data={}, clear=True, **kwargs) + + def _write(self, data: Dict = None, summary: str = None, login: _Login = None, allow_anonymous: bool = False, clear: bool = False, is_bot: bool = None, **kwargs: Any) -> Dict[ + str, Any]: + """ + Writes the entity JSON to the Wikibase instance and, after a successful write, returns the "entity" part of the response. + + :param data: The serialized object that is used as the data source. A newly created entity will be assigned an 'id'. + :param summary: A summary of the edit + :param login: A login instance + :param allow_anonymous: Force a check if the query can be anonymous or not + :param clear: If set, the complete entity is emptied before proceeding. The entity will not be saved before it is filled with the "data", possibly with parts excluded.
+ :param is_bot: Add the bot flag to the query + :param kwargs: More arguments for Python requests + :return: A dictionary representation of the edited Entity + """ + + data = data or {} + + # if all_claims: + # data = json.JSONEncoder().encode(self.json_representation) + # else: + # new_json_repr = {k: self.json_representation[k] for k in set(list(self.json_representation.keys())) - {'claims'}} + # new_json_repr['claims'] = {} + # for claim in self.json_representation['claims']: + # if [True for x in self.json_representation['claims'][claim] if 'id' not in x or 'remove' in x]: + # new_json_repr['claims'][claim] = copy.deepcopy(self.json_representation['claims'][claim]) + # for statement in new_json_repr['claims'][claim]: + # if 'id' in statement and 'remove' not in statement: + # new_json_repr['claims'][claim].remove(statement) + # if not new_json_repr['claims'][claim]: + # new_json_repr['claims'].pop(claim) + # data = json.JSONEncoder().encode(new_json_repr) + + payload: Dict[str, Any] = { + 'action': 'wbeditentity', + 'data': ujson.dumps(data), + 'format': 'json', + 'summary': summary + } + + if not summary: + payload.pop('summary') + + is_bot = is_bot if is_bot is not None else self.api.is_bot + if is_bot: + payload.update({'bot': ''}) + + if clear: + payload.update({'clear': True}) + + if self.id: + payload.update({'id': self.id}) + else: + payload.update({'new': self.type}) + + if self.lastrevid: + payload.update({'baserevid': self.lastrevid}) + + login = login or self.api.login + + try: + json_result: dict = mediawiki_api_call_helper(data=payload, login=login, allow_anonymous=allow_anonymous, is_bot=is_bot, **kwargs) + except Exception: + logging.exception('Error while writing to the Wikibase instance') + raise + else: + if 'error' in json_result: + if 'code' in json_result['error'] and json_result['error']['code'] == 'modification-failed': + raise ModificationFailed(json_result['error']) + + raise MWApiError(json_result) + + return json_result['entity'] + + def delete(self, login: _Login = None, allow_anonymous: bool = False, is_bot: bool = None, **kwargs: Any): + """ + Delete the current entity. Use the pageid first if available and fall back to the page title. + + :param login: A wbi_login.Login instance + :param allow_anonymous: Allow an unidentified edit to the MediaWiki API (default False) + :param is_bot: Flag the edit as a bot + :param reason: Reason for the deletion. If not set, an automatically generated reason will be used. + :param deletetalk: Delete the talk page, if it exists. + :param kwargs: Any additional keyword arguments to pass to mediawiki_api_call_helper and requests.request + :return: The data returned by the API as a dictionary + """ + + login = login or self.api.login + + if not self.pageid and not self.title: + raise ValueError("A pageid or a page title attribute must be set before deleting an entity object.") + + # If there is no pageid, fall back to using the page title. It's not the preferred method.
+ if not self.pageid: + return delete_page(title=self.title, pageid=None, login=login, allow_anonymous=allow_anonymous, is_bot=is_bot, **kwargs) + else: + if not isinstance(self.pageid, int): + raise ValueError(f"The entity must have a pageid attribute correctly set ({self.pageid})") + + return delete_page(title=None, pageid=self.pageid, login=login, allow_anonymous=allow_anonymous, is_bot=is_bot, **kwargs) + + def write_required(self, base_filter: List[BaseDataType | List[BaseDataType]] = None, action_if_exists: ActionIfExists = ActionIfExists.REPLACE_ALL, **kwargs: Any) -> bool: + fastrun_container = wbi_fastrun.get_fastrun_container(base_filter=base_filter, **kwargs) + + if base_filter is None: + base_filter = [] + + claims_to_check = [] + for claim in self.claims: + if claim.mainsnak.property_number in base_filter: + claims_to_check.append(claim) + + # TODO: Add check_language_data + + return fastrun_container.write_required(data=claims_to_check, cqid=self.id, action_if_exists=action_if_exists) + + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( # pylint: disable=consider-using-f-string + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs="\r\n\t ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), + ) diff --git a/wikibaseintegrator/entities/item.py b/wikibaseintegrator/entities/item.py new file mode 100644 index 00000000..227fe2cf --- /dev/null +++ b/wikibaseintegrator/entities/item.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +import re +from typing import Any, Dict, Union + +from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models import LanguageValues +from wikibaseintegrator.models.aliases import Aliases +from wikibaseintegrator.models.descriptions import Descriptions +from wikibaseintegrator.models.labels import Labels +from wikibaseintegrator.models.sitelinks import Sitelinks + + +class ItemEntity(BaseEntity): + ETYPE = 'item' + + def __init__(self, labels: Labels = None, descriptions: Descriptions = None, aliases: Aliases = None, sitelinks: Sitelinks = None, **kwargs: Any) -> None: + """ + + :param api: + :param labels: + :param descriptions: + :param aliases: + :param sitelinks: + :param kwargs: + """ + super().__init__(**kwargs) + + # Item, Property and MediaInfo specific + self.labels: LanguageValues = labels or Labels() + self.descriptions: LanguageValues = descriptions or Descriptions() + self.aliases = aliases or Aliases() + + # Item specific + self.sitelinks = sitelinks or Sitelinks() + + @property + def labels(self) -> Labels: + return self.__labels + + @labels.setter + def labels(self, labels: Labels): + if not isinstance(labels, Labels): + raise TypeError + self.__labels = labels + + @property + def descriptions(self) -> Descriptions: + return self.__descriptions + + @descriptions.setter + def descriptions(self, descriptions: Descriptions): + if not isinstance(descriptions, Descriptions): + raise TypeError + self.__descriptions = descriptions + + @property + def aliases(self) -> Aliases: + return self.__aliases + + @aliases.setter + def aliases(self, aliases: Aliases): + if not isinstance(aliases, Aliases): + raise TypeError + self.__aliases = aliases + + @property + def sitelinks(self) -> Sitelinks: + return self.__sitelinks + + @sitelinks.setter + def sitelinks(self, sitelinks: Sitelinks): + if not isinstance(sitelinks, Sitelinks): + raise TypeError + self.__sitelinks = sitelinks + + def new(self, **kwargs: Any) -> ItemEntity: + 
return ItemEntity(api=self.api, **kwargs) + + def get(self, entity_id: Union[str, int] = None, **kwargs: Any) -> ItemEntity: + """ + Request the MediaWiki API to get data for the entity specified in the argument. + + :param entity_id: The entity_id of the Item entity you want. Can be an int or a string starting with a 'Q'. + :param kwargs: + :return: an ItemEntity instance + """ + + if entity_id is None and self.id is not None: + entity_id = self.id + elif entity_id is None: + raise ValueError("You must provide an entity_id") + + if isinstance(entity_id, str): + pattern = re.compile(r'^(?:[a-zA-Z]+:)?Q?([0-9]+)$') + matches = pattern.match(entity_id) + + if not matches: + raise ValueError(f"Invalid item ID ({entity_id}), format must be 'Q[0-9]+'") + + entity_id = int(matches.group(1)) + + if entity_id < 1: + raise ValueError("Item ID must be greater than 0") + + entity_id = f'Q{entity_id}' + json_data = super()._get(entity_id=entity_id, **kwargs) + return ItemEntity(api=self.api).from_json(json_data=json_data['entities'][entity_id]) + + def get_json(self) -> Dict[str, Union[str, Dict]]: + """ + Get the dict equivalent of the JSON representation of the Item. + + :return: A dict representation of the Item. + """ + return { + 'labels': self.labels.get_json(), + 'descriptions': self.descriptions.get_json(), + 'aliases': self.aliases.get_json(), + **super().get_json() + } + + def from_json(self, json_data: Dict[str, Any]) -> ItemEntity: + super().from_json(json_data=json_data) + + self.labels = Labels().from_json(json_data['labels']) + self.descriptions = Descriptions().from_json(json_data['descriptions']) + self.aliases = Aliases().from_json(json_data['aliases']) + self.sitelinks = Sitelinks().from_json(json_data['sitelinks']) + + return self + + def write(self, **kwargs: Any) -> ItemEntity: + """ + Write the ItemEntity data to the Wikibase instance and return the ItemEntity object returned by the instance. + Extends :func:`~wikibaseintegrator.entities.BaseEntity._write`. + + :param data: The serialized object that is used as the data source. A newly created entity will be assigned an 'id'.
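# --- Editor's sketch (not part of the patch): the ItemEntity.get()/write() round trip.
# Assumes the `wbi.item` facade introduced elsewhere in this PR; P31/Q5 are
# merely illustrative identifiers and the write call is commented out because
# it needs a login.
from wikibaseintegrator import WikibaseIntegrator
from wikibaseintegrator.datatypes import Item

wbi = WikibaseIntegrator()
item = wbi.item.get('Q42')  # also accepts the int 42 or a prefixed form such as 'wd:Q42'
print(item.labels.get('en'))
item.claims.add(Item(value='Q5', prop_nr='P31'))
# item.write(summary='Add P31 claim')  # anonymous writes are refused unless allow_anonymous=True
# --- end of editor's sketch ---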
+ :param summary: A summary of the edit + :param login: A login instance + :param allow_anonymous: Force a check if the query can be anonymous or not + :param clear: Clear the existing entity before updating + :param is_bot: Add the bot flag to the query + :param kwargs: More arguments for Python requests + :return: an ItemEntity representing the response from the instance + """ + json_data = super()._write(data=self.get_json(), **kwargs) + return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/lexeme.py b/wikibaseintegrator/entities/lexeme.py new file mode 100644 index 00000000..997fc53b --- /dev/null +++ b/wikibaseintegrator/entities/lexeme.py @@ -0,0 +1,130 @@ +from __future__ import annotations + +import re +from typing import Any, Dict, Optional, Union + +from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models.forms import Forms +from wikibaseintegrator.models.lemmas import Lemmas +from wikibaseintegrator.models.senses import Senses +from wikibaseintegrator.wbi_config import config + + +class LexemeEntity(BaseEntity): + ETYPE = 'lexeme' + + def __init__(self, lemmas: Lemmas = None, lexical_category: str = None, language: str = None, forms: Forms = None, senses: Senses = None, **kwargs: Any): + super().__init__(**kwargs) + + self.lemmas: Lemmas = lemmas or Lemmas() + self.lexical_category: Optional[str] = lexical_category + self.language: str = str(language or config['DEFAULT_LEXEME_LANGUAGE']) + self.forms: Forms = forms or Forms() + self.senses: Senses = senses or Senses() + + @property + def lemmas(self) -> Lemmas: + return self.__lemmas + + @lemmas.setter + def lemmas(self, lemmas: Lemmas): + if not isinstance(lemmas, Lemmas): + raise TypeError + self.__lemmas = lemmas + + @property + def lexical_category(self) -> Optional[str]: + return self.__lexical_category + + @lexical_category.setter + def lexical_category(self, lexical_category: Optional[str]): + self.__lexical_category = lexical_category + + @property + def language(self) -> str: + return self.__language + + @language.setter + def language(self, language: str): + self.__language = language + + @property + def forms(self) -> Forms: + return self.__forms + + @forms.setter + def forms(self, forms: Forms): + if not isinstance(forms, Forms): + raise TypeError + self.__forms = forms + + @property + def senses(self) -> Senses: + return self.__senses + + @senses.setter + def senses(self, senses: Senses): + if not isinstance(senses, Senses): + raise TypeError + self.__senses = senses + + def new(self, **kwargs: Any) -> LexemeEntity: + return LexemeEntity(api=self.api, **kwargs) + + def get(self, entity_id: Union[str, int], **kwargs: Any) -> LexemeEntity: + if isinstance(entity_id, str): + pattern = re.compile(r'^(?:[a-zA-Z]+:)?L?([0-9]+)$') + matches = pattern.match(entity_id) + + if not matches: + raise ValueError(f"Invalid lexeme ID ({entity_id}), format must be 'L[0-9]+'") + + entity_id = int(matches.group(1)) + + if entity_id < 1: + raise ValueError("Lexeme ID must be greater than 0") + + entity_id = f'L{entity_id}' + json_data = super()._get(entity_id=entity_id, **kwargs) + return LexemeEntity(api=self.api).from_json(json_data=json_data['entities'][entity_id]) + + def get_json(self) -> Dict[str, Union[str, Dict]]: + json_data: Dict = { + 'lemmas': self.lemmas.get_json(), + 'language': self.language, + 'forms': self.forms.get_json(), + 'senses': self.senses.get_json(), + **super().get_json() + } + + if self.lexical_category: + json_data['lexicalCategory'] =
self.lexical_category + + return json_data + + def from_json(self, json_data: Dict[str, Any]) -> LexemeEntity: + super().from_json(json_data=json_data) + + self.lemmas = Lemmas().from_json(json_data['lemmas']) + self.lexical_category = str(json_data['lexicalCategory']) + self.language = str(json_data['language']) + self.forms = Forms().from_json(json_data['forms']) + self.senses = Senses().from_json(json_data['senses']) + + return self + + def write(self, **kwargs: Any) -> LexemeEntity: + """ + Write the LexemeEntity data to the Wikibase instance and return the LexemeEntity object returned by the instance. + + :param data: The serialized object that is used as the data source. A newly created entity will be assigned an 'id'. + :param summary: A summary of the edit + :param login: A login instance + :param allow_anonymous: Force a check if the query can be anonymous or not + :param clear: Clear the existing entity before updating + :param is_bot: Add the bot flag to the query + :param kwargs: More arguments for Python requests + :return: a LexemeEntity representing the response from the instance + """ + json_data = super()._write(data=self.get_json(), **kwargs) + return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/mediainfo.py b/wikibaseintegrator/entities/mediainfo.py new file mode 100644 index 00000000..a0a5dbbf --- /dev/null +++ b/wikibaseintegrator/entities/mediainfo.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import re +from typing import Any, Dict, List, Union + +from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models import Claims, LanguageValues +from wikibaseintegrator.models.aliases import Aliases +from wikibaseintegrator.models.descriptions import Descriptions +from wikibaseintegrator.models.labels import Labels +from wikibaseintegrator.wbi_helpers import mediawiki_api_call_helper + + +class MediaInfoEntity(BaseEntity): + ETYPE = 'mediainfo' + + def __init__(self, labels: Labels = None, descriptions: Descriptions = None, aliases: Aliases = None, **kwargs: Any) -> None: + """ + + :param api: + :param labels: + :param descriptions: + :param aliases: + :param kwargs: + """ + super().__init__(**kwargs) + + # Item, Property and MediaInfo specific + self.labels: LanguageValues = labels or Labels() + self.descriptions: LanguageValues = descriptions or Descriptions() + self.aliases = aliases or Aliases() + + @property + def labels(self) -> Labels: + return self.__labels + + @labels.setter + def labels(self, labels: Labels): + if not isinstance(labels, Labels): + raise TypeError + self.__labels = labels + + @property + def descriptions(self) -> Descriptions: + return self.__descriptions + + @descriptions.setter + def descriptions(self, descriptions: Descriptions): + if not isinstance(descriptions, Descriptions): + raise TypeError + self.__descriptions = descriptions + + @property + def aliases(self) -> Aliases: + return self.__aliases + + @aliases.setter + def aliases(self, aliases: Aliases): + if not isinstance(aliases, Aliases): + raise TypeError + self.__aliases = aliases + + def new(self, **kwargs: Any) -> MediaInfoEntity: + return MediaInfoEntity(api=self.api, **kwargs) + + def get(self, entity_id: Union[str, int], **kwargs: Any) -> MediaInfoEntity: + if isinstance(entity_id, str): + pattern = re.compile(r'^M?([0-9]+)$') + matches = pattern.match(entity_id) + + if not matches: + raise ValueError(f"Invalid MediaInfo ID ({entity_id}), format must be 'M[0-9]+'") + + entity_id =
int(matches.group(1)) + + if entity_id < 1: + raise ValueError("MediaInfo ID must be greater than 0") + + entity_id = f'M{entity_id}' + json_data = super()._get(entity_id=entity_id, **kwargs) + return MediaInfoEntity(api=self.api).from_json(json_data=json_data['entities'][entity_id]) + + def get_by_title(self, titles: Union[List[str], str], sites: str = 'commonswiki', **kwargs: Any) -> MediaInfoEntity: + if isinstance(titles, list): + titles = '|'.join(titles) + + params = { + 'action': 'wbgetentities', + 'sites': sites, + 'titles': titles, + 'format': 'json' + } + + json_data = mediawiki_api_call_helper(data=params, allow_anonymous=True, **kwargs) + + if len(json_data['entities'].keys()) == 0: + raise Exception('Title not found') + if len(json_data['entities'].keys()) > 1: + raise Exception('More than one element for this title') + + return MediaInfoEntity(api=self.api).from_json(json_data=json_data['entities'][list(json_data['entities'].keys())[0]]) + + def get_json(self) -> Dict[str, Union[str, Dict]]: + return { + 'labels': self.labels.get_json(), + 'descriptions': self.descriptions.get_json(), + **super().get_json() + } + + # if 'claims' in json_data: # MediaInfo change name of 'claims' to 'statements' + # json_data['statements'] = json_data.pop('claims') + + # if 'statements' in json_data: + # for prop_nr in json_data['statements']: + # for statement in json_data['statements'][prop_nr]: + # if 'mainsnak' in statement and 'datatype' in statement['mainsnak']: + # del statement['mainsnak']['datatype'] + + def from_json(self, json_data: Dict[str, Any]) -> MediaInfoEntity: + super().from_json(json_data=json_data) + + self.labels = Labels().from_json(json_data['labels']) + self.descriptions = Descriptions().from_json(json_data['descriptions']) + self.claims = Claims().from_json(json_data['statements']) + + return self + + def write(self, **kwargs: Any) -> MediaInfoEntity: + """ + Write the MediaInfoEntity data to the Wikibase instance and return the MediaInfoEntity object returned by the instance. + + :param data: The serialized object that is used as the data source. A newly created entity will be assigned an 'id'. 
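# --- Editor's sketch (not part of the patch): fetching structured data for a Commons file.
# Exercises get_by_title() above; 'File:Example.jpg' is an arbitrary example and
# the mediawiki_api_url kwarg is assumed to flow through to mediawiki_api_call_helper.
from wikibaseintegrator import WikibaseIntegrator

wbi = WikibaseIntegrator()
media = wbi.mediainfo.get_by_title(titles='File:Example.jpg', mediawiki_api_url='https://commons.wikimedia.org/w/api.php')
print(media.id)  # an 'M...' identifier
# --- end of editor's sketch ---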
+ :param summary: A summary of the edit + :param login: A login instance + :param allow_anonymous: Force a check if the query can be anonymous or not + :param clear: Clear the existing entity before updating + :param is_bot: Add the bot flag to the query + :param kwargs: More arguments for Python requests + :return: a MediaInfoEntity representing the response from the instance + """ + json_data = super()._write(data=self.get_json(), **kwargs) + return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/entities/property.py b/wikibaseintegrator/entities/property.py new file mode 100644 index 00000000..18c77d27 --- /dev/null +++ b/wikibaseintegrator/entities/property.py @@ -0,0 +1,126 @@ +from __future__ import annotations + +import re +from typing import Any, Dict, Union + +from wikibaseintegrator.entities.baseentity import BaseEntity +from wikibaseintegrator.models.aliases import Aliases +from wikibaseintegrator.models.descriptions import Descriptions +from wikibaseintegrator.models.labels import Labels +from wikibaseintegrator.wbi_enums import WikibaseDatatype + + +class PropertyEntity(BaseEntity): + ETYPE = 'property' + + def __init__(self, datatype: Union[str, WikibaseDatatype, None] = None, labels: Labels = None, descriptions: Descriptions = None, aliases: Aliases = None, **kwargs: Any): + super().__init__(**kwargs) + + # Property specific + self.datatype = datatype + + # Item, Property and MediaInfo specific + self.labels: Labels = labels or Labels() + self.descriptions: Descriptions = descriptions or Descriptions() + self.aliases = aliases or Aliases() + + @property + def datatype(self) -> Union[str, WikibaseDatatype, None]: + return self.__datatype + + @datatype.setter + def datatype(self, value: Union[str, WikibaseDatatype, None]): + if isinstance(value, str): + self.__datatype: Union[str, WikibaseDatatype, None] = WikibaseDatatype(value) + else: + self.__datatype = value + + @property + def labels(self) -> Labels: + return self.__labels + + @labels.setter + def labels(self, labels: Labels): + if not isinstance(labels, Labels): + raise TypeError + self.__labels = labels + + @property + def descriptions(self) -> Descriptions: + return self.__descriptions + + @descriptions.setter + def descriptions(self, descriptions: Descriptions): + if not isinstance(descriptions, Descriptions): + raise TypeError + self.__descriptions = descriptions + + @property + def aliases(self) -> Aliases: + return self.__aliases + + @aliases.setter + def aliases(self, aliases: Aliases): + if not isinstance(aliases, Aliases): + raise TypeError + self.__aliases = aliases + + def new(self, **kwargs: Any) -> PropertyEntity: + return PropertyEntity(api=self.api, **kwargs) + + def get(self, entity_id: Union[str, int], **kwargs: Any) -> PropertyEntity: + if isinstance(entity_id, str): + pattern = re.compile(r'^(?:[a-zA-Z]+:)?P?([0-9]+)$') + matches = pattern.match(entity_id) + + if not matches: + raise ValueError(f"Invalid property ID ({entity_id}), format must be 'P[0-9]+'") + + entity_id = int(matches.group(1)) + + if entity_id < 1: + raise ValueError("Property ID must be greater than 0") + + entity_id = f'P{entity_id}' + json_data = super()._get(entity_id=entity_id, **kwargs) + return PropertyEntity(api=self.api).from_json(json_data=json_data['entities'][entity_id]) + + def get_json(self) -> Dict[str, Union[str, Any]]: + json = { + 'labels': self.labels.get_json(), + 'descriptions': self.descriptions.get_json(), + 'aliases': self.aliases.get_json(), + **super().get_json() + } + + if self.datatype and
isinstance(self.datatype, WikibaseDatatype): + json.update({'datatype': self.datatype.value}) + + return json + + def from_json(self, json_data: Dict[str, Any]) -> PropertyEntity: + super().from_json(json_data=json_data) + + if 'datatype' in json_data: # TODO: 1.35 compatibility + self.datatype = json_data['datatype'] + self.labels = Labels().from_json(json_data['labels']) + self.descriptions = Descriptions().from_json(json_data['descriptions']) + self.aliases = Aliases().from_json(json_data['aliases']) + + return self + + def write(self, **kwargs: Any) -> PropertyEntity: + """ + Write the PropertyEntity data to the Wikibase instance and return the PropertyEntity object returned by the instance. + + :param data: The serialized object that is used as the data source. A newly created entity will be assigned an 'id'. + :param summary: A summary of the edit + :param login: A login instance + :param allow_anonymous: Force a check if the query can be anonymous or not + :param clear: Clear the existing entity before updating + :param is_bot: Add the bot flag to the query + :param kwargs: More arguments for Python requests + :return: a PropertyEntity representing the response from the instance + """ + json_data = super()._write(data=self.get_json(), **kwargs) + return self.from_json(json_data=json_data) diff --git a/wikibaseintegrator/models/__init__.py b/wikibaseintegrator/models/__init__.py new file mode 100644 index 00000000..cff30bec --- /dev/null +++ b/wikibaseintegrator/models/__init__.py @@ -0,0 +1,13 @@ +from .aliases import Alias, Aliases +from .basemodel import BaseModel +from .claims import Claim, Claims +from .descriptions import Descriptions +from .forms import Form, Forms +from .labels import Labels +from .language_values import LanguageValue, LanguageValues +from .lemmas import Lemmas +from .qualifiers import Qualifiers +from .references import Reference, References +from .senses import Glosses, Sense, Senses +from .sitelinks import Sitelink, Sitelinks +from .snaks import Snak, Snaks diff --git a/wikibaseintegrator/models/aliases.py b/wikibaseintegrator/models/aliases.py new file mode 100644 index 00000000..20e599d0 --- /dev/null +++ b/wikibaseintegrator/models/aliases.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from typing import Dict, List, Optional, Union + +from wikibaseintegrator.models.basemodel import BaseModel +from wikibaseintegrator.models.language_values import LanguageValue +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_enums import ActionIfExists + + +class Aliases(BaseModel): + def __init__(self, language: str = None, value: str = None): + self.aliases: Dict[str, List[Alias]] = {} + + if language is not None: + self.set(language=language, values=value) + + @property + def aliases(self) -> Dict[str, List[Alias]]: + return self.__aliases + + @aliases.setter + def aliases(self, value: Dict[str, List[Alias]]): + self.__aliases = value + + def get(self, language: str = None) -> Optional[List[Alias]]: + if language is None: + # TODO: Don't return a list of list, just a list + return [item for sublist in self.aliases.values() for item in sublist] + + if language in self.aliases: + return self.aliases[language] + + return None + + def set(self, language: str = None, values: Union[str, List] = None, action_if_exists: ActionIfExists = ActionIfExists.APPEND_OR_REPLACE) -> Aliases: + language = str(language or config['DEFAULT_LANGUAGE']) + assert action_if_exists in ActionIfExists + + assert language is not None + + if language not in
self.aliases: + self.aliases[language] = [] + + if values is None or values == '': + if action_if_exists != ActionIfExists.KEEP: + for alias in self.aliases[language]: + alias.remove() + return self + + if isinstance(values, str): + values = [values] + elif values is not None and not isinstance(values, list): + raise TypeError(f"value must be a str or list of strings, got '{type(values)}'") + + if action_if_exists == ActionIfExists.REPLACE_ALL: + aliases = [] + for value in values: + alias = Alias(language, value) + aliases.append(alias) + self.aliases[language] = aliases + else: + for value in values: + alias = Alias(language, value) + + if action_if_exists == ActionIfExists.APPEND_OR_REPLACE: + if alias not in self.aliases[language]: + self.aliases[language].append(alias) + elif action_if_exists == ActionIfExists.KEEP: + if not self.aliases[language]: + self.aliases[language].append(alias) + + return self + + def get_json(self) -> Dict[str, List]: + json_data: Dict[str, List] = {} + for language, aliases in self.aliases.items(): + if language not in json_data: + json_data[language] = [] + for alias in aliases: + json_data[language].append(alias.get_json()) + return json_data + + def from_json(self, json_data: Dict[str, List]) -> Aliases: + for language in json_data: + for alias in json_data[language]: + self.set(alias['language'], alias['value']) + + return self + + # def __contains__(self, item): + # all_aliases = [item for sublist in list(self.aliases.values()) for item in sublist] + # return item in list(map(lambda x: x.value, all_aliases)) + + +class Alias(LanguageValue): + pass diff --git a/wikibaseintegrator/models/basemodel.py b/wikibaseintegrator/models/basemodel.py new file mode 100644 index 00000000..abbb5d60 --- /dev/null +++ b/wikibaseintegrator/models/basemodel.py @@ -0,0 +1,8 @@ +class BaseModel: + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( # pylint: disable=consider-using-f-string + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), + ) diff --git a/wikibaseintegrator/models/claims.py b/wikibaseintegrator/models/claims.py new file mode 100644 index 00000000..2049485c --- /dev/null +++ b/wikibaseintegrator/models/claims.py @@ -0,0 +1,366 @@ +from __future__ import annotations + +import copy +from typing import Any, Callable, Dict, List, Optional, Union + +from wikibaseintegrator.models.basemodel import BaseModel +from wikibaseintegrator.models.qualifiers import Qualifiers +from wikibaseintegrator.models.references import Reference, References +from wikibaseintegrator.models.snaks import Snak, Snaks +from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseRank + + +class Claims(BaseModel): + def __init__(self): + self.claims: Dict[str, List[Claim]] = {} + + @property + def claims(self) -> Dict[str, List[Claim]]: + return self.__claims + + @claims.setter + def claims(self, claims: Dict[str, List[Claim]]): + self.__claims = claims + + def get(self, property: str) -> List[Claim]: + return self.claims[property] + + def remove(self, property: str = None) -> None: + if property in self.claims: + for prop in list(self.claims[property]): # iterate over a copy because removals mutate the list + if prop.id: + prop.remove() + else: + self.claims[property].remove(prop) + if len(self.claims[property]) == 0: + del self.claims[property] + + def add(self, claims: Union[Claims, List[Claim], Claim], action_if_exists: ActionIfExists = ActionIfExists.REPLACE_ALL) -> Claims: + """ + + :param claims: A
Claim, list of Claim or just a Claims object to add to this Claims object. + :param action_if_exists: Replace or append the statement. You can force an addition if the declaration already exists. + KEEP: The original claim will be kept and the new one will not be added (because there is already one with this property number) + APPEND_OR_REPLACE: The new claim will be added only if the new one is different (by comparing values) + FORCE_APPEND: The new claim will be added even if it already exists + REPLACE_ALL: The new claim will replace the old one + :return: Return the updated Claims object. + """ + + if action_if_exists not in ActionIfExists: + raise ValueError(f'{action_if_exists} is not a valid action_if_exists value. Use the enum ActionIfExists') + + if isinstance(claims, Claim): + claims = [claims] + elif claims is None or ((not isinstance(claims, list) or not all(isinstance(n, Claim) for n in claims)) and not isinstance(claims, Claims)): + raise TypeError("claims must be an instance of Claim or Claims or a list of Claim") + + # TODO: Don't replace if claim is the same + # This code is separated from the rest to avoid looping multiple times over `self.claims`. + if action_if_exists == ActionIfExists.REPLACE_ALL: + for claim in claims: + if claim is not None: + assert isinstance(claim, Claim) + + property = claim.mainsnak.property_number + if property in self.claims: + for claim_to_remove in self.claims[property]: + if claim_to_remove not in claims: + claim_to_remove.remove() + + for claim in claims: + if claim is not None: + assert isinstance(claim, Claim) + property = claim.mainsnak.property_number + + if property not in self.claims: + self.claims[property] = [] + + if action_if_exists == ActionIfExists.KEEP: + if len(self.claims[property]) == 0: + self.claims[property].append(claim) + elif action_if_exists == ActionIfExists.FORCE_APPEND: + self.claims[property].append(claim) + elif action_if_exists == ActionIfExists.APPEND_OR_REPLACE: + if claim not in self.claims[property]: + self.claims[property].append(claim) + else: + # Force update the claim if already present + self.claims[property][self.claims[property].index(claim)].update(claim) + elif action_if_exists == ActionIfExists.REPLACE_ALL: + if claim not in self.claims[property]: + self.claims[property].append(claim) + + return self + + def from_json(self, json_data: Dict[str, Any]) -> Claims: + for property in json_data: + for claim in json_data[property]: + from wikibaseintegrator.datatypes import BaseDataType + if 'datatype' in claim['mainsnak']: + data_type = [x for x in BaseDataType.subclasses if x.DTYPE == claim['mainsnak']['datatype']][0] + else: + data_type = BaseDataType + self.add(claims=data_type().from_json(claim), action_if_exists=ActionIfExists.FORCE_APPEND) + + return self + + def get_json(self) -> Dict[str, List]: + json_data: Dict[str, List] = {} + for property, claims in self.claims.items(): + if property not in json_data: + json_data[property] = [] + for claim in claims: + if not claim.removed or claim.id: + json_data[property].append(claim.get_json()) + if len(json_data[property]) == 0: + del json_data[property] + return json_data + + def __len__(self): + return len(self.claims) + + def __iter__(self): + iterate = [] + for claim in self.claims.values(): + iterate.extend(claim) + return iter(iterate) + + +class Claim(BaseModel): + DTYPE = 'claim' + + def __init__(self, qualifiers: Qualifiers = None, rank: WikibaseRank = None, references: Union[References, List[Union[Claim, List[Claim]]]] = None) -> None: + """ +
:param qualifiers: + :param rank: + :param references: A References object, a list of Claim objects or a list of lists of Claim objects + """ + self.mainsnak = Snak(datatype=self.DTYPE) + self.type = 'statement' + self.qualifiers = qualifiers or Qualifiers() + self.qualifiers_order = [] + self.id = None + self.rank = rank or WikibaseRank.NORMAL + self.removed = False + + self.references = References() + + if isinstance(references, References): + self.references = references + elif isinstance(references, list): + for ref_list in references: + ref = Reference() + if isinstance(ref_list, list): + snaks = Snaks() + for ref_claim in ref_list: + if isinstance(ref_claim, Claim): + snaks.add(Snak().from_json(ref_claim.get_json()['mainsnak'])) + else: + raise ValueError("The references must be a References object or a list of Claim objects") + ref.snaks = snaks + elif isinstance(ref_list, Claim): + ref.snaks = Snaks().add(Snak().from_json(ref_list.get_json()['mainsnak'])) + elif isinstance(ref_list, Reference): + ref = ref_list + self.references.add(reference=ref) + elif references is not None: + raise ValueError("The references must be a References object or a list of Claim objects") + + @property + def mainsnak(self) -> Snak: + return self.__mainsnak + + @mainsnak.setter + def mainsnak(self, value: Snak): + self.__mainsnak = value + + @property + def type(self) -> Union[str, Dict]: + return self.__type + + @type.setter + def type(self, value: Union[str, Dict]): + self.__type = value + + @property + def qualifiers(self) -> Qualifiers: + return self.__qualifiers + + @qualifiers.setter + def qualifiers(self, value: Qualifiers) -> None: + assert isinstance(value, (Qualifiers, list)) + self.__qualifiers: Qualifiers = Qualifiers().set(value) if isinstance(value, list) else value + + @property + def qualifiers_order(self) -> List[str]: + return self.__qualifiers_order + + @qualifiers_order.setter + def qualifiers_order(self, value: List[str]): + self.__qualifiers_order = value + + @property + def id(self) -> Optional[str]: + return self.__id + + @id.setter + def id(self, value: Optional[str]): + self.__id = value + + @property + def rank(self) -> WikibaseRank: + return self.__rank + + @rank.setter + def rank(self, value: WikibaseRank): + """Parse the rank.
The enum throws an error if it is not one of the recognized values""" + self.__rank = WikibaseRank(value) + + @property + def references(self) -> References: + return self.__references + + @references.setter + def references(self, value: References): + self.__references = value + + @property + def removed(self) -> bool: + return self.__removed + + @removed.setter + def removed(self, value: bool): + self.__removed = value + + def remove(self, remove=True) -> None: + self.removed = remove + + def update(self, claim: Claim) -> None: + self.mainsnak = claim.mainsnak + self.qualifiers = claim.qualifiers + self.qualifiers_order = claim.qualifiers_order + self.rank = claim.rank + self.references = claim.references + + def from_json(self, json_data: Dict[str, Any]) -> Claim: + """ + + :param json_data: a JSON representation of a Claim + """ + self.mainsnak = Snak().from_json(json_data['mainsnak']) + self.type = str(json_data['type']) + if 'qualifiers' in json_data: + self.qualifiers = Qualifiers().from_json(json_data['qualifiers']) + if 'qualifiers-order' in json_data: + self.qualifiers_order = list(json_data['qualifiers-order']) + self.id = str(json_data['id']) + self.rank: WikibaseRank = WikibaseRank(json_data['rank']) + if 'references' in json_data: + self.references = References().from_json(json_data['references']) + + return self + + def get_json(self) -> Dict[str, Any]: + json_data: Dict[str, Union[str, List[Dict], List[str], Dict[str, str], Dict[str, List], None]] = { + 'mainsnak': self.mainsnak.get_json(), + 'type': self.type, + 'id': self.id, + 'rank': self.rank.value + } + # Remove id if it's a temporary one + if not self.id: + del json_data['id'] + if len(self.qualifiers) > 0: + json_data['qualifiers'] = self.qualifiers.get_json() + json_data['qualifiers-order'] = list(self.qualifiers_order) + if len(self.references) > 0: + json_data['references'] = self.references.get_json() + if self.removed: + if self.id: + json_data['remove'] = '' + return json_data + + def has_equal_qualifiers(self, other: Claim) -> bool: + # check if the qualifiers are equal with the 'other' object + self_qualifiers = copy.deepcopy(self.qualifiers) + other_qualifiers = copy.deepcopy(other.qualifiers) + + if len(self_qualifiers) != len(other_qualifiers): + return False + + for property_number in self_qualifiers.qualifiers: + if property_number not in other_qualifiers.qualifiers: + return False + + if len(self_qualifiers.qualifiers[property_number]) != len(other_qualifiers.qualifiers[property_number]): + return False + + flg = [False for _ in range(len(self_qualifiers.qualifiers[property_number]))] + for count, i in enumerate(self_qualifiers.qualifiers[property_number]): + for q in other_qualifiers: + if i == q: + flg[count] = True + if not all(flg): + return False + + return True + + # TODO: rewrite this? + def __contains__(self, item): + if isinstance(item, Claim): + return self == item + + if isinstance(item, str): + return self.mainsnak.datavalue == item + + return super().__contains__(item) + + def __eq__(self, other): + if isinstance(other, Claim): + return self.mainsnak.datavalue == other.mainsnak.datavalue and self.mainsnak.property_number == other.mainsnak.property_number and self.has_equal_qualifiers(other) + + if isinstance(other, str): + return self.mainsnak.property_number == other + + return super().__eq__(other) + + def equals(self, that: Claim, include_ref: bool = False, fref: Callable = None) -> bool: + """ + Tests for equality of two statements.
+ If comparing references, the order of the arguments matters!!! + self is the current statement, the next argument is the new statement. + Allows passing in a function to use to compare the references 'fref'. Default is equality. + fref accepts two arguments 'oldrefs' and 'newrefs', each of which are a list of references, + where each reference is a list of statements + """ + + if not include_ref: + # return the result of BaseDataType.__eq__, which is testing for equality of value and qualifiers + return self == that + + if self != that: + return False + + if fref is None: + return Claim.refs_equal(self, that) + + return fref(self, that) + + @staticmethod + def refs_equal(olditem: Claim, newitem: Claim) -> bool: + """ + Tests for exactly identical references + """ + + oldrefs = olditem.references + newrefs = newitem.references + + def ref_equal(oldref: References, newref: References) -> bool: + return (len(oldref) == len(newref)) and all(x in oldref for x in newref) + + return len(oldrefs) == len(newrefs) and all(any(ref_equal(oldref, newref) for oldref in oldrefs) for newref in newrefs) + + def get_sparql_value(self) -> str: + pass diff --git a/wikibaseintegrator/models/descriptions.py b/wikibaseintegrator/models/descriptions.py new file mode 100644 index 00000000..872e730c --- /dev/null +++ b/wikibaseintegrator/models/descriptions.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import Dict + +from wikibaseintegrator.models.language_values import LanguageValue, LanguageValues + + +class Descriptions(LanguageValues): + def from_json(self, json_data: Dict[str, Dict]) -> Descriptions: + """ + Create a new Descriptions object from a JSON/dict object. + + :param json_data: A dict object that uses the same format as Wikibase. + :return: The newly created or updated object.
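# --- Editor's sketch (not part of the patch): claim equality.
# Claim.__eq__ above compares datavalue, property number and qualifiers, while
# equals(..., include_ref=True) additionally compares references; P370 is
# Wikidata's string sandbox property, used purely as an example.
from wikibaseintegrator.datatypes import String

a = String(value='foo', prop_nr='P370')
b = String(value='foo', prop_nr='P370')
assert a == b                         # same value, property and (empty) qualifiers
assert a.equals(b, include_ref=True)  # both reference lists are empty, hence identical
assert a == 'P370'                    # a bare string is matched against the property number
# --- end of editor's sketch ---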
+ """ + for language_value in json_data: + self.add(language_value=LanguageValue(language=json_data[language_value]['language']).from_json(json_data=json_data[language_value])) + + return self diff --git a/wikibaseintegrator/models/forms.py b/wikibaseintegrator/models/forms.py new file mode 100644 index 00000000..eb90dbd7 --- /dev/null +++ b/wikibaseintegrator/models/forms.py @@ -0,0 +1,117 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Union + +from wikibaseintegrator.models.basemodel import BaseModel +from wikibaseintegrator.models.claims import Claims +from wikibaseintegrator.models.language_values import LanguageValues + + +class Forms(BaseModel): + def __init__(self): + self.forms: Dict[str, Form] = {} + + @property + def forms(self) -> Dict: + return self.__forms + + @forms.setter + def forms(self, value): + self.__forms = value + + def get(self, id: str) -> Form: + return self.forms[id] + + def add(self, form: Form) -> Forms: + self.forms[form.id] = form + + return self + + def from_json(self, json_data: List[Dict]) -> Forms: + for form in json_data: + self.add(form=Form().from_json(form)) + + return self + + def get_json(self) -> List[Dict]: + json_data: List[Dict] = [] + for _, form in self.forms.items(): + json_data.append(form.get_json()) + + return json_data + + +class Form(BaseModel): + def __init__(self, form_id: str = None, representations: Representations = None, grammatical_features: Union[str, int, List[str]] = None, claims: Claims = None): + self.id = form_id + self.representations: Representations = representations or LanguageValues() + self.grammatical_features = grammatical_features or [] + self.claims = claims or Claims() + + @property + def id(self): + return self.__id + + @id.setter + def id(self, value): + self.__id = value + + @property + def representations(self): + return self.__representations + + @representations.setter + def representations(self, value): + self.__representations = value + + @property + def grammatical_features(self): + return self.__grammatical_features + + @grammatical_features.setter + def grammatical_features(self, value: Union[str, int, List[str]]): + if not hasattr(self, '__grammatical_features') or value is None: + self.__grammatical_features = [] + + if isinstance(value, int): + self.__grammatical_features.append('Q' + str(value)) + elif isinstance(value, str): + self.__grammatical_features.append(value) + elif isinstance(value, list): + self.__grammatical_features = value + else: + raise TypeError(f"value must be a str, an int or a list of strings, got '{type(value)}'") + + @property + def claims(self): + return self.__claims + + @claims.setter + def claims(self, value): + self.__claims = value + + def from_json(self, json_data: Dict[str, Any]) -> Form: + self.id = json_data['id'] + self.representations = Representations().from_json(json_data['representations']) + self.grammatical_features = json_data['grammaticalFeatures'] + self.claims = Claims().from_json(json_data['claims']) + + return self + + def get_json(self) -> Dict[str, Union[str, Dict, List]]: + json_data: Dict[str, Union[str, Dict, List]] = { + 'id': self.id, + 'representations': self.representations.get_json(), + 'grammaticalFeatures': self.grammatical_features, + 'claims': self.claims.get_json() + } + + if self.id is None: + json_data['add'] = '' + del json_data['id'] + + return json_data + + +class Representations(LanguageValues): + pass diff --git a/wikibaseintegrator/models/labels.py b/wikibaseintegrator/models/labels.py new file mode 
100644 index 00000000..6c186446 --- /dev/null +++ b/wikibaseintegrator/models/labels.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import Dict + +from wikibaseintegrator.models.language_values import LanguageValue, LanguageValues + + +class Labels(LanguageValues): + def from_json(self, json_data: Dict[str, Dict]) -> Labels: + """ + Create a new Labels object from a JSON/dict object. + + :param json_data: A dict object that uses the same format as Wikibase. + :return: The newly created or updated object. + """ + for language_value in json_data: + self.add(language_value=LanguageValue(language=json_data[language_value]['language']).from_json(json_data=json_data[language_value])) + + return self diff --git a/wikibaseintegrator/models/language_values.py b/wikibaseintegrator/models/language_values.py new file mode 100644 index 00000000..b83dd2ea --- /dev/null +++ b/wikibaseintegrator/models/language_values.py @@ -0,0 +1,184 @@ +from __future__ import annotations + +from typing import Dict, Optional + +from wikibaseintegrator.models.basemodel import BaseModel +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_enums import ActionIfExists + + +class LanguageValues(BaseModel): + def __init__(self): + self.values: Dict[str, LanguageValue] = {} + + @property + def values(self) -> Dict[str, LanguageValue]: + """ + A dict of LanguageValue with the language as key. + """ + return self.__values + + @values.setter + def values(self, value: Dict[str, LanguageValue]): + self.__values = value + + def add(self, language_value: LanguageValue) -> LanguageValues: + """ + Add a LanguageValue object to the list + + :param language_value: A LanguageValue object + :return: The current LanguageValues object + """ + assert isinstance(language_value, LanguageValue) + + if language_value.value: + self.values[language_value.language] = language_value + + return self + + def get(self, language: str = None) -> Optional[LanguageValue]: + """ + Get a LanguageValue object with the specified language. Use the default language in wbi_config if none specified. + + :param language: The requested language. + :return: The related LanguageValue object or None if none found. + """ + language = str(language or config['DEFAULT_LANGUAGE']) + if language in self.values: + return self.values[language] + + return None + + def set(self, language: str = None, value: str = None, action_if_exists: ActionIfExists = ActionIfExists.REPLACE_ALL) -> Optional[LanguageValue]: + """ + Create or update the specified language with the value passed in arguments. + + :param language: The desired language. + :param value: The desired value of the LanguageValue object. Use None to delete an existing LanguageValue object from the list. + :param action_if_exists: The action if the LanguageValue object is already defined. Can be ActionIfExists.REPLACE_ALL (default) or ActionIfExists.KEEP. + :return: The created or updated LanguageValue. None if there's no LanguageValue object with this language.
+ """ + language = str(language or config['DEFAULT_LANGUAGE']) + assert action_if_exists in [ActionIfExists.REPLACE_ALL, ActionIfExists.KEEP] + + # Remove value if None + if value is None: + if language in self.values: + self.values[language].remove() + return None + + if action_if_exists == ActionIfExists.REPLACE_ALL or self.get(language=language) is None: + language_value = LanguageValue(language, value) + self.add(language_value) + return language_value + + return self.get(language=language) + + def from_json(self, json_data: Dict[str, Dict]) -> LanguageValues: + """ + Create a new LanguageValues object from a JSON/dict object. + + :param json_data: A dict object that uses the same format as Wikibase. + :return: The newly created or updated object. + """ + for language_value in json_data: + self.add(language_value=LanguageValue(language=json_data[language_value]['language']).from_json(json_data=json_data[language_value])) + + return self + + def get_json(self) -> Dict[str, Dict]: + """ + Get a formatted dict that respects the Wikibase format. + + :return: A dict using Wikibase format. + """ + json_data: Dict[str, Dict] = {} + for language, language_value in self.values.items(): + json_data[language] = language_value.get_json() + + return json_data + + def __contains__(self, language: str) -> bool: + return language in self.values + + def __iter__(self): + return iter(self.values.values()) + + +class LanguageValue(BaseModel): + def __init__(self, language: str, value: str = None): + self.language = language + self.value = value + self.removed = False + + @property + def language(self) -> str: + return self.__language + + @language.setter + def language(self, value: Optional[str]): + if value is None: + raise ValueError("language can't be None") + + if value == '': + raise ValueError("language can't be empty") + + if not isinstance(value, str): + raise ValueError("language must be a str") + + self.__language = value + + @property + def value(self) -> Optional[str]: + """ + The value of the LanguageValue instance. + :return: A string with the value of the LanguageValue instance.
+ """ + return self.__value + + @value.setter + def value(self, value: Optional[str]): + self.__value = value + + @property + def removed(self) -> bool: + return self.__removed + + @removed.setter + def removed(self, value: bool): + self.__removed = value + + def remove(self) -> LanguageValue: + self.removed = True + + return self + + def from_json(self, json_data: Dict[str, str]) -> LanguageValue: + self.language = json_data['language'] + self.value = json_data['value'] + + return self + + def get_json(self) -> Dict[str, Optional[str]]: + json_data = { + 'language': self.language, + 'value': self.value + } + if self.removed: + json_data['remove'] = '' + return json_data + + def __contains__(self, item): + return item in self.value + + def __eq__(self, other): + if isinstance(other, LanguageValue): + return self.value == other.value and self.language == other.language + + return self.value == other + + def __len__(self): + return len(self.value) + + def __str__(self): + return self.value diff --git a/wikibaseintegrator/models/lemmas.py b/wikibaseintegrator/models/lemmas.py new file mode 100644 index 00000000..01a9b268 --- /dev/null +++ b/wikibaseintegrator/models/lemmas.py @@ -0,0 +1,19 @@ +from __future__ import annotations + +from typing import Dict + +from wikibaseintegrator.models.language_values import LanguageValue, LanguageValues + + +class Lemmas(LanguageValues): + def from_json(self, json_data: Dict[str, Dict]) -> Lemmas: + """ + Create a new Lemmas object from a JSON/dict object. + + :param json_data: A dict object that uses the same format as Wikibase. + :return: The newly created or updated object. + """ + for language_value in json_data: + self.add(language_value=LanguageValue(language=json_data[language_value]['language']).from_json(json_data=json_data[language_value])) + + return self diff --git a/wikibaseintegrator/models/qualifiers.py b/wikibaseintegrator/models/qualifiers.py new file mode 100644 index 00000000..dcd50670 --- /dev/null +++ b/wikibaseintegrator/models/qualifiers.py @@ -0,0 +1,81 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Dict, List, Union + +from wikibaseintegrator.models.basemodel import BaseModel +from wikibaseintegrator.models.snaks import Snak +from wikibaseintegrator.wbi_enums import ActionIfExists + +if TYPE_CHECKING: + from wikibaseintegrator.models.claims import Claim + + +class Qualifiers(BaseModel): + def __init__(self): + self.qualifiers: Dict[str, List[Snak]] = {} + + @property + def qualifiers(self): + return self.__qualifiers + + @qualifiers.setter + def qualifiers(self, value): + assert isinstance(value, dict) + self.__qualifiers = value + + def set(self, qualifiers: Union[Qualifiers, List[Union[Snak, Claim]], None]) -> Qualifiers: + if isinstance(qualifiers, list): + for qualifier in qualifiers: + self.add(qualifier) + elif qualifiers is None: + self.qualifiers = {} + else: + self.qualifiers = qualifiers + + return self + + def get(self, property: str) -> List[Snak]: + return self.qualifiers[property] + + # TODO: implement action_if_exists + def add(self, qualifier: Union[Snak, Claim], action_if_exists: ActionIfExists = ActionIfExists.REPLACE_ALL) -> Qualifiers: + from wikibaseintegrator.models.claims import Claim + if isinstance(qualifier, Claim): + qualifier = Snak().from_json(qualifier.get_json()['mainsnak']) + + if qualifier is not None: + assert isinstance(qualifier, Snak) + + property = qualifier.property_number + + if property not in self.qualifiers: + self.qualifiers[property] = [] +
self.qualifiers[property].append(qualifier) + + return self + + def from_json(self, json_data: Dict[str, List]) -> Qualifiers: + for property in json_data: + for snak in json_data[property]: + self.add(qualifier=Snak().from_json(snak)) + return self + + def get_json(self) -> Dict[str, List]: + json_data: Dict[str, List] = {} + for property in self.qualifiers: + if property not in json_data: + json_data[property] = [] + + for qualifier in self.qualifiers[property]: + json_data[property].append(qualifier.get_json()) + return json_data + + def __iter__(self): + iterate = [] + for qualifier in self.qualifiers.values(): + iterate.extend(qualifier) + return iter(iterate) + + def __len__(self): + return len(self.qualifiers) diff --git a/wikibaseintegrator/models/references.py b/wikibaseintegrator/models/references.py new file mode 100644 index 00000000..3869c6b5 --- /dev/null +++ b/wikibaseintegrator/models/references.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union + +from wikibaseintegrator.models.basemodel import BaseModel +from wikibaseintegrator.models.snaks import Snak, Snaks +from wikibaseintegrator.wbi_enums import ActionIfExists + +if TYPE_CHECKING: + from wikibaseintegrator.models.claims import Claim + + +class References(BaseModel): + def __init__(self): + self.references: List[Reference] = [] + + @property + def references(self) -> List[Reference]: + return self.__references + + @references.setter + def references(self, value: List[Reference]): + self.__references = value + + def get(self, hash: str = None) -> Optional[Reference]: + for reference in self.references: + if reference.hash == hash: + return reference + return None + + # TODO: implement action_if_exists + def add(self, reference: Union[Reference, Claim] = None, action_if_exists: ActionIfExists = ActionIfExists.REPLACE_ALL) -> References: + from wikibaseintegrator.models.claims import Claim + if isinstance(reference, Claim): + reference = Reference(snaks=Snaks().add(Snak().from_json(reference.get_json()['mainsnak']))) + + if reference is not None: + assert isinstance(reference, Reference) + + if reference not in self.references: + self.references.append(reference) + + return self + + def from_json(self, json_data: List[Dict]) -> References: + for reference in json_data: + self.add(reference=Reference().from_json(reference)) + + return self + + def get_json(self) -> List[Dict]: + json_data: List[Dict] = [] + for reference in self.references: + json_data.append(reference.get_json()) + return json_data + + def remove(self, reference_to_remove: Union[Claim, Reference]) -> bool: + from wikibaseintegrator.models.claims import Claim + if isinstance(reference_to_remove, Claim): + reference_to_remove = Reference(snaks=Snaks().add(Snak().from_json(reference_to_remove.get_json()['mainsnak']))) + + assert isinstance(reference_to_remove, Reference) + + for reference in self.references: + if reference == reference_to_remove: + self.references.remove(reference) + return True + + return False + + def clear(self) -> References: + self.references = [] + return self + + def __iter__(self): + return iter(self.references) + + def __len__(self): + return len(self.references) + + +class Reference(BaseModel): + def __init__(self, snaks: Snaks = None, snaks_order: List = None): + self.hash = None + self.snaks = snaks or Snaks() + self.snaks_order = snaks_order or [] + + @property + def hash(self): + return self.__hash + + @hash.setter + def hash(self, value): + 
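# --- Editor's sketch (not part of the patch): qualifiers and references.
# Plain Claim objects passed to Qualifiers.add() or given as references are
# converted to snaks internally, as implemented above; the property numbers
# are arbitrary placeholders.
from wikibaseintegrator.datatypes import String
from wikibaseintegrator.models import Qualifiers

claim = String(
    value='example value',
    prop_nr='P370',
    qualifiers=Qualifiers().add(String(value='a qualifier', prop_nr='P828')),
    references=[String(value='a reference', prop_nr='P854')],
)
print(claim.get_json())
# --- end of editor's sketch ---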
self.__hash = value + + @property + def snaks(self): + return self.__snaks + + @snaks.setter + def snaks(self, value): + self.__snaks = value + + @property + def snaks_order(self): + return self.__snaks_order + + @snaks_order.setter + def snaks_order(self, value): + self.__snaks_order = value + + # TODO: implement action_if_exists + def add(self, snak: Union[Snak, Claim] = None, action_if_exists: ActionIfExists = ActionIfExists.REPLACE_ALL) -> Reference: + from wikibaseintegrator.models.claims import Claim + if isinstance(snak, Claim): + snak = Snak().from_json(snak.get_json()['mainsnak']) + + if snak is not None: + assert isinstance(snak, Snak) + + self.snaks.add(snak) + + return self + + def from_json(self, json_data: Dict[str, Any]) -> Reference: + self.hash = json_data['hash'] + self.snaks = Snaks().from_json(json_data['snaks']) + self.snaks_order = json_data['snaks-order'] + + return self + + def get_json(self) -> Dict[str, Union[Dict, List]]: + json_data: Dict[str, Union[Dict, List]] = { + 'snaks': self.snaks.get_json(), + 'snaks-order': self.snaks_order + } + return json_data + + def __iter__(self): + return iter(self.snaks) + + def __len__(self): + return len(self.snaks) diff --git a/wikibaseintegrator/models/senses.py b/wikibaseintegrator/models/senses.py new file mode 100644 index 00000000..5a8cbb0a --- /dev/null +++ b/wikibaseintegrator/models/senses.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Optional, Union + +from wikibaseintegrator.models.basemodel import BaseModel +from wikibaseintegrator.models.claims import Claims +from wikibaseintegrator.models.language_values import LanguageValues +from wikibaseintegrator.wbi_enums import ActionIfExists + + +class Senses(BaseModel): + def __init__(self): + self.senses: List[Sense] = [] + + def get(self, id: str) -> Optional[Sense]: + for sense in self.senses: + if sense.id == id: + return sense + return None + + # TODO: implement action_if_exists + def add(self, sense: Sense, action_if_exists: ActionIfExists = ActionIfExists.REPLACE_ALL) -> Senses: + self.senses.append(sense) + + return self + + def from_json(self, json_data: List[Dict]) -> Senses: + for sense in json_data: + self.add(sense=Sense().from_json(sense)) + + return self + + def get_json(self) -> List[Dict]: + json_data: List[Dict] = [] + for sense in self.senses: + json_data.append(sense.get_json()) + + return json_data + + +class Sense(BaseModel): + def __init__(self, sense_id: str = None, glosses: Glosses = None, claims: Claims = None): + self.id = sense_id + self.glosses: LanguageValues = glosses or Glosses() + self.claims = claims or Claims() + self.removed = False + + def from_json(self, json_data: Dict[str, Any]) -> Sense: + self.id = json_data['id'] + self.glosses = Glosses().from_json(json_data['glosses']) + self.claims = Claims().from_json(json_data['claims']) + + return self + + def get_json(self) -> Dict[str, Union[str, Dict]]: + json_data: Dict[str, Union[str, Dict]] = { + 'id': str(self.id), + 'glosses': self.glosses.get_json(), + 'claims': self.claims.get_json() + } + + if self.id is None: + json_data['add'] = '' + del json_data['id'] + + if self.removed: + json_data['remove'] = '' + + return json_data + + def remove(self) -> Sense: + self.removed = True + return self + + +class Glosses(LanguageValues): + pass diff --git a/wikibaseintegrator/models/sitelinks.py b/wikibaseintegrator/models/sitelinks.py new file mode 100644 index 00000000..e833c44c --- /dev/null +++ b/wikibaseintegrator/models/sitelinks.py @@ 
-0,0 +1,37 @@ +from __future__ import annotations + +from typing import Dict, List, Optional + +from wikibaseintegrator.models.basemodel import BaseModel + + +class Sitelinks(BaseModel): + def __init__(self): + self.sitelinks: Dict[str, Sitelink] = {} + + def get(self, site: str = None) -> Optional[Sitelink]: + if site in self.sitelinks: + return self.sitelinks[site] + + return None + + def set(self, site: str, title: str = None, badges: List[str] = None) -> Sitelink: + sitelink = Sitelink(site, title, badges) + self.sitelinks[site] = sitelink + return sitelink + + def from_json(self, json_data: Dict[str, Dict]) -> Sitelinks: + for sitelink in json_data: + self.set(site=json_data[sitelink]['site'], title=json_data[sitelink]['title'], badges=json_data[sitelink]['badges']) + + return self + + +class Sitelink(BaseModel): + def __init__(self, site: str = None, title: str = None, badges: List[str] = None): + self.site = site + self.title = title + self.badges: List[str] = badges or [] + + def __str__(self): + return self.title diff --git a/wikibaseintegrator/models/snaks.py b/wikibaseintegrator/models/snaks.py new file mode 100644 index 00000000..eceaaef8 --- /dev/null +++ b/wikibaseintegrator/models/snaks.py @@ -0,0 +1,144 @@ +from __future__ import annotations + +import re +from typing import Any, Dict, List + +from wikibaseintegrator.models.basemodel import BaseModel +from wikibaseintegrator.wbi_enums import WikibaseSnakType + + +class Snaks(BaseModel): + def __init__(self): + self.snaks: Dict[str, List[Snak]] = {} + + def get(self, property: str) -> List[Snak]: + return self.snaks[property] + + def add(self, snak: Snak) -> Snaks: + property = snak.property_number + + if property not in self.snaks: + self.snaks[property] = [] + + self.snaks[property].append(snak) + + return self + + def from_json(self, json_data: Dict[str, List]) -> Snaks: + for property in json_data: + for snak in json_data[property]: + self.add(snak=Snak().from_json(snak)) + + return self + + def get_json(self) -> Dict[str, List]: + json_data: Dict[str, List] = {} + for property, snaks in self.snaks.items(): + if property not in json_data: + json_data[property] = [] + for snak in snaks: + json_data[property].append(snak.get_json()) + return json_data + + def __iter__(self): + iterate = [] + for snak in self.snaks.values(): + iterate.extend(snak) + return iter(iterate) + + def __len__(self): + return len(self.snaks) + + +class Snak(BaseModel): + def __init__(self, snaktype: WikibaseSnakType = WikibaseSnakType.KNOWN_VALUE, property_number: str = None, hash: str = None, datavalue: Dict = None, datatype: str = None): + self.snaktype = snaktype + self.property_number = property_number + self.hash = hash + self.datavalue = datavalue or {} + self.datatype = datatype + + @property + def snaktype(self): + return self.__snaktype + + @snaktype.setter + def snaktype(self, value: WikibaseSnakType): + """Parse the snaktype. 
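# --- Editor's sketch (not part of the patch): property number normalization.
# The Snak.property_number setter in this file accepts 'P31', '31' or the
# int 31 and normalizes them all to 'P31'.
from wikibaseintegrator.models.snaks import Snak

assert Snak(property_number='P31').property_number == 'P31'
assert Snak(property_number='31').property_number == 'P31'
assert Snak(property_number=31).property_number == 'P31'
# --- end of editor's sketch ---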
The enum throws an error if it is not one of the recognized values""" + self.__snaktype = WikibaseSnakType(value) + + @property + def property_number(self): + return self.__property_number + + @property_number.setter + def property_number(self, value): + if isinstance(value, int): + self.__property_number = 'P' + str(value) + elif value is not None: + pattern = re.compile(r'^P?([0-9]+)$') + matches = pattern.match(value) + + if not matches: + raise ValueError('Invalid property_number, format must be "P[0-9]+"') + + self.__property_number = 'P' + str(matches.group(1)) + else: + self.__property_number = value + + @property + def hash(self): + return self.__hash + + @hash.setter + def hash(self, value): + self.__hash = value + + @property + def datavalue(self): + return self.__datavalue + + @datavalue.setter + def datavalue(self, value): + if value is not None: + self.snaktype = WikibaseSnakType.KNOWN_VALUE + self.__datavalue = value + + @property + def datatype(self): + return self.__datatype + + @datatype.setter + def datatype(self, value): + self.__datatype = value + + def from_json(self, json_data: Dict[str, Any]) -> Snak: + self.snaktype: WikibaseSnakType = WikibaseSnakType(json_data['snaktype']) + self.property_number = json_data['property'] + if 'hash' in json_data: + self.hash = json_data['hash'] + if 'datavalue' in json_data: + self.datavalue = json_data['datavalue'] + if 'datatype' in json_data: # datatype can be null with MediaInfo + self.datatype = json_data['datatype'] + return self + + def get_json(self) -> Dict[str, str]: + json_data = { + 'snaktype': self.snaktype.value, + 'property': self.property_number, + 'datatype': self.datatype, + 'datavalue': self.datavalue + } + + if self.snaktype in [WikibaseSnakType.NO_VALUE, WikibaseSnakType.UNKNOWN_VALUE]: + del json_data['datavalue'] + + # datatype can be null with MediaInfo + if not self.datatype: + del json_data['datatype'] + + return json_data + + def __eq__(self, other): + return self.snaktype == other.snaktype and self.property_number == other.property_number and self.datatype == other.datatype and self.datavalue == other.datavalue diff --git a/wikibaseintegrator/wbi_backoff.py b/wikibaseintegrator/wbi_backoff.py index 9e52cde4..ebd3350a 100644 --- a/wikibaseintegrator/wbi_backoff.py +++ b/wikibaseintegrator/wbi_backoff.py @@ -1,37 +1,33 @@ +""" +WikibaseIntegrator implementation of the backoff Python library.
+""" +import logging import sys from functools import partial +from json import JSONDecodeError import backoff import requests -import simplejson as json from wikibaseintegrator.wbi_config import config -JSONDecodeError = json.JSONDecodeError - -def get_config(name): - return partial(config.get, name) - - -def backoff_hdlr(details): +def wbi_backoff_backoff_hdlr(details): exc_type, exc_value, _ = sys.exc_info() if exc_type == JSONDecodeError: - print(exc_value.doc) # pragma: no cover - print("Backing off {wait:0.1f} seconds afters {tries} tries calling function with args {args} and kwargs {kwargs}".format(**details)) + logging.error(exc_value.doc) # pragma: no cover + logging.error("Backing off %0.1f seconds after %s tries calling function with args %r and kwargs %r", details['wait'], details['tries'], details['args'], details['kwargs']) -def check_json_decode_error(e): +def wbi_backoff_check_json_decode_error(e) -> bool: """ Check if the error message is "Expecting value: line 1 column 1 (char 0)" if not, its a real error and we shouldn't retry - :param e: - :return: """ - return type(e) == JSONDecodeError and str(e) != "Expecting value: line 1 column 1 (char 0)" + return isinstance(e, JSONDecodeError) and str(e) != "Expecting value: line 1 column 1 (char 0)" -exceptions = (requests.exceptions.Timeout, requests.exceptions.ConnectionError, requests.HTTPError, JSONDecodeError) +wbi_backoff_exceptions = (requests.exceptions.Timeout, requests.exceptions.ConnectionError, requests.HTTPError, JSONDecodeError) -wbi_backoff = partial(backoff.on_exception, backoff.expo, exceptions, max_value=get_config("BACKOFF_MAX_VALUE"), giveup=check_json_decode_error, on_backoff=backoff_hdlr, - jitter=None, max_tries=get_config("BACKOFF_MAX_TRIES")) +wbi_backoff = partial(backoff.on_exception, backoff.expo, wbi_backoff_exceptions, max_value=partial(config.get, 'BACKOFF_MAX_VALUE'), giveup=wbi_backoff_check_json_decode_error, + on_backoff=wbi_backoff_backoff_hdlr, jitter=None, max_tries=partial(config.get, 'BACKOFF_MAX_TRIES')) diff --git a/wikibaseintegrator/wbi_config.py b/wikibaseintegrator/wbi_config.py index 42562622..cbfd458b 100644 --- a/wikibaseintegrator/wbi_config.py +++ b/wikibaseintegrator/wbi_config.py @@ -1,10 +1,3 @@ -import pkg_resources - -try: - __version__ = pkg_resources.get_distribution('wikibaseintegrator').version -except pkg_resources.DistributionNotFound as e: # pragma: no cover - __version__ = 'dev' - """ Config global options Options can be changed at run time. See tests/test_backoff.py for usage example @@ -15,15 +8,16 @@ To disable retry, set value to 1 BACKOFF_MAX_VALUE: maximum number of seconds to wait before retrying. wait time will increase to this number Default: 3600 (one hour) -USER_AGENT_DEFAULT: default user agent string used for http requests. Both to Wikibase api, query service and others. - See: https://meta.wikimedia.org/wiki/User-Agent_policy +USER_AGENT: Complementary user agent string used for http requests. Both to Wikibase api, query service and others.
+ See: https://meta.wikimedia.org/wiki/User-Agent_policy """ -config = { - 'BACKOFF_MAX_TRIES': None, +from typing import Dict, Union + +config: Dict[str, Union[str, int, None, bool]] = { + 'BACKOFF_MAX_TRIES': 5, 'BACKOFF_MAX_VALUE': 3600, - 'USER_AGENT_DEFAULT': f"WikibaseIntegrator/{__version__} (https://github.com/LeMyst/WikibaseIntegrator)", - 'MAXLAG': 5, + 'USER_AGENT': None, 'PROPERTY_CONSTRAINT_PID': 'P2302', 'DISTINCT_VALUES_CONSTRAINT_QID': 'Q21502410', 'COORDINATE_GLOBE_QID': 'http://www.wikidata.org/entity/Q2', @@ -33,5 +27,6 @@ 'MEDIAWIKI_REST_URL': 'https://www.wikidata.org/w/rest.php', 'SPARQL_ENDPOINT_URL': 'https://query.wikidata.org/sparql', 'WIKIBASE_URL': 'http://www.wikidata.org', - 'DEFAULT_LANGUAGE': 'en' + 'DEFAULT_LANGUAGE': 'en', + 'DEFAULT_LEXEME_LANGUAGE': 'Q1860' } diff --git a/wikibaseintegrator/wbi_core.py b/wikibaseintegrator/wbi_core.py deleted file mode 100644 index 85063d0d..00000000 --- a/wikibaseintegrator/wbi_core.py +++ /dev/null @@ -1,929 +0,0 @@ -import copy -import json -from collections import defaultdict - -from wikibaseintegrator import wbi_functions -from wikibaseintegrator.wbi_config import config -from wikibaseintegrator.wbi_datatype import BaseDataType -from wikibaseintegrator.wbi_exceptions import (IDMissingError, SearchError, SearchOnlyError, NonUniqueLabelDescriptionPairError, MWApiError, CorePropIntegrityException, - ManualInterventionReqException) -from wikibaseintegrator.wbi_fastrun import FastRunContainer - - -class ItemEngine: - fast_run_store = [] - distinct_value_props = {} - - def __init__(self, item_id='', new_item=False, data=None, mediawiki_api_url=None, sparql_endpoint_url=None, wikibase_url=None, fast_run=False, fast_run_base_filter=None, - fast_run_use_refs=False, ref_handler=None, global_ref_mode='KEEP_GOOD', good_refs=None, keep_good_ref_statements=False, search_only=False, item_data=None, - user_agent=None, core_props=None, core_prop_match_thresh=0.66, property_constraint_pid=None, distinct_values_constraint_qid=None, fast_run_case_insensitive=False, - debug=False) -> None: - """ - constructor - :param item_id: Wikibase item id - :type item_id: str - :param new_item: This parameter lets the user indicate if a new item should be created - :type new_item: bool - :param data: a dictionary with property strings as keys and the data which should be written to a item as the property values - :type data: list[BaseDataType] or BaseDataType or None - :param mediawiki_api_url: - :type mediawiki_api_url: str - :param sparql_endpoint_url: - :type sparql_endpoint_url: str - :param wikibase_url: - :type wikibase_url: str - :param fast_run: True if this item should be run in fastrun mode, otherwise False. User setting this to True should also specify the - fast_run_base_filter for these item types - :type fast_run: bool - :param fast_run_base_filter: A property value dict determining the Wikibase property and the corresponding value which should be used as a filter for - this item type. Several filter criteria can be specified. The values can be either Wikibase item QIDs, strings or empty strings if the value should - be a variable in SPARQL. - Example: {'P352': '', 'P703': 'Q15978631'} if the basic common type of things this bot runs on is human proteins (specified by Uniprot IDs (P352) - and 'found in taxon' homo sapiens 'Q15978631'). - :type fast_run_base_filter: dict - :param fast_run_use_refs: If `True`, fastrun mode will consider references in determining if a statement should be updated and written to Wikibase. 
- Otherwise, only the value and qualifiers are used. Default: False - :type fast_run_use_refs: bool - :param ref_handler: This parameter defines a function that will manage the reference handling in a custom manner. This argument should be a function - handle that accepts two arguments, the old/current statement (first argument) and new/proposed/to be written statement (second argument), both of - type: a subclass of BaseDataType. The function should return an new item that is the item to be written. The item's values properties or qualifiers - should not be modified; only references. This function is also used in fastrun mode. This will only be used if the ref_mode is set to "CUSTOM". - :type ref_handler: function - :param global_ref_mode: sets the reference handling mode for an item. Four modes are possible, 'STRICT_KEEP' keeps all references as they are, - 'STRICT_KEEP_APPEND' keeps the references as they are and appends new ones. 'STRICT_OVERWRITE' overwrites all existing references for given. - 'KEEP_GOOD' will use the refs defined in good_refs. 'CUSTOM' will use the function defined in ref_handler - :type global_ref_mode: str - :param good_refs: This parameter lets the user define blocks of good references. It is a list of dictionaries. One block is a dictionary with Wikidata - properties as keys and potential values as the required value for a property. There can be arbitrarily many key: value pairs in one reference block. - Example: [{'P248': 'Q905695', 'P352': None, 'P407': None, 'P1476': None, 'P813': None}] This example contains one good reference block, stated in: - Uniprot, Uniprot ID, title of Uniprot entry, language of work and date when the information has been retrieved. A None type indicates that the value - varies from reference to reference. In this case, only the value for the Wikidata item for the Uniprot database stays stable over all of these - references. Key value pairs work here, as Wikidata references can hold only one value for one property. The number of good reference blocks is not - limited. This parameter OVERRIDES any other reference mode set!! - :type good_refs: list[dict] - :param keep_good_ref_statements: Do not delete any statement which has a good reference, either defined in the good_refs list or by any other - referencing mode. - :type keep_good_ref_statements: bool - :param search_only: If this flag is set to True, the data provided will only be used to search for the corresponding Wikibase item, but no actual data - updates will performed. This is useful, if certain states or values on the target item need to be checked before certain data is written to it. In - order to write new data to the item, the method update() will take data, modify the Wikibase item and a write() call will then perform the actual - write to the Wikibase instance. - :type search_only: bool - :param item_data: A Python JSON object corresponding to the item in item_id. This can be used in conjunction with item_id in order to provide raw data. - :type item_data: - :param user_agent: The user agent string to use when making http requests - :type user_agent: str - :param core_props: Core properties are used to retrieve an item based on `data` if a `item_id` is not given. This is a set of PIDs to use. If None, - all Wikibase properties with a distinct values constraint will be used. (see: get_core_props) - :type core_props: set - :param core_prop_match_thresh: The proportion of core props that must match during retrieval of an item when the item_id is not specified. 
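The ref_handler contract documented above, for the legacy ItemEngine this diff removes, takes the current statement and the proposed statement and may modify only the references. A hypothetical handler following that contract:

```python
def my_ref_handler(old_statement, new_statement):
    # Keep the references already present on the Wikibase statement and
    # append any new ones; values and qualifiers must stay untouched.
    refs = old_statement.get_references()
    for ref in new_statement.get_references():
        if ref not in refs:
            refs.append(ref)
    old_statement.set_references(refs)
    return old_statement

# item = ItemEngine(item_id='Q42', ref_handler=my_ref_handler, global_ref_mode='CUSTOM')
```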
- :type core_prop_match_thresh: float - :param property_constraint_pid: - :param distinct_values_constraint_qid: - :param fast_run_case_insensitive: - :param debug: Enable debug output. - :type debug: boolean - """ - - self.core_prop_match_thresh = core_prop_match_thresh - self.item_id = item_id - self.new_item = new_item - self.mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - self.sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if sparql_endpoint_url is None else sparql_endpoint_url - self.wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url - self.property_constraint_pid = config['PROPERTY_CONSTRAINT_PID'] if property_constraint_pid is None else property_constraint_pid - self.distinct_values_constraint_qid = config['DISTINCT_VALUES_CONSTRAINT_QID'] if distinct_values_constraint_qid is None else distinct_values_constraint_qid - if data is None: - self.data = [] - elif isinstance(data, list) and all(isinstance(x, BaseDataType) for x in data): - self.data = data - elif isinstance(data, BaseDataType): - self.data = [data] - else: - raise TypeError("`data` must be a list of BaseDataType or an instance of BaseDataType") - self.fast_run = fast_run - self.fast_run_base_filter = fast_run_base_filter - self.fast_run_use_refs = fast_run_use_refs - self.fast_run_case_insensitive = fast_run_case_insensitive - self.ref_handler = ref_handler - self.global_ref_mode = global_ref_mode - self.good_refs = good_refs - self.keep_good_ref_statements = keep_good_ref_statements - self.search_only = search_only - self.item_data = item_data - self.user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent - - self.create_new_item = False - self.json_representation = {} - self.statements = [] - self.original_statements = [] - self.entity_metadata = {} - self.fast_run_container = None - if self.search_only: - self.require_write = False - else: - self.require_write = True - self.sitelinks = {} - self.lastrevid = None # stores last revisionid after a write occurs - - self.debug = debug - - if fast_run_case_insensitive and not self.search_only: - raise ValueError("If using fast run case insensitive, search_only must be set") - - if self.ref_handler and not callable(self.ref_handler): - raise TypeError("ref_handler must be callable") - if self.global_ref_mode == 'CUSTOM' and self.ref_handler is None: - raise ValueError("If using a custom ref mode, ref_handler must be set") - - if (core_props is None) and (self.sparql_endpoint_url not in ItemEngine.distinct_value_props): - ItemEngine.distinct_value_props[self.sparql_endpoint_url] = wbi_functions.get_distinct_value_props(self.sparql_endpoint_url, - self.wikibase_url, - self.property_constraint_pid, - self.distinct_values_constraint_qid) - self.core_props = core_props if core_props is not None else ItemEngine.distinct_value_props[self.sparql_endpoint_url] - - if self.fast_run: - self.init_fastrun() - if self.debug: - if self.require_write: - if self.search_only: - print("Successful fastrun, search_only mode, we can't determine if data is up to date.") - else: - print("Successful fastrun, because no full data match you need to update the item.") - else: - print("Successful fastrun, no write to Wikibase instance required.") - - if self.item_id != '' and self.create_new_item: - raise IDMissingError("Cannot create a new item, when an identifier is given.") - elif self.new_item and len(self.data) > 0: - self.create_new_item = True - self.__construct_claim_json() - elif 
self.require_write or self.search_only: - self.init_data_load() - - def init_data_load(self): - if self.item_id and self.item_data: - if self.debug: - print("Load item " + self.item_id + " from item_data") - self.json_representation = self.parse_json(self.item_data) - elif self.item_id: - if self.debug: - print("Load item " + self.item_id + " from MW API from item_id") - self.json_representation = self.get_entity() - else: - if self.debug: - print("Try to guess item QID from props") - qids_by_props = '' - try: - qids_by_props = self.__select_item() - except SearchError as e: - print("ERROR init_data_load: " + str(e)) - - if qids_by_props: - self.item_id = qids_by_props - if self.debug: - print("Item ID guessed is " + self.item_id) - print("Load item " + self.item_id + " from MW API") - self.json_representation = self.get_entity() - self.__check_integrity() - - if not self.search_only: - self.__construct_claim_json() - else: - self.data = [] - - def init_fastrun(self): - # We search if we already have a FastRunContainer with the same parameters to re-use it - for c in ItemEngine.fast_run_store: - if (c.base_filter == self.fast_run_base_filter) and (c.use_refs == self.fast_run_use_refs) and (c.sparql_endpoint_url == self.sparql_endpoint_url): - self.fast_run_container = c - self.fast_run_container.ref_handler = self.ref_handler - self.fast_run_container.current_qid = '' - self.fast_run_container.base_data_type = BaseDataType - self.fast_run_container.engine = self.__class__ - self.fast_run_container.mediawiki_api_url = self.mediawiki_api_url - self.fast_run_container.wikibase_url = self.wikibase_url - self.fast_run_container.debug = self.debug - if self.debug: - print("Found an already existing FastRunContainer") - - if not self.fast_run_container: - self.fast_run_container = FastRunContainer(base_filter=self.fast_run_base_filter, - base_data_type=BaseDataType, - engine=self.__class__, - sparql_endpoint_url=self.sparql_endpoint_url, - mediawiki_api_url=self.mediawiki_api_url, - wikibase_url=self.wikibase_url, - use_refs=self.fast_run_use_refs, - ref_handler=self.ref_handler, - case_insensitive=self.fast_run_case_insensitive, - debug=self.debug) - ItemEngine.fast_run_store.append(self.fast_run_container) - - if not self.search_only: - self.require_write = self.fast_run_container.write_required(self.data, cqid=self.item_id) - # set item id based on fast run data - if not self.require_write and not self.item_id: - self.item_id = self.fast_run_container.current_qid - else: - self.fast_run_container.load_item(self.data) - # set item id based on fast run data - if not self.item_id: - self.item_id = self.fast_run_container.current_qid - - def parse_json(self, json_data): - """ - Parses an entity json and generates the datatype objects, sets self.json_representation - :param json_data: the json of an entity - :type json_data: A Python Json representation of an item - :return: returns the json representation containing 'labels', 'descriptions', 'claims', 'aliases', 'sitelinks'. 
- """ - - data = {x: json_data[x] for x in ('labels', 'descriptions', 'claims', 'aliases') if x in json_data} - data['sitelinks'] = {} - self.entity_metadata = {x: json_data[x] for x in json_data if x not in ('labels', 'descriptions', 'claims', 'aliases', 'sitelinks')} - self.sitelinks = json_data.get('sitelinks', {}) - - self.statements = [] - for prop in data['claims']: - for z in data['claims'][prop]: - data_type = [x for x in BaseDataType.__subclasses__() if x.DTYPE == z['mainsnak']['datatype']][0] - statement = data_type.from_json(z) - self.statements.append(statement) - - self.json_representation = data - self.original_statements = copy.deepcopy(self.statements) - - return data - - def update(self, data): - """ - This method takes data, and modifies the Wikidata item. This works together with the data already provided via the constructor or if the constructor is - being instantiated with search_only=True. In the latter case, this allows for checking the item data before deciding which new data should be written to - the Wikidata item. The actual write to Wikidata only happens on calling of the write() method. If data has been provided already via the constructor, - data provided via the update() method will be appended to these data. - :param data: A list of Wikidata statement items inheriting from BaseDataType - :type data: list - """ - - if self.search_only: - raise SearchOnlyError - - assert type(data) == list - - self.data.extend(data) - self.statements = copy.deepcopy(self.original_statements) - - if self.debug: - print(self.data) - - if self.fast_run: - self.init_fastrun() - - if self.require_write and self.fast_run: - self.init_data_load() - self.__construct_claim_json() - self.__check_integrity() - elif not self.fast_run: - self.__construct_claim_json() - self.__check_integrity() - - def get_entity(self): - """ - retrieve an item in json representation from the Wikibase instance - :rtype: dict - :return: python complex dictionary representation of a json - """ - - params = { - 'action': 'wbgetentities', - 'sites': 'enwiki', - 'ids': self.item_id, - 'format': 'json' - } - - json_data = wbi_functions.mediawiki_api_call_helper(data=params, allow_anonymous=True) - return self.parse_json(json_data=json_data['entities'][self.item_id]) - - def get_property_list(self): - """ - List of properties on the current item - :return: a list of property ID strings (Pxxxx). 
- """ - - property_list = set() - for x in self.statements: - property_list.add(x.get_prop_nr()) - - return list(property_list) - - def get_json_representation(self): - """ - A method to access the internal json representation of the item, mainly for testing - :return: returns a Python json representation object of the item at the current state of the instance - """ - - return self.json_representation - - def get_label(self, lang=None): - """ - Returns the label for a certain language - :param lang: - :type lang: str - :return: returns the label in the specified language, an empty string if the label does not exist - """ - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if self.fast_run: - return list(self.fast_run_container.get_language_data(self.item_id, lang, 'label'))[0] - try: - return self.json_representation['labels'][lang]['value'] - except KeyError: - return '' - - def set_label(self, label, lang=None, if_exists='REPLACE'): - """ - Set the label for an item in a certain language - :param label: The label of the item in a certain language or None to remove the label in that language - :type label: str or None - :param lang: The language a label should be set for. - :type lang: str - :param if_exists: If a label already exist, 'REPLACE' it or 'KEEP' it - :return: None - """ - - if self.search_only: - raise SearchOnlyError - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if if_exists not in ('KEEP', 'REPLACE'): - raise ValueError(f"{if_exists} is not a valid value for if_exists (REPLACE or KEEP)") - - # Skip set_label if the item already have one and if_exists is at 'KEEP' - if if_exists == 'KEEP': - if lang in self.json_representation['labels']: - return - - if self.fast_run_container and self.fast_run_container.get_language_data(self.item_id, lang, 'label') != ['']: - return - - if self.fast_run and not self.require_write: - self.require_write = self.fast_run_container.check_language_data(qid=self.item_id, lang_data=[label], lang=lang, lang_data_type='label') - if self.require_write: - self.init_data_load() - else: - return - - if 'labels' not in self.json_representation or not self.json_representation['labels']: - self.json_representation['labels'] = {} - - if label is None: - self.json_representation['labels'][lang] = { - 'language': lang, - 'remove': '' - } - else: - self.json_representation['labels'][lang] = { - 'language': lang, - 'value': label - } - - def get_aliases(self, lang=None): - """ - Retrieve the aliases in a certain language - :param lang: The language the description should be retrieved for - :return: Returns a list of aliases, an empty list if none exist for the specified language - """ - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if self.fast_run: - return list(self.fast_run_container.get_language_data(self.item_id, lang, 'aliases')) - - alias_list = [] - if 'aliases' in self.json_representation and lang in self.json_representation['aliases']: - for alias in self.json_representation['aliases'][lang]: - alias_list.append(alias['value']) - - return alias_list - - def set_aliases(self, aliases, lang=None, if_exists='APPEND'): - """ - set the aliases for an item - :param aliases: a string or a list of strings representing the aliases of an item - :param lang: The language a description should be set for - :param if_exists: If aliases already exist, APPEND or REPLACE - :return: None - """ - - if self.search_only: - raise SearchOnlyError - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if 
isinstance(aliases, str): - aliases = [aliases] - if not isinstance(aliases, list): - raise TypeError("aliases must be a list or a string") - - if if_exists != 'APPEND' and if_exists != 'REPLACE': - raise ValueError(f"{if_exists} is not a valid value for if_exists (REPLACE or APPEND)") - - if self.fast_run and not self.require_write: - self.require_write = self.fast_run_container.check_language_data(qid=self.item_id, lang_data=aliases, lang=lang, lang_data_type='aliases', if_exists=if_exists) - if self.require_write: - self.init_data_load() - else: - return - - if 'aliases' not in self.json_representation: - self.json_representation['aliases'] = {} - - if if_exists == 'REPLACE' or lang not in self.json_representation['aliases']: - self.json_representation['aliases'][lang] = [] - for alias in aliases: - self.json_representation['aliases'][lang].append({ - 'language': lang, - 'value': alias - }) - else: - for alias in aliases: - found = False - for current_aliases in self.json_representation['aliases'][lang]: - if alias.strip().casefold() != current_aliases['value'].strip().casefold(): - continue - else: - found = True - break - - if not found: - self.json_representation['aliases'][lang].append({ - 'language': lang, - 'value': alias - }) - - def get_description(self, lang=None): - """ - Retrieve the description in a certain language - :param lang: The language the description should be retrieved for - :return: Returns the description string - """ - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if self.fast_run: - return list(self.fast_run_container.get_language_data(self.item_id, lang, 'description'))[0] - if 'descriptions' not in self.json_representation or lang not in self.json_representation['descriptions']: - return '' - else: - return self.json_representation['descriptions'][lang]['value'] - - def set_description(self, description, lang=None, if_exists='REPLACE'): - """ - Set the description for an item in a certain language - :param description: The description of the item in a certain language - :type description: str - :param lang: The language a description should be set for. - :type lang: str - :param if_exists: If a description already exist, REPLACE it or KEEP it. 
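For the terms API removed here, the if_exists flag decided whether existing values survived an update. A sketch against the legacy ItemEngine; Q42 and the strings are illustrative:

```python
from wikibaseintegrator.wbi_core import ItemEngine  # module removed by this diff

item = ItemEngine(item_id='Q42')

item.set_label('Douglas Adams', lang='en', if_exists='KEEP')  # no-op if an English label exists
item.set_description('English writer', lang='en')             # if_exists defaults to 'REPLACE'
item.set_aliases(['DNA'], lang='en', if_exists='APPEND')      # case-insensitive duplicates are skipped
```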
- :return: None - """ - - if self.search_only: - raise SearchOnlyError - - lang = config['DEFAULT_LANGUAGE'] if lang is None else lang - - if if_exists != 'KEEP' and if_exists != 'REPLACE': - raise ValueError(f"{if_exists} is not a valid value for if_exists (REPLACE or KEEP)") - - # Skip set_description if the item already have one and if_exists is at 'KEEP' - if if_exists == 'KEEP': - if self.get_description(lang): - return - - if self.fast_run_container and self.fast_run_container.get_language_data(self.item_id, lang, 'description') != ['']: - return - - if self.fast_run and not self.require_write: - self.require_write = self.fast_run_container.check_language_data(qid=self.item_id, lang_data=[description], lang=lang, lang_data_type='description') - if self.require_write: - self.init_data_load() - else: - return - - if 'descriptions' not in self.json_representation or not self.json_representation['descriptions']: - self.json_representation['descriptions'] = {} - - self.json_representation['descriptions'][lang] = { - 'language': lang, - 'value': description - } - - def get_sitelink(self, site): - """ - A method to access the interwiki links in the json.model - :param site: The Wikipedia site the interwiki/sitelink should be returned for - :return: The interwiki/sitelink string for the specified Wikipedia will be returned. - """ - - if site in self.sitelinks: - return self.sitelinks[site] - else: - return None - - def set_sitelink(self, site, title, badges=()): - """ - Set sitelinks to corresponding Wikipedia pages - :param site: The Wikipedia page a sitelink is directed to (e.g. 'enwiki') - :param title: The title of the Wikipedia page the sitelink is directed to - :param badges: An iterable containing Wikipedia badge strings. - :return: - """ - - if self.search_only: - raise SearchOnlyError - - sitelink = { - 'site': site, - 'title': title, - 'badges': badges - } - self.json_representation['sitelinks'][site] = sitelink - self.sitelinks[site] = sitelink - - def count_references(self, prop_id): - counts = {} - for claim in self.get_json_representation()['claims'][prop_id]: - counts[claim['id']] = len(claim['references']) - return counts - - def get_reference_properties(self, prop_id): - references = [] - statements = [x for x in self.get_json_representation()['claims'][prop_id] if 'references' in x] - for statement in statements: - for reference in statement['references']: - references.append(reference['snaks'].keys()) - return references - - def get_qualifier_properties(self, prop_id): - qualifiers = [] - for statements in self.get_json_representation()['claims'][prop_id]: - qualifiers.append(statements['qualifiers'].keys()) - return qualifiers - - def write(self, login, bot_account=True, edit_summary='', entity_type='item', property_datatype='string', max_retries=1000, retry_after=60, all_claims=False, - allow_anonymous=False): - """ - Writes the item Json to the Wikibase instance and after successful write, updates the object with new ids and hashes generated by the Wikibase instance. - For new items, also returns the new QIDs. - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param bot_account: Tell the Wikidata API whether the script should be run as part of a bot account or not. - :type bot_account: bool - :param edit_summary: A short (max 250 characters) summary of the purpose of the edit. This will be displayed as the revision summary of the item. 
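A sketch of the removed write() flow, assuming the pre-rewrite wbi_login.Login(user=..., pwd=...) signature from the same release; the credentials are placeholders and item is an ItemEngine instance as in the previous sketch:

```python
from wikibaseintegrator import wbi_login

login = wbi_login.Login(user='MyBot@MyBot', pwd='bot_password')
qid = item.write(login, edit_summary='Update protein statements', bot_account=True)
# write() returns the entity ID and stores the new lastrevid on the instance
```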
- :type edit_summary: str - :param entity_type: Decides whether the object will become a 'form', 'item' (default), 'lexeme', 'property' or 'sense' - :type entity_type: str - :param property_datatype: When payload_type is 'property' then this parameter set the datatype for the property - :type property_datatype: str - :param max_retries: If api request fails due to rate limiting, maxlag, or readonly mode, retry up to `max_retries` times - :type max_retries: int - :param retry_after: Number of seconds to wait before retrying request (see max_retries) - :type retry_after: int - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :return: the entity ID on successful write - """ - - if self.search_only: - raise SearchOnlyError - - if not self.require_write: - return self.item_id - - if entity_type == 'property': - self.json_representation['datatype'] = property_datatype - if 'sitelinks' in self.json_representation: - del self.json_representation['sitelinks'] - - if all_claims: - data = json.JSONEncoder().encode(self.json_representation) - else: - new_json_repr = {k: self.json_representation[k] for k in set(list(self.json_representation.keys())) - {'claims'}} - new_json_repr['claims'] = {} - for claim in self.json_representation['claims']: - if [True for x in self.json_representation['claims'][claim] if 'id' not in x or 'remove' in x]: - new_json_repr['claims'][claim] = copy.deepcopy(self.json_representation['claims'][claim]) - for statement in new_json_repr['claims'][claim]: - if 'id' in statement and 'remove' not in statement: - new_json_repr['claims'][claim].remove(statement) - if not new_json_repr['claims'][claim]: - new_json_repr['claims'].pop(claim) - data = json.JSONEncoder().encode(new_json_repr) - - payload = { - 'action': 'wbeditentity', - 'data': data, - 'format': 'json', - 'token': login.get_edit_token(), - 'summary': edit_summary - } - - if config['MAXLAG'] > 0: - payload.update({'maxlag': config['MAXLAG']}) - - if bot_account: - payload.update({'bot': ''}) - - if self.create_new_item: - payload.update({'new': entity_type}) - else: - payload.update({'id': self.item_id}) - - if self.debug: - print(payload) - - try: - json_data = wbi_functions.mediawiki_api_call_helper(data=payload, login=login, max_retries=max_retries, retry_after=retry_after, allow_anonymous=allow_anonymous) - - if 'error' in json_data and 'messages' in json_data['error']: - error_msg_names = {x.get('name') for x in json_data['error']['messages']} - if 'wikibase-validator-label-with-description-conflict' in error_msg_names: - raise NonUniqueLabelDescriptionPairError(json_data) - else: - raise MWApiError(json_data) - elif 'error' in json_data.keys(): - raise MWApiError(json_data) - except Exception: - print('Error while writing to the Wikibase instance') - raise - - # after successful write, update this object with latest json, QID and parsed data types. 
- self.create_new_item = False - self.item_id = json_data['entity']['id'] - self.parse_json(json_data=json_data['entity']) - self.data = [] - if 'success' in json_data and 'entity' in json_data and 'lastrevid' in json_data['entity']: - self.lastrevid = json_data['entity']['lastrevid'] - return self.item_id - - def __check_integrity(self): - """ - A method to check if when invoking __select_item() and the item does not exist yet, but another item - has a property of the current domain with a value like submitted in the data dict, this item does not get - selected but a ManualInterventionReqException() is raised. This check is dependent on the core identifiers - of a certain domain. - :return: boolean True if test passed - """ - - # all core props - wbi_core_props = self.core_props - # core prop statements that exist on the item - cp_statements = [x for x in self.statements if x.get_prop_nr() in wbi_core_props] - item_core_props = {x.get_prop_nr() for x in cp_statements} - # core prop statements we are loading - cp_data = [x for x in self.data if x.get_prop_nr() in wbi_core_props] - - # compare the claim values of the currently loaded QIDs to the data provided in self.data - # this is the number of core_ids in self.data that are also on the item - count_existing_ids = len([x for x in self.data if x.get_prop_nr() in item_core_props]) - - core_prop_match_count = 0 - for new_stat in self.data: - for stat in self.statements: - if (new_stat.get_prop_nr() == stat.get_prop_nr()) and (new_stat.get_value() == stat.get_value()) and ( - new_stat.get_prop_nr() in item_core_props): - core_prop_match_count += 1 - - if core_prop_match_count < count_existing_ids * self.core_prop_match_thresh: - existing_core_pv = defaultdict(set) - for s in cp_statements: - existing_core_pv[s.get_prop_nr()].add(s.get_value()) - new_core_pv = defaultdict(set) - for s in cp_data: - new_core_pv[s.get_prop_nr()].add(s.get_value()) - nomatch_existing = {k: v - new_core_pv[k] for k, v in existing_core_pv.items()} - nomatch_existing = {k: v for k, v in nomatch_existing.items() if v} - nomatch_new = {k: v - existing_core_pv[k] for k, v in new_core_pv.items()} - nomatch_new = {k: v for k, v in nomatch_new.items() if v} - raise CorePropIntegrityException("Retrieved item ({}) does not match provided core IDs. " - "Matching count {}, non-matching count {}. " - .format(self.item_id, core_prop_match_count, - count_existing_ids - core_prop_match_count) + - f"existing unmatched core props: {nomatch_existing}. 
" + - f"statement unmatched core props: {nomatch_new}.") - else: - return True - - def __select_item(self): - """ - The most likely item QID should be returned, after querying the Wikibase instance for all values in core_id properties - :return: Either a single QID is returned, or an empty string if no suitable item in the Wikibase instance - """ - - qid_list = set() - conflict_source = {} - - for statement in self.data: - property_nr = statement.get_prop_nr() - - core_props = self.core_props - if property_nr in core_props: - tmp_qids = set() - query = statement.sparql_query.format(wb_url=self.wikibase_url, pid=property_nr, value=str(statement.get_sparql_value()).replace("'", r"\'")) - results = wbi_functions.execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url, debug=self.debug) - - for i in results['results']['bindings']: - qid = i['item_id']['value'].split('/')[-1] - tmp_qids.add(qid) - - qid_list.update(tmp_qids) - - # Protocol in what property the conflict arises - if property_nr in conflict_source: - conflict_source[property_nr].append(tmp_qids) - else: - conflict_source[property_nr] = [tmp_qids] - - if len(tmp_qids) > 1: - raise ManualInterventionReqException("More than one item has the same property value", property_nr, tmp_qids) - - if len(qid_list) == 0: - self.create_new_item = True - return '' - - if self.debug: - print(qid_list) - - unique_qids = set(qid_list) - if len(unique_qids) > 1: - raise ManualInterventionReqException("More than one item has the same property value", conflict_source, unique_qids) - elif len(unique_qids) == 1: - return list(unique_qids)[0] - - def __construct_claim_json(self): - """ - Writes the properties from self.data to a new or existing json in self.json_representation - :return: None - """ - - def handle_qualifiers(old_item, new_item): - if not new_item.check_qualifier_equality: - old_item.set_qualifiers(new_item.get_qualifiers()) - - def is_good_ref(ref_block): - prop_nrs = [x.get_prop_nr() for x in ref_block] - values = [x.get_value() for x in ref_block] - good_ref = True - prop_value_map = dict(zip(prop_nrs, values)) - - # if self.good_refs has content, use these to determine good references - if self.good_refs and len(self.good_refs) > 0: - found_good = True - for rblock in self.good_refs: - - if not all([k in prop_value_map for k, v in rblock.items()]): - found_good = False - - if not all([v in prop_value_map[k] for k, v in rblock.items() if v]): - found_good = False - - if found_good: - return True - - return False - - return good_ref - - def handle_references(old_item, new_item): - """ - Local function to handle references - :param old_item: An item containing the data as currently in the Wikibase instance - :type old_item: A child of BaseDataType - :param new_item: An item containing the new data which should be written to the Wikibase instance - :type new_item: A child of BaseDataType - """ - - old_references = old_item.get_references() - new_references = new_item.get_references() - - if sum(map(lambda z: len(z), old_references)) == 0 or self.global_ref_mode == 'STRICT_OVERWRITE': - old_item.set_references(new_references) - - elif self.global_ref_mode == 'STRICT_KEEP' or new_item.statement_ref_mode == 'STRICT_KEEP': - pass - - elif self.global_ref_mode == 'STRICT_KEEP_APPEND' or new_item.statement_ref_mode == 'STRICT_KEEP_APPEND': - old_references.extend(new_references) - old_item.set_references(old_references) - - elif self.global_ref_mode == 'CUSTOM' or new_item.statement_ref_mode == 'CUSTOM' and self.ref_handler and 
callable(self.ref_handler): - self.ref_handler(old_item, new_item) - - elif self.global_ref_mode == 'KEEP_GOOD' or new_item.statement_ref_mode == 'KEEP_GOOD': - # Copy only good_ref - refs = [x for x in old_references if is_good_ref(x)] - - # Don't add already existing references - for new_ref in new_references: - if new_ref not in old_references: - refs.append(new_ref) - - # Set the references - old_item.set_references(refs) - - # sort the incoming data according to the property number - self.data.sort(key=lambda z: z.get_prop_nr().lower()) - - # collect all statements which should be deleted because of an empty value - statements_for_deletion = [] - for item in self.data: - if isinstance(item, BaseDataType) and item.get_value() == '': - statements_for_deletion.append(item.get_prop_nr()) - - if self.create_new_item: - self.statements = copy.copy(self.data) - else: - for stat in self.data: - prop_nr = stat.get_prop_nr() - - prop_data = [x for x in self.statements if x.get_prop_nr() == prop_nr] - if prop_data and stat.if_exists == 'KEEP': - continue - prop_pos = [x.get_prop_nr() == prop_nr for x in self.statements] - prop_pos.reverse() - insert_pos = len(prop_pos) - (prop_pos.index(True) if any(prop_pos) else 0) - - # If value should be appended, check if values exists, if not, append - if 'APPEND' in stat.if_exists: - equal_items = [stat == x for x in prop_data] - if True not in equal_items or stat.if_exists == 'FORCE_APPEND': - self.statements.insert(insert_pos + 1, stat) - else: - # if item exists, modify rank - current_item = prop_data[equal_items.index(True)] - current_item.set_rank(stat.get_rank()) - handle_references(old_item=current_item, new_item=stat) - handle_qualifiers(old_item=current_item, new_item=stat) - continue - - # set all existing values of a property for removal - for x in prop_data: - # for deletion of single statements, do not set all others to delete - if hasattr(stat, 'remove'): - break - elif x.get_id() and not hasattr(x, 'retain'): - # keep statements with good references if keep_good_ref_statements is True - if self.keep_good_ref_statements: - if any([is_good_ref(r) for r in x.get_references()]): - setattr(x, 'retain', '') - else: - setattr(x, 'remove', '') - - match = [] - for i in prop_data: - if stat == i and hasattr(stat, 'remove'): - match.append(True) - setattr(i, 'remove', '') - elif stat == i: - match.append(True) - setattr(i, 'retain', '') - if hasattr(i, 'remove'): - delattr(i, 'remove') - handle_references(old_item=i, new_item=stat) - handle_qualifiers(old_item=i, new_item=stat) - - i.set_rank(rank=stat.get_rank()) - # if there is no value, do not add an element, this is also used to delete whole properties. 
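The empty-value deletion convention handled above can be sketched with the legacy API; the PID is illustrative:

```python
from wikibaseintegrator.wbi_datatype import ExternalID  # module removed by this diff

# A statement with an empty value marks every existing statement on P352
# for removal when write() is next called on the item.
item.update([ExternalID(value='', prop_nr='P352')])
```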
- elif i.get_value(): - match.append(False) - - if True not in match and not hasattr(stat, 'remove'): - self.statements.insert(insert_pos + 1, stat) - - # For whole property deletions, add remove flag to all statements which should be deleted - for item in copy.deepcopy(self.statements): - if item.get_prop_nr() in statements_for_deletion: - if item.get_id() != '': - setattr(item, 'remove', '') - else: - self.statements.remove(item) - - # regenerate claim json - self.json_representation['claims'] = {} - for stat in self.statements: - prop_nr = stat.get_prop_nr() - if prop_nr not in self.json_representation['claims']: - self.json_representation['claims'][prop_nr] = [] - self.json_representation['claims'][prop_nr].append(stat.get_json_representation()) - - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs="\r\n\t ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) diff --git a/wikibaseintegrator/wbi_datatype.py b/wikibaseintegrator/wbi_datatype.py deleted file mode 100644 index 4cdfa279..00000000 --- a/wikibaseintegrator/wbi_datatype.py +++ /dev/null @@ -1,1676 +0,0 @@ -import copy -import datetime -import re - -from wikibaseintegrator.wbi_config import config -from wikibaseintegrator.wbi_jsonparser import JsonParser - - -class BaseDataType: - """ - The base class for all Wikibase data types, they inherit from it - """ - DTYPE = 'base-data-type' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> '{value}' . - }} - ''' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, will be called by all data types. - :param value: Data value of the Wikibase data snak - :type value: str or int or tuple - :param prop_nr: The property number a Wikibase snak belongs to - :type prop_nr: A string with a prefixed 'P' and several digits e.g. 'P715' (Drugbank ID) or an int - :param data_type: The Wikibase data type declaration of this snak - :type data_type: str - :param snak_type: The snak type of the Wikibase data snak, three values possible, depending if the value is a known (value), not existent (novalue) or - unknown (somevalue). See Wikibase documentation. - :type snak_type: a str of either 'value', 'novalue' or 'somevalue' - :param references: A one level nested list with reference Wikibase snaks of base type BaseDataType, - e.g. references=[[, ], []] - This will create two references, the first one with two statements, the second with one - :type references: A one level nested list with instances of BaseDataType or children of it. - :param qualifiers: A list of qualifiers for the Wikibase mainsnak - :type qualifiers: A list with instances of BaseDataType or children of it. - :param is_reference: States if the snak is a reference, mutually exclusive with qualifier - :type is_reference: boolean - :param is_qualifier: States if the snak is a qualifier, mutually exlcusive with reference - :type is_qualifier: boolean - :param rank: The rank of a Wikibase mainsnak, should determine the status of a value - :type rank: A string of one of three allowed values: 'normal', 'deprecated', 'preferred' - :param check_qualifier_equality: When comparing two objects, test if qualifiers are equals between them. Default to true. - :type check_qualifier_equality: boolean - :param if_exists: Replace or append the statement. You can force an append if the statement already exists. 
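The one-level nesting of references described in the constructor docstring above can be made concrete with the ExternalID subclass defined later in this file; the property numbers and values are illustrative:

```python
from wikibaseintegrator.wbi_datatype import ExternalID  # module removed by this diff

# Two reference blocks: the first holds two snaks, the second holds one
ref_block_1 = [ExternalID(value='Q905695', prop_nr='P248', is_reference=True),
               ExternalID(value='P35367', prop_nr='P352', is_reference=True)]
ref_block_2 = [ExternalID(value='12345', prop_nr='P698', is_reference=True)]

statement = ExternalID(value='P35367', prop_nr='P352',
                       references=[ref_block_1, ref_block_2],
                       qualifiers=[ExternalID(value='Uniprot', prop_nr='P1810', is_qualifier=True)])
```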
- :type if_exists: A string of one of three allowed values: 'REPLACE', 'APPEND', 'FORCE_APPEND', 'KEEP' - :return: - """ - - self.value = value - self.data_type = kwargs.pop('data_type', self.DTYPE) - self.snak_type = kwargs.pop('snak_type', 'value') - self.references = kwargs.pop('references', None) - self.qualifiers = kwargs.pop('qualifiers', None) - self.is_reference = kwargs.pop('is_reference', None) - self.is_qualifier = kwargs.pop('is_qualifier', None) - self.rank = kwargs.pop('rank', 'normal') - self.check_qualifier_equality = kwargs.pop('check_qualifier_equality', True) - self.if_exists = kwargs.pop('if_exists', 'REPLACE') - - self._statement_ref_mode = 'KEEP_GOOD' - - if not self.references: - self.references = [] - else: - if isinstance(self.references, BaseDataType): - self.references = [[self.references]] - - for ref_list in self.references: - if isinstance(ref_list, BaseDataType): - ref_list = [ref_list] - for reference in ref_list: - if not isinstance(reference, BaseDataType): - raise ValueError('A reference must be an instance of class BaseDataType.') - - if reference.is_reference is False: - raise ValueError('A reference can\'t be declared as is_reference=False') - elif reference.is_reference is None: - reference.is_reference = True - - if not self.qualifiers: - self.qualifiers = [] - else: - if isinstance(self.qualifiers, BaseDataType): - self.qualifiers = [self.qualifiers] - - for qualifier in self.qualifiers: - if not isinstance(qualifier, BaseDataType): - raise ValueError('A qualifier must be an instance of class BaseDataType.') - if qualifier.is_qualifier is False: - raise ValueError('A qualifier can\'t be declared as is_qualifier=False') - elif qualifier.is_qualifier is None: - qualifier.is_qualifier = True - - if isinstance(prop_nr, int): - self.prop_nr = 'P' + str(prop_nr) - else: - pattern = re.compile(r'^P?([0-9]+)$') - matches = pattern.match(prop_nr) - - if not matches: - raise ValueError(f'Invalid prop_nr, format must be "P[0-9]+", got {prop_nr}') - else: - self.prop_nr = 'P' + str(matches.group(1)) - - # Internal ID and hash are issued by the Wikibase instance - self.id = '' - self.hash = '' - - self.json_representation = { - 'snaktype': self.snak_type, - 'property': self.prop_nr, - 'datavalue': {}, - 'datatype': self.data_type - } - - if self.snak_type not in ['value', 'novalue', 'somevalue']: - raise ValueError(f'{self.snak_type} is not a valid snak type') - - if self.if_exists not in ['REPLACE', 'APPEND', 'FORCE_APPEND', 'KEEP']: - raise ValueError(f'{self.if_exists} is not a valid if_exists value') - - if self.value is None and self.snak_type == 'value': - raise ValueError('Parameter \'value\' can\'t be \'None\' if \'snak_type\' is \'value\'') - - if self.is_qualifier and self.is_reference: - raise ValueError('A claim cannot be a reference and a qualifer at the same time') - if (len(self.references) > 0 or len(self.qualifiers) > 0) and (self.is_qualifier or self.is_reference): - raise ValueError('Qualifiers or references cannot have references or qualifiers') - - def has_equal_qualifiers(self, other): - # check if the qualifiers are equal with the 'other' object - equal_qualifiers = True - self_qualifiers = copy.deepcopy(self.get_qualifiers()) - other_qualifiers = copy.deepcopy(other.get_qualifiers()) - - if len(self_qualifiers) != len(other_qualifiers): - equal_qualifiers = False - else: - flg = [False for _ in range(len(self_qualifiers))] - for count, i in enumerate(self_qualifiers): - for q in other_qualifiers: - if i == q: - flg[count] = True - if not 
all(flg): - equal_qualifiers = False - - return equal_qualifiers - - def __eq__(self, other): - equal_qualifiers = self.has_equal_qualifiers(other) - equal_values = self.get_value() == other.get_value() and self.get_prop_nr() == other.get_prop_nr() - - if not (self.check_qualifier_equality and other.check_qualifier_equality) and equal_values: - return True - elif equal_values and equal_qualifiers: - return True - else: - return False - - @property - def statement_ref_mode(self): - return self._statement_ref_mode - - @statement_ref_mode.setter - def statement_ref_mode(self, value): - """Set the reference mode for a statement, always overrides the global reference state.""" - valid_values = ['STRICT_KEEP', 'STRICT_KEEP_APPEND', 'STRICT_OVERWRITE', 'KEEP_GOOD', 'CUSTOM'] - if value not in valid_values: - raise ValueError('Not an allowed reference mode, allowed values {}'.format(' '.join(valid_values))) - - self._statement_ref_mode = value - - def get_value(self): - return self.value - - def get_sparql_value(self): - return self.value - - def set_value(self, value): - if value is None and self.snak_type not in {'novalue', 'somevalue'}: - raise ValueError("If 'value' is None, snak_type must be novalue or somevalue") - if self.snak_type in {'novalue', 'somevalue'}: - del self.json_representation['datavalue'] - elif 'datavalue' not in self.json_representation: - self.json_representation['datavalue'] = {} - - self.value = value - - def get_references(self): - return self.references - - def set_references(self, references): - if len(references) > 0 and (self.is_qualifier or self.is_reference): - raise ValueError("Qualifiers or references cannot have references") - - # Force clean duplicate references - temp_references = [] - for reference in references: - if reference not in temp_references: - temp_references.append(reference) - references = temp_references - - self.references = references - - def get_qualifiers(self): - return self.qualifiers - - def set_qualifiers(self, qualifiers): - # TODO: introduce a check to prevent duplicate qualifiers, those are not allowed in Wikibase - if len(qualifiers) > 0 and (self.is_qualifier or self.is_reference): - raise ValueError("Qualifiers or references cannot have qualifiers") - - self.qualifiers = qualifiers - - def get_rank(self): - if self.is_qualifier or self.is_reference: - return '' - else: - return self.rank - - def set_rank(self, rank): - if self.is_qualifier or self.is_reference: - raise ValueError("References or qualifiers do not have ranks") - - valid_ranks = ['normal', 'deprecated', 'preferred'] - - if rank not in valid_ranks: - raise ValueError(f"{rank} not a valid rank") - - self.rank = rank - - def get_id(self): - return self.id - - def set_id(self, claim_id): - self.id = claim_id - - def set_hash(self, claim_hash): - self.hash = claim_hash - - def get_hash(self): - return self.hash - - def get_prop_nr(self): - return self.prop_nr - - def set_prop_nr(self, prop_nr): - if prop_nr[0] != 'P': - raise ValueError("Invalid property number") - - self.prop_nr = prop_nr - - def get_json_representation(self): - if self.is_qualifier or self.is_reference: - tmp_json = { - self.prop_nr: [self.json_representation] - } - if self.hash != '' and self.is_qualifier: - self.json_representation.update({'hash': self.hash}) - - return tmp_json - else: - ref_json = [] - for count, ref in enumerate(self.references): - snaks_order = [] - snaks = {} - ref_json.append({ - 'snaks': snaks, - 'snaks-order': snaks_order - }) - for sub_ref in ref: - prop_nr = 
sub_ref.get_prop_nr() - # set the hash for the reference block - if sub_ref.get_hash() != '': - ref_json[count].update({'hash': sub_ref.get_hash()}) - tmp_json = sub_ref.get_json_representation() - - # if more reference values with the same property number, append to its specific property list. - if prop_nr in snaks: - snaks[prop_nr].append(tmp_json[prop_nr][0]) - else: - snaks.update(tmp_json) - snaks_order.append(prop_nr) - - qual_json = {} - qualifiers_order = [] - for qual in self.qualifiers: - prop_nr = qual.get_prop_nr() - if prop_nr in qual_json: - qual_json[prop_nr].append(qual.get_json_representation()[prop_nr][0]) - else: - qual_json.update(qual.get_json_representation()) - qualifiers_order.append(qual.get_prop_nr()) - - if hasattr(self, 'remove'): - statement = { - 'remove': '' - } - else: - statement = { - 'mainsnak': self.json_representation, - 'type': 'statement', - 'rank': self.rank - } - if qual_json: - statement['qualifiers'] = qual_json - if qualifiers_order: - statement['qualifiers-order'] = qualifiers_order - if ref_json: - statement['references'] = ref_json - if self.id != '': - statement.update({'id': self.id}) - - return statement - - @classmethod - @JsonParser - def from_json(cls, json_representation): - pass - - def equals(self, that, include_ref=False, fref=None): - """ - Tests for equality of two statements. - If comparing references, the order of the arguments matters!!! - self is the current statement, the next argument is the new statement. - Allows passing in a function to use to compare the references 'fref'. Default is equality. - fref accepts two arguments 'oldrefs' and 'newrefs', each of which are a list of references, - where each reference is a list of statements - """ - - if not include_ref: - # return the result of BaseDataType.__eq__, which is testing for equality of value and qualifiers - return self == that - else: - if self != that: - return False - if fref is None: - return BaseDataType.refs_equal(self, that) - else: - return fref(self, that) - - @staticmethod - def refs_equal(olditem, newitem): - """ - tests for exactly identical references - """ - - oldrefs = olditem.references - newrefs = newitem.references - - def ref_equal(oldref, newref): - return True if (len(oldref) == len(newref)) and all(x in oldref for x in newref) else False - - if len(oldrefs) == len(newrefs) and all(any(ref_equal(oldref, newref) for oldref in oldrefs) for newref in newrefs): - return True - else: - return False - - def __repr__(self): - """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( - klass=self.__class__.__name__, - id=id(self) & 0xFFFFFF, - attrs=" ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), - ) - - -class CommonsMedia(BaseDataType): - """ - Implements the Wikibase data type for Wikimedia commons media files - """ - DTYPE = 'commonsMedia' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The media file name from Wikimedia commons to be used as the value - :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass 
of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - self.value = None - - super().__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})" - self.value = value - - self.json_representation['datavalue'] = { - 'value': self.value, - 'type': 'string' - } - - super().set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) - - -class EDTF(BaseDataType): - """ - Implements the data type for Extended Date/Time Format (EDTF) extension. - More info: https://github.com/ProfessionalWiki/WikibaseEdtf - """ - DTYPE = 'edtf' - sparql_query = ''' - SELECT * WHERE {{ - ?item_id <{wb_url}/prop/{pid}> ?s . - ?s <{wb_url}/prop/statement/{pid}> '{value}'^^xsd:edtf . - }} - ''' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: Value using the Extended Date/Time Format (EDTF) - :type value: str with a 'P' prefix, followed by several digits or only the digits without the 'P' prefix - :param prop_nr: The property number for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue' - :type snak_type: str - :param references: List with reference objects - :type references: A data type with subclass of BaseDataType - :param qualifiers: List with qualifier objects - :type qualifiers: A data type with subclass of BaseDataType - :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated' - :type rank: str - """ - - super(EDTF, self).__init__(value=value, prop_nr=prop_nr, **kwargs) - - self.set_value(value) - - def set_value(self, value): - assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value) - self.value = value - - self.json_representation['datavalue'] = { - 'value': self.value, - 'type': 'string' - } - - super(EDTF, self).set_value(value=self.value) - - @classmethod - @JsonParser - def from_json(cls, jsn): - if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue': - return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype']) - return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property']) - - -class ExternalID(BaseDataType): - """ - Implements the Wikibase data type 'external-id' - """ - DTYPE = 'external-id' - - def __init__(self, value, prop_nr, **kwargs): - """ - Constructor, calls the superclass BaseDataType - :param value: The string to be used as the value - :type value: str or None - :param prop_nr: The item ID for this claim - :type prop_nr: str with a 'P' prefix followed by digits - :param is_reference: Whether this snak is a reference - :type is_reference: boolean - :param is_qualifier: Whether this snak is a qualifier - :type is_qualifier: boolean - :param snak_type: The snak type, either 'value', 'somevalue' or 
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"
-        self.value = value
-
-        self.json_representation['datavalue'] = {
-            'value': self.value,
-            'type': 'string'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property'])
-
-
-class Form(BaseDataType):
-    """
-    Implements the Wikibase data type 'wikibase-form'
-    """
-    DTYPE = 'wikibase-form'
-    sparql_query = '''
-        SELECT * WHERE {{
-          ?item_id <{wb_url}/prop/{pid}> ?s .
-          ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> .
-        }}
-    '''
-
-    def __init__(self, value, prop_nr, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: The form ID to serve as a value, using the format "L<lexeme ID>-F<form ID>" (example: L252248-F2)
-        :type value: str or None
-        :param prop_nr: The property number for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"
-        if value is None:
-            self.value = value
-        else:
-            pattern = re.compile(r'^L[0-9]+-F[0-9]+$')
-            matches = pattern.match(value)
-
-            if not matches:
-                raise ValueError(f"Invalid form ID ({value}), format must be 'L[0-9]+-F[0-9]+'")
-
-            self.value = value
-
-        self.json_representation['datavalue'] = {
-            'value': {
-                'entity-type': 'form',
-                'id': self.value
-            },
-            'type': 'wikibase-entityid'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value']['id'], prop_nr=jsn['property'])
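
For reference, what Form.set_value accepts and emits can be checked standalone. A minimal sketch using only the standard library (the helper name is hypothetical, not part of the library):

import json
import re


def form_datavalue(value: str) -> dict:
    # Mirrors Form.set_value above: validate the 'L...-F...' shape first,
    # then build the wikibase-entityid datavalue.
    if not re.match(r'^L[0-9]+-F[0-9]+$', value):
        raise ValueError(f"Invalid form ID ({value}), format must be 'L[0-9]+-F[0-9]+'")
    return {'value': {'entity-type': 'form', 'id': value}, 'type': 'wikibase-entityid'}


print(json.dumps(form_datavalue('L252248-F2')))
# {"value": {"entity-type": "form", "id": "L252248-F2"}, "type": "wikibase-entityid"}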
-
-
-class GeoShape(BaseDataType):
-    """
-    Implements the Wikibase data type 'geo-shape'
-    """
-    DTYPE = 'geo-shape'
-    sparql_query = '''
-        SELECT * WHERE {{
-          ?item_id <{wb_url}/prop/{pid}> ?s .
-          ?s <{wb_url}/prop/statement/{pid}> <{value}> .
-        }}
-    '''
-
-    def __init__(self, value, prop_nr, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: The GeoShape map file name on Wikimedia Commons to be linked
-        :type value: str or None
-        :param prop_nr: The property ID for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"
-        if value is None:
-            self.value = value
-        else:
-            # TODO: Need to check if the value is a full URL like http://commons.wikimedia.org/data/main/Data:Paris.map
-            pattern = re.compile(r'^Data:((?![:|#]).)+\.map$')
-            matches = pattern.match(value)
-            if not matches:
-                raise ValueError("Value must start with 'Data:' and end with '.map'. In addition, the title must not contain characters like colon, hash or pipe.")
-            self.value = value
-
-        self.json_representation['datavalue'] = {
-            'value': self.value,
-            'type': 'string'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property'])
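
The negative lookahead in the title check above is easy to misread, so here is a standalone illustration of the same pattern (hypothetical snippet, standard library only):

import re

# The pattern GeoShape.set_value compiles above: a 'Data:' page title ending
# in '.map', with no colon, pipe or hash anywhere in the title.
GEO_SHAPE_PATTERN = re.compile(r'^Data:((?![:|#]).)+\.map$')

for candidate in ('Data:Paris.map', 'Data:Bad|Title.map', 'Paris.map'):
    print(candidate, bool(GEO_SHAPE_PATTERN.match(candidate)))
# Data:Paris.map True
# Data:Bad|Title.map False
# Paris.map False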
-
-
-class GlobeCoordinate(BaseDataType):
-    """
-    Implements the Wikibase data type for globe coordinates
-    """
-    DTYPE = 'globe-coordinate'
-    sparql_query = '''
-        SELECT * WHERE {{
-          ?item_id <{wb_url}/prop/{pid}> ?s .
-          ?s <{wb_url}/prop/statement/{pid}> '{value}'^^geo:wktLiteral .
-        }}
-    '''
-
-    def __init__(self, latitude, longitude, precision, prop_nr, globe=None, wikibase_url=None, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param latitude: Latitude in decimal format
-        :type latitude: float or None
-        :param longitude: Longitude in decimal format
-        :type longitude: float or None
-        :param precision: Precision of the position measurement
-        :type precision: float or None
-        :param prop_nr: The property ID for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        globe = config['COORDINATE_GLOBE_QID'] if globe is None else globe
-        wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url
-
-        self.latitude = None
-        self.longitude = None
-        self.precision = None
-        self.globe = None
-
-        if globe.startswith('Q'):
-            globe = wikibase_url + '/entity/' + globe
-
-        value = (latitude, longitude, precision, globe)
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        # TODO: Introduce validity checks for coordinates, etc.
-        # TODO: Add check if latitude/longitude/precision is None
-        self.latitude, self.longitude, self.precision, self.globe = value
-
-        self.json_representation['datavalue'] = {
-            'value': {
-                'latitude': self.latitude,
-                'longitude': self.longitude,
-                'precision': self.precision,
-                'globe': self.globe
-            },
-            'type': 'globecoordinate'
-        }
-
-        self.value = (self.latitude, self.longitude, self.precision, self.globe)
-        super().set_value(value=self.value)
-
-    def get_sparql_value(self):
-        return 'Point(' + str(self.longitude) + ' ' + str(self.latitude) + ')'
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(latitude=None, longitude=None, precision=None, prop_nr=jsn['property'],
-                       snak_type=jsn['snaktype'])
-
-        value = jsn['datavalue']['value']
-        return cls(latitude=value['latitude'], longitude=value['longitude'], precision=value['precision'],
-                   prop_nr=jsn['property'])
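
A standalone sketch of the datavalue this class builds, assuming the Wikidata defaults where Q2 (Earth) is the globe (the helper name is hypothetical). Note that the WKT literal emitted by get_sparql_value puts longitude before latitude:

def globe_coordinate_datavalue(latitude, longitude, precision,
                               globe='http://www.wikidata.org/entity/Q2'):
    # Mirrors GlobeCoordinate.set_value above; as the TODOs note, the
    # coordinates themselves are not validated.
    return {
        'value': {'latitude': latitude, 'longitude': longitude,
                  'precision': precision, 'globe': globe},
        'type': 'globecoordinate',
    }


lat, lon = 48.8566, 2.3522
print(globe_coordinate_datavalue(lat, lon, 0.0001))
print(f'Point({lon} {lat})')  # the WKT form: Point(2.3522 48.8566)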
-
-
-class ItemID(BaseDataType):
-    """
-    Implements the Wikibase data type 'wikibase-item' with a value being another item ID
-    """
-    DTYPE = 'wikibase-item'
-    sparql_query = '''
-        SELECT * WHERE {{
-          ?item_id <{wb_url}/prop/{pid}> ?s .
-          ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/Q{value}> .
-        }}
-    '''
-
-    def __init__(self, value, prop_nr, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: The item ID to serve as the value
-        :type value: str with a 'Q' prefix, followed by several digits, or only the digits without the 'Q' prefix
-        :param prop_nr: The property ID for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, (str, int)) or value is None, f'Expected str or int, found {type(value)} ({value})'
-        if value is None:
-            self.value = value
-        elif isinstance(value, int):
-            self.value = value
-        else:
-            pattern = re.compile(r'^Q?([0-9]+)$')
-            matches = pattern.match(value)
-
-            if not matches:
-                raise ValueError(f"Invalid item ID ({value}), format must be 'Q[0-9]+'")
-            else:
-                self.value = int(matches.group(1))
-
-        self.json_representation['datavalue'] = {
-            'value': {
-                'entity-type': 'item',
-                'numeric-id': self.value,
-                'id': f'Q{self.value}'
-            },
-            'type': 'wikibase-entityid'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property'])
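
ItemID.set_value accepts 'Q42', '42' or the int 42 interchangeably. A minimal standalone sketch of that normalization (hypothetical helper, standard library only):

import re


def parse_item_id(value) -> int:
    # Mirrors ItemID.set_value above: accept an int, 'Q42' or '42'.
    if isinstance(value, int):
        return value
    matches = re.match(r'^Q?([0-9]+)$', value)
    if not matches:
        raise ValueError(f"Invalid item ID ({value}), format must be 'Q[0-9]+'")
    return int(matches.group(1))


assert parse_item_id(42) == parse_item_id('42') == parse_item_id('Q42') == 42
# The snak then stores both forms: {'numeric-id': 42, 'id': 'Q42'}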
-
-
-class LocalMedia(BaseDataType):
-    """
-    Implements the data type for Wikibase local media files.
-    The data type is introduced via the LocalMedia extension:
-    https://github.com/ProfessionalWiki/WikibaseLocalMedia
-    """
-    DTYPE = 'localMedia'
-
-    def __init__(self, value, prop_nr, is_reference=False, is_qualifier=False, snak_type='value', references=None,
-                 qualifiers=None, rank='normal', check_qualifier_equality=True):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: The media file name from the local MediaWiki to be used as the value
-        :type value: str
-        :param prop_nr: The property ID for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super(LocalMedia, self).__init__(value=value, snak_type=snak_type, data_type=self.DTYPE,
-                                         is_reference=is_reference, is_qualifier=is_qualifier, references=references,
-                                         qualifiers=qualifiers, rank=rank, prop_nr=prop_nr,
-                                         check_qualifier_equality=check_qualifier_equality)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, str) or value is None, "Expected str, found {} ({})".format(type(value), value)
-        self.json_representation['datavalue'] = {
-            'value': value,
-            'type': 'string'
-        }
-
-        super(LocalMedia, self).set_value(value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property'])
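
Every from_json in this file follows the same dispatch: honor 'novalue' and 'somevalue' snaks before reading a datavalue. A standalone sketch of that shared pattern (hypothetical helper, not part of the library):

def snak_value_or_none(jsn: dict):
    # 'novalue' and 'somevalue' snaks carry no datavalue, so the value stays None.
    if jsn['snaktype'] in ('novalue', 'somevalue'):
        return None, jsn['snaktype']
    return jsn['datavalue']['value'], 'value'


print(snak_value_or_none({'snaktype': 'somevalue', 'property': 'P18'}))
# (None, 'somevalue')
print(snak_value_or_none({'snaktype': 'value', 'property': 'P18',
                          'datavalue': {'value': 'Example.jpg', 'type': 'string'}}))
# ('Example.jpg', 'value')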
-
-
-class Lexeme(BaseDataType):
-    """
-    Implements the Wikibase data type 'wikibase-lexeme'
-    """
-    DTYPE = 'wikibase-lexeme'
-    sparql_query = '''
-        SELECT * WHERE {{
-          ?item_id <{wb_url}/prop/{pid}> ?s .
-          ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/L{value}> .
-        }}
-    '''
-
-    def __init__(self, value, prop_nr, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: The lexeme ID to serve as a value
-        :type value: str with an 'L' prefix, followed by several digits, or only the digits without the 'L' prefix
-        :param prop_nr: The property number for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, (str, int)) or value is None, f"Expected str or int, found {type(value)} ({value})"
-        if value is None:
-            self.value = value
-        elif isinstance(value, int):
-            self.value = value
-        else:
-            pattern = re.compile(r'^L?([0-9]+)$')
-            matches = pattern.match(value)
-
-            if not matches:
-                raise ValueError(f"Invalid lexeme ID ({value}), format must be 'L[0-9]+'")
-            else:
-                self.value = int(matches.group(1))
-
-        self.json_representation['datavalue'] = {
-            'value': {
-                'entity-type': 'lexeme',
-                'numeric-id': self.value,
-                'id': f'L{self.value}'
-            },
-            'type': 'wikibase-entityid'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property'])
-
-
-class Math(BaseDataType):
-    """
-    Implements the Wikibase data type 'math' for mathematical formulae in TeX format
-    """
-    DTYPE = 'math'
-
-    def __init__(self, value, prop_nr, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: The string to be used as the value
-        :type value: str or None
-        :param prop_nr: The property ID for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"
-        self.value = value
-
-        self.json_representation['datavalue'] = {
-            'value': self.value,
-            'type': 'string'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property'])
-
-
-class MonolingualText(BaseDataType):
-    """
-    Implements the Wikibase data type for monolingual text strings
-    """
-    DTYPE = 'monolingualtext'
-    sparql_query = '''
-        SELECT * WHERE {{
-          ?item_id <{wb_url}/prop/{pid}> ?s .
-          ?s <{wb_url}/prop/statement/{pid}> {value} .
-        }}
-    '''
-
-    def __init__(self, text, prop_nr, language=None, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param text: The language-specific string to be used as the value
-        :type text: str or None
-        :param prop_nr: The property ID for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param language: Specifies the language the value belongs to
-        :type language: str
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        self.text = None
-        self.language = config['DEFAULT_LANGUAGE'] if language is None else language
-
-        value = (text, self.language)
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        self.text, self.language = value
-        if self.text is not None:
-            assert isinstance(self.text, str) or self.text is None, f"Expected str, found {type(self.text)} ({self.text})"
-        elif self.snak_type == 'value':
-            raise ValueError("Parameter 'text' can't be 'None' if 'snak_type' is 'value'")
-        assert isinstance(self.language, str), f"Expected str, found {type(self.language)} ({self.language})"
-
-        self.json_representation['datavalue'] = {
-            'value': {
-                'text': self.text,
-                'language': self.language
-            },
-            'type': 'monolingualtext'
-        }
-
-        self.value = (self.text, self.language)
-        super().set_value(value=self.value)
-
-    def get_sparql_value(self):
-        return '"' + self.text.replace('"', r'\"') + '"@' + self.language
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(text=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-
-        value = jsn['datavalue']['value']
-        return cls(text=value['text'], prop_nr=jsn['property'], language=value['language'])
-
-
-class MusicalNotation(BaseDataType):
-    """
-    Implements the Wikibase data type 'musical-notation'
-    """
-    DTYPE = 'musical-notation'
-
-    def __init__(self, value, prop_nr, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: Values for this data type are strings describing music following the LilyPond syntax.
-        :type value: str or None
-        :param prop_nr: The property ID for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"
-        self.value = value
-
-        self.json_representation['datavalue'] = {
-            'value': self.value,
-            'type': 'string'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property'])
-
-
-class Property(BaseDataType):
-    """
-    Implements the Wikibase data type 'wikibase-property'
-    """
-    DTYPE = 'wikibase-property'
-    sparql_query = '''
-        SELECT * WHERE {{
-          ?item_id <{wb_url}/prop/{pid}> ?s .
-          ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/P{value}> .
-        }}
-    '''
-
-    def __init__(self, value, prop_nr, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: The property number to serve as a value
-        :type value: str with a 'P' prefix, followed by several digits, or only the digits without the 'P' prefix
-        :param prop_nr: The property number for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, (str, int)) or value is None, f"Expected str or int, found {type(value)} ({value})"
-        if value is None:
-            self.value = value
-        elif isinstance(value, int):
-            self.value = value
-        else:
-            pattern = re.compile(r'^P?([0-9]+)$')
-            matches = pattern.match(value)
-
-            if not matches:
-                raise ValueError(f"Invalid property ID ({value}), format must be 'P[0-9]+'")
-            else:
-                self.value = int(matches.group(1))
-
-        self.json_representation['datavalue'] = {
-            'value': {
-                'entity-type': 'property',
-                'numeric-id': self.value,
-                'id': f'P{self.value}'
-            },
-            'type': 'wikibase-entityid'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value']['numeric-id'], prop_nr=jsn['property'])
-
-
-class Quantity(BaseDataType):
-    """
-    Implements the Wikibase data type for quantities
-    """
-    DTYPE = 'quantity'
-    sparql_query = '''
-        SELECT * WHERE {{
-          ?item_id <{wb_url}/prop/{pid}> ?s .
-          ?s <{wb_url}/prop/statement/{pid}> '{value}'^^xsd:decimal .
-        }}
-    '''
-
-    def __init__(self, quantity, prop_nr, upper_bound=None, lower_bound=None, unit='1', wikibase_url=None, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param quantity: The quantity value
-        :type quantity: float, str or None
-        :param prop_nr: The property ID for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param upper_bound: Upper bound of the value if it exists, e.g. for standard deviations
-        :type upper_bound: float, str
-        :param lower_bound: Lower bound of the value if it exists, e.g. for standard deviations
-        :type lower_bound: float, str
-        :param unit: The unit item URL or the QID a certain quantity has been measured in (https://www.wikidata.org/wiki/Wikidata:Units).
-            The default is dimensionless, represented by a '1'
-        :type unit: str
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url
-
-        if unit.startswith('Q'):
-            unit = wikibase_url + '/entity/' + unit
-
-        self.quantity = None
-        self.unit = None
-        self.upper_bound = None
-        self.lower_bound = None
-
-        value = (quantity, unit, upper_bound, lower_bound)
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        self.quantity, self.unit, self.upper_bound, self.lower_bound = value
-
-        if self.quantity is not None:
-            self.quantity = self.format_amount(self.quantity)
-            self.unit = str(self.unit)
-            if self.upper_bound:
-                self.upper_bound = self.format_amount(self.upper_bound)
-            if self.lower_bound:
-                self.lower_bound = self.format_amount(self.lower_bound)
-
-            # Integrity checks for value and bounds
-            try:
-                for i in [self.quantity, self.upper_bound, self.lower_bound]:
-                    if i:
-                        float(i)
-            except ValueError:
-                raise ValueError("Value and bounds must parse as integers or floats")
-
-            if (self.lower_bound and self.upper_bound) and (float(self.lower_bound) > float(self.upper_bound)
-                                                            or float(self.lower_bound) > float(self.quantity)):
-                raise ValueError("Lower bound too large")
-
-            if self.upper_bound and float(self.upper_bound) < float(self.quantity):
-                raise ValueError("Upper bound too small")
-        elif self.snak_type == 'value':
-            raise ValueError("Parameter 'quantity' can't be 'None' if 'snak_type' is 'value'")
-
-        self.json_representation['datavalue'] = {
-            'value': {
-                'amount': self.quantity,
-                'unit': self.unit,
-                'upperBound': self.upper_bound,
-                'lowerBound': self.lower_bound
-            },
-            'type': 'quantity'
-        }
-
-        # remove the bounds from the JSON if they are undefined
-        if not self.upper_bound:
-            del self.json_representation['datavalue']['value']['upperBound']
-
-        if not self.lower_bound:
-            del self.json_representation['datavalue']['value']['lowerBound']
-
-        self.value = (self.quantity, self.unit, self.upper_bound, self.lower_bound)
-        super().set_value(value=self.value)
-
-    def get_sparql_value(self):
-        return self.quantity
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(quantity=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-
-        value = jsn['datavalue']['value']
-        upper_bound = value['upperBound'] if 'upperBound' in value else None
-        lower_bound = value['lowerBound'] if 'lowerBound' in value else None
-        return cls(quantity=value['amount'], prop_nr=jsn['property'], upper_bound=upper_bound, lower_bound=lower_bound,
-                   unit=value['unit'])
-
-    @staticmethod
-    def format_amount(amount):
-        # Remove a trailing '.0' by casting to int
-        if float(amount) % 1 == 0:
-            amount = int(float(amount))
-
-        # Add a '+' prefix for positive numbers and zero
-        if not str(amount).startswith('+') and float(amount) >= 0:
-            amount = str(f'+{amount}')
-
-        # Return as a string
-        return str(amount)
-
-
-class Sense(BaseDataType):
-    """
-    Implements the Wikibase data type 'wikibase-sense'
-    """
-    DTYPE = 'wikibase-sense'
-    sparql_query = '''
-        SELECT * WHERE {{
-          ?item_id <{wb_url}/prop/{pid}> ?s .
-          ?s <{wb_url}/prop/statement/{pid}> <{wb_url}/entity/{value}> .
-        }}
-    '''
-
-    def __init__(self, value, prop_nr, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: The sense ID to serve as a value, using the format "L<lexeme ID>-S<sense ID>" (example: L252248-S123)
-        :type value: str or None
-        :param prop_nr: The property number for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"
-        if value is None:
-            self.value = value
-        else:
-            pattern = re.compile(r'^L[0-9]+-S[0-9]+$')
-            matches = pattern.match(value)
-
-            if not matches:
-                raise ValueError(f"Invalid sense ID ({value}), format must be 'L[0-9]+-S[0-9]+'")
-
-            self.value = value
-
-        self.json_representation['datavalue'] = {
-            'value': {
-                'entity-type': 'sense',
-                'id': self.value
-            },
-            'type': 'wikibase-entityid'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value']['id'], prop_nr=jsn['property'])
-
-
-class String(BaseDataType):
-    """
-    Implements the Wikibase data type 'string'
-    """
-
-    DTYPE = 'string'
-
-    def __init__(self, value, prop_nr, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: The string to be used as the value
-        :type value: str or None
-        :param prop_nr: The property ID for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"
-        self.value = value
-
-        self.json_representation['datavalue'] = {
-            'value': self.value,
-            'type': 'string'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property'])
-
-
-class TabularData(BaseDataType):
-    """
-    Implements the Wikibase data type 'tabular-data'
-    """
-    DTYPE = 'tabular-data'
-
-    def __init__(self, value, prop_nr, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: Reference to a tabular data file on Wikimedia Commons.
-        :type value: str or None
-        :param prop_nr: The property ID for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"
-        if value is None:
-            self.value = value
-        else:
-            # TODO: Need to check if the value is a full URL like http://commons.wikimedia.org/data/main/Data:Taipei+Population.tab
-            pattern = re.compile(r'^Data:((?![:|#]).)+\.tab$')
-            matches = pattern.match(value)
-            if not matches:
-                raise ValueError("Value must start with 'Data:' and end with '.tab'. In addition, the title must not contain characters like colon, hash or pipe.")
-            self.value = value
-
-        self.json_representation['datavalue'] = {
-            'value': self.value,
-            'type': 'string'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property'])
-
-
-class Time(BaseDataType):
-    """
-    Implements the Wikibase data type with date and time values
-    """
-    DTYPE = 'time'
-    sparql_query = '''
-        SELECT * WHERE {{
-          ?item_id <{wb_url}/prop/{pid}> ?s .
-          ?s <{wb_url}/prop/statement/{pid}> '{value}'^^xsd:dateTime .
-        }}
-    '''
-
-    def __init__(self, time, prop_nr, before=0, after=0, precision=11, timezone=0, calendarmodel=None, wikibase_url=None, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param time: Explicit value for a point in time, represented as a timestamp resembling ISO 8601
-        :type time: str in the format '+%Y-%m-%dT%H:%M:%SZ', e.g. '+2001-12-31T12:01:13Z', or 'now'
-        :param prop_nr: The property number for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param before: explicit integer value for how many units after the given time it could be.
-            The unit is given by the precision.
-        :type before: int
-        :param after: explicit integer value for how many units before the given time it could be.
-            The unit is given by the precision.
-        :type after: int
-        :param precision: Precision value for dates and time as specified in the Wikibase data model
-            (https://www.wikidata.org/wiki/Special:ListDatatypes#time)
-        :type precision: int
-        :param timezone: The timezone which applies to the date and time as specified in the Wikibase data model
-        :type timezone: int
-        :param calendarmodel: The calendar model used for the date. URL to the Wikibase calendar model item or the QID.
-        :type calendarmodel: str
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        calendarmodel = config['CALENDAR_MODEL_QID'] if calendarmodel is None else calendarmodel
-        wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url
-
-        self.time = None
-        self.before = None
-        self.after = None
-        self.precision = None
-        self.timezone = None
-        self.calendarmodel = None
-
-        if calendarmodel.startswith('Q'):
-            calendarmodel = wikibase_url + '/entity/' + calendarmodel
-
-        value = (time, before, after, precision, timezone, calendarmodel)
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        self.time, self.before, self.after, self.precision, self.timezone, self.calendarmodel = value
-        assert isinstance(self.time, str) or self.time is None, f"Expected str, found {type(self.time)} ({self.time})"
-
-        if self.time is not None:
-            if self.time == "now":
-                self.time = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
-
-            if not (self.time.startswith("+") or self.time.startswith("-")):
-                self.time = "+" + self.time
-            pattern = re.compile(r'^[+-][0-9]*-(?:1[0-2]|0[0-9])-(?:3[01]|0[0-9]|[12][0-9])T(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]Z$')
-            matches = pattern.match(self.time)
-            if not matches:
-                raise ValueError("The time value must be a string in the following format: '+%Y-%m-%dT%H:%M:%SZ'")
-            self.value = value
-            if self.precision < 0 or self.precision > 15:
-                raise ValueError("Invalid value for time precision, see https://www.mediawiki.org/wiki/Wikibase/DataModel/JSON#time")
-        elif self.snak_type == 'value':
-            raise ValueError("Parameter 'time' can't be 'None' if 'snak_type' is 'value'")
-
-        self.json_representation['datavalue'] = {
-            'value': {
-                'time': self.time,
-                'before': self.before,
-                'after': self.after,
-                'precision': self.precision,
-                'timezone': self.timezone,
-                'calendarmodel': self.calendarmodel
-            },
-            'type': 'time'
-        }
-
-        self.value = (self.time, self.before, self.after, self.precision, self.timezone, self.calendarmodel)
-        super().set_value(value=self.value)
-
-    def get_sparql_value(self):
-        return self.time
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(time=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-
-        value = jsn['datavalue']['value']
-        return cls(time=value['time'], prop_nr=jsn['property'], before=value['before'], after=value['after'], precision=value['precision'], timezone=value['timezone'],
-                   calendarmodel=value['calendarmodel'])
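
A standalone sketch of the timestamp handling above: 'now' resolves to the current UTC time and a '+' prefix is added before the format check (the pattern is copied verbatim from set_value; the P585 property used in the comment is a hypothetical usage example):

import datetime
import re

TIME_PATTERN = re.compile(
    r'^[+-][0-9]*-(?:1[0-2]|0[0-9])-(?:3[01]|0[0-9]|[12][0-9])'
    r'T(?:2[0-3]|[01][0-9]):[0-5][0-9]:[0-5][0-9]Z$')

# Roughly what Time('now', prop_nr='P585') would store, assuming the
# defaults (precision=11 means day precision; 9 would mean year).
time = '+' + datetime.datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')
assert TIME_PATTERN.match(time)
print(time)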
-
-
-class Url(BaseDataType):
-    """
-    Implements the Wikibase data type for URL strings
-    """
-    DTYPE = 'url'
-    sparql_query = '''
-        SELECT * WHERE {{
-          ?item_id <{wb_url}/prop/{pid}> ?s .
-          ?s <{wb_url}/prop/statement/{pid}> <{value}> .
-        }}
-    '''
-
-    def __init__(self, value, prop_nr, **kwargs):
-        """
-        Constructor, calls the superclass BaseDataType
-        :param value: The URL to be used as the value
-        :type value: str or None
-        :param prop_nr: The property ID for this claim
-        :type prop_nr: str with a 'P' prefix followed by digits
-        :param is_reference: Whether this snak is a reference
-        :type is_reference: boolean
-        :param is_qualifier: Whether this snak is a qualifier
-        :type is_qualifier: boolean
-        :param snak_type: The snak type, either 'value', 'somevalue' or 'novalue'
-        :type snak_type: str
-        :param references: List with reference objects
-        :type references: A data type with subclass of BaseDataType
-        :param qualifiers: List with qualifier objects
-        :type qualifiers: A data type with subclass of BaseDataType
-        :param rank: rank of a snak with value 'preferred', 'normal' or 'deprecated'
-        :type rank: str
-        """
-
-        super().__init__(value=value, prop_nr=prop_nr, **kwargs)
-
-        self.set_value(value)
-
-    def set_value(self, value):
-        assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"
-        if value is None:
-            self.value = value
-        else:
-            pattern = re.compile(r'^([a-z][a-z\d+.-]*):([^][<>\"\x00-\x20\x7F])+$')
-            matches = pattern.match(value)
-
-            if not matches:
-                raise ValueError(f"Invalid URL {value}")
-            self.value = value
-
-        self.json_representation['datavalue'] = {
-            'value': self.value,
-            'type': 'string'
-        }
-
-        super().set_value(value=self.value)
-
-    @classmethod
-    @JsonParser
-    def from_json(cls, jsn):
-        if jsn['snaktype'] == 'novalue' or jsn['snaktype'] == 'somevalue':
-            return cls(value=None, prop_nr=jsn['property'], snak_type=jsn['snaktype'])
-        return cls(value=jsn['datavalue']['value'], prop_nr=jsn['property'])
diff --git a/wikibaseintegrator/wbi_enums.py b/wikibaseintegrator/wbi_enums.py
new file mode 100644
index 00000000..b334f485
--- /dev/null
+++ b/wikibaseintegrator/wbi_enums.py
@@ -0,0 +1,68 @@
+from enum import Enum, auto
+
+
+class ActionIfExists(Enum):
+    """
+    Action to take if a statement with a property already exists on the entity.
+
+    APPEND_OR_REPLACE: Add the new element to the property if it does not exist, otherwise replace the existing element.
+    FORCE_APPEND: Forces the addition of the new element to the property, even if it already exists.
+    KEEP: Does nothing if the property already has elements stated.
+    REPLACE_ALL: Replace all elements with the same property number.
+    """
+    APPEND_OR_REPLACE = auto()
+    FORCE_APPEND = auto()
+    KEEP = auto()
+    REPLACE_ALL = auto()
+
+
+class WikibaseDatatype(Enum):
+    COMMONSMEDIA = 'commonsMedia'
+    EXTERNALID = 'external-id'
+    FORM = 'wikibase-form'
+    GEOSHAPE = 'geo-shape'
+    GLOBECOORDINATE = 'globe-coordinate'
+    ITEM = 'wikibase-item'
+    LEXEME = 'wikibase-lexeme'
+    MATH = 'math'
+    MONOLINGUALTEXT = 'monolingualtext'
+    MUSICALNOTATION = 'musical-notation'
+    PROPERTY = 'wikibase-property'
+    QUANTITY = 'quantity'
+    SENSE = 'wikibase-sense'
+    STRING = 'string'
+    TABULARDATA = 'tabular-data'
+    TIME = 'time'
+    URL = 'url'
+
+
+class WikibaseRank(Enum):
+    DEPRECATED = "deprecated"
+    NORMAL = "normal"
+    PREFERRED = "preferred"
+
+
+class WikibaseSnakType(Enum):
+    """
+    The snak type of a Wikibase data snak. Three values are possible,
+    depending on whether the value is known (value), non-existent
+    (novalue) or unknown (somevalue). See the Wikibase documentation.
+ """ + KNOWN_VALUE = "value" + NO_VALUE = "novalue" + UNKNOWN_VALUE = "somevalue" + + +class WikibaseDatePrecision(Enum): + # SECOND = 14 # UNSUPPORTED TO DATE (REL1_37) + # MINUTE = 13 # UNSUPPORTED TO DATE (REL1_37) + # HOUR = 12 # UNSUPPORTED TO DATE (REL1_37) + DAY = 11 + MONTH = 10 + YEAR = 9 + DECADE = 8 + CENTURY = 7 + MILLENNIUM = 6 + HUNDRED_THOUSAND_YEARS = 4 + MILLION_YEARS = 3 + BILLION_YEARS = 0 diff --git a/wikibaseintegrator/wbi_exceptions.py b/wikibaseintegrator/wbi_exceptions.py index a1d19565..29c5b244 100644 --- a/wikibaseintegrator/wbi_exceptions.py +++ b/wikibaseintegrator/wbi_exceptions.py @@ -1,88 +1,91 @@ -class MWApiError(Exception): - def __init__(self, error_message): - """ - Base class for Mediawiki API error handling - :param error_message: The error message returned by the Mediawiki API - :type error_message: A Python json representation dictionary of the error message - :return: - """ - self.error_msg = error_message - - def __str__(self): - return repr(self.error_msg) +from typing import Any, Dict, List -class NonUniqueLabelDescriptionPairError(MWApiError): - def __init__(self, error_message): - """ - This class handles errors returned from the API due to an attempt to create an item which has the same - label and description as an existing item in a certain language. - :param error_message: An API error message containing 'wikibase-validator-label-with-description-conflict' - as the message name. - :type error_message: A Python json representation dictionary of the error message - :return: +class MWApiError(Exception): + """ + Base class for MediaWiki API error handling + """ + code: str + info: Dict[str, Any] + messages: List[Dict[str, Any]] + messages_names: List[str] + + @property + def get_conflicting_entity_ids(self) -> List[str]: """ - self.error_msg = error_message + Compute the list of conflicting entities from the error messages. - def get_language(self): - """ - :return: Returns a 2 letter language string, indicating the language which triggered the error + :return: A list of conflicting entities or an empty list """ - return self.error_msg['error']['messages'][0]['parameters'][1] - def get_conflicting_item_qid(self): - """ - :return: Returns the QID string of the item which has the same label and description as the one which should - be set. - """ - qid_string = self.error_msg['error']['messages'][0]['parameters'][2] + return list( + { + message['parameters'][2].split('|')[0][2:].replace("Property:", "") for message in self.messages + if message['name'].endswith('-conflict') + } + ) - return qid_string.split('|')[0][2:] + @property + def get_languages(self) -> List[str]: + """ + Compute a list of language identifiers from the error messages. Indicating the language which triggered the error. 
- def __str__(self): - return repr(self.error_msg) + :return: A list of language identifiers or an empty list + """ + return list( + { + message['parameters'][1] for message in self.messages + if message['name'].endswith('-conflict') + } + ) -class IDMissingError(Exception): - def __init__(self, value): - self.value = value + def __init__(self, error_dict: Dict[str, Any]): + super().__init__(error_dict['info']) + self.code = error_dict['code'] + self.info = error_dict['info'] + self.messages = error_dict['messages'] + self.messages_names = [message['name'] for message in error_dict['messages']] def __str__(self): - return repr(self.value) + return repr(self.info) + def __repr__(self): + """A mixin implementing a simple __repr__.""" + return "<{klass} @{id:x} {attrs}>".format( # pylint: disable=consider-using-f-string + klass=self.__class__.__name__, + id=id(self) & 0xFFFFFF, + attrs="\r\n\t ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), + ) -class SearchError(Exception): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) +class ModificationFailed(MWApiError): + """ + When the API return a 'modification-failed' error + """ + pass -class ManualInterventionReqException(Exception): - def __init__(self, value, property_string, item_list): - self.value = value + f' Property: {property_string}, items affected: {item_list}' +class SaveFailed(MWApiError): + """ + When the API return a 'save-failed' error + """ - def __str__(self): - return repr(self.value) + def __init__(self, error_dict: Dict[str, Any]): + super().__init__(error_dict) -class CorePropIntegrityException(Exception): - def __init__(self, value): - self.value = value +class NonExistentEntityError(MWApiError): + pass - def __str__(self): - return repr(self.value) +class MaxRetriesReachedException(Exception): + pass -class MergeError(Exception): - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) +class MissingEntityException(Exception): + pass -class SearchOnlyError(Exception): - """Raised when the ItemEngine is in search_only mode""" +class SearchError(Exception): pass diff --git a/wikibaseintegrator/wbi_fastrun.py b/wikibaseintegrator/wbi_fastrun.py index 265dc519..7ce7cc31 100644 --- a/wikibaseintegrator/wbi_fastrun.py +++ b/wikibaseintegrator/wbi_fastrun.py @@ -1,59 +1,69 @@ +from __future__ import annotations + import collections import copy +import logging from collections import defaultdict from functools import lru_cache from itertools import chain +from typing import TYPE_CHECKING, Dict, List, Optional, Set, Type, Union -from wikibaseintegrator import wbi_functions +from wikibaseintegrator.datatypes import BaseDataType +from wikibaseintegrator.models import Claim from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_enums import ActionIfExists, WikibaseDatatype +from wikibaseintegrator.wbi_helpers import execute_sparql_query, format_amount + +if TYPE_CHECKING: + from wikibaseintegrator.models import Claims + +log = logging.getLogger(__name__) + +fastrun_store: List[FastRunContainer] = [] class FastRunContainer: - def __init__(self, base_data_type, engine, mediawiki_api_url=None, sparql_endpoint_url=None, wikibase_url=None, base_filter=None, use_refs=False, ref_handler=None, - case_insensitive=False, debug=False): - self.reconstructed_statements = [] - self.rev_lookup = defaultdict(set) - self.rev_lookup_ci = defaultdict(set) - self.prop_data = {} - self.loaded_langs = {} - self.statements = [] - 
self.base_filter = {} + def __init__(self, base_data_type: Type[BaseDataType], mediawiki_api_url: str = None, sparql_endpoint_url: str = None, wikibase_url: str = None, + base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False): + self.reconstructed_statements: List[BaseDataType] = [] + self.rev_lookup: defaultdict[str, Set[str]] = defaultdict(set) + self.rev_lookup_ci: defaultdict[str, Set[str]] = defaultdict(set) + self.prop_data: Dict[str, Dict] = {} + self.loaded_langs: Dict[str, Dict] = {} + self.base_filter: List[BaseDataType | List[BaseDataType]] = [] self.base_filter_string = '' - self.prop_dt_map = {} - self.current_qid = '' - - self.base_data_type = base_data_type - self.engine = engine - self.mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - self.sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if sparql_endpoint_url is None else sparql_endpoint_url - self.wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url - self.use_refs = use_refs - self.ref_handler = ref_handler - self.case_insensitive = case_insensitive - self.debug = debug + self.prop_dt_map: Dict[str, str] = {} + + self.base_data_type: Type[BaseDataType] = base_data_type + self.mediawiki_api_url: str = str(mediawiki_api_url or config['MEDIAWIKI_API_URL']) + self.sparql_endpoint_url: str = str(sparql_endpoint_url or config['SPARQL_ENDPOINT_URL']) + self.wikibase_url: str = str(wikibase_url or config['WIKIBASE_URL']) + self.use_refs: bool = use_refs + self.case_insensitive: bool = case_insensitive if base_filter and any(base_filter): self.base_filter = base_filter - for k, v in self.base_filter.items(): - ks = [] - if k.count('/') == 1: - ks = k.split('/') - if v: - if ks: - self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr1}>/<{wb_url}/prop/direct/{prop_nr2}>* ' \ - '<{wb_url}/entity/{entity}> .\n'.format(wb_url=self.wikibase_url, prop_nr1=ks[0], prop_nr2=ks[1], entity=v) + for k in self.base_filter: + if isinstance(k, BaseDataType): + if k.mainsnak.datavalue: + self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> {entity} .\n'.format( + wb_url=self.wikibase_url, prop_nr=k.mainsnak.property_number, entity=k.get_sparql_value().format(wb_url=self.wikibase_url)) else: - self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> <{wb_url}/entity/{entity}> .\n'.format(wb_url=self.wikibase_url, - prop_nr=k, entity=v) - else: - if ks: - self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr1}>/<{wb_url}/prop/direct/{prop_nr2}>* ' \ - '?zz{prop_nr1}{prop_nr2} .\n'.format(wb_url=self.wikibase_url, prop_nr1=ks[0], prop_nr2=ks[1]) + self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> ?zz{prop_nr} .\n'.format( + wb_url=self.wikibase_url, prop_nr=k.mainsnak.property_number) + elif isinstance(k, list) and len(k) == 2 and isinstance(k[0], BaseDataType) and isinstance(k[1], BaseDataType): + if k[0].mainsnak.datavalue: + self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}>/<{wb_url}/prop/direct/{prop_nr2}>* {entity} .\n'.format( + wb_url=self.wikibase_url, prop_nr=k[0].mainsnak.property_number, prop_nr2=k[1].mainsnak.property_number, + entity=k[0].get_sparql_value().format(wb_url=self.wikibase_url)) else: - self.base_filter_string += '?item <{wb_url}/prop/direct/{prop_nr}> ?zz{prop_nr} .\n'.format(wb_url=self.wikibase_url, prop_nr=k) + self.base_filter_string += '?item 
<{wb_url}/prop/direct/{prop_nr1}>/<{wb_url}/prop/direct/{prop_nr2}>* ?zz{prop_nr1}{prop_nr2} .\n'.format( + wb_url=self.wikibase_url, prop_nr1=k[0].mainsnak.property_number, prop_nr2=k[1].mainsnak.property_number) + else: + raise ValueError("base_filter must be an instance of BaseDataType or a list of instances of BaseDataType") - def reconstruct_statements(self, qid: str) -> list: - reconstructed_statements = [] + def reconstruct_statements(self, qid: str) -> List[BaseDataType]: + reconstructed_statements: List[BaseDataType] = [] if qid not in self.prop_data: self.reconstructed_statements = reconstructed_statements @@ -68,206 +78,227 @@ def reconstruct_statements(self, qid: str) -> list: if prop not in self.prop_dt_map: self.prop_dt_map.update({prop: self.get_prop_datatype(prop)}) # reconstruct statements from frc (including unit, qualifiers, and refs) - for uid, d in dt.items(): + for _, d in dt.items(): qualifiers = [] for q in d['qual']: - f = [x for x in self.base_data_type.__subclasses__() if x.DTYPE == self.prop_dt_map[q[0]]][0] + f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[q[0]]][0] # TODO: Add support for more data type (Time, MonolingualText, GlobeCoordinate) if self.prop_dt_map[q[0]] == 'quantity': - qualifiers.append(f(q[1], prop_nr=q[0], is_qualifier=True, unit=q[2])) + qualifiers.append(f(value=q[1], prop_nr=q[0], unit=q[2])) else: - qualifiers.append(f(q[1], prop_nr=q[0], is_qualifier=True)) + qualifiers.append(f(value=q[1], prop_nr=q[0])) references = [] - for ref_id, refs in d['ref'].items(): + for _, refs in d['ref'].items(): this_ref = [] for ref in refs: - f = [x for x in self.base_data_type.__subclasses__() if x.DTYPE == self.prop_dt_map[ref[0]]][0] - this_ref.append(f(ref[1], prop_nr=ref[0], is_reference=True)) + f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[ref[0]]][0] + this_ref.append(f(value=ref[1], prop_nr=ref[0])) references.append(this_ref) - f = [x for x in self.base_data_type.__subclasses__() if x.DTYPE == self.prop_dt_map[prop_nr]][0] + f = [x for x in self.base_data_type.subclasses if x.DTYPE == self.prop_dt_map[prop_nr]][0] # TODO: Add support for more data type if self.prop_dt_map[prop_nr] == 'quantity': - reconstructed_statements.append(f(d['v'], prop_nr=prop_nr, qualifiers=qualifiers, references=references, unit=d['unit'])) + datatype = f(prop_nr=prop_nr, qualifiers=qualifiers, references=references, unit=d['unit']) + datatype.parse_sparql_value(value=d['v'], unit=d['unit']) else: - reconstructed_statements.append(f(d['v'], prop_nr=prop_nr, qualifiers=qualifiers, references=references)) + datatype = f(prop_nr=prop_nr, qualifiers=qualifiers, references=references) + datatype.parse_sparql_value(value=d['v']) + reconstructed_statements.append(datatype) # this isn't used. 
done for debugging purposes
         self.reconstructed_statements = reconstructed_statements
         return reconstructed_statements

-    def load_item(self, data: list, cqid=None) -> bool:
+    def get_items(self, claims: Union[List[Claim], Claims, Claim], cqid: str = None) -> Optional[Set[str]]:
+        """
+        Get the entity IDs of all items matching the given claims from the SPARQL endpoint
+
+        :param claims: A list of claims the entities should have
+        :param cqid: An optional entity ID; if given, the match is restricted to this entity
+        :return: a set of entity IDs, or None if nothing matched
+        :exception ValueError: if claims is not a Claim, a Claims or a list of Claim
+        """
         match_sets = []
-        for date in data:
+
+        if isinstance(claims, Claim):
+            claims = [claims]
+        elif (not isinstance(claims, list) or not all(isinstance(n, Claim) for n in claims)) and not isinstance(claims, Claims):
+            raise ValueError("claims must be an instance of Claim or Claims or a list of Claim")
+
+        for claim in claims:
             # skip to next if statement has no value or no data type defined, e.g. for deletion objects
-            current_value = date.get_value()
-            if not current_value and not date.data_type:
+            if not claim.mainsnak.datavalue and not claim.mainsnak.datatype:
                 continue

-            prop_nr = date.get_prop_nr()
+            prop_nr = claim.mainsnak.property_number

             if prop_nr not in self.prop_dt_map:
-                if self.debug:
-                    print(f"{prop_nr} not found in fastrun")
-                self.prop_dt_map.update({prop_nr: self.get_prop_datatype(prop_nr)})
-                self._query_data(prop_nr=prop_nr, use_units=date.data_type == 'quantity')
-
-            # more sophisticated data types like dates and globe coordinates need special treatment here
-            if self.prop_dt_map[prop_nr] == 'time':
-                current_value = current_value[0]
-            elif self.prop_dt_map[prop_nr] == 'wikibase-item':
-                if not str(current_value).startswith('Q'):
-                    current_value = f'Q{current_value}'
-            elif self.prop_dt_map[prop_nr] == 'quantity':
-                current_value = self.format_amount(current_value[0])
-
-            if self.debug:
-                print(current_value)
+                log.debug("%s not found in fastrun", prop_nr)
+
+                if isinstance(claim, BaseDataType) and type(claim) != BaseDataType:  # pylint: disable=unidiomatic-typecheck
+                    self.prop_dt_map.update({prop_nr: claim.DTYPE})
+                else:
+                    self.prop_dt_map.update({prop_nr: self.get_prop_datatype(prop_nr)})
+                self._query_data(prop_nr=prop_nr, use_units=self.prop_dt_map[prop_nr] == 'quantity')
+
+            # noinspection PyProtectedMember
+            current_value = claim.get_sparql_value()
+
+            if self.prop_dt_map[prop_nr] == 'wikibase-item':
+                current_value = claim.mainsnak.datavalue['value']['id']
+
+            log.debug(current_value)
+            # if self.case_insensitive:
+            #     log.debug("case insensitive enabled")
+            #     log.debug(self.rev_lookup_ci)
+            # else:
+            #     log.debug(self.rev_lookup)

             if current_value in self.rev_lookup:
                 # quick check for if the value has ever been seen before, if not, write required
-                temp_set = set(self.rev_lookup[current_value])
+                match_sets.append(set(self.rev_lookup[current_value]))
             elif self.case_insensitive and current_value.casefold() in self.rev_lookup_ci:
-                temp_set = set(self.rev_lookup_ci[current_value.casefold()])
+                match_sets.append(set(self.rev_lookup_ci[current_value.casefold()]))
             else:
-                if self.debug:
-                    if self.case_insensitive:
-                        print("case insensitive enabled")
-                        print(self.rev_lookup_ci)
-                    else:
-                        print(self.rev_lookup)
-                    print("no matches for rev lookup")
-                return True
-            match_sets.append(temp_set)
+                log.debug("no matches for rev lookup for %s", current_value)
+
+        if not match_sets:
+            return None

         if cqid:
             matching_qids = {cqid}
         else:
             matching_qids = match_sets[0].intersection(*match_sets[1:])

+        return matching_qids
+
+    def get_item(self, claims: Union[List[Claim], Claims, Claim], cqid: str = None) -> Optional[str]:
+        """
+        Get the single entity ID matching the given claims
+
+        :param claims: A list of claims the entity should have
+        :param cqid: An optional entity ID; if given, the match is restricted to this entity
+        :return: The matching entity ID, or None if there is not exactly one match
+        """
+
+        matching_qids: Optional[Set[str]] = self.get_items(claims=claims, cqid=cqid)
+
+        if matching_qids is None:
+            return None
+
         # check if there are any items that have all of these values
         # if not, a write is required no matter what
         if not len(matching_qids) == 1:
-            if self.debug:
-                print(f"no matches ({len(matching_qids)})")
-            return True
+            log.debug("no matches (%s)", len(matching_qids))
+            return None

-        qid = matching_qids.pop()
-        self.current_qid = qid
+        return matching_qids.pop()

-    def write_required(self, data: list, cqid=None) -> bool:
+    def write_required(self, data: List[Claim], action_if_exists: ActionIfExists = ActionIfExists.REPLACE_ALL, cqid: str = None) -> bool:
+        """
+        Check if a write is required
+
+        :param data: A list of claims to check against the statements reconstructed from the fastrun store
+        :param action_if_exists: How existing claims should be handled; APPEND_OR_REPLACE enables the append handling below
+        :param cqid: An optional entity ID; if given, it is used instead of searching for a matching entity
+        :return: Return True if the write is required
+        """
         del_props = set()
         data_props = set()
-        append_props = [x.get_prop_nr() for x in data if 'APPEND' in x.if_exists]
+        append_props = []
+        if action_if_exists == ActionIfExists.APPEND_OR_REPLACE:
+            append_props = [x.mainsnak.property_number for x in data]

         for x in data:
-            if x.value and x.data_type:
-                data_props.add(x.get_prop_nr())
-        write_required = False
-        self.load_item(data, cqid)
+            if x.mainsnak.datavalue and x.mainsnak.datatype:
+                data_props.add(x.mainsnak.property_number)
+        qid = self.get_item(data, cqid)

-        reconstructed_statements = self.reconstruct_statements(self.current_qid)
+        if not qid:
+            return True
+
+        reconstructed_statements = self.reconstruct_statements(qid)
         tmp_rs = copy.deepcopy(reconstructed_statements)

         # handle append properties
         for p in append_props:
-            app_data = [x for x in data if x.get_prop_nr() == p]  # new statements
-            rec_app_data = [x for x in tmp_rs if x.get_prop_nr() == p]  # orig statements
+            app_data = [x for x in data if x.mainsnak.property_number == p]  # new statements
+            rec_app_data = [x for x in tmp_rs if x.mainsnak.property_number == p]  # orig statements
             comp = []
             for x in app_data:
                 for y in rec_app_data:
-                    if x.get_value() == y.get_value():
-                        if self.use_refs and self.ref_handler:
-                            to_be = copy.deepcopy(y)
-                            self.ref_handler(to_be, x)
-                        else:
-                            to_be = x
-                        if y.equals(to_be, include_ref=self.use_refs) and x.if_exists != 'FORCE_APPEND':
+                    if x.mainsnak.datavalue == y.mainsnak.datavalue:
+                        if y.equals(x, include_ref=self.use_refs) and action_if_exists != ActionIfExists.FORCE_APPEND:
                             comp.append(True)

             # comp = [True for x in app_data for y in rec_app_data if x.equals(y, include_ref=self.use_refs)]
             if len(comp) != len(app_data):
-                if self.debug:
-                    print(f"failed append: {p}")
+                log.debug("failed append: %s", p)
                 return True

-        tmp_rs = [x for x in tmp_rs if x.get_prop_nr() not in append_props and x.get_prop_nr() in data_props]
+        tmp_rs = [x for x in tmp_rs if x.mainsnak.property_number not in append_props and x.mainsnak.property_number in data_props]

         for date in data:
             # ensure that statements meant for deletion get handled properly
-            reconst_props = {x.get_prop_nr() for x in tmp_rs}
-            if (not date.value or not date.data_type) and date.get_prop_nr() in reconst_props:
-                if self.debug:
-                    print("returned from delete prop handling")
+            reconst_props = {x.mainsnak.property_number for x in tmp_rs}
+            if not date.mainsnak.datatype and date.mainsnak.property_number in reconst_props:
+                log.debug("returned from delete prop handling")
                 return True
-            elif not date.value or not date.data_type:
+
+            if not
date.mainsnak.datavalue or not date.mainsnak.datatype: # Ignore the deletion statements which are not in the reconstructed statements. continue - if date.get_prop_nr() in append_props: + if date.mainsnak.property_number in append_props: # TODO: check if value already exist and already have the same value continue - if not date.get_value() and not date.data_type: - del_props.add(date.get_prop_nr()) + if not date.mainsnak.datavalue and not date.mainsnak.datatype: + del_props.add(date.mainsnak.property_number) # this is where the magic happens # date is a new statement, proposed to be written # tmp_rs are the reconstructed statements == current state of the item bool_vec = [] for x in tmp_rs: - if (x.get_value() == date.get_value() or ( - self.case_insensitive and x.get_value().casefold() == date.get_value().casefold())) and x.get_prop_nr() not in del_props: - if self.use_refs and self.ref_handler and callable(self.ref_handler): - to_be = copy.deepcopy(x) - self.ref_handler(to_be, date) - else: - to_be = date - if x.equals(to_be, include_ref=self.use_refs): - bool_vec.append(True) - else: - bool_vec.append(False) + if (x == date or (self.case_insensitive and x.mainsnak.datavalue.casefold() == date.mainsnak.datavalue.casefold())) and x.mainsnak.property_number not in del_props: + bool_vec.append(x.equals(date, include_ref=self.use_refs)) else: bool_vec.append(False) - """ - bool_vec = [x.equals(date, include_ref=self.use_refs, fref=self.ref_comparison_f) and - x.get_prop_nr() not in del_props for x in tmp_rs] - """ - - if self.debug: - print(f"bool_vec: {bool_vec}") - print("-----------------------------------") - for x in tmp_rs: - if date == x and x.get_prop_nr() not in del_props: - print(x.get_prop_nr(), x.get_value(), [z.get_value() for z in x.get_qualifiers()]) - print(date.get_prop_nr(), date.get_value(), [z.get_value() for z in date.get_qualifiers()]) - elif x.get_prop_nr() == date.get_prop_nr(): - print(x.get_prop_nr(), x.get_value(), [z.get_value() for z in x.get_qualifiers()]) - print(date.get_prop_nr(), date.get_value(), [z.get_value() for z in date.get_qualifiers()]) + # bool_vec = [x.equals(date, include_ref=self.use_refs, fref=self.ref_comparison_f) and + # x.mainsnak.property_number not in del_props for x in tmp_rs] + + log.debug("bool_vec: %s", bool_vec) + log.debug("-----------------------------------") + for x in tmp_rs: + if x == date and x.mainsnak.property_number not in del_props: + log.debug([x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]]) + log.debug([date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]]) + elif x.mainsnak.property_number == date.mainsnak.property_number: + log.debug([x.mainsnak.property_number, x.mainsnak.datavalue, [z.datavalue for z in x.qualifiers]]) + log.debug([date.mainsnak.property_number, date.mainsnak.datavalue, [z.datavalue for z in date.qualifiers]]) if not any(bool_vec): - if self.debug: - print(len(bool_vec)) - print("fast run failed at", date.get_prop_nr()) - write_required = True - else: - if self.debug: - print("fast run success") - tmp_rs.pop(bool_vec.index(True)) + log.debug(len(bool_vec)) + log.debug("fast run failed at %s", date.mainsnak.property_number) + return True + + log.debug("fast run success") + tmp_rs.pop(bool_vec.index(True)) if len(tmp_rs) > 0: - if self.debug: - print("failed because not zero") - for x in tmp_rs: - print("xxx", x.get_prop_nr(), x.get_value(), [z.get_value() for z in x.get_qualifiers()]) - print("failed because not 
zero--END") - write_required = True - return write_required + log.debug("failed because not zero") + for x in tmp_rs: + log.debug([x.mainsnak.property_number, x.mainsnak.datavalue, [z.mainsnak.datavalue for z in x.qualifiers]]) + log.debug("failed because not zero--END") + return True + + return False def init_language_data(self, lang: str, lang_data_type: str) -> None: """ Initialize language data store + :param lang: language code :param lang_data_type: 'label', 'description' or 'aliases' :return: None @@ -277,12 +308,14 @@ def init_language_data(self, lang: str, lang_data_type: str) -> None: if lang_data_type not in self.loaded_langs[lang]: result = self._query_lang(lang=lang, lang_data_type=lang_data_type) - data = self._process_lang(result) - self.loaded_langs[lang].update({lang_data_type: data}) + if result is not None: + data = self._process_lang(result=result) + self.loaded_langs[lang].update({lang_data_type: data}) - def get_language_data(self, qid: str, lang: str, lang_data_type: str) -> list: + def get_language_data(self, qid: str, lang: str, lang_data_type: str) -> List[str]: """ get language data for specified qid + :param qid: Wikibase item id :param lang: language code :param lang_data_type: 'label', 'description' or 'aliases' @@ -300,33 +333,32 @@ def get_language_data(self, qid: str, lang: str, lang_data_type: str) -> list: all_lang_strings = [''] return all_lang_strings - def check_language_data(self, qid: str, lang_data: list, lang: str, lang_data_type: str, if_exists: str = 'APPEND') -> bool: + def check_language_data(self, qid: str, lang_data: List, lang: str, lang_data_type: str, action_if_exists: ActionIfExists = ActionIfExists.APPEND_OR_REPLACE) -> bool: """ Method to check if certain language data exists as a label, description or aliases :param qid: Wikibase item id :param lang_data: list of string values to check :param lang: language code :param lang_data_type: What kind of data is it? 'label', 'description' or 'aliases'? 
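check_language_data() treats the REPLACE_ALL comparison as a multiset equality test, as the Counter-based branch just below shows: order is ignored but duplicates count. A small self-contained illustration of that comparison (the label strings are made up):

import collections

existing = {'douglas adams', 'douglas noel adams'}  # casefolded strings already in the store
incoming = ['Douglas Adams', 'Douglas Noel Adams']  # new values to check

# Equal Counters mean the same multiset of strings, so no write is needed.
write_required = collections.Counter(existing) != collections.Counter(s.casefold() for s in incoming)
print(write_required)  # False: identical once casefolded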
- :param if_exists: If aliases already exist, APPEND or REPLACE + :param action_if_exists: If aliases already exist, APPEND_OR_REPLACE or REPLACE_ALL :return: boolean """ all_lang_strings = {x.strip().casefold() for x in self.get_language_data(qid, lang, lang_data_type)} - if if_exists == 'REPLACE': - return not collections.Counter(all_lang_strings) == collections.Counter(map(lambda x: x.casefold(), lang_data)) - else: - for s in lang_data: - if s.strip().casefold() not in all_lang_strings: - if self.debug: - print(f"fastrun failed at: {lang_data_type}, string: {s}") - return True + if action_if_exists == ActionIfExists.REPLACE_ALL: + return collections.Counter(all_lang_strings) != collections.Counter(map(lambda x: x.casefold(), lang_data)) + + for s in lang_data: + if s.strip().casefold() not in all_lang_strings: + log.debug("fastrun failed at: %s, string: %s", lang_data_type, s) + return True return False - def get_all_data(self) -> dict: + def get_all_data(self) -> Dict[str, Dict]: return self.prop_data - def format_query_results(self, r: list, prop_nr: str) -> None: + def format_query_results(self, r: List, prop_nr: str) -> None: """ `r` is the results of the sparql query in _query_data and is modified in place `prop_nr` is needed to get the property datatype to determine how to format the value @@ -345,7 +377,7 @@ def format_query_results(self, r: list, prop_nr: str) -> None: """ prop_dt = self.get_prop_datatype(prop_nr) for i in r: - for value in {'item', 'sid', 'pq', 'pr', 'ref', 'unit', 'qunit'}: + for value in ['item', 'sid', 'pq', 'pr', 'ref', 'unit', 'qunit']: if value in i: if i[value]['value'].startswith(self.wikibase_url): i[value] = i[value]['value'].split('/')[-1] @@ -359,7 +391,7 @@ def format_query_results(self, r: list, prop_nr: str) -> None: # make sure datetimes are formatted correctly. # the correct format is '+%Y-%m-%dT%H:%M:%SZ', but is sometimes missing the plus?? # some difference between RDF and xsd:dateTime that I don't understand - for value in {'v', 'qval', 'rval'}: + for value in ['v', 'qval', 'rval']: if value in i: if i[value].get("datatype") == 'http://www.w3.org/2001/XMLSchema#dateTime' and not i[value]['value'][0] in '+-': # if it is a dateTime and doesn't start with plus or minus, add a plus @@ -371,13 +403,19 @@ def format_query_results(self, r: list, prop_nr: str) -> None: if i['v']['type'] == 'uri' and prop_dt == 'wikibase-item': i['v'] = i['v']['value'].split('/')[-1] elif i['v']['type'] == 'literal' and prop_dt == 'quantity': - i['v'] = self.format_amount(i['v']['value']) + i['v'] = format_amount(i['v']['value']) + elif i['v']['type'] == 'literal' and prop_dt == 'monolingualtext': + f = [x for x in self.base_data_type.subclasses if x.DTYPE == prop_dt][0](prop_nr=prop_nr, text=i['v']['value'], language=i['v']['xml:lang']) + i['v'] = f.get_sparql_value() else: - i['v'] = i['v']['value'] + f = [x for x in self.base_data_type.subclasses if x.DTYPE == prop_dt][0](prop_nr=prop_nr) + if not f.parse_sparql_value(value=i['v']['value'], type=i['v']['type']): + raise ValueError("Can't parse the value with parse_sparql_value()") + i['v'] = f.get_sparql_value() # Note: no-value and some-value don't actually show up in the results here # see for example: select * where { wd:Q7207 p:P40 ?c . 
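The dateTime fix-up above only has to handle a missing leading sign; a minimal illustration (the timestamp is made up):

v = '2020-01-01T00:00:00Z'     # as returned by the RDF store
if v[0] not in '+-':
    v = '+' + v                # Wikibase timestamps carry an explicit sign
assert v == '+2020-01-01T00:00:00Z'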
?c ?d ?e } - if type(i['v']) is not dict: + if not isinstance(i['v'], dict): self.rev_lookup[i['v']].add(i['item']) if self.case_insensitive: self.rev_lookup_ci[i['v'].casefold()].add(i['item']) @@ -388,7 +426,7 @@ def format_query_results(self, r: list, prop_nr: str) -> None: if i['qval']['type'] == 'uri' and qual_prop_dt == 'wikibase-item': i['qval'] = i['qval']['value'].split('/')[-1] elif i['qval']['type'] == 'literal' and qual_prop_dt == 'quantity': - i['qval'] = self.format_amount(i['qval']['value']) + i['qval'] = format_amount(i['qval']['value']) else: i['qval'] = i['qval']['value'] @@ -397,23 +435,12 @@ def format_query_results(self, r: list, prop_nr: str) -> None: ref_prop_dt = self.get_prop_datatype(prop_nr=i['pr']) if i['rval']['type'] == 'uri' and ref_prop_dt == 'wikibase-item': i['rval'] = i['rval']['value'].split('/')[-1] + elif i['rval']['type'] == 'literal' and ref_prop_dt == 'quantity': + i['rval'] = format_amount(i['rval']['value']) else: i['rval'] = i['rval']['value'] - @staticmethod - def format_amount(amount) -> str: - # Remove .0 by casting to int - if float(amount) % 1 == 0: - amount = int(float(amount)) - - # Adding prefix + for positive number and 0 - if not str(amount).startswith('+') and float(amount) >= 0: - amount = str(f'+{amount}') - - # return as string - return str(amount) - - def update_frc_from_query(self, r: list, prop_nr: str) -> None: + def update_frc_from_query(self, r: List, prop_nr: str) -> None: # r is the output of format_query_results # this updates the frc from the query (result of _query_data) for i in r: @@ -448,26 +475,9 @@ def update_frc_from_query(self, r: list, prop_nr: str) -> None: if 'unit' in i: self.prop_data[qid][prop_nr][i['sid']]['unit'] = i['unit'] - def _query_data(self, prop_nr: str, use_units=False) -> None: - page_size = 10000 + def _query_data(self, prop_nr: str, use_units: bool = False, page_size: int = 10000) -> None: page_count = 0 - num_pages = None - if self.debug: - # get the number of pages/queries so we can show a progress bar - query = f""" - SELECT (COUNT(?item) as ?c) where {{ - {self.base_filter_string} - ?item <{self.wikibase_url}/prop/{prop_nr}> ?sid . - }}""" - - if self.debug: - print(query) - - r = wbi_functions.execute_sparql_query(query, endpoint=self.sparql_endpoint_url)['results']['bindings'] - count = int(r[0]['c']['value']) - print(f"Count: {count}") - num_pages = (int(count) // page_size) + 1 - print(f"Query {prop_nr}: {page_count}/{num_pages}") + while True: # Query header query = ''' @@ -480,7 +490,7 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: # Base filter query += ''' {base_filter} - + ?item <{wb_url}/prop/{prop_nr}> ?sid . ''' @@ -505,25 +515,38 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: ''' # Qualifiers - query += ''' - # Get qualifiers - OPTIONAL - {{ - {{ - # Get simple values for qualifiers which are not of type quantity - ?sid ?propQualifier ?qval . - ?pq wikibase:qualifier ?propQualifier . - ?pq wikibase:propertyType ?qualifer_property_type . - FILTER (?qualifer_property_type != wikibase:Quantity) - }} - UNION - {{ - # Get amount and unit for qualifiers of type quantity - ?sid ?pqv [wikibase:quantityAmount ?qval; wikibase:quantityUnit ?qunit] . - ?pq wikibase:qualifierValue ?pqv . - }} - }} - ''' + # Amount and unit + if use_units: + query += ''' + # Get qualifiers + OPTIONAL + {{ + {{ + # Get simple values for qualifiers which are not of type quantity + ?sid ?propQualifier ?qval . + ?pq wikibase:qualifier ?propQualifier . 
+ ?pq wikibase:propertyType ?qualifer_property_type . + FILTER (?qualifer_property_type != wikibase:Quantity) + }} + UNION + {{ + # Get amount and unit for qualifiers of type quantity + ?sid ?pqv [wikibase:quantityAmount ?qval; wikibase:quantityUnit ?qunit] . + ?pq wikibase:qualifierValue ?pqv . + }} + }} + ''' + else: + query += ''' + # Get qualifiers + OPTIONAL + {{ + # Get simple values for qualifiers + ?sid ?propQualifier ?qval . + ?pq wikibase:qualifier ?propQualifier . + ?pq wikibase:propertyType ?qualifer_property_type . + }} + ''' # References if self.use_refs: @@ -543,19 +566,15 @@ def _query_data(self, prop_nr: str, use_units=False) -> None: # Format the query query = query.format(wb_url=self.wikibase_url, base_filter=self.base_filter_string, prop_nr=prop_nr, offset=str(page_count * page_size), page_size=str(page_size)) - if self.debug: - print(query) - - results = wbi_functions.execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] + results = execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] self.format_query_results(results, prop_nr) self.update_frc_from_query(results, prop_nr) page_count += 1 - if num_pages: - print(f"Query {prop_nr}: {page_count}/{num_pages}") + if len(results) == 0 or len(results) < page_size: break - def _query_lang(self, lang: str, lang_data_type: str): + def _query_lang(self, lang: str, lang_data_type: str) -> Optional[List[Dict[str, Dict]]]: """ :param lang: @@ -568,24 +587,23 @@ def _query_lang(self, lang: str, lang_data_type: str): 'aliases': 'skos:altLabel' } - query = ''' + query = f''' #Tool: WikibaseIntegrator wbi_fastrun._query_lang SELECT ?item ?label WHERE {{ - {base_filter} + {self.base_filter_string} OPTIONAL {{ - ?item {lang_data_type} ?label FILTER (lang(?label) = "{lang}") . + ?item {lang_data_type_dict[lang_data_type]} ?label FILTER (lang(?label) = "{lang}") . 
}} }} - '''.format(base_filter=self.base_filter_string, lang_data_type=lang_data_type_dict[lang_data_type], lang=lang) + ''' - if self.debug: - print(query) + log.debug(query) - return wbi_functions.execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] + return execute_sparql_query(query=query, endpoint=self.sparql_endpoint_url)['results']['bindings'] @staticmethod - def _process_lang(result: list): + def _process_lang(result: List) -> defaultdict[str, set]: data = defaultdict(set) for r in result: qid = r['item']['value'].split("/")[-1] @@ -594,10 +612,14 @@ def _process_lang(result: list): return data @lru_cache(maxsize=100000) - def get_prop_datatype(self, prop_nr: str) -> str: - item = self.engine(item_id=prop_nr, sparql_endpoint_url=self.sparql_endpoint_url, mediawiki_api_url=self.mediawiki_api_url, wikibase_url=self.wikibase_url, - debug=self.debug) - return item.entity_metadata['datatype'] + def get_prop_datatype(self, prop_nr: str) -> Optional[str]: # pylint: disable=no-self-use + from wikibaseintegrator import WikibaseIntegrator + wbi = WikibaseIntegrator() + property = wbi.property.get(prop_nr) + datatype = property.datatype + if isinstance(datatype, WikibaseDatatype): + return datatype.value + return datatype def clear(self) -> None: """ @@ -610,8 +632,32 @@ def clear(self) -> None: def __repr__(self) -> str: """A mixin implementing a simple __repr__.""" - return "<{klass} @{id:x} {attrs}>".format( + return "<{klass} @{id:x} {attrs}>".format( # pylint: disable=consider-using-f-string klass=self.__class__.__name__, id=id(self) & 0xFFFFFF, attrs="\r\n\t ".join(f"{k}={v!r}" for k, v in self.__dict__.items()), ) + + +def get_fastrun_container(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer: + if base_filter is None: + base_filter = [] + + # We search if we already have a FastRunContainer with the same parameters to re-use it + fastrun_container = _search_fastrun_store(base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive) + + return fastrun_container + + +def _search_fastrun_store(base_filter: List[BaseDataType | List[BaseDataType]] = None, use_refs: bool = False, case_insensitive: bool = False) -> FastRunContainer: + for fastrun in fastrun_store: + if (fastrun.base_filter == base_filter) and (fastrun.use_refs == use_refs) and (fastrun.case_insensitive == case_insensitive) and ( + fastrun.sparql_endpoint_url == config['SPARQL_ENDPOINT_URL']): + return fastrun + + # In case nothing was found in the fastrun_store + log.info("Create a new FastRunContainer") + + fastrun_container = FastRunContainer(base_data_type=BaseDataType, base_filter=base_filter, use_refs=use_refs, case_insensitive=case_insensitive) + fastrun_store.append(fastrun_container) + return fastrun_container diff --git a/wikibaseintegrator/wbi_functions.py b/wikibaseintegrator/wbi_functions.py deleted file mode 100644 index 41e08f31..00000000 --- a/wikibaseintegrator/wbi_functions.py +++ /dev/null @@ -1,395 +0,0 @@ -import datetime -from time import sleep -from warnings import warn - -import requests - -from wikibaseintegrator import wbi_login -from wikibaseintegrator.wbi_backoff import wbi_backoff -from wikibaseintegrator.wbi_config import config -from wikibaseintegrator.wbi_exceptions import MWApiError, SearchError - - -def mediawiki_api_call(method, mediawiki_api_url=None, session=None, max_retries=1000, retry_after=60, **kwargs): - """ - :param method: 'GET' or 'POST' - 
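The fastrun container re-use shown above in get_fastrun_container() is a simple identity map: the module-level fastrun_store list keeps every container created so far, and _search_fastrun_store() linearly scans it for a matching configuration. A reduced sketch of the same pattern (class and function names are simplified stand-ins, not the library's actual code):

# Reduced sketch of the store-and-reuse pattern; Container stands in for FastRunContainer.
class Container:
    def __init__(self, base_filter, use_refs, case_insensitive):
        self.base_filter = base_filter
        self.use_refs = use_refs
        self.case_insensitive = case_insensitive

store: list = []

def get_container(base_filter=None, use_refs=False, case_insensitive=False):
    base_filter = base_filter or []
    for c in store:  # linear scan: reuse a container with identical parameters
        if (c.base_filter, c.use_refs, c.case_insensitive) == (base_filter, use_refs, case_insensitive):
            return c
    c = Container(base_filter, use_refs, case_insensitive)
    store.append(c)
    return c

a = get_container(use_refs=True)
b = get_container(use_refs=True)
assert a is b  # the second call reuses the cached container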
:param mediawiki_api_url: - :param session: If a session is passed, it will be used. Otherwise a new requests session is created - :param max_retries: If api request fails due to rate limiting, maxlag, or readonly mode, retry up to - `max_retries` times - :type max_retries: int - :param retry_after: Number of seconds to wait before retrying request (see max_retries) - :type retry_after: int - :param kwargs: Passed to requests.request - :return: - """ - - mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - - # TODO: Add support for 'multipart/form-data' when using POST (https://www.mediawiki.org/wiki/API:Edit#Large_edits) - - if 'data' in kwargs and kwargs['data']: - if 'format' not in kwargs['data']: - kwargs['data'].update({'format': 'json'}) - elif kwargs['data']['format'] != 'json': - raise ValueError("'format' can only be 'json' when using mediawiki_api_call()") - - response = None - session = session if session else requests.session() - for n in range(max_retries): - try: - response = session.request(method, mediawiki_api_url, **kwargs) - except requests.exceptions.ConnectionError as e: - print(f"Connection error: {e}. Sleeping for {retry_after} seconds.") - sleep(retry_after) - continue - if response.status_code in (500, 502, 503, 504): - print(f"Service unavailable (HTTP Code {response.status_code}). Sleeping for {retry_after} seconds.") - sleep(retry_after) - continue - - response.raise_for_status() - json_data = response.json() - """ - Mediawiki api response has code = 200 even if there are errors. - rate limit doesn't return HTTP 429 either. may in the future - https://phabricator.wikimedia.org/T172293 - """ - if 'error' in json_data: - # rate limiting - error_msg_names = set() - if 'messages' in json_data['error']: - error_msg_names = {x.get('name') for x in json_data['error']['messages']} - if 'actionthrottledtext' in error_msg_names: - sleep_sec = int(response.headers.get('retry-after', retry_after)) - print(f"{datetime.datetime.utcnow()}: rate limited. sleeping for {sleep_sec} seconds") - sleep(sleep_sec) - continue - - # maxlag - if 'code' in json_data['error'] and json_data['error']['code'] == 'maxlag': - sleep_sec = json_data['error'].get('lag', retry_after) - print(f"{datetime.datetime.utcnow()}: maxlag. sleeping for {sleep_sec} seconds") - sleep(sleep_sec) - continue - - # readonly - if 'code' in json_data['error'] and json_data['error']['code'] == 'readonly': - print(f'The Wikibase instance is currently in readonly mode, waiting for {retry_after} seconds') - sleep(retry_after) - continue - - # others case - raise MWApiError(response.json() if response else {}) - - # there is no error or waiting. break out of this loop and parse response - break - else: - # the first time I've ever used for - else!! - # else executes if the for loop completes normally. i.e. 
does not encouter a `break` - # in this case, that means it tried this api call 10 times - raise MWApiError(response.json() if response else {}) - - return json_data - - -def mediawiki_api_call_helper(data, login=None, mediawiki_api_url=None, user_agent=None, allow_anonymous=False, max_retries=1000, retry_after=60): - mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent - - if not allow_anonymous: - if login is None: - # Force allow_anonymous as False by default to ask for a login object - raise ValueError("allow_anonymous can't be False and login is None at the same time.") - elif mediawiki_api_url != login.mediawiki_api_url: - raise ValueError("mediawiki_api_url can't be different with the one in the login object.") - - headers = { - 'User-Agent': user_agent - } - - if data is not None: - if login is not None and 'token' not in data: - data.update({'token': login.get_edit_token()}) - - if not allow_anonymous: - # Always assert user if allow_anonymous is False - if 'assert' not in data: - data.update({'assert': 'user'}) - if 'token' in data and data['token'] == '+\\': - raise wbi_login.LoginError( - "Anonymous edit are not allowed by default. Set allow_anonymous to True to edit mediawiki anonymously or set the login parameter with a valid Login object.") - elif 'assert' not in data: - # Always assert anon if allow_anonymous is True - data.update({'assert': 'anon'}) - - login_session = login.get_session() if login is not None else None - - return mediawiki_api_call('POST', mediawiki_api_url, login_session, data=data, headers=headers, max_retries=max_retries, retry_after=retry_after) - - -@wbi_backoff() -def execute_sparql_query(query, prefix=None, endpoint=None, user_agent=None, max_retries=1000, retry_after=60, debug=False): - """ - Static method which can be used to execute any SPARQL query - :param prefix: The URI prefixes required for an endpoint, default is the Wikidata specific prefixes - :param query: The actual SPARQL query string - :param endpoint: The URL string for the SPARQL endpoint. Default is the URL for the Wikidata SPARQL endpoint - :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. - :type user_agent: str - :param max_retries: The number time this function should retry in case of header reports. - :param retry_after: the number of seconds should wait upon receiving either an error code or the Query Service is not reachable. - :param debug: Enable debug output. - :type debug: boolean - :return: The results of the query are returned in JSON format - """ - - sparql_endpoint_url = config['SPARQL_ENDPOINT_URL'] if endpoint is None else endpoint - user_agent = config['USER_AGENT_DEFAULT'] if user_agent is None else user_agent - - if prefix: - query = prefix + '\n' + query - - params = { - 'query': '#Tool: WikibaseIntegrator wbi_functions.execute_sparql_query\n' + query, - 'format': 'json' - } - - headers = { - 'Accept': 'application/sparql-results+json', - 'User-Agent': user_agent - } - - if debug: - print(params['query']) - - for n in range(max_retries): - try: - response = requests.post(sparql_endpoint_url, params=params, headers=headers) - except requests.exceptions.ConnectionError as e: - print(f"Connection error: {e}. Sleeping for {retry_after} seconds.") - sleep(retry_after) - continue - if response.status_code == 503: - print(f"Service unavailable (503). 
Sleeping for {retry_after} seconds") - sleep(retry_after) - continue - if response.status_code == 429: - if 'retry-after' in response.headers.keys(): - retry_after = response.headers['retry-after'] - print(f"Too Many Requests (429). Sleeping for {retry_after} seconds") - sleep(retry_after) - continue - response.raise_for_status() - results = response.json() - - return results - - -def merge_items(from_id, to_id, ignore_conflicts='', mediawiki_api_url=None, login=None, allow_anonymous=False, user_agent=None): - """ - A static method to merge two items - :param from_id: The QID which should be merged into another item - :type from_id: string with 'Q' prefix - :param to_id: The QID into which another item should be merged - :type to_id: string with 'Q' prefix - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :param ignore_conflicts: A string with the values 'description', 'statement' or 'sitelink', separated by a pipe ('|') if using more than one of those. - :type ignore_conflicts: str - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. - :type user_agent: str - """ - - params = { - 'action': 'wbmergeitems', - 'fromid': from_id, - 'toid': to_id, - 'format': 'json', - 'bot': '', - 'ignoreconflicts': ignore_conflicts - } - - if config['MAXLAG'] > 0: - params.update({'maxlag': config['MAXLAG']}) - - return mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, allow_anonymous=allow_anonymous) - - -def remove_claims(claim_id, summary=None, revision=None, mediawiki_api_url=None, login=None, allow_anonymous=False, user_agent=None): - """ - Delete an item - :param claim_id: One GUID or several (pipe-separated) GUIDs identifying the claims to be removed. All claims must belong to the same entity. - :type claim_id: string - :param summary: Summary for the edit. Will be prepended by an automatically generated comment. - :type summary: str - :param revision: The numeric identifier for the revision to base the modification on. This is used for detecting conflicts during save. - :type revision: str - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are. - :type user_agent: str - """ - - params = { - 'action': 'wbremoveclaims', - 'claim': claim_id, - 'summary': summary, - 'baserevid': revision, - 'bot': True, - 'format': 'json' - } - - if config['MAXLAG'] > 0: - params.update({'maxlag': config['MAXLAG']}) - - return mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, allow_anonymous=allow_anonymous) - - -def search_entities(search_string, language=None, strict_language=True, search_type='item', mediawiki_api_url=None, max_results=500, dict_result=False, login=None, - allow_anonymous=True, user_agent=None): - """ - Performs a search for entities in the Wikibase instance using labels and aliases. 
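Both the removed search_entities() shown here and its replacement in wbi_helpers drain the API page by page: they resend the request with the continue offset until the response no longer carries search-continue or max_results is reached. A compact sketch of that loop against a fake paged endpoint (fetch() and the page contents are made up for illustration):

def collect(fetch, max_results=500):
    """Drain a paged endpoint; fetch(offset) returns a dict shaped like a
    wbsearchentities response, with 'search' and an optional 'search-continue'."""
    offset, results = 0, []
    while True:
        page = fetch(offset)
        results.extend(entry['id'] for entry in page['search'])
        if 'search-continue' not in page:
            break                        # no more pages
        offset = page['search-continue']
        if offset >= max_results:
            break                        # hard cap reached
    return results

# Fake two-page endpoint for demonstration:
pages = {0: {'search': [{'id': 'Q1'}], 'search-continue': 1},
         1: {'search': [{'id': 'Q2'}]}}
assert collect(pages.__getitem__) == ['Q1', 'Q2']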
- :param search_string: a string which should be searched for in the Wikibase instance (labels and aliases) - :type search_string: str - :param language: The language in which to perform the search. - :type language: str - :param strict_language: Whether to disable language fallback - :type strict_language: bool - :param search_type: Search for this type of entity. One of the following values: form, item, lexeme, property, sense - :type search_type: str - :param mediawiki_api_url: Specify the mediawiki_api_url. - :type mediawiki_api_url: str - :param max_results: The maximum number of search results returned. Default 500 - :type max_results: int - :param dict_result: - :type dict_result: boolean - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. - :type allow_anonymous: bool - :param user_agent: The user agent string transmitted in the http header - :type user_agent: str - :return: list - """ - - language = config['DEFAULT_LANGUAGE'] if language is None else language - - params = { - 'action': 'wbsearchentities', - 'search': search_string, - 'language': language, - 'strict_language': strict_language, - 'type': search_type, - 'limit': 50, - 'format': 'json' - } - - cont_count = 0 - results = [] - - while True: - params.update({'continue': cont_count}) - - search_results = mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, - allow_anonymous=allow_anonymous) - - if search_results['success'] != 1: - raise SearchError('Wikibase API wbsearchentities failed') - else: - for i in search_results['search']: - if dict_result: - description = i['description'] if 'description' in i else None - aliases = i['aliases'] if 'aliases' in i else None - results.append({ - 'id': i['id'], - 'label': i['label'], - 'match': i['match'], - 'description': description, - 'aliases': aliases - }) - else: - results.append(i['id']) - - if 'search-continue' not in search_results: - break - else: - cont_count = search_results['search-continue'] - - if cont_count >= max_results: - break - - return results - - -def generate_item_instances(items, mediawiki_api_url=None, login=None, allow_anonymous=True, user_agent=None): - """ - A method which allows for retrieval of a list of Wikidata items or properties. The method generates a list of - tuples where the first value in the tuple is the QID or property ID, whereas the second is the new instance of - ItemEngine containing all the data of the item. This is most useful for mass retrieval of items. - :param user_agent: A custom user agent - :type user_agent: str - :param items: A list of QIDs or property IDs - :type items: list - :param mediawiki_api_url: The MediaWiki url which should be used - :type mediawiki_api_url: str - :return: A list of tuples, first value in the tuple is the QID or property ID string, second value is the instance of ItemEngine with the corresponding - item data. - :param login: The object containing the login credentials and cookies. An instance of wbi_login.Login. - :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. 
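generate_item_instances() (removed here) and its successor generate_entity_instances() both fetch a whole batch in a single wbgetentities call by pipe-joining the IDs; a minimal illustration of the request payload (the IDs are examples):

ids = ['Q42', 'P31', 'L99']
params = {
    'action': 'wbgetentities',
    'ids': '|'.join(ids),   # 'Q42|P31|L99' — one request for the whole batch
    'format': 'json',
}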
- :type allow_anonymous: bool - """ - - assert type(items) == list - - from wikibaseintegrator.wbi_core import ItemEngine - - params = { - 'action': 'wbgetentities', - 'ids': '|'.join(items), - 'format': 'json' - } - - reply = mediawiki_api_call_helper(data=params, login=login, mediawiki_api_url=mediawiki_api_url, user_agent=user_agent, allow_anonymous=allow_anonymous) - - item_instances = [] - for qid, v in reply['entities'].items(): - ii = ItemEngine(item_id=qid, item_data=v) - ii.mediawiki_api_url = mediawiki_api_url - item_instances.append((qid, ii)) - - return item_instances - - -def get_distinct_value_props(sparql_endpoint_url=None, wikibase_url=None, property_constraint_pid=None, distinct_values_constraint_qid=None): - """ - On wikidata, the default core IDs will be the properties with a distinct values constraint select ?p where {?p wdt:P2302 wd:Q21502410} - See: https://www.wikidata.org/wiki/Help:Property_constraints_portal - https://www.wikidata.org/wiki/Help:Property_constraints_portal/Unique_value - """ - - wikibase_url = config['WIKIBASE_URL'] if wikibase_url is None else wikibase_url - property_constraint_pid = config['PROPERTY_CONSTRAINT_PID'] if property_constraint_pid is None else property_constraint_pid - distinct_values_constraint_qid = config['DISTINCT_VALUES_CONSTRAINT_QID'] if distinct_values_constraint_qid is None else distinct_values_constraint_qid - - pcpid = property_constraint_pid - dvcqid = distinct_values_constraint_qid - - query = ''' - SELECT ?p WHERE {{ - ?p <{wb_url}/prop/direct/{prop_nr}> <{wb_url}/entity/{entity}> - }} - '''.format(wb_url=wikibase_url, prop_nr=pcpid, entity=dvcqid) - results = execute_sparql_query(query, endpoint=sparql_endpoint_url)['results']['bindings'] - if not results: - warn("Warning: No distinct value properties found\n" + - "Please set P2302 and Q21502410 in your Wikibase or set `core_props` manually.\n" + - "Continuing with no core_props") - return set() - else: - return set(map(lambda x: x['p']['value'].rsplit('/', 1)[-1], results)) diff --git a/wikibaseintegrator/wbi_helpers.py b/wikibaseintegrator/wbi_helpers.py new file mode 100644 index 00000000..37ac4e34 --- /dev/null +++ b/wikibaseintegrator/wbi_helpers.py @@ -0,0 +1,551 @@ +""" +Multiple functions or classes that can be used to interact with the Wikibase instance. +""" +from __future__ import annotations + +import datetime +import logging +from time import sleep +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union +from urllib.parse import urlparse + +import requests +from requests import Session + +from wikibaseintegrator.wbi_backoff import wbi_backoff +from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_exceptions import MaxRetriesReachedException, ModificationFailed, MWApiError, NonExistentEntityError, SearchError + +if TYPE_CHECKING: + from wikibaseintegrator.entities.baseentity import BaseEntity + from wikibaseintegrator.wbi_login import _Login + +log = logging.getLogger(__name__) + +helpers_session = requests.Session() + + +class BColors: + """ + Default colors for pretty outputs. 
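BColors, defined at the top of the new wbi_helpers module, is a plain namespace of ANSI escape sequences; wrapping text between a color code and ENDC colors it on ANSI-capable terminals, which is how execute_sparql_query() highlights the query it logs. For example:

# Same escape sequences as BColors; prints yellow on ANSI-capable terminals.
WARNING = '\033[93m'
ENDC = '\033[0m'
print(f"{WARNING}SELECT * WHERE {{ ?s ?p ?o }} LIMIT 1{ENDC}")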
+ """ + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + + +# Session used for all anonymous requests +default_session = requests.Session() + + +def mediawiki_api_call(method: str, mediawiki_api_url: str = None, session: Session = None, max_retries: int = 100, retry_after: int = 60, **kwargs: Any) -> Dict: + """ + A function to call the MediaWiki API. + + :param method: 'GET' or 'POST' + :param mediawiki_api_url: + :param session: If a session is passed, it will be used. Otherwise, a new requests session is created + :param max_retries: If api request fails due to rate limiting, maxlag, or readonly mode, retry up to `max_retries` times + :param retry_after: Number of seconds to wait before retrying request (see max_retries) + :param kwargs: Any additional keyword arguments to pass to requests.request + :return: The data returned by the API as a dictionary + """ + + mediawiki_api_url = str(mediawiki_api_url or config['MEDIAWIKI_API_URL']) + + # TODO: Add support for 'multipart/form-data' when using POST (https://www.mediawiki.org/wiki/API:Edit#Large_edits) + + if 'data' in kwargs and kwargs['data']: + if 'format' not in kwargs['data']: + kwargs['data'].update({'format': 'json'}) + elif kwargs['data']['format'] != 'json': + raise ValueError("'format' can only be 'json' when using mediawiki_api_call()") + + response = None + session = session if session else default_session + for n in range(max_retries): + try: + response = session.request(method=method, url=mediawiki_api_url, **kwargs) + except requests.exceptions.ConnectionError as e: + logging.exception("Connection error: %s. Sleeping for %d seconds.", e, retry_after) + sleep(retry_after) + continue + if response.status_code in (500, 502, 503, 504): + logging.error("Service unavailable (HTTP Code %d). Sleeping for %d seconds.", response.status_code, retry_after) + sleep(retry_after) + continue + + response.raise_for_status() + json_data = response.json() + # MediaWiki api response has code = 200 even if there are errors. + # Rate limit doesn't return HTTP 429 either, may in the future. + # https://phabricator.wikimedia.org/T172293 + if 'error' in json_data: + # rate limiting + if 'messages' in json_data['error'] and 'actionthrottledtext' in [message['name'] for message in json_data['error']['messages']]: # pragma: no cover + sleep_sec = int(response.headers.get('retry-after', retry_after)) + logging.error("%s: rate limited. sleeping for %d seconds", datetime.datetime.utcnow(), sleep_sec) + sleep(sleep_sec) + continue + + # maxlag + if 'code' in json_data['error'] and json_data['error']['code'] == 'maxlag': + sleep_sec = json_data['error'].get('lag', retry_after) + # We multiply the number of second by the number of tries + sleep_sec *= n + 1 + # The number of second can't be less than 5 + sleep_sec = max(sleep_sec, 5) + # The number of second can't be more than retry_after + sleep_sec = min(sleep_sec, retry_after) + logging.error("%s: maxlag. 
sleeping for %d seconds", datetime.datetime.utcnow(), sleep_sec)
+                sleep(sleep_sec)
+                continue
+
+            # readonly
+            if 'code' in json_data['error'] and json_data['error']['code'] == 'readonly':  # pragma: no cover
+                logging.error("The Wikibase instance is currently in readonly mode, waiting for %s seconds", retry_after)
+                sleep(retry_after)
+                continue
+
+            # non-existent error
+            if 'code' in json_data['error'] and json_data['error']['code'] in ['no-such-entity', 'missingtitle']:
+                raise NonExistentEntityError(json_data['error'])
+
+            # duplicate error
+            if 'code' in json_data['error'] and json_data['error']['code'] == 'modification-failed':  # pragma: no cover
+                raise ModificationFailed(json_data['error'])
+
+            # other cases
+            raise MWApiError(json_data)
+
+        # there is no error or waiting. break out of this loop and parse response
+        break
+    else:
+        # the first time I've ever used "for - else"!
+        # else executes if the for loop completes normally, i.e. does not encounter a `break`
+        # in this case, that means it tried this api call `max_retries` times
+        raise MaxRetriesReachedException(f'The number of retries ({max_retries}) has been reached.')
+
+    return json_data
+
+
+def mediawiki_api_call_helper(data: Dict[str, Any], login: _Login = None, mediawiki_api_url: str = None, user_agent: str = None, allow_anonymous: bool = False,
+                              max_retries: int = 1000, retry_after: int = 60, maxlag: int = 5, is_bot: bool = False, **kwargs: Any) -> Dict:
+    """
+    A simplified function to call the MediaWiki API.
+    Pass the data, as a dictionary, related to the action you want to call; all common options are managed automatically.
+
+    :param data: A dictionary containing the JSON data to send to the API
+    :param login: A wbi_login._Login instance
+    :param mediawiki_api_url: The URL to the MediaWiki API (default Wikidata)
+    :param user_agent: The user agent (recommended for Wikimedia Foundation instances)
+    :param allow_anonymous: Allow an unidentified edit to the MediaWiki API (default False)
+    :param max_retries: The maximum number of retries
+    :param retry_after: The timeout between each retry
+    :param maxlag: If applicable, the maximum lag allowed for the replication (a lower number reduces the load on the replicated database)
+    :param is_bot: Flag the edit as a bot
+    :param kwargs: Any additional keyword arguments to pass to requests.request
+    :return: The data returned by the API as a dictionary
+    """
+    mediawiki_api_url = str(mediawiki_api_url or config['MEDIAWIKI_API_URL'])
+    user_agent = user_agent or (str(config['USER_AGENT']) if config['USER_AGENT'] is not None else None)
+
+    hostname = urlparse(mediawiki_api_url).hostname
+    if hostname is not None and hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None:
+        log.warning('WARNING: Please set a user agent if you interact with a Wikibase instance from the Wikimedia Foundation.')
+        log.warning('More information in the README.md and https://meta.wikimedia.org/wiki/User-Agent_policy')
+
+    if not allow_anonymous:
+        if login is None:
+            # Force allow_anonymous as False by default to ask for a login object
+            raise ValueError("allow_anonymous can't be False and login is None at the same time.")
+
+        if mediawiki_api_url != login.mediawiki_api_url:
+            raise ValueError("mediawiki_api_url can't be different from the one in the login object.")
+
+    headers = {
+        'User-Agent': get_user_agent(user_agent)
+    }
+
+    # Default token is anonymous
+    if isinstance(data, dict) and 'token' not in data:
+        data.update({'token': '+\\'})
+
+    if data is not None:
+        if not allow_anonymous:
+            # Get edit token if there is a login instance
+            if login is not None:
+                data.update({'token': login.get_edit_token()})
+
+            # Always assert user if allow_anonymous is False
+            if 'assert' not in data:
+                if is_bot:
+                    data.update({'assert': 'bot'})
+                else:
+                    data.update({'assert': 'user'})
+
+            if 'token' in data and data['token'] == '+\\':
+                raise Exception("Anonymous edits are not allowed by default. "
+                                "Set allow_anonymous to True to edit mediawiki anonymously or set the login parameter with a valid Login object.")
+        else:
+            if 'assert' not in data and login is None:
+                # Assert anon if allow_anonymous is True and no Login instance
+                data.update({'assert': 'anon'})
+
+        if maxlag > 0:
+            data.update({'maxlag': maxlag})
+
+    if login is not None:
+        session = login.get_session()
+    else:
+        session = None
+
+    log.debug(data)
+
+    return mediawiki_api_call('POST', mediawiki_api_url=mediawiki_api_url, session=session, data=data, headers=headers, max_retries=max_retries, retry_after=retry_after, **kwargs)
+
+
+@wbi_backoff()
+def execute_sparql_query(query: str, prefix: str = None, endpoint: str = None, user_agent: str = None, max_retries: int = 1000, retry_after: int = 60) -> Dict[str, Dict]:
+    """
+    A function which can be used to execute any SPARQL query
+
+    :param prefix: The URI prefixes required for an endpoint, default is the Wikidata specific prefixes
+    :param query: The actual SPARQL query string
+    :param endpoint: The URL string for the SPARQL endpoint. Default is the URL for the Wikidata SPARQL endpoint
+    :param user_agent: Set a user agent string for the HTTP header to let the Query Service know who you are.
+    :param max_retries: The number of times this function should retry in case of errors.
+    :param retry_after: The number of seconds to wait upon receiving an error code or when the Query Service is not reachable.
+    :return: The results of the query are returned in JSON format
+    """
+
+    sparql_endpoint_url = str(endpoint or config['SPARQL_ENDPOINT_URL'])
+    user_agent = user_agent or (str(config['USER_AGENT']) if config['USER_AGENT'] is not None else None)
+
+    hostname = urlparse(sparql_endpoint_url).hostname
+    if hostname is not None and hostname.endswith(('wikidata.org', 'wikipedia.org', 'wikimedia.org')) and user_agent is None:
+        log.warning('WARNING: Please set a user agent if you interact with a Wikibase instance from the Wikimedia Foundation.')
+        log.warning('More information in the README.md and https://meta.wikimedia.org/wiki/User-Agent_policy')
+
+    if prefix:
+        query = prefix + '\n' + query
+
+    params = {
+        'query': '#Tool: WikibaseIntegrator wbi_functions.execute_sparql_query\n' + query,
+        'format': 'json'
+    }
+
+    headers = {
+        'Accept': 'application/sparql-results+json',
+        'User-Agent': get_user_agent(user_agent),
+        'Content-Type': 'multipart/form-data'
+    }
+
+    log.debug("%s%s%s", BColors.WARNING, params['query'], BColors.ENDC)
+
+    for _ in range(max_retries):
+        try:
+            response = helpers_session.post(sparql_endpoint_url, params=params, headers=headers)
+        except requests.exceptions.ConnectionError as e:
+            logging.exception("Connection error: %s. Sleeping for %d seconds.", e, retry_after)
+            sleep(retry_after)
+            continue
+        if response.status_code in (500, 502, 503, 504):
+            logging.error("Service unavailable (HTTP Code %d).
Sleeping for %d seconds.", response.status_code, retry_after) + sleep(retry_after) + continue + if response.status_code == 429: + if 'retry-after' in response.headers.keys(): + retry_after = int(response.headers['retry-after']) + logging.error("Too Many Requests (429). Sleeping for %d seconds", retry_after) + sleep(retry_after) + continue + response.raise_for_status() + results = response.json() + + return results + + raise Exception(f"No result after {max_retries} retries.") + + +def merge_items(from_id: str, to_id: str, login: _Login = None, ignore_conflicts: List[str] = None, is_bot: bool = False, **kwargs: Any) -> Dict: + """ + A static method to merge two items + + :param from_id: The ID to merge from. This parameter is required. + :param to_id: The ID to merge to. This parameter is required. + :param login: A wbi_login.Login instance + :param ignore_conflicts: List of elements of the item to ignore conflicts for. Can only contain values of "description", "sitelink" and "statement" + :param is_bot: Mark this edit as bot. + """ + + params = { + 'action': 'wbmergeitems', + 'fromid': from_id, + 'toid': to_id, + 'format': 'json' + } + + if ignore_conflicts is not None: + params.update({'ignoreconflicts': '|'.join(ignore_conflicts)}) + + if is_bot: + params.update({'bot': ''}) + + return mediawiki_api_call_helper(data=params, login=login, is_bot=is_bot, **kwargs) + + +def merge_lexemes(source: str, target: str, login: _Login = None, summary: str = None, is_bot: bool = False, **kwargs: Any) -> Dict: + """ + A static method to merge two lexemes + + :param source: The ID to merge from. This parameter is required. + :param target: The ID to merge to. This parameter is required. + :param login: A wbi_login.Login instance + :param summary: Summary for the edit. + :param is_bot: Mark this edit as bot. + """ + + params = { + 'action': 'wblmergelexemes', + 'fromid': source, + 'toid': target, + 'format': 'json' + } + + if summary: + params.update({'summary': summary}) + + if is_bot: + params.update({'bot': ''}) + + return mediawiki_api_call_helper(data=params, login=login, is_bot=is_bot, **kwargs) + + +def remove_claims(claim_id: str, summary: str = None, baserevid: int = None, is_bot: bool = False, **kwargs: Any) -> Dict: + """ + Delete a claim from an entity + + :param claim_id: One GUID or several (pipe-separated) GUIDs identifying the claims to be removed. All claims must belong to the same entity. + :param summary: Summary for the edit. Will be prepended by an automatically generated comment. + :param baserevid: The numeric identifier for the revision to base the modification on. This is used for detecting conflicts during save. + :param is_bot: Mark this edit as bot. + """ + + params: Dict[str, Union[str, int]] = { + 'action': 'wbremoveclaims', + 'claim': claim_id, + 'format': 'json' + } + + if summary: + params.update({'summary': summary}) + + if baserevid: + params.update({'baserevid': baserevid}) + + if is_bot: + params.update({'bot': ''}) + + return mediawiki_api_call_helper(data=params, is_bot=is_bot, **kwargs) + + +def delete_page(title: str = None, pageid: int = None, reason: str = None, deletetalk: bool = False, watchlist: str = 'preferences', watchlistexpiry: str = None, + login: _Login = None, **kwargs: Any) -> Dict: + """ + Delete a page + + :param title: Title of the page to delete. Cannot be used together with pageid. + :param pageid: Page ID of the page to delete. Cannot be used together with title. + :param reason: Reason for the deletion. 
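delete_page(), whose docstring starts here, accepts a title or a pageid but never both, and the guard clauses after its docstring enforce that. The validation pattern in isolation (a sketch, not the library code):

def check_target(title=None, pageid=None):
    # Mirror of delete_page()'s mutual-exclusion checks
    if not title and not pageid:
        raise ValueError("A title or a pageid must be specified.")
    if title and pageid:
        raise ValueError("You can't specify a title and a pageid at the same time.")

check_target(title='Sandbox')  # passes
# check_target(title='Sandbox', pageid=1234) would raise ValueError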
If not set, an automatically generated reason will be used. + :param deletetalk: Delete the talk page, if it exists. + :param watchlist: Unconditionally add or remove the page from the current user's watchlist, use preferences (ignored for bot users) or do not change watch. + One of the following values: nochange, preferences, unwatch, watch + :param watchlistexpiry: Watchlist expiry timestamp. Omit this parameter entirely to leave the current expiry unchanged. + :param login: A wbi_login.Login instance + :param kwargs: + :return: + """ + + if not title and not pageid: + raise ValueError("A title or a pageid must be specified.") + + if title and pageid: + raise ValueError("You can't specify a title and a pageid at the same time.") + + if pageid and not isinstance(pageid, int): + raise ValueError("pageid must be an integer.") + + params: Dict[str, Any] = { + 'action': 'delete', + 'watchlist': watchlist, + 'format': 'json' + } + + if title: + params.update({'title': title}) + + if pageid: + params.update({'pageid': pageid}) + + if reason: + params.update({'reason': reason}) + + if deletetalk: + params.update({'deletetalk': ''}) + + if watchlistexpiry: + params.update({'watchlistexpiry': watchlistexpiry}) + + return mediawiki_api_call_helper(data=params, login=login, **kwargs) + + +def search_entities(search_string: str, language: str = None, strict_language: bool = False, search_type: str = 'item', max_results: int = 50, dict_result: bool = False, + allow_anonymous: bool = True, **kwargs: Any) -> List[Dict[str, Any]]: + """ + Performs a search for entities in the Wikibase instance using labels and aliases. + You can have more information on the parameters in the MediaWiki API help (https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities) + + :param search_string: A string which should be searched for in the Wikibase instance (labels and aliases) + :param language: The language in which to perform the search. This only affects how entities are selected. Default is 'en' from wbi_config. + You can see the list of languages for Wikidata at https://www.wikidata.org/wiki/Help:Wikimedia_language_codes/lists/all (Use the WMF code) + :param strict_language: Whether to disable language fallback. Default is 'False'. + :param search_type: Search for this type of entity. One of the following values: form, item, lexeme, property, sense, mediainfo + :param max_results: The maximum number of search results returned. The value must be between 0 and 50. Default is 50 + :param dict_result: Return the results as a detailed dictionary instead of a list of IDs. + :param allow_anonymous: Allow anonymous interaction with the MediaWiki API. 'True' by default. 
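A hedged usage sketch for the new search_entities(): anonymous access is allowed by default, so no login is required (the search string is an example and the returned IDs depend on the instance queried):

from wikibaseintegrator.wbi_helpers import search_entities

# Returns a list of entity IDs by default; pass dict_result=True for
# dicts carrying label, match, description and aliases.
hits = search_entities(search_string='Douglas Adams', language='en', max_results=10)
print(hits)  # e.g. ['Q42', ...] on Wikidata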
+ """ + + language = str(language or config['DEFAULT_LANGUAGE']) + + params = { + 'action': 'wbsearchentities', + 'search': search_string, + 'language': language, + 'type': search_type, + 'limit': 50, + 'format': 'json' + } + + if strict_language: + params.update({'strict_language': ''}) + + cont_count = 0 + results = [] + + while True: + params.update({'continue': cont_count}) + + search_results = mediawiki_api_call_helper(data=params, allow_anonymous=allow_anonymous, **kwargs) + + if search_results['success'] != 1: + raise SearchError('Wikibase API wbsearchentities failed') + + for i in search_results['search']: + if dict_result: + description = i['description'] if 'description' in i else None + aliases = i['aliases'] if 'aliases' in i else None + results.append({ + 'id': i['id'], + 'label': i['label'], + 'match': i['match'], + 'description': description, + 'aliases': aliases + }) + else: + results.append(i['id']) + + if 'search-continue' not in search_results: + break + + cont_count = search_results['search-continue'] + + if cont_count >= max_results: + break + + return results + + +def generate_entity_instances(entities: Union[str, List[str]], allow_anonymous: bool = True, **kwargs: Any) -> List[Tuple[str, BaseEntity]]: + """ + A method which allows for retrieval of a list of Wikidata entities. The method generates a list of tuples where the first value in the tuple is the entity's ID, whereas the + second is the new instance of a subclass of BaseEntity containing all the data of the entity. This is most useful for mass retrieval of entities. + + :param entities: A list of IDs. Item, Property or Lexeme. + :param allow_anonymous: Allow anonymous edit to the MediaWiki API. Disabled by default. + :return: A list of tuples, first value in the tuple is the entity's ID, second value is the instance of a subclass of BaseEntity with the corresponding entity data. + """ + + from wikibaseintegrator.entities.baseentity import BaseEntity + + if isinstance(entities, str): + entities = [entities] + + assert isinstance(entities, list) + + params = { + 'action': 'wbgetentities', + 'ids': '|'.join(entities), + 'format': 'json' + } + + reply = mediawiki_api_call_helper(data=params, allow_anonymous=allow_anonymous, **kwargs) + + entity_instances = [] + for qid, v in reply['entities'].items(): + from wikibaseintegrator import WikibaseIntegrator + wbi = WikibaseIntegrator() + f = [x for x in BaseEntity.__subclasses__() if x.ETYPE == v['type']][0] + ii = f(api=wbi).from_json(v) + entity_instances.append((qid, ii)) + + return entity_instances + + +def format_amount(amount: Union[int, str, float]) -> str: + """ + A formatting function mostly used for Quantity datatype. + :param amount: A int, float or str you want to pass to Quantity value. + :return: A correctly formatted string amount by Wikibase standard. + """ + # Remove .0 by casting to int + if float(amount) % 1 == 0: + amount = int(float(amount)) + + # Adding prefix + for positive number and 0 + if not str(amount).startswith('+') and float(amount) >= 0: + amount = str(f'+{amount}') + + # return as string + return str(amount) + + +def get_user_agent(user_agent: Optional[str]) -> str: + """ + Return a user agent string suitable for interacting with the Wikibase instance. + + :param user_agent: An optional user-agent. If not provided, will generate a default user-agent. + :return: A correctly formatted user agent. 
+ """ + from wikibaseintegrator import __version__ + wbi_user_agent = f"WikibaseIntegrator/{__version__}" + + if user_agent is None: + return_user_agent = wbi_user_agent + else: + return_user_agent = user_agent + ' ' + wbi_user_agent + + return return_user_agent + +# def __deepcopy__(memo): +# # Don't return a copy of the module +# # Deepcopy don't allow copy of modules (https://bugs.python.org/issue43093) +# # It's really the good way to solve this? +# from wikibaseintegrator import wikibaseintegrator +# return wikibaseintegrator.wbi_helpers diff --git a/wikibaseintegrator/wbi_jsonparser.py b/wikibaseintegrator/wbi_jsonparser.py deleted file mode 100644 index c809d98b..00000000 --- a/wikibaseintegrator/wbi_jsonparser.py +++ /dev/null @@ -1,77 +0,0 @@ -import copy - - -class JsonParser: - references = [] - qualifiers = [] - final = False - current_type = None - - def __init__(self, f): - self.f = f - - def __call__(self, *args): - self.json_representation = args[1] - - if self.final: - self.final = False - return self.f(cls=self.current_type, jsn=self.json_representation) - - if 'mainsnak' in self.json_representation: - self.mainsnak = None - self.references = [] - self.qualifiers = [] - json_representation = self.json_representation - - if 'references' in json_representation: - self.references.extend([[] for _ in json_representation['references']]) - for count, ref_block in enumerate(json_representation['references']): - ref_hash = '' - if 'hash' in ref_block: - ref_hash = ref_block['hash'] - for prop in ref_block['snaks-order']: - jsn = ref_block['snaks'][prop] - - for prop_ref in jsn: - ref_class = self.get_class_representation(prop_ref) - ref_class.is_reference = True - ref_class.snak_type = prop_ref['snaktype'] - ref_class.set_hash(ref_hash) - - self.references[count].append(copy.deepcopy(ref_class)) - - # print(self.references) - if 'qualifiers' in json_representation: - for prop in json_representation['qualifiers-order']: - for qual in json_representation['qualifiers'][prop]: - qual_hash = '' - if 'hash' in qual: - qual_hash = qual['hash'] - - qual_class = self.get_class_representation(qual) - qual_class.is_qualifier = True - qual_class.snak_type = qual['snaktype'] - qual_class.set_hash(qual_hash) - self.qualifiers.append(qual_class) - - # print(self.qualifiers) - mainsnak = self.get_class_representation(json_representation['mainsnak']) - mainsnak.set_references(self.references) - mainsnak.set_qualifiers(self.qualifiers) - if 'id' in json_representation: - mainsnak.set_id(json_representation['id']) - if 'rank' in json_representation: - mainsnak.set_rank(json_representation['rank']) - mainsnak.snak_type = json_representation['mainsnak']['snaktype'] - - return mainsnak - - elif 'property' in self.json_representation: - return self.get_class_representation(jsn=self.json_representation) - - def get_class_representation(self, jsn): - from wikibaseintegrator.wbi_datatype import BaseDataType - data_type = [x for x in BaseDataType.__subclasses__() if x.DTYPE == jsn['datatype']][0] - self.final = True - self.current_type = data_type - return data_type.from_json(jsn) diff --git a/wikibaseintegrator/wbi_login.py b/wikibaseintegrator/wbi_login.py index b7c04ccb..c750db6f 100644 --- a/wikibaseintegrator/wbi_login.py +++ b/wikibaseintegrator/wbi_login.py @@ -1,175 +1,58 @@ +""" +Login class for Wikidata. Takes authentication parameters and stores the session cookies and edit tokens. 
+""" +import logging import time import webbrowser +from typing import Optional -import requests -from mwoauth import ConsumerToken, Handshaker -from oauthlib.oauth2 import BackendApplicationClient -from requests_oauthlib import OAuth1, OAuth2Session, OAuth2 +from mwoauth import ConsumerToken, Handshaker, OAuthException +from oauthlib.oauth2 import BackendApplicationClient, InvalidClientError +from requests import Session +from requests.cookies import RequestsCookieJar +from requests_oauthlib import OAuth1Session, OAuth2Session from wikibaseintegrator.wbi_backoff import wbi_backoff from wikibaseintegrator.wbi_config import config +from wikibaseintegrator.wbi_helpers import get_user_agent -""" -Login class for Wikidata. Takes username and password and stores the session cookies and edit tokens. -""" +log = logging.getLogger(__name__) -class Login: +class _Login: """ A class which handles the login to Wikidata and the generation of edit-tokens """ @wbi_backoff() - def __init__(self, user=None, pwd=None, mediawiki_api_url=None, mediawiki_index_url=None, mediawiki_rest_url=None, token_renew_period=1800, use_clientlogin=False, - consumer_key=None, consumer_secret=None, access_token=None, access_secret=None, client_id=None, client_secret=None, callback_url='oob', user_agent=None, - debug=False): + def __init__(self, session: Session = None, mediawiki_api_url: str = None, token_renew_period: int = 1800, user_agent: str = None): """ This class handles several types of login procedures. Either use user and pwd authentication or OAuth. Wikidata clientlogin can also be used. If using one method, do NOT pass parameters for another method. - :param user: the username which should be used for the login - :type user: str - :param pwd: the password which should be used for the login - :type pwd: str + + :param session: A requests.Session instance + :param mediawiki_api_url: The URL to the MediaWiki API (default Wikidata) :param token_renew_period: Seconds after which a new token should be requested from the Wikidata server - :type token_renew_period: int - :param use_clientlogin: use authmanager based login method instead of standard login. - For 3rd party data consumer, e.g. web clients - :type use_clientlogin: bool - :param consumer_key: The consumer key for OAuth - :type consumer_key: str - :param consumer_secret: The consumer secret for OAuth - :type consumer_secret: str - :param access_token: The access token for OAuth - :type access_token: str - :param access_secret: The access secret for OAuth - :type access_secret: str - :param callback_url: URL which should be used as the callback URL - :type callback_url: str :param user_agent: UA string to use for API requests. 
- :type user_agent: str - :return: None """ - self.mediawiki_api_url = config['MEDIAWIKI_API_URL'] if mediawiki_api_url is None else mediawiki_api_url - self.mediawiki_index_url = config['MEDIAWIKI_INDEX_URL'] if mediawiki_index_url is None else mediawiki_index_url - self.mediawiki_rest_url = config['MEDIAWIKI_REST_URL'] if mediawiki_rest_url is None else mediawiki_rest_url - - if debug: - print(self.mediawiki_api_url) + self.session: Session = session or Session() + self.mediawiki_api_url: str = str(mediawiki_api_url or config['MEDIAWIKI_API_URL']) + self.token_renew_period: int = token_renew_period - self.session = requests.Session() - self.edit_token = '' - self.instantiation_time = time.time() - self.token_renew_period = token_renew_period + self.edit_token: Optional[str] = None + self.instantiation_time: float = time.time() - self.consumer_key = consumer_key - self.consumer_secret = consumer_secret - self.access_token = access_token - self.access_secret = access_secret - self.client_id = client_id - self.client_secret = client_secret - self.response_qs = None - self.callback_url = callback_url - - if user_agent: - self.user_agent = user_agent - else: - # if a user is given append " (User:USER)" to the UA string and update that value in CONFIG - if user and user.casefold() not in config['USER_AGENT_DEFAULT'].casefold(): - config['USER_AGENT_DEFAULT'] += f" (User:{user})" - self.user_agent = config['USER_AGENT_DEFAULT'] self.session.headers.update({ - 'User-Agent': self.user_agent + 'User-Agent': get_user_agent(user_agent or (str(config['USER_AGENT']) if config['USER_AGENT'] is not None else None)) }) - if self.consumer_key and self.consumer_secret: - if self.access_token and self.access_secret: - # OAuth procedure, based on https://www.mediawiki.org/wiki/OAuth/Owner-only_consumers#Python - auth = OAuth1(self.consumer_key, client_secret=self.consumer_secret, resource_owner_key=self.access_token, resource_owner_secret=self.access_secret) - self.session.auth = auth - self.generate_edit_credentials() - else: - # Oauth procedure, based on https://www.mediawiki.org/wiki/OAuth/For_Developers - # Consruct a "consumer" from the key/secret provided by MediaWiki - self.consumer_token = ConsumerToken(self.consumer_key, self.consumer_secret) - - # Construct handshaker with wiki URI and consumer - self.handshaker = Handshaker(self.mediawiki_index_url, self.consumer_token, callback=self.callback_url, user_agent=self.user_agent) - - # Step 1: Initialize -- ask MediaWiki for a temp key/secret for user - # redirect -> authorization -> callback url - self.redirect, self.request_token = self.handshaker.initiate(callback=self.callback_url) - elif self.client_id and self.client_secret: - oauth = OAuth2Session(client=BackendApplicationClient(client_id=self.client_id)) - token = oauth.fetch_token(token_url=self.mediawiki_rest_url + '/oauth2/access_token', client_id=self.client_id, client_secret=self.client_secret) - auth = OAuth2(token=token) - self.session.auth = auth - self.generate_edit_credentials() - else: - params_login = { - 'action': 'query', - 'meta': 'tokens', - 'type': 'login', - 'format': 'json' - } - - # get login token - login_token = self.session.post(self.mediawiki_api_url, data=params_login).json()['query']['tokens']['logintoken'] - - if use_clientlogin: - params = { - 'action': 'clientlogin', - 'username': user, - 'password': pwd, - 'logintoken': login_token, - 'loginreturnurl': 'https://example.org/', - 'format': 'json' - } - - login_result = self.session.post(self.mediawiki_api_url, 
data=params).json()
-
-                if debug:
-                    print(login_result)
-
-                if 'clientlogin' in login_result:
-                    if login_result['clientlogin']['status'] != 'PASS':
-                        clientlogin = login_result['clientlogin']
-                        raise LoginError(f"Login failed ({clientlogin['messagecode']}). Message: '{clientlogin['message']}'")
-                    elif debug:
-                        print("Successfully logged in as", login_result['clientlogin']['username'])
-                else:
-                    error = login_result['error']
-                    raise LoginError(f"Login failed ({error['code']}). Message: '{error['info']}'")
-
-            else:
-                params = {
-                    'action': 'login',
-                    'lgname': user,
-                    'lgpassword': pwd,
-                    'lgtoken': login_token,
-                    'format': 'json'
-                }
-
-                login_result = self.session.post(self.mediawiki_api_url, data=params).json()
-
-                if debug:
-                    print(login_result)
-
-                if login_result['login']['result'] != 'Success':
-                    raise LoginError(f"Login failed. Reason: '{login_result['login']['result']}'")
-                elif debug:
-                    print("Successfully logged in as", login_result['login']['lgusername'])
-
-                if 'warnings' in login_result:
-                    print("MediaWiki login warnings messages:")
-                    for message in login_result['warnings']:
-                        print(f"* {message}: {login_result['warnings'][message]['*']}")
-
-            self.generate_edit_credentials()
+        self.generate_edit_credentials()

-    def generate_edit_credentials(self):
+    def generate_edit_credentials(self) -> RequestsCookieJar:
         """
-        request an edit token and update the cookie_jar in order to add the session cookie
+        Request an edit token and update the cookie_jar in order to add the session cookie
+
+        :return: The session cookie jar (a RequestsCookieJar) containing all relevant cookies
         """

         params = {
@@ -178,14 +61,19 @@ def generate_edit_credentials(self):
             'type': 'csrf',
             'format': 'json'
         }
-        response = self.session.get(self.mediawiki_api_url, params=params)
-        self.edit_token = response.json()['query']['tokens']['csrftoken']
+        response = self.session.get(url=self.mediawiki_api_url, params=params).json()
+        if 'error' in response:
+            raise LoginError(f"Login failed ({response['error']['code']}). Message: '{response['error']['info']}'")
+        if response['query']['tokens']['csrftoken'] == '+\\':
+            raise LoginError("Login failed. An anonymous token was returned.")
+        self.edit_token = response['query']['tokens']['csrftoken']

         return self.session.cookies

-    def get_edit_cookie(self):
+    def get_edit_cookie(self) -> RequestsCookieJar:
         """
         Can be called in order to retrieve the cookies from an instance of wbi_login.Login
+
+        :return: The session cookie jar (a RequestsCookieJar) containing all relevant cookies
         """

         if (time.time() - self.instantiation_time) > self.token_renew_period:
@@ -194,9 +82,10 @@ def get_edit_cookie(self):

         return self.session.cookies

-    def get_edit_token(self):
+    def get_edit_token(self) -> Optional[str]:
         """
         Can be called in order to retrieve the edit token from an instance of wbi_login.Login
+
+        :return: The edit token
         """

         if not self.edit_token or (time.time() - self.instantiation_time) > self.token_renew_period:
@@ -205,39 +94,214 @@ def get_edit_token(self):

         return self.edit_token

-    def get_session(self):
+    def get_session(self) -> Session:
         """
-        returns the requests session object used for the login.
+        Returns the requests.Session object used for the login.
+        :return: An object of type requests.Session
         """

         return self.session

-    def continue_oauth(self, oauth_callback_data=None):
+
+class OAuth2(_Login):
+    @wbi_backoff()
+    def __init__(self, consumer_token: str = None, consumer_secret: str = None, mediawiki_api_url: str = None, mediawiki_rest_url: str = None, token_renew_period: int = 1800,
+                 user_agent: str = None):
+        """
+        This class handles the login via the OAuth 2.0 protocol.
+
+        :param consumer_token: The consumer token
+        :param consumer_secret: The consumer secret
+        :param mediawiki_api_url: The URL to the MediaWiki API (default Wikidata)
+        :param mediawiki_rest_url: The URL to the MediaWiki REST API (default Wikidata)
+        :param token_renew_period: Seconds after which a new token should be requested from the Wikidata server
+        :param user_agent: UA string to use for API requests.
+        """
+
+        mediawiki_rest_url = str(mediawiki_rest_url or config['MEDIAWIKI_REST_URL'])
+
+        session = OAuth2Session(client=BackendApplicationClient(client_id=consumer_token))
+        try:
+            session.fetch_token(token_url=mediawiki_rest_url + '/oauth2/access_token', client_id=consumer_token, client_secret=consumer_secret)
+        except InvalidClientError as err:
+            raise LoginError(err) from err
+        super().__init__(session=session, token_renew_period=token_renew_period, user_agent=user_agent, mediawiki_api_url=mediawiki_api_url)
+
+
+class OAuth1(_Login):
+
+    @wbi_backoff()
+    def __init__(self, consumer_token: str = None, consumer_secret: str = None, access_token: str = None, access_secret: str = None, callback_url: str = 'oob',
+                 mediawiki_api_url: str = None, mediawiki_index_url: str = None, token_renew_period: int = 1800, user_agent: str = None):
+        """
+        This class handles the login via the OAuth 1.0a protocol.
+
+        :param consumer_token: The consumer token
+        :param consumer_secret: The consumer secret
+        :param access_token: The access token (optional)
+        :param access_secret: The access secret (optional)
+        :param callback_url: The callback URL used to finalize the handshake
+        :param mediawiki_api_url: The URL to the MediaWiki API (default Wikidata)
+        :param mediawiki_index_url: The URL to the MediaWiki index (default Wikidata)
+        :param token_renew_period: Seconds after which a new token should be requested from the Wikidata server
+        :param user_agent: UA string to use for API requests.
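+
+        Example (a minimal owner-only consumer sketch; all four token values are placeholders):
+
+            >>> login = OAuth1(consumer_token='ct', consumer_secret='cs',
+            ...                access_token='at', access_secret='as')  # doctest: +SKIP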
+ """ + + mediawiki_index_url = str(mediawiki_index_url or config['MEDIAWIKI_INDEX_URL']) + + if access_token and access_secret: + # OAuth procedure, based on https://www.mediawiki.org/wiki/OAuth/Owner-only_consumers#Python + session = OAuth1Session(client_key=consumer_token, client_secret=consumer_secret, resource_owner_key=access_token, resource_owner_secret=access_secret) + super().__init__(session=session, token_renew_period=token_renew_period, user_agent=user_agent, mediawiki_api_url=mediawiki_api_url) + else: + # Oauth procedure, based on https://www.mediawiki.org/wiki/OAuth/For_Developers + # Consruct a "consumer" from the key/secret provided by MediaWiki + self.oauth1_consumer_token = ConsumerToken(consumer_token, consumer_secret) + + # Construct handshaker with wiki URI and consumer + self.handshaker = Handshaker(mw_uri=mediawiki_index_url, consumer_token=self.oauth1_consumer_token, callback=callback_url, + user_agent=get_user_agent(user_agent or (str(config['USER_AGENT']) if config['USER_AGENT'] is not None else None))) + + # Step 1: Initialize -- ask MediaWiki for a temp key/secret for user + # redirect -> authorization -> callback url + try: + self.redirect, self.request_token = self.handshaker.initiate(callback=callback_url) + except OAuthException as err: + raise LoginError(err) from err + + def continue_oauth(self, oauth_callback_data: str = None) -> None: """ Continuation of OAuth procedure. Method must be explicitly called in order to complete OAuth. This allows external entities, e.g. websites, to provide tokens through callback URLs directly. + :param oauth_callback_data: The callback URL received to a Web app - :type oauth_callback_data: bytes :return: """ - self.response_qs = oauth_callback_data - if not self.response_qs: + if not oauth_callback_data: webbrowser.open(self.redirect) - self.response_qs = input("Callback URL: ") + oauth_callback_data = input("Callback URL: ") # input the url from redirect after authorization - response_qs = self.response_qs.split(b'?')[-1] + response_qs = oauth_callback_data.split('?')[-1] # Step 3: Complete -- obtain authorized key/secret for "resource owner" access_token = self.handshaker.complete(self.request_token, response_qs) + if self.oauth1_consumer_token is None: + raise ValueError("oauth1_consumer_token can't be None") + # input the access token to return a csrf (edit) token - auth = OAuth1(self.consumer_token.key, client_secret=self.consumer_token.secret, resource_owner_key=access_token.key, resource_owner_secret=access_token.secret) - self.session.auth = auth + self.session = OAuth1Session(client_key=self.oauth1_consumer_token.key, client_secret=self.oauth1_consumer_token.secret, resource_owner_key=access_token.key, + resource_owner_secret=access_token.secret) self.generate_edit_credentials() +class Login(_Login): + @wbi_backoff() + def __init__(self, user: str = None, password: str = None, mediawiki_api_url: str = None, token_renew_period: int = 1800, user_agent: str = None): + """ + This class is used to log in with a bot password + + :param user: The user of the bot password (format @) + :param password: The password generated byt the MediaWiki + :param mediawiki_api_url: The URL to the MediaWiki API (default Wikidata) + :param token_renew_period: Seconds after which a new token should be requested from the Wikidata server + :param user_agent: UA string to use for API requests. 
+ """ + + mediawiki_api_url = str(mediawiki_api_url or config['MEDIAWIKI_API_URL']) + session = Session() + + params_login = { + 'action': 'query', + 'meta': 'tokens', + 'type': 'login', + 'format': 'json' + } + + # get login token + login_token = session.post(mediawiki_api_url, data=params_login).json()['query']['tokens']['logintoken'] + + params = { + 'action': 'login', + 'lgname': user, + 'lgpassword': password, + 'lgtoken': login_token, + 'format': 'json' + } + + login_result = session.post(mediawiki_api_url, data=params).json() + + if 'login' in login_result and login_result['login']['result'] == 'Success': + log.info("Successfully logged in as %s", login_result['login']['lgusername']) + else: + raise LoginError(f"Login failed. Reason: '{login_result['login']['reason']}'") + + if 'warnings' in login_result: + logging.warning("MediaWiki login warnings messages:") + for message in login_result['warnings']: + logging.warning(f"* {message}: {login_result['warnings'][message]['*']}") + + super().__init__(session=session, token_renew_period=token_renew_period, user_agent=user_agent, mediawiki_api_url=mediawiki_api_url) + + +class Clientlogin(_Login): + @wbi_backoff() + def __init__(self, user: str = None, password: str = None, mediawiki_api_url: str = None, token_renew_period: int = 1800, user_agent: str = None): + """ + This class is used to log in with a user account + + :param user: The username + :param password: The password + :param mediawiki_api_url: The URL to the MediaWiki API (default Wikidata) + :param token_renew_period: Seconds after which a new token should be requested from the Wikidata server + :param user_agent: UA string to use for API requests. + """ + + mediawiki_api_url = str(mediawiki_api_url or config['MEDIAWIKI_API_URL']) + session = Session() + + params_login = { + 'action': 'query', + 'meta': 'tokens', + 'type': 'login', + 'format': 'json' + } + + # get login token + login_token = session.post(mediawiki_api_url, data=params_login).json()['query']['tokens']['logintoken'] + + params = { + 'action': 'clientlogin', + 'username': user, + 'password': password, + 'logintoken': login_token, + 'loginreturnurl': 'https://example.org/', + 'format': 'json' + } + + login_result = session.post(mediawiki_api_url, data=params).json() + + log.debug(login_result) + + if 'clientlogin' in login_result: + clientlogin = login_result['clientlogin'] + if clientlogin['status'] != 'PASS': + log.debug(clientlogin) + raise LoginError(f"Login failed ({clientlogin['messagecode']}). Message: '{clientlogin['message']}'") + + log.info("Successfully logged in as %s", clientlogin['username']) + else: + raise LoginError(f"Login failed ({login_result['error']['code']}). Message: '{login_result['error']['info']}'") + + if 'warnings' in login_result: + logging.warning("MediaWiki login warnings messages:") + for message in login_result['warnings']: + logging.warning(f"* {message}: {login_result['warnings'][message]['*']}") + + super().__init__(session=session, token_renew_period=token_renew_period, user_agent=user_agent, mediawiki_api_url=mediawiki_api_url) + + class LoginError(Exception): """Raised when there is an issue with the login""" - pass diff --git a/wikibaseintegrator/wikibaseintegrator.py b/wikibaseintegrator/wikibaseintegrator.py new file mode 100644 index 00000000..62662761 --- /dev/null +++ b/wikibaseintegrator/wikibaseintegrator.py @@ -0,0 +1,34 @@ +""" +Main class of the Library. 
+""" +from __future__ import annotations + +from typing import TYPE_CHECKING + +from wikibaseintegrator.entities.item import ItemEntity +from wikibaseintegrator.entities.lexeme import LexemeEntity +from wikibaseintegrator.entities.mediainfo import MediaInfoEntity +from wikibaseintegrator.entities.property import PropertyEntity + +if TYPE_CHECKING: + from wikibaseintegrator.wbi_login import _Login + + +class WikibaseIntegrator: + + def __init__(self, is_bot: bool = False, login: _Login = None): + """ + This function initializes a WikibaseIntegrator instance to quickly access different entity type instances. + + :param is_bot: declare if the bot flag must be set when you interact with the MediaWiki API. + :param login: a wbi_login instance needed when you try to access a restricted MediaWiki instance. + """ + # Runtime variables + self.is_bot = is_bot or False + self.login = login + + # Quick access to entities + self.item = ItemEntity(api=self) + self.property = PropertyEntity(api=self) + self.lexeme = LexemeEntity(api=self) + self.mediainfo = MediaInfoEntity(api=self)