Skip to content

Commit 20ceebd

Browse files
committed
Refactor MediaInfo handling to unify claims and statements; update tests and documentation
1 parent 7df6f9b commit 20ceebd

File tree

4 files changed

+46
-19
lines changed

4 files changed

+46
-19
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,11 @@ type instances for references and qualifiers.
569569

570570
WikibaseIntegrator supports SDC (Structured Data on Commons) to update a media file hosted on Wikimedia Commons.
571571

572+
> [!IMPORTANT]
573+
> To ease compatibility between Wikidata and Wikimedia Commons, the MediaInfo entity uses the "claims" field instead of the "statements" field returned by the API.
574+
> When you read a MediaInfo entity, the "claims" field will be filled with the data from the "statements" field.
575+
> When you write a MediaInfo entity, the "claims" field is renamed back to "statements" in the JSON sent to the API.
576+
572577
### Retrieve data ###
573578

574579
```python

test/test_entity_mediainfo.py

Lines changed: 28 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,31 @@
11
import unittest
22

3+
import pytest
4+
35
from wikibaseintegrator import WikibaseIntegrator
46
from wikibaseintegrator.wbi_config import config as wbi_config
57

6-
wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_mediainfo.py)'
7-
88
wbi = WikibaseIntegrator()
99

1010

1111
class TestEntityMediaInfo(unittest.TestCase):
1212

13+
@pytest.fixture(autouse=True)
14+
def setup(self):
15+
wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_mediainfo.py)'
16+
wbi_config['WIKIBASE_URL'] = 'https://commons.wikimedia.org'
17+
wbi_config['MEDIAWIKI_API_URL'] = 'https://commons.wikimedia.org/w/api.php'
18+
yield
19+
wbi_config['WIKIBASE_URL'] = 'http://www.wikidata.org'
20+
wbi_config['MEDIAWIKI_API_URL'] = 'https://www.wikidata.org/w/api.php'
21+
1322
def test_get(self):
1423
# Test with complete id
15-
assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279'
24+
assert wbi.mediainfo.get('M75908279').id == 'M75908279'
1625
# Test with numeric id as string
17-
assert wbi.mediainfo.get('75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279'
26+
assert wbi.mediainfo.get('75908279').id == 'M75908279'
1827
# Test with numeric id as int
19-
assert wbi.mediainfo.get(75908279, mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279'
28+
assert wbi.mediainfo.get(75908279).id == 'M75908279'
2029

2130
# Test with invalid id
2231
with self.assertRaises(ValueError):
@@ -31,9 +40,19 @@ def test_get(self):
3140
wbi.mediainfo.get(-1)
3241

3342
def test_get_json(self):
34-
assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').get_json()
43+
assert wbi.mediainfo.get('M75908279').get_json()
3544

3645
def test_entity_url(self):
37-
assert wbi.mediainfo.new(id='M582').get_entity_url() == 'http://www.wikidata.org/entity/M582'
38-
assert wbi.mediainfo.new(id='582').get_entity_url() == 'http://www.wikidata.org/entity/M582'
39-
assert wbi.mediainfo.new(id=582).get_entity_url() == 'http://www.wikidata.org/entity/M582'
46+
assert wbi.mediainfo.new(id='M75908279').get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279'
47+
assert wbi.mediainfo.new(id='75908279').get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279'
48+
assert wbi.mediainfo.new(id=75908279).get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279'
49+
50+
# Test if we can read the claims/statements of the entity
51+
def test_entity_claims(self):
52+
media = wbi.mediainfo.get('M75908279')
53+
assert media.claims
54+
55+
# Test if we can have the statements field in the json
56+
def test_get_statements(self):
57+
media = wbi.mediainfo.get('M75908279')
58+
assert media.get_json()['statements']

wikibaseintegrator/datatypes/string.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def set_value(self, value: Optional[str] = None):
2424
assert isinstance(value, str) or value is None, f"Expected str, found {type(value)} ({value})"
2525

2626
if value and ('\n' in value or '\r' in value):
27-
raise ValueError("String value must not contain new ine character")
27+
raise ValueError("String value must not contain newline character")
2828

2929
if value:
3030
self.mainsnak.datavalue = {

wikibaseintegrator/entities/mediainfo.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -121,20 +121,23 @@ def get_by_title(self, titles: list[str] | str, sites: str = 'commonswiki', **kw
121121
return MediaInfoEntity(api=self.api).from_json(json_data=json_data['entities'][list(json_data['entities'].keys())[0]])
122122

123123
def get_json(self) -> dict[str, str | dict]:
124-
return {
124+
json_data = {
125125
'labels': self.labels.get_json(),
126126
'descriptions': self.descriptions.get_json(),
127127
**super().get_json()
128128
}
129129

130-
# if 'claims' in json_data: # MediaInfo change name of 'claims' to 'statements'
131-
# json_data['statements'] = json_data.pop('claims')
130+
if 'claims' in json_data: # MediaInfo change name of 'claims' to 'statements'
131+
json_data['statements'] = json_data.pop('claims')
132132

133-
# if 'statements' in json_data:
134-
# for prop_nr in json_data['statements']:
135-
# for statement in json_data['statements'][prop_nr]:
136-
# if 'mainsnak' in statement and 'datatype' in statement['mainsnak']:
137-
# del statement['mainsnak']['datatype']
133+
if isinstance(json_data, dict) and 'statements' in json_data and isinstance(json_data['statements'], dict):
134+
for prop_nr, statements in json_data['statements'].items():
135+
for statement in statements:
136+
if isinstance(statement, dict) and 'mainsnak' in statement:
137+
if isinstance(statement['mainsnak'], dict) and 'datatype' in statement['mainsnak']:
138+
del statement['mainsnak']['datatype']
139+
140+
return json_data
138141

139142
def from_json(self, json_data: dict[str, Any]) -> MediaInfoEntity:
140143
super().from_json(json_data=json_data)
@@ -143,7 +146,7 @@ def from_json(self, json_data: dict[str, Any]) -> MediaInfoEntity:
143146
self.labels = Labels().from_json(json_data['labels'])
144147
if 'descriptions' in json_data:
145148
self.descriptions = Descriptions().from_json(json_data['descriptions'])
146-
if 'aliases' in json_data:
149+
if 'statements' in json_data:
147150
self.claims = Claims().from_json(json_data['statements'])
148151

149152
return self

0 commit comments

Comments
 (0)