Skip to content

Commit 23a4de9

Browse files
committed
Refactor MediaInfo handling to unify claims and statements; update tests and documentation
1 parent 7df6f9b commit 23a4de9

File tree

3 files changed

+36
-16
lines changed

3 files changed

+36
-16
lines changed

README.md

+5
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,11 @@ type instances for references and qualifiers.
569569

570570
WikibaseIntegrator supports SDC (Structured Data on Commons) to update a media file hosted on Wikimedia Commons.
571571

572+
> [!IMPORTANT]
573+
> To ease compatibility between Wikidata and Wikimedia Commons, the MediaInfo entity uses the "claims" field instead of the "statements" field returned by the API.
574+
> When you read a MediaInfo entity, the "claims" field will be filled with the data from the "statements" field.
575+
> When you write a MediaInfo entity, the "claims" field will be renamed to "statements" in the JSON sent to the API.
576+
572577
### Retrieve data ###
573578

574579
```python

test/test_entity_mediainfo.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from wikibaseintegrator.wbi_config import config as wbi_config
55

66
wbi_config['USER_AGENT'] = 'WikibaseIntegrator-pytest/1.0 (test_entity_mediainfo.py)'
7+
wbi_config['WIKIBASE_URL'] = 'https://commons.wikimedia.org'
8+
wbi_config['MEDIAWIKI_API_URL'] = 'https://commons.wikimedia.org/w/api.php'
79

810
wbi = WikibaseIntegrator()
911

@@ -12,11 +14,11 @@ class TestEntityMediaInfo(unittest.TestCase):
1214

1315
def test_get(self):
1416
# Test with complete id
15-
assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279'
17+
assert wbi.mediainfo.get('M75908279').id == 'M75908279'
1618
# Test with numeric id as string
17-
assert wbi.mediainfo.get('75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279'
19+
assert wbi.mediainfo.get('75908279').id == 'M75908279'
1820
# Test with numeric id as int
19-
assert wbi.mediainfo.get(75908279, mediawiki_api_url='https://commons.wikimedia.org/w/api.php').id == 'M75908279'
21+
assert wbi.mediainfo.get(75908279).id == 'M75908279'
2022

2123
# Test with invalid id
2224
with self.assertRaises(ValueError):
@@ -31,9 +33,19 @@ def test_get(self):
3133
wbi.mediainfo.get(-1)
3234

3335
def test_get_json(self):
34-
assert wbi.mediainfo.get('M75908279', mediawiki_api_url='https://commons.wikimedia.org/w/api.php').get_json()
36+
assert wbi.mediainfo.get('M75908279').get_json()
3537

3638
def test_entity_url(self):
37-
assert wbi.mediainfo.new(id='M582').get_entity_url() == 'http://www.wikidata.org/entity/M582'
38-
assert wbi.mediainfo.new(id='582').get_entity_url() == 'http://www.wikidata.org/entity/M582'
39-
assert wbi.mediainfo.new(id=582).get_entity_url() == 'http://www.wikidata.org/entity/M582'
39+
assert wbi.mediainfo.new(id='M75908279').get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279'
40+
assert wbi.mediainfo.new(id='75908279').get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279'
41+
assert wbi.mediainfo.new(id=75908279).get_entity_url() == 'https://commons.wikimedia.org/entity/M75908279'
42+
43+
# Test if we can read the claims/statements of the entity
44+
def test_entity_claims(self):
45+
media = wbi.mediainfo.get('M75908279')
46+
assert media.claims
47+
48+
# Test if we can have the statements field in the json
49+
def test_get_statements(self):
50+
media = wbi.mediainfo.get('M75908279')
51+
assert media.get_json()['statements']

wikibaseintegrator/entities/mediainfo.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -121,20 +121,23 @@ def get_by_title(self, titles: list[str] | str, sites: str = 'commonswiki', **kw
121121
return MediaInfoEntity(api=self.api).from_json(json_data=json_data['entities'][list(json_data['entities'].keys())[0]])
122122

123123
def get_json(self) -> dict[str, str | dict]:
124-
return {
124+
json_data = {
125125
'labels': self.labels.get_json(),
126126
'descriptions': self.descriptions.get_json(),
127127
**super().get_json()
128128
}
129129

130-
# if 'claims' in json_data: # MediaInfo change name of 'claims' to 'statements'
131-
# json_data['statements'] = json_data.pop('claims')
130+
if 'claims' in json_data: # MediaInfo change name of 'claims' to 'statements'
131+
json_data['statements'] = json_data.pop('claims')
132132

133-
# if 'statements' in json_data:
134-
# for prop_nr in json_data['statements']:
135-
# for statement in json_data['statements'][prop_nr]:
136-
# if 'mainsnak' in statement and 'datatype' in statement['mainsnak']:
137-
# del statement['mainsnak']['datatype']
133+
if isinstance(json_data, dict) and 'statements' in json_data and isinstance(json_data['statements'], dict):
134+
for prop_nr, statements in json_data['statements'].items():
135+
for statement in statements:
136+
if isinstance(statement, dict) and 'mainsnak' in statement:
137+
if isinstance(statement['mainsnak'], dict) and 'datatype' in statement['mainsnak']:
138+
del statement['mainsnak']['datatype']
139+
140+
return json_data
138141

139142
def from_json(self, json_data: dict[str, Any]) -> MediaInfoEntity:
140143
super().from_json(json_data=json_data)
@@ -143,7 +146,7 @@ def from_json(self, json_data: dict[str, Any]) -> MediaInfoEntity:
143146
self.labels = Labels().from_json(json_data['labels'])
144147
if 'descriptions' in json_data:
145148
self.descriptions = Descriptions().from_json(json_data['descriptions'])
146-
if 'aliases' in json_data:
149+
if 'statements' in json_data:
147150
self.claims = Claims().from_json(json_data['statements'])
148151

149152
return self

0 commit comments

Comments
 (0)