Skip to content

pySMWSync API Documentation

mapping

Created on 2023-03-03

@author: wf

Mapping

a mapping for properties

Source code in smwsync/mapping.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
class Mapping:
    """
    a mapping for properties

    Keeps one TopicMapping per topic name in ``map_by_topic`` and can be
    read from / written to a YAML file.
    """

    def __init__(self):
        """
        constructor
        """
        # topic name -> TopicMapping
        self.map_by_topic = {}

    def fromYaml(self, yaml_path: str):
        """
        initialize me from the given yaml_path

        Each record of the YAML list needs a "topic" and a "prop_list" entry.

        Args:
            yaml_path(str): the path to the yaml file
        """
        # read with an explicit encoding so the result does not depend on the locale
        with open(yaml_path, "r", encoding="utf-8") as yaml_file:
            # safe_load yields None for an empty document - treat that as "no records"
            self.map_list = yaml.safe_load(yaml_file) or []
        for map_record in self.map_list:
            topic_map = TopicMapping(map_record["topic"])
            for propm_record in map_record["prop_list"]:
                topic_map.add_mapping4record(propm_record)
            self.map_by_topic[topic_map.topic_name] = topic_map

    def toYaml(self, yaml_path: str):
        """
        store me to the given yaml_path

        Args:
            yaml_path(str): the path to the yaml file
        """
        map_list = [tm.asdict() for tm in self.map_by_topic.values()]
        with open(yaml_path, "w") as yaml_file:
            # sort_keys=False keeps the key order produced by asdict()
            yaml.dump(map_list, yaml_file, sort_keys=False)

__init__()

constructor

Source code in smwsync/mapping.py
109
110
111
112
113
def __init__(self):
    """
    create an empty mapping
    """
    # topic name -> topic mapping, filled later
    topic_index = {}
    self.map_by_topic = topic_index

fromYaml(yaml_path)

initialize me from the given yaml_path

Parameters:

Name Type Description Default
yaml_path(str)

the path to the yaml file

required
Source code in smwsync/mapping.py
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def fromYaml(self, yaml_path: str):
    """
    initialize me from the given yaml_path

    Each record of the YAML list needs a "topic" and a "prop_list" entry.

    Args:
        yaml_path(str): the path to the yaml file
    """
    # read with an explicit encoding so the result does not depend on the locale
    with open(yaml_path, "r", encoding="utf-8") as yaml_file:
        # safe_load yields None for an empty document - treat that as "no records"
        self.map_list = yaml.safe_load(yaml_file) or []
    for map_record in self.map_list:
        topic_map = TopicMapping(map_record["topic"])
        for propm_record in map_record["prop_list"]:
            topic_map.add_mapping4record(propm_record)
        self.map_by_topic[topic_map.topic_name] = topic_map

toYaml(yaml_path)

store me to the given yaml_path

Parameters:

Name Type Description Default
yaml_path(str)

the path to the yaml file

required
Source code in smwsync/mapping.py
132
133
134
135
136
137
138
139
140
141
142
143
def toYaml(self, yaml_path: str):
    """
    write all topic mappings as a YAML list to the given yaml_path

    Args:
        yaml_path(str): the path to the yaml file
    """
    records = [topic_map.asdict() for topic_map in self.map_by_topic.values()]
    with open(yaml_path, "w") as yaml_file:
        # keep the key order of the records as they were built
        yaml.dump(records, yaml_file, sort_keys=False)

TopicMapping

a property mapping for a given topic

Source code in smwsync/mapping.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
class TopicMapping:
    """
    a property mapping for a given topic

    The same property mappings are indexed three ways: by argument name,
    by SMW property name and by property id.
    """

    def __init__(self, topic_name: str):
        """
        initialize this topic mapping

        Args:
            topic_name(str): the name of the topic the mappings belong to
        """
        self.topic_name = topic_name
        self.prop_by_arg, self.prop_by_smw_prop, self.prop_by_pid = {}, {}, {}

    def __repr__(self):
        """
        return my representation - the text form of asdict()
        """
        return str(self.asdict())

    def asdict(self):
        """
        convert me to a dict

        Returns:
            dict: my topic name as "topic" and the list of my property
            mapping records as "prop_list"
        """
        records = [
            dataclasses.asdict(entry) for entry in self.prop_by_smw_prop.values()
        ]
        return {"topic": self.topic_name, "prop_list": records}

    def add_mapping4record(self, propm_record: dict) -> PropMapping:
        """
        add a property map record to the mapping

        Any failure is reported as a warning on stdout and not raised.

        Args:
            propm_record(dict): the record to instantiate the PropMapping from

        Returns:
            PropMapping: the property Mapping created - None if the record
            could not be converted
        """
        created = None
        try:
            created = dacite.from_dict(data_class=PropMapping, data=propm_record)
            self.add_mapping(created)
        except Exception as ex:
            # best effort: a bad record must not stop the remaining records
            warning = (
                f"Warning property mapping {propm_record} could not be added: {str(ex)}"
            )
            print(warning)
        return created

    def add_mapping(self, propm: PropMapping):
        """
        add a property Mapping

        The arg and pid indexes are only updated for truthy keys.

        Args:
            propm(PropMapping): the property mapping to add
        """
        arg_key = propm.arg
        if arg_key:
            self.prop_by_arg[arg_key] = propm
        self.prop_by_smw_prop[propm.smw_prop] = propm
        pid_key = propm.pid
        if pid_key:
            self.prop_by_pid[pid_key] = propm

    def getPkSMWPropMap(self, pk: str) -> PropMapping:
        """
        get the property mapping for the given primary key

        Args:
            pk(str): the primary key - only "qid" is handled

        Returns:
            PropMapping: the mapping found - None for any other primary key

        Raises:
            Exception: if the primary key is "qid" but has no mapping
        """
        if pk != "qid":
            return None
        if pk not in self.prop_by_pid:
            raise Exception(
                f"primary key arg {pk} of topic {self.topic_name}  has no mapping"
            )
        return self.prop_by_pid[pk]

    def getPmForArg(self, arg: str) -> PropMapping:
        """
        get the property mapping for the given argument name

        Args:
            arg(str): the argument name

        Returns:
            PropMapping: the mapping registered for the argument

        Raises:
            Exception: if there is no mapping for the argument
        """
        if arg in self.prop_by_arg:
            return self.prop_by_arg[arg]
        raise Exception(
            f"property arg {arg} of topic {self.topic_name}  has no mapping"
        )

__init__(topic_name)

initialize this topic mapping

Source code in smwsync/mapping.py
27
28
29
30
31
32
33
34
def __init__(self, topic_name: str):
    """
    initialize this topic mapping

    Args:
        topic_name(str): the name of the topic the mappings belong to
    """
    self.topic_name = topic_name
    # three empty indexes: by argument name, by SMW property name, by property id
    self.prop_by_arg, self.prop_by_smw_prop, self.prop_by_pid = {}, {}, {}

__repr__()

return my representation

Source code in smwsync/mapping.py
36
37
38
39
40
41
42
def __repr__(self):
    """
    return my representation - the text form of asdict()
    """
    return str(self.asdict())

add_mapping(propm)

add a property Mapping.

Args: propm (PropMapping): the property mapping to add

Source code in smwsync/mapping.py
73
74
75
76
77
78
79
80
81
82
83
def add_mapping(self, propm: PropMapping):
    """
    add a property Mapping

    The arg and pid indexes are only updated for truthy keys.

    Args:
        propm(PropMapping): the property mapping to add
    """
    arg_key = propm.arg
    if arg_key:
        self.prop_by_arg[arg_key] = propm
    self.prop_by_smw_prop[propm.smw_prop] = propm
    pid_key = propm.pid
    if pid_key:
        self.prop_by_pid[pid_key] = propm

add_mapping4record(propm_record)

add a property map record to the mapping

Parameters:

Name Type Description Default
propm_record(dict)

the record to instantiate the PropMapping from

required

Returns:

Name Type Description
PropMapping PropMapping

the property Mapping created and added

Source code in smwsync/mapping.py
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def add_mapping4record(self, propm_record: dict) -> PropMapping:
    """
    add a property map record to the mapping

    Any failure is reported as a warning on stdout and not raised.

    Args:
        propm_record(dict): the record to instantiate the PropMapping from

    Returns:
        PropMapping: the property Mapping created - None if the record
        could not be converted
    """
    created = None
    try:
        created = dacite.from_dict(data_class=PropMapping, data=propm_record)
        self.add_mapping(created)
    except Exception as ex:
        # best effort: a bad record must not stop the remaining records
        warning = (
            f"Warning property mapping {propm_record} could not be added: {str(ex)}"
        )
        print(warning)
    return created

synccmd

Created on 2023-03-03

@author: wf

SyncCmd

Command line for synching

Source code in smwsync/synccmd.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
class SyncCmd:
    """
    Command line for synching
    """

    def __init__(
        self,
        wikiId: str = "ceur-ws",
        context_name: str = "CrSchema",
        endpoint_name: str = "wikidata",
        verbose: bool = False,
        progress: bool = False,
        dry: bool = True,
        debug: bool = False,
    ):
        """
        Constructor

        Args:
            wikiId(str): my wiki Id
            context_name(str): the name of the context
            endpoint_name(str): the name of the SPARQL endpoint to use
            verbose(bool): if True show verbose details
            progress(bool): if True show progress
            dry(bool): if True do not execute commands but display them
            debug(bool): if True switch debugging on

        Raises:
            Exception: if the context is not available in the wiki or
                no endpoint is configured for endpoint_name
        """
        colorama_init()
        self.lang = "en"
        self.wikiId = wikiId
        self.debug = debug
        self.progress = progress
        self.verbose = verbose
        self.dry = dry
        self.smwAccess = SMWAccess(wikiId)
        self.context_name = context_name
        self.mw_contexts = self.smwAccess.getMwContexts()
        if context_name not in self.mw_contexts:
            raise Exception(
                f"context {context_name} not available in SMW wiki {wikiId}"
            )
        self.mw_context = self.mw_contexts[context_name]
        self.context, self.error, self.errMsg = Context.fromWikiContext(
            self.mw_context, debug=self.debug
        )
        self.endpoints = EndpointManager.getEndpoints(lang="sparql")
        self.endpointConf = self.endpoints.get(endpoint_name)
        if self.endpointConf is None:
            # fail with a clear message instead of an AttributeError on None below
            raise Exception(f"SPARQL endpoint {endpoint_name} is not configured")
        self.sparql = SPARQL(self.endpointConf.endpoint)

    @classmethod
    def fromArgs(cls, args) -> "SyncCmd":
        """
        create a sync command for the given command line arguments

        Args:
            args(Object): command line arguments - the attributes target,
                context, endpoint, verbose, progress, dry and debug are read

        Returns:
            SyncCmd: the sync command created
        """
        # use cls so that subclasses get an instance of their own type
        syncCmd = cls(
            wikiId=args.target,
            context_name=args.context,
            endpoint_name=args.endpoint,
            verbose=args.verbose,
            progress=args.progress,
            dry=args.dry,
            debug=args.debug,
        )
        return syncCmd

    @classmethod
    def getArgParser(cls) -> ArgumentParser:
        """
        Setup command line argument parser

        The options are declared as a table and registered in table order,
        which is also the order they are added to the parser.

        Returns:
            ArgumentParser: the argument parser
        """
        parser = ArgumentParser(
            description=Version.full_description,
            formatter_class=RawDescriptionHelpFormatter,
        )
        # (option strings, keyword arguments for add_argument)
        option_table = [
            (
                ("-a", "--about"),
                {
                    "help": "show about info [default: %(default)s]",
                    "action": "store_true",
                },
            ),
            (
                ("--context",),
                {
                    "default": "CrSchema",
                    "help": "context to generate from [default: %(default)s]",
                },
            ),
            (
                ("-cpm", "--createPropertyMap"),
                {"help": "create the yaml property map"},
            ),
            (
                ("-d", "--debug"),
                {
                    "dest": "debug",
                    "action": "store_true",
                    "help": "show debug info [default: %(default)s]",
                },
            ),
            (
                ("--dry",),
                {
                    "action": "store_true",
                    "help": "dry run only - do not execute wikiedit commands but just display them",
                },
            ),
            (
                ("-e", "--endpoint"),
                {
                    "default": "wikidata",
                    "help": "the SPARQL endpoint to be used [default: %(default)s]",
                },
            ),
            (("--progress",), {"action": "store_true", "help": "show progress bar"}),
            (("-p", "--props"), {"help": "properties to sync", "nargs": "+"}),
            (("--proplist",), {"action": "store_true", "help": "show the properties"}),
            (("-pm", "--propertyMap"), {"help": "the yaml property map"}),
            (
                ("-pk", "--primaryKey"),
                {"help": "primary Key [default: %(default)s]", "default": "qid"},
            ),
            (
                ("-pkv", "--primaryKeyValues"),
                {"help": "primary Key Values", "nargs": "+"},
            ),
            (
                ("-t", "--target"),
                {
                    "default": "ceur-ws",
                    "help": "wikiId of the target wiki [default: %(default)s]",
                },
            ),
            (
                ("-u", "--update"),
                {"action": "store_true", "help": "update the local cache"},
            ),
            (
                ("--topic",),
                {
                    "help": "the topic to work with [default: %(default)s]",
                    "default": "Scholar",
                },
            ),
            (
                ("--verbose",),
                {"action": "store_true", "help": "show verbose edit details"},
            ),
            (
                ("-V", "--version"),
                {"action": "version", "version": Version.version_msg},
            ),
        ]
        for option_strings, settings in option_table:
            parser.add_argument(*option_strings, **settings)
        return parser

    def getTopic(self, topic_name: str):
        """
        look up the topic with the given name in my context

        Args:
            topic_name(str): the name of the topic to get

        Returns:
            the topic registered under topic_name

        Raises:
            Exception: if my context has no topic with that name
        """
        known_topics = self.context.topics
        if topic_name in known_topics:
            return known_topics[topic_name]
        raise Exception(
            f"topic {topic_name} is not in context {self.context.name} in wiki {self.wikiId}"
        )

    def getCacheRoot(self, cache_root: str = None) -> str:
        """
        resolve the cache root to use

        Args:
            cache_root(str): root of the cache_path - if None set to $HOME/.smwsync
        Returns:
            str: the cache root
        """
        if cache_root is not None:
            # an explicit root always wins
            return cache_root
        return f"{str(Path.home())}/.smwsync"

    def getCachePath(self, cache_root: str = None) -> str:
        """
        get the cache directory below the given cache_root and make sure it exists

        Args:
            cache_root(str): root of the cache_path - if None set to $HOME/.smwsync
        Returns:
            str: the cache path for my wikiId and context's name
        """
        root = self.getCacheRoot(cache_root)
        cache_path = f"{root}/{self.wikiId}/{self.context.name}"
        # create the directory (and its parents) on first use
        os.makedirs(cache_path, exist_ok=True)
        return cache_path

    def getMapping(self, cache_root: str = None) -> Mapping:
        """
        load the wikidata mapping of my context from the given cache_root

        The mapping is read from <cache_root>/<context name>_wikidata_map.yaml

        Args:
            cache_root(str): root of the cache_path - if None set to $HOME/.smwsync

        Returns:
            Mapping: the mapping read from the yaml file
        """
        root = self.getCacheRoot(cache_root)
        yaml_path = f"{root}/{self.context.name}_wikidata_map.yaml"
        loaded = Mapping()
        loaded.fromYaml(yaml_path)
        return loaded

    def createMapping(self) -> Mapping:
        """
        create a template mapping for all topics and properties of my context

        Every property is mapped to itself as argument name and gets the
        placeholder property id "P?".

        Returns:
            Mapping: the mapping created
        """
        result = Mapping()
        for topic_name, topic in self.context.topics.items():
            topic_map = TopicMapping(topic_name)
            for prop_name, _prop in topic.properties.items():
                topic_map.add_mapping(
                    PropMapping(smw_prop=prop_name, arg=prop_name, pid="P?")
                )
            result.map_by_topic[topic_map.topic_name] = topic_map
        return result

    def color_msg(self, color, msg: str):
        """
        print the message in the given color and reset the style afterwards

        Args:
            color(Fore): the color to use
            msg(str): the message to print
        """
        colored = f"{color}{msg}{Style.RESET_ALL}"
        print(colored)

    def updateItemCache(self, topic_name: str, cache_path: str = None) -> tuple:
        """
        update the item cache

        for the given topic name and cache_path

        Args:
            topic_name(str): the name of the topic
            cache_path(str): the root of the cache - if None .smwsync in the home directory is used

        Returns:
            tuple: the path to the json file where the data is cached
            and the items that were queried and stored

        """
        topic = self.getTopic(topic_name)
        ask_query = topic.askQuery(listLimit=5000)
        items = self.smwAccess.smw.query(ask_query)
        cache_path = self.getCachePath(cache_path)
        json_path = f"{cache_path}/{topic_name}.json"
        with open(json_path, "w", encoding="utf-8") as json_file:
            # default=str: values json cannot serialize are stored as their text form
            json.dump(items, json_file, ensure_ascii=False, default=str, indent=2)
        return json_path, items

    def readItemsFromCache(self, topic_name, cache_path: str = None):
        """
        read the items back from cache

        Args:
            topic_name(str): the name of the topic the items were cached for
            cache_path(str): the root of the cache - if None .smwsync in the home directory is used

        Returns:
            the items as loaded from <cache path>/<topic_name>.json
        """
        cache_path = self.getCachePath(cache_path)
        json_path = f"{cache_path}/{topic_name}.json"
        # the cache is written as UTF-8 with non-ASCII characters kept,
        # so it must be read as UTF-8 independent of the locale
        with open(json_path, "r", encoding="utf-8") as json_file:
            items = json.load(json_file)
        return items

    def showProperties(self, topic: Topic):
        """
        print one line per property of the given Topic that has a mapping entry

        Properties without a mapping entry are skipped without any output.

        Args:
            topic(Topic): the topic to show the properties for

        Raises:
            Exception: if there is no mapping for the topic at all
        """
        if topic.name not in self.mapping.map_by_topic:
            raise Exception(
                f"missing wikidata mapping for {topic.name} - you might want to add it to the yaml file for {self.context.name}"
            )
        tm = self.mapping.map_by_topic[topic.name]
        for prop_name, prop in topic.properties.items():
            if prop_name not in tm.prop_by_smw_prop:
                continue
            pm = tm.prop_by_smw_prop[prop_name]
            info = f"{pm.arg}: {pm.pid_label} ({pm.pid}) → {prop.name}"
            print(f"{info}")

    def getValue(self, pk: str, pkValue: str, pid: str):
        """
        get the value for the given primary key and the given property id
        """
        value = None
        if pk == "qid":
            if pid == "description" or pid == "label":
                value = None
                try:
                    label, description = WikidataItem.getLabelAndDescription(
                        self.sparql, itemId=pkValue, lang=self.lang
                    )
                    if pid == "description":
                        value = description
                    else:
                        value = label
                    pass
                except Exception as ex:
                    # make sure we only ignore "failed"
                    if not "failed" in str(ex):
                        raise ex
            else:
                sparql_query = f"""SELECT * {{ 
  wd:{pkValue} wdt:{pid} ?value . 
}}"""
                # see https://www.wikidata.org/wiki/Help:Ranking
                # sparql_query=f"""SELECT ?value {{
                #  wd:{pkValue} p:{pid} ?st .
                #  ?st ps:P569 ?value .
                # ?st wikibase:rank wikibase:PreferredRank
                # }}"""

                records = self.sparql.queryAsListOfDicts(sparql_query)
                if len(records) >= 1:
                    record = records[0]
                    value = record["value"]
                    if isinstance(value, str):
                        value = re.sub(
                            r"http://www.wikidata.org/entity/(.*)", r"\1", value
                        )
                    else:
                        value = str(value)
        return value

    def filterItems(self, items: list, pk_prop: str, pk_values: list) -> list:
        """
        keep only the SMW records whose primary key property value is one
        of the given pk_values

        Args:
            items(list): the list of records to filter
            pk_prop(str): the primary key property
            pk_values(list): the list of primary key values - None disables filtering

        Returns:
            list: the matching records - the unchanged items if pk_values is None
        """
        if pk_values is None:
            return items
        return [
            record
            for record in items
            if pk_prop in record and record[pk_prop] in pk_values
        ]

    def sync(self, topic: Topic, pk: str, pk_values: list, prop_arglist: list):
        """
        synchronize the items for the given topic, the properties as specified by the prop_arglist
        the given primary key pk and the filter values pk_values

        The items are read from the local cache. For each property and item
        the wikidata value is fetched and handed to WikiPush.edit_wikison;
        the edit is only forced when this is not a dry run.

        Args:
            topic(Topic): the topic / class /entityType
            pk(str): the primary key to use
            pk_values(list): a list of primaryKeyValues to filter for
            prop_arglist(list): the argument names for properties to be handled

        Raises:
            Exception: if there is no property mapping for the primary key

        """
        tm = self.mapping.map_by_topic[topic.name]
        items_dict = self.readItemsFromCache(topic.name)
        pk_map = tm.getPkSMWPropMap(pk)
        if pk_map is None:
            # fail with a clear message instead of an AttributeError on None below
            raise Exception(
                f"primary key {pk} of topic {tm.topic_name} is not supported"
            )
        sync_items = self.filterItems(
            items=items_dict.values(), pk_prop=pk_map.smw_prop, pk_values=pk_values
        )
        self.color_msg(
            Fore.BLUE, f"{len(sync_items)} {tm.topic_name} items to sync ..."
        )
        wikipush = WikiPush(None, self.wikiId, debug=self.debug, verbose=self.verbose)
        if self.progress:
            t = tqdm(total=len(prop_arglist) * len(sync_items))
        else:
            t = None
        try:
            for arg in prop_arglist:
                pm = tm.getPmForArg(arg)
                for sync_item in sync_items:
                    pk_value = sync_item[pk_map.smw_prop]
                    wd_value = self.getValue(pk, pk_value, pm.pid)
                    if wd_value is None:
                        # a missing wikidata value is synced as an empty value
                        wd_value = ""
                    page_title = sync_item[tm.topic_name]
                    msg = f"updating {page_title} {pm.smw_prop} to {wd_value} from wikidata {pk_value}"
                    if self.verbose:
                        self.color_msg(Fore.BLUE, msg)
                    cmd = f"""wikiedit -t {self.wikiId} -p "{page_title}" --template {tm.topic_name} --property {pm.smw_prop} --value "{wd_value}" -f"""
                    if self.dry:
                        print(cmd)
                    if t is not None:
                        t.set_description(f"{page_title}{pm.smw_prop}")
                    wikipush.edit_wikison(
                        page_titles=[page_title],
                        entity_type_name=tm.topic_name,
                        property_name=pm.smw_prop,
                        value=wd_value,
                        force=not self.dry,
                    )
                    if t is not None:
                        t.update()
        finally:
            # release the progress bar even if an edit fails
            if t is not None:
                t.close()

    def main(self, args):
        """
        command line handling

        Exactly one of three modes runs: about info, creating a property
        map, or the regular mode which loads the mapping and then handles
        the proplist, update and props options in that order.

        Args:
            args: the parsed command line arguments
        """
        if args.about:
            print(Version.description)
            print(f"see {Version.doc_url}")
            webbrowser.open(Version.doc_url)
            return
        if args.createPropertyMap:
            template_mapping = self.createMapping()
            template_mapping.toYaml(args.createPropertyMap)
            return
        self.mapping = self.getMapping()
        topic = self.getTopic(topic_name=args.topic)
        if args.proplist:
            self.showProperties(topic=topic)
        if args.update:
            self.color_msg(
                Fore.BLUE,
                f"updating cache for {self.context.name}:{topic.name} from wiki {self.wikiId} ...",
            )
            json_path, items = self.updateItemCache(topic.name)
            self.color_msg(
                Fore.BLUE, f"stored {len(items)} {topic.name} items to {json_path}"
            )
        if args.props:
            self.sync(
                topic=topic,
                pk=args.primaryKey,
                pk_values=args.primaryKeyValues,
                prop_arglist=args.props,
            )

__init__(wikiId='ceur-ws', context_name='CrSchema', endpoint_name='wikidata', verbose=False, progress=False, dry=True, debug=False)

Constructor

Parameters:

Name Type Description Default
wikiId(str)

my wiki Id

required
topic(str)

the topic to sync

required
context_name(str)

the name of the context

required
dry(bool)

if True do not execute commands but display them

required
debug(bool)

if True switch debugging on

required
Source code in smwsync/synccmd.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def __init__(
    self,
    wikiId: str = "ceur-ws",
    context_name: str = "CrSchema",
    endpoint_name: str = "wikidata",
    verbose: bool = False,
    progress: bool = False,
    dry: bool = True,
    debug: bool = False,
):
    """
    Constructor

    Args:
        wikiId(str): my wiki Id
        context_name(str): the name of the context
        endpoint_name(str): the name of the SPARQL endpoint to use
        verbose(bool): if True show verbose details
        progress(bool): if True show progress
        dry(bool): if True do not execute commands but display them
        debug(bool): if True switch debugging on

    Raises:
        Exception: if the context is not available in the wiki or
            no endpoint is configured for endpoint_name
    """
    colorama_init()
    self.lang = "en"
    self.wikiId = wikiId
    self.debug = debug
    self.progress = progress
    self.verbose = verbose
    self.dry = dry
    self.smwAccess = SMWAccess(wikiId)
    self.context_name = context_name
    self.mw_contexts = self.smwAccess.getMwContexts()
    if context_name not in self.mw_contexts:
        raise Exception(
            f"context {context_name} not available in SMW wiki {wikiId}"
        )
    self.mw_context = self.mw_contexts[context_name]
    self.context, self.error, self.errMsg = Context.fromWikiContext(
        self.mw_context, debug=self.debug
    )
    self.endpoints = EndpointManager.getEndpoints(lang="sparql")
    self.endpointConf = self.endpoints.get(endpoint_name)
    if self.endpointConf is None:
        # fail with a clear message instead of an AttributeError on None below
        raise Exception(f"SPARQL endpoint {endpoint_name} is not configured")
    self.sparql = SPARQL(self.endpointConf.endpoint)

color_msg(color, msg)

print a colored message

Parameters:

Name Type Description Default
color(Fore)

the color to use

required
msg(str)

the message to print

required
Source code in smwsync/synccmd.py
244
245
246
247
248
249
250
251
252
def color_msg(self, color, msg: str):
    """
    print the message in the given color and reset the style afterwards

    Args:
        color(Fore): the color to use
        msg(str): the message to print
    """
    colored = f"{color}{msg}{Style.RESET_ALL}"
    print(colored)

createMapping()

create a mapping for my context

Source code in smwsync/synccmd.py
231
232
233
234
235
236
237
238
239
240
241
242
def createMapping(self) -> Mapping:
    """
    create a template mapping for all topics and properties of my context

    Every property is mapped to itself as argument name and gets the
    placeholder property id "P?".

    Returns:
        Mapping: the mapping created
    """
    result = Mapping()
    for topic_name, topic in self.context.topics.items():
        topic_map = TopicMapping(topic_name)
        for prop_name, _prop in topic.properties.items():
            topic_map.add_mapping(
                PropMapping(smw_prop=prop_name, arg=prop_name, pid="P?")
            )
        result.map_by_topic[topic_map.topic_name] = topic_map
    return result

filterItems(items, pk_prop, pk_values)

filter the given list of items by SMW records having primary key property values in the given pk_values list

Parameters:

Name Type Description Default
items(list)

the list of records to filter

required
pk_prop(str)

the primary key property

required
pk_values(list)

the list of primary key values

required
Source code in smwsync/synccmd.py
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
def filterItems(self, items: list, pk_prop: str, pk_values: list) -> list:
    """
    keep only the SMW records whose primary key property value is one
    of the given pk_values

    Args:
        items(list): the list of records to filter
        pk_prop(str): the primary key property
        pk_values(list): the list of primary key values - None disables filtering

    Returns:
        list: the matching records - the unchanged items if pk_values is None
    """
    if pk_values is None:
        return items
    return [
        record
        for record in items
        if pk_prop in record and record[pk_prop] in pk_values
    ]

fromArgs(args) classmethod

create a sync command for the given command line arguments

Parameters:

Name Type Description Default
args(Object)

command line arguments

required
Source code in smwsync/synccmd.py
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
@classmethod
def fromArgs(cls, args) -> "SyncCmd":
    """
    create a sync command for the given command line arguments

    Args:
        args(Object): command line arguments - the attributes target,
            context, endpoint, verbose, progress, dry and debug are read

    Returns:
        SyncCmd: the sync command created
    """
    # use cls so that subclasses get an instance of their own type
    syncCmd = cls(
        wikiId=args.target,
        context_name=args.context,
        endpoint_name=args.endpoint,
        verbose=args.verbose,
        progress=args.progress,
        dry=args.dry,
        debug=args.debug,
    )
    return syncCmd

getArgParser() classmethod

Setup command line argument parser

Returns:

Name Type Description
ArgumentParser ArgumentParser

the argument parser

Source code in smwsync/synccmd.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
@classmethod
def getArgParser(cls) -> ArgumentParser:
    """
    Setup command line argument parser

    The options are declared as a table and registered in table order,
    which is also the order they are added to the parser.

    Returns:
        ArgumentParser: the argument parser
    """
    parser = ArgumentParser(
        description=Version.full_description,
        formatter_class=RawDescriptionHelpFormatter,
    )
    # (option strings, keyword arguments for add_argument)
    option_table = [
        (
            ("-a", "--about"),
            {
                "help": "show about info [default: %(default)s]",
                "action": "store_true",
            },
        ),
        (
            ("--context",),
            {
                "default": "CrSchema",
                "help": "context to generate from [default: %(default)s]",
            },
        ),
        (
            ("-cpm", "--createPropertyMap"),
            {"help": "create the yaml property map"},
        ),
        (
            ("-d", "--debug"),
            {
                "dest": "debug",
                "action": "store_true",
                "help": "show debug info [default: %(default)s]",
            },
        ),
        (
            ("--dry",),
            {
                "action": "store_true",
                "help": "dry run only - do not execute wikiedit commands but just display them",
            },
        ),
        (
            ("-e", "--endpoint"),
            {
                "default": "wikidata",
                "help": "the SPARQL endpoint to be used [default: %(default)s]",
            },
        ),
        (("--progress",), {"action": "store_true", "help": "show progress bar"}),
        (("-p", "--props"), {"help": "properties to sync", "nargs": "+"}),
        (("--proplist",), {"action": "store_true", "help": "show the properties"}),
        (("-pm", "--propertyMap"), {"help": "the yaml property map"}),
        (
            ("-pk", "--primaryKey"),
            {"help": "primary Key [default: %(default)s]", "default": "qid"},
        ),
        (
            ("-pkv", "--primaryKeyValues"),
            {"help": "primary Key Values", "nargs": "+"},
        ),
        (
            ("-t", "--target"),
            {
                "default": "ceur-ws",
                "help": "wikiId of the target wiki [default: %(default)s]",
            },
        ),
        (
            ("-u", "--update"),
            {"action": "store_true", "help": "update the local cache"},
        ),
        (
            ("--topic",),
            {
                "help": "the topic to work with [default: %(default)s]",
                "default": "Scholar",
            },
        ),
        (
            ("--verbose",),
            {"action": "store_true", "help": "show verbose edit details"},
        ),
        (
            ("-V", "--version"),
            {"action": "version", "version": Version.version_msg},
        ),
    ]
    for option_strings, settings in option_table:
        parser.add_argument(*option_strings, **settings)
    return parser

getCachePath(cache_root=None)

get the cache_path for the given cache_root

Parameters:

Name Type Description Default
cache_root(str)

root of the cache_path - if None set to $HOME/.smwsync

required

Returns: str: the cache path for my wikiId and context's name

Source code in smwsync/synccmd.py
204
205
206
207
208
209
210
211
212
213
214
215
216
def getCachePath(self, cache_root: str = None) -> str:
    """
    get the cache_path for the given cache_root and make sure it exists

    The path is composed as <cache_root>/<wikiId>/<context name> and the
    directory is created when it does not exist yet.

    Args:
        cache_root(str): root of the cache_path - if None set to $HOME/.smwsync
    Returns:
        str: the cache path for my wikiId and context's name
    """
    root = self.getCacheRoot(cache_root)
    wiki_id = self.wikiId
    context_name = self.context.name
    cache_path = f"{root}/{wiki_id}/{context_name}"
    # exist_ok: an already existing cache directory is not an error
    os.makedirs(cache_path, exist_ok=True)
    return cache_path

getCacheRoot(cache_root=None)

get the cache_root for the given cache_root

Parameters:

Name Type Description Default
cache_root(str)

root of the cache_path - if None set to $HOME/.smwsync

required

Returns: str: the cache root

Source code in smwsync/synccmd.py
190
191
192
193
194
195
196
197
198
199
200
201
202
def getCacheRoot(self, cache_root: str = None) -> str:
    """
    get the cache_root for the given cache_root

    Args:
        cache_root(str): root of the cache_path - if None set to $HOME/.smwsync
    Returns:
        str: the given cache root, or the default below the home directory
    """
    # an explicitly given root always wins
    if cache_root is not None:
        return cache_root
    home_dir = str(Path.home())
    return home_dir + "/.smwsync"

getMapping(cache_root=None)

get the mapping for the given cache_root

Parameters:

Name Type Description Default
cache_root(str)

root of the cache_path - if None set to $HOME/.smwsync

required
Source code in smwsync/synccmd.py
218
219
220
221
222
223
224
225
226
227
228
229
def getMapping(self, cache_root: str = None) -> Mapping:
    """
    get the mapping for the given cache_root

    The mapping is read from the file
    <cache_root>/<context name>_wikidata_map.yaml

    Args:
        cache_root(str): root of the cache_path - if None set to $HOME/.smwsync
    Returns:
        Mapping: the mapping initialized from the yaml file
    """
    root = self.getCacheRoot(cache_root)
    yaml_path = f"{root}/{self.context.name}_wikidata_map.yaml"
    property_mapping = Mapping()
    property_mapping.fromYaml(yaml_path)
    return property_mapping

getTopic(topic_name)

get the topic for the given topic name

Parameters:

Name Type Description Default
topic_name(str)

the name of the topic to get the properties for

required
Source code in smwsync/synccmd.py
176
177
178
179
180
181
182
183
184
185
186
187
188
def getTopic(self, topic_name: str):
    """
    get the topic for the given topic name

    Args:
        topic_name(str): the name of the topic to get the properties for

    Returns:
        the entry of my context's topics for the given name

    Raises:
        Exception: if the topic name is not among my context's topics
    """
    topics = self.context.topics
    if topic_name not in topics:
        msg = f"topic {topic_name} is not in context {self.context.name} in wiki {self.wikiId}"
        raise Exception(msg)
    return topics[topic_name]

getValue(pk, pkValue, pid)

get the value for the given primary key and the given property id

Source code in smwsync/synccmd.py
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
    def getValue(self, pk: str, pkValue: str, pid: str):
        """
        get the value for the given primary key and the given property id
        """
        value = None
        if pk == "qid":
            if pid == "description" or pid == "label":
                value = None
                try:
                    label, description = WikidataItem.getLabelAndDescription(
                        self.sparql, itemId=pkValue, lang=self.lang
                    )
                    if pid == "description":
                        value = description
                    else:
                        value = label
                    pass
                except Exception as ex:
                    # make sure we only ignore "failed"
                    if not "failed" in str(ex):
                        raise ex
            else:
                sparql_query = f"""SELECT * {{ 
  wd:{pkValue} wdt:{pid} ?value . 
}}"""
                # see https://www.wikidata.org/wiki/Help:Ranking
                # sparql_query=f"""SELECT ?value {{
                #  wd:{pkValue} p:{pid} ?st .
                #  ?st ps:P569 ?value .
                # ?st wikibase:rank wikibase:PreferredRank
                # }}"""

                records = self.sparql.queryAsListOfDicts(sparql_query)
                if len(records) >= 1:
                    record = records[0]
                    value = record["value"]
                    if isinstance(value, str):
                        value = re.sub(
                            r"http://www.wikidata.org/entity/(.*)", r"\1", value
                        )
                    else:
                        value = str(value)
        return value

main(args)

command line handling

Source code in smwsync/synccmd.py
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
def main(self, args):
    """
    command line handling

    Runs one of three modes depending on the parsed arguments:
    show the about information, create a property map yaml file, or
    work on a topic (show properties, update the cache, sync properties).

    Args:
        args: the parsed command line arguments
    """
    if args.about:
        print(Version.description)
        print(f"see {Version.doc_url}")
        webbrowser.open(Version.doc_url)
        return
    if args.createPropertyMap:
        self.createMapping().toYaml(args.createPropertyMap)
        return
    # topic mode: the steps below are independent and may be combined
    self.mapping = self.getMapping()
    topic = self.getTopic(topic_name=args.topic)
    if args.proplist:
        self.showProperties(topic=topic)
    if args.update:
        start_msg = f"updating cache for {self.context.name}:{topic.name} from wiki {self.wikiId} ..."
        self.color_msg(Fore.BLUE, start_msg)
        json_path, items = self.updateItemCache(topic.name)
        done_msg = f"stored {len(items)} {topic.name} items to {json_path}"
        self.color_msg(Fore.BLUE, done_msg)
    if args.props:
        self.sync(
            topic=topic,
            pk=args.primaryKey,
            pk_values=args.primaryKeyValues,
            prop_arglist=args.props,
        )

readItemsFromCache(topic_name, cache_path=None)

read the items back from cache

Source code in smwsync/synccmd.py
277
278
279
280
281
282
283
284
285
def readItemsFromCache(self, topic_name, cache_path: str = None):
    """
    read the items back from cache

    Args:
        topic_name(str): the name of the topic to read the cached items for
        cache_path(str): forwarded to getCachePath to determine the cache directory

    Returns:
        the items as deserialized from <cache directory>/<topic_name>.json
    """
    cache_path = self.getCachePath(cache_path)
    json_path = f"{cache_path}/{topic_name}.json"
    # updateItemCache writes this file as UTF-8 with ensure_ascii=False,
    # so it has to be decoded as UTF-8 regardless of the locale default
    with open(json_path, "r", encoding="utf-8") as json_file:
        items = json.load(json_file)
    return items

showProperties(topic)

show the properties for the given Topic

Parameters:

Name Type Description Default
topic(Topic)

the topic to show the properties for

required
Source code in smwsync/synccmd.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
def showProperties(self, topic: Topic):
    """
    show the properties for the given Topic

    Prints one line per topic property that has an entry in the
    wikidata mapping of the topic.

    Args:
        topic(Topic): the topic to show the properties for

    Raises:
        Exception: if there is no mapping for the topic
    """
    topic_mappings = self.mapping.map_by_topic
    if topic.name not in topic_mappings:
        raise Exception(
            f"missing wikidata mapping for {topic.name} - you might want to add it to the yaml file for {self.context.name}"
        )
    tm = topic_mappings[topic.name]
    for prop_name, prop in topic.properties.items():
        # properties without a wikidata map entry are skipped silently
        if prop_name not in tm.prop_by_smw_prop:
            continue
        pm = tm.prop_by_smw_prop[prop_name]
        print(f"{pm.arg}: {pm.pid_label} ({pm.pid}) → {prop.name}")

sync(topic, pk, pk_values, prop_arglist)

synchronize the items for the given topic, the properties as specified by the prop_arglist, the given primary key pk and the filter values pk_values

Parameters:

Name Type Description Default
topic(Topic)

the topic / class /entityType

required
pk(str)

the primary key to use

required
pk_values(list)

a list of primaryKeyValues to filter for

required
prop_arglist(list)

the argument names for properties to be handled

required
Source code in smwsync/synccmd.py
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
def sync(self, topic: Topic, pk: str, pk_values: list, prop_arglist: list):
    """
    synchronize the items for the given topic, the properties as specified
    by the prop_arglist, the given primary key pk and the filter values pk_values

    For each property argument and each cached item that passes the filter
    the value is looked up with getValue and handed to WikiPush.edit_wikison.

    Args:
        topic(Topic): the topic / class /entityType
        pk(str): the primary key to use
        pk_values(list): a list of primaryKeyValues to filter for
        prop_arglist(list): the argument names for properties to be handled

    """
    tm = self.mapping.map_by_topic[topic.name]
    items_dict = self.readItemsFromCache(topic.name)
    pk_map = tm.getPkSMWPropMap(pk)
    sync_items = self.filterItems(
        items=items_dict.values(), pk_prop=pk_map.smw_prop, pk_values=pk_values
    )
    self.color_msg(
        Fore.BLUE, f"{len(sync_items)} {tm.topic_name} items to sync ..."
    )
    wikipush = WikiPush(None, self.wikiId, debug=self.debug, verbose=self.verbose)
    # one progress step per (property, item) combination
    t = tqdm(total=len(prop_arglist) * len(sync_items)) if self.progress else None
    try:
        for arg in prop_arglist:
            pm = tm.getPmForArg(arg)
            for sync_item in sync_items:
                pk_value = sync_item[pk_map.smw_prop]
                wd_value = self.getValue(pk, pk_value, pm.pid)
                # a missing value is written as an empty string
                if wd_value is None:
                    wd_value = ""
                page_title = sync_item[tm.topic_name]
                if self.verbose:
                    msg = f"updating {page_title} {pm.smw_prop} to {wd_value} from wikidata {pk_value}"
                    self.color_msg(Fore.BLUE, msg)
                if self.dry:
                    # only displayed - the edit itself goes through wikipush below
                    cmd = f"""wikiedit -t {self.wikiId} -p "{page_title}" --template {tm.topic_name} --property {pm.smw_prop} --value "{wd_value}" -f"""
                    print(cmd)
                if t is not None:
                    t.set_description(f"{page_title}{pm.smw_prop}")
                # NOTE(review): in dry mode the call is still made with force=False;
                # presumably WikiPush then does not save the page - confirm
                wikipush.edit_wikison(
                    page_titles=[page_title],
                    entity_type_name=tm.topic_name,
                    property_name=pm.smw_prop,
                    value=wd_value,
                    force=not self.dry,
                )
                if t is not None:
                    t.update()
    finally:
        # a manually created progress bar must be closed explicitly,
        # also when the loop is left by an exception
        if t is not None:
            t.close()

updateItemCache(topic_name, cache_path=None)

update the item cache

for the given topic name and cache_path

Parameters:

Name Type Description Default
topic_name(str)

the name of the topic

required
cache_path(str)

the path to the cache - if None .smwsync in the home directory is used

required

Returns:

Name Type Description
str str

the path to the json file where the data is cached

Source code in smwsync/synccmd.py
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
def updateItemCache(self, topic_name: str, cache_path: str = None) -> tuple:
    """
    update the item cache

    for the given topic name and cache_path

    Args:
        topic_name(str): the name of the topic
        cache_path(str): the path to the cache - if None .smwsync in the home directory is used

    Returns:
        tuple: (json_path, items) - the path to the json file where the
        data is cached and the items that were queried and stored

    """
    topic = self.getTopic(topic_name)
    ask_query = topic.askQuery(listLimit=5000)
    items = self.smwAccess.smw.query(ask_query)
    cache_path = self.getCachePath(cache_path)
    json_path = f"{cache_path}/{topic_name}.json"
    with open(json_path, "w", encoding="utf-8") as json_file:
        # default=str: values json can not serialize are stored as their str()
        json.dump(items, json_file, ensure_ascii=False, default=str, indent=2)
    return json_path, items

main(argv=None)

main program.

Source code in smwsync/synccmd.py
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
def main(argv=None):  # IGNORE:C0111
    """
    main program.

    Args:
        argv(list): the command line arguments - if None sys.argv[1:] is used

    Returns:
        int: 1 on keyboard interrupt, 2 if an exception was handled,
        None if the command ran through without an exception
    """

    if argv is None:
        argv = sys.argv[1:]

    # bound before the try block so that the exception handler can check it
    # even when the failure happens before parse_args has returned
    args = None
    try:
        parser = SyncCmd.getArgParser()
        args = parser.parse_args(argv)
        if len(argv) < 1:
            parser.print_usage()
            sys.exit(1)
        syncCmd = SyncCmd.fromArgs(args)
        syncCmd.main(args)
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 1
    except Exception as e:
        if DEBUG:
            # re-raise unchanged so that the original traceback is shown
            raise
        indent = len(Version.name) * " "
        sys.stderr.write(Version.name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        if args is not None and args.debug:
            print(traceback.format_exc())
        return 2

version

Created on 2022-09-11

@author: wf

Version

Bases: object

Version handling for pySMWSync

Source code in smwsync/version.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
class Version:
    """
    Version handling for pySMWSync
    """

    # identification of the package
    name = "pySMWSync"
    version = smwsync.__version__
    # creation date and date of the last update
    date = "2023-03-03"
    updated = "2024-03-19"
    description = "python library to synchronize property values of a semantic MediaWiki with e.g. wikidata or another Semantic MediaWiki"

    authors = "Wolfgang Fahl"

    # documentation, discussion and source code locations
    doc_url = "https://wiki.bitplan.com/index.php/pySMWSync"
    chat_url = "https://github.com/WolfgangFahl/pySMWSync/discussions"
    cm_url = "https://github.com/WolfgangFahl/pySMWSync"
    # derived texts, e.g. version_msg is used for the --version option
    version_msg = f"v{version},{updated}"
    full_description = f"{name} - {description}({version_msg})"

    license = """Copyright 2023 contributors. All rights reserved.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied."""
    longDescription = f"""{name} version {version}
{description}

  Created by {authors} on {date} last updated {updated}"""