Skip to content

py-sidif API Documentation

profiler

Created on 2022-11-18

@author: wf

Profiler

simple profiler

Source code in sidif/profiler.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class Profiler:
    """
    Minimal wall-clock stopwatch that can report its timings on stdout.
    """

    def __init__(self, msg, profile=True):
        """
        Remember the label, start the clock and optionally announce the start.

        Args:
            msg(str): label used in the printed messages
            profile(bool): True if start/timing messages should be printed
        """
        self.msg, self.profile = msg, profile
        self.starttime = time.time()
        if self.profile:
            print(f"Starting {msg} ...")

    def time(self, extraMsg=""):
        """
        Measure the seconds elapsed since construction.

        Args:
            extraMsg(str): text appended to the label in the printed message

        Returns:
            float: the elapsed time in seconds
        """
        now = time.time()
        duration = now - self.starttime
        # silent mode: just hand back the measurement
        if not self.profile:
            return duration
        print(f"{self.msg}{extraMsg} took {duration:5.1f} s")
        return duration

__init__(msg, profile=True)

construct me with the given msg and profile active flag

Parameters:

Name Type Description Default
msg(str)

the message to show if profiling is active

required
profile(bool)

True if messages should be shown

True
Source code in sidif/profiler.py
15
16
17
18
19
20
21
22
23
24
25
26
27
def __init__(self, msg, profile=True):
    """
    Remember the label, start the clock and optionally announce the start.

    Args:
        msg(str): label used in the printed messages
        profile(bool): True if start/timing messages should be printed
    """
    self.msg, self.profile = msg, profile
    self.starttime = time.time()
    if self.profile:
        print(f"Starting {msg} ...")

time(extraMsg='')

time the action and print if profile is active

Source code in sidif/profiler.py
29
30
31
32
33
34
35
36
def time(self, extraMsg=""):
    """
    Measure the seconds elapsed since construction.

    Args:
        extraMsg(str): text appended to the label in the printed message

    Returns:
        float: the elapsed time in seconds
    """
    now = time.time()
    duration = now - self.starttime
    # silent mode: just hand back the measurement
    if not self.profile:
        return duration
    print(f"{self.msg}{extraMsg} took {duration:5.1f} s")
    return duration

sidif

Created on 2020-11-06

@author: wf

Comment

a comment with its location

Source code in sidif/sidif.py
215
216
217
218
219
220
221
222
class Comment:
    """
    a comment together with its location
    """

    def __init__(self, comment, location):
        """
        construct me from the comment text and the place it was found

        Args:
            comment: the text of the comment
            location: the location of the comment
        """
        self.comment, self.location = comment, location

DataInterchange

a data interchange

Source code in sidif/sidif.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
class DataInterchange:
    """
    a data interchange

    Keeps the triples and comments of a SiDIF document in the order in
    which they were added.
    """

    def __init__(self):
        """
        construct me with empty triple and comment lists
        """
        self.triples = []
        self.comments = []

    @staticmethod
    def ofDict(pDict, context="context"):
        """
        derive a DataInterchange with schema triples from the given dict

        Args:
            pDict(dict): the (possibly nested) dict to derive the schema from
            context(str): the name of the context the schema belongs to

        Returns:
            DataInterchange: the data interchange holding the derived triples
        """
        dif = DataInterchange()
        dif.addTriple(Triple(context, "isA", "Context"))
        dif.addTriple(Triple(context, "name", "it"))
        dif.addSchemaFromDict(pDict, context, context, "context")
        return dif

    def addLink(
        self,
        name,
        source,
        target,
        sourceRole,
        targetRole,
        sourceMultiple,
        targetMultiple,
    ):
        """
        add the triples describing a TopicLink

        Args:
            name: the name (and subject) of the link
            source: the source of the link
            target: the target of the link
            sourceRole: the role of the source
            targetRole: the role of the target
            sourceMultiple: value of the sourceMultiple property
            targetMultiple: value of the targetMultiple property
        """
        self.addTriple(Triple(name, "isA", "TopicLink"))
        # the emission order of the "it" properties is part of the output
        itProperties = (
            (name, "name"),
            (target, "target"),
            (source, "source"),
            (targetRole, "targetRole"),
            (sourceRole, "sourceRole"),
            (sourceMultiple, "sourceMultiple"),
            (targetMultiple, "targetMultiple"),
        )
        for value, prop in itProperties:
            self.addTriple(Triple(value, prop, "it"))

    def addTopic(self, name, context):
        """
        add the triples describing a Topic

        Args:
            name(str): the name of the topic
            context: the context the topic belongs to
        """
        topicId = self.fixId(name)
        self.addTriple(Triple(topicId, "isA", "Topic"))
        self.addTriple(Triple(name, "name", "it"))
        self.addTriple(Triple(context, "context", "it"))

    def fixId(self, name):
        """
        remove the characters "#" and "@" from the given name

        Args:
            name(str): the name to derive an id from

        Returns:
            str: the name without "#" and "@" characters
        """
        return re.sub(r"[#@]", "", name)

    def addSchemaFromDict(self, pDict, context, parent, parentName):
        """
        add schema information from the given dict

        Dict values become topics, all other values become properties;
        nothing is added if pDict is not a dict.

        Args:
            pDict: the dict to analyze
            context: the context the schema belongs to
            parent: the subject the entries of pDict belong to
            parentName(str): the property name linking a property to its parent
        """
        if not isinstance(pDict, dict):
            return
        # make sure we work on properties first then other topics
        sortedKeys = sorted(pDict.keys(), key=lambda x: isinstance(pDict[x], dict))
        # loop over nodes
        for key in sortedKeys:
            value = pDict[key]
            # is the subnode a Topic or a Property?
            if isinstance(value, dict):
                onlyChild = next(iter(value.values())) if len(value) == 1 else None
                # if there is an intermediate node
                # then there is a topic link
                # eg. workshop - events - event
                if isinstance(onlyChild, list):
                    listNode = onlyChild
                    listKey = next(iter(value))
                    # an empty list has no sample element to derive a schema
                    # from - the recursion ignores non-dict input
                    firstListNode = listNode[0] if listNode else None
                    self.addTopic(listKey, context)
                    self.addSchemaFromDict(firstListNode, context, listKey, "Topic")
                    self.addLink(
                        key, parent, listKey, "", key, False, len(listNode) > 1
                    )
                else:
                    # standalone topic
                    linkKey = f"{parent}{key}"
                    self.addTopic(key, context)
                    self.addSchemaFromDict(value, context, key, "Topic")
                    if parentName != "context":
                        self.addLink(linkKey, parent, key, "", "", False, False)
            else:
                propId = self.fixId(key)
                self.addTriple(Triple(propId, "isA", "Property"))
                self.addTriple(Triple(key, "name", "it"))
                valueType = type(value).__name__
                self.addTriple(Triple(valueType, "type", "it"))
                self.addTriple(Triple(parent, parentName, "it"))

    def addTriple(self, triple):
        """
        add the given triple

        Args:
            triple(Triple): the triple to add
        """
        self.triples.append(triple)

    def addComment(self, comment):
        """
        add the given comment

        Args:
            comment: the comment to add
        """
        self.comments.append(comment)

    def asSiDIF(self):
        """
        convert me to SiDIF notation

        Returns:
            str: one line per triple, each terminated by a newline
        """
        return "".join(f"{triple.asSiDIF()}\n" for triple in self.triples)

    def toDictOfDicts(self):
        """
        convert me to a dict of dicts
        following the "it" semantics

        e.g.

        .. code-block:: python

               JohnDoe isA Person
               "John" is firstName of it
               "Doe"  is lastName of it
               35 is age of it

        will have a pseudo - triple representation of

        .. code-block:: python

            JohnDoe isA Person
            John firstName it
            Doe lastName it
            35 age it

        leading to a dict

        .. code-block:: python

            {
               'JohnDoe': {
                  'isA': 'Person',
                  'firstName': 'John',
                  'lastName': 'Doe',
                  'age': 35
                }
            }

        Returns:
            dict: the dict of dicts representation of the triples found

        Raises:
            Exception: if an "it" reference occurs before any subject
        """
        # the dict of dicts
        dod = {}
        # we start without an "it" reference
        it = None
        # loop over all triples
        for triple in self.triples:
            # if this is an "it" reference
            if triple.o == "it":
                if it is None:
                    # %s instead of %d: the location need not be an integer
                    raise Exception(
                        "Invalid it reference %s at location %s"
                        % (triple, triple.location)
                    )
                o = triple.s
            else:
                o = triple.o
                # the subject of this triple becomes the current "it"
                it = dod.setdefault(triple.s, {})
            it[triple.p] = o
        return dod

    def __str__(self):
        """
        Returns:
            str: the number of triples and comments I hold
        """
        return "%d triples, %d comments" % (len(self.triples), len(self.comments))

addComment(comment)

add the given comment

Source code in sidif/sidif.py
131
132
133
134
135
def addComment(self, comment):
    """
    append the given comment to my list of comments

    Args:
        comment: the comment to add
    """
    collected = self.comments
    collected.append(comment)

addSchemaFromDict(pDict, context, parent, parentName)

add schema information from the given dict

Source code in sidif/sidif.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
def addSchemaFromDict(self, pDict, context, parent, parentName):
    """
    add schema information from the given dict

    Dict values become topics, all other values become properties;
    nothing is added if pDict is not a dict.

    Args:
        pDict: the dict to analyze
        context: the context the schema belongs to
        parent: the subject the entries of pDict belong to
        parentName(str): the property name linking a property to its parent
    """
    if not isinstance(pDict, dict):
        return
    # make sure we work on properties first then other topics
    sortedKeys = sorted(pDict.keys(), key=lambda x: isinstance(pDict[x], dict))
    # loop over nodes
    for key in sortedKeys:
        value = pDict[key]
        # is the subnode a Topic or a Property?
        if isinstance(value, dict):
            onlyChild = next(iter(value.values())) if len(value) == 1 else None
            # if there is an intermediate node
            # then there is a topic link
            # eg. workshop - events - event
            if isinstance(onlyChild, list):
                listNode = onlyChild
                listKey = next(iter(value))
                # an empty list has no sample element to derive a schema
                # from - the recursion ignores non-dict input
                firstListNode = listNode[0] if listNode else None
                self.addTopic(listKey, context)
                self.addSchemaFromDict(firstListNode, context, listKey, "Topic")
                self.addLink(
                    key, parent, listKey, "", key, False, len(listNode) > 1
                )
            else:
                # standalone topic
                linkKey = f"{parent}{key}"
                self.addTopic(key, context)
                self.addSchemaFromDict(value, context, key, "Topic")
                if parentName != "context":
                    self.addLink(linkKey, parent, key, "", "", False, False)
        else:
            propId = self.fixId(key)
            self.addTriple(Triple(propId, "isA", "Property"))
            self.addTriple(Triple(key, "name", "it"))
            valueType = type(value).__name__
            self.addTriple(Triple(valueType, "type", "it"))
            self.addTriple(Triple(parent, parentName, "it"))

addTriple(triple)

add the given triple

Parameters:

Name Type Description Default
triple(Triple)

the triple to add

required
Source code in sidif/sidif.py
122
123
124
125
126
127
128
129
def addTriple(self, triple):
    """
    append the given triple to my list of triples

    Args:
        triple(Triple): the triple to add
    """
    collected = self.triples
    collected.append(triple)

asSiDIF()

convert me to SiDIF notation

Source code in sidif/sidif.py
137
138
139
140
141
142
143
144
145
def asSiDIF(self):
    """
    render all my triples in SiDIF notation

    Returns:
        str: one line per triple, each terminated by a newline
    """
    lines = (f"{triple.asSiDIF()}\n" for triple in self.triples)
    return "".join(lines)

toDictOfDicts()

convert me to a dict of dicts following the "it" semantics

e.g.

.. code-block:: python

   JohnDoe isA Person
   "John" is firstName of it
   "Doe"  is lastName of it
   35 is age of it

will have a pseudo - triple representation of

.. code-block:: python

JohnDoe isA Person
John firstName it
Doe lastName it
35 age it

leading to a dict

.. code-block:: python

{
   'JohnDoe': {
      'isA': Person,
      'firstName': John,
       'lastName': 'Doe'
      'age': 35
    }
}

Returns:

Name Type Description
dict

the dict of dicts representation of the triples found

Source code in sidif/sidif.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
def toDictOfDicts(self):
    """
    convert me to a dict of dicts
    following the "it" semantics

    e.g.

    .. code-block:: python

           JohnDoe isA Person
           "John" is firstName of it
           "Doe"  is lastName of it
           35 is age of it

    will have a pseudo - triple representation of

    .. code-block:: python

        JohnDoe isA Person
        John firstName it
        Doe lastName it
        35 age it

    leading to a dict

    .. code-block:: python

        {
           'JohnDoe': {
              'isA': 'Person',
              'firstName': 'John',
              'lastName': 'Doe',
              'age': 35
            }
        }

    Returns:
        dict: the dict of dicts representation of the triples found

    Raises:
        Exception: if an "it" reference occurs before any subject
    """
    # the dict of dicts
    dod = {}
    # we start without an "it" reference
    it = None
    # loop over all triples
    for triple in self.triples:
        # if this is an "it" reference
        if triple.o == "it":
            if it is None:
                # %s instead of %d: the location need not be an integer
                raise Exception(
                    "Invalid it reference %s at location %s"
                    % (triple, triple.location)
                )
            o = triple.s
        else:
            o = triple.o
            # the subject of this triple becomes the current "it"
            it = dod.setdefault(triple.s, {})
        it[triple.p] = o
    return dod

SiDIFParser

Bases: object

Parser for SiDIF Simple Data Interchange Format see http://wiki.bitplan.com/index.php/SiDIF

Source code in sidif/sidif.py
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
class SiDIFParser(object):
    """
    Parser for SiDIF Simple Data Interchange Format
    see http://wiki.bitplan.com/index.php/SiDIF
    """

    def __init__(self, showErrors=True, debug=False):
        """
        Constructor

        Args:
            showErrors(bool): True if errors should be shown/printed
            debug(bool): True if debugging should be enabled
        """
        # note: the attribute is named showError (singular)
        self.showError = showErrors
        self.debug = debug
        # the grammar is created lazily by getGrammar
        self.grammar = None
        # only blanks and tabs are skipped as whitespace - the grammar
        # matches line ends explicitly via LineEnd()
        ParserElement.setDefaultWhitespaceChars(" \t")

    @staticmethod
    def getUriRegexp():
        """
        get a regular expression for an URI

        Returns:
            the compiled (case insensitive, unicode) regular expression
        """
        # https://mathiasbynens.be/demo/url-regex
        # https://gist.github.com/dperini/729294
        uriRegexp = (
            # protocol identifier
            r"(?:(?:(?:https?|ftp|file):)//|(mailto|news|nntp|telnet):)"
            # user:pass authentication
            r"(?:\S+(?::\S*)?@)?"
            r"(?:"
            # IP address exclusion
            # private & local networks
            r"(?!(?:10|127)(?:\.\d{1,3}){3})"
            r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})"
            r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})"
            # IP address dotted notation octets
            # excludes loopback network 0.0.0.0
            # excludes reserved space >= 224.0.0.0
            # excludes network & broadcast addresses
            # (first & last IP address of each class)
            r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
            r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}"
            r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
            r"|"
            # host & domain names, may end with dot
            # can be replaced by a shortest alternative
            # r"(?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+"
            # r"(?:(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)"
            # # domain name
            # r"(?:\.(?:[a-z\u00a1-\uffff0-9]-?)*[a-z\u00a1-\uffff0-9]+)*"
            r"(?:"
            r"(?:"
            r"[a-z0-9\u00a1-\uffff]"
            r"[a-z0-9\u00a1-\uffff_-]{0,62}"
            r")?"
            r"[a-z0-9\u00a1-\uffff]\."
            r")+"
            # TLD identifier name, may end with dot
            r"(?:[a-z\u00a1-\uffff]{2,}\.?)"
            r")"
            # port number (optional)
            r"(?::\d{2,5})?"
            # resource path (optional)
            r"(?:[/?#]\S*)?"
        )
        compiled = re.compile(uriRegexp, re.IGNORECASE | re.UNICODE)
        return compiled

    def convertToTime(self, tokenStr, location, token):
        """
        convert a timeLiteral to a time

        Args:
            tokenStr(str): the string being parsed
            location: location of the parse process
            token: the tokens - the first one is the time text

        Returns:
            datetime.time: the parsed time

        Raises:
            ParseFatalException: if the text is not a valid time
        """
        try:
            timestr = token[0]
            # hh:mm:ss has 8 characters, otherwise hh:mm is expected
            fmt = "%H:%M:%S" if len(timestr) == 8 else "%H:%M"
            dt = datetime.datetime.strptime(timestr, fmt)
            timeResult = dt.time()
            return timeResult
        except ValueError as ve:
            raise ParseFatalException(tokenStr, location, str(ve))

    def convertToBoolean(self, tokenStr: str, location, token):
        """
        convert the given token to a boolean

        Args:
            tokenStr(str): the string being parsed
            location: location of the parse process
            token: the tokens - the first one must be "true" or "false"

        Returns:
            bool: True for "true", False for "false"

        Raises:
            ParseFatalException: if the token is neither "true" nor "false"
                or the token can not be accessed
        """
        # default reason so that the failure path never hits an unbound name
        reason = "expected true or false"
        try:
            literal = token[0]
        except Exception as ex:
            literal, reason = tokenStr, str(ex)
        else:
            if literal == "true":
                return True
            if literal == "false":
                return False
        # https://stackoverflow.com/questions/13393432/raise-a-custom-exception-in-pyparsing
        # tokenStr is passed on untouched so that the exception refers
        # to the string being parsed rather than to the single token
        raise ParseFatalException(
            tokenStr, location, "invalid boolean %s:%s" % (literal, reason)
        )

    def handleDateTimeLiteral(self, tokenStr: str, location, group):
        """
        handle a date time literal

        Args:
            tokenStr(str): the string being parsed
            location: location of the parse process
            group: the group holding a date and an optional time

        Returns:
            the date if only a date is given, otherwise a datetime.datetime
            combining date and time

        Raises:
            ParseFatalException: if the group has neither one nor two parts
        """
        token = group[0]
        partCount = len(token)
        if partCount == 1:
            return token[0]
        if partCount == 2:
            dateValue, timeValue = token[0], token[1]
            return datetime.datetime(
                dateValue.year,
                dateValue.month,
                dateValue.day,
                timeValue.hour,
                timeValue.minute,
                timeValue.second,
            )
        raise ParseFatalException(
            tokenStr, location, "invalid DateTimeLiteral %s" % tokenStr
        )

    def handleStringLiteral(self, _tokenStr, _location, tokens):
        """
        handle string literals

        Args:
            tokens(ParseResults): the tokens for the literal

        Returns:
            the first part of the literal or "" for an empty literal
        """
        token = tokens[0]
        return token[0] if len(token) > 0 else ""

    def handleIdentifier(self, _tokenStr, _location, tokens):
        """
        handle identifiers

        Args:
            tokens: the tokens with an "identifier" result name

        Returns:
            the identifier
        """
        return tokens["identifier"][0]

    def handleComment(self, location, tokens):
        """
        handle the comment given comment tokens

        Args:
            location: location of the parse process
            tokens: the comment tokens

        Returns:
            Comment: the comment with the joined token texts and the location
        """
        commentText = "".join("".join(token) for token in tokens)
        return Comment(commentText, location)

    def handleGroup(self, _tokenStr, _location, tokens):
        """
        handle a Group

        Args:
            tokens: the tokens - the first one is the group

        Returns:
            the first element of the group
        """
        return tokens[0][0]

    def addContent(self, di: DataInterchange, token, tokenName: str):
        """
        add Content to the given DataInterchange

        Args:
            di(DataInterchange): the datainterchange
            token: the  token to add the content for
            tokenName(str): the name of the token
        """
        if isinstance(token, ParseResults):
            if tokenName in ("links", "comment", "line"):
                if self.debug:
                    self.warn(f"{tokenName}: {len(token)}")
                # the subtokens are handled under the name of this result
                tokenName = token.getName()
                for subtoken in token:
                    self.addContent(di, subtoken, tokenName)
            else:
                self.warn(f"parseResult {tokenName} not handled")
        elif isinstance(token, Triple):
            di.addTriple(token)
        elif isinstance(token, Comment):
            di.addComment(token)
        elif self.debug:
            # whitespace-only strings are ignored silently; the isinstance
            # check keeps non-string tokens from failing on isspace()
            if not (isinstance(token, str) and token.isspace()):
                token_type = type(token).__name__
                self.warn(
                    f"plain subtoken of {tokenName} type {token_type} not handled"
                )

    def handleLines(self, _tokenStr, _location, tokens):
        """
        handle the lines by collecting their content

        Args:
            tokens: the tokens of the parsed lines

        Returns:
            DataInterchange: the triples and comments found in the tokens
        """
        di = DataInterchange()
        self.addContent(di, tokens, tokens.getName())
        return di

    def convertToTriple(self, tokenStr, location, group):
        """
        convert the given token to a triple

        Args:
            tokenStr(str): the token string
            location(object): location of the parse process
            group(ParseResults): the expected triple defining group

        Returns:
            Triple: the triple for the group

        Raises:
            ParseException: if the group does not consist of three tokens
            ParseFatalException: if the kind of triple is unknown
        """
        tripleKind = group.getName()
        tokens = group[0]
        tokenCount = len(tokens)
        if tokenCount != 3:
            raise ParseException(
                tokenStr,
                location,
                "invalid triple %s: %d tokens found 3 expected"
                % (tripleKind, tokenCount),
            )
        e1, e2, e3 = tokens[0], tokens[1], tokens[2]
        if tripleKind in ("isValue", "idLink", "isLink"):
            # isValue: '"Paris" is capital of France'
            # idLink:  'Paris capital France'
            # isLink:  'Paris is capital of France'
            triple = Triple(e1, e2, e3, location)
        elif tripleKind == "hasLink":
            # 'France has capital Paris' - first and last element swap
            triple = Triple(e3, e2, e1, location)
        else:
            raise ParseFatalException(
                tokenStr, location, "invalid tripleKind %s" % tripleKind
            )
        return triple

    def getLiteral(self):
        """
        get the literal sub Grammar

        Returns:
            the pyparsing element for uri, string, boolean, hex, date/time,
            time, floating point and integer literals
        """
        uri = Regex(SiDIFParser.getUriRegexp())("uri")
        booleanLiteral = oneOf(["true", "false"]).setParseAction(self.convertToBoolean)(
            "boolean"
        )
        hexLiteral = (
            Suppress("0x") + (Word(hexnums).setParseAction(tokenMap(int, 16)))
        )("hexLiteral")
        integerLiteral = pyparsing_common.signed_integer("integerLiteral")
        floatingPointLiteral = Group(
            pyparsing_common.sci_real | pyparsing_common.real
        ).setParseAction(self.handleGroup)("floatingPointLiteral")
        timeLiteral = Regex(r"[0-9]{2}:[0-9]{2}(:[0-9]{2})?").setParseAction(
            self.convertToTime
        )("timeLiteral")
        dateLiteral = pyparsing_common.iso8601_date.copy().setParseAction(
            pyparsing_common.convertToDate()
        )("dateLiteral")
        dateTimeLiteral = Group(dateLiteral + Optional(timeLiteral)).setParseAction(
            self.handleDateTimeLiteral
        )("dateTimeLiteral")
        stringLiteral = Group(
            Suppress('"') + ZeroOrMore(CharsNotIn('"') | LineEnd()) + Suppress('"')
        ).setParseAction(self.handleStringLiteral)("stringLiteral")
        # the alternatives are tried in the order given
        literal = Group(
            uri
            | stringLiteral
            | booleanLiteral
            | hexLiteral
            | dateTimeLiteral
            | timeLiteral
            | floatingPointLiteral
            | integerLiteral
        ).setParseAction(self.handleGroup)("literal")
        return literal

    def getIdentifier(self):
        """
        identifier definition

        Returns:
            the pyparsing element for an identifier
        """
        identifier = Group(pyparsing_common.identifier).setParseAction(
            self.handleIdentifier
        )("identifier")
        return identifier

    def getValueGrammar(self):
        """
        sub grammar for value definition

        Returns:
            the pyparsing element for '<literal> is <identifier> of <identifier>'
        """
        literal = self.getLiteral()
        identifier = self.getIdentifier()
        value = Group(
            literal + Suppress("is") + identifier + Suppress("of") + identifier
        ).setParseAction(self.convertToTriple)("isValue")
        return value

    def getGrammar(self):
        """
        get the grammar

        The grammar is created on first use and reused afterwards.

        Returns:
            the pyparsing grammar for SiDIF
        """
        if self.grammar is None:
            value = self.getValueGrammar()
            identifier = self.getIdentifier()

            idlink = Group(identifier + identifier + identifier).setParseAction(
                self.convertToTriple
            )("idLink")
            islink = Group(
                identifier + Suppress("is") + identifier + Suppress("of") + identifier
            ).setParseAction(self.convertToTriple)("isLink")
            haslink = Group(
                identifier + Suppress("has") + identifier + identifier
            ).setParseAction(self.convertToTriple)("hasLink")
            link = Group(islink | haslink | idlink).setParseAction(self.handleGroup)(
                "link"
            )
            # a comment is a "#" line or a run of empty lines
            comment = Group(
                Suppress("#")
                + ZeroOrMore(Word(pp.pyparsing_unicode.Latin1.printables))
                + OneOrMore(LineEnd())
                | OneOrMore(LineEnd())
            ).setParseAction(self.handleComment)("comment*")
            line = Group(value | link).setParseAction(self.handleGroup)("line")
            links = Group(OneOrMore(line + LineEnd() | comment)).setParseAction(
                self.handleLines
            )("links*")
            self.grammar = links
        return self.grammar

    def parseUrl(self, url, title=None):
        """
        parse the sidif text from the given url

        Args:
            url(str): the url to read the SiDIF text from
            title(str): optional title - the url is used if None is given

        Returns:
            tuple: the result of parseText
        """
        # the context manager makes sure the connection is closed
        with urlopen(url) as response:
            sidif = response.read().decode()
        if title is None:
            title = url
        return self.parseText(sidif, title=title)

    def parseWithGrammar(self, grammar, text, title=None, depth: int = None):
        """
        parse the given text with the given grammar optionally
        labeling the parse with the given title

        Args:
            grammar(object): a pyparsing grammar
            text(str): the text to be parsed
            title(str): optional title
            depth(int): the explain depth to show for the errorMessage

        Returns:
            tuple: result and error - error is None if parsing succeeded,
            result is None if a ParseException occurred
        """
        result = None
        error = None
        if title is None:
            title = "?"
        try:
            result = grammar.parseString(text, parseAll=True)
        except ParseException as pe:
            # NOTE(review): only ParseException is handled here - confirm
            # whether ParseFatalException raised by the parse actions
            # is meant to propagate to the caller
            if self.showError:
                errMsg = SiDIFParser.errorMessage(title, pe, depth=depth)
                print(errMsg, file=sys.stderr)
            error = pe
        return result, error

    @classmethod
    def errorMessage(cls, title: str, pe: ParseException, depth: int = None) -> str:
        """
        get the error message for the given parse exception

        Args:
            title(str): the title
            pe(ParseException): the exception to get the error message for
            depth(int): the explain depth to show for the errorMessage
        Returns:
            str: an error message with the explanation
        """
        msg = "%s: error in line %d col %d: \n%s" % (title, pe.lineno, pe.col, pe.line)
        msg += "\n" + pe.explain(depth=depth)
        return msg

    def parseText(self, sidif, title=None, depth: int = None):
        """
        parse the given sidif text

        Args:
            sidif(str): the SiDIF text to be parsed
            title(str): optional title used in error messages
            depth(int): the explain depth to show for the errorMessage

        Return:
            tuple: ParseResult from pyParsing and error - one of these should be None
        """
        return self.parseWithGrammar(self.getGrammar(), sidif, title, depth=depth)

    def warn(self, msg):
        """
        show a warning with the given message

        Args:
            msg(str): the warning message
        """
        print(msg, file=sys.stderr)

    def printResult(self, pr, indent=""):
        """
        print the given parseResult recursively

        Args:
            pr(object): the ParseResult to print
            indent(str): initial indentation
        """
        if isinstance(pr, ParseResults):
            print("%s%s:" % (indent, pr.getName()))
            for subpr in pr:
                self.printResult(subpr, indent + "  ")
        else:
            print("%s %s=%s" % (indent, type(pr).__name__, pr))

__init__(showErrors=True, debug=False)

Constructor. Args: showErrors(bool): True if errors should be shown/printed; debug(bool): True if debugging should be enabled.

Source code in sidif/sidif.py
281
282
283
284
285
286
287
288
289
290
291
def __init__(self, showErrors=True, debug=False):
    """
    Set up the parser state and the whitespace handling.

    Args:
        showErrors(bool): True if errors should be shown/printed
        debug(bool): True if debugging should be enabled
    """
    # only blanks and tabs count as default whitespace for the parser elements
    ParserElement.setDefaultWhitespaceChars(" \t")
    # no grammar yet
    self.grammar = None
    # note: the attribute is named showError (singular)
    self.showError, self.debug = showErrors, debug

addContent(di, token, tokenName)

add Content to the given DataInterchange

Parameters:

Name Type Description Default
di(DataInterchange)

the datainterchange

required
token

the token to add the content for

required
tokenName(str)

the name of the token

required
Source code in sidif/sidif.py
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
def addContent(self, di: DataInterchange, token, tokenName: str):
    """
    walk the given token and collect its triples and comments in the DataInterchange

    Args:
        di(DataInterchange): the data interchange receiving the content
        token: a ParseResults container, a Triple, a Comment or a plain token
        tokenName(str): the result name under which the token was found
    """
    if isinstance(token, ParseResults):
        # only these containers are descended into
        if tokenName not in ("links", "comment", "line"):
            self.warn(f"parseResult {tokenName} not handled")
            return
        if self.debug:
            self.warn(f"{tokenName}: {len(token)}")
        # children are visited under the container's own result name
        childName = token.getName()
        for subtoken in token:
            self.addContent(di, subtoken, childName)
        return
    if isinstance(token, Triple):
        di.addTriple(token)
        return
    if isinstance(token, Comment):
        di.addComment(token)
        return
    # anything else is ignored; in debug mode non-blank leftovers are reported
    if self.debug and not token.isspace():
        token_type = type(token).__name__
        self.warn(f"plain subtoken of {tokenName} type {token_type} not handled")

convertToBoolean(tokenStr, location, token)

convert the given token to a boolean

Source code in sidif/sidif.py
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
def convertToBoolean(self, tokenStr: str, location, token):
    """
    convert the given token to a boolean

    Args:
        tokenStr(str): the string being parsed
        location: the location of the match in tokenStr
        token: the matched tokens - the first one is expected to be "true" or "false"

    Returns:
        bool: True for "true", False for "false"

    Raises:
        ParseFatalException: if the first token is neither "true" nor "false"
            or the token can not be accessed
    """
    # keep the token text apart from tokenStr so the exception still gets the
    # original input string
    value = None
    # default explanation - without it the raise below would fail with an
    # UnboundLocalError whenever no exception had been caught
    msg = "expected true or false"
    try:
        value = token[0]
        if value == "true":
            return True
        elif value == "false":
            return False
    except Exception as pe:
        msg = str(pe)
    # https://stackoverflow.com/questions/13393432/raise-a-custom-exception-in-pyparsing
    raise ParseFatalException(
        tokenStr, location, "invalid boolean %s:%s" % (value, msg)
    )

convertToTime(tokenStr, location, token)

convert a timeLiteral to a time

Source code in sidif/sidif.py
344
345
346
347
348
349
350
351
352
353
354
355
def convertToTime(self, tokenStr, location, token):
    """
    turn a matched time literal into a datetime.time

    Args:
        tokenStr(str): the string being parsed
        location: the location of the match in tokenStr
        token: the matched tokens - the first one holds the time text

    Returns:
        datetime.time: the parsed time

    Raises:
        ParseFatalException: if the text is not a valid time
    """
    try:
        timeText = token[0]
        # eight characters means seconds are present (HH:MM:SS), otherwise HH:MM
        if len(timeText) == 8:
            timeFormat = "%H:%M:%S"
        else:
            timeFormat = "%H:%M"
        return datetime.datetime.strptime(timeText, timeFormat).time()
    except ValueError as ve:
        raise ParseFatalException(tokenStr, location, str(ve))

convertToTriple(tokenStr, location, group)

convert the given token to a triple

Parameters:

Name Type Description Default
tokenStr(str)

the token string

required
location(object)

location of the parse process

required
group(ParseResults)

the expected triple defining group

required
Source code in sidif/sidif.py
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
def convertToTriple(self, tokenStr, location, group):
    """
    build a Triple from a parsed group of exactly three tokens

    Args:
        tokenStr(str): the token string
        location(object): location of the parse process
        group(ParseResults): the group defining the triple; its result name
            selects the kind of triple

    Returns:
        Triple: the triple for the group

    Raises:
        ParseException: if the group does not hold exactly three tokens
        ParseFatalException: if the kind of triple is unknown
    """
    tripleKind = group.getName()
    tokens = group[0]
    tokenCount = len(tokens)
    if tokenCount != 3:
        raise ParseException(
            tokenStr,
            location,
            "invalid triple %s: %d tokens found 3 expected"
            % (tripleKind, tokenCount),
        )
    first, second, third = tokens[0], tokens[1], tokens[2]
    # '"Paris" is capital of France' / 'Paris capital France' /
    # 'Paris is capital of France' all keep the token order
    if tripleKind in ("isValue", "idLink", "isLink"):
        return Triple(first, second, third, location)
    # 'France has capital Paris' is stored with first and last token swapped
    if tripleKind == "hasLink":
        return Triple(third, second, first, location)
    raise ParseFatalException(
        tokenStr, location, "invalid tripleKind %s" % tripleKind
    )

errorMessage(title, pe, depth=None) classmethod

Parameters:

Name Type Description Default
title(str)

the title

required
pe(ParseException)

the exception to get the error message for

required
depth(int)

the explain depth to show for the errorMessage

required

Returns: str: an error message with the explanation

Source code in sidif/sidif.py
644
645
646
647
648
649
650
651
652
653
654
655
656
@classmethod
def errorMessage(cls, title: str, pe: ParseException, depth: int = None) -> str:
    """
    format a parse exception as a readable error message

    Args:
        title(str): the title
        pe(ParseException): the exception to get the error message for
        depth(int): the explain depth to show for the errorMessage
    Returns:
        str: a headline with line, column and offending line followed by
        the explanation given by the exception
    """
    position = (title, pe.lineno, pe.col, pe.line)
    headline = "%s: error in line %d col %d: \n%s" % position
    explanation = pe.explain(depth=depth)
    return headline + "\n" + explanation

getGrammar()

get the grammar

Source code in sidif/sidif.py
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
def getGrammar(self):
    """
    get the grammar

    The grammar is built on the first call and cached in self.grammar;
    later calls return the cached object.

    Returns:
        the top level "links" parser element
    """
    if self.grammar is None:
        value = self.getValueGrammar()
        identifier = self.getIdentifier()

        # three plain identifiers, e.g. 'Paris capital France'
        idlink = Group(identifier + identifier + identifier).setParseAction(
            self.convertToTriple
        )("idLink")
        # 'Paris is capital of France' - the keywords "is" and "of" are suppressed
        islink = Group(
            identifier + Suppress("is") + identifier + Suppress("of") + identifier
        ).setParseAction(self.convertToTriple)("isLink")
        # 'France has capital Paris' - the keyword "has" is suppressed
        haslink = Group(
            identifier + Suppress("has") + identifier + identifier
        ).setParseAction(self.convertToTriple)("hasLink")
        # idlink is listed last, after the two keyword forms
        link = Group(islink | haslink | idlink).setParseAction(self.handleGroup)(
            "link"
        )
        # "+" binds tighter than "|": a comment is either "#" followed by
        # Latin1 printable words and one or more line ends, or just one or
        # more line ends (empty lines)
        comment = Group(
            Suppress("#")
            + ZeroOrMore(Word(pp.pyparsing_unicode.Latin1.printables))
            + OneOrMore(LineEnd())
            | OneOrMore(LineEnd())
        ).setParseAction(self.handleComment)("comment*")
        # a line is either a value statement or a link
        line = Group(value | link).setParseAction(self.handleGroup)("line")
        # again "+" before "|": each entry is a line with its line end, or a comment
        links = Group(OneOrMore(line + LineEnd() | comment)).setParseAction(
            self.handleLines
        )("links*")
        self.grammar = links
    return self.grammar

getIdentifier()

identifier definition

Source code in sidif/sidif.py
555
556
557
558
559
560
561
562
def getIdentifier(self):
    """
    create the parser element for an identifier

    Returns:
        a grouped pyparsing_common.identifier named "identifier" whose
        parse action is handleIdentifier
    """
    grouped = Group(pyparsing_common.identifier)
    grouped.setParseAction(self.handleIdentifier)
    return grouped("identifier")

getLiteral()

get the literal sub Grammar

Source code in sidif/sidif.py
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
def getLiteral(self):
    """
    get the literal sub Grammar

    Returns:
        the "literal" parser element combining uri, string, boolean, hex,
        date/time, time, floating point and integer literals
    """
    # uri as defined by the regular expression of getUriRegexp
    uri = Regex(SiDIFParser.getUriRegexp())("uri")
    # the words true / false are converted by convertToBoolean
    booleanLiteral = oneOf(["true", "false"]).setParseAction(self.convertToBoolean)(
        "boolean"
    )
    # "0x" prefix is suppressed, the hex digits are mapped with int(..., 16)
    hexLiteral = (
        Suppress("0x") + (Word(hexnums).setParseAction(tokenMap(int, 16)))
    )("hexLiteral")
    integerLiteral = pyparsing_common.signed_integer("integerLiteral")
    floatingPointLiteral = Group(
        pyparsing_common.sci_real | pyparsing_common.real
    ).setParseAction(self.handleGroup)("floatingPointLiteral")
    # two digit hours and minutes with optional two digit seconds
    timeLiteral = Regex(r"[0-9]{2}:[0-9]{2}(:[0-9]{2})?").setParseAction(
        self.convertToTime
    )("timeLiteral")
    # a copy is used so the parse action is not set on the shared pyparsing_common element
    dateLiteral = pyparsing_common.iso8601_date.copy().setParseAction(
        pyparsing_common.convertToDate()
    )("dateLiteral")
    # a date optionally followed by a time
    dateTimeLiteral = Group(dateLiteral + Optional(timeLiteral)).setParseAction(
        self.handleDateTimeLiteral
    )("dateTimeLiteral")
    # text between double quotes; there is no way to embed a double quote
    stringLiteral = Group(
        Suppress('"') + ZeroOrMore(CharsNotIn('"') | LineEnd()) + Suppress('"')
    ).setParseAction(self.handleStringLiteral)("stringLiteral")
    # NOTE(review): pyparsing's "|" takes the first alternative that matches,
    # so the order below presumably matters (e.g. dateTimeLiteral and
    # floatingPointLiteral before integerLiteral) - confirm before reordering
    literal = Group(
        uri
        | stringLiteral
        | booleanLiteral
        | hexLiteral
        | dateTimeLiteral
        | timeLiteral
        | floatingPointLiteral
        | integerLiteral
    ).setParseAction(self.handleGroup)("literal")
    return literal

getUriRegexp() staticmethod

get a regular expression for an URI

Source code in sidif/sidif.py
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
@staticmethod
def getUriRegexp():
    """
    build the regular expression used to recognize an URI

    Returns:
        the compiled pattern (case insensitive, unicode)
    """
    # https://mathiasbynens.be/demo/url-regex
    # https://gist.github.com/dperini/729294

    # protocol identifier
    scheme = r"(?:(?:(?:https?|ftp|file):)//|(mailto|news|nntp|telnet):)"
    # user:pass authentication
    credentials = r"(?:\S+(?::\S*)?@)?"
    ipAddress = (
        # IP address exclusion
        # private & local networks
        r"(?!(?:10|127)(?:\.\d{1,3}){3})"
        r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})"
        r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})"
        # IP address dotted notation octets
        # excludes loopback network 0.0.0.0
        # excludes reserved space >= 224.0.0.0
        # excludes network & broadcast addresses
        # (first & last IP address of each class)
        r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])"
        r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}"
        r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))"
    )
    # host & domain names: one or more dot terminated labels
    hostLabels = (
        r"(?:"
        r"(?:"
        r"[a-z0-9\u00a1-\uffff]"
        r"[a-z0-9\u00a1-\uffff_-]{0,62}"
        r")?"
        r"[a-z0-9\u00a1-\uffff]\."
        r")+"
    )
    # TLD identifier name, may end with dot
    topLevelDomain = r"(?:[a-z\u00a1-\uffff]{2,}\.?)"
    # port number (optional)
    port = r"(?::\d{2,5})?"
    # resource path (optional)
    resourcePath = r"(?:[/?#]\S*)?"
    # the host is either a dotted IP address or a domain name
    host = r"(?:" + ipAddress + r"|" + hostLabels + topLevelDomain + r")"
    return re.compile(
        scheme + credentials + host + port + resourcePath,
        re.IGNORECASE | re.UNICODE,
    )

getValueGrammar()

sub grammar for value definition

Source code in sidif/sidif.py
564
565
566
567
568
569
570
571
572
573
def getValueGrammar(self):
    """
    create the sub grammar for a value statement

    Returns:
        the "isValue" parser element: literal "is" identifier "of" identifier
        with convertToTriple as parse action
    """
    literal = self.getLiteral()
    identifier = self.getIdentifier()
    # the keywords "is" and "of" are matched but dropped from the result
    sequence = literal + Suppress("is") + identifier + Suppress("of") + identifier
    isValue = Group(sequence)
    isValue.setParseAction(self.convertToTriple)
    return isValue("isValue")

handleComment(location, tokens)

handle the comment given comment tokens

Source code in sidif/sidif.py
415
416
417
418
419
420
421
422
423
424
425
def handleComment(self, location, tokens):
    """
    create a Comment from the given comment tokens

    Args:
        location: the location of the comment in the parsed text
        tokens: the comment tokens - the parts of each token are concatenated

    Returns:
        Comment: the comment with the concatenated text and its location
    """
    commentText = "".join("".join(token) for token in tokens)
    return Comment(commentText, location)

handleDateTimeLiteral(tokenStr, location, group)

handle a date time literal

Source code in sidif/sidif.py
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
def handleDateTimeLiteral(self, tokenStr: str, location, group):
    """
    convert a date literal with an optional time part

    Args:
        tokenStr(str): the string being parsed
        location: the location of the match in tokenStr
        group: the matched group - its first entry holds the date and
            optionally the time

    Returns:
        the date itself if there is no time part, otherwise a
        datetime.datetime combining date and time

    Raises:
        ParseFatalException: if the group holds neither one nor two parts
    """
    parts = group[0]
    partCount = len(parts)
    # date only
    if partCount == 1:
        return parts[0]
    if partCount != 2:
        raise ParseFatalException(
            tokenStr, location, "invalid DateTimeLiteral %s" % tokenStr
        )
    # date and time: merge both down to the second
    day, clock = parts[0], parts[1]
    return datetime.datetime(
        day.year, day.month, day.day, clock.hour, clock.minute, clock.second
    )

handleGroup(_tokenStr, _location, tokens)

handle a Group

Source code in sidif/sidif.py
427
428
429
430
431
432
433
434
435
def handleGroup(self, _tokenStr, _location, tokens):
    """
    handle a Group by unwrapping it

    Args:
        _tokenStr: the string being parsed (unused)
        _location: the location of the match (unused)
        tokens: the matched tokens - the first entry is the group

    Returns:
        the first element of the group
    """
    # the result names of the group were looked up here before but never
    # used, so only the unwrapping itself is kept
    return tokens[0][0]

handleIdentifier(_tokenStr, _location, tokens)

handle identifiers

Source code in sidif/sidif.py
408
409
410
411
412
413
def handleIdentifier(self, _tokenStr, _location, tokens):
    """
    unwrap an identifier match

    Args:
        _tokenStr: the string being parsed (unused)
        _location: the location of the match (unused)
        tokens: the matched tokens with an "identifier" entry

    Returns:
        the first element of the "identifier" entry
    """
    named = tokens["identifier"]
    return named[0]

handleLines(_tokenStr, _location, tokens)

handle the line derived

Source code in sidif/sidif.py
468
469
470
471
472
473
474
def handleLines(self, _tokenStr, _location, tokens):
    """
    collect the content of the parsed lines in a new DataInterchange

    Args:
        _tokenStr: the string being parsed (unused)
        _location: the location of the match (unused)
        tokens: the matched tokens

    Returns:
        DataInterchange: the data interchange filled by addContent
    """
    interchange = DataInterchange()
    rootName = tokens.getName()
    self.addContent(interchange, tokens, rootName)
    return interchange

handleStringLiteral(_tokenStr, _location, tokens)

handle string literals

Parameters:

Name Type Description Default
tokens(ParseResults)

the tokens for the literal

required
Source code in sidif/sidif.py
394
395
396
397
398
399
400
401
402
403
404
405
406
def handleStringLiteral(self, _tokenStr, _location, tokens):
    """
    extract the text of a string literal

    Args:
        _tokenStr: the string being parsed (unused)
        _location: the location of the match (unused)
        tokens(ParseResults): the tokens for the literal

    Returns:
        the first part of the literal or "" for an empty literal
    """
    parts = tokens[0]
    # an empty literal "" yields no parts at all
    if len(parts) == 0:
        return ""
    return parts[0]

parseText(sidif, title=None, depth=None)

parse the given sidif text

Parameters:

Name Type Description Default
sidif(str)

the SiDIF text to be parsed

required
depth(int)

the explain depth to show for the errorMessage

required
Return

tuple: ParseResult from pyParsing and error - one of these should be None

Source code in sidif/sidif.py
658
659
660
661
662
663
664
665
666
667
668
669
def parseText(self, sidif, title=None, depth: int = None):
    """
    parse the given SiDIF text with the grammar of this parser

    Args:
        sidif(str): the SiDIF text to be parsed
        title(str): optional title passed on to parseWithGrammar
        depth(int): the explain depth to show for the errorMessage

    Return:
        tuple: ParseResult from pyParsing and error - one of these should be None
    """
    grammar = self.getGrammar()
    return self.parseWithGrammar(grammar, sidif, title, depth=depth)

parseUrl(url, title=None)

parse the sidif text from the given url

Parameters:

Name Type Description Default
url(str)

the url to read the SiDIF text from

required
Source code in sidif/sidif.py
608
609
610
611
612
613
614
615
616
617
618
def parseUrl(self, url, title=None):
    """
    parse the sidif text from the given url

    Args:
        url(str): the url to read the SiDIF text from
        title(str): optional title for the parse - defaults to the url

    Returns:
        the result of parseText for the text read from the url
    """
    # the context manager closes the response even if reading or decoding
    # fails
    with urlopen(url) as response:
        sidif = response.read().decode()
    if title is None:
        title = url
    return self.parseText(sidif, title=title)

parseWithGrammar(grammar, text, title=None, depth=None)

parse the given text with the given grammar optionally labeling the parse with the given title

Parameters:

Name Type Description Default
grammar(object)

a pyparsing grammar

required
text(str)

the text to be parsed

required
title(str)

optional title

required
depth(int)

the explain depth to show for the errorMessage

required
Source code in sidif/sidif.py
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
def parseWithGrammar(self, grammar, text, title=None, depth: int = None):
    """
    parse the given text completely with the given grammar

    Args:
        grammar(object): a pyparsing grammar
        text(str): the text to be parsed
        title(str): optional title used in the error message - "?" if missing
        depth(int): the explain depth to show for the errorMessage

    Returns:
        tuple: (result, None) on success or (None, ParseException) on failure
    """
    label = "?" if title is None else title
    try:
        return grammar.parseString(text, parseAll=True), None
    except ParseException as parseError:
        # the error is reported on stderr if wanted and handed back to the caller
        if self.showError:
            errMsg = SiDIFParser.errorMessage(label, parseError, depth=depth)
            print(errMsg, file=sys.stderr)
        return None, parseError

printResult(pr, indent='')

print the given parseResult recursively

Parameters:

Name Type Description Default
pr(object)

the ParseResult to print

required
indent(str)

initial indentation

required
Source code in sidif/sidif.py
680
681
682
683
684
685
686
687
688
689
690
691
692
693
def printResult(self, pr, indent=""):
    """
    recursively print a parse result as an indented tree

    Args:
        pr(object): a ParseResults node or a plain leaf value
        indent(str): indentation prefix for this level
    """
    # leaves are printed with their type name and value
    if not isinstance(pr, ParseResults):
        print("%s %s=%s" % (indent, type(pr).__name__, pr))
        return
    # inner nodes print their result name and then all children, one level deeper
    print("%s%s:" % (indent, pr.getName()))
    childIndent = indent + "  "
    for child in pr:
        self.printResult(child, childIndent)

warn(msg)

show a warning with the given message

Parameters:

Name Type Description Default
msg(str)

the warning message

required
Source code in sidif/sidif.py
671
672
673
674
675
676
677
678
def warn(self, msg):
    """
    emit a warning on the standard error stream

    Args:
        msg(str): the text of the warning
    """
    stream = sys.stderr
    print(msg, file=stream)

Triple

a pseudo-triple (subject, predicate, object) with its location

due to the "it" syntax the subject may contain the object and the real subject is the latest non it-reference

Source code in sidif/sidif.py
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
class Triple:
    """
    a pseudo-triple (subject, predicate, object) together with its location

    because of the "it" syntax the subject slot may hold the value while the
    object is the word "it"; the real subject is then the latest non
    it-reference
    """

    def __init__(self, pSubject, pPredicate, pObject, location=0):
        """
        create a triple

        Args:
            pSubject(object): subject
            pPredicate(object): predicate
            pObject(object): object
            location(int): the location in the source text
        """
        self.s, self.p, self.o = pSubject, pPredicate, pObject
        self.location = location

    def dump(self, value):
        """
        Args:
            value(object): the value to show

        Returns:
            str: the value followed by its type name in parentheses
        """
        return "%s(%s)" % (value, type(value).__name__)

    def asLiteral(self, value):
        """
        Args:
            value(object): the value to render

        Returns:
            str: strings in double quotes, booleans as true/false,
            anything else in its plain string form
        """
        if isinstance(value, str):
            return '"%s"' % value
        if isinstance(value, bool):
            return "true" if value else "false"
        return "%s" % value

    def asSiDIF(self):
        """
        Returns:
            str: the SiDIF line for this triple
        """
        isItReference = self.o == "it"
        if not isItReference:
            return "%s %s %s" % (self.s, self.p, self.o)
        # it-reference: the subject slot holds the value and is shown as a literal
        return "%s is %s of it" % (self.asLiteral(self.s), self.p)

    def __str__(self):
        """
        Returns:
            str: subject, predicate and object dumped with their types
        """
        dumped = tuple(self.dump(part) for part in (self.s, self.p, self.o))
        return "{%s,%s,%s}" % dumped

__init__(pSubject, pPredicate, pObject, location=0)

constructor

Parameters:

Name Type Description Default
pSubject(object)

subject

required
pPredicate(object)

predicate

required
pObject(object)

object

required
location(int)

the location in the source text

required
Source code in sidif/sidif.py
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
def __init__(self, pSubject, pPredicate, pObject, location=0):
    """
    create a triple

    Args:
        pSubject(object): subject
        pPredicate(object): predicate
        pObject(object): object
        location(int): the location in the source text
    """
    self.s, self.p, self.o = pSubject, pPredicate, pObject
    self.location = location