Skip to content

pysotsog API Documentation

citeproc

Created on 2022-12-21

@author: wf

Citeproc

see https://en.wikipedia.org/wiki/CiteProc

Source code in skg/citeproc.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
class Citeproc:
    """
    converter for citeproc compatible metadata

    see https://en.wikipedia.org/wiki/CiteProc
    """

    @classmethod
    def asScite(cls, meta_data: dict, retrieved_from: str) -> str:
        """
        convert the given meta data to #Scite format

        see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

        The citation key (reference) is the lower-cased family name of the
        first author, followed by the year and the first two characters of
        the lower-cased title. Parts that are not available stay empty.

        Args:
            meta_data(dict): the citeproc compatible metadata dict to convert
            retrieved_from(str): the url the metadata was retrieved from

        Returns:
            str: Semantic Mediawiki markup

        Raises:
            KeyError: if meta_data has no "title" entry
        """

        def unlist(value):
            """join the items of a list with ";" - other values pass through"""
            if not isinstance(value, list):
                return value
            text = ";".join(str(item) for item in value)
            if len(value) > 1:
                # tell Semantic Cite that the value is multi-valued
                text += "|+sep=;"
            return text

        def first_value(value):
            """get the first item of a list (None if it is empty) - other values pass through"""
            if not isinstance(value, list):
                return value
            return value[0] if value else None

        def get_author(value) -> str:
            """
            get the author markup

            Args:
                value(list): the list of author records to disassemble

            Returns:
                str: Mediawiki markup
            """
            names = []
            for arec in value:
                if "family" not in arec:
                    # incomplete author record ignored
                    continue
                if "given" in arec:
                    names.append(f"{arec['given']} {arec['family']}")
                else:
                    names.append(f"{arec['family']}")
            return ";".join(names)

        def first_year(key: str):
            """get the year of the first date-parts entry for the given key or None"""
            date_parts = (meta_data.get(key) or {}).get("date-parts") or [[]]
            first_part = date_parts[0]
            return first_part[0] if first_part else None

        # datetime.utcnow() is deprecated - an aware UTC timestamp yields the same date
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
        ref_type = "journal-article"
        # an empty title list must not break the conversion
        title = first_value(meta_data["title"]) or ""
        title_2 = title.lower()[:2]
        author_lower = ""
        if "author" in meta_data:
            # an empty author list is treated like a missing author
            first_author = first_value(meta_data["author"]) or {}
            if "family" in first_author:
                family = first_value(first_author["family"]) or ""
                author_lower = family.lower()
        # prefer the print publication date and fall back to the issue date;
        # an unset year stays empty instead of being rendered as "None"
        year = first_year("published-print") or first_year("issued") or ""
        reference = f"{author_lower}{year}{title_2}"
        markup = ""
        # (scite key, citeproc key, converter) in output order
        for skey, mkey, func in [
            ("title", "title", unlist),
            ("subtitle", "subtitle", unlist),
            ("authors", "author", get_author),
            ("journal", "container-title", unlist),
            ("publisher", "publisher", str),
            ("issn", "ISSN", unlist),
            ("subject", "subject", unlist),
            ("volume", "volume", str),
            ("pages", "page", str),
            ("doi", "DOI", str),
        ]:
            value = meta_data.get(mkey)
            if value:
                markup += f"\n|{skey}={func(value)}"
        markup = f"""{{{{#scite:
|reference={reference}
|type={ref_type}{markup}
|year={year}
|retrieved-from={retrieved_from}
|retrieved-on={timestamp}
}}}}"""
        full_markup = f"{title}\n[[CiteRef::{reference}]]\n{markup}"
        return full_markup

asScite(meta_data, retrieved_from) classmethod

convert the given meta data to #Scite format

see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

Args: meta_data(dict): the citeproc compatible metadata dict to convert; retrieved_from(str): the url the metadata was retrieved from

Returns:

Name Type Description
str str

Semantic Mediawiki markup

Source code in skg/citeproc.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
    @classmethod
    def asScite(cls, meta_data: dict, retrieved_from: str) -> str:
        """
        convert the given meta data to #Scite format

        see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php
        Args:
            meta_data(dict): the citeproc compatible metadata dict to convert
            retrieved_from(str): the url the metadata was retrieved from

        Returns:
            str: Semantic Mediawiki markup
        """

        def unlist(value):
            if type(value) != list:
                return value
            text = ""
            delim = ""
            for item in value:
                text += f"{delim}{item}"
                delim = ";"
            if len(value) > 1:
                text += "|+sep=;"
            return text

        def firstValue(value):
            if type(value) != list:
                return value
            else:
                return value[0]

        def get_author(value) -> str:
            """
            get the author markup

            Args:
                value(list): the list to disassemble

            Returns:
                str: Mediawiki markup
            """
            author = ""
            delim = ""
            for arec in value:
                if "given" in arec and "family" in arec:
                    author += f"""{delim}{arec["given"]} {arec["family"]}"""
                    delim = ";"
                elif "family" in arec:
                    author += f"""{delim}{arec["family"]}"""
                    delim = ";"
                else:
                    # incomplete author record ignored
                    pass
            return author

        timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%d")
        ref_type = "journal-article"
        title = meta_data["title"]
        if type(title) is list:
            title = title[0]
        title_2 = title.lower()[:2]
        author_lower = ""
        if "author" in meta_data:
            first_author = firstValue(meta_data["author"])
            if "family" in first_author:
                family = firstValue(first_author["family"])
                author_lower = family.lower()
            else:
                # debug break point
                pass
        year = ""
        if "published-print" in meta_data:
            year = meta_data["published-print"]["date-parts"][0][0]
        if not year and "issued" in meta_data:
            year = meta_data["issued"]["date-parts"][0][0]
        reference = f"{author_lower}{year}{title_2}"
        markup = ""
        for skey, mkey, func in [
            ("title", "title", unlist),
            ("subtitle", "subtitle", unlist),
            ("authors", "author", get_author),
            ("journal", "container-title", unlist),
            ("publisher", "publisher", str),
            ("issn", "ISSN", unlist),
            ("subject", "subject", unlist),
            ("volume", "volume", str),
            ("pages", "page", str),
            ("doi", "DOI", str),
        ]:
            if mkey in meta_data:
                value = meta_data[mkey]
                if value:
                    value = func(value)
                    markup += f"\n|{skey}={value}"
        markup = f"""{{{{#scite:
|reference={reference}
|type={ref_type}{markup}
|year={year}
|retrieved-from={retrieved_from}
|retrieved-on={timestamp}
}}}}"""
        full_markup = f"{title}\n[[CiteRef::{reference}]]\n{markup}"
        return full_markup

crossref

Created on 17.11.2022

@author: wf

Crossref

Crossref access

Source code in skg/crossref.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class Crossref:
    """
    Crossref access by way of the habanero library
    """

    def __init__(self, mailto=None, ua_string=None):
        """
        constructor

        Args:
            mailto: the contact e-mail address - "wf@bitplan.com" if None
            ua_string: the user agent string - derived from the pysotsog
                version and mailto if None
        """
        contact = "wf@bitplan.com" if mailto is None else mailto
        if ua_string is None:
            ua_string = f"pysotsog/{skg.__version__} (https://pypi.org/project/pysotsog/; mailto:{contact})"
        # NOTE(review): contact and ua_string are computed but not handed
        # over - the client is created with an empty user agent string
        self.cr = habanero.Crossref(ua_string="")

    def doiMetaData(self, dois: list):
        """
        get the meta data for the given dois

        Args:
            dois(list): a list of dois

        Returns:
            the "message" part of the response if its status is "ok",
            None otherwise
        """
        response = self.cr.works(ids=dois)
        is_ok = (
            "status" in response
            and "message" in response
            and response["status"] == "ok"
        )
        return response["message"] if is_ok else None

    def doiBibEntry(self, dois: list):
        """
        get bib entries for the given dois

        Args:
            dois(list): a list of dois

        Returns:
            the content negotiation result in "bibentry" format
        """
        return cn.content_negotiation(ids=dois, format="bibentry")

    def asScite(self, meta_data: dict) -> str:
        """
        convert the given meta data to #Scite format

        see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

        Args:
            meta_data(dict): the metadata dict handed to Citeproc.asScite

        Returns:
            str: Semantic Mediawiki markup
        """
        return Citeproc.asScite(meta_data, retrieved_from=self.cr.base_url)

__init__(mailto=None, ua_string=None)

constructor

Source code in skg/crossref.py
19
20
21
22
23
24
25
26
27
28
def __init__(self, mailto=None, ua_string=None):
    """
    constructor

    Args:
        mailto: the contact e-mail address - "wf@bitplan.com" if None
        ua_string: the user agent string - derived from the pysotsog
            version and mailto if None
    """
    contact = "wf@bitplan.com" if mailto is None else mailto
    if ua_string is None:
        ua_string = f"pysotsog/{skg.__version__} (https://pypi.org/project/pysotsog/; mailto:{contact})"
    # NOTE(review): contact and ua_string are computed but not handed
    # over - the client is created with an empty user agent string
    self.cr = habanero.Crossref(ua_string="")

asScite(meta_data)

convert the given meta data to #Scite format

see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

Returns:

Name Type Description
str str

Semantic Mediawiki markup

Source code in skg/crossref.py
54
55
56
57
58
59
60
61
62
63
64
def asScite(self, meta_data: dict) -> str:
    """
    convert the given meta data to #Scite format

    see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

    Args:
        meta_data(dict): the metadata dict handed to Citeproc.asScite

    Returns:
        str: Semantic Mediawiki markup
    """
    # the base url of the Crossref client is recorded as the retrieval source
    return Citeproc.asScite(meta_data, retrieved_from=self.cr.base_url)

doiBibEntry(dois)

get bib entries for the given dois

Source code in skg/crossref.py
47
48
49
50
51
52
def doiBibEntry(self, dois: list):
    """
    get bib entries for the given dois

    Args:
        dois(list): a list of dois

    Returns:
        the content negotiation result in "bibentry" format
    """
    return cn.content_negotiation(ids=dois, format="bibentry")

doiMetaData(dois)

get the meta data for the given dois

Parameters:

Name Type Description Default
dois(list)

a list of dois

required
Source code in skg/crossref.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
def doiMetaData(self, dois: list):
    """
    get the meta data for the given dois

    Args:
        dois(list): a list of dois

    Returns:
        the "message" part of the response if its status is "ok",
        None otherwise
    """
    response = self.cr.works(ids=dois)
    is_ok = (
        "status" in response
        and "message" in response
        and response["status"] == "ok"
    )
    return response["message"] if is_ok else None

dblp

Created on 2022-11-17

@author: wf

Dblp

Schloss Dagstuhl Dblp computer science bibliography

Source code in skg/dblp.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
class Dblp:
    """
    Schloss Dagstuhl Dblp computer science bibliography
    """

    def __init__(self, endpoint: str = "https://qlever.cs.uni-freiburg.de/api/dblp"):
        """
        constructor

        Args:
            endpoint(str): the endpoint to use
        """
        self.endpoint = endpoint
        self.schema = Owl(
            "dblp", "https://dblp.org/rdf/schema", "Wolfgang Fahl", "2022-11-19"
        )
        self.sparql = SPARQL(self.endpoint)

    def get_paper_records(
        self,
        regex: str,
        prop_name: str = "title",
        limit: int = 100,
        debug: bool = False,
    ) -> list:
        """
        get papers fitting the given regex

        Args:
            regex(str): the regex to filter for
            prop_name(str): the property to filter
            limit(int): the maximum number of records to return
            debug(bool): if True show debug information

        Returns:
            list: a list of dict of paper records
        """
        # NOTE(review): regex and prop_name are inserted into the query
        # without any escaping - callers must pass trusted values
        sparql_query = """PREFIX dblp: <https://dblp.org/rdf/schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT
  ?paper 
  ?year
  ?yearofevent
  #?month
  ?doi
  ?isbn
  ?title
  (GROUP_CONCAT(?author_o) as ?authors)
  ?publishedin
WHERE {
  ?paper dblp:title ?title .
  ?paper dblp:doi ?doi .
  OPTIONAL { ?paper dblp:yearOfEvent ?yearofevent } .
  OPTIONAL { ?paper dblp:isbn ?isbn }.
  ?paper dblp:authoredBy ?author_o.
  ?paper dblp:publishedIn ?publishedin .
  ?paper dblp:yearOfPublication ?year.
  OPTIONAL { ?paper dblp:monthOfPublication ?month}.
"""
        sparql_query += f"""FILTER regex(?{prop_name}, "{regex}").\n"""
        sparql_query += f"""
}}
GROUP BY 
  ?paper 
  ?title 
  ?doi 
  ?isbn
  ?year 
  ?yearofevent
  ?month 
  ?publishedin 
ORDER BY DESC(?year)
LIMIT {limit}"""
        if debug:
            print(sparql_query)
        records = self.sparql.queryAsListOfDicts(sparql_query)
        return records

    def get_random_papers(self, year: int = 2020, limit: int = 10) -> list:
        """
        get a random selection of papers published in the given year

        Args:
            year(int): the year of publication to select papers for
            limit(int): the maximum number of records to return

        Returns:
            list: a list of dict of paper records
        """
        sparql_query = f"""PREFIX dblp: <https://dblp.org/rdf/schema#>
SELECT 
  ?paper 
  (SAMPLE(?doi_o) as ?doi)
  (SAMPLE(?title_o) as ?title)
  (MIN(?year_o) as ?year)
  (GROUP_CONCAT(?author_o) as ?authors)
  (SAMPLE(?publishedin_o) as ?publishedin)
  (SAMPLE(?sortKey) as ?sortKey)
WHERE {{
  VALUES ?year_o {{ "{year}" }}
  ?paper dblp:title ?title_o .
  ?paper dblp:doi ?doi_o .
  ?paper dblp:authoredBy ?author_o.
  ?paper dblp:publishedIn ?publishedin_o .
  ?paper dblp:yearOfPublication ?year_o.
  BIND(RAND() AS ?sortKey)
}}
GROUP BY ?paper
ORDER BY ?sortKey 
LIMIT {limit}
        """
        # the query was built but never executed before - run it and
        # hand the records back like get_paper_records does
        records = self.sparql.queryAsListOfDicts(sparql_query)
        return records

__init__(endpoint='https://qlever.cs.uni-freiburg.de/api/dblp')

constructor

Parameters:

Name Type Description Default
endpoint(str)

the endpoint to use

required
Source code in skg/dblp.py
17
18
19
20
21
22
23
24
25
26
27
28
def __init__(self, endpoint: str = "https://qlever.cs.uni-freiburg.de/api/dblp"):
    """
    constructor

    Sets up the schema description and the SPARQL access for the endpoint.

    Args:
        endpoint(str): the endpoint to use
    """
    self.endpoint = endpoint
    # positional arguments for the Owl schema description
    schema_args = ("dblp", "https://dblp.org/rdf/schema", "Wolfgang Fahl", "2022-11-19")
    self.schema = Owl(*schema_args)
    self.sparql = SPARQL(endpoint)

get_paper_records(regex, prop_name='title', limit=100, debug=False)

get papers fitting the given regex

Parameters:

Name Type Description Default
prop_name(str)

the property to filter

required
regex(str)

the regex to filter for

required
limit(int)

the maximum number of records to return

required
debug(bool)

if True show debug information

required

Returns:

Name Type Description
list list

a list of dict of paper records

Source code in skg/dblp.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
    def get_paper_records(
        self,
        regex: str,
        prop_name: str = "title",
        limit: int = 100,
        debug: bool = False,
    ) -> list:
        """
        get papers fitting the given regex

        Args:
            prop_name(str): the property to filter
            regex(str): the regex to filter for
            limit(int): the maximum number of records to return
            debug(bool): if True show debug information

        Returns:
            list: a list of dict of paper records
        """
        sparql_query = """PREFIX dblp: <https://dblp.org/rdf/schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT
  ?paper 
  ?year
  ?yearofevent
  #?month
  ?doi
  ?isbn
  ?title
  (GROUP_CONCAT(?author_o) as ?authors)
  ?publishedin
WHERE {
  ?paper dblp:title ?title .
  ?paper dblp:doi ?doi .
  OPTIONAL { ?paper dblp:yearOfEvent ?yearofevent } .
  OPTIONAL { ?paper dblp:isbn ?isbn }.
  ?paper dblp:authoredBy ?author_o.
  ?paper dblp:publishedIn ?publishedin .
  ?paper dblp:yearOfPublication ?year.
  OPTIONAL { ?paper dblp:monthOfPublication ?month}.
"""
        sparql_query += f"""FILTER regex(?{prop_name}, "{regex}").\n"""
        sparql_query += f"""
}}
GROUP BY 
  ?paper 
  ?title 
  ?doi 
  ?isbn
  ?year 
  ?yearofevent
  ?month 
  ?publishedin 
ORDER BY DESC(?year)
LIMIT {limit}"""
        if debug:
            print(sparql_query)
        records = self.sparql.queryAsListOfDicts(sparql_query)
        return records

dblp2wikidata

Created on 2024-02-26

@author: wf

Dblp2Wikidata

utility for transferring Dblp person entries to Wikidata

Source code in skg/dblp2wikidata.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
class Dblp2Wikidata:
    """
    utility for transferring Dblp person entries to Wikidata
    """

    def __init__(self, debug: bool = False):
        """
        constructor

        Args:
            debug(bool): if True show debug information
        """
        self.debug = debug

    def transfer(self, args: Namespace):
        """
        Main method to handle the transfer of DBLP entries to Wikidata.

        So far only the search term is read from the arguments and
        reported in debug mode.

        Args:
            args(Namespace): Command line arguments.
        """
        # the dblp2wikidata argument carries the search term - None if unset
        search_term = getattr(args, "dblp2wikidata", None)
        if not self.debug:
            return
        print(f"trying to synchronize DBLP person entry for {search_term}")

transfer(args)

Main method to handle the transfer of DBLP entries to Wikidata.

Parameters:

Name Type Description Default
args(Namespace)

Command line arguments.

required
Source code in skg/dblp2wikidata.py
19
20
21
22
23
24
25
26
27
28
def transfer(self, args: Namespace):
    """
    Main method to handle the transfer of DBLP entries to Wikidata.

    So far only the search term is read from the arguments and
    reported in debug mode.

    Args:
        args(Namespace): Command line arguments.
    """
    # the dblp2wikidata argument carries the search term - None if unset
    search_term = getattr(args, "dblp2wikidata", None)
    if not self.debug:
        return
    print(f"trying to synchronize DBLP person entry for {search_term}")

doi

Created on 2022-11-22

@author: wf

DOI

Digital Object Identifier handling

see e.g. https://www.wikidata.org/wiki/Property:P356; see https://www.doi.org/doi_handbook/2_Numbering.html#2.2; see https://github.com/davidagraf/doi2bib2/blob/master/server/doi2bib.js; see https://citation.crosscite.org/docs.html

Source code in skg/doi.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
class DOI:
    """
    Digital Object Identifier handling

    see e.g. https://www.wikidata.org/wiki/Property:P356
    see https://www.doi.org/doi_handbook/2_Numbering.html#2.2
    see https://github.com/davidagraf/doi2bib2/blob/master/server/doi2bib.js
    see https://citation.crosscite.org/docs.html

    """

    # directory indicator "10", a registrant code of at least four digits
    # with optional sub divisions, a slash (or %2F) and a suffix without
    # whitespace, quotes or ampersands
    pattern = re.compile(
        r"((?P<directory_indicator>10)\.(?P<registrant_code>[0-9]{4,})(?:\.[0-9]+)*(?:\/|%2F)(?:(?![\"&\'])\S)+)"
    )

    def __init__(self, doi: str):
        """
        a DOI

        Args:
            doi(str): the DOI string - checked against the DOI pattern
        """
        self.doi = doi
        match = DOI.pattern.match(doi)
        self.ok = bool(match)
        # the registrant code is only set for strings that match the pattern
        if self.ok:
            self.registrant_code = match.group("registrant_code")

    @classmethod
    def isDOI(cls, doi: str):
        """
        check that the given string is a doi

        A list is accepted as well: it is valid if it is not empty and
        every item is a valid doi.

        Args:
            doi(str): the potential DOI string

        Returns:
            bool: True if the given value is a doi (or a list of dois)
        """
        if not doi:
            return False
        if isinstance(doi, list):
            return all(cls.isDOI(single_doi) for single_doi in doi)
        if not isinstance(doi, str):
            return False
        return DOI(doi).ok

    def fetch_response(self, url: str, headers: dict):
        """
        fetch response for the given url with the given headers

        Args:
            url(str): the url to fetch the data for
            headers(dict): the headers to use

        Returns:
            the response as returned by urllib.request.urlopen -
            the caller is responsible for closing it
        """
        req = urllib.request.Request(url, headers=headers)
        response = urllib.request.urlopen(req)
        return response

    def fetch_json(self, url: str, headers: dict):
        """
        fetch json for the given url with the given headers

        Args:
            url(str): the url to fetch the data for
            headers(dict): the headers to use

        Returns:
            json: json data
        """
        text = self.fetch_text(url, headers)
        json_data = json.loads(text)
        return json_data

    def fetch_text(self, url, headers) -> str:
        """
        fetch text for the given url with the given headers

        Args:
            url(str): the url to fetch the data for
            headers(dict): the headers to use

        Returns:
            str: the text
        """
        # close the response as soon as the content is read so that the
        # connection is not leaked
        with self.fetch_response(url, headers) as response:
            # decode with the charset announced by the server - utf-8 as fallback
            encoding = response.headers.get_content_charset("utf-8")
            content = response.read()
        text = content.decode(encoding)
        return text

    def doi2bibTex(self):
        """
        get the bibtex result for my doi
        """
        url = f"https://doi.org/{self.doi}"
        headers = {"Accept": "application/x-bibtex; charset=utf-8"}
        return self.fetch_text(url, headers)

    def doi2Citeproc(self):
        """
        get the Citeproc JSON result for my doi
        see https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html
        """
        url = f"https://doi.org/{self.doi}"
        headers = {"Accept": "application/vnd.citationstyles.csl+json; charset=utf-8"}
        return self.fetch_json(url, headers)

    def dataCiteLookup(self):
        """
        get the dataCite json result for my doi
        """
        url = f"https://api.datacite.org/dois/{self.doi}"
        headers = {"Accept": "application/vnd.api+json; charset=utf-8"}
        return self.fetch_json(url, headers)

    def asScite(self) -> str:
        """
        get DOI metadata and convert to Semantic Cite markup

           see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

        Returns:
            str: Semantic Mediawiki markup
        """
        # the metadata is fetched only once and kept on the instance
        if not hasattr(self, "meta_data"):
            self.meta_data = self.doi2Citeproc()
        markup = Citeproc.asScite(self.meta_data, retrieved_from="https://doi.org/")
        return markup

__init__(doi)

a DOI

Source code in skg/doi.py
31
32
33
34
35
36
37
38
39
def __init__(self, doi: str):
    """
    a DOI

    Args:
        doi(str): the DOI string - checked against the DOI pattern
    """
    self.doi = doi
    pattern_match = DOI.pattern.match(doi)
    self.ok = pattern_match is not None
    # the registrant code is only set for strings that match the pattern
    if pattern_match is not None:
        self.registrant_code = pattern_match.group("registrant_code")

asScite()

get DOI metadata and convert to Semantic Cite markup

see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

Returns:

Name Type Description
str str

Semantic Mediawiki markup

Source code in skg/doi.py
136
137
138
139
140
141
142
143
144
145
146
147
148
def asScite(self) -> str:
    """
    get DOI metadata and convert to Semantic Cite markup

       see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

    The metadata is fetched on first use and kept in self.meta_data.

    Returns:
        str: Semantic Mediawiki markup
    """
    already_fetched = hasattr(self, "meta_data")
    if not already_fetched:
        self.meta_data = self.doi2Citeproc()
    return Citeproc.asScite(self.meta_data, retrieved_from="https://doi.org/")

dataCiteLookup()

get the dataCite json result for my doi

Source code in skg/doi.py
128
129
130
131
132
133
134
def dataCiteLookup(self):
    """
    get the dataCite json result for my doi

    Returns:
        the json data fetched from the DataCite dois API
    """
    datacite_headers = {"Accept": "application/vnd.api+json; charset=utf-8"}
    return self.fetch_json(f"https://api.datacite.org/dois/{self.doi}", datacite_headers)

doi2Citeproc()

get the Citeproc JSON result for my doi (see https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html)

Source code in skg/doi.py
119
120
121
122
123
124
125
126
def doi2Citeproc(self):
    """
    get the Citeproc JSON result for my doi
    see https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html

    Returns:
        the json data fetched from doi.org with a CSL JSON Accept header
    """
    csl_headers = {"Accept": "application/vnd.citationstyles.csl+json; charset=utf-8"}
    return self.fetch_json(f"https://doi.org/{self.doi}", csl_headers)

doi2bibTex()

get the bibtex result for my doi

Source code in skg/doi.py
111
112
113
114
115
116
117
def doi2bibTex(self):
    """
    get the bibtex result for my doi

    Returns:
        the text fetched from doi.org with a bibtex Accept header
    """
    bibtex_headers = {"Accept": "application/x-bibtex; charset=utf-8"}
    return self.fetch_text(f"https://doi.org/{self.doi}", bibtex_headers)

fetch_json(url, headers)

fetch json for the given url with the given headers

Parameters:

Name Type Description Default
url(str)

the url to fetch the data for

required
headers(dict)

the headers to use

required

Returns:

Name Type Description
json

json data

Source code in skg/doi.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
def fetch_json(self, url: str, headers: dict):
    """
    fetch json for the given url with the given headers

    The text is retrieved with fetch_text and then parsed.

    Args:
        url(str): the url to fetch the data for
        headers(dict): the headers to use

    Returns:
        json: json data
    """
    return json.loads(self.fetch_text(url, headers))

fetch_response(url, headers)

fetch response for the given url with the given headers

Parameters:

Name Type Description Default
url(str)

the url to fetch the data for

required
headers(dict)

the headers to use

required
Source code in skg/doi.py
61
62
63
64
65
66
67
68
69
70
71
def fetch_response(self, url: str, headers: dict):
    """
    fetch response for the given url with the given headers

    Args:
        url(str): the url to fetch the data for
        headers(dict): the headers to use

    Returns:
        the response as returned by urllib.request.urlopen
    """
    request = urllib.request.Request(url, headers=headers)
    return urllib.request.urlopen(request)

fetch_text(url, headers)

fetch text for the given url with the given headers

Parameters:

Name Type Description Default
url(str)

the url to fetch the data for

required
headers(dict)

the headers to use

required

Returns:

Name Type Description
str str

the text

Source code in skg/doi.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def fetch_text(self, url, headers) -> str:
    """
    fetch text for the given url with the given headers

    Args:
        url(str): the url to fetch the data for
        headers(dict): the headers to use

    Returns:
        str: the text
    """
    # close the response as soon as the content is read so that the
    # connection is not leaked
    with self.fetch_response(url, headers) as response:
        # decode with the charset announced by the server - utf-8 as fallback
        encoding = response.headers.get_content_charset("utf-8")
        content = response.read()
    text = content.decode(encoding)
    return text

isDOI(doi) classmethod

check that the given string is a doi

Parameters:

Name Type Description Default
doi(str)

the potential DOI string

required
Source code in skg/doi.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
@classmethod
def isDOI(cls, doi: str):
    """
    check that the given string is a doi

    A list is accepted as well: it is valid if it is not empty and
    every item is a valid doi.

    Args:
        doi(str): the potential DOI string

    Returns:
        bool: True if the given value is a doi (or a list of dois)
    """
    # None, "" and [] can never be a doi
    if not doi:
        return False
    if isinstance(doi, list):
        return all(cls.isDOI(candidate) for candidate in doi)
    if isinstance(doi, str):
        return DOI(doi).ok
    return False

event

Created on 2022-11-16

@author: wf

Event

Bases: Node

an instance of a scientific event

Source code in skg/event.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class Event(skg.graph.Node):
    """
    a single scientific event
    """

    @classmethod
    def getSamples(cls):
        """
        get sample records for scientific events

        Returns:
            list: a list of sample dicts
        """
        wikidata_workshop = {
            "wikiDataId": "Q112055391",
            "title": "The Third Wikidata Workshop",
            "location": "Hangzhou",
            "point_in_time": "2022-10-24",
            "official_website": "https://wikidataworkshop.github.io/2022/",
        }
        return [wikidata_workshop]

EventSeries

Bases: Node

an instance of an academic event series

Source code in skg/event.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
class EventSeries(skg.graph.Node):
    """
    an academic event series
    """

    @classmethod
    def getSamples(cls):
        """
        get sample records for academic event series

        Returns:
            list: a list of sample dicts
        """
        iswc = {"wikiDataId": "Q6053150", "short_name": "ISWC"}
        ecdl = {
            "wikiDataId": "Q105491257",
            "short_name": "ECDL",
            "title": "European Conference on Research and Advanced Technology for Digital Libraries (English)",
            "official_website": "http://ecdlconference.isti.cnr.it/",
        }
        vnc = {
            "wikiDataId": "Q105695678",
            "short_name": "VNC (English)",
            "DBLP_venue_ID": "conf/vnc",
            "VIAF_ID": "267408611",
            "title": "IEEE Vehicular Networking Conference",
        }
        eswc = {
            "wikiDataId": "Q17012957",
            "short_name": "ESWC",
            "inception": "2004",
            "gndId": "1091749205",
        }
        return [iswc, ecdl, vnc, eswc]

Proceedings

Bases: Node

Proceedings of an event

Source code in skg/event.py
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
class Proceedings(skg.graph.Node):
    """
    the Proceedings published for an event
    """

    @classmethod
    def getSamples(cls):
        """
        get sample records for proceedings

        Returns:
            list: a list with one dict holding example values for
            the fields of a proceedings volume
        """
        wikidata_workshop_2022 = {
            "wikiDataId": "Q115053286",
            "short_name": "Wikidata 2022 (English)",
            "title": "Proceedings of the 3rd Wikidata Workshop 2022 (English)",
            "publication_date": "2022-11-03",
            "full_work_available_at_URL": "http://ceur-ws.org/Vol-3262/",
        }
        return [wikidata_workshop_2022]

graph

Created on 2022-11-16

@author: wf

Concept

an Entity

Source code in skg/graph.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
class Concept:
    """
    an Entity with a name, an implementing class and named properties
    """

    def __init__(self, name: str, cls):
        """
        constructor

        If the given class offers getSamples, one Property is created
        for every distinct key found in its sample records.

        Args:
            name(str): the name of the node
            cls: a class
        """
        self.name = name
        self.cls = cls
        self.props = {}
        if not hasattr(cls, "getSamples"):
            return
        for sample_record in cls.getSamples():
            for prop_name in sample_record.keys():
                if prop_name in self.props:
                    # already registered by an earlier sample
                    continue
                self.props[prop_name] = Property(self, prop_name)

    def map(self, map_name: str, map_list: list):
        """
        register the given property mappings under the given map_name

        Entries naming a property this concept does not have are ignored.

        Args:
            map_name(str): the name of the mapping e.g. "wikidata"
            map_list(list): a list of (property name, mapped property) tuples

        Returns:
            Concept: this concept, so that calls can be chained
        """
        for entry in map_list:
            prop_name, mapped_prop = entry
            if prop_name not in self.props:
                continue
            self.props[prop_name].setmap(map_name, mapped_prop)
        return self

    def map_wikidata(self, wd_class: str, scholia_suffix, map_list: list):
        """
        remember the wikidata class and scholia suffix of this concept
        and register the given mappings under the name "wikidata"

        Args:
            wd_class(str): the main wikidata base class
            scholia_suffix(str): the scholia suffix
            map_list(list): a list of (property name, wikidata property) tuples

        Returns:
            Concept: this concept, so that calls can be chained
        """
        self.wd_class, self.scholia_suffix = wd_class, scholia_suffix
        self.map("wikidata", map_list)
        return self

__init__(name, cls)

constructor

Parameters:

Name Type Description Default
name(str)

the name of the node

required
cls

a class

required
Source code in skg/graph.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def __init__(self, name: str, cls):
    """
    constructor

    If the given class offers getSamples, one Property is created
    for every distinct key found in its sample records.

    Args:
        name(str): the name of the node
        cls: a class
    """
    self.name = name
    self.cls = cls
    self.props = {}
    if not hasattr(cls, "getSamples"):
        return
    for sample_record in cls.getSamples():
        for prop_name in sample_record.keys():
            if prop_name in self.props:
                # already registered by an earlier sample
                continue
            self.props[prop_name] = Property(self, prop_name)

map(map_name, map_list)

map the given list of property mappings under the given map_name

Parameters:

Name Type Description Default
map_name(str)

the name of the mapping e.g. "wikidata"

required
map_list(list)

a list of mapping tuples

required
Source code in skg/graph.py
35
36
37
38
39
40
41
42
43
44
45
46
47
def map(self, map_name: str, map_list: list):
    """
    register the given property mappings under the given map_name

    Entries naming a property this concept does not have are ignored.

    Args:
        map_name(str): the name of the mapping e.g. "wikidata"
        map_list(list): a list of (property name, mapped property) tuples

    Returns:
        this object, so that calls can be chained
    """
    for entry in map_list:
        prop_name, mapped_prop = entry
        if prop_name not in self.props:
            continue
        self.props[prop_name].setmap(map_name, mapped_prop)
    return self

map_wikidata(wd_class, scholia_suffix, map_list)

map wikidata entries

Parameters:

Name Type Description Default
wd_class(str)

the main wikidata base class

required
scholia_suffix(str)

the scholia suffix

required
Source code in skg/graph.py
49
50
51
52
53
54
55
56
57
58
59
60
def map_wikidata(self, wd_class: str, scholia_suffix, map_list: list):
    """
    remember the wikidata class and scholia suffix of this concept
    and register the given mappings under the name "wikidata"

    Args:
        wd_class(str): the main wikidata base class
        scholia_suffix(str): the scholia suffix
        map_list(list): a list of (property name, wikidata property) tuples

    Returns:
        this object, so that calls can be chained
    """
    self.wd_class, self.scholia_suffix = wd_class, scholia_suffix
    self.map("wikidata", map_list)
    return self

Node

a Node in the scholarly knowledge graph

Source code in skg/graph.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
class Node:
    """
    a Node in the scholarly knowledge graph
    """

    # when True, from_sparql prints every query before executing it
    debug = False

    def __init__(self):
        """
        constructor
        """

    def __str__(self):
        """
        return a text representation of me

        Returns:
            str: concept name and label followed by one indented
            name=value line per concept property that is set on me
        """
        lines = [f"{self.concept.name} ➞ {self.label}:"]
        lines.extend(
            f"{prop.name}={getattr(self,prop.name)}"
            for prop in self.concept.props.values()
            if hasattr(self, prop.name)
        )
        return "\n  ".join(lines)

    def from_dict(self, concept, record: dict):
        """
        get my values from the given record

        Args:
            concept: the concept I am an instance of
            record(dict): the record to take my values from; the entry
                keyed by the concept name becomes my label

        Raises:
            KeyError: if the record has no entry for the concept name
        """
        self.concept = concept
        self.label = record[concept.name]
        # only properties known to the concept are copied
        for key in concept.props.keys():
            if key in record:
                setattr(self, key, record[key])

    def browser_url(self):
        """
        get my browser url

        Returns:
            str: my scholia url if my provenance is "wikidata",
            otherwise my label
        """
        if self.provenance == "wikidata":
            url = self.scholia_url()
        else:
            url = self.label
        return url

    def scholia_url(self):
        """
        get my scholia url

        Returns:
            str: the scholia url or None if I have no wikiDataId
        """
        prefix = f"https://scholia.toolforge.org/{self.concept.scholia_suffix}"
        wd_url = getattr(self, "wikiDataId", None)
        if wd_url is None:
            return None
        # reduce a full entity URI to the plain Q identifier
        qid = wd_url.replace("http://www.wikidata.org/entity/", "")
        return f"{prefix}/{qid}"

    @classmethod
    def setProvenance(cls, instances: list, provenance: str):
        """
        set the provenance of the given instances

        Args:
            instances(list): the instances to modify
            provenance(str): the provenance to set e.g. "wikidata"
        """
        for instance in instances:
            instance.provenance = provenance

    @classmethod
    def from_sparql(cls, sparql: SPARQL, sparql_query: str, concept: Concept):
        """
        get instances from the given sparql access point with the given sparql_query for
        the given concept

        Args:
            sparql(SPARQL): the sparql access point
            sparql_query(str): the query to execute
            concept(Concept): the concept to create instances for

        Returns:
            list: one instance per record returned by the query
        """
        if Node.debug:
            print(sparql_query)
        records = sparql.queryAsListOfDicts(sparql_query)
        instances = cls.from_records(records, concept)
        return instances

    @classmethod
    def from_records(cls, records: list, concept: Concept):
        """
        get instances from the given records for the given concept

        Args:
            records(list): a list of dicts to get instances for
            concept(Concept): the concept to create instances for

        Returns:
            list: one instance per record
        """
        instances = []
        for record in records:
            # call my constructor
            instance = cls()
            instance.from_dict(concept, record)
            instances.append(instance)
        return instances

    @classmethod
    def from_wikidata_via_id(
        cls, concept: Concept, id_name: str, id_value: str, lang: str = "en"
    ):
        """
        get a node instance from wikidata for the given parameters

        Args:
            concept(Concept): the concept to return
            id_name(str): the name of the id to search / lookup with
            id_value(str): the value of the id
            lang(str): the language code to apply

        Returns:
            list: the instances found, with provenance "wikidata"

        Raises:
            Exception: if a property of the concept other than
                wikiDataId has no wikidata mapping
        """
        wikidata = Wikidata()
        # the item itself is addressed by its entity URI, any other id by a literal
        if id_name == "wikiDataId":
            value_clause = f"<http://www.wikidata.org/entity/{id_value}>"
        else:
            value_clause = f'''"{id_value}"'''
        sparql_query = f"""# Query for {concept.name} details via ID {id_name} value {id_value}
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?{concept.name} ?qId"""
        for prop in concept.props.values():
            sparql_query += f" ?{prop.name}"
        sparql_query += f"""
WHERE {{
  VALUES ?{id_name} {{
    {value_clause}
  }}
  # classification!
  ?wikiDataId wdt:P31/wdt:P279* wd:{concept.wd_class}.
  ?wikiDataId rdfs:label ?{concept.name} .
  FILTER(LANG(?{concept.name})="{lang}").
"""
        for prop in concept.props.values():
            if prop.name == "wikiDataId":
                continue
            if not (prop.hasmap("wikidata")):
                raise Exception(
                    f"Property {prop.name} of {concept.name} has no wikidata mapping"
                )
            wd_prop = prop.getmap("wikidata")
            clause = f"?wikiDataId wdt:{wd_prop} ?{prop.name}."
            # only the property used for the lookup is mandatory
            if prop.name != id_name:
                clause = f"OPTIONAL {{ {clause} }}"
            sparql_query += "\n  " + clause
        sparql_query += "\n}"
        instances = cls.from_sparql(wikidata.sparql, sparql_query, concept)
        cls.setProvenance(instances, "wikidata")
        return instances

    @classmethod
    def from_dblp_via_id(
        cls, concept: Concept, id_name: str, id_value: str, lang: str = "en"
    ):
        """
        get a node instance from dblp for the given parameters

        Args:
            concept(Concept): the concept to return
            id_name(str): the name of the id to search / lookup with
            id_value(str): the value of the id
            lang(str): the language code to apply (not used by this query)

        Returns:
            list: the instances found, with provenance "dblp"
        """
        dblp = Dblp()
        sparql_query = f"""
PREFIX dblp: <https://dblp.org/rdf/schema#>
SELECT 
  ?{concept.name}"""
        for prop in concept.props.values():
            if prop.hasmap("dblp"):
                sparql_query += f" ?{prop.name}"
        # doi and orcid are looked up as URIs, any other id as a literal
        if id_name == "doi":
            value_clause = f"<http://dx.doi.org/{id_value}>"
        elif id_name == "orcid":
            value_clause = f"<https://orcid.org/{id_value}>"
        else:
            value_clause = f'''"{id_value}"'''
        sparql_query += f"""
WHERE {{
  VALUES ?{id_name} {{
    {value_clause}
  }}
"""
        for prop in concept.props.values():
            if prop.hasmap("dblp"):
                dblp_prop = prop.getmap("dblp")
                # bind the variable named after the property so that it
                # matches the SELECT projection and the VALUES variable
                sparql_query += f"""?{concept.name} dblp:{dblp_prop} ?{prop.name}.\n"""
        sparql_query += "}\n"
        instances = cls.from_sparql(dblp.sparql, sparql_query, concept)
        cls.setProvenance(instances, "dblp")
        return instances

__init__()

constructor

Source code in skg/graph.py
110
111
112
113
def __init__(self):
    """
    constructor

    takes no arguments and contains no statements, so a new
    instance starts without any instance attributes
    """

__str__()

return a text representation of me

Source code in skg/graph.py
115
116
117
118
119
120
121
122
123
124
def __str__(self):
    """
    return a text representation of me

    Returns:
        str: concept name and label followed by one indented
        name=value line per concept property that is set on me
    """
    lines = [f"{self.concept.name} ➞ {self.label}:"]
    lines.extend(
        f"{prop.name}={getattr(self,prop.name)}"
        for prop in self.concept.props.values()
        if hasattr(self, prop.name)
    )
    return "\n  ".join(lines)

browser_url()

get my browser url

Source code in skg/graph.py
136
137
138
139
140
141
142
143
144
def browser_url(self):
    """
    get the url to show me in a browser

    Returns:
        my scholia url if my provenance is "wikidata", otherwise my label
    """
    return self.scholia_url() if self.provenance == "wikidata" else self.label

from_dblp_via_id(concept, id_name, id_value, lang='en') classmethod

get a node instance from dblp for the given parameters

Parameters:

Name Type Description Default
concept(Concept)

the concept to return

required
id_name(str)

the name of the id to search / lookup with

required
id_value(str)

the value of the id

required
lang(str)

the language code to apply

'en'
Source code in skg/graph.py
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
    @classmethod
    def from_dblp_via_id(
        cls, concept: Concept, id_name: str, id_value: str, lang: str = "en"
    ):
        """
        get a node instance from dblp for the given parameters

        Args:
            concept(Concept): the concept to return
            id_name(str): the name of the id to search / lookup with
            id_value(str): the value of the id
            lang(str): the language code to apply
        """
        dblp = Dblp()
        sparql_query = f"""
PREFIX dblp: <https://dblp.org/rdf/schema#>
SELECT 
  ?{concept.name}"""
        for prop in concept.props.values():
            if prop.hasmap("dblp"):
                sparql_query += f" ?{prop.name}"
        if id_name == "doi":
            value_clause = f"<http://dx.doi.org/{id_value}>"
        elif id_name == "orcid":
            value_clause = f"<https://orcid.org/{id_value}>"
        else:
            value_clause = f'''"{id_value}"'''
        sparql_query += f"""
WHERE {{
  VALUES ?{id_name} {{
    {value_clause}
  }}
"""
        for prop in concept.props.values():
            if prop.hasmap("dblp"):
                dblp_prop = prop.getmap("dblp")
                sparql_query += f"""?{concept.name} dblp:{dblp_prop} ?{dblp_prop}.\n"""
        sparql_query += "}\n"
        instances = cls.from_sparql(dblp.sparql, sparql_query, concept)
        cls.setProvenance(instances, "dblp")
        return instances

from_dict(concept, record)

get my values from the given record

Source code in skg/graph.py
126
127
128
129
130
131
132
133
134
def from_dict(self, concept, record: dict):
    """
    get my values from the given record

    Args:
        concept: the concept I am an instance of
        record(dict): the record to take my values from; the entry
            keyed by the concept name becomes my label

    Raises:
        KeyError: if the record has no entry for the concept name
    """
    self.concept = concept
    self.label = record[concept.name]
    # only properties known to the concept are copied
    for key in concept.props.keys():
        if key in record:
            setattr(self, key, record[key])

from_records(records, concept) classmethod

get instances from the given records for the given concept

Parameters:

Name Type Description Default
records(list)

a list of dicts to get instances for

required
concept(Concept)

the concept to create instances for

required
Source code in skg/graph.py
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
@classmethod
def from_records(cls, records: list, concept: Concept):
    """
    create one instance per record for the given concept

    Args:
        records(list): a list of dicts to get instances for
        concept(Concept): the concept to create instances for

    Returns:
        list: the instances in the order of the records
    """

    def build(record):
        # create an empty instance and let it read its values
        node = cls()
        node.from_dict(concept, record)
        return node

    return [build(record) for record in records]

from_sparql(sparql, sparql_query, concept) classmethod

get instance from the given sparql access point with the given sparql_query for the given concept

Parameters:

Name Type Description Default
sparql(SPARQL)

the sparql access point

required
sparql_query(str)

the query to execute

required
concept(Concept)

the concept to create instances for

required
Source code in skg/graph.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
@classmethod
def from_sparql(cls, sparql: SPARQL, sparql_query: str, concept: Concept):
    """
    run the given query against the given sparql access point and
    create instances of the given concept from the result records

    Args:
        sparql(SPARQL): the sparql access point
        sparql_query(str): the query to execute
        concept(Concept): the concept to create instances for

    Returns:
        list: the instances created from the query result
    """
    if Node.debug:
        # show the query before it is sent
        print(sparql_query)
    return cls.from_records(sparql.queryAsListOfDicts(sparql_query), concept)

from_wikidata_via_id(concept, id_name, id_value, lang='en') classmethod

get a node instance from wikidata for the given parameters

Parameters:

Name Type Description Default
concept(Concept)

the concept to return

required
id_name(str)

the name of the id to search / lookup with

required
id_value(str)

the value of the id

required
lang(str)

the language code to apply

'en'
Source code in skg/graph.py
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
    @classmethod
    def from_wikidata_via_id(
        cls, concept: Concept, id_name: str, id_value: str, lang: str = "en"
    ):
        """
        get a node instance from wikidata for the given parameters

        Args:
            concept(Concept): the concept to return
            id_name(str): the name of the id to search / lookup with
            id_value(str): the value of the id
            lang(str): the language code to apply
        """
        wikidata = Wikidata()
        if id_name == "wikiDataId":
            value_clause = f"<http://www.wikidata.org/entity/{id_value}>"
        else:
            value_clause = f'''"{id_value}"'''
        sparql_query = f"""# Query for {concept.name} details via ID {id_name} value {id_value}
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?{concept.name} ?qId"""
        for prop in concept.props.values():
            sparql_query += f" ?{prop.name}"
        sparql_query += f"""
WHERE {{
  VALUES ?{id_name} {{
    {value_clause}
  }}
  # classification!
  ?wikiDataId wdt:P31/wdt:P279* wd:{concept.wd_class}.
  ?wikiDataId rdfs:label ?{concept.name} .
  FILTER(LANG(?{concept.name})="{lang}").
"""
        for prop in concept.props.values():
            if prop.name == "wikiDataId":
                continue
            if not (prop.hasmap("wikidata")):
                raise Exception(
                    f"Property {prop.name} of {concept.name} has no wikidata mapping"
                )
            wd_prop = prop.getmap("wikidata")
            clause = f"?wikiDataId wdt:{wd_prop} ?{prop.name}."
            if prop.name != id_name:
                clause = f"OPTIONAL {{ {clause} }}"
            sparql_query += "\n  " + clause
        sparql_query += "\n}"
        instances = cls.from_sparql(wikidata.sparql, sparql_query, concept)
        cls.setProvenance(instances, "wikidata")
        return instances

scholia_url()

get my scholia url

Source code in skg/graph.py
146
147
148
149
150
151
152
153
154
155
156
def scholia_url(self):
    """
    get the scholia url for my wikidata item

    Returns:
        the scholia url or None if I have no wikiDataId attribute
    """
    base_url = f"https://scholia.toolforge.org/{self.concept.scholia_suffix}"
    entity_url = getattr(self, "wikiDataId", None)
    if entity_url is None:
        return None
    # reduce a full entity URI to the plain Q identifier
    qid = entity_url.replace("http://www.wikidata.org/entity/", "")
    return f"{base_url}/{qid}"

setProvenance(instances, provenance) classmethod

set the provenance of the given instances

Source code in skg/graph.py
158
159
160
161
162
163
164
@classmethod
def setProvenance(cls, instances: list, provenance: str):
    """
    mark each of the given instances with the given provenance

    Args:
        instances(list): the instances to modify
        provenance(str): the value to store in the provenance attribute
    """
    for node in instances:
        setattr(node, "provenance", provenance)

Property

a Property

Source code in skg/graph.py
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
class Property:
    """
    a named Property of a Concept with mappings to other naming schemes
    """

    def __init__(self, concept: Concept, name: str):
        """
        constructor

        Args:
            concept(Concept): the concept this property belongs to
            name(str): the name of the property

        """
        self.concept, self.name = concept, name
        # mapped property per map name, filled by setmap
        self.maps = {}

    def setmap(self, map_name, mapped_prop):
        """
        store the mapped property for the given map name

        Args:
            map_name: the name of the mapping
            mapped_prop: the property this one maps to
        """
        self.maps.update({map_name: mapped_prop})

    def getmap(self, map_name):
        """
        get the mapped property for the given map name

        Args:
            map_name: the name of the mapping

        Returns:
            the mapped property

        Raises:
            KeyError: if there is no mapping for the given map name
        """
        mapped_prop = self.maps[map_name]
        return mapped_prop

    def hasmap(self, map_name: str) -> bool:
        """
        tell whether a mapping exists for the given map_name

        Args:
            map_name(str): the map name to check

        Returns:
            bool: True if there is mapping
        """
        is_mapped = map_name in self.maps
        return is_mapped

__init__(concept, name)

constructor

Parameters:

Name Type Description Default
concept(Concept)

the concept this property belongs to

required
name(str)

the name of the property

required
Source code in skg/graph.py
68
69
70
71
72
73
74
75
76
77
78
79
def __init__(self, concept: Concept, name: str):
    """
    constructor

    Args:
        concept(Concept): the concept this property belongs to
        name(str): the name of the property

    """
    self.concept, self.name = concept, name
    # mapped property per map name, starts empty
    self.maps = {}

hasmap(map_name)

check whether there is a mapping for the given map_name

Parameters:

Name Type Description Default
map_name(str)

the map name to check

required

Returns:

Name Type Description
bool bool

True if there is mapping

Source code in skg/graph.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def hasmap(self, map_name: str) -> bool:
    """
    tell whether a mapping exists for the given map_name

    Args:
        map_name(str): the map name to check

    Returns:
        bool: True if there is mapping
    """
    is_mapped = map_name in self.maps
    return is_mapped

setmap(map_name, mapped_prop)

map the given property

Source code in skg/graph.py
81
82
83
84
85
def setmap(self, map_name, mapped_prop):
    """
    store the mapped property for the given map name

    Args:
        map_name: the name of the mapping
        mapped_prop: the property this one maps to
    """
    self.maps.update({map_name: mapped_prop})

kg

Created on 2022-11-16

@author: wf

SKG_Def

scholarly knowledge graph

Source code in skg/kg.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
class SKG_Def:
    """
    scholarly knowledge graph definition
    """

    def __init__(self):
        """
        constructor

        creates the concepts of the graph, registers their property
        mappings and indexes the concepts by their wikidata class

        Raises:
            Exception: if two concepts share the same wikidata class
        """
        self.concepts = {
            # main concepts
            "Scholar": Concept(name="Scholar", cls=Scholar),
            "Institution": Concept(name="Institution", cls=Institution),
            "Paper": Concept(name="Paper", cls=Paper),
            "Event": Concept(name="Event", cls=Event),
            "EventSeries": Concept(name="EventSeries", cls=EventSeries),
            "Proceedings": Concept(name="Proceedings", cls=Proceedings),
            # neighbour concepts
            "Country": Concept(name="Country", cls=Country),
        }
        self.concepts["Scholar"].map_wikidata(
            "Q5",
            "author",
            [
                ("name", "label"),
                ("dblpId", "P2456"),
                ("gndId", "P227"),
                ("linkedInId", "P6634"),
                ("homepage", "P856"),
                ("googleScholarUser", "P1960"),
                ("orcid", "P496"),
                ("givenName", "P735"),
                ("familyName", "P734"),
                ("gender", "P21"),
                ("image", "P18"),
                ("occupation", "P106"),
                ("Semantic_Scholar_author_ID", "P4012"),
            ],
        ).map(
            "dblp",
            [
                ("name", "primaryCreatorName"),
                ("homepage", "primaryHomepage"),
                ("orcid", "orcid"),
            ],
        ).map(
            "smw",
            [
                ("wikiDataId", "wikiDataId"),
                ("familyName", "name"),
                ("givenName", "firstName"),
                ("googleScholarUser", "googleScholarUser"),
                ("homepage", "homepage"),
                ("dblpId", "dblpId"),
                ("orcid", "orcid"),
                ("linkedInId", "linkedInId"),
            ],
        )
        self.concepts["Institution"].map_wikidata(
            "Q4671277",
            "organization",
            [
                ("short_name", "P1813"),  # 2.0 %
                ("inception", "P571"),  # 65.8 %
                ("image", "P18"),  # 15.2 %
                ("country", "P17"),  # 88.8 %
                ("located_in", "P131"),  # 51.9 %
                ("official_website", "P856"),  # 59.1%
                ("coordinate_location", "P625"),  # 44.0 %
            ],
        )
        self.concepts["Paper"].map_wikidata(
            "Q13442814",
            "work",
            [
                ("title", "label"),
                ("doi", "P356"),
                ("DBLP_publication_ID", "P8978"),
                ("publication_date", "P577"),
            ],
        ).map("dblp", [("title", "title"), ("doi", "doi")])
        # scientific event
        self.concepts["Event"].map_wikidata(
            "Q52260246",
            "event",
            [
                ("title", "P1476"),
                ("country", "P17"),  # 93.9% -> Human Settlement
                ("location", "P276"),  # 94.6%
                ("point_in_time", "P585"),
                ("official_website", "P856"),
            ],
        )
        # academic event series
        self.concepts["EventSeries"].map_wikidata(
            "Q47258130",
            "event-series",
            [
                ("title", "P1476"),  # 96.7 %
                ("short_name", "P1813"),  # 93.1 %
                ("VIAF_ID", "P214"),  # 60.5 %
                ("DBLP_venue_ID", "P8926"),  # 96.4 %
                ("gndId", "P227"),  # 42.3 %
                ("inception", "P571"),  # 22.3 %
                ("official_website", "P856"),  # 13.5 %
            ],
        )
        # proceedings
        self.concepts["Proceedings"].map_wikidata(
            "Q1143604",
            "venue",
            [
                ("title", "P1476"),
                ("short_name", "P1813"),
                ("full_work_available_at_URL", "P953"),
                ("publication_date", "P577"),
            ],
        )
        # country
        self.concepts["Country"].map_wikidata(
            "Q6256",
            "topic",
            [
                ("name", "label"),  # 100% ?
                ("homepage", "P856"),  # 49.4%
                ("population", "P1082"),  # 57.4%
                ("capital", "P36"),  # 59.8%
                ("coordinate_location", "P625"),  # 58.6%
                ("iso_code", "P297"),  # 53.3%
            ],
        )

        # index the concepts by wikidata class; each class may be used only once
        self.concepts_by_qid = {}
        for concept in self.concepts.values():
            if concept.wd_class in self.concepts_by_qid:
                raise Exception(f"duplicate wd_class definition: {concept.wd_class}")
            self.concepts_by_qid[concept.wd_class] = concept

    def conceptForQid(self, qid: str) -> Concept:
        """
        get the concept for the given wikidata Q Identifier

        Args:
            qid(str): get the concept for the given Qid

        Return:
            Concept: or None if none is found
        """
        return self.concepts_by_qid.get(qid, None)

    def toPlantuml(self, header: str = None, footer: str = None) -> str:
        """
        get a plantuml version of this knowledge graph

        Args:
            header(str): the header to apply; if None a default header
                built from the Version information is used
            footer(str): the footer to apply; if None the closing
                brace of the default package is used

        Returns:
            str: the plantuml markup

        """
        # utcnow() is deprecated, use an aware UTC timestamp instead
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
        if header is None:
            header = f"""/'{Version.name}:{Version.description}
updated {timestamp}

authors:{Version.authors} 
'/
title  {Version.name}:{Version.description} see {Version.doc_url} updated {timestamp}
hide circle
package skg {{
"""
        if footer is None:
            footer = "}\n"
        indent = "  "
        parts = [f"{header}"]
        for concept_name, concept in self.concepts.items():
            parts.append(f"""{indent}class {concept_name} {{\n""")
            parts.extend(f"""{indent}  {prop_name}\n""" for prop_name in concept.props)
            parts.append(f"""\n{indent}}}\n""")
        parts.append(f"{footer}")
        return "".join(parts)

    def toSiDiF(self) -> str:
        """
        convert me to SiDiF format

        Returns:
            str: one Topic declaration per concept with its quoted name
        """
        sidif = ""
        for concept_name in self.concepts:
            # the name literal needs both its opening and its closing quote
            sidif += f"""#
# {concept_name}
#
{concept_name} isA Topic
"{concept_name}" is name of it
"""
        return sidif

__init__()

constructor

Source code in skg/kg.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
def __init__(self):
    """
    constructor

    Sets up the concepts of this knowledge graph by name, registers
    the property mappings of each concept and finally builds the
    lookup of concepts by their wd_class.

    NOTE(review): the arguments of map_wikidata look like the wikidata
    class Q-id, a concept type name and (property name, wikidata
    property id) pairs - confirm against the Concept class.

    Raises:
        Exception: if two concepts share the same wd_class
    """
    concept_classes = [
        # main concepts
        ("Scholar", Scholar),
        ("Institution", Institution),
        ("Paper", Paper),
        ("Event", Event),
        ("EventSeries", EventSeries),
        ("Proceedings", Proceedings),
        # neighbour concepts
        ("Country", Country),
    ]
    self.concepts = {
        concept_name: Concept(name=concept_name, cls=concept_cls)
        for concept_name, concept_cls in concept_classes
    }

    # scholar
    scholar_wikidata_props = [
        ("name", "label"),
        ("dblpId", "P2456"),
        ("gndId", "P227"),
        ("linkedInId", "P6634"),
        ("homepage", "P856"),
        ("googleScholarUser", "P1960"),
        ("orcid", "P496"),
        ("givenName", "P735"),
        ("familyName", "P734"),
        ("gender", "P21"),
        ("image", "P18"),
        ("occupation", "P106"),
        ("Semantic_Scholar_author_ID", "P4012"),
    ]
    scholar_dblp_props = [
        ("name", "primaryCreatorName"),
        ("homepage", "primaryHomepage"),
        ("orcid", "orcid"),
    ]
    scholar_smw_props = [
        ("wikiDataId", "wikiDataId"),
        ("familyName", "name"),
        ("givenName", "firstName"),
        ("googleScholarUser", "googleScholarUser"),
        ("homepage", "homepage"),
        ("dblpId", "dblpId"),
        ("orcid", "orcid"),
        ("linkedInId", "linkedInId"),
    ]
    # each mapping call is applied to the result of the previous one
    scholar_mapping = self.concepts["Scholar"].map_wikidata(
        "Q5", "author", scholar_wikidata_props
    )
    scholar_mapping = scholar_mapping.map("dblp", scholar_dblp_props)
    scholar_mapping.map("smw", scholar_smw_props)

    # institution
    institution_wikidata_props = [
        ("short_name", "P1813"),  # 2.0 %
        ("inception", "P571"),  # 65.8 %
        ("image", "P18"),  # 15.2 %
        ("country", "P17"),  # 88.8 %
        ("located_in", "P131"),  # 51.9 %
        ("official_website", "P856"),  # 59.1%
        ("coordinate_location", "P625"),  # 44.0 %
    ]
    self.concepts["Institution"].map_wikidata(
        "Q4671277", "organization", institution_wikidata_props
    )

    # paper
    paper_wikidata_props = [
        ("title", "label"),
        ("doi", "P356"),
        ("DBLP_publication_ID", "P8978"),
        ("publication_date", "P577"),
    ]
    paper_dblp_props = [("title", "title"), ("doi", "doi")]
    paper_mapping = self.concepts["Paper"].map_wikidata(
        "Q13442814", "work", paper_wikidata_props
    )
    paper_mapping.map("dblp", paper_dblp_props)

    # scientific event
    event_wikidata_props = [
        ("title", "P1476"),
        ("country", "P17"),  # 93.9% -> Human Settlement
        ("location", "P276"),  # 94.6%
        ("point_in_time", "P585"),
        ("official_website", "P856"),
    ]
    self.concepts["Event"].map_wikidata("Q52260246", "event", event_wikidata_props)

    # academic event series
    event_series_wikidata_props = [
        ("title", "P1476"),  # 96.7 %
        ("short_name", "P1813"),  # 93.1 %
        ("VIAF_ID", "P214"),  # 60.5 %
        ("DBLP_venue_ID", "P8926"),  # 96.4 %
        ("gndId", "P227"),  # 42.3 %
        ("inception", "P571"),  # 22.3 %
        ("official_website", "P856"),  # 13.5 %
    ]
    self.concepts["EventSeries"].map_wikidata(
        "Q47258130", "event-series", event_series_wikidata_props
    )

    # proceedings
    proceedings_wikidata_props = [
        ("title", "P1476"),
        ("short_name", "P1813"),
        ("full_work_available_at_URL", "P953"),
        ("publication_date", "P577"),
    ]
    self.concepts["Proceedings"].map_wikidata(
        "Q1143604", "venue", proceedings_wikidata_props
    )

    # country
    country_wikidata_props = [
        ("name", "label"),  # 100% ?
        ("homepage", "P856"),  # 49.4%
        ("population", "P1082"),  # 57.4%
        ("capital", "P36"),  # 59.8%
        ("coordinate_location", "P625"),  # 58.6%
        ("iso_code", "P297"),  # 53.3%
    ]
    self.concepts["Country"].map_wikidata("Q6256", "topic", country_wikidata_props)

    # lookup of the concepts by wd_class - every wd_class may only be used once
    self.concepts_by_qid = {}
    for concept in self.concepts.values():
        if concept.wd_class in self.concepts_by_qid:
            raise Exception(f"duplicate wd_class definition: {concept.wd_class}")
        self.concepts_by_qid[concept.wd_class] = concept

conceptForQid(qid)

get the concept for the given wikidata Q Identifier

Parameters:

Name Type Description Default
qid(str)

get the concept for the given Qid

required
Return

Concept: or None if none is found

Source code in skg/kg.py
155
156
157
158
159
160
161
162
163
164
165
166
def conceptForQid(self, qid: str) -> Concept:
    """
    look up the concept registered for a wikidata Q identifier

    Args:
        qid(str): the wikidata Q identifier to look up

    Return:
        Concept: the matching concept or None if none is found
    """
    # dict.get already yields None for an unknown qid
    return self.concepts_by_qid.get(qid)

toPlantuml(header=None, footer=None)

get a plantuml version of this knowledge graph

Parameters:

Name Type Description Default
header(str)

the header to apply

required
footer(str)

the footer to apply

required

Returns:

Name Type Description
str str

the plantuml markup

Source code in skg/kg.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
    def toPlantuml(self, header: str = None, footer: str = None) -> str:
        """
        get a plantuml version of this knowledge graph

        Every concept is rendered as a class whose members are the names
        of the concept's properties. The classes are emitted between the
        header and the footer.

        Args:
            header(str): the header to apply - if None a default header with
                the version info, an update date (UTC) and the opening of
                the package "skg" is generated
            footer(str): the footer to apply - if None a closing brace
                followed by a newline is used

        Returns:
            str: the plantuml markup

        """
        if header is None:
            # the timestamp is only needed for the default header;
            # datetime.utcnow() is deprecated since Python 3.12 so a
            # timezone aware UTC timestamp is used instead
            timestamp = datetime.datetime.now(datetime.timezone.utc).strftime(
                "%Y-%m-%d"
            )
            header = (
                f"/'{Version.name}:{Version.description}\n"
                f"updated {timestamp}\n"
                "\n"
                f"authors:{Version.authors} \n"
                "'/\n"
                f"title  {Version.name}:{Version.description} see {Version.doc_url} updated {timestamp}\n"
                "hide circle\n"
                "package skg {\n"
            )
        if footer is None:
            footer = "}\n"
        indent = "  "
        # collect the markup fragments and join them once at the end
        parts = [f"{header}"]
        for concept_name, concept in self.concepts.items():
            parts.append(f"{indent}class {concept_name} {{\n")
            # only the property names are rendered - the values are ignored
            for prop_name, _prop in concept.props.items():
                parts.append(f"{indent}  {prop_name}\n")
            parts.append(f"\n{indent}}}\n")
        parts.append(f"{footer}")
        return "".join(parts)

toSiDiF()

convert me to SiDiF format

Source code in skg/kg.py
203
204
205
206
207
208
209
210
211
212
213
214
215
    def toSiDiF(self) -> str:
        """
        convert me to SiDiF format

        For every concept a comment header, an "isA Topic" declaration
        and a quoted name literal are emitted.

        Returns:
            str: the SiDiF markup for all concepts - empty if there
            are no concepts
        """
        sidif_parts = []
        # only the concept names are needed
        for concept_name in self.concepts:
            sidif_parts.append(
                "#\n"
                f"# {concept_name}\n"
                "#\n"
                f"{concept_name} isA Topic\n"
                # the literal needs a closing quote as well -
                # the original markup only emitted the opening one
                f'"{concept_name}" is name of it\n'
            )
        return "".join(sidif_parts)

location

Created on 2022-11-21

@author: wf

Country

Bases: Node

an instance of a country

Source code in skg/location.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
class Country(skg.graph.Node):
    """
    an instance of a country
    """

    @classmethod
    def getSamples(cls):
        """
        get sample records for countries

        Returns:
            list: a list with a single dict holding the sample
            values for Singapore
        """
        singapore = {
            "wikiDataId": "Q334",
            "name": "Singapore",
            "iso_code": "SG",
            "homepage": "https://www.gov.sg/",
            "population": 5866139,
            "coordinate_location": "1°18'N, 103°48'E",
        }
        return [singapore]

orcid

Created on 2022-11-19

@author: wf

ORCID

ORCID handling

see e.g. https://info.orcid.org/brand-guidelines/#h-orcid-logos-and-icons https://pub.orcid.org/v3.0/

Source code in skg/orcid.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
class ORCID:
    """
    ORCID handling

    see e.g.
        https://info.orcid.org/brand-guidelines/#h-orcid-logos-and-icons
        https://pub.orcid.org/v3.0/
    """

    # three groups of four digits each followed by a hyphen, then
    # three digits and a final character that is a digit or "X"
    pattern = re.compile(r"^(\d{4}-){3}\d{3}(\d|X)$")

    def __init__(self, orcid: str):
        """
        constructor

        Sets orcid, orcid_num (the orcid without hyphens) and ok.
        ok is False if the orcid does not match the pattern, otherwise
        it is the result of validate(orcid_num).

        Args:
            orcid(str): the orcid
        """
        self.orcid = orcid
        # https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier
        self.orcid_num = orcid.replace("-", "")
        # fullmatch is used since "$" alone would also accept
        # an orcid that is followed by a trailing newline
        match = ORCID.pattern.fullmatch(orcid)
        # NOTE(review): validate is defined outside this class -
        # presumably it checks the check digit, confirm at the import
        self.ok = bool(match) and validate(self.orcid_num)

    @classmethod
    def isORCID(cls, orcid: str) -> bool:
        """
        check that the given string is an ORCID

        Args:
            orcid(str): the potential ORCID string

        Returns:
            bool: True if the string represents a valid ORCID otherwise false
        """
        # None and the empty string can never be an ORCID
        if not orcid:
            return False
        orcid_obj = ORCID(orcid)
        return orcid_obj.ok

    def getMetadata(self, op: str = None) -> dict:
        """
        get the ORCID metadata data

        Args:
            op(str): the https://pub.orcid.org/v3.0/ API
            operation to apply - default is "Fetch record details"

        Returns:
            dict: the dictionary derived from the JSON response

        """
        # the operation is appended as an extra path segment
        op = "" if op is None else f"/{op}"
        url = f"https://pub.orcid.org/v3.0/{self.orcid}{op}"
        r = requests.get(
            url, headers={"User-Agent": "Mozilla/5.0", "accept": "application/json"}
        )
        json_data = r.json()
        return json_data

    def asHtml(self, mode: str = "full", inline: str = "") -> str:
        """
        get a html link to the orcid record decorated with the orcid logo

        Args:
            mode(str): the mode - "full" shows the logo and the url,
                "compact" the logo and the orcid and "inline" the
                given inline text followed by the logo
            inline(str): in inline mode this is the text to be displayed inline

        Returns:
            str: the html code

        Raises:
            ValueError: if the mode is not one of full, compact or inline

        """
        href = f"""https://orcid.org/{self.orcid}"""
        logo = """<img alt="ORCID logo" src="https://info.orcid.org/wp-content/uploads/2019/11/orcid_16x16.png" width="16" height="16" />"""
        if mode == "full":
            html = f"""<a href="{href}">{logo}{href}</a>"""
        elif mode == "compact":
            html = f"""<a href="{href}">{logo}{self.orcid}</a>"""
        elif mode == "inline":
            html = f"""<a href="{href}">{inline}{logo}</a>"""
        else:
            # fail with a clear message instead of an UnboundLocalError
            raise ValueError(
                f"invalid mode {mode!r} - expected full, compact or inline"
            )
        return html

__init__(orcid)

constructor

Parameters:

Name Type Description Default
orcid(str)

the orcid

required
Source code in skg/orcid.py
24
25
26
27
28
29
30
31
32
33
34
35
def __init__(self, orcid: str):
    """
    constructor

    Sets orcid, orcid_num (the orcid without hyphens) and ok.
    ok is False if the orcid does not match the pattern, otherwise
    it is the result of validate(orcid_num).

    Args:
        orcid(str): the orcid
    """
    self.orcid = orcid
    # https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier
    self.orcid_num = orcid.replace("-", "")
    # fullmatch is used since "$" alone would also accept
    # an orcid that is followed by a trailing newline
    match = ORCID.pattern.fullmatch(orcid)
    # NOTE(review): validate is defined outside this block -
    # presumably it checks the check digit, confirm at the import
    self.ok = bool(match) and validate(self.orcid_num)

asHtml(mode='full', inline='')

the orcid logo

Parameters:

Name Type Description Default
mode(str)

the mode

required
inline(str)

in inline mode this is the text to be displayed inline

required

Returns:

Name Type Description
str str

the html code

Source code in skg/orcid.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def asHtml(self, mode: str = "full", inline: str = "") -> str:
    """
    get a html link to the orcid record decorated with the orcid logo

    Args:
        mode(str): the mode - "full" shows the logo and the url,
            "compact" the logo and the orcid and "inline" the
            given inline text followed by the logo
        inline(str): in inline mode this is the text to be displayed inline

    Returns:
        str: the html code

    Raises:
        ValueError: if the mode is not one of full, compact or inline

    """
    href = f"""https://orcid.org/{self.orcid}"""
    logo = """<img alt="ORCID logo" src="https://info.orcid.org/wp-content/uploads/2019/11/orcid_16x16.png" width="16" height="16" />"""
    if mode == "full":
        html = f"""<a href="{href}">{logo}{href}</a>"""
    elif mode == "compact":
        html = f"""<a href="{href}">{logo}{self.orcid}</a>"""
    elif mode == "inline":
        html = f"""<a href="{href}">{inline}{logo}</a>"""
    else:
        # fail with a clear message instead of an UnboundLocalError
        raise ValueError(f"invalid mode {mode!r} - expected full, compact or inline")
    return html

getMetadata(op=None)

get the ORCID metadata data

Parameters:

Name Type Description Default
op(str)

the https://pub.orcid.org/v3.0/ API operation to apply - default is "Fetch record details"

required

Returns:

Name Type Description
dict dict

the dictionary derived from the JSON response

Source code in skg/orcid.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def getMetadata(self, op: str = None) -> dict:
    """
    fetch the metadata of this ORCID from the public ORCID API

    Args:
        op(str): the https://pub.orcid.org/v3.0/ API
        operation to apply - default is "Fetch record details"

    Returns:
        dict: the dictionary derived from the JSON response

    """
    # an operation is appended as an extra path segment behind the orcid
    if op is None:
        op_path = ""
    else:
        op_path = f"/{op}"
    request_headers = {"User-Agent": "Mozilla/5.0", "accept": "application/json"}
    response = requests.get(
        f"https://pub.orcid.org/v3.0/{self.orcid}{op_path}",
        headers=request_headers,
    )
    return response.json()

isORCID(orcid) classmethod

check that the given string is an ORCID

Parameters:

Name Type Description Default
orcid(str)

the potential ORCID string

required

Returns:

Name Type Description
bool bool

True if the string represents a valid ORCID otherwise false

Source code in skg/orcid.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
@classmethod
def isORCID(cls, orcid: str) -> bool:
    """
    tell whether the given string is an ORCID

    Args:
        orcid(str): the potential ORCID string

    Returns:
        bool: True if the string represents a valid ORCID otherwise false
    """
    # only a non empty string is worth constructing an ORCID for
    if orcid:
        return ORCID(orcid).ok
    return False

owl

Created on 2022-11-22

@author: wf

Owl

Bases: Schema

Web Ontology Language access see https://en.wikipedia.org/wiki/Web_Ontology_Language

Source code in skg/owl.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
class Owl(Schema):
    """
    Web Ontology Language access
    see https://en.wikipedia.org/wiki/Web_Ontology_Language
    """

    def __init__(self, name: str, url: str, authors: str, inception: str):
        """
        constructor

        Args:
            name(str): the name of this schema
            url(str): the url of this schema
            authors(str): the authors of this schema
            inception(str): the inception of this schema
        """
        Schema.__init__(self, name, url, authors, inception)
        self.schema_url = url
        # the schema graph is only available after loadSchema has been called
        self.schema = None

    def show_triples(self, result):
        """
        show the triples for the given query result

        Args:
            result: the iterable query result - each row is printed
                with its index starting at 1
        """
        for i, row in enumerate(result):
            print(f"{i+1}:{row}")

    def query_schema(self, query: str, formats: str = "", profile: bool = False):
        """
        query the schema

        Args:
            query(str): the SPARQL query to execute
            formats(str): if "triples" is in the format string show the results string
            profile(bool): if True show timing information for the query

        Returns:
            the result of querying the schema graph
        """
        profiler = Profiler(f"query {query}", profile=profile)
        result = self.schema.query(query)
        if "triples" in formats:
            self.show_triples(result)
        if profile:
            profiler.time(f" for {len(result)} triples")
        return result

    def loadSchema(self, formats: str = "", profile: bool = False):
        """
        load the schema

        The schema is parsed from the schema url as RDF/XML, dumped in
        each of the given formats and finally all its triples are queried.

        Args:
            formats(str): the comma separated formats to dump -
                "triples" shows the triples of the query result instead
                of a serialization
            profile(bool): if True show timing

        Returns:
            the graph holding the loaded schema
        """
        # https://stackoverflow.com/questions/56631109/how-to-parse-and-load-an-ontology-in-python
        profiler = Profiler(f"reading {self.name} schema", profile=profile)
        self.schema = rdflib.Graph()
        self.schema.parse(self.schema_url, format="application/rdf+xml")
        if profile:
            profiler.time(f" for {len(self.schema)} triples")
        for t_format in formats.split(","):
            # "triples" is handled by query_schema below
            if t_format and t_format != "triples":
                print(self.schema.serialize(format=t_format))
        self.schema.bind("owl", OWL)
        query = "select distinct ?s ?p ?o \nwhere { ?s ?p ?o}\n"
        self.query_schema(query, formats=formats, profile=profile)
        return self.schema

    def unprefix_value(self, value: object, prefixes: list = None) -> str:
        """
        get rid of RDF prefixes to simplify our life

        A list is reduced to its first element and a dict to its "@id"
        (or else "@value") entry. For a string with exactly one "#" only
        the part behind the "#" is kept and the given prefixes are removed.

        Args:
            value(object): the RDFLib value to unprefix
            prefixes(list): list of prefixes to remove - if None
                the FOAF namespace "http://xmlns.com/foaf/0.1/" is used
        Returns:
            str: a simple string representation - a value that does not
            boil down to a string (e.g. a number or an empty list) is
            returned as is
        """
        if prefixes is None:
            # avoid a shared mutable default argument
            prefixes = ["http://xmlns.com/foaf/0.1/"]
        if isinstance(value, list):
            if len(value) >= 1:
                value = value[0]
        if isinstance(value, dict):
            for akey in ["@id", "@value"]:
                if akey in value:
                    value = value[akey]
                    # stop here - otherwise the next key would be
                    # searched in the extracted value instead of the dict
                    break
        if not isinstance(value, str):
            # only strings can carry a prefix
            return value
        parts = value.split("#")
        if len(parts) == 2:
            value = parts[1]
        for prefix in prefixes:
            if value.startswith(prefix):
                value = value.replace(prefix, "")
        return value

    def unprefix_row(self, row: dict):
        """
        get rid of the RDF prefixes in keys and values of the given row
        to simplify our life

        The row is modified in place. The original value of each key is
        kept under the original key with the suffix "_rdf".

        Args:
            row(dict): a dict of RDF values to unprefix
        """
        # iterate over a snapshot of the keys since the row is modified
        for key in list(row.keys()):
            org_value = row[key]
            value = self.unprefix_value(org_value)
            row[key] = value
            if "#" in key:
                # move the value to the unprefixed key
                noprefix_key = self.unprefix_value(key)
                row[noprefix_key] = row.pop(key)
            row[f"{key}_rdf"] = org_value

    def toClasses(self):
        """
        convert to a classes dict of dicts

        The schema is serialized to JSON-LD and unprefixed. Each row of
        type "Class" becomes an entry with its "@comment", "@label" and
        "@subClassOf"; each row of type "Property" is added to the class
        named by its domain if there is such a class.

        Returns:
            dict: a dict of dictionaries with the classes by name
            wrapped under the key "classes"
        """
        json_ld = self.schema.serialize(format="json-ld")
        schema_dict = json.loads(json_ld)
        classes = {}
        # get rid of prefixes
        for row in schema_dict:
            self.unprefix_row(row)
        # pass 1 - classes
        for row in schema_dict:
            # rows without a type are neither classes nor properties
            if row.get("@type") != "Class":
                continue
            name = row["@id"]
            # the first definition of a class wins
            if name not in classes:
                classes[name] = {
                    "@comment": row.get("comment", ""),
                    "@label": row.get("label", ""),
                    "@subClassOf": row.get("subClassOf", ""),
                }
        # pass 2 - properties
        for row in schema_dict:
            if row.get("@type") != "Property":
                continue
            domain = row.get("domain", "")
            # properties with an unknown domain are ignored
            if domain in classes:
                name = row["@id"]
                classes[domain][name] = {
                    "name": name,
                    "comment": row.get("comment", ""),
                    "label": row.get("label"),
                    "domain": domain,
                    "range": row.get("range", ""),
                }
        wrapped_classes = {"classes": classes}
        return wrapped_classes

__init__(name, url, authors, inception)

constructor

Parameters:

Name Type Description Default
name(str)

the name of this schema

required
url(str)

the url of this schema

required
authors(str)

the authors of this schema

required
inception(str)

the inception of this schema

required
Source code in skg/owl.py
22
23
24
25
26
27
28
29
30
31
32
33
34
def __init__(self, name: str, url: str, authors: str, inception: str):
    """
    constructor

    Initializes the Schema base part with the given values and
    remembers the url as the schema_url to parse the schema from.

    Args:
        name(str): the name of this schema
        url(str): the url of this schema
        authors(str): the authors of this schema
        inception(str): the inception of this schema
    """
    Schema.__init__(self, name, url, authors, inception)
    self.schema_url = url
    # no schema graph is available until it has been loaded
    self.schema = None

loadSchema(formats='', profile=False)

load the schema

Parameters:

Name Type Description Default
formats(str)

the formats to dump

required
profile(bool)

if True show timing

required
Source code in skg/owl.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
    def loadSchema(self, formats: str = "", profile: bool = False):
        """
        parse the schema from the schema url

        The schema is parsed as RDF/XML, dumped in each of the given
        formats and finally all its triples are queried.

        Args:
            formats(str): the comma separated formats to dump -
                "triples" is not dumped as a serialization but passed
                on to the query of all triples
            profile(bool): if True show timing

        Returns:
            the graph holding the loaded schema
        """
        # https://stackoverflow.com/questions/56631109/how-to-parse-and-load-an-ontology-in-python
        profiler = Profiler(f"reading {self.name} schema", profile=profile)
        self.schema = rdflib.Graph()
        self.schema.parse(self.schema_url, format="application/rdf+xml")
        if profile:
            profiler.time(f" for {len(self.schema)} triples")
        # empty entries and "triples" are no serialization formats
        dump_formats = [
            t_format
            for t_format in formats.split(",")
            if t_format and t_format != "triples"
        ]
        for dump_format in dump_formats:
            print(self.schema.serialize(format=dump_format))
        self.schema.bind("owl", OWL)
        # select all triples of the schema
        all_triples_query = "select distinct ?s ?p ?o \nwhere { ?s ?p ?o}\n"
        self.query_schema(all_triples_query, formats=formats, profile=profile)
        return self.schema

query_schema(query, formats='', profile=False)

query the schema

Parameters:

Name Type Description Default
query(str)

the SPARQL query to execute

required
formats(str)

if "triples" is in the format string show the results string

required
profile(bool)

if True show timing information for the query

required
Source code in skg/owl.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
def query_schema(self, query: str, formats: str = "", profile: bool = False):
    """
    run the given query against the schema

    Args:
        query(str): the SPARQL query to execute
        formats(str): if "triples" is in the format string show the results string
        profile(bool): if True show timing information for the query

    Returns:
        the result of querying the schema graph
    """
    profiler = Profiler(f"query {query}", profile=profile)
    query_result = self.schema.query(query)
    show_requested = "triples" in formats
    if show_requested:
        self.show_triples(query_result)
    if profile:
        triple_count = len(query_result)
        profiler.time(f" for {triple_count} triples")
    return query_result

show_triples(result)

show the triples for the given query result

Source code in skg/owl.py
36
37
38
39
40
41
def show_triples(self, result):
    """
    print the rows of the given query result

    Args:
        result: the iterable query result - each row is printed
            on a line of its own prefixed with its index starting at 1
    """
    for row_no, triple in enumerate(result, start=1):
        print(f"{row_no}:{triple}")

toClasses()

convert to a classes dict of dicts

Returns:

Name Type Description
dict

a dict of dictionaries

Source code in skg/owl.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
def toClasses(self):
    """
    convert to a classes dict of dicts

    The schema is serialized to JSON-LD and unprefixed. Each row of
    type "Class" becomes an entry with its "@comment", "@label" and
    "@subClassOf"; each row of type "Property" is added to the class
    named by its domain if there is such a class.

    Returns:
        dict: a dict of dictionaries with the classes by name
        wrapped under the key "classes"
    """
    json_ld = self.schema.serialize(format="json-ld")
    schema_dict = json.loads(json_ld)
    classes = {}
    # get rid of prefixes
    for row in schema_dict:
        self.unprefix_row(row)
    # pass 1 - classes
    for row in schema_dict:
        # rows without a type are neither classes nor properties
        if row.get("@type") != "Class":
            continue
        name = row["@id"]
        # the first definition of a class wins
        if name not in classes:
            classes[name] = {
                "@comment": row.get("comment", ""),
                "@label": row.get("label", ""),
                "@subClassOf": row.get("subClassOf", ""),
            }
    # pass 2 - properties
    for row in schema_dict:
        if row.get("@type") != "Property":
            continue
        domain = row.get("domain", "")
        # properties with an unknown domain are ignored
        if domain in classes:
            name = row["@id"]
            classes[domain][name] = {
                "name": name,
                "comment": row.get("comment", ""),
                "label": row.get("label"),
                "domain": domain,
                "range": row.get("range", ""),
            }
    wrapped_classes = {"classes": classes}
    return wrapped_classes

unprefix_row(row)

get rid of the RDF prefixes in keys and values of the given row to simplify our life

Parameters:

Name Type Description Default
row(dict)

a dict of RDF values to unprefix

required
Source code in skg/owl.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def unprefix_row(self, row: dict):
    """
    strip the RDF prefixes from the keys and values of the given row
    to simplify our life

    The row is modified in place. The original value of each key is
    kept under the original key with the suffix "_rdf".

    Args:
        row(dict): a dict of RDF values to unprefix
    """
    # work on a snapshot of the keys since the row is modified in the loop
    original_keys = list(row.keys())
    for rdf_key in original_keys:
        rdf_value = row[rdf_key]
        row[rdf_key] = self.unprefix_value(rdf_value)
        if "#" in rdf_key:
            # move the unprefixed value to the unprefixed key
            plain_key = self.unprefix_value(rdf_key)
            row[plain_key] = row.pop(rdf_key)
        row[f"{rdf_key}_rdf"] = rdf_value

unprefix_value(value, prefixes=['http://xmlns.com/foaf/0.1/'])

get rid of RDF prefixes to simplify our life

Parameters:

Name Type Description Default
value(object)

the RDFLib value to unprefix

required
prefixes(list)

list of prefixes to remove

required

Returns: str: a simple string representation

Source code in skg/owl.py
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def unprefix_value(self, value: object, prefixes: list = None) -> str:
    """
    get rid of RDF prefixes to simplify our life

    A list is reduced to its first element and a dict to its "@id"
    (or else "@value") entry. For a string with exactly one "#" only
    the part behind the "#" is kept and the given prefixes are removed.

    Args:
        value(object): the RDFLib value to unprefix
        prefixes(list): list of prefixes to remove - if None
            the FOAF namespace "http://xmlns.com/foaf/0.1/" is used
    Returns:
        str: a simple string representation - a value that does not
        boil down to a string (e.g. a number or an empty list) is
        returned as is
    """
    if prefixes is None:
        # avoid a shared mutable default argument
        prefixes = ["http://xmlns.com/foaf/0.1/"]
    if isinstance(value, list):
        if len(value) >= 1:
            value = value[0]
    if isinstance(value, dict):
        for akey in ["@id", "@value"]:
            if akey in value:
                value = value[akey]
                # stop here - otherwise the next key would be
                # searched in the extracted value instead of the dict
                break
    if not isinstance(value, str):
        # only strings can carry a prefix
        return value
    parts = value.split("#")
    if len(parts) == 2:
        value = parts[1]
    for prefix in prefixes:
        if value.startswith(prefix):
            value = value.replace(prefix, "")
    return value

paper

Created on 2022-11-16

@author: wf

Paper

Bases: Node

a scientific paper

Source code in skg/paper.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
class Paper(skg.graph.Node):
    """
    a scientific paper
    """

    @classmethod
    def getSamples(cls):
        """
        get sample records for papers

        Returns:
            list: a list of two dicts - a record with wikidata and dblp
            identifiers and a record with a doi and a title only
        """
        open_society_paper = {
            "wikiDataId": "Q55693406",
            "title": "Designing the web for an open society",
            "doi": "10.1145/1963405.1963408",
            "DBLP_publication_ID": "conf/www/Berners-Lee11",
            "publication_date": 2011,
        }
        content_gaps_paper = {
            "doi": "10.1007/978-3-031-19433-7_21",
            "title": "An Analysis of Content Gaps Versus User Needs in the Wikidata Knowledge Graph",
        }
        return [open_society_paper, content_gaps_paper]

    def __init__(self):
        """
        Constructor - no attributes are set here
        """

    def fromDOI(self, doi: str):
        """
        fill me from the given doi

        The citeproc metadata is fetched via a DOI object. The title
        and the label are only set if they are not available yet.

        Args:
            doi(str): the doi to construct me from
        """
        self.doi = doi
        doi_obj = DOI(doi)
        self.doi_obj = doi_obj
        doi_obj.meta_data = doi_obj.doi2Citeproc()
        # keep an already available title
        if not hasattr(self, "title"):
            self.title = doi_obj.meta_data["title"]
        # by default the label is the url resolving the doi
        if not hasattr(self, "label"):
            self.label = f"https://doi.org/{self.doi}"

__init__()

Constructor

Source code in skg/paper.py
33
34
35
36
def __init__(self):
    """
    Constructor

    The body consists of this docstring only - no attributes
    are set and no base class constructor is called.
    """

fromDOI(doi)

construct me from the given doi

Source code in skg/paper.py
38
39
40
41
42
43
44
45
46
47
48
def fromDOI(self, doi: str):
    """
    fill me from the given doi

    The citeproc metadata is fetched via a DOI object. The title
    and the label are only set if they are not available yet.

    Args:
        doi(str): the doi to construct me from
    """
    self.doi = doi
    doi_obj = DOI(doi)
    self.doi_obj = doi_obj
    doi_obj.meta_data = doi_obj.doi2Citeproc()
    # keep an already available title
    if not hasattr(self, "title"):
        self.title = doi_obj.meta_data["title"]
    # by default the label is the url resolving the doi
    if not hasattr(self, "label"):
        self.label = f"https://doi.org/{self.doi}"

profiler

Created on 2022-11-18

@author: wf

Profiler

simple profiler

Source code in skg/profiler.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class Profiler:
    """
    simple profiler
    """

    def __init__(self, msg, profile=True):
        """
        start the profiling with the given msg and profile active flag

        Args:
            msg(str): the message to show if profiling is active
            profile(bool): True if messages should be shown
        """
        self.msg, self.profile = msg, profile
        # the start time is taken even if no messages are shown
        self.starttime = time.time()
        if profile:
            print(f"Starting {msg} ...")

    def time(self, extraMsg=""):
        """
        time the action and print if profile is active

        Args:
            extraMsg(str): text to show directly behind the message

        Returns:
            float: the seconds elapsed since construction
        """
        elapsed = time.time() - self.starttime
        if not self.profile:
            return elapsed
        print(f"{self.msg}{extraMsg} took {elapsed:5.1f} s")
        return elapsed

__init__(msg, profile=True)

construct me with the given msg and profile active flag

Parameters:

Name Type Description Default
msg(str)

the message to show if profiling is active

required
profile(bool)

True if messages should be shown

required
Source code in skg/profiler.py
15
16
17
18
19
20
21
22
23
24
25
26
27
def __init__(self, msg, profile=True):
    """
    start the profiling with the given msg and profile active flag

    Args:
        msg(str): the message to show if profiling is active
        profile(bool): True if messages should be shown
    """
    self.msg, self.profile = msg, profile
    # the start time is taken even if no messages are shown
    self.starttime = time.time()
    if profile:
        print(f"Starting {msg} ...")

time(extraMsg='')

time the action and print if profile is active

Source code in skg/profiler.py
29
30
31
32
33
34
35
36
def time(self, extraMsg=""):
    """
    time the action and print if profile is active

    Args:
        extraMsg(str): text to show directly behind the message

    Returns:
        float: the seconds elapsed since the start time
    """
    elapsed = time.time() - self.starttime
    if not self.profile:
        return elapsed
    print(f"{self.msg}{extraMsg} took {elapsed:5.1f} s")
    return elapsed

ris

Created on 2024-03-08

@author: wf

RIS_Entry

Research Information Systems format https://en.wikipedia.org/wiki/RIS_(file_format)

Source code in skg/ris.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
@lod_storable
class RIS_Entry:
    """
    Research Information Systems format
    https://en.wikipedia.org/wiki/RIS_(file_format)

    The attributes are filled from the record dicts that rispy parses
    (see get_dict_from_file); list valued attributes default to an empty
    list, all others to None.
    """

    type_of_reference: Optional[str] = None
    abstract: Optional[str] = None
    type_of_work: Optional[str] = None
    year: Optional[str] = None
    doi: Optional[str] = None
    keywords: List[str] = field(default_factory=list)
    first_authors: List[str] = field(default_factory=list)
    publisher: Optional[str] = None
    language: Optional[str] = None
    primary_title: Optional[str] = None
    urls: List[str] = field(default_factory=list)
    secondary_title: Optional[str] = None

    @property
    def lang_qid(self) -> str:
        """
        get the Wikidata item id for the language of this entry

        Returns:
            str: "Q188" if the language is "de", otherwise "Q1860" (English)
        """
        qid = "Q1860"  # English
        if self.language == "de":
            qid = "Q188"
        return qid

    @classmethod
    def get_property_mappings(cls):
        """
        get the wikidata property mappings

        Returns:
            list: the PropertyMapping instances for the columns
                instanceof, primary_title, doi, lang_qid and year
        """
        mappings = [
            PropertyMapping(
                column="instanceof",
                propertyName="instanceof",
                propertyId="P31",
                propertyType=WdDatatype.itemid,
                value="Q13442814",  # scholarly article
            ),
            PropertyMapping(
                column="primary_title",
                propertyName="title",
                propertyId="P1476",
                propertyType=WdDatatype.text,
            ),
            PropertyMapping(
                column="doi",
                # propertyName="DOI",
                # propertyId="P356",
                propertyName="described at URL",
                propertyId="P973",
                # propertyType=WdDatatype.extid,
                propertyType=WdDatatype.url,
            ),
            PropertyMapping(
                column="lang_qid",
                propertyName="language of work or name",
                propertyId="P407",
                propertyType=WdDatatype.itemid,
            ),
            PropertyMapping(
                column="year",
                propertyName="publication date",
                propertyId="P577",
                propertyType=WdDatatype.year,
            ),
        ]
        return mappings

    @classmethod
    def get_dict_from_file(cls, ris_file_path, by_field: str = "index"):
        """
        read the given RIS file into a dict of RIS entries

        Args:
            ris_file_path: the path of the RIS file to read
            by_field(str): the record field whose value is used as the
                dict key - "index" (the default) uses the 1-based
                position of the record in the file

        Returns:
            dict: the RIS_Entry instances by key; records that do not
                have the by_field field are skipped
        """
        ris_dict = {}
        with open(ris_file_path, "r") as bibliography_file:
            entries = rispy.load(bibliography_file)
            for i, entry in enumerate(entries, start=1):
                if by_field == "index":
                    key = i
                elif by_field in entry:
                    # look the key up in the record itself
                    key = entry[by_field]
                else:
                    # no usable key for this record - do not reuse the
                    # key of a previous record
                    continue
                ris_dict[key] = RIS_Entry.from_dict(entry)

        return ris_dict

get_property_mappings() classmethod

get the wikidata property mappings

Source code in skg/ris.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
@classmethod
def get_property_mappings(cls):
    """
    Provide the Wikidata property mappings for RIS entries.

    Returns:
        list: one PropertyMapping each for the columns instanceof,
            primary_title, doi, lang_qid and year (in that order)
    """
    instance_of = PropertyMapping(
        column="instanceof",
        propertyName="instanceof",
        propertyId="P31",
        propertyType=WdDatatype.itemid,
        value="Q13442814",  # scholarly article
    )
    title = PropertyMapping(
        column="primary_title",
        propertyName="title",
        propertyId="P1476",
        propertyType=WdDatatype.text,
    )
    # the doi column is mapped as "described at URL" (P973, url);
    # the disabled alternative is DOI (P356) with WdDatatype.extid
    described_at = PropertyMapping(
        column="doi",
        propertyName="described at URL",
        propertyId="P973",
        propertyType=WdDatatype.url,
    )
    language = PropertyMapping(
        column="lang_qid",
        propertyName="language of work or name",
        propertyId="P407",
        propertyType=WdDatatype.itemid,
    )
    publication_date = PropertyMapping(
        column="year",
        propertyName="publication date",
        propertyId="P577",
        propertyType=WdDatatype.year,
    )
    return [instance_of, title, described_at, language, publication_date]

schema

Created on 2022-11-22

@author: wf

Schema

a schema

Source code in skg/schema.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
class Schema:
    """
    a schema that can be rendered as PlantUML markup

    toPlantUml calls self.toClasses() which is not defined in this
    class - it has to be provided, e.g. by a subclass.
    """

    def __init__(self, name: str, url: str, authors: str, inception: str):
        """
        constructor

        Args:
            name(str): the name of this schema
            url(str): the url of this schema
            authors(str): the authors of this schema
            inception(str): the inception of this schema
        """
        self.name = name
        self.url = url
        self.authors = authors
        self.inception = inception

    def classesToPlantUml(self, classes: dict, indent: str = "  ") -> str:
        """
        convert the given classes dict to plantuml

        Args:
            classes(dict): a dictionary with a "classes" entry that maps
                class names to dicts of properties; keys starting with
                "@" ("@label", "@comment", "@subClassOf") are metadata,
                every other key is a property with a "range"
            indent(str): the indentation to apply

        Returns:
            str: the plantuml markup for all classes
        """
        classes = classes["classes"]
        markup = ""
        for cname, clazz in classes.items():
            class_markup = ""
            rel_markup = ""  # relations
            for pname, prop in clazz.items():
                if pname.startswith("@"):
                    # metadata entries are handled below
                    continue
                prange = prop["range"]
                if prange in classes:
                    # a range that is itself a class becomes a relation
                    #  Class01 "1" *-- "many" Class02 : contains
                    rel_markup += f"{indent}{cname}--{prange}:{pname}\n"
                else:
                    class_markup += f"{indent}  {pname}:{prange}\n"
            class_markup = f"{indent}class {cname}{{\n{class_markup}\n{indent}}}\n"
            class_markup += rel_markup
            if "@subClassOf" in clazz:
                general = clazz["@subClassOf"]
                if general:
                    class_markup += f"{indent}{general} <|-- {cname}\n"
            note = f"{indent}note top of {cname}\n"
            if "@label" in clazz:
                note += f"""{indent}{clazz["@label"]}\n"""
            if "@comment" in clazz:
                note += f"""{indent}{clazz["@comment"]}\n"""
            note += f"{indent}end note\n"
            # the note goes in front of the class it describes
            class_markup = note + class_markup
            markup += class_markup
        return markup

    def toPlantUml(self, header=None, footer=None) -> str:
        """
        get a plantuml version of the schema

        Args:
            header(str): the header to apply - if None a default header
                with authors, inception, name, url and the current UTC
                date is generated
            footer(str): the footer to apply - if None a closing brace
                line is used

        Returns:
            str: the plantuml markup
        """
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
        if header is None:
            # the constructor only sets "url"; use a "schema_url"
            # attribute when one has been set and fall back to "url"
            schema_url = getattr(self, "schema_url", self.url)
            header = f"""/'
     {self.authors} {self.inception}
     updated {timestamp}

     {self.name} {schema_url}
     converted from owl to plantuml
    '/
    title  {self.name} schema {schema_url} converted from owl to plantuml updated {timestamp}
    hide circle
    package foaf {{
      class Document {{
      }}
    }}
    package dblp {{
     """
        if footer is None:
            footer = "}\n"
        classes = self.toClasses()
        markup = header + self.classesToPlantUml(classes, indent="  ") + footer
        return markup

__init__(name, url, authors, inception)

constructor

Parameters:

Name Type Description Default
name(str)

the name of this schema

required
url(str)

the url of this schema

required
authors(str)

the authors of this schema

required
inception(str)

the inception of this schema

required
Source code in skg/schema.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
def __init__(self, name: str, url: str, authors: str, inception: str):
    """
    constructor

    Args:
        name(str): the name of this schema
        url(str): the url of this schema
        authors(str): the authors of this schema
        inception(str): the inception of this schema
    """
    self.name = name
    self.url = url
    self.authors = authors
    self.inception = inception

classesToPlantUml(classes, indent=' ')

convert the given classes dict to plantuml

Parameters:

Name Type Description Default
classes(dict)

a dictionary of classes

required
indent(str)

the indentation to apply

required
Source code in skg/schema.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def classesToPlantUml(self, classes: dict, indent: str = "  "):
    """
    Render the given classes dict as PlantUML markup.

    Args:
        classes(dict): a dictionary whose "classes" entry maps class
            names to property dicts; keys starting with "@" are
            metadata, all other keys are properties with a "range"
        indent(str): the indentation to apply

    Returns:
        str: the plantuml markup for all classes
    """
    class_map = classes["classes"]
    chunks = []
    for class_name, members in class_map.items():
        attribute_lines = []
        relation_lines = []
        for member_name, member in members.items():
            if member_name.startswith("@"):
                continue
            target = member["range"]
            if target in class_map:
                #  Class01 "1" *-- "many" Class02 : contains
                relation_lines.append(
                    f"{indent}{class_name}--{target}:{member_name}\n"
                )
            else:
                attribute_lines.append(f"{indent}  {member_name}:{target}\n")
        # the note precedes the class it describes
        chunks.append(f"{indent}note top of {class_name}\n")
        for meta_key in ("@label", "@comment"):
            if meta_key in members:
                chunks.append(f"{indent}{members[meta_key]}\n")
        chunks.append(f"{indent}end note\n")
        attributes = "".join(attribute_lines)
        chunks.append(f"{indent}class {class_name}{{\n{attributes}\n{indent}}}\n")
        chunks.extend(relation_lines)
        if "@subClassOf" in members and members["@subClassOf"]:
            chunks.append(f"{indent}{members['@subClassOf']} <|-- {class_name}\n")
    return "".join(chunks)

toPlantUml(header=None, footer=None)

get a plantuml version of the schema

Parameters:

Name Type Description Default
header(str)

the header to apply

required
footer(str)

the footer to apply

required

Returns:

Name Type Description
str str

the plantuml markup

Source code in skg/schema.py
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def toPlantUml(self, header=None, footer=None) -> str:
    """
    get a plantuml version of the schema

    Args:
        header(str): the header to apply
        footer(str): the footer to apply

    Returns:
        str: the plantuml markup
    """
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%d")
    if header is None:
        header = f"""/'
 {self.authors} {self.inception}
 updated {timestamp}

 {self.name} {self.schema_url}
 converted from owl to plantuml
'/
title  {self.name} schema {self.schema_url} converted from owl to plantuml updated {timestamp}
hide circle
package foaf {{
  class Document {{
  }}
}}
package dblp {{
 """
    if footer is None:
        footer = "}\n"
    classes = self.toClasses()
    markup = header + self.classesToPlantUml(classes, indent="  ") + footer
    return markup

scholar

Created on 2022-11-16

@author: wf

Institution

Bases: Node

academic institution a scholar might be affiliated with

Source code in skg/scholar.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
class Institution(skg.graph.Node):
    """
    An academic institution that a scholar can be affiliated with.
    """

    @classmethod
    def getSamples(cls):
        """
        Provide example records for institutions.

        Returns:
            list: the sample dicts
        """
        rwth_aachen = {"wikiDataId": "Q273263", "short_name": "RWTH Aachen (German)"}
        ubc = {
            "wikiDataId": "Q391028",
            "inception": "1908",
            "short_name": "UBC",
            "country": "Canada",
            "image": "https://commons.wikimedia.org/wiki/File:Irving_K._Barber_Library.jpg",
            "located_in": "Vancouver",
            "official_website": "https://www.ubc.ca/",
        }
        return [rwth_aachen, ubc]

    def __init__(self):
        """
        Create an institution; no attributes are set here.
        """

__init__()

constructor

Source code in skg/scholar.py
72
73
74
75
def __init__(self):
    """
    constructor

    The body is empty: no attributes are set and the base class
    initializer is not called here.
    """

Scholar

Bases: Node

an instance of a scholar that writes papers to be an author

Source code in skg/scholar.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
class Scholar(skg.graph.Node):
    """
    A scholar, i.e. a person that writes papers as an author.
    """

    @classmethod
    def getSamples(cls):
        """
        Provide example records for scholars.

        Returns:
            list: the sample dicts
        """
        stefan_decker = {
            "wikiDataId": "Q54303353",
            "name": "Stefan Decker",
            "gndId": "",
            "dblpId": "d/StefanDecker",
            "orcid": "0000-0001-6324-7164",
            "linkedInId": "",
            "googleScholarUser": "uhVkSswAAAAJ",
            "homepage": "http://www.stefandecker.org",
        }
        tim_berners_lee = {
            "name": "Tim Berners-Lee",
            "wikiDataId": "Q80",
            "givenName": "Timothy",
            "familyName": "Berners-Lee",
            "gender": "male",
            "image": "https://commons.wikimedia.org/wiki/File:Sir_Tim_Berners-Lee_(cropped).jpg",
            # "occupation": "computer scientist" truly tabular issue
        }
        anna_lisa_gentile = {
            "name": "Anna Lisa Gentile",
            "wikiDataId": "Q54832532",
            "Semantic_Scholar_author_ID": "Anna Lisa Gentile",
        }
        return [stefan_decker, tim_berners_lee, anna_lisa_gentile]

    def __init__(self):
        """
        Create a scholar; no attributes are set here.
        """

__init__()

constructor

Source code in skg/scholar.py
45
46
47
48
def __init__(self):
    """
    constructor

    The body is empty: no attributes are set and the base class
    initializer is not called here.
    """

scholargrid

Created on 2023-01-04

@author: wf

ScholarGrid

Bases: SmwGrid

show a grid of scholars

Source code in skg/scholargrid.py
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
class ScholarGrid(SmwGrid):
    """
    show a grid of scholars
    """

    def __init__(
        self, app, wikiUsers, wikiId: str, sparql: SPARQL, debug: bool = False
    ):
        """
        constructor

        Args:
            app(App): the app that I am part of
            wikiUsers(list): the wikiUsers
            wikiId(str): the wikiId to use
            sparql(SPARQL): the SPARQL endpoint to use
            debug(bool): if True show debugging information
        """
        entityName = "Scholar"
        entityPluralName = "Scholars"
        pk = "item"
        super().__init__(
            # SmwGrid.__init__ has no "app" parameter - its first
            # parameter is "solution", so the app is handed over as such
            solution=app,
            wikiUsers=wikiUsers,
            wikiId=wikiId,
            entityName=entityName,
            entityPluralName=entityPluralName,
            pk=pk,
            getLod=self.getScholars,
            sparql=sparql,
            debug=debug,
        )

    def getScholars(self) -> list:
        """
        get the list of scholars

        As a side effect self.wbQuery is set to the scholar query.

        Returns:
            list: the list of dicts of scholars
        """
        # get a dict of dict
        scholars_dod = self.semwiki.scholars()
        # get a list of dicts
        scholars_lod = list(scholars_dod.values())
        # @TODO - shouldn't this be better specified in the mapping?
        for row in scholars_lod:
            row["label"] = row["Scholar"]
        self.wbQuery = ScholarQuery.get()
        return scholars_lod

__init__(app, wikiUsers, wikiId, sparql, debug=False)

constructor

Parameters:

Name Type Description Default
app(App)

the app that I am part of

required
wikiUsers(list)

the wikiUsers

required
wikiId(str)

the wikiId to use

required
sparql(SPARQL)

the SPARQL endpoint to use

required
debug(bool)

if True show debugging information

required
Source code in skg/scholargrid.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
def __init__(
    self, app, wikiUsers, wikiId: str, sparql: SPARQL, debug: bool = False
):
    """
    constructor

    Args:
        app(App): the app that I am part of
        wikiUsers(list): the wikiUsers
        wikiId(str): the wikiId to use
        sparql(SPARQL): the SPARQL endpoint to use
        debug(bool): if True show debugging information
    """
    entityName = "Scholar"
    entityPluralName = "Scholars"
    pk = "item"
    super().__init__(
        # SmwGrid.__init__ has no "app" parameter - its first
        # parameter is "solution", so the app is handed over as such
        solution=app,
        wikiUsers=wikiUsers,
        wikiId=wikiId,
        entityName=entityName,
        entityPluralName=entityPluralName,
        pk=pk,
        getLod=self.getScholars,
        sparql=sparql,
        debug=debug,
    )

getScholars()

get the list of scholars

Returns:

Name Type Description
list list

the list of dicts of scholars

Source code in skg/scholargrid.py
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
def getScholars(self) -> list:
    """
    Fetch the scholars from the semantic wiki as grid rows.

    Every row gets a "label" entry copied from its "Scholar" entry and
    self.wbQuery is set to the scholar query as a side effect.

    Returns:
        list: the list of dicts of scholars
    """
    # the wiki delivers a dict of dicts - only the inner dicts are needed
    scholar_rows = [*self.semwiki.scholars().values()]
    # @TODO - shouldn't this be better specified in the mapping?
    for scholar in scholar_rows:
        scholar["label"] = scholar["Scholar"]
    self.wbQuery = ScholarQuery.get()
    return scholar_rows

ScholarQuery

Source code in skg/scholargrid.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
class ScholarQuery:
    """
    Factory for the WikibaseQuery that describes scholars.
    """

    @classmethod
    def get(cls) -> WikibaseQuery:
        """
        Build the WikibaseQuery for scholars.

        Returns:
            WikibaseQuery: the query named "scholar" with one property
                added per description row
        """
        # @TODO use metamodel info and read from wiki
        keys = (
            "Column",
            "Entity",
            "Lookup",
            "PropVarname",
            "PropertyId",
            "PropertyName",
            "Qualifier",
            "Type",
            "Value",
        )
        # one tuple per description row, values in the order of keys
        rows = (
            ("", "Scholar", "", "instanceof", "P31", "instanceof", "", "", "Q5"),
            ("wikiDataId", "Scholar", "", "", "", "", "", "item", ""),
            ("name", "Scholar", "Q101352", "family_name", "P734", "family name", "", "", ""),
            ("firstName", "Scholar", "Q202444", "given_name", "P735", "given name", "", "", ""),
            ("homepage", "Scholar", "", "official_website", "P856", "official website", "", "url", ""),
            ("linkedInId", "Scholar", "", "LinkedIn_personal_profile_ID", "P6634", "LinkedIn personal profile ID", "", "extid", ""),
            ("orcid", "Scholar", "", "ORCID_iD", "P496", "ORCID iD", "", "extid", ""),
            ("googleScholarUser", "Scholar", "", "Google_Scholar_author_ID", "P1960", "Google Scholar author ID", "", "extid", ""),
            ("researchGate", "Scholar", "", "ResearchGate_profile_ID", "P2038", "ResearchGate profile ID", "", "extid", ""),
            ("gndId", "Scholar", "", "GND_ID", "P227", "GND ID", "", "extid", ""),
            ("dblpId", "Scholar", "", "DBLP_author_ID", "P2456", "DBLP author ID", "", "extid", ""),
        )
        scholar_mapping = [dict(zip(keys, values)) for values in rows]
        query = WikibaseQuery("scholar")
        for description_row in scholar_mapping:
            query.addPropertyFromDescriptionRow(description_row)
        return query

get() classmethod

get the WikiBaseQuery for scholars

Returns:

Name Type Description
WikibaseQuery WikibaseQuery

the wikibase query

Source code in skg/scholargrid.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
@classmethod
def get(cls) -> WikibaseQuery:
    """
    Build the WikibaseQuery for scholars.

    Returns:
        WikibaseQuery: the query named "scholar" with one property
            added per description row
    """
    # @TODO use metamodel info and read from wiki
    keys = (
        "Column",
        "Entity",
        "Lookup",
        "PropVarname",
        "PropertyId",
        "PropertyName",
        "Qualifier",
        "Type",
        "Value",
    )
    # one tuple per description row, values in the order of keys
    rows = (
        ("", "Scholar", "", "instanceof", "P31", "instanceof", "", "", "Q5"),
        ("wikiDataId", "Scholar", "", "", "", "", "", "item", ""),
        ("name", "Scholar", "Q101352", "family_name", "P734", "family name", "", "", ""),
        ("firstName", "Scholar", "Q202444", "given_name", "P735", "given name", "", "", ""),
        ("homepage", "Scholar", "", "official_website", "P856", "official website", "", "url", ""),
        ("linkedInId", "Scholar", "", "LinkedIn_personal_profile_ID", "P6634", "LinkedIn personal profile ID", "", "extid", ""),
        ("orcid", "Scholar", "", "ORCID_iD", "P496", "ORCID iD", "", "extid", ""),
        ("googleScholarUser", "Scholar", "", "Google_Scholar_author_ID", "P1960", "Google Scholar author ID", "", "extid", ""),
        ("researchGate", "Scholar", "", "ResearchGate_profile_ID", "P2038", "ResearchGate profile ID", "", "extid", ""),
        ("gndId", "Scholar", "", "GND_ID", "P227", "GND ID", "", "extid", ""),
        ("dblpId", "Scholar", "", "DBLP_author_ID", "P2456", "DBLP author ID", "", "extid", ""),
    )
    scholar_mapping = [dict(zip(keys, values)) for values in rows]
    query = WikibaseQuery("scholar")
    for description_row in scholar_mapping:
        query.addPropertyFromDescriptionRow(description_row)
    return query

SmwGrid

Bases: GridSync

a semantic mediawiki based grid synchable with WikiData

Source code in skg/scholargrid.py
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
class SmwGrid(GridSync):
    """
    a semantic mediawiki based grid synchable with WikiData

    The constructor creates a SemWiki for the selected wiki user and a
    WikidataGrid which is handed to the GridSync base class.
    """

    def __init__(
        self,
        solution,
        entityName: str,
        entityPluralName: str,
        pk: str,
        getLod: Callable,
        wikiUsers: list,
        wikiId: str,
        sparql: SPARQL,
        debug: bool = False,
    ):
        """
        constructor

        Args:
            solution:  the solution that I am part of
            entityName(str): the name of the entity type of items to be shown in the grid
            entityPluralName(str): the plural name of the entities to be shown
            pk(str): the name of the primary key
            getLod(Callable): the callback to load the grid rows list of dicts
            wikiUsers(list): the wikiUsers
            wikiId(str): the wikiId to use
            sparql(SPARQL): the SPARQL endpoint to use
            debug(bool): if True show debugging information
        """
        self.solution = solution
        self.wikiUsers = wikiUsers
        self.wikiId = wikiId
        # wikiUsers is indexed by wikiId here - presumably a mapping of
        # wiki ids to wiki users rather than a plain list; TODO confirm
        wikiUser = self.wikiUsers[wikiId]
        self.semwiki = SemWiki(wikiUser)
        # NOTE(review): no parameter or local variable named "app" exists
        # in this constructor (the first parameter is "solution"), so the
        # "app=app" argument below relies on a name that is not visible
        # here - confirm which keyword and value WikidataGrid expects
        wdGrid = WikidataGrid(
            app=app,
            source=wikiId,
            entityName=entityName,
            entityPluralName=entityPluralName,
            getLod=getLod,
            debug=debug,
        )
        # we'd rather lazy load
        # wdGrid.lod=wdGrid.getLod()
        super().__init__(wdGrid, entityName, pk, sparql=sparql, debug=debug)

__init__(solution, entityName, entityPluralName, pk, getLod, wikiUsers, wikiId, sparql, debug=False)

constructor

Parameters:

Name Type Description Default
solution

the solution that I am part of

required
entityName(str)

the name of the entity type of items to be shown in the grid

required
entityPluralName(str)

the plural name of the entities to be shown

required
pk(str)

the name of the primary key

required
getLod(Callable)

the callback to load the grid rows list of dicts

required
wikiUsers(list)

the wikiUsers

required
wikiId(str)

the wikiId to use

required
sparql(SPARQL)

the SPARQL endpoint to use

required
debug(bool)

if True show debugging information

required
Source code in skg/scholargrid.py
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
def __init__(
    self,
    solution,
    entityName: str,
    entityPluralName: str,
    pk: str,
    getLod: Callable,
    wikiUsers: list,
    wikiId: str,
    sparql: SPARQL,
    debug: bool = False,
):
    """
    constructor

    Creates a SemWiki for the selected wiki user and a WikidataGrid
    which is handed to the base class initializer.

    Args:
        solution:  the solution that I am part of
        entityName(str): the name of the entity type of items to be shown in the grid
        entityPluralName(str): the plural name of the entities to be shown
        pk(str): the name of the primary key
        getLod(Callable): the callback to load the grid rows list of dicts
        wikiUsers(list): the wikiUsers
        wikiId(str): the wikiId to use
        sparql(SPARQL): the SPARQL endpoint to use
        debug(bool): if True show debugging information
    """
    self.solution = solution
    self.wikiUsers = wikiUsers
    self.wikiId = wikiId
    # wikiUsers is indexed by wikiId here - presumably a mapping of
    # wiki ids to wiki users rather than a plain list; TODO confirm
    wikiUser = self.wikiUsers[wikiId]
    self.semwiki = SemWiki(wikiUser)
    # NOTE(review): no parameter or local variable named "app" exists
    # in this constructor (the first parameter is "solution"), so the
    # "app=app" argument below relies on a name that is not visible
    # here - confirm which keyword and value WikidataGrid expects
    wdGrid = WikidataGrid(
        app=app,
        source=wikiId,
        entityName=entityName,
        entityPluralName=entityPluralName,
        getLod=getLod,
        debug=debug,
    )
    # we'd rather lazy load
    # wdGrid.lod=wdGrid.getLod()
    super().__init__(wdGrid, entityName, pk, sparql=sparql, debug=debug)

search

Created on 2022-11-19

@author: wf

SearchOptions

wrapper for search results

Source code in skg/search.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class SearchOptions:
    """
    wrapper for search options
    """

    def __init__(
        self,
        limit: int = 9,
        lang: str = "en",
        show: bool = True,
        markup_names: list = None,
        open_browser: bool = False,
    ):
        """
        constructor

        Args:
            limit(int): limit for the maximum number of results
            lang(str): the language code to use for the search
            show(bool): if True print the search results
            markup_names(list): a list of markup names to support;
                None (the default) selects ["bibtex"]
            open_browser(bool): if True open a browser for the target page of the item e.g. scholia

        """
        self.limit = limit
        self.lang = lang
        self.show = show
        # a list literal as default value would be created once and shared by
        # all instances - create a fresh list per instance instead
        self.markup_names = ["bibtex"] if markup_names is None else markup_names
        self.open_browser = open_browser

__init__(limit=9, lang='en', show=True, markup_names=['bibtex'], open_browser=False)

constructor

Parameters:

Name Type Description Default
limit(int)

limit for the maximum number of results

9
lang(str)

the language code to use for the search

'en'
show(bool)

if True print the search results

True
markup_names(list)

a list of markup names to support

['bibtex']
open_browser(bool)

if True open a browser for the target page of the item e.g. scholia

False
Source code in skg/search.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def __init__(
    self,
    limit: int = 9,
    lang: str = "en",
    show: bool = True,
    markup_names: list = None,
    open_browser: bool = False,
):
    """
    constructor

    Args:
        limit(int): limit for the maximum number of results
        lang(str): the language code to use for the search
        show(bool): if True print the search results
        markup_names(list): a list of markup names to support;
            None (the default) selects ["bibtex"]
        open_browser(bool): if True open a browser for the target page of the item e.g. scholia

    """
    self.limit = limit
    self.lang = lang
    self.show = show
    # a list literal as default value would be created once and shared by
    # all instances - create a fresh list per instance instead
    self.markup_names = ["bibtex"] if markup_names is None else markup_names
    self.open_browser = open_browser

SearchResult

wrapper for search results

Source code in skg/search.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
class SearchResult:
    """
    wrapper for search results
    """

    def __init__(self, search_list: list, options: "SearchOptions" = None):
        """
        constructor

        Args:
            search_list(list): a list of search terms
            options(SearchOptions): the search options to apply;
                None (the default) creates default SearchOptions
        """
        self.search_list = search_list
        # the default used to be the SearchOptions class itself instead of
        # an instance, so attributes such as options.limit were not available
        if options is None:
            options = SearchOptions()
        self.options = options
        # filled by the search with the items that were found
        self.items = []

__init__(search_list, options=SearchOptions)

constructor

Args: search_list(list): a list of search terms options(SearchOptions): the search options to apply

Source code in skg/search.py
44
45
46
47
48
49
50
51
52
53
54
def __init__(self, search_list: list, options: "SearchOptions" = None):
    """
    constructor

    Args:
        search_list(list): a list of search terms
        options(SearchOptions): the search options to apply;
            None (the default) creates default SearchOptions
    """
    self.search_list = search_list
    # the default used to be the SearchOptions class itself instead of
    # an instance, so attributes such as options.limit were not available
    if options is None:
        options = SearchOptions()
    self.options = options
    # filled by the search with the items that were found
    self.items = []

searchengine

Created on 18.11.2022

@author: wf

InternetSearch

generic internet search

Source code in skg/searchengine.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
class InternetSearch:
    """
    generic internet search that queries several search engines
    """

    def __init__(self, debug: bool = False):
        """
        create one client per search engine

        Args:
            debug(bool): if True exceptions of the engines are printed to stderr
        """
        self.debug = debug
        google = GoogleSearch()
        yahoo = YahooSearch()
        bing = BingSearch()
        duck_duck_go = DuckDuckGoSearch()
        google_scholar = GoogleScholarSearch()
        self.gsearch = google
        self.ysearch = yahoo
        self.bsearch = bing
        self.dsearch = duck_duck_go
        self.gs_search = google_scholar
        # the engines that search() queries, in this order;
        # gsearch is created but not part of this list
        self.engines = [google_scholar, yahoo, duck_duck_go, bing]

    def handleException(self, ex):
        """
        print the given exception to stderr if debug is active, otherwise ignore it

        Args:
            ex: the exception to handle
        """
        if not self.debug:
            return
        print(str(ex), file=sys.stderr)

    def search(self, search_term: str):
        """
        search my engines for the given search_term

        Args:
            search_term(str): the term to search for

        Yields:
            tuple: the engine name and the results of that engine; an engine
            that raises an Exception is handed to handleException and skipped
        """
        second_arg = 1  # passed on unchanged as the second search argument
        for engine in self.engines:
            try:
                outcome = engine.search(search_term, second_arg)
                yield engine.name, outcome.results
            except Exception as ex:
                self.handleException(ex)

__init__(debug=False)

constructor

Source code in skg/searchengine.py
23
24
25
26
27
28
29
30
31
32
33
def __init__(self, debug: bool = False):
    """
    create one client per search engine

    Args:
        debug(bool): the debug flag to remember
    """
    self.debug = debug
    google = GoogleSearch()
    yahoo = YahooSearch()
    bing = BingSearch()
    duck_duck_go = DuckDuckGoSearch()
    google_scholar = GoogleScholarSearch()
    self.gsearch = google
    self.ysearch = yahoo
    self.bsearch = bing
    self.dsearch = duck_duck_go
    self.gs_search = google_scholar
    # the engine list in its fixed order; gsearch is created but not listed
    self.engines = [google_scholar, yahoo, duck_duck_go, bing]

handleException(ex)

handle the given exception

Source code in skg/searchengine.py
35
36
37
38
39
40
def handleException(self, ex):
    """
    print the given exception to stderr if debug is active, otherwise ignore it

    Args:
        ex: the exception to handle
    """
    if not self.debug:
        return
    print(str(ex), file=sys.stderr)

search(search_term)

search my engines for the given search_term

Source code in skg/searchengine.py
42
43
44
45
46
47
48
49
50
51
52
53
54
def search(self, search_term: str):
    """
    search my engines for the given search_term

    Args:
        search_term(str): the term to search for

    Yields:
        tuple: the engine name and the results of that engine; an engine
        that raises an Exception is handed to handleException and skipped
    """
    second_arg = 1  # passed on unchanged as the second search argument
    for engine in self.engines:
        try:
            outcome = engine.search(search_term, second_arg)
            yield engine.name, outcome.results
        except Exception as ex:
            self.handleException(ex)

semantic_scholar

Created on 2022-11-22

@author: wf

SemanticScholar

wrapper for Semantic Scholar API

Source code in skg/semantic_scholar.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
class SemanticScholar:
    """
    thin wrapper around the Semantic Scholar API client
    """

    def __init__(self):
        """
        create the wrapped SemScholar client
        """
        self.sch = SemScholar()

    def get_paper(self, doi: str):
        """
        get the paper with the given DOI identifier

        Args:
            doi(str): the DOI to look up

        Returns:
            whatever the wrapped client returns for the DOI
        """
        return self.sch.get_paper(doi)

    def get_author(self):
        """
        placeholder for an author search - not implemented, returns None

        see https://api.semanticscholar.org/api-docs/graph#tag/Author-Data/operation/get_graph_get_author_search
        """
        return None

__init__()

constructor

Source code in skg/semantic_scholar.py
16
17
18
19
20
def __init__(self):
    """
    create the wrapped SemScholar client and keep it as self.sch
    """
    client = SemScholar()
    self.sch = client

get_author()

https://api.semanticscholar.org/api-docs/graph#tag/Author-Data/operation/get_graph_get_author_search

Source code in skg/semantic_scholar.py
29
30
31
32
33
def get_author(self):
    """
    placeholder for an author search - not implemented, returns None

    see https://api.semanticscholar.org/api-docs/graph#tag/Author-Data/operation/get_graph_get_author_search
    """
    return None

get_paper(doi)

get the paper with the given DOI identifier

Source code in skg/semantic_scholar.py
22
23
24
25
26
27
def get_paper(self, doi: str):
    """
    get the paper with the given DOI identifier

    Args:
        doi(str): the DOI to look up

    Returns:
        whatever the wrapped client self.sch returns for the DOI
    """
    return self.sch.get_paper(doi)

skgbrowser

Created on 2022-11-18

@author: wf

SkgBrowser

Bases: InputWebserver

scholarly knowledge graph browser

Source code in skg/skgbrowser.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
class SkgBrowser(InputWebserver):
    """
    scholarly knowledge graph browser
    """

    @classmethod
    def get_config(cls) -> WebserverConfig:
        """
        get the webserver configuration with SkgSolution as solution class

        Returns:
            WebserverConfig: the result of WebserverConfig.get for the
            sotsog base configuration (default port 8765)
        """
        base_config = WebserverConfig(
            copy_right="(c)2022-2024 Wolfgang Fahl",
            version=Version(),
            default_port=8765,
            short_name="sotsog",
        )
        effective_config = WebserverConfig.get(base_config)
        effective_config.solution_class = SkgSolution
        return effective_config

    def __init__(self):
        """
        initialize the webserver with my configuration and register the
        /scholars page
        """
        InputWebserver.__init__(self, config=SkgBrowser.get_config())

        @ui.page("/scholars")
        async def scholars(client: Client):
            # delegate the page to the scholars handler of the solution
            return await self.page(client, SkgSolution.scholars)

    def configure_run(self):
        """
        set the wiki users, the wiki id given in the args and the SPARQL
        endpoint of Wikidata
        """
        self.wikiUsers = WikiUser.getWikiUsers()
        self.wikiId = self.args.wikiId
        self.sparql = Wikidata().sparql

__init__()

Constructs all the necessary attributes for the WebServer object.

Source code in skg/skgbrowser.py
39
40
41
42
43
44
45
46
def __init__(self):
    """
    initialize the webserver with my configuration and register the
    /scholars page
    """
    InputWebserver.__init__(self, config=SkgBrowser.get_config())

    @ui.page("/scholars")
    async def scholars(client: Client):
        # delegate the page to the scholars handler of the solution
        return await self.page(client, SkgSolution.scholars)

SkgSolution(InputWebSolution)

the scholarly knowledge graph solution

Source code in skg/skgbrowser.py
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
class SkgSolution(InputWebSolution):
    """
    the scholarly knowledge graph solution
    """

    # This used to be declared with `def`, which made SkgSolution a function
    # whose nested methods were unreachable, although it is used as
    # solution_class and via SkgSolution.scholars.

    def __init__(self, webserver: SkgBrowser, client: Client):
        """
        Initialize the solution

        Calls the constructor of the base solution
        Args:
            webserver (SkgBrowser): The webserver instance associated with this context.
            client (Client): The client instance this context is associated with.
        """
        super().__init__(webserver, client)  # Call to the superclass constructor
        self.language = "en"
        self.wikiId = "or"
        self.markup_names = ["-", "bibtex", "scite", "smw"]
        # preselect bibtex
        self.markup_name = self.markup_names[1]

    def configure_menu(self):
        """
        configure additional non-standard menu entries
        """
        # self.link_button(name='Scholars',icon_name='account-school',target='/scholars')
        pass

    def createItemLink(self, item, term: str, index: int) -> str:
        """
        create a link for the given item

        Args:
            item(Node): the item to create a link for
            term(str): the search term that is shown as the link text
            index(int): the position of the item in the search result;
                items after the first one are shown grey with a subscript

        Returns:
            str: the html markup for the link
        """
        if index > 0:
            style = "color:grey"
            text = f"{term}<sub>{index+1}</sub>"
            delim = "&nbsp"
        else:
            style = ""
            text = term
            delim = ""
        link = Link.create(
            item.browser_url(), text, tooltip=item.label, target="_blank", style=style
        )
        if item.concept.name == "Scholar":
            if hasattr(item, "orcid"):
                # append the html representation of the ORCID of the scholar
                orcid = ORCID(item.orcid)
                link += orcid.asHtml()
        markup = delim + link
        return markup

    async def onSearchButton(self, _msg):
        """
        handle button to search for terms

        Each non-empty line of the search terms text area is searched
        separately; the links go to the results element and the markups
        of the found items to the markup element.
        """
        # NOTE(review): self.options and self.sotsog are not set anywhere in
        # this class - presumably provided elsewhere; confirm
        try:
            self.results.content = ""
            self.markup.content = ""
            terms = self.searchTerms.value.split("\n")
            self.messages.content = "Searching"
            delim = ""
            for term in terms:
                if term:
                    msg = f"... {term}\n"
                    self.messages.content += msg
                    # "-" stands for no markup at all
                    if self.markup_name == "-":
                        self.options.markup_names = []
                    else:
                        self.options.markup_names = [self.markup_name]
                    search_result = self.sotsog.search([term], self.options)
                    items = search_result.items
                    rmarkup = ""
                    if len(items) == 0:
                        # nothing found: offer a google search link instead
                        # TODO check google search
                        # https://pypi.org/project/googlesearch-python/
                        params = parse.urlencode({"q": term})
                        search_url = f"https://www.google.com/search?{params}"
                        rmarkup = Link.create(
                            search_url,
                            term,
                            "not found",
                            target="_blank",
                            style="color:red",
                        )
                    else:
                        for i, item in enumerate(items):
                            rmarkup += self.createItemLink(item, term, i)
                            if len(item.markups) > 0:
                                markups = ""
                                # NOTE(review): markups accumulates while the
                                # <pre> block is appended per markup, so with
                                # more than one markup per item earlier ones
                                # would be shown repeatedly
                                for _markup_name, markup in item.markups.items():
                                    markups += markup
                                    self.markup.content += f"<pre>{markups}</pre>"
                                    # break
                    self.results.content += delim + rmarkup
                    delim = "<br>"

        except BaseException as ex:
            self.handle_exception(ex)

    def addLanguageSelect(self):
        """
        add a language selector
        """
        lang_dict = Lang.get_language_dict()
        self.add_select("language:", lang_dict).bind_value(self, "language")

    def addWikiUserSelect(self):
        """
        add a wiki user selector
        """
        if len(self.wikiUsers) > 0:
            wu_dict = {}
            for wikiUser in sorted(self.wikiUsers):
                wu_dict[wikiUser] = wikiUser
            self.add_select("wiki:", wu_dict).bind_value(self, "wikiId")

    async def scholars(self, client: Client):
        """
        scholar display

        Args:
            client(Client): the client to show the scholars for
        """
        self.setup_menu()
        with ui.element("div").classes("w-full h-full"):
            try:
                self.scholarsGrid = ScholarGrid(
                    self, self.wikiUsers, self.wikiId, sparql=self.sparql
                )
                # @TODO refactor the two setup calls to one to hide wdgrid details
                # self.scholarsGrid.setup(a=self.rowB, header=self.rowA)
                # self.scholarsGrid.wdgrid.setup(a=self.rowC)
            except BaseException as ex:
                self.handle_exception(ex)
        await self.setup_footer()

    def configure_settings(self):
        """
        configure settings
        """
        self.addLanguageSelect()
        self.addWikiUserSelect()

    async def home(self, _client: Client):
        """
        provide the main content page

        The markup selection, the search terms and the search button are
        on the left of a splitter, the markup display on the right;
        messages and results follow below.
        """
        self.setup_menu()
        with ui.element("div").classes("w-full h-full"):
            with ui.splitter() as splitter:
                with splitter.before:
                    self.add_select("markup", self.markup_names).bind_value(
                        self, "markup_name"
                    )
                    self.searchTerms = ui.textarea(placeholder="enter search terms")
                    self.searchButton = ui.button(
                        "search", on_click=self.onSearchButton
                    )
                with splitter.after:
                    self.markup = ui.html()
            self.messages = ui.html()
            self.results = ui.html()
        await self.setup_footer()

smw

Created on 22.11.2022

@author: wf

SemWiki

access to Semantic mediawiki

Source code in skg/smw.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
class SemWiki:
    """
    access to a Semantic MediaWiki
    """

    def __init__(self, wikiUser: WikiUser, withLogin: bool = None):
        """
        create the wiki client and the SMW client for the given wiki user

        Args:
            wikiUser(WikiUser): the wiki user whose wikiId selects the wiki
            withLogin(bool): if True login, if None ask the wiki client
                whether a login is needed
        """
        self.wikiUser = wikiUser
        client = WikiClient.ofWikiId(wikiUser.wikiId)
        self.wikiClient = client
        login_wanted = client.needsLogin() if withLogin is None else withLogin
        if login_wanted:
            client.login()
        self.smw = SMWClient(client.getSite())

    def id_refs(
        self,
        mainlabel="pageTitle",
        condition="DOI::+",
        title: str = "DOI references",
        askExtra: str = "",
        id_prop="DOI",
        id_name="doi",
    ) -> list:
        """
        query the wiki for id references with an #ask query

        Args:
            mainlabel(str): the mainlabel to use
            condition(str): the condition to apply
            title(str): the title of the query
            askExtra(str): extra markup inserted right after the condition
            id_prop(str): the property holding the id
            id_name(str): the name under which the id is returned

        Returns:
            the result of the SMW query
        """
        ask_lines = [
            f"{{{{#ask:[[{condition}]]{askExtra}",
            f"|?{id_prop}={id_name}",
            f"|mainlabel={mainlabel}",
            "|?Creation_date=creationDate",
            "|?Modification_date=modificationDate",
            "|?Last_editor_is=lastEditor",
            "}}",
            "",  # the query ends with a newline
        ]
        return self.smw.query("\n".join(ask_lines), title)

    def papers(self):
        """
        get the paper records: the pages that have a Citation_text
        """
        return self.id_refs(
            condition="Citation_text::+",
            title="doi paper referencs",
            askExtra="\n|?Citation_text=reference",
        )

    def scholars(self):
        """
        get the scholars of the Scholar concept sorted by name and first name
        """
        scholar_props = [
            "wikiDataId",
            "name",
            "firstName",
            "description",
            "homepage",
            "orcid",
            "dblpId",
            "linkedInId",
            "researchGate",
            "gndId",
            "smartCRMId",
        ]
        # one printout line per property followed by the sort instructions
        askExtra = "".join(f"|?Scholar {prop} = {prop}\n" for prop in scholar_props)
        askExtra += "|sort=Scholar name,Scholar firstName\n"
        askExtra += "|order=ascending,ascending\n"
        return self.id_refs(
            "Scholar",
            "Concept:Scholar",
            "scholars",
            askExtra,
            "Scholar wikiDataId",
            "wikiDataId",
        )

    @classmethod
    def asMarkup(cls, scholar) -> str:
        """
        return the markup for the given scholar

        Args:
            scholar(Node): the scholar
        Returns:
            str: the semantic mediawiki markup
        """
        parts = ["{{Scholar"]
        for prop_name, prop in scholar.concept.props.items():
            # only properties with a semantic mediawiki mapping are shown
            if not prop.hasmap("smw"):
                continue
            smw_prop = prop.getmap("smw")
            if not hasattr(scholar, prop_name):
                continue
            value = getattr(scholar, prop_name)
            # @TODO refactor
            qid = Wikidata.getQid(value)
            if value != qid:
                # potential lookup need: the wikiDataId keeps the qid, any
                # other property gets the label for the qid
                if prop_name == "wikiDataId":
                    value = qid
                else:
                    value = Wikidata.getLabelForQid(qid)
            parts.append(f"\n|{smw_prop}={value}")
        parts.append("\n}}")
        return "".join(parts)

__init__(wikiUser, withLogin=None)

constructor

Parameters:

Name Type Description Default
wikiUser WikiUser

WikiUser

required
Source code in skg/smw.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def __init__(self, wikiUser: WikiUser, withLogin: bool = None):
    """
    create the wiki client and the SMW client for the given wiki user

    Args:
        wikiUser(WikiUser): the wiki user whose wikiId selects the wiki
        withLogin(bool): if True login, if None ask the wiki client
            whether a login is needed
    """
    self.wikiUser = wikiUser
    client = WikiClient.ofWikiId(wikiUser.wikiId)
    self.wikiClient = client
    login_wanted = client.needsLogin() if withLogin is None else withLogin
    if login_wanted:
        client.login()
    self.smw = SMWClient(client.getSite())

asMarkup(scholar) classmethod

return the markup for the given scholar

Parameters:

Name Type Description Default
scholar(Node)

the scholar

required

Returns: str: the semantic mediawiki markup

Source code in skg/smw.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
@classmethod
def asMarkup(cls, scholar) -> str:
    """
    return the markup for the given scholar

    Args:
        scholar(Node): the scholar
    Returns:
        str: the semantic mediawiki markup
    """
    parts = ["{{Scholar"]
    for prop_name, prop in scholar.concept.props.items():
        # only properties with a semantic mediawiki mapping are shown
        if not prop.hasmap("smw"):
            continue
        smw_prop = prop.getmap("smw")
        if not hasattr(scholar, prop_name):
            continue
        value = getattr(scholar, prop_name)
        # @TODO refactor
        qid = Wikidata.getQid(value)
        if value != qid:
            # potential lookup need: the wikiDataId keeps the qid, any
            # other property gets the label for the qid
            if prop_name == "wikiDataId":
                value = qid
            else:
                value = Wikidata.getLabelForQid(qid)
        parts.append(f"\n|{smw_prop}={value}")
    parts.append("\n}}")
    return "".join(parts)

id_refs(mainlabel='pageTitle', condition='DOI::+', title='DOI references', askExtra='', id_prop='DOI', id_name='doi')

get a list of id references from the given wiki

Parameters:

Name Type Description Default
mainlabel(str)

the mainlabel to use

required
condition(str)

the condition to apply

required
title(str)

the title of the query

required
Source code in skg/smw.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
    def id_refs(
        self,
        mainlabel="pageTitle",
        condition="DOI::+",
        title: str = "DOI references",
        askExtra: str = "",
        id_prop="DOI",
        id_name="doi",
    ) -> list:
        """
        query the wiki for id references with an #ask query

        Args:
            mainlabel(str): the mainlabel to use
            condition(str): the condition to apply
            title(str): the title of the query
            askExtra(str): extra markup inserted right after the condition
            id_prop(str): the property holding the id
            id_name(str): the name under which the id is returned

        Returns:
            the result of the SMW query
        """
        ask_lines = [
            f"{{{{#ask:[[{condition}]]{askExtra}",
            f"|?{id_prop}={id_name}",
            f"|mainlabel={mainlabel}",
            "|?Creation_date=creationDate",
            "|?Modification_date=modificationDate",
            "|?Last_editor_is=lastEditor",
            "}}",
            "",  # the query ends with a newline
        ]
        return self.smw.query("\n".join(ask_lines), title)

papers()

get the paper records

Source code in skg/smw.py
63
64
65
66
67
68
69
70
71
def papers(self):
    """
    get the paper records: the pages that have a Citation_text
    """
    return self.id_refs(
        condition="Citation_text::+",
        title="doi paper referencs",
        askExtra="\n|?Citation_text=reference",
    )

scholars()

get scholars

Source code in skg/smw.py
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
    def scholars(self):
        """
        get the scholars of the Scholar concept sorted by name and first name
        """
        scholar_props = [
            "wikiDataId",
            "name",
            "firstName",
            "description",
            "homepage",
            "orcid",
            "dblpId",
            "linkedInId",
            "researchGate",
            "gndId",
            "smartCRMId",
        ]
        # one printout line per property followed by the sort instructions
        askExtra = "".join(f"|?Scholar {prop} = {prop}\n" for prop in scholar_props)
        askExtra += "|sort=Scholar name,Scholar firstName\n"
        askExtra += "|order=ascending,ascending\n"
        return self.id_refs(
            "Scholar",
            "Concept:Scholar",
            "scholars",
            askExtra,
            "Scholar wikiDataId",
            "wikiDataId",
        )

sotsog

Created on 2022-11-16

@author: wf

SotSog

Standing on the shoulders of giants

Source code in skg/sotsog.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
class SotSog:
    """
    Standing on the shoulders of giants
    """

    def __init__(self, debug: bool = False):
        """
        constructor

        Args:
            debug(bool): if True show debugging information
        """
        # self.debug is read below and by the searches, so it has to be set
        # first - it was never assigned before
        self.debug = debug
        Node.debug = self.debug
        self.wikipedia_url = (
            "https://en.wikipedia.org/wiki/Standing_on_the_shoulders_of_giants"
        )
        self.skg_def = SKG_Def()
        self.scholar_concept = self.skg_def.concepts["Scholar"]

    @staticmethod
    def _strip_doi_prefix(search_term: str) -> str:
        """
        remove a leading https://doi.org resolver prefix from the search term

        Args:
            search_term(str): the search term to check

        Returns:
            str: the search term without the prefix and the slash following it
        """
        for prefix in ["https://doi.org"]:
            if search_term.startswith(prefix):
                # only cut the prefix itself and drop the separating slash
                return search_term[len(prefix) :].lstrip("/")
        return search_term

    def getMarkups(self, item, options: SearchOptions) -> dict:
        """
        get the markups for the given item and search options

        Args:
            item(Node): the item to get the markup for
            options(SearchOptions): the search options to apply

        Returns:
            dict: the markups by markup name
        """
        markups = {}
        do_markup = len(options.markup_names) > 0
        if do_markup:
            if item.concept.name == "Paper":
                doi = getattr(item, "doi", None)
                if doi is not None:
                    crossref = Crossref()
                    if "bibtex" in options.markup_names:
                        bibentry = crossref.doiBibEntry([doi])
                        markups["bibtex"] = bibentry
                    if "scite" in options.markup_names:
                        # meta_data=crossref.doiMetaData([doi])
                        # scite_entry=crossref.asScite(meta_data)
                        if not hasattr(item, "doi_obj"):
                            item.fromDOI(doi)
                        scite_entry = item.doi_obj.asScite()
                        markups["scite"] = scite_entry
            if item.concept.name == "Scholar":
                if "smw" in options.markup_names:
                    markups["smw"] = SemWiki.asMarkup(item)
        return markups

    def wd_search(self, wd: Wikidata, search_term: str, options) -> list:
        """
        do a wikidata search

        Args:
            wd(Wikidata): the wikidata access to get the classes of the hits from
            search_term(str): the term to search for
            options(SearchOptions): the search options to apply

        Returns:
            list: the items found
        """
        items = []
        wds = WikidataSearch(language=options.lang, debug=self.debug)
        search_options = wds.searchOptions(search_term, limit=options.limit)
        qids = [qid for qid, _itemLabel, _desc in search_options]
        class_map = wd.getClassQids(qids)
        for qid, itemLabel, desc in search_options:
            if qid in class_map:
                class_rows = class_map[qid]
                for class_row in class_rows:
                    class_qid = class_row["class_qid"]
                    concept = self.skg_def.conceptForQid(class_qid)
                    if concept is not None:
                        wd_items = concept.cls.from_wikidata_via_id(
                            concept, "wikiDataId", qid, lang=options.lang
                        )
                        if len(wd_items) > 0:
                            # only the first item is used
                            item = wd_items[0]
                            items.append(item)
                            self.handleItem(item, qid, itemLabel, desc, options)
        return items

    def handleItem(self, item, item_id, itemLabel, desc, options):
        """
        handle the given item as a search result

        Sets the markups of the item, prints item and markups if
        options.show is set and opens the browser url of the item if
        options.open_browser is set.
        """
        if options.show:
            print(f"{itemLabel}({item_id}):{desc}✅")
            print(item)
        item.markups = self.getMarkups(item, options)
        if options.show:
            for markup_name, markup in item.markups.items():
                print(f"{markup_name} markup:")
                print(markup)
        if options.open_browser:
            browser_url = item.browser_url()
            if browser_url is not None:
                print(f"opening {browser_url} in browser")
                webbrowser.open(browser_url)

    def handleItems(self, items, options):
        """
        handle the given items
        """
        for item in items:
            item_id = item.wikiDataId
            itemLabel = item.label
            desc = "?"
            self.handleItem(item, item_id, itemLabel, desc, options)

    def handleDoiItem(self, item, options: SearchOptions):
        """
        handle the given item using its doi as id and its title as label
        and description
        """
        item_id = item.doi
        itemLabel = item.title
        desc = item.title
        self.handleItem(item, item_id, itemLabel, desc, options)

    def search(self, search_list, options: SearchOptions) -> SearchResult:
        """
        search with the given search list

        Args:
            search_list(list): a list of search terms
            options(SearchOptions): the search options to apply

        Returns:
            SearchResult: the search result with the items found
        """
        search_result = SearchResult(search_list, options)
        search_term = " ".join(search_list)
        # https://doi.org/10.x/y used to be turned into /10.x/y
        search_term = self._strip_doi_prefix(search_term)
        wd = Wikidata(debug=self.debug)
        if ORCID.isORCID(search_term):
            scholar_concept = self.skg_def.concepts["Scholar"]
            items = Node.from_wikidata_via_id(
                scholar_concept, "orcid", search_term, options.lang
            )
            self.handleItems(items, options)
        elif DOI.isDOI(search_term):
            # DOI may not be referencing paper but something else
            paper_concept = self.skg_def.concepts["Paper"]
            items = Paper.from_wikidata_via_id(
                paper_concept, "doi", search_term, options.lang
            )
            self.handleItems(items, options)
            dblp_items = Paper.from_dblp_via_id(
                paper_concept, "doi", search_term.lower()
            )
            if len(dblp_items) == 0:
                # not found in dblp: create the paper from the DOI itself
                paper = Paper()
                paper.concept = paper_concept
                paper.fromDOI(search_term)
                paper.provenance = "doi"
                dblp_items = [paper]
            for item in dblp_items:
                self.handleDoiItem(item, options)
            items.extend(dblp_items)
        else:
            items = self.wd_search(wd, search_term, options)
        search_result.items = items
        return search_result

__init__()

constructor

Source code in skg/sotsog.py
27
28
29
30
31
32
33
34
35
36
37
def __init__(self, debug: bool = False):
    """
    constructor

    Args:
        debug(bool): if True show debugging information
    """
    # self.debug is read below, so it has to be set first - it was never
    # assigned before
    self.debug = debug
    Node.debug = self.debug
    self.wikipedia_url = (
        "https://en.wikipedia.org/wiki/Standing_on_the_shoulders_of_giants"
    )
    self.skg_def = SKG_Def()
    self.scholar_concept = self.skg_def.concepts["Scholar"]

getMarkups(item, options)

get the markups for the given item and search options

Parameters:

Name Type Description Default
item(Node)

the item to get the markup for

required
options(SearchOptions)

the search options to apply

required
Source code in skg/sotsog.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
def getMarkups(self, item, options: SearchOptions) -> dict:
    """
    get the markups for the given item and search options

    Args:
        item(Node): the item to get the markup for
        options(SearchOptions): the search options to apply

    Returns:
        dict: the generated markup by markup name ("bibtex", "scite", "smw");
        empty if no markup was requested or none applies to the item
    """
    result = {}
    requested = options.markup_names
    if len(requested) == 0:
        # nothing requested - nothing to do
        return result
    if item.concept.name == "Paper":
        doi = getattr(item, "doi", None)
        if doi is not None:
            # papers can only be rendered when a DOI is available
            crossref = Crossref()
            if "bibtex" in requested:
                result["bibtex"] = crossref.doiBibEntry([doi])
            if "scite" in requested:
                # make sure the DOI details are available before rendering
                if not hasattr(item, "doi_obj"):
                    item.fromDOI(doi)
                result["scite"] = item.doi_obj.asScite()
    if item.concept.name == "Scholar" and "smw" in requested:
        result["smw"] = SemWiki.asMarkup(item)
    return result

handleItem(item, item_id, itemLabel, desc, options)

handle the given item as a search result

Source code in skg/sotsog.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
def handleItem(self, item, item_id, itemLabel, desc, options):
    """
    handle the given item as a search result

    Attaches the markups created by getMarkups to the item as
    ``item.markups``, prints the item and its markups if ``options.show``
    is set and opens the item's browser url if ``options.open_browser``
    is set and the item provides one.

    Args:
        item: the search result item
        item_id: the identifier shown for the item
        itemLabel: the label shown for the item
        desc: the description shown for the item
        options: the search options (show, open_browser and markup settings)
    """
    if options.show:
        print(f"{itemLabel}({item_id}):{desc}✅")
        print(item)
    item.markups = self.getMarkups(item, options)
    if options.show:
        for markup_name in item.markups:
            print(f"{markup_name} markup:")
            print(item.markups[markup_name])
    if not options.open_browser:
        return
    browser_url = item.browser_url()
    if browser_url is None:
        # the item has no page to show
        return
    print(f"opening {browser_url} in browser")
    webbrowser.open(browser_url)

handleItems(items, options)

handle the given items

Source code in skg/sotsog.py
115
116
117
118
119
120
121
122
123
def handleItems(self, items, options):
    """
    handle the given items

    Each item is passed on to handleItem with its wikiDataId as id, its
    label as label and "?" as description.

    Args:
        items: the items to handle
        options: the search options to apply
    """
    for entry in items:
        self.handleItem(entry, entry.wikiDataId, entry.label, "?", options)

search(search_list, options)

search with the given search list

Parameters:

Name Type Description Default
search_list(list)

a list of search terms

required
options(SearchOptions)

the search options to apply

required
Source code in skg/sotsog.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
def search(self, search_list, options: SearchOptions) -> SearchResult:
    """
    search with the given search list

    The search terms are joined with blanks. An ORCID is looked up as a
    Scholar in Wikidata, a DOI as a Paper in Wikidata and dblp (falling
    back to a paper created from the DOI itself), anything else is
    handed to the Wikidata search.

    Args:
        search_list(list): a list of search terms
        options(SearchOptions): the search options to apply

    Returns:
        SearchResult: the search result with its items set
    """
    result = SearchResult(search_list, options)
    term = " ".join(search_list)
    doi_url_prefix = "https://doi.org"
    if term.startswith(doi_url_prefix):
        # NOTE(review): the slash following the prefix is kept - confirm
        # that DOI.isDOI accepts a leading "/"
        term = term.replace(doi_url_prefix, "")
    wd = Wikidata(debug=self.debug)
    if ORCID.isORCID(term):
        scholar_concept = self.skg_def.concepts["Scholar"]
        found = Node.from_wikidata_via_id(scholar_concept, "orcid", term, options.lang)
        self.handleItems(found, options)
    elif DOI.isDOI(term):
        # DOI may not be referencing paper but something else
        paper_concept = self.skg_def.concepts["Paper"]
        found = Paper.from_wikidata_via_id(paper_concept, "doi", term, options.lang)
        self.handleItems(found, options)
        doi_papers = Paper.from_dblp_via_id(paper_concept, "doi", term.lower())
        if len(doi_papers) == 0:
            # not known to dblp - create the paper from the DOI itself
            fallback = Paper()
            fallback.concept = paper_concept
            fallback.fromDOI(term)
            fallback.provenance = "doi"
            doi_papers = [fallback]
        for doi_paper in doi_papers:
            self.handleDoiItem(doi_paper, options)
        found.extend(doi_papers)
    else:
        found = self.wd_search(wd, term, options)
    result.items = found
    return result

wd_search(wd, search_term, options)

do a wikidata search

Source code in skg/sotsog.py
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def wd_search(self, wd: Wikidata, search_term: str, options) -> list:
    """
    do a wikidata search

    The search term is looked up via WikidataSearch. For each hit the
    classes returned by wd.getClassQids are checked against the concepts
    of the knowledge graph definition; for every class with a known
    concept the first matching item is handled and collected.

    Args:
        wd(Wikidata): the Wikidata access to get the classes of the hits from
        search_term(str): the term to search for
        options: the search options (lang and limit are used here)

    Returns:
        list: the items found
    """
    found = []
    searcher = WikidataSearch(language=options.lang, debug=self.debug)
    candidates = searcher.searchOptions(search_term, limit=options.limit)
    class_map = wd.getClassQids([qid for qid, _label, _desc in candidates])
    for qid, itemLabel, desc in candidates:
        if qid not in class_map:
            continue
        for class_row in class_map[qid]:
            concept = self.skg_def.conceptForQid(class_row["class_qid"])
            if concept is None:
                # class is not part of the knowledge graph definition
                continue
            wd_items = concept.cls.from_wikidata_via_id(
                concept, "wikiDataId", qid, lang=options.lang
            )
            if len(wd_items) > 0:
                first = wd_items[0]
                found.append(first)
                self.handleItem(first, qid, itemLabel, desc, options)
    return found

sotsog_cmd

Created on 2024-02-26

@author: wf

SotSogCmd

Bases: WebserverCmd

command line handling for Standing on the Shoulders of Giants

Source code in skg/sotsog_cmd.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
class SotSogCmd(WebserverCmd):
    """
    command line handling for Standing on the Shoulders of Giants
    """

    def __init__(self):
        """
        constructor

        gets the configuration from SkgBrowser and initializes the
        WebserverCmd base class with it
        """
        self.config = SkgBrowser.get_config()
        WebserverCmd.__init__(self, self.config, SkgBrowser, DEBUG)

    def getArgParser(self, description: str, version_msg) -> ArgumentParser:
        """
        override the default argparser call

        Args:
            description(str): the description passed on to the base class
            version_msg: the version message passed on to the base class

        Returns:
            ArgumentParser: the base class parser extended by the search
            arguments and options
        """
        parser = super().getArgParser(description, version_msg)
        parser.add_argument("search", action="store", nargs="*", help="search terms")
        parser.add_argument(
            "--bibtex", help="output bibtex format", action="store_true"
        )
        parser.add_argument("-la", "--lang", help="language code to use", default="en")
        parser.add_argument(
            "-li",
            "--limit",
            help="limit the number of search results",
            type=int,
            default=9,
        )
        parser.add_argument(
            "-nb", "--nobrowser", help="do not open browser", action="store_true"
        )
        parser.add_argument("--scite", help="output #scite format", action="store_true")
        parser.add_argument(
            "--smw", help="output Semantic MediaWiki (SMW) format", action="store_true"
        )
        parser.add_argument(
            "--wikiId", help="the id of the SMW wiki to connect with", default="ceur-ws"
        )
        parser.add_argument(
            "-dw",
            "--dblp2wikidata",
            action="store_true",
            help="Synchronize DBLP entries with Wikidata",
        )

        return parser

    def handle_args(self) -> bool:
        """
        handle the command line args

        Creates the SotSog searcher with the search options derived from
        the command line. Arguments not handled by the base class lead to
        a dblp to Wikidata transfer (--dblp2wikidata) or to a search.

        Returns:
            bool: True if the arguments have been handled
        """
        args = self.args
        self.sotsog = SotSog(debug=args.debug)
        # the markup flags share their names with the markup formats
        markup_names = [
            name for name in ("bibtex", "scite", "smw") if getattr(args, name)
        ]
        self.sotsog.options = SearchOptions(
            limit=args.limit,
            lang=args.lang,
            markup_names=markup_names,
            open_browser=not args.nobrowser,
        )
        handled = super().handle_args()
        if not handled and args.dblp2wikidata:
            d2w = Dblp2Wikidata()
            handled = d2w.transfer(args)
        if not handled:
            # delegate to the searcher: this command class does not define
            # a search method of its own, SotSog does
            self.sotsog.search(args.search, self.sotsog.options)
            handled = True
        return handled

__init__()

constructor

Source code in skg/sotsog_cmd.py
22
23
24
25
26
27
def __init__(self):
    """
    constructor

    fetches the configuration from SkgBrowser, keeps it as self.config
    and initializes the WebserverCmd base class with it
    """
    config = SkgBrowser.get_config()
    self.config = config
    WebserverCmd.__init__(self, self.config, SkgBrowser, DEBUG)

getArgParser(description, version_msg)

override the default argparser call

Source code in skg/sotsog_cmd.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def getArgParser(self, description: str, version_msg) -> ArgumentParser:
    """
    override the default argparser call

    Args:
        description(str): the description passed on to the base class
        version_msg: the version message passed on to the base class

    Returns:
        ArgumentParser: the base class parser extended by the search
        arguments and options
    """
    parser = super().getArgParser(description, version_msg)
    add = parser.add_argument
    # positional search terms
    add("search", action="store", nargs="*", help="search terms")
    # output and search options - registered in help display order
    add("--bibtex", action="store_true", help="output bibtex format")
    add("-la", "--lang", default="en", help="language code to use")
    add("-li", "--limit", type=int, default=9, help="limit the number of search results")
    add("-nb", "--nobrowser", action="store_true", help="do not open browser")
    add("--scite", action="store_true", help="output #scite format")
    add("--smw", action="store_true", help="output Semantic MediaWiki (SMW) format")
    add("--wikiId", default="ceur-ws", help="the id of the SMW wiki to connect with")
    add(
        "-dw",
        "--dblp2wikidata",
        action="store_true",
        help="Synchronize DBLP entries with Wikidata",
    )
    return parser

handle_args()

handle the command line args

Source code in skg/sotsog_cmd.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def handle_args(self) -> bool:
    """
    handle the command line args

    Creates the SotSog searcher with the search options derived from the
    command line. Arguments not handled by the base class lead to a dblp
    to Wikidata transfer (--dblp2wikidata) or to a search.

    Returns:
        bool: True if the arguments have been handled
    """
    args = self.args
    self.sotsog = SotSog(debug=args.debug)
    # the markup flags share their names with the markup formats
    markup_names = [
        name for name in ("bibtex", "scite", "smw") if getattr(args, name)
    ]
    self.sotsog.options = SearchOptions(
        limit=args.limit,
        lang=args.lang,
        markup_names=markup_names,
        open_browser=not args.nobrowser,
    )
    handled = super().handle_args()
    if not handled and args.dblp2wikidata:
        d2w = Dblp2Wikidata()
        handled = d2w.transfer(args)
    if not handled:
        # delegate to the searcher created above: the search is
        # implemented by SotSog.search(search_list, options)
        self.sotsog.search(args.search, self.sotsog.options)
        handled = True
    return handled

main(argv=None)

main call

Source code in skg/sotsog_cmd.py
 95
 96
 97
 98
 99
100
101
def main(argv: list = None):
    """
    main call

    Args:
        argv(list): the command line arguments passed on to cmd_main

    Returns:
        the exit code returned by SotSogCmd.cmd_main
    """
    return SotSogCmd().cmd_main(argv)

version

Created on 2022-04-01

@author: wf

Version

Bases: object

Version handling for pysotsog

Source code in skg/version.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
class Version(object):
    """
    Version handling for pysotsog

    plain container of project metadata as class attributes
    """

    # project identification
    name = "pysotsog"
    description = "sotsog: Standing on the shoulders of giants - with direct access to the clouds"
    version = skg.__version__
    # creation and last update date
    date = "2022-11-16"
    updated = "2023-10-28"
    authors = "Wolfgang Fahl"
    # documentation, discussion and source code locations
    doc_url = "https://wiki.bitplan.com/index.php/Pysotsog"
    chat_url = "https://github.com/WolfgangFahl/pysotsog/discussions"
    cm_url = "https://github.com/WolfgangFahl/pysotsog"
    license = (
        "Copyright 2022 contributors. All rights reserved.\n"
        "  Licensed under the Apache License 2.0\n"
        "  http://www.apache.org/licenses/LICENSE-2.0\n"
        '  Distributed on an "AS IS" basis without warranties\n'
        "  or conditions of any kind, either express or implied."
    )
    # multi line summary assembled from the attributes above
    longDescription = (
        f"{name} version {version}\n"
        f"{description}\n"
        f"  Created by {authors} on {date} last updated {updated}"
    )

wdsearch

Created on 24.07.2022

@author: wf

WikidataSearch

Bases: object

Wikidata Search

Source code in skg/wdsearch.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
class WikidataSearch(object):
    """
    Wikidata Search

    client for the wbsearchentities action of the Wikidata API
    """

    def __init__(self, language="en", timeout=2.0, debug: bool = False):
        """
        Constructor

        Args:
            language(str): the language to use e.g. en/fr
            timeout(float): maximum time to wait for result
            debug(bool): if True debug details should be shown
        """
        self.language = language
        self.timeout = timeout
        self.debug = debug

    def _search_url(self, searchFor: str, limit: int = 9) -> str:
        """
        get the wbsearchentities API url for the given search

        Args:
            searchFor(str): the string to search for
            limit(int): the maximum amount of results to search for

        Returns:
            str: the url with all query parameters url-encoded
        """
        query = urllib.parse.urlencode(
            {
                "action": "wbsearchentities",
                "language": self.language,
                "format": "json",
                "limit": limit,
                "search": searchFor,
            }
        )
        return f"https://www.wikidata.org/w/api.php?{query}"

    def searchOptions(self, searchFor: str, limit: int = 9) -> list:
        """
        search and return a list of qid,itemLabel description tuples

        Args:
            searchFor(str): the string to search for
            limit(int): the maximum amount of results to search for

        Returns:
            list: (qid, itemLabel, description) tuples; the description is
            empty if the search result has none, the list is empty if the
            search failed
        """
        options = []
        srlist = self.search(searchFor, limit)
        if srlist is not None:
            for sr in srlist:
                qid = sr["id"]
                itemLabel = sr["label"]
                desc = ""
                if "display" in sr:
                    display = sr["display"]
                    if "description" in display:
                        desc = display["description"]["value"]
                options.append((qid, itemLabel, desc))
        return options

    def search(self, searchFor: str, limit: int = 9):
        """
        search Wikidata entities for the given string

        Args:
            searchFor(str): the string to search for
            limit(int): the maximum amount of results to search for

        Returns:
            list: the "search" entries of the API response or None if the
            request or the decoding of the response failed
        """
        try:
            apisearch = self._search_url(searchFor, limit)
            if self.debug:
                print(apisearch)
            with urllib.request.urlopen(apisearch, timeout=self.timeout) as url:
                searchResult = json.loads(url.read().decode())
            return searchResult["search"]
        except Exception as error:
            # deliberate best effort: callers treat None as "no result",
            # but the reason is no longer hidden when debugging
            if self.debug:
                print(f"wikidata search for {searchFor!r} failed: {error}")
            return None

__init__(language='en', timeout=2.0, debug=False)

Constructor

Parameters:

Name Type Description Default
language(str)

the language to use e.g. en/fr

'en'
timeout(float)

maximum time to wait for result

2.0
debug(bool)

if True debug details should be shown

False
Source code in skg/wdsearch.py
18
19
20
21
22
23
24
25
26
27
28
29
def __init__(self, language="en", timeout=2.0, debug: bool = False):
    """
    Constructor

    stores the given settings on the instance for later searches

    Args:
        language(str): the language to use e.g. en/fr
        timeout(float): maximum time to wait for result
        debug(bool): if True debug details should be shown
    """
    self.language, self.timeout, self.debug = language, timeout, debug

search(searchFor, limit=9)

Parameters:

Name Type Description Default
searchFor(str)

the string to search for

required
limit(int)

the maximum amount of results to search for

9
Source code in skg/wdsearch.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def search(self, searchFor: str, limit: int = 9):
    """
    search Wikidata entities for the given string

    Args:
        searchFor(str): the string to search for
        limit(int): the maximum amount of results to search for

    Returns:
        list: the "search" entries of the API response or None if the
        request or the decoding of the response failed
    """
    try:
        # url-encode all query parameters, not just the search string
        query = urllib.parse.urlencode(
            {
                "action": "wbsearchentities",
                "language": self.language,
                "format": "json",
                "limit": limit,
                "search": searchFor,
            }
        )
        apisearch = f"https://www.wikidata.org/w/api.php?{query}"
        if self.debug:
            print(apisearch)
        with urllib.request.urlopen(apisearch, timeout=self.timeout) as url:
            searchResult = json.loads(url.read().decode())
        return searchResult["search"]
    except Exception as error:
        # deliberate best effort: callers treat None as "no result",
        # but the reason is no longer hidden when debugging
        if self.debug:
            print(f"wikidata search for {searchFor!r} failed: {error}")
        return None

searchOptions(searchFor, limit=9)

search and return a list of qid,itemLabel description tuples

Parameters:

Name Type Description Default
searchFor(str)

the string to search for

required
limit(int)

the maximum amount of results to search for

9
Source code in skg/wdsearch.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def searchOptions(self, searchFor: str, limit: int = 9) -> list:
    """
    search and return a list of qid,itemLabel description tuples

    Args:
        searchFor(str): the string to search for
        limit(int): the maximum amount of results to search for

    Returns:
        list: (qid, itemLabel, description) tuples; the description is
        empty if the search result has none, the list is empty if the
        search returned None
    """
    hits = self.search(searchFor, limit)
    if hits is None:
        return []
    results = []
    for hit in hits:
        qid, label = hit["id"], hit["label"]
        description = ""
        if "display" in hit:
            shown = hit["display"]
            if "description" in shown:
                description = shown["description"]["value"]
        results.append((qid, label, description))
    return results

wikidata

Created on 2022-11-16

@author: wf

Wikidata

Wikidata access wrapper

Source code in skg/wikidata.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
class Wikidata:
    """
    Wikidata access wrapper

    Wraps a SPARQL endpoint. The most recently constructed wrapper is
    kept in the class attribute ``instance`` and returned by getInstance.
    """

    # most recently constructed wrapper - set by the constructor
    instance = None

    def __init__(
        self, endpoint: str = "https://query.wikidata.org/sparql", debug: bool = False
    ):
        """
        constructor

        Args:
            endpoint(str): the url of the SPARQL endpoint to query
            debug(bool): if True the generated queries are printed
        """
        self.endpoint = endpoint
        self.sparql = SPARQL(endpoint)
        self.debug = debug
        Wikidata.instance = self

    @classmethod
    def getInstance(cls):
        """
        get the shared wrapper, creating one with default settings if
        none has been constructed yet
        """
        if cls.instance is None:
            # the constructor registers itself as the shared instance
            Wikidata()
        return cls.instance

    @classmethod
    def getQid(cls, wd_url: str):
        """
        get the Q-Identifier for the given wikidata entity url

        Args:
            wd_url(str): the url e.g. http://www.wikidata.org/entity/Q42

        Returns:
            str: the url with the entity prefix removed
        """
        qid = wd_url.replace("http://www.wikidata.org/entity/", "")
        return qid

    @classmethod
    def getLabelForQid(cls, qid: str, lang: str = "en") -> str:
        """
        get a label for the given Wikidata QID

        Args:
            qid(str): the Wikidata ID
            lang(str): the language

        Returns:
            str: the label or None if the query did not return exactly
            one row
        """
        sparql_query = f"""SELECT ?itemLabel WHERE {{
  VALUES ?item {{
    wd:{qid}
  }}
  ?item rdfs:label ?itemLabel.
  FILTER(LANG(?itemLabel)="{lang}").
}}"""
        wd = Wikidata.getInstance()
        lod = wd.sparql.queryAsListOfDicts(sparql_query)
        label = None
        if len(lod) == 1:
            label = lod[0]["itemLabel"]
        return label

    def getClassQids(self, qids: list) -> dict:
        """
        get the Wikidata Q-Identifiers
        for the given wikidata ids

        Args:
            qids(list): the list of id

        Returns:
            dict: the class rows (instance of / subclass of) looked up by
            the qid of the item; empty if no ids are given
        """
        if not qids:
            # nothing to look up - do not bother the endpoint with an
            # empty VALUES clause
            return {}
        query_head = (
            "# get the instanceof values for a given entity\n"
            "SELECT ?item ?itemLabel ?qid ?class_qid ?class ?classLabel\n"
            "WHERE \n"
            "{\n"
            "  VALUES ?item {\n"
        )
        value_lines = []
        for qid in qids:
            # accept plain Q-Identifiers as well as full entity urls
            if not qid.startswith("http:"):
                wd_url = f"http://www.wikidata.org/entity/{qid}"
            else:
                wd_url = qid
            value_lines.append(f"    <{wd_url}>\n")
        query_tail = (
            "}\n"
            "  ?item wdt:P31/wdt:P279* ?class.\n"
            "  ?item rdfs:label ?itemLabel\n"
            '  FILTER(LANG(?itemLabel)="en")\n'
            "  ?class rdfs:label ?classLabel\n"
            '  FILTER(LANG(?classLabel)="en")\n'
            '  BIND(REPLACE(STR(?class),"http://www.wikidata.org/entity/","") AS ?class_qid)\n'
            '  BIND(REPLACE(STR(?item),"http://www.wikidata.org/entity/","") AS ?qid)\n'
            "}"
        )
        sparql_query = query_head + "".join(value_lines) + query_tail
        if self.debug:
            print(sparql_query)
        class_rows = self.sparql.queryAsListOfDicts(sparql_query)
        class_map = LOD.getLookup(class_rows, "qid", withDuplicates=True)
        return class_map

__init__(endpoint='https://query.wikidata.org/sparql', debug=False)

constructor

Source code in skg/wikidata.py
18
19
20
21
22
23
24
25
26
27
def __init__(
    self, endpoint: str = "https://query.wikidata.org/sparql", debug: bool = False
):
    """
    constructor

    Args:
        endpoint(str): the url of the SPARQL endpoint to query
        debug(bool): the debug flag to keep on the instance
    """
    self.endpoint = endpoint
    sparql = SPARQL(endpoint)
    self.sparql = sparql
    self.debug = debug
    # register this wrapper as the shared class level instance
    Wikidata.instance = self

getClassQids(qids)

get the Wikidata Q-Identifiers for the given wikidata ids

Parameters:

Name Type Description Default
qids(list)

the list of id

required
Source code in skg/wikidata.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
    def getClassQids(self, qids: list) -> dict:
        """
        get the Wikidata Q-Identifiers
        for the given wikidata ids

        Args:
            qids(list): the list of id
        """
        sparql_query = f"""# get the instanceof values for a given entity
SELECT ?item ?itemLabel ?qid ?class_qid ?class ?classLabel
WHERE 
{{
  VALUES ?item {{
"""
        for qid in qids:
            if not qid.startswith("http:"):
                wd_url = f"http://www.wikidata.org/entity/{qid}"
            else:
                wd_url = qid
            sparql_query += f"    <{wd_url}>\n"
        sparql_query += f"""}}
  ?item wdt:P31/wdt:P279* ?class.
  ?item rdfs:label ?itemLabel
  FILTER(LANG(?itemLabel)="en")
  ?class rdfs:label ?classLabel
  FILTER(LANG(?classLabel)="en")
  BIND(REPLACE(STR(?class),"http://www.wikidata.org/entity/","") AS ?class_qid)
  BIND(REPLACE(STR(?item),"http://www.wikidata.org/entity/","") AS ?qid)
}}"""
        if self.debug:
            print(sparql_query)
        class_rows = self.sparql.queryAsListOfDicts(sparql_query)
        class_map = LOD.getLookup(class_rows, "qid", withDuplicates=True)
        return class_map

getLabelForQid(qid, lang='en') classmethod

get a label for the given Wikidata QID

Parameters:

Name Type Description Default
qid(str)

the Wikidata ID

required
lang(str)

the language

'en'
Source code in skg/wikidata.py
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
    @classmethod
    def getLabelForQid(self, qid: str, lang: str = "en") -> str:
        """
        get a label for the given Wikidata QID

        Args:
            qid(str): the Wikidata ID
            lang(str): the language
        """
        sparql_query = f"""SELECT ?itemLabel WHERE {{
  VALUES ?item {{
    wd:{qid}
  }}
  ?item rdfs:label ?itemLabel.
  FILTER(LANG(?itemLabel)="{lang}").
}}"""
        wd = Wikidata.getInstance()
        lod = wd.sparql.queryAsListOfDicts(sparql_query)
        label = None
        if len(lod) == 1:
            label = lod[0]["itemLabel"]
        return label