pysotsog API Documentation

`citeproc`

Created on 2022-12-21

@author: wf

`Citeproc`

see https://en.wikipedia.org/wiki/CiteProc

Source code in skg/citeproc.py

class Citeproc:
    """
    Conversion of citeproc metadata to Semantic MediaWiki markup

    see https://en.wikipedia.org/wiki/CiteProc
    """

    @classmethod
    def asScite(cls, meta_data: dict, retrieved_from: str) -> str:
        """
        convert the given meta data to #Scite format

        see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

        Args:
            meta_data(dict): the citeproc compatible metadata dict to convert
            retrieved_from(str): the url the metadata was retrieved from

        Returns:
            str: Semantic Mediawiki markup

        Raises:
            KeyError: if meta_data has no "title" entry
        """

        def unlist(value):
            """join the items of a list with ";" - other values pass through"""
            if not isinstance(value, list):
                return value
            text = ";".join(f"{item}" for item in value)
            if len(value) > 1:
                # tell Semantic MediaWiki which separator splits the values
                text += "|+sep=;"
            return text

        def firstValue(value):
            """first item of a list (None for an empty list), else the value"""
            if isinstance(value, list):
                return value[0] if value else None
            return value

        def get_author(value) -> str:
            """
            get the author markup

            Args:
                value(list): the list to disassemble

            Returns:
                str: Mediawiki markup
            """
            names = []
            for arec in value:
                if "given" in arec and "family" in arec:
                    names.append(f"{arec['given']} {arec['family']}")
                elif "family" in arec:
                    names.append(f"{arec['family']}")
                # incomplete author records (no family name) are ignored
            return ";".join(names)

        def get_year(key: str):
            """year of the date entry with the given key, "" if unavailable"""
            try:
                found = meta_data[key]["date-parts"][0][0]
            except (KeyError, IndexError, TypeError):
                return ""
            # a date part may be None - never render it as the text "None"
            return found or ""

        # timezone aware replacement for the deprecated datetime.utcnow()
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
        ref_type = "journal-article"
        # an empty title list no longer raises an IndexError
        title = firstValue(meta_data["title"]) or ""
        title_2 = title.lower()[:2]
        author_lower = ""
        # an empty author list no longer raises an IndexError
        first_author = firstValue(meta_data.get("author"))
        if first_author and "family" in first_author:
            family = firstValue(first_author["family"])
            if family:
                author_lower = family.lower()
        # the print publication date wins over the issued date
        year = get_year("published-print") or get_year("issued")
        # reference key: family name + year + first two letters of the title
        reference = f"{author_lower}{year}{title_2}"
        parts = []
        for skey, mkey, func in [
            ("title", "title", unlist),
            ("subtitle", "subtitle", unlist),
            ("authors", "author", get_author),
            ("journal", "container-title", unlist),
            ("publisher", "publisher", str),
            ("issn", "ISSN", unlist),
            ("subject", "subject", unlist),
            ("volume", "volume", str),
            ("pages", "page", str),
            ("doi", "DOI", str),
        ]:
            value = meta_data.get(mkey)
            # missing and empty entries are left out of the markup
            if value:
                parts.append(f"\n|{skey}={func(value)}")
        markup = "".join(parts)
        markup = f"""{{{{#scite:
|reference={reference}
|type={ref_type}{markup}
|year={year}
|retrieved-from={retrieved_from}
|retrieved-on={timestamp}
}}}}"""
        full_markup = f"{title}\n[[CiteRef::{reference}]]\n{markup}"
        return full_markup

`asScite(meta_data, retrieved_from)` `classmethod`

convert the given meta data to #Scite format

see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

Parameters:

Name	Type	Description	Default
`meta_data`	`dict`	the citeproc compatible metadata dict to convert	required
`retrieved_from`	`str`	the url the metadata was retrieved from	required

Returns:

Name	Type	Description
`str`	`str`	Semantic Mediawiki markup

Source code in skg/citeproc.py

    @classmethod
    def asScite(cls, meta_data: dict, retrieved_from: str) -> str:
        """
        convert the given meta data to #Scite format

        see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php
        Args:
            meta_data(dict): the citeproc compatible metadata dict to convert
            retrieved_from(str): the url the metadata was retrieved from

        Returns:
            str: Semantic Mediawiki markup
        """

        def unlist(value):
            if type(value) != list:
                return value
            text = ""
            delim = ""
            for item in value:
                text += f"{delim}{item}"
                delim = ";"
            if len(value) > 1:
                text += "|+sep=;"
            return text

        def firstValue(value):
            if type(value) != list:
                return value
            else:
                return value[0]

        def get_author(value) -> str:
            """
            get the author markup

            Args:
                value(list): the list to disassemble

            Returns:
                str: Mediawiki markup
            """
            author = ""
            delim = ""
            for arec in value:
                if "given" in arec and "family" in arec:
                    author += f"""{delim}{arec["given"]} {arec["family"]}"""
                    delim = ";"
                elif "family" in arec:
                    author += f"""{delim}{arec["family"]}"""
                    delim = ";"
                else:
                    # incomplete author record ignored
                    pass
            return author

        timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%d")
        ref_type = "journal-article"
        title = meta_data["title"]
        if type(title) is list:
            title = title[0]
        title_2 = title.lower()[:2]
        author_lower = ""
        if "author" in meta_data:
            first_author = firstValue(meta_data["author"])
            if "family" in first_author:
                family = firstValue(first_author["family"])
                author_lower = family.lower()
            else:
                # debug break point
                pass
        year = ""
        if "published-print" in meta_data:
            year = meta_data["published-print"]["date-parts"][0][0]
        if not year and "issued" in meta_data:
            year = meta_data["issued"]["date-parts"][0][0]
        reference = f"{author_lower}{year}{title_2}"
        markup = ""
        for skey, mkey, func in [
            ("title", "title", unlist),
            ("subtitle", "subtitle", unlist),
            ("authors", "author", get_author),
            ("journal", "container-title", unlist),
            ("publisher", "publisher", str),
            ("issn", "ISSN", unlist),
            ("subject", "subject", unlist),
            ("volume", "volume", str),
            ("pages", "page", str),
            ("doi", "DOI", str),
        ]:
            if mkey in meta_data:
                value = meta_data[mkey]
                if value:
                    value = func(value)
                    markup += f"\n|{skey}={value}"
        markup = f"""{{{{#scite:
|reference={reference}
|type={ref_type}{markup}
|year={year}
|retrieved-from={retrieved_from}
|retrieved-on={timestamp}
}}}}"""
        full_markup = f"{title}\n[[CiteRef::{reference}]]\n{markup}"
        return full_markup

`crossref`

Created on 17.11.2022

@author: wf

`Crossref`

Crossref access

Source code in skg/crossref.py

class Crossref:
    """
    Crossref access based on the habanero client
    """

    def __init__(self, mailto=None, ua_string=None):
        """
        constructor

        Args:
            mailto(str): contact e-mail address - "wf@bitplan.com" if None
            ua_string(str): user agent string - derived from the package
                version and mailto if None
        """
        mailto = "wf@bitplan.com" if mailto is None else mailto
        if ua_string is None:
            ua_string = f"pysotsog/{skg.__version__} (https://pypi.org/project/pysotsog/; mailto:{mailto})"
        # NOTE: mailto and ua_string are currently not handed over to habanero,
        # the client is created with an empty user agent string instead of
        # habanero.Crossref(mailto=mailto,ua_string=ua_string)
        self.cr = habanero.Crossref(ua_string="")

    def doiMetaData(self, dois: list):
        """
        get the meta data for the given dois

        Args:
            dois(list): a list of dois

        Returns:
            the "message" part of the response if the response status
            is "ok", otherwise None
        """
        response = self.cr.works(ids=dois)
        has_message = "status" in response and "message" in response
        if has_message and response["status"] == "ok":
            return response["message"]
        return None

    def doiBibEntry(self, dois: list):
        """
        get bib entries for the given dois

        Args:
            dois(list): a list of dois

        Returns:
            the content negotiation result in "bibentry" format
        """
        return cn.content_negotiation(ids=dois, format="bibentry")

    def asScite(self, meta_data: dict) -> str:
        """
        convert the given meta data to #Scite format

        see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

        Args:
            meta_data(dict): the metadata to convert

        Returns:
            str: Semantic Mediawiki markup
        """
        # the base url of the habanero client is recorded as the source
        return Citeproc.asScite(meta_data, retrieved_from=self.cr.base_url)

`__init__(mailto=None, ua_string=None)`

constructor

Source code in skg/crossref.py

def __init__(self, mailto=None, ua_string=None):
    """
    constructor

    Args:
        mailto(str): contact e-mail address - "wf@bitplan.com" if None
        ua_string(str): user agent string - derived from the package
            version and mailto if None
    """
    mailto = "wf@bitplan.com" if mailto is None else mailto
    if ua_string is None:
        ua_string = f"pysotsog/{skg.__version__} (https://pypi.org/project/pysotsog/; mailto:{mailto})"
    # NOTE: mailto and ua_string are currently not handed over to habanero,
    # the client is created with an empty user agent string instead of
    # habanero.Crossref(mailto=mailto,ua_string=ua_string)
    self.cr = habanero.Crossref(ua_string="")

`asScite(meta_data)`

convert the given meta data to #Scite format

see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

Returns:

Name	Type	Description
`str`	`str`	Semantic Mediawiki markup

Source code in skg/crossref.py

def asScite(self, meta_data: dict) -> str:
    """
    convert the given meta data to #Scite format

    see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

    Args:
        meta_data(dict): the metadata to convert

    Returns:
        str: Semantic Mediawiki markup
    """
    # the base url of the habanero client is recorded as the source
    return Citeproc.asScite(meta_data, retrieved_from=self.cr.base_url)

`doiBibEntry(dois)`

get bib entries for the given dois

Source code in skg/crossref.py

def doiBibEntry(self, dois: list):
    """
    get bib entries for the given dois

    Args:
        dois(list): a list of dois

    Returns:
        the content negotiation result in "bibentry" format
    """
    return cn.content_negotiation(ids=dois, format="bibentry")

`doiMetaData(dois)`

get the meta data for the given dois

Parameters:

Name	Type	Description	Default
`dois(list)`		a list of dois	required

Source code in skg/crossref.py

def doiMetaData(self, dois: list):
    """
    get the meta data for the given dois

    Args:
        dois(list): a list of dois

    Returns:
        the "message" part of the response if the response status
        is "ok", otherwise None
    """
    response = self.cr.works(ids=dois)
    has_message = "status" in response and "message" in response
    if has_message and response["status"] == "ok":
        return response["message"]
    return None

`dblp`

Created on 2022-11-17

@author: wf

`Dblp`

Schloss Dagstuhl Dblp computer science bibliography

Source code in skg/dblp.py

class Dblp:
    """
    Schloss Dagstuhl Dblp computer science bibliography
    """

    def __init__(self, endpoint: str = "https://qlever.cs.uni-freiburg.de/api/dblp"):
        """
        constructor

        Args:
            endpoint(str): the endpoint to use
        """
        self.endpoint = endpoint
        self.schema = Owl(
            "dblp", "https://dblp.org/rdf/schema", "Wolfgang Fahl", "2022-11-19"
        )
        self.sparql = SPARQL(self.endpoint)

    def get_paper_records(
        self,
        regex: str,
        prop_name: str = "title",
        limit: int = 100,
        debug: bool = False,
    ) -> list:
        """
        get papers fitting the given regex

        Args:
            regex(str): the regex to filter for
            prop_name(str): the property to filter
            limit(int): the maximum number of records to return
            debug(bool): if True show debug information

        Returns:
            list: a list of dict of paper records
        """
        sparql_query = """PREFIX dblp: <https://dblp.org/rdf/schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT
  ?paper 
  ?year
  ?yearofevent
  #?month
  ?doi
  ?isbn
  ?title
  (GROUP_CONCAT(?author_o) as ?authors)
  ?publishedin
WHERE {
  ?paper dblp:title ?title .
  ?paper dblp:doi ?doi .
  OPTIONAL { ?paper dblp:yearOfEvent ?yearofevent } .
  OPTIONAL { ?paper dblp:isbn ?isbn }.
  ?paper dblp:authoredBy ?author_o.
  ?paper dblp:publishedIn ?publishedin .
  ?paper dblp:yearOfPublication ?year.
  OPTIONAL { ?paper dblp:monthOfPublication ?month}.
"""
        # NOTE(review): regex and prop_name are inserted verbatim - a regex
        # containing a double quote will break the query
        sparql_query += f"""FILTER regex(?{prop_name}, "{regex}").\n"""
        sparql_query += f"""
}}
GROUP BY 
  ?paper 
  ?title 
  ?doi 
  ?isbn
  ?year 
  ?yearofevent
  ?month 
  ?publishedin 
ORDER BY DESC(?year)
LIMIT {limit}"""
        if debug:
            print(sparql_query)
        records = self.sparql.queryAsListOfDicts(sparql_query)
        return records

    def get_random_papers(self, year: int = 2020, limit: int = 10) -> list:
        """
        get a random selection of papers published in the given year

        Args:
            year(int): the year of publication to select papers for
            limit(int): the maximum number of records to return

        Returns:
            list: a list of dict of paper records
        """
        sparql_query = f"""PREFIX dblp: <https://dblp.org/rdf/schema#>
SELECT 
  ?paper 
  (SAMPLE(?doi_o) as ?doi)
  (SAMPLE(?title_o) as ?title)
  (MIN(?year_o) as ?year)
  (GROUP_CONCAT(?author_o) as ?authors)
  (SAMPLE(?publishedin_o) as ?publishedin)
  (SAMPLE(?sortKey) as ?sortKey)
WHERE {{
  VALUES ?year_o {{ "{year}" }}
  ?paper dblp:title ?title_o .
  ?paper dblp:doi ?doi_o .
  ?paper dblp:authoredBy ?author_o.
  ?paper dblp:publishedIn ?publishedin_o .
  ?paper dblp:yearOfPublication ?year_o.
  BIND(RAND() AS ?sortKey)
}}
GROUP BY ?paper
ORDER BY ?sortKey 
LIMIT {limit}
        """
        # the query used to be built and then dropped - run it the same
        # way get_paper_records does and hand back the result
        records = self.sparql.queryAsListOfDicts(sparql_query)
        return records

`__init__(endpoint='https://qlever.cs.uni-freiburg.de/api/dblp')`

constructor

Parameters:

Name	Type	Description	Default
`endpoint(str)`		the endpoint to use	`'https://qlever.cs.uni-freiburg.de/api/dblp'`

Source code in skg/dblp.py

def __init__(self, endpoint: str = "https://qlever.cs.uni-freiburg.de/api/dblp"):
    """
    constructor

    Args:
        endpoint(str): the url of the SPARQL endpoint to query
    """
    self.endpoint = endpoint
    # arguments of the Owl schema description for dblp
    schema_args = ("dblp", "https://dblp.org/rdf/schema", "Wolfgang Fahl", "2022-11-19")
    self.schema = Owl(*schema_args)
    self.sparql = SPARQL(self.endpoint)

`get_paper_records(regex, prop_name='title', limit=100, debug=False)`

get papers fitting the given regex

Parameters:

Name	Description	Default
`regex(str)`	the regex to filter for	required
`prop_name(str)`	the property to filter	`'title'`
`limit(int)`	the maximum number of records to return	`100`
`debug(bool)`	if True show debug information	`False`

Returns:

Name	Type	Description
`list`	`list`	a list of dict of paper records

Source code in skg/dblp.py

    def get_paper_records(
        self,
        regex: str,
        prop_name: str = "title",
        limit: int = 100,
        debug: bool = False,
    ) -> list:
        """
        get papers fitting the given regex

        Args:
            prop_name(str): the property to filter
            regex(str): the regex to filter for
            limit(int): the maximum number of records to return
            debug(bool): if True show debug information

        Returns:
            list: a list of dict of paper records
        """
        sparql_query = """PREFIX dblp: <https://dblp.org/rdf/schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
SELECT
  ?paper 
  ?year
  ?yearofevent
  #?month
  ?doi
  ?isbn
  ?title
  (GROUP_CONCAT(?author_o) as ?authors)
  ?publishedin
WHERE {
  ?paper dblp:title ?title .
  ?paper dblp:doi ?doi .
  OPTIONAL { ?paper dblp:yearOfEvent ?yearofevent } .
  OPTIONAL { ?paper dblp:isbn ?isbn }.
  ?paper dblp:authoredBy ?author_o.
  ?paper dblp:publishedIn ?publishedin .
  ?paper dblp:yearOfPublication ?year.
  OPTIONAL { ?paper dblp:monthOfPublication ?month}.
"""
        sparql_query += f"""FILTER regex(?{prop_name}, "{regex}").\n"""
        sparql_query += f"""
}}
GROUP BY 
  ?paper 
  ?title 
  ?doi 
  ?isbn
  ?year 
  ?yearofevent
  ?month 
  ?publishedin 
ORDER BY DESC(?year)
LIMIT {limit}"""
        if debug:
            print(sparql_query)
        records = self.sparql.queryAsListOfDicts(sparql_query)
        return records

`dblp2wikidata`

Created on 2024-02-26

@author: wf

`Dblp2Wikidata`

utility for transferring Dblp person entries to Wikidata

Source code in skg/dblp2wikidata.py

class Dblp2Wikidata:
    """
    utility for transferring Dblp person entries to Wikidata
    """

    def __init__(self, debug: bool = False):
        """
        constructor

        Args:
            debug(bool): if True show debug information
        """
        self.debug = debug

    def transfer(self, args: Namespace):
        """
        Main method to handle the transfer of DBLP entries to Wikidata.

        Currently only reads the search term and reports it in debug mode.

        Args:
            args(Namespace): Command line arguments - the search term is
                read from the "dblp2wikidata" attribute (None if missing)
        """
        person = getattr(args, "dblp2wikidata", None)
        if not self.debug:
            return
        print(f"trying to synchronize DBLP person entry for {person}")

`transfer(args)`

Main method to handle the transfer of DBLP entries to Wikidata.

Parameters:

Name	Type	Description	Default
`args(Namespace)`		Command line arguments.	required

Source code in skg/dblp2wikidata.py

def transfer(self, args: Namespace):
    """
    Main method to handle the transfer of DBLP entries to Wikidata.

    Currently only reads the search term and reports it in debug mode.

    Args:
        args(Namespace): Command line arguments - the search term is
            read from the "dblp2wikidata" attribute (None if missing)
    """
    person = getattr(args, "dblp2wikidata", None)
    if not self.debug:
        return
    print(f"trying to synchronize DBLP person entry for {person}")

`doi`

Created on 2022-11-22

@author: wf

`DOI`

Digital Object Identifier handling

see e.g. https://www.wikidata.org/wiki/Property:P356;
see https://www.doi.org/doi_handbook/2_Numbering.html#2.2;
see https://github.com/davidagraf/doi2bib2/blob/master/server/doi2bib.js;
see https://citation.crosscite.org/docs.html

Source code in skg/doi.py

class DOI:
    """
    Digital Object Identifier handling

    see e.g. https://www.wikidata.org/wiki/Property:P356
    see https://www.doi.org/doi_handbook/2_Numbering.html#2.2
    see https://github.com/davidagraf/doi2bib2/blob/master/server/doi2bib.js
    see https://citation.crosscite.org/docs.html

    """

    # directory indicator "10", registrant code of at least four digits,
    # a "/" (or its url encoded form %2F) and a suffix without
    # whitespace, quotes or ampersands
    pattern = re.compile(
        r"((?P<directory_indicator>10)\.(?P<registrant_code>[0-9]{4,})(?:\.[0-9]+)*(?:\/|%2F)(?:(?![\"&\'])\S)+)"
    )

    def __init__(self, doi: str):
        """
        a DOI

        Args:
            doi(str): the DOI string - it is matched against the DOI pattern,
                the result is available as self.ok
        """
        self.doi = doi
        match = re.match(DOI.pattern, doi)
        self.ok = bool(match)
        if self.ok:
            # only available for valid DOIs
            self.registrant_code = match.group("registrant_code")

    @classmethod
    def isDOI(cls, doi: str):
        """
        check that the given string is a doi

        Args:
            doi(str): the potential DOI string - a list is accepted too and
                must be non-empty and consist of DOIs only

        Returns:
            bool: True if the given value is a DOI (or a list of DOIs)
        """
        if not doi:
            return False
        if isinstance(doi, list):
            return all(cls.isDOI(single_doi) for single_doi in doi)
        if not isinstance(doi, str):
            return False
        doi_obj = DOI(doi)
        return doi_obj.ok

    def fetch_response(self, url: str, headers: dict):
        """
        fetch response for the given url with the given headers

        Args:
            url(str): the url to fetch the data for
            headers(dict): the headers to use

        Returns:
            the open response - the caller is responsible for closing it
        """
        req = urllib.request.Request(url, headers=headers)
        response = urllib.request.urlopen(req)
        return response

    def fetch_json(self, url: str, headers: dict):
        """
        fetch json for the given url with the given headers

        Args:
            url(str): the url to fetch the data for
            headers(dict): the headers to use

        Returns:
            json: json data
        """
        text = self.fetch_text(url, headers)
        json_data = json.loads(text)
        return json_data

    def fetch_text(self, url, headers) -> str:
        """
        fetch text for the given url with the given headers

        Args:
            url(str): the url to fetch the data for
            headers(dict): the headers to use

        Returns:
            str: the text
        """
        # the context manager closes the connection even if reading fails
        with self.fetch_response(url, headers) as response:
            # utf-8 is the fallback if the response declares no charset
            encoding = response.headers.get_content_charset("utf-8")
            content = response.read()
        text = content.decode(encoding)
        return text

    def doi2bibTex(self):
        """
        get the bibtex result for my doi

        Returns:
            str: the text returned for the bibtex request
        """
        url = f"https://doi.org/{self.doi}"
        headers = {"Accept": "application/x-bibtex; charset=utf-8"}
        return self.fetch_text(url, headers)

    def doi2Citeproc(self):
        """
        get the Citeproc JSON result for my doi
        see https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html

        Returns:
            the parsed json data
        """
        url = f"https://doi.org/{self.doi}"
        headers = {"Accept": "application/vnd.citationstyles.csl+json; charset=utf-8"}
        return self.fetch_json(url, headers)

    def dataCiteLookup(self):
        """
        get the dataCite json result for my doi

        Returns:
            the parsed json data
        """
        url = f"https://api.datacite.org/dois/{self.doi}"
        headers = {"Accept": "application/vnd.api+json; charset=utf-8"}
        return self.fetch_json(url, headers)

    def asScite(self) -> str:
        """
        get DOI metadata and convert to Semantic Cite markup

        see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

        Returns:
            str: Semantic Mediawiki markup
        """
        # the metadata is fetched once and then kept on the instance
        if not hasattr(self, "meta_data"):
            self.meta_data = self.doi2Citeproc()
        markup = Citeproc.asScite(self.meta_data, retrieved_from="https://doi.org/")
        return markup

`__init__(doi)`

a DOI

Source code in skg/doi.py

def __init__(self, doi: str):
    """
    a DOI

    Args:
        doi(str): the DOI string - it is matched against the DOI pattern,
            the result is available as self.ok
    """
    self.doi = doi
    found = re.match(DOI.pattern, doi)
    self.ok = found is not None
    if not self.ok:
        # no registrant code for strings that are not DOIs
        return
    self.registrant_code = found.group("registrant_code")

`asScite()`

get DOI metadata and convert to Semantic Cite markup

see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

Returns:

Name	Type	Description
`str`	`str`	Semantic Mediawiki markup

Source code in skg/doi.py

def asScite(self) -> str:
    """
    get DOI metadata and convert to Semantic Cite markup

    see https://github.com/SemanticMediaWiki/SemanticCite/blob/master/src/FilteredMetadata/BibliographicFilteredRecord.php

    Returns:
        str: Semantic Mediawiki markup
    """
    # fetch the citeproc metadata on first use only and keep it
    already_fetched = hasattr(self, "meta_data")
    if not already_fetched:
        self.meta_data = self.doi2Citeproc()
    return Citeproc.asScite(self.meta_data, retrieved_from="https://doi.org/")

`dataCiteLookup()`

get the dataCite json result for my doi

Source code in skg/doi.py

def dataCiteLookup(self):
    """
    look up my doi via the DataCite API

    Returns:
        the parsed json data
    """
    accept_json_api = {"Accept": "application/vnd.api+json; charset=utf-8"}
    return self.fetch_json(f"https://api.datacite.org/dois/{self.doi}", accept_json_api)

`doi2Citeproc()`

get the Citeproc JSON result for my doi; see https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html

Source code in skg/doi.py

def doi2Citeproc(self):
    """
    request the Citeproc (CSL) JSON for my doi from doi.org
    see https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html

    Returns:
        the parsed json data
    """
    accept_csl_json = {
        "Accept": "application/vnd.citationstyles.csl+json; charset=utf-8"
    }
    return self.fetch_json(f"https://doi.org/{self.doi}", accept_csl_json)

`doi2bibTex()`

get the bibtex result for my doi

Source code in skg/doi.py

def doi2bibTex(self):
    """
    request the bibtex entry for my doi from doi.org

    Returns:
        str: the text returned for the bibtex request
    """
    accept_bibtex = {"Accept": "application/x-bibtex; charset=utf-8"}
    return self.fetch_text(f"https://doi.org/{self.doi}", accept_bibtex)

`fetch_json(url, headers)`

fetch json for the given url with the given headers

Parameters:

Name	Type	Description	Default
`url(str)`		the url to fetch the data for	required
`headers(dict)`		the headers to use	required

Returns:

Name	Type	Description
`json`		json data

Source code in skg/doi.py

def fetch_json(self, url: str, headers: dict):
    """
    fetch the text for the given url and parse it as json

    Args:
        url(str): the url to fetch the data for
        headers(dict): the headers to use

    Returns:
        json: the parsed json data
    """
    return json.loads(self.fetch_text(url, headers))

`fetch_response(url, headers)`

fetch response for the given url with the given headers

Parameters:

Name	Type	Description	Default
`url(str)`		the url to fetch the data for	required
`headers(dict)`		the headers to use	required

Source code in skg/doi.py

def fetch_response(self, url: str, headers: dict):
    """
    open the given url with the given request headers

    Args:
        url(str): the url to fetch the data for
        headers(dict): the headers to use

    Returns:
        the response object returned by urllib.request.urlopen
    """
    return urllib.request.urlopen(urllib.request.Request(url, headers=headers))

`fetch_text(url, headers)`

fetch text for the given url with the given headers

Parameters:

Name	Type	Description	Default
`url(str)`		the url to fetch the data for	required
`headers(dict)`		the headers to use	required

Returns:

Name	Type	Description
`str`	`str`	the text

Source code in skg/doi.py

def fetch_text(self, url, headers) -> str:
    """
    fetch text for the given url with the given headers

    Args:
        url(str): the url to fetch the data for
        headers(dict): the headers to use

    Returns:
        str: the text
    """
    # the context manager closes the connection even if reading fails
    with self.fetch_response(url, headers) as response:
        # utf-8 is the fallback if the response declares no charset
        encoding = response.headers.get_content_charset("utf-8")
        content = response.read()
    text = content.decode(encoding)
    return text

`isDOI(doi)` `classmethod`

check that the given string is a doi

Parameters:

Name	Type	Description	Default
`doi(str)`		the potential DOI string	required

Source code in skg/doi.py

@classmethod
def isDOI(cls, doi: str):
    """
    check that the given string is a doi

    Args:
        doi(str): the potential DOI string - a list is accepted too and
            must be non-empty and consist of DOIs only

    Returns:
        bool: True if the given value is a DOI (or a list of DOIs)
    """
    # None, "" and empty lists are never DOIs
    if not doi:
        return False
    if isinstance(doi, list):
        return all(cls.isDOI(entry) for entry in doi)
    return isinstance(doi, str) and DOI(doi).ok

`event`

Created on 2022-11-16

@author: wf

`Event`

Bases: Node

an instance of a scientific event

Source code in skg/event.py

class Event(skg.graph.Node):
    """
    an instance of a scientific event
    """

    @classmethod
    def getSamples(cls):
        """
        get sample records for events

        Returns:
            list: a list of sample event dicts
        """
        wikidata_workshop = {
            "wikiDataId": "Q112055391",
            "title": "The Third Wikidata Workshop",
            "location": "Hangzhou",
            "point_in_time": "2022-10-24",
            "official_website": "https://wikidataworkshop.github.io/2022/",
        }
        return [wikidata_workshop]

`EventSeries`

Bases: Node

an instance of an academic event series

Source code in skg/event.py

class EventSeries(skg.graph.Node):
    """
    an instance of an academic event series
    """

    @classmethod
    def getSamples(cls):
        """
        get sample records for event series

        Returns:
            list: a list of sample event series dicts
        """
        iswc = {"wikiDataId": "Q6053150", "short_name": "ISWC"}
        ecdl = {
            "wikiDataId": "Q105491257",
            "short_name": "ECDL",
            "title": "European Conference on Research and Advanced Technology for Digital Libraries (English)",
            "official_website": "http://ecdlconference.isti.cnr.it/",
        }
        vnc = {
            "wikiDataId": "Q105695678",
            "short_name": "VNC (English)",
            "DBLP_venue_ID": "conf/vnc",
            "VIAF_ID": "267408611",
            "title": "IEEE Vehicular Networking Conference",
        }
        eswc = {
            "wikiDataId": "Q17012957",
            "short_name": "ESWC",
            "inception": "2004",
            "gndId": "1091749205",
        }
        return [iswc, ecdl, vnc, eswc]

`Proceedings`

Bases: Node

Proceedings of an event

Source code in skg/event.py

class Proceedings(skg.graph.Node):
    """
    Proceedings of an event
    """

    @classmethod
    def getSamples(cls):
        """
        get sample records for proceedings

        Returns:
            list: a list of sample proceedings dicts
        """
        wikidata_workshop_proceedings = {
            "wikiDataId": "Q115053286",
            "short_name": "Wikidata 2022 (English)",
            "title": "Proceedings of the 3rd Wikidata Workshop 2022 (English)",
            "publication_date": "2022-11-03",
            "full_work_available_at_URL": "http://ceur-ws.org/Vol-3262/",
        }
        return [wikidata_workshop_proceedings]

`graph`

Created on 2022-11-16

@author: wf

`Concept`

an Entity

Source code in skg/graph.py

class Concept:
    """
    an Entity with named properties that can be mapped to other vocabularies
    """

    def __init__(self, name: str, cls):
        """
        constructor

        Args:
            name(str): the name of the node
            cls: a class - if it offers getSamples() a Property is created
                for each distinct key of its sample records
        """
        self.name = name
        self.props = {}
        self.cls = cls
        if not hasattr(cls, "getSamples"):
            return
        for sample in cls.getSamples():
            for key in sample.keys():
                # the first occurrence of a key defines the property
                if key not in self.props:
                    self.props[key] = Property(self, key)

    def map(self, map_name: str, map_list: list):
        """
        map the given list of property mappings under the given map_name

        Args:
            map_name(str): the name of the mapping e.g. "wikidata"
            map_list(list): a list of mapping tuples
                (property name, mapped property)

        Returns:
            Concept: this concept to allow chaining
        """
        known = self.props
        for key, target in map_list:
            # mappings for unknown properties are silently skipped
            if key not in known:
                continue
            known[key].setmap(map_name, target)
        return self

    def map_wikidata(self, wd_class: str, scholia_suffix, map_list: list):
        """
        map wikidata entries

        Args:
            wd_class(str): the main wikidata base class
            scholia_suffix(str): the scholia suffix
            map_list(list): a list of mapping tuples
                (property name, mapped property)

        Returns:
            Concept: this concept to allow chaining
        """
        self.wd_class, self.scholia_suffix = wd_class, scholia_suffix
        self.map("wikidata", map_list)
        return self

`__init__(name, cls)`

constructor

Parameters:

Name	Type	Description	Default
`name(str)`		the name of the node	required
`cls`		a class	required

Source code in skg/graph.py

def __init__(self, name: str, cls):
    """
    constructor

    Args:
        name(str): the name of the node
        cls: a class - if it offers getSamples() a Property is created
            for each distinct key of its sample records
    """
    self.name = name
    self.props = {}
    self.cls = cls
    if not hasattr(cls, "getSamples"):
        return
    for sample in cls.getSamples():
        for key in sample.keys():
            # the first occurrence of a key defines the property
            if key not in self.props:
                self.props[key] = Property(self, key)

`map(map_name, map_list)`

map the given list of property mappings under the given map_name

Parameters:

Name	Type	Description	Default
`map_name(str)`		the name of the mapping e.g. "wikidata"	required
`map_list(list)`		a list of mapping tuples	required

Source code in skg/graph.py

def map(self, map_name: str, map_list: list):
    """
    store the given property mappings under the given mapping name

    Tuples naming a property this concept does not have are ignored.

    Args:
        map_name(str): the name of the mapping e.g. "wikidata"
        map_list(list): a list of (property name, mapped property) tuples

    Returns:
        myself - so that calls can be chained
    """
    for own_name, target_name in map_list:
        if own_name not in self.props:
            continue
        known_prop = self.props[own_name]
        known_prop.setmap(map_name, target_name)
    return self

`map_wikidata(wd_class, scholia_suffix, map_list)`

map wikidata entries

Parameters:

Name	Type	Description	Default
`wd_class(str)`		the main wikidata base class	required
`scholia_suffix(str)`		the scholia suffix	required

Source code in skg/graph.py

def map_wikidata(self, wd_class: str, scholia_suffix, map_list: list):
    """
    set the wikidata specific configuration of this concept

    Args:
        wd_class(str): the main wikidata base class
        scholia_suffix(str): the scholia suffix
        map_list(list): mapping tuples registered via map() under
            the mapping name "wikidata"

    Returns:
        myself - so that calls can be chained
    """
    self.wd_class, self.scholia_suffix = wd_class, scholia_suffix
    self.map("wikidata", map_list)
    return self

`Node`

a Node in the scholarly knowledge graph

Source code in skg/graph.py

class Node:
    """
    a Node in the scholarly knowledge graph

    The constructor sets no attributes: concept, label and the property
    values are assigned by from_dict, provenance by setProvenance.
    """

    # when True from_sparql prints every query before executing it
    debug = False

    def __init__(self):
        """
        constructor
        """

    def __str__(self):
        """
        return a text representation of me

        Returns:
            str: concept name and label followed by one line per
                property of my concept that is set on me
        """
        text = f"{self.concept.name} ➞ {self.label}:"
        delim = "\n  "
        for prop in self.concept.props.values():
            if hasattr(self, prop.name):
                text += f"{delim}{prop.name}={getattr(self,prop.name)}"
        return text

    def from_dict(self, concept, record: dict):
        """
        get my values from the given record

        Args:
            concept: the concept i am an instance of
            record(dict): the record to take my label and property values from

        Raises:
            KeyError: if the record has no entry for the concept's name
        """
        self.concept = concept
        self.label = record[concept.name]
        # only keys that are properties of the concept are taken over
        for key in concept.props.keys():
            if key in record:
                setattr(self, key, record[key])

    def browser_url(self):
        """
        get my browser url

        Returns:
            str: my scholia url if my provenance is "wikidata" else my label
        """
        if self.provenance == "wikidata":
            url = self.scholia_url()
        else:
            url = self.label
        return url

    def scholia_url(self):
        """
        get my scholia url

        Returns:
            str: the scholia url or None if i have no wikiDataId
        """
        prefix = f"https://scholia.toolforge.org/{self.concept.scholia_suffix}"
        wd_url = getattr(self, "wikiDataId", None)
        if wd_url is None:
            return None
        else:
            qid = wd_url.replace("http://www.wikidata.org/entity/", "")
            return f"{prefix}/{qid}"

    @classmethod
    def setProvenance(cls, instances: list, provenance: str):
        """
        set the provenance of the given instances

        Args:
            instances(list): the instances to modify
            provenance(str): the provenance value to assign to each instance
        """
        for instance in instances:
            instance.provenance = provenance

    @classmethod
    def from_sparql(cls, sparql: SPARQL, sparql_query: str, concept: Concept):
        """
        get instance from the given sparql access point with the given sparql_query for
        the given concept

        Args:
            sparql(SPARQL): the sparql access point
            sparql_query(str): the query to execute
            concept(Concept): the concept to create instances for

        Returns:
            list: the instances created from the query result records
        """
        if Node.debug:
            print(sparql_query)
        records = sparql.queryAsListOfDicts(sparql_query)
        instances = cls.from_records(records, concept)
        return instances

    @classmethod
    def from_records(cls, records: list, concept: Concept):
        """
        get instances from the given records for the given concept

        Args:
            records(list): a list of dicts to get instances for
            concept(Concept): the concept to create instances for

        Returns:
            list: one instance per record
        """
        instances = []
        for record in records:
            # call my constructor
            instance = cls()
            instance.from_dict(concept, record)
            instances.append(instance)
        return instances

    @classmethod
    def from_wikidata_via_id(
        cls, concept: Concept, id_name: str, id_value: str, lang: str = "en"
    ):
        """
        get a node instance from wikidata for the given parameters

        Args:
            concept(Concept): the concept to return
            id_name(str): the name of the id to search / lookup with
            id_value(str): the value of the id
            lang(str): the language code to apply

        Returns:
            list: the instances found - their provenance is set to "wikidata"

        Raises:
            Exception: if a property of the concept other than wikiDataId
                has no wikidata mapping
        """
        wikidata = Wikidata()
        # the wikidata id is matched as an entity IRI, any other id as a literal
        if id_name == "wikiDataId":
            value_clause = f"<http://www.wikidata.org/entity/{id_value}>"
        else:
            value_clause = f'''"{id_value}"'''
        # NOTE(review): ?qId is selected but no clause below binds it
        sparql_query = f"""# Query for {concept.name} details via ID {id_name} value {id_value}
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
SELECT DISTINCT ?{concept.name} ?qId"""
        for prop in concept.props.values():
            sparql_query += f" ?{prop.name}"
        sparql_query += f"""
WHERE {{
  VALUES ?{id_name} {{
    {value_clause}
  }}
  # classification!
  ?wikiDataId wdt:P31/wdt:P279* wd:{concept.wd_class}.
  ?wikiDataId rdfs:label ?{concept.name} .
  FILTER(LANG(?{concept.name})="{lang}").
"""
        for prop in concept.props.values():
            if prop.name == "wikiDataId":
                continue
            if not (prop.hasmap("wikidata")):
                raise Exception(
                    f"Property {prop.name} of {concept.name} has no wikidata mapping"
                )
            wd_prop = prop.getmap("wikidata")
            # NOTE(review): a mapping value such as "label" ends up as wdt:label - confirm this is intended
            clause = f"?wikiDataId wdt:{wd_prop} ?{prop.name}."
            # only the property that is searched for is mandatory
            if prop.name != id_name:
                clause = f"OPTIONAL {{ {clause} }}"
            sparql_query += "\n  " + clause
        sparql_query += "\n}"
        instances = cls.from_sparql(wikidata.sparql, sparql_query, concept)
        cls.setProvenance(instances, "wikidata")
        return instances

    @classmethod
    def from_dblp_via_id(
        cls, concept: Concept, id_name: str, id_value: str, lang: str = "en"
    ):
        """
        get a node instance from dblp for the given parameters

        Args:
            concept(Concept): the concept to return
            id_name(str): the name of the id to search / lookup with
            id_value(str): the value of the id
            lang(str): the language code to apply (not used by this query)

        Returns:
            list: the instances found - their provenance is set to "dblp"
        """
        dblp = Dblp()
        # written with explicit escapes to keep the trailing blank after SELECT visible
        sparql_query = (
            "\nPREFIX dblp: <https://dblp.org/rdf/schema#>\n"
            "SELECT \n"
            f"  ?{concept.name}"
        )
        for prop in concept.props.values():
            if prop.hasmap("dblp"):
                sparql_query += f" ?{prop.name}"
        # doi and orcid are matched as IRIs, any other id as a literal
        if id_name == "doi":
            value_clause = f"<http://dx.doi.org/{id_value}>"
        elif id_name == "orcid":
            value_clause = f"<https://orcid.org/{id_value}>"
        else:
            value_clause = f'''"{id_value}"'''
        sparql_query += (
            "\nWHERE {\n"
            f"  VALUES ?{id_name} {{\n"
            f"    {value_clause}\n"
            "  }\n"
        )
        for prop in concept.props.values():
            if prop.hasmap("dblp"):
                dblp_prop = prop.getmap("dblp")
                # bind the variable named after the property: this is the
                # variable that is selected above and that from_dict looks up
                sparql_query += f"""?{concept.name} dblp:{dblp_prop} ?{prop.name}.\n"""
        sparql_query += "}\n"
        instances = cls.from_sparql(dblp.sparql, sparql_query, concept)
        cls.setProvenance(instances, "dblp")
        return instances

`__init__()`

constructor

Source code in skg/graph.py

def __init__(self):
    """
    constructor

    does nothing - the body consists of this docstring only
    """

`__str__()`

return a text representation of me

Source code in skg/graph.py

def __str__(self):
    """
    render my concept name and label followed by one indented line
    for each property of my concept that is set on me
    """
    lines = [f"{self.concept.name} ➞ {self.label}:"]
    for prop in self.concept.props.values():
        if hasattr(self, prop.name):
            lines.append(f"{prop.name}={getattr(self,prop.name)}")
    return "\n  ".join(lines)

`browser_url()`

get my browser url

Source code in skg/graph.py

def browser_url(self):
    """
    get the url to show me in a browser

    Returns:
        my scholia url if my provenance is "wikidata" - otherwise my label
    """
    if self.provenance != "wikidata":
        return self.label
    return self.scholia_url()

`from_dblp_via_id(concept, id_name, id_value, lang='en')` `classmethod`

get a node instance from dblp for the given parameters

Parameters:

Name	Description	Default
`concept(Concept)`	the concept to return	required
`id_name(str)`	the name of the id to search / lookup with	required
`id_value(str)`	the value of the id	required
`lang(str)`	the language code to apply	`'en'`

Source code in skg/graph.py

    @classmethod
    def from_dblp_via_id(
        cls, concept: Concept, id_name: str, id_value: str, lang: str = "en"
    ):
        """
        get a node instance from dblp for the given parameters

        Args:
            concept(Concept): the concept to return
            id_name(str): the name of the id to search / lookup with
            id_value(str): the value of the id
            lang(str): the language code to apply (not used by this query)

        Returns:
            list: the instances found - their provenance is set to "dblp"
        """
        dblp = Dblp()
        # written with explicit escapes to keep the trailing blank after SELECT visible
        sparql_query = (
            "\nPREFIX dblp: <https://dblp.org/rdf/schema#>\n"
            "SELECT \n"
            f"  ?{concept.name}"
        )
        for prop in concept.props.values():
            if prop.hasmap("dblp"):
                sparql_query += f" ?{prop.name}"
        # doi and orcid are matched as IRIs, any other id as a literal
        if id_name == "doi":
            value_clause = f"<http://dx.doi.org/{id_value}>"
        elif id_name == "orcid":
            value_clause = f"<https://orcid.org/{id_value}>"
        else:
            value_clause = f'''"{id_value}"'''
        sparql_query += (
            "\nWHERE {\n"
            f"  VALUES ?{id_name} {{\n"
            f"    {value_clause}\n"
            "  }\n"
        )
        for prop in concept.props.values():
            if prop.hasmap("dblp"):
                dblp_prop = prop.getmap("dblp")
                # bind the variable named after the property: this is the
                # variable that is selected above and that from_dict looks up
                sparql_query += f"""?{concept.name} dblp:{dblp_prop} ?{prop.name}.\n"""
        sparql_query += "}\n"
        instances = cls.from_sparql(dblp.sparql, sparql_query, concept)
        cls.setProvenance(instances, "dblp")
        return instances

`from_dict(concept, record)`

get my values from the given record

Source code in skg/graph.py

def from_dict(self, concept, record: dict):
    """
    get my values from the given record

    Args:
        concept: the concept i am an instance of
        record(dict): the record to take my label and property values from

    Raises:
        KeyError: if the record has no entry for the concept's name
    """
    self.concept = concept
    self.label = record[concept.name]
    # only keys that are properties of the concept are taken over
    for key in concept.props.keys():
        if key in record:
            setattr(self, key, record[key])

`from_records(records, concept)` `classmethod`

get instances from the given records for the given concept

Parameters:

Name	Type	Description	Default
`records(list)`		a list of dicts to get instances for	required
`concept(Concept)`		the concept to create instances for	required

Source code in skg/graph.py

@classmethod
def from_records(cls, records: list, concept: Concept):
    """
    create one instance per record for the given concept

    Args:
        records(list): a list of dicts to get instances for
        concept(Concept): the concept to create instances for

    Returns:
        list: the instances in the order of the records
    """

    def build(record):
        # parameterless construction followed by initialization from the record
        node = cls()
        node.from_dict(concept, record)
        return node

    return [build(record) for record in records]

`from_sparql(sparql, sparql_query, concept)` `classmethod`

get instance from the given sparql access point with the given sparql_query for the given concept

Parameters:

Name	Description	Default
`sparql(SPARQL)`	the sparql access point	required
`sparql_query(str)`	the query to execute	required
`concept(Concept)`	the concept to create instances for	required

Source code in skg/graph.py

@classmethod
def from_sparql(cls, sparql: SPARQL, sparql_query: str, concept: Concept):
    """
    run the given query on the given sparql access point and create
    instances of the given concept from the result records

    Args:
        sparql(SPARQL): the sparql access point
        sparql_query(str): the query to execute
        concept(Concept): the concept to create instances for

    Returns:
        the instances created by from_records
    """
    if Node.debug:
        # show the query before it is executed
        print(sparql_query)
    return cls.from_records(sparql.queryAsListOfDicts(sparql_query), concept)

`from_wikidata_via_id(concept, id_name, id_value, lang='en')` `classmethod`

get a node instance from wikidata for the given parameters

Parameters:

Name	Description	Default
`concept(Concept)`	the concept to return	required
`id_name(str)`	the name of the id to search / lookup with	required
`id_value(str)`	the value of the id	required
`lang(str)`	the language code to apply	`'en'`

Source code in skg/graph.py

    @classmethod
    def from_wikidata_via_id(
        cls, concept: Concept, id_name: str, id_value: str, lang: str = "en"
    ):
        """
        look up node instances of the given concept on wikidata via an id

        Args:
            concept(Concept): the concept to return
            id_name(str): the name of the id to search / lookup with
            id_value(str): the value of the id
            lang(str): the language code to apply

        Returns:
            list: the instances found - their provenance is set to "wikidata"

        Raises:
            Exception: if a property of the concept other than wikiDataId
                has no wikidata mapping
        """
        wikidata = Wikidata()
        # the wikidata id is matched as an entity IRI, any other id as a literal
        value_clause = (
            f"<http://www.wikidata.org/entity/{id_value}>"
            if id_name == "wikiDataId"
            else f'"{id_value}"'
        )
        header = (
            f"# Query for {concept.name} details via ID {id_name} value {id_value}\n"
            "PREFIX wd: <http://www.wikidata.org/entity/>\n"
            "PREFIX wdt: <http://www.wikidata.org/prop/direct/>\n"
            "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n"
            f"SELECT DISTINCT ?{concept.name} ?qId"
        )
        # one result variable per property of the concept
        projection = "".join(f" ?{prop.name}" for prop in concept.props.values())
        where_head = (
            "\nWHERE {\n"
            f"  VALUES ?{id_name} {{\n"
            f"    {value_clause}\n"
            "  }\n"
            "  # classification!\n"
            f"  ?wikiDataId wdt:P31/wdt:P279* wd:{concept.wd_class}.\n"
            f"  ?wikiDataId rdfs:label ?{concept.name} .\n"
            f'  FILTER(LANG(?{concept.name})="{lang}").\n'
        )
        clauses = []
        for prop in concept.props.values():
            if prop.name == "wikiDataId":
                continue
            if not prop.hasmap("wikidata"):
                raise Exception(
                    f"Property {prop.name} of {concept.name} has no wikidata mapping"
                )
            wd_prop = prop.getmap("wikidata")
            triple = f"?wikiDataId wdt:{wd_prop} ?{prop.name}."
            # only the property that is searched for is mandatory
            if prop.name == id_name:
                clauses.append(triple)
            else:
                clauses.append(f"OPTIONAL {{ {triple} }}")
        body = "".join("\n  " + clause for clause in clauses)
        sparql_query = header + projection + where_head + body + "\n}"
        found = cls.from_sparql(wikidata.sparql, sparql_query, concept)
        cls.setProvenance(found, "wikidata")
        return found

`scholia_url()`

get my scholia url

Source code in skg/graph.py

def scholia_url(self):
    """
    derive my scholia url from my wikiDataId

    Returns:
        the scholia url - None if i have no wikiDataId attribute
        or its value is None
    """
    base_url = f"https://scholia.toolforge.org/{self.concept.scholia_suffix}"
    entity_url = getattr(self, "wikiDataId", None)
    if entity_url is not None:
        # strip the entity prefix so that only the Q-identifier remains
        qid = entity_url.replace("http://www.wikidata.org/entity/", "")
        return f"{base_url}/{qid}"
    return None

`setProvenance(instances, provenance)` `classmethod`

set the provenance of the given instances

Source code in skg/graph.py

@classmethod
def setProvenance(cls, instances: list, provenance: str):
    """
    assign the given provenance to every one of the given instances

    Args:
        instances(list): the instances to modify
        provenance(str): the value for the provenance attribute
    """
    for node in instances:
        setattr(node, "provenance", provenance)

`Property`

a Property

Source code in skg/graph.py

class Property:
    """
    a named property of a concept together with its mappings
    """

    def __init__(self, concept: Concept, name: str):
        """
        create a property without any mappings

        Args:
            concept(Concept): the concept this property belongs to
            name(str): the name of the property

        """
        self.concept = concept
        self.name = name
        # mapped property per mapping name
        self.maps = {}

    def setmap(self, map_name, mapped_prop):
        """
        remember the mapped property for the given mapping name -
        an existing entry is replaced
        """
        self.maps.update({map_name: mapped_prop})

    def getmap(self, map_name):
        """
        get the mapped property for the given mapping name

        Raises:
            KeyError: if there is no mapping for the given name
        """
        return self.maps[map_name]

    def hasmap(self, map_name: str) -> bool:
        """
        tell whether a mapping exists for the given mapping name

        Args:
            map_name(str): the map name to check

        Returns:
            bool: True if there is a mapping
        """
        is_mapped = map_name in self.maps
        return is_mapped

`__init__(concept, name)`

constructor

Parameters:

Name	Type	Description	Default
`concept(Concept)`		the concept this property belongs to	required
`name(str)`		the name of the property	required

Source code in skg/graph.py

def __init__(self, concept: Concept, name: str):
    """
    create a property without any mappings

    Args:
        concept(Concept): the concept this property belongs to
        name(str): the name of the property

    """
    self.concept, self.name = concept, name
    # mapped property per mapping name
    self.maps = {}

`hasmap(map_name)`

check whether there is a mapping for the given map_name

Parameters:

Name	Type	Description	Default
`map_name(str)`		the map name to check	required

Returns:

Name	Type	Description
`bool`	`bool`	True if there is mapping

Source code in skg/graph.py

def hasmap(self, map_name: str) -> bool:
    """
    tell whether a mapping exists for the given mapping name

    Args:
        map_name(str): the map name to check

    Returns:
        bool: True if there is a mapping
    """
    is_mapped = map_name in self.maps
    return is_mapped

`setmap(map_name, mapped_prop)`

map the given property

Source code in skg/graph.py

def setmap(self, map_name, mapped_prop):
    """
    remember the mapped property for the given mapping name -
    an existing entry is replaced
    """
    self.maps.update({map_name: mapped_prop})

`kg`

Created on 2022-11-16

@author: wf

`SKG_Def`

scholarly knowledge graph

Source code in skg/kg.py

class SKG_Def:
    """
    scholarly knowledge graph definition

    Holds the concepts of the graph together with their wikidata, dblp
    and smw property mappings.
    """

    def __init__(self):
        """
        constructor

        creates the concepts, registers their property mappings and
        builds the lookup of concepts by wikidata class

        Raises:
            Exception: if two concepts declare the same wikidata class
        """
        self.concepts = {
            # main concepts
            "Scholar": Concept(name="Scholar", cls=Scholar),
            "Institution": Concept(name="Institution", cls=Institution),
            "Paper": Concept(name="Paper", cls=Paper),
            "Event": Concept(name="Event", cls=Event),
            "EventSeries": Concept(name="EventSeries", cls=EventSeries),
            "Proceedings": Concept(name="Proceedings", cls=Proceedings),
            # neighbour concepts
            "Country": Concept(name="Country", cls=Country),
        }
        self.concepts["Scholar"].map_wikidata(
            "Q5",
            "author",
            [
                ("name", "label"),
                ("dblpId", "P2456"),
                ("gndId", "P227"),
                ("linkedInId", "P6634"),
                ("homepage", "P856"),
                ("googleScholarUser", "P1960"),
                ("orcid", "P496"),
                ("givenName", "P735"),
                ("familyName", "P734"),
                ("gender", "P21"),
                ("image", "P18"),
                ("occupation", "P106"),
                ("Semantic_Scholar_author_ID", "P4012"),
            ],
        ).map(
            "dblp",
            [
                ("name", "primaryCreatorName"),
                ("homepage", "primaryHomepage"),
                ("orcid", "orcid"),
            ],
        ).map(
            "smw",
            [
                ("wikiDataId", "wikiDataId"),
                ("familyName", "name"),
                ("givenName", "firstName"),
                ("googleScholarUser", "googleScholarUser"),
                ("homepage", "homepage"),
                ("dblpId", "dblpId"),
                ("orcid", "orcid"),
                ("linkedInId", "linkedInId"),
            ],
        )
        self.concepts["Institution"].map_wikidata(
            "Q4671277",
            "organization",
            [
                ("short_name", "P1813"),  # 2.0 %
                ("inception", "P571"),  # 65.8 %
                ("image", "P18"),  # 15.2 %
                ("country", "P17"),  # 88.8 %
                ("located_in", "P131"),  # 51.9 %
                ("official_website", "P856"),  # 59.1%
                ("coordinate_location", "P625"),  # 44.0 %
            ],
        )
        self.concepts["Paper"].map_wikidata(
            "Q13442814",
            "work",
            [
                ("title", "label"),
                ("doi", "P356"),
                ("DBLP_publication_ID", "P8978"),
                ("publication_date", "P577"),
            ],
        ).map("dblp", [("title", "title"), ("doi", "doi")])
        # scientific event
        self.concepts["Event"].map_wikidata(
            "Q52260246",
            "event",
            [
                ("title", "P1476"),
                ("country", "P17"),  # 93.9% -> Human Settlement
                ("location", "P276"),  # 94.6%
                ("point_in_time", "P585"),
                ("official_website", "P856"),
            ],
        )
        # academic event series
        self.concepts["EventSeries"].map_wikidata(
            "Q47258130",
            "event-series",
            [
                ("title", "P1476"),  # 96.7 %
                ("short_name", "P1813"),  # 93.1 %
                ("VIAF_ID", "P214"),  # 60.5 %
                ("DBLP_venue_ID", "P8926"),  # 96.4 %
                ("gndId", "P227"),  # 42.3 %
                ("inception", "P571"),  # 22.3 %
                ("official_website", "P856"),  # 13.5 %
            ],
        )
        # proceedings
        self.concepts["Proceedings"].map_wikidata(
            "Q1143604",
            "venue",
            [
                ("title", "P1476"),
                ("short_name", "P1813"),
                ("full_work_available_at_URL", "P953"),
                ("publication_date", "P577"),
            ],
        )
        # country
        self.concepts["Country"].map_wikidata(
            "Q6256",
            "topic",
            [
                ("name", "label"),  # 100% ?
                ("homepage", "P856"),  # 49.4%
                ("population", "P1082"),  # 57.4%
                ("capital", "P36"),  # 59.8%
                ("coordinate_location", "P625"),  # 58.6%
                ("iso_code", "P297"),  # 53.3%
            ],
        )

        # lookup of the concepts by their wikidata class - classes must be unique
        self.concepts_by_qid = {}
        for concept in self.concepts.values():
            if concept.wd_class in self.concepts_by_qid:
                raise Exception(f"duplicate wd_class definition: {concept.wd_class}")
            self.concepts_by_qid[concept.wd_class] = concept

    def conceptForQid(self, qid: str) -> Concept:
        """
        get the concept for the given wikidata Q identifier

        Args:
            qid(str): get the concept for the given Qid

        Return:
            Concept: or None if none is found
        """
        concept = self.concepts_by_qid.get(qid, None)
        return concept

    def toPlantuml(self, header: str = None, footer: str = None) -> str:
        """
        get a plantuml version of this knowledge graph

        Args:
            header(str): the header to apply - if None a default header with
                version information and the current UTC date is generated
            footer(str): the footer to apply - if None the closing brace of
                the default header's package is used

        Returns:
            str: the plantuml markup

        """
        # timezone-aware replacement for the deprecated datetime.utcnow()
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
        if header is None:
            # explicit escapes keep the trailing blank of the authors line visible
            header = (
                f"/'{Version.name}:{Version.description}\n"
                f"updated {timestamp}\n"
                "\n"
                f"authors:{Version.authors} \n"
                "'/\n"
                f"title  {Version.name}:{Version.description} see {Version.doc_url} updated {timestamp}\n"
                "hide circle\n"
                "package skg {\n"
            )
        if footer is None:
            footer = "}\n"
        indent = "  "
        parts = [f"{header}"]
        for concept_name, concept in self.concepts.items():
            parts.append(f"{indent}class {concept_name} {{\n")
            # one line per property name
            parts.extend(f"{indent}  {prop_name}\n" for prop_name in concept.props)
            parts.append(f"\n{indent}}}\n")
        parts.append(f"{footer}")
        return "".join(parts)

    def toSiDiF(self) -> str:
        """
        convert me to SiDiF format

        Returns:
            str: one topic declaration per concept
        """
        # the name literal is enclosed in a balanced pair of double quotes
        return "".join(
            f"#\n# {concept_name}\n#\n"
            f"{concept_name} isA Topic\n"
            f'"{concept_name}" is name of it\n'
            for concept_name in self.concepts
        )

`__init__()`

constructor

Source code in skg/kg.py

def __init__(self):
    """
    constructor

    creates the concepts of the knowledge graph, registers their
    wikidata, dblp and smw property mappings and builds the lookup
    of concepts by wikidata class

    Raises:
        Exception: if two concepts declare the same wikidata class
    """
    # one Concept per name - each is created from the class of the same name
    self.concepts = {
        # main concepts
        "Scholar": Concept(name="Scholar", cls=Scholar),
        "Institution": Concept(name="Institution", cls=Institution),
        "Paper": Concept(name="Paper", cls=Paper),
        "Event": Concept(name="Event", cls=Event),
        "EventSeries": Concept(name="EventSeries", cls=EventSeries),
        "Proceedings": Concept(name="Proceedings", cls=Proceedings),
        # neighbour concepts
        "Country": Concept(name="Country", cls=Country),
    }
    # map_wikidata arguments: wikidata class, scholia suffix and
    # (property name, mapped property) tuples; the calls are chained
    # NOTE(review): the percentages in the trailing comments presumably give
    # the share of wikidata items having the property - confirm
    self.concepts["Scholar"].map_wikidata(
        "Q5",
        "author",
        [
            ("name", "label"),
            ("dblpId", "P2456"),
            ("gndId", "P227"),
            ("linkedInId", "P6634"),
            ("homepage", "P856"),
            ("googleScholarUser", "P1960"),
            ("orcid", "P496"),
            ("givenName", "P735"),
            ("familyName", "P734"),
            ("gender", "P21"),
            ("image", "P18"),
            ("occupation", "P106"),
            ("Semantic_Scholar_author_ID", "P4012"),
        ],
    ).map(
        "dblp",
        [
            ("name", "primaryCreatorName"),
            ("homepage", "primaryHomepage"),
            ("orcid", "orcid"),
        ],
    ).map(
        "smw",
        [
            ("wikiDataId", "wikiDataId"),
            ("familyName", "name"),
            ("givenName", "firstName"),
            ("googleScholarUser", "googleScholarUser"),
            ("homepage", "homepage"),
            ("dblpId", "dblpId"),
            ("orcid", "orcid"),
            ("linkedInId", "linkedInId"),
        ],
    )
    self.concepts["Institution"].map_wikidata(
        "Q4671277",
        "organization",
        [
            ("short_name", "P1813"),  # 2.0 %
            ("inception", "P571"),  # 65.8 %
            ("image", "P18"),  # 15.2 %
            ("country", "P17"),  # 88.8 %
            ("located_in", "P131"),  # 51.9 %
            ("official_website", "P856"),  # 59.1%
            ("coordinate_location", "P625"),  # 44.0 %
        ],
    )
    self.concepts["Paper"].map_wikidata(
        "Q13442814",
        "work",
        [
            ("title", "label"),
            ("doi", "P356"),
            ("DBLP_publication_ID", "P8978"),
            ("publication_date", "P577"),
        ],
    ).map("dblp", [("title", "title"), ("doi", "doi")])
    # scientific event
    self.concepts["Event"].map_wikidata(
        "Q52260246",
        "event",
        [
            ("title", "P1476"),
            ("country", "P17"),  # 93.9% -> Human Settlement
            ("location", "P276"),  # 94.6%
            ("point_in_time", "P585"),
            ("official_website", "P856"),
        ],
    )
    # academic event series
    self.concepts["EventSeries"].map_wikidata(
        "Q47258130",
        "event-series",
        [
            ("title", "P1476"),  # 96.7 %
            ("short_name", "P1813"),  # 93.1 %
            ("VIAF_ID", "P214"),  # 60.5 %
            ("DBLP_venue_ID", "P8926"),  # 96.4 %
            ("gndId", "P227"),  # 42.3 %
            ("inception", "P571"),  # 22.3 %
            ("official_website", "P856"),  # 13.5 %
        ],
    )
    # proceedings
    self.concepts["Proceedings"].map_wikidata(
        "Q1143604",
        "venue",
        [
            ("title", "P1476"),
            ("short_name", "P1813"),
            ("full_work_available_at_URL", "P953"),
            ("publication_date", "P577"),
        ],
    )
    # country
    self.concepts["Country"].map_wikidata(
        "Q6256",
        "topic",
        [
            ("name", "label"),  # 100% ?
            ("homepage", "P856"),  # 49.4%
            ("population", "P1082"),  # 57.4%
            ("capital", "P36"),  # 59.8%
            ("coordinate_location", "P625"),  # 58.6%
            ("iso_code", "P297"),  # 53.3%
        ],
    )

    # lookup of the concepts by their wikidata class - classes must be unique
    self.concepts_by_qid = {}
    for concept in self.concepts.values():
        if concept.wd_class in self.concepts_by_qid:
            raise Exception(f"duplicate wd_class definition: {concept.wd_class}")
        self.concepts_by_qid[concept.wd_class] = concept

`conceptForQid(qid)`

get the concept for the given wikidata Q identifier

Parameters:

Name	Type	Description	Default
`qid(str)`		get the concept for the given Qid	required

Return

Concept: or None if none is found

Source code in skg/kg.py

def conceptForQid(self, qid: str) -> Concept:
    """
    look up the concept that is registered for a wikidata Q identifier

    Args:
        qid(str): the wikidata class identifier to look up

    Return:
        Concept: or None if none is found
    """
    return self.concepts_by_qid.get(qid)

`toPlantuml(header=None, footer=None)`

get a plantuml version of this knowledge graph

Parameters:

Name	Type	Description	Default
`header(str)`		the header to apply	`None`
`footer(str)`		the footer to apply	`None`

Returns:

Name	Type	Description
`str`	`str`	the plantuml markup

Source code in skg/kg.py

    def toPlantuml(self, header: str = None, footer: str = None) -> str:
        """
        get a plantuml version of this knowledge graph

        Args:
            header(str): the header to apply - if None a default header with
                version information and the current UTC date is generated
            footer(str): the footer to apply - if None the closing brace of
                the default header's package is used

        Returns:
            str: the plantuml markup

        """
        # timezone-aware replacement for the deprecated datetime.utcnow()
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
        if header is None:
            # explicit escapes keep the trailing blank of the authors line visible
            header = (
                f"/'{Version.name}:{Version.description}\n"
                f"updated {timestamp}\n"
                "\n"
                f"authors:{Version.authors} \n"
                "'/\n"
                f"title  {Version.name}:{Version.description} see {Version.doc_url} updated {timestamp}\n"
                "hide circle\n"
                "package skg {\n"
            )
        if footer is None:
            footer = "}\n"
        indent = "  "
        parts = [f"{header}"]
        for concept_name, concept in self.concepts.items():
            parts.append(f"{indent}class {concept_name} {{\n")
            # one line per property name
            parts.extend(f"{indent}  {prop_name}\n" for prop_name in concept.props)
            parts.append(f"\n{indent}}}\n")
        parts.append(f"{footer}")
        return "".join(parts)

`toSiDiF()`

convert me to SiDiF format

Source code in skg/kg.py

    def toSiDiF(self) -> str:
        """
        convert me to SiDiF format

        Returns:
            str: one topic declaration per concept
        """
        # the name literal is enclosed in a balanced pair of double quotes
        return "".join(
            f"#\n# {concept_name}\n#\n"
            f"{concept_name} isA Topic\n"
            f'"{concept_name}" is name of it\n'
            for concept_name in self.concepts
        )

`location`

Created on 2022-11-21

@author: wf

`Country`

Bases: Node

an instance of a country

Source code in skg/location.py

class Country(skg.graph.Node):
    """
    a country node
    """

    @classmethod
    def getSamples(cls):
        """
        get sample records for a country

        Returns:
            list: a list with a single sample dict
        """
        singapore = {
            "wikiDataId": "Q334",
            "name": "Singapore",
            "iso_code": "SG",
            "homepage": "https://www.gov.sg/",
            "population": 5866139,
            "coordinate_location": "1°18'N, 103°48'E",
        }
        return [singapore]

`orcid`

Created on 2022-11-19

@author: wf

`ORCID`

ORCID handling

see e.g. https://info.orcid.org/brand-guidelines/#h-orcid-logos-and-icons https://pub.orcid.org/v3.0/

Source code in skg/orcid.py

class ORCID:
    """
    ORCID handling

    see e.g.
        https://info.orcid.org/brand-guidelines/#h-orcid-logos-and-icons
        https://pub.orcid.org/v3.0/
    """

    # syntax check: three dash terminated groups of four digits followed
    # by three digits and a final digit or X
    pattern = re.compile(r"^(\d{4}-){3}\d{3}(\d|X)$")

    def __init__(self, orcid: str):
        """
        constructor

        Args:
            orcid(str): the orcid
        """
        self.orcid = orcid
        # https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier
        self.orcid_num = orcid.replace("-", "")
        match = re.match(ORCID.pattern, orcid)
        # validate is only consulted when the syntax check succeeded
        self.ok = bool(match) and validate(self.orcid_num)

    @classmethod
    def isORCID(cls, orcid: str) -> bool:
        """
        check that the given string is an ORCID

        Args:
            orcid(str): the potential ORCID string

        Returns:
            bool: True if the string represents a valid ORCID otherwise false
        """
        # empty or missing strings are rejected without constructing an object
        if not orcid:
            return False
        orcid_obj = ORCID(orcid)
        return orcid_obj.ok

    def getMetadata(self, op: str = None, timeout: float = 30.0) -> dict:
        """
        get the ORCID metadata data

        Args:
            op(str): the https://pub.orcid.org/v3.0/ API
            operation to apply - default is "Fetch record details"
            timeout(float): seconds to wait for the server before giving up

        Returns:
            dict: the dictionary derived from the JSON response

        """
        op = "" if op is None else f"/{op}"
        url = f"https://pub.orcid.org/v3.0/{self.orcid}{op}"
        # without a timeout the request could block forever on a stalled server
        r = requests.get(
            url,
            headers={"User-Agent": "Mozilla/5.0", "accept": "application/json"},
            timeout=timeout,
        )
        json_data = r.json()
        return json_data

    def asHtml(self, mode: str = "full", inline: str = "") -> str:
        """
        get an html link to this ORCID decorated with the orcid logo

        Args:
            mode(str): the mode - one of "full", "compact" or "inline"
            inline(str): in inline mode this is the text to be displayed inline

        Returns:
            str: the html code

        Raises:
            ValueError: if the mode is not one of the supported modes
        """
        href = f"""https://orcid.org/{self.orcid}"""
        logo = """<img alt="ORCID logo" src="https://info.orcid.org/wp-content/uploads/2019/11/orcid_16x16.png" width="16" height="16" />"""
        if mode == "full":
            html = f"""<a href="{href}">{logo}{href}</a>"""
        elif mode == "compact":
            html = f"""<a href="{href}">{logo}{self.orcid}</a>"""
        elif mode == "inline":
            html = f"""<a href="{href}">{inline}{logo}</a>"""
        else:
            # fail with a clear message instead of an UnboundLocalError
            raise ValueError(
                f"invalid mode '{mode}' - expected 'full', 'compact' or 'inline'"
            )
        return html

`init(orcid)`

constructor

Parameters:

Name	Type	Description	Default
`orcid(str)`		the orcid	required

Source code in skg/orcid.py

def __init__(self, orcid: str):
    """
    create an ORCID from the given identifier string

    Args:
        orcid(str): the orcid
    """
    self.orcid = orcid
    # the identifier without its dashes, see
    # https://support.orcid.org/hc/en-us/articles/360006897674-Structure-of-the-ORCID-Identifier
    self.orcid_num = orcid.replace("-", "")
    syntax_ok = ORCID.pattern.match(orcid) is not None
    # validate is only called for syntactically correct identifiers
    self.ok = validate(self.orcid_num) if syntax_ok else False

`asHtml(mode='full', inline='')`

the orcid logo

Parameters:

Name	Type	Description	Default
`mode(str)`		the mode - one of "full", "compact" or "inline"	`'full'`
`inline(str)`		in inline mode this is the text to be displayed inline	`''`

Returns:

Name	Type	Description
`str`	`str`	the html code

Source code in skg/orcid.py

def asHtml(self, mode: str = "full", inline: str = "") -> str:
    """
    get an html link to this ORCID decorated with the orcid logo

    Args:
        mode(str): the mode - one of "full", "compact" or "inline"
        inline(str): in inline mode this is the text to be displayed inline

    Returns:
        str: the html code

    Raises:
        ValueError: if the mode is not one of the supported modes
    """
    href = f"""https://orcid.org/{self.orcid}"""
    logo = """<img alt="ORCID logo" src="https://info.orcid.org/wp-content/uploads/2019/11/orcid_16x16.png" width="16" height="16" />"""
    if mode == "full":
        html = f"""<a href="{href}">{logo}{href}</a>"""
    elif mode == "compact":
        html = f"""<a href="{href}">{logo}{self.orcid}</a>"""
    elif mode == "inline":
        html = f"""<a href="{href}">{inline}{logo}</a>"""
    else:
        # fail with a clear message instead of an UnboundLocalError
        raise ValueError(
            f"invalid mode '{mode}' - expected 'full', 'compact' or 'inline'"
        )
    return html

`getMetadata(op=None)`

get the ORCID metadata data

Parameters:

Name	Type	Description	Default
`op(str)`		the https://pub.orcid.org/v3.0/ API operation to apply - default is "Fetch record details"	`None`

Returns:

Name	Type	Description
`dict`	`dict`	the dictionary derived from the JSON response

Source code in skg/orcid.py

def getMetadata(self, op: str = None, timeout: float = 30.0) -> dict:
    """
    get the ORCID metadata data

    Args:
        op(str): the https://pub.orcid.org/v3.0/ API
        operation to apply - default is "Fetch record details"
        timeout(float): seconds to wait for the server before giving up

    Returns:
        dict: the dictionary derived from the JSON response

    """
    op = "" if op is None else f"/{op}"
    url = f"https://pub.orcid.org/v3.0/{self.orcid}{op}"
    # without a timeout the request could block forever on a stalled server
    r = requests.get(
        url,
        headers={"User-Agent": "Mozilla/5.0", "accept": "application/json"},
        timeout=timeout,
    )
    json_data = r.json()
    return json_data

`isORCID(orcid)` `classmethod`

check that the given string is an ORCID

Parameters:

Name	Type	Description	Default
`orcid(str)`		the potential ORCID string	required

Returns:

Name	Type	Description
`bool`	`bool`	True if the string represents a valid ORCID otherwise false

Source code in skg/orcid.py

@classmethod
def isORCID(cls, orcid: str) -> bool:
    """
    tell whether the given string is an ORCID

    Args:
        orcid(str): the potential ORCID string

    Returns:
        bool: True if the string represents a valid ORCID otherwise false
    """
    # empty or missing input is rejected without constructing an ORCID
    return ORCID(orcid).ok if orcid else False

`owl`

Created on 2022-11-22

@author: wf

`Owl`

Bases: Schema

Web Ontology Language access see https://en.wikipedia.org/wiki/Web_Ontology_Language

Source code in skg/owl.py

class Owl(Schema):
    """
    Web Ontology Language access
    see https://en.wikipedia.org/wiki/Web_Ontology_Language
    """

    def __init__(self, name: str, url: str, authors: str, inception: str):
        """
        constructor

        Args:
            name(str): the name of this schema
            url(str): the url of this schema
            authors(str): the authors of this schema
            inception(str): the inception of this schema
        """
        Schema.__init__(self, name, url, authors, inception)
        self.schema_url = url
        # the graph is only set by loadSchema
        self.schema = None

    def show_triples(self, result):
        """
        show the triples for the given query result

        Args:
            result: an iterable of rows - each row is printed with its
                1-based index
        """
        for i, row in enumerate(result, start=1):
            print(f"{i}:{row}")

    def query_schema(self, query: str, formats: str = "", profile: bool = False):
        """
        query the schema

        Args:
            query(str): the SPARQL query to execute
            formats(str): if "triples" is in the format string show the results string
            profile(bool): if True show timing information for the query

        Returns:
            the result of running the query against self.schema
        """
        profiler = Profiler(f"query {query}", profile=profile)
        result = self.schema.query(query)
        if "triples" in formats:
            self.show_triples(result)
        if profile:
            profiler.time(f" for {len(result)} triples")
        return result

    def loadSchema(self, formats: str = "", profile: bool = False):
        """
        load the schema

        Args:
            formats(str): the formats to dump
            profile(bool): if True show timing

        Returns:
            the parsed graph which is also kept in self.schema
        """
        # https://stackoverflow.com/questions/56631109/how-to-parse-and-load-an-ontology-in-python
        profiler = Profiler(f"reading {self.name} schema", profile=profile)
        self.schema = rdflib.Graph()
        self.schema.parse(self.schema_url, format="application/rdf+xml")
        if profile:
            profiler.time(f" for {len(self.schema)} triples")
        # "triples" is not a serialization format - it is handled by query_schema
        for t_format in formats.split(","):
            if t_format and t_format != "triples":
                print(self.schema.serialize(format=t_format))
        self.schema.bind("owl", OWL)
        query = """select distinct ?s ?p ?o 
where { ?s ?p ?o}
"""
        self.query_schema(query, formats=formats, profile=profile)
        return self.schema

    def unprefix_value(
        self, value: object, prefixes: list = ["http://xmlns.com/foaf/0.1/"]
    ) -> str:
        """
        get rid of RDF prefixes to simplify our life

        A non empty list is reduced to its first element, a dict to its
        "@id" or "@value" entry. For a string with exactly one "#" only
        the part behind the "#" is kept and a leading prefix is removed.

        Args:
            value(object): the RDFLib value to unprefix
            prefixes(list): list of prefixes to remove (only read, never modified)
        Returns:
            str: a simple string representation - values that are not
            strings after unwrapping are returned unchanged
        """
        if isinstance(value, list):
            if len(value) >= 1:
                value = value[0]
        if isinstance(value, dict):
            for akey in ["@id", "@value"]:
                if akey in value:
                    value = value[akey]
                    # stop here - the unwrapped value is no dict anymore
                    break
        if not isinstance(value, str):
            # e.g. numbers, booleans or empty lists have no prefix to strip
            return value
        parts = value.split("#")
        if len(parts) == 2:
            value = parts[1]
        for prefix in prefixes:
            if value.startswith(prefix):
                # strip only the leading occurrence of the prefix
                value = value[len(prefix) :]
        return value

    def unprefix_row(self, row: dict):
        """
        get rid of the RDF prefixes in keys and values of the given row
        to simplify our life

        The row is modified in place. The original value is kept under
        the original key with the suffix "_rdf".

        Args:
            row(dict): a dict of RDF values to unprefix
        """
        # iterate over a snapshot of the keys since the row is modified
        for key in list(row.keys()):
            org_value = row[key]
            value = self.unprefix_value(org_value)
            row[key] = value
            if "#" in key:
                noprefix_key = self.unprefix_value(key)
                row[noprefix_key] = row.pop(key)
            row[f"{key}_rdf"] = org_value

    def toClasses(self):
        """
        convert to a classes dict of dicts

        Nodes of type "Class" become entries with "@comment", "@label"
        and "@subClassOf" keys. Nodes of type "Property" are added to
        the class named by their domain if that class is known.

        Returns:
            dict: a dict of dictionaries wrapped as {"classes": ...}
        """
        json_ld = self.schema.serialize(format="json-ld")
        schema_dict = json.loads(json_ld)
        classes = {}
        # get rid of prefixes
        for row in schema_dict:
            self.unprefix_row(row)
        # pass 1 - classes
        for row in schema_dict:
            # nodes without a type are neither classes nor properties
            if row.get("@type") != "Class":
                continue
            name = row["@id"]
            # the first definition of a class wins
            if name not in classes:
                classes[name] = {
                    "@comment": row.get("comment", ""),
                    "@label": row.get("label", ""),
                    "@subClassOf": row.get("subClassOf", ""),
                }
        # pass 2 - properties
        for row in schema_dict:
            if row.get("@type") != "Property":
                continue
            name = row["@id"]
            domain = row.get("domain", "")
            # properties of unknown classes are ignored
            if domain in classes:
                classes[domain][name] = {
                    "name": name,
                    "comment": row.get("comment", ""),
                    "label": row.get("label"),
                    "domain": domain,
                    "range": row.get("range", ""),
                }
        wrapped_classes = {"classes": classes}
        return wrapped_classes

`init(name, url, authors, inception)`

constructor

Parameters:

Name	Description	Default
`name(str)`	the name of this schema	required
`url(str)`	the url of this schema	required
`authors(str)`	the authors of this schema	required
`inception(str)`	the inception of this schema	required

Source code in skg/owl.py

def __init__(self, name: str, url: str, authors: str, inception: str):
    """
    set up the schema base attributes and the owl specific state

    Args:
        name(str): the name of this schema
        url(str): the url of this schema
        authors(str): the authors of this schema
        inception(str): the inception of this schema
    """
    Schema.__init__(self, name, url, authors, inception)
    # no graph yet - loadSchema creates it
    self.schema = None
    self.schema_url = url

`loadSchema(formats='', profile=False)`

load the schema

Parameters:

Name	Type	Description	Default
`formats(str)`		the formats to dump	`''`
`profile(bool)`		if True show timing	`False`

Source code in skg/owl.py

    def loadSchema(self, formats: str = "", profile: bool = False):
        """
        load the schema

        Args:
            formats(str): the formats to dump
            profile(bool): if True show timing
        """
        # https://stackoverflow.com/questions/56631109/how-to-parse-and-load-an-ontology-in-python
        profiler = Profiler(f"reading {self.name} schema", profile=profile)
        self.schema = rdflib.Graph()
        self.schema.parse(self.schema_url, format="application/rdf+xml")
        if profile:
            profiler.time(f" for {len(self.schema)} triples")
        for t_format in formats.split(","):
            if t_format and t_format != "triples":
                print(self.schema.serialize(format=t_format))
        self.schema.bind("owl", OWL)
        query = """select distinct ?s ?p ?o 
where { ?s ?p ?o}
"""
        self.query_schema(query, formats=formats, profile=profile)
        return self.schema

`query_schema(query, formats='', profile=False)`

query the schema

Parameters:

Name	Description	Default
`query(str)`	the SPARQL query to execute	required
`formats(str)`	if "triples" is in the format string show the results string	`''`
`profile(bool)`	if True show timing information for the query	`False`

Source code in skg/owl.py

def query_schema(self, query: str, formats: str = "", profile: bool = False):
    """
    run the given query against the loaded schema

    Args:
        query(str): the SPARQL query to execute
        formats(str): if "triples" is in the format string show the results string
        profile(bool): if True show timing information for the query

    Returns:
        the result of running the query against self.schema
    """
    query_timer = Profiler(f"query {query}", profile=profile)
    rows = self.schema.query(query)
    wants_triples = "triples" in formats
    if wants_triples:
        self.show_triples(rows)
    if profile:
        query_timer.time(f" for {len(rows)} triples")
    return rows

`show_triples(result)`

show the triples for the given query result

Source code in skg/owl.py

def show_triples(self, result):
    """
    print each row of the given query result with its 1-based index

    Args:
        result: an iterable of rows
    """
    for position, triple in enumerate(result, start=1):
        print(f"{position}:{triple}")

`toClasses()`

convert to a classes dict of dicts

Returns:

Name	Type	Description
`dict`		a dict of dictionaries

Source code in skg/owl.py

def toClasses(self):
    """
    convert to a classes dict of dicts

    Nodes of type "Class" become entries with "@comment", "@label"
    and "@subClassOf" keys. Nodes of type "Property" are added to
    the class named by their domain if that class is known.

    Returns:
        dict: a dict of dictionaries wrapped as {"classes": ...}
    """
    json_ld = self.schema.serialize(format="json-ld")
    schema_dict = json.loads(json_ld)
    classes = {}
    # get rid of prefixes
    for row in schema_dict:
        self.unprefix_row(row)
    # pass 1 - classes
    for row in schema_dict:
        # nodes without a type are neither classes nor properties
        if row.get("@type") != "Class":
            continue
        name = row["@id"]
        # the first definition of a class wins
        if name not in classes:
            classes[name] = {
                "@comment": row.get("comment", ""),
                "@label": row.get("label", ""),
                "@subClassOf": row.get("subClassOf", ""),
            }
    # pass 2 - properties
    for row in schema_dict:
        if row.get("@type") != "Property":
            continue
        name = row["@id"]
        domain = row.get("domain", "")
        # properties of unknown classes are ignored
        if domain in classes:
            classes[domain][name] = {
                "name": name,
                "comment": row.get("comment", ""),
                "label": row.get("label"),
                "domain": domain,
                "range": row.get("range", ""),
            }
    wrapped_classes = {"classes": classes}
    return wrapped_classes

`unprefix_row(row)`

get rid of the RDF prefixes in keys and values of the given row to simplify our life

Parameters:

Name	Type	Description	Default
`row(dict)`		a dict of RDF values to unprefix	required

Source code in skg/owl.py

def unprefix_row(self, row: dict):
    """
    strip the RDF prefixes from the keys and values of the given row in place

    The original value of each entry is kept under the original key
    with the suffix "_rdf".

    Args:
        row(dict): a dict of RDF values to unprefix
    """
    # work on a snapshot of the keys since entries are added and removed
    for rdf_key in tuple(row):
        raw_value = row[rdf_key]
        row[rdf_key] = self.unprefix_value(raw_value)
        if "#" in rdf_key:
            short_key = self.unprefix_value(rdf_key)
            row[short_key] = row.pop(rdf_key)
        row[f"{rdf_key}_rdf"] = raw_value

`unprefix_value(value, prefixes=['http://xmlns.com/foaf/0.1/'])`

get rid of RDF prefixes to simplify our life

Parameters:

Name	Type	Description	Default
`value(object)`		the RDFLib value to unprefix	required
`prefixes(list)`		list of prefixes to remove	`['http://xmlns.com/foaf/0.1/']`

Returns: str: a simple string representation

Source code in skg/owl.py

def unprefix_value(
    self, value: object, prefixes: list = ["http://xmlns.com/foaf/0.1/"]
) -> str:
    """
    get rid of RDF prefixes to simplify our life

    A non empty list is reduced to its first element, a dict to its
    "@id" or "@value" entry. For a string with exactly one "#" only
    the part behind the "#" is kept and a leading prefix is removed.

    Args:
        value(object): the RDFLib value to unprefix
        prefixes(list): list of prefixes to remove (only read, never modified)
    Returns:
        str: a simple string representation - values that are not
        strings after unwrapping are returned unchanged
    """
    if isinstance(value, list):
        if len(value) >= 1:
            value = value[0]
    if isinstance(value, dict):
        for akey in ["@id", "@value"]:
            if akey in value:
                value = value[akey]
                # stop here - the unwrapped value is no dict anymore
                break
    if not isinstance(value, str):
        # e.g. numbers, booleans or empty lists have no prefix to strip
        return value
    parts = value.split("#")
    if len(parts) == 2:
        value = parts[1]
    for prefix in prefixes:
        if value.startswith(prefix):
            # strip only the leading occurrence of the prefix
            value = value[len(prefix) :]
    return value

`paper`

Created on 2022-11-16

@author: wf

`Paper`

Bases: Node

a scientific paper

Source code in skg/paper.py

class Paper(skg.graph.Node):
    """
    a Node subclass representing a scientific paper
    """

    @classmethod
    def getSamples(cls):
        """
        get the sample records for this class

        Returns:
            list: two sample dicts - one with Wikidata details and one
            with only doi and title
        """
        web_open_society = {
            "wikiDataId": "Q55693406",
            "title": "Designing the web for an open society",
            "doi": "10.1145/1963405.1963408",
            "DBLP_publication_ID": "conf/www/Berners-Lee11",
            "publication_date": 2011,
        }
        content_gaps = {
            "doi": "10.1007/978-3-031-19433-7_21",
            "title": "An Analysis of Content Gaps Versus User Needs in the Wikidata Knowledge Graph",
        }
        return [web_open_society, content_gaps]

    def __init__(self):
        """
        Constructor - sets no attributes
        """

    def fromDOI(self, doi: str):
        """
        fill me from the citeproc metadata of the given doi

        title and label are only set if they are not available yet

        Args:
            doi(str): the DOI to get the metadata for
        """
        self.doi = doi
        doi_obj = DOI(doi)
        self.doi_obj = doi_obj
        doi_obj.meta_data = doi_obj.doi2Citeproc()
        title_known = hasattr(self, "title")
        if not title_known:
            self.title = doi_obj.meta_data["title"]
        label_known = hasattr(self, "label")
        if not label_known:
            self.label = f"https://doi.org/{self.doi}"

`init()`

Constructor

Source code in skg/paper.py

def __init__(self):
    """
    Constructor

    The body consists of this docstring only - no attributes are set here.
    """

`fromDOI(doi)`

construct me from the given doi

Source code in skg/paper.py

def fromDOI(self, doi: str):
    """
    fill me from the citeproc metadata of the given doi

    title and label are only set if they are not available yet

    Args:
        doi(str): the DOI to get the metadata for
    """
    self.doi = doi
    doi_obj = DOI(doi)
    self.doi_obj = doi_obj
    doi_obj.meta_data = doi_obj.doi2Citeproc()
    title_known = hasattr(self, "title")
    if not title_known:
        self.title = doi_obj.meta_data["title"]
    label_known = hasattr(self, "label")
    if not label_known:
        self.label = f"https://doi.org/{self.doi}"

`profiler`

Created on 2022-11-18

@author: wf

`Profiler`

simple profiler

Source code in skg/profiler.py

class Profiler:
    """
    minimal stop watch that reports via print
    """

    def __init__(self, msg, profile=True):
        """
        remember the message and the active flag and start the clock

        Args:
            msg(str): the message to show if profiling is active
            profile(bool): True if messages should be shown
        """
        self.msg, self.profile = msg, profile
        self.starttime = time.time()
        if self.profile:
            print(f"Starting {msg} ...")

    def time(self, extraMsg=""):
        """
        measure the time since construction and print it if profile is active

        Args:
            extraMsg(str): text to append to the message in the output

        Returns:
            float: the elapsed time in seconds
        """
        now = time.time()
        elapsed = now - self.starttime
        if not self.profile:
            return elapsed
        print(f"{self.msg}{extraMsg} took {elapsed:5.1f} s")
        return elapsed

`init(msg, profile=True)`

construct me with the given msg and profile active flag

Parameters:

Name	Type	Description	Default
`msg(str)`		the message to show if profiling is active	required
`profile(bool)`		True if messages should be shown	`True`

Source code in skg/profiler.py

def __init__(self, msg, profile=True):
    """
    remember the message and the active flag and start the clock

    Args:
        msg(str): the message to show if profiling is active
        profile(bool): True if messages should be shown
    """
    self.msg, self.profile = msg, profile
    self.starttime = time.time()
    if self.profile:
        print(f"Starting {msg} ...")

`time(extraMsg='')`

time the action and print if profile is active

Source code in skg/profiler.py

def time(self, extraMsg=""):
    """
    measure the time since construction and print it if profile is active

    Args:
        extraMsg(str): text to append to the message in the output

    Returns:
        float: the elapsed time in seconds
    """
    now = time.time()
    elapsed = now - self.starttime
    if not self.profile:
        return elapsed
    print(f"{self.msg}{extraMsg} took {elapsed:5.1f} s")
    return elapsed

`ris`

Created on 2024-03-08

@author: wf

`RIS_Entry`

Research Information Systems format https://en.wikipedia.org/wiki/RIS_(file_format)

Source code in skg/ris.py

@lod_storable
class RIS_Entry:
    """
    Research Information Systems format
    https://en.wikipedia.org/wiki/RIS_(file_format)
    """

    type_of_reference: Optional[str] = None
    abstract: Optional[str] = None
    type_of_work: Optional[str] = None
    year: Optional[str] = None
    doi: Optional[str] = None
    keywords: List[str] = field(default_factory=list)
    first_authors: List[str] = field(default_factory=list)
    publisher: Optional[str] = None
    language: Optional[str] = None
    primary_title: Optional[str] = None
    urls: List[str] = field(default_factory=list)
    secondary_title: Optional[str] = None

    @property
    def lang_qid(self) -> str:
        """
        get the Wikidata item id for my language

        Returns:
            str: "Q188" if the language is "de" - "Q1860" (English) otherwise
        """
        qid = "Q1860"  # English
        if self.language == "de":
            qid = "Q188"
        return qid

    @classmethod
    def get_property_mappings(cls):
        """
        get the wikidata property mappings

        Returns:
            list: the PropertyMapping instances for instanceof, title,
            doi, language and publication date
        """
        mappings = [
            PropertyMapping(
                column="instanceof",
                propertyName="instanceof",
                propertyId="P31",
                propertyType=WdDatatype.itemid,
                value="Q13442814",  # scholarly article
            ),
            PropertyMapping(
                column="primary_title",
                propertyName="title",
                propertyId="P1476",
                propertyType=WdDatatype.text,
            ),
            PropertyMapping(
                column="doi",
                # propertyName="DOI",
                # propertyId="P356",
                propertyName="described at URL",
                propertyId="P973",
                # propertyType=WdDatatype.extid,
                propertyType=WdDatatype.url,
            ),
            PropertyMapping(
                column="lang_qid",
                propertyName="language of work or name",
                propertyId="P407",
                propertyType=WdDatatype.itemid,
            ),
            PropertyMapping(
                column="year",
                propertyName="publication date",
                propertyId="P577",
                propertyType=WdDatatype.year,
            ),
        ]
        return mappings

    @classmethod
    def get_dict_from_file(cls, ris_file_path, by_field: str = "index"):
        """
        read the entries of the given RIS file into a dict

        Args:
            ris_file_path: the path of the RIS file to read
            by_field(str): the name of the entry field to use as the
                dict key - "index" uses the 1-based position of the entry

        Returns:
            dict: the RIS_Entry instances by key - entries that do not
            have the given field are left out
        """
        ris_dict = {}
        with open(ris_file_path, "r") as bibliography_file:
            entries = rispy.load(bibliography_file)
            for i, entry in enumerate(entries, start=1):
                if by_field == "index":
                    key = i
                elif by_field in entry:
                    # the key is the value of the field in this entry
                    key = entry[by_field]
                else:
                    # there is no usable key for this entry
                    continue
                ris_dict[key] = RIS_Entry.from_dict(entry)

        return ris_dict

`get_property_mappings()` `classmethod`

get the wikidata property mappings

Source code in skg/ris.py

@classmethod
def get_property_mappings(cls):
    """
    get the wikidata property mappings

    Returns:
        list: the PropertyMapping instances for instanceof, title,
        doi, language and publication date
    """
    instance_of = PropertyMapping(
        column="instanceof",
        propertyName="instanceof",
        propertyId="P31",
        propertyType=WdDatatype.itemid,
        value="Q13442814",  # scholarly article
    )
    title = PropertyMapping(
        column="primary_title",
        propertyName="title",
        propertyId="P1476",
        propertyType=WdDatatype.text,
    )
    # NOTE: the doi column is mapped to "described at URL" (P973, url) -
    # the alternative mapping to "DOI" (P356, extid) is not active
    doi = PropertyMapping(
        column="doi",
        propertyName="described at URL",
        propertyId="P973",
        propertyType=WdDatatype.url,
    )
    language = PropertyMapping(
        column="lang_qid",
        propertyName="language of work or name",
        propertyId="P407",
        propertyType=WdDatatype.itemid,
    )
    publication_date = PropertyMapping(
        column="year",
        propertyName="publication date",
        propertyId="P577",
        propertyType=WdDatatype.year,
    )
    return [instance_of, title, doi, language, publication_date]

`schema`

Created on 2022-11-22

@author: wf

`Schema`

a schema

Source code in skg/schema.py

class Schema:
    """
    a schema
    """

    def __init__(self, name: str, url: str, authors: str, inception: str):
        """
        constructor

        Args:
            name(str): the name of this schema
            url(str): the url of this schema
            authors(str): the authors of this schema
            inception(str): the inception of this schema
        """
        self.name = name
        self.url = url
        self.authors = authors
        self.inception = inception

    def classesToPlantUml(self, classes: dict, indent: str = "  "):
        """
        convert the given classes dict to plantuml

        Args:
            classes(dict): a dictionary of classes
            indent(str): the indentation to apply

        Returns:
            str: the plantuml markup - for each class a note followed by
            the class body, its relations and its generalization
        """
        classes = classes["classes"]
        markup = ""
        for cname, clazz in classes.items():
            class_markup = ""
            rel_markup = ""  # relations
            for pname, prop in clazz.items():
                # keys starting with @ are class metadata and no properties
                if pname.startswith("@"):
                    continue
                prange = prop["range"]
                if prange in classes:
                    #  Class01 "1" *-- "many" Class02 : contains
                    rel_markup += f"{indent}{cname}--{prange}:{pname}\n"
                else:
                    class_markup += f"{indent}  {pname}:{prange}\n"
            class_markup = f"{indent}class {cname}{{\n{class_markup}\n{indent}}}\n"
            class_markup += rel_markup
            if "@subClassOf" in clazz:
                general = clazz["@subClassOf"]
                if general:
                    class_markup += f"{indent}{general} <|-- {cname}\n"
            note = f"{indent}note top of {cname}\n"
            if "@label" in clazz:
                note += f"""{indent}{clazz["@label"]}\n"""
            if "@comment" in clazz:
                note += f"""{indent}{clazz["@comment"]}\n"""
            note += f"{indent}end note\n"
            class_markup = note + class_markup
            markup += class_markup
        return markup

    def toPlantUml(self, header=None, footer=None) -> str:
        """
        get a plantuml version of the schema

        The classes are taken from self.toClasses() which is not defined
        in this class.

        Args:
            header(str): the header to apply
            footer(str): the footer to apply

        Returns:
            str: the plantuml markup
        """
        # timezone aware replacement for the deprecated datetime.utcnow()
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
        if header is None:
            # the constructor only sets url - fall back to it when no
            # schema_url attribute has been set
            schema_url = getattr(self, "schema_url", self.url)
            header = f"""/'
     {self.authors} {self.inception}
     updated {timestamp}

     {self.name} {schema_url}
     converted from owl to plantuml
    '/
    title  {self.name} schema {schema_url} converted from owl to plantuml updated {timestamp}
    hide circle
    package foaf {{
      class Document {{
      }}
    }}
    package dblp {{
     """
        if footer is None:
            footer = "}\n"
        classes = self.toClasses()
        markup = header + self.classesToPlantUml(classes, indent="  ") + footer
        return markup

`init(name, url, authors, inception)`

constructor

Parameters:

Name	Description	Default
`name(str)`	the name of this schema	required
`url(str)`	the url of this schema	required
`authors(str)`	the authors of this schema	required
`inception(str)`	the inception of this schema	required

Source code in skg/schema.py

def __init__(self, name: str, url: str, authors: str, inception: str):
    """
    keep the descriptive attributes of this schema

    Args:
        name(str): the name of this schema
        url(str): the url of this schema
        authors(str): the authors of this schema
        inception(str): the inception of this schema
    """
    self.name, self.url = name, url
    self.authors, self.inception = authors, inception

`classesToPlantUml(classes, indent=' ')`

convert the given classes dict to plantuml

Parameters:

Name	Type	Description	Default
`classes(dict)`		a dictionary of classes	required
`indent(str)`		the indentation to apply	`'  '` (two spaces)

Source code in skg/schema.py

def classesToPlantUml(self, classes: dict, indent: str = "  "):
    """
    render the given classes dict as plantuml markup

    Args:
        classes(dict): a dictionary of classes
        indent(str): the indentation to apply

    Returns:
        str: the plantuml markup - for each class a note followed by
        the class body, its relations and its generalization
    """
    class_map = classes["classes"]
    chunks = []
    for cname, clazz in class_map.items():
        attribute_lines = []
        relation_lines = []
        for pname, prop in clazz.items():
            # keys starting with @ are class metadata and no properties
            if pname.startswith("@"):
                continue
            prange = prop["range"]
            if prange in class_map:
                #  Class01 "1" *-- "many" Class02 : contains
                relation_lines.append(f"{indent}{cname}--{prange}:{pname}\n")
            else:
                attribute_lines.append(f"{indent}  {pname}:{prange}\n")
        # the note is placed in front of the class it belongs to
        chunks.append(f"{indent}note top of {cname}\n")
        for meta_key in ("@label", "@comment"):
            if meta_key in clazz:
                chunks.append(f"{indent}{clazz[meta_key]}\n")
        chunks.append(f"{indent}end note\n")
        attributes = "".join(attribute_lines)
        chunks.append(f"{indent}class {cname}{{\n{attributes}\n{indent}}}\n")
        chunks.extend(relation_lines)
        general = clazz.get("@subClassOf")
        if general:
            chunks.append(f"{indent}{general} <|-- {cname}\n")
    return "".join(chunks)

`toPlantUml(header=None, footer=None)`

get a plantuml version of the schema

Parameters:

Name	Type	Description	Default
`header(str)`		the header to apply - a default header is generated if None	`None`
`footer(str)`		the footer to apply - a closing brace line is used if None	`None`

Returns:

Name	Type	Description
`str`	`str`	the plantuml markup

Source code in skg/schema.py

def toPlantUml(self, header=None, footer=None) -> str:
    """
    get a plantuml version of the schema

    The classes are taken from self.toClasses() and rendered with
    self.classesToPlantUml().

    Args:
        header(str): the header to apply
        footer(str): the footer to apply

    Returns:
        str: the plantuml markup
    """
    # timezone aware replacement for the deprecated datetime.utcnow()
    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d")
    if header is None:
        # fall back to url when no schema_url attribute has been set
        schema_url = getattr(self, "schema_url", self.url)
        header = f"""/'
 {self.authors} {self.inception}
 updated {timestamp}

 {self.name} {schema_url}
 converted from owl to plantuml
'/
title  {self.name} schema {schema_url} converted from owl to plantuml updated {timestamp}
hide circle
package foaf {{
  class Document {{
  }}
}}
package dblp {{
 """
    if footer is None:
        footer = "}\n"
    classes = self.toClasses()
    markup = header + self.classesToPlantUml(classes, indent="  ") + footer
    return markup

`scholar`

Created on 2022-11-16

@author: wf

`Institution`

Bases: Node

academic institution a scholar might be affiliated with

Source code in skg/scholar.py

class Institution(skg.graph.Node):
    """
    an academic institution that a scholar may be affiliated with
    """

    @classmethod
    def getSamples(cls):
        """
        get sample records for institutions

        Returns:
            list: a list of dicts with one sample institution each
        """
        rwth = {"wikiDataId": "Q273263", "short_name": "RWTH Aachen (German)"}
        ubc = {
            "wikiDataId": "Q391028",
            "inception": "1908",
            "short_name": "UBC",
            "country": "Canada",
            "image": "https://commons.wikimedia.org/wiki/File:Irving_K._Barber_Library.jpg",
            "located_in": "Vancouver",
            "official_website": "https://www.ubc.ca/",
        }
        return [rwth, ubc]

    def __init__(self):
        """
        constructor - the body is empty, no attributes are set here
        """

`init()`

constructor

Source code in skg/scholar.py

def __init__(self):
    """
    constructor - the body is empty, no attributes are set here
    """

`Scholar`

Bases: Node

an instance of a scholar that writes papers to be an author

Source code in skg/scholar.py

class Scholar(skg.graph.Node):
    """
    a scholar - a person who writes papers and thereby becomes an author
    """

    @classmethod
    def getSamples(cls):
        """
        get sample records for scholars

        Returns:
            list: a list of dicts with one sample scholar each
        """
        stefan_decker = {
            "wikiDataId": "Q54303353",
            "name": "Stefan Decker",
            "gndId": "",
            "dblpId": "d/StefanDecker",
            "orcid": "0000-0001-6324-7164",
            "linkedInId": "",
            "googleScholarUser": "uhVkSswAAAAJ",
            "homepage": "http://www.stefandecker.org",
        }
        tim_berners_lee = {
            "name": "Tim Berners-Lee",
            "wikiDataId": "Q80",
            "givenName": "Timothy",
            "familyName": "Berners-Lee",
            "gender": "male",
            "image": "https://commons.wikimedia.org/wiki/File:Sir_Tim_Berners-Lee_(cropped).jpg",
            # "occupation": "computer scientist" truly tabular issue
        }
        anna_lisa_gentile = {
            "name": "Anna Lisa Gentile",
            "wikiDataId": "Q54832532",
            "Semantic_Scholar_author_ID": "Anna Lisa Gentile",
        }
        return [stefan_decker, tim_berners_lee, anna_lisa_gentile]

    def __init__(self):
        """
        constructor - the body is empty, no attributes are set here
        """

`init()`

constructor

Source code in skg/scholar.py

def __init__(self):
    """
    constructor - the body is empty, no attributes are set here
    """

`scholargrid`

Created on 2023-01-04

@author: wf

`ScholarGrid`

Bases: SmwGrid

show a grid of scholars

Source code in skg/scholargrid.py

class ScholarGrid(SmwGrid):
    """
    show a grid of scholars
    """

    def __init__(
        self, app, wikiUsers, wikiId: str, sparql: SPARQL, debug: bool = False
    ):
        """
        constructor

        Args:
            app(App): the app that I am part of
            wikiUsers: the wikiUsers - handed to SmwGrid which looks up wikiUsers[wikiId]
            wikiId(str): the wikiId to use
            sparql(SPARQL): the SPARQL endpoint to use
            debug(bool): if True show debugging information
        """
        entityName = "Scholar"
        entityPluralName = "Scholars"
        pk = "item"
        # SmwGrid.__init__ has no "app" parameter - its first parameter is
        # named "solution", so the app is handed over under that name
        super().__init__(
            solution=app,
            wikiUsers=wikiUsers,
            wikiId=wikiId,
            entityName=entityName,
            entityPluralName=entityPluralName,
            pk=pk,
            getLod=self.getScholars,
            sparql=sparql,
            debug=debug,
        )

    def getScholars(self) -> list:
        """
        get the list of scholars

        As a side effect the wikibase query for scholars is stored in self.wbQuery.

        Returns:
            list: the list of dicts of scholars
        """
        # get a dict of dict
        scholars_dod = self.semwiki.scholars()
        # get a list of dicts
        scholars_lod = list(scholars_dod.values())
        # @TODO - shouldn't this be better specified in the mapping?
        for row in scholars_lod:
            # the "Scholar" column doubles as the label of the row
            row["label"] = row["Scholar"]
        self.wbQuery = ScholarQuery.get()
        return scholars_lod

`init(app, wikiUsers, wikiId, sparql, debug=False)`

constructor

Parameters:

Name	Description	Default
`app(App)`	the app that I am part of	required
`wikiUsers(list)`	the wikiUsers	required
`wikiId(str)`	the wikiId to use	required
`sparql(SPARQL)`	the SPARQL endpoint to use	required
`debug(bool)`	if True show debugging information	`False`

Source code in skg/scholargrid.py

def __init__(
    self, app, wikiUsers, wikiId: str, sparql: SPARQL, debug: bool = False
):
    """
    constructor

    Args:
        app(App): the app that I am part of
        wikiUsers: the wikiUsers - handed to SmwGrid which looks up wikiUsers[wikiId]
        wikiId(str): the wikiId to use
        sparql(SPARQL): the SPARQL endpoint to use
        debug(bool): if True show debugging information
    """
    entityName = "Scholar"
    entityPluralName = "Scholars"
    pk = "item"
    # SmwGrid.__init__ has no "app" parameter - its first parameter is
    # named "solution", so the app is handed over under that name
    super().__init__(
        solution=app,
        wikiUsers=wikiUsers,
        wikiId=wikiId,
        entityName=entityName,
        entityPluralName=entityPluralName,
        pk=pk,
        getLod=self.getScholars,
        sparql=sparql,
        debug=debug,
    )

`getScholars()`

get the list of scholars

Returns:

Name	Type	Description
`list`	`list`	the list of dicts of scholars

Source code in skg/scholargrid.py

def getScholars(self) -> list:
    """
    retrieve the scholar records from the semantic wiki

    Every record gets a "label" entry copied from its "Scholar" entry and
    the wikibase query for scholars is stored in self.wbQuery.

    Returns:
        list: the list of dicts of scholars
    """
    # the wiki delivers a dict of dicts - only the inner dicts are needed
    scholar_records = [*self.semwiki.scholars().values()]
    # @TODO - shouldn't this be better specified in the mapping?
    for scholar_record in scholar_records:
        scholar_record["label"] = scholar_record["Scholar"]
    self.wbQuery = ScholarQuery.get()
    return scholar_records

`ScholarQuery`

Source code in skg/scholargrid.py

class ScholarQuery:
    """
    provider of the wikibase query describing the scholar properties
    """

    @classmethod
    def get(cls) -> WikibaseQuery:
        """
        get the WikiBaseQuery for scholars

        Returns:
            WikibaseQuery: the wikibase query with one property per mapping row
        """
        # @TODO use metamodel info and read from wiki
        # compact form of the property description rows - one tuple per row:
        # (Column, Lookup, PropVarname, PropertyId, PropertyName, Type, Value)
        # "Entity" is always "Scholar" and "Qualifier" is always empty
        compact_rows = [
            ("", "", "instanceof", "P31", "instanceof", "", "Q5"),
            ("wikiDataId", "", "", "", "", "item", ""),
            ("name", "Q101352", "family_name", "P734", "family name", "", ""),
            ("firstName", "Q202444", "given_name", "P735", "given name", "", ""),
            ("homepage", "", "official_website", "P856", "official website", "url", ""),
            (
                "linkedInId",
                "",
                "LinkedIn_personal_profile_ID",
                "P6634",
                "LinkedIn personal profile ID",
                "extid",
                "",
            ),
            ("orcid", "", "ORCID_iD", "P496", "ORCID iD", "extid", ""),
            (
                "googleScholarUser",
                "",
                "Google_Scholar_author_ID",
                "P1960",
                "Google Scholar author ID",
                "extid",
                "",
            ),
            (
                "researchGate",
                "",
                "ResearchGate_profile_ID",
                "P2038",
                "ResearchGate profile ID",
                "extid",
                "",
            ),
            ("gndId", "", "GND_ID", "P227", "GND ID", "extid", ""),
            ("dblpId", "", "DBLP_author_ID", "P2456", "DBLP author ID", "extid", ""),
        ]
        wbQuery = WikibaseQuery("scholar")
        for column, lookup, var_name, prop_id, prop_name, prop_type, value in compact_rows:
            wbQuery.addPropertyFromDescriptionRow(
                {
                    "Column": column,
                    "Entity": "Scholar",
                    "Lookup": lookup,
                    "PropVarname": var_name,
                    "PropertyId": prop_id,
                    "PropertyName": prop_name,
                    "Qualifier": "",
                    "Type": prop_type,
                    "Value": value,
                }
            )
        return wbQuery

`get()` `classmethod`

get the WikiBaseQuery for scholars

Returns:

Name	Type	Description
`WikibaseQuery`	`WikibaseQuery`	the wikibase query

Source code in skg/scholargrid.py

@classmethod
def get(cls) -> WikibaseQuery:
    """
    get the WikiBaseQuery for scholars

    Returns:
        WikibaseQuery: the wikibase query with one property per mapping row
    """
    # @TODO use metamodel info and read from wiki
    # compact form of the property description rows - one tuple per row:
    # (Column, Lookup, PropVarname, PropertyId, PropertyName, Type, Value)
    # "Entity" is always "Scholar" and "Qualifier" is always empty
    compact_rows = [
        ("", "", "instanceof", "P31", "instanceof", "", "Q5"),
        ("wikiDataId", "", "", "", "", "item", ""),
        ("name", "Q101352", "family_name", "P734", "family name", "", ""),
        ("firstName", "Q202444", "given_name", "P735", "given name", "", ""),
        ("homepage", "", "official_website", "P856", "official website", "url", ""),
        (
            "linkedInId",
            "",
            "LinkedIn_personal_profile_ID",
            "P6634",
            "LinkedIn personal profile ID",
            "extid",
            "",
        ),
        ("orcid", "", "ORCID_iD", "P496", "ORCID iD", "extid", ""),
        (
            "googleScholarUser",
            "",
            "Google_Scholar_author_ID",
            "P1960",
            "Google Scholar author ID",
            "extid",
            "",
        ),
        (
            "researchGate",
            "",
            "ResearchGate_profile_ID",
            "P2038",
            "ResearchGate profile ID",
            "extid",
            "",
        ),
        ("gndId", "", "GND_ID", "P227", "GND ID", "extid", ""),
        ("dblpId", "", "DBLP_author_ID", "P2456", "DBLP author ID", "extid", ""),
    ]
    wbQuery = WikibaseQuery("scholar")
    for column, lookup, var_name, prop_id, prop_name, prop_type, value in compact_rows:
        wbQuery.addPropertyFromDescriptionRow(
            {
                "Column": column,
                "Entity": "Scholar",
                "Lookup": lookup,
                "PropVarname": var_name,
                "PropertyId": prop_id,
                "PropertyName": prop_name,
                "Qualifier": "",
                "Type": prop_type,
                "Value": value,
            }
        )
    return wbQuery

`SmwGrid`

Bases: GridSync

a semantic mediawiki based grid synchable with WikiData

Source code in skg/scholargrid.py

class SmwGrid(GridSync):
    """
    a semantic mediawiki based grid synchable with WikiData

    """

    def __init__(
        self,
        solution,
        entityName: str,
        entityPluralName: str,
        pk: str,
        getLod: Callable,
        wikiUsers: dict,
        wikiId: str,
        sparql: SPARQL,
        debug: bool = False,
    ):
        """
        constructor

        Args:
            solution: the solution that I am part of
            entityName(str): the name of the entity type of items to be shown in the grid
            entityPluralName(str): the plural name of the entities to be shown
            pk(str): the name of the primary key
            getLod(Callable): the callback to load the grid rows list of dicts
            wikiUsers(dict): the wikiUsers - looked up by wikiId
            wikiId(str): the wikiId to use
            sparql(SPARQL): the SPARQL endpoint to use
            debug(bool): if True show debugging information
        """
        self.solution = solution
        self.wikiUsers = wikiUsers
        self.wikiId = wikiId
        wikiUser = self.wikiUsers[wikiId]
        self.semwiki = SemWiki(wikiUser)
        # there is no name "app" in this scope - the owning solution is the
        # object that used to be passed as app
        # NOTE(review): confirm that WikidataGrid still names this parameter "app"
        wdGrid = WikidataGrid(
            app=solution,
            source=wikiId,
            entityName=entityName,
            entityPluralName=entityPluralName,
            getLod=getLod,
            debug=debug,
        )
        # we'd rather lazy load
        # wdGrid.lod=wdGrid.getLod()
        super().__init__(wdGrid, entityName, pk, sparql=sparql, debug=debug)

`init(solution, entityName, entityPluralName, pk, getLod, wikiUsers, wikiId, sparql, debug=False)`

constructor

Parameters:

Name	Description	Default
`solution`	the solution that I am part of	required
`entityName(str)`	the name of the entity type of items to be shown in the grid	required
`entityPluralName(str)`	the plural name of the entities to be shown	required
`pk(str)`	the name of the primary key	required
`getLod(Callable)`	the callback to load the grid rows list of dicts	required
`wikiUsers(list)`	the wikiUsers	required
`wikiId(str)`	the wikiId to use	required
`sparql(SPARQL)`	the SPARQL endpoint to use	required
`debug(bool)`	if True show debugging information	`False`

Source code in skg/scholargrid.py

def __init__(
    self,
    solution,
    entityName: str,
    entityPluralName: str,
    pk: str,
    getLod: Callable,
    wikiUsers: dict,
    wikiId: str,
    sparql: SPARQL,
    debug: bool = False,
):
    """
    constructor

    Args:
        solution: the solution that I am part of
        entityName(str): the name of the entity type of items to be shown in the grid
        entityPluralName(str): the plural name of the entities to be shown
        pk(str): the name of the primary key
        getLod(Callable): the callback to load the grid rows list of dicts
        wikiUsers(dict): the wikiUsers - looked up by wikiId
        wikiId(str): the wikiId to use
        sparql(SPARQL): the SPARQL endpoint to use
        debug(bool): if True show debugging information
    """
    self.solution = solution
    self.wikiUsers = wikiUsers
    self.wikiId = wikiId
    wikiUser = self.wikiUsers[wikiId]
    self.semwiki = SemWiki(wikiUser)
    # there is no name "app" in this scope - the owning solution is the
    # object that used to be passed as app
    # NOTE(review): confirm that WikidataGrid still names this parameter "app"
    wdGrid = WikidataGrid(
        app=solution,
        source=wikiId,
        entityName=entityName,
        entityPluralName=entityPluralName,
        getLod=getLod,
        debug=debug,
    )
    # we'd rather lazy load
    # wdGrid.lod=wdGrid.getLod()
    super().__init__(wdGrid, entityName, pk, sparql=sparql, debug=debug)

`search`

Created on 2022-11-19

@author: wf

`SearchOptions`

wrapper for search options

Source code in skg/search.py

class SearchOptions:
    """
    wrapper for search options
    """

    def __init__(
        self,
        limit: int = 9,
        lang="en",
        show: bool = True,
        markup_names=None,
        open_browser: bool = False,
    ):
        """
        constructor

        Args:
            limit(int): limit for the maximum number of results
            lang(str): the language code to use for the search
            show(bool): if True print the search results
            markup_names(list): a list of markup names to support - if None
                a fresh ["bibtex"] list is used
            open_browser(bool): if True open a browser for the target page of the item e.g. scholia

        """
        self.limit = limit
        self.lang = lang
        self.show = show
        # create the default per instance - a list literal in the signature
        # would be one shared object mutated by all instances
        self.markup_names = ["bibtex"] if markup_names is None else markup_names
        self.open_browser = open_browser

`init(limit=9, lang='en', show=True, markup_names=['bibtex'], open_browser=False)`

constructor

Parameters:

Name	Description	Default
`limit(int)`	limit for the maximum number of results	`9`
`lang(str)`	the language code to use for the search	`'en'`
`show(bool)`	if True print the search results	`True`
`markup_names(list)`	a list of markup names to support	`['bibtex']`
`open_browser(bool)`	if True open a browser for the target page of the item e.g. scholia	`False`

Source code in skg/search.py

def __init__(
    self,
    limit: int = 9,
    lang="en",
    show: bool = True,
    markup_names=None,
    open_browser: bool = False,
):
    """
    constructor

    Args:
        limit(int): limit for the maximum number of results
        lang(str): the language code to use for the search
        show(bool): if True print the search results
        markup_names(list): a list of markup names to support - if None
            a fresh ["bibtex"] list is used
        open_browser(bool): if True open a browser for the target page of the item e.g. scholia

    """
    self.limit = limit
    self.lang = lang
    self.show = show
    # create the default per instance - a list literal in the signature
    # would be one shared object mutated by all instances
    self.markup_names = ["bibtex"] if markup_names is None else markup_names
    self.open_browser = open_browser

`SearchResult`

wrapper for search results

Source code in skg/search.py

class SearchResult:
    """
    wrapper for search results
    """

    def __init__(self, search_list: list, options: "SearchOptions" = None):
        """
        constructor

        Args:
            search_list(list): a list of search terms
            options(SearchOptions): the search options to apply - if None
                a SearchOptions instance with default settings is created
        """
        if options is None:
            # the default must be an instance - the SearchOptions class
            # object itself has none of the option attributes
            options = SearchOptions()
        self.search_list = search_list
        self.options = options
        # the found items - empty until a search fills the list
        self.items = []

`init(search_list, options=SearchOptions)`

constructor

Args: `search_list(list)`: a list of search terms; `options(SearchOptions)`: the search options to apply

Source code in skg/search.py

def __init__(self, search_list: list, options: "SearchOptions" = None):
    """
    constructor

    Args:
        search_list(list): a list of search terms
        options(SearchOptions): the search options to apply - if None
            a SearchOptions instance with default settings is created
    """
    if options is None:
        # the default must be an instance - the SearchOptions class
        # object itself has none of the option attributes
        options = SearchOptions()
    self.search_list = search_list
    self.options = options
    # the found items - empty until a search fills the list
    self.items = []

`searchengine`

Created on 18.11.2022

@author: wf

`InternetSearch`

generic internet search

Source code in skg/searchengine.py

class InternetSearch:
    """
    generic internet search using a list of search engines
    """

    def __init__(self, debug: bool = False):
        """
        constructor

        Args:
            debug(bool): if True exceptions raised while searching are printed to stderr
        """
        self.debug = debug
        self.gsearch = GoogleSearch()
        self.ysearch = YahooSearch()
        self.bsearch = BingSearch()
        self.dsearch = DuckDuckGoSearch()
        self.gs_search = GoogleScholarSearch()
        # note: self.gsearch is created but is not part of the queried engines
        self.engines = [self.gs_search, self.ysearch, self.dsearch, self.bsearch]

    def handleException(self, ex):
        """
        handle the given exception by printing it to stderr in debug mode

        Args:
            ex(Exception): the exception to handle
        """
        if not self.debug:
            return
        print(str(ex), file=sys.stderr)

    def search(self, search_term: str):
        """
        search my engines for the given search_term

        Args:
            search_term(str): the term to search for

        Yields:
            tuple: the name of the engine and the results attribute of its search result
        """
        for search_engine in self.engines:
            try:
                # the second positional argument is always 1
                # (presumably the result page - confirm against the engine API)
                engine_result = search_engine.search(search_term, 1)
                yield search_engine.name, engine_result.results
            except Exception as search_error:
                # a failing engine is reported and skipped
                self.handleException(search_error)

`init(debug=False)`

constructor

Source code in skg/searchengine.py

def __init__(self, debug: bool = False):
    """
    constructor

    Args:
        debug(bool): the debug flag - stored in self.debug
    """
    self.debug = debug
    self.gsearch = GoogleSearch()
    self.ysearch = YahooSearch()
    self.bsearch = BingSearch()
    self.dsearch = DuckDuckGoSearch()
    self.gs_search = GoogleScholarSearch()
    # note: self.gsearch is created but is not part of the engines list
    self.engines = [self.gs_search, self.ysearch, self.dsearch, self.bsearch]

`handleException(ex)`

handle the given exception

Source code in skg/searchengine.py

def handleException(self, ex):
    """
    handle the given exception by printing it to stderr in debug mode

    Args:
        ex(Exception): the exception to handle
    """
    if not self.debug:
        return
    print(str(ex), file=sys.stderr)

`search(search_term)`

search my engines for the given search_term

Source code in skg/searchengine.py

def search(self, search_term: str):
    """
    search my engines for the given search_term

    Args:
        search_term(str): the term to search for

    Yields:
        tuple: the name of the engine and the results attribute of its search result
    """
    for search_engine in self.engines:
        try:
            # the second positional argument is always 1
            # (presumably the result page - confirm against the engine API)
            engine_result = search_engine.search(search_term, 1)
            yield search_engine.name, engine_result.results
        except Exception as search_error:
            # a failing engine is reported and skipped
            self.handleException(search_error)

`semantic_scholar`

Created on 2022-11-22

@author: wf

`SemanticScholar`

wrapper for Semantic Scholar API

Source code in skg/semantic_scholar.py

class SemanticScholar:
    """
    thin wrapper around the Semantic Scholar API client
    """

    def __init__(self):
        """
        constructor - creates the wrapped SemScholar client
        """
        self.sch = SemScholar()

    def get_paper(self, doi: str):
        """
        get the paper with the given DOI identifier

        Args:
            doi(str): the DOI identifier of the paper

        Returns:
            the result of the wrapped client's get_paper call
        """
        return self.sch.get_paper(doi)

    def get_author(self):
        """
        not implemented yet - does nothing and returns None

        see https://api.semanticscholar.org/api-docs/graph#tag/Author-Data/operation/get_graph_get_author_search
        """
        return None

`init()`

constructor

Source code in skg/semantic_scholar.py

def __init__(self):
    """
    constructor - creates the wrapped SemScholar client and keeps it in self.sch
    """
    self.sch = SemScholar()

`get_author()`

https://api.semanticscholar.org/api-docs/graph#tag/Author-Data/operation/get_graph_get_author_search

Source code in skg/semantic_scholar.py

def get_author(self):
    """
    not implemented yet - does nothing and returns None

    see https://api.semanticscholar.org/api-docs/graph#tag/Author-Data/operation/get_graph_get_author_search
    """
    pass

`get_paper(doi)`

get the paper with the given DOI identifier

Source code in skg/semantic_scholar.py

def get_paper(self, doi: str):
    """
    get the paper with the given DOI identifier

    Args:
        doi(str): the DOI identifier of the paper

    Returns:
        the result of the wrapped client's get_paper call
    """
    return self.sch.get_paper(doi)

`skgbrowser`

Created on 2022-11-18

@author: wf

`SkgBrowser`

Bases: InputWebserver

scholarly knowledge graph browser

Source code in skg/skgbrowser.py

class SkgBrowser(InputWebserver):
    """
    scholarly knowledge graph browser
    """

    @classmethod
    def get_config(cls) -> WebserverConfig:
        """
        get the webserver configuration

        Returns:
            WebserverConfig: the configuration with SkgSolution as its solution class
        """
        base_config = WebserverConfig(
            copy_right="(c)2022-2024 Wolfgang Fahl",
            version=Version(),
            default_port=8765,
            short_name="sotsog",
        )
        effective_config = WebserverConfig.get(base_config)
        effective_config.solution_class = SkgSolution
        return effective_config

    def __init__(self):
        """Constructs all the necessary attributes for the WebServer object."""
        InputWebserver.__init__(self, config=SkgBrowser.get_config())

        # register the page that shows the scholars
        @ui.page("/scholars")
        async def scholars(client: Client):
            return await self.page(client, SkgSolution.scholars)

    def configure_run(self):
        """
        set the wiki users, the wikiId given in the arguments and the
        Wikidata SPARQL endpoint
        """
        # wiki users
        self.wikiUsers = WikiUser.getWikiUsers()
        self.wikiId = self.args.wikiId
        self.sparql = Wikidata().sparql

`init()`

Constructs all the necessary attributes for the WebServer object.

Source code in skg/skgbrowser.py

def __init__(self):
    """Constructs all the necessary attributes for the WebServer object."""
    InputWebserver.__init__(self, config=SkgBrowser.get_config())

    # register the page that shows the scholars
    @ui.page("/scholars")
    async def scholars(client: Client):
        return await self.page(client, SkgSolution.scholars)

`SkgSolution(InputWebSolution)`

the scholarly knowledge graph solution

Source code in skg/skgbrowser.py

class SkgSolution(InputWebSolution):
    """
    the scholarly knowledge graph solution

    NOTE(review): self.options, self.sotsog, self.wikiUsers and self.sparql
    are read by methods of this class but are not assigned in it -
    presumably they are provided elsewhere; confirm.
    """

    def __init__(self, webserver: SkgBrowser, client: Client):
        """
        Initialize the solution

        Calls the constructor of the base solution
        Args:
            webserver (SkgBrowser): The webserver instance associated with this context.
            client (Client): The client instance this context is associated with.
        """
        super().__init__(webserver, client)  # Call to the superclass constructor
        self.language = "en"
        self.wikiId = "or"
        self.markup_names = ["-", "bibtex", "scite", "smw"]
        # preselect "bibtex"
        self.markup_name = self.markup_names[1]

    def configure_menu(self):
        """
        configure additional non-standard menu entries
        """
        # self.link_button(name='Scholars',icon_name='account-school',target='/scholars')
        pass

    def createItemLink(self, item, term: str, index: int) -> str:
        """
        create a link for the given item

        Args:
            item(Node): the item to create a link for
            term(str): the search term - used as the link text
            index(int): the zero based position of the item in the search
                result - items after the first one are shown in grey with
                their one based position as a subscript

        Returns:
            str: the html markup of the link
        """
        if index > 0:
            style = "color:grey"
            text = f"{term}<sub>{index+1}</sub>"
            # named character reference including its terminating semicolon
            delim = "&nbsp;"
        else:
            style = ""
            text = term
            delim = ""
        link = Link.create(
            item.browser_url(), text, tooltip=item.label, target="_blank", style=style
        )
        if item.concept.name == "Scholar" and hasattr(item, "orcid"):
            link += ORCID(item.orcid).asHtml()
        return delim + link

    async def onSearchButton(self, _msg):
        """
        handle button to search for terms

        Each non-empty line of the search terms is searched separately; the
        links for the found items are added to the results and the markup
        of the items is added to the markup area.
        """
        try:
            self.results.content = ""
            self.markup.content = ""
            terms = self.searchTerms.value.split("\n")
            self.messages.content = "Searching"
            delim = ""
            for term in terms:
                if not term:
                    continue
                self.messages.content += f"... {term}\n"
                if self.markup_name == "-":
                    self.options.markup_names = []
                else:
                    self.options.markup_names = [self.markup_name]
                search_result = self.sotsog.search([term], self.options)
                items = search_result.items
                if not items:
                    # TODO check google search
                    # https://pypi.org/project/googlesearch-python/
                    params = parse.urlencode({"q": term})
                    search_url = f"https://www.google.com/search?{params}"
                    rmarkup = Link.create(
                        search_url,
                        term,
                        "not found",
                        target="_blank",
                        style="color:red",
                    )
                else:
                    rmarkup = ""
                    for i, item in enumerate(items):
                        rmarkup += self.createItemLink(item, term, i)
                        # one <pre> block per markup - accumulating the
                        # markups would repeat the earlier ones in every
                        # following block
                        # NOTE(review): the markup is inserted unescaped
                        for markup in item.markups.values():
                            self.markup.content += f"<pre>{markup}</pre>"
                self.results.content += delim + rmarkup
                delim = "<br>"

        except Exception as ex:
            # BaseException would also swallow task cancellation and interrupts
            self.handle_exception(ex)

    def addLanguageSelect(self):
        """
        add a language selector
        """
        lang_dict = Lang.get_language_dict()
        self.add_select("language:", lang_dict).bind_value(self, "language")

    def addWikiUserSelect(self):
        """
        add a wiki user selector - only if there are wiki users
        """
        if self.wikiUsers:
            wu_dict = {wiki_id: wiki_id for wiki_id in sorted(self.wikiUsers)}
            self.add_select("wiki:", wu_dict).bind_value(self, "wikiId")

    async def scholars(self, client: Client):
        """
        scholar display

        Args:
            client(Client): the client - not used by this method
        """
        self.setup_menu()
        with ui.element("div").classes("w-full h-full"):
            try:
                self.scholarsGrid = ScholarGrid(
                    self, self.wikiUsers, self.wikiId, sparql=self.sparql
                )
                # @TODO refactor the two setup calls to one to hide wdgrid details
                # self.scholarsGrid.setup(a=self.rowB, header=self.rowA)
                # self.scholarsGrid.wdgrid.setup(a=self.rowC)
            except Exception as ex:
                self.handle_exception(ex)
        await self.setup_footer()

    def configure_settings(self):
        """
        configure settings
        """
        self.addLanguageSelect()
        self.addWikiUserSelect()

    async def home(self, _client: Client):
        """
        provide the main content page

        The markup selection, the search terms and the search button are
        placed before the splitter and the markup area after it; messages
        and results follow below.
        """
        self.setup_menu()
        with ui.element("div").classes("w-full h-full"):
            with ui.splitter() as splitter:
                with splitter.before:
                    self.add_select("markup", self.markup_names).bind_value(
                        self, "markup_name"
                    )
                    self.searchTerms = ui.textarea(placeholder="enter search terms")
                    self.searchButton = ui.button(
                        "search", on_click=self.onSearchButton
                    )
                with splitter.after:
                    self.markup = ui.html()
            self.messages = ui.html()
            self.results = ui.html()
        await self.setup_footer()

`smw`

Created on 22.11.2022

@author: wf

`SemWiki`

access to Semantic mediawiki

Source code in skg/smw.py

class SemWiki:
    """
    access to Semantic mediawiki
    """

    def __init__(self, wikiUser: WikiUser, withLogin: bool = None):
        """
        constructor

        Args:
            wikiUser(WikiUser): the wiki user whose wikiId selects the wiki client
            withLogin(bool): if True log in, if False do not log in,
                if None (default) ask the wiki client via needsLogin()
        """
        self.wikiUser = wikiUser
        self.wikiClient = WikiClient.ofWikiId(wikiUser.wikiId)
        if withLogin is None:
            withLogin = self.wikiClient.needsLogin()
        if withLogin:
            self.wikiClient.login()
        self.smw = SMWClient(self.wikiClient.getSite())

    def id_refs(
        self,
        mainlabel="pageTitle",
        condition="DOI::+",
        title: str = "DOI references",
        askExtra: str = "",
        id_prop="DOI",
        id_name="doi",
    ) -> list:
        """
        get a list of id references from the given wiki

        Args:
            mainlabel(str): the mainlabel to use
            condition(str): the condition to apply
            title(str): the title of the query
            askExtra(str): extra ask markup inserted directly after the condition
            id_prop(str): the property to print out as the id
            id_name(str): the label to use for the id printout

        Returns:
            list: the result of running the ask query via the SMW client
        """
        ask = f"""{{{{#ask:[[{condition}]]{askExtra}
|?{id_prop}={id_name}
|mainlabel={mainlabel}
|?Creation_date=creationDate
|?Modification_date=modificationDate
|?Last_editor_is=lastEditor
}}}}
"""
        refs = self.smw.query(ask, title)
        return refs

    def papers(self):
        """
        get the paper records

        Returns:
            the id_refs result for the condition "Citation_text::+" with
            the Citation_text printed out as "reference"
        """
        askExtra = """\n|?Citation_text=reference"""
        paper_records = self.id_refs(
            condition="Citation_text::+", title="doi paper referencs", askExtra=askExtra
        )
        return paper_records

    def scholars(self):
        """
        get scholars

        Returns:
            the id_refs result for the condition "Concept:Scholar" with
            the Scholar properties listed in askExtra as printouts
        """
        condition = "Concept:Scholar"
        mainlabel = "Scholar"
        askExtra = """|?Scholar wikiDataId = wikiDataId
|?Scholar name = name
|?Scholar firstName = firstName
|?Scholar description = description
|?Scholar homepage = homepage
|?Scholar orcid = orcid
|?Scholar dblpId = dblpId
|?Scholar linkedInId = linkedInId
|?Scholar researchGate = researchGate
|?Scholar gndId = gndId
|?Scholar smartCRMId = smartCRMId
|sort=Scholar name,Scholar firstName
|order=ascending,ascending
"""
        scholars = self.id_refs(
            mainlabel,
            condition,
            "scholars",
            askExtra,
            "Scholar wikiDataId",
            "wikiDataId",
        )
        return scholars

    @classmethod
    def asMarkup(cls, scholar) -> str:
        """
        return the markup for the given scholar

        Only properties of the scholar's concept that have an "smw" mapping
        and are present as attributes of the scholar are rendered.

        Args:
            scholar(Node): the scholar
        Returns:
            str: the semantic mediawiki markup
        """
        lines = ["{{Scholar"]
        for prop_name, prop in scholar.concept.props.items():
            if not prop.hasmap("smw"):
                continue
            smw_prop = prop.getmap("smw")
            if not hasattr(scholar, prop_name):
                continue
            value = getattr(scholar, prop_name)
            # @TODO refactor
            qid = Wikidata.getQid(value)
            if value != qid:
                # getQid changed the value: keep the qid itself for wikiDataId,
                # look up the label for any other property
                if prop_name == "wikiDataId":
                    value = qid
                else:
                    value = Wikidata.getLabelForQid(qid)
            lines.append(f"|{smw_prop}={value}")
        lines.append("}}")
        return "\n".join(lines)

`__init__(wikiUser, withLogin=None)`

constructor

Parameters:

Name	Type	Description	Default
`wikiUser`	`WikiUser`	WikiUser	required

Source code in skg/smw.py

def __init__(self, wikiUser: WikiUser, withLogin: bool = None):
    """
    constructor

    Args:
        wikiUser(WikiUser): the wiki user whose wikiId selects the wiki client
        withLogin(bool): True forces a login, False skips it and
            None (default) lets the wiki client decide via needsLogin()
    """
    self.wikiUser = wikiUser
    client = WikiClient.ofWikiId(wikiUser.wikiId)
    self.wikiClient = client
    # only ask the client when the caller did not decide explicitly
    login_wanted = client.needsLogin() if withLogin is None else withLogin
    if login_wanted:
        client.login()
    self.smw = SMWClient(client.getSite())

`asMarkup(scholar)` `classmethod`

return the markup for the given scholar

Parameters:

Name	Type	Description	Default
`scholar(Node)`		the scholar	required

Returns: str: the semantic mediawiki markup

Source code in skg/smw.py

@classmethod
def asMarkup(cls, scholar) -> str:
    """
    return the markup for the given scholar

    Only properties of the scholar's concept that have an "smw" mapping
    and are present as attributes of the scholar are rendered.

    Args:
        scholar(Node): the scholar
    Returns:
        str: the semantic mediawiki markup
    """
    lines = ["{{Scholar"]
    for prop_name, prop in scholar.concept.props.items():
        if not prop.hasmap("smw"):
            continue
        smw_prop = prop.getmap("smw")
        if not hasattr(scholar, prop_name):
            continue
        value = getattr(scholar, prop_name)
        # @TODO refactor
        qid = Wikidata.getQid(value)
        if value != qid:
            # getQid changed the value: keep the qid itself for wikiDataId,
            # look up the label for any other property
            if prop_name == "wikiDataId":
                value = qid
            else:
                value = Wikidata.getLabelForQid(qid)
        lines.append(f"|{smw_prop}={value}")
    lines.append("}}")
    return "\n".join(lines)

`id_refs(mainlabel='pageTitle', condition='DOI::+', title='DOI references', askExtra='', id_prop='DOI', id_name='doi')`

get a list of id references from the given wiki

Parameters:

Name	Description	Default
`mainlabel(str)`	the mainlabel to use	`'pageTitle'`
`condition(str)`	the condition to apply	`'DOI::+'`
`title(str)`	the title of the query	`'DOI references'`

Source code in skg/smw.py

    def id_refs(
        self,
        mainlabel="pageTitle",
        condition="DOI::+",
        title: str = "DOI references",
        askExtra: str = "",
        id_prop="DOI",
        id_name="doi",
    ) -> list:
        """
        get a list of id references from the given wiki

        Args:
            mainlabel(str): the mainlabel to use
            condition(str): the condition to apply
            title(str): the title of the query
        """
        ask = f"""{{{{#ask:[[{condition}]]{askExtra}
|?{id_prop}={id_name}
|mainlabel={mainlabel}
|?Creation_date=creationDate
|?Modification_date=modificationDate
|?Last_editor_is=lastEditor
}}}}
"""
        refs = self.smw.query(ask, title)
        return refs

`papers()`

get the paper records

Source code in skg/smw.py

def papers(self):
    """
    get the paper records

    Returns:
        the id_refs result for the condition "Citation_text::+" with
        the Citation_text printed out as "reference"
    """
    return self.id_refs(
        condition="Citation_text::+",
        title="doi paper referencs",
        askExtra="\n|?Citation_text=reference",
    )

`scholars()`

get scholars

Source code in skg/smw.py

    def scholars(self):
        """
        get scholars
        """
        condition = "Concept:Scholar"
        mainlabel = "Scholar"
        askExtra = """|?Scholar wikiDataId = wikiDataId
|?Scholar name = name
|?Scholar firstName = firstName
|?Scholar description = description
|?Scholar homepage = homepage
|?Scholar orcid = orcid
|?Scholar dblpId = dblpId
|?Scholar linkedInId = linkedInId
|?Scholar researchGate = researchGate
|?Scholar gndId = gndId
|?Scholar smartCRMId = smartCRMId
|sort=Scholar name,Scholar firstName
|order=ascending,ascending
"""
        scholars = self.id_refs(
            mainlabel,
            condition,
            "scholars",
            askExtra,
            "Scholar wikiDataId",
            "wikiDataId",
        )
        return scholars

`sotsog`

Created on 2022-11-16

@author: wf