Skip to content

snapquery API Documentation

basequeryview

Created on 2024-06-23 @author: wf

BaseQueryView

general search for queries

Source code in snapquery/basequeryview.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
class BaseQueryView:
    """
    general search for queries

    Renders a header, a QuerySelector and a result row. Every change
    reported by the selector triggers a debounced SQL search on the
    NamedQuery table; the matching records are shown in a grid.
    """

    def __init__(self, solution: WebSolution, debug: bool = False):
        """
        constructor

        Args:
            solution (WebSolution): the solution providing the named query manager (``nqm``)
                and the exception handler used by this view
            debug (bool): passed on to the debouncer
        """
        self.solution = solution
        self.nqm = self.solution.nqm
        self.debug = debug
        self.setup_ui()

    def setup_ui(self):
        """
        setup my user interface
        """
        with ui.row().classes("w-full items-baseline") as self.header_row:
            ui.label("Available Queries").classes("text-xl")
            ui.label("select a query to view and execute").classes("text-slate-400")

        self.query_selector = QuerySelector(self.solution, self.on_search_change)
        self.search_result_row = ui.row()
        self.debouncer = DebouncerUI(parent=self.search_result_row, delay=0.65, debug=self.debug)

        # schedule one initial search via a zero delay one-shot timer
        ui.timer(0.0, self.on_search_change, once=True)

    async def on_search_change(self, _args=None):
        """
        react on changes in the search input

        Args:
            _args: event arguments - ignored
        """
        await self.debouncer.debounce(self.perform_search)

    async def perform_search(self):
        """
        Performs the search based on the current QuerySelector values.

        The name, namespace and domain of the selector are used as
        prefix patterns; any exception is passed to the solution's
        exception handler.
        """
        try:
            qn = self.query_selector.qn
            name_like = f"{qn.name}%"
            namespace_like = f"{qn.namespace}%"
            domain_like = f"{qn.domain}%"
            sql_query = """SELECT 
            * 
            FROM NamedQuery 
            WHERE 
                name LIKE ? 
                AND namespace LIKE ? 
                AND domain LIKE ?"""
            self.q_lod = self.nqm.sql_db.query(sql_query, (name_like, namespace_like, domain_like))
            self.show_lod(self.q_lod)
        except Exception as ex:
            self.solution.handle_exception(ex)

    def show_lod(self, q_lod: List):
        """
        show the given list of dicts

        Args:
            q_lod (List): the NamedQuery records to display
        """
        self.search_result_row.clear()
        view_lod = []
        # use the records handed in - not self.q_lod - so that the
        # grid content and the notified count always agree
        for record in q_lod:
            nq = NamedQuery.from_record(record)
            vr = nq.as_viewrecord()
            view_lod.append(vr)
        with self.search_result_row:
            self.search_result_grid = ListOfDictsGrid()
            ui.notify(f"found {len(q_lod)} queries")
            self.search_result_grid.load_lod(view_lod)
        self.search_result_row.update()

on_search_change(_args=None) async

react on changes in the search input

Source code in snapquery/basequeryview.py
41
42
43
44
45
async def on_search_change(self, _args=None):
    """
    Handle a change of the search input.

    The actual search is not run directly but handed to the
    debouncer; the event arguments are ignored.
    """
    debounce = self.debouncer.debounce
    await debounce(self.perform_search)

perform_search() async

Performs the search based on the current QuerySelector values.

Source code in snapquery/basequeryview.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
async def perform_search(self):
    """
    Search the NamedQuery table with the current QuerySelector values.

    Name, namespace and domain of the selected query name are each
    turned into a prefix pattern for a LIKE comparison. The result is
    kept in ``self.q_lod`` and displayed; any exception is forwarded
    to the solution's exception handler.
    """
    try:
        selected = self.query_selector.qn
        # one "starts with" pattern per column in the order of the placeholders
        like_params = tuple(f"{part}%" for part in (selected.name, selected.namespace, selected.domain))
        sql_query = """SELECT 
        * 
        FROM NamedQuery 
        WHERE 
            name LIKE ? 
            AND namespace LIKE ? 
            AND domain LIKE ?"""
        records = self.nqm.sql_db.query(sql_query, like_params)
        self.q_lod = records
        self.show_lod(self.q_lod)
    except Exception as ex:
        self.solution.handle_exception(ex)

setup_ui()

setup my user interface

Source code in snapquery/basequeryview.py
27
28
29
30
31
32
33
34
35
36
37
38
39
def setup_ui(self):
    """
    setup my user interface

    Creates the header row, the query selector, the row receiving the
    search results and the debouncer and schedules an initial search.
    """
    # header: title plus a short usage hint
    with ui.row().classes("w-full items-baseline") as self.header_row:
        ui.label("Available Queries").classes("text-xl")
        ui.label("select a query to view and execute").classes("text-slate-400")

    # the selector gets on_search_change as its callback
    self.query_selector = QuerySelector(self.solution, self.on_search_change)
    self.search_result_row = ui.row()
    # the debouncer is attached to the result row
    self.debouncer = DebouncerUI(parent=self.search_result_row, delay=0.65, debug=self.debug)

    # one-shot timer with zero delay: triggers the first search
    ui.timer(0.0, self.on_search_change, once=True)

show_lod(q_lod)

show the given list of dicts

Source code in snapquery/basequeryview.py
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def show_lod(self, q_lod: List):
    """
    show the given list of dicts

    The result row is cleared, each record is converted to a
    NamedQuery view record and the view records are loaded into a
    new grid inside the result row.

    Args:
        q_lod (List): the NamedQuery records to display
    """
    self.search_result_row.clear()
    # use the records handed in - not self.q_lod - so that the
    # grid content and the notified count always agree
    view_lod = [NamedQuery.from_record(record).as_viewrecord() for record in q_lod]
    with self.search_result_row:
        self.search_result_grid = ListOfDictsGrid()
        ui.notify(f"found {len(q_lod)} queries")
        self.search_result_grid.load_lod(view_lod)
    self.search_result_row.update()

ceurws

Created on 2024-07-02 @author: wf

CeurWSQueries

A class to handle the extraction and management of CEUR-WS queries.

Source code in snapquery/ceurws.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
class CeurWSQueries:
    """
    Extracts the queries of the CEUR-WS challenge wiki and manages
    them as a set of named queries.
    """

    def __init__(self, nqm: NamedQueryManager, debug: bool = False):
        """
        Constructor

        Args:
            nqm (NamedQueryManager): The NamedQueryManager to use for storing queries.
            debug (bool): Enable debug output. Defaults to False.
        """
        self.nqm = nqm
        self.debug = debug
        self.named_query_set = NamedQuerySet(
            target_graph_name="wikidata",
            namespace="challenge",
            domain="ceur-ws.org",
        )
        self.wiki_id = "cr"
        wiki_client = WikiClient.ofWikiId(self.wiki_id)
        self.wiki_client = wiki_client
        self.smw = SMWClient(wiki_client.getSite())

    def extract_queries(self, limit: int = None):
        """
        Extract the queries from the CEUR-WS challenge wiki and append
        them to the named query set.

        Records without a ``wdqsurl`` value are skipped.

        Args:
            limit (int, optional): Limit the number of queries fetched. Defaults to None.
        """
        limitclause = f"|limit={limit}" if limit else ""
        ask_query = f"""{{{{#ask: [[Concept:Query]]
|mainlabel=Query
|?Query id=id
|?Query name=name
|?Query title=title
|?Query tryiturl=tryiturl
|?Query wdqsurl=wdqsurl
|?Query scholia=scholia
|?Query relevance=relevance
|?Query task=task
|?Query sparql=sparql
{limitclause}
|sort=Query task,Query id
|order=ascending
}}}}"""
        query_results = self.smw.query(ask_query)
        for _page_title, query_data in query_results.items():
            short_url = query_data.get("wdqsurl")
            if not short_url:
                # without a short url there is nothing to register
                continue
            tryit = query_data.get("tryiturl")
            comment = None
            if tryit:
                comment = f"qlever tryit url: https://qlever.cs.uni-freiburg.de/wikidata/{tryit}"
            self.named_query_set.queries.append(
                NamedQuery(
                    domain=self.named_query_set.domain,
                    namespace=self.named_query_set.namespace,
                    name=query_data.get("name"),
                    url=f"https://w.wiki/{short_url}",
                    title=query_data.get("title"),
                    description=query_data.get("task"),
                    sparql=query_data.get("sparql"),
                    comment=comment,
                )
            )

            if self.debug:
                # progress: one dot per query, the running count after every 80 queries
                print(".", end="", flush=True)
                count = len(self.named_query_set.queries)
                if count % 80 == 0:
                    print(f"{count}")

        if self.debug:
            print(f"\nFound {len(self.named_query_set.queries)} CEUR-WS challenge queries")

    def save_to_json(self, file_path: str = "/tmp/ceurws-queries.json"):
        """
        Save the named query set to a JSON file.

        Args:
            file_path (str): Path to the JSON file.
        """
        query_set = self.named_query_set
        query_set.save_to_json_file(file_path, indent=2)

    def store_queries(self):
        """
        Store the named queries into the database.
        """
        query_set = self.named_query_set
        self.nqm.store_named_query_list(query_set)

__init__(nqm, debug=False)

Constructor.

Args:
nqm (NamedQueryManager): The NamedQueryManager to use for storing queries.
debug (bool): Enable debug output. Defaults to False.

Source code in snapquery/ceurws.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def __init__(self, nqm: NamedQueryManager, debug: bool = False):
    """
    Constructor

    Sets up an empty named query set for the CEUR-WS challenge
    queries and the wiki access for the wiki with the id "cr".

    Args:
        nqm (NamedQueryManager): The NamedQueryManager to use for storing queries.
        debug (bool): Enable debug output. Defaults to False.
    """
    self.nqm = nqm
    self.debug = debug
    self.named_query_set = NamedQuerySet(
        target_graph_name="wikidata",
        namespace="challenge",
        domain="ceur-ws.org",
    )
    self.wiki_id = "cr"
    wiki_client = WikiClient.ofWikiId(self.wiki_id)
    self.wiki_client = wiki_client
    # the semantic mediawiki client works on the site of the wiki client
    self.smw = SMWClient(wiki_client.getSite())

extract_queries(limit=None)

Extract all queries from the CEUR-WS challenge wiki.

Args:
limit (int, optional): Limit the number of queries fetched. Defaults to None.

Source code in snapquery/ceurws.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
    def extract_queries(self, limit: int = None):
        """
        Extract all queries from the CEUR-WS challenge wiki.
        Args:
            limit (int, optional): Limit the number of queries fetched. Defaults to None.
        """
        if limit:
            limitclause = f"|limit={limit}"
        else:
            limitclause = ""
        ask_query = f"""{{{{#ask: [[Concept:Query]]
|mainlabel=Query
|?Query id=id
|?Query name=name
|?Query title=title
|?Query tryiturl=tryiturl
|?Query wdqsurl=wdqsurl
|?Query scholia=scholia
|?Query relevance=relevance
|?Query task=task
|?Query sparql=sparql
{limitclause}
|sort=Query task,Query id
|order=ascending
}}}}"""
        query_results = self.smw.query(ask_query)
        for _page_title, query_data in query_results.items():
            # Extract values into local variables for easier debugging
            name = query_data.get("name")
            url = query_data.get("wdqsurl")
            if not url:
                continue
            title = query_data.get("title")
            description = query_data.get("task")
            sparql = query_data.get("sparql")
            if url:
                url = f"https://w.wiki/{url}"
            tryiturl = query_data.get("tryiturl")
            if tryiturl:
                tryiturl = f"https://qlever.cs.uni-freiburg.de/wikidata/{tryiturl}"
            comment = f"qlever tryit url: {tryiturl}" if tryiturl else None
            named_query = NamedQuery(
                domain=self.named_query_set.domain,
                namespace=self.named_query_set.namespace,
                name=name,
                url=url,
                title=title,
                description=description,
                sparql=sparql,
                comment=comment,
            )
            self.named_query_set.queries.append(named_query)

            if self.debug:
                print(".", end="", flush=True)
                if len(self.named_query_set.queries) % 80 == 0:
                    print(f"{len(self.named_query_set.queries)}")

        if self.debug:
            print(f"\nFound {len(self.named_query_set.queries)} CEUR-WS challenge queries")

save_to_json(file_path='/tmp/ceurws-queries.json')

Save the named query set to a JSON file.

Args:
file_path (str): Path to the JSON file.

Source code in snapquery/ceurws.py
 96
 97
 98
 99
100
101
102
def save_to_json(self, file_path: str = "/tmp/ceurws-queries.json"):
    """
    Write the named query set to a JSON file using an indent of 2.

    Args:
        file_path (str): Path to the JSON file.
    """
    query_set = self.named_query_set
    query_set.save_to_json_file(file_path, indent=2)

store_queries()

Store the named queries into the database.

Source code in snapquery/ceurws.py
104
105
106
107
108
def store_queries(self):
    """
    Hand the named query set over to the NamedQueryManager for
    storing it in the database.
    """
    query_set = self.named_query_set
    self.nqm.store_named_query_list(query_set)

dblp

Created on 2024-06-07

@author: wf

DblpPersonLookup

lookup persons in dblp

Source code in snapquery/dblp.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
class DblpPersonLookup:
    """
    lookup persons in dblp
    """

    def __init__(self, nqm: NamedQueryManager, endpoint_name: str = "dblp"):
        """
        constructor

        Args:
            nqm (NamedQueryManager): the manager used to execute the lookup query
            endpoint_name (str): the name of the endpoint to query
        """
        self.endpoint_name = endpoint_name
        self.nqm = nqm

    def search(self, name_part: str, limit: int = 10) -> List[Person]:
        """
        search persons by part of their name using a SPARQL query with regex.

        Args:
            name_part (str): The part of the name to search for.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of Person objects.
        """
        # NOTE(review): name_part is substituted for the name_regex
        # placeholder inside a quoted regex literal - confirm that
        # quoting/escaping is handled when the template is rendered
        sparql_template = """# snapquery person lookup by name part
SELECT DISTINCT 
  ?author 
  ?label 
  ?dblp_author_id 
  ?wikidata_id 
  ?orcid_id
WHERE {
  ?author a dblp:Person.
  ?author rdfs:label ?label.
  FILTER regex(?label, "{{ name_regex }}", "i")
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier.
    ?identifier datacite:usesIdentifierScheme datacite:dblp.
    ?identifier litre:hasLiteralValue ?dblp_author_id.
  }
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier2.
    ?identifier2 datacite:usesIdentifierScheme datacite:wikidata.
    ?identifier2 litre:hasLiteralValue ?wikidata_id.
  }
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier3.
    ?identifier3 datacite:usesIdentifierScheme datacite:orcid.
    ?identifier3 litre:hasLiteralValue ?orcid_id.
  }
}
            """
        lookup_query = NamedQuery(
            domain="dblp.org",
            namespace="pid-lookup",
            name="person-by-name-part",
            title="Lookup persons with a name matching a pattern",
            description="Search for persons by matching part of their name using regex",
            sparql=sparql_template,
        )
        rows, _stats = self.nqm.execute_query(
            named_query=lookup_query,
            params_dict={"name_regex": name_part},
            endpoint_name=self.endpoint_name,
            limit=limit,
            with_stats=False,
        )
        persons = []
        for row in rows:
            # a variable missing in a result row yields None
            attributes = {key: row.get(key) for key in ("label", "wikidata_id", "dblp_author_id", "orcid_id")}
            person = Person(**attributes)
            person.parse_label()
            persons.append(person)
        return persons

search(name_part, limit=10)

search persons by part of their name using a SPARQL query with regex.

Parameters:

Name Type Description Default
name_part str

The part of the name to search for.

required
limit int

The maximum number of results to return.

10

Returns:

Type Description
List[Person]

List[Person]: A list of Person objects.

Source code in snapquery/dblp.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
    def search(self, name_part: str, limit: int = 10) -> List[Person]:
        """
        search persons by part of their name using a SPARQL query with regex.

        Args:
            name_part (str): The part of the name to search for.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of Person objects.
        """
        named_query = NamedQuery(
            domain="dblp.org",
            namespace="pid-lookup",
            name="person-by-name-part",
            title="Lookup persons with a name matching a pattern",
            description="Search for persons by matching part of their name using regex",
            sparql="""# snapquery person lookup by name part
SELECT DISTINCT 
  ?author 
  ?label 
  ?dblp_author_id 
  ?wikidata_id 
  ?orcid_id
WHERE {
  ?author a dblp:Person.
  ?author rdfs:label ?label.
  FILTER regex(?label, "{{ name_regex }}", "i")
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier.
    ?identifier datacite:usesIdentifierScheme datacite:dblp.
    ?identifier litre:hasLiteralValue ?dblp_author_id.
  }
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier2.
    ?identifier2 datacite:usesIdentifierScheme datacite:wikidata.
    ?identifier2 litre:hasLiteralValue ?wikidata_id.
  }
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier3.
    ?identifier3 datacite:usesIdentifierScheme datacite:orcid.
    ?identifier3 litre:hasLiteralValue ?orcid_id.
  }
}
            """,
        )
        params_dict = {"name_regex": name_part}

        person_lod, _stats = self.nqm.execute_query(
            named_query=named_query,
            params_dict=params_dict,
            endpoint_name=self.endpoint_name,
            limit=limit,
            with_stats=False,
        )
        persons = []
        for pr in person_lod:
            person = Person(
                label=pr.get("label"),
                wikidata_id=pr.get("wikidata_id"),
                dblp_author_id=pr.get("dblp_author_id"),
                orcid_id=pr.get("orcid_id"),
            )
            person.parse_label()
            persons.append(person)
        return persons

endpoint

Created on 29.06.2024 @author: wf

Endpoint

A query endpoint for SPARQL, SQL or other storage systems

Source code in snapquery/endpoint.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
@lod_storable
class Endpoint:
    """
    A query endpoint for SPARQL, SQL or other storage systems
    """

    # identification and access
    name: str
    endpoint: str
    lang: str = "sparql"
    website: Optional[str] = None
    database: Optional[str] = None
    method: Optional[str] = "POST"
    prefixes: Optional[str] = None
    # optional credentials
    auth: Optional[str] = None
    user: Optional[str] = None
    password: Optional[str] = None

    def __post_init__(self):
        """
        Hook for post-initialization processing - currently a no-op.
        """
        pass

    @classmethod
    def get_samples(cls) -> dict[str, List["Endpoint"]]:
        """
        Get samples for Endpoint

        Returns:
            dict: a single key "sample-endpoints" mapping to a public
                wikidata endpoint and an endpoint using BASIC auth
                with placeholder credentials
        """
        wikidata = cls(
            name="wikidata",
            lang="sparql",
            endpoint="https://query.wikidata.org/sparql",
            website="https://query.wikidata.org/",
            database="blazegraph",
            method="POST",
            prefixes="PREFIX bd: <http://www.bigdata.com/rdf#>\nPREFIX cc: <http://creativecommons.org/ns#>",
        )
        dbis_jena = cls(
            name="dbis-jena",
            lang="sparql",
            endpoint="https://confident.dbis.rwth-aachen.de/jena/",
            website="https://confident.dbis.rwth-aachen.de",
            auth="BASIC",
            user="secret",
            password="#not public - example not usable for access#",
        )
        return {"sample-endpoints": [wikidata, dbis_jena]}

__post_init__()

Perform post-initialization processing if needed.

Source code in snapquery/endpoint.py
29
30
31
32
33
def __post_init__(self):
    """
    Perform post-initialization processing if needed.

    Currently nothing is done here.
    """
    pass

get_samples() classmethod

Get samples for Endpoint

Source code in snapquery/endpoint.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
@classmethod
def get_samples(cls) -> dict[str, List["Endpoint"]]:
    """
    Get samples for Endpoint

    Returns:
        dict: a single key "sample-endpoints" mapping to a public
            wikidata endpoint and an endpoint using BASIC auth
            with placeholder credentials
    """
    wikidata = cls(
        name="wikidata",
        lang="sparql",
        endpoint="https://query.wikidata.org/sparql",
        website="https://query.wikidata.org/",
        database="blazegraph",
        method="POST",
        prefixes="PREFIX bd: <http://www.bigdata.com/rdf#>\nPREFIX cc: <http://creativecommons.org/ns#>",
    )
    dbis_jena = cls(
        name="dbis-jena",
        lang="sparql",
        endpoint="https://confident.dbis.rwth-aachen.de/jena/",
        website="https://confident.dbis.rwth-aachen.de",
        auth="BASIC",
        user="secret",
        password="#not public - example not usable for access#",
    )
    return {"sample-endpoints": [wikidata, dbis_jena]}

EndpointManager

Manages the storage and retrieval of Endpoint configurations.

Source code in snapquery/endpoint.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
@lod_storable
class EndpointManager:
    """
    Keeps Endpoint configurations by name and gives access
    to them by lookup, length and iteration.
    """

    # endpoint name -> Endpoint
    endpoints: Dict[str, Endpoint] = field(default_factory=dict)

    @classmethod
    def get_yaml_path(cls) -> str:
        """
        Get the path of the endpoints.yaml file in the samples
        directory located next to this module.
        """
        module_dir = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(module_dir, "samples", "endpoints.yaml")

    def get_endpoint(self, name: str) -> Endpoint:
        """
        Retrieve an endpoint by name.

        Returns:
            Endpoint: the endpoint - None if the name is unknown
        """
        known = self.endpoints
        return known.get(name)

    def __len__(self):
        """the number of endpoints"""
        return len(self.endpoints)

    def __iter__(self):
        """iterate over the endpoints (not their names)"""
        return iter(self.endpoints.values())

get_endpoint(name)

Retrieve an endpoint by name.

Source code in snapquery/endpoint.py
80
81
82
83
84
def get_endpoint(self, name: str) -> Endpoint:
    """
    Look up an endpoint by its name.

    Returns:
        Endpoint: the endpoint - None if the name is unknown
    """
    known = self.endpoints
    return known.get(name)

error_filter

Created on 2024-05-06

@author: wf

ErrorFilter

Handles technical error messages to retrieve user-friendly content.

Source code in snapquery/error_filter.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
class ErrorFilter:
    """
    handle technical error message to
    retrieve user friendly content

    On construction the raw message is categorized (``category``) and
    reduced to its relevant part (``filtered_message``).
    """

    def __init__(self, raw_error_message: str):
        """
        constructor

        Args:
            raw_error_message (str): the technical error message - may be None
        """
        self.raw_error_message = raw_error_message
        self.category = self.categorize_error()
        self.filtered_message = self._extract_relevant_info()

    def categorize_error(self) -> Union[str, None]:
        """
        Categorizes the error message into predefined types.

        The checks are case-insensitive substring checks and are done in
        a fixed order - the first matching category wins.

        Returns:
            str: The category of the error message - None if there is no message.
        """
        if self.raw_error_message is None:
            return None

        lower_error_msg = self.raw_error_message.lower()
        # Todo: query is often part of the error message when these keywords are used within the query the classification fails.
        if (
            "query timeout after" in lower_error_msg
            or "timeoutexception" in lower_error_msg
            or "query has timed out" in lower_error_msg
            or "http error 504" in lower_error_msg
        ):
            return "Timeout"
        elif (
            "syntax error" in lower_error_msg
            or "invalid sparql query" in lower_error_msg
            or "querybadformed" in lower_error_msg
        ):
            return "Syntax Error"
        elif "connection error" in lower_error_msg:
            return "Connection Error"
        elif "access denied" in lower_error_msg:
            return "Authorization Error"
        elif (
            "service unavailable" in lower_error_msg
            or "service temporarily unavailable" in lower_error_msg
            or "http error 503" in lower_error_msg
        ):
            return "Service Unavailable"
        elif "too many requests" in lower_error_msg or "http error 429" in lower_error_msg:
            return "Too Many Requests"
        elif "bad gateway" in lower_error_msg or "http error 502" in lower_error_msg:
            return "Bad Gateway"
        elif "endpointinternalerror" in lower_error_msg:
            return "EndPointInternalError"
        else:
            return "Other"

    def _extract_relevant_info(self) -> Union[str, None]:
        """
        Extract relevant information from the given raw error message.
        Identifies and processes different error message formats.

        Returns:
            str: the extracted message - None if the raw message is None or empty
        """
        if not self.raw_error_message:
            return None

        if "SPARQL-QUERY:" in self.raw_error_message:
            return self._extract_sparql_error()
        elif self.raw_error_message.startswith("QueryBadFormed:") and "Virtuoso" in self.raw_error_message:
            return self._extract_virtuoso_error()
        elif self.raw_error_message.startswith("QueryBadFormed:"):
            return self._extract_triply_db_error()
        elif "Not supported:" in self.raw_error_message:
            return self._extract_qlever_error()
        elif "Invalid SPARQL query" in self.raw_error_message:
            return self._extract_invalid_sparql_error()
        else:
            if self.category == "Timeout":
                return "Query has timed out."
            message_json = self._get_error_message_json()
            if message_json and isinstance(message_json, dict) and "exception" in message_json:
                return message_json.get("exception")
            return "Error: Unrecognized error format."

    def _extract_sparql_error(self) -> str:
        """
        Specifically extract and format SPARQL error messages.

        The text after the last "Exception:" of the ExecutionException
        line is returned; the line ends at the first escaped stack
        trace entry or - if there is none - at the end of the message.
        """
        if "java.util.concurrent.TimeoutException" in self.raw_error_message:
            return "Query has timed out."
        error_log_start = "java.util.concurrent.ExecutionException"
        error_log_start_idx = self.raw_error_message.find(error_log_start)
        if error_log_start_idx == -1:
            return "Error: SPARQL query information is incomplete."
        # the stack trace entries start with a literal backslash-t followed by "at"
        error_log_end_idx = self.raw_error_message.find("\\tat", error_log_start_idx)
        if error_log_end_idx == -1:
            # no stack trace: keep the rest instead of cutting off the last character
            error_log_end_idx = len(self.raw_error_message)
        error_message = self.raw_error_message[error_log_start_idx:error_log_end_idx]
        detail = error_message.split("Exception:")[-1]
        try:
            detail = detail.encode("utf-8").decode("unicode_escape")
        except UnicodeDecodeError:
            # e.g. a dangling backslash - keep the text as it is
            pass
        return detail.strip()

    def _extract_qlever_error(self) -> str:
        """
        Specifically extract and format QLever error messages.

        The message starts at "Not supported:" and reaches up to and
        including the next "}" or - if there is none - to the end.
        """
        start_idx = self.raw_error_message.find("Not supported:")
        if start_idx != -1:
            end_idx = self.raw_error_message.find("}", start_idx)
            if end_idx == -1:
                # no closing brace: do not end up with an empty message
                error_message = self.raw_error_message[start_idx:].strip()
            else:
                error_message = self.raw_error_message[start_idx : end_idx + 1].strip()
            return f"QLever error:\n{error_message}"
        else:
            return "Error: QLever error information is incomplete."

    def _extract_virtuoso_error(self) -> str:
        """
        Specifically extract and format virtuoso error messages.

        Returns:
            str: the text between "Response: b'" and "SPARQL query:" or a
                fallback message if that text is missing or empty
        """
        start_token = "Response: b'"
        end_token = "SPARQL query:"
        message = self._extract_message_between_tokens(start_token, end_token)
        if message:
            return message
        else:
            return "Error: Virtuoso error information is incomplete."

    def _extract_triply_db_error(self) -> str:
        """
        Specifically extract and format TriplyDB error messages.

        Returns:
            str: the "message" or else the "exception" entry of the JSON
                record of the error message or a fallback message
        """
        message_json = self._get_error_message_json()
        # only a JSON object has entries to look up
        if isinstance(message_json, dict) and "message" in message_json:
            return message_json.get("message")
        elif isinstance(message_json, dict) and "exception" in message_json:
            return message_json.get("exception")
        else:
            return "Error: TriplyDB error information is incomplete."

    def _get_error_message_json(self) -> Union[dict, None]:
        """
        Try to extract the json record from the raw error message.

        Returns:
            dict: the parsed record - None if the response marker is
                missing or the content is not a valid JSON object
        """
        start_token = "Response:\nb'"
        start_idx = self.raw_error_message.find(start_token)
        if start_idx == -1:
            # without the marker there is no defined start of the record
            return None
        # the last character is dropped - presumably the closing quote of the bytes literal
        message_json_raw = self.raw_error_message[start_idx + len(start_token) : -1].strip()
        try:
            message_json = json.loads(message_json_raw.encode().decode("unicode_escape"))
        except (JSONDecodeError, UnicodeDecodeError):
            return None
        return message_json if isinstance(message_json, dict) else None

    def _extract_message_between_tokens(self, start_token: str, end_token: str) -> Union[str, None]:
        """
        Extract and format message between tokens.

        Args:
            start_token (str): marks the start - not part of the result
            end_token (str): marks the end - not part of the result

        Returns:
            str: the stripped text between the first occurrences of the
                tokens - None if one of the tokens is missing
        """
        start_idx = self.raw_error_message.find(start_token)
        end_idx = self.raw_error_message.find(end_token)
        message = None
        if start_idx != -1 and end_idx != -1:
            message = self.raw_error_message[start_idx:end_idx]
            message = message[len(start_token) :]
            message = message.strip()
        return message

    def _extract_invalid_sparql_error(self) -> str:
        """
        Specifically extract and format Invalid SPARQL query error messages.

        Only the rest of the line starting at "Invalid SPARQL query" is used.
        """
        error_start = self.raw_error_message.find("Invalid SPARQL query")
        if error_start != -1:
            error_msg = self.raw_error_message[error_start:].split("\n")[0]
            return f"Invalid SPARQL query error:\n{error_msg}"
        else:
            return "Error: Invalid SPARQL query information is incomplete."

    def get_message(self, for_html: bool = True) -> str:
        """
        get the filtered message

        Args:
            for_html (bool): if True a <br> is put in front of every newline

        Returns:
            str: the filtered message with escaped newlines converted to
                real ones - None if there is no filtered message
        """
        filtered_msg = self.filtered_message
        if filtered_msg:
            filtered_msg = filtered_msg.replace("\\n", "\n")
            if for_html:
                filtered_msg = filtered_msg.replace("\n", "<br>\n")
        return filtered_msg

categorize_error()

Categorizes the error message into predefined types.

Returns:

Name Type Description
str str

The category of the error message.

Source code in snapquery/error_filter.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def categorize_error(self) -> str:
    """
    Categorize the raw error message by keyword lookup.

    The rules are checked in order and the first rule with a keyword
    contained in the lower-cased raw error message wins.

    Returns:
        str: the category of the error message, "Other" if no rule
        matches, or None if there is no raw error message
    """
    raw = self.raw_error_message
    if raw is None:
        return None

    # Todo: query is often part of the error message when these keywords are used within the query the classification fails.
    rules = (
        (
            "Timeout",
            ("query timeout after", "timeoutexception", "query has timed out", "http error 504"),
        ),
        ("Syntax Error", ("syntax error", "invalid sparql query", "querybadformed")),
        ("Connection Error", ("connection error",)),
        ("Authorization Error", ("access denied",)),
        (
            "Service Unavailable",
            ("service unavailable", "service temporarily unavailable", "http error 503"),
        ),
        ("Too Many Requests", ("too many requests", "http error 429")),
        ("Bad Gateway", ("bad gateway", "http error 502")),
        ("EndPointInternalError", ("endpointinternalerror",)),
    )
    haystack = raw.lower()
    for category, keywords in rules:
        if any(keyword in haystack for keyword in keywords):
            return category
    return "Other"

get_message(for_html=True)

get the filtered message

Source code in snapquery/error_filter.py
194
195
196
197
198
199
200
201
202
203
def get_message(self, for_html: bool = True) -> str:
    """
    Get the filtered message prepared for display.

    Escaped newlines (a backslash followed by "n") are turned into
    real newlines; for HTML output each newline is additionally
    prefixed with a <br> tag.

    Args:
        for_html: if True add <br> tags in front of newlines

    Returns:
        the prepared message; an empty or missing filtered message
        is returned unchanged
    """
    text = self.filtered_message
    if not text:
        return text
    text = text.replace("\\n", "\n")
    return text.replace("\n", "<br>\n") if for_html else text

execution

Created on 2024-07-09

@author: wf

Execution

supports execution of named queries

Source code in snapquery/execution.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
class Execution:
    """
    executes named queries and stores the statistics of each run
    """

    def __init__(self, nqm: NamedQueryManager, debug: bool = False):
        """
        constructor

        Args:
            nqm: the named query manager used to run queries and store stats
            debug: debug flag kept on the instance
        """
        self.logger = logging.getLogger("snapquery.execution.Execution")
        self.debug = debug
        self.nqm = nqm

    def parameterize(self, nq: NamedQuery):
        """
        derive the query details and example parameters for the given named query

        Args:
            nq: the named query to analyze

        Returns:
            a tuple of the query details and the parameter dict; the dict
            is only filled when the query details' params equal "q"
        """
        qd = QueryDetails.from_sparql(query_id=nq.query_id, sparql=nq.sparql)
        # use Tim Berners-Lee as a example
        params_dict = {"q": "Q80"} if qd.params == "q" else {}
        return qd, params_dict

    def execute(
        self,
        nq: NamedQuery,
        endpoint_name: str,
        title: str,
        context: str = "test",
        prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
    ):
        """
        execute the given named query and store its statistics

        Args:
            nq: the named query to execute
            endpoint_name: name of the endpoint to run the query on
            title: title used as prefix of the debug log messages
            context: context that is set on the stats before storing
            prefix_merger: prefix merger passed on to the query execution
        """
        qd, params_dict = self.parameterize(nq)
        self.logger.debug(f"{title}: {nq.name} {qd} - via {endpoint_name}")
        _results, stats = self.nqm.execute_query(
            nq,
            params_dict=params_dict,
            endpoint_name=endpoint_name,
            prefix_merger=prefix_merger,
        )
        stats.context = context
        self.nqm.store_stats([stats])
        # a run without records is logged with its filtered error message
        if stats.records:
            outcome = f"{stats.records} records found"
        else:
            outcome = f"error {stats.filtered_msg}"
        self.logger.debug(f"{title} executed:" + outcome)

__init__(nqm, debug=False)

Source code in snapquery/execution.py
17
18
19
20
21
def __init__(self, nqm: NamedQueryManager, debug: bool = False):
    """
    constructor

    Args:
        nqm: the named query manager kept on the instance
        debug: debug flag kept on the instance
    """
    self.logger = logging.getLogger("snapquery.execution.Execution")
    self.debug = debug
    self.nqm = nqm

execute(nq, endpoint_name, title, context='test', prefix_merger=QueryPrefixMerger.SIMPLE_MERGER)

execute the given named query

Source code in snapquery/execution.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def execute(
    self,
    nq: NamedQuery,
    endpoint_name: str,
    title: str,
    context: str = "test",
    prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
):
    """
    execute the given named query and store its statistics

    Args:
        nq: the named query to execute
        endpoint_name: name of the endpoint to run the query on
        title: title used as prefix of the debug log messages
        context: context that is set on the stats before storing
        prefix_merger: prefix merger passed on to the query execution
    """
    qd, params_dict = self.parameterize(nq)
    self.logger.debug(f"{title}: {nq.name} {qd} - via {endpoint_name}")
    _results, stats = self.nqm.execute_query(
        nq,
        params_dict=params_dict,
        endpoint_name=endpoint_name,
        prefix_merger=prefix_merger,
    )
    stats.context = context
    self.nqm.store_stats([stats])
    # a run without records is logged with its filtered error message
    if stats.records:
        outcome = f"{stats.records} records found"
    else:
        outcome = f"error {stats.filtered_msg}"
    self.logger.debug(f"{title} executed:" + outcome)

graph

Created on 27.06.2024

@author: wf

Graph

A class representing a graph with its basic properties.

Source code in snapquery/graph.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
@lod_storable
class Graph:
    """
    A class representing a graph with its basic properties.
    """

    # name of the graph
    name: str
    # name of the endpoint used for this graph by default
    default_endpoint_name: str
    description: str
    url: str
    comment: str = ""

    def __post_init__(self):
        """
        Perform post-initialization processing if needed.
        """
        pass

    @classmethod
    def get_samples(cls) -> dict[str, list["Graph"]]:
        """
        get samples for Graph

        Returns:
            a dict with the single key "graphs" mapping to a list of
            sample Graph instances
        """
        samples = {
            "graphs": [
                cls(
                    name="wikidata",
                    default_endpoint_name="wikidata",
                    description="Wikidata knowledge graph",
                    url="https://query.wikidata.org/sparql",
                    comment="Main Wikidata endpoint",
                ),
                cls(
                    name="dblp",
                    default_endpoint_name="dblp",
                    description="DBLP computer science bibliography",
                    url="https://qlever.cs.uni-freiburg.de/api/dblp",
                    comment="DBLP endpoint powered by QLever",
                ),
            ]
        }
        return samples

__post_init__()

Perform post-initialization processing if needed.

Source code in snapquery/graph.py
25
26
27
28
29
def __post_init__(self):
    """
    Hook for post-initialization processing; currently does nothing.
    """

get_samples() classmethod

get samples for Graph

Source code in snapquery/graph.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
@classmethod
def get_samples(cls) -> dict[str, list["Graph"]]:
    """
    get samples for Graph

    Returns:
        a dict with the single key "graphs" mapping to a list of
        sample Graph instances
    """
    samples = {
        "graphs": [
            cls(
                name="wikidata",
                default_endpoint_name="wikidata",
                description="Wikidata knowledge graph",
                url="https://query.wikidata.org/sparql",
                comment="Main Wikidata endpoint",
            ),
            cls(
                name="dblp",
                default_endpoint_name="dblp",
                description="DBLP computer science bibliography",
                url="https://qlever.cs.uni-freiburg.de/api/dblp",
                comment="DBLP endpoint powered by QLever",
            ),
        ]
    }
    return samples

GraphManager

Manages the storage and retrieval of Graph configurations.

Source code in snapquery/graph.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
@lod_storable
class GraphManager:
    """
    Keeps Graph configurations by name and gives access to them.
    """

    graphs: Dict[str, Graph] = field(default_factory=dict)

    @classmethod
    def get_yaml_path(cls) -> str:
        """
        Get the path of graphs.yaml in the samples directory next to this module.
        """
        module_dir = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(module_dir, "samples", "graphs.yaml")

    def get_graph(self, name: str) -> Graph:
        """
        Look up a graph by name.

        Args:
            name: the name of the graph

        Returns:
            the graph stored under the name, or None if there is none
        """
        graph = self.graphs.get(name)
        return graph

    def __len__(self):
        """
        Number of managed graphs.
        """
        graph_count = len(self.graphs)
        return graph_count

    def __iter__(self):
        """
        Iterate over the managed graphs (the values, not the names).
        """
        return iter(self.graphs.values())

get_graph(name)

Retrieve a graph by name.

Source code in snapquery/graph.py
72
73
74
75
76
def get_graph(self, name: str) -> Graph:
    """
    Look up a graph by name.

    Args:
        name: the name of the graph

    Returns:
        the graph stored under the name, or None if there is none
    """
    graph = self.graphs.get(name)
    return graph

models

person

Created 2023 refactored to snapquery by WF 2024-05

@author: th

Affiliation dataclass

affiliation of a person

Source code in snapquery/models/person.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
@dataclass
class Affiliation:
    """
    the affiliation of a person
    """

    name: Optional[str] = None
    location: Optional[str] = None
    country: Optional[str] = None
    wikidata_id: Optional[str] = None

    @property
    def ui_label(self) -> str:
        """
        label for the user interface: the name, or a question mark
        symbol if the name is missing or empty
        """
        return self.name or "❓"

Person dataclass

Bases: PersonName

A person

Source code in snapquery/models/person.py
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
@dataclass
class Person(PersonName):
    """
    A person with persistent identifiers and further details
    """

    wikidata_id: Optional[str] = None
    dblp_author_id: Optional[str] = None
    orcid_id: Optional[str] = None
    image: Optional[str] = None
    affiliation: Optional[List[Affiliation]] = None
    official_website: Optional[str] = None

    @property
    def has_pid(self) -> bool:
        """
        True if at least one of wikidata_id, dblp_author_id or orcid_id is set.
        """
        pids = [self.wikidata_id, self.dblp_author_id, self.orcid_id]
        return any(pids)

    def share_identifier(self, other: "Person") -> bool:
        """
        Check whether this person and the other person have an identifier in common.

        Args:
            other: another person

        Returns:
            True if one of wikidata_id, dblp_author_id or orcid_id is set
            on this person and equal on the other person, False otherwise
        """
        for id_attr in ("wikidata_id", "dblp_author_id", "orcid_id"):
            own_id = getattr(self, id_attr)
            if own_id is not None and getattr(other, id_attr) == own_id:
                return True
        return False

    def merge_with(self, other: "Person"):
        """
        Fill every field of this person that is None with the other person's value.

        Args:
            other: person to merge with
        """
        for spec in fields(self):
            own_value = getattr(self, spec.name)
            merged = getattr(other, spec.name) if own_value is None else own_value
            setattr(self, spec.name, merged)
has_pid: bool property

Checks if the scholar has any persistent identifier (PID) set.

merge_with(other)

Merge this person with another person: every field of this person that is None is filled with the other person's value. Args: other – the person to merge with.

Source code in snapquery/models/person.py
102
103
104
105
106
107
108
109
110
111
112
def merge_with(self, other: "Person"):
    """
    Fill every field of this person that is None with the other person's value.

    Args:
        other: person to merge with
    """
    for spec in fields(self):
        own_value = getattr(self, spec.name)
        merged = getattr(other, spec.name) if own_value is None else own_value
        setattr(self, spec.name, merged)
share_identifier(other)

Check if the given person shares an identifier with this person. Args:

other: another person

Returns:

Type Description
bool

true if the person shares an identifier, false otherwise

Source code in snapquery/models/person.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def share_identifier(self, other: "Person") -> bool:
    """
    Check whether this person and the other person have an identifier in common.

    Args:
        other: another person

    Returns:
        True if one of wikidata_id, dblp_author_id or orcid_id is set
        on this person and equal on the other person, False otherwise
    """
    for id_attr in ("wikidata_id", "dblp_author_id", "orcid_id"):
        own_id = getattr(self, id_attr)
        if own_id is not None and getattr(other, id_attr) == own_id:
            return True
    return False

PersonName dataclass

person name handling

Source code in snapquery/models/person.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
@dataclass
class PersonName:
    """
    handling of the name parts of a person
    """

    label: Optional[str] = None
    given_name: Optional[str] = None
    family_name: Optional[str] = None

    @property
    def name(self) -> str:
        """
        given name and family name separated by a blank, just the one
        that is available, or a question mark symbol if both are missing
        """
        given, family = self.given_name, self.family_name
        if given and family:
            return f"{given} {family}"
        return given or family or "❓"

    @property
    def ui_label(self) -> str:
        """
        label for the user interface - same as the name
        """
        return self.name

    def parse_label(self):
        """
        fill a missing family name and given name from the label
        """
        if not self.label:
            return
        parsed = HumanName(self.label)
        # parts that are already set are never overwritten
        if parsed.last and not self.family_name:
            self.family_name = parsed.last
        if parsed.first and not self.given_name:
            self.given_name = parsed.first
parse_label()

get family name and given name from label

Source code in snapquery/models/person.py
52
53
54
55
56
57
58
59
60
61
def parse_label(self):
    """
    fill a missing family name and given name from the label
    """
    if not self.label:
        return
    parsed = HumanName(self.label)
    # parts that are already set are never overwritten
    if parsed.last and not self.family_name:
        self.family_name = parsed.last
    if parsed.first and not self.given_name:
        self.given_name = parsed.first

mwlogin

Created on 04.05.2024

@author: wf

Login

login to mediawiki

Source code in snapquery/mwlogin.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class Login:
    """
    OAuth login to a MediaWiki site
    """

    def __init__(
        self,
        consumer_key,
        consumer_secret,
        wiki_url="https://en.wikipedia.org/w/index.php",
    ):
        """
        constructor

        Args:
            consumer_key: key part of the consumer token
            consumer_secret: secret part of the consumer token
            wiki_url: url of the wiki handed to the handshaker
        """
        consumer_token = ConsumerToken(consumer_key, consumer_secret)
        self.consumer_token = consumer_token
        self.handshaker = Handshaker(wiki_url, consumer_token)
        # both tokens are only available after the respective login step
        self.request_token = None
        self.access_token = None

    def initiate_login(self):
        """
        Step 1: Initialize -- ask MediaWiki for a temporary key/secret for user
        and open the returned redirect url in the web browser
        """
        redirect_url, request_token = self.handshaker.initiate()
        self.request_token = request_token
        webbrowser.open(redirect_url)
        print("Browser opened to MediaWiki login page. Please authorize the application.")

    def complete_login(self, response_qs):
        """
        Step 3: Complete -- obtain authorized key/secret for "resource owner"

        Args:
            response_qs: passed to the handshaker together with the request token
        """
        access_token = self.handshaker.complete(self.request_token, response_qs)
        self.access_token = access_token
        print("Login completed successfully.")

    def identify_user(self):
        """
        Step 4: Identify -- (optional) get identifying information about the user

        Prints the username, or a hint if there is no access token yet.
        """
        if not self.access_token:
            print("Access token is not available. Please complete the login process first.")
            return
        identity = self.handshaker.identify(self.access_token)
        print(f"Identified as {identity['username']}.")

complete_login(response_qs)

Step 3: Complete -- obtain authorized key/secret for "resource owner"

Source code in snapquery/mwlogin.py
37
38
39
40
41
42
def complete_login(self, response_qs):
    """
    Step 3: Complete -- obtain authorized key/secret for "resource owner"

    Args:
        response_qs: passed to the handshaker together with the request token
    """
    access_token = self.handshaker.complete(self.request_token, response_qs)
    self.access_token = access_token
    print("Login completed successfully.")

identify_user()

Step 4: Identify -- (optional) get identifying information about the user

Source code in snapquery/mwlogin.py
44
45
46
47
48
49
50
51
52
def identify_user(self):
    """
    Step 4: Identify -- (optional) get identifying information about the user

    Prints the username, or a hint if there is no access token yet.
    """
    if not self.access_token:
        print("Access token is not available. Please complete the login process first.")
        return
    identity = self.handshaker.identify(self.access_token)
    print(f"Identified as {identity['username']}.")

initiate_login()

Step 1: Initialize -- ask MediaWiki for a temporary key/secret for user

Source code in snapquery/mwlogin.py
29
30
31
32
33
34
35
def initiate_login(self):
    """
    Step 1: Initialize -- ask MediaWiki for a temporary key/secret for user
    and open the returned redirect url in the web browser
    """
    redirect_url, request_token = self.handshaker.initiate()
    self.request_token = request_token
    webbrowser.open(redirect_url)
    print("Browser opened to MediaWiki login page. Please authorize the application.")

namespace_stats_view

Created on 2024-06-23

@author: wf

NamespaceStatsView

Class to view and manage SPARQL query statistics using NiceGUI.

Attributes:

Name Type Description
solution WebSolution

The web solution context which provides access to shared resources.

nqm NamedQueryManager

The manager to handle named queries and database interactions.

results_row row

UI component that holds the results grid.

lod_grid ListOfDictsGrid

Grid component to display the query statistics.

Source code in snapquery/namespace_stats_view.py
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
class NamespaceStatsView:
    """Class to view and manage SPARQL query statistics using NiceGUI.

    Attributes:
        solution (WebSolution): The web solution context which provides access to shared resources.
        nqm (NamedQueryManager): The manager to handle named queries and database interactions.
        results_row (ui.row): UI component that holds the results grid.
        lod_grid (ListOfDictsGrid): Grid component to display the query statistics.
    """

    def __init__(self, solution: WebSolution):
        """Initialize the NamespaceStatsView with a given web solution context.

        Args:
            solution (WebSolution): The web solution context which includes shared resources like the NamedQueryManager.
        """
        self.solution = solution
        self.nqm = self.solution.nqm
        self.progress_bar: Optional[NiceguiProgressbar] = None
        self.lod_grid: Optional[ListOfDictsGrid] = None
        self.setup_ui()

    def setup_ui(self):
        """Sets up the user interface for displaying SPARQL query statistics."""
        with ui.row() as self.progress_row:
            self.progress_bar = NiceguiProgressbar(desc="Query Progress", total=100, unit="queries")
            self.progress_bar.progress.classes("rounded")
        with ui.row() as self.results_row:
            ui.label("Legend: ✅ Distinct Successful Queries  ❌ Distinct Failed Queries  🔄 Total Successful Runs")
            self.lod_grid = ListOfDictsGrid()
            # Set up a click event handler for the grid
            self.lod_grid.ag_grid.on("cellClicked", self.on_cell_clicked)

        # Fetch and display data immediately upon UI setup
        ui.timer(0.0, self.on_fetch_lod, once=True)

    async def on_cell_clicked(self, event):
        """Run the queries of the clicked row's namespace/domain on the clicked endpoint column.

        Queries are only executed if the clicked column is a known endpoint
        and the webserver reports the user as authenticated.
        """
        # Retrieve details from the event object
        logger.debug(f"Cell clicked: {event}")
        row_data = event.args["data"]
        endpoint_name = event.args["colId"]
        namespace = row_data["namespace"]
        domain = row_data["domain"]
        if endpoint_name in self.nqm.endpoints.keys():
            if self.solution.webserver.authenticated():
                await run.io_bound(
                    self.execute_queries,
                    namespace=namespace,
                    endpoint_name=endpoint_name,
                    domain=domain,
                )
            else:
                ui.notify("you must be admin to run queries via the web interface")
        else:
            # this should not be possible
            ui.notify(f"invalid endpoint {endpoint_name}")

    async def on_fetch_lod(self, _args=None):
        """Fetches data asynchronously and loads it into the grid upon successful retrieval."""
        try:
            stats_lod = await run.io_bound(self.fetch_query_lod)
            processed_lod = self.process_stats_lod(stats_lod)
            with self.results_row:
                self.lod_grid.load_lod(processed_lod)
                self.lod_grid.update()
        except Exception as ex:
            self.solution.handle_exception(ex)

    def fetch_query_lod(self) -> List[Dict[str, any]]:
        """Fetch data from the database based on the named query 'query_namespace_endpoint_matrix_with_distinct'.

        Returns:
            List[Dict[str, any]]: A list of dictionaries containing the query results.
        """
        query_name = "query_namespace_endpoint_matrix_with_distinct"
        query = self.nqm.meta_qm.queriesByName[query_name]
        return self.nqm.sql_db.query(query.query)

    def process_stats_lod(self, raw_lod: List[Dict[str, any]]) -> List[Dict[str, any]]:
        """Process the raw list of dictionaries to format suitable for the grid display.

        Each result row covers one domain/namespace pair and has one column
        per endpoint. A cell shows the distinct successful queries, the
        distinct failed queries and the total successful runs, using the
        markers of the legend shown by setup_ui; it is empty if all three
        counts are zero or there is no entry for the endpoint.

        Args:
            raw_lod (List[Dict[str, any]]): The raw data fetched from the SQL query.

        Returns:
            List[Dict[str, any]]: The processed list of dictionaries formatted for grid display.
        """
        # domain -> namespace -> endpoint -> [distinct successful, distinct failed, successful runs]
        domain_namespace_stats = defaultdict(lambda: defaultdict(dict))
        endpoints = list(self.nqm.endpoints.keys())
        total_queries = {}

        for entry in raw_lod:
            domain = entry["domain"]
            namespace = entry["namespace"]
            total_queries[(domain, namespace)] = entry["total"]
            domain_namespace_stats[domain][namespace][entry["endpoint_name"]] = [
                entry.get("distinct_successful", 0),
                entry.get("distinct_failed", 0),
                entry["success_count"],
            ]

        processed_lod = []
        for domain, namespaces in domain_namespace_stats.items():
            for namespace, counts in namespaces.items():
                row = {
                    "domain": domain,
                    "namespace": namespace,
                    "total": total_queries[(domain, namespace)],
                }
                for endpoint in endpoints:
                    success, fail, runs = counts.get(endpoint, [0, 0, 0])
                    if success == 0 and fail == 0 and runs == 0:
                        row[endpoint] = ""
                    else:
                        # every count gets its own marker so that adjacent
                        # numbers cannot run together
                        row[endpoint] = f"✅{success} ❌{fail} 🔄{runs}"
                processed_lod.append(row)

        return processed_lod

    def execute_queries(self, namespace: str, endpoint_name: str, domain: str):
        """execute queries with progress updates.

        After the last query a refresh of the grid is scheduled and a
        notification is shown.

        Args:
            namespace (str): The namespace of the queries to execute.
            endpoint_name (str): The endpoint name where the queries will be executed.
            domain: domain name
        """
        queries = self.nqm.get_all_queries(namespace=namespace, domain=domain)
        total_queries = len(queries)

        self.progress_bar.total = total_queries
        self.progress_bar.reset()
        execution = Execution(self.nqm)
        for i, nq in enumerate(queries, start=1):
            with self.progress_row:
                self.progress_bar.update_value(i)
                self.progress_bar.set_description(f"Executing {nq.name} on {endpoint_name}")
                logger.debug(f"Executing {nq.name} on {endpoint_name}")
            execution.execute(
                nq,
                endpoint_name,
                title=f"query {i}/{total_queries}::{endpoint_name}",
                context="web-test",
            )
        with self.progress_row:
            ui.timer(0.1, self.on_fetch_lod, once=True)
            ui.notify(
                f"finished {total_queries} queries for namespace: {namespace} with domain: {domain}",
                type="positive",
            )

__init__(solution)

Initialize the NamespaceStatsView with a given web solution context.

Parameters:

Name Type Description Default
solution WebSolution

The web solution context which includes shared resources like the NamedQueryManager.

required
Source code in snapquery/namespace_stats_view.py
31
32
33
34
35
36
37
38
39
40
41
def __init__(self, solution: WebSolution):
    """Create the view for the given web solution context and build its user interface.

    Args:
        solution (WebSolution): The web solution context; its nqm attribute is kept as the named query manager.
    """
    self.solution = solution
    self.nqm = solution.nqm
    # both ui components are created by setup_ui
    self.progress_bar: Optional[NiceguiProgressbar] = None
    self.lod_grid: Optional[ListOfDictsGrid] = None
    self.setup_ui()

execute_queries(namespace, endpoint_name, domain)

Execute the queries of a namespace and domain with progress updates. Args: namespace (str) – the namespace of the queries to execute; endpoint_name (str) – the endpoint name where the queries will be executed; domain – the domain name.

Source code in snapquery/namespace_stats_view.py
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def execute_queries(self, namespace: str, endpoint_name: str, domain: str):
    """Execute all queries of a namespace and domain on an endpoint with progress updates.

    After the last query a refresh of the grid is scheduled and a
    notification is shown.

    Args:
        namespace (str): The namespace of the queries to execute.
        endpoint_name (str): The endpoint name where the queries will be executed.
        domain: domain name
    """
    queries = self.nqm.get_all_queries(namespace=namespace, domain=domain)
    query_count = len(queries)

    progress = self.progress_bar
    progress.total = query_count
    progress.reset()
    runner = Execution(self.nqm)
    for index, named_query in enumerate(queries, start=1):
        with self.progress_row:
            progress.update_value(index)
            status = f"Executing {named_query.name} on {endpoint_name}"
            progress.set_description(status)
            logger.debug(status)
        runner.execute(
            named_query,
            endpoint_name,
            title=f"query {index}/{query_count}::{endpoint_name}",
            context="web-test",
        )
    with self.progress_row:
        ui.timer(0.1, self.on_fetch_lod, once=True)
        ui.notify(
            f"finished {query_count} queries for namespace: {namespace} with domain: {domain}",
            type="positive",
        )

fetch_query_lod()

Fetch data from the database based on the named query 'query_namespace_endpoint_matrix_with_distinct'.

Returns:

Type Description
List[Dict[str, any]]

List[Dict[str, any]]: A list of dictionaries containing the query results.

Source code in snapquery/namespace_stats_view.py
90
91
92
93
94
95
96
97
98
def fetch_query_lod(self) -> List[Dict[str, any]]:
    """Fetch data from the database based on the named query 'query_namespace_endpoint_matrix_with_distinct'.

    Returns:
        List[Dict[str, any]]: A list of dictionaries containing the query results.
    """
    matrix_query = self.nqm.meta_qm.queriesByName["query_namespace_endpoint_matrix_with_distinct"]
    rows = self.nqm.sql_db.query(matrix_query.query)
    return rows

on_cell_clicked(event) async

Handle cell click events to perform specific actions based on the cell content.

Source code in snapquery/namespace_stats_view.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
async def on_cell_clicked(self, event):
    """Run the queries of the clicked row's namespace/domain on the clicked endpoint column.

    Queries are only executed if the clicked column is a known endpoint
    and the webserver reports the user as authenticated; otherwise a
    notification is shown.
    """
    logger.debug(f"Cell clicked: {event}")
    # Retrieve details from the event object
    clicked = event.args
    row_data = clicked["data"]
    endpoint_name = clicked["colId"]
    namespace = row_data["namespace"]
    domain = row_data["domain"]
    if endpoint_name not in self.nqm.endpoints.keys():
        # this should not be possible
        ui.notify(f"invalid endpoint {endpoint_name}")
        return
    if not self.solution.webserver.authenticated():
        ui.notify("you must be admin to run queries via the web interface")
        return
    await run.io_bound(
        self.execute_queries,
        namespace=namespace,
        endpoint_name=endpoint_name,
        domain=domain,
    )

on_fetch_lod(_args=None) async

Fetches data asynchronously and loads it into the grid upon successful retrieval.

Source code in snapquery/namespace_stats_view.py
79
80
81
82
83
84
85
86
87
88
async def on_fetch_lod(self, _args=None):
    """Fetch the statistics, process them and load the result into the grid.

    Any exception is handed to the solution's exception handler.
    """
    try:
        raw_rows = await run.io_bound(self.fetch_query_lod)
        grid_rows = self.process_stats_lod(raw_rows)
        with self.results_row:
            grid = self.lod_grid
            grid.load_lod(grid_rows)
            grid.update()
    except Exception as ex:
        self.solution.handle_exception(ex)

process_stats_lod(raw_lod)

Process the raw list of dictionaries to format suitable for the grid display.

Parameters:

Name Type Description Default
raw_lod List[Dict[str, any]]

The raw data fetched from the SQL query.

required

Returns:

Type Description
List[Dict[str, any]]

List[Dict[str, any]]: The processed list of dictionaries formatted for grid display.

Source code in snapquery/namespace_stats_view.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
def process_stats_lod(self, raw_lod: List[Dict[str, any]]) -> List[Dict[str, any]]:
    """Process the raw list of dictionaries to format suitable for the grid display.

    Each result row covers one domain/namespace pair and has one column
    per endpoint. A cell shows the distinct successful queries (✅), the
    distinct failed queries (❌) and the total successful runs (🔄); it is
    empty if all three counts are zero or there is no entry for the endpoint.

    Args:
        raw_lod (List[Dict[str, any]]): The raw data fetched from the SQL query.

    Returns:
        List[Dict[str, any]]: The processed list of dictionaries formatted for grid display.
    """
    # domain -> namespace -> endpoint -> [distinct successful, distinct failed, successful runs]
    domain_namespace_stats = defaultdict(lambda: defaultdict(dict))
    endpoints = list(self.nqm.endpoints.keys())
    total_queries = {}

    for entry in raw_lod:
        domain = entry["domain"]
        namespace = entry["namespace"]
        total_queries[(domain, namespace)] = entry["total"]
        domain_namespace_stats[domain][namespace][entry["endpoint_name"]] = [
            entry.get("distinct_successful", 0),
            entry.get("distinct_failed", 0),
            entry["success_count"],
        ]

    processed_lod = []
    for domain, namespaces in domain_namespace_stats.items():
        for namespace, counts in namespaces.items():
            row = {
                "domain": domain,
                "namespace": namespace,
                "total": total_queries[(domain, namespace)],
            }
            for endpoint in endpoints:
                success, fail, runs = counts.get(endpoint, [0, 0, 0])
                if success == 0 and fail == 0 and runs == 0:
                    row[endpoint] = ""
                else:
                    # every count gets its own marker so that adjacent
                    # numbers cannot run together
                    row[endpoint] = f"✅{success} ❌{fail} 🔄{runs}"
            processed_lod.append(row)

    return processed_lod

setup_ui()

Sets up the user interface for displaying SPARQL query statistics.

Source code in snapquery/namespace_stats_view.py
43
44
45
46
47
48
49
50
51
52
53
54
55
def setup_ui(self):
    """Sets up the user interface for displaying SPARQL query statistics.

    Creates a progress row holding a progress bar and a results row holding
    the legend label and the statistics grid, registers ``on_cell_clicked``
    for the grid's ``cellClicked`` event and finally schedules a one-shot
    timer that calls ``on_fetch_lod``.
    """
    with ui.row() as self.progress_row:
        self.progress_bar = NiceguiProgressbar(desc="Query Progress", total=100, unit="queries")
        self.progress_bar.progress.classes("rounded")
    with ui.row() as self.results_row:
        # the legend explains the three symbols used in the endpoint cells
        ui.label("Legend: ✅ Distinct Successful Queries  ❌ Distinct Failed Queries  🔄 Total Successful Runs")
        self.lod_grid = ListOfDictsGrid()
        # Set up a click event handler for the grid
        self.lod_grid.ag_grid.on("cellClicked", self.on_cell_clicked)

    # Fetch and display data immediately upon UI setup
    ui.timer(0.0, self.on_fetch_lod, once=True)

orcid

OrcidAccessToken

orcid access token response

Source code in snapquery/orcid.py
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
@lod_storable
class OrcidAccessToken:
    """
    orcid access token response

    Holds the fields of a token response plus the time at which the token
    was created locally, so that its expiry can be checked later
    (``expires_in`` is compared against the seconds elapsed since
    ``login_timestamp``).
    """

    orcid: str
    access_token: str
    token_type: str
    refresh_token: str
    expires_in: int
    scope: str
    name: str
    # None means "not supplied"; __post_init__ then stamps the creation time.
    # A plain ``int(time())`` default would be evaluated only once, when the
    # class is defined, and every token would share the import time.
    login_timestamp: Optional[int] = None

    def __post_init__(self):
        """
        default the login timestamp to the moment this instance is created
        """
        if self.login_timestamp is None:
            self.login_timestamp = int(time())

    @classmethod
    def get_samples(cls):
        """
        Returns:
            a list with one sample OrcidAccessToken
        """
        lod = [
            {
                "access_token": "f5af9f51-07e6-4332-8f1a-c0c11c1e3728",
                "token_type": "bearer",
                "refresh_token": "f725f747-3a65-49f6-a231-3e8944ce464d",
                "expires_in": 631138518,
                "scope": "/activities/update /read-limited",
                "name": "Sofia Garcia",
                "orcid": "0000-0001-2345-6789",
            }
        ]
        return [OrcidAccessToken.from_dict2(d) for d in lod]

OrcidAuth

authenticate with orcid

Source code in snapquery/orcid.py
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
class OrcidAuth:
    """
    authenticate with orcid

    The client configuration is loaded from a YAML file below ``base_path``;
    when that file does not exist ``config`` is None and ``available()``
    returns False. The user's token is cached in ``app.storage.user`` under
    the key ``orcid_token``.
    """

    # seconds to wait for an HTTP response before giving up,
    # so that an unresponsive server cannot block the caller forever
    REQUEST_TIMEOUT = 30

    def __init__(
        self,
        base_path: Optional[Path] = None,
        config_file_name: str = "orcid_config.yaml",
    ):
        """
        constructor

        Args:
            base_path: directory of the configuration file,
                defaults to ``~/.solutions/snapquery``
            config_file_name: name of the configuration file
        """
        if base_path is None:
            base_path = Path.home() / ".solutions/snapquery"
        self.base_path = base_path
        self.config_file_name = config_file_name
        self.config = self.load_config()

    def get_config_path(self) -> Path:
        """
        Returns:
            the path of the configuration file
        """
        return self.base_path / self.config_file_name

    def config_exists(self):
        """
        Returns:
            True if the configuration file exists
        """
        return self.get_config_path().exists()

    def available(self) -> bool:
        """
        Returns:
            True if a configuration was loaded
        """
        return self.config is not None

    def load_config(self) -> Union["OrcidConfig", None]:
        """
        Returns:
            the configuration read from the configuration file
            or None if the file does not exist
        """
        if not self.config_exists():
            return None
        return OrcidConfig.load_from_yaml_file(str(self.get_config_path()))

    def authenticate_url(self):
        """
        Returns:
            the authorization url as built by the configuration
        """
        return self.config.authenticate_url()

    def authenticated(self) -> bool:
        """
        Returns:
            True if a configuration is available and the cached
            access token of the user has not expired
        """
        if not self.available():
            return False
        orcid_token = self.get_cached_user_access_token()
        if orcid_token is None:
            return False
        return self._check_access_token(orcid_token)

    def get_cached_user_access_token(self) -> Union["OrcidAccessToken", None]:
        """
        Returns:
            the access token rebuilt from the record cached in the
            user storage or None if there is no such record
        """
        orcid_token_record = app.storage.user.get("orcid_token", None)
        if not orcid_token_record:
            return None
        return OrcidAccessToken.from_dict2(orcid_token_record)

    def _check_access_token(self, orcid_token: "OrcidAccessToken") -> bool:
        """
        Check if the given access token is valid
        Args:
            orcid_token: orcid access token

        Returns:
            True if the access token is valid, False otherwise
        """
        time_passed = int(time()) - orcid_token.login_timestamp
        return orcid_token.expires_in - time_passed >= 0

    def login(self, access_code: str) -> bool:
        """
        exchange the given access code for a token and cache it
        in the user storage

        Args:
            access_code: the code to exchange

        Returns:
            True when the token was retrieved and cached

        Raises:
            Exception: any error of the token retrieval is printed and re-raised
        """
        try:
            orcid_token = self._retrieve_token(access_code)
            app.storage.user.update({"orcid_token": asdict(orcid_token)})
        except Exception as e:
            print(e)
            # bare raise keeps the original traceback intact
            raise
        return True

    def _retrieve_token(self, code: str) -> "OrcidAccessToken":
        """
        URL=https://sandbox.orcid.org/oauth/token
         HEADER: Accept: application/json
         HEADER: Content-Type: application/x-www-form-urlencoded
         METHOD: POST
         DATA:
           client_id=[Your client ID]
           client_secret=[Your client secret]
           grant_type=authorization_code
           code=Six-digit code
           redirect_uri=[Your landing page]

        Args:
            code: the authorization code to exchange

        Returns:
            the access token built from the JSON response

        Raises:
            requests.RequestException: if the request fails, times out
                or returns an error status
        """
        url = f"{self.config.url}/oauth/token"
        # NOTE(review): redirect_uri from the request description above is
        # not sent here - confirm that the server accepts its omission
        data = {
            "client_id": self.config.client_id,
            "client_secret": self.config.client_secret,
            "grant_type": "authorization_code",
            "code": code,
        }
        resp = requests.post(url, data=data, timeout=self.REQUEST_TIMEOUT)
        resp.raise_for_status()
        return OrcidAccessToken.from_dict2(resp.json())

    def logout(self):
        """
        logout user by deleting cached access token

        Calling this without a cached token is a no-op.
        """
        # pop with a default: a plain del raises KeyError when no token is cached
        app.storage.user.pop("orcid_token", None)

    def _request_search_token(self) -> str:
        """
        Request search token
        see https://info.orcid.org/documentation/api-tutorials/api-tutorial-searching-the-orcid-registry/

        URL=https://sandbox.orcid.org/oauth/token
          HEADER: Accept: application/json
          METHOD: POST
          DATA:
            client_id=[Your public API client ID]
            client_secret=[Your public API secret]
            grant_type=client_credentials
            scope=/read-public

        Returns:
            the ``access_token`` value of the JSON response

        Raises:
            requests.RequestException: if the request fails, times out
                or returns an error status
        """
        url = f"{self.config.url}/oauth/token"
        data = {
            "client_id": self.config.client_id,
            "client_secret": self.config.client_secret,
            "grant_type": "client_credentials",
            "scope": "/read-public",
        }
        resp = requests.post(url, data=data, timeout=self.REQUEST_TIMEOUT)
        resp.raise_for_status()
        return resp.json()["access_token"]

    @property
    def search_token(self) -> str:
        """
        the search token of the configuration; requested and stored
        in the configuration file on first use
        """
        if self.config.search_token is None:
            self.config.search_token = self._request_search_token()
            self.store_config()
        return self.config.search_token

    def store_config(self):
        """
        save the configuration to the configuration file
        """
        self.config.save_to_yaml_file(str(self.get_config_path()))

    def search(self, params: "OrcidSearchParams", limit: int = 10) -> list[Person]:
        """
        search for persons via the expanded-search of the api endpoint

        Args:
            params: the search parameters providing the query string
            limit: the value of the ``rows`` request parameter

        Returns:
            one Person per record of the ``expanded-result`` entry of the
            response, an empty list if that entry is missing or empty

        Raises:
            requests.RequestException: if the request fails, times out
                or returns an error status
        """
        access_token = self.search_token
        url = f"{self.config.api_endpoint}/expanded-search/?q={params.get_search_query()}&rows={limit}"
        headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {access_token}",
        }
        resp = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
        resp.raise_for_status()
        # "or []" also covers an entry that is present but None
        records: list[dict] = resp.json().get("expanded-result") or []
        return [
            Person(
                given_name=record.get("given-names"),
                family_name=record.get("family-names"),
                orcid_id=record.get("orcid-id"),
            )
            for record in records
        ]

logout()

logout user by deleting cached access token

Source code in snapquery/orcid.py
115
116
117
118
119
def logout(self):
    """
    logout user by deleting cached access token

    Calling this without a cached token is a no-op.
    """
    # pop with a default: a plain del raises KeyError when no token is cached
    app.storage.user.pop("orcid_token", None)

OrcidConfig

orcid authentication configuration

Source code in snapquery/orcid.py
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
@lod_storable
class OrcidConfig:
    """
    orcid authentication configuration

    ``url`` is the base for the oauth endpoints, ``api_endpoint`` the base
    for API requests; ``search_token`` stays None until a token is stored.
    """

    url: str
    client_id: str
    client_secret: str
    redirect_uri: str = "http://127.0.0.1:9862/orcid_callback"
    api_endpoint: str = "https://pub.orcid.org/v3.0"
    search_token: Optional[str] = None

    @classmethod
    def get_samples(cls) -> list["OrcidConfig"]:
        """
        Returns:
            a list with one sample configuration
        """
        sample_record = {
            "url": "https://orcid.org",
            "client_id": "APP-123456789ABCDEFG",
            "client_secret": "<KEY>",
            "redirect_uri": "http://127.0.0.1:9862/orcid_callback",
            "api_endpoint": "https://sandbox.orcid.org/v3.0",
        }
        samples = []
        for record in (sample_record,):
            samples.append(OrcidConfig.from_dict2(record))
        return samples

    def authenticate_url(self):
        """
        Returns:
            the authorization url for this client, requesting the
            ``/authenticate`` scope and redirecting to ``redirect_uri``
        """
        query_parts = [
            f"client_id={self.client_id}",
            "response_type=code",
            "scope=/authenticate",
            f"redirect_uri={self.redirect_uri}",
        ]
        query = "&".join(query_parts)
        return f"{self.url}/oauth/authorize?{query}"

OrcidSearchParams dataclass

ORCID search API parameters; see https://info.orcid.org/documentation/api-tutorials/api-tutorial-searching-the-orcid-registry/

Source code in snapquery/orcid.py
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
@dataclass
class OrcidSearchParams:
    """
    Orcid search api params
    see https://info.orcid.org/documentation/api-tutorials/api-tutorial-searching-the-orcid-registry/

    Every attribute that is not None contributes one ``key:value`` term to
    the search query; the key is the attribute name with ``_`` replaced by ``-``.
    """

    # Biographical data
    given_names: Optional[str] = None
    family_name: Optional[str] = None
    credit_name: Optional[str] = None
    other_names: Optional[list[str]] = None
    email: Optional[str] = None
    keyword: Optional[list[str]] = None
    external_id_reference: Optional[str] = None

    # Affiliations data
    affiliation_org_name: Optional[str] = None
    grid_org_id: Optional[str] = None
    ror_org_id: Optional[str] = None
    ringgold_org_id: Optional[str] = None

    # Funding data
    funding_titles: Optional[list[str]] = None
    fundref_org_id: Optional[str] = None
    grant_numbers: Optional[list[str]] = None

    # Research activities data
    work_titles: Optional[list[str]] = None
    digital_object_ids: Optional[list[str]] = None

    # ORCID record data
    orcid: Optional[str] = None
    profile_submission_date: Optional[str] = None  # Assuming date format is string
    profile_last_modified_date: Optional[str] = None  # Assuming date format is string

    # All data (default for Lucene syntax)
    text: Optional[str] = None

    def get_search_query(self) -> str:
        """
        Build the query string from all attributes that are set.

        Returns:
            the ``key:value`` terms in field declaration order joined by ``+``,
            an empty string if no attribute is set
        """
        terms = []
        for field in fields(self):
            value = getattr(self, field.name)
            if value is None:
                continue
            key = field.name.replace("_", "-")
            # NOTE(review): list values are inserted with their Python repr -
            # confirm the format the search API expects for multi-valued fields
            terms.append(f"{key}:{value}")
        # join with the delimiter so that consecutive terms do not run together
        return "+".join(terms)

params_view

Created on 06.05.2024

@author: wf

ParamsView

a view for Query Parameters

Source code in snapquery/params_view.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class ParamsView:
    """
    a view for Query Parameters

    The editor itself (``dict_edit``) is only created by ``get_dict_edit``;
    ``open`` and ``close`` operate on that editor.
    """

    def __init__(self, solution, params: Params):
        """
        construct me with the given solution and params

        Args:
            solution: the solution this view belongs to
            params: the query parameters to edit
        """
        self.solution = solution
        self.params = params

    def open(self):
        """
        expand the dict edit so that its details are visible
        """
        self.dict_edit.expansion.open()

    def close(self):
        """
        collapse the dict edit so that its details are hidden
        """
        self.dict_edit.expansion.close()

    def get_dict_edit(self) -> DictEdit:
        """
        Create the DictEdit for the parameters dict, open it and return it.

        Returns:
            DictEdit: the newly created editor, also kept as ``self.dict_edit``
        """
        # one field definition per parameter, derived from its current value
        field_defs = {}
        for name, current in self.params.params_dict.items():
            field_defs[name] = FieldUiDef.from_key_value(name, current)
        # custom form definition with the title "Params"
        params_form = FormUiDef(title="Params", icon="tune", ui_fields=field_defs)
        self.dict_edit = DictEdit(data_to_edit=self.params.params_dict, form_ui_def=params_form)
        self.open()
        return self.dict_edit

__init__(solution, params)

construct me with the given solution and params

Source code in snapquery/params_view.py
15
16
17
18
19
20
def __init__(self, solution, params: Params):
    """
    construct me with the given solution and params

    Args:
        solution: the solution this view belongs to
        params: the query parameters to edit
    """
    self.solution = solution
    self.params = params

close()

hide the details of the dict edit

Source code in snapquery/params_view.py
28
29
30
31
32
def close(self):
    """
    hide the details of the dict edit

    Closes the expansion of ``self.dict_edit``, which is only
    assigned by ``get_dict_edit``.
    """
    self.dict_edit.expansion.close()

get_dict_edit()

Return a DictEdit instance for editing parameters.

Source code in snapquery/params_view.py
34
35
36
37
38
39
40
41
42
43
44
45
46
def get_dict_edit(self) -> DictEdit:
    """
    Create the DictEdit for the parameters dict, open it and return it.

    Returns:
        DictEdit: the newly created editor, also kept as ``self.dict_edit``
    """
    # one field definition per parameter, derived from its current value
    field_defs = {}
    for name, current in self.params.params_dict.items():
        field_defs[name] = FieldUiDef.from_key_value(name, current)
    # custom form definition with the title "Params"
    params_form = FormUiDef(title="Params", icon="tune", ui_fields=field_defs)
    self.dict_edit = DictEdit(data_to_edit=self.params.params_dict, form_ui_def=params_form)
    self.open()
    return self.dict_edit

open()

show the details of the dict edit

Source code in snapquery/params_view.py
22
23
24
25
26
def open(self):
    """
    show the details of the dict edit

    Opens the expansion of ``self.dict_edit``, which is only
    assigned by ``get_dict_edit``.
    """
    self.dict_edit.expansion.open()

person_selector

Created 2023

@author: th

PersonSelector

Provides an interface for searching and selecting people with auto-suggestions.

Source code in snapquery/person_selector.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
class PersonSelector:
    """
    Provides an interface for searching and selecting people with auto-suggestions.

    The left side of a splitter holds the name and PID inputs plus a
    "Continue" button, the right side shows the suggestions that are
    looked up while the user types a name.
    """

    def __init__(
        self,
        solution: WebSolution,
        selection_callback: Callable[[Person], Any],
        limit: int = 10,
    ):
        """
        Constructor

        Args:
            solution: the web solution providing the container and the
                named query manager (via ``solution.webserver.nqm``)
            selection_callback: called with the selected Person
            limit: the limit passed to each lookup and the maximum
                number of suggestions displayed
        """
        # parameters
        self.solution = solution
        self.selection_callback = selection_callback
        self.limit = limit
        # instance variables
        self.suggested_persons: List[Person] = []
        self.selected_person: Optional[Person] = None
        self.suggestion_view: Optional[ui.element] = None
        self.search_name = ""
        self.person_lookup = PersonLookup(nqm=solution.webserver.nqm)
        self.selection_btn: Optional[Button] = None
        self.debouncer_ui = DebouncerUI(parent=self.solution.container, debug=True)
        # build the ui last - it reads the attributes set above
        self.person_selection()

    @ui.refreshable
    def person_selection(self):
        """
        Display input fields for person data with auto-suggestion
        """
        person = self.selected_person if self.selected_person else Person()
        with ui.element("row").classes("w-full h-full"):
            with ui.splitter().classes("h-full  w-full") as splitter:
                with splitter.before:
                    with ui.row() as self.top_row:
                        pass
                    with ui.card() as self.selection_card:
                        with ui.row():
                            self.label = ui.label("Name or Pid:")
                        with ui.row():
                            self.name_input = ui.input(
                                label="name",
                                placeholder="Tim Berners-Lee",
                                on_change=self.suggest_persons,
                                value=self.search_name,
                            ).props("size=60")
                        with ui.row():
                            self.identifier_input = ui.input(
                                label="PID",
                                placeholder="Q80",
                                on_change=self.check_pid,
                                value=person.wikidata_id,
                            ).props("size=20")
                        # the button starts disabled; check_pid enables it
                        # once the PID input holds a valid identifier
                        self.selection_btn = ui.button(text="Continue", on_click=self.btn_selection_callback)
                        self.selection_btn.disable()
            with splitter.after:
                with ui.element("column").classes(" w-full h-full gap-2"):
                    self.suggestion_view = ui.column().classes("rounded-md border-2 p-3")

    async def btn_selection_callback(self):
        """
        Create a Person from the current input values and pass it
        to the selection callback.

        The PID input value is stored in the identifier attribute that
        matches the detected PID type; the name input becomes the label.
        """
        person = Person()
        pid_value = PIDs().pid4id(self.identifier_input.value)
        if pid_value.pid.name == "Wikidata":
            person.wikidata_id = self.identifier_input.value
        elif pid_value.pid.name == "dblp":
            person.dblp_id = self.identifier_input.value
        elif pid_value.pid.name == "ORCID":
            person.orcid_id = self.identifier_input.value
        person.label = self.name_input.value
        self.selection_callback(person)

    async def check_pid(self):
        """
        Enable the selection button if the PID input holds a valid
        identifier, otherwise disable it.
        """
        pid = PIDs().pid4id(self.identifier_input.value)
        if pid is not None and pid.is_valid() and self.selection_btn is not None:
            self.selection_btn.enable()
        elif self.selection_btn:
            self.selection_btn.disable()

    def clear_suggested_persons(self):
        """
        Remove all suggestions and refresh the suggestions view.
        """
        self.suggested_persons = []
        self.update_suggestions_view()

    async def suggest_persons(self):
        """
        Use debouncer to
        suggest potential persons based on the input.
        """
        await self.debouncer_ui.debounce(self.load_person_suggestions, self.name_input.value)

    async def load_person_suggestions(self, search_name: str):
        """
        Load person suggestions based on the search name.
        This method fetches data concurrently from multiple sources and updates suggestions as they arrive.

        Exceptions are passed to the solution's exception handler.

        Args:
            search_name(str): the search name to search for
        """
        if len(search_name) < 4:  # Skip querying for very short input strings.
            return
        try:
            self.clear_suggested_persons()
            tasks = [
                asyncio.to_thread(self.person_lookup.suggest_from_wikidata, search_name, self.limit),
                asyncio.to_thread(self.person_lookup.suggest_from_orcid, search_name, self.limit),
                asyncio.to_thread(self.person_lookup.suggest_from_dblp, search_name, self.limit),
            ]
            # handle each source as soon as its lookup has finished
            for future in asyncio.as_completed(tasks):
                new_persons = await future
                self.merge_and_update_suggestions(new_persons)
                self.update_suggestions_view()
        except Exception as ex:
            self.solution.handle_exception(ex)

    def merge_and_update_suggestions(self, new_persons: List[Person]):
        """
        Merges new persons with existing ones based on shared identifiers or adds them if unique.
        Ensures no duplicates are present in the list of suggested persons.

        Args:
            new_persons (List[Person]): New person suggestions to be added or merged.
        """
        for new_person in new_persons:
            merged = False
            for existing_person in self.suggested_persons:
                if existing_person.share_identifier(new_person):
                    existing_person.merge_with(new_person)
                    merged = True
                    break
            if not merged:
                self.suggested_persons.append(new_person)

    def update_suggestions_view(self):
        """
        update the suggestions view

        Shows at most ``limit`` suggestions; if there are more, a final
        item states the total number of matches.
        """
        if self.suggestion_view:
            self.suggestion_view.clear()
            with self.suggestion_view:
                with ui.list().props("bordered separator"):
                    ui.item_label("Suggestions").props("header").classes("text-bold")
                    ui.separator()
                    for person in self.suggested_persons[: self.limit]:
                        PersonSuggestion(person=person, on_select=self.selection_callback)

                    if len(self.suggested_persons) > self.limit:
                        with ui.item():
                            ui.label(
                                f"{'>' if len(self.suggested_persons) >= 10000 else ''}{len(self.suggested_persons)} matches are available..."
                            )
            return []

    def select_person_suggestion(self, person: Person):
        """
        Select the given Person by updating the input fields to the selected person and storing the object internally
        Args:
            person: person that should be selected
        """
        self.selected_person = person
        self.person_selection.refresh()
        self.suggested_persons = [person]
        # update_suggestions_view is the method this class defines for
        # redrawing the suggestions
        self.update_suggestions_view()

__init__(solution, selection_callback, limit=10)

Constructor

Source code in snapquery/person_selector.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def __init__(
    self,
    solution: WebSolution,
    selection_callback: Callable[[Person], Any],
    limit: int = 10,
):
    """
    Constructor

    Args:
        solution: the web solution providing the container and the
            named query manager (via ``solution.webserver.nqm``)
        selection_callback: called with the selected Person
        limit: stored as ``self.limit`` for the lookups and the display
    """
    # parameters
    self.solution = solution
    self.selection_callback = selection_callback
    self.limit = limit
    # instance variables
    self.suggested_persons: List[Person] = []
    self.selected_person: Optional[Person] = None
    self.suggestion_view: Optional[ui.element] = None
    self.search_name = ""
    self.person_lookup = PersonLookup(nqm=solution.webserver.nqm)
    self.selection_btn: Optional[Button] = None
    self.debouncer_ui = DebouncerUI(parent=self.solution.container, debug=True)
    # build the ui last, after all attributes above are set
    self.person_selection()

load_person_suggestions(search_name) async

Load person suggestions based on the search name. This method fetches data concurrently from multiple sources and updates suggestions as they arrive.

Parameters:

Name Type Description Default
search_name(str)

the search name to search for

required
Source code in snapquery/person_selector.py
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
async def load_person_suggestions(self, search_name: str):
    """
    Look up persons matching the search name and show them as suggestions.

    The Wikidata, ORCID and dblp lookups run concurrently in threads; the
    suggestions are merged and the view is refreshed each time one of them
    finishes. Names shorter than four characters are ignored. Exceptions
    are passed to the solution's exception handler.

    Args:
        search_name(str): the search name to search for
    """
    if len(search_name) < 4:  # too short to yield useful suggestions
        return
    try:
        self.clear_suggested_persons()
        lookup = self.person_lookup
        sources = (
            lookup.suggest_from_wikidata,
            lookup.suggest_from_orcid,
            lookup.suggest_from_dblp,
        )
        pending = [asyncio.to_thread(source, search_name, self.limit) for source in sources]
        for finished in asyncio.as_completed(pending):
            self.merge_and_update_suggestions(await finished)
            self.update_suggestions_view()
    except Exception as ex:
        self.solution.handle_exception(ex)

merge_and_update_suggestions(new_persons)

Merges new persons with existing ones based on shared identifiers or adds them if unique. Ensures no duplicates are present in the list of suggested persons.

Parameters:

Name Type Description Default
new_persons List[Person]

New person suggestions to be added or merged.

required
Source code in snapquery/person_selector.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
def merge_and_update_suggestions(self, new_persons: List[Person]):
    """
    Fold new person suggestions into the list of suggested persons.

    A new person is merged into the first already suggested person it
    shares an identifier with; if there is none it is appended.

    Args:
        new_persons (List[Person]): New person suggestions to be added or merged.
    """
    for candidate in new_persons:
        for known in self.suggested_persons:
            if known.share_identifier(candidate):
                known.merge_with(candidate)
                break
        else:
            # no already suggested person shares an identifier
            self.suggested_persons.append(candidate)

person_selection()

Display input fields for person data with auto-suggestion

Source code in snapquery/person_selector.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
@ui.refreshable
def person_selection(self):
    """
    Display input fields for person data with auto-suggestion

    The part before the splitter holds the name input, the PID input and
    the "Continue" button; the part after it holds the suggestions view.
    """
    # fall back to an empty Person so that the PID input has a value source
    person = self.selected_person if self.selected_person else Person()
    with ui.element("row").classes("w-full h-full"):
        with ui.splitter().classes("h-full  w-full") as splitter:
            with splitter.before:
                with ui.row() as self.top_row:
                    pass
                with ui.card() as self.selection_card:
                    with ui.row():
                        self.label = ui.label("Name or Pid:")
                    with ui.row():
                        self.name_input = ui.input(
                            label="name",
                            placeholder="Tim Berners-Lee",
                            on_change=self.suggest_persons,
                            value=self.search_name,
                        ).props("size=60")
                    with ui.row():
                        self.identifier_input = ui.input(
                            label="PID",
                            placeholder="Q80",
                            on_change=self.check_pid,
                            value=person.wikidata_id,
                        ).props("size=20")
                    # if self.selection_btn is None:
                    # the button is created disabled
                    self.selection_btn = ui.button(text="Continue", on_click=self.btn_selection_callback)
                    self.selection_btn.disable()
        with splitter.after:
            with ui.element("column").classes(" w-full h-full gap-2"):
                self.suggestion_view = ui.column().classes("rounded-md border-2 p-3")

select_person_suggestion(person)

Select the given person by updating the input fields to the selected person and storing the object internally. Args: person – the person that should be selected.

Source code in snapquery/person_selector.py
247
248
249
250
251
252
253
254
255
256
def select_person_suggestion(self, person: Person):
    """
    Select the given Person by updating the input fields to the selected person and storing the object internally
    Args:
        person: person that should be selected
    """
    self.selected_person = person
    self.person_selection.refresh()
    self.suggested_persons = [person]
    # update_suggestions_view is the method the class defines for
    # redrawing the suggestions
    self.update_suggestions_view()

suggest_persons() async

Use debouncer to suggest potential persons based on the input.

Source code in snapquery/person_selector.py
178
179
180
181
182
183
async def suggest_persons(self):
    """
    Use debouncer to
    suggest potential persons based on the input.

    Hands ``load_person_suggestions`` and the current value of the
    name input to the debouncer.
    """
    await self.debouncer_ui.debounce(self.load_person_suggestions, self.name_input.value)

update_suggestions_view()

update the suggestions view

Source code in snapquery/person_selector.py
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
def update_suggestions_view(self):
    """
    Redraw the suggestions view from the current list of suggested persons.

    Shows at most ``limit`` suggestions; if there are more, a final item
    states the total number of matches. Does nothing when there is no
    suggestions view.
    """
    view = self.suggestion_view
    if not view:
        return None
    view.clear()
    with view:
        with ui.list().props("bordered separator"):
            ui.item_label("Suggestions").props("header").classes("text-bold")
            ui.separator()
            for suggestion in self.suggested_persons[: self.limit]:
                PersonSuggestion(person=suggestion, on_select=self.selection_callback)

            match_count = len(self.suggested_persons)
            if match_count > self.limit:
                prefix = ">" if match_count >= 10000 else ""
                with ui.item():
                    ui.label(f"{prefix}{match_count} matches are available...")
    return []

PersonSuggestion

Bases: PersonView

Display a Person

Source code in snapquery/person_selector.py
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
class PersonSuggestion(PersonView):
    """
    Display a Person

    In addition to the PersonView display, a click on the person
    card calls the given selection callback with the person.
    """

    def __init__(self, person: Person, on_select: Callable[[Person], Any]):
        """
        Args:
            person: the person to display
            on_select: called with the person when the card is clicked
        """
        super().__init__(person=person)
        self._on_select_callback = on_select
        # person_card is created by PersonView.__init__
        self.person_card.on_click(self.on_select)

    def on_select(self):
        """
        Handle selection of the suggestion card

        Returns:
            the result of the selection callback
        """
        return self._on_select_callback(self.person)

on_select()

Handle selection of the suggestion card

Source code in snapquery/person_selector.py
85
86
87
88
89
def on_select(self):
    """
    Handle selection of the suggestion card

    Returns:
        the result of calling the selection callback with this person
    """
    return self._on_select_callback(self.person)

PersonView

Bases: Element

Display a person

Source code in snapquery/person_selector.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
class PersonView(Element):
    """
    Display a person

    Shows an avatar (with the person's image if there is one), the label,
    the given and family name and one linked entry per known identifier.
    """

    def __init__(self, person: Person):
        """
        Args:
            person: the person to display
        """
        self.pids = PIDs()
        self.pid_values = self._create_pid_values(person)
        super().__init__(tag="div")
        self.person = person
        with self:
            with ui.item() as self.person_card:
                with ui.item_section().props("avatar"):
                    with ui.avatar():
                        if person.image:
                            ui.image(source=person.image)
                with ui.item_section():
                    with ui.row():
                        self.person_label = ui.label(self.person.label)
                    with ui.row():
                        full_name = f"{self.person.given_name} {self.person.family_name}"
                        self.person_name = ui.label(full_name)
                    with ui.row():
                        self._show_identifier()

    def _create_pid_values(self, person: Person) -> List[PIDValue]:
        """
        Collect a PIDValue for every identifier the person has.

        For each available PID key the person attribute ``<key>_id``
        is read; missing or empty values are skipped.
        """
        found = []
        for key, pid in self.pids.pids.items():
            identifier = getattr(person, f"{key}_id", None)
            if not identifier:
                continue
            found.append(PIDValue(pid=pid, value=identifier))
        return found

    def _show_identifier(self):
        """
        Show the logo and a link (opening in a new tab) for
        every identifier of the person
        """
        for entry in self.pid_values:
            with ui.element("div"):
                ui.avatar(
                    icon=f"img:{entry.pid.logo}",
                    color=None,
                    size="sm",
                    square=True,
                )
                ui.link(
                    text=entry.value,
                    target=entry.url,
                    new_tab=True,
                )

pid

Created on 2024-05-26 @author: wf

PID dataclass

A persistent identifier source e.g. ORCID, dblpID or wikidata id

Source code in snapquery/pid.py
11
12
13
14
15
16
17
18
19
20
@dataclass
class PID:
    """
    A persistent identifier source e.g. ORCID, dblpID or wikidata id
    """

    # display name of the identifier source, e.g. "ORCID"
    name: str
    # address of the logo image of the identifier source
    logo: str
    # URL template with a single positional "{}" placeholder for the identifier value
    formatter_url: str
    # regular expression an identifier value of this source has to match
    regex: str

PIDValue dataclass

Represents a specific instance of a persistent identifier with its value.

Source code in snapquery/pid.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
@dataclass
class PIDValue:
    """
    Represents a specific instance of a persistent identifier with its value.
    """

    # the identifier source (name, logo, url template and validation regex)
    pid: PID
    # the identifier value itself, e.g. a Wikidata Q-id
    value: str

    @property
    def url(self) -> str:
        """
        The address of the identifier: the value filled into the
        formatter url of the identifier source.
        """
        return self.pid.formatter_url.format(self.value)

    @property
    def html(self) -> str:
        """
        An HTML anchor showing the logo of the identifier source followed
        by the value and pointing to the url of the identifier.

        All interpolated parts are HTML-escaped so that an unexpected value
        can not break out of the attributes or inject markup.
        """
        # local import: inside the class body the name "html" is this property
        from html import escape

        href = escape(str(self.url), quote=True)
        logo = escape(str(self.pid.logo), quote=True)
        name = escape(str(self.pid.name), quote=True)
        text = escape(str(self.value), quote=False)
        return f'<a href="{href}"><img src="{logo}" alt="{name} logo"> {text}</a>'

    def is_valid(self) -> bool:
        """
        Check the value against the regular expression of the identifier source.

        Returns:
            bool: True if the complete value matches, False otherwise
                (including for values that are not strings)
        """
        if not isinstance(self.value, str):
            return False
        # fullmatch: a "$" anchor alone would still accept a trailing newline
        return re.fullmatch(self.pid.regex, self.value) is not None

PIDs

Available PIDs

Source code in snapquery/pid.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
class PIDs:
    """
    Available PIDs
    """

    def __init__(self):
        """
        Set up the known identifier sources keyed by a short lowercase name.
        """
        self.pids = {
            "orcid": PID(
                name="ORCID",
                logo="https://orcid.org/sites/default/files/images/orcid_16x16.png",
                formatter_url="https://orcid.org/{}",
                regex=r"^\d{4}-\d{4}-\d{4}-\d{3}[0-9X]$",
            ),
            "dblp": PID(
                name="dblp",
                logo="https://dblp.org/img/dblp-icon-64x64.png",
                formatter_url="https://dblp.org/pid/{}",
                regex=r"^[a-z0-9/]+$",
            ),
            "wikidata": PID(
                name="Wikidata",
                logo="https://www.wikidata.org/static/favicon/wikidata.ico",
                formatter_url="https://www.wikidata.org/wiki/{}",
                regex=r"^Q[0-9]+$",
            ),
        }

    def pid4id(self, identifier: str) -> Optional[PIDValue]:
        """
        Create a PIDValue instance based on the identifier type.

        The identifier sources are tried in their definition order and the
        first one whose regular expression matches the complete identifier wins.

        Args:
            identifier (str): the identifier value to classify

        Returns:
            Optional[PIDValue]: the matching PIDValue or None if no source
                matches or the identifier is not a string
        """
        if not isinstance(identifier, str):
            return None
        for pid in self.pids.values():
            # fullmatch: a "$" anchor alone would still accept a trailing newline
            if re.fullmatch(pid.regex, identifier):
                return PIDValue(pid=pid, value=identifier)
        return None

pid4id(identifier)

Create a PIDValue instance based on the identifier type.

Source code in snapquery/pid.py
71
72
73
74
75
76
77
78
def pid4id(self, identifier: str) -> Optional[PIDValue]:
    """
    Create a PIDValue instance based on the identifier type.

    The identifier sources are tried in their definition order and the
    first one whose regular expression matches the complete identifier wins.

    Args:
        identifier (str): the identifier value to classify

    Returns:
        Optional[PIDValue]: the matching PIDValue or None if no source
            matches or the identifier is not a string
    """
    if not isinstance(identifier, str):
        return None
    for pid in self.pids.values():
        # fullmatch: a "$" anchor alone would still accept a trailing newline
        if re.fullmatch(pid.regex, identifier):
            return PIDValue(pid=pid, value=identifier)
    return None

pid_lookup

Created on 2024-05-26 @author: wf

PersonLookup

Lookup potential persons from various databases such as Wikidata, ORCID, and DBLP.

Source code in snapquery/pid_lookup.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
class PersonLookup:
    """
    Lookup potential persons from various
    databases such as Wikidata, ORCID, and DBLP.
    """

    def __init__(self, nqm: NamedQueryManager):
        """
        Initialize the PersonLookup with a Named Query Manager.

        Args:
            nqm (NamedQueryManager): The named query manager to execute SPARQL queries.
        """
        self.pids = PIDs()
        self.nqm = nqm
        self.wikidata_search = WikidataSearch()
        self.dblp_person_lookup = DblpPersonLookup(self.nqm)

    def suggest_from_wikidata(self, search_name: str, limit: int = 10) -> List[Person]:
        """
        Suggest persons using WikidataSearch.

        The search yields candidate Q-ids which are then resolved to persons
        by a SPARQL query executed via the named query manager.

        Args:
            search_name (str): The name to search for suggestions.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of suggested persons from Wikidata.
        """
        suggestions = self.wikidata_search.searchOptions(search_name, limit=limit)
        qids = [f"wd:{qid}" for qid, _plabel, _pdesc in suggestions or []]
        if not qids:
            # nothing to look up - avoid a query with an empty VALUES list
            return []
        qid_list = " ".join(qids)
        named_query = NamedQuery(
            domain="wikidata.org",
            namespace="pid-lookup",
            name="person-by-qid",
            title="Lookup persons with the given qids",
            description="based on a pre-search with wikidata search select persons",
            sparql="""# snapquery person lookup 
SELECT *
WHERE 
{
  VALUES ?scholar {
    {{ qid_list }}
  } 
  ?scholar wdt:P31 wd:Q5 .
  ?scholar wdt:P735 ?given_name_qid .
  ?given_name_qid rdfs:label ?given_name .
  ?scholar wdt:P734 ?family_name_qid .
  ?family_name_qid rdfs:label ?family_name .
  OPTIONAL{{ ?scholar rdfs:label ?label FILTER(lang(?label) = "en") }}.
  OPTIONAL{{?scholar wdt:P2456 ?dblp_author_id .}}
  OPTIONAL{{?scholar wdt:P496 ?orcid_id . }}
  OPTIONAL{{?scholar wdt:P18 ?image . }}
  FILTER(lang(?given_name) = "en")
  FILTER(lang(?family_name) = "en")
}
            """,
        )
        params_dict = {"qid_list": qid_list}
        # the second element of the result is not used here
        person_lod, _stats = self.nqm.execute_query(
            named_query=named_query,
            params_dict=params_dict,
            limit=limit,
            with_stats=False,
        )
        persons = []
        for pr in person_lod:
            person = Person(
                label=pr.get("label"),
                given_name=pr.get("given_name"),
                family_name=pr.get("family_name"),
                # keep only the last path segment of the scholar value
                wikidata_id=pr.get("scholar").split("/")[-1],
                dblp_author_id=pr.get("dblp_author_id"),
                orcid_id=pr.get("orcid_id"),
                image=pr.get("image"),
            )
            persons.append(person)

        return persons

    def suggest_from_orcid(self, search_name: str, limit: int = 10) -> List[Person]:
        """
        Suggest persons using the ORCID registry search.

        Args:
            search_name (str): The name to search for suggestions.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of suggested persons from ORCID,
                empty if the ORCID access is not available.
        """
        orcid = OrcidAuth()
        persons = []
        if orcid.available():
            # the search name is used as the family name search parameter
            persons = orcid.search(OrcidSearchParams(family_name=search_name), limit=limit)
        return persons

    def suggest_from_dblp(self, search_name: str, limit: int = 10) -> List[Person]:
        """
        Suggest persons using DBLP author search.

        Args:
            search_name (str): The name to search for suggestions.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of suggested persons from DBLP.
        """
        persons = self.dblp_person_lookup.search(name_part=search_name, limit=limit)
        return persons

__init__(nqm)

Initialize the PersonLookup with a Named Query Manager.

Parameters:

Name Type Description Default
nqm NamedQueryManager

The named query manager to execute SPARQL queries.

required
Source code in snapquery/pid_lookup.py
23
24
25
26
27
28
29
30
31
32
33
def __init__(self, nqm: NamedQueryManager):
    """
    Set up the identifier sources and the search helpers used for the lookups.

    Args:
        nqm (NamedQueryManager): The named query manager to execute SPARQL queries.
    """
    self.pids, self.nqm = PIDs(), nqm
    self.wikidata_search = WikidataSearch()
    # the dblp lookup gets the same named query manager
    self.dblp_person_lookup = DblpPersonLookup(nqm)

suggest_from_dblp(search_name, limit=10)

Suggest persons using DBLP author search.

Parameters:

Name Type Description Default
search_name str

The name to search for suggestions.

required
limit int

The maximum number of results to return.

10

Returns:

Type Description
List[Person]

List[Person]: A list of suggested persons from DBLP.

Source code in snapquery/pid_lookup.py
118
119
120
121
122
123
124
125
126
127
128
129
130
def suggest_from_dblp(self, search_name: str, limit: int = 10) -> List[Person]:
    """
    Delegate the person suggestion to the DBLP person lookup.

    Args:
        search_name (str): The name to search for suggestions.
        limit (int): The maximum number of results to return.

    Returns:
        List[Person]: A list of suggested persons from DBLP.
    """
    return self.dblp_person_lookup.search(name_part=search_name, limit=limit)

suggest_from_orcid(search_name, limit=10)

Suggest persons using the ORCID registry search.

Parameters:

Name Type Description Default
search_name str

The name to search for suggestions.

required
limit int

The maximum number of results to return.

10

Returns:

Type Description
List[Person]

List[Person]: A list of suggested persons from ORCID.

Source code in snapquery/pid_lookup.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def suggest_from_orcid(self, search_name: str, limit: int = 10) -> List[Person]:
    """
    Search the ORCID registry for persons, using the search name as family name.

    Args:
        search_name (str): The name to search for suggestions.
        limit (int): The maximum number of results to return.

    Returns:
        List[Person]: A list of suggested persons from ORCID,
            empty if the ORCID access is not available.
    """
    orcid_auth = OrcidAuth()
    if not orcid_auth.available():
        return []
    search_params = OrcidSearchParams(family_name=search_name)
    return orcid_auth.search(search_params, limit=limit)

suggest_from_wikidata(search_name, limit=10)

Suggest persons using WikidataSearch.

Parameters:

Name Type Description Default
search_name str

The name to search for suggestions.

required
limit int

The maximum number of results to return.

10

Returns:

Type Description
List[Person]

List[Person]: A list of suggested persons from Wikidata.

Source code in snapquery/pid_lookup.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
    def suggest_from_wikidata(self, search_name: str, limit: int = 10) -> List[Person]:
        """
        Suggest persons using WikidataSearch.

        Args:
            search_name (str): The name to search for suggestions.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of suggested persons from Wikidata.
        """
        persons = []
        suggestions = self.wikidata_search.searchOptions(search_name, limit=limit)
        qid_list = ""
        delim = ""
        for qid, _plabel, _pdesc in suggestions:
            qid_list += f"{delim}wd:{qid}"
            delim = " "
        named_query = NamedQuery(
            domain="wikidata.org",
            namespace="pid-lookup",
            name="person-by-qid",
            title="Lookup persons with the given qids",
            description="based on a pre-search with wikidata search select persons",
            sparql="""# snapquery person lookup 
SELECT *
WHERE 
{
  VALUES ?scholar {
    {{ qid_list }}
  } 
  ?scholar wdt:P31 wd:Q5 .
  ?scholar wdt:P735 ?given_name_qid .
  ?given_name_qid rdfs:label ?given_name .
  ?scholar wdt:P734 ?family_name_qid .
  ?family_name_qid rdfs:label ?family_name .
  OPTIONAL{{ ?scholar rdfs:label ?label FILTER(lang(?label) = "en") }}.
  OPTIONAL{{?scholar wdt:P2456 ?dblp_author_id .}}
  OPTIONAL{{?scholar wdt:P496 ?orcid_id . }}
  OPTIONAL{{?scholar wdt:P18 ?image . }}
  FILTER(lang(?given_name) = "en")
  FILTER(lang(?family_name) = "en")
}
            """,
        )
        params_dict = {"qid_list": qid_list}
        person_lod, stats = self.nqm.execute_query(
            named_query=named_query,
            params_dict=params_dict,
            limit=limit,
            with_stats=False,
        )
        for pr in person_lod:
            person = Person(
                label=pr.get("label"),
                given_name=pr.get("given_name"),
                family_name=pr.get("family_name"),
                wikidata_id=pr.get("scholar").split("/")[-1],
                dblp_author_id=pr.get("dblp_author_id"),
                orcid_id=pr.get("orcid_id"),
                image=pr.get("image"),
            )
            persons.append(person)

        return persons

qimport

Created on 2024-05-05

@author: wf

QueryImport

Import named queries from a given URL or file.

Source code in snapquery/qimport.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
class QueryImport:
    """
    Import named queries from a given URL or file.
    """

    def __init__(self, nqm: NamedQueryManager = None):
        """
        Constructor

        Args:
            nqm (NamedQueryManager, optional): The NamedQueryManager to use for storing queries.
        """
        self.nqm = nqm

    def import_samples(self, with_store: bool = True, show_progress: bool = False):
        """
        import all sample json files

        Args:
            with_store (bool): if True store the result
            show_progress (bool): if True show a tqdm progress bar

        Raises:
            Exception: whatever the import of a single file raised, after
                the name of the offending file has been printed
        """
        for json_file in glob.glob(os.path.join(self.nqm.samples_path, "*.json")):
            try:
                nq_set = self.import_from_json_file(json_file, with_store, show_progress)
            except Exception:
                print(f"could not load json_file {json_file}")
                raise
            if "ceur" in json_file:
                # files with "ceur" in their path are additionally re-exported to /tmp
                json_file_name = os.path.basename(json_file)
                output_path = os.path.join("/tmp", json_file_name)
                nq_set.save_to_json_file(output_path, indent=2)

    def import_from_json_file(
        self, json_file: str, with_store: bool = False, show_progress: bool = False
    ) -> NamedQuerySet:
        """
        Import named queries from a JSON file.

        Queries without SPARQL text get it read from their https://w.wiki/ short url.

        Args:
            json_file (str): Path to the JSON file.
            with_store (bool): If True, store the results in the NamedQueryManager.
            show_progress (bool): If True, show a progress bar during the import.

        Returns:
            NamedQuerySet: A NamedQuerySet object containing the imported NamedQuery objects.

        Raises:
            ValueError: if a query has neither SPARQL text nor a https://w.wiki/ short url
        """
        nq_set = NamedQuerySet.load_from_json_file(json_file)
        iterable = (
            tqdm(
                nq_set.queries,
                desc=f"Importing Namespace {nq_set.namespace}@{nq_set.domain}",
            )
            if show_progress
            else nq_set.queries
        )

        for nq in iterable:
            if not nq.sparql:
                if not (nq.url and nq.url.startswith("https://w.wiki/")):
                    raise ValueError(
                        f"invalid named query with neither sparql nor a https://w.wiki/ short url: {nq}"
                    )
                nq.sparql = self.read_from_short_url(nq.url)
            # storing needs a named query manager
            if with_store and self.nqm:
                self.nqm.add_and_store(nq)
        return nq_set

    def read_from_short_url(self, url: str) -> str:
        """
        Read the query text behind the given short url.

        Args:
            url (str): the short url of the query
                (presumably a https://w.wiki/ address - TODO confirm what ShortUrl accepts)

        Returns:
            str: whatever ShortUrl.read_query yields for the url
        """
        short_url = ShortUrl(url)
        return short_url.read_query()

__init__(nqm=None)

Constructor

Parameters:

Name Type Description Default
nqm NamedQueryManager

The NamedQueryManager to use for storing queries.

None
Source code in snapquery/qimport.py
21
22
23
24
25
26
27
28
29
def __init__(self, nqm: NamedQueryManager = None):
    """
    Constructor

    Args:
        nqm (NamedQueryManager, optional): The NamedQueryManager to use for storing queries.
    """
    self.nqm = nqm
    pass

import_from_json_file(json_file, with_store=False, show_progress=False)

Import named queries from a JSON file.

Parameters:

Name Type Description Default
json_file str

Path to the JSON file.

required
with_store bool

If True, store the results in the NamedQueryManager.

False
show_progress bool

If True, show a progress bar during the import.

False

Returns:

Name Type Description
NamedQuerySet NamedQuerySet

A NamedQuerySet object containing the imported NamedQuery objects.

Source code in snapquery/qimport.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def import_from_json_file(
    self, json_file: str, with_store: bool = False, show_progress: bool = False
) -> NamedQuerySet:
    """
    Import named queries from a JSON file.

    Queries without SPARQL text get it read from their https://w.wiki/ short url.

    Args:
        json_file (str): Path to the JSON file.
        with_store (bool): If True, store the results in the NamedQueryManager.
        show_progress (bool): If True, show a progress bar during the import.

    Returns:
        NamedQuerySet: A NamedQuerySet object containing the imported NamedQuery objects.

    Raises:
        ValueError: if a query has neither SPARQL text nor a https://w.wiki/ short url
    """
    nq_set = NamedQuerySet.load_from_json_file(json_file)
    iterable = (
        tqdm(
            nq_set.queries,
            desc=f"Importing Namespace {nq_set.namespace}@{nq_set.domain}",
        )
        if show_progress
        else nq_set.queries
    )

    for nq in iterable:
        if not nq.sparql:
            if not (nq.url and nq.url.startswith("https://w.wiki/")):
                raise ValueError(
                    f"invalid named query with neither sparql nor a https://w.wiki/ short url: {nq}"
                )
            short_url = ShortUrl(nq.url)
            nq.sparql = short_url.read_query()
        # storing needs a named query manager
        if with_store and self.nqm:
            self.nqm.add_and_store(nq)
    return nq_set

import_samples(with_store=True, show_progress=False)

import all sample json files

Parameters:

Name Type Description Default
with_store(bool)

if True store the result

required
show_progress(bool)

if True show a tqdm progress bar

required
Source code in snapquery/qimport.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def import_samples(self, with_store: bool = True, show_progress: bool = False):
    """
    Import every json file found in the samples path of the named query manager.

    Args:
        with_store (bool): if True store the result
        show_progress (bool): if True show a tqdm progress bar
    """
    sample_pattern = os.path.join(self.nqm.samples_path, "*.json")
    for sample_file in glob.glob(sample_pattern):
        try:
            query_set = self.import_from_json_file(sample_file, with_store, show_progress)
        except Exception:
            # name the offending file, then let the error propagate
            print(f"could not load json_file {sample_file}")
            raise
        if "ceur" not in sample_file:
            continue
        # files with "ceur" in their path are additionally re-exported to /tmp
        target_path = os.path.join("/tmp", os.path.basename(sample_file))
        query_set.save_to_json_file(target_path, indent=2)

qimport_view

Created on 2024-05-05

@author: wf

QueryImportView

display Query Import UI

Source code in snapquery/qimport_view.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
class QueryImportView:
    """
    display Query Import UI
    """

    def __init__(
        self,
        solution=None,
        person: Optional[Person] = None,
        allow_importing_from_url: bool = True,
    ):
        """
        Constructor

        Args:
            solution: the web solution providing the container and the
                named query manager; without it no user interface is set up
            person (Optional[Person]): the person nominating the query, if any
            allow_importing_from_url (bool): if True offer the "Import Query" button
        """
        self.person = person
        self.solution = solution
        self.allow_importing_from_url = allow_importing_from_url
        # the input fields below are bound to these attributes
        self.namespace = ""
        self.name = ""
        self.url = ""
        self.title = ""
        self.description = ""
        self.comment = ""
        self.query = None
        if self.solution:
            self.qimport = QueryImport()
            self.nqm = self.solution.nqm
            self.setup_ui()

    def setup_ui(self):
        """
        setup the user interface
        """
        with self.solution.container:
            with ui.row() as self.input_row:
                self.input_row.classes("h-full")
                ui.input(label="namespace", placeholder="e.g. wikidata-examples").bind_value(self, "namespace")
                with ui.input(label="name", placeholder="e.g. all proceedings of CEUR-WS").bind_value(self, "name"):
                    ui.tooltip("short name for query; needs to be unique within the namespace")
                ui.input(label="url", placeholder="e.g. short url to the query").props("size=80").bind_value(
                    self, "url"
                )
                if self.allow_importing_from_url:
                    ui.button(icon="input", text="Import Query", on_click=self.on_input_button)
                ui.button(icon="publish", text="Publish Query", on_click=self.on_import_button)
                with ui.input(label="title").props("size=80").bind_value(self, "title"):
                    ui.tooltip("Descriptive title of the query")
            self.query_row = ui.row().classes("w-full h-full flex ")
            with self.query_row:
                ui.textarea(label="query").bind_value(self, "query").classes(
                    "w-full h-full resize min-h-80 border-solid m-5 border-gray-dark border-2 rounded-md"
                )
            with ui.row() as self.details_row:
                self.details_row.classes("flex")
                ui.textarea(label="description").bind_value(self, "description").classes(
                    "w-1/2 border-solid m-5 border-gray-dark border-2 rounded-md"
                )
                ui.textarea(label="comment").bind_value(self, "comment").classes(
                    "w-2/5 border-solid m-5 border-gray-dark border-2 rounded-md"
                )
                self.named_query_link = ui.html()

    def on_import_button(self, _args):
        """
        publish the current query: store it as a named query and show a link to it
        """
        # the query is either an imported Query object or the text of the textarea
        sparql = self.query.query if isinstance(self.query, Query) else self.query
        if not sparql or not str(sparql).strip():
            # covers None as well as an emptied or blank textarea
            with self.query_row:
                ui.notify("input a query first")
            return
        if self.person:
            self.comment = f"[query nominated by {self.person}] {self.comment or ''}"
        nq_record = {
            "namespace": self.namespace,
            "name": self.name,
            "title": self.title,
            "url": self.url,
            "description": self.description,
            "comment": self.comment,
            "sparql": sparql,
        }
        nq = NamedQuery.from_record(nq_record)
        self.nqm.add_and_store(nq)
        with self.query_row:
            ui.notify(f"added named query {self.name}")
            self.named_query_link.content = nq.as_link()
        self.clear_inputs()

    def clear_inputs(self):
        """
        reset the inputs to their initial values; the namespace is kept
        """
        self.query = None
        # same initial values as in the constructor
        self.name = ""
        self.url = ""
        self.title = ""
        self.description = ""
        self.comment = ""

    def on_input_button(self, _args):
        """
        input a query: read it from the url and show it syntax highlighted
        """
        self.query_row.clear()
        with self.query_row:
            ui.notify(f"importing named query from {self.url}")
            sparql_query = self.qimport.read_from_short_url(self.url)
            self.query = Query(name=self.name, title=self.title, lang="sparql", query=sparql_query)
            query_syntax_highlight = QuerySyntaxHighlight(self.query)
            syntax_highlight_css = query_syntax_highlight.formatter.get_style_defs()
            ui.add_css(syntax_highlight_css)
            ui.html(query_syntax_highlight.highlight())

on_import_button(_args)

import a query

Source code in snapquery/qimport_view.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def on_import_button(self, _args):
    """
    publish the current query: store it as a named query and show a link to it
    """
    # the query is either an imported Query object or the text of the textarea
    sparql = self.query.query if isinstance(self.query, Query) else self.query
    if not sparql or not str(sparql).strip():
        # covers None as well as an emptied or blank textarea
        with self.query_row:
            ui.notify("input a query first")
        return
    if self.person:
        # the comment may be None once the inputs have been cleared
        self.comment = f"[query nominated by {self.person}] {self.comment or ''}"
    nq_record = {
        "namespace": self.namespace,
        "name": self.name,
        "title": self.title,
        "url": self.url,
        "description": self.description,
        "comment": self.comment,
        "sparql": sparql,
    }
    nq = NamedQuery.from_record(nq_record)
    self.nqm.add_and_store(nq)
    with self.query_row:
        ui.notify(f"added named query {self.name}")
        self.named_query_link.content = nq.as_link()
    self.clear_inputs()

on_input_button(_args)

input a query

Source code in snapquery/qimport_view.py
110
111
112
113
114
115
116
117
118
119
120
121
122
def on_input_button(self, _args):
    """
    input a query: read it from the url and show it syntax highlighted
    """
    self.query_row.clear()
    with self.query_row:
        ui.notify(f"importing named query from {self.url}")
        sparql_text = self.qimport.read_from_short_url(self.url)
        # keep the imported query so that it can be published afterwards
        self.query = Query(name=self.name, title=self.title, lang="sparql", query=sparql_text)
        highlighter = QuerySyntaxHighlight(self.query)
        # the style definitions of the formatter are needed for the highlighted markup
        ui.add_css(highlighter.formatter.get_style_defs())
        ui.html(highlighter.highlight())

setup_ui()

setup the user interface

Source code in snapquery/qimport_view.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def setup_ui(self):
    """
    setup the user interface

    Creates three rows inside the container of the solution: the inputs
    and buttons, the query textarea and the description/comment details.
    The inputs are bound to the attributes of this view with the same name.
    """
    with self.solution.container:
        # first row: namespace, name and url inputs, the buttons and the title
        with ui.row() as self.input_row:
            self.input_row.classes("h-full")
            ui.input(label="namespace", placeholder="e.g. wikidata-examples").bind_value(self, "namespace")
            with ui.input(label="name", placeholder="e.g. all proceedings of CEUR-WS").bind_value(self, "name"):
                ui.tooltip("short name for query; needs to be unique within the namespace")
            ui.input(label="url", placeholder="e.g. short url to the query").props("size=80").bind_value(
                self, "url"
            )
            # importing from a url is optional
            if self.allow_importing_from_url:
                ui.button(icon="input", text="Import Query", on_click=self.on_input_button)
            ui.button(icon="publish", text="Publish Query", on_click=self.on_import_button)
            with ui.input(label="title").props("size=80").bind_value(self, "title"):
                ui.tooltip("Descriptive title of the query")
        # second row: the query text
        self.query_row = ui.row().classes("w-full h-full flex ")
        with self.query_row:
            ui.textarea(label="query").bind_value(self, "query").classes(
                "w-full h-full resize min-h-80 border-solid m-5 border-gray-dark border-2 rounded-md"
            )
        # third row: description, comment and the html element for the link to the stored query
        with ui.row() as self.details_row:
            self.details_row.classes("flex")
            ui.textarea(label="description").bind_value(self, "description").classes(
                "w-1/2 border-solid m-5 border-gray-dark border-2 rounded-md"
            )
            ui.textarea(label="comment").bind_value(self, "comment").classes(
                "w-2/5 border-solid m-5 border-gray-dark border-2 rounded-md"
            )
            self.named_query_link = ui.html()

qlever

Created on 2024-06-20

@author: wf

QLever

handle https://github.com/ad-freiburg/qlever specifics

Source code in snapquery/qlever.py
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
class QLever:
    """
    handle https://github.com/ad-freiburg/qlever specifics
    """

    def __init__(self, with_progress=True):
        """
        Constructor

        Args:
            with_progress (bool): stored as an attribute; not used by the
                methods of this class
        """
        self.url = "https://github.com/ad-freiburg/qlever"
        self.with_progress = with_progress
        # Regex pattern to find URLs starting with the specified prefix
        self.wd_url_pattern = re.compile(r"https://qlever\.cs\.uni-freiburg\.de/wikidata/[A-Za-z0-9]+")
        self.osproject = OsProject.fromUrl(self.url)

    def wd_urls_for_ticket(self, ticket: Ticket) -> List[str]:
        """
        Extracts and returns all URLs from a ticket's body and comments that match the specified pattern.

        Args:
            ticket (Ticket): the ticket to scan

        Returns:
            List[str]: the matching URLs, those of the ticket body first,
                followed by those of the comments; duplicates are kept
        """
        extracted_urls = []

        # Extract URLs from the ticket body
        if ticket.body:
            extracted_urls.extend(self.wd_url_pattern.findall(ticket.body))

        # Fetch and extract URLs from comments
        comments = self.osproject.ticketSystem.getComments(self.osproject, ticket.number)
        for comment in comments:
            comment_body = comment["body"]
            # a comment may have no body, just like the ticket itself
            if comment_body:
                extracted_urls.extend(self.wd_url_pattern.findall(comment_body))

        return extracted_urls

    def named_queries_for_tickets(self, ticket_dict):
        """
        Create named queries for each ticket's extracted URLs.

        URLs for which no SPARQL text could be read are skipped.

        Args:
            ticket_dict (dict): Dictionary mapping tickets to a list of URLs.

        Returns:
            NamedQuerySet: A set of named queries generated from the URLs.
        """
        named_query_set = NamedQuerySet(
            domain="qlever.cs.uni-freiburg.de",
            namespace="issues.wikidata",
            target_graph_name="wikidata",
        )
        for ticket, urls in ticket_dict.items():
            # the queries of a ticket are numbered starting with 1
            for i, url in enumerate(urls, 1):
                # Assuming URLs are like 'https://qlever.cs.uni-freiburg.de/wikidata/iTzJwQ'
                short_url_handler = QLeverUrl(url)
                short_url_handler.read_query()
                if short_url_handler.sparql:
                    query = NamedQuery(
                        domain=named_query_set.domain,
                        name=f"Issue{ticket.number}-query{i}",
                        namespace=named_query_set.namespace,
                        url=url,
                        sparql=short_url_handler.sparql,
                        title=f"QLever github issue #{ticket.number}-query{i}",
                        description=ticket.title,
                        comment=f"See ticket {ticket.url} and query {url}",
                    )
                    named_query_set.queries.append(query)
        return named_query_set

named_queries_for_tickets(ticket_dict)

Create named queries for each ticket's extracted URLs.

Parameters:

Name Type Description Default
ticket_dict dict

Dictionary mapping tickets to a list of URLs.

required

Returns:

Name Type Description
NamedQuerySet

A set of named queries generated from the URLs.

Source code in snapquery/qlever.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
def named_queries_for_tickets(self, ticket_dict):
    """
    Build a NamedQuerySet from the QLever short URLs collected per ticket.

    Each URL is read via QLeverUrl.read_query(); URLs for which no SPARQL
    text is available afterwards are left out of the result.

    Args:
        ticket_dict (dict): Dictionary mapping tickets to a list of URLs.

    Returns:
        NamedQuerySet: The named queries created from the readable URLs.
    """
    result_set = NamedQuerySet(
        domain="qlever.cs.uni-freiburg.de",
        namespace="issues.wikidata",
        target_graph_name="wikidata",
    )
    for ticket, urls in ticket_dict.items():
        # the query index is 1-based and counts every URL of the ticket,
        # including the ones that are skipped below
        for index, url in enumerate(urls, 1):
            # URLs are expected to look like
            # 'https://qlever.cs.uni-freiburg.de/wikidata/iTzJwQ'
            qlever_url = QLeverUrl(url)
            qlever_url.read_query()
            if not qlever_url.sparql:
                continue
            named_query = NamedQuery(
                domain=result_set.domain,
                name=f"Issue{ticket.number}-query{index}",
                namespace=result_set.namespace,
                url=url,
                sparql=qlever_url.sparql,
                title=f"QLever github issue #{ticket.number}-query{index}",
                description=ticket.title,
                comment=f"See ticket {ticket.url} and query {url}",
            )
            result_set.queries.append(named_query)
    return result_set

wd_urls_for_ticket(ticket)

Extracts and returns all URLs from a ticket's body and comments that match the specified pattern.

Source code in snapquery/qlever.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def wd_urls_for_ticket(self, ticket: Ticket) -> List[str]:
    """
    Extracts and returns all URLs from a ticket's body and comments that match self.wd_url_pattern.

    Args:
        ticket (Ticket): the ticket to scan; its body and number are read

    Returns:
        List[str]: the matches found in the ticket body followed by the matches of
            each comment, in the order the ticket system returns the comments.
            Duplicates are kept.
    """
    extracted_urls = []

    # Extract URLs from the ticket body (may be empty or None)
    if ticket.body:
        extracted_urls.extend(self.wd_url_pattern.findall(ticket.body))

    # Fetch and extract URLs from comments
    comments = self.osproject.ticketSystem.getComments(self.osproject, ticket.number)
    for comment in comments:
        comment_body = comment["body"]
        # same guard as for the ticket body: a comment without text cannot
        # contain URLs and findall(None) would raise a TypeError
        if comment_body:
            extracted_urls.extend(self.wd_url_pattern.findall(comment_body))

    return extracted_urls

QLeverUrl

Bases: ShortUrl

Handles operations related to QLever short URLs.

Source code in snapquery/qlever.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
class QLeverUrl(ShortUrl):
    """
    Handles operations related to QLever short URLs.
    """

    def __init__(self, short_url: str):
        """
        Constructor

        Args:
            short_url (str): the QLever short URL to handle
        """
        super().__init__(short_url, scheme="https", netloc="qlever.cs.uni-freiburg.de")

    def read_query(self, timeout: float = 30.0) -> str:
        """
        Read a query from a QLever short URL.

        The page behind the final URL is downloaded and the text of the
        textarea with the id "query" is stored in self.sparql. Any error
        raised while downloading or parsing is kept in self.error instead
        of being propagated.

        Args:
            timeout (float): seconds to wait for the page download before
                giving up, so that an unresponsive server cannot block the
                caller forever. Defaults to 30.0.

        Returns:
            str: The SPARQL query extracted from the short URL - self.sparql
                is returned unchanged if nothing could be extracted.
        """
        self.fetch_final_url()
        if self.url:
            try:
                response = requests.get(self.url, timeout=timeout)
                response.raise_for_status()
                soup = BeautifulSoup(response.content, "html.parser")
                query_element = soup.find("textarea", {"id": "query"})
                if query_element and query_element.text:
                    self.sparql = query_element.text.strip()
            except Exception as ex:
                # best effort: remember the problem and let the caller
                # decide based on self.sparql / self.error
                self.error = ex
        return self.sparql

read_query()

Read a query from a QLever short URL.

Returns:

Name Type Description
str str

The SPARQL query extracted from the short URL.

Source code in snapquery/qlever.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def read_query(self, timeout: float = 30.0) -> str:
    """
    Read a query from a QLever short URL.

    The page behind the final URL is downloaded and the text of the
    textarea with the id "query" is stored in self.sparql. Any error
    raised while downloading or parsing is kept in self.error instead
    of being propagated.

    Args:
        timeout (float): seconds to wait for the page download before
            giving up, so that an unresponsive server cannot block the
            caller forever. Defaults to 30.0.

    Returns:
        str: The SPARQL query extracted from the short URL - self.sparql
            is returned unchanged if nothing could be extracted.
    """
    self.fetch_final_url()
    if self.url:
        try:
            response = requests.get(self.url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")
            query_element = soup.find("textarea", {"id": "query"})
            if query_element and query_element.text:
                self.sparql = query_element.text.strip()
        except Exception as ex:
            # best effort: remember the problem and let the caller
            # decide based on self.sparql / self.error
            self.error = ex
    return self.sparql

query_annotate

Created on 2024-05-15

@author: tholzheim

NamespaceStat

contains namespace information

Source code in snapquery/query_annotate.py
101
102
103
104
105
106
107
108
@lod_storable
class NamespaceStat:
    """
    contains namespace information

    Pairs the name of a namespace with a counter.
    NOTE(review): what exactly is counted (presumably the usages of the
    namespace in queries) is not visible here - confirm against the callers.
    """

    # name of the namespace
    name: str
    # counter for the namespace, defaults to 0
    count: int = 0

SparqlQueryAnnotater

Annotate a query

Source code in snapquery/query_annotate.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
class SparqlQueryAnnotater:
    """
    Annotate a query

    The query is rendered to syntax highlighted HTML once in the constructor.
    The prefixed names found in that HTML can be listed with
    get_used_properties() or wrapped into links with annotate().
    """

    def __init__(self, query: Query):
        """
        Constructor

        Args:
            query (Query): the query to highlight and annotate
        """
        self.query = query
        query_syntax_highlight = QuerySyntaxHighlight(query)
        html = query_syntax_highlight.highlight()
        self.soup = BeautifulSoup(html, "html.parser")
        self.stats = QUERY_ITEM_STATS

    def _prefixed_names(self):
        """
        Yield a (prefix, colon, item) tuple for every span of class "nn"
        whose second next sibling is an element carrying the class "nt".

        Prefix spans that are not followed by two siblings, or whose second
        sibling is a text node or an element without the class "nt", are skipped.
        """
        for prefix in self.soup.find_all("span", {"class": "nn"}):
            colon = prefix.next_sibling
            if colon is None:
                # the prefix is the last node of its parent
                continue
            item = colon.next_sibling
            # None and text nodes have no attrs;
            # an element without a class attribute yields None for "class"
            attrs = getattr(item, "attrs", None)
            if attrs and "nt" in (attrs.get("class") or []):
                yield prefix, colon, item

    def get_used_properties(self):
        """
        Get the prefixed names used in the query.

        Returns:
            list: strings of the form "prefix:name" in document order,
                duplicates included
        """
        return [f"{prefix.text}:{item.text}" for prefix, _colon, item in self._prefixed_names()]

    def annotate(self) -> str:
        """
        Wrap every prefixed name of the highlighted query into a link.

        The link title is the label found in self.stats for the identifier,
        falling back to the plain item text if there is no entry. Identifiers
        not starting with "P" or "Q" are looked up as "prefix:name".

        Returns:
            str: the HTML of the annotated query
        """
        for prefix, colon, item in self._prefixed_names():
            identifier = item.text
            if not identifier.startswith(("P", "Q")):
                identifier = f"{prefix.text}:{identifier}"
            item_stat = self.stats.get_by_id(identifier)
            title = item_stat.label if item_stat else item.text
            annotation_element = self.soup.new_tag(
                "a",
                href="http://www.wikidata.org/entity/" + item.text,
                title=title,
                target="_blank",
            )
            # put the link where the prefix was and move the three nodes into it
            prefix.insert_before(annotation_element)
            annotation_element.insert(0, prefix)
            annotation_element.insert(1, colon)
            annotation_element.insert(2, item)
        return str(self.soup)

query_selector

Created on 2024-07-04 @author: wf

QuerySelector

A class to select domain, namespace, and name for a query using comboboxes. Uses a single change handler to update selections dynamically.

Source code in snapquery/query_selector.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
class QuerySelector:
    """
    Lets the user pick the domain, namespace and name of a query with three comboboxes.
    All comboboxes share one change handler that refreshes the selection state
    and the offered options.
    """

    def __init__(self, solution: WebSolution, on_change):
        """
        Constructor

        Args:
            solution (WebSolution): the solution providing the named query manager (nqm)
            on_change: awaitable callback invoked without arguments after a
                selection change; may be falsy to disable the notification
        """
        self.solution = solution
        self.on_change = on_change
        self.nqm = self.solution.nqm
        # the currently selected domain/namespace/name - initially empty
        self.qn = QueryName(domain="", namespace="", name="")
        # the option lists offered by the comboboxes
        self.qns = QueryNameSet(self.nqm)
        self.qns.update(domain=self.qn.domain, namespace=self.qn.namespace)
        self.setup_ui()

    @staticmethod
    def _current_value(combobox) -> str:
        """Get the selected value of the given combobox, "" for a falsy value."""
        return combobox.select.value or ""

    def setup_ui(self):
        """
        Create the row with the comboboxes for domain, namespace and name.
        """
        with ui.row() as self.select_row:
            # arguments: label, options, width in characters
            self.domain_select = self.create_combobox("Domain", self.qns.domains, 25)
            self.namespace_select = self.create_combobox("Namespace", self.qns.namespaces, 40)
            self.name_select = self.create_combobox("Name", self.qns.names, 80)

    def create_combobox(self, label: str, options: List[str], width_chars: int) -> ComboBox:
        """Create a clearable ComboBox wired to handle_change."""
        combobox = ComboBox(
            label=label,
            options=options,
            width_chars=width_chars,
            clearable=True,
            on_change=self.handle_change,
        )
        return combobox

    async def handle_change(self):
        """
        Copy the combobox values to self.qn, refresh the options and notify
        the on_change callback if there is one.
        """
        self.qn.domain = self._current_value(self.domain_select)
        self.qn.namespace = self._current_value(self.namespace_select)
        self.qn.name = self._current_value(self.name_select)

        self.qns.update(domain=self.qn.domain, namespace=self.qn.namespace)
        self.update_ui()

        if not self.on_change:
            return
        await self.on_change()

    def update_options(self, select_widget, options):
        """Delegate the new options to the given select widget."""
        select_widget.update_options(options)

    def update_ui(self):
        """
        Hand the current option lists of self.qns to the comboboxes
        in the order domain, namespace, name.
        """
        for widget_attr, options_attr in (
            ("domain_select", "domains"),
            ("namespace_select", "namespaces"),
            ("name_select", "names"),
        ):
            # attributes are looked up one pair at a time, right before the call
            self.update_options(getattr(self, widget_attr), getattr(self.qns, options_attr))

create_combobox(label, options, width_chars)

Create a ComboBox with the given label, options, and width.

Source code in snapquery/query_selector.py
38
39
40
41
42
43
44
45
46
def create_combobox(self, label: str, options: List[str], width_chars: int) -> ComboBox:
    """
    Create a clearable ComboBox that reports changes to handle_change.

    Args:
        label (str): the label of the combobox
        options (List[str]): the options to offer
        width_chars (int): the width of the combobox in characters

    Returns:
        ComboBox: the newly created combobox
    """
    combobox = ComboBox(
        on_change=self.handle_change,
        clearable=True,
        width_chars=width_chars,
        options=options,
        label=label,
    )
    return combobox

handle_change() async

Update self.qn and call the provided on_change callback

Source code in snapquery/query_selector.py
48
49
50
51
52
53
54
55
56
57
58
59
60
async def handle_change(self):
    """
    Copy the combobox values to self.qn, refresh the offered options and
    notify the on_change callback if there is one.
    """

    def current_value(combobox) -> str:
        # a falsy selection (e.g. None) is normalized to ""
        return combobox.select.value or ""

    self.qn.domain = current_value(self.domain_select)
    self.qn.namespace = current_value(self.namespace_select)
    self.qn.name = current_value(self.name_select)

    self.qns.update(domain=self.qn.domain, namespace=self.qn.namespace)
    self.update_ui()

    if not self.on_change:
        return
    await self.on_change()

setup_ui()

Setup the user interface for query selection using comboboxes.

Source code in snapquery/query_selector.py
29
30
31
32
33
34
35
36
def setup_ui(self):
    """
    Create the row with the comboboxes for domain, namespace and name.

    The row is kept as self.select_row, the comboboxes as self.domain_select,
    self.namespace_select and self.name_select. Their options are taken
    from self.qns.
    """
    new_combobox = self.create_combobox
    name_sets = self.qns
    with ui.row() as self.select_row:
        # arguments: label, options, width in characters
        self.domain_select = new_combobox("Domain", name_sets.domains, 25)
        self.namespace_select = new_combobox("Namespace", name_sets.namespaces, 40)
        self.name_select = new_combobox("Name", name_sets.names, 80)

update_ui()

Update UI components based on filtered results using the custom update_options method for safe sorting.

Source code in snapquery/query_selector.py
65
66
67
68
69
70
71
def update_ui(self):
    """
    Hand the current option lists of self.qns to the comboboxes via
    update_options, in the order domain, namespace, name.
    """
    widget_and_options = (
        ("domain_select", "domains"),
        ("namespace_select", "namespaces"),
        ("name_select", "names"),
    )
    for widget_attr, options_attr in widget_and_options:
        # attributes are looked up one pair at a time, right before the call
        self.update_options(getattr(self, widget_attr), getattr(self.qns, options_attr))

scholia

Created on 2024-05-04

@author: wf

ScholiaQueries

A class to handle the extraction and management of Scholia queries.

Source code in snapquery/scholia.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
class ScholiaQueries:
    """
    A class to handle the extraction and management of Scholia queries.
    """

    repository_url = "https://api.github.com/repos/WDscholia/scholia/contents/scholia/app/templates"

    def __init__(self, nqm: NamedQueryManager, debug: bool = False):
        """
        Constructor

        Args:
            nqm (NamedQueryManager): The NamedQueryManager to use for storing queries.
            debug (bool): Enable debug output. Defaults to False.
        """
        self.nqm = nqm
        self.named_query_set = NamedQuerySet(
            domain="scholia.toolforge.org",
            namespace="named_queries",
            target_graph_name="wikidata",
        )
        self.debug = debug

    def get_scholia_file_list(self, timeout: float = 30.0):
        """
        Retrieve the list of files from the Scholia repository.

        Args:
            timeout (float): seconds to wait for the GitHub API before giving up,
                so that an unresponsive server cannot block forever. Defaults to 30.0.

        Returns:
            list: List of dictionaries representing file information.

        Raises:
            requests.HTTPError: if the API answers with an error status
        """
        headers = {"Accept": "application/vnd.github.v3+json"}
        response = requests.get(self.repository_url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Ensure we notice bad responses
        return response.json()

    def extract_query(self, file_info, timeout: float = 30.0) -> NamedQuery:
        """
        Extract a single query from file information.

        Args:
            file_info (dict): Dictionary containing information about the file;
                the keys "name" and "download_url" are read.
            timeout (float): seconds to wait for the download before giving up.
                Defaults to 30.0.

        Returns:
            NamedQuery: The extracted NamedQuery object or None if the file is
                not a ".sparql" file with a non-empty base name.

        Raises:
            requests.HTTPError: if the download answers with an error status
        """
        suffix = ".sparql"
        file_name = file_info["name"]
        if not file_name.endswith(suffix):
            return None
        name = file_name[: -len(suffix)]
        if not name:
            # a file called just ".sparql" has no usable query name
            return None
        file_response = requests.get(file_info["download_url"], timeout=timeout)
        file_response.raise_for_status()
        return NamedQuery(
            domain=self.named_query_set.domain,
            namespace=self.named_query_set.namespace,
            name=name,
            url=file_info["download_url"],
            title=name,
            description=name,
            comment="",
            sparql=file_response.text,
        )

    def extract_queries(self, limit: int = None):
        """
        Extract all queries from the Scholia repository.

        In debug mode a dot is printed per extracted query and the running
        query count ends the line after every 80 queries.

        Args:
            limit (int, optional): Limit the number of queries fetched.
                Defaults to None (no limit); 0 also means no limit.
        """
        queries = self.named_query_set.queries
        for file_info in self.get_scholia_file_list():
            named_query = self.extract_query(file_info)
            if not named_query:
                continue
            queries.append(named_query)
            if self.debug:
                print(".", end="", flush=True)
                # wrap on the number of queries, not on the file index:
                # the file list also contains files that are not queries
                if len(queries) % 80 == 0:
                    print(f"{len(queries)}")
            if limit and len(queries) >= limit:
                break

        if self.debug:
            print(f"found {len(self.named_query_set.queries)} scholia queries")

    def save_to_json(self, file_path: str = "/tmp/scholia-queries.json"):
        """
        Save the named query set to a JSON file.

        Args:
            file_path (str): Path to the JSON file.
        """
        self.named_query_set.save_to_json_file(file_path, indent=2)

    def store_queries(self):
        """
        Store the named queries into the database.
        """
        self.nqm.store_named_query_list(self.named_query_set)

__init__(nqm, debug=False)

Constructor

Parameters:

Name Type Description Default
nqm NamedQueryManager

The NamedQueryManager to use for storing queries.

required
debug bool

Enable debug output. Defaults to False.

False
Source code in snapquery/scholia.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def __init__(self, nqm: NamedQueryManager, debug: bool = False):
    """
    Create the handler with an empty named query set for Scholia.

    Args:
        nqm (NamedQueryManager): The NamedQueryManager to use for storing queries.
        debug (bool): Enable debug output. Defaults to False.
    """
    self.debug = debug
    self.nqm = nqm
    # all extracted queries share this domain, namespace and target graph
    scholia_set = NamedQuerySet(
        domain="scholia.toolforge.org",
        namespace="named_queries",
        target_graph_name="wikidata",
    )
    self.named_query_set = scholia_set

extract_queries(limit=None)

Extract all queries from the Scholia repository.

Parameters:

Name Type Description Default
limit int

Limit the number of queries fetched. Defaults to None.

None
Source code in snapquery/scholia.py
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def extract_queries(self, limit: int = None):
    """
    Extract all queries from the Scholia repository.

    Every file reported by get_scholia_file_list() is passed to
    extract_query(); the queries obtained are appended to
    self.named_query_set.queries. In debug mode a dot is printed per
    extracted query and the running query count ends the line after every
    80 queries.

    Args:
        limit (int, optional): Limit the number of queries fetched.
            Defaults to None (no limit); 0 also means no limit.
    """
    queries = self.named_query_set.queries
    for file_info in self.get_scholia_file_list():
        named_query = self.extract_query(file_info)
        if not named_query:
            continue
        queries.append(named_query)
        if self.debug:
            print(".", end="", flush=True)
            # wrap on the number of queries, not on the file index:
            # the file list also contains files that are not queries
            if len(queries) % 80 == 0:
                print(f"{len(queries)}")
        if limit and len(queries) >= limit:
            break

    if self.debug:
        print(f"found {len(self.named_query_set.queries)} scholia queries")

extract_query(file_info)

Extract a single query from file information.

Parameters:

Name Type Description Default
file_info dict

Dictionary containing information about the file.

required

Returns:

Name Type Description
NamedQuery NamedQuery

The extracted NamedQuery object.

Source code in snapquery/scholia.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def extract_query(self, file_info, timeout: float = 30.0) -> NamedQuery:
    """
    Extract a single query from file information.

    Args:
        file_info (dict): Dictionary containing information about the file;
            the keys "name" and "download_url" are read.
        timeout (float): seconds to wait for the download before giving up,
            so that an unresponsive server cannot block forever. Defaults to 30.0.

    Returns:
        NamedQuery: The extracted NamedQuery object or None if the file is
            not a ".sparql" file with a non-empty base name.

    Raises:
        requests.HTTPError: if the download answers with an error status
    """
    suffix = ".sparql"
    file_name = file_info["name"]
    if not file_name.endswith(suffix):
        return None
    name = file_name[: -len(suffix)]
    if not name:
        # a file called just ".sparql" has no usable query name
        return None
    file_response = requests.get(file_info["download_url"], timeout=timeout)
    file_response.raise_for_status()
    return NamedQuery(
        domain=self.named_query_set.domain,
        namespace=self.named_query_set.namespace,
        name=name,
        url=file_info["download_url"],
        title=name,
        description=name,
        comment="",
        sparql=file_response.text,
    )

get_scholia_file_list()

Retrieve the list of SPARQL files from the Scholia repository.

Returns:

Name Type Description
list

List of dictionaries representing file information.

Source code in snapquery/scholia.py
35
36
37
38
39
40
41
42
43
44
45
def get_scholia_file_list(self, timeout: float = 30.0):
    """
    Retrieve the list of files from the Scholia repository.

    The GitHub contents API is queried at self.repository_url.

    Args:
        timeout (float): seconds to wait for the GitHub API before giving up,
            so that an unresponsive server cannot block forever. Defaults to 30.0.

    Returns:
        list: List of dictionaries representing file information.

    Raises:
        requests.HTTPError: if the API answers with an error status
    """
    headers = {"Accept": "application/vnd.github.v3+json"}
    response = requests.get(self.repository_url, headers=headers, timeout=timeout)
    response.raise_for_status()  # Ensure we notice bad responses
    return response.json()

save_to_json(file_path='/tmp/scholia-queries.json')

Save the named query set to a JSON file.

Parameters:

Name Type Description Default
file_path str

Path to the JSON file.

'/tmp/scholia-queries.json'
Source code in snapquery/scholia.py
 96
 97
 98
 99
100
101
102
103
def save_to_json(self, file_path: str = "/tmp/scholia-queries.json"):
    """
    Write self.named_query_set to a JSON file with an indentation of 2.

    Args:
        file_path (str): Path to the JSON file.
    """
    query_set = self.named_query_set
    query_set.save_to_json_file(file_path, indent=2)

store_queries()

Store the named queries into the database.

Source code in snapquery/scholia.py
105
106
107
108
109
def store_queries(self):
    """
    Hand self.named_query_set to the named query manager for storing.
    """
    manager = self.nqm
    manager.store_named_query_list(self.named_query_set)

snapquery_cmd

Created on 2024-05-03

@author: wf

SnapQueryCmd

Bases: WebserverCmd

Command line for the snapquery server

Source code in snapquery/snapquery_cmd.py
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
class SnapQueryCmd(WebserverCmd):
    """
    Command line for the snapquery server
    """

    def getArgParser(self, description: str, version_msg) -> ArgumentParser:
        """
        override the default argparser call

        Args:
            description (str): the description passed on to the base parser
            version_msg: the version message passed on to the base parser

        Returns:
            ArgumentParser: the base parser extended by the snapquery options
        """
        parser = super().getArgParser(description, version_msg)
        # see https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/querymain.py
        parser.add_argument(
            "-ep",
            "--endpointPath",
            default=None,
            help="path to yaml file to configure endpoints to use for queries",
        )
        parser.add_argument(
            "-en",
            "--endpointName",
            default="wikidata",
            # the valid names are the endpoints known to the sample manager
            choices=list(NamedQueryManager.from_samples().endpoints.keys()),
            help="Name of the endpoint to use for queries - use --listEndpoints to list available endpoints",
        )
        parser.add_argument(
            "-idb",
            "--initDatabase",
            action="store_true",
            help="initialize the database",
        )
        parser.add_argument(
            "-le",
            "--listEndpoints",
            action="store_true",
            help="show the list of available endpoints",
        )
        parser.add_argument(
            "-lm",
            "--listMetaqueries",
            action="store_true",
            help="show the list of available metaqueries",
        )
        parser.add_argument(
            "-ln",
            "--listNamespaces",
            action="store_true",
            help="show the list of available namespaces",
        )
        parser.add_argument(
            "-lg",
            "--listGraphs",
            action="store_true",
            help="show the list of available graphs",
        )
        parser.add_argument(
            "-tq",
            "--testQueries",
            action="store_true",
            help="test run the queries",
        )
        parser.add_argument("--limit", type=int, default=None, help="set limit parameter of query")
        parser.add_argument(
            "--params",
            action=StoreDictKeyPair,
            help="query parameters as Key-value pairs in the format key1=value1,key2=value2",
        )
        parser.add_argument(
            "--domain",
            type=str,
            default="wikidata.org",
            help="domain to filter queries",
        )
        parser.add_argument(
            "--namespace",
            type=str,
            default="examples",
            help="namespace to filter queries",
        )
        parser.add_argument("-qn", "--queryName", help="run a named query")
        parser.add_argument(
            "query_id",
            nargs="?",  # Make it optional
            help="Query ID in the format 'name[--namespace[@domain]]'",
        )
        parser.add_argument("-f", "--format", type=Format, choices=list(Format))
        parser.add_argument(
            "--import",
            # "import" is a keyword and can not be used as an attribute name
            dest="import_file",
            help="Import named queries from a JSON file.",
        )
        parser.add_argument(
            "--context",
            type=str,
            default="test",
            help="context name to store the execution statistics with",
        )
        parser.add_argument(
            "--prefix_merger",
            type=str,
            default=QueryPrefixMerger.default_merger().name,
            choices=[merger.name for merger in QueryPrefixMerger],
            help="query prefix merger to use",
        )
        return parser

    def cmd_parse(self, argv: Optional[list] = None):
        """
        parse the argument lists and prepare

        Sets the log level to DEBUG if the debug argument is set and to INFO
        otherwise. If the parsed arguments have a func attribute it is called
        with the arguments.

        Args:
            argv (list): list of command line arguments

        Returns:
            the parsed arguments
        """
        super().cmd_parse(argv)
        if self.args.debug:
            level = logging.DEBUG
        else:
            level = logging.INFO
        logging.basicConfig(level=level)
        if hasattr(self.args, "func"):
            self.args.func(self.args)
        return self.args

    def handle_args(self) -> bool:
        """
        handle the command line args

        Exactly one of the list, test, query and import actions is performed,
        in that order of precedence.

        Returns:
            bool: the result of the base class handling, or True if one of
                the list actions or the import was performed

        Raises:
            Exception: if the query to run needs parameters and none are given
        """
        # Call the superclass handle_args to maintain base class behavior
        handled = super().handle_args()
        self.debug = self.args.debug
        # create the manager exactly once, so that self.nqm and the local alias
        # refer to the same instance also when --initDatabase forces a
        # re-initialization of the database
        nqm = NamedQueryManager.from_samples(force_init=self.args.initDatabase)
        self.nqm = nqm
        # Check args functions
        if self.args.listEndpoints:
            # List endpoints
            for endpoint in self.nqm.endpoints.values():
                print(endpoint)
            handled = True  # Operation handled
        elif self.args.listGraphs:
            print(self.nqm.gm.to_json(indent=2))
            handled = True
        elif self.args.listMetaqueries:
            meta_qm = self.nqm.meta_qm
            for name, query in meta_qm.queriesByName.items():
                print(f"{name}:{query.title}")
            handled = True
        elif self.args.listNamespaces:
            namespaces = self.nqm.get_namespaces()
            for namespace, count in namespaces.items():
                print(f"{namespace}:{count}")
            handled = True
        elif self.args.testQueries:
            if self.args.endpointName:
                endpoint_names = [self.args.endpointName]
            else:
                endpoint_names = list(nqm.endpoints.keys())
            queries = self.nqm.get_all_queries(domain=self.args.domain, namespace=self.args.namespace)
            execution = Execution(self.nqm, debug=self.args.debug)
            # run every query against every selected endpoint
            for i, nq in enumerate(queries, start=1):
                for endpoint_name in endpoint_names:
                    execution.execute(
                        nq,
                        endpoint_name=endpoint_name,
                        context=self.args.context,
                        title=f"query {i:3}/{len(queries)}::{endpoint_name}",
                        prefix_merger=QueryPrefixMerger.get_by_name(self.args.prefix_merger),
                    )
        elif self.args.queryName is not None or self.args.query_id is not None:
            # the positional query id takes precedence over --queryName
            if self.args.query_id is not None:
                query_name = QueryName.from_query_id(self.args.query_id)
            else:
                query_name = QueryName(
                    name=self.args.queryName,
                    namespace=self.args.namespace,
                    domain=self.args.domain,
                )
            endpoint_name = self.args.endpointName
            r_format = self.args.format
            limit = self.args.limit
            qb = nqm.get_query(query_name=query_name, endpoint_name=endpoint_name, limit=limit)
            query = qb.query
            params = Params(query.query)
            if params.has_params:
                if not self.args.params:
                    raise Exception(f"{query.name} needs parameters")
                else:
                    params.set(self.args.params)
                    query.query = params.apply_parameters()
            if r_format == Format.raw:
                formatted_result = qb.raw_query()
            else:
                qlod = qb.get_lod()
                formatted_result = qb.format_result(qlod=qlod, r_format=r_format)
            print(formatted_result)
        elif self.args.import_file:
            self.handle_import(self.args.import_file)
            handled = True
        return handled

    def handle_import(self, json_file: str):
        """
        Handle the import of named queries from a JSON file.

        The queries are stored while importing and the number of imported
        queries is printed.

        Args:
            json_file (str): Path to the JSON file to import.
        """
        nqm = NamedQueryManager.from_samples()
        qimport = QueryImport(nqm=nqm)
        nq_list = qimport.import_from_json_file(json_file, with_store=True, show_progress=True)
        print(f"Imported {len(nq_list.queries)} named queries from {json_file}.")

cmd_parse(argv=None)

parse the argument lists and prepare

Parameters:

Name Type Description Default
argv Optional[list]

list of command line arguments

None
Source code in snapquery/snapquery_cmd.py
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def cmd_parse(self, argv: Optional[list] = None):
    """
    Parse the command line arguments and configure logging.

    The log level is DEBUG if the debug argument is set and INFO otherwise.
    If the parsed arguments have a func attribute it is called with the
    arguments.

    Args:
        argv (list): list of command line arguments

    Returns:
        the parsed arguments
    """
    super().cmd_parse(argv)
    args = self.args
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
    if hasattr(self.args, "func"):
        self.args.func(self.args)
    return self.args

getArgParser(description, version_msg)

override the default argparser call

Source code in snapquery/snapquery_cmd.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def getArgParser(self, description: str, version_msg) -> ArgumentParser:
    """
    Extend the argument parser of the superclass with the snapquery options.

    Args:
        description (str): description passed on to the superclass parser
        version_msg: version message passed on to the superclass parser

    Returns:
        ArgumentParser: the superclass parser with the snapquery arguments added
    """
    parser = super().getArgParser(description, version_msg)
    # see https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/querymain.py
    parser.add_argument(
        "-ep",
        "--endpointPath",
        default=None,
        help="path to yaml file to configure endpoints to use for queries",
    )
    # the selectable endpoint names are the endpoints known to the sample based manager
    endpoint_names = list(NamedQueryManager.from_samples().endpoints.keys())
    parser.add_argument(
        "-en",
        "--endpointName",
        default="wikidata",
        choices=endpoint_names,
        help="Name of the endpoint to use for queries - use --listEndpoints to list available endpoints",
    )
    # plain on/off switches - registered in this order so the help output keeps its layout
    switches = (
        ("-idb", "--initDatabase", "initialize the database"),
        ("-le", "--listEndpoints", "show the list of available endpoints"),
        ("-lm", "--listMetaqueries", "show the list of available metaqueries"),
        ("-ln", "--listNamespaces", "show the list of available namespaces"),
        ("-lg", "--listGraphs", "show the list of available graphs"),
        ("-tq", "--testQueries", "test run the queries"),
    )
    for short_flag, long_flag, help_text in switches:
        parser.add_argument(short_flag, long_flag, action="store_true", help=help_text)
    parser.add_argument("--limit", type=int, default=None, help="set limit parameter of query")
    parser.add_argument(
        "--params",
        action=StoreDictKeyPair,
        help="query parameters as Key-value pairs in the format key1=value1,key2=value2",
    )
    # filters selecting the queries to work on
    parser.add_argument("--domain", type=str, default="wikidata.org", help="domain to filter queries")
    parser.add_argument("--namespace", type=str, default="examples", help="namespace to filter queries")
    parser.add_argument("-qn", "--queryName", help="run a named query")
    # optional positional argument
    parser.add_argument(
        "query_id",
        nargs="?",
        help="Query ID in the format 'name[--namespace[@domain]]'",
    )
    parser.add_argument("-f", "--format", type=Format, choices=list(Format))
    parser.add_argument(
        "--import",
        dest="import_file",
        help="Import named queries from a JSON file.",
    )
    parser.add_argument(
        "--context",
        type=str,
        default="test",
        help="context name to store the execution statistics with",
    )
    default_merger_name = QueryPrefixMerger.default_merger().name
    merger_names = [merger.name for merger in QueryPrefixMerger]
    parser.add_argument(
        "--prefix_merger",
        type=str,
        default=default_merger_name,
        choices=merger_names,
        help="query prefix merger to use",
    )
    return parser

handle_args()

handle the command line args

Source code in snapquery/snapquery_cmd.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
def handle_args(self) -> bool:
    """
    handle the command line args

    Runs the superclass handling first and then at most one of the
    snapquery actions, checked in this order: list endpoints, list graphs,
    list metaqueries, list namespaces, test run queries, run a single
    query, import queries from a JSON file.

    Returns:
        bool: True if the superclass or one of the actions handled the arguments

    Raises:
        Exception: if the selected query needs parameters and none were given
    """
    # Call the superclass handle_args to maintain base class behavior
    handled = super().handle_args()
    args = self.args
    self.debug = args.debug
    # A single manager serves all actions so that --initDatabase also
    # applies to the instance kept in self.nqm.
    nqm = NamedQueryManager.from_samples(force_init=args.initDatabase)
    self.nqm = nqm
    if args.listEndpoints:
        for endpoint in nqm.endpoints.values():
            print(endpoint)
        handled = True
    elif args.listGraphs:
        print(nqm.gm.to_json(indent=2))
        handled = True
    elif args.listMetaqueries:
        for name, query in nqm.meta_qm.queriesByName.items():
            print(f"{name}:{query.title}")
        handled = True
    elif args.listNamespaces:
        for namespace, count in nqm.get_namespaces().items():
            print(f"{namespace}:{count}")
        handled = True
    elif args.testQueries:
        if args.endpointName:
            endpoint_names = [args.endpointName]
        else:
            endpoint_names = list(nqm.endpoints.keys())
        queries = nqm.get_all_queries(domain=args.domain, namespace=args.namespace)
        execution = Execution(nqm, debug=args.debug)
        # loop invariants: resolve the merger and the query count only once
        prefix_merger = QueryPrefixMerger.get_by_name(args.prefix_merger)
        total = len(queries)
        for i, nq in enumerate(queries, start=1):
            for endpoint_name in endpoint_names:
                execution.execute(
                    nq,
                    endpoint_name=endpoint_name,
                    context=args.context,
                    title=f"query {i:3}/{total}::{endpoint_name}",
                    prefix_merger=prefix_merger,
                )
        handled = True
    elif args.queryName is not None or args.query_id is not None:
        # a positional query id takes precedence over --queryName
        if args.query_id is not None:
            query_name = QueryName.from_query_id(args.query_id)
        else:
            query_name = QueryName(
                name=args.queryName,
                namespace=args.namespace,
                domain=args.domain,
            )
        r_format = args.format
        qb = nqm.get_query(
            query_name=query_name,
            endpoint_name=args.endpointName,
            limit=args.limit,
        )
        query = qb.query
        params = Params(query.query)
        if params.has_params:
            if not args.params:
                raise Exception(f"{query.name} needs parameters")
            params.set(args.params)
            query.query = params.apply_parameters()
        if r_format == Format.raw:
            formatted_result = qb.raw_query()
        else:
            qlod = qb.get_lod()
            formatted_result = qb.format_result(qlod=qlod, r_format=r_format)
        print(formatted_result)
        handled = True
    elif args.import_file:
        self.handle_import(args.import_file)
        handled = True
    return handled

handle_import(json_file)

Handle the import of named queries from a JSON file.

Parameters:

Name Type Description Default
json_file str

Path to the JSON file to import.

required
Source code in snapquery/snapquery_cmd.py
223
224
225
226
227
228
229
230
231
232
233
def handle_import(self, json_file: str):
    """
    Import the named queries of a JSON file and print how many were imported.

    The import runs against a NamedQueryManager created from the samples
    with storing and progress display switched on.

    Args:
        json_file (str): Path to the JSON file to import.
    """
    importer = QueryImport(nqm=NamedQueryManager.from_samples())
    imported = importer.import_from_json_file(
        json_file,
        with_store=True,
        show_progress=True,
    )
    query_count = len(imported.queries)
    print(f"Imported {query_count} named queries from {json_file}.")

main(argv=None)

main call

Source code in snapquery/snapquery_cmd.py
236
237
238
239
240
241
242
243
244
245
def main(argv: list = None):
    """
    Create the snapquery command for the SnapQueryWebServer and run it.

    Args:
        argv (list): command line arguments passed on to cmd_main

    Returns:
        the exit code returned by cmd_main
    """
    config = SnapQueryWebServer.get_config()
    snapquery_cmd = SnapQueryCmd(config=config, webserver_cls=SnapQueryWebServer)
    return snapquery_cmd.cmd_main(argv)

snapquery_core

Created on 2024-05-03

@author: wf

NamedQuery dataclass

Bases: QueryName

A named query that encapsulates the details and SPARQL query for a specific purpose.

Attributes:

Name Type Description
title str

A brief one-line title that describes the query.

description str

A detailed multiline description of what the query does and the data it accesses.

sparql str

The SPARQL query string. This might be hidden in future to encapsulate query details.

query_id str

A unique identifier for the query, generated from namespace and name, used as a primary key.

Source code in snapquery/snapquery_core.py
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
@dataclass
class NamedQuery(QueryName):
    """
    A named query that encapsulates the details and SPARQL query for a specific purpose.

    Attributes:
        title (str): A brief one-line title that describes the query.
        description (str): A detailed multiline description of what the query does and the data it accesses.
        sparql (str): The SPARQL query string. This might be hidden in future to encapsulate query details.
        url (str): The url of the source code of the query.
        comment (str): An optional comment on the query.
        query_id (str): A unique identifier for the query, generated from namespace and name, used as a primary key.
    """

    # sparql query (to be hidden later)
    sparql: Optional[str] = None
    # the url of the source code of the query
    url: Optional[str] = None
    # one line title
    title: Optional[str] = None
    # multiline description
    description: Optional[str] = None
    # optional comment
    comment: Optional[str] = None

    @classmethod
    def get_samples(cls) -> dict[str, list["NamedQuery"]]:
        """
        get samples

        Returns:
            dict: a single entry "snapquery-examples" mapping to the list of
                sample queries (cats, bands, horses)
        """
        samples = {
            "snapquery-examples": [
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="cats",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Cats",
                    title="Cats on Wikidata",
                    description="This query retrieves all items classified under 'house cat' (Q146) on Wikidata.",
                    comment="modified cats query from wikidata-examples",
                    sparql="""# snapquery cats example
SELECT ?item ?itemLabel
WHERE {
  ?item wdt:P31 wd:Q146. # Must be a cat
  OPTIONAL { ?item rdfs:label ?itemLabel. }
  FILTER (LANG(?itemLabel) = "en")
}
""",
                ),
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="bands",
                    title="Rock bands",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Rock_bands_that_start_with_%22M%22",
                    description="""Rock bands that start with "M" """,
                    comment="",
                    sparql="""SELECT ?band ?bandLabel
WHERE {
  ?band wdt:P31 wd:Q5741069.
  ?band rdfs:label ?bandLabel.
  FILTER(LANG(?bandLabel)="en").
  FILTER(STRSTARTS(?bandLabel,"M")).
}""",
                ),
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="horses",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Horses_(showing_some_info_about_them)",
                    title="Horses on Wikidata",
                    description="This query retrieves information about horses, including parents, gender, and approximate birth and death years.",
                    sparql="""# snapquery example horses
SELECT DISTINCT ?horse ?horseLabel ?mother ?motherLabel ?father ?fatherLabel
(year(?birthdate) as ?birthyear) (year(?deathdate) as ?deathyear) ?genderLabel
WHERE {
  ?horse wdt:P31/wdt:P279* wd:Q726 .     # Instance and subclasses of horse (Q726)
  OPTIONAL{?horse wdt:P25 ?mother .}     # Mother
  OPTIONAL{?horse wdt:P22 ?father .}     # Father
  OPTIONAL{?horse wdt:P569 ?birthdate .} # Birth date
  OPTIONAL{?horse wdt:P570 ?deathdate .} # Death date
  OPTIONAL{?horse wdt:P21 ?gender .}     # Gender
  OPTIONAL { ?horse rdfs:label ?horseLabel . FILTER (lang(?horseLabel) = "en") }
  OPTIONAL { ?mother rdfs:label ?motherLabel . FILTER (lang(?motherLabel) = "en") }
  OPTIONAL { ?father rdfs:label ?fatherLabel . FILTER (lang(?fatherLabel) = "en") }
  OPTIONAL { ?gender rdfs:label ?genderLabel . FILTER (lang(?genderLabel) = "en") }
}
ORDER BY ?horse
""",
                ),
            ]
        }
        return samples

    def as_link(self) -> str:
        """
        get me as a link

        Returns:
            str: the link created by Link.create for
                /query/{domain}/{namespace}/{name} with my name as text
        """
        url = f"/query/{self.domain}/{self.namespace}/{self.name}"
        text = self.name
        tooltip = "query details"
        link = Link.create(url, text, tooltip)
        return link

    @classmethod
    def from_record(cls, record: Dict) -> "NamedQuery":
        """
        Class method to instantiate NamedQuery
        from a dictionary record.

        Args:
            record (Dict): must contain "domain", "namespace" and "name";
                "title", "url", "description", "sparql" and "comment" are
                optional and default to None

        Returns:
            NamedQuery: the instance created from the record

        Raises:
            KeyError: if one of the mandatory keys is missing
        """
        return cls(
            domain=record["domain"],
            namespace=record["namespace"],
            name=record["name"],
            title=record.get("title"),
            url=record.get("url"),
            description=record.get("description"),
            sparql=record.get("sparql"),
            # keep the comment if the record carries one
            comment=record.get("comment"),
        )

    def as_record(self) -> Dict:
        """
        Return me as a record with the keys query_id, domain, namespace,
        name, url, title, description and sparql.

        The comment is not part of the record.
        """
        record = {
            "query_id": self.query_id,
            "domain": self.domain,
            "namespace": self.namespace,
            "name": self.name,
            "url": self.url,
            "title": self.title,
            "description": self.description,
            "sparql": self.sparql,
        }
        return record

    def as_viewrecord(self) -> Dict:
        """
        Return a dictionary representing the NamedQuery for display with the
        keys ordered as domain, namespace, name, title, url.

        The name is given as the link of as_link and the url as a link to itself.
        """
        url_link = Link.create(self.url, self.url)
        return {
            "domain": self.domain,
            "namespace": self.namespace,
            "name": self.as_link(),
            "title": self.title,
            "url": url_link,
        }

get me as a link

Source code in snapquery/snapquery_core.py
331
332
333
334
335
336
337
338
339
def as_link(self) -> str:
    """
    Create the link to my query details.

    Returns:
        str: the link created by Link.create for
            /query/{domain}/{namespace}/{name} with my name as text
    """
    return Link.create(
        f"/query/{self.domain}/{self.namespace}/{self.name}",
        self.name,
        "query details",
    )

as_viewrecord()

Return a dictionary representing the NamedQuery for display, with keys ordered as domain, namespace, name (as a link), title, url (as a link).

Source code in snapquery/snapquery_core.py
370
371
372
373
374
375
376
377
378
379
380
381
def as_viewrecord(self) -> Dict:
    """
    Return my view record with the keys ordered as domain, namespace, name, title, url.

    The name is given as the link of as_link and the url as a link to itself.
    """
    link_to_source = Link.create(self.url, self.url)
    view_record = dict(
        domain=self.domain,
        namespace=self.namespace,
        name=self.as_link(),
        title=self.title,
        url=link_to_source,
    )
    return view_record

from_record(record) classmethod

Class method to instantiate NamedQuery from a dictionary record.

Source code in snapquery/snapquery_core.py
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
@classmethod
def from_record(cls, record: Dict) -> "NamedQuery":
    """
    Class method to instantiate NamedQuery
    from a dictionary record.

    Args:
        record (Dict): must contain "domain", "namespace" and "name";
            "title", "url", "description", "sparql" and "comment" are
            optional and default to None

    Returns:
        NamedQuery: the instance created from the record

    Raises:
        KeyError: if one of the mandatory keys is missing
    """
    return cls(
        domain=record["domain"],
        namespace=record["namespace"],
        name=record["name"],
        title=record.get("title"),
        url=record.get("url"),
        description=record.get("description"),
        sparql=record.get("sparql"),
        # keep the comment if the record carries one
        comment=record.get("comment"),
    )

get_samples() classmethod

get samples

Source code in snapquery/snapquery_core.py
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
    @classmethod
    def get_samples(cls) -> dict[str, "NamedQuery"]:
        """
        get samples
        """
        samples = {
            "snapquery-examples": [
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="cats",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Cats",
                    title="Cats on Wikidata",
                    description="This query retrieves all items classified under 'house cat' (Q146) on Wikidata.",
                    comment="modified cats query from wikidata-examples",
                    sparql="""# snapquery cats example
SELECT ?item ?itemLabel
WHERE {
  ?item wdt:P31 wd:Q146. # Must be a cat
  OPTIONAL { ?item rdfs:label ?itemLabel. }
  FILTER (LANG(?itemLabel) = "en")
}
""",
                ),
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="bands",
                    title="Rock bands",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Rock_bands_that_start_with_%22M%22",
                    description="""Rock bands that start with "M" """,
                    comment="",
                    sparql="""SELECT ?band ?bandLabel
WHERE {
  ?band wdt:P31 wd:Q5741069.
  ?band rdfs:label ?bandLabel.
  FILTER(LANG(?bandLabel)="en").
  FILTER(STRSTARTS(?bandLabel,"M")).
}""",
                ),
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="horses",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Horses_(showing_some_info_about_them)",
                    title="Horses on Wikidata",
                    description="This query retrieves information about horses, including parents, gender, and approximate birth and death years.",
                    sparql="""# snapquery example horses
SELECT DISTINCT ?horse ?horseLabel ?mother ?motherLabel ?father ?fatherLabel
(year(?birthdate) as ?birthyear) (year(?deathdate) as ?deathyear) ?genderLabel
WHERE {
  ?horse wdt:P31/wdt:P279* wd:Q726 .     # Instance and subclasses of horse (Q726)
  OPTIONAL{?horse wdt:P25 ?mother .}     # Mother
  OPTIONAL{?horse wdt:P22 ?father .}     # Father
  OPTIONAL{?horse wdt:P569 ?birthdate .} # Birth date
  OPTIONAL{?horse wdt:P570 ?deathdate .} # Death date
  OPTIONAL{?horse wdt:P21 ?gender .}     # Gender
  OPTIONAL { ?horse rdfs:label ?horseLabel . FILTER (lang(?horseLabel) = "en") }
  OPTIONAL { ?mother rdfs:label ?motherLabel . FILTER (lang(?motherLabel) = "en") }
  OPTIONAL { ?father rdfs:label ?fatherLabel . FILTER (lang(?fatherLabel) = "en") }
  OPTIONAL { ?gender rdfs:label ?genderLabel . FILTER (lang(?genderLabel) = "en") }
}
ORDER BY ?horse
""",
                ),
            ]
        }
        return samples

NamedQueryManager

Manages the storage, retrieval, and execution of named SPARQL queries.

Source code in snapquery/snapquery_core.py
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
class NamedQueryManager:
    """
    Manages the storage, retrieval, and execution of named SPARQL queries.
    """

    def __init__(self, db_path: str = None, debug: bool = False):
        """
        Open the database and load the endpoint, meta query and graph configuration.

        Args:
            db_path (Optional[str]): The file path to the SQLite database.
                If None, the path returned by get_cache_path() is used.
            debug (bool): Debug flag; it is stored on the instance and handed on to SQLDB.

        Attributes:
            debug (bool): The debug flag.
            sql_db (SQLDB): The SQLDB instance opened for db_path.
            samples_path (str): The "samples" directory next to this Python module.
            endpoints (dict): The SPARQL endpoints read from samples/endpoints.yaml.
            meta_qm (QueryManager): The SQL queries read from samples/meta_query.yaml.
            gm (GraphManager): The graphs loaded from the GraphManager yaml file.
            primary_keys (dict): The primary key column name per entity class.
            entity_infos (dict): EntityInfo instances per entity class, initially empty.
        """
        self.debug = debug
        if db_path is None:
            db_path = NamedQueryManager.get_cache_path()
        self.sql_db = SQLDB(dbname=db_path, check_same_thread=False, debug=debug)

        # the sample yaml files are located relative to the current Python module
        module_dir = os.path.dirname(os.path.abspath(__file__))
        self.samples_path = os.path.join(module_dir, "samples")
        self.endpoints = EndpointManager.getEndpoints(
            endpointPath=os.path.join(self.samples_path, "endpoints.yaml"),
            lang="sparql",
            with_default=False,
        )
        self.meta_qm = QueryManager(
            queriesPath=os.path.join(self.samples_path, "meta_query.yaml"),
            with_default=False,
            lang="sql",
        )

        # Graph Manager
        self.gm = GraphManager.load_from_yaml_file(GraphManager.get_yaml_path())

        # SQL meta data handling: primary key column per entity class
        self.primary_keys = {
            QueryStats: "stats_id",
            NamedQuery: "query_id",
            QueryDetails: "query_id",
        }
        self.entity_infos = {}

    @classmethod
    def get_cache_path(cls) -> str:
        home = str(Path.home())
        cache_dir = f"{home}/.solutions/snapquery/storage"
        os.makedirs(cache_dir, exist_ok=True)
        cache_path = f"{cache_dir}/named_queries.db"
        return cache_path

    @classmethod
    def from_samples(
        cls,
        db_path: Optional[str] = None,
        force_init: bool = False,
        with_backup: bool = True,
        debug: bool = False,
    ) -> "NamedQueryManager":
        """
        Creates and returns an instance of NamedQueryManager, optionally initializing it from sample data.

        The tables are (re)created from the sample records if force_init is set,
        or if the database file does not exist or is empty.

        Args:
            db_path (Optional[str]): Path to the database file. If None, the default path is used.
            force_init (bool): If True, the tables are dropped and recreated from the samples.
            with_backup (bool): If True and force_init is True, an existing database file is renamed
                to a timestamped backup file before reinitialization.
            debug (bool): If True, enables debug mode which may provide additional logging.

        Returns:
            NamedQueryManager: An instance of the manager initialized with the database at `db_path`.
        """
        if db_path is None:
            db_path = cls.get_cache_path()

        path_obj = Path(db_path)

        # Move an existing database file out of the way before it is reinitialized
        if force_init and with_backup and path_obj.exists():
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
            backup_path = path_obj.with_name(f"{path_obj.stem}-{timestamp}{path_obj.suffix}")
            path_obj.rename(backup_path)

        nqm = NamedQueryManager(db_path=db_path, debug=debug)
        if force_init or not path_obj.exists() or path_obj.stat().st_size == 0:
            for source_class in (NamedQuery, QueryStats, QueryDetails):
                # Use the primary keys configured in __init__ as the single source of truth;
                # a literal "quer_id" was previously used for QueryDetails by mistake.
                pk = nqm.primary_keys[source_class]

                # Fetch sample records from the specified class
                sample_records = cls.get_sample_records(source_class=source_class)

                # Define entity information dynamically based on the class and primary key
                entityInfo = EntityInfo(sample_records, name=source_class.__name__, primaryKey=pk)

                # Create and populate the table specific to each class
                # NOTE(review): createTable is called without the primary key - confirm
                # whether the created tables are expected to carry it.
                nqm.sql_db.createTable(sample_records, source_class.__name__, withDrop=True)
                nqm.sql_db.store(sample_records, entityInfo, fixNone=True, replace=True)
            # store yaml defined entities to SQL database
            nqm.store_endpoints()
            nqm.store_graphs()
        return nqm

    def store_named_query_list(self, nq_set: NamedQuerySet):
        """
        store the given named query set

        Args:
            nq_list: NamedQueryList
        """
        lod = []
        for nq in nq_set.queries:
            lod.append(asdict(nq))
        self.store(lod=lod)

    def store_query_details_list(self, qd_list: List[QueryDetails]):
        """
        Store the given QueryDetails instances in the database.

        Each instance is converted to a dictionary and the resulting list is
        handed to store() with QueryDetails as the source class.

        Args:
            qd_list (List[QueryDetails]): the QueryDetails instances to be stored.
        """
        details_records = [asdict(details) for details in qd_list]
        self.store(lod=details_records, source_class=QueryDetails)

    def store_stats(self, stats_list: List[QueryStats]):
        """
        Store the given list of query statistics.

        Args:
            stats_list (List[QueryStats]): the statistics to convert to
                dictionaries and store with QueryStats as the source class
        """
        stats_records = [asdict(query_stats) for query_stats in stats_list]
        self.store(lod=stats_records, source_class=QueryStats)

    def store_graphs(self, gm: GraphManager = None):
        """
        Store all graphs of a GraphManager in my SQL database.

        The Graph table is created (dropping an existing one) before storing.

        Args:
            gm (GraphManager): the graph manager to take the graphs from;
                if None, self.gm is used
        """
        graph_manager = self.gm if gm is None else gm

        # convert each Graph instance to a dictionary
        graph_records = []
        for graph in graph_manager:
            graph_records.append(asdict(graph))

        self.store(lod=graph_records, source_class=Graph, with_create=True)

    def store_endpoints(self, endpoints: Optional[Dict[str, Endpoint]] = None):
        """
        Store the given endpoints, or self.endpoints, in the SQL database.

        The SnapQueryEndpoint table is created (dropping an existing one) before storing.

        Args:
            endpoints (Optional[Dict[str, Endpoint]]): endpoints by name.
                If None, self.endpoints is used.
        """
        # This is a compatibility layer for pylodstorage Endpoints:
        # as of 2024-06 the pylodstorage Endpoint still uses the deprecated @Jsonable,
        # so instances are converted to our local SnapQueryEndpoint format
        # and our store mechanism is used to create the SQL records
        if endpoints is None:
            endpoints = self.endpoints

        # attributes copied over from the source endpoint when they are set
        copied_attrs = (
            "lang",
            "endpoint",
            "website",
            "database",
            "method",
            "prefixes",
            "auth",
            "user",
            "password",
        )

        records = []
        for endpoint_name, lod_endpoint in endpoints.items():
            candidate = {"name": endpoint_name}
            for attr in copied_attrs:
                candidate[attr] = getattr(lod_endpoint, attr, None)

            # only pass on the values that are actually available
            available = {key: value for key, value in candidate.items() if value is not None}
            records.append(asdict(SnapQueryEndpoint(**available)))

        self.store(lod=records, source_class=SnapQueryEndpoint, with_create=True)

    def execute_query(
        self,
        named_query: NamedQuery,
        params_dict: Dict,
        endpoint_name: str = "wikidata",
        limit: int = None,
        with_stats: bool = True,
        prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
    ):
        """
        Execute the given named query.

        Args:
            named_query (NamedQuery): the query to execute
            params_dict (Dict): the query parameters to apply (if any)
            endpoint_name (str): the endpoint where to execute the query
            limit (int): the record limit for the results (if any)
            with_stats (bool): if True collect and store the query statistics
            prefix_merger (QueryPrefixMerger): prefix merger to use

        Returns:
            tuple: the results and the statistics; the statistics are None
                if with_stats is False
        """
        bundle = self.as_query_bundle(named_query, endpoint_name, limit, prefix_merger)

        # substitute the parameters only if the query text declares any
        params = Params(bundle.query.query)
        if params.has_params:
            params.set(params_dict)
            bundle.query.query = params.apply_parameters()

        if not with_stats:
            return bundle.get_lod(), None

        results, stats = bundle.get_lod_with_stats()
        self.store_stats([stats])
        return results, stats

    def add_and_store(self, nq: NamedQuery):
        """
        Store a NamedQuery together with the QueryDetails derived from its SPARQL text.

        Args:
            nq (NamedQuery): The NamedQuery instance to add and store.
        """
        # derive the details first so nothing is stored if this step fails
        details = QueryDetails.from_sparql(query_id=nq.query_id, sparql=nq.sparql)
        self.store([asdict(nq)])
        self.store_query_details_list([details])

    def get_entity_info(self, source_class: Type) -> EntityInfo:
        """
        Get the EntityInfo for the given source class, creating and caching it on first use.

        Args:
            source_class (Type): the class to get the entity information for

        Returns:
            EntityInfo: the entity information built from the sample records of the class
        """
        if source_class in self.entity_infos:
            return self.entity_infos[source_class]

        pk = self.primary_keys.get(source_class, None)
        samples = self.get_sample_records(source_class)
        info = EntityInfo(
            samples,
            name=source_class.__name__,
            primaryKey=pk,
            debug=self.debug,
        )
        self.entity_infos[source_class] = info
        return info

    def store(
        self,
        lod: List[Dict[str, Any]],
        source_class: Type = NamedQuery,
        with_create: bool = False,
    ) -> None:
        """
        Stores the given list of dictionaries in the database using entity information
        derived from a specified source class.

        Args:
            lod (List[Dict[str, Any]]): List of dictionaries that represent the records to be stored.
            source_class (Type): The class from which the entity information is derived. This class
                should have an attribute or method that defines its primary key and must have a `__name__` attribute.
                with_create(bool): if True create the table
        Raises:
            AttributeError: If the source class does not have the necessary method or attribute to define the primary key.
        """
        entity_info = self.get_entity_info(source_class)
        if with_create:
            self.sql_db.createTable4EntityInfo(entityInfo=entity_info, withDrop=True)
        # Store the list of dictionaries in the database using the defined entity information
        self.sql_db.store(lod, entity_info, fixNone=True, replace=True)

    @classmethod
    def get_sample_records(cls, source_class: Type) -> List[Dict[str, Any]]:
        """
        Generates a list of dictionary records based on the sample instances
        provided by a source class. This method utilizes the `get_samples` method
        of the source class, which should return a dictionary of sample instances.

        Args:
            source_class (Type): The class from which to fetch sample instances.
                This class must implement a `get_samples` method that returns
                a dictionary of instances categorized by some key.

        Returns:
            List[Dict[str, Any]]: A list of dictionaries where each dictionary
                is a record that corresponds to a sample instance from the source class.

        Raises:
            AttributeError: If the source_class does not have a `get_samples` method.
        """
        if not hasattr(source_class, "get_samples"):
            raise AttributeError(f"The class {source_class.__name__} must have a 'get_samples' method.")

        sample_instances = source_class.get_samples()
        list_of_records = []

        # Assuming each key in the returned dictionary of get_samples corresponds to a list of instances
        for instance_group in sample_instances.values():
            for instance in instance_group:
                # Ensure that the instance is a dataclass instance
                if is_dataclass(instance):
                    record = asdict(instance)
                    list_of_records.append(record)
                else:
                    raise ValueError(f"The instance of class {source_class.__name__} is not a dataclass instance")

        return list_of_records

    def lookup(self, query_name: QueryName, lenient: bool = True) -> NamedQuery:
        """
        lookup the named query for the given structured query name


        Args:
            query_name(QueryName): the structured query name
            lenient(bool): if True handle multiple entry errors as warnings
        Returns:
            NamedQuery: the named query
        """
        qn = query_name
        query_id = qn.query_id
        sql_query = """SELECT
    *
FROM
    NamedQuery
WHERE
    query_id=?"""
        query_records = self.sql_db.query(sql_query, (query_id,))
        if not query_records:
            msg = f"NamedQuery not found for the specified query '{qn}'."
            raise ValueError(msg)

        query_count = len(query_records)
        if query_count != 1:
            msg = f"multiple entries ({query_count}) for query '{qn.name}' namespace '{qn.namespace} and domain '{qn.domain}' the id '{qn.query_id}' is not unique"
            if lenient:
                print(f"warning: {msg}")
            else:
                raise ValueError(msg)

        record = query_records[0]
        named_query = NamedQuery.from_record(record)
        return named_query

    def get_query(
        self,
        query_name: QueryName,
        endpoint_name: str = "wikidata",
        limit: int = None,
        prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
    ) -> QueryBundle:
        """
        Get the query for the given parameters.

        Args:
            query_name: (QueryName):a structured query name
            endpoint_name (str): The name of the endpoint to send the SPARQL query to, default is 'wikidata'.
            limit (int): The query limit (if any).
            prefix_merger: Prefix merger to use
        Returns:
            QueryBundle: named_query, query, and endpoint.
        """
        named_query = self.lookup(query_name=query_name)
        return self.as_query_bundle(named_query, endpoint_name, limit, prefix_merger)

    def as_query_bundle(
        self,
        named_query: NamedQuery,
        endpoint_name: str,
        limit: int = None,
        prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
    ) -> QueryBundle:
        """
        Assemble a QueryBundle from a NamedQuery, an endpoint name and an optional limit.

        Args:
            named_query (NamedQuery): Named query object.
            endpoint_name (str): Name of the endpoint where the query should be executed;
                must be a key of self.endpoints.
            limit (int): Optional limit for the query; a LIMIT clause is appended
                to the query text if the value is truthy.
            prefix_merger (QueryPrefixMerger): Prefix merger handed to merge_prefixes.

        Returns:
            QueryBundle: A bundle containing the named query, the query object, and the endpoint.

        Raises:
            ValueError: If endpoint_name is not a configured endpoint.
        """
        if endpoint_name not in self.endpoints:
            raise ValueError(f"Invalid endpoint {endpoint_name}")
        target_endpoint = self.endpoints[endpoint_name]

        sparql_query = Query(
            name=named_query.name,
            query=named_query.sparql,
            lang="sparql",
            endpoint=target_endpoint.endpoint,
            limit=limit,
        )
        sparql_query.query = QueryPrefixMerger.merge_prefixes(
            named_query, sparql_query, target_endpoint, prefix_merger
        )
        if limit:
            sparql_query.query += f"\nLIMIT {limit}"

        return QueryBundle(named_query=named_query, query=sparql_query, endpoint=target_endpoint)

    def get_namespaces(self) -> Dict[str, int]:
        """
        Retrieves all unique namespaces and the count of NamedQueries associated with each from the database,
        sorted by the count of queries from lowest to highest.

        Returns:
            Dict[str, int]: A dictionary where keys are namespaces and values are the counts of associated queries, sorted by count.
        """
        # Multi-line SQL query for better readability
        query = """
        SELECT domain,namespace, COUNT(*) AS query_count
        FROM NamedQuery
        GROUP BY domain,namespace
        ORDER BY COUNT(*)
        """
        result = self.sql_db.query(query)
        namespaces: Dict[str, int] = {}
        for row in result:
            domain = row["domain"]
            namespace = row["namespace"]
            count = int(row["query_count"])
            namespaces[f"{namespace}@{domain}"] = count
        return namespaces

    def get_all_queries(
        self,
        namespace: str = "snapquery-examples",
        domain: str = "wikidata.org",
        limit: int = None,  # Default limit is None, meaning no limit
    ) -> List[NamedQuery]:
        """
        Retrieves named queries stored in the database, filtered by domain and namespace with pattern matching.
        Optionally limits the number of results.

        Args:
            namespace (str): Namespace filter, supports wildcard '%', e.g., 'example%' for partial matches.
            domain (str): Domain filter, supports wildcard '%', e.g., 'wikidata%' for partial matches.
            limit (int): Maximum number of NamedQueries to retrieve, defaults to None for unlimited.

        Returns:
            List[NamedQuery]: A list of NamedQuery instances in the database.
        """
        sql_query = """SELECT * FROM NamedQuery
WHERE domain LIKE ? AND namespace LIKE ?
ORDER BY domain,namespace,name"""
        params = (f"{domain}%", f"{namespace}%")

        if limit is not None:
            sql_query += " LIMIT ?"
            params += (limit,)

        query_records = self.sql_db.query(sql_query, params)
        named_queries = []
        for record in query_records:
            named_query = NamedQuery.from_record(record)
            named_queries.append(named_query)

        return named_queries

    def get_query_stats(self, query_id: str) -> list[QueryStats]:
        """
        Get query stats for the given query name
        Args:
            query_id: id of the query

        Returns:
            list of query stats
        """
        sql_query = """
        SELECT *
        FROM QueryStats
        WHERE query_id = ?
        """
        query_records = self.sql_db.query(sql_query, (query_id,))
        stats = []
        if query_records:
            for record in query_records:
                query_stat = QueryStats.from_record(record)
                stats.append(query_stat)
        return stats

    def get_query_stats_by_context(self, context: str) -> list[QueryStats]:
        """
        Get query stats for the given query name
        Args:
            query_id: id of the query

        Returns:
            list of query stats
        """
        sql_query = """
        SELECT *
        FROM QueryStats
        WHERE context = ?
        """
        query_records = self.sql_db.query(sql_query, (context,))
        stats = [QueryStats.from_record(record) for record in query_records]
        return stats

__init__(db_path=None, debug=False)

Initializes the NamedQueryManager with a specific database path and a debug mode.

Parameters:

Name Type Description Default
db_path Optional[str]

The file path to the SQLite database. If None, the default cache path is used.

None
debug bool

If True, enables debug mode which may provide additional logging and error reporting.

False

Attributes:

Name Type Description
debug bool

Stores the debug state.

sql_db SQLDB

An instance of SQLDB to manage the SQLite database interactions.

endpoints dict

A dictionary of SPARQL endpoints configured for use.

Source code in snapquery/snapquery_core.py
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
def __init__(self, db_path: str = None, debug: bool = False):
    """
    Open the database and load the endpoint, meta query and graph configuration.

    Args:
        db_path (Optional[str]): The file path to the SQLite database.
            If None, the path returned by get_cache_path() is used.
        debug (bool): Debug flag; it is stored on the instance and handed on to SQLDB.

    Attributes:
        debug (bool): The debug flag.
        sql_db (SQLDB): The SQLDB instance opened for db_path.
        samples_path (str): The "samples" directory next to this Python module.
        endpoints (dict): The SPARQL endpoints read from samples/endpoints.yaml.
        meta_qm (QueryManager): The SQL queries read from samples/meta_query.yaml.
        gm (GraphManager): The graphs loaded from the GraphManager yaml file.
        primary_keys (dict): The primary key column name per entity class.
        entity_infos (dict): EntityInfo instances per entity class, initially empty.
    """
    self.debug = debug
    if db_path is None:
        db_path = NamedQueryManager.get_cache_path()
    self.sql_db = SQLDB(dbname=db_path, check_same_thread=False, debug=debug)

    # the sample yaml files are located relative to the current Python module
    module_dir = os.path.dirname(os.path.abspath(__file__))
    self.samples_path = os.path.join(module_dir, "samples")
    self.endpoints = EndpointManager.getEndpoints(
        endpointPath=os.path.join(self.samples_path, "endpoints.yaml"),
        lang="sparql",
        with_default=False,
    )
    self.meta_qm = QueryManager(
        queriesPath=os.path.join(self.samples_path, "meta_query.yaml"),
        with_default=False,
        lang="sql",
    )

    # Graph Manager
    self.gm = GraphManager.load_from_yaml_file(GraphManager.get_yaml_path())

    # SQL meta data handling: primary key column per entity class
    self.primary_keys = {
        QueryStats: "stats_id",
        NamedQuery: "query_id",
        QueryDetails: "query_id",
    }
    self.entity_infos = {}

add_and_store(nq)

Adds a new NamedQuery instance and stores it in the database.

Parameters:

Name Type Description Default
nq NamedQuery

The NamedQuery instance to add and store.

required
Source code in snapquery/snapquery_core.py
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
def add_and_store(self, nq: NamedQuery):
    """
    Store a NamedQuery together with the QueryDetails derived from its SPARQL text.

    Args:
        nq (NamedQuery): The NamedQuery instance to add and store.
    """
    # derive the details first so nothing is stored if this step fails
    details = QueryDetails.from_sparql(query_id=nq.query_id, sparql=nq.sparql)
    self.store([asdict(nq)])
    self.store_query_details_list([details])

as_query_bundle(named_query, endpoint_name, limit=None, prefix_merger=QueryPrefixMerger.SIMPLE_MERGER)

Assembles a QueryBundle from a NamedQuery, endpoint name, and optional limit.

Parameters:

Name Type Description Default
named_query NamedQuery

Named query object.

required
endpoint_name str

Name of the endpoint where the query should be executed.

required
limit int

Optional limit for the query.

None

Returns:

Name Type Description
QueryBundle QueryBundle

A bundle containing the named query, the query object, and the endpoint.

Source code in snapquery/snapquery_core.py
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
def as_query_bundle(
    self,
    named_query: NamedQuery,
    endpoint_name: str,
    limit: int = None,
    prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
) -> QueryBundle:
    """
    Assemble a QueryBundle from a NamedQuery, an endpoint name and an optional limit.

    Args:
        named_query (NamedQuery): Named query object.
        endpoint_name (str): Name of the endpoint where the query should be executed;
            must be a key of self.endpoints.
        limit (int): Optional limit for the query; a LIMIT clause is appended
            to the query text if the value is truthy.
        prefix_merger (QueryPrefixMerger): Prefix merger handed to merge_prefixes.

    Returns:
        QueryBundle: A bundle containing the named query, the query object, and the endpoint.

    Raises:
        ValueError: If endpoint_name is not a configured endpoint.
    """
    if endpoint_name not in self.endpoints:
        raise ValueError(f"Invalid endpoint {endpoint_name}")
    target_endpoint = self.endpoints[endpoint_name]

    sparql_query = Query(
        name=named_query.name,
        query=named_query.sparql,
        lang="sparql",
        endpoint=target_endpoint.endpoint,
        limit=limit,
    )
    sparql_query.query = QueryPrefixMerger.merge_prefixes(
        named_query, sparql_query, target_endpoint, prefix_merger
    )
    if limit:
        sparql_query.query += f"\nLIMIT {limit}"

    return QueryBundle(named_query=named_query, query=sparql_query, endpoint=target_endpoint)

execute_query(named_query, params_dict, endpoint_name='wikidata', limit=None, with_stats=True, prefix_merger=QueryPrefixMerger.SIMPLE_MERGER)

execute the given named_query

Parameters:

Name Type Description Default
named_query NamedQuery

the query to execute

required
params_dict Dict

the query parameters to apply (if any)

required
endpoint_name str

the endpoint where to execute the query

'wikidata'
limit int

the record limit for the results (if any)

None
with_stats bool

if True run the stats

True
prefix_merger QueryPrefixMerger

prefix merger to use

SIMPLE_MERGER
Source code in snapquery/snapquery_core.py
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
def execute_query(
    self,
    named_query: NamedQuery,
    params_dict: Dict,
    endpoint_name: str = "wikidata",
    limit: int = None,
    with_stats: bool = True,
    prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
):
    """
    Execute the given named query.

    Args:
        named_query (NamedQuery): the query to execute
        params_dict (Dict): the query parameters to apply (if any)
        endpoint_name (str): the endpoint where to execute the query
        limit (int): the record limit for the results (if any)
        with_stats (bool): if True collect and store the query statistics
        prefix_merger (QueryPrefixMerger): prefix merger to use

    Returns:
        tuple: the results and the statistics; the statistics are None
            if with_stats is False
    """
    bundle = self.as_query_bundle(named_query, endpoint_name, limit, prefix_merger)

    # substitute the parameters only if the query text declares any
    params = Params(bundle.query.query)
    if params.has_params:
        params.set(params_dict)
        bundle.query.query = params.apply_parameters()

    if not with_stats:
        return bundle.get_lod(), None

    results, stats = bundle.get_lod_with_stats()
    self.store_stats([stats])
    return results, stats

from_samples(db_path=None, force_init=False, with_backup=True, debug=False) classmethod

Creates and returns an instance of NamedQueryManager, optionally initializing it from sample data.

Parameters:

Name Type Description Default
db_path Optional[str]

Path to the database file. If None, the default path is used.

None
force_init bool

If True, the existing database file is dropped and recreated, and backed up if with_backup is True.

False
with_backup bool

If True and force_init is True, moves the database file to a backup location before reinitialization.

True
debug bool

If True, enables debug mode which may provide additional logging.

False

Returns:

Name Type Description
NamedQueryManager NamedQueryManager

An instance of the manager initialized with the database at db_path.

Source code in snapquery/snapquery_core.py
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
@classmethod
def from_samples(
    cls,
    db_path: Optional[str] = None,
    force_init: bool = False,
    with_backup: bool = True,
    debug: bool = False,
) -> "NamedQueryManager":
    """
    Creates and returns an instance of NamedQueryManager, optionally initializing it from sample data.

    The tables are (re)created from the sample records if force_init is set,
    or if the database file does not exist or is empty.

    Args:
        db_path (Optional[str]): Path to the database file. If None, the default path is used.
        force_init (bool): If True, the tables are dropped and recreated from the samples.
        with_backup (bool): If True and force_init is True, an existing database file is renamed
            to a timestamped backup file before reinitialization.
        debug (bool): If True, enables debug mode which may provide additional logging.

    Returns:
        NamedQueryManager: An instance of the manager initialized with the database at `db_path`.
    """
    if db_path is None:
        db_path = cls.get_cache_path()

    path_obj = Path(db_path)

    # Move an existing database file out of the way before it is reinitialized
    if force_init and with_backup and path_obj.exists():
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
        backup_path = path_obj.with_name(f"{path_obj.stem}-{timestamp}{path_obj.suffix}")
        path_obj.rename(backup_path)

    nqm = NamedQueryManager(db_path=db_path, debug=debug)
    if force_init or not path_obj.exists() or path_obj.stat().st_size == 0:
        for source_class in (NamedQuery, QueryStats, QueryDetails):
            # Use the primary keys configured in __init__ as the single source of truth;
            # a literal "quer_id" was previously used for QueryDetails by mistake.
            pk = nqm.primary_keys[source_class]

            # Fetch sample records from the specified class
            sample_records = cls.get_sample_records(source_class=source_class)

            # Define entity information dynamically based on the class and primary key
            entityInfo = EntityInfo(sample_records, name=source_class.__name__, primaryKey=pk)

            # Create and populate the table specific to each class
            # NOTE(review): createTable is called without the primary key - confirm
            # whether the created tables are expected to carry it.
            nqm.sql_db.createTable(sample_records, source_class.__name__, withDrop=True)
            nqm.sql_db.store(sample_records, entityInfo, fixNone=True, replace=True)
        # store yaml defined entities to SQL database
        nqm.store_endpoints()
        nqm.store_graphs()
    return nqm

get_all_queries(namespace='snapquery-examples', domain='wikidata.org', limit=None)

Retrieves named queries stored in the database, filtered by domain and namespace with pattern matching. Optionally limits the number of results.

Parameters:

Name Type Description Default
namespace str

Namespace filter, supports wildcard '%', e.g., 'example%' for partial matches.

'snapquery-examples'
domain str

Domain filter, supports wildcard '%', e.g., 'wikidata%' for partial matches.

'wikidata.org'
limit int

Maximum number of NamedQueries to retrieve, defaults to None for unlimited.

None

Returns:

Type Description
List[NamedQuery]

List[NamedQuery]: A list of NamedQuery instances in the database.

Source code in snapquery/snapquery_core.py
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
    def get_all_queries(
        self,
        namespace: str = "snapquery-examples",
        domain: str = "wikidata.org",
        limit: int = None,  # Default limit is None, meaning no limit
    ) -> List[NamedQuery]:
        """
        Retrieves named queries stored in the database, filtered by domain and namespace with pattern matching.
        Optionally limits the number of results.

        Args:
            namespace (str): Namespace filter, supports wildcard '%', e.g., 'example%' for partial matches.
            domain (str): Domain filter, supports wildcard '%', e.g., 'wikidata%' for partial matches.
            limit (int): Maximum number of NamedQueries to retrieve, defaults to None for unlimited.

        Returns:
            List[NamedQuery]: A list of NamedQuery instances in the database.
        """
        sql_query = """SELECT * FROM NamedQuery
WHERE domain LIKE ? AND namespace LIKE ?
ORDER BY domain,namespace,name"""
        params = (f"{domain}%", f"{namespace}%")

        if limit is not None:
            sql_query += " LIMIT ?"
            params += (limit,)

        query_records = self.sql_db.query(sql_query, params)
        named_queries = []
        for record in query_records:
            named_query = NamedQuery.from_record(record)
            named_queries.append(named_query)

        return named_queries

get_entity_info(source_class)

Gets or creates EntityInfo for the given source class.

Source code in snapquery/snapquery_core.py
955
956
957
958
959
960
961
962
963
964
965
966
967
968
def get_entity_info(self, source_class: Type) -> EntityInfo:
    """
    Return the EntityInfo for the given source class, building and caching it on first use.

    Args:
        source_class (Type): the class to describe; its sample records and
            the primary key registered in self.primary_keys (if any) are used.

    Returns:
        EntityInfo: the entry of self.entity_infos for the source class.
    """
    # cache hit: nothing to build
    if source_class in self.entity_infos:
        return self.entity_infos[source_class]

    key_name = self.primary_keys.get(source_class, None)
    samples = self.get_sample_records(source_class)
    entity_info = EntityInfo(
        samples,
        name=source_class.__name__,
        primaryKey=key_name,
        debug=self.debug,
    )
    self.entity_infos[source_class] = entity_info
    return entity_info

get_namespaces()

Retrieves all unique namespaces and the count of NamedQueries associated with each from the database, sorted by the count of queries from lowest to highest.

Returns:

Type Description
Dict[str, int]

Dict[str, int]: A dictionary where keys have the form 'namespace@domain' and values are the counts of associated queries, sorted by count.

Source code in snapquery/snapquery_core.py
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
def get_namespaces(self) -> Dict[str, int]:
    """
    Count the NamedQueries per (domain, namespace) pair.

    The rows are fetched ordered by their query count (ascending) and the
    resulting dictionary keeps that order.

    Returns:
        Dict[str, int]: maps 'namespace@domain' keys to the number of
            NamedQueries stored for that combination.
    """
    query = """
    SELECT domain,namespace, COUNT(*) AS query_count
    FROM NamedQuery
    GROUP BY domain,namespace
    ORDER BY COUNT(*)
    """
    rows = self.sql_db.query(query)
    # key format is namespace first, then domain, joined by '@'
    return {f"{row['namespace']}@{row['domain']}": int(row["query_count"]) for row in rows}

get_query(query_name, endpoint_name='wikidata', limit=None, prefix_merger=QueryPrefixMerger.SIMPLE_MERGER)

Get the query for the given parameters.

Parameters:

Name Type Description Default
query_name QueryName

(QueryName):a structured query name

required
endpoint_name str

The name of the endpoint to send the SPARQL query to, default is 'wikidata'.

'wikidata'
limit int

The query limit (if any).

None
prefix_merger QueryPrefixMerger

Prefix merger to use

SIMPLE_MERGER

Returns: QueryBundle: named_query, query, and endpoint.

Source code in snapquery/snapquery_core.py
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
def get_query(
    self,
    query_name: QueryName,
    endpoint_name: str = "wikidata",
    limit: int = None,
    prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
) -> QueryBundle:
    """
    Look up the named query for the given query name and wrap it as a query bundle.

    Args:
        query_name (QueryName): the structured query name to look up
        endpoint_name (str): name of the endpoint the SPARQL query is meant for, default is 'wikidata'
        limit (int): the query limit (if any)
        prefix_merger (QueryPrefixMerger): the prefix merger to use

    Returns:
        QueryBundle: named_query, query, and endpoint.
    """
    # lookup result and the remaining arguments are handed on unchanged
    return self.as_query_bundle(
        self.lookup(query_name=query_name),
        endpoint_name,
        limit,
        prefix_merger,
    )

get_query_stats(query_id)

Get the query stats for the query with the given id. Args: query_id: id of the query

Returns:

Type Description
list[QueryStats]

list of query stats

Source code in snapquery/snapquery_core.py
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
def get_query_stats(self, query_id: str) -> list[QueryStats]:
    """
    Get the query stats recorded for the query with the given id.

    Args:
        query_id: id of the query

    Returns:
        list of query stats; empty if there are no matching records
    """
    sql_query = """
    SELECT *
    FROM QueryStats
    WHERE query_id = ?
    """
    records = self.sql_db.query(sql_query, (query_id,))
    # a falsy result (no rows) yields an empty list
    if not records:
        return []
    return [QueryStats.from_record(record) for record in records]

get_query_stats_by_context(context)

Get the query stats recorded for the given context. Args: context: the context value the query stats are filtered by

Returns:

Type Description
list[QueryStats]

list of query stats

Source code in snapquery/snapquery_core.py
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
def get_query_stats_by_context(self, context: str) -> list[QueryStats]:
    """
    Get the query stats recorded for the given context.

    Args:
        context: the context value the QueryStats records are filtered by

    Returns:
        list of query stats
    """
    sql_query = """
    SELECT *
    FROM QueryStats
    WHERE context = ?
    """
    stats = []
    for record in self.sql_db.query(sql_query, (context,)):
        stats.append(QueryStats.from_record(record))
    return stats

get_sample_records(source_class) classmethod

Generates a list of dictionary records based on the sample instances provided by a source class. This method utilizes the get_samples method of the source class, which should return a dictionary of sample instances.

Parameters:

Name Type Description Default
source_class Type

The class from which to fetch sample instances. This class must implement a get_samples method that returns a dictionary of instances categorized by some key.

required

Returns:

Type Description
List[Dict[str, Any]]

List[Dict[str, Any]]: A list of dictionaries where each dictionary is a record that corresponds to a sample instance from the source class.

Raises:

Type Description
AttributeError

If the source_class does not have a get_samples method.

Source code in snapquery/snapquery_core.py
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
@classmethod
def get_sample_records(cls, source_class: Type) -> List[Dict[str, Any]]:
    """
    Convert the sample instances of a source class into a flat list of records.

    The source class has to provide a `get_samples` method returning a
    dictionary whose values are iterables of dataclass instances. All groups
    are flattened in dictionary order and every instance is converted with
    `dataclasses.asdict`.

    Args:
        source_class (Type): The class providing the samples via `get_samples`.

    Returns:
        List[Dict[str, Any]]: One dictionary per sample instance.

    Raises:
        AttributeError: If the source_class does not have a `get_samples` method.
        ValueError: If one of the samples is not a dataclass instance.
    """
    if not hasattr(source_class, "get_samples"):
        raise AttributeError(f"The class {source_class.__name__} must have a 'get_samples' method.")

    records = []
    for group in source_class.get_samples().values():
        for sample in group:
            # only dataclass instances can be converted by asdict
            if not is_dataclass(sample):
                raise ValueError(f"The instance of class {source_class.__name__} is not a dataclass instance")
            records.append(asdict(sample))
    return records

lookup(query_name, lenient=True)

lookup the named query for the given structured query name

Parameters:

Name Type Description Default
query_name(QueryName)

the structured query name

required
lenient(bool)

if True handle multiple entry errors as warnings

required

Returns: NamedQuery: the named query

Source code in snapquery/snapquery_core.py
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
    def lookup(self, query_name: QueryName, lenient: bool = True) -> NamedQuery:
        """
        lookup the named query for the given structured query name


        Args:
            query_name(QueryName): the structured query name
            lenient(bool): if True handle multiple entry errors as warnings
        Returns:
            NamedQuery: the named query
        """
        qn = query_name
        query_id = qn.query_id
        sql_query = """SELECT
    *
FROM
    NamedQuery
WHERE
    query_id=?"""
        query_records = self.sql_db.query(sql_query, (query_id,))
        if not query_records:
            msg = f"NamedQuery not found for the specified query '{qn}'."
            raise ValueError(msg)

        query_count = len(query_records)
        if query_count != 1:
            msg = f"multiple entries ({query_count}) for query '{qn.name}' namespace '{qn.namespace} and domain '{qn.domain}' the id '{qn.query_id}' is not unique"
            if lenient:
                print(f"warning: {msg}")
            else:
                raise ValueError(msg)

        record = query_records[0]
        named_query = NamedQuery.from_record(record)
        return named_query

store(lod, source_class=NamedQuery, with_create=False)

Stores the given list of dictionaries in the database using entity information derived from a specified source class.

Parameters:

Name Type Description Default
lod List[Dict[str, Any]]

List of dictionaries that represent the records to be stored.

required
source_class Type

The class from which the entity information is derived. This class should have an attribute or method that defines its primary key and must have a __name__ attribute. Separate parameter with_create (bool): if True, create the table before storing.

NamedQuery

Raises: AttributeError: If the source class does not have the necessary method or attribute to define the primary key.

Source code in snapquery/snapquery_core.py
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
def store(
    self,
    lod: List[Dict[str, Any]],
    source_class: Type = NamedQuery,
    with_create: bool = False,
) -> None:
    """
    Store a list of dictionaries in the table described by the source class.

    The entity information for the table is obtained via get_entity_info.
    The records are stored with replace=True and fixNone=True.

    Args:
        lod (List[Dict[str, Any]]): the records to be stored.
        source_class (Type): the class the entity information is derived from;
            it must have a `__name__` attribute.
        with_create (bool): if True the table is created first, calling
            createTable4EntityInfo with withDrop=True
            (presumably replacing an existing table - confirm against SQLDB).

    Raises:
        AttributeError: If the source class does not have the necessary method
            or attribute to define the primary key.
    """
    info = self.get_entity_info(source_class)
    if with_create:
        self.sql_db.createTable4EntityInfo(entityInfo=info, withDrop=True)
    self.sql_db.store(lod, info, fixNone=True, replace=True)

store_endpoints(endpoints=None)

Stores the given endpoints or self.endpoints into the SQL database.

Parameters:

Name Type Description Default
endpoints Optional[Dict[str, LODStorageEndpoint]]

A dictionary of endpoints to store. If None, uses self.endpoints.

None
Source code in snapquery/snapquery_core.py
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
def store_endpoints(self, endpoints: Optional[Dict[str, Endpoint]] = None):
    """
    Convert the given endpoints to SnapQueryEndpoint records and store them in the SQL database.

    This is a compatibility layer for pylodstorage endpoints: as of 2024-06
    the pylodstorage Endpoint still uses the deprecated @Jsonable, so each
    instance is copied attribute by attribute into the local
    SnapQueryEndpoint format and saved with the regular store mechanism.
    The endpoint table is (re)created on every call (with_create=True).

    Args:
        endpoints (Optional[Dict[str, Endpoint]]): endpoints to store, keyed by name.
            If None, self.endpoints is used.
    """
    if endpoints is None:
        endpoints = self.endpoints

    # attributes copied from the source endpoint when they are available
    copied_attrs = (
        "lang",
        "endpoint",
        "website",
        "database",
        "method",
        "prefixes",
        "auth",
        "user",
        "password",
    )
    endpoints_lod = []
    for endpoint_name, lod_endpoint in endpoints.items():
        candidates = {"name": endpoint_name}
        for attr in copied_attrs:
            candidates[attr] = getattr(lod_endpoint, attr, None)
        # None values are left out so that only available attributes are passed on
        present = {key: value for key, value in candidates.items() if value is not None}
        endpoints_lod.append(asdict(SnapQueryEndpoint(**present)))

    self.store(lod=endpoints_lod, source_class=SnapQueryEndpoint, with_create=True)

store_graphs(gm=None)

Stores all graphs managed by the given GraphManager into my SQL database

Source code in snapquery/snapquery_core.py
849
850
851
852
853
854
855
856
857
858
859
def store_graphs(self, gm: GraphManager = None):
    """
    Store all graphs of a GraphManager in my SQL database.

    Args:
        gm (GraphManager): the graph manager to iterate over;
            self.gm is used when None is given.
    """
    graph_manager = self.gm if gm is None else gm

    # one record per Graph instance
    lod = []
    for graph in graph_manager:
        lod.append(asdict(graph))

    self.store(lod=lod, source_class=Graph, with_create=True)

store_named_query_list(nq_set)

store the given named query set

Parameters:

Name Type Description Default
nq_set

the NamedQuerySet whose queries are to be stored

required
Source code in snapquery/snapquery_core.py
813
814
815
816
817
818
819
820
821
822
823
def store_named_query_list(self, nq_set: NamedQuerySet):
    """
    Store the queries of the given named query set.

    Args:
        nq_set (NamedQuerySet): the set whose queries are converted to
            dictionaries and stored with the default source class of store
    """
    self.store(lod=[asdict(nq) for nq in nq_set.queries])

store_query_details_list(qd_list)

Stores a list of QueryDetails instances into the database. This function converts each QueryDetails instance into a dictionary and then stores the entire list of dictionaries. It utilizes the 'store' method to handle database operations based on the entity information derived from the QueryDetails class.

Parameters:

Name Type Description Default
qd_list List[QueryDetails]

List of QueryDetails instances to be stored.

required
Source code in snapquery/snapquery_core.py
825
826
827
828
829
830
831
832
833
834
835
836
837
838
def store_query_details_list(self, qd_list: List[QueryDetails]):
    """
    Store a list of QueryDetails instances in the database.

    Every instance is converted to a dictionary and the resulting list is
    handed to the 'store' method with QueryDetails as the source class.

    Args:
        qd_list (List[QueryDetails]): the QueryDetails instances to be stored.
    """
    records = [asdict(details) for details in qd_list]
    self.store(lod=records, source_class=QueryDetails)

store_stats(stats_list)

store the given list of query statistics

Source code in snapquery/snapquery_core.py
840
841
842
843
844
845
846
847
def store_stats(self, stats_list: List[QueryStats]):
    """
    Store the given list of query statistics.

    Args:
        stats_list (List[QueryStats]): the statistics to convert to
            dictionaries and store with QueryStats as the source class
    """
    records = [asdict(query_stats) for query_stats in stats_list]
    self.store(lod=records, source_class=QueryStats)

NamedQuerySet

a list/set of named queries which defines a namespace

Source code in snapquery/snapquery_core.py
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
@lod_storable
class NamedQuerySet:
    """
    A list/set of named queries which defines a namespace.

    Besides the list of queries a dictionary keyed by query_id is kept,
    which `add` uses to skip queries that are already contained.
    """

    domain: str  # domain this set of queries belongs to
    namespace: str  # namespace defined by this set

    target_graph_name: str  # name of the graph the queries target
    queries: List[NamedQuery] = field(default_factory=list)

    def __len__(self):
        # the size of the set is the number of queries in the list
        return len(self.queries)

    def __post_init__(self):
        """
        Build the query_id lookup dictionary once the fields are set
        """
        by_id = {}
        for query in self.queries:
            by_id[query.query_id] = query
        self._query_dict = by_id

    def add(self, query: NamedQuery):
        """
        Append the query to the list and register it in the dictionary,
        unless a query with the same query_id is already registered
        """
        if query.query_id in self._query_dict:
            return
        self.queries.append(query)
        self._query_dict[query.query_id] = query

__post_init__()

Initialize the dictionary after the object is created

Source code in snapquery/snapquery_core.py
483
484
485
486
487
def __post_init__(self):
    """
    Build the query_id lookup dictionary once the fields are set
    """
    by_id = {}
    for query in self.queries:
        # for duplicate ids the later query replaces the earlier one
        by_id[query.query_id] = query
    self._query_dict = by_id

add(query)

Add a query to both the list and dictionary

Source code in snapquery/snapquery_core.py
489
490
491
492
493
494
495
def add(self, query: NamedQuery):
    """
    Append the query to the list and register it in the dictionary,
    unless a query with the same query_id is already registered
    """
    if query.query_id in self._query_dict:
        return
    self.queries.append(query)
    self._query_dict[query.query_id] = query

QueryBundle

Bundles a named query, a query, and an endpoint into a single manageable object, facilitating the execution of SPARQL queries.

Attributes:

Name Type Description
named_query NamedQuery

The named query object, which includes metadata about the query.

query Query

The actual query object that contains the SPARQL query string.

endpoint Endpoint

The endpoint object where the SPARQL query should be executed.

sparql SPARQL

A SPARQL service object initialized with the endpoint URL.

Source code in snapquery/snapquery_core.py
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
class QueryBundle:
    """
    Bundles a named query, a query, and an endpoint into a single manageable object, facilitating the execution of SPARQL queries.

    Attributes:
        named_query (NamedQuery): The named query object, which includes metadata about the query.
        query (Query): The actual query object that contains the SPARQL query string.
        endpoint (Endpoint): The endpoint object where the SPARQL query should be executed.
        sparql (SPARQL): A SPARQL service object initialized with the endpoint URL;
            None as long as no endpoint is set.
    """

    def __init__(self, named_query: NamedQuery, query: Query, endpoint: Endpoint = None):
        """
        Initializes a new instance of the QueryBundle class.

        Args:
            named_query (NamedQuery): An instance of NamedQuery that provides a named reference to the query.
            query (Query): An instance of Query containing the SPARQL query string.
            endpoint (Endpoint): An instance of Endpoint representing the SPARQL endpoint URL.
        """
        self.named_query = named_query
        self.query = query
        self.update_endpoint(endpoint)

    def update_endpoint(self, endpoint):
        """
        Set the endpoint and create the SPARQL service object for it.

        Args:
            endpoint (Endpoint): the endpoint to use; for a falsy value no
                SPARQL service is created and self.sparql is None
        """
        self.endpoint = endpoint
        # reset first: the attribute always exists and a client created for
        # a previous endpoint is never kept around
        self.sparql = None
        if endpoint:
            self.sparql = SPARQL(endpoint.endpoint, method=endpoint.method)

    def raw_query(self, resultFormat, mime_type: str = None, timeout: float = 10.0):
        """
        returns raw result of the endpoint

        The query is sent with the HTTP method of the endpoint; the query text
        and the result format are passed as URL parameters.

        Args:
            resultFormat (str): format of the result
            mime_type (str): mime_type to use (if any) - sent as Accept header
            timeout (float): timeout in seconds

        Returns:
            raw result of the query (the text of the HTTP response)
        """
        params = {"query": self.query.query, "format": resultFormat}
        headers = {"Accept": mime_type} if mime_type else {}
        endpoint_url = self.endpoint.endpoint
        method = self.endpoint.method
        response = requests.request(
            method,
            endpoint_url,
            headers=headers,
            data={},
            params=params,
            timeout=timeout,
        )
        return response.text

    def get_lod(self) -> List[dict]:
        """
        Executes the stored query using the SPARQL service and returns the results as a list of dictionaries.

        Returns:
            List[dict]: A list where each dictionary represents a row of results from the SPARQL query.
        """
        lod = self.sparql.queryAsListOfDicts(self.query.query)
        return lod

    def get_lod_with_stats(self) -> tuple[list[dict], QueryStats]:
        """
        Executes the stored query using the SPARQL service and records statistics about the execution.

        A failing query does not raise: the exception is recorded in the
        statistics and an empty list is returned as the result.

        Returns:
            tuple[list[dict], QueryStats]: the result rows and the statistics
                of this execution; the record count is -1 for an empty result.
        """
        logger.info(f"Querying {self.endpoint.name} with query {self.named_query.name}")
        query_stat = QueryStats(query_id=self.named_query.query_id, endpoint_name=self.endpoint.name)
        try:
            lod = self.sparql.queryAsListOfDicts(self.query.query)
            query_stat.records = len(lod) if lod else -1
            query_stat.done()
        except Exception as ex:
            # deliberate best effort: the failure ends up in the stats
            lod = []
            logger.debug(f"Execution of query failed: {ex}")
            query_stat.error(ex)
        return (lod, query_stat)

    def format_result(
        self,
        qlod: List[Dict[str, Any]] = None,
        r_format: Format = Format.json,
    ) -> Optional[str]:
        """
        Formats the query results based on the specified format and returns them as a string.

        Args:
            qlod (List[Dict[str, Any]]): The list of dictionaries that represent the query results;
                if None the query is executed via get_lod.
            r_format (Format): The format of the result; None is treated as Format.json.

        Returns:
            Optional[str]: The formatted string representation of the query results,
                or None if the format is not handled.
        """
        if qlod is None:
            qlod = self.get_lod()
        if r_format is None:
            r_format = Format.json
        if r_format == Format.csv:
            csv_output = CSV.toCSV(qlod)
            return csv_output
        elif r_format in [Format.latex, Format.github, Format.mediawiki, Format.html]:
            doc = self.query.documentQueryResult(qlod, tablefmt=str(r_format), floatfmt=".1f")
            return doc.asText()
        elif r_format == Format.json:
            return json.dumps(qlod, indent=2, sort_keys=True, default=str)
        return None  # In case no format is matched or needed

    def set_limit(self, limit: int = None):
        """
        set the limit of my query

        Existing "LIMIT <number>" clauses (in any letter case) are rewritten
        to the new limit; if the query has no such clause one is appended.

        Args:
            limit(int): the limit to set - default: None (a falsy limit leaves the query unchanged)
        """
        if not limit:
            return
        limit_clause = f"LIMIT {limit}"
        # @TODO - still too naive if there is a LIMIT in a subquery:
        # every LIMIT clause is rewritten, not only the outermost one.
        # The word boundary keeps names such as "height_limit" untouched and
        # the replacement count decides whether a clause has to be appended -
        # a plain substring check would skip the append for such names.
        sparql_query, replaced = re.subn(
            r"\blimit\s+\d+",
            lambda _match: limit_clause,
            self.query.query,
            flags=re.IGNORECASE,
        )
        if not replaced:
            sparql_query += f"\n{limit_clause}"
        self.query.query = sparql_query

__init__(named_query, query, endpoint=None)

Initializes a new instance of the QueryBundle class.

Parameters:

Name Type Description Default
named_query NamedQuery

An instance of NamedQuery that provides a named reference to the query.

required
query Query

An instance of Query containing the SPARQL query string.

required
endpoint Endpoint

An instance of Endpoint representing the SPARQL endpoint URL.

None
Source code in snapquery/snapquery_core.py
508
509
510
511
512
513
514
515
516
517
518
519
def __init__(self, named_query: NamedQuery, query: Query, endpoint: Endpoint = None):
    """
    Create a QueryBundle from its three parts.

    Args:
        named_query (NamedQuery): the named reference to the query, including its metadata.
        query (Query): the query object holding the SPARQL query string.
        endpoint (Endpoint): the SPARQL endpoint to run the query against (optional).
    """
    self.named_query = named_query
    self.query = query
    # the endpoint is set via update_endpoint rather than assigned directly
    self.update_endpoint(endpoint)

format_result(qlod=None, r_format=Format.json)

Formats the query results based on the specified format and returns them as a string.

Parameters:

Name Type Description Default
qlod List[Dict[str, Any]]

The list of dictionaries that represent the query results.

None
query Query

The query object which contains details like the endpoint and the database.

required
r_format Format

The format in which to print the results.

json

Returns:

Type Description
Optional[str]

Optional[str]: The formatted string representation of the query results, or None if printed directly.

Source code in snapquery/snapquery_core.py
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
def format_result(
    self,
    qlod: List[Dict[str, Any]] = None,
    r_format: Format = Format.json,
) -> Optional[str]:
    """
    Render the query results as a string in the requested format.

    Args:
        qlod (List[Dict[str, Any]]): the query results as a list of dictionaries;
            if None the query is executed via get_lod.
        r_format (Format): the output format; None is treated as Format.json.

    Returns:
        Optional[str]: the formatted query results, or None if the format is not handled.
    """
    rows = self.get_lod() if qlod is None else qlod
    fmt = Format.json if r_format is None else r_format

    if fmt == Format.csv:
        return CSV.toCSV(rows)
    if fmt in [Format.latex, Format.github, Format.mediawiki, Format.html]:
        # tabular formats are rendered via the query's result documentation
        doc = self.query.documentQueryResult(rows, tablefmt=str(fmt), floatfmt=".1f")
        return doc.asText()
    if fmt == Format.json:
        return json.dumps(rows, indent=2, sort_keys=True, default=str)
    # no renderer for this format
    return None

get_lod()

Executes the stored query using the SPARQL service and returns the results as a list of dictionaries.

Returns:

Type Description
List[dict]

List[dict]: A list where each dictionary represents a row of results from the SPARQL query.

Source code in snapquery/snapquery_core.py
556
557
558
559
560
561
562
563
564
def get_lod(self) -> List[dict]:
    """
    Run the stored query against the SPARQL service.

    Returns:
        List[dict]: the result of queryAsListOfDicts for the query string -
            one dictionary per result row.
    """
    return self.sparql.queryAsListOfDicts(self.query.query)

get_lod_with_stats()

Executes the stored query using the SPARQL service and returns the results as a list of dictionaries.

Returns:

Type Description
tuple[list[dict], QueryStats]

tuple[list[dict], QueryStats]: The result rows as a list of dictionaries (an empty list if the query failed) together with the QueryStats recorded for this execution.

Source code in snapquery/snapquery_core.py
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
def get_lod_with_stats(self) -> tuple[list[dict], QueryStats]:
    """
    Run the stored query against the SPARQL service and record statistics about the run.

    A failing query does not raise: the exception is recorded in the
    statistics and an empty list is returned as the result.

    Returns:
        tuple[list[dict], QueryStats]: the result rows and the statistics of
            this execution; the record count is -1 for an empty result.
    """
    logger.info(f"Querying {self.endpoint.name} with query {self.named_query.name}")
    stats = QueryStats(query_id=self.named_query.query_id, endpoint_name=self.endpoint.name)
    try:
        rows = self.sparql.queryAsListOfDicts(self.query.query)
        if rows:
            stats.records = len(rows)
        else:
            # no rows at all is marked with -1
            stats.records = -1
        stats.done()
    except Exception as ex:
        # best effort: the failure is kept in the stats instead of propagating
        rows = []
        logger.debug(f"Execution of query failed: {ex}")
        stats.error(ex)
    return rows, stats

raw_query(resultFormat, mime_type=None, timeout=10.0)

returns raw result of the endpoint

Parameters:

Name Type Description Default
resultFormat str

format of the result

required
mime_type str

mime_type to use (if any)

None
timeout float

timeout in seconds

10.0

Returns:

Type Description

raw result of the query

Source code in snapquery/snapquery_core.py
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
def raw_query(self, resultFormat, mime_type: str = None, timeout: float = 10.0):
    """
    Send the query to the endpoint and return the unparsed response.

    The request uses the HTTP method of the endpoint; the query text and the
    result format are passed as URL parameters.

    Args:
        resultFormat (str): format of the result
        mime_type (str): mime_type to use (if any) - sent as Accept header
        timeout (float): timeout in seconds

    Returns:
        raw result of the query (the text of the HTTP response)
    """
    request_params = {"query": self.query.query, "format": resultFormat}
    accept_headers = {"Accept": mime_type} if mime_type else {}
    url = self.endpoint.endpoint
    http_method = self.endpoint.method
    # the request body stays empty - everything travels in the URL parameters
    response = requests.request(
        http_method,
        url,
        headers=accept_headers,
        data={},
        params=request_params,
        timeout=timeout,
    )
    return response.text

set_limit(limit=None)

set the limit of my query

Parameters:

Name Type Description Default
limit(int)

the limit to set - default: None

required
Source code in snapquery/snapquery_core.py
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
def set_limit(self, limit: int = None):
    """
    set the limit of my query

    Existing "LIMIT <number>" clauses (in any letter case) are rewritten to
    the new limit; if the query has no such clause one is appended.

    Args:
        limit(int): the limit to set - default: None (a falsy limit leaves the query unchanged)
    """
    if not limit:
        return
    limit_clause = f"LIMIT {limit}"
    # @TODO - still too naive if there is a LIMIT in a subquery:
    # every LIMIT clause is rewritten, not only the outermost one.
    # The word boundary keeps names such as "height_limit" untouched and the
    # replacement count decides whether a clause has to be appended - a plain
    # substring check would skip the append for such names.
    sparql_query, replaced = re.subn(
        r"\blimit\s+\d+",
        lambda _match: limit_clause,
        self.query.query,
        flags=re.IGNORECASE,
    )
    if not replaced:
        sparql_query += f"\n{limit_clause}"
    self.query.query = sparql_query

QueryDetails

Details for a named query

Source code in snapquery/snapquery_core.py
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
@lod_storable
class QueryDetails:
    """
    Details for a named query

    Records the parameters found in a query together with its
    line count and its size in bytes.
    """

    query_id: str
    params: str  # e.g. q - q1,q2,
    default_params: str  # e.g. Q80 - Q58631663, Q125422124
    default_param_types: str  # e.g. Q5 - Q191067,Q43229
    param_count: int
    lines: int
    size: int

    @classmethod
    def from_sparql(cls, query_id: str, sparql: str) -> "QueryDetails":
        """
        Creates an instance of QueryDetails from a SPARQL query string.

        The number of lines and the UTF-8 size of the query are calculated and
        the parameters reported by Params are listed.

        Args:
            query_id (str): The identifier of the query.
            sparql (str): The SPARQL query string from which to generate the query details.

        Returns:
            QueryDetails: An instance containing details about the SPARQL query.
        """
        # a query without any newline still counts as one line
        line_count = sparql.count("\n") + 1
        # size is measured in encoded bytes, not in characters
        byte_size = len(sparql.encode("utf-8"))

        # Params is assumed to extract the parameters used in the query - TODO confirm
        found_params = Params(query=sparql).params
        param_names = ",".join(found_params) if found_params else None
        return cls(
            query_id=query_id,
            params=param_names,
            # @TODO get parameters
            default_params=None,
            default_param_types=None,
            param_count=len(found_params),
            lines=line_count,
            size=byte_size,
        )

    @classmethod
    def get_samples(cls) -> dict[str, list["QueryDetails"]]:
        """
        get samples

        Returns:
            dict: the sample set name "snapquery-examples" mapped to a list of sample instances
        """
        scholia_sample = cls(
            query_id="scholia.test",
            params="q",
            default_params="Q80",
            default_param_types="Q5",
            param_count=1,
            lines=1,
            size=50,
        )
        return {"snapquery-examples": [scholia_sample]}

from_sparql(query_id, sparql) classmethod

Creates an instance of QueryDetails from a SPARQL query string.

This method parses the SPARQL query to determine the number of lines and the size of the query. It also identifies and lists the parameters used within the SPARQL query.

Parameters:

Name Type Description Default
query_id str

The identifier of the query.

required
sparql str

The SPARQL query string from which to generate the query details.

required

Returns:

Name Type Description
QueryDetails QueryDetails

An instance containing details about the SPARQL query.

Source code in snapquery/snapquery_core.py
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
@classmethod
def from_sparql(cls, query_id: str, sparql: str) -> "QueryDetails":
    """
    Build the QueryDetails for a SPARQL query string.

    The line count and the UTF-8 size of the query are calculated and the
    parameters reported by Params are joined into a comma separated list.

    Args:
        query_id (str): The identifier of the query.
        sparql (str): The SPARQL query string to describe.

    Returns:
        QueryDetails: the details derived from the SPARQL query.
    """
    # a query without any newline still counts as one line
    line_count = sparql.count("\n") + 1
    # size is measured in encoded bytes, not in characters
    byte_size = len(sparql.encode("utf-8"))

    # Params is assumed to extract the parameters used in the query - TODO confirm
    found_params = Params(query=sparql).params
    param_names = ",".join(found_params) if found_params else None
    details = cls(
        query_id=query_id,
        params=param_names,
        # @TODO get parameters
        default_params=None,
        default_param_types=None,
        param_count=len(found_params),
        lines=line_count,
        size=byte_size,
    )
    return details

get_samples() classmethod

get samples

Source code in snapquery/snapquery_core.py
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
@classmethod
def get_samples(cls) -> dict[str, list["QueryDetails"]]:
    """
    get samples

    Returns:
        dict: the sample set name "snapquery-examples" mapped to a list of sample instances
    """
    # built via cls so the samples have the type of the class they are requested from
    scholia_sample = cls(
        query_id="scholia.test",
        params="q",
        default_params="Q80",
        default_param_types="Q5",
        param_count=1,
        lines=1,
        size=50,
    )
    return {"snapquery-examples": [scholia_sample]}

QueryName

A structured query name with a fully qualifying query id that is URL-friendly Attributes: domain(str): the domain of the owner of this namespace namespace (str): The namespace of the query, which helps in categorizing the query. name (str): The unique name or identifier of the query within its namespace. query_id(str): encoded id e.g. cats--examples@wikidata.org

Source code in snapquery/snapquery_core.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
@lod_storable
class QueryName:
    """
    A structured query name with a fully qualifying query id that is URL-friendly
    Attributes:
        domain(str): the domain of the owner of this namespace
        namespace (str): The namespace of the query, which helps in categorizing the query.
        name (str): The unique name or identifier of the query within its namespace.
        query_id(str): encoded id e.g. cats--examples@wikidata.org
    """

    # name
    name: str
    # namespace
    namespace: str = "examples"
    # domain
    domain: str = "wikidata.org"
    # query_id
    query_id: str = field(init=False)

    def __post_init__(self):
        # query_id is always derived from the three name parts, never passed in
        self.query_id = self.get_query_id(self.name, self.namespace, self.domain)

    @classmethod
    def get_query_id(cls, name: str, namespace: str, domain: str) -> str:
        """
        Generate a URL-friendly query_id

        Args:
            name (str): the query name; it is slugified (unicode allowed)
            namespace (str): the namespace, used as given
            domain (str): the domain, used as given

        Returns:
            str: ``<slugified name>--<namespace>@<domain>``
        """
        # Convert None to empty string (or use any other default logic)
        name, namespace, domain = (name or ""), (namespace or ""), (domain or "")

        # Apply slugify with Unicode support and basic cleanup
        encoded_name = slugify(name, allow_unicode=True)

        # Create a combined query_id
        query_id = f"{encoded_name}--{namespace}@{domain}"

        return query_id

    @classmethod
    def from_query_id(
        cls,
        query_id: str,
        namespace: str = "examples",  # default namespace
        domain: str = "wikidata.org",  # default domain
    ) -> "QueryName":
        """
        Parse a URL-friendly query_id string into a QueryName object.
        Args:
            query_id (str): The URL-friendly query_id string to parse.
            namespace (str): Default namespace if not specified in query_id
            domain (str): Default domain if not specified in query_id
        Returns:
            QueryName: A QueryName object containing name, namespace, and domain.
        """
        # split on the first "--" only, so that a namespace which itself
        # contains "--" is not truncated
        name_part, separator, ns_domain = query_id.partition("--")
        name = urllib.parse.unquote(name_part)

        if separator:
            # likewise only the first "@" separates namespace and domain
            ns_part, at_sign, domain_part = ns_domain.partition("@")
            namespace = urllib.parse.unquote(ns_part)
            if at_sign:
                domain = urllib.parse.unquote(domain_part)
        return cls(name=name, namespace=namespace, domain=domain)

    def to_dict(self) -> dict:
        """
        Convert the QueryName object to a dictionary

        Returns:
            dict: the keys name, namespace, domain and query_id with my values
        """
        return {
            "name": self.name,
            "namespace": self.namespace,
            "domain": self.domain,
            "query_id": self.query_id,
        }

from_query_id(query_id, namespace='examples', domain='wikidata.org') classmethod

Parse a URL-friendly query_id string into a QueryName object. Args: query_id (str): The URL-friendly query_id string to parse. namespace (str): Default namespace if not specified in query_id domain (str): Default domain if not specified in query_id Returns: QueryName: A QueryName object containing name, namespace, and domain.

Source code in snapquery/snapquery_core.py
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
@classmethod
def from_query_id(
    cls,
    query_id: str,
    namespace: str = "examples",  # default namespace
    domain: str = "wikidata.org",  # default domain
) -> "QueryName":
    """
    Parse a URL-friendly query_id string into a QueryName object.
    Args:
        query_id (str): The URL-friendly query_id string to parse.
        namespace (str): Default namespace if not specified in query_id
        domain (str): Default domain if not specified in query_id
    Returns:
        QueryName: A QueryName object containing name, namespace, and domain.
    """
    # split on the first "--" only, so that a namespace which itself
    # contains "--" is not truncated
    name_part, separator, ns_domain = query_id.partition("--")
    name = urllib.parse.unquote(name_part)

    if separator:
        # likewise only the first "@" separates namespace and domain
        ns_part, at_sign, domain_part = ns_domain.partition("@")
        namespace = urllib.parse.unquote(ns_part)
        if at_sign:
            domain = urllib.parse.unquote(domain_part)
    return cls(name=name, namespace=namespace, domain=domain)

get_query_id(name, namespace, domain) classmethod

Generate a URL-friendly query_id

Source code in snapquery/snapquery_core.py
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
@classmethod
def get_query_id(cls, name: str, namespace: str, domain: str) -> str:
    """
    Generate a URL-friendly query_id

    Args:
        name (str): the query name; it is slugified (unicode allowed)
        namespace (str): the namespace, used as given
        domain (str): the domain, used as given

    Returns:
        str: ``<slugified name>--<namespace>@<domain>``
    """
    # a missing part (None) is treated like an empty string
    slug = slugify(name or "", allow_unicode=True)
    return f"{slug}--{namespace or ''}@{domain or ''}"

to_dict()

Convert the QueryName object to a dictionary

Source code in snapquery/snapquery_core.py
228
229
230
231
232
233
234
235
236
237
def to_dict(self) -> dict:
    """
    Convert the QueryName object to a dictionary

    Returns:
        dict: the keys name, namespace, domain and query_id with my values
    """
    exported = ("name", "namespace", "domain", "query_id")
    return {key: getattr(self, key) for key in exported}

QueryNameSet

Manages a set of QueryNames filtered by domain and namespaces SQL like patterns

Attributes:

nqm (NamedQueryManager): A manager to handle named queries and interactions with the database.
limit(int): the maximum number of names and top_queries
Calculated on update

total (int): Total number of queries that match the current filter criteria. domains (set): A set of domains that match the current filter criteria. namespaces (set): A set of namespaces that match the current filter criteria. names (set): A set of names that match the current filter criteria. top_queries (list): List of top queries based on the specified limit.

Source code in snapquery/snapquery_core.py
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
class QueryNameSet:
    """
    Manages a set of QueryNames filtered by domain and namespaces SQL like patterns

    Attributes:

        nqm (NamedQueryManager): A manager to handle named queries and interactions with the database.
        limit(int): the maximum number of names and top_queries

    Calculated on update:
        total (int): Total number of queries that match the current filter criteria.
        domains (set): A set of domains that match the current filter criteria.
        namespaces (set): A set of namespaces that match the current filter criteria.
        names (set): A set of names that match the current filter criteria.
        top_queries (list): List of top queries based on the specified limit.
    """

    def __init__(self, nqm: "NamedQueryManager", limit: int = None):
        self.nqm = nqm
        self.limit = limit
        self.total = 0
        self.domains, self.namespaces, self.names = set(), set(), set()
        # start unfiltered: empty prefixes match every domain and namespace
        self.update("", "")

    def __str__(self):
        top_names = ", ".join(query.name for query in self.top_queries)
        text = (
            f"QueryNameSet(Total: {self.total}, Domains: {sorted(self.domains)}, "
            f"Namespaces: {sorted(self.namespaces)}, Names: {sorted(self.names)}, "
            f"Top Queries: [{top_names}])"
        )
        return text

    def update(self, domain: str, namespace: str, limit: int = None):
        """
        recalculate my attributes for the given filter

        Args:
            domain (str): The domain part of the filter, used as a prefix of an SQL LIKE pattern.
            namespace (str): The namespace part of the filter, used as a prefix of an SQL LIKE pattern.
            limit (int, optional): Maximum number of queries to fetch. If None, my own limit is used.

        """
        effective_limit = self.limit if limit is None else limit
        stats_query = self.nqm.meta_qm.queriesByName["domain_namespace_stats"]
        like_params = (f"{domain}%", f"{namespace}%")
        stats_rows = self.nqm.sql_db.query(stats_query.query, like_params)

        # forget the results of the previous update
        self.total = 0
        for collected in (self.domains, self.namespaces, self.names):
            collected.clear()

        for row in stats_rows:
            self.domains.add(row["domain"])
            self.namespaces.add(row["namespace"])
            self.total += row["query_count"]
        self.top_queries = self.nqm.get_all_queries(namespace=namespace, domain=domain, limit=effective_limit)
        self.names.update(query.name for query in self.top_queries)

update(domain, namespace, limit=None)

update my attributes

Parameters:

Name Type Description Default
domain str

The domain part of the filter, supports SQL-like wildcards.

required
namespace str

The namespace part of the filter, supports SQL-like wildcards.

required
limit int

Maximum number of queries to fetch. If None, the limit configured on the QueryNameSet instance is used.

None
Source code in snapquery/snapquery_core.py
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
def update(self, domain: str, namespace: str, limit: int = None):
    """
    recalculate my attributes for the given filter

    Args:
        domain (str): The domain part of the filter, used as a prefix of an SQL LIKE pattern.
        namespace (str): The namespace part of the filter, used as a prefix of an SQL LIKE pattern.
        limit (int, optional): Maximum number of queries to fetch. If None, my own limit is used.

    """
    effective_limit = self.limit if limit is None else limit
    stats_query = self.nqm.meta_qm.queriesByName["domain_namespace_stats"]
    like_params = (f"{domain}%", f"{namespace}%")
    stats_rows = self.nqm.sql_db.query(stats_query.query, like_params)

    # forget the results of the previous update
    self.total = 0
    for collected in (self.domains, self.namespaces, self.names):
        collected.clear()

    for row in stats_rows:
        self.domains.add(row["domain"])
        self.namespaces.add(row["namespace"])
        self.total += row["query_count"]
    self.top_queries = self.nqm.get_all_queries(namespace=namespace, domain=domain, limit=effective_limit)
    self.names.update(query.name for query in self.top_queries)

QueryPrefixMerger

Bases: Enum

SPARQL Query prefix merger

Source code in snapquery/snapquery_core.py
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
class QueryPrefixMerger(Enum):
    """
    SPARQL Query prefix merger

    Each member names one strategy for adding prefix declarations to a query.
    """

    RAW = "raw"
    SIMPLE_MERGER = "simple merger"
    ANALYSIS_MERGER = "analysis merger"

    @classmethod
    def _missing_(cls, key):
        # any value that is not a member value resolves to the default merger
        return cls.default_merger()

    @classmethod
    def default_merger(cls) -> "QueryPrefixMerger":
        return cls.SIMPLE_MERGER

    @classmethod
    def get_by_name(cls, name: str) -> "QueryPrefixMerger":
        # look up the value for the member name; an unknown name yields None,
        # which the value lookup then maps to the default merger
        value = next((member.value for member in QueryPrefixMerger if member.name == name), None)
        return QueryPrefixMerger(value)

    @classmethod
    def merge_prefixes(
        cls, named_query: NamedQuery, query: Query, endpoint: Endpoint, merger: "QueryPrefixMerger"
    ) -> str:
        """
        Merge prefixes with the given merger
        Args:
            named_query (NamedQuery): not used by this method
            query (Query): the query whose query string is merged
            endpoint (Endpoint): the endpoint handed to the simple merger
            merger (QueryPrefixMerger): the strategy to apply

        Returns:
            merged query
        """
        sparql = query.query
        if merger == QueryPrefixMerger.SIMPLE_MERGER:
            return cls.simple_prefix_merger(sparql, endpoint)
        if merger == QueryPrefixMerger.ANALYSIS_MERGER:
            return cls.analysis_prefix_merger(sparql)
        # RAW: hand the query string back untouched
        return sparql

    @classmethod
    def simple_prefix_merger(cls, query_str: str, endpoint: Endpoint) -> str:
        """
        Simple prefix merger
        Args:
            query_str (str): the query string
            endpoint (Endpoint): the endpoint

        Returns:
            the query string with the endpoint's prefixes put in front,
            or the unchanged query string if the endpoint has none
        """
        endpoint_prefixes = getattr(endpoint, "prefixes", None)
        if not endpoint_prefixes:
            return query_str
        return f"{endpoint_prefixes}\n{query_str}"

    @classmethod
    def analysis_prefix_merger(cls, query_str: str) -> str:
        """
        Analysis prefix merger
        Args:
            query_str (str): the query string

        Returns:
            the result of SparqlAnalyzer.add_missing_prefixes for the query string
        """
        return SparqlAnalyzer.add_missing_prefixes(query_str)

analysis_prefix_merger(query_str) classmethod

Analysis prefix merger Args: query_str

Returns:

Type Description
str

merged query

Source code in snapquery/snapquery_core.py
697
698
699
700
701
702
703
704
705
706
707
708
@classmethod
def analysis_prefix_merger(cls, query_str: str) -> str:
    """
    Analysis prefix merger
    Args:
        query_str (str): the query string

    Returns:
        the result of SparqlAnalyzer.add_missing_prefixes for the query string
    """
    return SparqlAnalyzer.add_missing_prefixes(query_str)

merge_prefixes(named_query, query, endpoint, merger) classmethod

Merge prefixes with the given merger Args: named_query (NamedQuery): query (Query): endpoint (Endpoint): merger (QueryPrefixMerger):

Returns:

Type Description
str

merged query

Source code in snapquery/snapquery_core.py
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
@classmethod
def merge_prefixes(
    cls, named_query: NamedQuery, query: Query, endpoint: Endpoint, merger: "QueryPrefixMerger"
) -> str:
    """
    Merge prefixes with the given merger
    Args:
        named_query (NamedQuery): not used by this method
        query (Query): the query whose query string is merged
        endpoint (Endpoint): the endpoint handed to the simple merger
        merger (QueryPrefixMerger): the strategy to apply

    Returns:
        merged query
    """
    sparql = query.query
    if merger == QueryPrefixMerger.SIMPLE_MERGER:
        return cls.simple_prefix_merger(sparql, endpoint)
    if merger == QueryPrefixMerger.ANALYSIS_MERGER:
        return cls.analysis_prefix_merger(sparql)
    # any other merger: hand the query string back untouched
    return sparql

simple_prefix_merger(query_str, endpoint) classmethod

Simple prefix merger Args: query_str (str): the query string endpoint (Endpoint): the endpoint

Returns:

Type Description
str

merged query

Source code in snapquery/snapquery_core.py
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
@classmethod
def simple_prefix_merger(cls, query_str: str, endpoint: Endpoint) -> str:
    """
    Simple prefix merger
    Args:
        query_str (str): the query string
        endpoint (Endpoint): the endpoint

    Returns:
        the query string with the endpoint's prefixes put in front,
        or the unchanged query string if the endpoint has none
    """
    endpoint_prefixes = getattr(endpoint, "prefixes", None)
    if not endpoint_prefixes:
        return query_str
    return f"{endpoint_prefixes}\n{query_str}"

QueryStats

statistics about a query

Source code in snapquery/snapquery_core.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
@lod_storable
class QueryStats:
    """
    statistics about a query

    One instance describes a single execution of a query against an endpoint:
    when it was started, how long it took, how many records it returned and,
    if it failed, the raw and filtered error information.
    """

    stats_id: str = field(init=False)
    query_id: str  # foreign key
    endpoint_name: str  # foreign key

    context: Optional[str] = None  # a context for the query stats
    records: Optional[int] = None
    time_stamp: datetime.datetime = field(init=False)
    duration: Optional[float] = field(init=False, default=None)  # duration in seconds
    error_msg: Optional[str] = None
    error_category: Optional[str] = None

    filtered_msg: Optional[str] = None

    def __post_init__(self):
        """
        Post-initialization processing to construct a unique identifier for the query
        and record the timestamp when the query stats object is created.
        """
        self.stats_id = str(uuid.uuid4())
        self.time_stamp = datetime.datetime.now()

    def done(self):
        """
        Set the duration by calculating the elapsed time since the `time_stamp`.
        """
        self.duration = (datetime.datetime.now() - self.time_stamp).total_seconds()

    def apply_error_filter(self, for_html: bool = False) -> ErrorFilter:
        """
        Applies an error filter to the error message and sets the filtered message.

        Args:
            for_html (bool): If True, formats the message for HTML output.

        Returns:
            ErrorFilter: the error filter that has been applied
        """
        error_filter = ErrorFilter(self.error_msg)
        self.filtered_msg = error_filter.get_message(for_html=for_html)
        self.error_category = error_filter.category
        return error_filter

    def error(self, ex: Exception):
        """
        Handle exception of query

        Args:
            ex (Exception): the exception raised by the query; its text becomes my error_msg
        """
        # a failed query has no duration
        self.duration = None
        self.error_msg = str(ex)
        self.apply_error_filter()

    @classmethod
    def from_record(cls, record: Dict) -> "QueryStats":
        """
        Class method to instantiate QueryStats
        from a dictionary record.

        Args:
            record (Dict): a record as created by as_record; missing keys default to None

        Returns:
            QueryStats: the instance filled from the record
        """
        stat = cls(
            query_id=record.get("query_id", None),
            endpoint_name=record.get("endpoint_name", None),
            # context is written by as_record, so it must be read back here
            context=record.get("context", None),
            records=record.get("records", None),
            error_msg=record.get("error_msg", None),
            error_category=record.get("error_category", None),
            filtered_msg=record.get("filtered_msg", None),
        )
        # the init=False fields are generated on construction;
        # values stored in the record take precedence
        stat.stats_id = record.get("stats_id", stat.stats_id)
        stat.time_stamp = record.get("time_stamp", stat.time_stamp)
        stat.duration = record.get("duration", None)
        return stat

    def as_record(self) -> Dict:
        """
        convert my declared attributes to a dict
        @TODO maybe use asdict from dataclasses instead?
        """
        # only fields that are actually set as attributes are included
        return {
            _field.name: getattr(self, _field.name)
            for _field in fields(self)
            if hasattr(self, _field.name)
        }

    @classmethod
    def get_samples(cls) -> dict[str, list["QueryStats"]]:
        """
        get samples for QueryStats

        Returns:
            dict: the sample set name "snapquery-examples" mapped to a list of sample instances
        """
        samples = {
            "snapquery-examples": [
                cls(
                    query_id="horses--snapquery-examples@wikidata.org",
                    endpoint_name="wikidata",
                    context="samples",
                    records=0,
                    error_msg="HTTP Error 504: Query has timed out.",
                    filtered_msg="Timeout: HTTP Error 504: Query has timed out.",
                    error_category="Timeout",
                ),
                cls(
                    query_id="cats--snapquery-examples@wikidata.org",
                    endpoint_name="wikidata",
                    context="samples",
                    records=223,
                    error_msg="",
                    error_category=None,
                    filtered_msg="",
                ),
            ]
        }
        # duration is not a constructor argument, so it is set afterwards
        for sample_list in samples.values():
            for sample in sample_list:
                sample.duration = 0.5
        return samples

    def is_successful(self) -> bool:
        """
        Returns True if the query was successful

        A query counts as successful when a non-zero duration has been
        recorded and error_msg is None.
        """
        # bool() so that a real boolean is returned instead of the duration value or None
        return bool(self.duration) and self.error_msg is None

__post_init__()

Post-initialization processing to construct a unique identifier for the query and record the timestamp when the query stats object is created.

Source code in snapquery/snapquery_core.py
55
56
57
58
59
60
61
def __post_init__(self):
    """
    Finish initialization: give these stats a fresh random UUID as stats_id
    and remember the moment of creation as time_stamp.
    """
    new_id = uuid.uuid4()
    self.stats_id = str(new_id)
    created_at = datetime.datetime.now()
    self.time_stamp = created_at

apply_error_filter(for_html=False)

Applies an error filter to the error message and sets the filtered message.

Parameters:

Name Type Description Default
for_html bool

If True, formats the message for HTML output.

False

Returns:

Name Type Description
ErrorFilter ErrorFilter

the error filter that has been applied

Source code in snapquery/snapquery_core.py
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def apply_error_filter(self, for_html: bool = False) -> ErrorFilter:
    """
    Run my error message through an ErrorFilter and keep the outcome.

    Sets filtered_msg to the filter's message and error_category to the
    filter's category.

    Args:
        for_html (bool): passed on to the filter's get_message; If True, formats the message for HTML output.

    Returns:
        ErrorFilter: the error filter that has been applied
    """
    applied_filter = ErrorFilter(self.error_msg)
    # the message is fetched before the category is read - keep this order
    filtered_text = applied_filter.get_message(for_html=for_html)
    self.filtered_msg = filtered_text
    self.error_category = applied_filter.category
    return applied_filter

as_record()

Convert my declared attributes to a dict. @TODO: maybe use asdict from dataclasses instead?

Source code in snapquery/snapquery_core.py
111
112
113
114
115
116
117
118
119
120
121
def as_record(self) -> Dict:
    """
    convert my declared dataclass fields to a dict

    Returns:
        Dict: field name to current value for every declared field
            that is actually set as an attribute
    @TODO may be use asdict from dataclasses instead?
    """
    # a field without a value yet has no attribute and is left out
    return {
        declared.name: getattr(self, declared.name)
        for declared in fields(self)
        if hasattr(self, declared.name)
    }

done()

Set the duration by calculating the elapsed time since the time_stamp.

Source code in snapquery/snapquery_core.py
63
64
65
66
67
def done(self):
    """
    Mark the query as finished: store the seconds elapsed since `time_stamp` as duration.
    """
    elapsed = datetime.datetime.now() - self.time_stamp
    self.duration = elapsed.total_seconds()

error(ex)

Handle exception of query

Source code in snapquery/snapquery_core.py
84
85
86
87
88
89
90
def error(self, ex: Exception):
    """
    Record a failed query.

    Args:
        ex (Exception): the exception raised by the query; its text becomes my error_msg
    """
    # a failed query has no duration
    self.duration, self.error_msg = None, str(ex)
    # derive filtered message and category from the new error message
    self.apply_error_filter()

from_record(record) classmethod

Class method to instantiate QueryStats from a dictionary record.

Source code in snapquery/snapquery_core.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
@classmethod
def from_record(cls, record: Dict) -> "QueryStats":
    """
    Class method to instantiate QueryStats
    from a dictionary record.

    Args:
        record (Dict): the record to read; missing keys default to None

    Returns:
        QueryStats: the instance filled from the record
    """
    stat = cls(
        query_id=record.get("query_id", None),
        endpoint_name=record.get("endpoint_name", None),
        # context belongs to the record too and must not be dropped
        context=record.get("context", None),
        records=record.get("records", None),
        error_msg=record.get("error_msg", None),
        error_category=record.get("error_category", None),
        filtered_msg=record.get("filtered_msg", None),
    )
    # stats_id and time_stamp already hold the values set on construction;
    # values stored in the record take precedence
    stat.stats_id = record.get("stats_id", stat.stats_id)
    stat.time_stamp = record.get("time_stamp", stat.time_stamp)
    stat.duration = record.get("duration", None)
    return stat

get_samples() classmethod

get samples for QueryStats

Source code in snapquery/snapquery_core.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
@classmethod
def get_samples(cls) -> dict[str, list["QueryStats"]]:
    """
    get samples for QueryStats

    Returns:
        dict: the sample set name "snapquery-examples" mapped to a list of
            two sample instances - one timed out query and one query with results
    """
    timed_out = cls(
        query_id="horses--snapquery-examples@wikidata.org",
        endpoint_name="wikidata",
        context="samples",
        records=0,
        error_msg="HTTP Error 504: Query has timed out.",
        filtered_msg="Timeout: HTTP Error 504: Query has timed out.",
        error_category="Timeout",
    )
    with_results = cls(
        query_id="cats--snapquery-examples@wikidata.org",
        endpoint_name="wikidata",
        context="samples",
        records=223,
        error_msg="",
        error_category=None,
        filtered_msg="",
    )
    samples = {"snapquery-examples": [timed_out, with_results]}
    # duration is not passed to the constructor, so it is set afterwards
    for sample_list in samples.values():
        for sample in sample_list:
            sample.duration = 0.5
    return samples

is_successful()

Returns True if the query was successful

Source code in snapquery/snapquery_core.py
156
157
158
159
160
def is_successful(self) -> bool:
    """
    Returns True if the query was successful

    A query counts as successful when a non-zero duration has been
    recorded and error_msg is None.

    Returns:
        bool: True if successful, False otherwise
    """
    # bool() so that a real boolean is returned instead of the duration value or None
    return bool(self.duration) and self.error_msg is None

QueryStatsList

a list of query statistics

Source code in snapquery/snapquery_core.py
458
459
460
461
462
463
464
465
@lod_storable
class QueryStatsList:
    """
    a list of query statistics

    Pairs a list name with the QueryStats entries that belong to it.
    """

    name: str  # the name of the list
    # the collected statistics; default_factory gives every instance its own empty list
    stats: List[QueryStats] = field(default_factory=list)

snapquery_view

Created on 2024-05-03

@author: wf

NamedQuerySearch

Bases: BaseQueryView

search for namedqueries

Source code in snapquery/snapquery_view.py
233
234
235
236
237
238
239
class NamedQuerySearch(BaseQueryView):
    """
    search for named queries

    Adds nothing to BaseQueryView; the constructor only forwards the
    solution to the base class.
    """

    def __init__(self, solution: InputWebSolution):
        """
        constructor

        Args:
            solution (InputWebSolution): the solution passed on to
                BaseQueryView; its debug parameter keeps its default
        """
        super().__init__(solution)

NamedQueryView

display a named Query

Source code in snapquery/snapquery_view.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
class NamedQueryView:
    """
    display a named Query

    Shows the query settings (limit, time out, endpoint), a "try it" link,
    the annotated query text, optional execution statistics and the result
    grid, and runs the query in a background task on request.
    """

    def __init__(
        self,
        solution: InputWebSolution,
        query_bundle: QueryBundle,
        r_format_str: str = "html",
    ):
        """
        constructor

        Args:
            solution (InputWebSolution): the solution providing the container,
                the webserver and the named query manager (nqm)
            query_bundle (QueryBundle): the named query and query to display
            r_format_str (str): stored on the instance; not read by any
                method of this class
        """
        self.solution = solution
        # NOTE(review): the endpoint select in setup_ui is bound to this
        # attribute while being created with solution.endpoint_name as its
        # value; depending on how bind_value initialises the binding this
        # default may win over the solution's endpoint - confirm
        self.endpoint_name = "wikidata"
        self.nqm: NamedQueryManager = self.solution.nqm
        self.query_bundle = query_bundle
        self.r_format_str = r_format_str
        # the background task of the most recent run_query call
        self.load_task = None
        # defaults for the "limit" and "time out" number inputs
        self.limit = 200
        self.timeout = 20.0
        # preload ValueFormatter
        ValueFormatter.getFormats()
        self.setup_ui()

    def setup_ui(self):
        """
        setup my user interface

        Creates, in this order: the settings row, the query row with the
        try-it link and run button, the "Show Query" expansion, the
        "Show Query Stats" expansion (authenticated users only) and the
        "Query Results" expansion.
        """
        nq = self.query_bundle.named_query
        url = self.query_bundle.query.tryItUrl
        text = nq.title
        tooltip = "try it!"
        link = Link.create(url, text, tooltip, target="_blank")
        with self.solution.container:
            with ui.column():
                with ui.row() as self.query_settings_row:
                    self.query_settings_row.classes("w-full")
                    ui.number(label="limit").bind_value(self, "limit")
                    ui.number(label="time out").bind_value(self, "timeout")
                    endpoint_selector = ui.select(
                        list(self.nqm.endpoints.keys()),
                        value=self.solution.endpoint_name,
                        label="endpoint",
                    )
                    endpoint_selector.bind_value(
                        self,
                        "endpoint_name",
                    )
                    endpoint_selector.classes("w-64")
                with ui.row() as self.query_row:
                    self.try_it_link = ui.html(link)
                    ui.label(nq.description)
                    self.params = Params(nq.sparql)
                    # params_view/params_edit only exist for parameterized queries
                    if self.params.has_params:
                        self.params_view = ParamsView(self, self.params)
                        self.params_edit = self.params_view.get_dict_edit()
                    ui.button(icon="play_arrow", on_click=self.run_query)
                    # filled with the record count and duration after a successful run
                    self.stats_html = ui.html()
                with ui.row():
                    with ui.expansion("Show Query", icon="manage_search").classes("w-full"):
                        query_syntax_highlight = QuerySyntaxHighlight(self.query_bundle.query)
                        syntax_highlight_css = query_syntax_highlight.formatter.get_style_defs()
                        annotated_query = SparqlQueryAnnotater(self.query_bundle.query)
                        ui.add_css(syntax_highlight_css)
                        # ui.html(query_syntax_highlight.highlight())
                        ui.html(annotated_query.annotate())
                # stats_container is only created for authenticated users
                if self.solution.webserver.authenticated():
                    with ui.row().classes("w-full"):
                        with ui.expansion("Show Query Stats", icon="query_stats") as self.stats_container:
                            self.stats_container.classes("w-full")
                            self.load_stats()
                self.grid_row = ui.expansion("Query Results", icon="table_rows", value=True)
                self.grid_row.classes("w-full")
                with self.grid_row:
                    ui.label("Not yet executed ")
                    ui.button("Run Query", icon="play_arrow", on_click=self.run_query)

    def load_stats(self):
        """
        display query stats

        Rebuilds the content of the stats container: a box plot of the
        durations of successful executions per endpoint, a bar chart of
        failed executions per endpoint and (shortened) error message,
        a fallback label when there are no statistics, and a button that
        calls this method again.
        """
        self.stats_container.clear()
        with self.stats_container:
            container = ui.row()
        query_stats = self.nqm.get_query_stats(self.query_bundle.named_query.query_id)
        errors = [stat for stat in query_stats if not stat.is_successful()]
        successful = [stat for stat in query_stats if stat.is_successful()]
        if successful:
            # group the successful stats by endpoint name
            exec_times_by_endpoint: dict[str, list[QueryStats]] = {}
            for stat in successful:
                if stat.endpoint_name not in exec_times_by_endpoint:
                    exec_times_by_endpoint[stat.endpoint_name] = []
                exec_times_by_endpoint[stat.endpoint_name].append(stat)
            # one box trace per endpoint
            data = []
            for endpoint_name, stats in exec_times_by_endpoint.items():
                record = {
                    "type": "box",
                    "name": endpoint_name,
                    "x": [stat.duration for stat in stats],
                }
                data.append(record)
            fig = {
                "data": data,
                "layout": {
                    "margin": {"l": 200, "r": 15, "t": 30, "b": 30},
                    "plot_bgcolor": "#E5ECF6",
                    "xaxis": {"gridcolor": "white", "title": "Execution Time [s]"},
                    "yaxis": {"gridcolor": "white", "title": "Endpoint"},
                    "title": "Query Execution Times by Endpoint",
                },
                "config": {
                    "staticPlot": True,
                },
            }
            with container:
                ui.plotly(fig)
        if errors:
            error_records = [stat.as_record() for stat in errors]
            # shorten the messages to 16 characters so that they can be used
            # as grouping key and color legend
            for record in error_records:
                if record["error_msg"]:
                    record["error_msg"] = record["error_msg"][:16] + "..."
                else:
                    record["error_msg"] = "<unkown>"
            error_df = pd.DataFrame.from_records(error_records)
            # after count() the query_id column holds the number of rows per group
            error_df_grouped = error_df.groupby(["endpoint_name", "error_msg"], as_index=False).count()
            error_fig = px.bar(
                error_df_grouped,
                x="endpoint_name",
                y="query_id",
                title="Query Execution Errors",
                labels={"query_id": "count", "endpoint_name": "Endpoint"},
                color="error_msg",
            )
            error_fig.update_layout(margin=dict(l=15, r=15, t=30, b=15))
            with container:
                ui.plotly(error_fig)
        if not successful and not errors:
            with container:
                ui.label("No query statistics available")
        with container:
            ui.button("Update statistics", icon="update", on_click=self.load_stats)

    async def load_query_results(self):
        """
        (re) load the query results

        Applies the parameters, limit and selected endpoint to the query
        bundle, executes the query via run.io_bound, stores the resulting
        stats and shows either the error message or the record count with
        a grid of the formatted records.
        """
        if self.params.has_params:
            self.query_bundle.query.query = self.params.apply_parameters()
            self.params_view.close()
        self.query_bundle.set_limit(int(self.limit))
        endpoint = self.nqm.endpoints[self.endpoint_name]
        self.query_bundle.update_endpoint(endpoint)
        result = await run.io_bound(self.query_bundle.get_lod_with_stats)
        if not result:
            with self.solution.container:
                ui.notify("query execution failure")
            return
        lod, stats = result
        self.nqm.store_stats([stats])
        # removes the spinner / previous results
        self.grid_row.clear()
        if stats.error_msg:
            with self.grid_row:
                stats.apply_error_filter()
                markup = f'<span style="color: red;">{stats.filtered_msg}</span>'
                ui.html(markup)
        else:
            with self.query_row:
                record_count = len(lod) if lod is not None else 0
                markup = f'<span style="color: green;">{record_count} records in {stats.duration:.2f} secs</span>'
                self.stats_html.content = markup
        # an empty or missing list of records ends here, also after a successful query
        if not lod:
            with self.query_row:
                ui.notify("query failed")
            return
        query = self.query_bundle.query
        query.formats = ["*:wikidata"]
        tablefmt = "html"
        query.preFormatWithCallBacks(lod, tablefmt=tablefmt)
        query.formatWithValueFormatters(lod, tablefmt=tablefmt)
        # turn remaining plain string values starting with "http" into links
        for record in lod:
            for key, value in record.items():
                if isinstance(value, str):
                    if value.startswith("http"):
                        record[key] = Link.create(value, value)
        with self.grid_row:
            self.lod_grid = ListOfDictsGrid()
            self.lod_grid.load_lod(lod)
        self.grid_row.update()

    async def run_query(self, _args):
        """
        run the current query

        Replaces the result area with a spinner, cancels a load task that
        is still running and starts load_query_results as a new background
        task. The new task is cancelled when it is still running after
        self.timeout seconds.

        Args:
            _args: the event arguments of the triggering button (unused)
        """

        def cancel(task):
            # only a task that exists and has not finished needs cancelling
            if task is not None and not task.done():
                task.cancel()

        self.grid_row.clear()
        with self.grid_row:
            ui.spinner()
        self.grid_row.update()
        # cancel task still running
        cancel(self.load_task)
        # run task in background
        load_task = background_tasks.create(self.load_query_results())
        self.load_task = load_task
        # cancel task if it takes too long; the timer is bound to the task
        # started by this call so that a timer left over from an earlier
        # run can not cancel a later run before its own time out
        ui.timer(self.timeout, lambda: cancel(load_task), once=True)

load_query_results() async

(re) load the query results

Source code in snapquery/snapquery_view.py
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
async def load_query_results(self):
    """
    (re) load the query results

    Prepares the query bundle (parameters, limit, endpoint), executes it
    off the event loop, stores the stats and renders either the error
    message or the record summary plus the result grid.
    """
    bundle = self.query_bundle
    if self.params.has_params:
        bundle.query.query = self.params.apply_parameters()
        self.params_view.close()
    bundle.set_limit(int(self.limit))
    bundle.update_endpoint(self.nqm.endpoints[self.endpoint_name])
    outcome = await run.io_bound(bundle.get_lod_with_stats)
    if not outcome:
        with self.solution.container:
            ui.notify("query execution failure")
        return
    records, stats = outcome
    self.nqm.store_stats([stats])
    # drop whatever the result area showed before
    self.grid_row.clear()
    if not stats.error_msg:
        record_count = 0 if records is None else len(records)
        with self.query_row:
            self.stats_html.content = (
                f'<span style="color: green;">{record_count} records in {stats.duration:.2f} secs</span>'
            )
    else:
        with self.grid_row:
            stats.apply_error_filter()
            ui.html(f'<span style="color: red;">{stats.filtered_msg}</span>')
    # nothing to show in the grid without records
    if not records:
        with self.query_row:
            ui.notify("query failed")
        return
    query = self.query_bundle.query
    query.formats = ["*:wikidata"]
    query.preFormatWithCallBacks(records, tablefmt="html")
    query.formatWithValueFormatters(records, tablefmt="html")
    # plain string values that start with "http" become links
    for row in records:
        for column, cell in row.items():
            if isinstance(cell, str) and cell.startswith("http"):
                row[column] = Link.create(cell, cell)
    with self.grid_row:
        self.lod_grid = ListOfDictsGrid()
        self.lod_grid.load_lod(records)
    self.grid_row.update()

load_stats()

display query stats

Source code in snapquery/snapquery_view.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
def load_stats(self):
    """
    display query stats

    Clears the stats container and fills it with a box plot of the
    durations of successful executions per endpoint, a bar chart of the
    failed executions, a fallback label if there is neither, and a
    button that reloads the statistics.
    """
    self.stats_container.clear()
    with self.stats_container:
        container = ui.row()
    query_id = self.query_bundle.named_query.query_id
    query_stats = self.nqm.get_query_stats(query_id)
    failed = [entry for entry in query_stats if not entry.is_successful()]
    succeeded = [entry for entry in query_stats if entry.is_successful()]
    if succeeded:
        # collect the successful stats per endpoint
        stats_by_endpoint: dict[str, list[QueryStats]] = {}
        for entry in succeeded:
            stats_by_endpoint.setdefault(entry.endpoint_name, []).append(entry)
        # one box trace per endpoint
        traces = [
            {
                "type": "box",
                "name": endpoint_name,
                "x": [entry.duration for entry in endpoint_stats],
            }
            for endpoint_name, endpoint_stats in stats_by_endpoint.items()
        ]
        layout = {
            "margin": {"l": 200, "r": 15, "t": 30, "b": 30},
            "plot_bgcolor": "#E5ECF6",
            "xaxis": {"gridcolor": "white", "title": "Execution Time [s]"},
            "yaxis": {"gridcolor": "white", "title": "Endpoint"},
            "title": "Query Execution Times by Endpoint",
        }
        duration_fig = {
            "data": traces,
            "layout": layout,
            "config": {"staticPlot": True},
        }
        with container:
            ui.plotly(duration_fig)
    if failed:
        error_records = [entry.as_record() for entry in failed]
        # the shortened message serves as grouping key and legend entry
        for error_record in error_records:
            message = error_record["error_msg"]
            error_record["error_msg"] = (message[:16] + "...") if message else "<unkown>"
        grouping = ["endpoint_name", "error_msg"]
        error_df = pd.DataFrame.from_records(error_records)
        # count() turns the query_id column into the group size
        error_counts = error_df.groupby(grouping, as_index=False).count()
        error_fig = px.bar(
            error_counts,
            x="endpoint_name",
            y="query_id",
            title="Query Execution Errors",
            labels={"query_id": "count", "endpoint_name": "Endpoint"},
            color="error_msg",
        )
        error_fig.update_layout(margin={"l": 15, "r": 15, "t": 30, "b": 15})
        with container:
            ui.plotly(error_fig)
    if not (succeeded or failed):
        with container:
            ui.label("No query statistics available")
    with container:
        ui.button("Update statistics", icon="update", on_click=self.load_stats)

run_query(_args) async

run the current query

Source code in snapquery/snapquery_view.py
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
async def run_query(self, _args):
    """
    run the current query

    Replaces the result area with a spinner, cancels a load task that is
    still running and starts load_query_results as a new background task.
    The new task is cancelled when it is still running after self.timeout
    seconds.

    Args:
        _args: the event arguments of the triggering button (unused)
    """

    def cancel(task):
        # only a task that exists and has not finished needs cancelling
        if task is not None and not task.done():
            task.cancel()

    self.grid_row.clear()
    with self.grid_row:
        ui.spinner()
    self.grid_row.update()
    # cancel task still running
    cancel(self.load_task)
    # run task in background
    load_task = background_tasks.create(self.load_query_results())
    self.load_task = load_task
    # cancel task if it takes too long; the timer is bound to the task
    # started by this call so that a timer left over from an earlier run
    # can not cancel a later run before its own time out
    ui.timer(self.timeout, lambda: cancel(load_task), once=True)

setup_ui()

setup my user interface

Source code in snapquery/snapquery_view.py
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def setup_ui(self):
    """
    setup my user interface

    Builds, top to bottom inside the solution container: the settings
    row (limit, time out, endpoint), the query row (try-it link,
    description, optional parameter editor, run button, stats markup),
    the "Show Query" expansion, the "Show Query Stats" expansion for
    authenticated users and the "Query Results" expansion.
    """
    named_query = self.query_bundle.named_query
    try_it_markup = Link.create(
        self.query_bundle.query.tryItUrl,
        named_query.title,
        "try it!",
        target="_blank",
    )
    with self.solution.container, ui.column():
        with ui.row().classes("w-full") as self.query_settings_row:
            ui.number(label="limit").bind_value(self, "limit")
            ui.number(label="time out").bind_value(self, "timeout")
            endpoint_names = list(self.nqm.endpoints.keys())
            endpoint_select = ui.select(
                endpoint_names,
                value=self.solution.endpoint_name,
                label="endpoint",
            )
            endpoint_select.bind_value(self, "endpoint_name")
            endpoint_select.classes("w-64")
        with ui.row() as self.query_row:
            self.try_it_link = ui.html(try_it_markup)
            ui.label(named_query.description)
            self.params = Params(named_query.sparql)
            # the parameter editor is only created for parameterized queries
            if self.params.has_params:
                self.params_view = ParamsView(self, self.params)
                self.params_edit = self.params_view.get_dict_edit()
            ui.button(icon="play_arrow", on_click=self.run_query)
            self.stats_html = ui.html()
        with ui.row(), ui.expansion("Show Query", icon="manage_search").classes("w-full"):
            highlighter = QuerySyntaxHighlight(self.query_bundle.query)
            highlight_css = highlighter.formatter.get_style_defs()
            annotater = SparqlQueryAnnotater(self.query_bundle.query)
            ui.add_css(highlight_css)
            # the annotated query is shown; the highlighter only supplies the css
            ui.html(annotater.annotate())
        # the stats expansion (and self.stats_container) exists only when authenticated
        if self.solution.webserver.authenticated():
            with ui.row().classes("w-full"):
                stats_expansion = ui.expansion("Show Query Stats", icon="query_stats")
                with stats_expansion as self.stats_container:
                    self.stats_container.classes("w-full")
                    self.load_stats()
        self.grid_row = ui.expansion("Query Results", icon="table_rows", value=True).classes("w-full")
        with self.grid_row:
            ui.label("Not yet executed ")
            ui.button("Run Query", icon="play_arrow", on_click=self.run_query)

snapquery_webserver

Created on 2024-05-03 @author: wf

SnapQuerySolution

Bases: InputWebSolution

the Snap Query solution

Source code in snapquery/snapquery_webserver.py
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
class SnapQuerySolution(InputWebSolution):
    """
    the Snap Query solution

    Per-client solution: builds the menu, the settings and the individual
    pages (home/search, nominate, admin, login, stats, queries by
    namespace, single query page).
    """

    def __init__(self, webserver: SnapQueryWebServer, client: Client):
        """
        Initialize the solution

        Calls the constructor of the base solution

        Args:
            webserver (SnapQueryWebServer): The webserver instance associated with this context.
            client (Client): The client instance this context is associated with.
        """
        super().__init__(webserver, client)  # Call to the superclass constructor
        # annotation only: declares the type of the attribute, assigns nothing
        self.webserver: SnapQueryWebServer
        self.nqm = self.webserver.nqm
        self.endpoint_name = self.get_user_endpoint()

    def configure_settings(self):
        """
        add additional settings

        Adds a select for the default endpoint and one for the prefix
        merger; both are bound to the user storage.
        """
        self.add_select("default Endpoint", list(self.nqm.endpoints.keys()), value=self.endpoint_name,).bind_value(
            app.storage.user,
            "endpoint_name",
        )
        self.add_select(
            "prefix merger",
            {merger.name: merger.value for merger in QueryPrefixMerger},
            value=self.get_user_prefix_merger().name,
        ).bind_value(
            app.storage.user,
            "prefix_merger",
        )

    def setup_menu(self, detailed: bool = True):
        """
        setup the menu

        Args:
            detailed (bool): passed on unchanged to the base class setup_menu
        """
        # the lambda looks up self.header only when the button is clicked
        ui.button(icon="menu", on_click=lambda: self.header.toggle())
        # annotation only: declares the type of the attribute, assigns nothing
        self.webserver: SnapQueryWebServer
        super().setup_menu(detailed=detailed)
        with self.header:
            self.header.value = False
            self.link_button("Nominate a Query", "/nominate", "post_add", new_tab=False)
            self.link_button(
                "Queries by Namespace",
                "/queries_by_namespace",
                "view_list",
                new_tab=False,
            )
            if self.webserver.authenticated():
                self.link_button("logout", "/logout", "logout", new_tab=False)
                # the admin link additionally requires webserver.login to report authentication
                if self.webserver.login.authenticated():
                    self.link_button("admin", "/admin", "supervisor_account", new_tab=False)
                self.link_button("stats", "/stats", icon_name="query_stats", new_tab=False)
            else:
                self.link_button("login", "/login", "login", new_tab=False)
                if self.webserver.orcid_auth.available():
                    redirect_url = self.webserver.orcid_auth.authenticate_url()
                    self.link_button("login with orcid", redirect_url, "login", new_tab=False)
            if self.webserver.orcid_auth.authenticated():
                # NOTE(review): assumes a cached token is available whenever
                # orcid_auth.authenticated() is true - confirm
                orcid_token = self.webserver.orcid_auth.get_cached_user_access_token()
                ui.markdown(f"*logged in as* **{orcid_token.name} ({orcid_token.orcid})**").props(
                    "flat color=white icon=folder"
                ).classes("ml-auto")

    async def nominate_ui(self):
        """
        nominate a new query

        First shows a person selector; after a person has been selected
        the page is rebuilt with the person's details and a query import
        view that does not allow importing from a URL.
        """

        def show():
            """
            show the nominate ui
            """

            def selection_callback(person: Person):
                """
                rebuild the page for the selected person

                Args:
                    person (Person): the person picked in the person selector
                """
                self.container.clear()
                with self.container:
                    with ui.row().classes("w-full"):
                        with ui.column():
                            ui.label(text="Nominate your Query").classes("text-xl")
                            ui.link(
                                text="see the documentation for detailed information on the nomination procedure",
                                new_tab=True,
                                target="https://wiki.bitplan.com/index.php/Snapquery#nominate",
                            )
                        PersonView(person).classes("ml-auto bg-slate-100 rounded-md")
                # NOTE(review): this row is opened outside of the
                # `with self.container` block above - confirm this is intended
                with ui.row().classes("w-full"):
                    self.query_import_view = QueryImportView(self, allow_importing_from_url=False, person=person)

            with ui.column():
                ui.label(text="Nominate your Query").classes("text-xl")
                ui.link(
                    text="see the documentation for detailed information on the nomination procedure",
                    new_tab=True,
                    target="https://wiki.bitplan.com/index.php/Snapquery#nominate",
                )
                ui.label("Please identify yourself by entering or looking up a valid PID(Wikidata ID, ORCID, dblp).")
                self.person_selector = PersonSelector(solution=self, selection_callback=selection_callback)

        await self.setup_content_div(show)

    async def admin_ui(self):
        """
        admin ui
        """

        def show():
            """
            create the query import view with its default options
            """
            self.query_import_view = QueryImportView(self)

        await self.setup_content_div(show)

    async def login_ui(self):
        """
        login ui
        """
        await self.webserver.login.login(self)

    async def stats_ui(self):
        """
        stats ui
        """

        def show():
            """
            create the query stats view; the instance is not kept
            """
            QueryStatsView(self)

        await self.setup_content_div(show)

    def setup_ui(self):
        """
        setup my user interface
        """
        self.search = NamedQuerySearch(self)

    async def home(
        self,
    ):
        """Generates the home page"""
        await self.setup_content_div(self.setup_ui)

    async def queries_by_namespace(self):
        """
        show the namespace statistics page
        """

        def show():
            """
            create the namespace stats view
            """
            _nsv = NamespaceStatsView(self)

        await self.setup_content_div(show)

    async def query_page(
        self,
        domain: str,
        namespace: str,
        name: str,
        endpoint_name: str = "wikidata",
        limit: int = None,
        r_format_str: str = "html",
    ):
        """
        show the page for a single named query

        Args:
            domain (str): the domain part of the query name
            namespace (str): the namespace part of the query name
            name (str): the name part of the query name
            endpoint_name (str): the endpoint the query bundle is requested for
            limit (int): passed on to nqm.get_query; None by default
            r_format_str (str): passed on to the NamedQueryView
        """

        def show():
            """
            look up the query bundle and create the named query view
            """
            query_name = QueryName(domain=domain, namespace=namespace, name=name)
            qb = self.nqm.get_query(
                query_name=query_name,
                endpoint_name=endpoint_name,
                limit=limit,
                prefix_merger=self.get_user_prefix_merger(),
            )
            self.named_query_view = NamedQueryView(self, query_bundle=qb, r_format_str=r_format_str)

        await self.setup_content_div(show)

    @staticmethod
    def get_user_endpoint() -> str:
        """
        Get the endpoint selected by the user. If no endpoint is selected return the default endpoint wikidata
        """
        endpoint = app.storage.user.get("endpoint_name", "wikidata")
        return endpoint

    @staticmethod
    def get_user_prefix_merger() -> QueryPrefixMerger:
        """
        Get the prefix merger selected by the user. If no merger is selected the default merger Simple merger is used
        """
        merger_name = app.storage.user.get("prefix_merger", None)
        # get_by_name is also called with None when nothing is stored yet
        merger = QueryPrefixMerger.get_by_name(merger_name)
        # remember the resolved merger so that the user storage has a value
        if merger_name is None:
            app.storage.user["prefix_merger"] = merger.name
        return merger

__init__(webserver, client)

Initialize the solution

Calls the constructor of the base solution. Args: webserver (SnapQueryWebServer): the webserver instance associated with this context; client (Client): the client instance this context is associated with.

Source code in snapquery/snapquery_webserver.py
286
287
288
289
290
291
292
293
294
295
296
297
298
def __init__(self, webserver: SnapQueryWebServer, client: Client):
    """
    Initialize the solution

    Calls the constructor of the base solution

    Args:
        webserver (SnapQueryWebServer): The webserver instance associated with this context.
        client (Client): The client instance this context is associated with.
    """
    super().__init__(webserver, client)  # Call to the superclass constructor
    # annotation only: declares the type of the attribute, assigns nothing
    # (self.webserver is presumably set by the base constructor - confirm)
    self.webserver: SnapQueryWebServer
    self.nqm = self.webserver.nqm
    # the endpoint stored for the user, "wikidata" if none is stored
    self.endpoint_name = self.get_user_endpoint()

admin_ui() async

admin ui

Source code in snapquery/snapquery_webserver.py
386
387
388
389
390
391
392
393
394
395
async def admin_ui(self):
    """
    admin ui

    The page content is a query import view created with its default options.
    """

    def show_import_view():
        """
        create the query import view and keep it on the solution
        """
        self.query_import_view = QueryImportView(self)

    await self.setup_content_div(show_import_view)

configure_settings()

add additional settings

Source code in snapquery/snapquery_webserver.py
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
def configure_settings(self):
    """
    add additional settings

    Two selects are added, each bound to a key of the user storage:
    the default endpoint ("endpoint_name") and the prefix merger
    ("prefix_merger").
    """
    endpoint_names = list(self.nqm.endpoints.keys())
    endpoint_select = self.add_select(
        "default Endpoint",
        endpoint_names,
        value=self.endpoint_name,
    )
    endpoint_select.bind_value(app.storage.user, "endpoint_name")

    merger_options = {merger.name: merger.value for merger in QueryPrefixMerger}
    merger_select = self.add_select(
        "prefix merger",
        merger_options,
        value=self.get_user_prefix_merger().name,
    )
    merger_select.bind_value(app.storage.user, "prefix_merger")

get_user_endpoint() staticmethod

Get the endpoint selected by the user. If no endpoint is selected, return the default endpoint "wikidata".

Source code in snapquery/snapquery_webserver.py
453
454
455
456
457
458
459
@staticmethod
def get_user_endpoint() -> str:
    """
    Get the endpoint name stored for the user.

    Returns:
        str: the value of "endpoint_name" in the user storage,
            "wikidata" if the key is not set
    """
    return app.storage.user.get("endpoint_name", "wikidata")

get_user_prefix_merger() staticmethod

Get the prefix merger selected by the user. If no merger is selected, the default merger (Simple merger) is used.

Source code in snapquery/snapquery_webserver.py
461
462
463
464
465
466
467
468
469
470
@staticmethod
def get_user_prefix_merger() -> QueryPrefixMerger:
    """
    Get the prefix merger selected by the user.

    The name stored under "prefix_merger" in the user storage is resolved
    with QueryPrefixMerger.get_by_name. If nothing is stored yet, the name
    of the resolved merger is written back to the user storage.

    Returns:
        QueryPrefixMerger: the resolved merger
    """
    user_storage = app.storage.user
    stored_name = user_storage.get("prefix_merger", None)
    merger = QueryPrefixMerger.get_by_name(stored_name)
    if stored_name is None:
        user_storage["prefix_merger"] = merger.name
    return merger

home() async

Generates the home page

Source code in snapquery/snapquery_webserver.py
420
421
422
423
424
async def home(self):
    """
    Generates the home page

    The page content is built by setup_ui.
    """
    await self.setup_content_div(self.setup_ui)

login_ui() async

login ui

Source code in snapquery/snapquery_webserver.py
397
398
399
400
401
async def login_ui(self):
    """
    login ui

    Delegates to the login handler of the webserver, handing over this solution.
    """
    await self.webserver.login.login(self)

nominate_ui() async

nominate a new query

Source code in snapquery/snapquery_webserver.py
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
async def nominate_ui(self):
    """
    nominate a new query

    The page starts with a person selector. Once a person is selected the
    container is cleared and rebuilt with the person's details and a query
    import view that does not allow importing from a URL.
    """

    def nomination_intro():
        """
        add the heading and the documentation link shown in both page states
        """
        ui.label(text="Nominate your Query").classes("text-xl")
        ui.link(
            text="see the documentation for detailed information on the nomination procedure",
            new_tab=True,
            target="https://wiki.bitplan.com/index.php/Snapquery#nominate",
        )

    def on_person_selected(person: Person):
        """
        rebuild the page for the selected person
        """
        self.container.clear()
        with self.container:
            with ui.row().classes("w-full"):
                with ui.column():
                    nomination_intro()
                PersonView(person).classes("ml-auto bg-slate-100 rounded-md")
        # NOTE(review): this row is opened outside of the
        # `with self.container` block above - confirm this is intended
        with ui.row().classes("w-full"):
            self.query_import_view = QueryImportView(self, allow_importing_from_url=False, person=person)

    def show():
        """
        show the nominate ui
        """
        with ui.column():
            nomination_intro()
            ui.label("Please identify yourself by entering or looking up a valid PID(Wikidata ID, ORCID, dblp).")
            self.person_selector = PersonSelector(solution=self, selection_callback=on_person_selected)

    await self.setup_content_div(show)

setup_menu(detailed=True)

setup the menu

Source code in snapquery/snapquery_webserver.py
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
def setup_menu(self, detailed: bool = True):
    """
    setup the menu

    Adds a menu button that toggles the header, lets the base class set up
    its menu and then adds the snapquery specific link buttons to the header.
    Which buttons are shown depends on the authentication state of the webserver.

    Args:
        detailed (bool): passed on unchanged to the setup_menu of the base class
    """
    # the lambda resolves self.header only when the button is clicked
    # NOTE(review): presumably self.header is created by the base class
    # setup_menu call below - confirm
    ui.button(icon="menu", on_click=lambda: self.header.toggle())
    # annotation only - declares the expected type of self.webserver, nothing is assigned
    self.webserver: SnapQueryWebServer
    super().setup_menu(detailed=detailed)
    with self.header:
        # NOTE(review): presumably this makes the header start hidden - confirm
        self.header.value = False
        self.link_button("Nominate a Query", "/nominate", "post_add", new_tab=False)
        self.link_button(
            "Queries by Namespace",
            "/queries_by_namespace",
            "view_list",
            new_tab=False,
        )
        if self.webserver.authenticated():
            self.link_button("logout", "/logout", "logout", new_tab=False)
            # the admin link is only offered when the webserver's login
            # object itself reports an authenticated user
            if self.webserver.login.authenticated():
                self.link_button("admin", "/admin", "supervisor_account", new_tab=False)
            self.link_button("stats", "/stats", icon_name="query_stats", new_tab=False)
        else:
            self.link_button("login", "/login", "login", new_tab=False)
            # the ORCID login button is only shown when ORCID authentication is available
            if self.webserver.orcid_auth.available():
                redirect_url = self.webserver.orcid_auth.authenticate_url()
                self.link_button("login with orcid", redirect_url, "login", new_tab=False)
        # show name and ORCID of the user taken from the cached access token
        if self.webserver.orcid_auth.authenticated():
            orcid_token = self.webserver.orcid_auth.get_cached_user_access_token()
            ui.markdown(f"*logged in as* **{orcid_token.name} ({orcid_token.orcid})**").props(
                "flat color=white icon=folder"
            ).classes("ml-auto")

setup_ui()

setup my user interface

Source code in snapquery/snapquery_webserver.py
414
415
416
417
418
def setup_ui(self):
    """
    setup my user interface

    Creates a NamedQuerySearch for this solution and keeps it in self.search.
    """
    self.search = NamedQuerySearch(self)

stats_ui() async

stats ui

Source code in snapquery/snapquery_webserver.py
403
404
405
406
407
408
409
410
411
412
async def stats_ui(self):
    """
    Page showing the query statistics.

    The QueryStatsView is created by a callback that is handed over
    to setup_content_div.
    """

    def build_stats_view():
        """
        create the statistics view for this solution
        """
        QueryStatsView(self)

    await self.setup_content_div(build_stats_view)

SnapQueryWebServer

Bases: InputWebserver

server to supply named Queries

Source code in snapquery/snapquery_webserver.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
class SnapQueryWebServer(InputWebserver):
    """
    server to supply named Queries

    Besides the nicegui pages the server registers a small REST API below
    /api for endpoints, meta queries, SPARQL sources and query results.
    """

    @classmethod
    def get_config(cls) -> WebserverConfig:
        """
        get the configuration for this Webserver

        Returns:
            WebserverConfig: the configuration returned by WebserverConfig.get
                with SnapQuerySolution set as the solution class
        """
        copy_right = ""
        config = WebserverConfig(
            short_name="snapquery",
            copy_right=copy_right,
            version=Version(),
            default_port=9862,
            timeout=6.0,
        )
        server_config = WebserverConfig.get(config)
        server_config.solution_class = SnapQuerySolution
        return server_config

    def __init__(self):
        """
        Constructs all the necessary attributes for the WebServer object
        and registers the pages and API routes.
        """
        InputWebserver.__init__(self, config=SnapQueryWebServer.get_config())
        users = Users("~/.solutions/snapquery")
        self.login = Login(self, users)
        self.orcid_auth = OrcidAuth(Path(self.config.base_path))
        self.nqm = NamedQueryManager.from_samples()

        @ui.page("/admin")
        async def admin(client: Client):
            # only self.login is checked here - self.orcid_auth is not consulted
            if not self.login.authenticated():
                return RedirectResponse("/login")
            return await self.page(client, SnapQuerySolution.admin_ui)

        @ui.page("/nominate")
        async def nominate(client: Client):
            return await self.page(client, SnapQuerySolution.nominate_ui)

        @ui.page("/stats")
        async def stats(client: Client):
            # self.authenticated() accepts self.login as well as self.orcid_auth
            if not self.authenticated():
                return RedirectResponse("/login")
            return await self.page(client, SnapQuerySolution.stats_ui)

        @ui.page("/login")
        async def login(client: Client):
            return await self.page(client, SnapQuerySolution.login_ui)

        @app.get("/orcid_callback")
        async def orcid_authenticate_callback(code: str):
            """
            finish the ORCID login with the given code

            Raises:
                HTTPException: with status 401 if the login fails
            """
            try:
                self.orcid_auth.login(code)
            except Exception as e:
                # the HTTPException has to be raised - when it is merely
                # returned it is handled as an ordinary return value and
                # no 401 error response is produced
                raise HTTPException(status_code=401, detail=str(e)) from e
            return RedirectResponse("/")

        @ui.page("/logout")
        async def logout(client: Client) -> RedirectResponse:
            # log out from whichever of the two mechanisms is active
            if self.login.authenticated():
                await self.login.logout()
            if self.orcid_auth.authenticated():
                self.orcid_auth.logout()
            return RedirectResponse("/")

        @ui.page("/queries_by_namespace")
        async def queries_by_namespace(client: Client):
            return await self.page(client, SnapQuerySolution.queries_by_namespace)

        @ui.page("/query/{domain}/{namespace}/{name}")
        async def query_page(
            client: Client,
            domain: str,
            namespace: str,
            name: str,
            endpoint_name: str = None,
            limit: int = None,
            format: str = "html",
        ):
            """
            show the query page for the given namespace and name
            """
            # without an explicit endpoint fall back to SnapQuerySolution.get_user_endpoint()
            if endpoint_name is None:
                endpoint_name = SnapQuerySolution.get_user_endpoint()
            return await self.page(
                client,
                SnapQuerySolution.query_page,
                domain=domain,
                namespace=namespace,
                name=name,
                endpoint_name=endpoint_name,
                limit=limit,
                r_format_str=format,
            )

        @app.get("/api/endpoints")
        def get_endpoints():
            """
            list all endpoints
            """
            endpoints = self.nqm.endpoints
            return endpoints

        @app.get("/api/meta_query/{name}")
        def meta_query(name: str, limit: int = None):
            """
            run the meta query with the given name

            Args:
                name (str): the name of the meta query, optionally followed by
                    .<r_format_str> - the default format is json
                limit (int): if set, only the first limit records are returned

            Raises:
                HTTPException: with status 404 if the meta query is not known
            """
            name, r_format = self.get_r_format(name, "json")
            if name not in self.nqm.meta_qm.queriesByName:
                raise HTTPException(status_code=404, detail=f"meta query {name} not known")
            query = self.nqm.meta_qm.queriesByName[name]
            qb = QueryBundle(named_query=None, query=query)
            # meta queries are run against the sql database of the named query manager
            qlod = self.nqm.sql_db.query(query.query)
            if limit:
                qlod = qlod[:limit]
            content = qb.format_result(qlod, r_format)
            if r_format == Format.html:
                return HTMLResponse(content)
            return PlainTextResponse(content)

        @app.get("/api/sparql/{domain}/{namespace}/{name}")
        def sparql(
            domain: str,
            namespace: str,
            name: str,
            endpoint_name: str = "wikidata",
            limit: int = None,
        ) -> PlainTextResponse:
            """
            Gets a SPARQL query by name within a specified namespace

            Args:
                domain (str): The domain identifying the domain of the query.
                namespace (str): The namespace identifying the group or category of the query.
                name (str): The specific name of the query to be executed.
                endpoint_name (str): the name of the endpoint to use
                limit (int): a limit to set, default=None

            Returns:
                PlainTextResponse: The plain text SPARQL code
            """
            query_name = QueryName(domain=domain, namespace=namespace, name=name)
            qb = self.nqm.get_query(query_name=query_name, endpoint_name=endpoint_name, limit=limit)
            sparql_query = qb.query.query
            return PlainTextResponse(sparql_query)

        @app.get("/api/query/{domain}/{namespace}/{name}")
        def query(
            domain: str,
            namespace: str,
            name: str,
            endpoint_name: str = "wikidata",
            limit: int = None,
        ) -> HTMLResponse:
            """
            Executes a SPARQL query by name within a specified namespace, formats the results, and returns them as an HTML response.

            Args:
                domain (str): The domain identifying the domain of the query.
                namespace (str): The namespace identifying the group or category of the query.
                name (str): The specific name of the query to be executed.
                endpoint_name (str): the name of the endpoint to use
                limit(int): a limit to set, default=None

            Returns:
                HTMLResponse: The HTML formatted response containing the results of the query execution.

            Raises:
                HTTPException: If the query cannot be found or fails to execute.
            """
            content = self.query(
                name=name,
                namespace=namespace,
                domain=domain,
                endpoint_name=endpoint_name,
                limit=limit,
            )
            if not content:
                raise HTTPException(status_code=500, detail="Could not create result")

            # Return the content as an HTML response
            return HTMLResponse(content)

    def get_r_format(self, name: str, default_format_str: str = "html") -> tuple[str, Format]:
        """
        get the result format from the given query name following the
        dot convention that <name>.<r_format_str> specifies the result format
        e.g. cats.json will ask for the json result format

        Args:
            name (str): the name of the query/meta query
            default_format_str (str): the name of the default format to use

        Returns:
            tuple[str, Format]: the name without the format suffix and the result format
        """
        # split at the last dot only so that dots within the name itself are kept
        base_name, dot, suffix = name.rpartition(".")
        if dot:
            name = base_name
            r_format_str = suffix
        else:
            r_format_str = default_format_str
        # NOTE(review): an unknown format name makes this lookup fail
        # (a KeyError if Format is an Enum) - confirm
        r_format = Format[r_format_str]
        return name, r_format

    def query(
        self,
        name: str,
        namespace: str,
        domain: str,
        endpoint_name: str = "wikidata",
        limit: int = None,
    ) -> str:
        """
        Queries an external API to retrieve data based on a given namespace and name.

        Args:
            name (str): The name identifier of the data to be queried.
            namespace (str): The namespace to which the query belongs. It helps in categorizing the data.
            domain (str): The domain identifying the domain of the query.
            endpoint_name (str): The name of the endpoint to be used for the query. Defaults to 'wikidata'.
            limit (int): the limit for the query default: None

        Returns:
            str: the content retrieved

        Raises:
            HTTPException: with status 404 and the original error message as
                detail if any step of the query processing fails
        """
        try:
            # content negotiation
            name, r_format = self.get_r_format(name)
            query_name = QueryName(domain=domain, namespace=namespace, name=name)
            qb = self.nqm.get_query(query_name=query_name, endpoint_name=endpoint_name, limit=limit)
            (qlod, stats) = qb.get_lod_with_stats()
            # keep the stats of this execution
            self.nqm.store_stats([stats])
            content = qb.format_result(qlod, r_format)
            return content
        except Exception as e:
            # Handling specific exceptions can be more detailed based on what nqm.get_sparql and nqm.query can raise
            # chain the original exception so that its traceback is not lost
            raise HTTPException(status_code=404, detail=str(e)) from e

    def authenticated(self) -> bool:
        """
        Check if the user is authenticated.

        Returns:
            True if the user is authenticated via self.login or via
            self.orcid_auth, False otherwise.
        """
        return self.login.authenticated() or self.orcid_auth.authenticated()

__init__()

Constructs all the necessary attributes for the WebServer object.

Source code in snapquery/snapquery_webserver.py
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def __init__(self):
    """
    Constructs all the necessary attributes for the WebServer object
    and registers the pages and API routes.
    """
    InputWebserver.__init__(self, config=SnapQueryWebServer.get_config())
    users = Users("~/.solutions/snapquery")
    self.login = Login(self, users)
    self.orcid_auth = OrcidAuth(Path(self.config.base_path))
    self.nqm = NamedQueryManager.from_samples()

    @ui.page("/admin")
    async def admin(client: Client):
        # only self.login is checked here - self.orcid_auth is not consulted
        if not self.login.authenticated():
            return RedirectResponse("/login")
        return await self.page(client, SnapQuerySolution.admin_ui)

    @ui.page("/nominate")
    async def nominate(client: Client):
        return await self.page(client, SnapQuerySolution.nominate_ui)

    @ui.page("/stats")
    async def stats(client: Client):
        if not self.authenticated():
            return RedirectResponse("/login")
        return await self.page(client, SnapQuerySolution.stats_ui)

    @ui.page("/login")
    async def login(client: Client):
        return await self.page(client, SnapQuerySolution.login_ui)

    @app.get("/orcid_callback")
    async def orcid_authenticate_callback(code: str):
        """
        finish the ORCID login with the given code

        Raises:
            HTTPException: with status 401 if the login fails
        """
        try:
            self.orcid_auth.login(code)
        except Exception as e:
            # the HTTPException has to be raised - when it is merely
            # returned it is handled as an ordinary return value and
            # no 401 error response is produced
            raise HTTPException(status_code=401, detail=str(e)) from e
        return RedirectResponse("/")

    @ui.page("/logout")
    async def logout(client: Client) -> RedirectResponse:
        # log out from whichever of the two mechanisms is active
        if self.login.authenticated():
            await self.login.logout()
        if self.orcid_auth.authenticated():
            self.orcid_auth.logout()
        return RedirectResponse("/")

    @ui.page("/queries_by_namespace")
    async def queries_by_namespace(client: Client):
        return await self.page(client, SnapQuerySolution.queries_by_namespace)

    @ui.page("/query/{domain}/{namespace}/{name}")
    async def query_page(
        client: Client,
        domain: str,
        namespace: str,
        name: str,
        endpoint_name: str = None,
        limit: int = None,
        format: str = "html",
    ):
        """
        show the query page for the given namespace and name
        """
        # without an explicit endpoint fall back to SnapQuerySolution.get_user_endpoint()
        if endpoint_name is None:
            endpoint_name = SnapQuerySolution.get_user_endpoint()
        return await self.page(
            client,
            SnapQuerySolution.query_page,
            domain=domain,
            namespace=namespace,
            name=name,
            endpoint_name=endpoint_name,
            limit=limit,
            r_format_str=format,
        )

    @app.get("/api/endpoints")
    def get_endpoints():
        """
        list all endpoints
        """
        endpoints = self.nqm.endpoints
        return endpoints

    @app.get("/api/meta_query/{name}")
    def meta_query(name: str, limit: int = None):
        """
        run the meta query with the given name

        Args:
            name (str): the name of the meta query, optionally followed by
                .<r_format_str> - the default format is json
            limit (int): if set, only the first limit records are returned

        Raises:
            HTTPException: with status 404 if the meta query is not known
        """
        name, r_format = self.get_r_format(name, "json")
        if name not in self.nqm.meta_qm.queriesByName:
            raise HTTPException(status_code=404, detail=f"meta query {name} not known")
        query = self.nqm.meta_qm.queriesByName[name]
        qb = QueryBundle(named_query=None, query=query)
        # meta queries are run against the sql database of the named query manager
        qlod = self.nqm.sql_db.query(query.query)
        if limit:
            qlod = qlod[:limit]
        content = qb.format_result(qlod, r_format)
        if r_format == Format.html:
            return HTMLResponse(content)
        return PlainTextResponse(content)

    @app.get("/api/sparql/{domain}/{namespace}/{name}")
    def sparql(
        domain: str,
        namespace: str,
        name: str,
        endpoint_name: str = "wikidata",
        limit: int = None,
    ) -> PlainTextResponse:
        """
        Gets a SPARQL query by name within a specified namespace

        Args:
            domain (str): The domain identifying the domain of the query.
            namespace (str): The namespace identifying the group or category of the query.
            name (str): The specific name of the query to be executed.
            endpoint_name (str): the name of the endpoint to use
            limit (int): a limit to set, default=None

        Returns:
            PlainTextResponse: The plain text SPARQL code
        """
        query_name = QueryName(domain=domain, namespace=namespace, name=name)
        qb = self.nqm.get_query(query_name=query_name, endpoint_name=endpoint_name, limit=limit)
        sparql_query = qb.query.query
        return PlainTextResponse(sparql_query)

    @app.get("/api/query/{domain}/{namespace}/{name}")
    def query(
        domain: str,
        namespace: str,
        name: str,
        endpoint_name: str = "wikidata",
        limit: int = None,
    ) -> HTMLResponse:
        """
        Executes a SPARQL query by name within a specified namespace, formats the results, and returns them as an HTML response.

        Args:
            domain (str): The domain identifying the domain of the query.
            namespace (str): The namespace identifying the group or category of the query.
            name (str): The specific name of the query to be executed.
            endpoint_name (str): the name of the endpoint to use
            limit(int): a limit to set, default=None

        Returns:
            HTMLResponse: The HTML formatted response containing the results of the query execution.

        Raises:
            HTTPException: If the query cannot be found or fails to execute.
        """
        content = self.query(
            name=name,
            namespace=namespace,
            domain=domain,
            endpoint_name=endpoint_name,
            limit=limit,
        )
        if not content:
            raise HTTPException(status_code=500, detail="Could not create result")

        # Return the content as an HTML response
        return HTMLResponse(content)

authenticated()

Check if the user is authenticated. Returns: True if the user is authenticated, False otherwise.

Source code in snapquery/snapquery_webserver.py
272
273
274
275
276
277
278
def authenticated(self) -> bool:
    """
    Tell whether the current user is logged in.

    self.login is asked first; self.orcid_auth is only consulted
    when self.login does not report an authenticated user.

    Returns:
        True if the user is authenticated, False otherwise.
    """
    local_login = self.login.authenticated()
    if local_login:
        return local_login
    return self.orcid_auth.authenticated()

get_config() classmethod

get the configuration for this Webserver

Source code in snapquery/snapquery_webserver.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
@classmethod
def get_config(cls) -> WebserverConfig:
    """
    Provide the webserver configuration for snapquery.

    A WebserverConfig with the snapquery defaults is handed to
    WebserverConfig.get and the configuration returned by it gets
    SnapQuerySolution as its solution class.

    Returns:
        WebserverConfig: the configuration to use for this webserver
    """
    defaults = WebserverConfig(
        short_name="snapquery",
        copy_right="",
        version=Version(),
        default_port=9862,
        timeout=6.0,
    )
    resolved_config = WebserverConfig.get(defaults)
    resolved_config.solution_class = SnapQuerySolution
    return resolved_config

get_r_format(name, default_format_str='html')

get the result format from the given query name following the dot convention that `<name>.<r_format_str>` specifies the result format, e.g. `cats.json` will ask for the json result format

Parameters:

Name Type Description Default
name str

the name of the query/meta query

required
default_format_str str

the name of the default format to use

'html'

Returns:

Name Type Description
Format Format

the result format

Source code in snapquery/snapquery_webserver.py
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
def get_r_format(self, name: str, default_format_str: str = "html") -> tuple[str, Format]:
    """
    get the result format from the given query name following the
    dot convention that <name>.<r_format_str> specifies the result format
    e.g. cats.json will ask for the json result format

    Args:
        name (str): the name of the query/meta query
        default_format_str (str): the name of the default format to use

    Returns:
        tuple[str, Format]: the name without the format suffix and the result format
    """
    # split at the last dot only so that dots within the name itself are kept
    base_name, dot, suffix = name.rpartition(".")
    if dot:
        name = base_name
        r_format_str = suffix
    else:
        r_format_str = default_format_str
    # NOTE(review): an unknown format name makes this lookup fail
    # (a KeyError if Format is an Enum) - confirm
    r_format = Format[r_format_str]
    return name, r_format

query(name, namespace, domain, endpoint_name='wikidata', limit=None)

Queries an external API to retrieve data based on a given namespace and name.

Parameters:

Name Type Description Default
name str

The name identifier of the data to be queried.

required
namespace str

The namespace to which the query belongs. It helps in categorizing the data.

required
domain str

The domain identifying the domain of the query.

required
endpoint_name str

The name of the endpoint to be used for the query. Defaults to 'wikidata'.

'wikidata'
limit int

the limit for the query default: None

None
Returns:

str: the content retrieved
Source code in snapquery/snapquery_webserver.py
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
def query(
    self,
    name: str,
    namespace: str,
    domain: str,
    endpoint_name: str = "wikidata",
    limit: int = None,
) -> str:
    """
    Queries an external API to retrieve data based on a given namespace and name.

    Args:
        name (str): The name identifier of the data to be queried.
        namespace (str): The namespace to which the query belongs. It helps in categorizing the data.
        domain (str): The domain identifying the domain of the query.
        endpoint_name (str): The name of the endpoint to be used for the query. Defaults to 'wikidata'.
        limit (int): the limit for the query default: None

    Returns:
        str: the content retrieved

    Raises:
        HTTPException: with status 404 and the original error message as
            detail if any step of the query processing fails
    """
    try:
        # content negotiation
        name, r_format = self.get_r_format(name)
        query_name = QueryName(domain=domain, namespace=namespace, name=name)
        qb = self.nqm.get_query(query_name=query_name, endpoint_name=endpoint_name, limit=limit)
        (qlod, stats) = qb.get_lod_with_stats()
        # keep the stats of this execution
        self.nqm.store_stats([stats])
        content = qb.format_result(qlod, r_format)
        return content
    except Exception as e:
        # Handling specific exceptions can be more detailed based on what nqm.get_sparql and nqm.query can raise
        # chain the original exception so that its traceback is not lost
        raise HTTPException(status_code=404, detail=str(e)) from e

sparql_analyzer

SparqlAnalyzer

SPARQL Query Analyzer

Source code in snapquery/sparql_analyzer.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
class SparqlAnalyzer:
    """
    SPARQL Query Analyzer

    Collection of class-level helpers that inspect and normalize SPARQL queries:
    prefix handling, blazegraph named-subquery (WITH ... AS %name) rewriting,
    jinja-style query parameters and validation.
    """

    # regular expression matching a blazegraph named subquery
    # "WITH { <subquery> } AS %<name>"; the closing brace of the WITH block is
    # part of the captured "subquery" group
    BLAZEGRAPH_NAMED_SUBQUERY_PATTERN = r"""WITH[\s\n]*(#[\w\s://\.\n,]+)?{(#[\w\s://\.\n,]+)?[\s\n](?P<subquery>[\n\r\b\w\d:\t\.";,\{\)\(\?\}\W#]*?)\s+[Aa][Ss]\s+%(?P<name>[A-Za-z\d_]+)"""

    @classmethod
    def get_prefix_luts(cls) -> dict[str, str]:
        """
        Get the lookup table of well known prefixes

        Returns:
            dict mapping prefix name to namespace IRI
        """
        return {
            "biopax": "http://www.biopax.org/release/biopax-level3.owl#",
            "bd": "http://www.bigdata.com/rdf#",
            "cc": "http://creativecommons.org/ns#",
            "datacite": "http://purl.org/spar/datacite/",
            "dblp": "https://dblp.org/rdf/schema#",
            "dc": "http://purl.org/dc/elements/1.1/",
            "dct": "http://purl.org/dc/terms/",
            "freq": "http://purl.org/cld/freq/",
            "geo": "http://www.opengis.net/ont/geosparql#",
            "geof": "http://www.opengis.net/def/function/geosparql/",
            "geom": "http://geovocab.org/geometry#",
            "gpml": "http://vocabularies.wikipathways.org/gpml#",
            "litre": "http://purl.org/spar/literal/",
            "lgdo": "http://linkedgeodata.org/ontology/",
            "ontolex": "http://www.w3.org/ns/lemon/ontolex#",
            "orkgp": "http://orkg.org/orkg/predicate/",
            "orkgc": "http://orkg.org/orkg/class/",
            "orkgr": "http://orkg.org/orkg/resource/",
            "owl": "http://www.w3.org/2002/07/owl#",
            "p": "http://www.wikidata.org/prop/",
            "pav": "http://purl.org/pav/",
            "pq": "http://www.wikidata.org/prop/qualifier/",
            "pqn": "http://www.wikidata.org/prop/qualifier/value-normalized/",
            "pqv": "http://www.wikidata.org/prop/qualifier/value/",
            "pr": "http://www.wikidata.org/prop/reference/",
            "prn": "http://www.wikidata.org/prop/reference/value-normalized/",
            "prov": "http://www.w3.org/ns/prov#",
            "prv": "http://www.wikidata.org/prop/reference/value/",
            "ps": "http://www.wikidata.org/prop/statement/",
            "psn": "http://www.wikidata.org/prop/statement/value-normalized/",
            "psv": "http://www.wikidata.org/prop/statement/value/",
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "schema": "http://schema.org/",
            "skos": "http://www.w3.org/2004/02/skos/core#",
            "void": "http://rdfs.org/ns/void#",
            "vrank": "http://purl.org/voc/vrank#",
            "wd": "http://www.wikidata.org/entity/",
            "wdata": "http://www.wikidata.org/wiki/Special:EntityData/",
            "wdno": "http://www.wikidata.org/prop/novalue/",
            "wdref": "http://www.wikidata.org/reference/",
            "wds": "http://www.wikidata.org/entity/statement/",
            "wdt": "http://www.wikidata.org/prop/direct/",
            "wdtn": "http://www.wikidata.org/prop/direct-normalized/",
            "wdv": "http://www.wikidata.org/value/",
            "wikibase": "http://wikiba.se/ontology#",
            "wp": "http://vocabularies.wikipathways.org/wp#",
            "wprdf": "http://rdf.wikipathways.org/",
            "xsd": "http://www.w3.org/2001/XMLSchema#",
            "mwapi": "https://www.mediawiki.org/ontology#API/",
            "hint": "http://www.bigdata.com/queryHints#",
            "gas": "http://www.bigdata.com/rdf/gas#",
        }

    @classmethod
    def prefix_clause(cls, prefix: str, iri: str) -> str:
        """
        Provide SPARQL prefix clause for given prefix and iri
        Args:
            prefix: prefix name
            iri: iri

        Returns:
            prefix clause
        """
        return f"PREFIX {prefix}:  <{iri}>"

    @classmethod
    def extract_used_prefixes(cls, query: str) -> tuple[dict[str, str], set[str]]:
        """
        Extract declared and used prefixes from SPARQL query
        Args:
            query: SPARQL query

        Returns:
            tuple of
              - dict (prefix name → iri) of the prefixes declared by the query itself
              - set of the prefix names used in the query
        """
        # add prefixes to avoid parsing error due to missing prefix
        prefix_lut = cls.get_prefix_luts()
        prefixed_query = cls._add_prefixes(prefix_lut, query)
        parsed_query = parseQuery(prefixed_query)
        elements = parsed_query.as_list()
        defined_prefixes = []
        used_prefixes = []
        # flat walk over the parse result: nested containers are appended to the
        # list that is being iterated, so they are visited later in the same loop
        for element in elements:
            if isinstance(element, CompValue) and element.name == "PrefixDecl":
                defined_prefixes.append(element)
            elif isinstance(element, CompValue) and element.name == "pname":
                used_prefixes.append(element)
            elif isinstance(element, Iterable) and not isinstance(element, str):
                if isinstance(element, dict):
                    elements.extend(element.values())
                else:
                    elements.extend(element)
        declared_prefix_counter = Counter(value.get("prefix") for value in defined_prefixes)
        multi_declarations = {prefix for prefix, count in declared_prefix_counter.items() if count > 1}
        used_prefix_names = {value.get("prefix") for value in used_prefixes}
        used_prefix_map = dict()
        # all known prefixes were prepended above, so a declaration made by the query
        # itself is either unknown to the lookup table or shows up more than once;
        # iterating in reverse leaves the earliest declaration's iri in the map
        for prefix_value in reversed(defined_prefixes):
            prefix_name = prefix_value.get("prefix")
            prefix_iri = prefix_value.get("iri")
            if prefix_name in multi_declarations or prefix_name not in prefix_lut:
                used_prefix_map[prefix_name] = str(prefix_iri)
        return used_prefix_map, used_prefix_names

    @classmethod
    def add_missing_prefixes(cls, query: str) -> str:
        """
        Add missing prefixes to SPARQL query

        Prefixes that are used but not declared are looked up in the table of
        known prefixes and prepended to the query. If anything fails along the
        way the query is returned unchanged.

        Args:
            query: SPARQL query

        Returns:
            SPARQL query
        """
        try:
            # normalize query for parsing
            prepared_query = query
            if cls.has_parameter(prepared_query):
                prepared_query = cls.fill_with_sample_query_parameters(prepared_query)
            if cls.has_blazegraph_with_clause(prepared_query):
                prepared_query = cls.transform_with_clause_to_subquery(prepared_query)
            # extract used and declared prefixes
            declared_prefixes, used_prefixes = cls.extract_used_prefixes(prepared_query)
            known_prefixes = cls.get_prefix_luts()
            missing_prefix_declarations = used_prefixes - set(declared_prefixes)
            undefined_prefixes = missing_prefix_declarations.difference(known_prefixes)
            if undefined_prefixes:
                logger.error(
                    f"Prefix definitions missing for: {undefined_prefixes} → Not all prefixes that are missing can be added"
                )
            missing_prefix_declarations_lut = {
                key: value for key, value in known_prefixes.items() if key in missing_prefix_declarations
            }
            # the declarations are added to the original query, not the normalized one
            fixed_query = cls._add_prefixes(missing_prefix_declarations_lut, query)
        except Exception as e:
            # best effort: fall back to the unmodified query
            logger.debug("Adding missing prefixes to query failed → Unable to parse SPARQL query")
            logger.error(e)
            fixed_query = query
        return fixed_query

    @classmethod
    def transform_with_clause_to_subquery(cls, query: str) -> str:
        """
        Transform blazegraph with clause to subquery statement

        Each "WITH { ... } AS %name" block is removed from the query and every
        "INCLUDE %name" is replaced by the block's content.

        Args:
            query: SPARQL query

        Returns:
            the transformed query; the unchanged query if it has no WITH clause
        """
        match = re.search(cls.BLAZEGRAPH_NAMED_SUBQUERY_PATTERN, query)
        if match is None:
            return query
        subquery = match.group("subquery")
        name = match.group("name")
        start_pos, end_pos = match.span()
        # check if WHERE must be added
        select_part = query[:start_pos]
        where_part = query[end_pos + 1 :]
        if cls.has_blazegraph_with_clause(where_part):
            where_part = cls.transform_with_clause_to_subquery(where_part)
        if where_part.lower().strip().startswith("where"):
            query_with_removed = select_part + where_part
        else:
            query_with_removed = f"{select_part}\nWHERE\n{where_part}"

        # the lookahead keeps "%a" from matching the beginning of "%ab"
        include_pattern = rf"[Ii][Nn][Cc][Ll][Uu][Dd][Ee]\s+%{name}(?![A-Za-z\d_])"
        replacement = f"{{{subquery}\n"
        # a callable replacement is inserted literally; a plain string would have its
        # backslashes interpreted as escapes/group references by re.sub
        return re.sub(include_pattern, lambda _match: replacement, query_with_removed)

    @classmethod
    def has_blazegraph_with_clause(cls, query: str) -> bool:
        """
        Check if the given query has a WITH clause (named subquery)
        For details see https://github.com/blazegraph/database/wiki/NamedSubquery
        Args:
            query: SPARQL query

        Returns:
            True if the query has a WITH clause (named subquery)
        """
        return re.search(cls.BLAZEGRAPH_NAMED_SUBQUERY_PATTERN, query) is not None

    @classmethod
    def _add_prefixes(cls, prefixes: dict[str, str], query: str) -> str:
        """
        Add prefixes to SPARQL query
        Args:
            prefixes: prefixes to add
            query: SPARQL query

        Returns:
            SPARQL query with prefixes added
        """
        prefixes_clauses = [cls.prefix_clause(prefix, iri) for prefix, iri in prefixes.items()]
        prefixes_clauses_str = "\n".join(prefixes_clauses)
        return prefixes_clauses_str + "\n" + query

    @classmethod
    def has_parameter(cls, query: str) -> bool:
        """
        Check if the given query has parameters that need to be set
        Args:
            query: SPARQL query

        Returns:
            True if the query has parameters that need to be set
        """
        return len(cls.get_query_parameter(query)) > 0

    @classmethod
    def get_query_parameter(cls, query: str) -> set[str]:
        """
        Get the names of the template parameters of the given query
        Args:
            query: SPARQL query

        Returns:
            set of the undeclared template variable names found in the query
        """
        env = Environment()
        ast = env.parse(query)
        return meta.find_undeclared_variables(ast)

    @classmethod
    def fill_with_sample_query_parameters(cls, query: str) -> str:
        """
        Fill the given SPARQL query with sample query parameters
        Args:
            query: SPARQL query

        Returns:
            the query with every parameter bound to a sample value;
            the unchanged query if it has no parameters
        """
        # parse the template once instead of once for the check and once for the names
        parameter_names = cls.get_query_parameter(query)
        if not parameter_names:
            return query
        params = cls._prepare_sample_parameter(parameter_names)
        return cls.bind_parameters_to_query(query, params)

    @classmethod
    def bind_parameters_to_query(cls, query: str, params: dict[str, str]) -> str:
        """
        Bind the parameters to the given query
        Args:
            query: SPARQL query
            params: query params

        Returns:
            Query with parameters bound
        """
        template = Template(query)
        return template.render(**params)

    @classmethod
    def _prepare_sample_parameter(cls, parameter_names: set[str]) -> dict[str, str]:
        """
        Prepare sample query parameters
        Args:
            parameter_names: names of the parameters to create values for

        Returns:
            dict mapping each name to a random value of the form Q<1..1000>
        """
        return {name: f"Q{random.randint(1, 1000)}" for name in parameter_names}

    @classmethod
    def is_valid(cls, query: str) -> bool:
        """
        Check if query is valid SPARQL query
        Args:
            query: SPARQL query

        Returns:
            True if query is valid SPARQL query
        """
        try:
            prepareQuery(query)
            return True
        except Exception as e:
            logger.debug(f"Query is not valid SPARQL query: {e}")
            return False

add_missing_prefixes(query) classmethod

Add missing prefixes to SPARQL query Args: query: SPARQL query

Returns:

Type Description

SPARQL query

Source code in snapquery/sparql_analyzer.py
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
@classmethod
def add_missing_prefixes(cls, query: str) -> str:
    """
    Add missing prefixes to SPARQL query

    Prefixes that are used but not declared are looked up in the table of
    known prefixes and prepended to the query. If anything fails along the
    way the query is returned unchanged.

    Args:
        query: SPARQL query

    Returns:
        SPARQL query
    """
    try:
        # normalize query for parsing
        prepared_query = query
        if cls.has_parameter(prepared_query):
            prepared_query = cls.fill_with_sample_query_parameters(prepared_query)
        if cls.has_blazegraph_with_clause(prepared_query):
            prepared_query = cls.transform_with_clause_to_subquery(prepared_query)
        # extract used and declared prefixes
        declared_prefixes, used_prefixes = cls.extract_used_prefixes(prepared_query)
        # build the table of known prefixes once and reuse it below
        known_prefixes = cls.get_prefix_luts()
        missing_prefix_declarations = used_prefixes - set(declared_prefixes)
        undefined_prefixes = missing_prefix_declarations.difference(known_prefixes)
        if undefined_prefixes:
            logger.error(
                f"Prefix definitions missing for: {undefined_prefixes} → Not all prefixes that are missing can be added"
            )
        missing_prefix_declarations_lut = {
            key: value for key, value in known_prefixes.items() if key in missing_prefix_declarations
        }
        # the declarations are added to the original query, not the normalized one
        fixed_query = cls._add_prefixes(missing_prefix_declarations_lut, query)
    except Exception as e:
        # best effort: fall back to the unmodified query; report through the
        # module logger like every other message of this method
        logger.debug("Adding missing prefixes to query failed → Unable to parse SPARQL query")
        logger.error(e)
        fixed_query = query
    return fixed_query

bind_parameters_to_query(query, params) classmethod

Bind the parameters to the given query Args: query: SPARQL query params: query params

Returns:

Type Description
str

Query with parameters bound

Source code in snapquery/sparql_analyzer.py
262
263
264
265
266
267
268
269
270
271
272
273
274
275
@classmethod
def bind_parameters_to_query(cls, query: str, params: dict[str, str]) -> str:
    """
    Render the query as a template with the given parameter values
    Args:
        query: SPARQL query
        params: query params (parameter name → value)

    Returns:
        Query with the parameters bound
    """
    return Template(query).render(**params)

extract_used_prefixes(query) classmethod

Extract used prefixes from SPARQL query Args: query: SPARQL query

Returns:

Type Description
tuple[dict[str, str], set[str]]

tuple of (dict of declared prefixes, set of used prefix names)

Source code in snapquery/sparql_analyzer.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
@classmethod
def extract_used_prefixes(cls, query: str) -> tuple[dict[str, str], set[str]]:
    """
    Extract declared and used prefixes from SPARQL query
    Args:
        query: SPARQL query

    Returns:
        tuple of
          - dict (prefix name → iri) of the prefixes declared by the query itself
          - set of the prefix names used in the query
    """
    # the known prefixes are prepended so that a missing declaration
    # does not make the parser fail
    known_prefixes = cls.get_prefix_luts()
    parse_result = parseQuery(cls._add_prefixes(known_prefixes, query))
    declarations = []
    usages = []
    # work list: nested containers are appended and visited later on
    pending = parse_result.as_list()
    position = 0
    while position < len(pending):
        node = pending[position]
        position += 1
        if isinstance(node, CompValue) and node.name == "PrefixDecl":
            declarations.append(node)
            continue
        if isinstance(node, CompValue) and node.name == "pname":
            usages.append(node)
            continue
        if isinstance(node, str) or not isinstance(node, Iterable):
            continue
        pending.extend(node.values() if isinstance(node, dict) else node)
    declaration_counts = Counter([declaration.get("prefix") for declaration in declarations])
    repeated = {prefix for prefix, count in declaration_counts.items() if count > 1}
    used_prefix_names = {usage.get("prefix") for usage in usages}
    declared_map = {}
    # reverse order: the earliest declaration of a prefix is written last
    for declaration in reversed(declarations):
        prefix_name = declaration.get("prefix")
        if prefix_name in repeated or prefix_name not in known_prefixes:
            declared_map[prefix_name] = str(declaration.get("iri"))
    return declared_map, used_prefix_names

fill_with_sample_query_parameters(query) classmethod

Fill the given SPARQL query with sample query parameters Args: query: SPARQL query

Returns:

Source code in snapquery/sparql_analyzer.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
@classmethod
def fill_with_sample_query_parameters(cls, query: str) -> str:
    """
    Bind every parameter of the given SPARQL query to a sample value
    Args:
        query: SPARQL query

    Returns:
        the query with sample values bound; the unchanged query
        if it has no parameters
    """
    if cls.has_parameter(query):
        names = cls.get_query_parameter(query)
        sample_values = cls._prepare_sample_parameter(names)
        return cls.bind_parameters_to_query(query, sample_values)
    return query

has_blazegraph_with_clause(query) classmethod

Check if the given query has a WITH clause (named subquery) For details see https://github.com/blazegraph/database/wiki/NamedSubquery Args: query: SPARQL query

Returns:

Type Description
bool

True if the query has a WITH clause (named subquery)

Source code in snapquery/sparql_analyzer.py
197
198
199
200
201
202
203
204
205
206
207
208
209
@classmethod
def has_blazegraph_with_clause(cls, query: str) -> bool:
    """
    Tell whether the given query contains a blazegraph WITH clause (named subquery)
    For details see https://github.com/blazegraph/database/wiki/NamedSubquery
    Args:
        query: SPARQL query

    Returns:
        True if the named subquery pattern is found in the query
    """
    return re.search(cls.BLAZEGRAPH_NAMED_SUBQUERY_PATTERN, query) is not None

has_parameter(query) classmethod

Check if the given query has parameters that need to be set Args: query: SPARQL query

Returns:

Type Description
bool

True if the query has parameters that need to be set

Source code in snapquery/sparql_analyzer.py
226
227
228
229
230
231
232
233
234
235
236
237
@classmethod
def has_parameter(cls, query: str) -> bool:
    """
    Tell whether the given query has parameters that need to be set
    Args:
        query: SPARQL query

    Returns:
        True if at least one parameter is found in the query
    """
    parameter_names = cls.get_query_parameter(query)
    return len(parameter_names) > 0

is_valid(query) classmethod

Check if query is valid SPARQL query Args: query: SPARQL query

Returns:

Type Description

True if query is valid SPARQL query

Source code in snapquery/sparql_analyzer.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
@classmethod
def is_valid(cls, query: str):
    """
    Validate the given query by trying to prepare it
    Args:
        query: SPARQL query

    Returns:
        True if the query could be prepared, False if preparing it raised
    """
    try:
        prepareQuery(query)
    except Exception as parse_error:
        logger.debug(f"Query is not valid SPARQL query: {parse_error}")
        return False
    return True

prefix_clause(prefix, iri) classmethod

Provide SPARQL prefix clause for given prefix and iri Args: prefix: prefix name iri: iri

Returns:

Type Description
str

prefix clause

Source code in snapquery/sparql_analyzer.py
79
80
81
82
83
84
85
86
87
88
89
90
@classmethod
def prefix_clause(cls, prefix: str, iri: str) -> str:
    """
    Build the SPARQL PREFIX clause for a prefix name and its iri
    Args:
        prefix: prefix name
        iri: iri

    Returns:
        prefix clause
    """
    clause = "PREFIX {}:  <{}>".format(prefix, iri)
    return clause

transform_with_clause_to_subquery(query) classmethod

Transform blazegraph with clause to subquery statement Args: query:

Returns:

Source code in snapquery/sparql_analyzer.py
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
@classmethod
def transform_with_clause_to_subquery(cls, query: str) -> str:
    """
    Transform blazegraph with clause to subquery statement

    Each "WITH { ... } AS %name" block is removed from the query and every
    "INCLUDE %name" is replaced by the block's content.

    Args:
        query: SPARQL query

    Returns:
        the transformed query; the unchanged query if it has no WITH clause
    """
    match = re.search(cls.BLAZEGRAPH_NAMED_SUBQUERY_PATTERN, query)
    if match is None:
        # nothing to transform - hand back the query instead of None
        return query
    subquery = match.group("subquery")
    name = match.group("name")
    start_pos, end_pos = match.span()
    # check if WHERE must be added
    select_part = query[:start_pos]
    where_part = query[end_pos + 1 :]
    if cls.has_blazegraph_with_clause(where_part):
        where_part = cls.transform_with_clause_to_subquery(where_part)
    if where_part.lower().strip().startswith("where"):
        query_with_removed = select_part + where_part
    else:
        query_with_removed = f"{select_part}\nWHERE\n{where_part}"

    # raw string keeps "\s" a regex escape; the lookahead keeps "%a" from
    # matching the beginning of "%ab"
    include_pattern = rf"[Ii][Nn][Cc][Ll][Uu][Dd][Ee]\s+%{name}(?![A-Za-z\d_])"
    replacement = f"{{{subquery}\n"
    # a callable replacement is inserted literally; a plain string would have its
    # backslashes interpreted as escapes/group references by re.sub
    return re.sub(include_pattern, lambda _match: replacement, query_with_removed)

stats_view

QueryStatsView

display usage statistics of the entities and properties used in the stored queries

Source code in snapquery/stats_view.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
class QueryStatsView:
    """
    display usage statistics of the entities and
    properties used in the stored queries
    """

    def __init__(self, solution=None):
        """
        constructor

        Args:
            solution: the web solution to attach to - without a solution
                no user interface is set up
        """
        self.solution = solution
        if not self.solution:
            return
        self.nqm = self.solution.nqm
        self.setup_ui()

    def setup_ui(self):
        """
        create the two expansions: one with the usage charts
        and one placeholder for the query stats
        """
        usage_title = "Statistics about the properties and items used in the stored queries"
        with self.solution.container:
            with ui.expansion(text=usage_title, value=True):
                # the charts are added to this column by the show_* methods
                self.input_row = ui.column()
                self.input_row.classes("w-full")
                self.show_entity_usage()
                self.show_property_usage()
            with ui.expansion(text="Query Stats", value=True):
                ui.label("ToDo:")

    def show_entity_usage(self):
        """
        add a bar chart of the entity usage in the queries
        """
        records = []
        for stat in QUERY_ITEM_STATS.get_entity_stats():
            records.append({"name": stat.label, "count": stat.count, "id": stat.identifier})
        # most used entities first
        frame = DataFrame.from_records(records)
        frame = frame.sort_values(by="count", ascending=False)
        figure = px.bar(frame, x="name", y="count", title="Entity usage in queries")
        with self.input_row:
            ui.plotly(figure).classes("w-full")

    def show_property_usage(self):
        """
        add a bar chart of the property usage in the queries
        """
        records = []
        for stat in QUERY_ITEM_STATS.get_property_stats():
            records.append({"name": stat.label, "count": stat.count})
        # most used properties first
        frame = DataFrame.from_records(records)
        frame = frame.sort_values(by="count", ascending=False)
        figure = px.bar(frame, x="name", y="count", title="Property usage in queries")
        with self.input_row:
            ui.plotly(figure).classes("w-full")

setup_ui()

setup the user interface

Source code in snapquery/stats_view.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
def setup_ui(self):
    """
    create the two expansions: one with the usage charts
    and one placeholder for the query stats
    """
    usage_title = "Statistics about the properties and items used in the stored queries"
    with self.solution.container:
        with ui.expansion(text=usage_title, value=True):
            # the charts are added to this column by the show_* methods
            self.input_row = ui.column()
            self.input_row.classes("w-full")
            self.show_entity_usage()
            self.show_property_usage()
        with ui.expansion(text="Query Stats", value=True):
            ui.label("ToDo:")

show_entity_usage()

show entity usage in the queries

Source code in snapquery/stats_view.py
35
36
37
38
39
40
41
42
43
44
def show_entity_usage(self):
    """
    add a bar chart of the entity usage in the queries
    """
    records = []
    for stat in QUERY_ITEM_STATS.get_entity_stats():
        records.append({"name": stat.label, "count": stat.count, "id": stat.identifier})
    # most used entities first
    frame = DataFrame.from_records(records)
    frame = frame.sort_values(by="count", ascending=False)
    figure = px.bar(frame, x="name", y="count", title="Entity usage in queries")
    with self.input_row:
        ui.plotly(figure).classes("w-full")

show_property_usage()

show property usage in the queries

Source code in snapquery/stats_view.py
46
47
48
49
50
51
52
53
54
55
def show_property_usage(self):
    """
    add a bar chart of the property usage in the queries
    """
    records = []
    for stat in QUERY_ITEM_STATS.get_property_stats():
        records.append({"name": stat.label, "count": stat.count})
    # most used properties first
    frame = DataFrame.from_records(records)
    frame = frame.sort_values(by="count", ascending=False)
    figure = px.bar(frame, x="name", y="count", title="Property usage in queries")
    with self.input_row:
        ui.plotly(figure).classes("w-full")

version

Created on 2024-05-01

@author: wf

Version dataclass

Version handling for snapquery

Source code in snapquery/version.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
@dataclass
class Version:
    """
    Version handling for snapquery
    """

    # The attributes below carry no type annotations, so @dataclass does not
    # turn them into fields - they are plain class-level constants.

    # package name and the version taken from the package itself
    name = "snapquery"
    version = snapquery.__version__
    # creation date and date of the last update
    date = "2024-05-03"
    updated = "2024-07-13"
    description = "Introduce Named Queries and Named Query Middleware to wikidata"

    authors = "Wolfgang Fahl"

    # documentation, discussion and source code locations
    doc_url = "https://wiki.bitplan.com/index.php/snapquery"
    chat_url = "https://github.com/WolfgangFahl/snapquery/discussions"
    cm_url = "https://github.com/WolfgangFahl/snapquery"

    license = """Copyright 2024 contributors. All rights reserved.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied."""

    # composed from the attributes above, which therefore must be defined first
    longDescription = f"""{name} version {version}
{description}

  Created by {authors} on {date} last updated {updated}"""

wd_page_query_extractor

Created on 2024-05-04 Author: tholzheim

WikipediaQueryExtractor

A class to handle the extraction and management of SPARQL queries from a Wikipedia page.

Source code in snapquery/wd_page_query_extractor.py
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
class WikipediaQueryExtractor:
    """
    A class to handle the extraction and management
    of SPARQL queries from a Wikipedia page.
    """

    def __init__(
        self,
        nqm: NamedQueryManager,
        base_url: str,
        domain: str,
        namespace: str,
        target_graph_name: str,
        template_name: str = "SPARQL",  # https://en.wikipedia.org/wiki/Template:SPARQL) - if None seek for short-urls
        debug: bool = False,
    ):
        """
        Constructor

        Args:
            nqm (NamedQueryManager): manager used to store the extracted queries
            base_url (str): url of the page to extract the queries from
            domain (str): domain of the extracted queries
            namespace (str): namespace of the extracted queries
            target_graph_name (str): target graph name of the query set
            template_name (str): name of the wiki template holding the SPARQL
                query - if None short urls are looked for instead
            debug (bool): if True print log messages
        """
        self.nqm = nqm
        self.base_url = base_url
        self.domain = domain
        self.namespace = namespace
        self.target_graph_name = target_graph_name
        self.template_name = template_name
        self.debug = debug
        self.logger = logging.getLogger("snapquery.wd_page_extractor.WikipediaQueryExtractor")

        self.named_query_list = NamedQuerySet(
            domain=self.domain,
            namespace=self.namespace,
            target_graph_name=self.target_graph_name,
        )
        self.errors = []

    def log(self, message: str, is_error: bool = False):
        """
        Log the given message.

        The message is printed in debug mode. Error messages are additionally
        handed to the logger and collected in the errors list.

        Args:
            message (str): the message to log
            is_error (bool): if True treat the message as an error
        """
        if self.debug:
            print(message)
        if is_error:
            self.logger.debug(message)
            self.errors.append(message)

    def get_wikitext(self) -> str:
        """
        Get wiki text with SPARQL query examples.

        Returns:
            str: Raw wikitext of the page.
        """
        res = requests.get(f"{self.base_url}?action=raw")
        return res.text

    def sanitize_text(self, text: str) -> str:
        """
        General method to sanitize text by removing translation tags, comments,
        and other non-essential markup.

        Args:
            text (str): The text to be sanitized.

        Returns:
            str: The sanitized text.
        """
        # Remove <translate>...</translate> tags
        text = re.sub(r"<translate>(.*?)<\/translate>", r"\1", text, flags=re.DOTALL)
        # Remove <!--T:...--> tags
        text = re.sub(r"<!--T:\d+-->", "", text)
        # Strip whitespace that might be left at the beginning and end
        text = text.strip()
        return text

    def extract_query_from_wiki_markup(self, title: str, markup: str, sparql: str, url: str = None) -> NamedQuery:
        """
        Extracts a named query from wiki markup.

        This method processes the title, markup, and SPARQL query to create a NamedQuery object.
        It sanitizes the text, removes section headers from the description, and constructs
        a URL that points to the specific section of the Wikipedia page.

        Args:
            title (str): The title of the query section.
            markup (str): The wiki markup text containing the query description.
            sparql (str): The SPARQL query string.
            url(str): the url to assign - if not given derive from base_url and section title

        Returns:
            NamedQuery: A NamedQuery object containing the processed information.

        Note:
            The method sanitizes the title and description, removes section headers from the
            description, and constructs a URL with a section anchor based on the title.
        """
        desc = self.sanitize_text(markup)
        if desc:
            # Remove section headers
            desc = re.sub(r"\n*={2,4}.*?={2,4}\n*", "", desc)
            desc = desc.strip()
        title = self.sanitize_text(title)
        if url is None:
            url = f"{self.base_url}#{title.replace(' ', '_')}"
        named_query = NamedQuery(
            domain=self.domain,
            namespace=self.namespace,
            name=title,
            title=title,
            description=desc,
            url=url,
            sparql=sparql,
        )
        return named_query

    def extract_queries_from_wiki_markup(self, markup: str) -> List[NamedQuery]:
        """
        Extract the named queries referenced by short urls in the given markup.

        Each short url is resolved to its SPARQL query; the text around the url
        becomes the description. Every extracted query is added to the
        named query list of this extractor.

        Args:
            markup (str): the wiki markup to search for short urls

        Returns:
            List[NamedQuery]: the queries that were added by this call
        """
        named_queries = []
        # text before the short url, the short url and the text up to the next short url
        pattern = r"(.*?)(https?://w\.wiki/\S+)(.*?)(?=https?://w\.wiki/|\Z)"
        matches = re.findall(pattern, markup, re.DOTALL | re.MULTILINE)

        for pre_text, short_url, post_text in matches:
            self.log(f"Processing short URL: {short_url}")
            pre_text = pre_text.strip()
            post_text = post_text.strip()
            description = f"{pre_text} {post_text}".strip()
            short_url_instance = ShortUrl(short_url=short_url)

            title = short_url_instance.name
            query_name = QueryName(name=title, namespace=self.namespace, domain=self.domain)

            if query_name.query_id in self.named_query_list._query_dict:
                self.log(f"Query with ID {query_name.query_id} already exists. Skipping.", is_error=True)
                continue

            sparql_query = short_url_instance.read_query()
            if short_url_instance.error:
                self.log(f"Error reading query from {short_url}: {short_url_instance.error}", is_error=True)
                continue

            if sparql_query:
                query = self.extract_query_from_wiki_markup(
                    title=title,
                    markup=description,
                    sparql=sparql_query,
                    url=short_url_instance.short_url,
                )
                self.named_query_list.add(query)
                # also collect the query for the caller - the result list used to stay empty
                named_queries.append(query)
                self.log(f"Added query: {title}")
            else:
                self.log(f"No query found for short URL {short_url}", is_error=True)

        if not self.debug and self.errors:
            self.logger.info(f"Encountered {len(self.errors)} errors during extraction. Set debug=True for more details.")

        return named_queries

    def extract_queries_from_section(self, section: Section):
        """
        Extract named queries from section.

        With a template name configured the query is taken from the first
        argument of the section's template, otherwise the section text is
        searched for short urls.

        Args:
            section (Section): Wikitext section containing a SPARQL query.
        """
        if not self.template_name:
            markup = section.plain_text()
            self.extract_queries_from_wiki_markup(markup)
            return
        template = self.get_template(section.templates)
        if not template:
            return
        if not template.arguments:
            # a template without arguments carries no query - report instead of failing
            self.log(
                f"Template {self.template_name} in section {section.title} has no arguments",
                is_error=True,
            )
            return
        sparql = template.arguments[0].value
        if sparql:
            query = self.extract_query_from_wiki_markup(
                section.title, markup=section.plain_text(), sparql=sparql
            )
            self.named_query_list.add(query)

    def get_template(self, templates: list[Template]) -> Template:
        """
        Get template from the list of templates.

        Args:
            templates (list[Template]): List of Wikitext templates.

        Returns:
            Template: the template named like template_name if there is exactly
                one such template, otherwise None.
        """
        queries = [template for template in templates if template.name == self.template_name]
        return queries[0] if len(queries) == 1 else None

    def extract_queries(self, wikitext: str = None):
        """
        Extract all queries from the base_url page.

        Args:
            wikitext (str): the wikitext to parse - if None it is
                retrieved from the base_url
        """
        if wikitext is None:
            wikitext = self.get_wikitext()
        parsed = wtp.parse(wikitext)
        for section in parsed.sections:
            self.extract_queries_from_section(section)

    def save_to_json(self, file_path: str):
        """
        Save the NamedQueryList to a JSON file.

        Args:
           file_path (str): Path to the JSON file.
        """
        self.named_query_list.save_to_json_file(file_path, indent=2)

    def store_queries(self):
        """
        Store the named queries into the database.
        """
        self.nqm.store_named_query_list(self.named_query_list)

    def show_queries(self):
        """
        Pretty print the extracted queries followed by their count.
        """
        for query in self.named_query_list.queries:
            pprint.pprint(query)
        print(f"Found {len(self.named_query_list.queries)} queries")

    def show_errors(self):
        """
        Print the number of collected errors and the numbered error messages.
        """
        print(f"{len(self.errors)} errors:")
        for i, error in enumerate(self.errors, start=1):
            print(f"{i:3}:{error}")

__init__(nqm, base_url, domain, namespace, target_graph_name, template_name='SPARQL', debug=False)

Constructor

Source code in snapquery/wd_page_query_extractor.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def __init__(
    self,
    nqm: NamedQueryManager,
    base_url: str,
    domain: str,
    namespace: str,
    target_graph_name: str,
    template_name: str = "SPARQL",  # https://en.wikipedia.org/wiki/Template:SPARQL) - if None seek for short-urls
    debug: bool = False,
):
    """
    Constructor

    Args:
        nqm (NamedQueryManager): manager used by store_queries().
        base_url (str): URL of the wiki page to extract from.
        domain (str): domain assigned to the extracted queries.
        namespace (str): namespace assigned to the extracted queries.
        target_graph_name (str): target graph name of the query set.
        template_name (str): name of the template that holds the
            SPARQL text; if None short URLs are sought instead.
        debug (bool): debug flag kept on the instance.
    """
    # plain state first
    self.debug = debug
    self.errors = []
    self.logger = logging.getLogger("snapquery.wd_page_extractor.WikipediaQueryExtractor")
    self.nqm = nqm
    self.base_url = base_url
    self.template_name = template_name
    self.domain, self.namespace, self.target_graph_name = (
        domain,
        namespace,
        target_graph_name,
    )
    # container that collects the queries found during extraction
    self.named_query_list = NamedQuerySet(
        domain=domain,
        namespace=namespace,
        target_graph_name=target_graph_name,
    )

extract_queries(wikitext=None)

Extract all queries from the base_url page.

Source code in snapquery/wd_page_query_extractor.py
202
203
204
205
206
207
208
209
210
def extract_queries(self, wikitext: str = None):
    """
    Parse wiki markup and extract the queries of every section.

    Args:
        wikitext (str): the markup to parse; when None it is
            fetched with get_wikitext().
    """
    source = self.get_wikitext() if wikitext is None else wikitext
    for section in wtp.parse(source).sections:
        self.extract_queries_from_section(section)

extract_queries_from_section(section)

Extract named queries from section.

Parameters:

Name Type Description Default
section Section

Wikitext section containing a SPARQL query.

required
Source code in snapquery/wd_page_query_extractor.py
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
def extract_queries_from_section(self, section: Section):
    """
    Extract named queries from section.

    With a configured template_name the first argument of the
    section's matching template is taken as the SPARQL text and the
    resulting query is added to named_query_list. Without a
    template_name the section's plain text is passed on to
    extract_queries_from_wiki_markup().

    Args:
        section (Section): Wikitext section containing a SPARQL query.
    """
    if not self.template_name:
        markup = section.plain_text()
        self.extract_queries_from_wiki_markup(markup)
        return
    template = self.get_template(section.templates)
    # a template without any argument carries no query text; indexing
    # arguments[0] on it would raise an IndexError
    if not template or not template.arguments:
        return
    sparql = template.arguments[0].value
    if sparql:
        query = self.extract_query_from_wiki_markup(
            section.title, markup=section.plain_text(), sparql=sparql
        )
        self.named_query_list.add(query)

extract_query_from_wiki_markup(title, markup, sparql, url=None)

Extracts a named query from wiki markup.

This method processes the title, markup, and SPARQL query to create a NamedQuery object. It sanitizes the text, removes section headers from the description, and constructs a URL that points to the specific section of the Wikipedia page.

Parameters:

Name Type Description Default
title str

The title of the query section.

required
markup str

The wiki markup text containing the query description.

required
sparql str

The SPARQL query string.

required
url str

the URL to assign; if not given, it is derived from base_url and the section title

None

Returns:

Name Type Description
NamedQuery NamedQuery

A NamedQuery object containing the processed information.

Note

The method sanitizes the title and description, removes section headers from the description, and constructs a URL with a section anchor based on the title.

Source code in snapquery/wd_page_query_extractor.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def extract_query_from_wiki_markup(self, title: str, markup: str, sparql: str, url: str = None) -> NamedQuery:
    """
    Build a NamedQuery from a section title, its markup and a SPARQL text.

    Title and markup are run through sanitize_text(); section header
    lines are removed from the description. The sanitized title is used
    both as name and as title of the query.

    Args:
        title (str): The title of the query section.
        markup (str): The wiki markup text containing the query description.
        sparql (str): The SPARQL query string.
        url (str): the url to assign - if not given it is derived from
            base_url plus a "#" anchor made of the title with blanks
            replaced by underscores

    Returns:
        NamedQuery: the query carrying my domain and namespace.
    """
    description = self.sanitize_text(markup)
    if description:
        # drop "== header ==" style lines (two to four equal signs)
        description = re.sub(r"\n*={2,4}.*?={2,4}\n*", "", description).strip()
    clean_title = self.sanitize_text(title)
    if url is None:
        anchor = clean_title.replace(' ', '_')
        url = f"{self.base_url}#{anchor}"
    return NamedQuery(
        domain=self.domain,
        namespace=self.namespace,
        name=clean_title,
        title=clean_title,
        description=description,
        url=url,
        sparql=sparql,
    )

get_template(templates)

Get template from the list of templates.

Parameters:

Name Type Description Default
templates list[Template]

List of Wikitext templates.

required

Returns:

Name Type Description
Template Template

template if available, otherwise None.

Source code in snapquery/wd_page_query_extractor.py
189
190
191
192
193
194
195
196
197
198
199
200
def get_template(self, templates: list[Template]) -> Template:
    """
    Pick the single template whose name equals my template_name.

    Args:
        templates (list[Template]): candidate Wikitext templates.

    Returns:
        Template: the match if exactly one template carries the
            configured name; None if there is no match or if
            several templates match.
    """
    matches = []
    for candidate in templates:
        if candidate.name == self.template_name:
            matches.append(candidate)
    # an ambiguous result (more than one hit) is treated like no hit
    if len(matches) != 1:
        return None
    return matches[0]

get_wikitext()

Get wiki text with SPARQL query examples.

Returns:

Name Type Description
str str

Raw wikitext of the page.

Source code in snapquery/wd_page_query_extractor.py
60
61
62
63
64
65
66
67
68
def get_wikitext(self) -> str:
    """
    Get wiki text with SPARQL query examples.

    Requests the base_url page with "?action=raw" appended.

    Returns:
        str: Raw wikitext of the page (the response body as text).

    Raises:
        requests.exceptions.Timeout: if the server does not answer
            within the connect/read timeout.
    """
    # without a timeout requests waits forever on a stalled server;
    # (connect, read) limits in seconds
    res = requests.get(f"{self.base_url}?action=raw", timeout=(10, 60))
    return res.text

sanitize_text(text)

General method to sanitize text by removing translation tags, comments, and other non-essential markup.

Parameters:

Name Type Description Default
text str

The text to be sanitized.

required

Returns:

Name Type Description
str str

The sanitized text.

Source code in snapquery/wd_page_query_extractor.py
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def sanitize_text(self, text: str) -> str:
    """
    General method to sanitize text by removing translation tags, comments,
    and other non-essential markup.

    Args:
        text (str): The text to be sanitized; None is accepted and
            treated like an empty string.

    Returns:
        str: The sanitized text ("" for None or empty input).
    """
    # NOTE(review): callers pass section.title, which is assumed to be
    # None for an untitled lead section - confirm against wikitextparser.
    # re.sub would raise a TypeError on None.
    if not text:
        return ""
    # Unwrap <translate>...</translate>, keeping the enclosed text
    text = re.sub(r"<translate>(.*?)</translate>", r"\1", text, flags=re.DOTALL)
    # Remove <!--T:...--> translation unit markers
    text = re.sub(r"<!--T:\d+-->", "", text)
    # Strip whitespace that might be left at the beginning and end
    return text.strip()

save_to_json(file_path)

Save the NamedQueryList to a JSON file.

Parameters:

Name Type Description Default
file_path str

Path to the JSON file.

required
Source code in snapquery/wd_page_query_extractor.py
212
213
214
215
216
217
218
219
def save_to_json(self, file_path: str):
    """
    Write my named query list to a JSON file (indent of 2).

    Args:
       file_path (str): Path of the JSON file to write.
    """
    query_set = self.named_query_list
    query_set.save_to_json_file(file_path, indent=2)

store_queries()

Store the named queries into the database.

Source code in snapquery/wd_page_query_extractor.py
221
222
223
224
225
def store_queries(self):
    """
    Hand my named query list to the named query manager for storing.
    """
    query_set = self.named_query_list
    self.nqm.store_named_query_list(query_set)

wd_short_url

Created on 2024-05-12

@author: wf

ShortIds

short id handling

Source code in snapquery/wd_short_url.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
class ShortIds:
    """
    Conversion and random generation of short ids over a custom alphabet.
    """

    def __init__(
        self,
        base_chars: str = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz$",
    ):
        """
        Args:
            base_chars (str): the alphabet; the position of a character
                in this string is its digit value.
        """
        self.base_chars = base_chars

    def id_to_int(self, id_str: str) -> int:
        """
        Interpret an id as a number written in my alphabet.

        Args:
            id_str (str): the id to convert (most significant char first).

        Returns:
            int: the numeric value; 0 for an empty id.

        Raises:
            ValueError: if a character is not part of base_chars.
        """
        radix = len(self.base_chars)
        result = 0
        for symbol in id_str:
            # positional notation: shift left by one digit, add the next
            result = result * radix + self.base_chars.index(symbol)
        return result

    def get_random(self, k: int = 4) -> str:
        """
        Draw a random short id.

        Args:
            k (int): number of characters of the id.

        Returns:
            str: k characters chosen (with repetition) from base_chars.
        """
        return "".join(random.choices(self.base_chars, k=k))

get_random(k=4)

get a random short id

Returns:

Name Type Description
str str

a random short id

Source code in snapquery/wd_short_url.py
46
47
48
49
50
51
52
53
54
def get_random(self, k: int = 4) -> str:
    """
    Draw a random short id.

    Args:
        k (int): number of characters of the id.

    Returns:
        str: k characters chosen (with repetition) from base_chars.
    """
    return "".join(random.choices(self.base_chars, k=k))

id_to_int(id_str)

Convert an ID string to an integer using my base character set.

Parameters:

Name Type Description Default
id_str str

The custom ID string to convert.

required

Returns:

Name Type Description
int int

The converted integer value.

Source code in snapquery/wd_short_url.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def id_to_int(self, id_str: str) -> int:
    """
    Interpret an id as a number written in my alphabet.

    Args:
        id_str (str): the id to convert (most significant char first).

    Returns:
        int: the numeric value; 0 for an empty id.

    Raises:
        ValueError: if a character is not part of base_chars.
    """
    radix = len(self.base_chars)
    result = 0
    for symbol in id_str:
        # positional notation: shift left by one digit, add the next
        result = result * radix + self.base_chars.index(symbol)
    return result

ShortUrl

Handles operations related to Wikidata and similar short URLs such as QLever. See https://meta.wikimedia.org/wiki/Wikimedia_URL_Shortener for details.

Source code in snapquery/wd_short_url.py
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
class ShortUrl:
    """
    Handles operations related to wikidata and similar short URLs such as QLever.
    see https://meta.wikimedia.org/wiki/Wikimedia_URL_Shortener for details
    """
    # see https://stackoverflow.com/questions/62396801/how-to-handle-too-many-requests-on-wikidata-using-sparqlwrapper
    CALLS_PER_MINUTE = 30
    ONE_MINUTE = 60

    def __init__(self, short_url: str, scheme: str = "https", netloc: str = "query.wikidata.org"):
        """
        Constructor

        Args:
            short_url (str): The URL to be processed.
            scheme (str): URL scheme to be used (e.g., 'https' or 'http') for validating URL format.
            netloc (str): Network location part of the URL, typically the domain name, to be used for validating URL format.
        """

        self.short_url = short_url
        self.scheme = scheme
        self.netloc = netloc
        # results - filled by fetch_final_url() / read_query()
        self.url = None
        self.sparql = None
        self.error = None
        self.user_agent = self.get_user_agent()

    @staticmethod
    def get_user_agent():
        """
        Build the User-Agent header value from the package Version
        data and the requests version.

        Returns:
            str: the user agent string
        """
        version = Version()
        return f"{version.name}/{version.version} ({version.cm_url}; {version.authors}) Python-requests/{requests.__version__}"


    @property
    def name(self):
        """
        Extracts and returns the name part of the short URL.

        Returns:
            str: The part of the short URL after the last '/',
                None if there is no short URL.
        """
        # Assuming the short URL ends with the name part after the last '/'
        if self.short_url:
            name_part = self.short_url.rsplit("/", 1)[-1]
            return name_part
        return None

    @classmethod
    def get_prompt_text(cls, sparql: str) -> str:
        """
        Get the LLM prompt asking for name, title and description
        of the given query as JSON.

        Args:
            sparql (str): the SPARQL query to describe

        Returns:
            str: the prompt text
        """
        # the example must itself be valid JSON since the answer is
        # parsed with json.loads - hence the comma after the "name" entry
        prompt_text = f"""give an english name, title and description in json 
for cut &paste for the SPARQL query below- the name should be less than 60 chars be a proper identifier which has no special chars so it can be used in an url without escaping. The title should be less than 80 chars and the 
description not more than three lines of 80 chars. 
A valid example result would be e.g.
{{
  "name": "Locations_in_Rennes_with_French_Wikipedia_Article",
  "title": "Locations in Rennes with a French Wikipedia Article",
  "description": "Maps locations in Rennes linked to French Wikipedia articles. It displays entities within 10 km of Rennes' center, showing their names, coordinates, and linked Wikipedia pages. The results include entities' identifiers, coordinates, and related site links."
}}

The example is just an example - do not use it's content if it does not match. 
Avoid  hallucinating and stick to the facts.
If the you can not determine a proper name, title and description return {{}}
SPARQL: {sparql}
"""
        return prompt_text

    @classmethod
    def get_random_query_list(
        cls,
        namespace: str,
        count: int,
        max_postfix="9pfu",
        with_llm=False,
        with_progress: bool = False,
        debug=False,
    ) -> NamedQuerySet:
        """
        Read a specified number of random queries from a list of short URLs.

        Random short ids are tried until count distinct URLs with a
        query were found or the retry budget (count * 15) is used up.

        Args:
            namespace(str): the name to use for the named query list
            count (int): Number of random URLs to fetch.
            max_postfix(str): the maximum ID to try
            with_llm(bool): if True ask the LLM for name, title and description
            with_progress(bool): if True show progress
            debug(bool): if True print debug information

        Returns:
            NamedQuerySet: A NamedQuerySet containing the queries read from the URLs.
        """
        if with_llm:
            llm = LLM(model="gpt-4")
        short_ids = ShortIds()
        base_url = "https://w.wiki/"
        unique_urls = set()
        unique_names = set()

        nq_set = NamedQuerySet(domain="wikidata.org", namespace=namespace, target_graph_name="wikidata")
        give_up = (
            count * 15
        )  # heuristic factor for probability that a short url points to a wikidata entry - 14 has worked so far
        max_short_int = short_ids.id_to_int(max_postfix)
        while len(unique_urls) < count and give_up > 0:
            if with_progress and not debug:
                print(".", end="")
                if give_up % 80 == 0:
                    print()
            # draw a random short id from the ShortIds alphabet
            postfix = short_ids.get_random()
            if short_ids.id_to_int(postfix) > max_short_int:
                continue
            if debug:
                print(f"{give_up:4}:{postfix}")
            wd_short_url = f"{base_url}{postfix}"
            if wd_short_url in unique_urls:
                # same id drawn again - never add a query twice
                give_up -= 1
                continue
            short_url = cls(short_url=wd_short_url)
            short_url.read_query()
            if short_url.sparql and not short_url.error:
                nq = NamedQuery(
                    domain=nq_set.domain,
                    name=postfix,
                    namespace=nq_set.namespace,
                    url=wd_short_url,
                    sparql=short_url.sparql,
                )
                if with_llm:
                    try:
                        llm_response = llm.ask(cls.get_prompt_text(short_url.sparql))
                        if llm_response:
                            response_json = json.loads(llm_response)
                            name = response_json.get("name", None)
                            if name in unique_names:
                                # try again with a different url to avoid name clash
                                give_up -= 1
                                continue
                            if name:
                                nq.name = name
                            title = response_json.get("title", "")
                            description = response_json.get("description", "")
                            nq.title = title
                            nq.description = description
                            nq.__post_init__()
                    except Exception as ex:
                        if debug:
                            print(f"Failed to get LLM response: {str(ex)}")
                        # count the failure - otherwise a permanently
                        # failing LLM would keep this loop running forever
                        give_up -= 1
                        continue
                nq_set.queries.append(nq)
                unique_urls.add(nq.url)
                unique_names.add(nq.name)
                if debug:
                    print(nq)
            else:
                give_up -= 1
        return nq_set

    @sleep_and_retry
    @limits(calls=CALLS_PER_MINUTE, period=ONE_MINUTE)
    def fetch_final_url(self):
        """
        Follow the redirection to get the final URL with rate limiting.

        Any exception is kept in self.error instead of being raised.

        Returns:
            str: The final URL after redirection, None on failure.
        """
        try:
            headers = {'User-Agent': self.user_agent}
            # timeout in seconds - without it a stalled server blocks forever;
            # a timeout ends up in self.error like any other failure
            response = requests.get(self.short_url, headers=headers, allow_redirects=True, timeout=30)
            response.raise_for_status()
            self.url = response.url
        except Exception as ex:
            self.error = ex
        return self.url

    def read_query(self) -> str:
        """
        Read a query from a short URL.

        The final URL must match my scheme and netloc; the query is
        taken from the URL fragment or else from the "query" parameter.

        Returns:
            str: The SPARQL query extracted from the short URL,
                None if no query could be extracted.
        """
        self.fetch_final_url()
        if self.url:
            parsed_url = urllib.parse.urlparse(self.url)
            if parsed_url.scheme == self.scheme and parsed_url.netloc == self.netloc:
                if parsed_url.fragment:
                    self.sparql = urllib.parse.unquote(parsed_url.fragment)
                else:
                    query_params = urllib.parse.parse_qs(parsed_url.query)
                    if "query" in query_params:
                        self.sparql = query_params["query"][0]
        return self.sparql

name property

Extracts and returns the name part of the short URL.

Returns:

Name Type Description
str

The name part of the short URL.

__init__(short_url, scheme='https', netloc='query.wikidata.org')

Constructor

Parameters:

Name Type Description Default
short_url str

The URL to be processed.

required
scheme str

URL scheme to be used (e.g., 'https' or 'http') for validating URL format.

'https'
netloc str

Network location part of the URL, typically the domain name, to be used for validating URL format.

'query.wikidata.org'
Source code in snapquery/wd_short_url.py
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def __init__(self, short_url: str, scheme: str = "https", netloc: str = "query.wikidata.org"):
    """
    Constructor

    Args:
        short_url (str): The URL to be processed.
        scheme (str): expected URL scheme (e.g., 'https' or 'http') of the resolved URL.
        netloc (str): expected network location (typically the domain name) of the resolved URL.
    """
    # result slots start out empty
    self.url = self.sparql = self.error = None
    self.short_url, self.scheme, self.netloc = short_url, scheme, netloc
    self.user_agent = self.get_user_agent()

fetch_final_url()

Follow the redirection to get the final URL with rate limiting.

Returns:

Name Type Description
str

The final URL after redirection.

Source code in snapquery/wd_short_url.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
@sleep_and_retry
@limits(calls=CALLS_PER_MINUTE, period=ONE_MINUTE)
def fetch_final_url(self):
    """
    Follow the redirection to get the final URL with rate limiting.

    Any exception is kept in self.error instead of being raised.

    Returns:
        str: The final URL after redirection, None on failure.
    """
    try:
        headers = {'User-Agent': self.user_agent}
        # timeout in seconds - without it a stalled server blocks forever;
        # a timeout ends up in self.error like any other failure
        response = requests.get(self.short_url, headers=headers, allow_redirects=True, timeout=30)
        response.raise_for_status()
        self.url = response.url
    except Exception as ex:
        self.error = ex
    return self.url

get_random_query_list(namespace, count, max_postfix='9pfu', with_llm=False, with_progress=False, debug=False) classmethod

Read a specified number of random queries from a list of short URLs.

Parameters:

Name Type Description Default
namespace(str)

the name to use for the named query list

required
count int

Number of random URLs to fetch.

required
max_postfix(str)

the maximum ID to try

required
with_progress(bool)

if True show progress

required

Returns:

Name Type Description
NamedQueryList NamedQuerySet

A NamedQueryList containing the queries read from the URLs.

Source code in snapquery/wd_short_url.py
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
@classmethod
def get_random_query_list(
    cls,
    namespace: str,
    count: int,
    max_postfix="9pfu",
    with_llm=False,
    with_progress: bool = False,
    debug=False,
) -> NamedQuerySet:
    """
    Read a specified number of random queries from a list of short URLs.

    Random short ids are tried until count distinct URLs with a
    query were found or the retry budget (count * 15) is used up.

    Args:
        namespace(str): the name to use for the named query list
        count (int): Number of random URLs to fetch.
        max_postfix(str): the maximum ID to try
        with_llm(bool): if True ask the LLM for name, title and description
        with_progress(bool): if True show progress
        debug(bool): if True print debug information

    Returns:
        NamedQuerySet: A NamedQuerySet containing the queries read from the URLs.
    """
    if with_llm:
        llm = LLM(model="gpt-4")
    short_ids = ShortIds()
    base_url = "https://w.wiki/"
    unique_urls = set()
    unique_names = set()

    nq_set = NamedQuerySet(domain="wikidata.org", namespace=namespace, target_graph_name="wikidata")
    give_up = (
        count * 15
    )  # heuristic factor for probability that a short url points to a wikidata entry - 14 has worked so far
    max_short_int = short_ids.id_to_int(max_postfix)
    while len(unique_urls) < count and give_up > 0:
        if with_progress and not debug:
            print(".", end="")
            if give_up % 80 == 0:
                print()
        # draw a random short id from the ShortIds alphabet
        postfix = short_ids.get_random()
        if short_ids.id_to_int(postfix) > max_short_int:
            continue
        if debug:
            print(f"{give_up:4}:{postfix}")
        wd_short_url = f"{base_url}{postfix}"
        if wd_short_url in unique_urls:
            # same id drawn again - never add a query twice
            give_up -= 1
            continue
        short_url = cls(short_url=wd_short_url)
        short_url.read_query()
        if short_url.sparql and not short_url.error:
            nq = NamedQuery(
                domain=nq_set.domain,
                name=postfix,
                namespace=nq_set.namespace,
                url=wd_short_url,
                sparql=short_url.sparql,
            )
            if with_llm:
                try:
                    llm_response = llm.ask(cls.get_prompt_text(short_url.sparql))
                    if llm_response:
                        response_json = json.loads(llm_response)
                        name = response_json.get("name", None)
                        if name in unique_names:
                            # try again with a different url to avoid name clash
                            give_up -= 1
                            continue
                        if name:
                            nq.name = name
                        title = response_json.get("title", "")
                        description = response_json.get("description", "")
                        nq.title = title
                        nq.description = description
                        nq.__post_init__()
                except Exception as ex:
                    if debug:
                        print(f"Failed to get LLM response: {str(ex)}")
                    # count the failure - otherwise a permanently
                    # failing LLM would keep this loop running forever
                    give_up -= 1
                    continue
            nq_set.queries.append(nq)
            unique_urls.add(nq.url)
            unique_names.add(nq.name)
            if debug:
                print(nq)
        else:
            give_up -= 1
    return nq_set

read_query()

Read a query from a short URL.

Returns:

Name Type Description
str str

The SPARQL query extracted from the short URL.

Source code in snapquery/wd_short_url.py
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
def read_query(self) -> str:
    """
    Resolve the short URL and pull the SPARQL text out of the final URL.

    The final URL has to match my scheme and netloc. The query is the
    unquoted URL fragment if there is one, otherwise the first value
    of the "query" parameter.

    Returns:
        str: the SPARQL query, or the unchanged self.sparql (initially
            None) if nothing could be extracted.
    """
    self.fetch_final_url()
    if not self.url:
        return self.sparql
    target = urllib.parse.urlparse(self.url)
    if (target.scheme, target.netloc) != (self.scheme, self.netloc):
        # resolved to a foreign site - nothing to read
        return self.sparql
    if target.fragment:
        self.sparql = urllib.parse.unquote(target.fragment)
        return self.sparql
    params = urllib.parse.parse_qs(target.query)
    if "query" in params:
        self.sparql = params["query"][0]
    return self.sparql