snapquery API Documentation

`basequeryview`

Created on 2024-06-23 @author: wf

`BaseQueryView`

general search for queries

Source code in snapquery/basequeryview.py

class BaseQueryView:
    """
    general search for queries

    Shows a header, a QuerySelector and a result row. Changes in the
    selector trigger a debounced SQL search on the NamedQuery table; the
    matching records are displayed in a ListOfDictsGrid.
    """

    def __init__(self, solution: WebSolution, debug: bool = False):
        """
        Constructor

        Args:
            solution (WebSolution): the solution providing the named query
                manager (``nqm``) and the exception handling
            debug (bool): handed over to the debouncer. Defaults to False.
        """
        self.solution = solution
        self.nqm = self.solution.nqm
        self.debug = debug
        self.setup_ui()

    def setup_ui(self):
        """
        setup my user interface
        """
        with ui.row().classes("w-full items-baseline") as self.header_row:
            ui.label("Available Queries").classes("text-xl")
            ui.label("select a query to view and execute").classes("text-slate-400")

        self.query_selector = QuerySelector(self.solution, self.on_search_change)
        self.search_result_row = ui.row()
        self.debouncer = DebouncerUI(parent=self.search_result_row, delay=0.65, debug=self.debug)

        # one-shot timer with zero delay that triggers the initial search
        ui.timer(0.0, self.on_search_change, once=True)

    async def on_search_change(self, _args=None):
        """
        react on changes in the search input

        Args:
            _args: event arguments of the caller - not used
        """
        await self.debouncer.debounce(self.perform_search)

    async def perform_search(self):
        """
        Performs the search based on the current QuerySelector values.

        Name, namespace and domain of the selector are used as prefix
        patterns of a parameterized LIKE query. The result is kept in
        ``self.q_lod`` and displayed; any exception is passed on to the
        solution's exception handler.
        """
        try:
            qn = self.query_selector.qn
            name_like = f"{qn.name}%"
            namespace_like = f"{qn.namespace}%"
            domain_like = f"{qn.domain}%"
            sql_query = """SELECT 
            * 
            FROM NamedQuery 
            WHERE 
                name LIKE ? 
                AND namespace LIKE ? 
                AND domain LIKE ?"""
            self.q_lod = self.nqm.sql_db.query(sql_query, (name_like, namespace_like, domain_like))
            self.show_lod(self.q_lod)
        except Exception as ex:
            self.solution.handle_exception(ex)

    def show_lod(self, q_lod: List):
        """
        show the given list of dicts

        Args:
            q_lod (List): the NamedQuery records to display
        """
        self.search_result_row.clear()
        # convert the given records - not self.q_lod - so that the grid
        # content always matches the count shown in the notification
        view_lod = []
        for record in q_lod:
            nq = NamedQuery.from_record(record)
            vr = nq.as_viewrecord()
            view_lod.append(vr)
        with self.search_result_row:
            self.search_result_grid = ListOfDictsGrid()
            ui.notify(f"found {len(q_lod)} queries")
            self.search_result_grid.load_lod(view_lod)
        self.search_result_row.update()

`on_search_change(_args=None)` `async`

react on changes in the search input

Source code in snapquery/basequeryview.py

async def on_search_change(self, _args=None):
    """
    Handle a change of the search input.

    The actual search is not started directly but handed to the
    debouncer.

    Args:
        _args: event arguments of the caller - ignored
    """
    pending_search = self.debouncer.debounce(self.perform_search)
    await pending_search

`perform_search()` `async`

Performs the search based on the current QuerySelector values.

Source code in snapquery/basequeryview.py

async def perform_search(self):
    """
    Search the NamedQuery table with the current QuerySelector values.

    name, namespace and domain of the selected query name each become a
    prefix pattern for a parameterized LIKE clause. The records found are
    stored in ``self.q_lod`` and shown; exceptions are forwarded to the
    exception handler of the solution.
    """
    try:
        selected = self.query_selector.qn
        like_params = tuple(f"{part}%" for part in (selected.name, selected.namespace, selected.domain))
        sql_query = """SELECT 
        * 
        FROM NamedQuery 
        WHERE 
            name LIKE ? 
            AND namespace LIKE ? 
            AND domain LIKE ?"""
        records = self.nqm.sql_db.query(sql_query, like_params)
        self.q_lod = records
        self.show_lod(self.q_lod)
    except Exception as ex:
        self.solution.handle_exception(ex)

`setup_ui()`

setup my user interface

Source code in snapquery/basequeryview.py

def setup_ui(self):
    """
    setup my user interface

    Creates - in this order - the header row with two labels, the
    QuerySelector wired to on_search_change, the row receiving the search
    results and the debouncer. Finally a one-shot timer is registered that
    calls on_search_change.
    """
    # header row: title label followed by a hint label
    with ui.row().classes("w-full items-baseline") as self.header_row:
        ui.label("Available Queries").classes("text-xl")
        ui.label("select a query to view and execute").classes("text-slate-400")

    # the selector gets on_search_change as its callback
    self.query_selector = QuerySelector(self.solution, self.on_search_change)
    self.search_result_row = ui.row()
    # the debouncer is attached to the result row and uses a 0.65 delay
    self.debouncer = DebouncerUI(parent=self.search_result_row, delay=0.65, debug=self.debug)

    # zero delay, once=True: triggers a single initial search
    ui.timer(0.0, self.on_search_change, once=True)

`show_lod(q_lod)`

show the given list of dicts

Source code in snapquery/basequeryview.py

def show_lod(self, q_lod: List):
    """
    show the given list of dicts

    Clears the search result row and fills it with a fresh grid holding
    the view records of the given NamedQuery records.

    Args:
        q_lod (List): the NamedQuery records to display
    """
    self.search_result_row.clear()
    # convert the records handed in - not self.q_lod - so that the grid
    # content always matches the count shown in the notification
    view_lod = []
    for record in q_lod:
        nq = NamedQuery.from_record(record)
        vr = nq.as_viewrecord()
        view_lod.append(vr)
    with self.search_result_row:
        self.search_result_grid = ListOfDictsGrid()
        ui.notify(f"found {len(q_lod)} queries")
        self.search_result_grid.load_lod(view_lod)
    self.search_result_row.update()

`ceurws`

Created on 2024-07-02 @author: wf

`CeurWSQueries`

A class to handle the extraction and management of CEUR-WS queries.

Source code in snapquery/ceurws.py

class CeurWSQueries:
    """
    A class to handle the extraction and management of CEUR-WS queries.

    The queries are read from a Semantic MediaWiki via an #ask query and
    collected in a NamedQuerySet that can be saved as JSON or stored via
    the NamedQueryManager.
    """

    def __init__(self, nqm: NamedQueryManager, debug: bool = False):
        """
        Constructor
        Args:
            nqm (NamedQueryManager): The NamedQueryManager to use for storing queries.
            debug (bool): Enable debug output. Defaults to False.
        """
        self.nqm = nqm
        self.named_query_set = NamedQuerySet(
            domain="ceur-ws.org",
            namespace="challenge",
            target_graph_name="wikidata",
        )
        self.debug = debug
        self.wiki_id = "cr"
        self.wiki_client = WikiClient.ofWikiId(self.wiki_id)
        self.smw = SMWClient(self.wiki_client.getSite())

    def extract_queries(self, limit: int = None):
        """
        Extract all queries from the CEUR-WS challenge wiki.

        Records without a ``wdqsurl`` value are skipped. Every other record
        is appended as a NamedQuery to ``self.named_query_set.queries``.

        Args:
            limit (int, optional): Limit the number of queries fetched.
                A falsy value (None or 0) adds no limit clause. Defaults to None.
        """
        if limit:
            limitclause = f"|limit={limit}"
        else:
            limitclause = ""
        ask_query = f"""{{{{#ask: [[Concept:Query]]
|mainlabel=Query
|?Query id=id
|?Query name=name
|?Query title=title
|?Query tryiturl=tryiturl
|?Query wdqsurl=wdqsurl
|?Query scholia=scholia
|?Query relevance=relevance
|?Query task=task
|?Query sparql=sparql
{limitclause}
|sort=Query task,Query id
|order=ascending
}}}}"""
        query_results = self.smw.query(ask_query)
        for _page_title, query_data in query_results.items():
            # Extract values into local variables for easier debugging
            url = query_data.get("wdqsurl")
            if not url:
                # a query without short url is ignored
                continue
            # the wiki only stores the id part of the short url
            url = f"https://w.wiki/{url}"
            name = query_data.get("name")
            title = query_data.get("title")
            description = query_data.get("task")
            sparql = query_data.get("sparql")
            tryiturl = query_data.get("tryiturl")
            if tryiturl:
                tryiturl = f"https://qlever.cs.uni-freiburg.de/wikidata/{tryiturl}"
            comment = f"qlever tryit url: {tryiturl}" if tryiturl else None
            named_query = NamedQuery(
                domain=self.named_query_set.domain,
                namespace=self.named_query_set.namespace,
                name=name,
                url=url,
                title=title,
                description=description,
                sparql=sparql,
                comment=comment,
            )
            self.named_query_set.queries.append(named_query)

            if self.debug:
                # progress display: one dot per query, the count after every 80 queries
                print(".", end="", flush=True)
                if len(self.named_query_set.queries) % 80 == 0:
                    print(f"{len(self.named_query_set.queries)}")

        if self.debug:
            print(f"\nFound {len(self.named_query_set.queries)} CEUR-WS challenge queries")

    def save_to_json(self, file_path: str = "/tmp/ceurws-queries.json"):
        """
        Save the NamedQuerySet to a JSON file.
        Args:
            file_path (str): Path to the JSON file.
        """
        self.named_query_set.save_to_json_file(file_path, indent=2)

    def store_queries(self):
        """
        Store the named queries into the database.
        """
        self.nqm.store_named_query_list(self.named_query_set)

`__init__(nqm, debug=False)`

Constructor. Args: `nqm` (NamedQueryManager): the NamedQueryManager to use for storing queries; `debug` (bool): enable debug output, defaults to False.

Source code in snapquery/ceurws.py

def __init__(self, nqm: NamedQueryManager, debug: bool = False):
    """
    Create the CEUR-WS query extractor.

    Prepares an empty NamedQuerySet for the ceur-ws.org challenge queries
    and a Semantic MediaWiki client for the wiki with the id "cr".

    Args:
        nqm (NamedQueryManager): The NamedQueryManager to use for storing queries.
        debug (bool): Enable debug output. Defaults to False.
    """
    self.nqm = nqm
    self.debug = debug
    query_set_args = {
        "domain": "ceur-ws.org",
        "namespace": "challenge",
        "target_graph_name": "wikidata",
    }
    self.named_query_set = NamedQuerySet(**query_set_args)
    self.wiki_id = "cr"
    client = WikiClient.ofWikiId(self.wiki_id)
    self.wiki_client = client
    self.smw = SMWClient(client.getSite())

`extract_queries(limit=None)`

Extract all queries from the CEUR-WS challenge wiki. Args: `limit` (int, optional): limit the number of queries fetched, defaults to None (no limit).

Source code in snapquery/ceurws.py

    def extract_queries(self, limit: int = None):
        """
        Extract all queries from the CEUR-WS challenge wiki.
        Args:
            limit (int, optional): Limit the number of queries fetched. Defaults to None.
        """
        if limit:
            limitclause = f"|limit={limit}"
        else:
            limitclause = ""
        ask_query = f"""{{{{#ask: [[Concept:Query]]
|mainlabel=Query
|?Query id=id
|?Query name=name
|?Query title=title
|?Query tryiturl=tryiturl
|?Query wdqsurl=wdqsurl
|?Query scholia=scholia
|?Query relevance=relevance
|?Query task=task
|?Query sparql=sparql
{limitclause}
|sort=Query task,Query id
|order=ascending
}}}}"""
        query_results = self.smw.query(ask_query)
        for _page_title, query_data in query_results.items():
            # Extract values into local variables for easier debugging
            name = query_data.get("name")
            url = query_data.get("wdqsurl")
            if not url:
                continue
            title = query_data.get("title")
            description = query_data.get("task")
            sparql = query_data.get("sparql")
            if url:
                url = f"https://w.wiki/{url}"
            tryiturl = query_data.get("tryiturl")
            if tryiturl:
                tryiturl = f"https://qlever.cs.uni-freiburg.de/wikidata/{tryiturl}"
            comment = f"qlever tryit url: {tryiturl}" if tryiturl else None
            named_query = NamedQuery(
                domain=self.named_query_set.domain,
                namespace=self.named_query_set.namespace,
                name=name,
                url=url,
                title=title,
                description=description,
                sparql=sparql,
                comment=comment,
            )
            self.named_query_set.queries.append(named_query)

            if self.debug:
                print(".", end="", flush=True)
                if len(self.named_query_set.queries) % 80 == 0:
                    print(f"{len(self.named_query_set.queries)}")

        if self.debug:
            print(f"\nFound {len(self.named_query_set.queries)} CEUR-WS challenge queries")

`save_to_json(file_path='/tmp/ceurws-queries.json')`

Save the NamedQuerySet to a JSON file. Args: `file_path` (str): path to the JSON file.

Source code in snapquery/ceurws.py

def save_to_json(self, file_path: str = "/tmp/ceurws-queries.json"):
    """
    Write the collected named query set to a JSON file.

    The file is written with an indentation of 2.

    Args:
        file_path (str): Path to the JSON file.
    """
    query_set = self.named_query_set
    query_set.save_to_json_file(file_path, indent=2)

`store_queries()`

Store the named queries into the database.

Source code in snapquery/ceurws.py

def store_queries(self):
    """
    Hand the collected named query set to the NamedQueryManager for storing.
    """
    manager = self.nqm
    manager.store_named_query_list(self.named_query_set)

`dblp`

Created on 2024-06-07

@author: wf

`DblpPersonLookup`

lookup persons in dblp

Source code in snapquery/dblp.py

class DblpPersonLookup:
    """
    Lookup of persons in dblp via a named SPARQL query.
    """

    def __init__(self, nqm: NamedQueryManager, endpoint_name: str = "dblp"):
        """
        Constructor

        Args:
            nqm (NamedQueryManager): the manager used to execute the lookup query
            endpoint_name (str): name of the endpoint to query. Defaults to "dblp".
        """
        self.nqm = nqm
        self.endpoint_name = endpoint_name

    def search(self, name_part: str, limit: int = 10) -> List[Person]:
        """
        Find persons whose label matches the given part of a name.

        The name part is handed over as the ``name_regex`` parameter of a
        SPARQL query that filters the labels with a case insensitive regex.

        Args:
            name_part (str): The part of the name to search for.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of Person objects.
        """
        sparql_template = """# snapquery person lookup by name part
SELECT DISTINCT 
  ?author 
  ?label 
  ?dblp_author_id 
  ?wikidata_id 
  ?orcid_id
WHERE {
  ?author a dblp:Person.
  ?author rdfs:label ?label.
  FILTER regex(?label, "{{ name_regex }}", "i")
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier.
    ?identifier datacite:usesIdentifierScheme datacite:dblp.
    ?identifier litre:hasLiteralValue ?dblp_author_id.
  }
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier2.
    ?identifier2 datacite:usesIdentifierScheme datacite:wikidata.
    ?identifier2 litre:hasLiteralValue ?wikidata_id.
  }
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier3.
    ?identifier3 datacite:usesIdentifierScheme datacite:orcid.
    ?identifier3 litre:hasLiteralValue ?orcid_id.
  }
}
            """
        lookup_query = NamedQuery(
            domain="dblp.org",
            namespace="pid-lookup",
            name="person-by-name-part",
            title="Lookup persons with a name matching a pattern",
            description="Search for persons by matching part of their name using regex",
            sparql=sparql_template,
        )
        # NOTE(review): name_part is used unescaped as regex parameter - confirm
        # that the query execution takes care of quoting
        rows, _stats = self.nqm.execute_query(
            named_query=lookup_query,
            params_dict={"name_regex": name_part},
            endpoint_name=self.endpoint_name,
            limit=limit,
            with_stats=False,
        )
        found = []
        for row in rows:
            candidate = Person(
                label=row.get("label"),
                wikidata_id=row.get("wikidata_id"),
                dblp_author_id=row.get("dblp_author_id"),
                orcid_id=row.get("orcid_id"),
            )
            candidate.parse_label()
            found.append(candidate)
        return found

`search(name_part, limit=10)`

search persons by part of their name using a SPARQL query with regex.

Parameters:

Name	Type	Description	Default
`name_part`	`str`	The part of the name to search for.	required
`limit`	`int`	The maximum number of results to return.	`10`

Returns:

Type	Description
`List[Person]`	A list of Person objects.

Source code in snapquery/dblp.py

    def search(self, name_part: str, limit: int = 10) -> List[Person]:
        """
        search persons by part of their name using a SPARQL query with regex.

        Args:
            name_part (str): The part of the name to search for.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of Person objects.
        """
        named_query = NamedQuery(
            domain="dblp.org",
            namespace="pid-lookup",
            name="person-by-name-part",
            title="Lookup persons with a name matching a pattern",
            description="Search for persons by matching part of their name using regex",
            sparql="""# snapquery person lookup by name part
SELECT DISTINCT 
  ?author 
  ?label 
  ?dblp_author_id 
  ?wikidata_id 
  ?orcid_id
WHERE {
  ?author a dblp:Person.
  ?author rdfs:label ?label.
  FILTER regex(?label, "{{ name_regex }}", "i")
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier.
    ?identifier datacite:usesIdentifierScheme datacite:dblp.
    ?identifier litre:hasLiteralValue ?dblp_author_id.
  }
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier2.
    ?identifier2 datacite:usesIdentifierScheme datacite:wikidata.
    ?identifier2 litre:hasLiteralValue ?wikidata_id.
  }
  OPTIONAL {
    ?author datacite:hasIdentifier ?identifier3.
    ?identifier3 datacite:usesIdentifierScheme datacite:orcid.
    ?identifier3 litre:hasLiteralValue ?orcid_id.
  }
}
            """,
        )
        params_dict = {"name_regex": name_part}

        person_lod, _stats = self.nqm.execute_query(
            named_query=named_query,
            params_dict=params_dict,
            endpoint_name=self.endpoint_name,
            limit=limit,
            with_stats=False,
        )
        persons = []
        for pr in person_lod:
            person = Person(
                label=pr.get("label"),
                wikidata_id=pr.get("wikidata_id"),
                dblp_author_id=pr.get("dblp_author_id"),
                orcid_id=pr.get("orcid_id"),
            )
            person.parse_label()
            persons.append(person)
        return persons

`endpoint`

Created on 29.06.2024 @author: wf

`Endpoint`

A query endpoint for SPARQL, SQL or other storage systems

Source code in snapquery/endpoint.py

@lod_storable
class Endpoint:
    """
    Description of a query endpoint for SPARQL, SQL or other storage systems.
    """

    # mandatory: name and url of the endpoint
    name: str
    endpoint: str
    # query language of the endpoint
    lang: str = "sparql"
    website: Optional[str] = None
    database: Optional[str] = None
    method: Optional[str] = "POST"
    prefixes: Optional[str] = None
    # optional authentication settings
    auth: Optional[str] = None
    user: Optional[str] = None
    password: Optional[str] = None

    def __post_init__(self):
        """
        Hook for post-initialization processing - currently nothing to do.
        """

    @classmethod
    def get_samples(cls) -> dict[str, List["Endpoint"]]:
        """
        Provide sample Endpoint instances.

        Returns:
            dict: the key "sample-endpoints" mapped to a list of two sample
                endpoints (wikidata and dbis-jena)
        """
        wikidata = cls(
            name="wikidata",
            lang="sparql",
            endpoint="https://query.wikidata.org/sparql",
            website="https://query.wikidata.org/",
            database="blazegraph",
            method="POST",
            prefixes="PREFIX bd: <http://www.bigdata.com/rdf#>\nPREFIX cc: <http://creativecommons.org/ns#>",
        )
        # the credentials of this sample are placeholders only
        dbis_jena = cls(
            name="dbis-jena",
            lang="sparql",
            endpoint="https://confident.dbis.rwth-aachen.de/jena/",
            website="https://confident.dbis.rwth-aachen.de",
            auth="BASIC",
            user="secret",
            password="#not public - example not usable for access#",
        )
        return {"sample-endpoints": [wikidata, dbis_jena]}

`__post_init__()`

Perform post-initialization processing if needed.

Source code in snapquery/endpoint.py

def __post_init__(self):
    """
    Hook for post-initialization processing - currently nothing to do.
    """
    return None

`get_samples()` `classmethod`

Get samples for Endpoint

Source code in snapquery/endpoint.py

@classmethod
def get_samples(cls) -> dict[str, List["Endpoint"]]:
    """
    Provide sample Endpoint instances.

    Returns:
        dict: the key "sample-endpoints" mapped to a list of two sample
            endpoints (wikidata and dbis-jena)
    """
    wikidata = cls(
        name="wikidata",
        lang="sparql",
        endpoint="https://query.wikidata.org/sparql",
        website="https://query.wikidata.org/",
        database="blazegraph",
        method="POST",
        prefixes="PREFIX bd: <http://www.bigdata.com/rdf#>\nPREFIX cc: <http://creativecommons.org/ns#>",
    )
    # the credentials of this sample are placeholders only
    dbis_jena = cls(
        name="dbis-jena",
        lang="sparql",
        endpoint="https://confident.dbis.rwth-aachen.de/jena/",
        website="https://confident.dbis.rwth-aachen.de",
        auth="BASIC",
        user="secret",
        password="#not public - example not usable for access#",
    )
    return {"sample-endpoints": [wikidata, dbis_jena]}

`EndpointManager`

Manages the storage and retrieval of Endpoint configurations.

Source code in snapquery/endpoint.py

@lod_storable
class EndpointManager:
    """
    Keeps Endpoint configurations by name and gives access to them.
    """

    # endpoint name -> Endpoint
    endpoints: Dict[str, Endpoint] = field(default_factory=dict)

    @classmethod
    def get_yaml_path(cls) -> str:
        """
        Get the path of the endpoints.yaml file in the samples directory
        located next to this module.

        Returns:
            str: the path of the yaml file
        """
        module_dir = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(module_dir, "samples", "endpoints.yaml")

    def get_endpoint(self, name: str) -> Endpoint:
        """
        Look up an endpoint by its name.

        Args:
            name (str): the name of the endpoint

        Returns:
            Endpoint: the endpoint - None if the name is not known
        """
        endpoint = self.endpoints.get(name)
        return endpoint

    def __len__(self):
        """the number of endpoints"""
        return len(self.endpoints)

    def __iter__(self):
        """iterate over the endpoints (not their names)"""
        return iter(self.endpoints.values())

`get_endpoint(name)`

Retrieve an endpoint by name.

Source code in snapquery/endpoint.py

def get_endpoint(self, name: str) -> Endpoint:
    """
    Look up an endpoint by its name.

    Args:
        name (str): the name of the endpoint

    Returns:
        Endpoint: the endpoint - None if the name is not known
    """
    endpoint = self.endpoints.get(name)
    return endpoint

`error_filter`

Created on 2024-05-06

@author: wf

`ErrorFilter`

handle technical error message to retrieve user friendly content

Source code in snapquery/error_filter.py

class ErrorFilter:
    """
    handle technical error message to
    retrieve user friendly content

    On construction the raw message is categorized (``category``) and a
    shortened text suitable for display is extracted (``filtered_message``).
    """

    def __init__(self, raw_error_message: str):
        """
        Constructor

        Args:
            raw_error_message (str): the technical error message - may be None
        """
        self.raw_error_message = raw_error_message
        self.category = self.categorize_error()
        self.filtered_message = self._extract_relevant_info()

    def categorize_error(self) -> str:
        """
        Categorizes the error message into predefined types.

        The checks are case insensitive substring matches; the first
        matching category wins.

        Returns:
            str: The category of the error message - None if there is no message.
        """
        if self.raw_error_message is None:
            return None

        lower_error_msg = self.raw_error_message.lower()
        # Todo: query is often part of the error message when these keywords are used within the query the classification fails.
        if (
            "query timeout after" in lower_error_msg
            or "timeoutexception" in lower_error_msg
            or "query has timed out" in lower_error_msg
            or "http error 504" in lower_error_msg
        ):
            return "Timeout"
        elif (
            "syntax error" in lower_error_msg
            or "invalid sparql query" in lower_error_msg
            or "querybadformed" in lower_error_msg
        ):
            return "Syntax Error"
        elif "connection error" in lower_error_msg:
            return "Connection Error"
        elif "access denied" in lower_error_msg:
            return "Authorization Error"
        elif (
            "service unavailable" in lower_error_msg
            or "service temporarily unavailable" in lower_error_msg
            or "http error 503" in lower_error_msg
        ):
            return "Service Unavailable"
        elif "too many requests" in lower_error_msg or "http error 429" in lower_error_msg:
            return "Too Many Requests"
        elif "bad gateway" in lower_error_msg or "http error 502" in lower_error_msg:
            return "Bad Gateway"
        elif "endpointinternalerror" in lower_error_msg:
            return "EndPointInternalError"
        else:
            return "Other"

    def _extract_relevant_info(self) -> str:
        """
        Extract relevant information from the given raw error message.
        Identifies and processes different error message formats.

        Returns:
            str: the extracted message - None if there is no raw message
        """
        if not self.raw_error_message:
            return None

        if "SPARQL-QUERY:" in self.raw_error_message:
            return self._extract_sparql_error()
        elif self.raw_error_message.startswith("QueryBadFormed:") and "Virtuoso" in self.raw_error_message:
            return self._extract_virtuoso_error()
        elif self.raw_error_message.startswith("QueryBadFormed:"):
            return self._extract_triply_db_error()
        elif "Not supported:" in self.raw_error_message:
            return self._extract_qlever_error()
        elif "Invalid SPARQL query" in self.raw_error_message:
            return self._extract_invalid_sparql_error()
        else:
            if self.category == "Timeout":
                return "Query has timed out."
            message_json = self._get_error_message_json()
            if message_json and isinstance(message_json, dict) and "exception" in message_json:
                return message_json.get("exception")
            return "Error: Unrecognized error format."

    def _extract_sparql_error(self) -> str:
        """
        Specifically extract and format SPARQL error messages.

        The text after the last "Exception:" of the ExecutionException
        section - up to the first escaped stack trace line if there is
        one - is returned.
        """
        if "java.util.concurrent.TimeoutException" in self.raw_error_message:
            return "Query has timed out."
        error_log_start = "java.util.concurrent.ExecutionException"
        error_log_start_idx = self.raw_error_message.find(error_log_start)
        if error_log_start_idx == -1:
            return "Error: SPARQL query information is incomplete."
        # the stack trace lines start with an escaped tab followed by "at"
        error_log_end_idx = self.raw_error_message.find("\\tat", error_log_start_idx)
        if error_log_end_idx == -1:
            # no stack trace: use the rest of the message instead of
            # slicing with -1 which would cut off the last character
            error_log_end_idx = len(self.raw_error_message)
        error_message = self.raw_error_message[error_log_start_idx:error_log_end_idx]
        return error_message.split("Exception:")[-1].encode("utf-8").decode("unicode_escape").strip()

    def _extract_qlever_error(self) -> str:
        """
        Specifically extract and format QLever error messages.

        The message starts at "Not supported:" and ends with the next
        closing brace - or at the end of the raw message if there is none.
        """
        start_idx = self.raw_error_message.find("Not supported:")
        if start_idx == -1:
            return "Error: QLever error information is incomplete."
        end_idx = self.raw_error_message.find("}", start_idx)
        if end_idx == -1:
            # without closing brace keep everything after the start
            error_message = self.raw_error_message[start_idx:]
        else:
            error_message = self.raw_error_message[start_idx : end_idx + 1]
        return f"QLever error:\n{error_message.strip()}"

    def _extract_virtuoso_error(self) -> str:
        """
        Specifically extract and format virtuoso error messages.

        Returns:
            str: the text between "Response: b'" and "SPARQL query:" or
                a message stating that the information is incomplete
        """
        start_token = "Response: b'"
        end_token = "SPARQL query:"
        message = self._extract_message_between_tokens(start_token, end_token)
        if message:
            return message
        else:
            return "Error: Virtuoso error information is incomplete."

    def _extract_triply_db_error(self) -> str:
        """
        Specifically extract and format TriplyDB error messages.

        Returns:
            str: the "message" or "exception" entry of the json record in
                the raw message or a message stating that the information
                is incomplete
        """
        message_json = self._get_error_message_json()
        if message_json and "message" in message_json:
            return message_json.get("message")
        elif message_json and "exception" in message_json:
            return message_json.get("exception")
        else:
            return "Error: TriplyDB error information is incomplete."

    def _get_error_message_json(self) -> Union[dict, None]:
        """
        Try to extract the json record from the raw error message.

        Returns:
            dict: the json record following the response token - None if
                the token is missing, the content can not be decoded or
                the json content is not a record
        """
        start_token = "Response:\nb'"
        start_idx = self.raw_error_message.find(start_token)
        if start_idx == -1:
            return None
        # the final character is dropped - presumably the closing quote of the bytes literal
        message_json_raw = self.raw_error_message[start_idx + len(start_token) : -1].strip()
        try:
            message_json = json.loads(message_json_raw.encode().decode("unicode_escape"))
        except (JSONDecodeError, UnicodeDecodeError):
            return None
        # callers use membership tests, so only hand out records
        if not isinstance(message_json, dict):
            return None
        return message_json

    def _extract_message_between_tokens(self, start_token: str, end_token: str) -> Union[str, None]:
        """
        Extract and format message between tokens.
        Args:
            start_token: the token the message starts after
            end_token: the token the message ends before; only occurrences
                after the start token are considered

        Returns:
            str: the stripped text between the tokens - None if one of the
                tokens is missing
        """
        start_idx = self.raw_error_message.find(start_token)
        if start_idx == -1:
            return None
        message_start_idx = start_idx + len(start_token)
        end_idx = self.raw_error_message.find(end_token, message_start_idx)
        if end_idx == -1:
            return None
        return self.raw_error_message[message_start_idx:end_idx].strip()

    def _extract_invalid_sparql_error(self) -> str:
        """
        Specifically extract and format Invalid SPARQL query error messages.

        Only the first line starting at "Invalid SPARQL query" is used.
        """
        error_start = self.raw_error_message.find("Invalid SPARQL query")
        if error_start != -1:
            error_msg = self.raw_error_message[error_start:].split("\n")[0]
            return f"Invalid SPARQL query error:\n{error_msg}"
        else:
            return "Error: Invalid SPARQL query information is incomplete."

    def get_message(self, for_html: bool = True) -> str:
        """
        get the filtered message

        Args:
            for_html (bool): if True a <br> is added in front of each newline

        Returns:
            str: the filtered message with escaped newlines replaced by
                real ones - None or empty if there is no filtered message
        """
        filtered_msg = self.filtered_message
        if filtered_msg:
            filtered_msg = filtered_msg.replace("\\n", "\n")
            if for_html:
                filtered_msg = filtered_msg.replace("\n", "<br>\n")
        return filtered_msg

`categorize_error()`

Categorizes the error message into predefined types.

Returns:

Name	Type	Description
`str`	`str`	The category of the error message.

Source code in snapquery/error_filter.py

def categorize_error(self) -> str:
    """
    Assign the raw error message to one of the predefined categories.

    The keywords of each category are searched case insensitively in the
    message; the categories are checked in the given order and the first
    hit wins.

    Returns:
        str: The category of the error message - None if there is no message.
    """
    raw_message = self.raw_error_message
    if raw_message is None:
        return None

    haystack = raw_message.lower()
    # Todo: query is often part of the error message when these keywords are used within the query the classification fails.
    keywords_by_category = (
        (
            "Timeout",
            ("query timeout after", "timeoutexception", "query has timed out", "http error 504"),
        ),
        ("Syntax Error", ("syntax error", "invalid sparql query", "querybadformed")),
        ("Connection Error", ("connection error",)),
        ("Authorization Error", ("access denied",)),
        (
            "Service Unavailable",
            ("service unavailable", "service temporarily unavailable", "http error 503"),
        ),
        ("Too Many Requests", ("too many requests", "http error 429")),
        ("Bad Gateway", ("bad gateway", "http error 502")),
        ("EndPointInternalError", ("endpointinternalerror",)),
    )
    for category, keywords in keywords_by_category:
        if any(keyword in haystack for keyword in keywords):
            return category
    return "Other"

`get_message(for_html=True)`

get the filtered message

Source code in snapquery/error_filter.py

def get_message(self, for_html: bool = True) -> str:
    """
    Get the filtered message prepared for display.

    Escaped newlines are replaced by real ones.

    Args:
        for_html (bool): if True a <br> is added in front of each newline

    Returns:
        str: the prepared message - an empty or missing filtered message
            is returned as is
    """
    message = self.filtered_message
    if not message:
        return message
    message = message.replace("\\n", "\n")
    return message.replace("\n", "<br>\n") if for_html else message

`execution`

Created on 2024-07-09

@author: wf

`Execution`

supports execution of named queries

Source code in snapquery/execution.py

class Execution:
    """
    Runs named queries through a NamedQueryManager and stores their statistics.
    """

    def __init__(self, nqm: NamedQueryManager, debug: bool = False):
        """
        Create the execution helper.

        Args:
            nqm: the named query manager that executes queries and stores stats
            debug: debug flag, kept on the instance
        """
        self.logger = logging.getLogger("snapquery.execution.Execution")
        self.debug = debug
        self.nqm = nqm

    def parameterize(self, nq: NamedQuery):
        """
        Derive the query details and example parameters for a named query.

        Args:
            nq: the named query to analyze

        Returns:
            tuple of the QueryDetails and the parameter dict; the dict is
            only filled when the query's parameters are exactly "q"
        """
        details = QueryDetails.from_sparql(query_id=nq.query_id, sparql=nq.sparql)
        # Q80 (Tim Berners-Lee) is the example value for a single "q" parameter
        example_params = {"q": "Q80"} if details.params == "q" else {}
        return details, example_params

    def execute(
        self,
        nq: NamedQuery,
        endpoint_name: str,
        title: str,
        context: str = "test",
        prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
    ):
        """
        Execute the given named query and store the resulting statistics.

        Args:
            nq: the named query to run
            endpoint_name: name of the endpoint to run the query on
            title: label used in the debug log messages
            context: context recorded on the stats before they are stored
            prefix_merger: prefix merger handed on to the query execution
        """
        details, example_params = self.parameterize(nq)
        self.logger.debug(f"{title}: {nq.name} {details} - via {endpoint_name}")
        _, stats = self.nqm.execute_query(
            nq,
            params_dict=example_params,
            endpoint_name=endpoint_name,
            prefix_merger=prefix_merger,
        )
        stats.context = context
        self.nqm.store_stats([stats])
        # a falsy record count is reported as an error
        if stats.records:
            outcome = f"{stats.records} records found"
        else:
            outcome = f"error {stats.filtered_msg}"
        self.logger.debug(f"{title} executed:{outcome}")

`__init__(nqm, debug=False)`

Source code in snapquery/execution.py

def __init__(self, nqm: NamedQueryManager, debug: bool = False):
    """
    Create the execution helper.

    Args:
        nqm: the named query manager kept on the instance
        debug: debug flag, kept on the instance
    """
    self.logger = logging.getLogger("snapquery.execution.Execution")
    self.debug = debug
    self.nqm = nqm

`execute(nq, endpoint_name, title, context='test', prefix_merger=QueryPrefixMerger.SIMPLE_MERGER)`

execute the given named query

Source code in snapquery/execution.py

def execute(
    self,
    nq: NamedQuery,
    endpoint_name: str,
    title: str,
    context: str = "test",
    prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
):
    """
    Execute the given named query and store the resulting statistics.

    Args:
        nq: the named query to run
        endpoint_name: name of the endpoint to run the query on
        title: label used in the debug log messages
        context: context recorded on the stats before they are stored
        prefix_merger: prefix merger handed on to the query execution
    """
    details, example_params = self.parameterize(nq)
    self.logger.debug(f"{title}: {nq.name} {details} - via {endpoint_name}")
    _, stats = self.nqm.execute_query(
        nq,
        params_dict=example_params,
        endpoint_name=endpoint_name,
        prefix_merger=prefix_merger,
    )
    stats.context = context
    self.nqm.store_stats([stats])
    # a falsy record count is reported as an error
    if stats.records:
        outcome = f"{stats.records} records found"
    else:
        outcome = f"error {stats.filtered_msg}"
    self.logger.debug(f"{title} executed:{outcome}")

`graph`

Created on 27.06.2024

@author: wf

`Graph`

A class representing a graph with its basic properties.

Source code in snapquery/graph.py

@lod_storable
class Graph:
    """
    A class representing a graph with its basic properties.
    """

    # name of the graph
    name: str
    # name of the endpoint to use by default (presumably a key of the endpoint configuration - confirm)
    default_endpoint_name: str
    # human readable description
    description: str
    # url of the graph; the samples below use SPARQL endpoint urls
    url: str
    # optional free text remark
    comment: str = ""

    def __post_init__(self):
        """
        Perform post-initialization processing if needed.
        """
        pass

    @classmethod
    def get_samples(cls) -> dict[str, list["Graph"]]:
        """
        Get samples for Graph.

        Returns:
            a dict with the single key "graphs" that maps to the list of
            sample Graph instances (wikidata and dblp)
        """
        samples = {
            "graphs": [
                cls(
                    name="wikidata",
                    default_endpoint_name="wikidata",
                    description="Wikidata knowledge graph",
                    url="https://query.wikidata.org/sparql",
                    comment="Main Wikidata endpoint",
                ),
                cls(
                    name="dblp",
                    default_endpoint_name="dblp",
                    description="DBLP computer science bibliography",
                    url="https://qlever.cs.uni-freiburg.de/api/dblp",
                    comment="DBLP endpoint powered by QLever",
                ),
            ]
        }
        return samples

`__post_init__()`

Perform post-initialization processing if needed.

Source code in snapquery/graph.py

def __post_init__(self):
    """
    Post-initialization hook; intentionally a no-op at the moment.
    """

`get_samples()` `classmethod`

get samples for Graph

Source code in snapquery/graph.py

@classmethod
def get_samples(cls) -> dict[str, list["Graph"]]:
    """
    Get samples for Graph.

    Returns:
        a dict with the single key "graphs" that maps to the list of
        sample Graph instances (wikidata and dblp)
    """
    samples = {
        "graphs": [
            cls(
                name="wikidata",
                default_endpoint_name="wikidata",
                description="Wikidata knowledge graph",
                url="https://query.wikidata.org/sparql",
                comment="Main Wikidata endpoint",
            ),
            cls(
                name="dblp",
                default_endpoint_name="dblp",
                description="DBLP computer science bibliography",
                url="https://qlever.cs.uni-freiburg.de/api/dblp",
                comment="DBLP endpoint powered by QLever",
            ),
        ]
    }
    return samples

`GraphManager`

Manages the storage and retrieval of Graph configurations.

Source code in snapquery/graph.py

@lod_storable
class GraphManager:
    """
    Holds Graph configurations and gives access to them by name.
    """

    graphs: Dict[str, Graph] = field(default_factory=dict)

    @classmethod
    def get_yaml_path(cls) -> str:
        """
        Return the path of graphs.yaml in the samples directory next to this module.
        """
        module_dir = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(module_dir, "samples", "graphs.yaml")

    def get_graph(self, name: str) -> Graph:
        """
        Look up a graph by its name.

        Args:
            name: the key to look up in the graphs dict

        Returns:
            the matching graph; None when no graph is stored under that name
        """
        graph = self.graphs.get(name)
        return graph

    def __len__(self):
        """Number of managed graphs."""
        graph_count = len(self.graphs)
        return graph_count

    def __iter__(self):
        """Iterate over the managed Graph values (not their names)."""
        all_graphs = self.graphs.values()
        return iter(all_graphs)

`get_graph(name)`

Retrieve a graph by name.

Source code in snapquery/graph.py

def get_graph(self, name: str) -> Graph:
    """
    Look up a graph by its name.

    Args:
        name: the key to look up in the graphs mapping

    Returns:
        the result of ``self.graphs.get(name)``; for a dict that is None
        when no graph is stored under that name
    """
    graph = self.graphs.get(name)
    return graph

`models`

`person`

Created 2023 refactored to snapquery by WF 2024-05

@author: th

`Affiliation` `dataclass`

affiliation of a person

Source code in snapquery/models/person.py

@dataclass
class Affiliation:
    """
    The affiliation of a person; every field is optional.
    """

    name: Optional[str] = None
    location: Optional[str] = None
    country: Optional[str] = None
    wikidata_id: Optional[str] = None

    @property
    def ui_label(self) -> str:
        """
        Label for display: the name, or a question mark emoji when the name is empty.
        """
        return self.name if self.name else "❓"

`Person` `dataclass`

Bases: PersonName

A person

Source code in snapquery/models/person.py

@dataclass
class Person(PersonName):
    """
    A person with identifiers, image, affiliations and website.
    """

    wikidata_id: Optional[str] = None
    dblp_author_id: Optional[str] = None
    orcid_id: Optional[str] = None
    image: Optional[str] = None
    affiliation: Optional[List[Affiliation]] = None
    official_website: Optional[str] = None

    @property
    def has_pid(self) -> bool:
        """
        True if at least one persistent identifier (wikidata, dblp, orcid) is set.
        """
        return bool(self.wikidata_id or self.dblp_author_id or self.orcid_id)

    def share_identifier(self, other: "Person") -> bool:
        """
        Check whether the other person has an identifier in common with this person.

        Args:
            other: another person

        Returns:
            True if one of wikidata_id, dblp_author_id or orcid_id is set on
            this person and equal on the other person, False otherwise
        """
        for id_attr in ("wikidata_id", "dblp_author_id", "orcid_id"):
            own_id = getattr(self, id_attr)
            # an unset (None) identifier never counts as shared
            if own_id is not None and getattr(other, id_attr) == own_id:
                return True
        return False

    def merge_with(self, other: "Person"):
        """
        Fill the unset fields of this person from another person.

        Args:
            other: person to merge with; its values are only used for
                fields that are None on this person
        """
        for spec in fields(self):
            current = getattr(self, spec.name)
            merged = getattr(other, spec.name) if current is None else current
            setattr(self, spec.name, merged)

`has_pid: bool` `property`

Checks if the scholar has any persistent identifier (PID) set.

`merge_with(other)`

Merge this person with another person.

Args: other: person to merge with

Source code in snapquery/models/person.py

def merge_with(self, other: "Person"):
    """
    Fill the unset fields of this person from another person.

    Args:
        other: person to merge with; its values are only used for
            fields that are None on this person
    """
    for spec in fields(self):
        current = getattr(self, spec.name)
        merged = getattr(other, spec.name) if current is None else current
        setattr(self, spec.name, merged)

`share_identifier(other)`

Check if the given person shares an identifier with this person.

Args:

other: another person

Returns:

Type	Description
`bool`	true if the person shares an identifier, false otherwise

Source code in snapquery/models/person.py

def share_identifier(self, other: "Person") -> bool:
    """
    Check whether the other person has an identifier in common with this person.

    Args:
        other: another person

    Returns:
        True if one of wikidata_id, dblp_author_id or orcid_id is set on
        this person and equal on the other person, False otherwise
    """
    for id_attr in ("wikidata_id", "dblp_author_id", "orcid_id"):
        own_id = getattr(self, id_attr)
        # an unset (None) identifier never counts as shared
        if own_id is not None and getattr(other, id_attr) == own_id:
            return True
    return False

`PersonName` `dataclass`

person name handling

Source code in snapquery/models/person.py

@dataclass
class PersonName:
    """
    Name parts of a person plus helpers to render and parse them.
    """

    label: Optional[str] = None
    given_name: Optional[str] = None
    family_name: Optional[str] = None

    @property
    def name(self) -> str:
        """
        Display name: "given family", a single available part, or a
        question mark emoji when neither part is set.
        """
        given, family = self.given_name, self.family_name
        if given and family:
            return f"{given} {family}"
        return given or family or "❓"

    @property
    def ui_label(self) -> str:
        """Label for display; identical to ``name``."""
        return self.name

    def parse_label(self):
        """
        Fill missing family and given name from the label.

        The label is parsed with HumanName; a name part that is already
        set is left untouched. Nothing happens when the label is empty.
        """
        if not self.label:
            return
        parsed = HumanName(self.label)
        if not self.family_name and parsed.last:
            self.family_name = parsed.last
        if not self.given_name and parsed.first:
            self.given_name = parsed.first

`parse_label()`

get family name and given name from label

Source code in snapquery/models/person.py

def parse_label(self):
    """
    Fill missing family and given name from the label.

    The label is parsed with HumanName; a name part that is already
    set is left untouched. Nothing happens when the label is empty.
    """
    if not self.label:
        return
    parsed = HumanName(self.label)
    if not self.family_name and parsed.last:
        self.family_name = parsed.last
    if not self.given_name and parsed.first:
        self.given_name = parsed.first

`mwlogin`

Created on 04.05.2024

@author: wf

`Login`

login to mediawiki

Source code in snapquery/mwlogin.py

class Login:
    """
    Login to MediaWiki via a consumer-token handshake.
    """

    def __init__(
        self,
        consumer_key,
        consumer_secret,
        wiki_url="https://en.wikipedia.org/w/index.php",
    ):
        """
        Prepare the handshake; no request is made here.

        Args:
            consumer_key: key part of the consumer token
            consumer_secret: secret part of the consumer token
            wiki_url: url handed to the Handshaker
        """
        token = ConsumerToken(consumer_key, consumer_secret)
        self.consumer_token = token
        self.handshaker = Handshaker(wiki_url, token)
        # both tokens are filled in by the later login steps
        self.request_token = None
        self.access_token = None

    def initiate_login(self):
        """
        Step 1: Initialize -- ask MediaWiki for a temporary key/secret for user
        and open the returned redirect url in the web browser.
        """
        redirect_url, request_token = self.handshaker.initiate()
        self.request_token = request_token
        webbrowser.open(redirect_url)
        print("Browser opened to MediaWiki login page. Please authorize the application.")

    def complete_login(self, response_qs):
        """
        Step 3: Complete -- obtain authorized key/secret for "resource owner"

        Args:
            response_qs: handed to the handshaker together with the request token
        """
        access_token = self.handshaker.complete(self.request_token, response_qs)
        self.access_token = access_token
        print("Login completed successfully.")

    def identify_user(self):
        """
        Step 4: Identify -- (optional) get identifying information about the user

        Prints the username, or a hint when no access token is available yet.
        """
        if not self.access_token:
            print("Access token is not available. Please complete the login process first.")
            return
        identity = self.handshaker.identify(self.access_token)
        print(f"Identified as {identity['username']}.")

`complete_login(response_qs)`

Step 3: Complete -- obtain authorized key/secret for "resource owner"

Source code in snapquery/mwlogin.py

def complete_login(self, response_qs):
    """
    Step 3: Complete -- obtain authorized key/secret for "resource owner"

    Args:
        response_qs: handed to the handshaker together with the request token
    """
    access_token = self.handshaker.complete(self.request_token, response_qs)
    self.access_token = access_token
    print("Login completed successfully.")

`identify_user()`

Step 4: Identify -- (optional) get identifying information about the user

Source code in snapquery/mwlogin.py

def identify_user(self):
    """
    Step 4: Identify -- (optional) get identifying information about the user

    Prints the username, or a hint when no access token is available yet.
    """
    if not self.access_token:
        print("Access token is not available. Please complete the login process first.")
        return
    identity = self.handshaker.identify(self.access_token)
    print(f"Identified as {identity['username']}.")

`initiate_login()`

Step 1: Initialize -- ask MediaWiki for a temporary key/secret for user

Source code in snapquery/mwlogin.py

def initiate_login(self):
    """
    Step 1: Initialize -- ask MediaWiki for a temporary key/secret for user
    and open the returned redirect url in the web browser.
    """
    redirect_url, request_token = self.handshaker.initiate()
    self.request_token = request_token
    webbrowser.open(redirect_url)
    print("Browser opened to MediaWiki login page. Please authorize the application.")

`namespace_stats_view`

Created on 2024-06-23

@author: wf

`NamespaceStatsView`

Class to view and manage SPARQL query statistics using NiceGUI.

Attributes:

Name	Type	Description
`solution`	`WebSolution`	The web solution context which provides access to shared resources.
`nqm`	`NamedQueryManager`	The manager to handle named queries and database interactions.
`results_row`	`row`	UI component that holds the results grid.
`lod_grid`	`ListOfDictsGrid`	Grid component to display the query statistics.

Source code in snapquery/namespace_stats_view.py

class NamespaceStatsView:
    """Class to view and manage SPARQL query statistics using NiceGUI.

    Attributes:
        solution (WebSolution): The web solution context which provides access to shared resources.
        nqm (NamedQueryManager): The manager to handle named queries and database interactions.
        progress_row (ui.row): UI component that holds the progress bar.
        progress_bar (NiceguiProgressbar): Progress bar updated while queries are executed.
        results_row (ui.row): UI component that holds the results grid.
        lod_grid (ListOfDictsGrid): Grid component to display the query statistics.
    """

    def __init__(self, solution: WebSolution):
        """Initialize the NamespaceStatsView with a given web solution context.

        Args:
            solution (WebSolution): The web solution context which includes shared resources like the NamedQueryManager.
        """
        self.solution = solution
        self.nqm = self.solution.nqm
        # both UI components are created in setup_ui
        self.progress_bar: Optional[NiceguiProgressbar] = None
        self.lod_grid: Optional[ListOfDictsGrid] = None
        self.setup_ui()

    def setup_ui(self):
        """Sets up the user interface for displaying SPARQL query statistics.

        Creates a progress row with the progress bar and a results row with
        the legend label and the statistics grid, then schedules the first
        data load.
        """
        with ui.row() as self.progress_row:
            self.progress_bar = NiceguiProgressbar(desc="Query Progress", total=100, unit="queries")
            self.progress_bar.progress.classes("rounded")
        with ui.row() as self.results_row:
            ui.label("Legend: ✅ Distinct Successful Queries  ❌ Distinct Failed Queries  🔄 Total Successful Runs")
            self.lod_grid = ListOfDictsGrid()
            # Set up a click event handler for the grid
            self.lod_grid.ag_grid.on("cellClicked", self.on_cell_clicked)

        # Fetch and display data immediately upon UI setup
        ui.timer(0.0, self.on_fetch_lod, once=True)

    async def on_cell_clicked(self, event):
        """Handle cell click events to perform specific actions based on the cell content.

        The clicked column id is taken as the endpoint name. If it is a known
        endpoint and the user is authenticated, all queries of the clicked
        row's namespace and domain are executed on that endpoint; otherwise
        the user is notified.

        Args:
            event: the click event; ``event.args`` is read for the keys
                "data" (the row, with "namespace" and "domain") and "colId"
        """
        # Retrieve details from the event object
        logger.debug(f"Cell clicked: {event}")
        row_data = event.args["data"]
        endpoint_name = event.args["colId"]
        namespace = row_data["namespace"]
        domain = row_data["domain"]
        if endpoint_name in self.nqm.endpoints.keys():
            if self.solution.webserver.authenticated():
                # run the query execution via run.io_bound and wait for it to finish
                await run.io_bound(
                    self.execute_queries,
                    namespace=namespace,
                    endpoint_name=endpoint_name,
                    domain=domain,
                )
            else:
                ui.notify("you must be admin to run queries via the web interface")
        else:
            # this should not be possible
            ui.notify(f"invalid endpoint {endpoint_name}")

    async def on_fetch_lod(self, _args=None):
        """Fetches data asynchronously and loads it into the grid upon successful retrieval.

        Any exception is passed to the solution's exception handler.

        Args:
            _args: unused; accepted so the method can serve as a callback
        """
        try:
            stats_lod = await run.io_bound(self.fetch_query_lod)
            processed_lod = self.process_stats_lod(stats_lod)
            with self.results_row:
                self.lod_grid.load_lod(processed_lod)
                self.lod_grid.update()
        except Exception as ex:
            self.solution.handle_exception(ex)

    def fetch_query_lod(self) -> List[Dict[str, any]]:
        """Fetch data from the database based on the named query 'query_namespace_endpoint_matrix_with_distinct'.

        Returns:
            List[Dict[str, any]]: A list of dictionaries containing the query results.
        """
        query_name = "query_namespace_endpoint_matrix_with_distinct"
        query = self.nqm.meta_qm.queriesByName[query_name]
        return self.nqm.sql_db.query(query.query)

    def process_stats_lod(self, raw_lod: List[Dict[str, any]]) -> List[Dict[str, any]]:
        """Process the raw list of dictionaries to format suitable for the grid display.

        Each result row holds "domain", "namespace", "total" and one column
        per configured endpoint. An endpoint column is empty when all three
        counters are zero.

        Args:
            raw_lod (List[Dict[str, any]]): The raw data fetched from the SQL query.

        Returns:
            List[Dict[str, any]]: The processed list of dictionaries formatted for grid display.
        """
        # domain -> namespace -> endpoint -> [distinct successful, distinct failed, success count]
        domain_namespace_stats = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: [0, 0, 0])))
        endpoints = list(self.nqm.endpoints.keys())
        total_queries = {}

        for entry in raw_lod:
            domain = entry["domain"]
            namespace = entry["namespace"]
            endpoint = entry["endpoint_name"]
            distinct_successful = entry.get("distinct_successful", 0)
            distinct_failed = entry.get("distinct_failed", 0)
            success_count = entry["success_count"]
            # the last entry seen for a (domain, namespace) pair determines its total
            total_queries[(domain, namespace)] = entry["total"]
            domain_namespace_stats[domain][namespace][endpoint] = [
                distinct_successful,
                distinct_failed,
                success_count,
            ]

        processed_lod = []
        for domain, namespaces in domain_namespace_stats.items():
            for namespace, counts in namespaces.items():
                row = {
                    "domain": domain,
                    "namespace": namespace,
                    "total": total_queries[(domain, namespace)],
                }
                for endpoint in endpoints:
                    # here "total" is the success count shown after the 🔄 symbol
                    success, fail, total = counts.get(endpoint, [0, 0, 0])
                    if success == 0 and fail == 0 and total == 0:
                        row[endpoint] = ""
                    else:
                        row[endpoint] = f"✅{success} ❌{fail} 🔄{total}"
                processed_lod.append(row)

        return processed_lod

    def execute_queries(self, namespace: str, endpoint_name: str, domain: str):
        """execute queries with progress updates.

        After the last query a reload of the statistics is scheduled and a
        notification is shown.

        Args:
            namespace (str): The namespace of the queries to execute.
            endpoint_name (str): The endpoint name where the queries will be executed.
            domain: domain name
        """
        queries = self.nqm.get_all_queries(namespace=namespace, domain=domain)
        total_queries = len(queries)

        self.progress_bar.total = total_queries
        self.progress_bar.reset()
        execution = Execution(self.nqm)
        for i, nq in enumerate(queries, start=1):
            # progress updates are made inside the progress row context
            with self.progress_row:
                self.progress_bar.update_value(i)
                self.progress_bar.set_description(f"Executing {nq.name} on {endpoint_name}")
                logger.debug(f"Executing {nq.name} on {endpoint_name}")
            execution.execute(nq, endpoint_name, title=f"query {i}/{len(queries)}::{endpoint_name}", context="web-test")
        with self.progress_row:
            # refresh the grid shortly after the run has finished
            ui.timer(0.1, self.on_fetch_lod, once=True)
            ui.notify(
                f"finished {total_queries} queries for namespace: {namespace} with domain: {domain}",
                type="positive",
            )

`__init__(solution)`

Initialize the NamespaceStatsView with a given web solution context.

Parameters:

Name	Type	Description	Default
`solution`	`WebSolution`	The web solution context which includes shared resources like the NamedQueryManager.	required

Source code in snapquery/namespace_stats_view.py

def __init__(self, solution: WebSolution):
    """Create the view for the given web solution and build its UI.

    Args:
        solution (WebSolution): The web solution context; its ``nqm`` attribute is kept as the named query manager.
    """
    self.solution = solution
    self.nqm = self.solution.nqm
    # placeholders until setup_ui creates the actual components
    self.lod_grid: Optional[ListOfDictsGrid] = None
    self.progress_bar: Optional[NiceguiProgressbar] = None
    self.setup_ui()

`execute_queries(namespace, endpoint_name, domain)`

execute queries with progress updates.

Args: namespace (str): The namespace of the queries to execute; endpoint_name (str): The endpoint name where the queries will be executed; domain: domain name

Source code in snapquery/namespace_stats_view.py

def execute_queries(self, namespace: str, endpoint_name: str, domain: str):
    """Run all queries of a namespace and domain on one endpoint, updating the progress bar.

    After the last query a reload of the statistics is scheduled and a
    notification is shown.

    Args:
        namespace (str): The namespace of the queries to execute.
        endpoint_name (str): The endpoint name where the queries will be executed.
        domain: domain name
    """
    queries = self.nqm.get_all_queries(namespace=namespace, domain=domain)
    query_count = len(queries)

    progress = self.progress_bar
    progress.total = query_count
    progress.reset()
    runner = Execution(self.nqm)
    for position, named_query in enumerate(queries, start=1):
        status = f"Executing {named_query.name} on {endpoint_name}"
        # progress updates are made inside the progress row context
        with self.progress_row:
            progress.update_value(position)
            progress.set_description(status)
            logger.debug(status)
        runner.execute(
            named_query,
            endpoint_name,
            title=f"query {position}/{query_count}::{endpoint_name}",
            context="web-test",
        )
    with self.progress_row:
        # refresh the grid shortly after the run has finished
        ui.timer(0.1, self.on_fetch_lod, once=True)
        ui.notify(
            f"finished {query_count} queries for namespace: {namespace} with domain: {domain}",
            type="positive",
        )

`fetch_query_lod()`

Fetch data from the database based on the named query 'query_namespace_endpoint_matrix_with_distinct'.

Returns:

Type	Description
`List[Dict[str, any]]`	List[Dict[str, any]]: A list of dictionaries containing the query results.

Source code in snapquery/namespace_stats_view.py

def fetch_query_lod(self) -> List[Dict[str, any]]:
    """Run the meta query 'query_namespace_endpoint_matrix_with_distinct' on the SQL database.

    Returns:
        List[Dict[str, any]]: The records returned by the SQL database for that query.
    """
    nqm = self.nqm
    matrix_query = nqm.meta_qm.queriesByName["query_namespace_endpoint_matrix_with_distinct"]
    stats_lod = nqm.sql_db.query(matrix_query.query)
    return stats_lod

`on_cell_clicked(event)` `async`

Handle cell click events to perform specific actions based on the cell content.

Source code in snapquery/namespace_stats_view.py

async def on_cell_clicked(self, event):
    """React to a click on a grid cell.

    The clicked column id is taken as the endpoint name. For a known
    endpoint and an authenticated user all queries of the clicked row's
    namespace and domain are executed on that endpoint; otherwise the
    user is notified.

    Args:
        event: the click event; ``event.args`` is read for the keys
            "data" (the row, with "namespace" and "domain") and "colId"
    """
    logger.debug(f"Cell clicked: {event}")
    # pull the click details out of the event arguments
    row_data = event.args["data"]
    endpoint_name = event.args["colId"]
    namespace = row_data["namespace"]
    domain = row_data["domain"]
    if endpoint_name not in self.nqm.endpoints.keys():
        # this should not be possible
        ui.notify(f"invalid endpoint {endpoint_name}")
        return
    if not self.solution.webserver.authenticated():
        ui.notify("you must be admin to run queries via the web interface")
        return
    await run.io_bound(
        self.execute_queries,
        namespace=namespace,
        endpoint_name=endpoint_name,
        domain=domain,
    )

`on_fetch_lod(_args=None)` `async`

Fetches data asynchronously and loads it into the grid upon successful retrieval.

Source code in snapquery/namespace_stats_view.py

async def on_fetch_lod(self, _args=None):
    """Load the statistics and show them in the grid.

    The raw records are fetched via ``run.io_bound``, converted with
    ``process_stats_lod`` and loaded into the grid inside the results row.
    Any exception is passed to the solution's exception handler.

    Args:
        _args: unused; accepted so the method can serve as a callback
    """
    try:
        raw_lod = await run.io_bound(self.fetch_query_lod)
        grid_rows = self.process_stats_lod(raw_lod)
        with self.results_row:
            grid = self.lod_grid
            grid.load_lod(grid_rows)
            grid.update()
    except Exception as ex:
        self.solution.handle_exception(ex)

`process_stats_lod(raw_lod)`

Process the raw list of dictionaries to format suitable for the grid display.

Parameters:

Name	Type	Description	Default
`raw_lod`	`List[Dict[str, any]]`	The raw data fetched from the SQL query.	required

Returns:

Type	Description
`List[Dict[str, any]]`	List[Dict[str, any]]: The processed list of dictionaries formatted for grid display.

Source code in snapquery/namespace_stats_view.py

def process_stats_lod(self, raw_lod: List[Dict[str, any]]) -> List[Dict[str, any]]:
    """Turn the raw statistics records into one grid row per domain and namespace.

    Each result row holds "domain", "namespace", "total" and one column per
    configured endpoint. An endpoint column is empty when all three counters
    are zero, otherwise it shows distinct successful, distinct failed and
    success count.

    Args:
        raw_lod (List[Dict[str, any]]): The raw data fetched from the SQL query.

    Returns:
        List[Dict[str, any]]: The processed list of dictionaries formatted for grid display.
    """
    endpoint_names = list(self.nqm.endpoints.keys())
    totals = {}
    # domain -> namespace -> endpoint -> [distinct successful, distinct failed, success count]
    stats_tree = {}

    for record in raw_lod:
        domain = record["domain"]
        namespace = record["namespace"]
        endpoint_name = record["endpoint_name"]
        counters = [
            record.get("distinct_successful", 0),
            record.get("distinct_failed", 0),
            record["success_count"],
        ]
        # the last record seen for a (domain, namespace) pair determines its total
        totals[(domain, namespace)] = record["total"]
        stats_tree.setdefault(domain, {}).setdefault(namespace, {})[endpoint_name] = counters

    rows = []
    for domain, by_namespace in stats_tree.items():
        for namespace, by_endpoint in by_namespace.items():
            row = {
                "domain": domain,
                "namespace": namespace,
                "total": totals[(domain, namespace)],
            }
            for endpoint_name in endpoint_names:
                ok, failed, runs = by_endpoint.get(endpoint_name, [0, 0, 0])
                nothing_recorded = ok == 0 and failed == 0 and runs == 0
                row[endpoint_name] = "" if nothing_recorded else f"✅{ok} ❌{failed} 🔄{runs}"
            rows.append(row)

    return rows

`setup_ui()`

Sets up the user interface for displaying SPARQL query statistics.

Source code in snapquery/namespace_stats_view.py

def setup_ui(self):
    """Sets up the user interface for displaying SPARQL query statistics.

    Creates a progress row with the progress bar and a results row with the
    legend label and the statistics grid, then schedules the first data load.
    """
    # first row: progress bar used while queries are executed
    with ui.row() as self.progress_row:
        self.progress_bar = NiceguiProgressbar(desc="Query Progress", total=100, unit="queries")
        self.progress_bar.progress.classes("rounded")
    # second row: legend for the cell symbols plus the statistics grid
    with ui.row() as self.results_row:
        ui.label("Legend: ✅ Distinct Successful Queries  ❌ Distinct Failed Queries  🔄 Total Successful Runs")
        self.lod_grid = ListOfDictsGrid()
        # Set up a click event handler for the grid
        self.lod_grid.ag_grid.on("cellClicked", self.on_cell_clicked)

    # Fetch and display data immediately upon UI setup
    ui.timer(0.0, self.on_fetch_lod, once=True)

`orcid`

`OrcidAccessToken`

orcid access token response

Source code in snapquery/orcid.py

@lod_storable
class OrcidAccessToken:
    """
    orcid access token response
    """

    orcid: str
    access_token: str
    token_type: str
    refresh_token: str
    expires_in: int
    scope: str
    name: str
    # None means "not supplied": __post_init__ then stamps the creation time.
    # A class-level default of int(time()) would be evaluated only once, when
    # the class is defined, and shared by every token created afterwards.
    login_timestamp: Optional[int] = None

    def __post_init__(self):
        """
        Set the login timestamp to the current time when none was given.
        """
        if self.login_timestamp is None:
            self.login_timestamp = int(time())

    @classmethod
    def get_samples(cls):
        """
        Get sample access tokens.

        Returns:
            a list with one OrcidAccessToken built from an example record
        """
        lod = [
            {
                "access_token": "f5af9f51-07e6-4332-8f1a-c0c11c1e3728",
                "token_type": "bearer",
                "refresh_token": "f725f747-3a65-49f6-a231-3e8944ce464d",
                "expires_in": 631138518,
                "scope": "/activities/update /read-limited",
                "name": "Sofia Garcia",
                "orcid": "0000-0001-2345-6789",
            }
        ]
        return [OrcidAccessToken.from_dict2(d) for d in lod]

`OrcidAuth`

authenticate with orcid

Source code in snapquery/orcid.py

class OrcidAuth:
    """
    authenticate with orcid

    Loads an OrcidConfig from a YAML file, exchanges OAuth codes for access
    tokens, keeps the user's token in ``app.storage.user`` and searches the
    ORCID registry.
    """

    def __init__(
        self,
        base_path: Optional[Path] = None,
        config_file_name: str = "orcid_config.yaml",
    ):
        """
        constructor

        Args:
            base_path: directory holding the configuration file;
                ``~/.solutions/snapquery`` is used when None
            config_file_name: name of the YAML configuration file
        """
        if base_path is None:
            base_path = Path.home() / ".solutions/snapquery"
        self.base_path = base_path
        self.config_file_name = config_file_name
        # None when the configuration file does not exist
        self.config = self.load_config()

    def get_config_path(self) -> Path:
        """
        Returns:
            Path: location of the configuration file
        """
        return self.base_path / self.config_file_name

    def config_exists(self):
        """
        Returns:
            bool: True if the configuration file exists
        """
        return self.get_config_path().exists()

    def available(self) -> bool:
        """
        Returns:
            bool: True if a configuration has been loaded
        """
        return self.config is not None

    def load_config(self) -> Union["OrcidConfig", None]:
        """
        load the configuration from the YAML file

        Returns:
            the loaded OrcidConfig or None if the file does not exist
        """
        if not self.config_exists():
            return None
        config = OrcidConfig.load_from_yaml_file(str(self.get_config_path()))
        return config

    def authenticate_url(self):
        """
        Returns:
            the authorization url as built by the configuration
        """
        return self.config.authenticate_url()

    def authenticated(self) -> bool:
        """
        check whether the current user has a cached, unexpired token

        Returns:
            bool: False if no configuration is available, no token is cached
            or the cached token has expired
        """
        if not self.available():
            return False
        orcid_token = self.get_cached_user_access_token()
        if orcid_token is None:
            return False
        return self._check_access_token(orcid_token)

    def get_cached_user_access_token(self) -> Union["OrcidAccessToken", None]:
        """
        Returns:
            the token rebuilt from the "orcid_token" record in
            ``app.storage.user`` or None if there is no such record
        """
        orcid_token_record = app.storage.user.get("orcid_token", None)
        orcid_token = None
        if orcid_token_record:
            orcid_token: OrcidAccessToken = OrcidAccessToken.from_dict2(orcid_token_record)
        return orcid_token

    def _check_access_token(self, orcid_token: "OrcidAccessToken") -> bool:
        """
        Check if the given access token is valid
        Args:
            orcid_token: orcid access token

        Returns:
            True if the access token is valid, False otherwise
        """
        # seconds elapsed since the token was obtained
        time_passed = int(time()) - orcid_token.login_timestamp
        return orcid_token.expires_in - time_passed >= 0

    def login(self, access_code: str) -> bool:
        """
        exchange the access code for a token and cache it for the user

        Args:
            access_code: the code handed over by the authorization redirect

        Returns:
            bool: True once the token has been stored

        Raises:
            Exception: any error of the token retrieval is printed and
                re-raised
        """
        authenticated = False
        try:
            orcid_token = self._retrieve_token(access_code)
            app.storage.user.update({"orcid_token": asdict(orcid_token)})
            authenticated = True
        except Exception as e:
            print(e)
            raise e
        return authenticated

    def _retrieve_token(self, code: str) -> "OrcidAccessToken":
        """
        URL=https://sandbox.orcid.org/oauth/token
         HEADER: Accept: application/json
         HEADER: Content-Type: application/x-www-form-urlencoded
         METHOD: POST
         DATA:
           client_id=[Your client ID]
           client_secret=[Your client secret]
           grant_type=authorization_code
           code=Six-digit code
           redirect_uri=[Your landing page]

        NOTE(review): the redirect_uri listed above is not part of the
        request data sent below - confirm that this is intended.

        Args:
            code: the authorization code to exchange

        Returns:
            OrcidAccessToken: the token built from the JSON response

        Raises:
            requests.HTTPError: if the token endpoint answers with an error status
        """
        url = f"{self.config.url}/oauth/token"
        data = {
            "client_id": self.config.client_id,
            "client_secret": self.config.client_secret,
            "grant_type": "authorization_code",
            "code": code,
        }
        resp = requests.post(url, data=data)
        resp.raise_for_status()
        resp_json = resp.json()
        orcid_token: OrcidAccessToken = OrcidAccessToken.from_dict2(resp_json)
        return orcid_token

    def logout(self):
        """
        logout user by deleting cached access token

        Does nothing if no token is cached, so a repeated logout is safe.
        """
        # pop with a default instead of del: no KeyError for a missing token
        app.storage.user.pop("orcid_token", None)

    def _request_search_token(self) -> str:
        """
        Request search token
        see https://info.orcid.org/documentation/api-tutorials/api-tutorial-searching-the-orcid-registry/

        URL=https://sandbox.orcid.org/oauth/token
          HEADER: Accept: application/json
          METHOD: POST
          DATA:
            client_id=[Your public API client ID]
            client_secret=[Your public API secret]
            grant_type=client_credentials
            scope=/read-public
        Returns:
            str: the "access_token" entry of the JSON response

        Raises:
            requests.HTTPError: if the token endpoint answers with an error status
        """
        url = f"{self.config.url}/oauth/token"
        data = {
            "client_id": self.config.client_id,
            "client_secret": self.config.client_secret,
            "grant_type": "client_credentials",
            "scope": "/read-public",
        }
        resp = requests.post(url, data=data)
        resp.raise_for_status()
        resp_json = resp.json()
        return resp_json["access_token"]

    @property
    def search_token(self) -> str:
        """
        the search token of the configuration

        A token is requested and written back to the configuration file the
        first time it is needed.
        """
        if self.config.search_token is None:
            search_token = self._request_search_token()
            self.config.search_token = search_token
            self.store_config()
        return self.config.search_token

    def store_config(self):
        """
        save the current configuration to the YAML configuration file
        """
        self.config.save_to_yaml_file(str(self.get_config_path()))

    def search(self, params: "OrcidSearchParams", limit: int = 10) -> list[Person]:
        """
        search the ORCID registry

        Args:
            params: the search parameters providing the query string
            limit: value of the ``rows`` request parameter

        Returns:
            list[Person]: one Person per entry of "expanded-result",
            an empty list if the response has no such entries

        Raises:
            requests.HTTPError: if the search endpoint answers with an error status
        """
        access_token = self.search_token
        url = f"{self.config.api_endpoint}/expanded-search/?q={params.get_search_query()}&rows={limit}"
        headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {access_token}",
        }
        resp = requests.get(url, headers=headers)
        resp.raise_for_status()
        records: list[dict] = resp.json().get("expanded-result", [])
        persons = []
        # the guard also covers a response that carries an explicit null
        if records:
            for record in records:
                person = Person(
                    given_name=record.get("given-names", None),
                    family_name=record.get("family-names", None),
                    orcid_id=record.get("orcid-id", None),
                )
                persons.append(person)
        return persons

`logout()`

logout user by deleting cached access token

Source code in snapquery/orcid.py

def logout(self):
    """
    logout user by deleting cached access token

    Does nothing if no token is cached, so a repeated logout is safe.
    """
    # pop with a default instead of del: no KeyError for a missing token
    app.storage.user.pop("orcid_token", None)

`OrcidConfig`

orcid authentication configuration

Source code in snapquery/orcid.py

@lod_storable
class OrcidConfig:
    """
    orcid authentication configuration

    Holds the base url of the ORCID site, the client credentials, the
    redirect target of the OAuth flow, the API endpoint and an optional
    search token.
    """

    url: str
    client_id: str
    client_secret: str
    redirect_uri: str = "http://127.0.0.1:9862/orcid_callback"
    api_endpoint: str = "https://pub.orcid.org/v3.0"
    search_token: Optional[str] = None

    @classmethod
    def get_samples(cls) -> list["OrcidConfig"]:
        """
        Returns:
            list[OrcidConfig]: a single example configuration
        """
        sample_record = {
            "url": "https://orcid.org",
            "client_id": "APP-123456789ABCDEFG",
            "client_secret": "<KEY>",
            "redirect_uri": "http://127.0.0.1:9862/orcid_callback",
            "api_endpoint": "https://sandbox.orcid.org/v3.0",
        }
        samples = []
        for record in (sample_record,):
            samples.append(OrcidConfig.from_dict2(record))
        return samples

    def authenticate_url(self):
        """
        Returns:
            str: the ``/oauth/authorize`` url below ``url`` carrying the
            client id, response type, scope and redirect uri as parameters
        """
        query = (
            f"client_id={self.client_id}"
            "&response_type=code"
            "&scope=/authenticate"
            f"&redirect_uri={self.redirect_uri}"
        )
        return f"{self.url}/oauth/authorize?{query}"

`OrcidSearchParams` `dataclass`

ORCID search API parameters; see https://info.orcid.org/documentation/api-tutorials/api-tutorial-searching-the-orcid-registry/

Source code in snapquery/orcid.py

@dataclass
class OrcidSearchParams:
    """
    Orcid search api params
    see https://info.orcid.org/documentation/api-tutorials/api-tutorial-searching-the-orcid-registry/
    """

    # Biographical data
    given_names: Optional[str] = None
    family_name: Optional[str] = None
    credit_name: Optional[str] = None
    other_names: Optional[list[str]] = None
    email: Optional[str] = None
    keyword: Optional[list[str]] = None
    external_id_reference: Optional[str] = None

    # Affiliations data
    affiliation_org_name: Optional[str] = None
    grid_org_id: Optional[str] = None
    ror_org_id: Optional[str] = None
    ringgold_org_id: Optional[str] = None

    # Funding data
    funding_titles: Optional[list[str]] = None
    fundref_org_id: Optional[str] = None
    grant_numbers: Optional[list[str]] = None

    # Research activities data
    work_titles: Optional[list[str]] = None
    digital_object_ids: Optional[list[str]] = None

    # ORCID record data
    orcid: Optional[str] = None
    profile_submission_date: Optional[str] = None  # Assuming date format is string
    profile_last_modified_date: Optional[str] = None  # Assuming date format is string

    # All data (default for Lucene syntax)
    text: Optional[str] = None

    def get_search_query(self) -> str:
        """
        Build the query string from all parameters that are set.

        Every field that is not None contributes a ``key:value`` term, where
        key is the field name with underscores replaced by hyphens. The terms
        appear in field declaration order and are joined with "+".

        NOTE(review): values are inserted with their plain string form, so a
        list value ends up as its Python representation - confirm whether
        list parameters need their own formatting.

        Returns:
            str: the combined query, "" if no parameter is set
        """
        terms = []
        for param in fields(self):
            value = getattr(self, param.name)
            if value is None:
                continue
            key = param.name.replace("_", "-")
            terms.append(f"{key}:{value}")
        # the delimiter must separate the terms, otherwise two criteria run
        # into each other, e.g. "given-names:Tomfamily-name:Smith"
        return "+".join(terms)

`params_view`

Created on 06.05.2024

@author: wf

`ParamsView`

a view for Query Parameters

Source code in snapquery/params_view.py

class ParamsView:
    """
    a view for Query Parameters

    Wraps the parameter dictionary of a Params object in a DictEdit form.
    """

    def __init__(self, solution, params: Params):
        """
        remember the solution and the params to be edited
        """
        self.params = params
        self.solution = solution

    def open(self):
        """
        expand the dict edit so that its fields are visible
        """
        expansion = self.dict_edit.expansion
        expansion.open()

    def close(self):
        """
        collapse the dict edit so that its fields are hidden
        """
        expansion = self.dict_edit.expansion
        expansion.close()

    def get_dict_edit(self) -> DictEdit:
        """
        Create, open and return the DictEdit for my parameters.

        Returns:
            DictEdit: the editor, also kept as ``self.dict_edit``
        """
        # one field definition per parameter, derived from key and value
        ui_fields = {}
        for key, value in self.params.params_dict.items():
            ui_fields[key] = FieldUiDef.from_key_value(key, value)
        # custom form definition titled "Params"
        form_ui_def = FormUiDef(title="Params", icon="tune", ui_fields=ui_fields)
        self.dict_edit = DictEdit(
            data_to_edit=self.params.params_dict,
            form_ui_def=form_ui_def,
        )
        self.open()
        return self.dict_edit

`__init__(solution, params)`

construct me with the given solution and params

Source code in snapquery/params_view.py

def __init__(self, solution, params: Params):
    """
    remember the solution and the params to be edited
    """
    self.params = params
    self.solution = solution

`close()`

hide the details of the dict edit

Source code in snapquery/params_view.py

def close(self):
    """
    collapse the dict edit so that its fields are hidden
    """
    expansion = self.dict_edit.expansion
    expansion.close()

`get_dict_edit()`

Return a DictEdit instance for editing parameters.

Source code in snapquery/params_view.py

def get_dict_edit(self) -> DictEdit:
    """
    Create, open and return the DictEdit for my parameters.

    Returns:
        DictEdit: the editor, also kept as ``self.dict_edit``
    """
    # one field definition per parameter, derived from key and value
    ui_fields = {}
    for key, value in self.params.params_dict.items():
        ui_fields[key] = FieldUiDef.from_key_value(key, value)
    # custom form definition titled "Params"
    form_ui_def = FormUiDef(title="Params", icon="tune", ui_fields=ui_fields)
    self.dict_edit = DictEdit(
        data_to_edit=self.params.params_dict,
        form_ui_def=form_ui_def,
    )
    self.open()
    return self.dict_edit

`open()`

show the details of the dict edit

Source code in snapquery/params_view.py

def open(self):
    """
    expand the dict edit so that its fields are visible
    """
    expansion = self.dict_edit.expansion
    expansion.open()

`person_selector`

Created 2023

@author: th

`PersonSelector`

Provides an interface for searching and selecting people with auto-suggestions.

Source code in snapquery/person_selector.py

class PersonSelector:
    """
    Provides an interface for searching and selecting people with auto-suggestions.
    """

    def __init__(
        self,
        solution: WebSolution,
        selection_callback: Callable[[Person], Any],
        limit: int = 10,
    ):
        """
        Constructor

        Args:
            solution: the web solution providing the container and the
                named query manager
            selection_callback: called with the person that was selected
            limit: maximum number of suggestions requested per source and
                shown in the suggestion list
        """
        # parameters
        self.solution = solution
        self.selection_callback = selection_callback
        self.limit = limit
        # instance variables
        self.suggested_persons: List[Person] = []
        self.selected_person: Optional[Person] = None
        self.suggestion_view: Optional[ui.element] = None
        self.search_name = ""
        self.person_lookup = PersonLookup(nqm=solution.webserver.nqm)
        self.selection_btn: Optional[Button] = None
        self.debouncer_ui = DebouncerUI(parent=self.solution.container, debug=True)
        self.person_selection()

    @ui.refreshable
    def person_selection(self):
        """
        Display input fields for person data with auto-suggestion
        """
        # prefill the PID field from the selected person, if there is one
        person = self.selected_person if self.selected_person else Person()
        with ui.element("row").classes("w-full h-full"):
            with ui.splitter().classes("h-full  w-full") as splitter:
                # left pane: the input form
                with splitter.before:
                    with ui.row() as self.top_row:
                        pass
                    with ui.card() as self.selection_card:
                        with ui.row():
                            self.label = ui.label("Name or Pid:")
                        with ui.row():
                            self.name_input = ui.input(
                                label="name",
                                placeholder="Tim Berners-Lee",
                                on_change=self.suggest_persons,
                                value=self.search_name,
                            ).props("size=60")
                        with ui.row():
                            self.identifier_input = ui.input(
                                label="PID",
                                placeholder="Q80",
                                on_change=self.check_pid,
                                value=person.wikidata_id,
                            ).props("size=20")
                        # the button starts disabled; check_pid enables it
                        # once the PID field holds a valid identifier
                        self.selection_btn = ui.button(text="Continue", on_click=self.btn_selection_callback)
                        self.selection_btn.disable()
            # right pane: the suggestion list
            with splitter.after:
                with ui.element("column").classes(" w-full h-full gap-2"):
                    self.suggestion_view = ui.column().classes("rounded-md border-2 p-3")

    async def btn_selection_callback(self):
        """
        Build a Person from the input fields and pass it to the selection callback.

        The identifier is stored in the attribute matching its detected type.
        If the type cannot be detected, only the label is set.
        """
        identifier = self.identifier_input.value
        person = Person()
        pid_value = PIDs().pid4id(identifier)
        # pid4id returns None for an identifier that matches no known pattern
        pid_name = pid_value.pid.name if pid_value is not None else None
        if pid_name == "Wikidata":
            person.wikidata_id = identifier
        elif pid_name == "dblp":
            person.dblp_id = identifier
        elif pid_name == "ORCID":
            person.orcid_id = identifier
        person.label = self.name_input.value
        self.selection_callback(person)

    async def check_pid(self):
        """
        enable the selection button only while the PID field holds a valid identifier
        """
        pid = PIDs().pid4id(self.identifier_input.value)
        if pid is not None and pid.is_valid() and self.selection_btn is not None:
            self.selection_btn.enable()
        elif self.selection_btn:
            self.selection_btn.disable()

    def clear_suggested_persons(self):
        """
        drop all suggestions and redraw the (now empty) suggestion list
        """
        self.suggested_persons = []
        self.update_suggestions_view()

    async def suggest_persons(self):
        """
        Use debouncer to
        suggest potential persons based on the input.
        """
        await self.debouncer_ui.debounce(self.load_person_suggestions, self.name_input.value)

    async def load_person_suggestions(self, search_name: str):
        """
        Load person suggestions based on the search name.
        This method fetches data concurrently from multiple sources and updates suggestions as they arrive.

        Args:
            search_name(str): the search name to search for
        """
        if len(search_name) < 4:  # Skip querying for very short input strings.
            return
        try:
            self.clear_suggested_persons()
            # one worker thread per source
            tasks = [
                asyncio.to_thread(self.person_lookup.suggest_from_wikidata, search_name, self.limit),
                asyncio.to_thread(self.person_lookup.suggest_from_orcid, search_name, self.limit),
                asyncio.to_thread(self.person_lookup.suggest_from_dblp, search_name, self.limit),
            ]
            # handle the results in the order in which the sources answer
            for future in asyncio.as_completed(tasks):
                new_persons = await future
                self.merge_and_update_suggestions(new_persons)
                self.update_suggestions_view()
        except Exception as ex:
            self.solution.handle_exception(ex)

    def merge_and_update_suggestions(self, new_persons: List[Person]):
        """
        Merges new persons with existing ones based on shared identifiers or adds them if unique.
        Ensures no duplicates are present in the list of suggested persons.

        Args:
            new_persons (List[Person]): New person suggestions to be added or merged.
        """
        for new_person in new_persons:
            merged = False
            for existing_person in self.suggested_persons:
                if existing_person.share_identifier(new_person):
                    existing_person.merge_with(new_person)
                    merged = True
                    break
            if not merged:
                self.suggested_persons.append(new_person)

    def update_suggestions_view(self):
        """
        update the suggestions view

        Returns:
            an empty list if the suggestion view exists, otherwise None
        """
        if self.suggestion_view:
            self.suggestion_view.clear()
            with self.suggestion_view:
                with ui.list().props("bordered separator"):
                    ui.item_label("Suggestions").props("header").classes("text-bold")
                    ui.separator()
                    # show at most limit suggestions
                    for person in self.suggested_persons[: self.limit]:
                        PersonSuggestion(person=person, on_select=self.selection_callback)

                    # hint at the suggestions that are not shown
                    if len(self.suggested_persons) > self.limit:
                        with ui.item():
                            ui.label(
                                f"{'>' if len(self.suggested_persons) >= 10000 else ''}{len(self.suggested_persons)} matches are available..."
                            )
            return []

    def select_person_suggestion(self, person: Person):
        """
        Select the given Person by updating the input fields to the selected person and storing the object internally
        Args:
            person: person that should be selected
        """
        self.selected_person = person
        # the refresh rebuilds the form including a new suggestion view
        self.person_selection.refresh()
        self.suggested_persons = [person]
        # update_suggestions_view is the method that redraws the list
        self.update_suggestions_view()

`__init__(solution, selection_callback, limit=10)`

Constructor

Source code in snapquery/person_selector.py

def __init__(
    self,
    solution: WebSolution,
    selection_callback: Callable[[Person], Any],
    limit: int = 10,
):
    """
    Create the selector and render its input form.

    Args:
        solution: the web solution providing the container and the
            named query manager
        selection_callback: called with the person that was selected
        limit: maximum number of suggestions
    """
    # constructor arguments
    self.solution, self.selection_callback, self.limit = solution, selection_callback, limit
    # selection state
    self.suggested_persons: List[Person] = []
    self.selected_person: Optional[Person] = None
    self.search_name = ""
    # ui elements, assigned when the form is rendered
    self.suggestion_view: Optional[ui.element] = None
    self.selection_btn: Optional[Button] = None
    # helpers
    nqm = solution.webserver.nqm
    self.person_lookup = PersonLookup(nqm=nqm)
    self.debouncer_ui = DebouncerUI(parent=self.solution.container, debug=True)
    # render the form last, all attributes above are in place by now
    self.person_selection()

`load_person_suggestions(search_name)` `async`

Load person suggestions based on the search name. This method fetches data concurrently from multiple sources and updates suggestions as they arrive.

Parameters:

Name	Type	Description	Default
`search_name(str)`		the search name to search for	required

Source code in snapquery/person_selector.py

async def load_person_suggestions(self, search_name: str):
    """
    Fetch suggestions for the search name from all sources.

    The three lookups run concurrently in worker threads; the suggestion
    list is merged and redrawn each time one of them has finished.
    Any exception is passed to the solution's exception handler.

    Args:
        search_name(str): the search name to search for
    """
    # inputs of fewer than four characters are not looked up at all
    if len(search_name) < 4:
        return
    try:
        self.clear_suggested_persons()
        lookup = self.person_lookup
        sources = (
            lookup.suggest_from_wikidata,
            lookup.suggest_from_orcid,
            lookup.suggest_from_dblp,
        )
        pending = [asyncio.to_thread(source, search_name, self.limit) for source in sources]
        # results are handled in completion order, not in source order
        for finished in asyncio.as_completed(pending):
            self.merge_and_update_suggestions(await finished)
            self.update_suggestions_view()
    except Exception as ex:
        self.solution.handle_exception(ex)

`merge_and_update_suggestions(new_persons)`

Merges new persons with existing ones based on shared identifiers or adds them if unique. Ensures no duplicates are present in the list of suggested persons.

Parameters:

Name	Type	Description	Default
`new_persons`	`List[Person]`	New person suggestions to be added or merged.	required

Source code in snapquery/person_selector.py

def merge_and_update_suggestions(self, new_persons: List[Person]):
    """
    Fold new suggestions into the list of suggested persons.

    A new person is merged into the first suggested person it shares an
    identifier with; a person without such a match is appended.

    Args:
        new_persons (List[Person]): New person suggestions to be added or merged.
    """
    for candidate in new_persons:
        for known in self.suggested_persons:
            if known.share_identifier(candidate):
                known.merge_with(candidate)
                break
        else:
            # no suggested person shares an identifier with the candidate
            self.suggested_persons.append(candidate)

`person_selection()`

Display input fields for person data with auto-suggestion

Source code in snapquery/person_selector.py

@ui.refreshable
def person_selection(self):
    """
    Display input fields for person data with auto-suggestion

    Builds a splitter with the input form (name, PID and the "Continue"
    button) on one side and the column for the suggestions on the other.
    """
    # prefill the PID field from the selected person, if there is one
    person = self.selected_person if self.selected_person else Person()
    with ui.element("row").classes("w-full h-full"):
        with ui.splitter().classes("h-full  w-full") as splitter:
            # first pane: the input form
            with splitter.before:
                with ui.row() as self.top_row:
                    pass
                with ui.card() as self.selection_card:
                    with ui.row():
                        self.label = ui.label("Name or Pid:")
                    with ui.row():
                        # changes of the name trigger the suggestion lookup
                        self.name_input = ui.input(
                            label="name",
                            placeholder="Tim Berners-Lee",
                            on_change=self.suggest_persons,
                            value=self.search_name,
                        ).props("size=60")
                    with ui.row():
                        # changes of the PID trigger its validation
                        self.identifier_input = ui.input(
                            label="PID",
                            placeholder="Q80",
                            on_change=self.check_pid,
                            value=person.wikidata_id,
                        ).props("size=20")
                    # the button is created on every call and starts disabled
                    self.selection_btn = ui.button(text="Continue", on_click=self.btn_selection_callback)
                    self.selection_btn.disable()
        # second pane: the column that receives the suggestion list
        with splitter.after:
            with ui.element("column").classes(" w-full h-full gap-2"):
                self.suggestion_view = ui.column().classes("rounded-md border-2 p-3")

`select_person_suggestion(person)`

Select the given person by updating the input fields to the selected person and storing the object internally. Args: person – the person that should be selected.

Source code in snapquery/person_selector.py

def select_person_suggestion(self, person: Person):
    """
    Select the given Person by updating the input fields to the selected person and storing the object internally

    Args:
        person: person that should be selected
    """
    self.selected_person = person
    # re-render the input form so that it shows the selected person
    self.person_selection.refresh()
    # the selected person is the only remaining suggestion
    self.suggested_persons = [person]
    # update_suggestions_view is the method that redraws the list
    self.update_suggestions_view()

`suggest_persons()` `async`

Use debouncer to suggest potential persons based on the input.

Source code in snapquery/person_selector.py

async def suggest_persons(self):
    """
    Hand the current content of the name input to the debouncer,
    which calls load_person_suggestions with it.
    """
    current_name = self.name_input.value
    loader = self.load_person_suggestions
    await self.debouncer_ui.debounce(loader, current_name)

`update_suggestions_view()`

update the suggestions view

Source code in snapquery/person_selector.py

def update_suggestions_view(self):
    """
    Redraw the suggestion list from the suggested persons.

    At most ``limit`` persons are shown; if there are more, a final item
    states the number of available matches.

    Returns:
        an empty list if the suggestion view exists, otherwise None
    """
    if not self.suggestion_view:
        return None
    self.suggestion_view.clear()
    with self.suggestion_view:
        with ui.list().props("bordered separator"):
            ui.item_label("Suggestions").props("header").classes("text-bold")
            ui.separator()
            visible = self.suggested_persons[: self.limit]
            for person in visible:
                PersonSuggestion(person=person, on_select=self.selection_callback)

            total = len(self.suggested_persons)
            if total > self.limit:
                prefix = ">" if total >= 10000 else ""
                with ui.item():
                    ui.label(f"{prefix}{total} matches are available...")
    return []

`PersonSuggestion`

Bases: PersonView

Display a Person

Source code in snapquery/person_selector.py

class PersonSuggestion(PersonView):
    """
    A person view whose card can be clicked to select the person
    """

    def __init__(self, person: Person, on_select: Callable[[Person], Any]):
        """
        Args:
            person: the person to display
            on_select: called with the person when the card is clicked
        """
        super().__init__(person=person)
        self._on_select_callback = on_select
        click_handler = self.on_select
        self.person_card.on_click(click_handler)

    def on_select(self):
        """
        Pass my person to the selection callback and return its result
        """
        callback = self._on_select_callback
        return callback(self.person)

`on_select()`

Handle selection of the suggestion card

Source code in snapquery/person_selector.py

def on_select(self):
    """
    Pass my person to the selection callback and return its result
    """
    callback = self._on_select_callback
    return callback(self.person)

`PersonView`

Bases: Element

Display a person

Source code in snapquery/person_selector.py

class PersonView(Element):
    """
    Display a person

    Shows the image (if any), the label, the name and the linked
    identifiers of the person in a list item.
    """

    def __init__(self, person: Person):
        """
        Args:
            person: the person to display
        """
        self.pids = PIDs()
        self.pid_values = self._create_pid_values(person)
        super().__init__(tag="div")
        self.person = person
        with self:
            with ui.item() as self.person_card:
                with ui.item_section().props("avatar"):
                    with ui.avatar():
                        if person.image:
                            ui.image(source=person.image)
                with ui.item_section():
                    with ui.row():
                        self.person_label = ui.label(self.person.label)
                    with ui.row():
                        # leave out missing name parts instead of showing
                        # the text "None" for them
                        name_parts = [
                            part for part in (self.person.given_name, self.person.family_name) if part
                        ]
                        self.person_name = ui.label(" ".join(name_parts))
                    with ui.row():
                        self._show_identifier()

    def _create_pid_values(self, person: Person) -> List[PIDValue]:
        """
        Create PIDValue instances for the person's identifiers

        For every known PID the attribute ``<pid key>_id`` of the person is
        read; identifiers that are missing or empty are skipped.

        Args:
            person: the person to read the identifiers from

        Returns:
            List[PIDValue]: one entry per identifier that is set
        """
        pid_values = []
        for pid_key, pid in self.pids.pids.items():
            attr = f"{pid_key}_id"
            pid_value = getattr(person, attr, None)
            if pid_value:
                pid_values.append(PIDValue(pid=pid, value=pid_value))
        return pid_values

    def _show_identifier(self):
        """
        Display all identifiers of the person

        Each identifier is shown as the logo of its source followed by a
        link that opens in a new tab.
        """
        for pid_value in self.pid_values:
            with ui.element("div"):
                ui.avatar(
                    icon=f"img:{pid_value.pid.logo}",
                    color=None,
                    size="sm",
                    square=True,
                )
                ui.link(
                    text=pid_value.value,
                    target=pid_value.url,
                    new_tab=True,
                )

`pid`

Created on 2024-05-26 @author: wf

`PID` `dataclass`

A persistent identifier source e.g. ORCID, dblpID or wikidata id

Source code in snapquery/pid.py

@dataclass
class PID:
    """
    A persistent identifier source e.g. ORCID, dblpID or wikidata id
    """

    # display name of the identifier source, e.g. "ORCID"
    name: str
    # URL of the logo image of the source
    logo: str
    # URL template with a "{}" placeholder for the identifier value
    formatter_url: str
    # regular expression that identifiers of this source match
    regex: str

`PIDValue` `dataclass`

Represents a specific instance of a persistent identifier with its value.

Source code in snapquery/pid.py

@dataclass
class PIDValue:
    """
    A concrete identifier value together with the PID source it belongs to.
    """

    pid: PID
    value: str

    @property
    def url(self) -> str:
        """
        the formatter url of the source filled with my value
        """
        template = self.pid.formatter_url
        return template.format(self.value)

    @property
    def html(self) -> str:
        """
        an html link to my url showing the logo of the source and my value
        """
        link = f'<a href="{self.url}"><img src="{self.pid.logo}" alt="{self.pid.name} logo"> {self.value}</a>'
        return link

    def is_valid(self) -> bool:
        """
        Returns:
            bool: True if my value matches the regular expression of the source
        """
        matched = re.match(self.pid.regex, self.value)
        return matched is not None

`PIDs`

Available PIDs

Source code in snapquery/pid.py

class PIDs:
    """
    Available PIDs

    Maps the keys "orcid", "dblp" and "wikidata" to their PID definitions.
    """

    def __init__(self):
        """
        set up the known persistent identifier sources
        """
        self.pids = {
            "orcid": PID(
                name="ORCID",
                logo="https://orcid.org/sites/default/files/images/orcid_16x16.png",
                formatter_url="https://orcid.org/{}",
                regex=r"^\d{4}-\d{4}-\d{4}-\d{3}[0-9X]$",
            ),
            "dblp": PID(
                name="dblp",
                logo="https://dblp.org/img/dblp-icon-64x64.png",
                formatter_url="https://dblp.org/pid/{}",
                regex=r"^[a-z0-9/]+$",
            ),
            "wikidata": PID(
                name="Wikidata",
                logo="https://www.wikidata.org/static/favicon/wikidata.ico",
                formatter_url="https://www.wikidata.org/wiki/{}",
                regex=r"^Q[0-9]+$",
            ),
        }

    def pid4id(self, identifier: Optional[str]) -> Optional[PIDValue]:
        """
        Create a PIDValue instance based on the identifier type.

        The identifier is tested against the patterns of the known PIDs in
        their definition order; the first match determines the type.

        Args:
            identifier: the identifier to classify; None or "" is accepted

        Returns:
            Optional[PIDValue]: the identifier with its PID, or None if the
            identifier is empty or matches no known pattern
        """
        # re.match raises a TypeError for None, e.g. from an unset input field
        if not identifier:
            return None
        for pid in self.pids.values():
            if re.match(pid.regex, identifier):
                return PIDValue(pid=pid, value=identifier)
        return None

`pid4id(identifier)`

Create a PIDValue instance based on the identifier type.

Source code in snapquery/pid.py

def pid4id(self, identifier: Optional[str]) -> Optional[PIDValue]:
    """
    Create a PIDValue instance based on the identifier type.

    The identifier is tested against the patterns of the known PIDs in
    their definition order; the first match determines the type.

    Args:
        identifier: the identifier to classify; None or "" is accepted

    Returns:
        Optional[PIDValue]: the identifier with its PID, or None if the
        identifier is empty or matches no known pattern
    """
    # re.match raises a TypeError for None, e.g. from an unset input field
    if not identifier:
        return None
    for pid in self.pids.values():
        if re.match(pid.regex, identifier):
            return PIDValue(pid=pid, value=identifier)
    return None

`pid_lookup`

Created on 2024-05-26 @author: wf

`PersonLookup`

Lookup potential persons from various databases such as Wikidata, ORCID, and DBLP.

Source code in snapquery/pid_lookup.py

class PersonLookup:
    """
    Lookup potential persons from various
    databases such as Wikidata, ORCID, and DBLP.
    """

    def __init__(self, nqm: NamedQueryManager):
        """
        Initialize the PersonLookup with a Named Query Manager.

        Args:
            nqm (NamedQueryManager): The named query manager to execute SPARQL queries.
        """
        self.pids = PIDs()
        self.nqm = nqm
        self.wikidata_search = WikidataSearch()
        self.dblp_person_lookup = DblpPersonLookup(self.nqm)

    def suggest_from_wikidata(self, search_name: str, limit: int = 10) -> List[Person]:
        """
        Suggest persons using WikidataSearch.

        The search options found for the name are used as the VALUES of a
        SPARQL query that keeps only humans (wdt:P31 wd:Q5) having an english
        given name and family name label.

        Args:
            search_name (str): The name to search for suggestions.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of suggested persons from Wikidata.
        """
        suggestions = self.wikidata_search.searchOptions(search_name, limit=limit)
        qid_list = " ".join(f"wd:{qid}" for qid, _plabel, _pdesc in suggestions)
        if not qid_list:
            # the pre-search found nothing: a query with an empty VALUES
            # clause has no solutions, so skip the SPARQL round trip
            return []
        named_query = NamedQuery(
            domain="wikidata.org",
            namespace="pid-lookup",
            name="person-by-qid",
            title="Lookup persons with the given qids",
            description="based on a pre-search with wikidata search select persons",
            sparql="""# snapquery person lookup 
SELECT *
WHERE 
{
  VALUES ?scholar {
    {{ qid_list }}
  } 
  ?scholar wdt:P31 wd:Q5 .
  ?scholar wdt:P735 ?given_name_qid .
  ?given_name_qid rdfs:label ?given_name .
  ?scholar wdt:P734 ?family_name_qid .
  ?family_name_qid rdfs:label ?family_name .
  OPTIONAL{{ ?scholar rdfs:label ?label FILTER(lang(?label) = "en") }}.
  OPTIONAL{{?scholar wdt:P2456 ?dblp_author_id .}}
  OPTIONAL{{?scholar wdt:P496 ?orcid_id . }}
  OPTIONAL{{?scholar wdt:P18 ?image . }}
  FILTER(lang(?given_name) = "en")
  FILTER(lang(?family_name) = "en")
}
            """,
        )
        params_dict = {"qid_list": qid_list}
        # with_stats=False: the second element of the result is not used here
        person_lod, _stats = self.nqm.execute_query(
            named_query=named_query,
            params_dict=params_dict,
            limit=limit,
            with_stats=False,
        )
        persons = [
            Person(
                label=pr.get("label"),
                given_name=pr.get("given_name"),
                family_name=pr.get("family_name"),
                # the last path segment of the ?scholar value is the Q-identifier
                wikidata_id=pr.get("scholar").split("/")[-1],
                dblp_author_id=pr.get("dblp_author_id"),
                orcid_id=pr.get("orcid_id"),
                image=pr.get("image"),
            )
            for pr in person_lod
        ]
        return persons

    def suggest_from_orcid(self, search_name: str, limit: int = 10) -> List[Person]:
        """
        Suggest persons using the ORCID registry search.

        The search name is used as the family name of the ORCID search.

        Args:
            search_name (str): The name to search for suggestions.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of suggested persons from ORCID
                - empty if the ORCID access is not available.
        """
        orcid = OrcidAuth()
        persons = []
        if orcid.available():
            persons = orcid.search(OrcidSearchParams(family_name=search_name), limit=limit)
        return persons

    def suggest_from_dblp(self, search_name: str, limit: int = 10) -> List[Person]:
        """
        Suggest persons using DBLP author search.

        Args:
            search_name (str): The name to search for suggestions.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of suggested persons from DBLP.
        """
        persons = self.dblp_person_lookup.search(name_part=search_name, limit=limit)
        return persons

`__init__(nqm)`

Initialize the PersonLookup with a Named Query Manager.

Parameters:

Name	Type	Description	Default
`nqm`	`NamedQueryManager`	The named query manager to execute SPARQL queries.	required

Source code in snapquery/pid_lookup.py

def __init__(self, nqm: NamedQueryManager):
    """
    Set up the lookup helpers for Wikidata and DBLP.

    Args:
        nqm (NamedQueryManager): the named query manager used
            to execute the SPARQL queries
    """
    self.pids = PIDs()
    self.nqm = nqm
    self.wikidata_search = WikidataSearch()
    # the DBLP lookup shares my named query manager
    self.dblp_person_lookup = DblpPersonLookup(nqm)

`suggest_from_dblp(search_name, limit=10)`

Suggest persons using DBLP author search.

Parameters:

Name	Type	Description	Default
`search_name`	`str`	The name to search for suggestions.	required
`limit`	`int`	The maximum number of results to return.	`10`

Returns:

Type	Description
`List[Person]`	List[Person]: A list of suggested persons from DBLP.

Source code in snapquery/pid_lookup.py

def suggest_from_dblp(self, search_name: str, limit: int = 10) -> List[Person]:
    """
    Delegate the person suggestion to the DBLP person lookup.

    Args:
        search_name (str): the part of the name to search for
        limit (int): the maximum number of results to return

    Returns:
        List[Person]: the persons suggested by DBLP
    """
    return self.dblp_person_lookup.search(name_part=search_name, limit=limit)

`suggest_from_orcid(search_name, limit=10)`

Suggest persons using the ORCID registry search.

Parameters:

Name	Type	Description	Default
`search_name`	`str`	The name to search for suggestions.	required
`limit`	`int`	The maximum number of results to return.	`10`

Returns:

Type	Description
`List[Person]`	List[Person]: A list of suggested persons from ORCID.

Source code in snapquery/pid_lookup.py

def suggest_from_orcid(self, search_name: str, limit: int = 10) -> List[Person]:
    """
    Search the ORCID registry for persons with the given family name.

    Args:
        search_name (str): the name to search for - used as family name
        limit (int): the maximum number of results to return

    Returns:
        List[Person]: the persons suggested by ORCID
            - empty if the ORCID access is not available
    """
    orcid = OrcidAuth()
    if not orcid.available():
        return []
    search_params = OrcidSearchParams(family_name=search_name)
    return orcid.search(search_params, limit=limit)

`suggest_from_wikidata(search_name, limit=10)`

Suggest persons using WikidataSearch.

Parameters:

Name	Type	Description	Default
`search_name`	`str`	The name to search for suggestions.	required
`limit`	`int`	The maximum number of results to return.	`10`

Returns:

Type	Description
`List[Person]`	List[Person]: A list of suggested persons from Wikidata.

Source code in snapquery/pid_lookup.py

    def suggest_from_wikidata(self, search_name: str, limit: int = 10) -> List[Person]:
        """
        Suggest persons using WikidataSearch.

        Args:
            search_name (str): The name to search for suggestions.
            limit (int): The maximum number of results to return.

        Returns:
            List[Person]: A list of suggested persons from Wikidata.
        """
        persons = []
        suggestions = self.wikidata_search.searchOptions(search_name, limit=limit)
        qid_list = ""
        delim = ""
        for qid, _plabel, _pdesc in suggestions:
            qid_list += f"{delim}wd:{qid}"
            delim = " "
        named_query = NamedQuery(
            domain="wikidata.org",
            namespace="pid-lookup",
            name="person-by-qid",
            title="Lookup persons with the given qids",
            description="based on a pre-search with wikidata search select persons",
            sparql="""# snapquery person lookup 
SELECT *
WHERE 
{
  VALUES ?scholar {
    {{ qid_list }}
  } 
  ?scholar wdt:P31 wd:Q5 .
  ?scholar wdt:P735 ?given_name_qid .
  ?given_name_qid rdfs:label ?given_name .
  ?scholar wdt:P734 ?family_name_qid .
  ?family_name_qid rdfs:label ?family_name .
  OPTIONAL{{ ?scholar rdfs:label ?label FILTER(lang(?label) = "en") }}.
  OPTIONAL{{?scholar wdt:P2456 ?dblp_author_id .}}
  OPTIONAL{{?scholar wdt:P496 ?orcid_id . }}
  OPTIONAL{{?scholar wdt:P18 ?image . }}
  FILTER(lang(?given_name) = "en")
  FILTER(lang(?family_name) = "en")
}
            """,
        )
        params_dict = {"qid_list": qid_list}
        person_lod, stats = self.nqm.execute_query(
            named_query=named_query,
            params_dict=params_dict,
            limit=limit,
            with_stats=False,
        )
        for pr in person_lod:
            person = Person(
                label=pr.get("label"),
                given_name=pr.get("given_name"),
                family_name=pr.get("family_name"),
                wikidata_id=pr.get("scholar").split("/")[-1],
                dblp_author_id=pr.get("dblp_author_id"),
                orcid_id=pr.get("orcid_id"),
                image=pr.get("image"),
            )
            persons.append(person)

        return persons

`qimport`

Created on 2024-05-05

@author: wf

`QueryImport`

Import named queries from a given URL or file.

Source code in snapquery/qimport.py

class QueryImport:
    """
    Import named queries from a given URL or file.
    """

    def __init__(self, nqm: NamedQueryManager = None):
        """
        Constructor

        Args:
            nqm (NamedQueryManager, optional): The NamedQueryManager to use for storing queries.
        """
        self.nqm = nqm

    def import_samples(self, with_store: bool = True, show_progress: bool = False):
        """
        import all sample json files found in the samples path
        of my NamedQueryManager

        Args:
            with_store(bool): if True store the result
            show_progress(bool): if True show a tqdm progress bar

        Raises:
            Exception: the error of the first json file that could not be imported
        """
        for json_file in glob.glob(os.path.join(self.nqm.samples_path, "*.json")):
            try:
                nq_list = self.import_from_json_file(json_file, with_store, show_progress)
            except Exception:
                print(f"could not load json_file {json_file}")
                raise
            if "ceur" in json_file:
                # files with "ceur" in their path are additionally
                # written back as an indented copy to /tmp
                json_file_name = os.path.basename(json_file)
                output_path = os.path.join("/tmp", json_file_name)
                nq_list.save_to_json_file(output_path, indent=2)

    def import_from_json_file(
        self, json_file: str, with_store: bool = False, show_progress: bool = False
    ) -> NamedQuerySet:
        """
        Import named queries from a JSON file.

        A query without SPARQL text gets its text by reading
        its https://w.wiki/ short url.

        Args:
            json_file (str): Path to the JSON file.
            with_store (bool): If True, store the results in the NamedQueryManager.
            show_progress (bool): If True, show a progress bar during the import.

        Returns:
            NamedQuerySet: A NamedQuerySet object containing the imported NamedQuery objects.

        Raises:
            ValueError: if a query has neither SPARQL text nor a https://w.wiki/ short url
        """
        nq_set = NamedQuerySet.load_from_json_file(json_file)
        iterable = (
            tqdm(
                nq_set.queries,
                desc=f"Importing Namespace {nq_set.namespace}@{nq_set.domain}",
            )
            if show_progress
            else nq_set.queries
        )

        for nq in iterable:
            if not nq.sparql:
                if nq.url and nq.url.startswith("https://w.wiki/"):
                    short_url = ShortUrl(nq.url)
                    nq.sparql = short_url.read_query()
                else:
                    # there is no way to obtain the query text
                    raise ValueError(
                        f"invalid named query with neither sparql nor a https://w.wiki/ short url: {nq}"
                    )
            # storing is only possible if a NamedQueryManager was given
            if with_store and self.nqm:
                self.nqm.add_and_store(nq)
        return nq_set

`__init__(nqm=None)`

Constructor

Parameters:

Name	Type	Description	Default
`nqm`	`NamedQueryManager`	The NamedQueryManager to use for storing queries.	`None`

Source code in snapquery/qimport.py

def __init__(self, nqm: NamedQueryManager = None):
    """
    Remember the manager that is used for storing imported queries.

    Args:
        nqm (NamedQueryManager, optional): the NamedQueryManager to store queries with
    """
    self.nqm = nqm

`import_from_json_file(json_file, with_store=False, show_progress=False)`

Import named queries from a JSON file.

Parameters:

Name	Type	Description	Default
`json_file`	`str`	Path to the JSON file.	required
`with_store`	`bool`	If True, store the results in the NamedQueryManager.	`False`
`show_progress`	`bool`	If True, show a progress bar during the import.	`False`

Returns:

Name	Type	Description
`NamedQuerySet`	`NamedQuerySet`	A NamedQuerySet object containing the imported NamedQuery objects.

Source code in snapquery/qimport.py

def import_from_json_file(
    self, json_file: str, with_store: bool = False, show_progress: bool = False
) -> NamedQuerySet:
    """
    Import named queries from a JSON file.

    A query without SPARQL text gets its text by reading
    its https://w.wiki/ short url.

    Args:
        json_file (str): Path to the JSON file.
        with_store (bool): If True, store the results in the NamedQueryManager.
        show_progress (bool): If True, show a progress bar during the import.

    Returns:
        NamedQuerySet: A NamedQuerySet object containing the imported NamedQuery objects.

    Raises:
        ValueError: if a query has neither SPARQL text nor a https://w.wiki/ short url
    """
    nq_set = NamedQuerySet.load_from_json_file(json_file)
    queries = nq_set.queries
    if show_progress:
        queries = tqdm(
            queries,
            desc=f"Importing Namespace {nq_set.namespace}@{nq_set.domain}",
        )

    for nq in queries:
        if not nq.sparql:
            if not (nq.url and nq.url.startswith("https://w.wiki/")):
                # there is no way to obtain the query text
                raise ValueError(
                    f"invalid named query with neither sparql nor a https://w.wiki/ short url: {nq}"
                )
            nq.sparql = ShortUrl(nq.url).read_query()
        # storing is only possible if a NamedQueryManager was given
        if with_store and self.nqm:
            self.nqm.add_and_store(nq)
    return nq_set

`import_samples(with_store=True, show_progress=False)`

import all sample json files

Parameters:

Name	Type	Description	Default
`with_store`	`bool`	if True store the result	`True`
`show_progress`	`bool`	if True show a tqdm progress bar	`False`

Source code in snapquery/qimport.py

def import_samples(self, with_store: bool = True, show_progress: bool = False):
    """
    Import every *.json file found in the samples path of my NamedQueryManager.

    Files with "ceur" in their path are additionally written
    back as an indented copy to /tmp.

    Args:
        with_store(bool): if True store the result
        show_progress(bool): if True show a tqdm progress bar

    Raises:
        Exception: the error of the first json file that could not be imported
    """
    sample_pattern = os.path.join(self.nqm.samples_path, "*.json")
    for sample_path in glob.glob(sample_pattern):
        try:
            nq_set = self.import_from_json_file(sample_path, with_store, show_progress)
        except Exception:
            # name the offending file, then let the caller see the error
            print(f"could not load json_file {sample_path}")
            raise
        if "ceur" not in sample_path:
            continue
        dump_path = os.path.join("/tmp", os.path.basename(sample_path))
        nq_set.save_to_json_file(dump_path, indent=2)

`qimport_view`

Created on 2024-05-05

@author: wf

`QueryImportView`

display Query Import UI

Source code in snapquery/qimport_view.py

class QueryImportView:
    """
    display Query Import UI
    """

    def __init__(
        self,
        solution=None,
        person: Optional[Person] = None,
        allow_importing_from_url: bool = True,
    ):
        """
        Constructor

        Args:
            solution: the web solution providing the container and the
                NamedQueryManager - without it no user interface is set up
            person (Optional[Person]): the person nominating the query
            allow_importing_from_url (bool): if True show the "Import Query" button
        """
        self.person = person
        self.solution = solution
        self.allow_importing_from_url = allow_importing_from_url
        self.namespace = ""
        self.name = ""
        self.url = ""
        self.title = ""
        self.description = ""
        self.comment = ""
        self.query = None
        if self.solution:
            self.qimport = QueryImport()
            self.nqm = self.solution.nqm
            self.setup_ui()

    def setup_ui(self):
        """
        setup the user interface
        """
        with self.solution.container:
            # the input fields are bound to my attributes of the same name
            with ui.row() as self.input_row:
                self.input_row.classes("h-full")
                ui.input(label="namespace", placeholder="e.g. wikidata-examples").bind_value(self, "namespace")
                with ui.input(label="name", placeholder="e.g. all proceedings of CEUR-WS").bind_value(self, "name"):
                    ui.tooltip("short name for query; needs to be unique within the namespace")
                ui.input(label="url", placeholder="e.g. short url to the query").props("size=80").bind_value(
                    self, "url"
                )
                if self.allow_importing_from_url:
                    ui.button(icon="input", text="Import Query", on_click=self.on_input_button)
                ui.button(icon="publish", text="Publish Query", on_click=self.on_import_button)
                with ui.input(label="title").props("size=80").bind_value(self, "title"):
                    ui.tooltip("Descriptive title of the query")
            self.query_row = ui.row().classes("w-full h-full flex ")
            with self.query_row:
                ui.textarea(label="query").bind_value(self, "query").classes(
                    "w-full h-full resize min-h-80 border-solid m-5 border-gray-dark border-2 rounded-md"
                )
            with ui.row() as self.details_row:
                self.details_row.classes("flex")
                ui.textarea(label="description").bind_value(self, "description").classes(
                    "w-1/2 border-solid m-5 border-gray-dark border-2 rounded-md"
                )
                ui.textarea(label="comment").bind_value(self, "comment").classes(
                    "w-2/5 border-solid m-5 border-gray-dark border-2 rounded-md"
                )
                # filled with a link to the named query once it has been published
                self.named_query_link = ui.html()

    def on_import_button(self, _args):
        """
        publish the current query as a named query
        """
        # an emptied query textarea yields "" - treat it like no query at all
        if not self.query:
            with self.query_row:
                ui.notify("input a query first")
            return
        if self.person:
            self.comment = f"[query nominated by {self.person}] {self.comment}"
        nq_record = {
            "namespace": self.namespace,
            "name": self.name,
            "title": self.title,
            "url": self.url,
            "description": self.description,
            "comment": self.comment,
            # the query is either a Query (imported from a url) or the plain textarea text
            "sparql": self.query.query if isinstance(self.query, Query) else self.query,
        }
        nq = NamedQuery.from_record(nq_record)
        self.nqm.add_and_store(nq)
        with self.query_row:
            ui.notify(f"added named query {self.name}")
            self.named_query_link.content = nq.as_link()
        self.clear_inputs()

    def clear_inputs(self):
        """
        reset the input fields to their initial values

        the namespace is not reset
        """
        self.query = None
        # use the same empty values as the constructor so that e.g. the
        # nominated-by comment of the next query does not end with "None"
        self.name = ""
        self.url = ""
        self.title = ""
        self.description = ""
        self.comment = ""

    def on_input_button(self, _args):
        """
        input a query by reading it from the given url
        and show it syntax highlighted
        """
        self.query_row.clear()
        with self.query_row:
            ui.notify(f"importing named query from {self.url}")
            # NOTE(review): the QueryImport class listed for qimport.py shows no
            # read_from_short_url method - confirm that this call still resolves
            sparql_query = self.qimport.read_from_short_url(self.url)
            self.query = Query(name=self.name, title=self.title, lang="sparql", query=sparql_query)
            query_syntax_highlight = QuerySyntaxHighlight(self.query)
            syntax_highlight_css = query_syntax_highlight.formatter.get_style_defs()
            ui.add_css(syntax_highlight_css)
            ui.html(query_syntax_highlight.highlight())

`on_import_button(_args)`

import a query

Source code in snapquery/qimport_view.py

def on_import_button(self, _args):
    """
    publish the current query as a named query

    Args:
        _args: the click event arguments - not used
    """
    # an emptied query textarea yields "" - treat it like no query at all
    if not self.query:
        with self.query_row:
            ui.notify("input a query first")
        return
    if self.person:
        self.comment = f"[query nominated by {self.person}] {self.comment}"
    # the query is either a Query (imported from a url) or the plain textarea text
    sparql = self.query.query if isinstance(self.query, Query) else self.query
    nq_record = {
        "namespace": self.namespace,
        "name": self.name,
        "title": self.title,
        "url": self.url,
        "description": self.description,
        "comment": self.comment,
        "sparql": sparql,
    }
    nq = NamedQuery.from_record(nq_record)
    self.nqm.add_and_store(nq)
    with self.query_row:
        ui.notify(f"added named query {self.name}")
        self.named_query_link.content = nq.as_link()
    self.clear_inputs()

`on_input_button(_args)`

input a query

Source code in snapquery/qimport_view.py

def on_input_button(self, _args):
    """
    input a query by reading it from the given url
    and show it syntax highlighted

    Args:
        _args: the click event arguments - not used
    """
    self.query_row.clear()
    with self.query_row:
        ui.notify(f"importing named query from {self.url}")
        # NOTE(review): the QueryImport class listed for qimport.py shows no
        # read_from_short_url method - confirm that this call still resolves
        sparql_text = self.qimport.read_from_short_url(self.url)
        self.query = Query(name=self.name, title=self.title, lang="sparql", query=sparql_text)
        highlighter = QuerySyntaxHighlight(self.query)
        # the style definitions of the formatter are needed for the highlighted html
        ui.add_css(highlighter.formatter.get_style_defs())
        ui.html(highlighter.highlight())

`setup_ui()`

setup the user interface

Source code in snapquery/qimport_view.py

def setup_ui(self):
    """
    setup the user interface

    Creates three rows inside the container of my solution:
    the input row (namespace, name, url, buttons, title),
    the query row (query textarea) and the details row
    (description, comment and the link to the published named query).
    All input elements are bound to my attributes of the same name.
    """
    with self.solution.container:
        with ui.row() as self.input_row:
            self.input_row.classes("h-full")
            ui.input(label="namespace", placeholder="e.g. wikidata-examples").bind_value(self, "namespace")
            with ui.input(label="name", placeholder="e.g. all proceedings of CEUR-WS").bind_value(self, "name"):
                ui.tooltip("short name for query; needs to be unique within the namespace")
            ui.input(label="url", placeholder="e.g. short url to the query").props("size=80").bind_value(
                self, "url"
            )
            # importing from a url is optional - publishing is always offered
            if self.allow_importing_from_url:
                ui.button(icon="input", text="Import Query", on_click=self.on_input_button)
            ui.button(icon="publish", text="Publish Query", on_click=self.on_import_button)
            with ui.input(label="title").props("size=80").bind_value(self, "title"):
                ui.tooltip("Descriptive title of the query")
        # the query row is kept as an attribute: the button handlers
        # use it as the context for their notifications and content
        self.query_row = ui.row().classes("w-full h-full flex ")
        with self.query_row:
            ui.textarea(label="query").bind_value(self, "query").classes(
                "w-full h-full resize min-h-80 border-solid m-5 border-gray-dark border-2 rounded-md"
            )
        with ui.row() as self.details_row:
            self.details_row.classes("flex")
            ui.textarea(label="description").bind_value(self, "description").classes(
                "w-1/2 border-solid m-5 border-gray-dark border-2 rounded-md"
            )
            ui.textarea(label="comment").bind_value(self, "comment").classes(
                "w-2/5 border-solid m-5 border-gray-dark border-2 rounded-md"
            )
            # filled with a link to the named query once it has been published
            self.named_query_link = ui.html()

`qlever`

Created on 2024-06-20

@author: wf

`QLever`

handle https://github.com/ad-freiburg/qlever specifics

Source code in snapquery/qlever.py

class QLever:
    """
    handle https://github.com/ad-freiburg/qlever specifics
    """

    def __init__(self, with_progress=True):
        """
        Constructor

        Args:
            with_progress (bool): stored as an attribute; not used by the methods of this class
        """
        self.url = "https://github.com/ad-freiburg/qlever"
        self.with_progress = with_progress
        # Regex pattern to find URLs starting with the specified prefix
        self.wd_url_pattern = re.compile(r"https://qlever\.cs\.uni-freiburg\.de/wikidata/[A-Za-z0-9]+")
        self.osproject = OsProject.fromUrl(self.url)

    def wd_urls_for_ticket(self, ticket: Ticket) -> List[str]:
        """
        Extracts and returns all URLs from a ticket's body and comments that match the specified pattern.

        Args:
            ticket (Ticket): the ticket to extract the URLs for

        Returns:
            List[str]: the matching URLs in order of appearance - body first,
                then the comments; duplicates are kept
        """
        extracted_urls = []

        # Extract URLs from the ticket body
        if ticket.body:
            found_urls = self.wd_url_pattern.findall(ticket.body)
            extracted_urls.extend(found_urls)

        # Fetch and extract URLs from comments
        comments = self.osproject.ticketSystem.getComments(self.osproject, ticket.number)
        for comment in comments:
            # guard the comment body the same way as the ticket body:
            # findall would raise a TypeError for a missing (None) body
            comment_body = comment.get("body")
            if comment_body:
                found_urls = self.wd_url_pattern.findall(comment_body)
                extracted_urls.extend(found_urls)

        return extracted_urls

    def named_queries_for_tickets(self, ticket_dict):
        """
        Create named queries for each ticket's extracted URLs.

        Args:
            ticket_dict (dict): Dictionary mapping tickets to a list of URLs.

        Returns:
            NamedQuerySet: A set of named queries generated from the URLs.
        """
        named_query_set = NamedQuerySet(
            domain="qlever.cs.uni-freiburg.de",
            namespace="issues.wikidata",
            target_graph_name="wikidata",
        )
        for ticket, urls in ticket_dict.items():
            # the queries of a ticket are numbered starting with 1
            for i, url in enumerate(urls, 1):
                # Assuming URLs are like 'https://qlever.cs.uni-freiburg.de/wikidata/iTzJwQ'
                short_url_handler = QLeverUrl(url)
                short_url_handler.read_query()
                # urls for which no query could be read are skipped
                if short_url_handler.sparql:
                    query = NamedQuery(
                        domain=named_query_set.domain,
                        name=f"Issue{ticket.number}-query{i}",
                        namespace=named_query_set.namespace,
                        url=url,
                        sparql=short_url_handler.sparql,
                        title=f"QLever github issue #{ticket.number}-query{i}",
                        description=ticket.title,
                        comment=f"See ticket {ticket.url} and query {url}",
                    )
                    named_query_set.queries.append(query)
        return named_query_set

`named_queries_for_tickets(ticket_dict)`

Create named queries for each ticket's extracted URLs.

Parameters:

Name	Type	Description	Default
`ticket_dict`	`dict`	Dictionary mapping tickets to a list of URLs.	required

Returns:

Name	Type	Description
`NamedQuerySet`		A set of named queries generated from the URLs.

Source code in snapquery/qlever.py

def named_queries_for_tickets(self, ticket_dict):
    """
    Build a NamedQuerySet from the QLever short URLs of the given tickets.

    Args:
        ticket_dict (dict): Dictionary mapping tickets to a list of URLs.

    Returns:
        NamedQuerySet: the named queries for all URLs a query could be read from
    """
    query_set = NamedQuerySet(
        domain="qlever.cs.uni-freiburg.de",
        namespace="issues.wikidata",
        target_graph_name="wikidata",
    )
    for ticket, urls in ticket_dict.items():
        # the queries of a ticket are numbered starting with 1
        for index, url in enumerate(urls, 1):
            # URLs are expected to look like 'https://qlever.cs.uni-freiburg.de/wikidata/iTzJwQ'
            qlever_url = QLeverUrl(url)
            qlever_url.read_query()
            if not qlever_url.sparql:
                # no query could be read from this url
                continue
            named_query = NamedQuery(
                domain=query_set.domain,
                name=f"Issue{ticket.number}-query{index}",
                namespace=query_set.namespace,
                url=url,
                sparql=qlever_url.sparql,
                title=f"QLever github issue #{ticket.number}-query{index}",
                description=ticket.title,
                comment=f"See ticket {ticket.url} and query {url}",
            )
            query_set.queries.append(named_query)
    return query_set

`wd_urls_for_ticket(ticket)`

Extracts and returns all URLs from a ticket's body and comments that match the specified pattern.

Source code in snapquery/qlever.py

def wd_urls_for_ticket(self, ticket: Ticket) -> List[str]:
    """
    Extracts and returns all URLs from a ticket's body and comments that match the specified pattern.

    Args:
        ticket (Ticket): the ticket to extract the URLs for

    Returns:
        List[str]: the matching URLs in order of appearance - body first,
            then the comments; duplicates are kept
    """
    extracted_urls = []

    # Extract URLs from the ticket body
    if ticket.body:
        extracted_urls.extend(self.wd_url_pattern.findall(ticket.body))

    # Fetch and extract URLs from comments
    comments = self.osproject.ticketSystem.getComments(self.osproject, ticket.number)
    for comment in comments:
        # guard the comment body the same way as the ticket body:
        # findall would raise a TypeError for a missing (None) body
        comment_body = comment.get("body")
        if comment_body:
            extracted_urls.extend(self.wd_url_pattern.findall(comment_body))

    return extracted_urls

`QLeverUrl`

Bases: ShortUrl

Handles operations related to QLever short URLs.

Source code in snapquery/qlever.py

class QLeverUrl(ShortUrl):
    """
    Handles operations related to QLever short URLs.
    """

    def __init__(self, short_url: str):
        """
        Constructor

        Args:
            short_url (str): the QLever short url
        """
        super().__init__(short_url, scheme="https", netloc="qlever.cs.uni-freiburg.de")

    def read_query(self) -> str:
        """
        Read a query from a QLever short URL.

        The page behind the final url is fetched and the text of its
        textarea with the id "query" is taken as the SPARQL query.
        Errors are not raised but kept in self.error.

        Returns:
            str: The SPARQL query extracted from the short URL
                - self.sparql is returned unchanged if no query could be extracted.
        """
        self.fetch_final_url()
        if self.url:
            try:
                # requests has no default timeout - without one an
                # unresponsive server would block this call forever
                response = requests.get(self.url, timeout=30)
                response.raise_for_status()
                soup = BeautifulSoup(response.content, "html.parser")
                query_element = soup.find("textarea", {"id": "query"})
                if query_element and query_element.text:
                    self.sparql = query_element.text.strip()
            except Exception as ex:
                # best effort: keep the error for the caller to inspect
                self.error = ex
        return self.sparql

`read_query()`

Read a query from a QLever short URL.

Returns:

Name	Type	Description
`str`	`str`	The SPARQL query extracted from the short URL.

Source code in snapquery/qlever.py

def read_query(self) -> str:
    """
    Read a query from a QLever short URL.

    The page behind the final url is fetched and the text of its
    textarea with the id "query" is taken as the SPARQL query.
    Errors are not raised but kept in self.error.

    Returns:
        str: The SPARQL query extracted from the short URL
            - self.sparql is returned unchanged if no query could be extracted.
    """
    self.fetch_final_url()
    if self.url:
        try:
            # requests has no default timeout - without one an
            # unresponsive server would block this call forever
            response = requests.get(self.url, timeout=30)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")
            query_element = soup.find("textarea", {"id": "query"})
            if query_element and query_element.text:
                self.sparql = query_element.text.strip()
        except Exception as ex:
            # best effort: keep the error for the caller to inspect
            self.error = ex
    return self.sparql

`query_annotate`

Created on 2024-05-15

@author: tholzheim

`NamespaceStat`

contains namespace information

Source code in snapquery/query_annotate.py

@lod_storable
class NamespaceStat:
    """
    contains namespace information

    Attributes:
        name (str): the name of the namespace
        count (int): a counter for the namespace - starts at 0
            NOTE(review): what exactly is counted is not visible here - confirm against the callers
    """

    name: str
    count: int = 0

`SparqlQueryAnnotater`

Annotate a query

Source code in snapquery/query_annotate.py

class SparqlQueryAnnotater:
    """
    Annotate a query
    """

    def __init__(self, query: Query):
        """
        Constructor

        Args:
            query (Query): the query to annotate - its syntax highlighted
                html is parsed into self.soup
        """
        self.query = query
        query_syntax_highlight = QuerySyntaxHighlight(query)
        html = query_syntax_highlight.highlight()
        self.soup = BeautifulSoup(html, "html.parser")
        self.stats = QUERY_ITEM_STATS

    def _prefixed_names(self):
        """
        yield a (prefix, colon, item) triple for each span of class "nn"
        whose second next sibling is an element of class "nt"
        """
        for prefix in self.soup.find_all("span", {"class": "nn"}):
            colon = prefix.next_sibling
            # the prefix span may be the last node - then there is no item
            item = colon.next_sibling if colon is not None else None
            # text nodes have no attrs and elements may have no class
            item_classes = getattr(item, "attrs", {}).get("class") or []
            if "nt" in item_classes:
                yield prefix, colon, item

    def get_used_properties(self):
        """
        get the prefixed names used in the query

        Returns:
            list: the names in the form "prefix:name" in order of appearance
        """
        return [f"{prefix.text}:{item.text}" for prefix, _colon, item in self._prefixed_names()]

    def annotate(self) -> str:
        """
        wrap each prefixed name of the highlighted query in a link to
        the wikidata entity having the label of the item as its title

        Returns:
            str: the annotated html
        """
        for prefix, colon, item in self._prefixed_names():
            identifier = item.text
            # names not starting with P or Q are looked up including their prefix
            if not identifier.startswith(("P", "Q")):
                identifier = f"{prefix.text}:{identifier}"
            item_stat = self.stats.get_by_id(identifier)
            title = item_stat.label if item_stat else item.text
            annotation_element = self.soup.new_tag(
                "a",
                href="http://www.wikidata.org/entity/" + item.text,
                title=title,
                target="_blank",
            )
            # put the link where the prefix was and move the three parts into it
            prefix.insert_before(annotation_element)
            annotation_element.insert(0, prefix)
            annotation_element.insert(1, colon)
            annotation_element.insert(2, item)
        return str(self.soup)

`query_selector`

Created on 2024-07-04 @author: wf

`QuerySelector`

A class to select domain, namespace, and name for a query using comboboxes. Uses a single change handler to update selections dynamically.

Source code in snapquery/query_selector.py

class QuerySelector:
    """
    Combobox based selection of the domain, namespace and name of a query.

    All three comboboxes share a single change handler which refreshes the
    offered options and then notifies the owner of the selector.
    """

    def __init__(self, solution: WebSolution, on_change):
        """
        Constructor

        Args:
            solution (WebSolution): the solution whose named query manager (``nqm``) is used
            on_change: callback that is awaited without arguments after each change (may be falsy)
        """
        self.solution = solution
        self.on_change = on_change
        self.nqm = solution.nqm
        # the names that can be offered by the comboboxes
        self.qns = QueryNameSet(self.nqm)
        # the current selection - nothing is selected initially
        self.qn = QueryName(domain="", namespace="", name="")
        self.qns.update(domain=self.qn.domain, namespace=self.qn.namespace)
        self.setup_ui()

    def setup_ui(self):
        """
        Create the row with the domain, namespace and name comboboxes.
        """
        with ui.row() as self.select_row:
            for kind, label, width_chars in (
                ("domain", "Domain", 25),
                ("namespace", "Namespace", 40),
                ("name", "Name", 80),
            ):
                # the option lists are the plural attributes of the QueryNameSet
                options = getattr(self.qns, f"{kind}s")
                setattr(self, f"{kind}_select", self.create_combobox(label, options, width_chars))

    def create_combobox(self, label: str, options: List[str], width_chars: int) -> ComboBox:
        """
        Create a clearable ComboBox that reports its changes to handle_change.

        Args:
            label (str): the label of the combobox
            options (List[str]): the options to offer
            width_chars (int): the width handed to the ComboBox

        Returns:
            ComboBox: the new combobox
        """
        combobox = ComboBox(
            label=label,
            options=options,
            width_chars=width_chars,
            clearable=True,
            on_change=self.handle_change,
        )
        return combobox

    async def handle_change(self):
        """
        Copy the combobox values to self.qn, refresh the options and
        await the on_change callback if there is one.
        """
        selection = self.qn
        # a cleared combobox is stored as an empty string
        selection.domain = self.domain_select.select.value or ""
        selection.namespace = self.namespace_select.select.value or ""
        selection.name = self.name_select.select.value or ""

        self.qns.update(domain=selection.domain, namespace=selection.namespace)
        self.update_ui()

        callback = self.on_change
        if callback:
            await callback()

    def update_options(self, select_widget, options):
        """
        Hand the given options to the given select widget.
        """
        select_widget.update_options(options)

    def update_ui(self):
        """
        Push the current domains, namespaces and names of the QueryNameSet
        to their comboboxes via update_options.
        """
        for kind in ("domain", "namespace", "name"):
            widget = getattr(self, f"{kind}_select")
            self.update_options(widget, getattr(self.qns, f"{kind}s"))

`create_combobox(label, options, width_chars)`

Create a ComboBox with the given label, options, and width.

Source code in snapquery/query_selector.py

def create_combobox(self, label: str, options: List[str], width_chars: int) -> ComboBox:
    """
    Create a clearable ComboBox that reports its changes to handle_change.

    Args:
        label (str): the label of the combobox
        options (List[str]): the options to offer
        width_chars (int): the width handed to the ComboBox

    Returns:
        ComboBox: the new combobox
    """
    combobox = ComboBox(
        label=label,
        options=options,
        width_chars=width_chars,
        clearable=True,
        on_change=self.handle_change,
    )
    return combobox

`handle_change()` `async`

Update self.qn and call the provided on_change callback

Source code in snapquery/query_selector.py

async def handle_change(self):
    """
    Copy the combobox values to self.qn, refresh the options and
    await the on_change callback if there is one.
    """
    selection = self.qn
    # a cleared combobox is stored as an empty string
    selection.domain = self.domain_select.select.value or ""
    selection.namespace = self.namespace_select.select.value or ""
    selection.name = self.name_select.select.value or ""

    self.qns.update(domain=selection.domain, namespace=selection.namespace)
    self.update_ui()

    callback = self.on_change
    if callback:
        await callback()

`setup_ui()`

Setup the user interface for query selection using comboboxes.

Source code in snapquery/query_selector.py

def setup_ui(self):
    """
    Create the row with the domain, namespace and name comboboxes.
    """
    with ui.row() as self.select_row:
        for kind, label, width_chars in (
            ("domain", "Domain", 25),
            ("namespace", "Namespace", 40),
            ("name", "Name", 80),
        ):
            # the option lists are the plural attributes of the QueryNameSet
            options = getattr(self.qns, f"{kind}s")
            setattr(self, f"{kind}_select", self.create_combobox(label, options, width_chars))

`update_ui()`

Update UI components based on filtered results using the custom update_options method for safe sorting.

Source code in snapquery/query_selector.py

def update_ui(self):
    """
    Push the current domains, namespaces and names of the QueryNameSet
    to their comboboxes via update_options.
    """
    for kind in ("domain", "namespace", "name"):
        widget = getattr(self, f"{kind}_select")
        self.update_options(widget, getattr(self.qns, f"{kind}s"))

`scholia`

Created on 2024-05-04

@author: wf

`ScholiaQueries`

A class to handle the extraction and management of Scholia queries.

Source code in snapquery/scholia.py

class ScholiaQueries:
    """
    A class to handle the extraction and management of Scholia queries.

    The queries are the ``*.sparql`` files listed by the GitHub contents API
    under ``repository_url``. They are collected in a NamedQuerySet for the
    domain ``scholia.toolforge.org``.
    """

    repository_url = "https://api.github.com/repos/WDscholia/scholia/contents/scholia/app/templates"

    def __init__(self, nqm: NamedQueryManager, debug: bool = False):
        """
        Constructor

        Args:
            nqm (NamedQueryManager): The NamedQueryManager to use for storing queries.
            debug (bool): Enable debug output. Defaults to False.
        """
        self.nqm = nqm
        self.named_query_set = NamedQuerySet(
            domain="scholia.toolforge.org",
            namespace="named_queries",
            target_graph_name="wikidata",
        )
        self.debug = debug

    def get_scholia_file_list(self):
        """
        Retrieve the list of files from the Scholia repository.

        Returns:
            list: List of dictionaries representing file information.

        Raises:
            requests.RequestException: if the request fails, times out or
                returns an error status
        """
        headers = {"Accept": "application/vnd.github.v3+json"}
        # without a timeout requests may wait forever for an unresponsive server
        response = requests.get(self.repository_url, headers=headers, timeout=30)
        response.raise_for_status()  # Ensure we notice bad responses
        return response.json()

    def extract_query(self, file_info) -> NamedQuery:
        """
        Extract a single query from file information.

        Args:
            file_info (dict): Dictionary containing information about the file;
                the keys ``name`` and ``download_url`` are read.

        Returns:
            NamedQuery: The extracted NamedQuery object, or None if the file
                is not a ``.sparql`` file or has no name in front of the suffix.

        Raises:
            requests.RequestException: if the download fails, times out or
                returns an error status
        """
        suffix = ".sparql"
        file_name = file_info["name"]
        name = file_name[: -len(suffix)]
        if not file_name.endswith(suffix) or not name:
            return None
        download_url = file_info["download_url"]
        # without a timeout requests may wait forever for an unresponsive server
        file_response = requests.get(download_url, timeout=30)
        file_response.raise_for_status()
        return NamedQuery(
            domain=self.named_query_set.domain,
            namespace=self.named_query_set.namespace,
            name=name,
            url=download_url,
            title=name,
            description=name,
            comment="",
            sparql=file_response.text,
        )

    def extract_queries(self, limit: int = None):
        """
        Extract all queries from the Scholia repository.

        The extracted queries are appended to ``self.named_query_set.queries``.
        In debug mode a dot is printed per query and the number of queries
        collected so far is printed as a line break after every 80 queries.

        Args:
            limit (int, optional): Limit the number of queries fetched. Defaults to None.
        """
        queries = self.named_query_set.queries
        for file_info in self.get_scholia_file_list():
            named_query = self.extract_query(file_info)
            if not named_query:
                continue
            queries.append(named_query)
            if self.debug:
                # count the extracted queries, not the files: skipped files
                # must neither hide a line break nor inflate the number
                if len(queries) % 80 == 0:
                    print(f"{len(queries)}")
                print(".", end="", flush=True)
            if limit and len(queries) >= limit:
                break

        if self.debug:
            print(f"found {len(queries)} scholia queries")

    def save_to_json(self, file_path: str = "/tmp/scholia-queries.json"):
        """
        Save the NamedQuerySet to a JSON file.

        Args:
            file_path (str): Path to the JSON file.
        """
        self.named_query_set.save_to_json_file(file_path, indent=2)

    def store_queries(self):
        """
        Store the named queries into the database.
        """
        self.nqm.store_named_query_list(self.named_query_set)

`__init__(nqm, debug=False)`

Constructor

Parameters:

Name	Type	Description	Default
`nqm`	`NamedQueryManager`	The NamedQueryManager to use for storing queries.	required
`debug`	`bool`	Enable debug output. Defaults to False.	`False`

Source code in snapquery/scholia.py

def __init__(self, nqm: NamedQueryManager, debug: bool = False):
    """
    Constructor

    Creates the (still empty) NamedQuerySet for the domain
    ``scholia.toolforge.org`` that the extracted queries are collected in.

    Args:
        nqm (NamedQueryManager): The NamedQueryManager to use for storing queries.
        debug (bool): Enable debug output. Defaults to False.
    """
    self.debug = debug
    self.nqm = nqm
    query_set = NamedQuerySet(
        domain="scholia.toolforge.org",
        namespace="named_queries",
        target_graph_name="wikidata",
    )
    self.named_query_set = query_set

`extract_queries(limit=None)`

Extract all queries from the Scholia repository.

Parameters:

Name	Type	Description	Default
`limit`	`int`	Limit the number of queries fetched. Defaults to None.	`None`

Source code in snapquery/scholia.py

def extract_queries(self, limit: int = None):
    """
    Extract all queries from the Scholia repository.

    The extracted queries are appended to ``self.named_query_set.queries``.
    In debug mode a dot is printed per query and the number of queries
    collected so far is printed as a line break after every 80 queries.

    Args:
        limit (int, optional): Limit the number of queries fetched. Defaults to None.
    """
    queries = self.named_query_set.queries
    for file_info in self.get_scholia_file_list():
        named_query = self.extract_query(file_info)
        if not named_query:
            continue
        queries.append(named_query)
        if self.debug:
            # count the extracted queries, not the files: skipped files
            # must neither hide a line break nor inflate the number
            if len(queries) % 80 == 0:
                print(f"{len(queries)}")
            print(".", end="", flush=True)
        if limit and len(queries) >= limit:
            break

    if self.debug:
        print(f"found {len(queries)} scholia queries")

`extract_query(file_info)`

Extract a single query from file information.

Parameters:

Name	Type	Description	Default
`file_info`	`dict`	Dictionary containing information about the file.	required

Returns:

Name	Type	Description
`NamedQuery`	`NamedQuery`	The extracted NamedQuery object.

Source code in snapquery/scholia.py

def extract_query(self, file_info) -> NamedQuery:
    """
    Extract a single query from file information.

    Args:
        file_info (dict): Dictionary containing information about the file;
            the keys ``name`` and ``download_url`` are read.

    Returns:
        NamedQuery: The extracted NamedQuery object, or None if the file
            is not a ``.sparql`` file or has no name in front of the suffix.

    Raises:
        requests.RequestException: if the download fails, times out or
            returns an error status
    """
    suffix = ".sparql"
    file_name = file_info["name"]
    name = file_name[: -len(suffix)]
    if not file_name.endswith(suffix) or not name:
        return None
    download_url = file_info["download_url"]
    # without a timeout requests may wait forever for an unresponsive server
    file_response = requests.get(download_url, timeout=30)
    file_response.raise_for_status()
    return NamedQuery(
        domain=self.named_query_set.domain,
        namespace=self.named_query_set.namespace,
        name=name,
        url=download_url,
        title=name,
        description=name,
        comment="",
        sparql=file_response.text,
    )

`get_scholia_file_list()`

Retrieve the list of SPARQL files from the Scholia repository.

Returns:

Name	Type	Description
`list`		List of dictionaries representing file information.

Source code in snapquery/scholia.py

def get_scholia_file_list(self):
    """
    Retrieve the list of files from the Scholia repository.

    Returns:
        list: List of dictionaries representing file information.

    Raises:
        requests.RequestException: if the request fails, times out or
            returns an error status
    """
    headers = {"Accept": "application/vnd.github.v3+json"}
    # without a timeout requests may wait forever for an unresponsive server
    response = requests.get(self.repository_url, headers=headers, timeout=30)
    response.raise_for_status()  # Ensure we notice bad responses
    return response.json()

`save_to_json(file_path='/tmp/scholia-queries.json')`

Save the NamedQuerySet to a JSON file.

Parameters:

Name	Type	Description	Default
`file_path`	`str`	Path to the JSON file.	`'/tmp/scholia-queries.json'`

Source code in snapquery/scholia.py

def save_to_json(self, file_path: str = "/tmp/scholia-queries.json"):
    """
    Write the collected NamedQuerySet to a JSON file (indented by 2).

    Args:
        file_path (str): Path to the JSON file.
    """
    query_set = self.named_query_set
    query_set.save_to_json_file(file_path, indent=2)

`store_queries()`

Store the named queries into the database.

Source code in snapquery/scholia.py

def store_queries(self):
    """
    Hand the collected named query set to the NamedQueryManager
    which stores it into the database.
    """
    query_set = self.named_query_set
    self.nqm.store_named_query_list(query_set)

`snapquery_cmd`

Created on 2024-05-03

@author: wf

`SnapQueryCmd`

Bases: WebserverCmd

Command line interface for snapquery

Source code in snapquery/snapquery_cmd.py

class SnapQueryCmd(WebserverCmd):
    """
    Command line interface for snapquery
    """

    def getArgParser(self, description: str, version_msg) -> ArgumentParser:
        """
        Extend the argument parser of the base class with the snapquery options.

        Args:
            description (str): passed on to the base class parser
            version_msg: passed on to the base class parser

        Returns:
            ArgumentParser: the parser including the snapquery arguments
        """
        parser = super().getArgParser(description, version_msg)
        # see https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/querymain.py
        parser.add_argument(
            "-ep",
            "--endpointPath",
            default=None,
            help="path to yaml file to configure endpoints to use for queries",
        )
        # only endpoints known to the sample configuration may be chosen
        endpoint_names = list(NamedQueryManager.from_samples().endpoints.keys())
        parser.add_argument(
            "-en",
            "--endpointName",
            default="wikidata",
            choices=endpoint_names,
            help="Name of the endpoint to use for queries - use --listEndpoints to list available endpoints",
        )
        # plain on/off switches: short option, long option, help text
        switches = (
            ("-idb", "--initDatabase", "initialize the database"),
            ("-le", "--listEndpoints", "show the list of available endpoints"),
            ("-lm", "--listMetaqueries", "show the list of available metaqueries"),
            ("-ln", "--listNamespaces", "show the list of available namespaces"),
            ("-lg", "--listGraphs", "show the list of available graphs"),
            ("-tq", "--testQueries", "test run the queries"),
        )
        for short_option, long_option, help_text in switches:
            parser.add_argument(short_option, long_option, action="store_true", help=help_text)
        parser.add_argument("--limit", type=int, default=None, help="set limit parameter of query")
        parser.add_argument(
            "--params",
            action=StoreDictKeyPair,
            help="query parameters as Key-value pairs in the format key1=value1,key2=value2",
        )
        parser.add_argument(
            "--domain",
            type=str,
            default="wikidata.org",
            help="domain to filter queries",
        )
        parser.add_argument(
            "--namespace",
            type=str,
            default="examples",
            help="namespace to filter queries",
        )
        parser.add_argument("-qn", "--queryName", help="run a named query")
        parser.add_argument(
            "query_id",
            nargs="?",  # Make it optional
            help="Query ID in the format 'name[--namespace[@domain]]'",
        )
        parser.add_argument("-f", "--format", type=Format, choices=list(Format))
        parser.add_argument(
            "--import",
            dest="import_file",
            help="Import named queries from a JSON file.",
        )
        parser.add_argument(
            "--context",
            type=str,
            default="test",
            help="context name to store the execution statistics with",
        )
        parser.add_argument(
            "--prefix_merger",
            type=str,
            default=QueryPrefixMerger.default_merger().name,
            choices=[merger.name for merger in QueryPrefixMerger],
            help="query prefix merger to use",
        )
        return parser

    def cmd_parse(self, argv: Optional[list] = None):
        """
        Parse the argument list and prepare the logging.

        The log level is DEBUG if the debug argument is set and INFO
        otherwise. If the parsed arguments have a ``func`` attribute it is
        called with the arguments.

        Args:
            argv (list, optional): list of command line arguments

        Returns:
            the parsed arguments (``self.args``)
        """
        super().cmd_parse(argv)
        level = logging.DEBUG if self.args.debug else logging.INFO
        logging.basicConfig(level=level)
        if hasattr(self.args, "func"):
            self.args.func(self.args)
        return self.args

    def handle_args(self) -> bool:
        """
        Handle the command line arguments.

        After the base class handled its own arguments at most one of the
        snapquery actions is performed: list endpoints, graphs, metaqueries
        or namespaces, test run the queries, run a single named query or
        import queries from a JSON file.

        Returns:
            bool: True if the base class or one of the actions above
                handled the arguments

        Raises:
            Exception: if the query to run has parameters but no --params were given
        """
        # Call the superclass handle_args to maintain base class behavior
        handled = super().handle_args()
        args = self.args
        self.debug = args.debug
        # create the manager only once so that self.nqm is the very instance
        # that --initDatabase (re-)initialized
        nqm = NamedQueryManager.from_samples(force_init=args.initDatabase)
        self.nqm = nqm
        if args.listEndpoints:
            for endpoint in nqm.endpoints.values():
                print(endpoint)
            handled = True
        elif args.listGraphs:
            print(nqm.gm.to_json(indent=2))
            handled = True
        elif args.listMetaqueries:
            for name, query in nqm.meta_qm.queriesByName.items():
                print(f"{name}:{query.title}")
            handled = True
        elif args.listNamespaces:
            for namespace, count in nqm.get_namespaces().items():
                print(f"{namespace}:{count}")
            handled = True
        elif args.testQueries:
            if args.endpointName:
                endpoint_names = [args.endpointName]
            else:
                endpoint_names = list(nqm.endpoints.keys())
            queries = nqm.get_all_queries(domain=args.domain, namespace=args.namespace)
            execution = Execution(nqm, debug=args.debug)
            total = len(queries)
            for i, nq in enumerate(queries, start=1):
                for endpoint_name in endpoint_names:
                    execution.execute(
                        nq,
                        endpoint_name=endpoint_name,
                        context=args.context,
                        title=f"query {i:3}/{total}::{endpoint_name}",
                        prefix_merger=QueryPrefixMerger.get_by_name(args.prefix_merger),
                    )
            # the test run is an operation of its own like the listings above
            handled = True
        elif args.queryName is not None or args.query_id is not None:
            # the positional query id wins over --queryName/--namespace/--domain
            if args.query_id is not None:
                query_name = QueryName.from_query_id(args.query_id)
            else:
                query_name = QueryName(
                    name=args.queryName,
                    namespace=args.namespace,
                    domain=args.domain,
                )
            qb = nqm.get_query(query_name=query_name, endpoint_name=args.endpointName, limit=args.limit)
            query = qb.query
            params = Params(query.query)
            if params.has_params:
                if not args.params:
                    raise Exception(f"{query.name} needs parameters")
                params.set(args.params)
                query.query = params.apply_parameters()
            r_format = args.format
            if r_format == Format.raw:
                formatted_result = qb.raw_query()
            else:
                qlod = qb.get_lod()
                formatted_result = qb.format_result(qlod=qlod, r_format=r_format)
            print(formatted_result)
            handled = True
        elif args.import_file:
            self.handle_import(args.import_file)
            handled = True
        return handled

    def handle_import(self, json_file: str):
        """
        Handle the import of named queries from a JSON file.

        The queries are imported with storing and progress display enabled
        and the number of imported queries is printed.

        Args:
            json_file (str): Path to the JSON file to import.
        """
        nqm = NamedQueryManager.from_samples()
        qimport = QueryImport(nqm=nqm)
        nq_list = qimport.import_from_json_file(json_file, with_store=True, show_progress=True)
        print(f"Imported {len(nq_list.queries)} named queries from {json_file}.")

`cmd_parse(argv=None)`

parse the argument lists and prepare

Parameters:

Name	Type	Description	Default
`argv`	`Optional[list]`	list of command line arguments	`None`

Source code in snapquery/snapquery_cmd.py

def cmd_parse(self, argv: Optional[list] = None):
    """
    Parse the argument list and prepare the logging.

    The log level is DEBUG if the debug argument is set and INFO
    otherwise. If the parsed arguments have a ``func`` attribute it is
    called with the arguments.

    Args:
        argv (list, optional): list of command line arguments

    Returns:
        the parsed arguments (``self.args``)
    """
    super().cmd_parse(argv)
    level = logging.DEBUG if self.args.debug else logging.INFO
    logging.basicConfig(level=level)
    if hasattr(self.args, "func"):
        self.args.func(self.args)
    return self.args

`getArgParser(description, version_msg)`

override the default argparser call

Source code in snapquery/snapquery_cmd.py

def getArgParser(self, description: str, version_msg) -> ArgumentParser:
    """
    Extend the argument parser of the base class with the snapquery options.

    Args:
        description (str): passed on to the base class parser
        version_msg: passed on to the base class parser

    Returns:
        ArgumentParser: the parser including the snapquery arguments
    """
    parser = super().getArgParser(description, version_msg)
    # see https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/querymain.py
    parser.add_argument(
        "-ep",
        "--endpointPath",
        default=None,
        help="path to yaml file to configure endpoints to use for queries",
    )
    # only endpoints known to the sample configuration may be chosen
    endpoint_names = list(NamedQueryManager.from_samples().endpoints.keys())
    parser.add_argument(
        "-en",
        "--endpointName",
        default="wikidata",
        choices=endpoint_names,
        help="Name of the endpoint to use for queries - use --listEndpoints to list available endpoints",
    )
    # plain on/off switches: short option, long option, help text
    switches = (
        ("-idb", "--initDatabase", "initialize the database"),
        ("-le", "--listEndpoints", "show the list of available endpoints"),
        ("-lm", "--listMetaqueries", "show the list of available metaqueries"),
        ("-ln", "--listNamespaces", "show the list of available namespaces"),
        ("-lg", "--listGraphs", "show the list of available graphs"),
        ("-tq", "--testQueries", "test run the queries"),
    )
    for short_option, long_option, help_text in switches:
        parser.add_argument(short_option, long_option, action="store_true", help=help_text)
    parser.add_argument("--limit", type=int, default=None, help="set limit parameter of query")
    parser.add_argument(
        "--params",
        action=StoreDictKeyPair,
        help="query parameters as Key-value pairs in the format key1=value1,key2=value2",
    )
    parser.add_argument(
        "--domain",
        type=str,
        default="wikidata.org",
        help="domain to filter queries",
    )
    parser.add_argument(
        "--namespace",
        type=str,
        default="examples",
        help="namespace to filter queries",
    )
    parser.add_argument("-qn", "--queryName", help="run a named query")
    parser.add_argument(
        "query_id",
        nargs="?",  # Make it optional
        help="Query ID in the format 'name[--namespace[@domain]]'",
    )
    parser.add_argument("-f", "--format", type=Format, choices=list(Format))
    parser.add_argument(
        "--import",
        dest="import_file",
        help="Import named queries from a JSON file.",
    )
    parser.add_argument(
        "--context",
        type=str,
        default="test",
        help="context name to store the execution statistics with",
    )
    parser.add_argument(
        "--prefix_merger",
        type=str,
        default=QueryPrefixMerger.default_merger().name,
        choices=[merger.name for merger in QueryPrefixMerger],
        help="query prefix merger to use",
    )
    return parser

`handle_args()`

handle the command line args

Source code in snapquery/snapquery_cmd.py

def handle_args(self) -> bool:
    """
    Handle the command line arguments.

    After the base class handled its own arguments at most one of the
    snapquery actions is performed: list endpoints, graphs, metaqueries
    or namespaces, test run the queries, run a single named query or
    import queries from a JSON file.

    Returns:
        bool: True if the base class or one of the actions above
            handled the arguments

    Raises:
        Exception: if the query to run has parameters but no --params were given
    """
    # Call the superclass handle_args to maintain base class behavior
    handled = super().handle_args()
    args = self.args
    self.debug = args.debug
    # create the manager only once so that self.nqm is the very instance
    # that --initDatabase (re-)initialized
    nqm = NamedQueryManager.from_samples(force_init=args.initDatabase)
    self.nqm = nqm
    if args.listEndpoints:
        for endpoint in nqm.endpoints.values():
            print(endpoint)
        handled = True
    elif args.listGraphs:
        print(nqm.gm.to_json(indent=2))
        handled = True
    elif args.listMetaqueries:
        for name, query in nqm.meta_qm.queriesByName.items():
            print(f"{name}:{query.title}")
        handled = True
    elif args.listNamespaces:
        for namespace, count in nqm.get_namespaces().items():
            print(f"{namespace}:{count}")
        handled = True
    elif args.testQueries:
        if args.endpointName:
            endpoint_names = [args.endpointName]
        else:
            endpoint_names = list(nqm.endpoints.keys())
        queries = nqm.get_all_queries(domain=args.domain, namespace=args.namespace)
        execution = Execution(nqm, debug=args.debug)
        total = len(queries)
        for i, nq in enumerate(queries, start=1):
            for endpoint_name in endpoint_names:
                execution.execute(
                    nq,
                    endpoint_name=endpoint_name,
                    context=args.context,
                    title=f"query {i:3}/{total}::{endpoint_name}",
                    prefix_merger=QueryPrefixMerger.get_by_name(args.prefix_merger),
                )
        # the test run is an operation of its own like the listings above
        handled = True
    elif args.queryName is not None or args.query_id is not None:
        # the positional query id wins over --queryName/--namespace/--domain
        if args.query_id is not None:
            query_name = QueryName.from_query_id(args.query_id)
        else:
            query_name = QueryName(
                name=args.queryName,
                namespace=args.namespace,
                domain=args.domain,
            )
        qb = nqm.get_query(query_name=query_name, endpoint_name=args.endpointName, limit=args.limit)
        query = qb.query
        params = Params(query.query)
        if params.has_params:
            if not args.params:
                raise Exception(f"{query.name} needs parameters")
            params.set(args.params)
            query.query = params.apply_parameters()
        r_format = args.format
        if r_format == Format.raw:
            formatted_result = qb.raw_query()
        else:
            qlod = qb.get_lod()
            formatted_result = qb.format_result(qlod=qlod, r_format=r_format)
        print(formatted_result)
        handled = True
    elif args.import_file:
        self.handle_import(args.import_file)
        handled = True
    return handled

`handle_import(json_file)`

Handle the import of named queries from a JSON file.

Parameters:

Name	Type	Description	Default
`json_file`	`str`	Path to the JSON file to import.	required

Source code in snapquery/snapquery_cmd.py

def handle_import(self, json_file: str):
    """
    Import the named queries of a JSON file and report how many were imported.

    The import runs with storing and progress display enabled.

    Args:
        json_file (str): Path to the JSON file to import.
    """
    importer = QueryImport(nqm=NamedQueryManager.from_samples())
    imported = importer.import_from_json_file(json_file, with_store=True, show_progress=True)
    query_count = len(imported.queries)
    print(f"Imported {query_count} named queries from {json_file}.")

`main(argv=None)`

main call

Source code in snapquery/snapquery_cmd.py

def main(argv: list = None):
    """
    Run the snapquery command line with the configuration of the
    SnapQueryWebServer.

    Args:
        argv (list, optional): the command line arguments handed to cmd_main

    Returns:
        the exit code reported by cmd_main
    """
    webserver_cls = SnapQueryWebServer
    cmd = SnapQueryCmd(config=webserver_cls.get_config(), webserver_cls=webserver_cls)
    return cmd.cmd_main(argv)

`snapquery_core`

Created on 2024-05-03

@author: wf

`NamedQuery` `dataclass`

Bases: QueryName

A named query that encapsulates the details and SPARQL query for a specific purpose.

Attributes:

Name	Type	Description
`title`	`str`	A brief one-line title that describes the query.
`description`	`str`	A detailed multiline description of what the query does and the data it accesses.
`sparql`	`str`	The SPARQL query string. This might be hidden in future to encapsulate query details.
`query_id`	`str`	A unique identifier for the query, generated from namespace and name, used as a primary key.

Source code in snapquery/snapquery_core.py

@dataclass
class NamedQuery(QueryName):
    """
    A named query that encapsulates the details and SPARQL query for a specific purpose.

    Attributes:
        title (str): A brief one-line title that describes the query.
        description (str): A detailed multiline description of what the query does and the data it accesses.
        sparql (str): The SPARQL query string. This might be hidden in future to encapsulate query details.
        query_id (str): A unique identifier for the query, generated from namespace and name, used as a primary key.
    """

    # sparql query (to be hidden later)
    sparql: Optional[str] = None
    # the url of the source code of the query
    url: Optional[str] = None
    # one line title
    title: Optional[str] = None
    # multiline description
    description: Optional[str] = None
    comment: Optional[str] = None

    @classmethod
    def get_samples(cls) -> dict[str, list["NamedQuery"]]:
        """
        Get sample NamedQuery instances.

        Returns:
            dict: maps the namespace "snapquery-examples" to a list with the
                three wikidata.org example queries "cats", "bands" and "horses"
        """
        samples = {
            "snapquery-examples": [
                # items that are instances of house cat (Q146)
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="cats",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Cats",
                    title="Cats on Wikidata",
                    description="This query retrieves all items classified under 'house cat' (Q146) on Wikidata.",
                    comment="modified cats query from wikidata-examples",
                    sparql="""# snapquery cats example
SELECT ?item ?itemLabel
WHERE {
  ?item wdt:P31 wd:Q146. # Must be a cat
  OPTIONAL { ?item rdfs:label ?itemLabel. }
  FILTER (LANG(?itemLabel) = "en")
}
""",
                ),
                # rock bands whose English label starts with "M"
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="bands",
                    title="Rock bands",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Rock_bands_that_start_with_%22M%22",
                    description="""Rock bands that start with "M" """,
                    comment="",
                    sparql="""SELECT ?band ?bandLabel
WHERE {
  ?band wdt:P31 wd:Q5741069.
  ?band rdfs:label ?bandLabel.
  FILTER(LANG(?bandLabel)="en").
  FILTER(STRSTARTS(?bandLabel,"M")).
}""",
                ),
                # horses with parents, gender and birth/death years
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="horses",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Horses_(showing_some_info_about_them)",
                    title="Horses on Wikidata",
                    description="This query retrieves information about horses, including parents, gender, and approximate birth and death years.",
                    sparql="""# snapquery example horses
SELECT DISTINCT ?horse ?horseLabel ?mother ?motherLabel ?father ?fatherLabel
(year(?birthdate) as ?birthyear) (year(?deathdate) as ?deathyear) ?genderLabel
WHERE {
  ?horse wdt:P31/wdt:P279* wd:Q726 .     # Instance and subclasses of horse (Q726)
  OPTIONAL{?horse wdt:P25 ?mother .}     # Mother
  OPTIONAL{?horse wdt:P22 ?father .}     # Father
  OPTIONAL{?horse wdt:P569 ?birthdate .} # Birth date
  OPTIONAL{?horse wdt:P570 ?deathdate .} # Death date
  OPTIONAL{?horse wdt:P21 ?gender .}     # Gender
  OPTIONAL { ?horse rdfs:label ?horseLabel . FILTER (lang(?horseLabel) = "en") }
  OPTIONAL { ?mother rdfs:label ?motherLabel . FILTER (lang(?motherLabel) = "en") }
  OPTIONAL { ?father rdfs:label ?fatherLabel . FILTER (lang(?fatherLabel) = "en") }
  OPTIONAL { ?gender rdfs:label ?genderLabel . FILTER (lang(?genderLabel) = "en") }
}
ORDER BY ?horse
""",
                ),
            ]
        }
        return samples

    def as_link(self) -> str:
        """
        Render this query as a link to its details page.

        Returns:
            str: the result of Link.create for the target
                /query/<domain>/<namespace>/<name>, labelled with the query
                name and carrying the tooltip "query details"
        """
        return Link.create(
            f"/query/{self.domain}/{self.namespace}/{self.name}",
            self.name,
            "query details",
        )

    @classmethod
    def from_record(cls, record: Dict) -> "NamedQuery":
        """
        Instantiate a NamedQuery from a dictionary record.

        Args:
            record (Dict): the record to convert; the keys "domain",
                "namespace" and "name" are mandatory, while "title", "url",
                "description", "sparql" and "comment" are optional and
                default to None

        Returns:
            NamedQuery: the instance created from the record

        Raises:
            KeyError: if one of the mandatory keys is missing
        """
        return cls(
            domain=record["domain"],
            namespace=record["namespace"],
            name=record["name"],
            title=record.get("title"),
            url=record.get("url"),
            description=record.get("description"),
            sparql=record.get("sparql"),
            # comment is a regular field of the dataclass and must survive
            # a round trip through a record as well
            comment=record.get("comment"),
        )

    def as_record(self) -> Dict:
        """
        Convert this NamedQuery to a flat dictionary record.

        Returns:
            Dict: query_id, domain, namespace, name, url, title, description
                and sparql of this query, in that order (the comment is not
                part of the record)
        """
        record_keys = (
            "query_id",
            "domain",
            "namespace",
            "name",
            "url",
            "title",
            "description",
            "sparql",
        )
        return {key: getattr(self, key) for key in record_keys}

    def as_viewrecord(self) -> Dict:
        """
        Get a dictionary of this NamedQuery for display purposes.

        Returns:
            Dict: the keys domain, namespace, name, title and url in that
                order, where name is the link returned by as_link() and url
                is a link created from the source url of the query
        """
        source_link = Link.create(self.url, self.url)
        view_record = {
            "domain": self.domain,
            "namespace": self.namespace,
            "name": self.as_link(),
            "title": self.title,
            "url": source_link,
        }
        return view_record

`as_link()`

get me as a link

Source code in snapquery/snapquery_core.py

def as_link(self) -> str:
    """
    Render this query as a link to its details page.

    Returns:
        str: the result of Link.create for the target
            /query/<domain>/<namespace>/<name>, labelled with the query
            name and carrying the tooltip "query details"
    """
    return Link.create(
        f"/query/{self.domain}/{self.namespace}/{self.name}",
        self.name,
        "query details",
    )

`as_viewrecord()`

Return a dictionary representing the NamedQuery for display, with the keys domain, namespace, name, title and url (in that order); name and url are rendered as links.

Source code in snapquery/snapquery_core.py

def as_viewrecord(self) -> Dict:
    """
    Get a dictionary of this NamedQuery for display purposes.

    Returns:
        Dict: the keys domain, namespace, name, title and url in that
            order, where name is the link returned by as_link() and url
            is a link created from the source url of the query
    """
    source_link = Link.create(self.url, self.url)
    view_record = {
        "domain": self.domain,
        "namespace": self.namespace,
        "name": self.as_link(),
        "title": self.title,
        "url": source_link,
    }
    return view_record

`from_record(record)` `classmethod`

Class method to instantiate NamedQuery from a dictionary record.

Source code in snapquery/snapquery_core.py

@classmethod
def from_record(cls, record: Dict) -> "NamedQuery":
    """
    Instantiate a NamedQuery from a dictionary record.

    Args:
        record (Dict): the record to convert; the keys "domain",
            "namespace" and "name" are mandatory, while "title", "url",
            "description", "sparql" and "comment" are optional and
            default to None

    Returns:
        NamedQuery: the instance created from the record

    Raises:
        KeyError: if one of the mandatory keys is missing
    """
    return cls(
        domain=record["domain"],
        namespace=record["namespace"],
        name=record["name"],
        title=record.get("title"),
        url=record.get("url"),
        description=record.get("description"),
        sparql=record.get("sparql"),
        # comment is a regular field of the dataclass and must survive
        # a round trip through a record as well
        comment=record.get("comment"),
    )

`get_samples()` `classmethod`

get samples

Source code in snapquery/snapquery_core.py

    @classmethod
    def get_samples(cls) -> dict[str, list["NamedQuery"]]:
        """
        Get sample NamedQuery instances.

        Returns:
            dict: maps the namespace "snapquery-examples" to a list with the
                three wikidata.org example queries "cats", "bands" and "horses"
        """
        samples = {
            "snapquery-examples": [
                # items that are instances of house cat (Q146)
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="cats",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Cats",
                    title="Cats on Wikidata",
                    description="This query retrieves all items classified under 'house cat' (Q146) on Wikidata.",
                    comment="modified cats query from wikidata-examples",
                    sparql="""# snapquery cats example
SELECT ?item ?itemLabel
WHERE {
  ?item wdt:P31 wd:Q146. # Must be a cat
  OPTIONAL { ?item rdfs:label ?itemLabel. }
  FILTER (LANG(?itemLabel) = "en")
}
""",
                ),
                # rock bands whose English label starts with "M"
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="bands",
                    title="Rock bands",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Rock_bands_that_start_with_%22M%22",
                    description="""Rock bands that start with "M" """,
                    comment="",
                    sparql="""SELECT ?band ?bandLabel
WHERE {
  ?band wdt:P31 wd:Q5741069.
  ?band rdfs:label ?bandLabel.
  FILTER(LANG(?bandLabel)="en").
  FILTER(STRSTARTS(?bandLabel,"M")).
}""",
                ),
                # horses with parents, gender and birth/death years
                NamedQuery(
                    domain="wikidata.org",
                    namespace="snapquery-examples",
                    name="horses",
                    url="https://www.wikidata.org/wiki/Wikidata:SPARQL_query_service/queries/examples#Horses_(showing_some_info_about_them)",
                    title="Horses on Wikidata",
                    description="This query retrieves information about horses, including parents, gender, and approximate birth and death years.",
                    sparql="""# snapquery example horses
SELECT DISTINCT ?horse ?horseLabel ?mother ?motherLabel ?father ?fatherLabel
(year(?birthdate) as ?birthyear) (year(?deathdate) as ?deathyear) ?genderLabel
WHERE {
  ?horse wdt:P31/wdt:P279* wd:Q726 .     # Instance and subclasses of horse (Q726)
  OPTIONAL{?horse wdt:P25 ?mother .}     # Mother
  OPTIONAL{?horse wdt:P22 ?father .}     # Father
  OPTIONAL{?horse wdt:P569 ?birthdate .} # Birth date
  OPTIONAL{?horse wdt:P570 ?deathdate .} # Death date
  OPTIONAL{?horse wdt:P21 ?gender .}     # Gender
  OPTIONAL { ?horse rdfs:label ?horseLabel . FILTER (lang(?horseLabel) = "en") }
  OPTIONAL { ?mother rdfs:label ?motherLabel . FILTER (lang(?motherLabel) = "en") }
  OPTIONAL { ?father rdfs:label ?fatherLabel . FILTER (lang(?fatherLabel) = "en") }
  OPTIONAL { ?gender rdfs:label ?genderLabel . FILTER (lang(?genderLabel) = "en") }
}
ORDER BY ?horse
""",
                ),
            ]
        }
        return samples

`NamedQueryManager`

Manages the storage, retrieval, and execution of named SPARQL queries.

Source code in snapquery/snapquery_core.py

class NamedQueryManager:
    """
    Manages the storage, retrieval, and execution of named SPARQL queries.
    """

    def __init__(self, db_path: str = None, debug: bool = False):
        """
        Set up database access, endpoints, meta queries and graphs.

        Args:
            db_path (Optional[str]): The file path to the SQLite database. If None, the path from get_cache_path() is used.
            debug (bool): debug flag; it is stored and passed on to SQLDB.

        Attributes:
            debug (bool): Stores the debug state.
            sql_db (SQLDB): An instance of SQLDB to manage the SQLite database interactions.
            samples_path (str): the "samples" directory next to this module.
            endpoints (dict): SPARQL endpoints read from samples/endpoints.yaml.
            meta_qm (QueryManager): SQL queries read from samples/meta_query.yaml.
            gm (GraphManager): graphs loaded from the yaml file given by GraphManager.get_yaml_path().
            primary_keys (dict): primary key column per stored class.
            entity_infos (dict): EntityInfo instances per class, initially empty.
        """
        if db_path is None:
            db_path = NamedQueryManager.get_cache_path()
        self.debug = debug
        self.sql_db = SQLDB(dbname=db_path, check_same_thread=False, debug=debug)

        # the yaml files live in the "samples" directory next to this module
        module_dir = os.path.dirname(os.path.abspath(__file__))
        self.samples_path = os.path.join(module_dir, "samples")
        self.endpoints = EndpointManager.getEndpoints(
            endpointPath=os.path.join(self.samples_path, "endpoints.yaml"),
            lang="sparql",
            with_default=False,
        )
        self.meta_qm = QueryManager(
            queriesPath=os.path.join(self.samples_path, "meta_query.yaml"),
            with_default=False,
            lang="sql",
        )

        # Graph Manager
        self.gm = GraphManager.load_from_yaml_file(GraphManager.get_yaml_path())

        # SQL meta data handling: primary keys per entity class
        self.primary_keys = {
            QueryStats: "stats_id",
            NamedQuery: "query_id",
            QueryDetails: "query_id",
        }
        self.entity_infos = {}

    @classmethod
    def get_cache_path(cls) -> str:
        """
        Get the default path of the named queries SQLite database.

        The directory .solutions/snapquery/storage below the user's home
        directory is created if it does not exist yet.

        Returns:
            str: the path of named_queries.db inside that directory
        """
        storage_dir = f"{Path.home()}/.solutions/snapquery/storage"
        os.makedirs(storage_dir, exist_ok=True)
        return f"{storage_dir}/named_queries.db"

    @classmethod
    def from_samples(
        cls,
        db_path: Optional[str] = None,
        force_init: bool = False,
        with_backup: bool = True,
        debug: bool = False,
    ) -> "NamedQueryManager":
        """
        Creates and returns an instance of NamedQueryManager, optionally initializing it from sample data.

        The NamedQuery, QueryStats and QueryDetails tables are (re-)created
        from the sample records and the endpoints and graphs are stored when
        force_init is set or when the database file is missing or empty.

        Args:
            db_path (Optional[str]): Path to the database file. If None, the default path is used.
            force_init (bool): If True, the tables are dropped and recreated from the samples even if the database already exists.
            with_backup (bool): If True and force_init is True, moves an existing database file to a timestamped backup before reinitialization.
            debug (bool): debug flag passed on to the NamedQueryManager.

        Returns:
            NamedQueryManager: An instance of the manager initialized with the database at `db_path`.
        """
        if db_path is None:
            db_path = cls.get_cache_path()

        path_obj = Path(db_path)

        # Move an existing database file aside before it is re-initialized
        if force_init and with_backup and path_obj.exists():
            timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
            backup_path = path_obj.with_name(f"{path_obj.stem}-{timestamp}{path_obj.suffix}")
            path_obj.rename(backup_path)

        nqm = NamedQueryManager(db_path=db_path, debug=debug)
        if force_init or not path_obj.exists() or path_obj.stat().st_size == 0:
            # the primary keys match the primary_keys mapping set up in __init__
            for source_class, pk in [
                (NamedQuery, "query_id"),
                (QueryStats, "stats_id"),
                (QueryDetails, "query_id"),
            ]:
                # Fetch sample records from the specified class
                sample_records = cls.get_sample_records(source_class=source_class)

                # Define entity information dynamically based on the class and primary key
                entityInfo = EntityInfo(sample_records, name=source_class.__name__, primaryKey=pk)

                # Create and populate the table specific to each class
                nqm.sql_db.createTable(sample_records, source_class.__name__, withDrop=True)
                nqm.sql_db.store(sample_records, entityInfo, fixNone=True, replace=True)
            # store yaml defined entities to SQL database
            nqm.store_endpoints()
            nqm.store_graphs()
        return nqm

    def store_named_query_list(self, nq_set: NamedQuerySet):
        """
        Store all queries of the given named query set.

        Each query is converted to a dictionary with asdict and the
        resulting list is handed to store().

        Args:
            nq_set (NamedQuerySet): the set whose queries are to be stored
        """
        self.store(lod=[asdict(named_query) for named_query in nq_set.queries])

    def store_query_details_list(self, qd_list: List[QueryDetails]):
        """
        Store a list of QueryDetails instances in the database.

        Every instance is converted to a dictionary with asdict; the list of
        dictionaries is then handed to store() with QueryDetails as the
        source class.

        Args:
            qd_list (List[QueryDetails]): List of QueryDetails instances to be stored.
        """
        details_records = [asdict(query_details) for query_details in qd_list]
        self.store(lod=details_records, source_class=QueryDetails)

    def store_stats(self, stats_list: List[QueryStats]):
        """
        Store the given list of query statistics.

        Args:
            stats_list (List[QueryStats]): the statistics to store; each one
                is converted to a dictionary with asdict
        """
        stats_records = [asdict(query_stats) for query_stats in stats_list]
        self.store(lod=stats_records, source_class=QueryStats)

    def store_graphs(self, gm: GraphManager = None):
        """
        Store all graphs of a GraphManager in my SQL database.

        The Graph table is dropped and created again before the records
        are stored.

        Args:
            gm (GraphManager): the graph manager to take the graphs from;
                if None, self.gm is used
        """
        graph_manager = self.gm if gm is None else gm
        # one dictionary per Graph instance
        graph_records = list(map(asdict, graph_manager))
        self.store(lod=graph_records, source_class=Graph, with_create=True)

    def store_endpoints(self, endpoints: Optional[Dict[str, Endpoint]] = None):
        """
        Stores the given endpoints or self.endpoints into the SQL database.

        The SnapQueryEndpoint table is dropped and created again before the
        records are stored.

        Args:
            endpoints (Optional[Dict[str, LODStorageEndpoint]]): A dictionary of endpoints to store.
                If None, uses self.endpoints.
        """
        # This is a compatibility layer for pylodstorage Endpoints
        # as of 2024-06 pylodstorage Endpoint still uses @Jsonable which is
        # deprecated so we convert instances to our local endpoint modules Endpoint format
        # and use our store mechanism to create SQL records
        if endpoints is None:
            endpoints = self.endpoints

        # attributes copied from the source endpoint when they are available
        copied_attrs = (
            "lang",
            "endpoint",
            "website",
            "database",
            "method",
            "prefixes",
            "auth",
            "user",
            "password",
        )

        endpoints_lod = []
        for endpoint_name, lod_endpoint in endpoints.items():
            candidates = {"name": endpoint_name}
            candidates.update((attr, getattr(lod_endpoint, attr, None)) for attr in copied_attrs)
            # only hand over the values that are actually set
            available = {key: value for key, value in candidates.items() if value is not None}
            snap_endpoint = SnapQueryEndpoint(**available)
            endpoints_lod.append(asdict(snap_endpoint))

        self.store(lod=endpoints_lod, source_class=SnapQueryEndpoint, with_create=True)

    def execute_query(
        self,
        named_query: NamedQuery,
        params_dict: Dict,
        endpoint_name: str = "wikidata",
        limit: int = None,
        with_stats: bool = True,
        prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
    ):
        """
        Execute the given named query.

        Args:
            named_query(NamedQuery): the query to execute
            params_dict(Dict): the query parameters to apply (if any)
            endpoint_name(str): the endpoint where to execute the query
            limit(int): the record limit for the results (if any)
            with_stats(bool): if True collect and store query statistics
            prefix_merger: prefix merger to use

        Returns:
            tuple: the query results and the statistics; the statistics
                are None if with_stats is False
        """
        # Assemble the query bundle using the named query, endpoint, and limit
        query_bundle = self.as_query_bundle(named_query, endpoint_name, limit, prefix_merger)
        params = Params(query_bundle.query.query)
        if params.has_params:
            # replace the parameters of the query with the given values
            params.set(params_dict)
            query_bundle.query.query = params.apply_parameters()
        if not with_stats:
            return query_bundle.get_lod(), None
        results, stats = query_bundle.get_lod_with_stats()
        self.store_stats([stats])
        return results, stats

    def add_and_store(self, nq: NamedQuery):
        """
        Store a NamedQuery together with its query details.

        The QueryDetails are derived from the query id and the SPARQL text
        of the query before anything is written to the database.

        Args:
            nq (NamedQuery): The NamedQuery instance to add and store.
        """
        query_details = QueryDetails.from_sparql(query_id=nq.query_id, sparql=nq.sparql)
        self.store([asdict(nq)])
        self.store_query_details_list([query_details])

    def get_entity_info(self, source_class: Type) -> EntityInfo:
        """
        Get the EntityInfo for the given source class, creating it on first use.

        Args:
            source_class (Type): the class to get the entity information for

        Returns:
            EntityInfo: built from the sample records of the class and
                kept in self.entity_infos for later calls
        """
        if source_class in self.entity_infos:
            return self.entity_infos[source_class]
        # classes without a registered primary key get None
        primary_key = self.primary_keys.get(source_class, None)
        sample_records = self.get_sample_records(source_class)
        entity_info = EntityInfo(
            sample_records,
            name=source_class.__name__,
            primaryKey=primary_key,
            debug=self.debug,
        )
        self.entity_infos[source_class] = entity_info
        return entity_info

    def store(
        self,
        lod: List[Dict[str, Any]],
        source_class: Type = NamedQuery,
        with_create: bool = False,
    ) -> None:
        """
        Store a list of dictionaries in the table of the given source class.

        Args:
            lod (List[Dict[str, Any]]): List of dictionaries that represent the records to be stored.
            source_class (Type): The class from which the entity information is derived
                via get_entity_info().
            with_create (bool): if True drop and create the table before storing

        Raises:
            AttributeError: If the source class has no `get_samples` method
                to derive the entity information from.
        """
        database = self.sql_db
        info = self.get_entity_info(source_class)
        if with_create:
            database.createTable4EntityInfo(entityInfo=info, withDrop=True)
        # existing records with the same key are replaced
        database.store(lod, info, fixNone=True, replace=True)

    @classmethod
    def get_sample_records(cls, source_class: Type) -> List[Dict[str, Any]]:
        """
        Convert the sample instances of a source class to dictionary records.

        The `get_samples` method of the source class is expected to return
        a dictionary whose values are lists of dataclass instances.

        Args:
            source_class (Type): The class from which to fetch sample instances.

        Returns:
            List[Dict[str, Any]]: one record per sample instance, in the
                order of the groups and of the instances within each group.

        Raises:
            AttributeError: If the source_class does not have a `get_samples` method.
            ValueError: If one of the samples is not a dataclass instance.
        """
        if not hasattr(source_class, "get_samples"):
            raise AttributeError(f"The class {source_class.__name__} must have a 'get_samples' method.")

        records = []
        for sample_group in source_class.get_samples().values():
            for sample in sample_group:
                # only dataclass instances can be converted with asdict
                if not is_dataclass(sample):
                    raise ValueError(f"The instance of class {source_class.__name__} is not a dataclass instance")
                records.append(asdict(sample))
        return records

    def lookup(self, query_name: QueryName, lenient: bool = True) -> NamedQuery:
        """
        lookup the named query for the given structured query name


        Args:
            query_name(QueryName): the structured query name
            lenient(bool): if True handle multiple entry errors as warnings
        Returns:
            NamedQuery: the named query
        """
        qn = query_name
        query_id = qn.query_id
        sql_query = """SELECT
    *
FROM
    NamedQuery
WHERE
    query_id=?"""
        query_records = self.sql_db.query(sql_query, (query_id,))
        if not query_records:
            msg = f"NamedQuery not found for the specified query '{qn}'."
            raise ValueError(msg)

        query_count = len(query_records)
        if query_count != 1:
            msg = f"multiple entries ({query_count}) for query '{qn.name}' namespace '{qn.namespace} and domain '{qn.domain}' the id '{qn.query_id}' is not unique"
            if lenient:
                print(f"warning: {msg}")
            else:
                raise ValueError(msg)

        record = query_records[0]
        named_query = NamedQuery.from_record(record)
        return named_query

    def get_query(
        self,
        query_name: QueryName,
        endpoint_name: str = "wikidata",
        limit: int = None,
        prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
    ) -> QueryBundle:
        """
        Look up a named query and bundle it with an endpoint.

        Args:
            query_name (QueryName): a structured query name
            endpoint_name (str): The name of the endpoint to send the SPARQL query to, default is 'wikidata'.
            limit (int): The query limit (if any).
            prefix_merger: Prefix merger to use

        Returns:
            QueryBundle: named_query, query, and endpoint.
        """
        found_query = self.lookup(query_name=query_name)
        query_bundle = self.as_query_bundle(found_query, endpoint_name, limit, prefix_merger)
        return query_bundle

    def as_query_bundle(
        self,
        named_query: NamedQuery,
        endpoint_name: str,
        limit: int = None,
        prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
    ) -> QueryBundle:
        """
        Assemble a QueryBundle from a NamedQuery, an endpoint name and an optional limit.

        Args:
            named_query (NamedQuery): Named query object.
            endpoint_name (str): Name of the endpoint where the query should be executed.
            limit (int): Optional limit for the query.
            prefix_merger (QueryPrefixMerger): prefix merger handed to QueryPrefixMerger.merge_prefixes.

        Returns:
            QueryBundle: A bundle containing the named query, the query object, and the endpoint.

        Raises:
            ValueError: if endpoint_name is not one of the configured endpoints
        """
        if endpoint_name not in self.endpoints:
            raise ValueError(f"Invalid endpoint {endpoint_name}")
        target_endpoint = self.endpoints[endpoint_name]

        sparql_query = Query(
            name=named_query.name,
            query=named_query.sparql,
            lang="sparql",
            endpoint=target_endpoint.endpoint,
            limit=limit,
        )
        sparql_query.query = QueryPrefixMerger.merge_prefixes(
            named_query,
            sparql_query,
            target_endpoint,
            prefix_merger,
        )
        if limit:
            # a truthy limit is appended as a LIMIT clause on its own line
            sparql_query.query += f"\nLIMIT {limit}"
        return QueryBundle(named_query=named_query, query=sparql_query, endpoint=target_endpoint)

    def get_namespaces(self) -> Dict[str, int]:
        """
        Count the NamedQueries per namespace and domain.

        The rows are read ordered by the number of queries from lowest to
        highest and the dictionary keeps that order.

        Returns:
            Dict[str, int]: maps "<namespace>@<domain>" to the number of
                queries stored for that combination.
        """
        # Multi-line SQL query for better readability
        query = """
        SELECT domain,namespace, COUNT(*) AS query_count
        FROM NamedQuery
        GROUP BY domain,namespace
        ORDER BY COUNT(*)
        """
        counts_by_namespace: Dict[str, int] = {}
        for row in self.sql_db.query(query):
            domain, namespace = row["domain"], row["namespace"]
            counts_by_namespace[f"{namespace}@{domain}"] = int(row["query_count"])
        return counts_by_namespace

    def get_all_queries(
        self,
        namespace: str = "snapquery-examples",
        domain: str = "wikidata.org",
        limit: int = None,  # Default limit is None, meaning no limit
    ) -> List[NamedQuery]:
        """
        Retrieves named queries stored in the database, filtered by domain and namespace with pattern matching.
        Optionally limits the number of results.

        Args:
            namespace (str): Namespace filter, supports wildcard '%', e.g., 'example%' for partial matches.
            domain (str): Domain filter, supports wildcard '%', e.g., 'wikidata%' for partial matches.
            limit (int): Maximum number of NamedQueries to retrieve, defaults to None for unlimited.

        Returns:
            List[NamedQuery]: A list of NamedQuery instances in the database.
        """
        sql_query = """SELECT * FROM NamedQuery
WHERE domain LIKE ? AND namespace LIKE ?
ORDER BY domain,namespace,name"""
        params = (f"{domain}%", f"{namespace}%")

        if limit is not None:
            sql_query += " LIMIT ?"
            params += (limit,)

        query_records = self.sql_db.query(sql_query, params)
        named_queries = []
        for record in query_records:
            named_query = NamedQuery.from_record(record)
            named_queries.append(named_query)

        return named_queries

    def get_query_stats(self, query_id: str) -> list[QueryStats]:
        """
        Get the query statistics recorded for the given query id.

        Args:
            query_id: id of the query

        Returns:
            list of query stats; empty if there are no matching records
        """
        sql_query = """
        SELECT *
        FROM QueryStats
        WHERE query_id = ?
        """
        records = self.sql_db.query(sql_query, (query_id,))
        # an empty or missing result yields an empty list
        return [QueryStats.from_record(record) for record in records or []]

    def get_query_stats_by_context(self, context: str) -> list[QueryStats]:
        """
        Get the query statistics recorded for the given context.

        Args:
            context: the context value the statistics were stored with

        Returns:
            list of query stats
        """
        sql_query = """
        SELECT *
        FROM QueryStats
        WHERE context = ?
        """
        stats = []
        for record in self.sql_db.query(sql_query, (context,)):
            stats.append(QueryStats.from_record(record))
        return stats

`init(db_path=None, debug=False)`

Initializes the NamedQueryManager with a specific database path and a debug mode.

Parameters:

Name	Type	Description	Default
`db_path`	`Optional[str]`	The file path to the SQLite database. If None, the default cache path is used.	`None`
`debug`	`bool`	If True, enables debug mode which may provide additional logging and error reporting.	`False`

Attributes:

Name	Type	Description
`debug`	`bool`	Stores the debug state.
`sql_db`	`SQLDB`	An instance of SQLDB to manage the SQLite database interactions.
`endpoints`	`dict`	A dictionary of SPARQL endpoints configured for use.

Source code in snapquery/snapquery_core.py

def __init__(self, db_path: str = None, debug: bool = False):
    """
    Set up the manager: SQLite access, SPARQL endpoints, meta queries and graphs.

    Args:
        db_path (Optional[str]): The file path to the SQLite database.
            If None, NamedQueryManager.get_cache_path() is used.
        debug (bool): debug flag, stored and also handed to the SQLDB instance.

    Attributes:
        debug (bool): Stores the debug state.
        sql_db (SQLDB): SQLDB instance opened with check_same_thread=False.
        samples_path (str): the "samples" directory next to this module.
        endpoints (dict): SPARQL endpoints read from samples/endpoints.yaml.
        meta_qm (QueryManager): SQL meta queries read from samples/meta_query.yaml.
        gm (GraphManager): graphs loaded from GraphManager.get_yaml_path().
        primary_keys (dict): primary key column name per storable class.
        entity_infos (dict): cache filled lazily by get_entity_info.
    """
    sqlite_path = NamedQueryManager.get_cache_path() if db_path is None else db_path
    self.debug = debug
    self.sql_db = SQLDB(dbname=sqlite_path, check_same_thread=False, debug=debug)
    # the yaml samples are located relative to the current Python module
    module_dir = os.path.dirname(os.path.abspath(__file__))
    self.samples_path = os.path.join(module_dir, "samples")
    self.endpoints = EndpointManager.getEndpoints(
        endpointPath=os.path.join(self.samples_path, "endpoints.yaml"),
        lang="sparql",
        with_default=False,
    )
    self.meta_qm = QueryManager(
        queriesPath=os.path.join(self.samples_path, "meta_query.yaml"),
        with_default=False,
        lang="sql",
    )
    # Graph Manager
    self.gm = GraphManager.load_from_yaml_file(GraphManager.get_yaml_path())
    # SQL meta data handling: primary key column per entity class
    self.primary_keys = {
        QueryStats: "stats_id",
        NamedQuery: "query_id",
        QueryDetails: "query_id",
    }
    self.entity_infos = {}

`add_and_store(nq)`

Adds a new NamedQuery instance and stores it in the database.

Parameters:

Name	Type	Description	Default
`nq`	`NamedQuery`	The NamedQuery instance to add and store.	required

Source code in snapquery/snapquery_core.py

def add_and_store(self, nq: NamedQuery):
    """
    Persist a NamedQuery together with the QueryDetails derived from its SPARQL text.

    Args:
        nq (NamedQuery): The NamedQuery instance to add and store.

    """
    # the details are derived before anything is written to the database
    details = QueryDetails.from_sparql(query_id=nq.query_id, sparql=nq.sparql)
    self.store([asdict(nq)])
    self.store_query_details_list([details])

`as_query_bundle(named_query, endpoint_name, limit=None, prefix_merger=QueryPrefixMerger.SIMPLE_MERGER)`

Assembles a QueryBundle from a NamedQuery, endpoint name, and optional limit.

Parameters:

Name	Type	Description	Default
`named_query`	`NamedQuery`	Named query object.	required
`endpoint_name`	`str`	Name of the endpoint where the query should be executed.	required
`limit`	`int`	Optional limit for the query.	`None`

Returns:

Name	Type	Description
`QueryBundle`	`QueryBundle`	A bundle containing the named query, the query object, and the endpoint.

Source code in snapquery/snapquery_core.py

def as_query_bundle(
    self,
    named_query: NamedQuery,
    endpoint_name: str,
    limit: int = None,
    prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
) -> QueryBundle:
    """
    Build the QueryBundle for running a NamedQuery against a configured endpoint.

    Args:
        named_query (NamedQuery): Named query object.
        endpoint_name (str): Name of the endpoint where the query should be executed;
            must be a key of self.endpoints.
        limit (int): Optional limit for the query; when truthy a LIMIT clause
            is appended to the prefix-merged query text.
        prefix_merger (QueryPrefixMerger): prefix merging strategy handed to
            QueryPrefixMerger.merge_prefixes.

    Returns:
        QueryBundle: A bundle containing the named query, the query object, and the endpoint.

    Raises:
        ValueError: if endpoint_name is not a configured endpoint.
    """
    configured = self.endpoints
    if endpoint_name not in configured:
        raise ValueError(f"Invalid endpoint {endpoint_name}")
    target = configured[endpoint_name]

    sparql_query = Query(
        name=named_query.name,
        query=named_query.sparql,
        lang="sparql",
        endpoint=target.endpoint,
        limit=limit,
    )
    # replace the raw SPARQL text by the prefix-merged version
    sparql_query.query = QueryPrefixMerger.merge_prefixes(named_query, sparql_query, target, prefix_merger)
    if limit:
        sparql_query.query += f"\nLIMIT {limit}"
    return QueryBundle(named_query=named_query, query=sparql_query, endpoint=target)

`execute_query(named_query, params_dict, endpoint_name='wikidata', limit=None, with_stats=True, prefix_merger=QueryPrefixMerger.SIMPLE_MERGER)`

execute the given named_query

Parameters:

Name	Type	Description	Default
`named_query(NamedQuery)`		the query to execute	required
`params_dict(Dict)`		the query parameters to apply (if any)	required
`endpoint_name(str)`		the endpoint where to execute the query	required
`limit(int)`		the record limit for the results (if any)	required
`with_stats(bool)`		if True run the stats	required
`prefix_merger`	`QueryPrefixMerger`	prefix merger to use	`SIMPLE_MERGER`

Source code in snapquery/snapquery_core.py

def execute_query(
    self,
    named_query: NamedQuery,
    params_dict: Dict,
    endpoint_name: str = "wikidata",
    limit: int = None,
    with_stats: bool = True,
    prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
):
    """
    Run the given named query on an endpoint and return its results.

    Args:
        named_query(NamedQuery): the query to execute
        params_dict(Dict): the query parameters to apply (if any)
        endpoint_name(str): the endpoint where to execute the query
        limit(int): the record limit for the results (if any)
        with_stats(bool): if True collect query statistics and store them
        prefix_merger: prefix merger to use

    Returns:
        tuple: (results, stats) - stats is None when with_stats is False
    """
    # Assemble the query bundle using the named query, endpoint, and limit
    bundle = self.as_query_bundle(named_query, endpoint_name, limit, prefix_merger)
    query_params = Params(bundle.query.query)
    if query_params.has_params:
        # substitute the given values into the parameterized query text
        query_params.set(params_dict)
        bundle.query.query = query_params.apply_parameters()
    if not with_stats:
        return bundle.get_lod(), None
    # Execute the query and persist the statistics of this run
    results, stats = bundle.get_lod_with_stats()
    self.store_stats([stats])
    return results, stats

`from_samples(db_path=None, force_init=False, with_backup=True, debug=False)` `classmethod`

Creates and returns an instance of NamedQueryManager, optionally initializing it from sample data.

Parameters:

Name	Type	Description	Default
`db_path`	`Optional[str]`	Path to the database file. If None, the default path is used.	`None`
`force_init`	`bool`	If True, the existing database file is dropped and recreated, and backed up if with_backup is True.	`False`
`with_backup`	`bool`	If True and force_init is True, moves the database file to a backup location before reinitialization.	`True`
`debug`	`bool`	If True, enables debug mode which may provide additional logging.	`False`

Returns:

Name	Type	Description
`NamedQueryManager`	`NamedQueryManager`	An instance of the manager initialized with the database at `db_path`.

Source code in snapquery/snapquery_core.py

@classmethod
def from_samples(
    cls,
    db_path: Optional[str] = None,
    force_init: bool = False,
    with_backup: bool = True,
    debug: bool = False,
) -> "NamedQueryManager":
    """
    Creates and returns an instance of NamedQueryManager, optionally initializing it from sample data.

    The sample tables are (re)created when force_init is set, when the database
    file does not exist, or when it exists but is empty.

    Args:
        db_path (Optional[str]): Path to the database file. If None, the default path is used.
        force_init (bool): If True, the tables are dropped and recreated from the samples;
            an existing database file is backed up first if with_backup is True.
        with_backup (bool): If True and force_init is True, moves the database file to a
            timestamped backup location before reinitialization.
        debug (bool): If True, enables debug mode which may provide additional logging.

    Returns:
        NamedQueryManager: An instance of the manager initialized with the database at `db_path`.
    """
    if db_path is None:
        db_path = cls.get_cache_path()

    path_obj = Path(db_path)

    # Move an existing database file aside before it is re-initialized
    if force_init and with_backup and path_obj.exists():
        timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
        backup_path = path_obj.with_name(f"{path_obj.stem}-{timestamp}{path_obj.suffix}")
        path_obj.rename(backup_path)

    nqm = NamedQueryManager(db_path=db_path, debug=debug)
    # a zero-size file is treated like a missing database
    if force_init or not path_obj.exists() or path_obj.stat().st_size == 0:
        # primary keys must agree with the primary_keys mapping set up in __init__
        for source_class, pk in [
            (NamedQuery, "query_id"),
            (QueryStats, "stats_id"),
            (QueryDetails, "query_id"),
        ]:
            # Fetch sample records from the specified class
            sample_records = cls.get_sample_records(source_class=source_class)

            # Define entity information dynamically based on the class and primary key
            entityInfo = EntityInfo(sample_records, name=source_class.__name__, primaryKey=pk)

            # Create and populate the table specific to each class
            nqm.sql_db.createTable(sample_records, source_class.__name__, withDrop=True)
            nqm.sql_db.store(sample_records, entityInfo, fixNone=True, replace=True)
        # store yaml defined entities to SQL database
        nqm.store_endpoints()
        nqm.store_graphs()
    return nqm

`get_all_queries(namespace='snapquery-examples', domain='wikidata.org', limit=None)`

Retrieves named queries stored in the database, filtered by domain and namespace with pattern matching. Optionally limits the number of results.

Parameters:

Name	Type	Description	Default
`namespace`	`str`	Namespace filter, supports wildcard '%', e.g., 'example%' for partial matches.	`'snapquery-examples'`
`domain`	`str`	Domain filter, supports wildcard '%', e.g., 'wikidata%' for partial matches.	`'wikidata.org'`
`limit`	`int`	Maximum number of NamedQueries to retrieve, defaults to None for unlimited.	`None`

Returns:

Type	Description
`List[NamedQuery]`	List[NamedQuery]: A list of NamedQuery instances in the database.

Source code in snapquery/snapquery_core.py

    def get_all_queries(
        self,
        namespace: str = "snapquery-examples",
        domain: str = "wikidata.org",
        limit: int = None,  # Default limit is None, meaning no limit
    ) -> List[NamedQuery]:
        """
        Retrieves named queries stored in the database, filtered by domain and namespace with pattern matching.
        Optionally limits the number of results.

        Args:
            namespace (str): Namespace filter, supports wildcard '%', e.g., 'example%' for partial matches.
            domain (str): Domain filter, supports wildcard '%', e.g., 'wikidata%' for partial matches.
            limit (int): Maximum number of NamedQueries to retrieve, defaults to None for unlimited.

        Returns:
            List[NamedQuery]: A list of NamedQuery instances in the database.
        """
        sql_query = """SELECT * FROM NamedQuery
WHERE domain LIKE ? AND namespace LIKE ?
ORDER BY domain,namespace,name"""
        params = (f"{domain}%", f"{namespace}%")

        if limit is not None:
            sql_query += " LIMIT ?"
            params += (limit,)

        query_records = self.sql_db.query(sql_query, params)
        named_queries = []
        for record in query_records:
            named_query = NamedQuery.from_record(record)
            named_queries.append(named_query)

        return named_queries

`get_entity_info(source_class)`

Gets or creates EntityInfo for the given source class.

Source code in snapquery/snapquery_core.py

def get_entity_info(self, source_class: Type) -> EntityInfo:
    """
    Return the EntityInfo for the given source class, creating and caching it on first use.

    The EntityInfo is built from the sample records of the class and the
    primary key registered for it in self.primary_keys (None if unregistered).
    """
    if source_class in self.entity_infos:
        return self.entity_infos[source_class]

    primary_key = self.primary_keys.get(source_class, None)
    sample_records = self.get_sample_records(source_class)
    entity_info = EntityInfo(
        sample_records,
        name=source_class.__name__,
        primaryKey=primary_key,
        debug=self.debug,
    )
    self.entity_infos[source_class] = entity_info
    return entity_info

`get_namespaces()`

Retrieves all unique namespaces and the count of NamedQueries associated with each from the database, sorted by the count of queries from lowest to highest.

Returns:

Type	Description
`Dict[str, int]`	Dict[str, int]: A dictionary where keys have the form 'namespace@domain' and values are the counts of associated queries, sorted by count.

Source code in snapquery/snapquery_core.py

def get_namespaces(self) -> Dict[str, int]:
    """
    Count the NamedQueries per (domain, namespace) pair, ordered by ascending count.

    Returns:
        Dict[str, int]: maps "namespace@domain" to the number of queries
            stored for that pair, in the order delivered by the SQL query.
    """
    sql_query = """
    SELECT domain,namespace, COUNT(*) AS query_count
    FROM NamedQuery
    GROUP BY domain,namespace
    ORDER BY COUNT(*)
    """
    rows = self.sql_db.query(sql_query)
    # pull the three columns out of each row before building the keys
    triples = ((row["domain"], row["namespace"], int(row["query_count"])) for row in rows)
    return {f"{namespace}@{domain}": count for domain, namespace, count in triples}

`get_query(query_name, endpoint_name='wikidata', limit=None, prefix_merger=QueryPrefixMerger.SIMPLE_MERGER)`

Get the query for the given parameters.

Parameters:

Name	Type	Description	Default
`query_name`	`QueryName`	(QueryName):a structured query name	required
`endpoint_name`	`str`	The name of the endpoint to send the SPARQL query to, default is 'wikidata'.	`'wikidata'`
`limit`	`int`	The query limit (if any).	`None`
`prefix_merger`	`QueryPrefixMerger`	Prefix merger to use	`SIMPLE_MERGER`

Returns: QueryBundle: named_query, query, and endpoint.

Source code in snapquery/snapquery_core.py

def get_query(
    self,
    query_name: QueryName,
    endpoint_name: str = "wikidata",
    limit: int = None,
    prefix_merger: QueryPrefixMerger = QueryPrefixMerger.SIMPLE_MERGER,
) -> QueryBundle:
    """
    Look up a named query and bundle it with the given endpoint.

    Args:
        query_name (QueryName): a structured query name
        endpoint_name (str): The name of the endpoint to send the SPARQL query to, default is 'wikidata'.
        limit (int): The query limit (if any).
        prefix_merger: Prefix merger to use
    Returns:
        QueryBundle: named_query, query, and endpoint.
    """
    return self.as_query_bundle(
        self.lookup(query_name=query_name),
        endpoint_name,
        limit,
        prefix_merger,
    )

`get_query_stats(query_id)`

Get query stats for the given query id. Args: query_id: id of the query

Returns:

Type	Description
`list[QueryStats]`	list of query stats

Source code in snapquery/snapquery_core.py

def get_query_stats(self, query_id: str) -> list[QueryStats]:
    """
    Fetch all stored execution statistics of a single query.

    Args:
        query_id: id of the query whose QueryStats records are selected

    Returns:
        list of QueryStats, empty if the database query yields nothing
    """
    sql_query = """
    SELECT *
    FROM QueryStats
    WHERE query_id = ?
    """
    stat_records = self.sql_db.query(sql_query, (query_id,))
    # a falsy result (None or no rows) maps to an empty list
    if not stat_records:
        return []
    return [QueryStats.from_record(stat_record) for stat_record in stat_records]

`get_query_stats_by_context(context)`

Get query stats for the given context. Args: context: the context value to filter the query stats by

Returns:

Type	Description
`list[QueryStats]`	list of query stats

Source code in snapquery/snapquery_core.py

def get_query_stats_by_context(self, context: str) -> list[QueryStats]:
    """
    Fetch all stored query statistics recorded for a context.

    Args:
        context: value the context column of QueryStats must equal

    Returns:
        list of query stats
    """
    sql_query = """
    SELECT *
    FROM QueryStats
    WHERE context = ?
    """
    stats = []
    for stat_record in self.sql_db.query(sql_query, (context,)):
        stats.append(QueryStats.from_record(stat_record))
    return stats

`get_sample_records(source_class)` `classmethod`

Generates a list of dictionary records based on the sample instances provided by a source class. This method utilizes the get_samples method of the source class, which should return a dictionary of sample instances.

Parameters:

Name	Type	Description	Default
`source_class`	`Type`	The class from which to fetch sample instances. This class must implement a `get_samples` method that returns a dictionary of instances categorized by some key.	required

Returns:

Type	Description
`List[Dict[str, Any]]`	List[Dict[str, Any]]: A list of dictionaries where each dictionary is a record that corresponds to a sample instance from the source class.

Raises:

Type	Description
`AttributeError`	If the source_class does not have a `get_samples` method.

Source code in snapquery/snapquery_core.py

@classmethod
def get_sample_records(cls, source_class: Type) -> List[Dict[str, Any]]:
    """
    Convert the sample instances of a source class into plain dictionary records.

    The source class provides its samples via `get_samples`, which is expected
    to return a dictionary whose values are iterables of dataclass instances.

    Args:
        source_class (Type): The class from which to fetch sample instances.
            This class must implement a `get_samples` method that returns
            a dictionary of instances categorized by some key.

    Returns:
        List[Dict[str, Any]]: one `asdict` record per sample instance, in the
            order the groups and their instances are delivered.

    Raises:
        AttributeError: If the source_class does not have a `get_samples` method.
        ValueError: If one of the samples is not a dataclass instance.
    """
    if not hasattr(source_class, "get_samples"):
        raise AttributeError(f"The class {source_class.__name__} must have a 'get_samples' method.")

    records = []
    # every value of the samples dictionary is a group of instances
    for group in source_class.get_samples().values():
        for sample in group:
            if not is_dataclass(sample):
                raise ValueError(f"The instance of class {source_class.__name__} is not a dataclass instance")
            records.append(asdict(sample))
    return records

`lookup(query_name, lenient=True)`

lookup the named query for the given structured query name

Parameters:

Name	Type	Description	Default
`query_name(QueryName)`		the structured query name	required
`lenient(bool)`		if True handle multiple entry errors as warnings	required

Returns: NamedQuery: the named query

Source code in snapquery/snapquery_core.py

    def lookup(self, query_name: QueryName, lenient: bool = True) -> NamedQuery:
        """
        lookup the named query for the given structured query name


        Args:
            query_name(QueryName): the structured query name
            lenient(bool): if True handle multiple entry errors as warnings
        Returns:
            NamedQuery: the named query
        """
        qn = query_name
        query_id = qn.query_id
        sql_query = """SELECT
    *
FROM
    NamedQuery
WHERE
    query_id=?"""
        query_records = self.sql_db.query(sql_query, (query_id,))
        if not query_records:
            msg = f"NamedQuery not found for the specified query '{qn}'."
            raise ValueError(msg)

        query_count = len(query_records)
        if query_count != 1:
            msg = f"multiple entries ({query_count}) for query '{qn.name}' namespace '{qn.namespace} and domain '{qn.domain}' the id '{qn.query_id}' is not unique"
            if lenient:
                print(f"warning: {msg}")
            else:
                raise ValueError(msg)

        record = query_records[0]
        named_query = NamedQuery.from_record(record)
        return named_query

`store(lod, source_class=NamedQuery, with_create=False)`

Stores the given list of dictionaries in the database using entity information derived from a specified source class.

Parameters:

Name	Type	Description	Default
`lod`	`List[Dict[str, Any]]`	List of dictionaries that represent the records to be stored.	required
`source_class`	`Type`	The class from which the entity information is derived. This class should have an attribute or method that defines its primary key and must have a `__name__` attribute. with_create(bool): if True create the table	`NamedQuery`

Raises: AttributeError: If the source class does not have the necessary method or attribute to define the primary key.

Source code in snapquery/snapquery_core.py

def store(
    self,
    lod: List[Dict[str, Any]],
    source_class: Type = NamedQuery,
    with_create: bool = False,
) -> None:
    """
    Write a list of record dictionaries to the table of the given source class.

    Records are stored with fixNone=True and replace=True using the EntityInfo
    obtained from get_entity_info.

    Args:
        lod (List[Dict[str, Any]]): List of dictionaries that represent the records to be stored.
        source_class (Type): The class the entity information is derived from;
            it must provide `get_samples` and a `__name__`.
        with_create (bool): if True drop and create the table before storing
    Raises:
        AttributeError: If the source class does not have a `get_samples` method.
    """
    info = self.get_entity_info(source_class)
    if with_create:
        # withDrop=True: an existing table is replaced
        self.sql_db.createTable4EntityInfo(entityInfo=info, withDrop=True)
    self.sql_db.store(lod, info, fixNone=True, replace=True)

`store_endpoints(endpoints=None)`

Stores the given endpoints or self.endpoints into the SQL database.

Parameters:

Name	Type	Description	Default
`endpoints`	`Optional[Dict[str, LODStorageEndpoint]]`	A dictionary of endpoints to store. If None, uses self.endpoints.	`None`

Source code in snapquery/snapquery_core.py

def store_endpoints(self, endpoints: Optional[Dict[str, Endpoint]] = None):
    """
    Convert the given endpoints (default: self.endpoints) to SnapQueryEndpoint
    records and store them in the SQL database, recreating the table.

    Args:
        endpoints (Optional[Dict[str, LODStorageEndpoint]]): A dictionary of endpoints to store,
            keyed by endpoint name. If None, uses self.endpoints.
    """
    # This is a compatibility layer for pylodstorage Endpoints
    # as of 2024-06 pylodstorage Endpoint still uses @Jsonable which is
    # deprecated so we convert instances to our local endpoint modules Endpoint format
    # and use our store mechanism to create SQL records
    source_endpoints = self.endpoints if endpoints is None else endpoints

    copied_attrs = (
        "lang",
        "endpoint",
        "website",
        "database",
        "method",
        "prefixes",
        "auth",
        "user",
        "password",
    )
    endpoints_lod = []
    for endpoint_name, lod_endpoint in source_endpoints.items():
        # collect only the values that are actually set (not None)
        available = {}
        if endpoint_name is not None:
            available["name"] = endpoint_name
        for attr in copied_attrs:
            value = getattr(lod_endpoint, attr, None)
            if value is not None:
                available[attr] = value

        # Create SnapQueryEndpoint instance with only the available attributes
        endpoints_lod.append(asdict(SnapQueryEndpoint(**available)))

    # Store the list of dictionaries in the database
    self.store(lod=endpoints_lod, source_class=SnapQueryEndpoint, with_create=True)

`store_graphs(gm=None)`

Stores all graphs managed by the given GraphManager into my SQL database

Source code in snapquery/snapquery_core.py

def store_graphs(self, gm: GraphManager = None):
    """
    Store every graph of the given GraphManager (default: self.gm) in my
    SQL database, recreating the Graph table
    """
    graph_manager = self.gm if gm is None else gm

    # each Graph instance becomes one dictionary record
    graph_records = []
    for graph in graph_manager:
        graph_records.append(asdict(graph))

    self.store(lod=graph_records, source_class=Graph, with_create=True)

`store_named_query_list(nq_set)`

store the given named query set

Parameters:

Name	Type	Description	Default
`nq_set`		NamedQuerySet	required

Source code in snapquery/snapquery_core.py

def store_named_query_list(self, nq_set: NamedQuerySet):
    """
    store all queries of the given named query set

    Args:
        nq_set: the NamedQuerySet whose queries are stored
    """
    query_records = [asdict(named_query) for named_query in nq_set.queries]
    self.store(lod=query_records)

`store_query_details_list(qd_list)`

Stores a list of QueryDetails instances into the database. This function converts each QueryDetails instance into a dictionary and then stores the entire list of dictionaries. It utilizes the 'store' method to handle database operations based on the entity information derived from the QueryDetails class.

Parameters:

Name	Type	Description	Default
`qd_list`	`List[QueryDetails]`	List of QueryDetails instances to be stored.	required

Source code in snapquery/snapquery_core.py

def store_query_details_list(self, qd_list: List[QueryDetails]):
    """
    Store QueryDetails instances in the database.

    Each instance is converted to a dictionary via asdict and the resulting
    list is handed to 'store' with QueryDetails as the source class.

    Args:
        qd_list (List[QueryDetails]): List of QueryDetails instances to be stored.
    """
    detail_records = [asdict(details) for details in qd_list]
    self.store(lod=detail_records, source_class=QueryDetails)

`store_stats(stats_list)`

store the given list of query statistics

Source code in snapquery/snapquery_core.py

def store_stats(self, stats_list: List[QueryStats]):
    """
    store the given query statistics as QueryStats records
    """
    stat_records = [asdict(query_stats) for query_stats in stats_list]
    self.store(lod=stat_records, source_class=QueryStats)

`NamedQuerySet`

a list/set of named queries which defines a namespace

Source code in snapquery/snapquery_core.py

@lod_storable
class NamedQuerySet:
    """
    a collection of named queries that together define a namespace
    """

    domain: str  # the domain of this NamedQuerySet
    namespace: str  # the namespace

    target_graph_name: str  # the name of the target graph
    queries: List[NamedQuery] = field(default_factory=list)

    def __len__(self):
        """
        the number of queries in this set
        """
        return len(self.queries)

    def __post_init__(self):
        """
        Build the query_id -> query lookup once the fields are set
        """
        lookup = {}
        for named_query in self.queries:
            lookup[named_query.query_id] = named_query
        self._query_dict = lookup

    def add(self, query: NamedQuery):
        """
        Append a query to the list and register it in the lookup dictionary;
        a query whose query_id is already registered is ignored
        """
        query_id = query.query_id
        if query_id in self._query_dict:
            return
        self.queries.append(query)
        self._query_dict[query_id] = query

`__post_init__()`

Initialize the dictionary after the object is created

Source code in snapquery/snapquery_core.py

def __post_init__(self):
    """
    Build the query_id -> query lookup once the fields are set
    """
    lookup = {}
    for named_query in self.queries:
        lookup[named_query.query_id] = named_query
    self._query_dict = lookup

`add(query)`

Add a query to both the list and dictionary

Source code in snapquery/snapquery_core.py

def add(self, query: NamedQuery):
    """
    Append a query to the list and register it in the lookup dictionary;
    a query whose query_id is already registered is ignored
    """
    query_id = query.query_id
    if query_id in self._query_dict:
        return
    self.queries.append(query)
    self._query_dict[query_id] = query

`QueryBundle`

Bundles a named query, a query, and an endpoint into a single manageable object, facilitating the execution of SPARQL queries.

Attributes:

Name	Type	Description
`named_query`	`NamedQuery`	The named query object, which includes metadata about the query.
`query`	`Query`	The actual query object that contains the SPARQL query string.
`endpoint`	`Endpoint`	The endpoint object where the SPARQL query should be executed.
`sparql`	`SPARQL`	A SPARQL service object initialized with the endpoint URL.

Source code in snapquery/snapquery_core.py

class QueryBundle:
    """
    Bundles a named query, a query, and an endpoint into a single manageable object, facilitating the execution of SPARQL queries.

    Attributes:
        named_query (NamedQuery): The named query object, which includes metadata about the query.
        query (Query): The actual query object that contains the SPARQL query string.
        endpoint (Endpoint): The endpoint object where the SPARQL query should be executed.
        sparql (SPARQL): A SPARQL service object initialized with the endpoint URL;
            only set once an endpoint has been given.
    """

    def __init__(self, named_query: NamedQuery, query: Query, endpoint: Endpoint = None):
        """
        Initializes a new instance of the QueryBundle class.

        Args:
            named_query (NamedQuery): An instance of NamedQuery that provides a named reference to the query.
            query (Query): An instance of Query containing the SPARQL query string.
            endpoint (Endpoint): An instance of Endpoint representing the SPARQL endpoint URL.
        """
        self.named_query = named_query
        self.query = query
        self.update_endpoint(endpoint)

    def update_endpoint(self, endpoint):
        """
        Set my endpoint and, if one is given, create the SPARQL service for it.

        Args:
            endpoint: the endpoint to use; a falsy value only replaces
                self.endpoint and leaves self.sparql untouched
        """
        self.endpoint = endpoint
        if endpoint:
            self.sparql = SPARQL(endpoint.endpoint, method=self.endpoint.method)

    def raw_query(self, resultFormat, mime_type: str = None, timeout: float = 10.0):
        """
        returns raw result of the endpoint

        The query text and result format are sent as URL parameters using the
        HTTP method configured for the endpoint.

        Args:
            resultFormat (str): format of the result
            mime_type (str): mime_type to use (if any) - sent as Accept header
            timeout (float): timeout in seconds

        Returns:
            raw result of the query (the response text)
        """
        params = {"query": self.query.query, "format": resultFormat}
        payload = {}
        if mime_type:
            headers = {"Accept": mime_type}
        else:
            headers = {}
        endpoint_url = self.endpoint.endpoint
        method = self.endpoint.method
        response = requests.request(
            method,
            endpoint_url,
            headers=headers,
            data=payload,
            params=params,
            timeout=timeout,
        )
        return response.text

    def get_lod(self) -> List[dict]:
        """
        Executes the stored query using the SPARQL service and returns the results as a list of dictionaries.

        Returns:
            List[dict]: A list where each dictionary represents a row of results from the SPARQL query.
        """
        lod = self.sparql.queryAsListOfDicts(self.query.query)
        return lod

    def get_lod_with_stats(self) -> tuple[list[dict], QueryStats]:
        """
        Executes the stored query using the SPARQL service and returns the results
        together with the statistics of the execution.

        A failing query does not raise: the error is recorded in the statistics
        and an empty result list is returned.

        Returns:
            tuple[list[dict], QueryStats]: the result rows and the QueryStats of this run;
                the stats' records value is -1 when the result is empty or None.
        """
        logger.info(f"Querying {self.endpoint.name} with query {self.named_query.name}")
        query_stat = QueryStats(query_id=self.named_query.query_id, endpoint_name=self.endpoint.name)
        try:
            lod = self.sparql.queryAsListOfDicts(self.query.query)
            query_stat.records = len(lod) if lod else -1
            query_stat.done()
        except Exception as ex:
            lod = []
            logger.debug(f"Execution of query failed: {ex}")
            query_stat.error(ex)
        return (lod, query_stat)

    def format_result(
        self,
        qlod: List[Dict[str, Any]] = None,
        r_format: Format = Format.json,
    ) -> Optional[str]:
        """
        Formats the query results based on the specified format and returns them.

        Args:
            qlod (List[Dict[str, Any]]): The list of dictionaries that represent the query results;
                if None the query is executed via get_lod.
            r_format (Format): The format in which to render the results; None means json.

        Returns:
            Optional[str]: The formatted string representation of the query results,
                or None if the format is not handled.
        """
        if qlod is None:
            qlod = self.get_lod()
        if r_format is None:
            r_format = Format.json
        if r_format == Format.csv:
            csv_output = CSV.toCSV(qlod)
            return csv_output
        elif r_format in [Format.latex, Format.github, Format.mediawiki, Format.html]:
            doc = self.query.documentQueryResult(qlod, tablefmt=str(r_format), floatfmt=".1f")
            return doc.asText()
        elif r_format == Format.json:
            return json.dumps(qlod, indent=2, sort_keys=True, default=str)
        return None  # In case no format is matched or needed

    def set_limit(self, limit: int = None):
        """
        set the limit of my query

        Existing `LIMIT <n>` clauses (any letter case) are rewritten to the new
        limit; if the query has none, a LIMIT clause is appended.

        Args:
            limit(int): the limit to set - default: None (query is left unchanged)
        """
        if not limit:
            return
        limit_clause = f"LIMIT {limit}"
        # the word boundary keeps identifiers such as "height_limit" from being
        # mistaken for a LIMIT clause
        # @TODO - a LIMIT inside a subquery is still rewritten as well
        sparql_query, replaced = re.subn(
            r"\bLIMIT\s+\d+",
            limit_clause,
            self.query.query,
            flags=re.IGNORECASE,
        )
        if replaced == 0:
            sparql_query += f"\n{limit_clause}"
        self.query.query = sparql_query

`init(named_query, query, endpoint=None)`

Initializes a new instance of the QueryBundle class.

Parameters:

Name	Type	Description	Default
`named_query`	`NamedQuery`	An instance of NamedQuery that provides a named reference to the query.	required
`query`	`Query`	An instance of Query containing the SPARQL query string.	required
`endpoint`	`Endpoint`	An instance of Endpoint representing the SPARQL endpoint URL.	`None`

Source code in snapquery/snapquery_core.py

def __init__(self, named_query: NamedQuery, query: Query, endpoint: Endpoint = None):
    """
    Create a QueryBundle from its parts.

    Args:
        named_query (NamedQuery): the named reference to the query.
        query (Query): the Query holding the SPARQL query string.
        endpoint (Endpoint): the SPARQL endpoint to use; it is handed on to
            update_endpoint (default: None).
    """
    self.query = query
    self.named_query = named_query
    # the endpoint wiring is delegated and runs once both attributes are set
    self.update_endpoint(endpoint)

`format_result(qlod=None, r_format=Format.json)`

Formats the query results based on the specified format and prints them.

Parameters:

Name	Type	Description	Default
`qlod`	`List[Dict[str, Any]]`	The list of dictionaries that represent the query results.	`None`
`query`	`Query`	The query object which contains details like the endpoint and the database.	required
`r_format`	`Format`	The format in which to print the results.	`json`

Returns:

Type	Description
`Optional[str]`	Optional[str]: The formatted string representation of the query results, or None if printed directly.

Source code in snapquery/snapquery_core.py

def format_result(
    self,
    qlod: List[Dict[str, Any]] = None,
    r_format: Format = Format.json,
) -> Optional[str]:
    """
    Render query results as text in the requested format.

    Args:
        qlod (List[Dict[str, Any]]): the result rows; when None the rows are
            fetched via get_lod().
        r_format (Format): the output format; None is treated as Format.json.

    Returns:
        Optional[str]: the formatted results, or None when the format is not
            one of the handled ones.
    """
    rows = self.get_lod() if qlod is None else qlod
    fmt = Format.json if r_format is None else r_format

    if fmt == Format.csv:
        return CSV.toCSV(rows)
    if fmt in (Format.latex, Format.github, Format.mediawiki, Format.html):
        # tabular formats are rendered via the query's own documentation helper
        result_doc = self.query.documentQueryResult(rows, tablefmt=str(fmt), floatfmt=".1f")
        return result_doc.asText()
    if fmt == Format.json:
        return json.dumps(rows, indent=2, sort_keys=True, default=str)
    # no handled format matched
    return None

`get_lod()`

Executes the stored query using the SPARQL service and returns the results as a list of dictionaries.

Returns:

Type	Description
`List[dict]`	List[dict]: A list where each dictionary represents a row of results from the SPARQL query.

Source code in snapquery/snapquery_core.py

def get_lod(self) -> List[dict]:
    """
    Run my query via my SPARQL service.

    Returns:
        List[dict]: the result of queryAsListOfDicts for my query string -
            one dictionary per result row.
    """
    sparql_query = self.query.query
    return self.sparql.queryAsListOfDicts(sparql_query)

`get_lod_with_stats()`

Executes the stored query using the SPARQL service and returns the results as a list of dictionaries together with the statistics (QueryStats) of the execution.

Returns:

Type	Description
`tuple[list[dict], QueryStats]`	The result rows (one dictionary per row; an empty list if the query failed) and the QueryStats of this execution.

Source code in snapquery/snapquery_core.py

def get_lod_with_stats(self) -> tuple[list[dict], QueryStats]:
    """
    Run my query via my SPARQL service and track the execution in a QueryStats record.

    A failing query does not raise: the exception is logged at debug level,
    recorded on the stats object and an empty list is returned for the rows.

    Returns:
        tuple[list[dict], QueryStats]: the result rows and the statistics
            of this execution
    """
    logger.info(f"Querying {self.endpoint.name} with query {self.named_query.name}")
    query_stat = QueryStats(
        query_id=self.named_query.query_id,
        endpoint_name=self.endpoint.name,
    )
    try:
        rows = self.sparql.queryAsListOfDicts(self.query.query)
        # an empty or missing result is recorded as -1
        if rows:
            query_stat.records = len(rows)
        else:
            query_stat.records = -1
        query_stat.done()
    except Exception as ex:
        rows = []
        logger.debug(f"Execution of query failed: {ex}")
        query_stat.error(ex)
    return (rows, query_stat)

`raw_query(resultFormat, mime_type=None, timeout=10.0)`

returns raw result of the endpoint

Parameters:

Name	Type	Description	Default
`resultFormat`	`str`	format of the result	required
`mime_type`	`str`	mime_type to use (if any)	`None`
`timeout`	`float`	timeout in seconds	`10.0`

Returns:

Type	Description
	raw result of the query

Source code in snapquery/snapquery_core.py

def raw_query(self, resultFormat, mime_type: str = None, timeout: float = 10.0):
    """
    Send my query to my endpoint and return the response body unparsed.

    Args:
        resultFormat (str): format of the result (sent as "format" parameter)
        mime_type (str): mime_type to use as Accept header (if any)
        timeout (float): timeout in seconds

    Returns:
        raw result of the query (the text of the HTTP response)
    """
    # only ask for a specific content type when one was given
    headers = {"Accept": mime_type} if mime_type else {}
    response = requests.request(
        self.endpoint.method,
        self.endpoint.endpoint,
        headers=headers,
        data={},
        params={"query": self.query.query, "format": resultFormat},
        timeout=timeout,
    )
    return response.text

`set_limit(limit=None)`

set the limit of my query

Parameters:

Name	Type	Description	Default
`limit(int)`		the limit to set - default: None	required

Source code in snapquery/snapquery_core.py

def set_limit(self, limit: int = None):
    """
    set the limit of my query

    An existing ``LIMIT <n>`` clause (keyword matched case-insensitively)
    is rewritten to the new value; if the query has no such clause a new
    one is appended on a line of its own.

    Args:
        limit(int): the limit to set - default: None
            (a falsy limit leaves the query untouched)
    """
    if not limit:
        return
    # The lookbehind keeps identifiers such as ?height_limit from being
    # mistaken for the keyword; counting the substitutions lets us fall
    # back to appending when no real clause was found.
    # @TODO - a LIMIT inside a subquery is still rewritten as well
    sparql_query, replaced = re.subn(
        r"(?<![\w?$])LIMIT\s+\d+",
        f"LIMIT {limit}",
        self.query.query,
        flags=re.IGNORECASE,
    )
    if not replaced:
        sparql_query += f"\nLIMIT {limit}"
    self.query.query = sparql_query

`QueryDetails`

Details for a named query

Source code in snapquery/snapquery_core.py

@lod_storable
class QueryDetails:
    """
    Details for a named query
    """

    query_id: str
    params: str  # e.g. q - q1,q2,
    default_params: str  # e.g. Q80 - Q58631663, Q125422124
    default_param_types: str  # e.g. Q5 - Q191067,Q43229
    param_count: int
    lines: int
    size: int

    @classmethod
    def from_sparql(cls, query_id: str, sparql: str) -> "QueryDetails":
        """
        Creates an instance of QueryDetails from a SPARQL query string.

        The number of lines and the size (in bytes of the UTF-8 encoding) are
        derived from the query text; the parameter names are taken from a
        Params instance created for the query.

        Args:
            query_id (str): The identifier of the query.
            sparql (str): The SPARQL query string from which to generate the query details.

        Returns:
            QueryDetails: An instance containing details about the SPARQL query.
        """
        # a query without any newline still counts as one line
        lines = sparql.count("\n") + 1
        size = len(sparql.encode("utf-8"))

        # treat a missing parameter list like an empty one so that the joined
        # names and the count are derived from the same value
        param_names = Params(query=sparql).params or []
        params = ",".join(param_names) if param_names else None
        param_count = len(param_names)
        # @TODO get parameters
        default_params = None
        default_param_types = None
        return cls(
            query_id=query_id,
            params=params,
            default_params=default_params,
            default_param_types=default_param_types,
            param_count=param_count,
            lines=lines,
            size=size,
        )

    @classmethod
    def get_samples(cls) -> dict[str, list["QueryDetails"]]:
        """
        get samples

        Returns:
            dict: a single "snapquery-examples" entry holding a list with one
                sample QueryDetails instance
        """
        samples = {
            "snapquery-examples": [
                QueryDetails(
                    query_id="scholia.test",
                    params="q",
                    default_params="Q80",
                    default_param_types="Q5",
                    param_count=1,
                    lines=1,
                    size=50,
                )
            ]
        }
        return samples

`from_sparql(query_id, sparql)` `classmethod`

Creates an instance of QueryDetails from a SPARQL query string.

This method parses the SPARQL query to determine the number of lines and the size of the query. It also identifies and lists the parameters used within the SPARQL query.

Parameters:

Name	Type	Description	Default
`query_id`	`str`	The identifier of the query.	required
`sparql`	`str`	The SPARQL query string from which to generate the query details.	required

Returns:

Name	Type	Description
`QueryDetails`	`QueryDetails`	An instance containing details about the SPARQL query.

Source code in snapquery/snapquery_core.py

@classmethod
def from_sparql(cls, query_id: str, sparql: str) -> "QueryDetails":
    """
    Creates an instance of QueryDetails from a SPARQL query string.

    The number of lines and the size (in bytes of the UTF-8 encoding) are
    derived from the query text; the parameter names are taken from a
    Params instance created for the query.

    Args:
        query_id (str): The identifier of the query.
        sparql (str): The SPARQL query string from which to generate the query details.

    Returns:
        QueryDetails: An instance containing details about the SPARQL query.
    """
    # a query without any newline still counts as one line
    lines = sparql.count("\n") + 1
    size = len(sparql.encode("utf-8"))

    # treat a missing parameter list like an empty one so that the joined
    # names and the count are derived from the same value
    param_names = Params(query=sparql).params or []
    params = ",".join(param_names) if param_names else None
    param_count = len(param_names)
    # @TODO get parameters
    default_params = None
    default_param_types = None
    return cls(
        query_id=query_id,
        params=params,
        default_params=default_params,
        default_param_types=default_param_types,
        param_count=param_count,
        lines=lines,
        size=size,
    )

`get_samples()` `classmethod`

get samples

Source code in snapquery/snapquery_core.py

@classmethod
def get_samples(cls) -> dict[str, list["QueryDetails"]]:
    """
    get samples

    Returns:
        dict: a single "snapquery-examples" entry holding a list with one
            sample QueryDetails instance
    """
    samples = {
        "snapquery-examples": [
            QueryDetails(
                query_id="scholia.test",
                params="q",
                default_params="Q80",
                default_param_types="Q5",
                param_count=1,
                lines=1,
                size=50,
            )
        ]
    }
    return samples

`QueryName`

A structured query name with a fully qualifying query id that is URL-friendly Attributes: domain(str): the domain of the owner of this namespace namespace (str): The namespace of the query, which helps in categorizing the query. name (str): The unique name or identifier of the query within its namespace. query_id(str): encoded id e.g. cats--examples@wikidata.org

Source code in snapquery/snapquery_core.py

@lod_storable
class QueryName:
    """
    A structured query name with a fully qualifying query id that is URL-friendly
    Attributes:
        domain(str): the domain of the owner of this namespace
        namespace (str): The namespace of the query, which helps in categorizing the query.
        name (str): The unique name or identifier of the query within its namespace.
        query_id(str): encoded id e.g. cats--examples@wikidata.org
    """

    # name
    name: str
    # namespace
    namespace: str = "examples"
    # domain
    domain: str = "wikidata.org"
    # query_id
    query_id: str = field(init=False)

    def __post_init__(self):
        # the id is always derived from the three naming parts
        self.query_id = self.get_query_id(self.name, self.namespace, self.domain)

    @classmethod
    def get_query_id(cls, name: str, namespace: str, domain: str) -> str:
        """
        Generate a URL-friendly query_id

        Args:
            name (str): the query name - it is slugified
            namespace (str): the namespace - used as is
            domain (str): the domain - used as is

        Returns:
            str: the id in the form <slugified name>--<namespace>@<domain>
        """
        # Convert None to empty string (or use any other default logic)
        name, namespace, domain = (name or ""), (namespace or ""), (domain or "")

        # Apply slugify with Unicode support and basic cleanup
        encoded_name = slugify(name, allow_unicode=True)

        # Create a combined query_id
        query_id = f"{encoded_name}--{namespace}@{domain}"

        return query_id

    @classmethod
    def from_query_id(
        cls,
        query_id: str,
        namespace: str = "examples",  # default namespace
        domain: str = "wikidata.org",  # default domain
    ) -> "QueryName":
        """
        Parse a URL-friendly query_id string into a QueryName object.
        Args:
            query_id (str): The URL-friendly query_id string to parse.
            namespace (str): Default namespace if not specified in query_id
            domain (str): Default domain if not specified in query_id
        Returns:
            QueryName: A QueryName object containing name, namespace, and domain.
        """
        # Only the first "--" separates the name from the rest, so a namespace
        # that itself contains "--" survives the round trip.
        # NOTE(review): assumes the slugified name never contains "--" - confirm
        name_part, separator, remainder = query_id.partition("--")
        name = urllib.parse.unquote(name_part)

        if separator:
            ns_domain = remainder.split("@")
            namespace = urllib.parse.unquote(ns_domain[0])
            if len(ns_domain) > 1:
                domain = urllib.parse.unquote(ns_domain[1])
        return cls(name=name, namespace=namespace, domain=domain)

    def to_dict(self) -> dict:
        """
        Convert the QueryName object to a dictionary

        Returns:
            dict: name, namespace, domain and query_id of this QueryName
        """
        return {
            "name": self.name,
            "namespace": self.namespace,
            "domain": self.domain,
            "query_id": self.query_id,
        }

`from_query_id(query_id, namespace='examples', domain='wikidata.org')` `classmethod`

Parse a URL-friendly query_id string into a QueryName object. Args: query_id (str): The URL-friendly query_id string to parse. namespace (str): Default namespace if not specified in query_id domain (str): Default domain if not specified in query_id Returns: QueryName: A QueryName object containing name, namespace, and domain.

Source code in snapquery/snapquery_core.py

@classmethod
def from_query_id(
    cls,
    query_id: str,
    namespace: str = "examples",  # default namespace
    domain: str = "wikidata.org",  # default domain
) -> "QueryName":
    """
    Parse a URL-friendly query_id string into a QueryName object.
    Args:
        query_id (str): The URL-friendly query_id string to parse.
        namespace (str): Default namespace if not specified in query_id
        domain (str): Default domain if not specified in query_id
    Returns:
        QueryName: A QueryName object containing name, namespace, and domain.
    """
    # Only the first "--" separates the name from the rest, so a namespace
    # that itself contains "--" survives the round trip.
    # NOTE(review): assumes the slugified name never contains "--" - confirm
    name_part, separator, remainder = query_id.partition("--")
    name = urllib.parse.unquote(name_part)

    if separator:
        ns_domain = remainder.split("@")
        namespace = urllib.parse.unquote(ns_domain[0])
        if len(ns_domain) > 1:
            domain = urllib.parse.unquote(ns_domain[1])
    return cls(name=name, namespace=namespace, domain=domain)

`get_query_id(name, namespace, domain)` `classmethod`

Generate a URL-friendly query_id

Source code in snapquery/snapquery_core.py

@classmethod
def get_query_id(cls, name: str, namespace: str, domain: str) -> str:
    """
    Build the URL-friendly query_id for the given naming parts.

    The name is slugified (unicode allowed); namespace and domain are
    used as given. The result has the form <name>--<namespace>@<domain>.
    """
    # a None part is handled like an empty string
    safe_name = name or ""
    safe_namespace = namespace or ""
    safe_domain = domain or ""

    slug = slugify(safe_name, allow_unicode=True)
    return f"{slug}--{safe_namespace}@{safe_domain}"

`to_dict()`

Convert the QueryName object to a dictionary

Source code in snapquery/snapquery_core.py

def to_dict(self) -> dict:
    """
    Return my name, namespace, domain and query_id as a dictionary.
    """
    exported = ("name", "namespace", "domain", "query_id")
    return {key: getattr(self, key) for key in exported}

`QueryNameSet`

Manages a set of QueryNames filtered by domain and namespaces SQL like patterns

Attributes:

nqm (NamedQueryManager): A manager to handle named queries and interactions with the database.
limit(int): the maximum number of names and top_queries

Calculated on update

total (int): Total number of queries that match the current filter criteria. domains (set): A set of domains that match the current filter criteria. namespaces (set): A set of namespaces that match the current filter criteria. names (set): A set of names that match the current filter criteria. top_queries (list): List of top queries based on the specified limit.

Source code in snapquery/snapquery_core.py

class QueryNameSet:
    """
    Manages a set of QueryNames filtered by domain and namespaces SQL like patterns

    Attributes:

        nqm (NamedQueryManager): A manager to handle named queries and interactions with the database.
        limit(int): the maximum number of names and top_queries

    Calculated on update:
        total (int): Total number of queries that match the current filter criteria.
        domains (set): A set of domains that match the current filter criteria.
        namespaces (set): A set of namespaces that match the current filter criteria.
        names (set): A set of names that match the current filter criteria.
        top_queries (list): List of top queries based on the specified limit.
    """

    def __init__(self, nqm: "NamedQueryManager", limit: int = None):
        """
        Remember manager and limit, then run a first unfiltered update.
        """
        self.nqm = nqm
        self.limit = limit
        self.total = 0
        self.domains = set()
        self.namespaces = set()
        self.names = set()
        # empty prefixes match every domain and namespace
        self.update("", "")

    def __str__(self):
        top_names = ", ".join(query.name for query in self.top_queries)
        return (
            f"QueryNameSet(Total: {self.total}, Domains: {sorted(self.domains)}, "
            f"Namespaces: {sorted(self.namespaces)}, Names: {sorted(self.names)}, "
            f"Top Queries: [{top_names}])"
        )

    def update(self, domain: str, namespace: str, limit: int = None):
        """
        Recalculate total, domains, namespaces, names and top_queries.

        Args:
            domain (str): The domain prefix to filter by; "%" is appended for the SQL LIKE match.
            namespace (str): The namespace prefix to filter by; "%" is appended for the SQL LIKE match.
            limit (int, optional): Maximum number of queries to fetch. If None, my own limit is used.

        """
        effective_limit = self.limit if limit is None else limit
        stats_query = self.nqm.meta_qm.queriesByName["domain_namespace_stats"]
        records = self.nqm.sql_db.query(stats_query.query, (f"{domain}%", f"{namespace}%"))

        # start from a clean slate - the sets are emptied in place
        self.total = 0
        for stale in (self.domains, self.namespaces, self.names):
            stale.clear()

        for record in records:
            self.domains.add(record["domain"])
            self.namespaces.add(record["namespace"])
            self.total += record["query_count"]

        self.top_queries = self.nqm.get_all_queries(namespace=namespace, domain=domain, limit=effective_limit)
        self.names.update(query.name for query in self.top_queries)

`update(domain, namespace, limit=None)`

update my attributes

Parameters:

Name	Type	Description	Default
`domain`	`str`	The domain part of the filter, supports SQL-like wildcards.	required
`namespace`	`str`	The namespace part of the filter, supports SQL-like wildcards.	required
`limit`	`int`	Maximum number of queries to fetch. If None, no limit is applied.	`None`

Source code in snapquery/snapquery_core.py

def update(self, domain: str, namespace: str, limit: int = None):
    """
    Recalculate total, domains, namespaces, names and top_queries.

    Args:
        domain (str): The domain prefix to filter by; "%" is appended for the SQL LIKE match.
        namespace (str): The namespace prefix to filter by; "%" is appended for the SQL LIKE match.
        limit (int, optional): Maximum number of queries to fetch. If None, my own limit is used.

    """
    effective_limit = self.limit if limit is None else limit
    stats_query = self.nqm.meta_qm.queriesByName["domain_namespace_stats"]
    records = self.nqm.sql_db.query(stats_query.query, (f"{domain}%", f"{namespace}%"))

    # start from a clean slate - the sets are emptied in place
    self.total = 0
    for stale in (self.domains, self.namespaces, self.names):
        stale.clear()

    for record in records:
        self.domains.add(record["domain"])
        self.namespaces.add(record["namespace"])
        self.total += record["query_count"]

    self.top_queries = self.nqm.get_all_queries(namespace=namespace, domain=domain, limit=effective_limit)
    self.names.update(query.name for query in self.top_queries)

`QueryPrefixMerger`

Bases: Enum

SPARQL Query prefix merger

Source code in snapquery/snapquery_core.py

class QueryPrefixMerger(Enum):
    """
    SPARQL Query prefix merger

    The members select how prefix declarations are merged into a query.
    """

    RAW = "raw"
    SIMPLE_MERGER = "simple merger"
    ANALYSIS_MERGER = "analysis merger"

    @classmethod
    def _missing_(cls, key):
        # any unknown value resolves to the default merger
        return cls.default_merger()

    @classmethod
    def default_merger(cls) -> "QueryPrefixMerger":
        """
        Returns:
            the merger to use when none is specified (SIMPLE_MERGER)
        """
        return cls.SIMPLE_MERGER

    @classmethod
    def get_by_name(cls, name: str) -> "QueryPrefixMerger":
        """
        Look up a merger by its member name.

        Args:
            name (str): the member name, e.g. "RAW"

        Returns:
            the matching merger; an unknown name yields the default merger
        """
        value = next((member.value for member in QueryPrefixMerger if member.name == name), None)
        # a None value falls through to _missing_
        return QueryPrefixMerger(value)

    @classmethod
    def merge_prefixes(
        cls, named_query: NamedQuery, query: Query, endpoint: Endpoint, merger: "QueryPrefixMerger"
    ) -> str:
        """
        Merge prefixes into the query string with the given merger
        Args:
            named_query (NamedQuery): not used by any of the mergers
            query (Query): the query whose query string is merged
            endpoint (Endpoint): the endpoint (used by the simple merger)
            merger (QueryPrefixMerger): the merger to apply

        Returns:
            merged query; any other merger returns the query string unchanged
        """
        if merger == QueryPrefixMerger.ANALYSIS_MERGER:
            return cls.analysis_prefix_merger(query.query)
        if merger == QueryPrefixMerger.SIMPLE_MERGER:
            return cls.simple_prefix_merger(query.query, endpoint)
        return query.query

    @classmethod
    def simple_prefix_merger(cls, query_str: str, endpoint: Endpoint) -> str:
        """
        Simple prefix merger: put the prefixes of the endpoint in front of the query
        Args:
            query_str (str): the query string
            endpoint (Endpoint): the endpoint

        Returns:
            merged query; the unchanged query string when the endpoint has no prefixes
        """
        prefixes = getattr(endpoint, "prefixes", None)
        if not prefixes:
            return query_str
        return f"{prefixes}\n{query_str}"

    @classmethod
    def analysis_prefix_merger(cls, query_str: str) -> str:
        """
        Analysis prefix merger: delegates to SparqlAnalyzer.add_missing_prefixes
        Args:
            query_str (str): the query string

        Returns:
            merged query
        """
        return SparqlAnalyzer.add_missing_prefixes(query_str)

`analysis_prefix_merger(query_str)` `classmethod`

Analysis prefix merger Args: query_str

Returns:

Type	Description
`str`	merged query

Source code in snapquery/snapquery_core.py

@classmethod
def analysis_prefix_merger(cls, query_str: str) -> str:
    """
    Analysis prefix merger: delegates to SparqlAnalyzer.add_missing_prefixes
    Args:
        query_str (str): the query string

    Returns:
        merged query
    """
    return SparqlAnalyzer.add_missing_prefixes(query_str)

`merge_prefixes(named_query, query, endpoint, merger)` `classmethod`

Merge prefixes with the given merger Args: named_query (NamedQuery): query (Query): endpoint (Endpoint): merger (QueryPrefixMerger):

Returns:

Type	Description
`str`	merged query

Source code in snapquery/snapquery_core.py

@classmethod
def merge_prefixes(
    cls, named_query: NamedQuery, query: Query, endpoint: Endpoint, merger: "QueryPrefixMerger"
) -> str:
    """
    Merge prefixes into the query string with the given merger
    Args:
        named_query (NamedQuery): not used by any of the mergers
        query (Query): the query whose query string is merged
        endpoint (Endpoint): the endpoint (used by the simple merger)
        merger (QueryPrefixMerger): the merger to apply

    Returns:
        merged query; any other merger returns the query string unchanged
    """
    if merger == QueryPrefixMerger.ANALYSIS_MERGER:
        return cls.analysis_prefix_merger(query.query)
    if merger == QueryPrefixMerger.SIMPLE_MERGER:
        return cls.simple_prefix_merger(query.query, endpoint)
    return query.query

`simple_prefix_merger(query_str, endpoint)` `classmethod`

Simple prefix merger Args: query_str (str): the query string endpoint (Endpoint): the endpoint

Returns:

Type	Description
`str`	merged query

Source code in snapquery/snapquery_core.py

@classmethod
def simple_prefix_merger(cls, query_str: str, endpoint: Endpoint) -> str:
    """
    Simple prefix merger: put the prefixes of the endpoint in front of the query
    Args:
        query_str (str): the query string
        endpoint (Endpoint): the endpoint

    Returns:
        merged query; the unchanged query string when the endpoint has no prefixes
    """
    prefixes = getattr(endpoint, "prefixes", None)
    if not prefixes:
        return query_str
    return f"{prefixes}\n{query_str}"

`QueryStats`

statistics about a query

Source code in snapquery/snapquery_core.py

@lod_storable
class QueryStats:
    """
    statistics about a query
    """

    stats_id: str = field(init=False)
    query_id: str  # foreign key
    endpoint_name: str  # foreign key

    context: Optional[str] = None  # a context for the query stats
    records: Optional[int] = None
    time_stamp: datetime.datetime = field(init=False)
    duration: Optional[float] = field(init=False, default=None)  # duration in seconds
    error_msg: Optional[str] = None
    error_category: Optional[str] = None

    filtered_msg: Optional[str] = None

    def __post_init__(self):
        """
        Post-initialization processing to construct a unique identifier for the query
        and record the timestamp when the query stats object is created.
        """
        self.stats_id = str(uuid.uuid4())
        self.time_stamp = datetime.datetime.now()

    def done(self):
        """
        Set the duration by calculating the elapsed time since the `time_stamp`.
        """
        self.duration = (datetime.datetime.now() - self.time_stamp).total_seconds()

    def apply_error_filter(self, for_html: bool = False) -> ErrorFilter:
        """
        Applies an error filter to the error message and sets the filtered message
        and the error category.

        Args:
            for_html (bool): If True, formats the message for HTML output.

        Returns:
            ErrorFilter: the error filter that has been applied
        """
        error_filter = ErrorFilter(self.error_msg)
        self.filtered_msg = error_filter.get_message(for_html=for_html)
        self.error_category = error_filter.category
        return error_filter

    def error(self, ex: Exception):
        """
        Handle exception of query

        Clears the duration, stores the exception text as error message and
        applies the error filter.

        Args:
            ex (Exception): the exception raised by the query
        """
        self.duration = None
        self.error_msg = str(ex)
        self.apply_error_filter()

    @classmethod
    def from_record(cls, record: Dict) -> "QueryStats":
        """
        Class method to instantiate QueryStats
        from a dictionary record.

        Args:
            record (Dict): the record; missing keys default to None, a missing
                stats_id or time_stamp keeps the freshly generated value

        Returns:
            QueryStats: the instance for the record
        """
        stat = cls(
            query_id=record.get("query_id", None),
            endpoint_name=record.get("endpoint_name", None),
            # context is part of as_record() and must survive the round trip
            context=record.get("context", None),
            records=record.get("records", None),
            error_msg=record.get("error_msg", None),
            error_category=record.get("error_category", None),
            filtered_msg=record.get("filtered_msg", None),
        )
        # these fields are init=False and have to be set after construction
        stat.stats_id = record.get("stats_id", stat.stats_id)
        stat.time_stamp = record.get("time_stamp", stat.time_stamp)
        stat.duration = record.get("duration", None)
        return stat

    def as_record(self) -> Dict:
        """
        convert my declared attributes to a dict
        @TODO may be use asdict from dataclasses instead?
        """
        record = {}
        for _field in fields(self):
            # only fields that are actually set on the instance are included
            if hasattr(self, _field.name):
                record[_field.name] = getattr(self, _field.name)
        return record

    @classmethod
    def get_samples(cls) -> dict[str, list["QueryStats"]]:
        """
        get samples for QueryStats

        Returns:
            dict: a single "snapquery-examples" entry holding a list of two
                sample instances, each with a duration of 0.5 seconds
        """
        samples = {
            "snapquery-examples": [
                cls(
                    query_id="horses--snapquery-examples@wikidata.org",
                    endpoint_name="wikidata",
                    context="samples",
                    records=0,
                    error_msg="HTTP Error 504: Query has timed out.",
                    filtered_msg="Timeout: HTTP Error 504: Query has timed out.",
                    error_category="Timeout",
                ),
                cls(
                    query_id="cats--snapquery-examples@wikidata.org",
                    endpoint_name="wikidata",
                    context="samples",
                    records=223,
                    error_msg="",
                    error_category=None,
                    filtered_msg="",
                ),
            ]
        }
        # duration is init=False, so it is set after construction
        for sample_list in samples.values():
            for sample in sample_list:
                sample.duration = 0.5
        return samples

    def is_successful(self) -> bool:
        """
        Returns True if the query was successful

        A query counts as successful when a duration has been recorded and
        no error message is set.
        """
        # explicit None checks: always a bool, and a duration of 0.0 still counts
        return self.duration is not None and self.error_msg is None

`__post_init__()`

Post-initialization processing to construct a unique identifier for the query and record the timestamp when the query stats object is created.

Source code in snapquery/snapquery_core.py

def __post_init__(self):
    """
    Finish initialization: stamp the creation time and assign a
    random UUID (as string) as unique identifier of this stats record.
    """
    self.time_stamp = datetime.datetime.now()
    self.stats_id = str(uuid.uuid4())

`apply_error_filter(for_html=False)`

Applies an error filter to the error message and sets the filtered message.

Parameters:

Name	Type	Description	Default
`for_html`	`bool`	If True, formats the message for HTML output.	`False`

Returns:

Name	Type	Description
`ErrorFilter`	`ErrorFilter`	the error filter that has been applied

Source code in snapquery/snapquery_core.py

def apply_error_filter(self, for_html: bool = False) -> ErrorFilter:
    """
    Run my error message through an ErrorFilter and keep the outcome.

    Sets filtered_msg and error_category from the filter.

    Args:
        for_html (bool): If True, formats the message for HTML output.

    Returns:
        ErrorFilter: the error filter that has been applied
    """
    applied_filter = ErrorFilter(self.error_msg)
    filtered = applied_filter.get_message(for_html=for_html)
    self.filtered_msg = filtered
    self.error_category = applied_filter.category
    return applied_filter

`as_record()`

convert my declared attributes to a dict @TODO may be use asdict from dataclasses instead?

Source code in snapquery/snapquery_core.py

def as_record(self) -> Dict:
    """
    Build a plain dict from my declared dataclass fields.

    Only fields that currently exist as attributes of the instance are
    included; a field whose value is None is still part of the record.
    @TODO may be use asdict from dataclasses instead?
    """
    return {
        declared.name: getattr(self, declared.name)
        for declared in fields(self)
        if hasattr(self, declared.name)
    }

`done()`

Set the duration by calculating the elapsed time since the time_stamp.

Source code in snapquery/snapquery_core.py

def done(self):
    """
    Mark the query as finished: store the number of seconds that have
    elapsed since `time_stamp` as the duration.
    """
    elapsed = datetime.datetime.now() - self.time_stamp
    self.duration = elapsed.total_seconds()

`error(ex)`

Handle exception of query

Source code in snapquery/snapquery_core.py

def error(self, ex: Exception):
    """
    Record a failed query execution.

    The duration is reset to None, the text of the given exception
    becomes the error message and the error filter is applied.
    """
    self.duration = None
    message = str(ex)
    self.error_msg = message
    self.apply_error_filter()

`from_record(record)` `classmethod`

Class method to instantiate QueryStats from a dictionary record.

Source code in snapquery/snapquery_core.py

@classmethod
def from_record(cls, record: Dict) -> "QueryStats":
    """
    Class method to instantiate QueryStats
    from a dictionary record.

    Args:
        record (Dict): the record to read from; keys that are missing
            default to None

    Returns:
        QueryStats: the new instance; stats_id and time_stamp are taken
        from the record when present, otherwise the values already set on
        the new instance are kept
    """
    stat = cls(
        query_id=record.get("query_id"),
        endpoint_name=record.get("endpoint_name"),
        # context is a constructor argument (see get_samples) and would
        # otherwise be lost when restoring a record created by as_record
        context=record.get("context"),
        records=record.get("records"),
        error_msg=record.get("error_msg"),
        error_category=record.get("error_category"),
        filtered_msg=record.get("filtered_msg"),
    )
    stat.stats_id = record.get("stats_id", stat.stats_id)
    stat.time_stamp = record.get("time_stamp", stat.time_stamp)
    stat.duration = record.get("duration")
    return stat

`get_samples()` `classmethod`

get samples for QueryStats

Source code in snapquery/snapquery_core.py

@classmethod
def get_samples(cls) -> dict[str, list["QueryStats"]]:
    """
    get samples for QueryStats

    Returns:
        dict[str, list[QueryStats]]: the sample instances grouped under
        the key "snapquery-examples"; every sample gets a duration of 0.5
    """
    samples = {
        "snapquery-examples": [
            cls(
                query_id="horses--snapquery-examples@wikidata.org",
                endpoint_name="wikidata",
                context="samples",
                records=0,
                error_msg="HTTP Error 504: Query has timed out.",
                filtered_msg="Timeout: HTTP Error 504: Query has timed out.",
                error_category="Timeout",
            ),
            cls(
                query_id="cats--snapquery-examples@wikidata.org",
                endpoint_name="wikidata",
                context="samples",
                records=223,
                error_msg="",
                error_category=None,
                filtered_msg="",
            ),
        ]
    }
    # the duration is not passed to the constructor, so set it afterwards
    for sample_list in samples.values():
        for sample in sample_list:
            sample.duration = 0.5
    return samples

`is_successful()`

Returns True if the query was successful

Source code in snapquery/snapquery_core.py

def is_successful(self) -> bool:
    """
    Returns True if the query was successful

    A query counts as successful when a truthy duration has been
    recorded and no error message is set (error_msg is None).

    Returns:
        bool: always a real bool, never the raw duration value or None
    """
    # a bare `duration and ...` would hand None/0 through to the caller
    return bool(self.duration) and self.error_msg is None

`QueryStatsList`

a list of query statistics

Source code in snapquery/snapquery_core.py

@lod_storable
class QueryStatsList:
    """
    a list of query statistics

    Named container for QueryStats entries, declared with the
    lod_storable decorator.
    """

    name: str  # the name of the list
    stats: List[QueryStats] = field(default_factory=list)  # the query statistics; a new empty list per instance by default

`snapquery_view`

Created on 2024-05-03

@author: wf

`NamedQuerySearch`

Bases: BaseQueryView

search for namedqueries

Source code in snapquery/snapquery_view.py

class NamedQuerySearch(BaseQueryView):
    """
    search for namedqueries

    Specialization of BaseQueryView that adds no behavior of its own.
    """

    def __init__(self, solution: InputWebSolution):
        """
        constructor

        Args:
            solution (InputWebSolution): the solution handed on to BaseQueryView
        """
        # debug is not passed on, so the BaseQueryView default (False) applies
        super().__init__(solution)

`NamedQueryView`

display a named Query

Source code in snapquery/snapquery_view.py

class NamedQueryView:
    """
    display a named Query

    Builds the page for a single named query: query settings (limit,
    time out, endpoint), try-it link, optional parameter editing, the
    annotated query text, query statistics for authenticated users and
    an expansion holding the query results.
    """

    def __init__(
        self,
        solution: InputWebSolution,
        query_bundle: QueryBundle,
        r_format_str: str = "html",
    ):
        """
        constructor

        Args:
            solution (InputWebSolution): the solution this view is displayed in
            query_bundle (QueryBundle): the bundle with the named query and query to display
            r_format_str (str): result format name; stored as-is
        """
        self.solution = solution
        self.endpoint_name = "wikidata"
        self.nqm: NamedQueryManager = self.solution.nqm
        self.query_bundle = query_bundle
        self.r_format_str = r_format_str
        # background task started by run_query (None until the first run)
        self.load_task = None
        # values bound to the "limit" and "time out" number inputs
        self.limit = 200
        self.timeout = 20.0
        # preload ValueFormatter
        ValueFormatter.getFormats()
        self.setup_ui()

    def setup_ui(self):
        """
        setup my user interface
        """
        nq = self.query_bundle.named_query
        url = self.query_bundle.query.tryItUrl
        text = nq.title
        tooltip = "try it!"
        link = Link.create(url, text, tooltip, target="_blank")
        with self.solution.container:
            with ui.column():
                # row 1: limit, time out and endpoint selection
                with ui.row() as self.query_settings_row:
                    self.query_settings_row.classes("w-full")
                    ui.number(label="limit").bind_value(self, "limit")
                    ui.number(label="time out").bind_value(self, "timeout")
                    endpoint_selector = ui.select(
                        list(self.nqm.endpoints.keys()),
                        value=self.solution.endpoint_name,
                        label="endpoint",
                    )
                    endpoint_selector.bind_value(
                        self,
                        "endpoint_name",
                    )
                    endpoint_selector.classes("w-64")
                # row 2: try-it link, description, optional parameters, run button and stats text
                with ui.row() as self.query_row:
                    self.try_it_link = ui.html(link)
                    ui.label(nq.description)
                    self.params = Params(nq.sparql)
                    # params_view/params_edit only exist when the query has parameters
                    if self.params.has_params:
                        self.params_view = ParamsView(self, self.params)
                        self.params_edit = self.params_view.get_dict_edit()
                        pass
                    ui.button(icon="play_arrow", on_click=self.run_query)
                    self.stats_html = ui.html()
                # row 3: the annotated query text
                with ui.row():
                    with ui.expansion("Show Query", icon="manage_search").classes("w-full"):
                        query_syntax_highlight = QuerySyntaxHighlight(self.query_bundle.query)
                        syntax_highlight_css = query_syntax_highlight.formatter.get_style_defs()
                        annotated_query = SparqlQueryAnnotater(self.query_bundle.query)
                        ui.add_css(syntax_highlight_css)
                        # ui.html(query_syntax_highlight.highlight())
                        ui.html(annotated_query.annotate())
                # the statistics are only shown to authenticated users, so
                # stats_container does not exist otherwise
                if self.solution.webserver.authenticated():
                    with ui.row().classes("w-full"):
                        with ui.expansion("Show Query Stats", icon="query_stats") as self.stats_container:
                            self.stats_container.classes("w-full")
                            self.load_stats()
                # result area, initially open with a hint and a second run button
                self.grid_row = ui.expansion("Query Results", icon="table_rows", value=True)
                self.grid_row.classes("w-full")
                with self.grid_row:
                    ui.label("Not yet executed ")
                    ui.button("Run Query", icon="play_arrow", on_click=self.run_query)
                pass

    def load_stats(self):
        """
        display query stats

        Clears the stats container and fills it with a box plot of the
        execution times of successful runs per endpoint, a bar chart of
        the failed runs, or a hint when there are no statistics, followed
        by an "Update statistics" button that calls this method again.
        """
        self.stats_container.clear()
        with self.stats_container:
            container = ui.row()
        query_stats = self.nqm.get_query_stats(self.query_bundle.named_query.query_id)
        errors = [stat for stat in query_stats if not stat.is_successful()]
        successful = [stat for stat in query_stats if stat.is_successful()]
        if successful:
            # group the successful runs by endpoint name
            exec_times_by_endpoint: dict[str, list[QueryStats]] = {}
            for stat in successful:
                if stat.endpoint_name not in exec_times_by_endpoint:
                    exec_times_by_endpoint[stat.endpoint_name] = []
                exec_times_by_endpoint[stat.endpoint_name].append(stat)
            # one box trace per endpoint with the durations as x values
            data = []
            for endpoint_name, stats in exec_times_by_endpoint.items():
                record = {
                    "type": "box",
                    "name": endpoint_name,
                    "x": [stat.duration for stat in stats],
                }
                data.append(record)
            fig = {
                "data": data,
                "layout": {
                    "margin": {"l": 200, "r": 15, "t": 30, "b": 30},
                    "plot_bgcolor": "#E5ECF6",
                    "xaxis": {"gridcolor": "white", "title": "Execution Time [s]"},
                    "yaxis": {"gridcolor": "white", "title": "Endpoint"},
                    "title": "Query Execution Times by Endpoint",
                },
                "config": {
                    "staticPlot": True,
                },
            }
            with container:
                ui.plotly(fig)
        if errors:
            error_records = [stat.as_record() for stat in errors]
            # keep only the first 16 characters of each message as label
            for record in error_records:
                if record["error_msg"]:
                    record["error_msg"] = record["error_msg"][:16] + "..."
                else:
                    record["error_msg"] = "<unkown>"
            error_df = pd.DataFrame.from_records(error_records)
            # count() turns the remaining columns (e.g. query_id) into row counts per group
            error_df_grouped = error_df.groupby(["endpoint_name", "error_msg"], as_index=False).count()
            error_fig = px.bar(
                error_df_grouped,
                x="endpoint_name",
                y="query_id",
                title="Query Execution Errors",
                labels={"query_id": "count", "endpoint_name": "Endpoint"},
                color="error_msg",
            )
            error_fig.update_layout(margin=dict(l=15, r=15, t=30, b=15))
            with container:
                ui.plotly(error_fig)
        if not successful and not errors:
            with container:
                ui.label("No query statistics available")
        with container:
            ui.button("Update statistics", icon="update", on_click=self.load_stats)

    async def load_query_results(self):
        """
        (re) load the query results

        Applies parameters, limit and endpoint to the query bundle, runs
        the query, stores the resulting stats and shows either the
        filtered error message or the record count and a result grid.
        """
        if self.params.has_params:
            self.query_bundle.query.query = self.params.apply_parameters()
            self.params_view.close()
        self.query_bundle.set_limit(int(self.limit))
        endpoint = self.nqm.endpoints[self.endpoint_name]
        self.query_bundle.update_endpoint(endpoint)
        # presumably run.io_bound keeps the blocking endpoint call off the event loop — confirm
        result = await run.io_bound(self.query_bundle.get_lod_with_stats)
        if not result:
            with self.solution.container:
                ui.notify("query execution failure")
            return
        lod, stats = result
        self.nqm.store_stats([stats])
        self.grid_row.clear()
        if stats.error_msg:
            with self.grid_row:
                stats.apply_error_filter()
                # NOTE(review): filtered_msg is put into the markup without escaping — confirm it is safe
                markup = f'<span style="color: red;">{stats.filtered_msg}</span>'
                ui.html(markup)
        else:
            with self.query_row:
                record_count = len(lod) if lod is not None else 0
                markup = f'<span style="color: green;">{record_count} records in {stats.duration:.2f} secs</span>'
                self.stats_html.content = markup
        # NOTE(review): an empty result without error also ends up here and is reported as "query failed"
        if not lod:
            with self.query_row:
                ui.notify("query failed")
            return
        query = self.query_bundle.query
        query.formats = ["*:wikidata"]
        tablefmt = "html"
        query.preFormatWithCallBacks(lod, tablefmt=tablefmt)
        query.formatWithValueFormatters(lod, tablefmt=tablefmt)
        # turn remaining string values that start with "http" into links
        for record in lod:
            for key, value in record.items():
                if isinstance(value, str):
                    if value.startswith("http"):
                        record[key] = Link.create(value, value)
        with self.grid_row:
            self.lod_grid = ListOfDictsGrid()
            self.lod_grid.load_lod(lod)
        self.grid_row.update()

    async def run_query(self, _args):
        """
        run the current query

        Replaces the result area by a spinner, cancels a load task that
        is still registered and starts load_query_results as a new
        background task together with a one-shot time out timer.

        Args:
            _args: the event arguments of the click (unused)
        """

        def cancel_running():
            # cancels whatever task is registered in load_task at call time
            if self.load_task:
                self.load_task.cancel()

        self.grid_row.clear()
        with self.grid_row:
            ui.spinner()
        self.grid_row.update()
        # cancel task still running
        cancel_running()
        # cancel task if it takes too long
        # NOTE(review): a timer left over from an earlier run also cancels the
        # task of a later run when it fires — confirm this is intended
        ui.timer(self.timeout, lambda: cancel_running(), once=True)
        # run task in background
        self.load_task = background_tasks.create(self.load_query_results())

`load_query_results()` `async`

(re) load the query results

Source code in snapquery/snapquery_view.py

async def load_query_results(self):
    """
    (re) load the query results

    Applies parameters, limit and endpoint to the query bundle, runs the
    query, stores the resulting stats and shows either the filtered error
    message or the record count and a result grid.
    """
    # substitute the parameter values into the query text and close the params view
    if self.params.has_params:
        self.query_bundle.query.query = self.params.apply_parameters()
        self.params_view.close()
    self.query_bundle.set_limit(int(self.limit))
    endpoint = self.nqm.endpoints[self.endpoint_name]
    self.query_bundle.update_endpoint(endpoint)
    # presumably run.io_bound keeps the blocking endpoint call off the event loop — confirm
    result = await run.io_bound(self.query_bundle.get_lod_with_stats)
    if not result:
        with self.solution.container:
            ui.notify("query execution failure")
        return
    lod, stats = result
    self.nqm.store_stats([stats])
    self.grid_row.clear()
    if stats.error_msg:
        with self.grid_row:
            stats.apply_error_filter()
            # NOTE(review): filtered_msg is put into the markup without escaping — confirm it is safe
            markup = f'<span style="color: red;">{stats.filtered_msg}</span>'
            ui.html(markup)
    else:
        with self.query_row:
            record_count = len(lod) if lod is not None else 0
            markup = f'<span style="color: green;">{record_count} records in {stats.duration:.2f} secs</span>'
            self.stats_html.content = markup
    # NOTE(review): an empty result without error also ends up here and is reported as "query failed"
    if not lod:
        with self.query_row:
            ui.notify("query failed")
        return
    query = self.query_bundle.query
    query.formats = ["*:wikidata"]
    tablefmt = "html"
    query.preFormatWithCallBacks(lod, tablefmt=tablefmt)
    query.formatWithValueFormatters(lod, tablefmt=tablefmt)
    # turn remaining string values that start with "http" into links
    for record in lod:
        for key, value in record.items():
            if isinstance(value, str):
                if value.startswith("http"):
                    record[key] = Link.create(value, value)
    with self.grid_row:
        self.lod_grid = ListOfDictsGrid()
        self.lod_grid.load_lod(lod)
    self.grid_row.update()

`load_stats()`

display query stats

Source code in snapquery/snapquery_view.py

def load_stats(self):
    """
    Rebuild the statistics panel of the displayed named query.

    The stats container is emptied and refilled with a box plot of the
    execution times of the successful runs per endpoint, a bar chart of
    the failed runs per endpoint and (shortened) error message, or a hint
    when no statistics exist. An "Update statistics" button that calls
    this method again is always appended.
    """
    self.stats_container.clear()
    with self.stats_container:
        container = ui.row()
    all_stats = self.nqm.get_query_stats(self.query_bundle.named_query.query_id)
    errors = [entry for entry in all_stats if not entry.is_successful()]
    successful = [entry for entry in all_stats if entry.is_successful()]
    if successful:
        # group the successful runs by the name of their endpoint
        runs_by_endpoint: dict[str, list[QueryStats]] = {}
        for entry in successful:
            runs_by_endpoint.setdefault(entry.endpoint_name, []).append(entry)
        # one box trace per endpoint with the durations as x values
        traces = [
            {
                "type": "box",
                "name": endpoint_name,
                "x": [run.duration for run in endpoint_runs],
            }
            for endpoint_name, endpoint_runs in runs_by_endpoint.items()
        ]
        layout = {
            "margin": {"l": 200, "r": 15, "t": 30, "b": 30},
            "plot_bgcolor": "#E5ECF6",
            "xaxis": {"gridcolor": "white", "title": "Execution Time [s]"},
            "yaxis": {"gridcolor": "white", "title": "Endpoint"},
            "title": "Query Execution Times by Endpoint",
        }
        fig = {
            "data": traces,
            "layout": layout,
            "config": {"staticPlot": True},
        }
        with container:
            ui.plotly(fig)
    if errors:
        error_records = []
        for failed in errors:
            record = failed.as_record()
            message = record["error_msg"]
            # keep only the first 16 characters of the message as label
            record["error_msg"] = (message[:16] + "...") if message else "<unkown>"
            error_records.append(record)
        error_df = pd.DataFrame.from_records(error_records)
        grouping_columns = ["endpoint_name", "error_msg"]
        error_df_grouped = error_df.groupby(grouping_columns, as_index=False).count()
        error_fig = px.bar(
            error_df_grouped,
            x="endpoint_name",
            y="query_id",
            title="Query Execution Errors",
            labels={"query_id": "count", "endpoint_name": "Endpoint"},
            color="error_msg",
        )
        error_fig.update_layout(margin={"l": 15, "r": 15, "t": 30, "b": 15})
        with container:
            ui.plotly(error_fig)
    if not (successful or errors):
        with container:
            ui.label("No query statistics available")
    with container:
        ui.button("Update statistics", icon="update", on_click=self.load_stats)

`run_query(_args)` `async`

run the current query

Source code in snapquery/snapquery_view.py

async def run_query(self, _args):
    """
    run the current query

    Replaces the result area by a spinner, cancels a load task that is
    still registered from an earlier run and starts load_query_results as
    a new background task that is cancelled after `timeout` seconds.

    Args:
        _args: the event arguments of the click (unused)
    """
    self.grid_row.clear()
    with self.grid_row:
        ui.spinner()
    self.grid_row.update()
    # cancel task still running
    if self.load_task:
        self.load_task.cancel()
    # run task in background
    task = background_tasks.create(self.load_query_results())
    self.load_task = task
    # cancel task if it takes too long; the timer is bound to the task
    # started above, so a timer left over from an earlier run can no
    # longer cancel the task of a later run (cancelling a finished task
    # has no effect)
    ui.timer(self.timeout, lambda: task.cancel(), once=True)

`setup_ui()`

setup my user interface

Source code in snapquery/snapquery_view.py

def setup_ui(self):
    """
    setup my user interface

    Creates, inside the container of the solution, the settings row, the
    query row, the query text expansion, the statistics expansion (only
    for authenticated users) and the result expansion.
    """
    nq = self.query_bundle.named_query
    url = self.query_bundle.query.tryItUrl
    text = nq.title
    tooltip = "try it!"
    link = Link.create(url, text, tooltip, target="_blank")
    with self.solution.container:
        with ui.column():
            # row 1: limit, time out and endpoint selection
            with ui.row() as self.query_settings_row:
                self.query_settings_row.classes("w-full")
                ui.number(label="limit").bind_value(self, "limit")
                ui.number(label="time out").bind_value(self, "timeout")
                endpoint_selector = ui.select(
                    list(self.nqm.endpoints.keys()),
                    value=self.solution.endpoint_name,
                    label="endpoint",
                )
                endpoint_selector.bind_value(
                    self,
                    "endpoint_name",
                )
                endpoint_selector.classes("w-64")
            # row 2: try-it link, description, optional parameters, run button and stats text
            with ui.row() as self.query_row:
                self.try_it_link = ui.html(link)
                ui.label(nq.description)
                self.params = Params(nq.sparql)
                # params_view/params_edit only exist when the query has parameters
                if self.params.has_params:
                    self.params_view = ParamsView(self, self.params)
                    self.params_edit = self.params_view.get_dict_edit()
                    pass
                ui.button(icon="play_arrow", on_click=self.run_query)
                self.stats_html = ui.html()
            # row 3: the annotated query text
            with ui.row():
                with ui.expansion("Show Query", icon="manage_search").classes("w-full"):
                    query_syntax_highlight = QuerySyntaxHighlight(self.query_bundle.query)
                    syntax_highlight_css = query_syntax_highlight.formatter.get_style_defs()
                    annotated_query = SparqlQueryAnnotater(self.query_bundle.query)
                    ui.add_css(syntax_highlight_css)
                    # ui.html(query_syntax_highlight.highlight())
                    ui.html(annotated_query.annotate())
            # the statistics are only shown to authenticated users, so
            # stats_container does not exist otherwise
            if self.solution.webserver.authenticated():
                with ui.row().classes("w-full"):
                    with ui.expansion("Show Query Stats", icon="query_stats") as self.stats_container:
                        self.stats_container.classes("w-full")
                        self.load_stats()
            # result area, initially open with a hint and a second run button
            self.grid_row = ui.expansion("Query Results", icon="table_rows", value=True)
            self.grid_row.classes("w-full")
            with self.grid_row:
                ui.label("Not yet executed ")
                ui.button("Run Query", icon="play_arrow", on_click=self.run_query)
            pass

`snapquery_webserver`

Created on 2024-05-03 @author: wf

`SnapQuerySolution`

Bases: InputWebSolution

the Snap Query solution

Source code in snapquery/snapquery_webserver.py

class SnapQuerySolution(InputWebSolution):
    """
    the Snap Query solution

    Provides the pages (home, nominate, admin, login, stats, queries by
    namespace, single query) and the menu of the snapquery web
    application for one client.
    """

    def __init__(self, webserver: SnapQueryWebServer, client: Client):
        """
        Initialize the solution

        Calls the constructor of the base solution
        Args:
            webserver (SnapQueryWebServer): The webserver instance associated with this context.
            client (Client): The client instance this context is associated with.
        """
        super().__init__(webserver, client)  # Call to the superclass constructor
        self.webserver: SnapQueryWebServer  # annotation only, assigns nothing
        # use the named query manager of the webserver
        self.nqm = self.webserver.nqm
        self.endpoint_name = self.get_user_endpoint()

    def configure_settings(self):
        """
        add additional settings

        Adds a select for the default endpoint and one for the prefix
        merger; both are bound to keys of the user storage.
        """
        self.add_select("default Endpoint", list(self.nqm.endpoints.keys()), value=self.endpoint_name,).bind_value(
            app.storage.user,
            "endpoint_name",
        )
        self.add_select(
            "prefix merger",
            {merger.name: merger.value for merger in QueryPrefixMerger},
            value=self.get_user_prefix_merger().name,
        ).bind_value(
            app.storage.user,
            "prefix_merger",
        )

    def setup_menu(self, detailed: bool = True):
        """
        setup the menu

        Args:
            detailed (bool): handed on to the setup_menu of the base class
        """
        ui.button(icon="menu", on_click=lambda: self.header.toggle())
        self.webserver: SnapQueryWebServer  # annotation only, assigns nothing
        super().setup_menu(detailed=detailed)
        with self.header:
            self.header.value = False
            self.link_button("Nominate a Query", "/nominate", "post_add", new_tab=False)
            self.link_button(
                "Queries by Namespace",
                "/queries_by_namespace",
                "view_list",
                new_tab=False,
            )
            if self.webserver.authenticated():
                self.link_button("logout", "/logout", "logout", new_tab=False)
                # NOTE(review): presumably webserver.login covers only the local login, in contrast to ORCID — confirm
                if self.webserver.login.authenticated():
                    self.link_button("admin", "/admin", "supervisor_account", new_tab=False)
                self.link_button("stats", "/stats", icon_name="query_stats", new_tab=False)
            else:
                self.link_button("login", "/login", "login", new_tab=False)
                if self.webserver.orcid_auth.available():
                    redirect_url = self.webserver.orcid_auth.authenticate_url()
                    self.link_button("login with orcid", redirect_url, "login", new_tab=False)
            # show name and ORCID of the user when authenticated via ORCID
            if self.webserver.orcid_auth.authenticated():
                orcid_token = self.webserver.orcid_auth.get_cached_user_access_token()
                ui.markdown(f"*logged in as* **{orcid_token.name} ({orcid_token.orcid})**").props(
                    "flat color=white icon=folder"
                ).classes("ml-auto")

    async def nominate_ui(self):
        """
        nominate a new query
        """

        def show():
            """
            show the nominate ui
            """

            def selection_callback(person: Person):
                """
                replace the person selection by the query import view for the selected person
                """
                self.container.clear()
                with self.container:
                    with ui.row().classes("w-full"):
                        with ui.column():
                            ui.label(text="Nominate your Query").classes("text-xl")
                            ui.link(
                                text="see the documentation for detailed information on the nomination procedure",
                                new_tab=True,
                                target="https://wiki.bitplan.com/index.php/Snapquery#nominate",
                            )
                        PersonView(person).classes("ml-auto bg-slate-100 rounded-md")
                # NOTE(review): this row is created outside the `with self.container:` block above — confirm this is intended
                with ui.row().classes("w-full"):
                    self.query_import_view = QueryImportView(self, allow_importing_from_url=False, person=person)

            # initial state: heading, documentation link and the person selector
            with ui.column():
                ui.label(text="Nominate your Query").classes("text-xl")
                ui.link(
                    text="see the documentation for detailed information on the nomination procedure",
                    new_tab=True,
                    target="https://wiki.bitplan.com/index.php/Snapquery#nominate",
                )
                ui.label("Please identify yourself by entering or looking up a valid PID(Wikidata ID, ORCID, dblp).")
                self.person_selector = PersonSelector(solution=self, selection_callback=selection_callback)

        await self.setup_content_div(show)

    async def admin_ui(self):
        """
        admin ui
        """

        def show():
            """
            show the query import view
            """
            self.query_import_view = QueryImportView(self)

        await self.setup_content_div(show)

    async def login_ui(self):
        """
        login ui
        """
        await self.webserver.login.login(self)

    async def stats_ui(self):
        """
        stats ui
        """

        def show():
            """
            show the query stats view
            """
            QueryStatsView(self)

        await self.setup_content_div(show)

    def setup_ui(self):
        """
        setup my user interface
        """
        self.search = NamedQuerySearch(self)

    async def home(
        self,
    ):
        """Generates the home page"""
        await self.setup_content_div(self.setup_ui)

    async def queries_by_namespace(self):
        """
        show the namespace stats view
        """

        def show():
            _nsv = NamespaceStatsView(self)

        await self.setup_content_div(show)

    async def query_page(
        self,
        domain: str,
        namespace: str,
        name: str,
        endpoint_name: str = "wikidata",
        limit: int = None,
        r_format_str: str = "html",
    ):
        """
        show the page of a single named query

        Args:
            domain (str): the domain part of the query name
            namespace (str): the namespace part of the query name
            name (str): the name part of the query name
            endpoint_name (str): the name of the endpoint handed on to get_query
            limit (int): the limit handed on to get_query
            r_format_str (str): the result format name handed on to the NamedQueryView
        """

        def show():
            query_name = QueryName(domain=domain, namespace=namespace, name=name)
            qb = self.nqm.get_query(
                query_name=query_name,
                endpoint_name=endpoint_name,
                limit=limit,
                prefix_merger=self.get_user_prefix_merger(),
            )
            self.named_query_view = NamedQueryView(self, query_bundle=qb, r_format_str=r_format_str)

        await self.setup_content_div(show)

    @staticmethod
    def get_user_endpoint() -> str:
        """
        Get the endpoint selected by the user. If no endpoint is selected return the default endpoint wikidata
        """
        endpoint = app.storage.user.get("endpoint_name", "wikidata")
        return endpoint

    @staticmethod
    def get_user_prefix_merger() -> QueryPrefixMerger:
        """
        Get the prefix merger selected by the user. If no merger is selected the default merger Simple merger is used
        """
        merger_name = app.storage.user.get("prefix_merger", None)
        merger = QueryPrefixMerger.get_by_name(merger_name)
        # remember the merger returned for the missing selection in the user storage
        if merger_name is None:
            app.storage.user["prefix_merger"] = merger.name
        return merger

`__init__(webserver, client)`

Initialize the solution

Calls the constructor of the base solution.

Args:

- webserver (SnapQueryWebServer): The webserver instance associated with this context.
- client (Client): The client instance this context is associated with.

Source code in snapquery/snapquery_webserver.py

def __init__(self, webserver: SnapQueryWebServer, client: Client):
    """
    Initialize the solution

    Calls the constructor of the base solution
    Args:
        webserver (SnapQueryWebServer): The webserver instance associated with this context.
        client (Client): The client instance this context is associated with.
    """
    super().__init__(webserver, client)  # Call to the superclass constructor
    self.webserver: SnapQueryWebServer  # annotation only, assigns nothing
    # use the named query manager of the webserver
    self.nqm = self.webserver.nqm
    # the endpoint name stored for the user, "wikidata" if none is stored
    self.endpoint_name = self.get_user_endpoint()

`admin_ui()` `async`

admin ui

Source code in snapquery/snapquery_webserver.py

async def admin_ui(self):
    """
    admin ui

    Sets up the content div with the query import view.
    """

    def show():
        """
        show the query import view
        """
        self.query_import_view = QueryImportView(self)

    await self.setup_content_div(show)

`configure_settings()`

add additional settings

Source code in snapquery/snapquery_webserver.py

def configure_settings(self):
    """
    Add the snapquery specific settings: a select for the default endpoint
    and a select for the prefix merger, each bound to its key in the user
    storage.
    """
    endpoint_names = list(self.nqm.endpoints.keys())
    endpoint_select = self.add_select("default Endpoint", endpoint_names, value=self.endpoint_name)
    endpoint_select.bind_value(app.storage.user, "endpoint_name")

    merger_options = {merger.name: merger.value for merger in QueryPrefixMerger}
    merger_select = self.add_select(
        "prefix merger",
        merger_options,
        value=self.get_user_prefix_merger().name,
    )
    merger_select.bind_value(app.storage.user, "prefix_merger")

`get_user_endpoint()` `staticmethod`

Get the endpoint selected by the user. If no endpoint is selected return the default endpoint wikidata

Source code in snapquery/snapquery_webserver.py

@staticmethod
def get_user_endpoint() -> str:
    """
    Look up the endpoint name stored for the current user.

    Returns:
        str: the stored "endpoint_name", or "wikidata" if none is stored
    """
    return app.storage.user.get("endpoint_name", "wikidata")

`get_user_prefix_merger()` `staticmethod`

Get the prefix merger selected by the user. If no merger is selected the default merger Simple merger is used

Source code in snapquery/snapquery_webserver.py

@staticmethod
def get_user_prefix_merger() -> QueryPrefixMerger:
    """
    Look up the prefix merger stored for the current user.

    When no merger name is stored yet, the merger that get_by_name
    returns for None is used and its name is written to the user storage.

    Returns:
        QueryPrefixMerger: the merger to use for the current user
    """
    stored_name = app.storage.user.get("prefix_merger", None)
    selected = QueryPrefixMerger.get_by_name(stored_name)
    if stored_name is not None:
        return selected
    app.storage.user["prefix_merger"] = selected.name
    return selected

`home()` `async`

Generates the home page

Source code in snapquery/snapquery_webserver.py

async def home(
    self,
):
    """
    Generates the home page

    Sets up the content div with setup_ui as the content callback.
    """
    await self.setup_content_div(self.setup_ui)

`login_ui()` `async`

login ui

Source code in snapquery/snapquery_webserver.py

async def login_ui(self):
    """
    login ui

    Delegates to the login of the webserver, passing this solution.
    """
    await self.webserver.login.login(self)

`nominate_ui()` `async`

nominate a new query

Source code in snapquery/snapquery_webserver.py

async def nominate_ui(self):
    """
    nominate a new query

    First shows a person selector; once a person has been selected the
    content is replaced by a query import view for that person.
    """

    def show():
        """
        show the nominate ui
        """

        def selection_callback(person: Person):
            """
            replace the person selection by the query import view for the selected person
            """
            self.container.clear()
            with self.container:
                with ui.row().classes("w-full"):
                    with ui.column():
                        ui.label(text="Nominate your Query").classes("text-xl")
                        ui.link(
                            text="see the documentation for detailed information on the nomination procedure",
                            new_tab=True,
                            target="https://wiki.bitplan.com/index.php/Snapquery#nominate",
                        )
                    PersonView(person).classes("ml-auto bg-slate-100 rounded-md")
            # NOTE(review): this row is created outside the `with self.container:` block above — confirm this is intended
            with ui.row().classes("w-full"):
                self.query_import_view = QueryImportView(self, allow_importing_from_url=False, person=person)

        # initial state: heading, documentation link and the person selector
        with ui.column():
            ui.label(text="Nominate your Query").classes("text-xl")
            ui.link(
                text="see the documentation for detailed information on the nomination procedure",
                new_tab=True,
                target="https://wiki.bitplan.com/index.php/Snapquery#nominate",
            )
            ui.label("Please identify yourself by entering or looking up a valid PID(Wikidata ID, ORCID, dblp).")
            self.person_selector = PersonSelector(solution=self, selection_callback=selection_callback)

    await self.setup_content_div(show)

`setup_menu(detailed=True)`

setup the menu

Source code in snapquery/snapquery_webserver.py

def setup_menu(self, detailed: bool = True):
    """
    setup the menu

    Adds a menu button that toggles the header, lets the base class build
    its menu and then fills the (initially hidden) header with the
    snapquery specific links. Which links are shown depends on the
    login state of the webserver.

    Args:
        detailed (bool): passed through to the base class menu setup
    """
    ui.button(icon="menu", on_click=lambda: self.header.toggle())
    self.webserver: SnapQueryWebServer
    super().setup_menu(detailed=detailed)
    server = self.webserver
    orcid_auth = server.orcid_auth
    with self.header:
        # start with a collapsed header
        self.header.value = False
        self.link_button("Nominate a Query", "/nominate", "post_add", new_tab=False)
        self.link_button("Queries by Namespace", "/queries_by_namespace", "view_list", new_tab=False)
        if not server.authenticated():
            self.link_button("login", "/login", "login", new_tab=False)
            if orcid_auth.available():
                self.link_button("login with orcid", orcid_auth.authenticate_url(), "login", new_tab=False)
        else:
            self.link_button("logout", "/logout", "logout", new_tab=False)
            # the admin link is only offered for the local login
            if server.login.authenticated():
                self.link_button("admin", "/admin", "supervisor_account", new_tab=False)
            self.link_button("stats", "/stats", icon_name="query_stats", new_tab=False)
        if orcid_auth.authenticated():
            token = orcid_auth.get_cached_user_access_token()
            user_info = ui.markdown(f"*logged in as* **{token.name} ({token.orcid})**")
            user_info.props("flat color=white icon=folder").classes("ml-auto")

`setup_ui()`

setup my user interface

Source code in snapquery/snapquery_webserver.py

def setup_ui(self):
    """
    setup my user interface

    creates the NamedQuerySearch for this solution and keeps it in
    ``self.search``
    """
    self.search = NamedQuerySearch(self)

`stats_ui()` `async`

stats ui

Source code in snapquery/snapquery_webserver.py

async def stats_ui(self):
    """
    show the query statistics page
    """

    def render_stats():
        """create the QueryStatsView for this solution"""
        QueryStatsView(self)

    await self.setup_content_div(render_stats)

`SnapQueryWebServer`

Bases: InputWebserver

server to supply named Queries

Source code in snapquery/snapquery_webserver.py

class SnapQueryWebServer(InputWebserver):
    """
    server to supply named Queries
    """

    @classmethod
    def get_config(cls) -> WebserverConfig:
        """
        get the configuration for this Webserver

        Returns:
            WebserverConfig: the configuration obtained from
                WebserverConfig.get with SnapQuerySolution set as solution class
        """
        base_config = WebserverConfig(
            short_name="snapquery",
            copy_right="",
            version=Version(),
            default_port=9862,
            timeout=6.0,
        )
        resolved_config = WebserverConfig.get(base_config)
        resolved_config.solution_class = SnapQuerySolution
        return resolved_config

    def __init__(self):
        """
        Constructs all the necessary attributes for the WebServer object.

        Creates the login handlers and the named query manager and registers
        the UI pages and the REST API routes as closures over this instance.
        """
        InputWebserver.__init__(self, config=SnapQueryWebServer.get_config())
        users = Users("~/.solutions/snapquery")
        self.login = Login(self, users)
        self.orcid_auth = OrcidAuth(Path(self.config.base_path))
        self.nqm = NamedQueryManager.from_samples()

        @ui.page("/admin")
        async def admin(client: Client):
            # the admin page requires the local login - an ORCID login is not sufficient
            if not self.login.authenticated():
                return RedirectResponse("/login")
            return await self.page(client, SnapQuerySolution.admin_ui)

        @ui.page("/nominate")
        async def nominate(client: Client):
            return await self.page(client, SnapQuerySolution.nominate_ui)

        @ui.page("/stats")
        async def stats(client: Client):
            # any kind of login (local or ORCID) gives access to the statistics
            if not self.authenticated():
                return RedirectResponse("/login")
            return await self.page(client, SnapQuerySolution.stats_ui)

        @ui.page("/login")
        async def login(client: Client):
            return await self.page(client, SnapQuerySolution.login_ui)

        @app.get("/orcid_callback")
        async def orcid_authenticate_callback(code: str):
            try:
                self.orcid_auth.login(code)
            except Exception as e:
                # the exception has to be raised (not returned) so that
                # the client actually receives a 401 response
                raise HTTPException(status_code=401, detail=str(e)) from e
            return RedirectResponse("/")

        @ui.page("/logout")
        async def logout(client: Client) -> RedirectResponse:
            # end whichever of the two login kinds is active
            if self.login.authenticated():
                await self.login.logout()
            if self.orcid_auth.authenticated():
                self.orcid_auth.logout()
            return RedirectResponse("/")

        @ui.page("/queries_by_namespace")
        async def queries_by_namespace(client: Client):
            return await self.page(client, SnapQuerySolution.queries_by_namespace)

        @ui.page("/query/{domain}/{namespace}/{name}")
        async def query_page(
            client: Client,
            domain: str,
            namespace: str,
            name: str,
            endpoint_name: str = None,
            limit: int = None,
            format: str = "html",
        ):
            """
            show the query page for the given domain, namespace and name
            """
            # fall back to the endpoint preferred by the user
            if endpoint_name is None:
                endpoint_name = SnapQuerySolution.get_user_endpoint()
            return await self.page(
                client,
                SnapQuerySolution.query_page,
                domain=domain,
                namespace=namespace,
                name=name,
                endpoint_name=endpoint_name,
                limit=limit,
                r_format_str=format,
            )

        @app.get("/api/endpoints")
        def get_endpoints():
            """
            list all endpoints
            """
            endpoints = self.nqm.endpoints
            return endpoints

        @app.get("/api/meta_query/{name}")
        def meta_query(name: str, limit: int = None):
            """
            run the meta query with the given name
            """
            # the name may carry a format suffix such as ".json"
            name, r_format = self.get_r_format(name, "json")
            if name not in self.nqm.meta_qm.queriesByName:
                raise HTTPException(status_code=404, detail=f"meta query {name} not known")
            query = self.nqm.meta_qm.queriesByName[name]
            qb = QueryBundle(named_query=None, query=query)
            # meta queries are run against the sql database of the named query manager
            qlod = self.nqm.sql_db.query(query.query)
            if limit:
                qlod = qlod[:limit]
            content = qb.format_result(qlod, r_format)
            # content=content.replace("\n", "<br>\n")
            if r_format == Format.html:
                return HTMLResponse(content)
            return PlainTextResponse(content)

        @app.get("/api/sparql/{domain}/{namespace}/{name}")
        def sparql(
            domain: str,
            namespace: str,
            name: str,
            endpoint_name: str = "wikidata",
            limit: int = None,
        ) -> PlainTextResponse:
            """
            Gets a SPARQL query by name within a specified namespace

            Args:
                domain (str): The domain identifying the domain of the query.
                namespace (str): The namespace identifying the group or category of the query.
                name (str): The specific name of the query to be executed.
                endpoint_name (str): the name of the endpoint to use
                limit (int): a limit to set, default=None
            Returns:
                PlainTextResponse: The plain text SPARQL code

            Note:
                errors raised while looking up the query are not caught here
            """
            query_name = QueryName(domain=domain, namespace=namespace, name=name)
            qb = self.nqm.get_query(query_name=query_name, endpoint_name=endpoint_name, limit=limit)
            sparql_query = qb.query.query
            return PlainTextResponse(sparql_query)

        @app.get("/api/query/{domain}/{namespace}/{name}")
        def query(
            domain: str,
            namespace: str,
            name: str,
            endpoint_name: str = "wikidata",
            limit: int = None,
        ) -> HTMLResponse:
            """
            Executes a SPARQL query by name within a specified namespace, formats the results, and returns them as an HTML response.

            Args:
                domain (str): The domain identifying the domain of the query.
                namespace (str): The namespace identifying the group or category of the query.
                name (str): The specific name of the query to be executed.
                endpoint_name (str): the name of the endpoint to use
                limit(int): a limit to set, default=None

            Returns:
                HTMLResponse: The HTML formatted response containing the results of the query execution.

            Raises:
                HTTPException: If the query cannot be found or fails to execute.
            """
            content = self.query(
                name=name,
                namespace=namespace,
                domain=domain,
                endpoint_name=endpoint_name,
                limit=limit,
            )
            if not content:
                raise HTTPException(status_code=500, detail="Could not create result")

            # Return the content as an HTML response
            return HTMLResponse(content)

    def get_r_format(self, name: str, default_format_str: str = "html") -> tuple[str, Format]:
        """
        get the result format from the given query name following the
        dot convention that <name>.<r_format_str> specifies the result format
        e.g. cats.json will ask for the json result format

        Args:
            name (str): the name of the query/meta query
            default_format_str (str): the name of the default format to use
                when the name has no dot suffix

        Returns:
            tuple[str, Format]: the name without the format suffix and the result format

        Note:
            the format is looked up with ``Format[...]`` - presumably an
            unknown format name raises a KeyError (confirm against Format)
        """
        # only the part after the last dot is taken as the format
        base_name, dot, suffix = name.rpartition(".")
        if dot:
            name, r_format_str = base_name, suffix
        else:
            r_format_str = default_format_str
        r_format = Format[r_format_str]
        return name, r_format

    def query(
        self,
        name: str,
        namespace: str,
        domain: str,
        endpoint_name: str = "wikidata",
        limit: int = None,
    ) -> str:
        """
        Run the named query and return the formatted result.

        Args:
            name (str): The name identifier of the query, optionally with a
                format suffix as understood by get_r_format.
            namespace (str): The namespace to which the query belongs.
            domain (str): The domain identifying the domain of the query.
            endpoint_name (str): The name of the endpoint to be used for the query. Defaults to 'wikidata'.
            limit (int): the limit for the query default: None

        Returns:
            str: the content retrieved

        Raises:
            HTTPException: with status 404 for any error that occurs
        """
        try:
            # content negotiation via the name suffix
            plain_name, r_format = self.get_r_format(name)
            bundle = self.nqm.get_query(
                query_name=QueryName(domain=domain, namespace=namespace, name=plain_name),
                endpoint_name=endpoint_name,
                limit=limit,
            )
            rows, stats = bundle.get_lod_with_stats()
            # keep track of the statistics of this execution
            self.nqm.store_stats([stats])
            return bundle.format_result(rows, r_format)
        except Exception as e:
            # every failure is reported to the caller as "not found"
            raise HTTPException(status_code=404, detail=str(e))

    def authenticated(self) -> bool:
        """
        Check if the user is authenticated via the local login or via ORCID.

        Returns:
            True if the user is authenticated, False otherwise.
        """
        local_login = self.login.authenticated()
        if local_login:
            return local_login
        return self.orcid_auth.authenticated()

`__init__()`

Constructs all the necessary attributes for the WebServer object.

Source code in snapquery/snapquery_webserver.py

def __init__(self):
    """
    Constructs all the necessary attributes for the WebServer object.

    Creates the login handlers and the named query manager and registers
    the UI pages and the REST API routes as closures over this instance.
    """
    InputWebserver.__init__(self, config=SnapQueryWebServer.get_config())
    users = Users("~/.solutions/snapquery")
    self.login = Login(self, users)
    self.orcid_auth = OrcidAuth(Path(self.config.base_path))
    self.nqm = NamedQueryManager.from_samples()

    @ui.page("/admin")
    async def admin(client: Client):
        # the admin page requires the local login - an ORCID login is not sufficient
        if not self.login.authenticated():
            return RedirectResponse("/login")
        return await self.page(client, SnapQuerySolution.admin_ui)

    @ui.page("/nominate")
    async def nominate(client: Client):
        return await self.page(client, SnapQuerySolution.nominate_ui)

    @ui.page("/stats")
    async def stats(client: Client):
        # any kind of login (local or ORCID) gives access to the statistics
        if not self.authenticated():
            return RedirectResponse("/login")
        return await self.page(client, SnapQuerySolution.stats_ui)

    @ui.page("/login")
    async def login(client: Client):
        return await self.page(client, SnapQuerySolution.login_ui)

    @app.get("/orcid_callback")
    async def orcid_authenticate_callback(code: str):
        try:
            self.orcid_auth.login(code)
        except Exception as e:
            # the exception has to be raised (not returned) so that
            # the client actually receives a 401 response
            raise HTTPException(status_code=401, detail=str(e)) from e
        return RedirectResponse("/")

    @ui.page("/logout")
    async def logout(client: Client) -> RedirectResponse:
        # end whichever of the two login kinds is active
        if self.login.authenticated():
            await self.login.logout()
        if self.orcid_auth.authenticated():
            self.orcid_auth.logout()
        return RedirectResponse("/")

    @ui.page("/queries_by_namespace")
    async def queries_by_namespace(client: Client):
        return await self.page(client, SnapQuerySolution.queries_by_namespace)

    @ui.page("/query/{domain}/{namespace}/{name}")
    async def query_page(
        client: Client,
        domain: str,
        namespace: str,
        name: str,
        endpoint_name: str = None,
        limit: int = None,
        format: str = "html",
    ):
        """
        show the query page for the given domain, namespace and name
        """
        # fall back to the endpoint preferred by the user
        if endpoint_name is None:
            endpoint_name = SnapQuerySolution.get_user_endpoint()
        return await self.page(
            client,
            SnapQuerySolution.query_page,
            domain=domain,
            namespace=namespace,
            name=name,
            endpoint_name=endpoint_name,
            limit=limit,
            r_format_str=format,
        )

    @app.get("/api/endpoints")
    def get_endpoints():
        """
        list all endpoints
        """
        endpoints = self.nqm.endpoints
        return endpoints

    @app.get("/api/meta_query/{name}")
    def meta_query(name: str, limit: int = None):
        """
        run the meta query with the given name
        """
        # the name may carry a format suffix such as ".json"
        name, r_format = self.get_r_format(name, "json")
        if name not in self.nqm.meta_qm.queriesByName:
            raise HTTPException(status_code=404, detail=f"meta query {name} not known")
        query = self.nqm.meta_qm.queriesByName[name]
        qb = QueryBundle(named_query=None, query=query)
        # meta queries are run against the sql database of the named query manager
        qlod = self.nqm.sql_db.query(query.query)
        if limit:
            qlod = qlod[:limit]
        content = qb.format_result(qlod, r_format)
        # content=content.replace("\n", "<br>\n")
        if r_format == Format.html:
            return HTMLResponse(content)
        return PlainTextResponse(content)

    @app.get("/api/sparql/{domain}/{namespace}/{name}")
    def sparql(
        domain: str,
        namespace: str,
        name: str,
        endpoint_name: str = "wikidata",
        limit: int = None,
    ) -> PlainTextResponse:
        """
        Gets a SPARQL query by name within a specified namespace

        Args:
            domain (str): The domain identifying the domain of the query.
            namespace (str): The namespace identifying the group or category of the query.
            name (str): The specific name of the query to be executed.
            endpoint_name (str): the name of the endpoint to use
            limit (int): a limit to set, default=None
        Returns:
            PlainTextResponse: The plain text SPARQL code

        Note:
            errors raised while looking up the query are not caught here
        """
        query_name = QueryName(domain=domain, namespace=namespace, name=name)
        qb = self.nqm.get_query(query_name=query_name, endpoint_name=endpoint_name, limit=limit)
        sparql_query = qb.query.query
        return PlainTextResponse(sparql_query)

    @app.get("/api/query/{domain}/{namespace}/{name}")
    def query(
        domain: str,
        namespace: str,
        name: str,
        endpoint_name: str = "wikidata",
        limit: int = None,
    ) -> HTMLResponse:
        """
        Executes a SPARQL query by name within a specified namespace, formats the results, and returns them as an HTML response.

        Args:
            domain (str): The domain identifying the domain of the query.
            namespace (str): The namespace identifying the group or category of the query.
            name (str): The specific name of the query to be executed.
            endpoint_name (str): the name of the endpoint to use
            limit(int): a limit to set, default=None

        Returns:
            HTMLResponse: The HTML formatted response containing the results of the query execution.

        Raises:
            HTTPException: If the query cannot be found or fails to execute.
        """
        content = self.query(
            name=name,
            namespace=namespace,
            domain=domain,
            endpoint_name=endpoint_name,
            limit=limit,
        )
        if not content:
            raise HTTPException(status_code=500, detail="Could not create result")

        # Return the content as an HTML response
        return HTMLResponse(content)

`authenticated()`

Check if the user is authenticated. Returns: True if the user is authenticated, False otherwise.

Source code in snapquery/snapquery_webserver.py

def authenticated(self) -> bool:
    """
    Check if the user is authenticated via the local login or via ORCID.

    Returns:
        True if the user is authenticated, False otherwise.
    """
    local_login = self.login.authenticated()
    if local_login:
        return local_login
    return self.orcid_auth.authenticated()

`get_config()` `classmethod`

get the configuration for this Webserver

Source code in snapquery/snapquery_webserver.py

@classmethod
def get_config(cls) -> WebserverConfig:
    """
    get the configuration for this Webserver

    Returns:
        WebserverConfig: the configuration obtained from
            WebserverConfig.get with SnapQuerySolution set as solution class
    """
    base_config = WebserverConfig(
        short_name="snapquery",
        copy_right="",
        version=Version(),
        default_port=9862,
        timeout=6.0,
    )
    resolved_config = WebserverConfig.get(base_config)
    resolved_config.solution_class = SnapQuerySolution
    return resolved_config

`get_r_format(name, default_format_str='html')`

get the result format from the given query name following the dot convention that `<name>.<r_format_str>` specifies the result format, e.g. `cats.json` will ask for the json result format

Parameters:

Name	Type	Description	Default
`name`	`str`	the name of the query/meta query	required
`default_format_str`	`str`	the name of the default format to use	`'html'`

Returns:

Name	Type	Description
`Format`	`Format`	the result format

Source code in snapquery/snapquery_webserver.py

def get_r_format(self, name: str, default_format_str: str = "html") -> tuple[str, Format]:
    """
    get the result format from the given query name following the
    dot convention that <name>.<r_format_str> specifies the result format
    e.g. cats.json will ask for the json result format

    Args:
        name (str): the name of the query/meta query
        default_format_str (str): the name of the default format to use
            when the name has no dot suffix

    Returns:
        tuple[str, Format]: the name without the format suffix and the result format

    Note:
        the format is looked up with ``Format[...]`` - presumably an
        unknown format name raises a KeyError (confirm against Format)
    """
    # only the part after the last dot is taken as the format
    base_name, dot, suffix = name.rpartition(".")
    if dot:
        name, r_format_str = base_name, suffix
    else:
        r_format_str = default_format_str
    r_format = Format[r_format_str]
    return name, r_format

`query(name, namespace, domain, endpoint_name='wikidata', limit=None)`

Queries an external API to retrieve data based on a given namespace and name.

Parameters:

Name	Type	Description	Default
`name`	`str`	The name identifier of the data to be queried.	required
`namespace`	`str`	The namespace to which the query belongs. It helps in categorizing the data.	required
`domain`	`str`	The domain identifying the domain of the query.	required
`endpoint_name`	`str`	The name of the endpoint to be used for the query. Defaults to 'wikidata'.	`'wikidata'`
`limit`	`int`	the limit for the query default: None	`None`
`Returns`		str: the content retrieved	required

Source code in snapquery/snapquery_webserver.py

def query(
    self,
    name: str,
    namespace: str,
    domain: str,
    endpoint_name: str = "wikidata",
    limit: int = None,
) -> str:
    """
    Run the named query and return the formatted result.

    Args:
        name (str): The name identifier of the query, optionally with a
            format suffix as understood by get_r_format.
        namespace (str): The namespace to which the query belongs.
        domain (str): The domain identifying the domain of the query.
        endpoint_name (str): The name of the endpoint to be used for the query. Defaults to 'wikidata'.
        limit (int): the limit for the query default: None

    Returns:
        str: the content retrieved

    Raises:
        HTTPException: with status 404 for any error that occurs
    """
    try:
        # content negotiation via the name suffix
        plain_name, r_format = self.get_r_format(name)
        bundle = self.nqm.get_query(
            query_name=QueryName(domain=domain, namespace=namespace, name=plain_name),
            endpoint_name=endpoint_name,
            limit=limit,
        )
        rows, stats = bundle.get_lod_with_stats()
        # keep track of the statistics of this execution
        self.nqm.store_stats([stats])
        return bundle.format_result(rows, r_format)
    except Exception as e:
        # every failure is reported to the caller as "not found"
        raise HTTPException(status_code=404, detail=str(e))

`sparql_analyzer`

`SparqlAnalyzer`

SPARQL Query Analyzer

Source code in snapquery/sparql_analyzer.py

class SparqlAnalyzer:
    """
    SPARQL Query Analyzer
    """

    BLAZEGRAPH_NAMED_SUBQUERY_PATTERN = r"""WITH[\s\n]*(#[\w\s://\.\n,]+)?{(#[\w\s://\.\n,]+)?[\s\n](?P<subquery>[\n\r\b\w\d:\t\.";,\{\)\(\?\}\W#]*?)\s+[Aa][Ss]\s+%(?P<name>[A-Za-z\d_]+)"""

    @classmethod
    def get_prefix_luts(cls) -> dict[str, str]:
        """
        Get the lookup table of the known SPARQL prefixes

        Returns:
            dict mapping each known prefix name to its namespace IRI;
            a new dict is created on every call
        """
        return {
            "biopax": "http://www.biopax.org/release/biopax-level3.owl#",
            "bd": "http://www.bigdata.com/rdf#",
            "cc": "http://creativecommons.org/ns#",
            "datacite": "http://purl.org/spar/datacite/",
            "dblp": "https://dblp.org/rdf/schema#",
            "dc": "http://purl.org/dc/elements/1.1/",
            "dct": "http://purl.org/dc/terms/",
            "freq": "http://purl.org/cld/freq/",
            "geo": "http://www.opengis.net/ont/geosparql#",
            "geof": "http://www.opengis.net/def/function/geosparql/",
            "geom": "http://geovocab.org/geometry#",
            "gpml": "http://vocabularies.wikipathways.org/gpml#",
            "litre": "http://purl.org/spar/literal/",
            "lgdo": "http://linkedgeodata.org/ontology/",
            "ontolex": "http://www.w3.org/ns/lemon/ontolex#",
            "orkgp": "http://orkg.org/orkg/predicate/",
            "orkgc": "http://orkg.org/orkg/class/",
            "orkgr": "http://orkg.org/orkg/resource/",
            "owl": "http://www.w3.org/2002/07/owl#",
            "p": "http://www.wikidata.org/prop/",
            "pav": "http://purl.org/pav/",
            "pq": "http://www.wikidata.org/prop/qualifier/",
            "pqn": "http://www.wikidata.org/prop/qualifier/value-normalized/",
            "pqv": "http://www.wikidata.org/prop/qualifier/value/",
            "pr": "http://www.wikidata.org/prop/reference/",
            "prn": "http://www.wikidata.org/prop/reference/value-normalized/",
            "prov": "http://www.w3.org/ns/prov#",
            "prv": "http://www.wikidata.org/prop/reference/value/",
            "ps": "http://www.wikidata.org/prop/statement/",
            "psn": "http://www.wikidata.org/prop/statement/value-normalized/",
            "psv": "http://www.wikidata.org/prop/statement/value/",
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "schema": "http://schema.org/",
            "skos": "http://www.w3.org/2004/02/skos/core#",
            "void": "http://rdfs.org/ns/void#",
            "vrank": "http://purl.org/voc/vrank#",
            "wd": "http://www.wikidata.org/entity/",
            "wdata": "http://www.wikidata.org/wiki/Special:EntityData/",
            "wdno": "http://www.wikidata.org/prop/novalue/",
            "wdref": "http://www.wikidata.org/reference/",
            "wds": "http://www.wikidata.org/entity/statement/",
            "wdt": "http://www.wikidata.org/prop/direct/",
            "wdtn": "http://www.wikidata.org/prop/direct-normalized/",
            "wdv": "http://www.wikidata.org/value/",
            "wikibase": "http://wikiba.se/ontology#",
            "wp": "http://vocabularies.wikipathways.org/wp#",
            "wprdf": "http://rdf.wikipathways.org/",
            "xsd": "http://www.w3.org/2001/XMLSchema#",
            "mwapi": "https://www.mediawiki.org/ontology#API/",
            "hint": "http://www.bigdata.com/queryHints#",
            "gas": "http://www.bigdata.com/rdf/gas#",
        }

    @classmethod
    def prefix_clause(cls, prefix: str, iri: str) -> str:
        """
        Provide SPARQL refix clause for given prefix and url
        Args:
            prefix: prefix name
            iri: iri

        Returns:
            prefix clause
        """
        return f"PREFIX {prefix}:  <{iri}>"

    @classmethod
    def extract_used_prefixes(cls, query: str) -> tuple[dict[str, str], set[str]]:
        """
        Extract the declared and the used prefixes from a SPARQL query
        Args:
            query: SPARQL query

        Returns:
            tuple of
            - dict of declared prefixes (prefix name to IRI) limited to
              prefixes that are declared more than once or are not part of
              the known prefix lookup table
            - set of the prefix names used in the query
        """
        # add prefixes to avoid parsing error due to missing prefix
        prefix_lut = cls.get_prefix_luts()
        prefixed_query = cls._add_prefixes(prefix_lut, query)
        parsed_query = parseQuery(prefixed_query)
        elements = parsed_query.as_list()
        defined_prefixes = []
        used_prefixes = []
        # walk the parse result: nested containers are appended to the list
        # that is being iterated so that they get visited later in this loop
        for element in elements:
            if isinstance(element, CompValue) and element.name == "PrefixDecl":
                defined_prefixes.append(element)
            elif isinstance(element, CompValue) and element.name == "pname":
                used_prefixes.append(element)
            elif isinstance(element, Iterable) and not isinstance(element, str):
                if isinstance(element, dict):
                    elements.extend(element.values())
                else:
                    elements.extend(element)
            else:
                pass
        # every known prefix has been prepended above, so a known prefix that
        # the query declares itself shows up more than once
        declared_prefix_counter = Counter([value.get("prefix") for value in defined_prefixes])
        multi_declarations = [prefix for prefix, count in declared_prefix_counter.items() if count > 1]
        used_prefix_names = {value.get("prefix") for value in used_prefixes}
        used_prefix_map = dict()
        # iterate in reverse so that for a prefix that was collected several
        # times the IRI of the first collected declaration ends up in the map
        for prefix_value in reversed(defined_prefixes):
            prefix_name = prefix_value.get("prefix")
            prefix_iri = prefix_value.get("iri")
            if prefix_name in multi_declarations or prefix_name not in prefix_lut:
                used_prefix_map[prefix_name] = str(prefix_iri)
        return used_prefix_map, used_prefix_names

    @classmethod
    def add_missing_prefixes(cls, query: str):
        """
        Add missing prefixes to SPARQL query

        Prefixes that are used but not declared in the query are looked up
        in the known prefixes and their declarations are put in front of
        the query. If anything fails the query is returned unchanged.

        Args:
            query: SPARQL query

        Returns:
            SPARQL query
        """
        try:
            # bring the query into a form the parser can handle
            parse_ready = query
            if cls.has_parameter(parse_ready):
                parse_ready = cls.fill_with_sample_query_parameters(parse_ready)
            if cls.has_blazegraph_with_clause(parse_ready):
                parse_ready = cls.transform_with_clause_to_subquery(parse_ready)
            declared, used = cls.extract_used_prefixes(parse_ready)
            missing = used - set(declared.keys())
            known_prefixes = cls.get_prefix_luts()
            unknown = missing.difference(known_prefixes.keys())
            if unknown:
                logger.error(
                    f"Prefix definitions missing for: {unknown} → Not all prefixes that are missing can be added"
                )
            to_add = {name: iri for name, iri in known_prefixes.items() if name in missing}
            # the declarations are added to the original, not the normalized query
            return cls._add_prefixes(to_add, query)
        except Exception as e:
            logger.debug("Adding missing prefixes to query failed → Unable to parse SPARQL query")
            logging.error(e)
            return query

    @classmethod
    def transform_with_clause_to_subquery(cls, query: str) -> str:
        """
        Transform blazegraph with clause to subquery statement
        Args:
            query:

        Returns:

        """
        match = re.search(cls.BLAZEGRAPH_NAMED_SUBQUERY_PATTERN, query)
        if match:
            subquery = match.group("subquery")
            name = match.group("name")
            start_pos, end_pos = match.span()
            # check if Where mus be added
            select_part = query[:start_pos]
            where_part = query[end_pos + 1 :]
            if cls.has_blazegraph_with_clause(where_part):
                where_part = cls.transform_with_clause_to_subquery(where_part)
            if where_part.lower().strip().startswith("where"):
                query_with_removed = select_part + where_part
            else:
                query_with_removed = f"{select_part}\nWHERE\n{where_part}"

            include_pattern = f"[Ii][Nn][Cc][Ll][Uu][Dd][Ee]\s+%{name}"
            subquery = f"{{{subquery}\n"
            query_transformed = re.sub(include_pattern, subquery, query_with_removed)
            return query_transformed

    @classmethod
    def has_blazegraph_with_clause(cls, query: str) -> bool:
        """
        Check if the given query has a WITH clause (named subquery)
        For details see https://github.com/blazegraph/database/wiki/NamedSubquery
        Args:
            query: SPARQL query

        Returns:
            True if the query has a WITH clause (named subquery)
        """
        match = re.search(cls.BLAZEGRAPH_NAMED_SUBQUERY_PATTERN, query)
        return True if match else False

    @classmethod
    def _add_prefixes(cls, prefixes: dict[str, str], query: str) -> str:
        """
        Add prefixes to SPARQL query
        Args:
            prefixes: prefixes to add
            query: SPARQL query

        Returns:
            SPARQL query with prefixes added
        """
        prefixes_clauses = [cls.prefix_clause(prefix, iri) for prefix, iri in prefixes.items()]
        prefixes_clauses_str = "\n".join(prefixes_clauses)
        return prefixes_clauses_str + "\n" + query

    @classmethod
    def has_parameter(cls, query: str) -> bool:
        """
        Tell whether the given query has parameters that need to be set
        Args:
            query: SPARQL query

        Returns:
            True if the query has parameters that need to be set
        """
        undeclared = cls.get_query_parameter(query)
        return bool(undeclared)

    @classmethod
    def get_query_parameter(cls, query: str) -> set[str]:
        """
        Get the names of the parameters of the given query
        Args:
            query: SPARQL query, parsed as a template

        Returns:
            set of the undeclared template variables found in the query
        """
        parsed_template = Environment().parse(query)
        return meta.find_undeclared_variables(parsed_template)

    @classmethod
    def fill_with_sample_query_parameters(cls, query: str) -> str:
        """
        Bind sample values to all parameters of the given SPARQL query
        Args:
            query: SPARQL query

        Returns:
            the query with sample values bound to its parameters; the
            query itself if it has no parameters
        """
        if cls.has_parameter(query):
            sample_values = cls._prepare_sample_parameter(cls.get_query_parameter(query))
            return cls.bind_parameters_to_query(query, sample_values)
        return query

    @classmethod
    def bind_parameters_to_query(cls, query: str, params: dict[str, str]) -> str:
        """
        Render the query as a template with the given parameter values
        Args:
            query: SPARQL query
            params: query params (parameter name to value)

        Returns:
            Query with parameters bound
        """
        return Template(query).render(**params)

    @classmethod
    def _prepare_sample_parameter(cls, parameter_names: set[str]) -> dict[str, str]:
        """
        Prepare sample query parameters
        """
        params = dict()
        for name in parameter_names:
            params[name] = f"Q{random.randint(1, 1000)}"
        return params

    @classmethod
    def is_valid(cls, query: str):
        """
        Tell whether the query can be prepared as a SPARQL query
        Args:
            query: SPARQL query

        Returns:
            True if query is valid SPARQL query
        """
        try:
            prepareQuery(query)
        except Exception as e:
            logger.debug(f"Query is not valid SPARQL query: {e}")
            return False
        return True

`add_missing_prefixes(query)` `classmethod`

Add missing prefixes to SPARQL query Args: query: SPARQL query

Returns:

Type	Description
	SPARQL query

Source code in snapquery/sparql_analyzer.py

@classmethod
def add_missing_prefixes(cls, query: str):
    """
    Add missing prefixes to SPARQL query

    The query is normalized for parsing first (sample values for
    parameters, blazegraph WITH clause rewritten as subquery). The
    prefix declarations are then added to the original query, not to
    the normalized one.

    Args:
        query: SPARQL query

    Returns:
        SPARQL query with the known missing prefixes added via _add_prefixes;
        the unchanged query if the analysis fails
    """
    try:
        # normalize query for parsing
        prepared_query = query
        if cls.has_parameter(prepared_query):
            prepared_query = cls.fill_with_sample_query_parameters(prepared_query)
        if cls.has_blazegraph_with_clause(prepared_query):
            prepared_query = cls.transform_with_clause_to_subquery(prepared_query)
        # extract used and declared prefixes
        declared_prefixes, used_prefixes = cls.extract_used_prefixes(prepared_query)
        missing_prefix_declarations = used_prefixes - set(declared_prefixes.keys())
        # look the prefix table up once and reuse it below
        known_prefixes = cls.get_prefix_luts()
        undefined_prefixes = missing_prefix_declarations.difference(known_prefixes.keys())
        if undefined_prefixes:
            logger.error(
                f"Prefix definitions missing for: {undefined_prefixes} → Not all prefixes that are missing can be added"
            )
        missing_prefix_declarations_lut = {
            key: value for key, value in known_prefixes.items() if key in missing_prefix_declarations
        }
        fixed_query = cls._add_prefixes(missing_prefix_declarations_lut, query)
    except Exception as e:
        # best effort: on any failure the query is returned as given
        logger.debug("Adding missing prefixes to query failed → Unable to parse SPARQL query")
        # report through the module logger like the other messages, not the root logger
        logger.error(e)
        fixed_query = query
    return fixed_query

`bind_parameters_to_query(query, params)` `classmethod`

Bind the parameters to the given query Args: query: SPARQL query params: query params

Returns:

Type	Description
`str`	Query with parameters bound

Source code in snapquery/sparql_analyzer.py

@classmethod
def bind_parameters_to_query(cls, query: str, params: dict[str, str]) -> str:
    """
    Render the given query as a template using the given parameter values

    Args:
        query: SPARQL query containing template placeholders
        params: parameter values keyed by parameter name

    Returns:
        the rendered query
    """
    return Template(query).render(**params)

`extract_used_prefixes(query)` `classmethod`

Extract used prefixes from SPARQL query Args: query: SPARQL query

Returns:

Type	Description
`tuple[dict[str, str], set[str]]`	dict of declared prefixes

Source code in snapquery/sparql_analyzer.py

@classmethod
def extract_used_prefixes(cls, query: str) -> tuple[dict[str, str], set[str]]:
    """
    Extract declared and used prefixes from SPARQL query

    Args:
        query: SPARQL query

    Returns:
        tuple of
        - dict mapping declared prefix name to its IRI, limited to prefixes
          that are declared more than once in the parsed query or that are
          not part of the prefix lookup table
        - set of the prefix names used in the query
    """
    # add prefixes to avoid parsing error due to missing prefix
    prefix_lut = cls.get_prefix_luts()
    prefixed_query = cls._add_prefixes(prefix_lut, query)
    parsed_query = parseQuery(prefixed_query)
    elements = parsed_query.as_list()
    defined_prefixes = []
    used_prefixes = []
    # the list is extended while it is iterated, so nested elements
    # appended below are visited later by this same loop
    for element in elements:
        if isinstance(element, CompValue) and element.name == "PrefixDecl":
            defined_prefixes.append(element)
        elif isinstance(element, CompValue) and element.name == "pname":
            used_prefixes.append(element)
        elif isinstance(element, Iterable) and not isinstance(element, str):
            # queue the children of containers for a later visit
            if isinstance(element, dict):
                elements.extend(element.values())
            else:
                elements.extend(element)
        else:
            pass
    declared_prefix_counter = Counter([value.get("prefix") for value in defined_prefixes])
    # NOTE(review): presumably a lookup-table prefix that the query also declares itself
    # shows up here, because _add_prefixes added a second declaration - confirm
    multi_declarations = [prefix for prefix, count in declared_prefix_counter.items() if count > 1]
    used_prefix_names = {value.get("prefix") for value in used_prefixes}
    used_prefix_map = dict()
    # iterating in reverse lets the earliest collected declaration of a prefix win
    for prefix_value in reversed(defined_prefixes):
        prefix_name = prefix_value.get("prefix")
        prefix_iri = prefix_value.get("iri")
        if prefix_name in multi_declarations or prefix_name not in prefix_lut:
            used_prefix_map[prefix_name] = str(prefix_iri)
    return used_prefix_map, used_prefix_names

`fill_with_sample_query_parameters(query)` `classmethod`

Fill the given SPARQL query with sample query parameters Args: query: SPARQL query

Returns:

Source code in snapquery/sparql_analyzer.py

@classmethod
def fill_with_sample_query_parameters(cls, query: str) -> str:
    """
    Replace the parameters of the given SPARQL query with sample values

    Args:
        query: SPARQL query

    Returns:
        the query unchanged if it has no parameters, otherwise the
        query with every parameter bound to a generated sample value
    """
    if cls.has_parameter(query):
        sample_values = cls._prepare_sample_parameter(cls.get_query_parameter(query))
        return cls.bind_parameters_to_query(query, sample_values)
    return query

`has_blazegraph_with_clause(query)` `classmethod`

Check if the given query has a WITH clause (named subquery) For details see https://github.com/blazegraph/database/wiki/NamedSubquery Args: query: SPARQL query

Returns:

Type	Description
`bool`	True if the query has a WITH clause (named subquery)

Source code in snapquery/sparql_analyzer.py

@classmethod
def has_blazegraph_with_clause(cls, query: str) -> bool:
    """
    Tell whether the given query contains a blazegraph WITH clause (named subquery)
    For details see https://github.com/blazegraph/database/wiki/NamedSubquery

    Args:
        query: SPARQL query

    Returns:
        True if BLAZEGRAPH_NAMED_SUBQUERY_PATTERN is found in the query
    """
    return re.search(cls.BLAZEGRAPH_NAMED_SUBQUERY_PATTERN, query) is not None

`has_parameter(query)` `classmethod`

Check if the given query has parameters that need to be set Args: query: SPARQL query

Returns:

Type	Description
`bool`	True if the query has parameters that need to be set

Source code in snapquery/sparql_analyzer.py

@classmethod
def has_parameter(cls, query: str) -> bool:
    """
    Tell whether the given query has parameters that need to be set

    Args:
        query: SPARQL query

    Returns:
        True if get_query_parameter finds at least one parameter
    """
    return bool(cls.get_query_parameter(query))

`is_valid(query)` `classmethod`

Check if query is valid SPARQL query Args: query: SPARQL query

Returns:

Type	Description
	True if query is valid SPARQL query

Source code in snapquery/sparql_analyzer.py

@classmethod
def is_valid(cls, query: str):
    """
    Check whether the given query can be prepared as a SPARQL query

    Args:
        query: SPARQL query

    Returns:
        True if preparing the query succeeds, False if it raises
    """
    try:
        prepareQuery(query)
    except Exception as e:
        # any failure while preparing counts as "not valid"
        logger.debug(f"Query is not valid SPARQL query: {e}")
        return False
    else:
        return True

`prefix_clause(prefix, iri)` `classmethod`

Provide SPARQL prefix clause for given prefix and url Args: prefix: prefix name iri: iri

Returns:

Type	Description
`str`	prefix clause

Source code in snapquery/sparql_analyzer.py

@classmethod
def prefix_clause(cls, prefix: str, iri: str) -> str:
    """
    Build the SPARQL PREFIX clause for the given prefix and IRI

    Args:
        prefix: prefix name
        iri: IRI the prefix stands for

    Returns:
        the prefix clause
    """
    clause = "PREFIX {}:  <{}>".format(prefix, iri)
    return clause

`transform_with_clause_to_subquery(query)` `classmethod`

Transform blazegraph with clause to subquery statement Args: query:

Returns:

Source code in snapquery/sparql_analyzer.py

@classmethod
def transform_with_clause_to_subquery(cls, query: str) -> str:
    """
    Transform blazegraph with clause to subquery statement

    The first named subquery matched by BLAZEGRAPH_NAMED_SUBQUERY_PATTERN
    is cut out of the query and its body is substituted for every
    ``INCLUDE %name`` reference. Named subqueries in the remainder of the
    query are transformed recursively.

    Args:
        query: SPARQL query, possibly using a blazegraph named subquery

    Returns:
        the transformed query; the unchanged query if no named subquery is found
    """
    match = re.search(cls.BLAZEGRAPH_NAMED_SUBQUERY_PATTERN, query)
    if not match:
        # nothing to transform - hand the query back instead of returning None
        return query
    subquery = match.group("subquery")
    name = match.group("name")
    start_pos, end_pos = match.span()
    # check if WHERE must be added
    select_part = query[:start_pos]
    where_part = query[end_pos + 1 :]
    if cls.has_blazegraph_with_clause(where_part):
        where_part = cls.transform_with_clause_to_subquery(where_part)
    if where_part.lower().strip().startswith("where"):
        query_with_removed = select_part + where_part
    else:
        query_with_removed = f"{select_part}\nWHERE\n{where_part}"

    # raw string keeps \s a regex escape; the name is escaped so it is matched literally
    include_pattern = rf"[Ii][Nn][Cc][Ll][Uu][Dd][Ee]\s+%{re.escape(name)}"
    replacement = f"{{{subquery}\n"
    # a callable replacement inserts the subquery text verbatim: backslashes in it
    # are not interpreted as escapes or group references
    query_transformed = re.sub(include_pattern, lambda _match: replacement, query_with_removed)
    return query_transformed

`stats_view`

`QueryStatsView`

display query statistics UI

Source code in snapquery/stats_view.py

class QueryStatsView:
    """
    view showing usage statistics of the stored queries as bar charts
    """

    def __init__(self, solution=None):
        """
        Constructor

        Args:
            solution: the web solution providing nqm and the ui container -
                without a solution no user interface is set up
        """
        self.solution = solution
        if not self.solution:
            return
        self.nqm = self.solution.nqm
        self.setup_ui()

    def setup_ui(self):
        """
        build the expansion panels and fill the statistics panel with the usage charts
        """
        with self.solution.container:
            stats_panel = ui.expansion(
                text="Statistics about the properties and items used in the stored queries",
                value=True,
            )
            with stats_panel:
                # column that receives the charts
                self.input_row = ui.column().classes("w-full")
                self.show_entity_usage()
                self.show_property_usage()
            with ui.expansion(text="Query Stats", value=True):
                ui.label("ToDo:")

    def show_entity_usage(self):
        """
        render a bar chart of the entity usage counts, highest count first
        """
        records = []
        for stat in QUERY_ITEM_STATS.get_entity_stats():
            records.append({"name": stat.label, "count": stat.count, "id": stat.identifier})
        frame = DataFrame.from_records(records)
        frame = frame.sort_values(by="count", ascending=False)
        chart = px.bar(frame, x="name", y="count", title="Entity usage in queries")
        with self.input_row:
            plot = ui.plotly(chart)
            plot.classes("w-full")

    def show_property_usage(self):
        """
        render a bar chart of the property usage counts, highest count first
        """
        records = []
        for stat in QUERY_ITEM_STATS.get_property_stats():
            records.append({"name": stat.label, "count": stat.count})
        frame = DataFrame.from_records(records)
        frame = frame.sort_values(by="count", ascending=False)
        chart = px.bar(frame, x="name", y="count", title="Property usage in queries")
        with self.input_row:
            plot = ui.plotly(chart)
            plot.classes("w-full")

`setup_ui()`

setup the user interface

Source code in snapquery/stats_view.py

def setup_ui(self):
    """
    build the expansion panels and fill the statistics panel with the usage charts
    """
    with self.solution.container:
        stats_panel = ui.expansion(
            text="Statistics about the properties and items used in the stored queries",
            value=True,
        )
        with stats_panel:
            # column that receives the charts
            self.input_row = ui.column().classes("w-full")
            self.show_entity_usage()
            self.show_property_usage()
        with ui.expansion(text="Query Stats", value=True):
            ui.label("ToDo:")

`show_entity_usage()`

show entity usage in the queries

Source code in snapquery/stats_view.py

def show_entity_usage(self):
    """
    render a bar chart of the entity usage counts, highest count first
    """
    records = []
    for stat in QUERY_ITEM_STATS.get_entity_stats():
        records.append({"name": stat.label, "count": stat.count, "id": stat.identifier})
    frame = DataFrame.from_records(records)
    frame = frame.sort_values(by="count", ascending=False)
    chart = px.bar(frame, x="name", y="count", title="Entity usage in queries")
    with self.input_row:
        plot = ui.plotly(chart)
        plot.classes("w-full")

`show_property_usage()`

show property usage in the queries

Source code in snapquery/stats_view.py

def show_property_usage(self):
    """
    render a bar chart of the property usage counts, highest count first
    """
    records = []
    for stat in QUERY_ITEM_STATS.get_property_stats():
        records.append({"name": stat.label, "count": stat.count})
    frame = DataFrame.from_records(records)
    frame = frame.sort_values(by="count", ascending=False)
    chart = px.bar(frame, x="name", y="count", title="Property usage in queries")
    with self.input_row:
        plot = ui.plotly(chart)
        plot.classes("w-full")

`version`

Created on 2024-05-01

@author: wf

`Version` `dataclass`

Version handling for snapquery

Source code in snapquery/version.py

@dataclass
class Version:
    """
    Version handling for snapquery
    """

    # the names below carry no type annotation, so they are plain class
    # attributes shared by all instances rather than dataclass fields
    name = "snapquery"
    # taken from the package so the version is maintained in one place
    version = snapquery.__version__
    date = "2024-05-03"
    updated = "2024-07-13"
    description = "Introduce Named Queries and Named Query Middleware to wikidata"

    authors = "Wolfgang Fahl"

    # documentation, discussion and source code locations
    doc_url = "https://wiki.bitplan.com/index.php/snapquery"
    chat_url = "https://github.com/WolfgangFahl/snapquery/discussions"
    cm_url = "https://github.com/WolfgangFahl/snapquery"

    license = """Copyright 2024 contributors. All rights reserved.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied."""

    # composed from the attributes above, which therefore must be defined first
    longDescription = f"""{name} version {version}
{description}

  Created by {authors} on {date} last updated {updated}"""

`wd_page_query_extractor`

Created on 2024-05-04 Author: tholzheim

`WikipediaQueryExtractor`

A class to handle the extraction and management of SPARQL queries from a Wikipedia page.

Source code in snapquery/wd_page_query_extractor.py

class WikipediaQueryExtractor:
    """
    A class to handle the extraction and management
    of SPARQL queries from a Wikipedia page.
    """

    def __init__(
        self,
        nqm: NamedQueryManager,
        base_url: str,
        domain: str,
        namespace: str,
        target_graph_name: str,
        template_name: str = "SPARQL",  # https://en.wikipedia.org/wiki/Template:SPARQL) - if None seek for short-urls
        debug: bool = False,
    ):
        """
        Constructor

        Args:
            nqm (NamedQueryManager): manager used by store_queries to persist the extracted queries
            base_url (str): URL of the wiki page to extract from
            domain (str): domain assigned to the extracted queries
            namespace (str): namespace assigned to the extracted queries
            target_graph_name (str): target graph name passed to the NamedQuerySet
            template_name (str): name of the wiki template holding the SPARQL query -
                if None, short URLs are searched for instead
            debug (bool): if True, log messages are printed
        """
        self.nqm = nqm
        self.base_url = base_url
        self.domain = domain
        self.namespace = namespace
        self.target_graph_name = target_graph_name
        self.template_name = template_name
        self.debug = debug
        self.logger = logging.getLogger("snapquery.wd_page_extractor.WikipediaQueryExtractor")

        self.named_query_list = NamedQuerySet(
            domain=self.domain,
            namespace=self.namespace,
            target_graph_name=self.target_graph_name,
        )
        self.errors = []

    def log(self, message: str, is_error: bool = False):
        """
        Print the message in debug mode and record it if it is an error.

        Args:
            message (str): the message to handle
            is_error (bool): if True the message is logged at debug level
                and appended to self.errors
        """
        if self.debug:
            print(message)
        if is_error:
            self.logger.debug(message)
            self.errors.append(message)

    def get_wikitext(self) -> str:
        """
        Get wiki text with SPARQL query examples.

        Returns:
            str: Raw wikitext of the page.
        """
        res = requests.get(f"{self.base_url}?action=raw")
        return res.text

    def sanitize_text(self, text: str) -> str:
        """
        General method to sanitize text by removing translation tags, comments,
        and other non-essential markup.

        Args:
            text (str): The text to be sanitized.

        Returns:
            str: The sanitized text.
        """
        # Remove <translate>...</translate> tags
        text = re.sub(r"<translate>(.*?)<\/translate>", r"\1", text, flags=re.DOTALL)
        # Remove <!--T:...--> tags
        text = re.sub(r"<!--T:\d+-->", "", text)
        # Strip whitespace that might be left at the beginning and end
        text = text.strip()
        return text

    def extract_query_from_wiki_markup(self, title: str, markup: str, sparql: str, url: str = None) -> NamedQuery:
        """
        Extracts a named query from wiki markup.

        This method processes the title, markup, and SPARQL query to create a NamedQuery object.
        It sanitizes the text, removes section headers from the description, and constructs
        a URL that points to the specific section of the Wikipedia page.

        Args:
            title (str): The title of the query section.
            markup (str): The wiki markup text containing the query description.
            sparql (str): The SPARQL query string.
            url (str): the url to assign - if not given derive from base_url and section title

        Returns:
            NamedQuery: A NamedQuery object containing the processed information.
        """
        desc = self.sanitize_text(markup)
        if desc:
            # Remove section headers
            desc = re.sub(r"\n*={2,4}.*?={2,4}\n*", "", desc)
            desc = desc.strip()
        title = self.sanitize_text(title)
        if url is None:
            # section anchor: blanks in the title become underscores
            url = f"{self.base_url}#{title.replace(' ', '_')}"
        named_query = NamedQuery(
            domain=self.domain,
            namespace=self.namespace,
            name=title,
            title=title,
            description=desc,
            url=url,
            sparql=sparql,
        )
        return named_query

    def extract_queries_from_wiki_markup(self, markup: str) -> List[NamedQuery]:
        """
        Extract named queries from the w.wiki short URLs found in the given markup.

        Every short URL is read via ShortUrl; the text before and after the
        URL (up to the next short URL) becomes the description. Queries whose
        id is already known, that cannot be read or that are empty are
        skipped and reported via log.

        Args:
            markup (str): the markup text to search for short URLs

        Returns:
            List[NamedQuery]: the queries added to named_query_list by this call
        """
        named_queries = []
        pattern = r"(.*?)(https?://w\.wiki/\S+)(.*?)(?=https?://w\.wiki/|\Z)"
        matches = re.findall(pattern, markup, re.DOTALL | re.MULTILINE)

        for pre_text, short_url, post_text in matches:
            self.log(f"Processing short URL: {short_url}")
            pre_text = pre_text.strip()
            post_text = post_text.strip()
            description = f"{pre_text} {post_text}".strip()
            short_url_instance = ShortUrl(short_url=short_url)

            title = short_url_instance.name
            query_name = QueryName(name=title, namespace=self.namespace, domain=self.domain)

            if query_name.query_id in self.named_query_list._query_dict:
                self.log(f"Query with ID {query_name.query_id} already exists. Skipping.", is_error=True)
                continue

            sparql_query = short_url_instance.read_query()
            if short_url_instance.error:
                self.log(f"Error reading query from {short_url}: {short_url_instance.error}", is_error=True)
                continue

            if sparql_query:
                query = self.extract_query_from_wiki_markup(
                    title=title,
                    markup=description,
                    sparql=sparql_query,
                    url=short_url_instance.short_url,
                )
                self.named_query_list.add(query)
                # also collect the query for the return value, which was always empty before
                named_queries.append(query)
                self.log(f"Added query: {title}")
            else:
                self.log(f"No query found for short URL {short_url}", is_error=True)

        if not self.debug and self.errors:
            self.logger.info(f"Encountered {len(self.errors)} errors during extraction. Set debug=True for more details.")

        return named_queries

    def extract_queries_from_section(self, section: Section):
        """
        Extract named queries from section.

        With a template_name set, the first argument of the section's
        matching template is taken as the SPARQL query; otherwise the
        section text is searched for short URLs.

        Args:
            section (Section): Wikitext section containing a SPARQL query.
        """
        if self.template_name:
            template = self.get_template(section.templates)
            # a template without any argument carries no query - skip it instead of failing
            if template and template.arguments:
                sparql = template.arguments[0].value
                if sparql:
                    query = self.extract_query_from_wiki_markup(
                        section.title, markup=section.plain_text(), sparql=sparql
                    )
                    self.named_query_list.add(query)
        else:
            markup = section.plain_text()
            self.extract_queries_from_wiki_markup(markup)

    def get_template(self, templates: list[Template]) -> Template:
        """
        Get template from the list of templates.

        Template names are compared ignoring leading and trailing whitespace,
        so a template written with a line break after its name still matches.

        Args:
            templates (list[Template]): List of Wikitext templates.

        Returns:
            Template: the template if exactly one matches template_name, otherwise None.
        """
        if self.template_name is None:
            return None
        wanted_name = self.template_name.strip()
        queries = [template for template in templates if template.name.strip() == wanted_name]
        return queries[0] if len(queries) == 1 else None

    def extract_queries(self, wikitext: str = None):
        """
        Extract all queries from the base_url page.

        Args:
            wikitext (str): the wikitext to parse - if None it is fetched via get_wikitext
        """
        if wikitext is None:
            wikitext = self.get_wikitext()
        parsed = wtp.parse(wikitext)
        for section in parsed.sections:
            self.extract_queries_from_section(section)

    def save_to_json(self, file_path: str):
        """
        Save the extracted named queries to a JSON file.

        Args:
           file_path (str): Path to the JSON file.
        """
        self.named_query_list.save_to_json_file(file_path, indent=2)

    def store_queries(self):
        """
        Store the named queries into the database.
        """
        self.nqm.store_named_query_list(self.named_query_list)

    def show_queries(self):
        """
        Pretty print the extracted queries followed by their count.
        """
        for query in self.named_query_list.queries:
            pprint.pprint(query)
        print(f"Found {len(self.named_query_list.queries)} queries")

    def show_errors(self):
        """
        Print the number of recorded errors followed by a numbered list of them.
        """
        print(f"{len(self.errors)} errors:")
        for i, error in enumerate(self.errors, start=1):
            print(f"{i:3}:{error}")

`__init__(nqm, base_url, domain, namespace, target_graph_name, template_name='SPARQL', debug=False)`

Constructor

Source code in snapquery/wd_page_query_extractor.py

def __init__(
    self,
    nqm: NamedQueryManager,
    base_url: str,
    domain: str,
    namespace: str,
    target_graph_name: str,
    template_name: str = "SPARQL",  # https://en.wikipedia.org/wiki/Template:SPARQL) - if None seek for short-urls
    debug: bool = False,
):
    """
    Constructor

    Args:
        nqm (NamedQueryManager): the named query manager to keep
        base_url (str): URL of the wiki page to extract from
        domain (str): domain of the named query set
        namespace (str): namespace of the named query set
        target_graph_name (str): target graph name of the named query set
        template_name (str): name of the wiki template holding the SPARQL query -
            if None, short URLs are searched for instead
        debug (bool): debug flag
    """
    # state that starts out empty
    self.errors = []
    self.logger = logging.getLogger("snapquery.wd_page_extractor.WikipediaQueryExtractor")

    # configuration handed in by the caller
    self.debug = debug
    self.template_name = template_name
    self.nqm = nqm
    self.base_url = base_url
    self.domain = domain
    self.namespace = namespace
    self.target_graph_name = target_graph_name

    # container collecting the extracted queries
    self.named_query_list = NamedQuerySet(
        domain=domain,
        namespace=namespace,
        target_graph_name=target_graph_name,
    )

`extract_queries(wikitext=None)`

Extract all queries from the base_url page.

Source code in snapquery/wd_page_query_extractor.py

def extract_queries(self, wikitext: str = None):
    """
    Extract the queries of every section of the page.

    Args:
        wikitext (str): the wikitext to parse - if None it is fetched via get_wikitext
    """
    source_text = self.get_wikitext() if wikitext is None else wikitext
    for section in wtp.parse(source_text).sections:
        self.extract_queries_from_section(section)

`extract_queries_from_section(section)`

Extract named queries from section.

Parameters:

Name	Type	Description	Default
`section`	`Section`	Wikitext section containing a SPARQL query.	required

Source code in snapquery/wd_page_query_extractor.py

def extract_queries_from_section(self, section: Section):
    """
    Extract named queries from section.

    With a template_name set, the first argument of the section's
    matching template is taken as the SPARQL query; otherwise the
    section text is searched for short URLs.

    Args:
        section (Section): Wikitext section containing a SPARQL query.
    """
    if self.template_name:
        template = self.get_template(section.templates)
        # a template without any argument carries no query - skip it instead of failing
        if template and template.arguments:
            sparql = template.arguments[0].value
            if sparql:
                query = self.extract_query_from_wiki_markup(
                    section.title, markup=section.plain_text(), sparql=sparql
                )
                self.named_query_list.add(query)
    else:
        markup = section.plain_text()
        self.extract_queries_from_wiki_markup(markup)

`extract_query_from_wiki_markup(title, markup, sparql, url=None)`

Extracts a named query from wiki markup.

This method processes the title, markup, and SPARQL query to create a NamedQuery object. It sanitizes the text, removes section headers from the description, and constructs a URL that points to the specific section of the Wikipedia page.

Parameters:

Name	Type	Description	Default
`title`	`str`	The title of the query section.	required
`markup`	`str`	The wiki markup text containing the query description.	required
`sparql`	`str`	The SPARQL query string.	required
`url`	`str`	the url to assign - if not given derive from base_url and section title	`None`

Returns:

Name	Type	Description
`NamedQuery`	`NamedQuery`	A NamedQuery object containing the processed information.

Note

The method sanitizes the title and description, removes section headers from the description, and constructs a URL with a section anchor based on the title.

Source code in snapquery/wd_page_query_extractor.py

def extract_query_from_wiki_markup(self, title: str, markup: str, sparql: str, url: str = None) -> NamedQuery:
    """
    Build a NamedQuery from a section title, its markup and a SPARQL query.

    Title and markup are sanitized, section headers are stripped from the
    description, and - unless a url is given - the url is derived from
    base_url with the title as section anchor.

    Args:
        title (str): The title of the query section.
        markup (str): The wiki markup text containing the query description.
        sparql (str): The SPARQL query string.
        url (str): the url to assign - if not given derive from base_url and section title

    Returns:
        NamedQuery: the named query; the sanitized title serves as both name and title.
    """
    description = self.sanitize_text(markup)
    if description:
        # drop "== header ==" style lines from the description
        header_free = re.sub(r"\n*={2,4}.*?={2,4}\n*", "", description)
        description = header_free.strip()
    clean_title = self.sanitize_text(title)
    target_url = url
    if target_url is None:
        # section anchor: blanks in the title become underscores
        anchor = clean_title.replace(" ", "_")
        target_url = f"{self.base_url}#{anchor}"
    return NamedQuery(
        domain=self.domain,
        namespace=self.namespace,
        name=clean_title,
        title=clean_title,
        description=description,
        url=target_url,
        sparql=sparql,
    )

`get_template(templates)`

Get template from the list of templates.

Parameters:

Name	Type	Description	Default
`templates`	`list[Template]`	List of Wikitext templates.	required

Returns:

Name	Type	Description
`Template`	`Template`	template if available, otherwise None.

Source code in snapquery/wd_page_query_extractor.py

def get_template(self, templates: list[Template]) -> Template:
    """
    Get template from the list of templates.

    Template names are compared ignoring leading and trailing whitespace,
    so a template written with a line break after its name still matches.

    Args:
        templates (list[Template]): List of Wikitext templates.

    Returns:
        Template: the template if exactly one matches template_name, otherwise None.
    """
    if self.template_name is None:
        return None
    wanted_name = self.template_name.strip()
    queries = [template for template in templates if template.name.strip() == wanted_name]
    return queries[0] if len(queries) == 1 else None

`get_wikitext()`

Get wiki text with SPARQL query examples.

Returns:

Name	Type	Description
`str`	`str`	Raw wikitext of the page.

Source code in snapquery/wd_page_query_extractor.py

def get_wikitext(self) -> str:
    """
    Fetch the raw wikitext of the page at base_url.

    Returns:
        str: text of the response to the "action=raw" request.
    """
    raw_url = f"{self.base_url}?action=raw"
    response = requests.get(raw_url)
    return response.text

`sanitize_text(text)`

General method to sanitize text by removing translation tags, comments, and other non-essential markup.

Parameters:

Name	Type	Description	Default
`text`	`str`	The text to be sanitized.	required

Returns:

Name	Type	Description
`str`	`str`	The sanitized text.

Source code in snapquery/wd_page_query_extractor.py

def sanitize_text(self, text: str) -> str:
    """
    Strip translation markup from the given text.

    Args:
        text (str): The text to be sanitized.

    Returns:
        str: the text without <translate> tags and <!--T:n--> markers,
        with surrounding whitespace removed.
    """
    # keep the content of <translate>...</translate>, drop the tags
    without_translate = re.sub(r"<translate>(.*?)<\/translate>", r"\1", text, flags=re.DOTALL)
    # drop the <!--T:...--> translation unit markers
    without_markers = re.sub(r"<!--T:\d+-->", "", without_translate)
    return without_markers.strip()

`save_to_json(file_path)`

Save the NamedQueryList to a JSON file.

Parameters:

Name	Type	Description	Default
`file_path`	`str`	Path to the JSON file.	required

Source code in snapquery/wd_page_query_extractor.py

def save_to_json(self, file_path: str):
    """
    Write the collected named queries to a JSON file with an indent of 2.

    Args:
       file_path (str): Path to the JSON file.
    """
    query_set = self.named_query_list
    query_set.save_to_json_file(file_path, indent=2)

`store_queries()`

Store the named queries into the database.

Source code in snapquery/wd_page_query_extractor.py

def store_queries(self):
    """
    Hand the collected named queries to the named query manager for storing.
    """
    query_set = self.named_query_list
    self.nqm.store_named_query_list(query_set)

`wd_short_url`

Created on 2024-05-12

@author: wf

`ShortIds`

short id handling

Source code in snapquery/wd_short_url.py

class ShortIds:
    """
    conversion and generation of short ids over a configurable character set
    """

    def __init__(
        self,
        base_chars: str = "123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz$",
    ):
        """
        Constructor

        Args:
            base_chars (str): the characters usable in an id - the position
                of a character is its digit value
        """
        self.base_chars = base_chars

    def id_to_int(self, id_str: str) -> int:
        """
        Interpret an ID string as a number in the base given by my character set.

        Args:
            id_str (str): The custom ID string to convert.

        Returns:
            int: The converted integer value (0 for an empty string).
        """
        radix = len(self.base_chars)
        total = 0
        for symbol in id_str:
            # shift the digits read so far and add the value of the current one
            total = total * radix + self.base_chars.index(symbol)
        return total

    def get_random(self, k: int = 4) -> str:
        """
        Create a random short id.

        Args:
            k (int): number of characters of the id

        Returns:
            str: k characters drawn at random from my character set
        """
        picked = random.choices(self.base_chars, k=k)
        return "".join(picked)

`get_random(k=4)`

get a random short id

Returns:

Name	Type	Description
`str`	`str`	a random short id

Source code in snapquery/wd_short_url.py

def get_random(self, k: int = 4) -> str:
    """
    Create a random short id.

    Args:
        k (int): number of characters of the id

    Returns:
        str: k characters drawn at random from my character set
    """
    picked = random.choices(self.base_chars, k=k)
    return "".join(picked)

`id_to_int(id_str)`

Convert an ID string to an integer using my base character set.

Parameters:

Name	Type	Description	Default
`id_str`	`str`	The custom ID string to convert.	required

Returns:

Name	Type	Description
`int`	`int`	The converted integer value.

Source code in snapquery/wd_short_url.py

def id_to_int(self, id_str: str) -> int:
    """
    Interpret an ID string as a number in the base given by my character set.

    Args:
        id_str (str): The custom ID string to convert.

    Returns:
        int: The converted integer value (0 for an empty string).
    """
    radix = len(self.base_chars)
    total = 0
    for symbol in id_str:
        # shift the digits read so far and add the value of the current one
        total = total * radix + self.base_chars.index(symbol)
    return total

`ShortUrl`

Handles operations related to wikidata and similar short URLs such as QLever. see https://meta.wikimedia.org/wiki/Wikimedia_URL_Shortener for details

Source code in snapquery/wd_short_url.py

class ShortUrl:
    """
    Handles operations related to wikidata and similar short URLs such as QLever.
    see https://meta.wikimedia.org/wiki/Wikimedia_URL_Shortener for details
    """

    # rate limit applied to fetch_final_url
    # see https://stackoverflow.com/questions/62396801/how-to-handle-too-many-requests-on-wikidata-using-sparqlwrapper
    CALLS_PER_MINUTE = 30
    ONE_MINUTE = 60

    def __init__(self, short_url: str, scheme: str = "https", netloc: str = "query.wikidata.org"):
        """
        Constructor

        Args:
            short_url (str): The URL to be processed.
            scheme (str): URL scheme to be used (e.g., 'https' or 'http') for validating URL format.
            netloc (str): Network location part of the URL, typically the domain name, to be used for validating URL format.
        """

        self.short_url = short_url
        self.scheme = scheme
        self.netloc = netloc
        # filled by fetch_final_url (url, error) and read_query (sparql)
        self.url = None
        self.sparql = None
        self.error = None
        self.user_agent = self.get_user_agent()

    @staticmethod
    def get_user_agent():
        """
        Build the User-Agent header value sent with my requests.

        Returns:
            str: name, version, url and authors taken from Version
                followed by the version of the requests library
        """
        version = Version()
        return f"{version.name}/{version.version} ({version.cm_url}; {version.authors}) Python-requests/{requests.__version__}"

    @property
    def name(self):
        """
        Extracts and returns the name part of the short URL.

        Returns:
            str: The part of the short URL after the last '/',
                or None if no short URL is set.
        """
        # Assuming the short URL ends with the name part after the last '/'
        if self.short_url:
            name_part = self.short_url.rsplit("/", 1)[-1]
            return name_part
        return None

    @classmethod
    def get_prompt_text(cls, sparql: str) -> str:
        """
        Create the LLM prompt asking for name, title and description of a query.

        Args:
            sparql (str): the SPARQL query to describe

        Returns:
            str: the prompt text with the query appended
        """
        # the example must itself be valid JSON (comma after every entry but the last)
        prompt_text = f"""give an english name, title and description in json 
for cut &paste for the SPARQL query below- the name should be less than 60 chars be a proper identifier which has no special chars so it can be used in an url without escaping. The title should be less than 80 chars and the 
description not more than three lines of 80 chars. 
A valid example result would be e.g.
{{
  "name": "Locations_in_Rennes_with_French_Wikipedia_Article",
  "title": "Locations in Rennes with a French Wikipedia Article",
  "description": "Maps locations in Rennes linked to French Wikipedia articles. It displays entities within 10 km of Rennes' center, showing their names, coordinates, and linked Wikipedia pages. The results include entities' identifiers, coordinates, and related site links."
}}

The example is just an example - do not use it's content if it does not match. 
Avoid  hallucinating and stick to the facts.
If the you can not determine a proper name, title and description return {{}}
SPARQL: {sparql}
"""
        return prompt_text

    @classmethod
    def get_random_query_list(
        cls,
        namespace: str,
        count: int,
        max_postfix: str = "9pfu",
        with_llm: bool = False,
        with_progress: bool = False,
        debug: bool = False,
    ) -> NamedQuerySet:
        """
        Read a specified number of random queries from a list of short URLs.

        Random short ids are tried until count distinct URLs delivered a
        query or the retry budget (count * 15) is used up.

        Args:
            namespace(str): the name to use for the named query list
            count (int): Number of random URLs to fetch.
            max_postfix(str): the maximum ID to try
            with_llm(bool): if True ask the LLM for name, title and description of each query
            with_progress(bool): if True show progress
            debug(bool): if True print debug information instead of progress dots

        Returns:
            NamedQuerySet: A NamedQuerySet containing the queries read from the URLs.
        """
        if with_llm:
            llm = LLM(model="gpt-4")
        short_ids = ShortIds()
        base_url = "https://w.wiki/"
        unique_urls = set()
        unique_names = set()

        nq_set = NamedQuerySet(domain="wikidata.org", namespace=namespace, target_graph_name="wikidata")
        # heuristic factor for probability that a short url points to a wikidata entry - 14 has worked so far
        give_up = count * 15
        max_short_int = short_ids.id_to_int(max_postfix)
        while len(unique_urls) < count and give_up > 0:
            if with_progress and not debug:
                print(".", end="")
                if give_up % 80 == 0:
                    print()
            # draw a random short id and skip ids beyond the maximum
            postfix = short_ids.get_random()
            if short_ids.id_to_int(postfix) > max_short_int:
                continue
            if debug:
                print(f"{give_up:4}:{postfix}")
            wd_short_url = f"{base_url}{postfix}"
            if wd_short_url in unique_urls:
                # the same id was drawn again: do not fetch and append it twice;
                # consume budget so the loop ends even if no new ids are left
                give_up -= 1
                continue
            short_url = cls(short_url=wd_short_url)
            short_url.read_query()
            if short_url.sparql and not short_url.error:
                nq = NamedQuery(
                    domain=nq_set.domain,
                    name=postfix,
                    namespace=nq_set.namespace,
                    url=wd_short_url,
                    sparql=short_url.sparql,
                )
                if with_llm:
                    try:
                        llm_response = llm.ask(cls.get_prompt_text(short_url.sparql))
                        if llm_response:
                            response_json = json.loads(llm_response)
                            name = response_json.get("name", None)
                            if name in unique_names:
                                # try again with a different url to avoid name clash
                                give_up -= 1
                                continue
                            if name:
                                nq.name = name
                            title = response_json.get("title", "")
                            description = response_json.get("description", "")
                            nq.title = title
                            nq.description = description
                            # re-run the dataclass post init after changing the fields
                            nq.__post_init__()
                    except Exception as ex:
                        if debug:
                            print(f"Failed to get LLM response: {str(ex)}")
                        continue
                nq_set.queries.append(nq)
                unique_urls.add(nq.url)
                unique_names.add(nq.name)
                if debug:
                    print(nq)
            else:
                give_up -= 1
        return nq_set

    @sleep_and_retry
    @limits(calls=CALLS_PER_MINUTE, period=ONE_MINUTE)
    def fetch_final_url(self, timeout: float = 10.0):
        """
        Follow the redirection to get the final URL with rate limiting.

        Any exception (including a timeout) is stored in self.error
        instead of being raised.

        Args:
            timeout (float): seconds to wait for the server before giving up

        Returns:
            str: The final URL after redirection, None if the request failed.
        """
        try:
            headers = {"User-Agent": self.user_agent}
            # without a timeout a stalled server would block forever
            response = requests.get(self.short_url, headers=headers, allow_redirects=True, timeout=timeout)
            response.raise_for_status()
            self.url = response.url
        except Exception as ex:
            self.error = ex
        return self.url

    def read_query(self) -> str:
        """
        Read a query from a short URL.

        The redirect target must match my scheme and netloc. The query is
        taken from the URL fragment if there is one, otherwise from the
        'query' parameter of the query string.

        Returns:
            str: The SPARQL query extracted from the short URL, None if there is none.
        """
        self.fetch_final_url()
        if self.url:
            parsed_url = urllib.parse.urlparse(self.url)
            if parsed_url.scheme == self.scheme and parsed_url.netloc == self.netloc:
                if parsed_url.fragment:
                    self.sparql = urllib.parse.unquote(parsed_url.fragment)
                else:
                    query_params = urllib.parse.parse_qs(parsed_url.query)
                    if "query" in query_params:
                        self.sparql = query_params["query"][0]
        return self.sparql

`name` `property`

Extracts and returns the name part of the short URL.

Returns:

Name	Type	Description
`str`		The name part of the short URL.

`__init__(short_url, scheme='https', netloc='query.wikidata.org')`

Constructor

Parameters:

Name	Type	Description	Default
`short_url`	`str`	The URL to be processed.	required
`scheme`	`str`	URL scheme to be used (e.g., 'https' or 'http') for validating URL format.	`'https'`
`netloc`	`str`	Network location part of the URL, typically the domain name, to be used for validating URL format.	`'query.wikidata.org'`

Source code in snapquery/wd_short_url.py

def __init__(self, short_url: str, scheme: str = "https", netloc: str = "query.wikidata.org"):
    """
    Create a short URL handler.

    Args:
        short_url (str): The URL to be processed.
        scheme (str): URL scheme to be used (e.g., 'https' or 'http') for validating URL format.
        netloc (str): Network location part of the URL, typically the domain name, to be used for validating URL format.
    """
    # nothing has been fetched yet
    self.url = self.sparql = self.error = None
    self.short_url, self.scheme, self.netloc = short_url, scheme, netloc
    self.user_agent = self.get_user_agent()

`fetch_final_url()`

Follow the redirection to get the final URL with rate limiting.

Returns:

Name	Type	Description
`str`		The final URL after redirection.

Source code in snapquery/wd_short_url.py

@sleep_and_retry
@limits(calls=CALLS_PER_MINUTE, period=ONE_MINUTE)
def fetch_final_url(self, timeout: float = 10.0):
    """
    Follow the redirection to get the final URL with rate limiting.

    Any exception (including a timeout) is stored in self.error
    instead of being raised; self.url is left unchanged in that case.

    Args:
        timeout (float): seconds to wait for the server before giving up

    Returns:
        str: The final URL after redirection.
    """
    try:
        headers = {"User-Agent": self.user_agent}
        # without a timeout a stalled server would block forever
        response = requests.get(self.short_url, headers=headers, allow_redirects=True, timeout=timeout)
        response.raise_for_status()
        self.url = response.url
    except Exception as ex:
        self.error = ex
    return self.url

`get_random_query_list(namespace, count, max_postfix='9pfu', with_llm=False, with_progress=False, debug=False)` `classmethod`

Read a specified number of random queries from a list of short URLs.

Parameters:

Name	Type	Description	Default
`namespace`	`str`	the name to use for the named query list	required
`count`	`int`	Number of random URLs to fetch.	required
`max_postfix`	`str`	the maximum ID to try	`'9pfu'`
`with_progress`	`bool`	if True show progress	`False`

Returns:

Name	Type	Description
`NamedQuerySet`	`NamedQuerySet`	A NamedQuerySet containing the queries read from the URLs.

Source code in snapquery/wd_short_url.py

@classmethod
def get_random_query_list(
    cls,
    namespace: str,
    count: int,
    max_postfix: str = "9pfu",
    with_llm: bool = False,
    with_progress: bool = False,
    debug: bool = False,
) -> NamedQuerySet:
    """
    Read a specified number of random queries from a list of short URLs.

    Random short ids are tried until count distinct URLs delivered a
    query or the retry budget (count * 15) is used up.

    Args:
        namespace(str): the name to use for the named query list
        count (int): Number of random URLs to fetch.
        max_postfix(str): the maximum ID to try
        with_llm(bool): if True ask the LLM for name, title and description of each query
        with_progress(bool): if True show progress
        debug(bool): if True print debug information instead of progress dots

    Returns:
        NamedQuerySet: A NamedQuerySet containing the queries read from the URLs.
    """
    if with_llm:
        llm = LLM(model="gpt-4")
    short_ids = ShortIds()
    base_url = "https://w.wiki/"
    unique_urls = set()
    unique_names = set()

    nq_set = NamedQuerySet(domain="wikidata.org", namespace=namespace, target_graph_name="wikidata")
    # heuristic factor for probability that a short url points to a wikidata entry - 14 has worked so far
    give_up = count * 15
    max_short_int = short_ids.id_to_int(max_postfix)
    while len(unique_urls) < count and give_up > 0:
        if with_progress and not debug:
            print(".", end="")
            if give_up % 80 == 0:
                print()
        # draw a random short id and skip ids beyond the maximum
        postfix = short_ids.get_random()
        if short_ids.id_to_int(postfix) > max_short_int:
            continue
        if debug:
            print(f"{give_up:4}:{postfix}")
        wd_short_url = f"{base_url}{postfix}"
        if wd_short_url in unique_urls:
            # the same id was drawn again: do not fetch and append it twice;
            # consume budget so the loop ends even if no new ids are left
            give_up -= 1
            continue
        short_url = cls(short_url=wd_short_url)
        short_url.read_query()
        if short_url.sparql and not short_url.error:
            nq = NamedQuery(
                domain=nq_set.domain,
                name=postfix,
                namespace=nq_set.namespace,
                url=wd_short_url,
                sparql=short_url.sparql,
            )
            if with_llm:
                try:
                    llm_response = llm.ask(cls.get_prompt_text(short_url.sparql))
                    if llm_response:
                        response_json = json.loads(llm_response)
                        name = response_json.get("name", None)
                        if name in unique_names:
                            # try again with a different url to avoid name clash
                            give_up -= 1
                            continue
                        if name:
                            nq.name = name
                        title = response_json.get("title", "")
                        description = response_json.get("description", "")
                        nq.title = title
                        nq.description = description
                        # re-run the dataclass post init after changing the fields
                        nq.__post_init__()
                except Exception as ex:
                    if debug:
                        print(f"Failed to get LLM response: {str(ex)}")
                    continue
            nq_set.queries.append(nq)
            unique_urls.add(nq.url)
            unique_names.add(nq.name)
            if debug:
                print(nq)
        else:
            give_up -= 1
    return nq_set

`read_query()`

Read a query from a short URL.

Returns:

Name	Type	Description
`str`	`str`	The SPARQL query extracted from the short URL.

Source code in snapquery/wd_short_url.py

def read_query(self) -> str:
    """
    Resolve my short URL and extract the SPARQL query it points to.

    The redirect target must match my scheme and netloc. The query is
    taken from the URL fragment if there is one, otherwise from the
    'query' parameter of the query string.

    Returns:
        str: The SPARQL query extracted from the short URL.
    """
    self.fetch_final_url()
    if not self.url:
        return self.sparql

    target = urllib.parse.urlparse(self.url)
    if (target.scheme, target.netloc) != (self.scheme, self.netloc):
        # redirect leads to a different service
        return self.sparql

    if target.fragment:
        self.sparql = urllib.parse.unquote(target.fragment)
        return self.sparql

    query_values = urllib.parse.parse_qs(target.query).get("query")
    if query_values:
        self.sparql = query_values[0]
    return self.sparql

snapquery API Documentation

basequeryview

BaseQueryView

on_search_change(_args=None) async

perform_search() async

setup_ui()

show_lod(q_lod)

ceurws

CeurWSQueries

__init__(nqm, debug=False)

extract_queries(limit=None)

save_to_json(file_path='/tmp/ceurws-queries.json')

store_queries()

dblp

DblpPersonLookup

search(name_part, limit=10)

endpoint

Endpoint

__post_init__()

get_samples() classmethod

EndpointManager

get_endpoint(name)

error_filter

ErrorFilter

categorize_error()

get_message(for_html=True)

execution

Execution

__init__(nqm, debug=False)

execute(nq, endpoint_name, title, context='test', prefix_merger=QueryPrefixMerger.SIMPLE_MERGER)

graph

Graph

__post_init__()

get_samples() classmethod

GraphManager

get_graph(name)

models

person

Affiliation dataclass

Person dataclass

has_pid: bool property

merge_with(other)

share_identifier(other)

PersonName dataclass

parse_label()

mwlogin

Login

complete_login(response_qs)

identify_user()

initiate_login()

namespace_stats_view

NamespaceStatsView

__init__(solution)

execute_queries(namespace, endpoint_name, domain)

fetch_query_lod()

on_cell_clicked(event) async

on_fetch_lod(_args=None) async

process_stats_lod(raw_lod)

setup_ui()

orcid

OrcidAccessToken

OrcidAuth

logout()

OrcidConfig

OrcidSearchParams dataclass

params_view

ParamsView

__init__(solution, params)

close()

get_dict_edit()

open()

person_selector

PersonSelector

__init__(solution, selection_callback, limit=10)

load_person_suggestions(search_name) async

merge_and_update_suggestions(new_persons)

person_selection()

select_person_suggestion(person)

suggest_persons() async

update_suggestions_view()

`basequeryview`

`BaseQueryView`

`on_search_change(_args=None)` `async`

`perform_search()` `async`

`setup_ui()`

`show_lod(q_lod)`

`ceurws`

`CeurWSQueries`

`__init__(nqm, debug=False)`

`extract_queries(limit=None)`

`save_to_json(file_path='/tmp/ceurws-queries.json')`

`store_queries()`

`dblp`

`DblpPersonLookup`

`search(name_part, limit=10)`

`endpoint`

`Endpoint`

`__post_init__()`

`get_samples()` `classmethod`

`EndpointManager`

`get_endpoint(name)`

`error_filter`

`ErrorFilter`

`categorize_error()`

`get_message(for_html=True)`

`execution`

`Execution`

`__init__(nqm, debug=False)`

`execute(nq, endpoint_name, title, context='test', prefix_merger=QueryPrefixMerger.SIMPLE_MERGER)`

`graph`

`Graph`

`__post_init__()`

`get_samples()` `classmethod`

`GraphManager`

`get_graph(name)`

`models`

`person`

`Affiliation` `dataclass`

`Person` `dataclass`

`has_pid: bool` `property`

`merge_with(other)`

`share_identifier(other)`

`PersonName` `dataclass`

`parse_label()`

`mwlogin`

`Login`

`complete_login(response_qs)`

`identify_user()`

`initiate_login()`

`namespace_stats_view`

`NamespaceStatsView`

`__init__(solution)`

`execute_queries(namespace, endpoint_name, domain)`

`fetch_query_lod()`

`on_cell_clicked(event)` `async`

`on_fetch_lod(_args=None)` `async`

`process_stats_lod(raw_lod)`

`setup_ui()`

`orcid`

`OrcidAccessToken`

`OrcidAuth`

`logout()`

`OrcidConfig`

`OrcidSearchParams` `dataclass`

`params_view`

`ParamsView`

`__init__(solution, params)`

`close()`

`get_dict_edit()`

`open()`

`person_selector`

`PersonSelector`

`__init__(solution, selection_callback, limit=10)`

`load_person_suggestions(search_name)` `async`

`merge_and_update_suggestions(new_persons)`

`person_selection()`

`select_person_suggestion(person)`

`suggest_persons()` `async`

`update_suggestions_view()`

`PersonSuggestion`