Skip to content

pyLodStorage API Documentation

docstring_parser

Created on 2024-01-21

@author: wf

DocstringParser

A Python docstring parser.

Source code in lodstorage/docstring_parser.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
class DocstringParser:
    """
    A Python docstring parser.

    Uses a pyparsing grammar to split a Google-style class docstring into a
    class description and the entries of an "Attributes:" section written as
    ``name (type): description`` lines.
    """

    def __init__(self):
        # Define basic elements
        # attribute name: a letter followed by alphanumerics/underscores
        identifier = Word(alphas, alphanums + "_")
        # type names may contain dots and brackets, e.g. typing.List[str]
        type_identifier = Word(alphas, alphanums + "_.[]")
        # a description runs to the end of its line
        description = restOfLine

        # Define patterns for capturing attributes
        # the "Attributes:" header itself is matched but not captured
        attribute_start = Suppress(Literal("Attributes:"))
        # one attribute entry: name (type): description - "(" and "):" are dropped
        self.attribute = Group(
            identifier("name")
            + Suppress("(")
            + Optional(type_identifier("type"))
            + Suppress("):")
            + description("description")
        )

        # Define pattern for class docstring
        # first line is the class description, optionally followed by attributes
        class_docstring = restOfLine("class_description") + Optional(
            attribute_start + OneOrMore(self.attribute)("attributes")
        )

        # Updated class_docstring pattern to correctly handle multi-line class descriptions
        # NOTE(review): this extends the grammar with a second alternative that
        # swallows further description lines (any line that is not the
        # "Attributes:" header) before the attribute section; the result names
        # "class_description" and "attributes" are bound a second time -
        # presumably intentional, confirm against the parser's tests
        self.class_docstring = class_docstring + Optional(
            OneOrMore(~attribute_start + restOfLine)("class_description")
            + attribute_start
            + OneOrMore(self.attribute)("attributes")
        )

    def parse(self, docstring: str):
        """
        Parse the given docstring.

        Args:
            docstring (str): the docstring text to parse

        Returns:
            tuple: (class_description, attributes) where class_description is
            the joined description text and attributes maps each attribute
            name to a dict with "type" and "description" entries
        """
        result = self.class_docstring.parseString(docstring, parseAll=True)
        # multiple captured description lines are joined into one string
        class_description = " ".join(result.class_description).strip()
        attributes = {
            attr.name: {"type": attr.type, "description": attr.description.strip()}
            for attr in result.attributes
        }
        return class_description, attributes

parse(docstring)

Parse the given docstring.

Source code in lodstorage/docstring_parser.py
53
54
55
56
57
58
59
60
61
62
63
def parse(self, docstring: str):
    """
    Parse the given docstring into a description and an attribute map.

    Args:
        docstring (str): the docstring text to parse

    Returns:
        tuple: (class_description, attributes) where attributes maps each
        attribute name to a dict with "type" and "description" entries
    """
    parsed = self.class_docstring.parseString(docstring, parseAll=True)
    class_description = " ".join(parsed.class_description).strip()
    attributes = {}
    for attr in parsed.attributes:
        attributes[attr.name] = {
            "type": attr.type,
            "description": attr.description.strip(),
        }
    return class_description, attributes

exception_handler

Created on 2025-12-01

@author: wf

ExceptionHandler

handle exceptions

Source code in lodstorage/exception_handler.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
class ExceptionHandler:
    """
    handle exceptions
    """

    @classmethod
    def handle(cls, msg: str, ex: Exception, debug: bool = False):
        """Centralized exception logging (non-fatal).

        Logs a warning that combines the context message with the exception
        text and optionally prints the full traceback.

        Args:
            msg: context message
            ex: the exception caught
            debug: if True, print full traceback
        """
        logging.warning(f"{msg}: {ex}")
        if debug:
            traceback.print_exc()

handle(msg, ex, debug=False) classmethod

Centralized exception logging (non-fatal).

Parameters:

Name Type Description Default
msg str

context message

required
ex Exception

the exception caught

required
debug bool

if True, print full traceback

False
Source code in lodstorage/exception_handler.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
@classmethod
def handle(cls, msg: str, ex: Exception, debug: bool = False):
    """Centralized exception logging (non-fatal).

    Logs a warning combining the context message and the exception text;
    optionally prints the full traceback.

    Args:
        msg: context message
        ex: the exception caught
        debug: if True, print full traceback
    """
    logging.warning(f"{msg}: {ex}")
    if debug:
        traceback.print_exc()

linkml

Created on 2024-01-28

@author: wf

Class

Represents a class in the LinkML schema.

Source code in lodstorage/linkml.py
26
27
28
29
30
31
32
33
@lod_storable
class Class:
    """
    Represents a class in the LinkML schema.
    """

    # human readable description of the class
    description: str
    # the slots (fields/properties) belonging to this class
    # NOTE(review): LinkMLGen.gen_schema appends slot *names* (strings) to this
    # list, so the List[Slot] annotation may not match actual content - confirm
    slots: List[Slot]

PythonTypes

python type handling

Source code in lodstorage/linkml.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
class PythonTypes:
    """
    python type handling
    """

    # Python type -> LinkML range name
    to_linkml_ranges = {
        str: "string",
        int: "integer",
        float: "float",
        bool: "boolean",
        list: "list",
        dict: "dictionary",
    }
    # Python type -> RDF (XSD) datatype
    to_rdf_datatypes = {
        str: XSD.string,
        int: XSD.integer,
        float: XSD.float,
        bool: XSD.boolean,
        # Add more mappings if needed
    }

    @classmethod
    def get_linkml_range(cls, ptype: Type) -> str:
        """
        Map a Python type to its LinkML range.

        Args:
            ptype (Type): the Python type to be mapped.

        Returns:
            str: the matching LinkML range; falls back to "string" for
            unknown types.
        """
        if ptype in cls.to_linkml_ranges:
            return cls.to_linkml_ranges[ptype]
        return "string"

    @classmethod
    def get_rdf_datatype(cls, ptype: Type) -> Optional[XSD]:
        """
        Map a Python type to its RDF (XSD) datatype.

        Args:
            ptype (Type): the Python type to be mapped.

        Returns:
            XSD: the matching XSD datatype, or None for unmapped types.
        """
        if ptype in cls.to_rdf_datatypes:
            return cls.to_rdf_datatypes[ptype]
        return None

get_linkml_range(ptype) classmethod

Determines the LinkML range for a given Python type.

Parameters:

Name Type Description Default
ptype Type

The Python type for which the LinkML range is required.

required

Returns:

Name Type Description
str str

The corresponding LinkML range as a string. Defaults to "string" if the type is not found.

Source code in lodstorage/linkml.py
113
114
115
116
117
118
119
120
121
122
123
124
@classmethod
def get_linkml_range(cls, ptype: Type) -> str:
    """
    Map a Python type to its LinkML range.

    Args:
        ptype (Type): the Python type to be mapped.

    Returns:
        str: the matching LinkML range; falls back to "string" for
        unknown types.
    """
    if ptype in cls.to_linkml_ranges:
        return cls.to_linkml_ranges[ptype]
    return "string"

get_rdf_datatype(ptype) classmethod

Determines the RDF (XSD) datatype for a given Python type.

Parameters:

Name Type Description Default
ptype Type

The Python type for which the RDF (XSD) datatype is required.

required

Returns:

Name Type Description
XSD Optional[XSD]

The corresponding RDF (XSD) datatype. Returns None if the type is not found.

Source code in lodstorage/linkml.py
126
127
128
129
130
131
132
133
134
135
136
137
@classmethod
def get_rdf_datatype(cls, ptype: Type) -> Optional[XSD]:
    """
    Map a Python type to its RDF (XSD) datatype.

    Args:
        ptype (Type): the Python type to be mapped.

    Returns:
        XSD: the matching XSD datatype, or None for unmapped types.
    """
    if ptype in cls.to_rdf_datatypes:
        return cls.to_rdf_datatypes[ptype]
    return None

Schema

Represents the entire LinkML schema.

Source code in lodstorage/linkml.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
@lod_storable
class Schema:
    """
    Represents the entire LinkML schema.

    Holds the schema-level metadata plus the collected classes, slots and
    types that make up the schema.
    """

    # schema name - also used as the default title (see __post_init__)
    name: str
    # unique identifier of the schema
    id: str
    # human readable description
    description: str
    title: Optional[str] = None
    version: Optional[str] = None
    license: Optional[str] = None

    # prefix used when no explicit prefix is given
    default_prefix: Optional[str] = None

    # namespace prefix -> URI mapping
    prefixes: Dict[str, str] = field(default_factory=dict)
    # other schemas to import
    imports: List[str] = field(default_factory=list)
    # range assumed for slots without an explicit range
    default_range: str = "string"
    # class name -> Class definition
    classes: Dict[str, Class] = field(default_factory=dict)
    # slot name -> Slot definition
    slots: Dict[str, Slot] = field(default_factory=dict)
    # type name -> Type definition
    types: Dict[str, Type] = field(default_factory=dict)

    def __post_init__(self):
        # fall back to the schema name when no title was provided
        if not self.title:
            self.title = self.name

Slot

Represents a slot in the LinkML schema, equivalent to a field or property.

Source code in lodstorage/linkml.py
14
15
16
17
18
19
20
21
22
23
@lod_storable
class Slot:
    """
    Represents a slot in the LinkML schema, equivalent to a field or property.
    """

    # human readable description of the slot
    description: str
    # the LinkML range (value type) of the slot
    range: str = "string"
    # True if the slot may hold multiple values
    multivalued: bool = False
    # True if the slot serves as the identifier of its class
    identifier: bool = False

linkml_gen

Created on 2024-01-21

@author: wf

LinkMLGen

Class for generating LinkML YAML schema from Python data models using dataclasses.

Source code in lodstorage/linkml_gen.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
class LinkMLGen:
    """
    Class for generating LinkML YAML schema from Python data models using dataclasses.
    """

    def __init__(self, schema: Schema):
        """
        Initialize the LinkMLGen.

        Args:
            schema (Schema): The LinkML schema to be generated.
        """
        self.schema = schema

    def gen_schema(self, data_model_class) -> Schema:
        """
        Generate schema entries for the given dataclass.

        Adds a Class for data_model_class plus one Slot per dataclass field
        to self.schema, recursing into nested dataclasses found as list/dict
        content types.

        Args:
            data_model_class: the dataclass (type, not instance) to process

        Returns:
            Schema: the shared schema updated with the new class and slots
        """
        # Use DocstringParser to extract class description
        parser = DocstringParser()
        class_description, doc_attributes = parser.parse(data_model_class.__doc__)

        class_name = data_model_class.__name__
        new_class = Class(description=class_description, slots=[])

        # Iterate over the fields of the dataclass
        for field_info in fields(data_model_class):
            attr_name = field_info.name
            attr_type = field_info.type

            # Handle Optional and List types
            # NOTE(review): is_optional is computed but never read below -
            # Optional-ness is currently not reflected in the generated slot
            is_optional = False
            is_list = False
            content_type = None
            if hasattr(attr_type, "__origin__"):
                if attr_type.__origin__ is Union and type(None) in attr_type.__args__:
                    is_optional = True
                    attr_type = [t for t in attr_type.__args__ if t is not type(None)][
                        0
                    ]  # unwrap Optional type
                elif attr_type.__origin__ is list:
                    is_list = True
                    content_type = attr_type.__args__[0]  # unwrap List type
                elif attr_type.__origin__ is dict:
                    # Assuming dictionary values are of interest, keys are strings
                    content_type = attr_type.__args__[
                        1
                    ]  # unwrap Dict type, focusing on value type

            # Check and handle nested dataclasses for lists or dicts
            if is_dataclass(content_type):
                # Recursive call to handle nested dataclass
                self.gen_schema(content_type)
                # Set the range to the name of the dataclass
                linkml_range = (
                    content_type.__name__
                )  # Use the name of the dataclass as the range
            elif is_list:
                # If it's a list, get the LinkML range for the base type
                # Use self.get_linkml_range to ensure consistent type mapping
                linkml_range = PythonTypes.get_linkml_range(content_type)
            else:
                # For non-list and non-dataclass types, use self.get_linkml_range for consistent type mapping
                linkml_range = PythonTypes.get_linkml_range(attr_type)

            # Extract description from doc_attributes
            description = doc_attributes.get(attr_name, {}).get(
                "description", f"{attr_name} - missing description"
            )

            # Create a new slot for the field
            new_slot = Slot(
                description=description, range=linkml_range, multivalued=is_list
            )
            # NOTE(review): the slot *name* (a string) is appended to
            # new_class.slots although Class.slots is annotated List[Slot]
            self.schema.slots[attr_name] = new_slot
            new_class.slots.append(attr_name)

        self.schema.classes[class_name] = new_class
        return self.schema

    def gen_schema_from_instance(self, data_model_instance) -> Schema:
        """
        Generate a LinkML YAML schema from a Python data model using dataclasses.

        Args:
            data_model_instance: An instance of the Python data model.

        Returns:
            Schema: The LinkML schema generated from the data model.

        Raises:
            ValueError: via ensure_consistency when a field's declared and
            actual types differ or when a field lacks documentation
        """
        # Use DocstringParser to extract class description and attributes
        parser = DocstringParser()
        class_description, doc_attributes = parser.parse(data_model_instance.__doc__)

        class_name = data_model_instance.__class__.__name__
        new_class = Class(description=class_description, slots=[])

        for field_info in fields(data_model_instance):
            attr_name = field_info.name
            attr_type = field_info.type

            # Extract field type/range
            linkml_range = PythonTypes.get_linkml_range(attr_type)

            # Check values for multivalued and type consistency
            attr_value = getattr(data_model_instance, attr_name)
            multivalued, actual_type = self.check_value(attr_value)

            # Ensure documentation, declaration, and value type are consistent
            self.ensure_consistency(
                attr_name, linkml_range, actual_type, doc_attributes
            )

            # Prepare slot
            description = doc_attributes.get(attr_name, {}).get(
                "description", f"{attr_name} - missing description"
            )
            # only register a slot the first time its name is seen
            if attr_name not in self.schema.slots:
                new_slot = Slot(
                    description=description, range=linkml_range, multivalued=multivalued
                )
                self.schema.slots[attr_name] = new_slot
                new_class.slots.append(attr_name)

            if multivalued:
                # recursive call if type of list or dict is a dataclass
                if hasattr(attr_type, "__args__"):
                    content_type = attr_type.__args__[
                        0
                    ]  # Get the declared content type
                    if is_dataclass(content_type):
                        self.gen_schema(content_type)

        self.schema.classes[class_name] = new_class
        return self.schema

    def check_value(self, value):
        # Method to check if the value is multivalued and determine its type
        # strings/bytes are iterable but are treated as scalar values here
        multivalued = isinstance(value, (Iterable, Mapping)) and not isinstance(
            value, (str, bytes)
        )
        value_type = type(value).__name__
        return multivalued, value_type

    def ensure_consistency(self, name, declared_type, actual_type, doc_attributes):
        # Adjust this method to handle complex types like list, dict, etc.

        # Check if the actual type is a list or dict, and if so, get the type of its elements
        if actual_type == "list" or actual_type == "dict":
            # You may need a more complex logic here to handle lists of custom dataclasses
            # For simplicity, let's assume it's a list of strings for now
            actual_type = "string"

        # Now compare the adjusted actual type with the declared type
        # NOTE(review): declared_type is a LinkML range name ("string") while
        # actual_type is a Python type name ("str") - these only coincide for
        # the list/dict case adjusted above; verify intended behavior for
        # scalar fields before relying on this check
        if declared_type != actual_type:
            raise ValueError(
                f"Type mismatch for '{name}': declared as '{declared_type}', actual type is '{actual_type}'"
            )

        # Check for documentation
        if name not in doc_attributes:
            raise ValueError(f"Missing documentation for field '{name}'")

__init__(schema)

Initialize the LinkMLGen.

Parameters:

Name Type Description Default
schema Schema

The LinkML schema to be generated.

required
Source code in lodstorage/linkml_gen.py
20
21
22
23
24
25
26
27
def __init__(self, schema: Schema):
    """
    Initialize the LinkMLGen.

    Args:
        schema (Schema): the target schema that generated classes and
            slots will be added to
    """
    # keep a reference to the schema being built
    self.schema = schema

gen_schema_from_instance(data_model_instance)

Generate a LinkML YAML schema from a Python data model using dataclasses.

Parameters:

Name Type Description Default
data_model_instance

An instance of the Python data model.

required

Returns:

Name Type Description
Schema Schema

The LinkML schema generated from the data model.

Source code in lodstorage/linkml_gen.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def gen_schema_from_instance(self, data_model_instance) -> Schema:
    """
    Generate a LinkML YAML schema from a Python data model using dataclasses.

    Args:
        data_model_instance: An instance of the Python data model.

    Returns:
        Schema: The LinkML schema generated from the data model.

    Raises:
        ValueError: via ensure_consistency when a field's declared and
        actual types differ or when a field lacks documentation
    """
    # Use DocstringParser to extract class description and attributes
    parser = DocstringParser()
    class_description, doc_attributes = parser.parse(data_model_instance.__doc__)

    class_name = data_model_instance.__class__.__name__
    new_class = Class(description=class_description, slots=[])

    # walk the declared dataclass fields of the instance
    for field_info in fields(data_model_instance):
        attr_name = field_info.name
        attr_type = field_info.type

        # Extract field type/range
        linkml_range = PythonTypes.get_linkml_range(attr_type)

        # Check values for multivalued and type consistency
        attr_value = getattr(data_model_instance, attr_name)
        multivalued, actual_type = self.check_value(attr_value)

        # Ensure documentation, declaration, and value type are consistent
        self.ensure_consistency(
            attr_name, linkml_range, actual_type, doc_attributes
        )

        # Prepare slot
        description = doc_attributes.get(attr_name, {}).get(
            "description", f"{attr_name} - missing description"
        )
        # only register a slot the first time its name is seen
        if attr_name not in self.schema.slots:
            new_slot = Slot(
                description=description, range=linkml_range, multivalued=multivalued
            )
            self.schema.slots[attr_name] = new_slot
            new_class.slots.append(attr_name)

        if multivalued:
            # recursive call if type of list or dict is a dataclass
            if hasattr(attr_type, "__args__"):
                content_type = attr_type.__args__[
                    0
                ]  # Get the declared content type
                if is_dataclass(content_type):
                    self.gen_schema(content_type)

    self.schema.classes[class_name] = new_class
    return self.schema

lod

Created on 2021-01-31

@author: wf

LOD

Bases: object

list of Dict aka Table

Source code in lodstorage/lod.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
class LOD(object):
    """
    list of Dict aka Table

    a collection of helpers to work with tables represented as lists of
    dictionaries
    """

    def __init__(self, name):
        """
        Constructor

        Args:
            name(str): the name of this list of dicts
        """
        self.name = name

    @staticmethod
    def getFields(listOfDicts, sampleCount: int = None):
        """
        Extract field names from a list of dictionaries.

        Args:
            listOfDicts: List of dictionaries to extract field names from
            sampleCount: Number of dictionaries to sample. If None, all are sampled

        Returns:
            list: Field names found in the dictionaries, or None if listOfDicts is None
        """
        if listOfDicts is None:
            return None
        if sampleCount is None:
            sampleCount = len(listOfDicts)

        fields = []
        for i, row in enumerate(listOfDicts):
            if i >= sampleCount:
                break
            for key in row.keys():
                if key not in fields:
                    fields.append(key)
        return fields

    @staticmethod
    def setNone4List(listOfDicts, fields):
        """
        set the given fields to None for the records in the given listOfDicts
        if they are not set

        Args:
            listOfDicts(list): the list of records to work on
            fields(list): the list of fields to set to None
        """
        for record in listOfDicts:
            LOD.setNone(record, fields)

    @staticmethod
    def setNone(record, fields):
        """
        make sure the given fields in the given record are set to none

        Args:
            record(dict): the record to work on
            fields(list): the list of fields to set to None
        """
        for field in fields:
            # idiomatic "not in" instead of "not field in record"
            if field not in record:
                record[field] = None

    """
    https://stackoverflow.com/questions/33542997/python-intersection-of-2-lists-of-dictionaries/33543164
    """

    @staticmethod
    def sortKey(d, key=None):
        """
        get the sort key for the given dict d with the given key

        Args:
            d(dict): the dict to get the sort key for
            key(str): the key to sort by; if None the whole record is hashed

        Returns:
            the value of the given key, or a hash of the dict's items
        """
        if key is None:
            # https://stackoverflow.com/a/60765557/1497139
            return hash(tuple(d.items()))
        else:
            return d[key]

    @staticmethod
    def intersect(listOfDict1, listOfDict2, key=None):
        """
        get the intersection of the two lists of Dicts by the given key

        Args:
            listOfDict1(list): the first list of dicts
            listOfDict2(list): the second list of dicts
            key(str): the key to intersect by; if None whole records are compared

        Returns:
            list: the records of listOfDict1 that also occur in listOfDict2
        """
        i1 = iter(sorted(listOfDict1, key=lambda k: LOD.sortKey(k, key)))
        i2 = iter(sorted(listOfDict2, key=lambda k: LOD.sortKey(k, key)))
        lr = []
        # fix: the initial next() calls are inside the try block so that empty
        # input lists no longer raise an unhandled StopIteration
        try:
            c1 = next(i1)
            c2 = next(i2)
            while True:
                val1 = LOD.sortKey(c1, key)
                val2 = LOD.sortKey(c2, key)
                if val1 < val2:
                    c1 = next(i1)
                elif val1 > val2:
                    c2 = next(i2)
                else:
                    lr.append(c1)
                    c1 = next(i1)
                    c2 = next(i2)
        except StopIteration:
            pass
        return lr

    @staticmethod
    def addLookup(lookup, duplicates, record, value, withDuplicates: bool):
        """
        add a single lookup result

        Args:
            lookup(dict): the lookup map
            duplicates(list): the list of duplicates
            record(dict): the current record
            value(object): the current value to lookup
            withDuplicates(bool): if True duplicates are allowed and lists are
            returned; if False a separate duplicates list is created
        """
        if value in lookup:
            if withDuplicates:
                lookupResult = lookup[value]
                lookupResult.append(record)
            else:
                # keep the first record; remember the clash
                duplicates.append(record)
                return
        else:
            if withDuplicates:
                lookupResult = [record]
            else:
                lookupResult = record
        lookup[value] = lookupResult

    @staticmethod
    def getLookup(lod: list, attrName: str, withDuplicates: bool = False):
        """
        create a lookup dictionary by the given attribute name for the given list of dicts

        Args:
            lod(list): the list of dicts to get the lookup dictionary for
            attrName(str): the attribute to lookup
            withDuplicates(bool): whether to retain single values or lists

        Return:
            the lookup dict if withDuplicates is True, otherwise a
            (lookup dict, duplicates list) tuple
        """
        lookup = {}
        duplicates = []
        for record in lod:
            value = None
            # records may be dicts or objects with attributes
            if isinstance(record, dict):
                if attrName in record:
                    value = record[attrName]
            else:
                if hasattr(record, attrName):
                    value = getattr(record, attrName)
            if value is not None:
                if isinstance(value, list):
                    # multi-valued attributes create one lookup entry per value
                    for listValue in value:
                        LOD.addLookup(
                            lookup, duplicates, record, listValue, withDuplicates
                        )
                else:
                    LOD.addLookup(lookup, duplicates, record, value, withDuplicates)
        if withDuplicates:
            return lookup
        else:
            return lookup, duplicates

    @classmethod
    def handleListTypes(cls, lod, doFilter=False, separator=","):
        """
        handle list types in the given list of dicts by either removing records
        that contain list values or converting the list values to strings

        Args:
            cls: this class
            lod(list): a list of dicts
            doFilter(bool): True if records containing lists value items should be filtered
            separator(str): the separator to use when converting lists
        """
        # see https://stackoverflow.com/a/1207485/1497139
        # iterate in reverse so deletions do not shift pending indices
        for i in range(len(lod) - 1, -1, -1):
            record = lod[i]
            if isinstance(record, dict):
                for key in record:
                    value = record[key]
                    if isinstance(value, list):
                        if doFilter:
                            # fix: break (not continue) after deleting the
                            # record - continuing the key loop after deletion
                            # deleted further, unrelated records when a record
                            # had more than one list-valued field
                            del lod[i]
                            break
                        else:
                            # None/empty entries are dropped before joining
                            newValue = separator.join(filter(None, value))
                            record[key] = newValue

    @staticmethod
    def filterFields(lod: list, fields: list, reverse: bool = False):
        """
        filter the given LoD with the given list of fields by either limiting the LoD to the fields or removing the
        fields contained in the list depending on the state of the reverse parameter

        Args:
            lod(list): list of dicts from which the fields should be excluded
            fields(list): list of fields that should be excluded from the lod
            reverse(bool): If True limit dict to the list of given fields. Otherwise exclude the fields from the dict.

        Returns:
            LoD
        """
        res = []
        for record in lod:
            if reverse:
                recordReduced = {d: record[d] for d in record if d in fields}
            else:
                recordReduced = {d: record[d] for d in record if d not in fields}
            res.append(recordReduced)
        return res

__init__(name)

Constructor

Source code in lodstorage/lod.py
13
14
15
16
17
18
def __init__(self, name):
    """
    Construct a LOD with the given name.

    Args:
        name(str): the name of this list of dicts
    """
    # remember the name for later reference
    self.name = name

addLookup(lookup, duplicates, record, value, withDuplicates) staticmethod

add a single lookup result

Parameters:

Name Type Description Default
lookup(dict)

the lookup map

required
duplicates(list)

the list of duplicates

required
record(dict)

the current record

required
value(object)

the current value to lookup

required
withDuplicates(bool)

if True, duplicates are allowed and a list of records is returned per value; if False, duplicate records are collected in a separate duplicates list

required
Source code in lodstorage/lod.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
@staticmethod
def addLookup(lookup, duplicates, record, value, withDuplicates: bool):
    """
    add a single lookup result

    Args:
        lookup(dict): the lookup map
        duplicates(list): the list of duplicates
        record(dict): the current record
        value(object): the current value to lookup
        withDuplicates(bool): if True duplicates are allowed and lists are
        returned; if False a separate duplicates list is created
    """
    if withDuplicates:
        # every value maps to a list of records
        if value in lookup:
            lookup[value].append(record)
        else:
            lookup[value] = [record]
        return
    # single-value mode: keep the first record, collect clashes
    if value in lookup:
        duplicates.append(record)
        return
    lookup[value] = record

filterFields(lod, fields, reverse=False) staticmethod

filter the given LoD with the given list of fields by either limiting the LoD to the fields or removing the fields contained in the list depending on the state of the reverse parameter

Parameters:

Name Type Description Default
lod(list)

list of dicts from which the fields should be excluded

required
fields(list)

list of fields that should be excluded from the lod

required
reverse(bool)

If True limit dict to the list of given fields. Otherwise exclude the fields from the dict.

required

Returns:

Type Description

LoD

Source code in lodstorage/lod.py
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
@staticmethod
def filterFields(lod: list, fields: list, reverse: bool = False):
    """
    project the given LoD onto or away from the given fields

    Args:
        lod(list): the list of dicts to filter
        fields(list): the field names to keep (reverse=True) or to drop (reverse=False)
        reverse(bool): if True limit each dict to the given fields, otherwise remove them

    Returns:
        LoD: a new list of reduced dicts
    """
    if reverse:
        keep = lambda name: name in fields
    else:
        keep = lambda name: name not in fields
    return [
        {name: record[name] for name in record if keep(name)} for record in lod
    ]

getFields(listOfDicts, sampleCount=None) staticmethod

Extract field names from a list of dictionaries.

Parameters:

Name Type Description Default
listOfDicts

List of dictionaries to extract field names from

required
sampleCount int

Number of dictionaries to sample. If None, all are sampled

None

Returns:

Name Type Description
list

Field names found in the dictionaries, or None if listOfDicts is None

Source code in lodstorage/lod.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
@staticmethod
def getFields(listOfDicts, sampleCount: int = None):
    """
    Extract the field names occurring in a list of dictionaries.

    Args:
        listOfDicts: the list of dictionaries to inspect
        sampleCount: how many leading dictionaries to inspect; all when None

    Returns:
        list: the field names in order of first appearance, or None when
        listOfDicts is None
    """
    if listOfDicts is None:
        return None
    limit = len(listOfDicts) if sampleCount is None else sampleCount

    fieldNames = []
    for rowIndex, row in enumerate(listOfDicts):
        if rowIndex >= limit:
            break
        for fieldName in row:
            if fieldName not in fieldNames:
                fieldNames.append(fieldName)
    return fieldNames

getLookup(lod, attrName, withDuplicates=False) staticmethod

create a lookup dictionary by the given attribute name for the given list of dicts

Parameters:

Name Type Description Default
lod(list)

the list of dicts to get the lookup dictionary for

required
attrName(str)

the attribute to lookup

required
withDuplicates(bool)

whether to retain single values or lists

required
Return

a dictionary for lookup

Source code in lodstorage/lod.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
@staticmethod
def getLookup(lod: list, attrName: str, withDuplicates: bool = False):
    """
    create a lookup dictionary by the given attribute name for the given list of dicts

    Records may be plain dicts or objects; the value found under
    attrName becomes the lookup key. List-valued attributes create one
    lookup entry per list element. Records without the attribute or with
    a None value are skipped.

    Args:
        lod(list): the list of dicts to get the lookup dictionary for
        attrName(str): the attribute to lookup
        withDuplicates(bool): whether to retain single values or lists

    Return:
        the lookup dictionary when withDuplicates is True; otherwise a
        (lookup, duplicates) tuple - note the asymmetric return value
    """
    lookup = {}
    duplicates = []
    for record in lod:
        value = None
        # support both dict records and objects with attributes
        if isinstance(record, dict):
            if attrName in record:
                value = record[attrName]
        else:
            if hasattr(record, attrName):
                value = getattr(record, attrName)
        if value is not None:
            if isinstance(value, list):
                # index the record once per element of a list-valued attribute
                for listValue in value:
                    LOD.addLookup(
                        lookup, duplicates, record, listValue, withDuplicates
                    )
            else:
                LOD.addLookup(lookup, duplicates, record, value, withDuplicates)
    if withDuplicates:
        return lookup
    else:
        return lookup, duplicates

handleListTypes(lod, doFilter=False, separator=',') classmethod

handle list types in the given list of dicts

Parameters:

Name Type Description Default
cls

this class

required
lod(list)

a list of dicts

required
doFilter(bool)

True if records containing lists value items should be filtered

required
separator(str)

the separator to use when converting lists

required
Source code in lodstorage/lod.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
@classmethod
def handleListTypes(cls, lod, doFilter=False, separator=","):
    """
    handle list types in the given list of dicts

    List values are either removed together with their whole record
    (doFilter=True) or flattened to a separator-joined string
    (doFilter=False). The list is modified in place.

    Args:
        cls: this class
        lod(list): a list of dicts - modified in place
        doFilter(bool): True if records containing list value items should be filtered
        separator(str): the separator to use when converting lists
    """
    # iterate backwards so deleting records does not shift pending indices
    # see https://stackoverflow.com/a/1207485/1497139
    for i in range(len(lod) - 1, -1, -1):
        record = lod[i]
        if isinstance(record, dict):
            for key in record:
                value = record[key]
                if isinstance(value, list):
                    if doFilter:
                        del lod[i]
                        # record is gone - stop scanning its keys; the previous
                        # "continue" kept iterating and a second list-valued key
                        # would have deleted an unrelated record
                        break
                    else:
                        # assumes list items are strings; falsy items are dropped
                        newValue = separator.join(filter(None, value))
                        record[key] = newValue

intersect(listOfDict1, listOfDict2, key=None) staticmethod

get the intersection of the two lists of Dicts by the given key

Source code in lodstorage/lod.py
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
@staticmethod
def intersect(listOfDict1, listOfDict2, key=None):
    """
    get the intersection of the two lists of Dicts by the given key

    Both lists are sorted by LOD.sortKey and merged; the records of
    listOfDict1 whose sort key also occurs in listOfDict2 are returned.

    Args:
        listOfDict1(list): the first list of dicts
        listOfDict2(list): the second list of dicts
        key: the key to compare by - if None the whole record is hashed

    Returns:
        list: the intersecting records (taken from listOfDict1)
    """
    # an empty input has an empty intersection; this also avoids the
    # unhandled StopIteration the unguarded initial next() calls raised
    if not listOfDict1 or not listOfDict2:
        return []
    i1 = iter(sorted(listOfDict1, key=lambda k: LOD.sortKey(k, key)))
    i2 = iter(sorted(listOfDict2, key=lambda k: LOD.sortKey(k, key)))
    c1 = next(i1)
    c2 = next(i2)
    lr = []
    # classic sorted-merge: advance the iterator with the smaller key,
    # collect on equality, stop when either side is exhausted
    while True:
        try:
            val1 = LOD.sortKey(c1, key)
            val2 = LOD.sortKey(c2, key)
            if val1 < val2:
                c1 = next(i1)
            elif val1 > val2:
                c2 = next(i2)
            else:
                lr.append(c1)
                c1 = next(i1)
                c2 = next(i2)
        except StopIteration:
            break
    return lr

setNone(record, fields) staticmethod

make sure the given fields in the given record are set to none Args: record(dict): the record to work on fields(list): the list of fields to set to None

Source code in lodstorage/lod.py
58
59
60
61
62
63
64
65
66
67
68
@staticmethod
def setNone(record, fields):
    """
    make sure the given fields in the given record are set to None

    Args:
        record(dict): the record to work on
        fields(list): the list of fields to set to None
    """
    for field in fields:
        # only fill in missing keys - existing values are kept
        if field not in record:
            record[field] = None

setNone4List(listOfDicts, fields) staticmethod

set the given fields to None for the records in the given listOfDicts if they are not set Args: listOfDicts(list): the list of records to work on fields(list): the list of fields to set to None

Source code in lodstorage/lod.py
46
47
48
49
50
51
52
53
54
55
56
@staticmethod
def setNone4List(listOfDicts, fields):
    """
    set the given fields to None for the records in the given listOfDicts
    if they are not set

    Delegates the per-record work to LOD.setNone.

    Args:
        listOfDicts(list): the list of records to work on
        fields(list): the list of fields to set to None
    """
    for row in listOfDicts:
        LOD.setNone(row, fields)

sortKey(d, key=None) staticmethod

get the sort key for the given dict d with the given key

Source code in lodstorage/lod.py
74
75
76
77
78
79
80
81
@staticmethod
def sortKey(d, key=None):
    """get the sort key for the given dict d with the given key"""
    if key is not None:
        return d[key]
    # no explicit key: derive a stable key from the full record content
    # https://stackoverflow.com/a/60765557/1497139
    return hash(tuple(d.items()))

lod2xml

Created on 2022-06-20

see https://github.com/tyleradams/json-toolkit https://stackoverflow.com/questions/36021526/converting-an-array-dict-to-xml-in-python

@author: tyleradams @author: wf

Lod2Xml

convert a list of dicts to XML

Source code in lodstorage/lod2xml.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
class Lod2Xml:
    """
    convert a list of dicts to XML
    """

    def __init__(
        self, lod, root: str = "root", node_name: callable = (lambda x: "node")
    ):
        """
        construct me with the given list of dicts

        Args:
            lod (list): the list of dicts to convert to XML
            root (str): the name of the root node
            node_name (callable): the function to use to calculate node names
        """
        self.lod = lod
        self.root = root
        self.item_name = node_name

    def asXml(self, pretty: bool = True):
        """
        convert result to XML

        Args:
            pretty (bool): if True pretty print the result

        Returns:
            the XML serialization; NOTE(review): when pretty is False the
            raw dicttoxml result is returned without conversion - confirm
            callers cope with the different return type
        """
        xml = dicttoxml(
            self.lod, custom_root=self.root, item_func=self.item_name, attr_type=False
        )
        if pretty:
            # round-trip through minidom purely for indentation
            dom = parseString(xml)
            prettyXml = dom.toprettyxml()
        else:
            prettyXml = xml
        return prettyXml

__init__(lod, root='root', node_name=lambda x: 'node')

construct me with the given list of dicts

Parameters:

Name Type Description Default
lod list

the list of dicts to convert to XML

required
root str

the name of the root node

'root'
node_name func

the function to use to calculate node names

required
Source code in lodstorage/lod2xml.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def __init__(
    self, lod, root: str = "root", node_name: callable = (lambda x: "node")
):
    """
    construct me with the given list of dicts

    Args:
        lod (list): the list of dicts to convert to XML
        root (str): the name of the root node
        node_name (callable): the function to use to calculate node names
    """
    self.lod = lod
    self.root = root
    self.item_name = node_name

asXml(pretty=True)

convert result to XML

Parameters:

Name Type Description Default
pretty bool

if True pretty print the result

True
Source code in lodstorage/lod2xml.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
def asXml(self, pretty: bool = True):
    """
    convert result to XML

    Args:
        pretty (bool): if True pretty print the result

    Returns:
        the XML serialization; NOTE(review): when pretty is False the raw
        dicttoxml result is returned without conversion - confirm callers
        cope with the different return type
    """
    xml = dicttoxml(
        self.lod, custom_root=self.root, item_func=self.item_name, attr_type=False
    )
    if pretty:
        # round-trip through minidom purely for indentation
        dom = parseString(xml)
        prettyXml = dom.toprettyxml()
    else:
        prettyXml = xml
    return prettyXml

lod_csv

Created 2021

@author: wf

CSV

helper for converting data in csv format to list of dicts (LoD) and vice versa

Source code in lodstorage/lod_csv.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
class CSV:
    """
    helper for converting data in csv format to list of dicts (LoD) and vice versa

    Implemented as a singleton: every CSV(...) call returns the same
    instance, so the dialect/quoting passed on the first call become the
    shared defaults for all later users.
    """

    # singleton instance shared by all constructor calls
    _instance = None

    def __new__(cls, dialect: str = "excel", quoting: int = csv.QUOTE_NONNUMERIC):
        """
        constructor to set dialect and quoting defaults

        Note: the arguments only take effect on the very first
        instantiation; later calls return the existing instance unchanged.
        """
        if cls._instance is None:
            cls._instance = super().__new__(cls)
            cls._instance.dialect = dialect
            cls._instance.quoting = quoting
        return cls._instance

    @classmethod
    def get_instance(cls, dialect: str = "excel", quoting: int = csv.QUOTE_NONNUMERIC):
        """Get singleton instance of CSV class"""
        return cls(dialect, quoting)

    def restoreFromCSVFile(
        self, filePath: str, headerNames: List[str] = None, withPostfix: bool = False
    ) -> List[Dict[str, Any]]:
        """
        restore LOD from given csv file

        Args:
            filePath(str): file name
            headerNames(List[str]): Names of the headers that should be used. If None it is assumed that the header is given.
            withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

        Returns:
            List[Dict[str, Any]]: list of dicts (LoD) containing the content of the given csv file
        """
        if not withPostfix:
            filePath += ".csv"
        csvStr = self.readFile(filePath)
        lod = self.fromCSV(csvStr, headerNames)
        return lod

    def fromCSV(
        self,
        csvString: str,
        fields: List[str] = None,
        dialect: str = None,
        quoting: int = None,
        **kwargs,
    ) -> List[Dict[str, Any]]:
        """
        convert given csv string to list of dicts (LOD)

        Args:
            csvString(str): csv string that should be converted to LOD
            fields(List[str]): Names of the headers that should be used. If None it is assumed that the header is given.
            dialect(str): CSV dialect to use
            quoting(int): CSV quoting behavior

        Returns:
            List[Dict[str, Any]]: list of dicts (LoD) containing the content of the given csv string
        """
        # fall back to the singleton-wide defaults
        if dialect is None:
            dialect = self.dialect
        if quoting is None:
            quoting = self.quoting
        csvStream = io.StringIO(csvString)
        reader = csv.DictReader(
            csvStream, fieldnames=fields, dialect=dialect, quoting=quoting, **kwargs
        )
        lod = list(reader)
        # normalize empty strings to None after parsing
        self.fixTypes(lod)
        return lod

    def storeToCSVFile(
        self, lod: List[Dict[str, Any]], filePath: str, withPostfix: bool = False
    ):
        """
        converts the given lod to CSV file.

        Args:
            lod(List[Dict[str, Any]]): lod that should be converted to csv file
            filePath(str): file name the csv should be stored to
            withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.
        """
        if not withPostfix:
            filePath += ".csv"
        csvStr = self.toCSV(lod)
        self.writeFile(csvStr, filePath)

    def toCSV(
        self,
        lod: List[Dict[str, Any]],
        includeFields: List[str] = None,
        excludeFields: List[str] = None,
        dialect: str = None,
        quoting: int = None,
        **kwargs,
    ) -> str:
        """
        converts the given lod to CSV string.

        Args:
            lod(List[Dict[str, Any]]): lod that should be converted to csv string
            includeFields(List[str]): list of fields that should be included in the csv
            excludeFields(List[str]): list of fields that should be excluded from the csv
            dialect(str): CSV dialect to use
            quoting(int): CSV quoting behavior

        Returns:
            str: csv string of the given lod
        """
        # fall back to the singleton-wide defaults
        if dialect is None:
            dialect = self.dialect
        if quoting is None:
            quoting = self.quoting
        if lod is None:
            return ""
        # exclusions are applied before the include projection
        if excludeFields is not None:
            lod = LOD.filterFields(lod, excludeFields)
        if includeFields is None:
            fields = LOD.getFields(lod)
        else:
            fields = includeFields
            lod = LOD.filterFields(lod, includeFields, reverse=True)
        csvStream = io.StringIO()
        dict_writer = csv.DictWriter(
            csvStream, fieldnames=fields, dialect=dialect, quoting=quoting, **kwargs
        )
        dict_writer.writeheader()
        dict_writer.writerows(lod)
        csvString = csvStream.getvalue()
        return csvString

    def readFile(self, filename: str) -> str:
        """
        Reads the given filename and returns it as string

        Args:
            filename(str): Name of the file that should be returned as string

        Returns:
            str: Content of the file as string
        """
        # NOTE(review): uses the platform default encoding - confirm UTF-8 expectation
        with open(filename, "r") as file:
            content = file.read()
        return content

    def writeFile(self, content: str, filename: str):
        """
        Write the given str to the given filename

        Args:
            content(str): string that should be written into the file
            filename(str): Name of the file the given str should be written to
        """
        # NOTE(review): uses the platform default encoding - confirm UTF-8 expectation
        with open(filename, "w") as file:
            file.write(content)

    def fixTypes(self, lod: List[Dict[str, Any]]) -> None:
        """
        fixes the types of the given LoD.

        Replaces empty-string values with None, in place.

        Args:
            lod(List[Dict[str, Any]]): List of dictionaries to fix types for
        """
        for record in lod:
            for key, value in record.items():
                if value == "":
                    record[key] = None

__new__(dialect='excel', quoting=csv.QUOTE_NONNUMERIC)

constructor to set dialect and quoting defaults

Source code in lodstorage/lod_csv.py
21
22
23
24
25
26
27
28
29
def __new__(cls, dialect: str = "excel", quoting: int = csv.QUOTE_NONNUMERIC):
    """
    constructor to set dialect and quoting defaults

    Note: implements a singleton - the arguments only take effect on the
    very first instantiation; later calls return the existing instance
    unchanged.
    """
    if cls._instance is None:
        cls._instance = super().__new__(cls)
        cls._instance.dialect = dialect
        cls._instance.quoting = quoting
    return cls._instance

fixTypes(lod)

fixes the types of the given LoD.

Parameters:

Name Type Description Default
lod(List[Dict[str, Any]]

List of dictionaries to fix types for

required
Source code in lodstorage/lod_csv.py
173
174
175
176
177
178
179
180
181
182
183
def fixTypes(self, lod: List[Dict[str, Any]]) -> None:
    """
    fixes the types of the given LoD.

    Replaces empty-string cell values with None, modifying the records
    in place.

    Args:
        lod(List[Dict[str, Any]]): List of dictionaries to fix types for
    """
    for row in lod:
        # collect first, then assign, to avoid mutating while iterating items
        empty_keys = [key for key, value in row.items() if value == ""]
        for key in empty_keys:
            row[key] = None

fromCSV(csvString, fields=None, dialect=None, quoting=None, **kwargs)

convert given csv string to list of dicts (LOD)

Parameters:

Name Type Description Default
csvString(str)

csv string that should be converted to LOD

required
fields(List[str])

Names of the headers that should be used. If None it is assumed that the header is given.

required
dialect(str)

CSV dialect to use

required
quoting(int)

CSV quoting behavior

required

Returns:

Type Description
List[Dict[str, Any]]

List[Dict[str, Any]]: list of dicts (LoD) containing the content of the given csv string

Source code in lodstorage/lod_csv.py
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def fromCSV(
    self,
    csvString: str,
    fields: List[str] = None,
    dialect: str = None,
    quoting: int = None,
    **kwargs,
) -> List[Dict[str, Any]]:
    """
    convert given csv string to list of dicts (LOD)

    Args:
        csvString(str): csv string that should be converted to LOD
        fields(List[str]): Names of the headers that should be used. If None it is assumed that the header is given.
        dialect(str): CSV dialect to use
        quoting(int): CSV quoting behavior

    Returns:
        List[Dict[str, Any]]: list of dicts (LoD) containing the content of the given csv string
    """
    # fall back to the instance-wide defaults
    effective_dialect = self.dialect if dialect is None else dialect
    effective_quoting = self.quoting if quoting is None else quoting
    stream = io.StringIO(csvString)
    reader = csv.DictReader(
        stream,
        fieldnames=fields,
        dialect=effective_dialect,
        quoting=effective_quoting,
        **kwargs,
    )
    records = list(reader)
    # normalize empty strings to None after parsing
    self.fixTypes(records)
    return records

get_instance(dialect='excel', quoting=csv.QUOTE_NONNUMERIC) classmethod

Get singleton instance of CSV class

Source code in lodstorage/lod_csv.py
31
32
33
34
@classmethod
def get_instance(cls, dialect: str = "excel", quoting: int = csv.QUOTE_NONNUMERIC):
    """Get singleton instance of CSV class

    Note: the arguments only take effect on the very first instantiation;
    later calls return the existing singleton unchanged.
    """
    return cls(dialect, quoting)

readFile(filename)

Reads the given filename and returns it as string

Parameters:

Name Type Description Default
filename(str)

Name of the file that should be returned as string

required

Returns:

Name Type Description
str str

Content of the file as string

Source code in lodstorage/lod_csv.py
148
149
150
151
152
153
154
155
156
157
158
159
160
def readFile(self, filename: str) -> str:
    """
    Reads the given filename and returns it as string

    Args:
        filename(str): Name of the file that should be returned as string

    Returns:
        str: Content of the file as string
    """
    # platform default encoding, matching the writeFile counterpart
    with open(filename, "r") as source:
        return source.read()

restoreFromCSVFile(filePath, headerNames=None, withPostfix=False)

restore LOD from given csv file

Parameters:

Name Type Description Default
filePath(str)

file name

required
headerNames(List[str])

Names of the headers that should be used. If None it is assumed that the header is given.

required
withPostfix(bool)

If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

required

Returns:

Type Description
List[Dict[str, Any]]

List[Dict[str, Any]]: list of dicts (LoD) containing the content of the given csv file

Source code in lodstorage/lod_csv.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
def restoreFromCSVFile(
    self, filePath: str, headerNames: List[str] = None, withPostfix: bool = False
) -> List[Dict[str, Any]]:
    """
    restore LOD from given csv file

    Args:
        filePath(str): file name
        headerNames(List[str]): Names of the headers that should be used. If None it is assumed that the header is given.
        withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

    Returns:
        List[Dict[str, Any]]: list of dicts (LoD) containing the content of the given csv file
    """
    # append the .csv extension unless the caller supplied it already
    path = filePath if withPostfix else filePath + ".csv"
    csv_text = self.readFile(path)
    return self.fromCSV(csv_text, headerNames)

storeToCSVFile(lod, filePath, withPostfix=False)

converts the given lod to CSV file.

Parameters:

Name Type Description Default
lod(List[Dict[str, Any]]

lod that should be converted to csv file

required
filePath(str)

file name the csv should be stored to

required
withPostfix(bool)

If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

required
Source code in lodstorage/lod_csv.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def storeToCSVFile(
    self, lod: List[Dict[str, Any]], filePath: str, withPostfix: bool = False
):
    """
    converts the given lod to CSV file.

    Args:
        lod(List[Dict[str, Any]]): lod that should be converted to csv file
        filePath(str): file name the csv should be stored to
        withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.
    """
    # append the .csv extension unless the caller supplied it already
    target = filePath if withPostfix else filePath + ".csv"
    self.writeFile(self.toCSV(lod), target)

toCSV(lod, includeFields=None, excludeFields=None, dialect=None, quoting=None, **kwargs)

converts the given lod to CSV string.

Parameters:

Name Type Description Default
lod(List[Dict[str, Any]]

lod that should be converted to csv string

required
includeFields(List[str])

list of fields that should be included in the csv

required
excludeFields(List[str])

list of fields that should be excluded from the csv

required
dialect(str)

CSV dialect to use

required
quoting(int)

CSV quoting behavior

required

Returns:

Name Type Description
str str

csv string of the given lod

Source code in lodstorage/lod_csv.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def toCSV(
    self,
    lod: List[Dict[str, Any]],
    includeFields: List[str] = None,
    excludeFields: List[str] = None,
    dialect: str = None,
    quoting: int = None,
    **kwargs,
) -> str:
    """
    converts the given lod to CSV string.

    Args:
        lod(List[Dict[str, Any]]): lod that should be converted to csv string
        includeFields(List[str]): list of fields that should be included in the csv
        excludeFields(List[str]): list of fields that should be excluded from the csv
        dialect(str): CSV dialect to use
        quoting(int): CSV quoting behavior

    Returns:
        str: csv string of the given lod
    """
    # fall back to the instance-wide defaults
    effective_dialect = self.dialect if dialect is None else dialect
    effective_quoting = self.quoting if quoting is None else quoting
    if lod is None:
        return ""
    # exclusions are applied before the include projection
    if excludeFields is not None:
        lod = LOD.filterFields(lod, excludeFields)
    if includeFields is None:
        fields = LOD.getFields(lod)
    else:
        fields = includeFields
        lod = LOD.filterFields(lod, includeFields, reverse=True)
    buffer = io.StringIO()
    writer = csv.DictWriter(
        buffer,
        fieldnames=fields,
        dialect=effective_dialect,
        quoting=effective_quoting,
        **kwargs,
    )
    writer.writeheader()
    writer.writerows(lod)
    return buffer.getvalue()

writeFile(content, filename)

Write the given str to the given filename

Parameters:

Name Type Description Default
content(str)

string that should be written into the file

required
filename(str)

Name of the file the given str should be written to

required
Source code in lodstorage/lod_csv.py
162
163
164
165
166
167
168
169
170
171
def writeFile(self, content: str, filename: str):
    """
    Write the given str to the given filename

    Args:
        content(str): string that should be written into the file
        filename(str): Name of the file the given str should be written to
    """
    # platform default encoding, matching the readFile counterpart
    with open(filename, "w") as target:
        target.write(content)

mwTable

Created on 2020-08-21

@author: wf

MediaWikiTable

Bases: object

helper for https://www.mediawiki.org/wiki/Help:Tables

Source code in lodstorage/mwTable.py
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
class MediaWikiTable(object):
    """
    helper for https://www.mediawiki.org/wiki/Help:Tables
    """

    def __init__(
        self, wikiTable=True, colFormats=None, sortable=True, withNewLines=False
    ):
        """
        Constructor

        Args:
            wikiTable(bool): if True add the "wikitable" css class
            colFormats(dict): optional map from column key to %-style format string
            sortable(bool): if True add the "sortable" css class
            withNewLines(bool): if True emit one-cell-per-line markup
        """
        self.colFormats = colFormats
        cssDelim = ""
        if wikiTable:
            cWikiTable = "wikitable"
            cssDelim = " "
        else:
            cWikiTable = ""
        if sortable:
            cSortable = "sortable"
        else:
            cSortable = ""

        # table opening markup with the collected css classes
        self.start = '{|class="%s%s%s"\n' % (cWikiTable, cssDelim, cSortable)
        self.header = None
        self.content = ""
        self.end = "\n|}\n"
        self.withNewLines = withNewLines
        pass

    def addHeader(self, record):
        """
        add the given record as a "sample" header
        """
        # NOTE(review): headerStart carries the newline only in the
        # single-line mode - confirm this asymmetry is intended
        if self.withNewLines:
            headerStart = "|+"
            firstColDelim = "\n!"
            colDelim = firstColDelim
        else:
            headerStart = "|+\n"
            firstColDelim = "!"
            colDelim = "!!"
        self.header = headerStart
        first = True
        for key in record.keys():
            if first:
                delim = firstColDelim
                first = False
            else:
                delim = colDelim
            self.header += "%s%s" % (delim, key)

    def addRow4Dict(self, record):
        """
        add a table row for the given record, deriving the header
        from the record keys if none has been set yet
        """
        if self.header is None:
            self.addHeader(record)
        if self.withNewLines:
            rowStart = "\n|-"
            colDelim = "\n|"
        else:
            rowStart = "\n|-\n"
            colDelim = "||"
        self.content += rowStart
        for key in record.keys():
            value = record[key]
            # per-column format strings override the plain %s rendering
            if self.colFormats is not None and key in self.colFormats:
                colFormat = self.colFormats[key]
            else:
                colFormat = "%s"
            self.content += ("%s" + colFormat) % (colDelim, value)

    def fromListOfDicts(self, listOfDicts):
        """
        add one table row per record of the given list of dicts
        """
        for record in listOfDicts:
            self.addRow4Dict(record)
        pass

    def noneReplace(self, value):
        """return the given value with None replaced by the empty string"""
        return "" if value is None else value

    def asWikiMarkup(self):
        """
        convert me to MediaWiki markup

        Returns:
            string: the MediWiki Markup for this table
        """
        markup = (
            self.noneReplace(self.start)
            + self.noneReplace(self.header)
            + self.noneReplace(self.content)
            + self.noneReplace(self.end)
        )
        return markup

__init__(wikiTable=True, colFormats=None, sortable=True, withNewLines=False)

Constructor

Source code in lodstorage/mwTable.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def __init__(
    self, wikiTable=True, colFormats=None, sortable=True, withNewLines=False
):
    """
    Constructor

    Args:
        wikiTable(bool): if True add the "wikitable" css class
        colFormats(dict): optional map from column key to %-style format string
        sortable(bool): if True add the "sortable" css class
        withNewLines(bool): if True emit one-cell-per-line markup
    """
    self.colFormats = colFormats
    cssDelim = ""
    if wikiTable:
        cWikiTable = "wikitable"
        cssDelim = " "
    else:
        cWikiTable = ""
    if sortable:
        cSortable = "sortable"
    else:
        cSortable = ""

    # table opening markup with the collected css classes
    self.start = '{|class="%s%s%s"\n' % (cWikiTable, cssDelim, cSortable)
    self.header = None
    self.content = ""
    self.end = "\n|}\n"
    self.withNewLines = withNewLines
    pass

addHeader(record)

add the given record as a "sample" header

Source code in lodstorage/mwTable.py
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def addHeader(self, record):
    """
    add the given record as a "sample" header

    builds self.header from the record's keys using the delimiter style
    selected by self.withNewLines
    """
    if self.withNewLines:
        start, first_delim, rest_delim = "|+", "\n!", "\n!"
    else:
        start, first_delim, rest_delim = "|+\n", "!", "!!"
    parts = [start]
    for pos, key in enumerate(record.keys()):
        delim = first_delim if pos == 0 else rest_delim
        parts.append("%s%s" % (delim, key))
    self.header = "".join(parts)

asWikiMarkup()

convert me to MediaWiki markup

Returns:

Name Type Description
string

the MediaWiki Markup for this table

Source code in lodstorage/mwTable.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def asWikiMarkup(self):
    """
    convert me to MediaWiki markup

    Returns:
        string: the MediaWiki Markup for this table
    """
    # unset sections (e.g. a missing header) render as empty strings
    sections = (self.start, self.header, self.content, self.end)
    return "".join(self.noneReplace(section) for section in sections)

mysql

mysql.py:

MySQL and MariaDB support

MySqlQuery

A class to manage and execute mySQL queries with optional debugging.

Attributes:

Name Type Description
endpoint_info Endpoint

endpoint configuration.

debug bool

Flag to enable debugging.

Source code in lodstorage/mysql.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
class MySqlQuery:
    """
    A class to manage and execute mySQL queries with optional debugging.

    Attributes:
        db_params (dict): pymysql connection parameters derived from the endpoint.
        debug (bool): Flag to enable debugging.
    """

    def __init__(self, endpoint: Endpoint, debug: bool = False):
        """
        Initializes the Query class with command-line arguments.

        Args:
            endpoint (Endpoint): endpoint configuration.
            debug (bool): Flag to enable debugging.
        """
        # fall back to common defaults for any unset endpoint fields
        self.db_params = {
            "host": endpoint.host or "localhost",
            "port": endpoint.port or 3306,
            "user": endpoint.user or "root",
            "password": endpoint.password,
            "database": endpoint.database,
            "charset": endpoint.charset or "utf8mb4",
            "use_unicode": True,  # ensure proper unicode handling
        }

        self.debug = debug

    def get_cursor(self, query: str):
        """
        Open a connection and a DictCursor.

        Args:
            query (str): the query - only used for debug logging here

        Returns:
            tuple: (connection, cursor) - the caller is responsible for closing both
        """
        if self.debug:
            logging.debug(f"Executing query: {query}")
            logging.debug(f"With connection parameters: {self.db_params}")

        connection = pymysql.connect(**self.db_params)
        cursor = connection.cursor(pymysql.cursors.DictCursor)
        return connection, cursor

    def decode_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
        """
        Converts binary values to UTF-8 strings.

        Args:
            record (Dict[str, Any]): Raw database row data

        Returns:
            Dict[str, Any]: Data with binary values decoded to strings
        """
        decoded_record = {}
        for key, value in record.items():
            if isinstance(value, bytes):
                # undecodable bytes are replaced instead of raising
                decoded_record[key] = value.decode("utf-8", errors="replace")
            else:
                decoded_record[key] = value
        return decoded_record

    def execute_sql_query(self, query: str) -> List[Dict[str, Any]]:
        """
        Executes an SQL query using the configured connection parameters.

        Args:
            query (str): The SQL query to execute.

        Returns:
            list: A list of dictionaries representing the query results.
        """
        connection, cursor = self.get_cursor(query)
        try:
            cursor.execute(query)
            raw_lod = cursor.fetchall()
        finally:
            # always release cursor and connection, even when execute/fetch raises
            cursor.close()
            connection.close()
        lod = []
        for raw_row in raw_lod:
            row = self.decode_record(raw_row)
            lod.append(row)
        return lod

    def query_generator(self, query: str) -> Generator[Dict[str, Any], None, None]:
        """
        Generator for fetching records one by one from a SQL query.

        Args:
            query (str): The SQL query to execute.

        Yields:
            Dict[str, Any]: one decoded result row at a time
        """
        connection, cursor = self.get_cursor(query)
        try:
            cursor.execute(query)
            while True:
                raw_record = cursor.fetchone()
                # fetchone returns None when the result set is exhausted;
                # check identity so falsy rows are not silently dropped
                if raw_record is None:
                    break
                record = self.decode_record(raw_record)
                yield record

        finally:
            cursor.close()
            connection.close()

__init__(endpoint, debug=False)

Initializes the Query class with command-line arguments.

Parameters:

Name Type Description Default
endpoint Endpoint

endpoint configuration.

required
debug bool

Flag to enable debugging.

False
Source code in lodstorage/mysql.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def __init__(self, endpoint: Endpoint, debug: bool = False):
    """
    Initializes the Query class with command-line arguments.

    Args:
        endpoint (Endpoint): endpoint configuration.
        debug (bool): Flag to enable debugging.
    """
    # apply common defaults for any unset endpoint fields
    host = endpoint.host or "localhost"
    port = endpoint.port or 3306
    user = endpoint.user or "root"
    charset = endpoint.charset or "utf8mb4"
    self.db_params = dict(
        host=host,
        port=port,
        user=user,
        password=endpoint.password,
        database=endpoint.database,
        charset=charset,
        use_unicode=True,  # ensure proper unicode handling
    )
    self.debug = debug

decode_record(record)

Converts binary values to UTF-8 strings.

Parameters:

Name Type Description Default
record Dict[str, Any]

Raw database row data

required

Returns:

Type Description
Dict[str, Any]

Dict[str, Any]: Data with binary values decoded to strings

Source code in lodstorage/mysql.py
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def decode_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
    """
    Converts binary values to UTF-8 strings.

    Args:
        record (Dict[str, Any]): Raw database row data

    Returns:
        Dict[str, Any]: Data with binary values decoded to strings
    """
    decoded_record = {}
    for key, value in record.items():
        if isinstance(value, bytes):
            # undecodable byte sequences are replaced instead of raising
            decoded_record[key] = value.decode("utf-8", errors="replace")
        else:
            decoded_record[key] = value
    return decoded_record

execute_sql_query(query)

Executes an SQL query using the provided connection parameters.

Parameters:

Name Type Description Default
query str

The SQL query to execute.

required
connection_params dict

Database connection parameters.

required

Returns:

Name Type Description
list List[Dict[str, Any]]

A list of dictionaries representing the query results.

Source code in lodstorage/mysql.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def execute_sql_query(self, query: str) -> List[Dict[str, Any]]:
    """
    Executes an SQL query using the configured connection parameters.

    Args:
        query (str): The SQL query to execute.

    Returns:
        list: A list of dictionaries representing the query results.
    """
    connection, cursor = self.get_cursor(query)
    try:
        cursor.execute(query)
        raw_lod = cursor.fetchall()
    finally:
        # always release cursor and connection, even when execute/fetch raises
        cursor.close()
        connection.close()
    lod = []
    for raw_row in raw_lod:
        row = self.decode_record(raw_row)
        lod.append(row)
    return lod

query_generator(query)

Generator for fetching records one by one from a SQL query.

Source code in lodstorage/mysql.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def query_generator(self, query: str) -> Generator[Dict[str, Any], None, None]:
    """
    Generator for fetching records one by one from a SQL query.

    Args:
        query (str): The SQL query to execute.

    Yields:
        Dict[str, Any]: one decoded result row at a time
    """
    connection, cursor = self.get_cursor(query)
    try:
        cursor.execute(query)
        while True:
            raw_record = cursor.fetchone()
            # fetchone returns None when the result set is exhausted;
            # check identity so falsy rows are not silently dropped
            if raw_record is None:
                break
            record = self.decode_record(raw_record)
            yield record

    finally:
        cursor.close()
        connection.close()

params

Created on 2024-05-06

@author: wf

Param

a parameter (input or output) for a query

Source code in lodstorage/params.py
15
16
17
18
19
20
21
22
23
24
25
@lod_storable
class Param:
    """
    a parameter  (input or output) for a query

    Attributes:
        name: the parameter name
        type: the python type of the parameter, kept as a string (e.g. "str")
        default_value: default value - for input parameters only
        range: allowed/known values - for output parameters only
        description: optional human readable description for documentation/UI
    """

    name: str
    type: str  # python type
    default_value: Optional[str] = None  # for input parameters only
    range: Optional[list] = field(default=None)  # for output only; NOTE: name shadows the builtin range
    description: Optional[str] = None  # optional for doc/UI

Params

parameter handling

Source code in lodstorage/params.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
class Params:
    """
    parameter handling

    Detects jinja-style {{ name }} placeholders in a query and supports
    auditing and substituting their values.
    """

    def __init__(
        self, query: str, illegal_chars: str = """"[;<>&|]"'""", with_audit: bool = True
    ):
        """
        constructor

        Args:
            query (str): the query to analyze for parameters
            illegal_chars (str): chars that may not be in the values
            with_audit (bool): if True audit parameters
        """
        self.illegal_chars = illegal_chars
        self.query = query
        self.with_audit = with_audit
        # a parameter is referenced jinja-style as {{ name }}
        self.pattern = re.compile(r"{{\s*(\w+)\s*}}")
        self.params = self.pattern.findall(query)
        self.params_dict = dict.fromkeys(self.params, "")
        self.has_params = len(self.params) > 0

    def set(self, params_dict: Dict):
        """
        set my params
        """
        self.params_dict = params_dict

    def audit(self) -> None:
        """
        Audit the usage of parameters in the query.

        Raises:
            ValueError: If potentially malicious values are detected in the parameter dictionary.
        """
        for param, value in self.params_dict.items():
            if not isinstance(value, str):
                continue
            if any(char in value for char in self.illegal_chars):
                raise ValueError(
                    f"Potentially malicious value detected for parameter '{param}'"
                )

    def apply_parameters(self) -> str:
        """
        Replace Jinja templates in the query with corresponding parameter values.

        Returns:
            str: The query with Jinja templates replaced by parameter values.
        """
        if self.with_audit:
            self.audit()
        applied = self.query
        for param, value in self.params_dict.items():
            param_pattern = re.compile(r"{{\s*" + re.escape(param) + r"\s*\}\}")
            applied = param_pattern.sub(str(value), applied)
        return applied

    def apply_parameters_with_check(self, param_dict: dict = None) -> str:
        """
        Apply parameters to the query string with parameter checking.

        Checks whether the query requires parameters. If parameters are required
        but not provided, an exception with a descriptive message is raised. If
        parameters are provided, they are applied to the query.

        Args:
            param_dict (dict, optional): A dictionary of parameter names and values.

        Returns:
            str: The query string with parameters applied, if applicable.

        Raises:
            Exception: If parameters are required but not provided.
        """
        if not self.has_params:
            return self.query
        if param_dict:
            self.set(param_dict)
            return self.apply_parameters()
        # remove duplicates while preserving order
        param_names = list(dict.fromkeys(self.params))
        displayed_params = ", ".join(param_names[:3])
        if len(param_names) > 3:
            displayed_params += ", ..."
        plural_suffix = "s" if len(param_names) > 1 else ""
        msg = f"Query needs {len(param_names)} parameter{plural_suffix}: {displayed_params}"
        raise Exception(msg)

__init__(query, illegal_chars='"[;<>&|]"\'', with_audit=True)

constructor

Parameters:

Name Type Description Default
query str

the query to analyze for parameters

required
illegal_chars str

chars that may not be in the values

'"[;<>&|]"\''
with_audit bool

if True audit parameters

True
Source code in lodstorage/params.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(
    self, query: str, illegal_chars: str = """"[;<>&|]"'""", with_audit: bool = True
):
    """
    constructor

    Args:
        query (str): the query to analyze for parameters
        illegal_chars (str): chars that may not be in the values
        with_audit (bool): if True audit parameters
    """
    self.illegal_chars = illegal_chars
    self.query = query
    self.with_audit = with_audit
    # parameters are referenced jinja-style as {{ name }}
    self.pattern = re.compile(r"{{\s*(\w+)\s*}}")
    self.params = self.pattern.findall(query)
    self.params_dict = dict.fromkeys(self.params, "")
    self.has_params = len(self.params) > 0

apply_parameters()

Replace Jinja templates in the query with corresponding parameter values.

Returns:

Name Type Description
str str

The query with Jinja templates replaced by parameter values.

Source code in lodstorage/params.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def apply_parameters(self) -> str:
    """
    Replace Jinja templates in the query with corresponding parameter values.

    Returns:
        str: The query with Jinja templates replaced by parameter values.
    """
    if self.with_audit:
        self.audit()
    applied = self.query
    for name, value in self.params_dict.items():
        name_pattern = re.compile(r"{{\s*" + re.escape(name) + r"\s*\}\}")
        applied = name_pattern.sub(str(value), applied)
    return applied

apply_parameters_with_check(param_dict=None)

Apply parameters to the query string with parameter checking.

This method checks if the query requires parameters. If parameters are required but not provided, it raises an exception with a descriptive message. If parameters are provided, it applies them to the query.

Parameters:

Name Type Description Default
param_dict dict

A dictionary of parameter names and values.

None

Returns:

Name Type Description
str str

The query string with parameters applied, if applicable.

Raises:

Type Description
Exception

If parameters are required but not provided.

Source code in lodstorage/params.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def apply_parameters_with_check(self, param_dict: dict = None) -> str:
    """
    Apply parameters to the query string with parameter checking.

    Checks whether the query requires parameters. If parameters are required
    but not provided, an exception with a descriptive message is raised. If
    parameters are provided, they are applied to the query.

    Args:
        param_dict (dict, optional): A dictionary of parameter names and values.

    Returns:
        str: The query string with parameters applied, if applicable.

    Raises:
        Exception: If parameters are required but not provided.
    """
    if not self.has_params:
        return self.query
    if param_dict:
        self.set(param_dict)
        return self.apply_parameters()
    # remove duplicates while preserving order
    param_names = list(dict.fromkeys(self.params))
    displayed_params = ", ".join(param_names[:3])
    if len(param_names) > 3:
        displayed_params += ", ..."
    plural_suffix = "s" if len(param_names) > 1 else ""
    msg = f"Query needs {len(param_names)} parameter{plural_suffix}: {displayed_params}"
    raise Exception(msg)

audit()

Audit the usage of parameters in the query.

Raises:

Type Description
ValueError

If potentially malicious values are detected in the parameter dictionary.

Source code in lodstorage/params.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def audit(self) -> None:
    """
    Audit the usage of parameters in the query.

    Raises:
        ValueError: If potentially malicious values are detected in the parameter dictionary.
    """
    for param, value in self.params_dict.items():
        # only string values can carry injection characters
        if not isinstance(value, str):
            continue
        if any(char in value for char in self.illegal_chars):
            raise ValueError(
                f"Potentially malicious value detected for parameter '{param}'"
            )

set(params_dict)

set my params

Source code in lodstorage/params.py
52
53
54
55
56
def set(self, params_dict: Dict):
    """
    set my params

    Args:
        params_dict (Dict): the parameter name to value mapping to use
    """
    self.params_dict = params_dict

StoreDictKeyPair

Bases: Action

Custom argparse action to store key-value pairs as a dictionary.

This class implements an argparse action to parse and store command-line arguments in the form of key-value pairs. The pairs should be separated by a comma and each key-value pair should be separated by an equals sign.

Example

--option key1=value1,key2=value2,key3=value3

Reference

https://stackoverflow.com/a/42355279/1497139

Source code in lodstorage/params.py
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
class StoreDictKeyPair(argparse.Action):
    """
    Custom argparse action to store key-value pairs as a dictionary.

    This class implements an argparse action to parse and store command-line
    arguments in the form of key-value pairs. The pairs should be separated by
    a comma and each key-value pair should be separated by an equals sign.

    Example:
        --option key1=value1,key2=value2,key3=value3

    Reference:
        https://stackoverflow.com/a/42355279/1497139
    """

    def __call__(
        self,
        _parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: str,
        _option_string: Optional[str] = None,
    ) -> None:
        """
        Parse key-value pairs and store them as a dictionary in the namespace.

        Args:
            _parser (argparse.ArgumentParser): The argument parser object.
            namespace (argparse.Namespace): The namespace to store the parsed values.
            values (str): The string containing key-value pairs separated by commas.
            _option_string (Optional[str]): The option string, if provided.
        """
        my_dict = {}
        for kv in values.split(","):
            # split on the first "=" only so values may themselves contain "="
            k, v = kv.split("=", 1)
            my_dict[k] = v
        setattr(namespace, self.dest, my_dict)

__call__(_parser, namespace, values, _option_string=None)

Parse key-value pairs and store them as a dictionary in the namespace.

Parameters:

Name Type Description Default
parser ArgumentParser

The argument parser object.

required
namespace Namespace

The namespace to store the parsed values.

required
values str

The string containing key-value pairs separated by commas.

required
option_string Optional[str]

The option string, if provided.

required
Source code in lodstorage/params.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def __call__(
    self,
    _parser: argparse.ArgumentParser,
    namespace: argparse.Namespace,
    values: str,
    _option_string: Optional[str] = None,
) -> None:
    """
    Parse key-value pairs and store them as a dictionary in the namespace.

    Args:
        _parser (argparse.ArgumentParser): The argument parser object.
        namespace (argparse.Namespace): The namespace to store the parsed values.
        values (str): The string containing key-value pairs separated by commas.
        _option_string (Optional[str]): The option string, if provided.
    """
    my_dict = {}
    for kv in values.split(","):
        # split on the first "=" only so values may themselves contain "="
        k, v = kv.split("=", 1)
        my_dict[k] = v
    setattr(namespace, self.dest, my_dict)

plot

Created on 2020-07-05

@author: wf

Plot

Bases: object

create Plot based on counters see https://stackoverflow.com/questions/19198920/using-counter-in-python-to-build-histogram

Source code in lodstorage/plot.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
class Plot(object):
    """
    create Plot based on counters
    see https://stackoverflow.com/questions/19198920/using-counter-in-python-to-build-histogram
    """

    def __init__(
        self,
        valueList,
        title,
        xlabel=None,
        ylabel=None,
        gformat=".png",
        fontsize=12,
        plotdir=None,
        debug=False,
    ):
        """
        Constructor

        Args:
            valueList: the values to count and plot
            title (str): the plot title, also used as the file name stem when saving
            xlabel (str): optional x axis label
            ylabel (str): optional y axis label
            gformat (str): graphics file extension used when saving
            fontsize (int): font size for the title
            plotdir (str): directory to save plots to; defaults to ../plots relative to this module
            debug (bool): if True show debug output
        """
        self.counter = Counter(valueList)
        self.valueList = valueList
        self.title = title
        self.xlabel = xlabel
        self.ylabel = ylabel
        self.fontsize = fontsize
        self.gformat = gformat
        self.debug = debug
        path = os.path.dirname(__file__)
        if plotdir is not None:
            self.plotdir = plotdir
        else:
            self.plotdir = path + "/../plots/"
        # ensure the target directory exists for user-supplied
        # directories as well, not only for the default one
        os.makedirs(self.plotdir, exist_ok=True)

    def titleMe(self):
        """set my title and labels"""
        plt.title(self.title, fontsize=self.fontsize)
        if self.xlabel is not None:
            plt.xlabel(self.xlabel)
        if self.ylabel is not None:
            plt.ylabel(self.ylabel)

    def showMe(self, mode="show", close=True):
        """show me in the given mode

        Args:
            mode (str): "show" to display interactively, anything else saves to plotdir
            close (bool): if True close the figure afterwards
        """
        if mode == "show":
            plt.show()
        else:
            # join path parts so a plotdir without a trailing separator also works
            plt.savefig(os.path.join(self.plotdir, self.title + self.gformat))
        if close:
            plt.close()

    def barchart(self, mode="show"):
        """barchart based histogram for the given counter"""
        labels, values = zip(*self.counter.items())
        indexes = np.arange(len(labels))
        width = 1
        self.titleMe()
        plt.bar(indexes, values, width)
        plt.xticks(indexes + width * 0.5, labels)
        plt.yticks(np.arange(1, max(values) + 1, step=1))
        self.showMe(mode)

    def showDebug(self):
        """print the value list and counter details"""
        print("   value  list: ", self.valueList)
        print("counter  items: ", self.counter.items())
        print("counter values: ", self.counter.values())
        print("counter   keys: ", self.counter.keys())

    def hist(self, mode="show"):
        """create histogram for the given counter"""
        if self.debug:
            self.showDebug()
        self.titleMe()
        # one bin per distinct value, see https://stackoverflow.com/a/2162045/1497139
        plt.hist(self.valueList, bins=len(self.counter.keys()))
        self.showMe(mode)

__init__(valueList, title, xlabel=None, ylabel=None, gformat='.png', fontsize=12, plotdir=None, debug=False)

Constructor

Source code in lodstorage/plot.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def __init__(
    self,
    valueList,
    title,
    xlabel=None,
    ylabel=None,
    gformat=".png",
    fontsize=12,
    plotdir=None,
    debug=False,
):
    """
    Constructor

    Args:
        valueList: the values to count and plot
        title (str): the plot title, also used as the file name stem when saving
        xlabel (str): optional x axis label
        ylabel (str): optional y axis label
        gformat (str): graphics file extension used when saving
        fontsize (int): font size for the title
        plotdir (str): directory to save plots to; defaults to ../plots relative to this module
        debug (bool): if True show debug output
    """
    self.counter = Counter(valueList)
    self.valueList = valueList
    self.title = title
    self.xlabel = xlabel
    self.ylabel = ylabel
    self.fontsize = fontsize
    self.gformat = gformat
    self.debug = debug
    path = os.path.dirname(__file__)
    if plotdir is not None:
        self.plotdir = plotdir
    else:
        self.plotdir = path + "/../plots/"
    # ensure the target directory exists for user-supplied
    # directories as well, not only for the default one
    os.makedirs(self.plotdir, exist_ok=True)

barchart(mode='show')

barchart based histogram for the given counter

Source code in lodstorage/plot.py
66
67
68
69
70
71
72
73
74
75
def barchart(self, mode="show"):
    """barchart based histogram for the given counter"""
    labels, values = zip(*self.counter.items())
    positions = np.arange(len(labels))
    bar_width = 1
    self.titleMe()
    plt.bar(positions, values, bar_width)
    plt.xticks(positions + bar_width * 0.5, labels)
    plt.yticks(np.arange(1, max(values) + 1, step=1))
    self.showMe(mode)

hist(mode='show')

create histogram for the given counter

Source code in lodstorage/plot.py
83
84
85
86
87
88
89
90
91
def hist(self, mode="show"):
    """create histogram for the given counter"""
    if self.debug:
        self.showDebug()
    self.titleMe()
    # one bin per distinct value, see https://stackoverflow.com/a/2162045/1497139
    bin_count = len(self.counter.keys())
    plt.hist(self.valueList, bins=bin_count)
    self.showMe(mode)

showMe(mode='show', close=True)

show me in the given mode

Source code in lodstorage/plot.py
57
58
59
60
61
62
63
64
def showMe(self, mode="show", close=True):
    """show me in the given mode"""
    if mode != "show":
        # anything other than "show" saves the figure to the plot directory
        plt.savefig(self.plotdir + self.title + self.gformat)
    else:
        plt.show()
    if close:
        plt.close()

titleMe()

set my title and labels

Source code in lodstorage/plot.py
49
50
51
52
53
54
55
def titleMe(self):
    """set my title and labels"""
    plt.title(self.title, fontsize=self.fontsize)
    # only set axis labels that were actually provided
    for label_text, setter in ((self.xlabel, plt.xlabel), (self.ylabel, plt.ylabel)):
        if label_text is not None:
            setter(label_text)

prefix_config

Created on 2025-06-04

@author: wf

PrefixConfig dataclass

Configuration for SPARQL prefixes

Source code in lodstorage/prefix_config.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
@dataclass
class PrefixConfig:
    """
    Configuration for SPARQL prefixes

    Holds one named set of SPARQL prefix declarations.
    """

    name: str  # name of this prefix configuration
    wikidata_id: Optional[str] = None  # optional Wikidata item id
    url: Optional[str] = None  # optional documentation URL
    prefix_prefix: Optional[str] = None  # short identifier for this set
    description: Optional[str] = None  # optional human readable description

    # prefix key -> prefix declaration text (values are joined verbatim by as_text)
    prefixes: Dict[str, str] = field(default_factory=dict)

    def __post_init__(self):
        """
        Set default values after initialization
        """
        # nothing to do yet - kept as an extension point
        pass

    def as_text(self) -> str:
        """
        Get prefixes as formatted text block.

        Returns:
            str: Newline-separated prefix declarations
        """
        text = "\n".join(self.prefixes.values())
        return text

__post_init__()

Set default values after initialization

Source code in lodstorage/prefix_config.py
30
31
32
33
34
35
def __post_init__(self):
    """
    Set default values after initialization

    Currently a no-op kept as an extension point for future defaults.
    """
    # nothing to do yet
    pass

as_text()

Get prefixes as formatted text block.

Returns:

Name Type Description
str str

Newline-separated prefix declarations

Source code in lodstorage/prefix_config.py
37
38
39
40
41
42
43
44
45
def as_text(self) -> str:
    """
    Get prefixes as formatted text block.

    Returns:
        str: Newline-separated prefix declarations
    """
    declaration_lines = list(self.prefixes.values())
    return "\n".join(declaration_lines)

PrefixConfigs

Collection of prefix configurations loaded from YAML.

Source code in lodstorage/prefix_config.py
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
@lod_storable
class PrefixConfigs:
    """Collection of prefix configurations loaded from YAML."""

    # ClassVars: IGNORED by @dataclass
    # Enables singleton
    _instance: ClassVar[Optional["PrefixConfigs"]] = None
    _prefixes_path: ClassVar[Optional[str]] = None

    prefix_sets: Dict[str, PrefixConfig] = field(default_factory=dict)

    @classmethod
    def get_instance(cls) -> "PrefixConfigs":
        """Get singleton PrefixConfigs (loads prefixes.yaml via YamlPath if needed)."""
        if cls._instance is None:
            cls._instance = cls.of_yaml()
        return cls._instance

    @classmethod
    def preload(cls, prefixes_path: str) -> "PrefixConfigs":
        """Preload singleton with specific prefixes path."""
        cls._instance = cls.of_yaml(prefixes_path)
        cls._prefixes_path = prefixes_path
        return cls._instance

    @classmethod
    def of_yaml(cls, yaml_path: str = None) -> "PrefixConfigs":
        """Load from YAML (uses prefixes.yaml via YamlPath if yaml_path=None)."""
        if yaml_path is None:
            paths = YamlPath.getPaths("prefixes.yaml")
            yaml_path = paths[0] if paths else None
        if yaml_path and os.path.exists(yaml_path):
            prefix_configs = cls.load_from_yaml_file(yaml_path)
        else:
            prefix_configs = cls()  # Empty if no file
        return prefix_configs

    def __post_init__(self):
        """
        initialize all prefixes and set the prefix_prefix default for all prefix configs
        """
        self.all_prefixes = {}
        for key, prefix_config in self.prefix_sets.items():
            if prefix_config.prefix_prefix is None:
                prefix_config.prefix_prefix = key
            # collect inside the loop so EVERY prefix set is included,
            # not just the last one iterated
            self.all_prefixes.update(prefix_config.prefixes)

    def get_selected_declarations(self, prefix_set: List[str]) -> str:
        """
        Get PREFIX declarations for selected prefix sets.

        Args:
            prefix_set: List of prefix set names to include

        Returns:
            str: Combined PREFIX declarations
        """
        selected_prefixes = {}
        for prefix_set_name in prefix_set:
            if prefix_set_name in self.prefix_sets:
                prefix_config = self.prefix_sets[prefix_set_name]
                selected_prefixes.update(prefix_config.prefixes)
        declarations = self.get_prefix_declarations(selected_prefixes)
        return declarations

    def get_prefix_declarations(self, prefixes: Dict[str, str] = None) -> str:
        """
        Convert prefixes to PREFIX declarations.

        Args:
            prefixes: Dictionary of prefix mappings, defaults to all_prefixes

        Returns:
            str: Newline-separated PREFIX declarations ("" for an empty mapping)
        """
        if prefixes is None:
            prefixes = self.all_prefixes
        prefix_lines = []
        for prefix_name, prefix_uri in prefixes.items():
            prefix_lines.append(f"PREFIX {prefix_name}: <{prefix_uri}>")
        # join once after the loop; an empty mapping yields "" instead of
        # raising UnboundLocalError
        declarations = "\n".join(prefix_lines)
        return declarations

__post_init__()

initialize all prefixes and test prefix prefix for all prefix configs

Source code in lodstorage/prefix_config.py
85
86
87
88
89
90
91
92
93
94
def __post_init__(self):
    """
    initialize all prefixes and set the prefix_prefix default for all prefix configs
    """
    self.all_prefixes = {}
    for key, prefix_config in self.prefix_sets.items():
        if prefix_config.prefix_prefix is None:
            prefix_config.prefix_prefix = key
        # update inside the loop so EVERY prefix set is collected,
        # not just the last one iterated
        self.all_prefixes.update(prefix_config.prefixes)

get_instance() classmethod

Get singleton PrefixConfigs (loads prefixes.yaml via YamlPath if needed).

Source code in lodstorage/prefix_config.py
59
60
61
62
63
64
@classmethod
def get_instance(cls) -> "PrefixConfigs":
    """Get singleton PrefixConfigs (loads prefixes.yaml via YamlPath if needed)."""
    instance = cls._instance
    if instance is None:
        instance = cls.of_yaml()
        cls._instance = instance
    return instance

get_prefix_declarations(prefixes=None)

Convert prefixes to PREFIX declarations.

Parameters:

Name Type Description Default
prefixes Dict[str, str]

Dictionary of prefix mappings, defaults to all_prefixes

None

Returns:

Name Type Description
str str

Newline-separated PREFIX declarations

Source code in lodstorage/prefix_config.py
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
def get_prefix_declarations(self, prefixes: Dict[str, str] = None) -> str:
    """
    Convert prefixes to PREFIX declarations.

    Args:
        prefixes: Dictionary of prefix mappings, defaults to all_prefixes

    Returns:
        str: Newline-separated PREFIX declarations ("" for an empty mapping)
    """
    if prefixes is None:
        prefixes = self.all_prefixes
    prefix_lines = []
    for prefix_name, prefix_uri in prefixes.items():
        prefix_lines.append(f"PREFIX {prefix_name}: <{prefix_uri}>")
    # join once after the loop; an empty mapping yields "" instead of
    # raising UnboundLocalError
    declarations = "\n".join(prefix_lines)
    return declarations

get_selected_declarations(prefix_set)

Get PREFIX declarations for selected prefix sets.

Parameters:

Name Type Description Default
prefix_set List[str]

List of prefix set names to include

required

Returns:

Name Type Description
str str

Combined PREFIX declarations

Source code in lodstorage/prefix_config.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def get_selected_declarations(self, prefix_set: List[str]) -> str:
    """
    Get PREFIX declarations for selected prefix sets.

    Unknown prefix set names are silently skipped.

    Args:
        prefix_set: List of prefix set names to include

    Returns:
        str: Combined PREFIX declarations
    """
    combined = {}
    for set_name in prefix_set:
        config = self.prefix_sets.get(set_name)
        if config is not None:
            combined.update(config.prefixes)
    return self.get_prefix_declarations(combined)

of_yaml(yaml_path=None) classmethod

Load from YAML (uses prefixes.yaml via YamlPath if yaml_path=None).

Source code in lodstorage/prefix_config.py
73
74
75
76
77
78
79
80
81
82
83
@classmethod
def of_yaml(cls, yaml_path: str = None) -> "PrefixConfigs":
    """Load from YAML (uses prefixes.yaml via YamlPath if yaml_path=None)."""
    if yaml_path is None:
        # fall back to the first prefixes.yaml located by YamlPath, if any
        candidates = YamlPath.getPaths("prefixes.yaml")
        yaml_path = candidates[0] if candidates else None
    if yaml_path and os.path.exists(yaml_path):
        return cls.load_from_yaml_file(yaml_path)
    # no file available: start with an empty configuration
    return cls()

preload(prefixes_path) classmethod

Preload singleton with specific prefixes path.

Source code in lodstorage/prefix_config.py
66
67
68
69
70
71
@classmethod
def preload(cls, prefixes_path: str) -> "PrefixConfigs":
    """Preload the singleton from the given prefixes YAML path."""
    instance = cls.of_yaml(prefixes_path)
    cls._instance = instance
    cls._prefixes_path = prefixes_path
    return instance

prefixes

Created on 2024-03-02

@author: wf

Prefixes

Handles the generation of standard SPARQL prefix declarations for queries. This utility class simplifies the inclusion of common prefixes used in SPARQL queries by providing a method to generate the necessary PREFIX lines based on a list of prefix keys.

The class supports a wide range of prefixes relevant to Wikidata and general RDF/SPARQL usage, including RDF, RDFS, Wikibase, Schema.org, and more. It aims to reduce redundancy and improve clarity in SPARQL query construction by centralizing prefix management.

Methods:

Name Description
getPrefixes

Generates SPARQL PREFIX lines for a given list of prefix keys.

Source code in lodstorage/prefixes.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
class Prefixes:
    """
    Handles the generation of standard SPARQL prefix declarations for queries.
    This utility class simplifies the inclusion of common prefixes used in SPARQL
    queries by providing a method to generate the necessary PREFIX lines based on
    a list of prefix keys.

    The class supports a wide range of prefixes relevant to Wikidata and general RDF/SPARQL
    usage, including RDF, RDFS, Wikibase, Schema.org, and more. It aims to reduce redundancy
    and improve clarity in SPARQL query construction by centralizing prefix management.

    Methods:
        getPrefixes(prefixes): Generates SPARQL PREFIX lines for a given list of prefix keys.
    """

    # see https://www.wikidata.org/wiki/EntitySchema:E49
    prefixMap = {
        "bd": "<http://www.bigdata.com/rdf#>",
        "cc": "<http://creativecommons.org/ns#>",
        "dct": "<http://purl.org/dc/terms/>",
        "geo": "<http://www.opengis.net/ont/geosparql#>",
        "mwapi": "<https://www.mediawiki.org/ontology#API/>",
        "ontolex": "<http://www.w3.org/ns/lemon/ontolex#>",
        "owl": "<http://www.w3.org/2002/07/owl#>",
        "p": "<http://www.wikidata.org/prop/>",
        "pq": "<http://www.wikidata.org/prop/qualifier/>",
        "pqn": "<http://www.wikidata.org/prop/qualifier/value-normalized/>",
        "pqv": "<http://www.wikidata.org/prop/qualifier/value/>",
        "pr": "<http://www.wikidata.org/prop/reference/>",
        "prn": "<http://www.wikidata.org/prop/reference/value-normalized/>",
        "prov": "<http://www.w3.org/ns/prov#>",
        "prv": "<http://www.wikidata.org/prop/reference/value/>",
        "ps": "<http://www.wikidata.org/prop/statement/>",
        "psn": "<http://www.wikidata.org/prop/statement/value-normalized/>",
        "psv": "<http://www.wikidata.org/prop/statement/value/>",
        "rdf": "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
        "rdfs": "<http://www.w3.org/2000/01/rdf-schema#>",
        "schema": "<http://schema.org/>",
        "skos": "<http://www.w3.org/2004/02/skos/core#>",
        "wd": "<http://www.wikidata.org/entity/>",
        "wdsubgraph": "<https://query.wikidata.org/subgraph/>",
        "wdata": "<http://www.wikidata.org/wiki/Special:EntityData/>",
        "wdno": "<http://www.wikidata.org/prop/novalue/>",
        "wdref": "<http://www.wikidata.org/reference/>",
        "wds": "<http://www.wikidata.org/entity/statement/>",
        "wdt": "<http://www.wikidata.org/prop/direct/>",
        "wdtn": "<http://www.wikidata.org/prop/direct-normalized/>",
        "wdv": "<http://www.wikidata.org/value/>",
        "wikibase": "<http://wikiba.se/ontology#>",
        "xsd": "<http://www.w3.org/2001/XMLSchema#>",
    }

    @classmethod
    def getPrefixes(
        cls, prefixes=("rdf", "rdfs", "schema", "wd", "wdt", "wikibase", "xsd")
    ) -> str:
        """Generates SPARQL PREFIX lines for a given list of prefix keys.

        This method looks up URIs for the specified prefixes from a predefined map and constructs
        PREFIX lines suitable for inclusion at the beginning of a SPARQL query. It allows for easy
        and flexible specification of the prefixes needed for a particular query.

        Note:
            the default is an immutable tuple to avoid the mutable default
            argument pitfall; any iterable of prefix keys is accepted.

        Args:
            prefixes (iterable of str): prefix keys for which PREFIX lines should be generated.
                Defaults to a common set of prefixes used in Wikidata queries.

        Returns:
            str: A string containing the SPARQL PREFIX lines for the specified prefixes, each ending
                with a newline character. If a prefix key is not recognized, it is ignored.

        Example:
            >>> Prefixes.getPrefixes(["wd", "wdt"])
            'PREFIX wd: <http://www.wikidata.org/entity/>\nPREFIX wdt: <http://www.wikidata.org/prop/direct/>\n'
        """

        # see also https://www.wikidata.org/wiki/EntitySchema:E49
        declarations = cls.prefix_string(cls.prefixMap, prefixes)
        return declarations

    @classmethod
    def prefix_string(cls, prefix_dict: dict, prefix_keys: list[str]) -> str:
        """
        Build PREFIX lines for the given keys from the given prefix dictionary.

        Args:
            prefix_dict (dict): maps prefix names to their URI strings
            prefix_keys (list[str]): prefix names to include; unknown names are skipped

        Returns:
            str: concatenated PREFIX lines, one per known key
        """
        lines = [
            cls.prefix_line(prefix_dict, prefix)
            for prefix in prefix_keys
            if prefix in prefix_dict
        ]
        return "".join(lines)

    @classmethod
    def prefix_line(cls, prefix_dict: dict, prefix: str) -> str:
        """
        Format a single newline-terminated SPARQL PREFIX declaration line.

        Args:
            prefix_dict (dict): maps prefix names to their URI strings
            prefix (str): the prefix name to format (must be in prefix_dict)

        Returns:
            str: the PREFIX declaration line
        """
        line = f"PREFIX {prefix}: {prefix_dict[prefix]}\n"
        return line

    @classmethod
    def extract_prefixes(cls, sparql_query: str) -> dict:
        """
        Extract only the explicitly declared prefixes from a SPARQL query string.
        Simple regex-based extraction that finds PREFIX declarations in the query text.

        Args:
            sparql_query (str): The SPARQL query containing PREFIX declarations

        Returns:
            dict: Dictionary mapping prefix names to their URI strings
        """
        declared_prefixes = {}

        # Simple pattern to match PREFIX declarations: PREFIX name: <uri>
        prefix_pattern = r"PREFIX\s+(\w+):\s*<([^>]+)>"

        # Find all PREFIX declarations (case insensitive)
        matches = re.findall(prefix_pattern, sparql_query, re.IGNORECASE)

        # Convert matches to dictionary
        for prefix_name, uri in matches:
            declared_prefixes[prefix_name] = f"<{uri}>"

        return declared_prefixes

    @classmethod
    def merge_prefix_dict(cls, query: str, prefix_dict: dict) -> str:
        """
        Merge prefixes from dict into SPARQL query by prepending missing prefix declarations.

        Args:
            query (str): The SPARQL query
            prefix_dict (dict): Dictionary of prefixes to merge

        Returns:
            str: SPARQL query with missing prefixes prepended
        """
        existing_prefixes = cls.extract_prefixes(query)
        # only prepend prefixes the query does not already declare
        missing = set(prefix_dict.keys()) - set(existing_prefixes.keys())
        prepend = cls.prefix_string(prefix_dict, list(missing))
        query = prepend + query

        return query

    @classmethod
    def merge_prefixes(cls, query: str, prefixes: str) -> str:
        """
        Merge prefixes from string into SPARQL query by prepending missing prefix declarations.

        Args:
            query (str): The SPARQL query
            prefixes (str): String containing PREFIX declarations

        Returns:
            str: SPARQL query with missing prefixes prepended
        """
        prefix_dict = cls.extract_prefixes(prefixes)
        merged_query = cls.merge_prefix_dict(query, prefix_dict)
        return merged_query

extract_prefixes(sparql_query) classmethod

Extract only the explicitly declared prefixes from a SPARQL query string. Simple regex-based extraction that finds PREFIX declarations in the query text.

Parameters:

Name Type Description Default
sparql_query str

The SPARQL query containing PREFIX declarations

required

Returns:

Name Type Description
dict dict

Dictionary mapping prefix names to their URI strings

Source code in lodstorage/prefixes.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
@classmethod
def extract_prefixes(cls, sparql_query: str) -> dict:
    """
    Extract the explicitly declared prefixes from a SPARQL query string.

    A simple regex pass over the query text locates the PREFIX declarations.

    Args:
        sparql_query (str): The SPARQL query containing PREFIX declarations

    Returns:
        dict: Dictionary mapping prefix names to their URI strings
    """
    # matches declarations of the form: PREFIX name: <uri> (case insensitive)
    pattern = re.compile(r"PREFIX\s+(\w+):\s*<([^>]+)>", re.IGNORECASE)
    declared = {
        prefix_name: f"<{uri}>"
        for prefix_name, uri in pattern.findall(sparql_query)
    }
    return declared

getPrefixes(prefixes=['rdf', 'rdfs', 'schema', 'wd', 'wdt', 'wikibase', 'xsd']) classmethod

Generates SPARQL PREFIX lines for a given list of prefix keys.

    This method looks up URIs for the specified prefixes from a predefined map and constructs
    PREFIX lines suitable for inclusion at the beginning of a SPARQL query. It allows for easy
    and flexible specification of the prefixes needed for a particular query.

    Args:
        prefixes (list of str): A list of prefix keys for which PREFIX lines should be generated.
            Defaults to a common set of prefixes used in Wikidata queries.

    Returns:
        str: A string containing the SPARQL PREFIX lines for the specified prefixes, each ending
            with a newline character. If a prefix key is not recognized, it is ignored.

    Example:
        >>> Prefixes.getPrefixes(["wd", "wdt"])
        'PREFIX wd: <http://www.wikidata.org/entity/>\nPREFIX wdt: <http://www.wikidata.org/prop/direct/>\n'

Source code in lodstorage/prefixes.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
@classmethod
def getPrefixes(
    cls, prefixes=("rdf", "rdfs", "schema", "wd", "wdt", "wikibase", "xsd")
) -> str:
    """Generates SPARQL PREFIX lines for a given list of prefix keys.

    This method looks up URIs for the specified prefixes from a predefined map and constructs
    PREFIX lines suitable for inclusion at the beginning of a SPARQL query. It allows for easy
    and flexible specification of the prefixes needed for a particular query.

    Note:
        the default is an immutable tuple to avoid the mutable default
        argument pitfall; any iterable of prefix keys is accepted.

    Args:
        prefixes (iterable of str): prefix keys for which PREFIX lines should be generated.
            Defaults to a common set of prefixes used in Wikidata queries.

    Returns:
        str: A string containing the SPARQL PREFIX lines for the specified prefixes, each ending
            with a newline character. If a prefix key is not recognized, it is ignored.

    Example:
        >>> Prefixes.getPrefixes(["wd", "wdt"])
        'PREFIX wd: <http://www.wikidata.org/entity/>\nPREFIX wdt: <http://www.wikidata.org/prop/direct/>\n'
    """

    # see also https://www.wikidata.org/wiki/EntitySchema:E49
    declarations = cls.prefix_string(cls.prefixMap, prefixes)
    return declarations

merge_prefix_dict(query, prefix_dict) classmethod

Merge prefixes from dict into SPARQL query by prepending missing prefix declarations.

Parameters:

Name Type Description Default
query str

The SPARQL query

required
prefix_dict dict

Dictionary of prefixes to merge

required

Returns:

Name Type Description
str str

SPARQL query with missing prefixes prepended

Source code in lodstorage/prefixes.py
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
@classmethod
def merge_prefix_dict(cls, query: str, prefix_dict: dict) -> str:
    """
    Merge prefixes from dict into SPARQL query by prepending missing prefix declarations.

    Args:
        query (str): The SPARQL query
        prefix_dict (dict): Dictionary of prefixes to merge

    Returns:
        str: SPARQL query with missing prefixes prepended
    """
    already_declared = cls.extract_prefixes(query)
    # only prepend what the query does not declare yet
    missing_keys = set(prefix_dict.keys()) - set(already_declared.keys())
    prepend_block = cls.prefix_string(prefix_dict, list(missing_keys))
    return prepend_block + query

merge_prefixes(query, prefixes) classmethod

Merge prefixes from string into SPARQL query by prepending missing prefix declarations.

Parameters:

Name Type Description Default
query str

The SPARQL query

required
prefixes str

String containing PREFIX declarations

required

Returns:

Name Type Description
str str

SPARQL query with missing prefixes prepended

Source code in lodstorage/prefixes.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
@classmethod
def merge_prefixes(cls, query: str, prefixes: str) -> str:
    """
    Merge prefixes from string into SPARQL query by prepending missing prefix declarations.

    Args:
        query (str): The SPARQL query
        prefixes (str): String containing PREFIX declarations

    Returns:
        str: SPARQL query with missing prefixes prepended
    """
    # parse the declaration string into a dict, then delegate the merge
    parsed = cls.extract_prefixes(prefixes)
    return cls.merge_prefix_dict(query, parsed)

profiler

Created on 2022-11-18

@author: wf

Profiler

simple profiler

Source code in lodstorage/profiler.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
class Profiler:
    """
    simple profiler for measuring and reporting elapsed wall-clock time
    """

    def __init__(self, msg, profile=True, with_start: bool = True):
        """
        construct me with the given msg and profile active flag

        Args:
            msg(str): the message to show if profiling is active
            profile(bool): True if messages should be shown
            with_start(bool): if True start timing (and report) immediately
        """
        self.msg = msg
        self.profile = profile
        # always record a start time so time() works even when the caller
        # passed with_start=False and forgot to call start() explicitly
        # (previously that combination raised AttributeError)
        self.starttime = time.time()
        if with_start:
            self.start()

    def start(self):
        """
        start profiling
        """
        self.starttime = time.time()
        if self.profile:
            print(f"Starting {self.msg} ...")

    def time(self, extraMsg=""):
        """
        time the action and print if profile is active

        Args:
            extraMsg(str): extra text appended to the base message

        Returns:
            float: the elapsed time in seconds since start
        """
        elapsed = time.time() - self.starttime
        if self.profile:
            print(f"{self.msg}{extraMsg} took {elapsed:5.1f} s")
        return elapsed

__init__(msg, profile=True, with_start=True)

construct me with the given msg and profile active flag

Parameters:

Name Type Description Default
msg(str)

the message to show if profiling is active

required
profile(bool)

True if messages should be shown

required
Source code in lodstorage/profiler.py
15
16
17
18
19
20
21
22
23
24
25
26
def __init__(self, msg, profile=True, with_start: bool = True):
    """
    construct me with the given msg and profile active flag

    Args:
        msg(str): the message to show if profiling is active
        profile(bool): True if messages should be shown
        with_start(bool): if True start timing immediately
    """
    self.profile = profile
    self.msg = msg
    if with_start:
        self.start()

start()

start profiling

Source code in lodstorage/profiler.py
28
29
30
31
32
33
34
def start(self):
    """
    start profiling by recording the current wall-clock time
    """
    self.starttime = time.time()
    if not self.profile:
        return
    print(f"Starting {self.msg} ...")

time(extraMsg='')

time the action and print if profile is active

Source code in lodstorage/profiler.py
36
37
38
39
40
41
42
43
def time(self, extraMsg=""):
    """
    stop the clock and report the elapsed time if profiling is active

    Args:
        extraMsg(str): extra text appended to the base message

    Returns:
        float: the elapsed seconds since start
    """
    now = time.time()
    elapsed = now - self.starttime
    if self.profile:
        report = f"{self.msg}{extraMsg} took {elapsed:5.1f} s"
        print(report)
    return elapsed

query

Created on 2020-08-22

@author: wf

Endpoint

a query endpoint

Source code in lodstorage/query.py
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
@lod_storable
class Endpoint:
    """
    a query endpoint configuration: connection details, authentication,
    rate limiting, prefix handling and dataset characteristics
    """

    # Basic identification
    name: str = ""
    description: Optional[str] = None

    # Connection details
    lang: str = "SPARQL"
    endpoint: str = ""
    website: Optional[str] = None
    database: str = "blazegraph"
    method: str = "POST"
    # JDBC endpoints e.g. SQL
    host: Optional[str] = "localhost"
    port: Optional[int] = 3306
    charset: Optional[str] = "utf8mb4"

    # Authentication and rate limiting
    calls_per_minute: Optional[int] = None
    auth: Optional[str] = None
    user: Optional[str] = None
    password: Optional[str] = None

    # Prefix handling
    prefix_sets: Optional[List[str]] = None  # References to prefix set names
    prefixes: Optional[str] = None  # Legacy: inline prefixes for backward compatibility

    # Dataset characteristics
    data_seeded: Optional[str] = (
        None  # ISO date when data was initially seeded/imported: "2012-10-29"
    )
    auto_update: Optional[bool] = (
        None  # if false data_seeded is the most recent state of data
    )
    mtriples: Optional[int] = None  # Dataset size in millions of triples

    @staticmethod
    def getSamples():
        """
        get a list of sample endpoint configuration dicts
        (wikidata, dbis-jena and qlever-wikidata examples)
        """
        samples = [
            {
                "name": "wikidata",
                "lang": "sparql",
                "endpoint": "https://query.wikidata.org/sparql",
                "website": "https://query.wikidata.org/",
                "database": "blazegraph",
                "method": "POST",
                "calls_per_minute": 30,
                "prefixes": "PREFIX bd: <http://www.bigdata.com/rdf#>\nPREFIX cc: <http://creativecommons.org/ns#>",
            },
            {
                "name": "dbis-jena",
                "lang": "sparql",
                "endpoint": "https://confident.dbis.rwth-aachen.de/jena/",
                "website": "https://confident.dbis.rwth-aachen.de",
                "auth": "BASIC",
                "user": "secret",
                "password": "#not public - example not usable for access#",
            },
            {
                "name": "qlever-wikidata",
                "lang": "sparql",
                "method": "POST",
                "database": "qlever",
                "endpoint": "https://qlever.cs.uni-freiburg.de/api/wikidata",
                "website": "https://qlever.cs.uni-freiburg.de/wikidata",
            },
        ]
        return samples

    @classmethod
    def getDefault(cls):
        """
        get the default endpoint configuration (the first sample, wikidata)
        """
        sample_data = cls.getSamples()[0]
        endpoint_conf = cls.from_dict(sample_data)
        return endpoint_conf

    def get_prefixes(self, prefix_configs: Optional[PrefixConfigs] = None) -> str:
        """
        Get prefix declarations for this endpoint.

        Args:
            prefix_configs: PrefixConfigs instance to resolve prefix_sets

        Returns:
            str: PREFIX declarations
        """
        # default: empty
        prefixes = ""
        # Use inline prefixes if defined (legacy support)
        if self.prefixes:
            prefixes = self.prefixes

        # Resolve from prefix_sets if available
        # NOTE: prefix_sets take precedence over inline prefixes when both are set
        if self.prefix_sets and prefix_configs:
            prefixes = prefix_configs.get_selected_declarations(self.prefix_sets)

        return prefixes

    def __str__(self):
        """
        Returns:
            str: a string representation of this Endpoint
        """
        text = f"{self.name or ''}:{self.website or ''}:{self.endpoint or ''}({self.method or ''})"
        return text

__str__()

Returns:

Name Type Description
str

a string representation of this Endpoint

Source code in lodstorage/query.py
784
785
786
787
788
789
790
def __str__(self):
    """
    Returns:
        str: a string representation of this Endpoint
    """
    parts = [
        self.name or "",
        self.website or "",
        self.endpoint or "",
    ]
    text = ":".join(parts) + f"({self.method or ''})"
    return text

getDefault() classmethod

get the default endpoint configuration

Source code in lodstorage/query.py
753
754
755
756
757
758
759
760
@classmethod
def getDefault(cls):
    """
    get the default endpoint configuration (the first sample entry)
    """
    first_sample = cls.getSamples()[0]
    return cls.from_dict(first_sample)

get_prefixes(prefix_configs=None)

Get prefix declarations for this endpoint.

Parameters:

Name Type Description Default
prefix_configs Optional[PrefixConfigs]

PrefixConfigs instance to resolve prefix_sets

None

Returns:

Name Type Description
str str

PREFIX declarations

Source code in lodstorage/query.py
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
def get_prefixes(self, prefix_configs: Optional[PrefixConfigs] = None) -> str:
    """
    Get prefix declarations for this endpoint.

    Args:
        prefix_configs: PrefixConfigs instance to resolve prefix_sets

    Returns:
        str: PREFIX declarations
    """
    # legacy inline prefixes are the fallback (empty string when unset)
    prefixes = self.prefixes if self.prefixes else ""
    # prefix_sets, when resolvable, take precedence over inline prefixes
    if prefix_configs and self.prefix_sets:
        prefixes = prefix_configs.get_selected_declarations(self.prefix_sets)
    return prefixes

EndpointManager

Bases: object

manages a set of SPARQL endpoints

Source code in lodstorage/query.py
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
@lod_storable
class EndpointManager(object):
    """
    manages a set of SPARQL endpoints
    """

    endpoints: Dict[str, Endpoint] = field(default_factory=dict)

    @classmethod
    def ofYaml(cls, yaml_path: str) -> "EndpointManager":
        """Load endpoint configurations from the given YAML file."""
        manager = cls.load_from_yaml_file(yaml_path)
        return manager

    @classmethod
    def getEndpoints(
        cls, endpointPath: str = None, lang: str = None, with_default: bool = True
    ):
        """
        get the endpoints for the given endpointPath

        Args:
            endpointPath(str): the path to the yaml file with the endpoint configurations
            lang(str): if lang is given filter by the given language
            with_default(bool): if True include the default endpoints
        """
        yaml_paths = YamlPath.getPaths(
            "endpoints.yaml", endpointPath, with_default=with_default
        )
        collected = {}
        for yaml_path in yaml_paths:
            manager = cls.ofYaml(yaml_path)
            for name, endpoint in manager.endpoints.items():
                # apply the optional language filter
                if lang is not None and endpoint.lang != lang:
                    continue
                endpoint.name = name
                collected[name] = endpoint
        return collected

    @staticmethod
    def getEndpointNames(endpointPath=None, lang: str = None) -> list:
        """
        Returns a list of all available endpoint names
        Args:
            endpointPath(str): the path to the yaml file with the endpoint configurations
            lang(str): if lang is given filter by the given language

        """
        available = EndpointManager.getEndpoints(endpointPath, lang=lang)
        return list(available.keys())

getEndpointNames(endpointPath=None, lang=None) staticmethod

Returns a list of all available endpoint names. Args: endpointPath (str): the path to the yaml file with the endpoint configurations; lang (str): if lang is given, filter by the given language.

Source code in lodstorage/query.py
832
833
834
835
836
837
838
839
840
841
842
843
@staticmethod
def getEndpointNames(endpointPath=None, lang: str = None) -> list:
    """
    Returns a list of all available endpoint names
    Args:
        endpointPath(str): the path to the yaml file with the endpoint configurations
        lang(str): if lang is given filter by the given language

    """
    available = EndpointManager.getEndpoints(endpointPath, lang=lang)
    return list(available.keys())

getEndpoints(endpointPath=None, lang=None, with_default=True) classmethod

get the endpoints for the given endpointPath

Parameters:

Name Type Description Default
endpointPath(str)

the path to the yaml file with the endpoint configurations

required
lang(str)

if lang is given filter by the given language

required
with_default(bool)

if True include the default endpoints

required
Source code in lodstorage/query.py
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
@classmethod
def getEndpoints(
    cls, endpointPath: str = None, lang: str = None, with_default: bool = True
):
    """
    get the endpoints for the given endpointPath

    Args:
        endpointPath(str): the path to the yaml file with the endpoint configurations
        lang(str): if lang is given filter by the given language
        with_default(bool): if True include the default endpoints
    """
    yaml_paths = YamlPath.getPaths(
        "endpoints.yaml", endpointPath, with_default=with_default
    )
    collected = {}
    for yaml_path in yaml_paths:
        manager = cls.ofYaml(yaml_path)
        for name, endpoint in manager.endpoints.items():
            # apply the optional language filter
            if lang is not None and endpoint.lang != lang:
                continue
            endpoint.name = name
            collected[name] = endpoint
    return collected

ofYaml(yaml_path) classmethod

Load endpoint configurations from YAML file.

Source code in lodstorage/query.py
801
802
803
804
805
@classmethod
def ofYaml(cls, yaml_path: str) -> "EndpointManager":
    """Load endpoint configurations from the given YAML file."""
    em = cls.load_from_yaml_file(yaml_path)
    return em

Format

Bases: Enum

the supported formats for the results to be delivered

Source code in lodstorage/query.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
class Format(Enum):
    """
    the supported formats for the results to be delivered
    """

    csv = "csv"
    json = "json"
    html = "html"
    xml = "xml"
    tsv = "tsv"
    latex = "latex"
    mediawiki = "mediawiki"
    raw = "raw"
    github = "github"

    def __str__(self):
        # render as the plain format name e.g. "csv"
        return self.value

Query

A Query e.g. for SPARQL

Attributes:

Name Type Description
name str

the name/label of the query

query str

the native Query text e.g. in SPARQL

lang str

the language of the query e.g. SPARQL

sparql(str) str

SPARQL query code

sql(str) str

SQL query code

ask(str) str

SMW ASK query code

endpoint str

the endpoint url to use

database str

the type of database e.g. "blazegraph"

title str

the header/title of the query

description str

the description of the query

limit int

the limit of the query

prefixes list

list of prefixes to be resolved

tryItUrl str

the url of a "tryit" webpage

short_urls dict

dictionary of short urls keyed by endpoint name

formats list

key,value pairs of ValueFormatters to be applied

debug bool

true if debug mode should be switched on

Source code in lodstorage/query.py
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
@lod_storable
class Query:
    """
    A Query e.g. for SPARQL

    Attributes:
        name (str): the name/label of the query
        query (str): the native Query text e.g. in SPARQL
        lang (str): the language of the query e.g. SPARQL

        sparql (str): SPARQL query code
        sql (str): SQL query code
        ask (str): SMW ASK query code

        endpoint (str): the endpoint url to use
        database (str): the type of database e.g. "blazegraph"
        title (str): the header/title of the query
        description (str): the description of the query
        limit (int): the limit of the query
        prefixes (list): list of prefixes to be resolved
        tryItUrl (str): the url of a "tryit" webpage
        short_urls (dict): dictionary of short urls keyed by endpoint name
        formats (list): key,value pairs of ValueFormatters to be applied
        debug (bool): true if debug mode should be switched on
        formatCallBacks (list): callbacks to pre-format records, each taking (record, key, value, tablefmt)
        param_list (list): the input parameters of the query
        output (list): the output parameters of the query
    """

    name: str
    query: str
    lang: str = "sparql"
    sparql: Optional[str] = None
    sql: Optional[str] = None
    ask: Optional[str] = None
    endpoint: Optional[str] = None
    database: str = "blazegraph"
    title: Optional[str] = None
    description: Optional[str] = ""
    limit: Optional[int] = None
    prefixes: Optional[List[str]] = None
    tryItUrl: Optional[str] = None
    short_urls: Dict[str, str] = field(default_factory=dict)
    formats: Optional[List] = None
    debug: bool = False
    formatCallBacks: List = field(default_factory=list)
    param_list: List[Param] = field(default_factory=list)  # input parameters
    output: List[Param] = field(default_factory=list)  # output parameters

    def __post_init__(self):
        """
        default the title to the name and prepare the query parameters
        """
        if self.title is None:
            self.title = self.name
        if self.query:
            self.params = Params(self.query)

    def __str__(self):
        """
        Returns:
            str: newline separated key:value pairs of all non-None fields
        """
        queryStr = "\n".join(
            [
                f"{key}:{value}"
                for key, value in self.__dict__.items()
                if value is not None
            ]
        )
        return f"{queryStr}"

    def set_default_params(self, params_dict: Dict[str, Any]):
        """
        set the default parameters for the given params_dict

        Args:
            params_dict (Dict[str, Any]): the dict to fill with each declared parameter's default value
        """
        for param in self.param_list:
            value = param.default_value
            params_dict[param.name] = value

    def apply_default_params(self):
        """
        apply my default parameters
        """
        self.set_default_params(self.params.params_dict)
        self.params.apply_parameters()

    def addFormatCallBack(self, callback):
        """
        register a pre-format callback

        Args:
            callback: a callable taking (record, key, value, tablefmt)
        """
        self.formatCallBacks.append(callback)

    def preFormatWithCallBacks(self, lod, tablefmt: str):
        """
        run the configured call backs to pre-format the given list of dicts for the given tableformat

        Args:
            lod(list): the list of dicts to handle
            tablefmt(str): the table format (according to tabulate) to apply

        """
        for record in lod:
            for key in record.keys():
                value = record[key]
                # None values are left untouched
                if value is not None:
                    for formatCallBack in self.formatCallBacks:
                        formatCallBack(record, key, value, tablefmt)

    def formatWithValueFormatters(self, lod, tablefmt: str):
        """
        format the given list of Dicts with the ValueFormatters

        Args:
            lod(list): the list of dicts to format in place
            tablefmt(str): the table format to apply
        """
        # is there anything to do?
        if self.formats is None:
            # no
            return
        # get the value Formatters that might apply here
        valueFormatters = ValueFormatters.get_instance()
        formatsToApply = {}
        for valueFormatSpec in self.formats:
            parts = valueFormatSpec.split(":")
            # e.g. president:wikidata
            keytoformat = parts[0]
            formatName = parts[1]
            if formatName in valueFormatters.formatters:
                formatsToApply[keytoformat] = valueFormatters.formatters[formatName]
        for record in lod:
            for keytoformat in formatsToApply:
                valueFormatter = formatsToApply[keytoformat]
                # format all key values
                if keytoformat == "*":
                    for key in record:
                        valueFormatter.apply_format(record, key, tablefmt)
                # or just a selected one
                elif keytoformat in record:
                    valueFormatter.apply_format(record, keytoformat, tablefmt)

    def getTryItUrl(self, baseurl: str, database: str = "blazegraph"):
        """
        return the "try it!" url for the given baseurl

        Args:
            baseurl(str): the baseurl to use
            database(str): the type of database e.g. "blazegraph"

        Returns:
            str: the "try it!" url for the given query
        """
        # https://stackoverflow.com/a/9345102/1497139
        prefixed_query = str(self.query)
        if self.prefixes:
            # NOTE(review): prefixes are joined without a separating newline
            # before the query text - presumably each prefix line carries its
            # own trailing newline; verify against callers
            prepend = "\n".join(self.prefixes)
            prefixed_query = prepend + prefixed_query
        quoted = urllib.parse.quote(prefixed_query)
        if database == "blazegraph":
            delim = "/#"
        else:
            delim = "?query="
        url = f"{baseurl}{delim}{quoted}"
        return url

    def getLink(self, url, title, tablefmt):
        """
        convert the given url and title to a link for the given tablefmt

        Args:
            url(str): the url to convert
            title(str): the title to show
            tablefmt(str): the table format to use

        Returns:
            str: the link markup; empty string if url is None
        """
        # create a safe url
        if url is None:
            return ""
        # default: plain "title:url" notation
        markup = f"{title}:{url}"
        if tablefmt == "mediawiki":
            markup = f"[{url} {title}]"
        elif tablefmt == "github":
            markup = f"[{title}]({url})"
        elif tablefmt == "latex":
            markup = r"\href{%s}{%s}" % (url, title)
        return markup

    def add_endpoint_prefixes(
        self, endpoint: "Endpoint", prefix_configs: PrefixConfigs
    ) -> None:
        """
        Add endpoint-specific PREFIX declarations to this query (via prefix_sets or legacy prefixes).

        Merges (deduplicates by prefix name) endpoint prefixes into self.query using Prefixes.merge_prefixes().
        Updates self.prefixes to full unique PREFIX lines list. Safe/idempotent (no-op if prefixes_str empty).

        Args:
            endpoint (Endpoint): Endpoint config with prefix_sets or legacy prefixes.
            prefix_configs (PrefixConfigs): Loaded prefix configurations resolver.
        """
        prefixes_str = endpoint.get_prefixes(prefix_configs)
        if not prefixes_str.strip():
            return

        # Merge: Prepend ONLY missing prefixes (no dups like 'rdfs')
        self.query = Prefixes.merge_prefixes(self.query, prefixes_str)

        # Update self.prefixes: Full unique lines from merged query
        prefix_dict = Prefixes.extract_prefixes(self.query)
        self.prefixes = [
            Prefixes.prefix_line(prefix_dict, prefix) for prefix in sorted(prefix_dict)
        ]

    def prefixToLink(self, lod: list, prefix: str, tablefmt: str):
        """
        convert url prefixes to link according to the given table format
        TODO - refactor as preFormat callback

        Args:
            lod(list): the list of dicts to convert
            prefix(str): the prefix to strip
            tablefmt(str): the tabulate tableformat to use

        """
        for record in lod:
            for key in record.keys():
                value = record[key]
                # only string cells that start with the prefix are converted
                if (
                    value is not None
                    and isinstance(value, str)
                    and value.startswith(prefix)
                ):
                    item = value.replace(prefix, "")
                    uqitem = urllib.parse.unquote(item)
                    if tablefmt == "latex":
                        # latex gets the plain unquoted item instead of a link
                        link = uqitem
                    else:
                        link = self.getLink(value, uqitem, tablefmt)
                    record[key] = link

    def asWikiSourceMarkup(self):
        """
        convert me to Mediawiki markup for syntax highlighting using the "source" tag


        Returns:
            string: the Markup
        """
        markup = "<source lang='%s'>\n%s\n</source>\n" % (self.lang, self.query)
        return markup

    def asWikiMarkup(self, listOfDicts):
        """
        convert the given listOfDicts result to MediaWiki markup

        Args:
            listOfDicts(list): the list of Dicts to convert to MediaWiki markup

        Returns:
            string: the markup
        """
        if self.debug:
            print(listOfDicts)
        mwTable = MediaWikiTable()
        mwTable.fromListOfDicts(listOfDicts)
        markup = mwTable.asWikiMarkup()
        return markup

    def documentQueryResult(
        self,
        qlod: list,
        limit=None,
        tablefmt: str = "mediawiki",
        tryItUrl: str = None,
        withSourceCode=True,
        **kwArgs,
    ):
        """
        document the given query results - note that a copy of the whole list is going to be created for being able to format

        Args:
            qlod(list): the list of dicts result
            limit(int): the maximum number of records to display in result tabulate
            tablefmt(str): the table format to use
            tryItUrl(str): the "try it!" url to show
            withSourceCode(bool): if True document the source code

        Returns:
            QueryResultDocumentation: the documentation for the given parameters
        """
        sourceCode = self.query
        tryItMarkup = ""
        sourceCodeHeader = ""
        resultHeader = ""
        title = self.title
        # work on a deep copy so formatting does not modify the caller's data
        if limit is not None:
            lod = copy.deepcopy(qlod[:limit])
        else:
            lod = copy.deepcopy(qlod)
        self.preFormatWithCallBacks(lod, tablefmt=tablefmt)
        self.formatWithValueFormatters(lod, tablefmt=tablefmt)
        result = tabulate(lod, headers="keys", tablefmt=tablefmt, **kwArgs)
        # fall back to my own tryItUrl if none was given
        if tryItUrl is None and hasattr(self, "tryItUrl"):
            tryItUrl = self.tryItUrl
        if tablefmt == "github":
            title = f"## {self.title}"
            resultHeader = "## result"
        elif tablefmt == "mediawiki":
            title = f"== {self.title} =="
            resultHeader = "=== result ==="
        elif tablefmt == "latex":
            resultHeader = ""
            result = r"""\begin{table}
            \caption{%s}
            \label{tab:%s}
            %s
            \end{table}
            """ % (
                self.title,
                self.name,
                result,
            )
        else:
            title = f"{self.title}"
            resultHeader = "result:"
        if withSourceCode:
            tryItUrlEncoded = self.getTryItUrl(tryItUrl, self.database)
            tryItMarkup = self.getLink(tryItUrlEncoded, "try it!", tablefmt)
            if tablefmt == "github":
                sourceCodeHeader = "### query"
                sourceCode = f"""```{self.lang}
{self.query}
```"""
            elif tablefmt == "mediawiki":
                sourceCodeHeader = "=== query ==="
                sourceCode = f"""<source lang='{self.lang}'>
{self.query}
</source>
"""
            elif tablefmt == "latex":
                sourceCodeHeader = (
                    r"see query listing \ref{listing:%s} and result table \ref{tab:%s}"
                    % (self.name, self.name)
                )
                sourceCode = r"""\begin{listing}[ht]
\caption{%s}
\label{listing:%s}
\begin{minted}{%s}
%s
\end{minted}
%s
\end{listing}
""" % (
                    self.title,
                    self.name,
                    self.lang.lower(),
                    self.query,
                    tryItMarkup,
                )
            else:
                sourceCodeHeader = "query:"
                sourceCode = f"{self.query}"
        # only sparql queries get a "try it!" link
        if self.lang != "sparql":
            tryItMarkup = ""
        queryResultDocumentation = QueryResultDocumentation(
            query=self,
            title=title,
            tablefmt=tablefmt,
            tryItMarkup=tryItMarkup,
            sourceCodeHeader=sourceCodeHeader,
            sourceCode=sourceCode,
            resultHeader=resultHeader,
            result=result,
        )
        return queryResultDocumentation

add_endpoint_prefixes(endpoint, prefix_configs)

Add endpoint-specific PREFIX declarations to this query (via prefix_sets or legacy prefixes).

Merges (deduplicates by prefix name) endpoint prefixes into self.query using Prefixes.merge_prefixes(). Updates self.prefixes to full unique PREFIX lines list. Safe/idempotent (no-op if prefixes_str empty).

Parameters:

Name Type Description Default
endpoint Endpoint

Endpoint config with prefix_sets or legacy prefixes.

required
prefix_configs PrefixConfigs

Loaded prefix configurations resolver.

required
Source code in lodstorage/query.py
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
def add_endpoint_prefixes(
    self, endpoint: "Endpoint", prefix_configs: PrefixConfigs
) -> None:
    """
    Add endpoint-specific PREFIX declarations to this query (via prefix_sets or legacy prefixes).

    Merges (deduplicates by prefix name) endpoint prefixes into self.query using Prefixes.merge_prefixes().
    Updates self.prefixes to full unique PREFIX lines list. Safe/idempotent (no-op if prefixes_str empty).

    Args:
        endpoint (Endpoint): Endpoint config with prefix_sets or legacy prefixes.
        prefix_configs (PrefixConfigs): Loaded prefix configurations resolver.
    """
    declared = endpoint.get_prefixes(prefix_configs)
    # nothing declared - leave query and prefixes untouched
    if not declared.strip():
        return

    # prepend only those PREFIX lines that are not yet in the query
    self.query = Prefixes.merge_prefixes(self.query, declared)

    # re-derive the sorted, duplicate-free prefix line list from the merged query
    merged = Prefixes.extract_prefixes(self.query)
    self.prefixes = [Prefixes.prefix_line(merged, name) for name in sorted(merged)]

apply_default_params()

apply my default parameters

Source code in lodstorage/query.py
333
334
335
336
337
338
def apply_default_params(self):
    """
    apply my default parameters
    """
    # fill the parameter dict with defaults, then substitute them into the query
    my_params = self.params
    self.set_default_params(my_params.params_dict)
    my_params.apply_parameters()

asWikiMarkup(listOfDicts)

convert the given listOfDicts result to MediaWiki markup

Parameters:

Name Type Description Default
listOfDicts(list)

the list of Dicts to convert to MediaWiki markup

required

Returns:

Name Type Description
string

the markup

Source code in lodstorage/query.py
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
def asWikiMarkup(self, listOfDicts):
    """
    convert the given listOfDicts result to MediaWiki markup

    Args:
        listOfDicts(list): the list of Dicts to convert to MediaWiki markup

    Returns:
        string: the markup
    """
    # optionally trace the raw records before conversion
    if self.debug:
        print(listOfDicts)
    wiki_table = MediaWikiTable()
    wiki_table.fromListOfDicts(listOfDicts)
    return wiki_table.asWikiMarkup()

asWikiSourceMarkup()

convert me to Mediawiki markup for syntax highlighting using the "source" tag

Returns:

Name Type Description
string

the Markup

Source code in lodstorage/query.py
485
486
487
488
489
490
491
492
493
494
def asWikiSourceMarkup(self):
    """
    convert me to Mediawiki markup for syntax highlighting using the "source" tag


    Returns:
        string: the Markup
    """
    # wrap the query in a source tag carrying my language
    return f"<source lang='{self.lang}'>\n{self.query}\n</source>\n"

documentQueryResult(qlod, limit=None, tablefmt='mediawiki', tryItUrl=None, withSourceCode=True, **kwArgs)

document the given query results - note that a copy of the whole list is going to be created for being able to format

Parameters:

Name Type Description Default
qlod list

the list of dicts result

required
limit(int)

the maximum number of records to display in result tabulate

required
tablefmt(str)

the table format to use

required
tryItUrl str

the "try it!" url to show

None
withSourceCode(bool)

if True document the source code

required
Returns

str: the documentation tabular text for the given parameters

Source code in lodstorage/query.py
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
    def documentQueryResult(
        self,
        qlod: list,
        limit=None,
        tablefmt: str = "mediawiki",
        tryItUrl: str = None,
        withSourceCode=True,
        **kwArgs,
    ):
        """
        document the given query results - note that a copy of the whole list is going to be created for being able to format

        Args:
            qlod(list): the list of dicts result
            limit(int): the maximum number of records to display in result tabulate
            tablefmt(str): the table format to use
            tryItUrl(str): the "try it!" url to show
            withSourceCode(bool): if True document the source code

        Returns:
            QueryResultDocumentation: the documentation for the given parameters
        """
        sourceCode = self.query
        tryItMarkup = ""
        sourceCodeHeader = ""
        resultHeader = ""
        title = self.title
        # work on a deep copy so formatting does not modify the caller's data
        if limit is not None:
            lod = copy.deepcopy(qlod[:limit])
        else:
            lod = copy.deepcopy(qlod)
        self.preFormatWithCallBacks(lod, tablefmt=tablefmt)
        self.formatWithValueFormatters(lod, tablefmt=tablefmt)
        result = tabulate(lod, headers="keys", tablefmt=tablefmt, **kwArgs)
        # fall back to my own tryItUrl if none was given
        if tryItUrl is None and hasattr(self, "tryItUrl"):
            tryItUrl = self.tryItUrl
        if tablefmt == "github":
            title = f"## {self.title}"
            resultHeader = "## result"
        elif tablefmt == "mediawiki":
            title = f"== {self.title} =="
            resultHeader = "=== result ==="
        elif tablefmt == "latex":
            resultHeader = ""
            result = r"""\begin{table}
            \caption{%s}
            \label{tab:%s}
            %s
            \end{table}
            """ % (
                self.title,
                self.name,
                result,
            )
        else:
            title = f"{self.title}"
            resultHeader = "result:"
        if withSourceCode:
            tryItUrlEncoded = self.getTryItUrl(tryItUrl, self.database)
            tryItMarkup = self.getLink(tryItUrlEncoded, "try it!", tablefmt)
            if tablefmt == "github":
                sourceCodeHeader = "### query"
                sourceCode = f"""```{self.lang}
{self.query}
```"""
            elif tablefmt == "mediawiki":
                sourceCodeHeader = "=== query ==="
                sourceCode = f"""<source lang='{self.lang}'>
{self.query}
</source>
"""
            elif tablefmt == "latex":
                sourceCodeHeader = (
                    r"see query listing \ref{listing:%s} and result table \ref{tab:%s}"
                    % (self.name, self.name)
                )
                sourceCode = r"""\begin{listing}[ht]
\caption{%s}
\label{listing:%s}
\begin{minted}{%s}
%s
\end{minted}
%s
\end{listing}
""" % (
                    self.title,
                    self.name,
                    self.lang.lower(),
                    self.query,
                    tryItMarkup,
                )
            else:
                sourceCodeHeader = "query:"
                sourceCode = f"{self.query}"
        # only sparql queries get a "try it!" link
        if self.lang != "sparql":
            tryItMarkup = ""
        queryResultDocumentation = QueryResultDocumentation(
            query=self,
            title=title,
            tablefmt=tablefmt,
            tryItMarkup=tryItMarkup,
            sourceCodeHeader=sourceCodeHeader,
            sourceCode=sourceCode,
            resultHeader=resultHeader,
            result=result,
        )
        return queryResultDocumentation

formatWithValueFormatters(lod, tablefmt)

format the given list of Dicts with the ValueFormatters

Source code in lodstorage/query.py
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
def formatWithValueFormatters(self, lod, tablefmt: str):
    """
    format the given list of Dicts with the ValueFormatters
    """
    # no formats configured - nothing to do
    if self.formats is None:
        return
    vf_registry = ValueFormatters.get_instance()
    # resolve "key:formatName" specs against the known formatters
    applicable = {}
    for spec in self.formats:
        # e.g. president:wikidata
        parts = spec.split(":")
        target_key = parts[0]
        format_name = parts[1]
        if format_name in vf_registry.formatters:
            applicable[target_key] = vf_registry.formatters[format_name]
    for record in lod:
        for target_key, formatter in applicable.items():
            if target_key == "*":
                # wildcard: format every column of the record
                for column in record:
                    formatter.apply_format(record, column, tablefmt)
            elif target_key in record:
                # format just the selected column
                formatter.apply_format(record, target_key, tablefmt)

convert the given url and title to a link for the given tablefmt

Parameters:

Name Type Description Default
url(str)

the url to convert

required
title(str)

the title to show

required
tablefmt(str)

the table format to use

required
Source code in lodstorage/query.py
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
def getLink(self, url, title, tablefmt):
    """
    convert the given url and title to a link for the given tablefmt

    Args:
        url(str): the url to convert
        title(str): the title to show
        tablefmt(str): the table format to use
    """
    # no url - no link
    if url is None:
        return ""
    if tablefmt == "mediawiki":
        return f"[{url} {title}]"
    if tablefmt == "github":
        return f"[{title}]({url})"
    if tablefmt == "latex":
        return r"\href{%s}{%s}" % (url, title)
    # fall back to a plain "title:url" notation
    return f"{title}:{url}"

getTryItUrl(baseurl, database='blazegraph')

return the "try it!" url for the given baseurl

Parameters:

Name Type Description Default
baseurl(str)

the baseurl to use

required

Returns:

Name Type Description
str

the "try it!" url for the given query

Source code in lodstorage/query.py
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
def getTryItUrl(self, baseurl: str, database: str = "blazegraph"):
    """
    return the "try it!" url for the given baseurl

    Args:
        baseurl(str): the baseurl to use
        database(str): the type of database e.g. "blazegraph"

    Returns:
        str: the "try it!" url for the given query
    """
    # https://stackoverflow.com/a/9345102/1497139
    full_query = str(self.query)
    if self.prefixes:
        # NOTE(review): prefixes are joined without a separating newline before
        # the query text - presumably each prefix line ends with one; verify
        full_query = "\n".join(self.prefixes) + full_query
    encoded = urllib.parse.quote(full_query)
    delim = "/#" if database == "blazegraph" else "?query="
    return f"{baseurl}{delim}{encoded}"

preFormatWithCallBacks(lod, tablefmt)

run the configured call backs to pre-format the given list of dicts for the given tableformat

Parameters:

Name Type Description Default
lod(list)

the list of dicts to handle

required
tablefmt(str)

the table format (according to tabulate) to apply

required
Source code in lodstorage/query.py
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
def preFormatWithCallBacks(self, lod, tablefmt: str):
    """
    run the configured call backs to pre-format the given list of dicts for the given tableformat

    Args:
        lod(list): the list of dicts to handle
        tablefmt(str): the table format (according to tabulate) to apply

    """
    for record in lod:
        for key in record.keys():
            current = record[key]
            # None values are left untouched
            if current is None:
                continue
            for callback in self.formatCallBacks:
                callback(record, key, current, tablefmt)

convert url prefixes to link according to the given table format TODO - refactor as preFormat callback

Parameters:

Name Type Description Default
lod(list)

the list of dicts to convert

required
prefix(str)

the prefix to strip

required
tablefmt(str)

the tabulate tableformat to use

required
Source code in lodstorage/query.py
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
def prefixToLink(self, lod: list, prefix: str, tablefmt: str):
    """
    convert url prefixes to link according to the given table format
    TODO - refactor as preFormat callback

    Args:
        lod(list): the list of dicts to convert
        prefix(str): the prefix to strip
        tablefmt(str): the tabulate tableformat to use

    """
    for record in lod:
        for key in record.keys():
            cell = record[key]
            # only string cells that start with the prefix are converted
            if cell is None or not isinstance(cell, str):
                continue
            if not cell.startswith(prefix):
                continue
            stripped = urllib.parse.unquote(cell.replace(prefix, ""))
            if tablefmt == "latex":
                # latex gets the plain unquoted item instead of a link
                record[key] = stripped
            else:
                record[key] = self.getLink(cell, stripped, tablefmt)

set_default_params(params_dict)

set the default parameters for the given params_dict

Source code in lodstorage/query.py
325
326
327
328
329
330
331
def set_default_params(self, params_dict: Dict[str, Any]):
    """
    set the default parameters for the given params_dict
    """
    # record each declared input parameter's default value
    for input_param in self.param_list:
        params_dict[input_param.name] = input_param.default_value

QueryManager

Bases: object

manages pre packaged Queries

Source code in lodstorage/query.py
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
class QueryManager(object):
    """
    manages pre packaged Queries
    """

    def __init__(
        self, lang: str = None, debug=False, queriesPath=None, with_default: bool = True
    ):
        """
        Constructor
        Args:
            lang(str): the language to use for the queries sql or sparql
            queriesPath(str): the path of the yaml file to load queries from
            debug(bool): True if debug information should be shown
            with_default(bool): if True also load the default yaml file
        """
        # default language is sql
        if lang is None:
            lang = "sql"
        self.queriesByName = {}
        self.lang = lang
        self.debug = debug
        queries = self.getQueries(queriesPath=queriesPath, with_default=with_default)
        for name, queryDict in queries.items():
            # only pick up queries available in my configured language
            if self.lang in queryDict:
                queryDict["name"] = name
                queryDict["lang"] = self.lang
                if "query" not in queryDict:
                    # fall back to the language-specific query text
                    queryDict["query"] = queryDict[self.lang]
                try:
                    query = Query.from_dict(queryDict)
                    query.debug = self.debug
                    self.queriesByName[name] = query
                except Exception as ex:
                    msg = f"Failed to load query '{name}' ({self.lang})"
                    ExceptionHandler.handle(msg, ex, debug=self.debug)

    def getQueries(self, queriesPath=None, with_default: bool = True):
        """
        get the queries for the given queries Path

        Args:
            queriesPath(str): the path of the yaml file to load queries from
            with_default(bool): if True also load the default yaml file

        Returns:
            dict: the queries merged from all yaml files found, later files overriding earlier entries
        """
        queriesPaths = YamlPath.getPaths(
            "queries.yaml", queriesPath, with_default=with_default
        )
        queries = {}
        # do not shadow the queriesPath parameter with the loop variable
        for yamlPath in queriesPaths:
            if os.path.isfile(yamlPath):
                with open(yamlPath, "r") as stream:
                    lqueries = yaml.safe_load(stream)
                    for key in lqueries:
                        queries[key] = lqueries[key]
        return queries

__init__(lang=None, debug=False, queriesPath=None, with_default=True)

Constructor Args: lang(str): the language to use for the queries sql or sparql queriesPath(str): the path of the yaml file to load queries from debug(bool): True if debug information should be shown with_default(bool): if True also load the default yaml file

Source code in lodstorage/query.py
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
def __init__(
    self, lang: str = None, debug=False, queriesPath=None, with_default: bool = True
):
    """
    Constructor
    Args:
        lang(str): the language to use for the queries sql or sparql
        queriesPath(str): the path of the yaml file to load queries from
        debug(bool): True if debug information should be shown
        with_default(bool): if True also load the default yaml file
    """
    # default language is sql
    if lang is None:
        lang = "sql"
    self.queriesByName = {}
    self.lang = lang
    self.debug = debug
    queries = self.getQueries(queriesPath=queriesPath, with_default=with_default)
    for name, queryDict in queries.items():
        # only pick up queries available in my configured language
        if self.lang in queryDict:
            queryDict["name"] = name
            queryDict["lang"] = self.lang
            if "query" not in queryDict:
                # fall back to the language-specific query text
                queryDict["query"] = queryDict[self.lang]
            try:
                query = Query.from_dict(queryDict)
                query.debug = self.debug
                self.queriesByName[name] = query
            except Exception as ex:
                msg = f"Failed to load query '{name}' ({self.lang})"
                ExceptionHandler.handle(msg, ex, debug=self.debug)

getQueries(queriesPath=None, with_default=True)

get the queries for the given queries Path

Parameters:

Name Type Description Default
queriesPath(str)

the path of the yaml file to load queries from

required
with_default(bool)

if True also load the default yaml file

required
Source code in lodstorage/query.py
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
def getQueries(self, queriesPath=None, with_default: bool = True):
    """
    get the queries for the given queries Path

    Args:
        queriesPath(str): the path of the yaml file to load queries from
        with_default(bool): if True also load the default yaml file

    Returns:
        dict: the queries merged from all yaml files found, later files overriding earlier entries
    """
    queriesPaths = YamlPath.getPaths(
        "queries.yaml", queriesPath, with_default=with_default
    )
    queries = {}
    # do not shadow the queriesPath parameter with the loop variable
    for yamlPath in queriesPaths:
        if os.path.isfile(yamlPath):
            with open(yamlPath, "r") as stream:
                lqueries = yaml.safe_load(stream)
                for key in lqueries:
                    queries[key] = lqueries[key]
    return queries

QueryResultDocumentation

documentation of a query result

Source code in lodstorage/query.py
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
class QueryResultDocumentation:
    """
    documentation of a query result
    """

    def __init__(
        self,
        query,
        title: str,
        tablefmt: str,
        tryItMarkup: str,
        sourceCodeHeader: str,
        sourceCode: str,
        resultHeader: str,
        result: str,
    ):
        """
        constructor

        Args:
            query (Query): the query to be documented
            title (str): the title markup
            tablefmt (str): the table format that has been used
            tryItMarkup (str): the "try it!" markup to show
            sourceCodeHeader (str): the header title to use for the sourceCode
            sourceCode (str): the sourceCode
            resultHeader (str): the header title to use for the result
            result (str): the result markup
        """
        self.query = query
        self.title = title
        self.tablefmt = tablefmt
        # prefix with a newline so the markup starts on its own line in asText
        self.tryItMarkup = f"\n{tryItMarkup}"
        self.sourceCodeHeader = sourceCodeHeader
        self.sourceCode = sourceCode
        self.resultHeader = resultHeader
        self.result = result

    @staticmethod
    def uniCode2Latex(text: str, withConvert: bool = False) -> str:
        """
        converts unicode text to latex and
        fixes UTF-8 chars for latex in a certain range:
            ₀:$_0$ ... ₉:$_9$

        see https://github.com/phfaist/pylatexenc/issues/72

        Args:
            text (str): the string to fix
            withConvert (bool): if unicode to latex library conversion should be used

        Returns:
            str: latex presentation of UTF-8 char
        """
        # map the unicode subscript digits U+2080 (₀) .. U+2089 (₉)
        # to latex math subscripts $_0$ .. $_9$
        for code in range(8320, 8330):
            text = text.replace(chr(code), f"$_{code-8320}$")
        if withConvert:
            # workaround - hidden dependency!
            from pylatexenc.latexencode import unicode_to_latex

            latex = unicode_to_latex(text)
            # workaround {\textbackslash} being returned
            # latex=latex.replace("{\\textbackslash}",'\\')
            text = latex
        return text

    def __str__(self):
        """
        simple string representation

        Returns:
            str: the text representation as per asText
        """
        return self.asText()

    def asText(self):
        """
        return my text representation

        Returns:
            str: description, sourceCodeHeader, sourceCode, tryIt link and result table
        """
        text = f"{self.title}\n{self.query.description}\n{self.sourceCodeHeader}\n{self.sourceCode}{self.tryItMarkup}\n{self.resultHeader}\n{self.result}"
        # latex output needs the unicode subscript digits converted
        fixedStr = (
            self.uniCode2Latex(text) if self.tablefmt.lower() == "latex" else text
        )
        return fixedStr

__init__(query, title, tablefmt, tryItMarkup, sourceCodeHeader, sourceCode, resultHeader, result)

constructor

Parameters:

Name Type Description Default
query(Query)

the query to be documented

required
title(str)

the title markup

required
tablefmt(str)

the tableformat that has been used

required
tryItMarkup str

the "try it!" markup to show

required
sourceCodeHeader(str)

the header title to use for the sourceCode

required
sourceCode(str)

the sourceCode

required
resultHeader(str)

the header title to use for the result

required
result(str)

the result markup

required
Source code in lodstorage/query.py
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
def __init__(
    self,
    query,
    title: str,
    tablefmt: str,
    tryItMarkup: str,
    sourceCodeHeader: str,
    sourceCode: str,
    resultHeader: str,
    result: str,
):
    """
    constructor

    Args:
        query (Query): the query to be documented
        title (str): the title markup
        tablefmt (str): the table format that has been used
        tryItMarkup (str): the "try it!" markup to show
        sourceCodeHeader (str): the header title to use for the sourceCode
        sourceCode (str): the sourceCode
        resultHeader (str): the header title to use for the result
        result (str): the result markup

    """
    self.query = query
    self.title = title
    self.tablefmt = tablefmt
    # prefix with a newline so the markup starts on its own line in asText
    self.tryItMarkup = f"\n{tryItMarkup}"
    self.sourceCodeHeader = sourceCodeHeader
    self.sourceCode = sourceCode
    self.resultHeader = resultHeader
    self.result = result

__str__()

simple string representation

Source code in lodstorage/query.py
243
244
245
246
247
def __str__(self):
    """
    simple string representation

    Returns:
        str: the text representation as per asText
    """
    return self.asText()

asText()

return my text representation

Returns:

Name Type Description
str

description, sourceCodeHeader, sourceCode, tryIt link and result table

Source code in lodstorage/query.py
249
250
251
252
253
254
255
256
257
258
259
260
def asText(self):
    """
    return my text representation

    Returns:
        str: description, sourceCodeHeader, sourceCode, tryIt link and result table
    """
    text = f"{self.title}\n{self.query.description}\n{self.sourceCodeHeader}\n{self.sourceCode}{self.tryItMarkup}\n{self.resultHeader}\n{self.result}"
    # latex output needs the unicode subscript digits converted
    fixedStr = (
        self.uniCode2Latex(text) if self.tablefmt.lower() == "latex" else text
    )
    return fixedStr

uniCode2Latex(text, withConvert=False) staticmethod

converts unicode text to latex and fixes UTF-8 chars for latex in a certain range: ₀:$_0$ ... ₉:$_9$

see https://github.com/phfaist/pylatexenc/issues/72

Parameters:

Name Type Description Default
text(str)

the string to fix

required
withConvert(bool)

if unicode to latex library conversion should be used

required
Returns

str: latex presentation of UTF-8 char

Source code in lodstorage/query.py
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
@staticmethod
def uniCode2Latex(text: str, withConvert: bool = False) -> str:
    """
    converts unicode text to latex and
    fixes UTF-8 chars for latex in a certain range:
        ₀:$_0$ ... ₉:$_9$

    see https://github.com/phfaist/pylatexenc/issues/72

    Args:
        text (str): the string to fix
        withConvert (bool): if unicode to latex library conversion should be used

    Returns:
        str: latex presentation of UTF-8 char
    """
    # map the unicode subscript digits U+2080 (₀) .. U+2089 (₉)
    # to latex math subscripts $_0$ .. $_9$
    for code in range(8320, 8330):
        text = text.replace(chr(code), f"$_{code-8320}$")
    if withConvert:
        # workaround - hidden dependency!
        from pylatexenc.latexencode import unicode_to_latex

        latex = unicode_to_latex(text)
        # workaround {\textbackslash} being returned
        # latex=latex.replace("{\\textbackslash}",'\\')
        text = latex
    return text

QuerySyntaxHighlight

Syntax highlighting for queries with pygments

Source code in lodstorage/query.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
class QuerySyntaxHighlight:
    """
    Syntax highlighting for queries with pygments
    """

    def __init__(self, query, highlightFormat: str = "html"):
        """
        construct me for the given query and highlightFormat

        Args:
            query (Query): the query to do the syntax highlighting for
            highlightFormat (str): the highlight format to be used - "html" or "latex"

        Raises:
            ValueError: if the highlightFormat is not supported
        """
        self.query = query
        self.highlightFormat = highlightFormat
        self.lexer = get_lexer_by_name(self.query.lang)
        if self.highlightFormat == "html":
            self.formatter = HtmlFormatter()
        elif self.highlightFormat == "latex":
            self.formatter = LatexFormatter()
        else:
            # fail early with a clear message - previously an unsupported
            # format left self.formatter unset, causing a late
            # AttributeError in highlight()
            raise ValueError(f"unsupported highlightFormat '{highlightFormat}'")

    def highlight(self):
        """
        apply syntax highlighting to my query

        Returns:
            str: the result of the syntax highlighting with pygments
        """
        syntaxResult = highlight(self.query.query, self.lexer, self.formatter)
        return syntaxResult

__init__(query, highlightFormat='html')

construct me for the given query and highlightFormat

Parameters:

Name Type Description Default
query(Query)

the query to do the syntax highlighting for

required
highlightFormat(str)

the highlight format to be used

'html'
Source code in lodstorage/query.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
def __init__(self, query, highlightFormat: str = "html"):
    """
    construct me for the given query and highlightFormat

    Args:
        query (Query): the query to do the syntax highlighting for
        highlightFormat (str): the highlight format to be used - "html" or "latex"
    """
    self.query = query
    self.highlightFormat = highlightFormat
    self.lexer = get_lexer_by_name(self.query.lang)
    if self.highlightFormat == "html":
        self.formatter = HtmlFormatter()
    elif self.highlightFormat == "latex":
        # NOTE(review): any other highlightFormat leaves self.formatter
        # unset, which will cause an AttributeError in highlight() - confirm
        self.formatter = LatexFormatter()

highlight()

Returns:

Name Type Description
str

the result of the syntax highlighting with pygments

Source code in lodstorage/query.py
167
168
169
170
171
172
173
def highlight(self):
    """
    apply syntax highlighting to my query

    Returns:
        str: the result of the syntax highlighting with pygments
    """
    # 'highlight' here resolves to the module-level pygments function
    syntaxResult = highlight(self.query.query, self.lexer, self.formatter)
    return syntaxResult

ValueFormatter

a value Formatter

Source code in lodstorage/query.py
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
@lod_storable
class ValueFormatter:
    """
    a value Formatter
    """

    # the format string with a {value} placeholder used to build the link
    format: str
    # optional regular expressions selecting/extracting the value to format
    regexps: List[str] = field(default_factory=list)

    def apply_format(self, record, key, resultFormat: Format):
        """
        apply the given format to the given record

        Args:
            record (dict): the record to handle
            key (str): the property key
            resultFormat (str): the resultFormat Style to apply
        """
        if key in record:
            value = record[key]
            if value is not None and isinstance(value, str):
                # if there are no regular expressions specified always format
                doformat = len(self.regexps) == 0
                for regexp in self.regexps:
                    try:
                        vmatch = re.match(regexp, value)
                        if vmatch:
                            # we found a match and will format it if the value is not none
                            doformat = True
                            value = vmatch.group("value")
                    except Exception as ex:
                        # fix: 'name' is not a declared field of this dataclass;
                        # the previous self.name raised an AttributeError here
                        # that masked the actual regular expression problem -
                        # use the format string as the identifying label instead
                        print(
                            f"ValueFormatter: {self.format}\nInvalid regular expression:{regexp}\n{str(ex)}",
                            file=sys.stderr,
                        )
                if value is not None and doformat:
                    # build the link markup for the chosen result format
                    link = self.format.format(value=value)
                    newValue = None
                    if resultFormat == "github":
                        newValue = f"[{value}]({link})"
                    elif resultFormat == "mediawiki":
                        newValue = f"[{link} {value}]"
                    elif resultFormat == "latex":
                        newValue = rf"\href{{{link}}}{{{value}}}"
                    if newValue is not None:
                        record[key] = newValue

    def applyFormat(self, record, key, resultFormat: Format):
        """
        legacy delegate

        delegates to apply_format for backward compatibility with the
        camelCase naming convention
        """
        self.apply_format(record, key, resultFormat)

applyFormat(record, key, resultFormat)

legacy delegate

Source code in lodstorage/query.py
101
102
103
104
105
def applyFormat(self, record, key, resultFormat: Format):
    """
    legacy delegate

    delegates to apply_format for backward compatibility with the
    camelCase naming convention
    """
    self.apply_format(record, key, resultFormat)

apply_format(record, key, resultFormat)

apply the given format to the given record

Parameters:

Name Type Description Default
record(dict)

the record to handle

required
key(str)

the property key

required
resultFormat(str)

the resultFormat Style to apply

required
Source code in lodstorage/query.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def apply_format(self, record, key, resultFormat: Format):
    """
    apply the given format to the given record

    Args:
        record (dict): the record to handle
        key (str): the property key
        resultFormat (str): the resultFormat Style to apply
    """
    if key in record:
        value = record[key]
        if value is not None and isinstance(value, str):
            # if there are no regular expressions specified always format
            doformat = len(self.regexps) == 0
            for regexp in self.regexps:
                try:
                    vmatch = re.match(regexp, value)
                    if vmatch:
                        # we found a match and will format it if the value is not none
                        doformat = True
                        value = vmatch.group("value")
                except Exception as ex:
                    # NOTE(review): 'name' is not a declared field of this
                    # dataclass - self.name will raise an AttributeError here,
                    # masking the original regexp error; confirm and fix
                    print(
                        f"ValueFormatter: {self.name}\nInvalid regular expression:{regexp}\n{str(ex)}",
                        file=sys.stderr,
                    )
            if value is not None and doformat:
                # build the link markup for the chosen result format
                link = self.format.format(value=value)
                newValue = None
                if resultFormat == "github":
                    newValue = f"[{value}]({link})"
                elif resultFormat == "mediawiki":
                    newValue = f"[{link} {value}]"
                elif resultFormat == "latex":
                    newValue = rf"\href{{{link}}}{{{value}}}"
                if newValue is not None:
                    record[key] = newValue

ValueFormatters

manages a set of ValueFormatters

Source code in lodstorage/query.py
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
@lod_storable
class ValueFormatters:
    """
    manages a set of ValueFormatters
    """

    # the available formatters by name
    formatters: Dict[str, ValueFormatter] = field(default_factory=dict)

    # singleton state - shared across the class
    _instance: Optional["ValueFormatters"] = None
    _formats_path: Optional[str] = None

    @classmethod
    def get_instance(cls) -> "ValueFormatters":
        """Get the singleton instance."""
        # lazily load from the default formats.yaml on first access
        if cls._instance is None:
            cls._instance = cls.of_yaml()
        return cls._instance

    @classmethod
    def preload(cls, formats_path: str) -> "ValueFormatters":
        """Preload singleton with specific formats path."""
        cls._instance = cls.of_yaml(formats_path)
        return cls._instance

    @classmethod
    def of_yaml(cls, yaml_path: str = None) -> Optional["ValueFormatters"]:
        """Load ValueFormatters from YAML file.

        Args:
            yaml_path (str): the path of the yaml file to load from;
                if None the first default "formats.yaml" path is used

        Returns:
            ValueFormatters: the loaded instance, or None if no yaml file was found
        """
        vf = None
        if yaml_path is None:
            paths = YamlPath.getPaths("formats.yaml")
            if len(paths) > 0:
                yaml_path = paths[0]
        if yaml_path:
            # remember the path used so e.g. command line defaults can reuse it
            vf = cls.load_from_yaml_file(yaml_path)
            cls._formats_path = yaml_path
        return vf

get_instance() classmethod

Get the singleton instance.

Source code in lodstorage/query.py
119
120
121
122
123
124
@classmethod
def get_instance(cls) -> "ValueFormatters":
    """Get the singleton instance."""
    # lazily load from the default formats.yaml on first access
    if cls._instance is None:
        cls._instance = cls.of_yaml()
    return cls._instance

of_yaml(yaml_path=None) classmethod

Load ValueFormatters from YAML file.

Source code in lodstorage/query.py
132
133
134
135
136
137
138
139
140
141
142
143
@classmethod
def of_yaml(cls, yaml_path: str = None) -> Optional["ValueFormatters"]:
    """Load ValueFormatters from YAML file.

    Args:
        yaml_path (str): the path of the yaml file to load from;
            if None the first default "formats.yaml" path is used

    Returns:
        ValueFormatters: the loaded instance, or None if no yaml file was found
    """
    vf = None
    if yaml_path is None:
        paths = YamlPath.getPaths("formats.yaml")
        if len(paths) > 0:
            yaml_path = paths[0]
    if yaml_path:
        # remember the path used so e.g. command line defaults can reuse it
        vf = cls.load_from_yaml_file(yaml_path)
        cls._formats_path = yaml_path
    return vf

preload(formats_path) classmethod

Preload singleton with specific formats path.

Source code in lodstorage/query.py
126
127
128
129
130
@classmethod
def preload(cls, formats_path: str) -> "ValueFormatters":
    """Preload singleton with specific formats path.

    Args:
        formats_path (str): the path of the yaml file to load formatters from

    Returns:
        ValueFormatters: the newly loaded singleton instance
    """
    cls._instance = cls.of_yaml(formats_path)
    return cls._instance

query_cmd

Created on 2024-08-21

@author: wf

QueryCmd

command line support queries

Source code in lodstorage/query_cmd.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
class QueryCmd:
    """
    command line support queries
    """

    def __init__(self, args: Namespace, with_default_queries: bool = True):
        """
        command line args

        Args:
            args (Namespace): the command line arguments
            with_default_queries (bool): should default queries be made available/listed?
        """
        self.args = args
        self.debug = args.debug
        self.with_default_queries = with_default_queries

    def init_managers(self):
        """
        initialize my endpoint and query managers from the command line arguments
        """
        self.endpoints = EndpointManager.getEndpoints(self.args.endpointPath)
        self.qm = QueryManager(
            lang=self.args.language,
            debug=self.debug,
            queriesPath=self.args.queriesPath,
            with_default=self.with_default_queries,
        )

    def handle_args(self) -> bool:
        """
        handle the command line arguments

        Returns:
            bool: True if the arguments were fully handled (e.g. a listing
            was shown) and no further query processing is needed
        """
        handled = False
        debug = self.debug
        args = self.args
        self.init_managers()
        self.query = None
        self.queryCode = args.query
        self.formats = None
        # preload ValueFormatters
        ValueFormatters.preload(args.formatsPath)
        if args.list:
            for name, query in self.qm.queriesByName.items():
                print(f"{name}:{query.title}")
            handled = True
        elif args.listEndpoints:
            # list endpoints for the selected query language
            for endpoint in self.endpoints.values():
                if hasattr(endpoint, "lang") and endpoint.lang == args.language:
                    print(endpoint)
            handled = True
        elif args.queryName is not None:
            if debug or args.showQuery:
                print(f"named query {args.queryName}:")
            if args.queryName not in self.qm.queriesByName:
                raise Exception(f"named query {args.queryName} not available")
            self.query = self.qm.queriesByName[args.queryName]
            # a command line limit only applies if the query has none of its own
            if self.query.limit is None and args.limit is not None:
                self.query.limit = args.limit
            self.formats = self.query.formats
            self.queryCode = self.query.query
            if debug or args.showQuery:
                if (
                    hasattr(self.query, "description")
                    and self.query.description is not None
                ):
                    print(self.query.description)
        if self.query is None:
            name = "?"
            if self.queryCode is None and args.queryFile is not None:
                queryFilePath = Path(args.queryFile)
                self.queryCode = queryFilePath.read_text()
                name = queryFilePath.stem
            # fix: use the computed name (query file stem) instead of the
            # hard coded "?" which silently discarded the queryFile name
            self.query = Query(name=name, query=self.queryCode, lang=args.language)

        if self.queryCode:
            # apply query parameters given as key=value pairs on the command line
            params = Params(self.query.query)
            self.query.query = params.apply_parameters_with_check(args.params)
            self.queryCode = self.query.query
            if debug or args.showQuery:
                print(f"{args.language}:\n{self.query.query}")
        return handled

    def format_output(self, qlod: List[Dict[str, Any]]):
        """
        Format and print the query results.

        This method formats the query results based on the specified output format
        (e.g., CSV, JSON, XML) and prints them to the console.

        Args:
            qlod (List[Dict[str, Any]]): A list of dictionaries containing the query results.
                Each dictionary represents a row of the query result, with column names as keys
                and the corresponding values.

        Raises:
            Exception: if the requested format is not supported
        """
        args = self.args
        if args.format is Format.csv:
            csv_converter = CSV.get_instance()
            csv = csv_converter.toCSV(qlod)
            print(csv)
        elif args.format in [Format.latex, Format.github, Format.mediawiki]:
            doc = self.query.documentQueryResult(
                qlod, tablefmt=str(args.format), floatfmt=".0f"
            )
            docstr = doc.asText()
            print(docstr)
        elif args.format in [Format.json] or args.format is None:  # set as default
            # https://stackoverflow.com/a/36142844/1497139
            print(json.dumps(qlod, indent=2, sort_keys=True, default=str))
        elif args.format in [Format.xml]:
            lod2xml = Lod2Xml(qlod)
            xml = lod2xml.asXml()
            print(xml)
        else:
            raise Exception(f"format {args.format} not supported yet")

    @classmethod
    def argument_exists(cls, parser, arg_name) -> bool:
        """
        check whether the given option string is already defined on the parser

        Args:
            parser (ArgumentParser): the parser to inspect
            arg_name (str): the option string to look for e.g. "--debug"

        Returns:
            bool: True if any action of the parser declares the option string
        """
        return any(arg_name in action.option_strings for action in parser._actions)

    @classmethod
    def add_args(cls, parser: ArgumentParser):
        """
        add my command line arguments to the given parser

        Args:
            parser (ArgumentParser): the parser to add the arguments to
        """
        # avoid a conflict if a caller already declared --debug
        if not cls.argument_exists(parser, "--debug"):
            parser.add_argument(
                "-d",
                "--debug",
                dest="debug",
                action="store_true",
                help="set debug [default: %(default)s]",
            )
        parser.add_argument(
            "-ep",
            "--endpointPath",
            default=None,
            help="path to yaml file to configure endpoints to use for queries",
        )
        # make sure the singleton is loaded so _formats_path is set
        ValueFormatters.get_instance()
        parser.add_argument(
            "-fp",
            "--formatsPath",
            default=ValueFormatters._formats_path,
            help="path to yaml file to configure formats to use for query result documentation",
        )
        parser.add_argument(
            "-li",
            "--list",
            action="store_true",
            help="show the list of available queries",
        )
        parser.add_argument(
            "--limit", type=int, default=None, help="set limit parameter of query"
        )
        parser.add_argument(
            "--params",
            action=StoreDictKeyPair,
            help="query parameters as Key-value pairs in the format key1=value1,key2=value2",
        )
        parser.add_argument(
            "-le",
            "--listEndpoints",
            action="store_true",
            help="show the list of available endpoints",
        )
        parser.add_argument(
            "-sq", "--showQuery", action="store_true", help="show the query"
        )
        parser.add_argument(
            "-qp", "--queriesPath", help="path to YAML file with query definitions"
        )
        parser.add_argument("-q", "--query", help="the query to run")
        parser.add_argument("-qf", "--queryFile", help="the query file to run")
        parser.add_argument("-qn", "--queryName", help="run a named query")

__init__(args, with_default_queries=True)

command line args

Parameters:

Name Type Description Default
args Namespace

the command line arguments

required
with_default_queries bool

should default queries be made available/listed?

True
Source code in lodstorage/query_cmd.py
29
30
31
32
33
34
35
36
37
38
39
def __init__(self, args: Namespace, with_default_queries: bool = True):
    """
    command line args

    Args:
        args (Namespace): the command line arguments
        with_default_queries (bool): should default queries be made available/listed?
    """
    self.args = args
    # remember the debug and default-queries settings for the managers
    self.debug = args.debug
    self.with_default_queries = with_default_queries

format_output(qlod)

Format and print the query results.

This method formats the query results based on the specified output format (e.g., CSV, JSON, XML) and prints them to the console.

Parameters:

Name Type Description Default
qlod List[Dict[str, Any]]

A list of dictionaries containing the query results. Each dictionary represents a row of the query result, with column names as keys and the corresponding values.

required
Source code in lodstorage/query_cmd.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def format_output(self, qlod: List[Dict[str, Any]]):
    """
    Format and print the query results.

    This method formats the query results based on the specified output format
    (e.g., CSV, JSON, XML) and prints them to the console.

    Args:
        qlod (List[Dict[str, Any]]): A list of dictionaries containing the query results.
            Each dictionary represents a row of the query result, with column names as keys
            and the corresponding values.

    Raises:
        Exception: if the requested format is not supported
    """
    args = self.args
    if args.format is Format.csv:
        csv_converter = CSV.get_instance()
        csv = csv_converter.toCSV(qlod)
        print(csv)
    elif args.format in [Format.latex, Format.github, Format.mediawiki]:
        # tabular formats are rendered via the query result documentation
        doc = self.query.documentQueryResult(
            qlod, tablefmt=str(args.format), floatfmt=".0f"
        )
        docstr = doc.asText()
        print(docstr)
    elif args.format in [Format.json] or args.format is None:  # set as default
        # https://stackoverflow.com/a/36142844/1497139
        print(json.dumps(qlod, indent=2, sort_keys=True, default=str))
    elif args.format in [Format.xml]:
        lod2xml = Lod2Xml(qlod)
        xml = lod2xml.asXml()
        print(xml)

    else:
        raise Exception(f"format {args.format} not supported yet")

handle_args()

handle the command line arguments

Source code in lodstorage/query_cmd.py
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def handle_args(self) -> bool:
    """
    handle the command line arguments

    Returns:
        bool: True if the arguments were fully handled (e.g. a listing
        was shown) and no further query processing is needed
    """
    handled = False
    debug = self.debug
    args = self.args
    self.init_managers()
    self.query = None
    self.queryCode = args.query
    self.formats = None
    # preload ValueFormatters
    ValueFormatters.preload(args.formatsPath)
    if args.list:
        for name, query in self.qm.queriesByName.items():
            print(f"{name}:{query.title}")
        handled = True
    elif args.listEndpoints:
        # list endpoints for the selected query language
        for endpoint in self.endpoints.values():
            if hasattr(endpoint, "lang") and endpoint.lang == args.language:
                print(endpoint)
        handled = True
    elif args.queryName is not None:
        if debug or args.showQuery:
            print(f"named query {args.queryName}:")
        if args.queryName not in self.qm.queriesByName:
            raise Exception(f"named query {args.queryName} not available")
        self.query = self.qm.queriesByName[args.queryName]
        # a command line limit only applies if the query has none of its own
        if self.query.limit is None and args.limit is not None:
            self.query.limit = args.limit
        self.formats = self.query.formats
        self.queryCode = self.query.query
        if debug or args.showQuery:
            if (
                hasattr(self.query, "description")
                and self.query.description is not None
            ):
                print(self.query.description)
    if self.query is None:
        name = "?"
        if self.queryCode is None and args.queryFile is not None:
            queryFilePath = Path(args.queryFile)
            self.queryCode = queryFilePath.read_text()
            name = queryFilePath.stem
        # NOTE(review): the computed name (query file stem) is discarded -
        # name="?" is hard coded below; looks like a bug, confirm
        self.query = Query(name="?", query=self.queryCode, lang=args.language)

    if self.queryCode:
        # apply query parameters given as key=value pairs on the command line
        params = Params(self.query.query)
        self.query.query = params.apply_parameters_with_check(args.params)
        self.queryCode = self.query.query
        if debug or args.showQuery:
            print(f"{args.language}:\n{self.query.query}")
    return handled

querymain

Created on 2022-02-13

@author: wf

QueryMain

Bases: QueryCmd

Commandline handler

Source code in lodstorage/querymain.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
class QueryMain(QueryCmd):
    """
    Commandline handler
    """

    def __init__(self, args):
        """
        command line args

        Args:
            args(list): the command line arguments
        """
        super().__init__(args=args)
        # getattr with a default replaces the hasattr/ternary construct
        self.rate_limiter = RateLimiter(
            calls_per_minute=getattr(args, "calls_per_minute", None)
        )

    def handle_args(self) -> bool:
        """
        Handle the parsed command line arguments: configure the endpoint,
        apply the LIMIT, run the query in the selected language and format
        the output.

        Returns:
            bool: True if the arguments were handled
        """
        args = self.args
        handled = super().handle_args()

        if self.queryCode:
            endpointConf = Endpoint()
            endpointConf.method = "POST"
            if args.endpointName:
                endpointConf = self.endpoints.get(args.endpointName)
            else:
                endpointConf.endpoint = self.query.endpoint
            if args.method:
                endpointConf.method = args.method
            if endpointConf:
                self.query.tryItUrl = endpointConf.website
                self.query.database = endpointConf.database
            if self.query.limit:
                # replace an existing LIMIT clause or append a new one
                if "limit" in self.queryCode or "LIMIT" in self.queryCode:
                    self.queryCode = re.sub(
                        r"(limit|LIMIT)\s+(\d+)",
                        f"LIMIT {self.query.limit}",
                        self.queryCode,
                    )
                else:
                    self.queryCode += f"\nLIMIT {self.query.limit}"
            if args.language == "sparql":
                sparql = SPARQL.fromEndpointConf(endpointConf)
                if args.prefixes and endpointConf is not None:
                    self.query.add_endpoint_prefixes(
                        endpointConf, PrefixConfigs.get_instance()
                    )
                if args.raw:
                    qres = self.rawQuery(
                        endpointConf,
                        query=self.query.query,
                        resultFormat=args.format,
                        mimeType=args.mimeType,
                    )
                    print(qres)
                    # fix: was a bare "return" (None) despite the bool contract
                    return True
                if "wikidata" in args.endpointName and self.formats is None:
                    self.formats = ["*:wikidata"]
                qlod = sparql.queryAsListOfDicts(self.queryCode)
            elif args.language == "sql":
                if endpointConf.endpoint.startswith("jdbc:mysql"):
                    query_tool = MySqlQuery(endpointConf, debug=args.debug)
                    qlod = query_tool.execute_sql_query(self.queryCode)
                else:
                    # Use existing SQLDB for other SQL endpoints
                    sqlDB = SQLDB(endpointConf.endpoint)
                    qlod = sqlDB.query(self.queryCode)
            else:
                raise Exception(f"language {args.language} not known/supported")
            self.format_output(qlod)
            handled = True
        return handled

    def rawQuery(
        self,
        endpointConf,
        query: str,
        resultFormat: str,
        mimeType: str,
        content_type: str = "application/sparql-query",
        timeout: float = 10.0,
        lenient: bool = True,
    ):
        """
        Returns raw result of the endpoint.

        Args:
            endpointConf: EndPoint
            query (str): query
            resultFormat (str): format of the result
            mimeType (str): mimeType
            content_type (str): content type of the request
            timeout (float): timeout in seconds
            lenient (bool): if True do not raise errors but just log

        Returns:
            raw result of the query
        """
        # identify this client to the endpoint
        headers = {"User-Agent": f"{Version.name}/{Version.version}"}

        if mimeType:
            headers["Accept"] = mimeType

        endpoint = endpointConf.endpoint
        method = endpointConf.method.upper()

        if method == "POST":
            # POST: send query url-encoded in the request body
            headers["Content-Type"] = "application/x-www-form-urlencoded"
            data = urllib.parse.urlencode({"query": query, "format": resultFormat})
            params = None
        else:
            # GET (or other): pass query and format as URL parameters
            headers["Content-Type"] = content_type
            params = {"query": query, "format": resultFormat}
            data = None

        try:
            response = requests.request(
                method,
                endpoint,
                headers=headers,
                data=data,
                params=params,
                timeout=timeout,
            )

            # Check for HTTP errors
            response.raise_for_status()

            # Handle different response content types
            if "application/json" in response.headers.get("Content-Type", ""):
                return response.json()  # Return JSON if applicable
            else:
                return response.text  # Fallback to plain text

        except requests.exceptions.RequestException as e:
            # Log or handle the error as needed
            err_msg = f"An error occurred while querying the endpoint: {e}"
            # Attempt to retrieve response content if available
            if hasattr(e, "response") and e.response is not None:
                error_content = e.response.content.decode("utf-8", errors="replace")
                err_msg += f"\nResponse content: {error_content}"

            if lenient:
                logging.error(err_msg)
                return None
            else:
                # chain the original exception for a full traceback
                raise RuntimeError(err_msg) from e

__init__(args)

command line args

Parameters:

Name Type Description Default
args(list)

the command line arguments

required
Source code in lodstorage/querymain.py
38
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(self, args):
    """
    command line args

    Args:
        args(list): the command line arguments
    """
    super().__init__(args=args)
    # rate limiting is optional; absent attribute means "no limit configured"
    cpm = args.calls_per_minute if hasattr(args, "calls_per_minute") else None
    self.rate_limiter = RateLimiter(calls_per_minute=cpm)

rawQuery(endpointConf, query, resultFormat, mimeType, content_type='application/sparql-query', timeout=10.0, lenient=True)

Returns raw result of the endpoint.

Parameters:

Name Type Description Default
endpointConf

EndPoint

required
query str

query

required
resultFormat str

format of the result

required
mimeType str

mimeType

required
content_type str

content type of the request

'application/sparql-query'
timeout float

timeout in seconds

10.0
lenient bool

if True do not raise errors but just log

True

Returns:

Type Description

raw result of the query

Source code in lodstorage/querymain.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
def rawQuery(
    self,
    endpointConf,
    query: str,
    resultFormat: str,
    mimeType: str,
    content_type: str = "application/sparql-query",
    timeout: float = 10.0,
    lenient: bool = True,
):
    """
    Returns raw result of the endpoint.

    Args:
        endpointConf: EndPoint
        query (str): query
        resultFormat (str): format of the result
        mimeType (str): mimeType
        content_type (str): content type of the request
        timeout (float): timeout in seconds
        lenient (bool): if True do not raise errors but just log

    Returns:
        raw result of the query
    """
    # identify this client to the endpoint
    headers = {"User-Agent": f"{Version.name}/{Version.version}"}

    if mimeType:
        headers["Accept"] = mimeType

    endpoint = endpointConf.endpoint
    method = endpointConf.method.upper()

    if method == "POST":
        # POST: the query travels url-encoded in the request body
        headers["Content-Type"] = "application/x-www-form-urlencoded"
        data = urllib.parse.urlencode({"query": query, "format": resultFormat})
        params = None
    else:
        # GET (or any other verb): query and format go as URL parameters
        headers["Content-Type"] = content_type
        params = {"query": query, "format": resultFormat}
        data = None

    try:
        response = requests.request(
            method,
            endpoint,
            headers=headers,
            data=data,
            params=params,
            timeout=timeout,
        )

        # Check for HTTP errors
        response.raise_for_status()

        # Handle different response content types
        if "application/json" in response.headers.get("Content-Type", ""):
            return response.json()  # Return JSON if applicable
        else:
            return response.text  # Fallback to plain text

    except requests.exceptions.RequestException as e:
        # Log or handle the error as needed
        err_msg = f"An error occurred while querying the endpoint: {e}"
        # Attempt to retrieve response content if available
        if hasattr(e, "response") and e.response is not None:
            error_content = e.response.content.decode("utf-8", errors="replace")
            err_msg += f"\nResponse content: {error_content}"

        if lenient:
            # best-effort mode: log the failure and signal it with None
            logging.error(err_msg)
            return None
        else:
            raise RuntimeError(err_msg)

main(argv=None, lang=None)

main program.

commandline access to List of Dicts / Linked Open Data Queries

Source code in lodstorage/querymain.py
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
def main(argv=None, lang=None):  # IGNORE:C0111
    """
    main program.

    commandline access to List of Dicts / Linked Open Data Queries

    Args:
        argv (list): command line arguments; defaults to sys.argv[1:]
        lang (str): fixed query language ("sparql"/"sql"); when None a
            required -l/--language option is added to the parser

    Returns:
        int: 1 on keyboard interrupt, 2 on error, None on success
    """
    if argv is None:
        argv = sys.argv[1:]

    program_name = os.path.basename(__file__)
    program_version = f"v{__version__}"
    program_build_date = str(__updated__)
    # %(prog)s is expanded by argparse itself, so it must stay %-style
    program_version_message = f"%(prog)s {program_version} ({program_build_date})"
    program_shortdesc = (
        "commandline query of endpoints in diverse languages such as SPARQL/SQL"
    )
    user_name = "Wolfgang Fahl"
    program_license = """%s

  Created by %s on %s.
  Copyright 2020-2025 Wolfgang Fahl. All rights reserved.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE
""" % (
        program_shortdesc,
        user_name,
        str(__date__),
    )

    try:
        # Setup argument parser
        parser = ArgumentParser(
            description=program_license, formatter_class=RawDescriptionHelpFormatter
        )
        QueryCmd.add_args(parser)

        parser.add_argument(
            "-en",
            "--endpointName",
            default="wikidata",
            help=f"Name of the endpoint to use for queries. Available by default: {EndpointManager.getEndpointNames()}",
        )
        parser.add_argument("--method", help="method to be used for SPARQL queries")
        parser.add_argument("-f", "--format", type=Format, choices=list(Format))
        parser.add_argument(
            "-m", "--mimeType", help="MIME-type to use for the raw query"
        )
        parser.add_argument(
            "-p",
            "--prefixes",
            action="store_true",
            help="add predefined prefixes for endpoint",
        )
        parser.add_argument(
            "-raw",
            action="store_true",
            help="return the raw query result from the endpoint. (MIME type defined over -f or -m)",
        )
        parser.add_argument(
            "-V", "--version", action="version", version=program_version_message
        )
        if lang is None:
            parser.add_argument(
                "-l", "--language", help="the query language to use", required=True
            )
        args = parser.parse_args(argv)
        if lang is not None:
            args.language = lang
        query_main = QueryMain(args)
        query_main.handle_args()

    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 1
    except Exception as e:
        if DEBUG:
            # bare raise re-raises with the original traceback intact
            # (fix: was "raise (e)" which re-raised via a new frame)
            raise
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        # fix: terminate the hint line with a newline
        sys.stderr.write(indent + "  for help use --help\n")
        print(traceback.format_exc())
        return 2

mainSPARQL(argv=None)

commandline for SPARQL queries

Source code in lodstorage/querymain.py
193
194
195
196
197
def mainSPARQL(argv=None):
    """
    Entry point for SPARQL queries: delegates to main with the language preset.
    """
    main(argv=argv, lang="sparql")

mainSQL(argv=None)

commandline for SQL queries

Source code in lodstorage/querymain.py
186
187
188
189
190
def mainSQL(argv=None):
    """
    Entry point for SQL queries: delegates to main with the language preset.
    """
    main(argv=argv, lang="sql")

rate_limiter

Created on 2024-08-24

@author: wf

RateLimiter

Wrap the @limits decorator in a new decorator

Source code in lodstorage/rate_limiter.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
class RateLimiter:
    """
    Wrap the @limits decorator in a new decorator
    """

    # effectively "unlimited" when no rate is configured
    UNLIMITED_CALLS_PER_MINUTE = 60 * 1000 * 1000

    def __init__(self, calls_per_minute: int = None):
        """
        Constructor.

        Args:
            calls_per_minute (int): maximum calls per minute;
                None means effectively unlimited
        """
        if calls_per_minute is None:
            calls_per_minute = self.UNLIMITED_CALLS_PER_MINUTE
        self.calls_per_minute = calls_per_minute

    def rate_limited(self, f: callable):
        """
        Decorate f so that calls are rate limited to self.calls_per_minute.

        Args:
            f (callable): the function to rate limit

        Returns:
            callable: the rate limited wrapper around f
        """
        # Build the rate-limited callable ONCE. The previous implementation
        # created a fresh @limits-decorated function on every invocation,
        # which reset the call counter each time and made the rate limit
        # ineffective.
        limited = sleep_and_retry(limits(calls=self.calls_per_minute, period=60)(f))

        @wraps(f)
        def wrapper(*args, **kwargs):
            return limited(*args, **kwargs)

        return wrapper

rdf

Created on 2024-01-27

@author: wf, using ChatGPT-4 prompting

RDFDumper

A class to convert instances of data models (based on a LinkML schema) into an RDF graph.

Source code in lodstorage/rdf.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
class RDFDumper:
    """
    A class to convert instances of data models (based on a LinkML schema) into an RDF graph.
    """

    def __init__(self, schema: Schema, instance: object):
        """
        Initialize the RDFDumper.

        Args:
            schema (Schema): The LinkML schema defining the structure of the data models.
            instance (object): The instance of the data model to be converted into RDF.
        """
        self.schema = schema
        self.instance = instance
        self.graph = Graph()
        # one rdflib Namespace per prefix declared in the schema
        self.namespaces = {
            prefix: Namespace(uri) for prefix, uri in schema.prefixes.items()
        }

    def convert_to_rdf(self):
        """
        Converts the provided instance into RDF triples based on the LinkML schema.
        """
        # Process the instance data according to its class in the schema
        # instances whose class is not declared in the schema are silently ignored
        instance_class = self.instance.__class__.__name__
        if instance_class in self.schema.classes:
            self.process_class(instance_class, self.instance)

    def serialize(self, rdf_format: str = "turtle") -> str:
        """
        Serializes the RDF graph into a string representation in the specified format.

        Args:
            format (str): The serialization format (e.g., 'turtle', 'xml', 'json-ld').

        Returns:
            str: The serialized RDF graph.
        """
        return self.graph.serialize(format=rdf_format)

    def value_iterator(self, value: Any):
        """
        Iterates over values in a mapping or iterable.

        Args:
            value: The value to iterate over. It can be a mapping, iterable, or a single value.

        Yields:
            Tuples of (key, value) from the input value. For single values, key is None.
        """
        if isinstance(value, Mapping):
            yield from value.items()
        elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
            # str/bytes are iterable but treated as scalar values here
            yield from ((None, v) for v in value)
        else:
            yield (None, value)

    def process_class(self, class_name: str, instance_data: object):
        """
        Emit RDF triples for instance_data typed as class_name, recursing
        into field values that carry a truthy 'identifier' attribute.

        Args:
            class_name (str): name of the schema class of instance_data
            instance_data (object): the dataclass instance to convert
        """
        # Get the base namespace URI
        # NOTE(review): self.base_uri is first assigned here; get_instance_uri
        # reads it, so it assumes process_class has run at least once — confirm
        self.base_uri = self.namespaces[self.schema.default_prefix]
        # get the class object
        # class_obj = self.schema.classes[class_name]
        # Construct class_uri using the namespace and class_name with a separator
        class_uri = URIRef(f"{self.base_uri}:{class_name}")

        # Create a unique URI or a Blank Node for the instance
        instance_uri = self.get_instance_uri(instance_data)

        # Type the instance with its class
        self.graph.add((instance_uri, RDF.type, class_uri))

        # loop over all fields of the instance data
        for field_info in fields(instance_data):
            slot_name = field_info.name
            # assure we only work on fields defined
            # in our schema
            slot_obj = self.schema.slots.get(slot_name)
            if not slot_obj:
                continue

            # Combine the namespace with the slot name to form the field URI
            field_uri = URIRef(f"{self.base_uri}:{slot_name}")
            field_value = getattr(instance_data, slot_name, None)

            # Use value_iterator to handle different types of values
            for key, item in self.value_iterator(field_value):
                if key is not None:
                    # Handle as a mapping
                    key_uri = URIRef(self.namespaces[self.schema.default_prefix][key])
                    self.graph.add((instance_uri, field_uri, key_uri))
                    self.graph.add(
                        (key_uri, RDF.value, self.convert_to_literal(item, slot_obj))
                    )
                else:
                    # Handle as a single value or an item from an iterable
                    # Check if item has an 'identifier' property
                    if hasattr(item, "identifier") and getattr(item, "identifier"):
                        # identified items become linked resources, recursively
                        item_uri = self.get_instance_uri(item)
                        self.graph.add((instance_uri, field_uri, item_uri))
                        self.process_class(item.__class__.__name__, item)
                    else:
                        self.graph.add(
                            (
                                instance_uri,
                                field_uri,
                                self.convert_to_literal(item, slot_obj),
                            )
                        )

    def get_instance_uri(self, instance_data):
        """
        Generates a URI for an instance. If the instance has an 'identifier' property, it uses that as part of the URI.
        Otherwise, it generates or retrieves a unique URI.
        """
        if hasattr(instance_data, "identifier") and getattr(
            instance_data, "identifier"
        ):
            identifier = getattr(instance_data, "identifier")
            return URIRef(f"{self.base_uri}:{identifier}")
        else:
            # Fallback to a blank node if no identifier is found
            return BNode()

    def convert_to_literal(self, value, slot_obj):
        """
        Converts a value to an RDFLib Literal with appropriate datatype.

        Args:
            value: The value to be converted.
            slot_obj: The slot object containing information about the field.

        Returns:
            An RDFLib Literal with the value and appropriate datatype.
        """
        # Determine the datatype based on the Python type of the value
        # (slot_obj is not consulted here — the datatype comes from the value alone)
        datatype = PythonTypes.get_rdf_datatype(type(value))

        # Create and return the literal
        return Literal(value, datatype=datatype)

__init__(schema, instance)

Initialize the RDFDumper.

Parameters:

Name Type Description Default
schema Schema

The LinkML schema defining the structure of the data models.

required
instance object

The instance of the data model to be converted into RDF.

required
Source code in lodstorage/rdf.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def __init__(self, schema: Schema, instance: object):
    """
    Initialize the RDFDumper.

    Args:
        schema (Schema): The LinkML schema defining the structure of the data models.
        instance (object): The instance of the data model to be converted into RDF.
    """
    self.schema = schema
    self.instance = instance
    self.graph = Graph()
    # build one rdflib Namespace per schema prefix
    self.namespaces = {}
    for prefix, uri in schema.prefixes.items():
        self.namespaces[prefix] = Namespace(uri)

convert_to_literal(value, slot_obj)

Converts a value to an RDFLib Literal with appropriate datatype.

Parameters:

Name Type Description Default
value

The value to be converted.

required
slot_obj

The slot object containing information about the field.

required

Returns:

Type Description

An RDFLib Literal with the value and appropriate datatype.

Source code in lodstorage/rdf.py
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
def convert_to_literal(self, value, slot_obj):
    """
    Converts a value to an RDFLib Literal with appropriate datatype.

    Args:
        value: The value to be converted.
        slot_obj: The slot object containing information about the field.

    Returns:
        An RDFLib Literal with the value and appropriate datatype.
    """
    # datatype follows from the Python type of the value
    rdf_datatype = PythonTypes.get_rdf_datatype(type(value))
    literal = Literal(value, datatype=rdf_datatype)
    return literal

convert_to_rdf()

Converts the provided instance into RDF triples based on the LinkML schema.

Source code in lodstorage/rdf.py
37
38
39
40
41
42
43
44
def convert_to_rdf(self):
    """
    Converts the provided instance into RDF triples based on the LinkML schema.
    """
    # guard clause: only classes declared in the schema are processed
    class_name = type(self.instance).__name__
    if class_name not in self.schema.classes:
        return
    self.process_class(class_name, self.instance)

get_instance_uri(instance_data)

Generates a URI for an instance. If the instance has an 'identifier' property, it uses that as part of the URI. Otherwise, it generates or retrieves a unique URI.

Source code in lodstorage/rdf.py
127
128
129
130
131
132
133
134
135
136
137
138
139
def get_instance_uri(self, instance_data):
    """
    Build a URI for an instance: use its 'identifier' attribute when present
    and truthy, otherwise fall back to a fresh blank node.
    """
    identifier = getattr(instance_data, "identifier", None)
    if identifier:
        return URIRef(f"{self.base_uri}:{identifier}")
    # no usable identifier -> anonymous node
    return BNode()

serialize(rdf_format='turtle')

Serializes the RDF graph into a string representation in the specified format.

Parameters:

Name Type Description Default
format str

The serialization format (e.g., 'turtle', 'xml', 'json-ld').

required

Returns:

Name Type Description
str str

The serialized RDF graph.

Source code in lodstorage/rdf.py
46
47
48
49
50
51
52
53
54
55
56
def serialize(self, rdf_format: str = "turtle") -> str:
    """
    Serializes the RDF graph into a string representation in the specified format.

    Args:
        format (str): The serialization format (e.g., 'turtle', 'xml', 'json-ld').

    Returns:
        str: The serialized RDF graph.
    """
    return self.graph.serialize(format=rdf_format)

value_iterator(value)

Iterates over values in a mapping or iterable.

Parameters:

Name Type Description Default
value Any

The value to iterate over. It can be a mapping, iterable, or a single value.

required

Yields:

Type Description

Tuples of (key, value) from the input value. For single values, key is None.

Source code in lodstorage/rdf.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def value_iterator(self, value: Any):
    """
    Yield (key, value) pairs from a mapping, (None, item) pairs from a
    non-string iterable, and a single (None, value) pair for a scalar.

    Args:
        value: a mapping, an iterable, or a single value.

    Yields:
        tuple: (key, value) pairs; key is None for non-mapping inputs.
    """
    if isinstance(value, Mapping):
        for pair in value.items():
            yield pair
    elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
        # str/bytes are iterable but are treated as scalars
        for item in value:
            yield None, item
    else:
        yield None, value

rdf_format

Created on 2025-06-01

@author: wf

RdfFormat

Bases: Enum

RDF serialization formats with MIME types, file extensions, and SPARQLWrapper constants.

Source code in lodstorage/rdf_format.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
class RdfFormat(Enum):
    """
    RDF serialization formats with MIME types, file extensions, and SPARQLWrapper constants.
    """

    TURTLE = ("turtle", "text/turtle", ".ttl", TURTLE)
    RDF_XML = ("rdf-xml", "application/rdf+xml", ".rdf", RDF)
    N3 = ("n3", "text/n3", ".n3", N3)
    JSON_LD = ("json-ld", "application/ld+json", ".jsonld", JSON)

    def __init__(self, label: str, mime_type: str, extension: str, sparql_format):
        # unpack the value tuple onto named member attributes
        self.label = label
        self.mime_type = mime_type
        self.extension = extension
        self.sparql_format = sparql_format

    @classmethod
    def by_label(cls, label: str):
        """Look up a format by its label; raise ValueError when unknown."""
        match = next((fmt for fmt in cls if fmt.label == label), None)
        if match is None:
            raise ValueError(f"Unknown format: {label}")
        return match

by_label(label) classmethod

Get format by label

Source code in lodstorage/rdf_format.py
28
29
30
31
32
33
34
@classmethod
def by_label(cls, label: str):
    """Get format by label"""
    for candidate in cls:
        if candidate.label == label:
            return candidate
    raise ValueError(f"Unknown format: {label}")

sample2

Created on 2024-01-21

@author: wf

Countries

Represents a collection of country instances.

Attributes:

Name Type Description
countries List[Country]

A list of Country instances.

Source code in lodstorage/sample2.py
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
@lod_storable
class Countries:
    """
    Represents a collection of country instances.

    Attributes:
        countries (List[Country]): A list of Country instances.
    """

    countries: List[Country]

    @classmethod
    def get_countries_erdem(cls) -> "Countries":
        """
        get Erdem Ozkol's country list
        """
        countries_json_url = "https://gist.githubusercontent.com/erdem/8c7d26765831d0f9a8c62f02782ae00d/raw/248037cd701af0a4957cce340dabb0fd04e38f4c/countries.json"
        # fetch the raw JSON and wrap the list under the expected key
        raw_json = cls.read_from_url(countries_json_url)
        payload = {"countries": json.loads(raw_json)}
        return cls.from_dict(payload)

    @classmethod
    def get_samples(cls) -> dict[str, "Countries"]:
        """
        Returns a dictionary of named samples
        for 'specification by example' style
        requirements management.

        Returns:
            dict: A dictionary with keys as sample names
            and values as `Countries` instances.
        """
        erdem_countries = cls.get_countries_erdem()
        return {"country list provided by Erdem Ozkol": erdem_countries}

get_countries_erdem() classmethod

get Erdem Ozkol's country list

Source code in lodstorage/sample2.py
189
190
191
192
193
194
195
196
197
198
199
@classmethod
def get_countries_erdem(cls) -> "Countries":
    """
    get Erdem Ozkol's country list
    """
    countries_json_url = "https://gist.githubusercontent.com/erdem/8c7d26765831d0f9a8c62f02782ae00d/raw/248037cd701af0a4957cce340dabb0fd04e38f4c/countries.json"
    # fetch the raw JSON and wrap the decoded list under the expected key
    raw_json = cls.read_from_url(countries_json_url)
    countries_dict = {"countries": json.loads(raw_json)}
    return cls.from_dict(countries_dict)

get_samples() classmethod

Returns a dictionary of named samples for 'specification by example' style requirements management.

Returns:

Name Type Description
dict dict[str, Countries]

A dictionary with keys as sample names

dict[str, Countries]

and values as Countries instances.

Source code in lodstorage/sample2.py
201
202
203
204
205
206
207
208
209
210
211
212
213
@classmethod
def get_samples(cls) -> dict[str, "Countries"]:
    """
    Returns a dictionary of named samples
    for 'specification by example' style
    requirements management.

    Returns:
        dict: A dictionary with keys as sample names
        and values as `Countries` instances.
    """
    sample_countries = cls.get_countries_erdem()
    return {"country list provided by Erdem Ozkol": sample_countries}

Country

Represents a country with its details.

Attributes:

Name Type Description
name str

The name of the country.

country_code str

The country code.

capital Optional[str]

The capital city of the country.

timezones List[str]

List of timezones in the country.

latlng List[float]

Latitude and longitude of the country.

Source code in lodstorage/sample2.py
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
@lod_storable
class Country:
    """
    Represents a country with its details.

    Attributes:
        name (str): The name of the country.
        country_code (str): The country code.
        capital (Optional[str]): The capital city of the country.
        timezones (List[str]): List of timezones in the country.
        latlng (List[float]): Latitude and longitude of the country.
    """

    name: str
    country_code: str
    capital: Optional[str] = None
    # default_factory avoids the shared-mutable-default pitfall for list fields
    timezones: List[str] = field(default_factory=list)
    latlng: List[float] = field(default_factory=list)

Royal

Represents a member of the royal family, with various personal details.

Attributes:

Name Type Description
name str

The full name of the royal member.

wikidata_id str

The Wikidata identifier associated with the royal member.

number_in_line Optional[int]

The number in line to succession, if applicable.

born_iso_date Optional[str]

The ISO date of birth.

died_iso_date Optional[str]

The ISO date of death, if deceased.

last_modified_iso str

ISO timestamp of the last modification.

age Optional[int]

The age of the royal member.

of_age Optional[bool]

Indicates whether the member is of legal age.

wikidata_url Optional[str]

URL to the Wikidata page of the member.

Source code in lodstorage/sample2.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
@lod_storable
class Royal:
    """
    Represents a member of the royal family, with various personal details.

    Attributes:
        name (str): The full name of the royal member.
        wikidata_id (str): The Wikidata identifier associated with the royal member.
        number_in_line (Optional[int]): The number in line to succession, if applicable.
        born_iso_date (Optional[str]): The ISO date of birth.
        died_iso_date (Optional[str]): The ISO date of death, if deceased.
        lastmodified_iso (Optional[str]): ISO timestamp of the last modification.
        age (Optional[int]): The age of the royal member (calculated).
        of_age (Optional[bool]): Indicates whether the member is of legal age (calculated).
        wikidata_url (Optional[str]): URL to the Wikidata page of the member (calculated).
    """

    name: str
    wikidata_id: str
    number_in_line: Optional[int] = None
    born_iso_date: Optional[str] = None
    died_iso_date: Optional[str] = None
    lastmodified_iso: Optional[str] = None
    # calculated fields - set in __post_init__ and therefore excluded from
    # __init__; init=False replaces the non-boolean init=None and the explicit
    # default guarantees the attribute exists even when __post_init__ leaves it
    # unset (e.g. wikidata_url for an empty wikidata_id)
    age: Optional[int] = field(init=False, default=None)
    of_age: Optional[bool] = field(init=False, default=None)
    wikidata_url: Optional[str] = field(init=False, default=None)

    def __post_init__(self):
        """
        init calculated fields
        """
        born = self.born
        # guard: without a birth date no age can be derived
        if born is not None:
            # age reference is the date of death if deceased, otherwise today
            end_date = self.died if self.died else date.today()
            # 365.2425 days = mean Gregorian year length (leap-year aware)
            self.age = int((end_date - born).days / 365.2425)
            self.of_age = self.age >= 18
        if self.wikidata_id:
            self.wikidata_url = f"https://www.wikidata.org/wiki/{self.wikidata_id}"

    @property
    def identifier(self) -> str:
        """
        Generates a unique identifier for the Royal instance.
        The identifier is a combination of a slugified name and the Wikidata ID (if available).
        """
        slugified_name = slugify(self.name, lowercase=False, regex_pattern=r"[^\w\-]")
        if self.wikidata_id:
            return f"{slugified_name}-{self.wikidata_id}"
        return slugified_name

    @property
    def born(self) -> date:
        """Return the date of birth from the ISO date string."""
        born_date = DateConvert.iso_date_to_datetime(self.born_iso_date)
        return born_date

    @property
    def died(self) -> Optional[date]:
        """Return the date of death from the ISO date string, if available."""
        died_date = DateConvert.iso_date_to_datetime(self.died_iso_date)
        return died_date

born property

Return the date of birth from the ISO date string.

died property

Return the date of death from the ISO date string, if available.

identifier property

Generates a unique identifier for the Royal instance. The identifier is a combination of a slugified name and the Wikidata ID (if available).

__post_init__()

init calculated fields

Source code in lodstorage/sample2.py
44
45
46
47
48
49
50
51
52
def __post_init__(self):
    """
    init calculated fields
    """
    # age reference is the date of death if deceased, otherwise today
    end_date = self.died if self.died else date.today()
    # 365.2425 days = mean Gregorian year length (leap-year aware)
    self.age = int((end_date - self.born).days / 365.2425)
    self.of_age = self.age >= 18
    # derive the Wikidata URL only when an id is available
    if self.wikidata_id:
        self.wikidata_url = f"https://www.wikidata.org/wiki/{self.wikidata_id}"

Royals

Represents a collection of Royal family members.

Attributes:

Name Type Description
members List[Royal]

A list of Royal family members.

Source code in lodstorage/sample2.py
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
@lod_storable
class Royals:
    """
    Represents a collection of Royal family members.

    Attributes:
        members (List[Royal]): A list of Royal family members.
    """

    members: List[Royal] = field(default_factory=list)

    @classmethod
    def get_samples(cls) -> dict[str, "Royals"]:
        """
        Returns a dictionary of named samples
        for 'specification by example' style
        requirements management.

        Returns:
            dict: A dictionary with keys as sample names and values as `Royals` instances.
        """
        # (name, born, died, number in line, wikidata id) for the QE2 succession sample
        heir_records = [
            ("Elizabeth Alexandra Mary Windsor", "1926-04-21", "2022-09-08", -1, "Q9682"),
            ("Charles III of the United Kingdom", "1948-11-14", None, 0, "Q43274"),
            ("William, Duke of Cambridge", "1982-06-21", None, 1, "Q36812"),
            ("Prince George of Wales", "2013-07-22", None, 2, "Q13590412"),
            ("Princess Charlotte of Wales", "2015-05-02", None, 3, "Q18002970"),
            ("Prince Louis of Wales", "2018-04-23", None, 4, "Q38668629"),
            ("Harry Duke of Sussex", "1984-09-15", None, 5, "Q152316"),
        ]
        heirs = [
            Royal(
                name=name,
                born_iso_date=born,
                died_iso_date=died,
                number_in_line=line,  # -1 marks deceased or unranked members
                wikidata_id=qid,
                lastmodified_iso="2022-09-08",
            )
            for name, born, died, line, qid in heir_records
        ]
        samples = {"QE2 heirs up to number in line 5": Royals(members=heirs)}
        return samples

get_samples() classmethod

Returns a dictionary of named samples for 'specification by example' style requirements management.

Returns:

Name Type Description
dict dict[str, Royals]

A dictionary with keys as sample names and values as Royals instances.

Source code in lodstorage/sample2.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
@classmethod
def get_samples(cls) -> dict[str, "Royals"]:
    """
    Returns a dictionary of named samples
    for 'specification by example' style
    requirements management.

    Returns:
        dict: A dictionary with keys as sample names and values as `Royals` instances.
    """
    # single sample: Queen Elizabeth II and her heirs up to number in line 5
    samples = {
        "QE2 heirs up to number in line 5": Royals(
            members=[
                Royal(
                    name="Elizabeth Alexandra Mary Windsor",
                    born_iso_date="1926-04-21",
                    died_iso_date="2022-09-08",
                    wikidata_id="Q9682",
                    number_in_line=-1,  # for deceased or unranked
                    lastmodified_iso="2022-09-08",
                ),
                Royal(
                    name="Charles III of the United Kingdom",
                    born_iso_date="1948-11-14",
                    number_in_line=0,
                    wikidata_id="Q43274",
                    lastmodified_iso="2022-09-08",
                ),
                Royal(
                    name="William, Duke of Cambridge",
                    born_iso_date="1982-06-21",
                    number_in_line=1,
                    wikidata_id="Q36812",
                    lastmodified_iso="2022-09-08",
                ),
                Royal(
                    name="Prince George of Wales",
                    born_iso_date="2013-07-22",
                    number_in_line=2,
                    wikidata_id="Q13590412",
                    lastmodified_iso="2022-09-08",
                ),
                Royal(
                    name="Princess Charlotte of Wales",
                    born_iso_date="2015-05-02",
                    number_in_line=3,
                    wikidata_id="Q18002970",
                    lastmodified_iso="2022-09-08",
                ),
                Royal(
                    name="Prince Louis of Wales",
                    born_iso_date="2018-04-23",
                    number_in_line=4,
                    wikidata_id="Q38668629",
                    lastmodified_iso="2022-09-08",
                ),
                Royal(
                    name="Harry Duke of Sussex",
                    born_iso_date="1984-09-15",
                    number_in_line=5,
                    wikidata_id="Q152316",
                    lastmodified_iso="2022-09-08",
                ),
            ]
        )
    }
    return samples

Sample

Sample dataset provider

Source code in lodstorage/sample2.py
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
class Sample:
    """
    Sample dataset provider
    """

    cityList = None

    @staticmethod
    def get(dataset_name: str):
        """
        Get the given sample dataset name
        """
        samples = None
        if dataset_name == "royals":
            samples = Royals.get_samples()
        elif dataset_name == "countries":
            samples = Countries.get_samples()
        else:
            raise ValueError("Unknown dataset name")
        return samples

    @staticmethod
    def getRoyalsSample() -> Royals:
        samples = Royals.get_samples()
        all_members = []
        for royals in samples.values():
            all_members.extend(royals.members)
        royals = Royals(members=all_members)
        return royals

    @staticmethod
    def getSample(size) -> List[Dict[str, Any]]:
        """
        get a generated sample of the given size
        """
        listOfDicts = []
        for index in range(size):
            listOfDicts.append({"pkey": "index%d" % index, "cindex": index})
        return listOfDicts

    @staticmethod
    def getRoyals() -> List[Dict[str, Any]]:
        """
        compatibility for old sample module
        return list of dicts
        """
        royals_dict = Royals.get_samples()
        royals = royals_dict.get("QE2 heirs up to number in line 5")
        royals_lod = []
        for royal in royals.members:
            record = royal.to_dict()
            royals_lod.append(record)
        return royals_lod

    @staticmethod
    def getCities() -> List[Dict[str, Any]]:
        """
        get a list of city records
        compatibility for old sample module
        return list of dicts
        """
        if Sample.cityList is None:
            cityJsonUrl = "https://raw.githubusercontent.com/lutangar/cities.json/master/cities.json"
            with urllib.request.urlopen(cityJsonUrl) as url:
                Sample.cityList = json.loads(url.read().decode())
            for city in Sample.cityList:
                city["cityId"] = "%s-%s" % (city["country"], city["name"])
        return Sample.cityList

get(dataset_name) staticmethod

Get the given sample dataset name

Source code in lodstorage/sample2.py
223
224
225
226
227
228
229
230
231
232
233
234
235
@staticmethod
def get(dataset_name: str):
    """
    Get the given sample dataset name

    Args:
        dataset_name (str): the dataset to fetch - 'royals' or 'countries'

    Returns:
        dict: the named samples of the requested dataset

    Raises:
        ValueError: if the dataset name is not known
    """
    samples = None
    if dataset_name == "royals":
        samples = Royals.get_samples()
    elif dataset_name == "countries":
        samples = Countries.get_samples()
    else:
        raise ValueError("Unknown dataset name")
    return samples

getCities() staticmethod

Get a list of city records. Provided for compatibility with the old sample module; returns a list of dicts.

Source code in lodstorage/sample2.py
270
271
272
273
274
275
276
277
278
279
280
281
282
283
@staticmethod
def getCities() -> List[Dict[str, Any]]:
    """
    get a list of city records
    compatibility for old sample module
    return list of dicts
    """
    # lazy initialization: fetch and cache the city list on first call
    if Sample.cityList is None:
        cityJsonUrl = "https://raw.githubusercontent.com/lutangar/cities.json/master/cities.json"
        with urllib.request.urlopen(cityJsonUrl) as url:
            Sample.cityList = json.loads(url.read().decode())
        # add a cityId key combining country code and city name
        for city in Sample.cityList:
            city["cityId"] = "%s-%s" % (city["country"], city["name"])
    return Sample.cityList

getRoyals() staticmethod

Provided for compatibility with the old sample module; returns a list of dicts.

Source code in lodstorage/sample2.py
256
257
258
259
260
261
262
263
264
265
266
267
268
@staticmethod
def getRoyals() -> List[Dict[str, Any]]:
    """
    compatibility for old sample module
    return list of dicts
    """
    royals_dict = Royals.get_samples()
    royals = royals_dict.get("QE2 heirs up to number in line 5")
    # convert each Royal instance to a plain dict record
    royals_lod = []
    for royal in royals.members:
        record = royal.to_dict()
        royals_lod.append(record)
    return royals_lod

getSample(size) staticmethod

get a generated sample of the given size

Source code in lodstorage/sample2.py
246
247
248
249
250
251
252
253
254
@staticmethod
def getSample(size) -> List[Dict[str, Any]]:
    """
    get a generated sample of the given size

    Args:
        size (int): the number of records to generate

    Returns:
        list: dict records with a 'pkey' string key and a 'cindex' counter
    """
    listOfDicts = []
    for index in range(size):
        listOfDicts.append({"pkey": "index%d" % index, "cindex": index})
    return listOfDicts

schema

Created on 2021-01-26

@author: wf

Schema

Bases: object

a relational Schema

Source code in lodstorage/schema.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
class Schema(object):
    """
    a relational Schema
    """

    def __init__(self, name: str, title: str):
        """
        Constructor

        Args:
            name(str): the name of the schema
            title(str): the title of the schema
        """
        self.name = name
        self.title = title
        # properties of this schema keyed by their name
        self.propsByName = {}

    @staticmethod
    def generalizeColumn(tableList, colName: str):
        """
        remove the column with the given name from all tables in the tablelist and
        return it

        Args:
            tableList(list): a list of Tables
            colName(string): the name of the column to generalize

        Returns:
            dict: a copy of the column record having been generalized and removed,
            or None if no table contains the column
        """
        gCol = None
        for table in tableList:
            # iterate over a snapshot - removing from the list while iterating
            # it directly would silently skip the element after each removal
            for col in list(table["columns"]):
                if col["name"] == colName:
                    gCol = col.copy()
                    # no linking yet @FIXME - will need this later
                    if "link" in gCol:
                        gCol.pop("link")
                    # is generalization protected for this column?
                    if "special" not in col or not col["special"]:
                        table["columns"].remove(col)
        return gCol

    @staticmethod
    def getGeneral(tableList, name: str, debug: bool = False):
        """
        derive a general table from the given table list

        Args:
            tableList(list): a list of tables
            name(str): name of the general table
            debug(bool): True if column names should be shown

        Returns:
            dict: a table dict for the generalized table
        """
        general = {"name": name, "columns": []}
        colCount = Counter()
        # count how often each name.type column combination occurs
        for table in tableList:
            for col in table["columns"]:
                columnId = "%s.%s" % (col["name"], col["type"])
                if debug:
                    print(columnId)
                colCount[columnId] += 1
        # a column occurring (with the same type) in every table is general
        for columnId, count in colCount.items():
            if count == len(tableList):
                colName = columnId.split(".")[0]
                generalCol = Schema.generalizeColumn(tableList, colName)
                general["columns"].append(generalCol)
        return general

    @staticmethod
    def getGeneralViewDDL(tableList, name: str, debug=False) -> str:
        """
        get the DDL statement to create a general view

        Args:
            tableList: the list of tables
            name (str): the name of the view
            debug (bool): True if debug should be set

        Returns:
            str: the CREATE VIEW statement UNIONing all tables
        """
        general = Schema.getGeneral(tableList, name, debug)
        # comma separated list of the generalized column names
        cols = ",".join(col["name"].strip() for col in general["columns"])
        selects = [
            f"  SELECT {cols} FROM {table['name'].strip()}" for table in tableList
        ]
        ddl = f"CREATE VIEW {name} AS\n" + "\nUNION\n".join(selects)
        return ddl

__init__(name, title)

Constructor

Parameters:

Name Type Description Default
name(str)

the name of the schema

required
title(str)

the title of the schema

required
Source code in lodstorage/schema.py
38
39
40
41
42
43
44
45
46
47
48
def __init__(self, name: str, title: str):
    """
    Constructor

    Args:
        name(str): the name of the schema
        title(str): the title of the schema
    """
    self.name = name
    self.title = title
    # properties of this schema keyed by their name
    self.propsByName = {}

generalizeColumn(tableList, colName) staticmethod

remove the column with the given name from all tables in the tablelist and return it

Parameters:

Name Type Description Default
tableList(list)

a list of Tables

required
colName(string)

the name of the column to generalize

required

Returns:

Name Type Description
string

the column having been generalized and removed

Source code in lodstorage/schema.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
@staticmethod
def generalizeColumn(tableList, colName: str):
    """
    remove the column with the given name from all tables in the tablelist and
    return it

    Args:
        tableList(list): a list of Tables
        colName(string): the name of the column to generalize

    Returns:
        dict: a copy of the column record having been generalized and removed,
        or None if no table contains the column
    """
    gCol = None
    for table in tableList:
        # NOTE(review): removing from table["columns"] while iterating it skips
        # the element following each removal - harmless only if colName matches
        # at most one column per table
        for col in table["columns"]:
            if col["name"] == colName:
                gCol = col.copy()
                # no linking yet @FIXME - will need this later
                if "link" in gCol:
                    gCol.pop("link")
                # is generalization protected for this column?
                if not "special" in col or not col["special"]:
                    table["columns"].remove(col)
    return gCol

getGeneral(tableList, name, debug=False) staticmethod

Derive a general table from the given table list. Parameters: tableList (list) — a list of tables; name (str) — name of the general table; debug (bool) — True if column names should be shown.

Returns:

Type Description

a table dict for the generalized table

Source code in lodstorage/schema.py
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
@staticmethod
def getGeneral(tableList, name: str, debug: bool = False):
    """
    derive a general table from the given table list

    Args:
        tableList(list): a list of tables
        name(str): name of the general table
        debug(bool): True if column names should be shown

    Returns:
        dict: a table dict for the generalized table
    """
    general = {"name": name, "columns": []}
    colCount = Counter()
    # count how often each name.type column combination occurs
    for table in tableList:
        for col in table["columns"]:
            columnId = "%s.%s" % (col["name"], col["type"])
            if debug:
                print(columnId)
            colCount[columnId] += 1
    # a column occurring (with the same type) in every table is general
    for columnId, count in colCount.items():
        if count == len(tableList):
            colName = columnId.split(".")[0]
            generalCol = Schema.generalizeColumn(tableList, colName)
            general["columns"].append(generalCol)
    return general

getGeneralViewDDL(tableList, name, debug=False) staticmethod

get the DDL statement to create a general view

Parameters:

Name Type Description Default
tableList

the list of tables

required
name str

the name of the view

required
debug bool

True if debug should be set

False
Source code in lodstorage/schema.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
@staticmethod
def getGeneralViewDDL(tableList, name: str, debug=False) -> str:
    """
    get the DDL statement to create a general view

    Args:
        tableList: the list of tables
        name (str): the name of the view
        debug (bool): True if debug should be set

    Returns:
        str: the CREATE VIEW statement UNIONing all tables
    """
    general = Schema.getGeneral(tableList, name, debug)
    # build the comma separated list of generalized column names
    cols = ""
    delim = ""
    for col in general["columns"]:
        col_name = col["name"].strip()
        cols += f"{delim}{col_name}"
        delim = ","
    # one SELECT per table, combined with UNION
    ddl = f"CREATE VIEW {name} AS\n"
    delim = ""
    for table in tableList:
        table_name = table["name"].strip()
        ddl += f"{delim}  SELECT {cols} FROM {table_name}"
        delim = "\nUNION\n"
    return ddl

SchemaManager

Bases: object

a manager for schemas

Source code in lodstorage/schema.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
class SchemaManager(object):
    """
    a manager for schemas
    """

    def __init__(self, schemaDefs=None, baseUrl: str = None):
        """
        constructor

        Args:
            schemaDefs(dict): a dictionary of schema names
            baseUrl(str): the base url to use for links
        """
        self.baseUrl = baseUrl
        if schemaDefs is None:
            schemaDefs = {}
        # wrap each schema definition entry in a Schema instance
        self.schemasByName = {
            key: Schema(key, name) for key, name in schemaDefs.items()
        }

__init__(schemaDefs=None, baseUrl=None)

Constructor. Parameters: schemaDefs (dict) — a dictionary of schema names; baseUrl (str) — the base url to use for links.

Source code in lodstorage/schema.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
def __init__(self, schemaDefs=None, baseUrl: str = None):
    """
    constructor

    Args:
        schemaDefs(dict): a dictionary of schema names
        baseUrl(str): the base url to use for links
    """
    self.baseUrl = baseUrl
    self.schemasByName = {}
    if schemaDefs is None:
        schemaDefs = {}
    # wrap each schema definition entry in a Schema instance
    for key, name in schemaDefs.items():
        self.schemasByName[key] = Schema(key, name)
    pass

sparql

Created on 2020-08-14

@author: wf

SPARQL

Bases: object

wrapper for SPARQL e.g. Apache Jena, Virtuoso, Blazegraph

Instance variables: url — full endpoint url (including mode); mode — 'query' or 'update'; debug — True if debugging is active; typedLiterals — True if INSERT should be done with typed literals; profile (boolean) — True if profiling / timing information should be displayed; sparql — the SPARQLWrapper2 instance to be used; method (str) — the HTTP method to be used, 'POST' or 'GET'.

Source code in lodstorage/sparql.py
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
class SPARQL(object):
    """
    wrapper for SPARQL e.g. Apache Jena, Virtuoso, Blazegraph

    :ivar url: full endpoint url (including mode)
    :ivar mode: 'query' or 'update'
    :ivar debug: True if debugging is active
    :ivar typedLiterals: True if INSERT should be done with typedLiterals
    :ivar profile: True if profiling / timing information should be displayed
    :ivar sparql: the SPARQLWrapper2 instance to be used
    :ivar method: the HTTP method to be used 'POST' or 'GET'
    """

    def __init__(
        self,
        url,
        mode="query",
        debug=False,
        isFuseki=False,
        typedLiterals=False,
        profile=False,
        agent="PyLodStorage",
        method="POST",
        calls_per_minute: int = None,
    ):
        """
        Construct a SPARQL wrapper

        Args:
            url (string): the base URL of the endpoint - the mode query/update is going to be appended
            mode (string): 'query' or 'update'
            debug (bool): True if debugging is to be activated
            isFuseki (bool): True if the endpoint is an Apache Jena Fuseki server
            typedLiterals (bool): True if INSERT should be done with typedLiterals
            profile (bool): True if profiling / timing information should be displayed
            agent (string): the User agent to use
            method (string): the HTTP method to be used 'POST' or 'GET'
            calls_per_minute (int): optional query rate limit handed to the RateLimiter
        """
        if isFuseki:
            self.url = f"{url}/{mode}"
        else:
            self.url = url
        self.mode = mode
        self.debug = debug
        self.typedLiterals = typedLiterals
        self.profile = profile
        # NOTE(review): the wrapper is created with the base url, not self.url —
        # for Fuseki the '/{mode}' suffix is thus not part of the wrapper's
        # endpoint; confirm this is intended
        self.sparql = SPARQLWrapper2(url)
        self.method = method
        self.sparql.agent = agent
        self.rate_limiter = RateLimiter(calls_per_minute=calls_per_minute)

    @classmethod
    def fromEndpointConf(cls, endpointConf) -> "SPARQL":
        """
        create a SPARQL endpoint from the given EndpointConfiguration

        Args:
            endpointConf (Endpoint): the endpoint configuration to be used

        Returns:
            SPARQL: the configured SPARQL wrapper

        Raises:
            ValueError: if no endpoint configuration is given
        """
        if not endpointConf:
            raise ValueError("endpointConf must be specified")
        sparql = SPARQL(
            url=endpointConf.endpoint,
            method=endpointConf.method,
            calls_per_minute=endpointConf.calls_per_minute,
        )
        if hasattr(endpointConf, "auth"):
            authMethod = None
            if endpointConf.auth == "BASIC":
                authMethod = "BASIC"
            elif endpointConf.auth == "DIGEST":
                authMethod = "DIGEST"
            sparql.addAuthentication(
                endpointConf.user, endpointConf.password, method=authMethod
            )
        return sparql

    def addAuthentication(self, username: str, password: str, method: str = "BASIC"):
        """
        Add Http Authentication credentials to the sparql wrapper

        Args:
            username: name of the user
            password: password of the user
            method: HTTP Authentication method
        """
        if method:
            self.sparql.setHTTPAuth(method)

        if username and password:
            self.sparql.setCredentials(username, password)

    def test_query(
        self,
        query: str = "SELECT * WHERE { ?s ?p ?o } LIMIT 1",
        expected_bindings: int = 1,
    ) -> Exception:
        """
        Check if the SPARQL endpoint is available using a standard SPARQL query.

        Args:
            query (str): the SPARQL query to use for testing
            expected_bindings (int): the number of bindings the test query must return

        Returns:
            Exception: the exception if the endpoint fails, None on success
        """
        result = None
        try:
            query_result = self.rawQuery(query, method=self.method)
            bindings = query_result.bindings
            if len(bindings) != expected_bindings:
                raise Exception(
                    f"SPARQL query {query} returned {len(bindings)} bindings instead of {expected_bindings}"
                )
        except Exception as ex:
            result = ex
        return result

    def post_query_direct(
        self, query: str, rdf_format: str = "n3", timeout: int = 60
    ) -> str:
        """
        Fetch raw RDF response via direct HTTP POST.

        Args:
            query: SPARQL CONSTRUCT query
            rdf_format: RDF format label (e.g. 'turtle', 'rdf-xml', 'json-ld', 'n3')
            timeout: timeout in seconds (default: 60)

        Returns:
            Raw RDF content as string

        Raises:
            Exception if HTTP request fails
        """
        rdf_format = RdfFormat.by_label(rdf_format)
        mime_type = rdf_format.mime_type
        headers = {"Accept": mime_type}
        response = requests.post(
            self.url,
            data={"query": query},
            headers=headers,
            timeout=timeout,
        )
        if response.status_code != 200:
            msg = f"HTTP {response.status_code}: {response.text}"
            raise Exception(msg)
        text = response.text.strip()
        return text

    def rawQuery(self, queryString: str, method=POST):
        """
        query with the given query string

        Args:
            queryString(str): the SPARQL query to be performed
            method(str): POST or GET - POST is mandatory for update queries
        Returns:
            list: the raw query result as bindings
        """
        # NOTE(review): self.rate_limiter is initialized in __init__ but not
        # applied here — confirm whether queries should be rate limited
        queryString = self.fix_comments(queryString)
        self.sparql.setQuery(queryString)
        self.sparql.method = method
        bindings = self.sparql.query()
        return bindings

    def fix_comments(self, query_string: str) -> str:
        """
        make sure broken SPARQLWrapper will find comments

        Args:
            query_string(str): the query to prefix

        Returns:
            str: the query prefixed with a comment line, or None for None input
        """
        if query_string is None:
            return None
        return "#\n" + query_string

    def getValue(self, sparqlQuery: str, attr: str):
        """
        get the value for the given SPARQL query using the given attr

        Args:
            sparqlQuery(str): the SPARQL query to run
            attr(str): the attribute to get

        Returns:
            object: the single value for the attribute
        """
        if self.debug:
            print(sparqlQuery)
        qLod = self.queryAsListOfDicts(sparqlQuery)
        return self.getFirst(qLod, attr)

    def getValues(self, sparqlQuery: str, attrList: list):
        """
        get Values for the given sparqlQuery and attribute list

        Args:
            sparqlQuery(str): the SPARQL query to run
            attrList(list): the list of attributes to retrieve

        Returns:
            tuple: the attribute values in attrList order

        Raises:
            Exception: if the query does not return exactly one record
                or an attribute is missing from the result record
        """
        if self.debug:
            print(sparqlQuery)
        qLod = self.queryAsListOfDicts(sparqlQuery)
        if len(qLod) != 1:
            msg = f"getValues for {attrList} failed for {qLod}"
            raise Exception(msg)
        record = qLod[0]
        values = ()
        for attr in attrList:
            if attr not in record:
                msg = f"getValues failed for attribute {attr} which is missing in result record {record}"
                raise Exception(msg)
            recordTuple = (record[attr],)
            values += recordTuple
        return values

    def getFirst(self, qLod: list, attr: str):
        """
        get the column attr of the first row of the given qLod list

        Args:
            qLod(list): the list of dicts (returned by a query)
            attr(str): the attribute to retrieve

        Returns:
            object: the value

        Raises:
            Exception: if there is not exactly one record containing attr
        """
        if len(qLod) == 1 and attr in qLod[0]:
            value = qLod[0][attr]
            return value
        raise Exception(f"getFirst for attribute {attr} failed for {qLod}")

    def getResults(self, jsonResult):
        """
        get the result from the given jsonResult

        Args:
            jsonResult: the JSON encoded result

        Returns:
            list: the list of bindings
        """
        return jsonResult.bindings

    def insert(self, insertCommand):
        """
        run an insert

        Args:
            insertCommand(string): the SPARQL INSERT command

        Returns:
            tuple: (response, exception) - exception is None on success,
                response is None on failure
        """
        self.sparql.setRequestMethod(POSTDIRECTLY)
        response = None
        exception = None
        try:
            response = self.rawQuery(insertCommand, method=POST)
            # see https://github.com/RDFLib/sparqlwrapper/issues/159#issuecomment-674523696
            # dummy read the body
            response.response.read()
        except Exception as ex:
            exception = ex
            if self.debug:
                print(ex)
        return response, exception

    def getLocalName(self, name):
        """
        retrieve valid localname from a string based primary key
        https://www.w3.org/TR/sparql11-query/#prefNames

        Args:
            name(string): the name to convert

        Returns:
            string: a valid local name
        """
        # keep only alphanumeric characters - everything else is invalid
        # in a SPARQL prefixed name
        localName = "".join(ch for ch in name if ch.isalnum())
        return localName

    def insertListOfDicts(
        self,
        listOfDicts,
        entityType,
        primaryKey,
        prefixes,
        limit=None,
        batchSize=None,
        profile=False,
    ):
        """
        insert the given list of dicts mapping datatypes

        Args:
            listOfDicts(list): the records to insert
            entityType(string): the entityType to use as a subject/type prefix
            primaryKey(string): the name of the primary key attribute to use
            prefixes(string): any PREFIX statements to be used
            limit(int): maximum number of records to insert
            batchSize(int): number of records to send per request
            profile(bool): True if profiling / timing information should be displayed

        Return:
            a list of errors which should be empty on full success

        datatype maping according to
        https://www.w3.org/TR/xmlschema-2/#built-in-datatypes

        mapped from
        https://docs.python.org/3/library/stdtypes.html

        compare to
        https://www.w3.org/2001/sw/rdb2rdf/directGraph/
        http://www.bobdc.com/blog/json2rdf/
        https://www.w3.org/TR/json-ld11-api/#data-round-tripping
        https://stackoverflow.com/questions/29030231/json-to-rdf-xml-file-in-python
        """
        if limit is not None:
            listOfDicts = listOfDicts[:limit]
        else:
            limit = len(listOfDicts)
        total = len(listOfDicts)
        if batchSize is None:
            return self.insertListOfDictsBatch(
                listOfDicts, entityType, primaryKey, prefixes, total=total
            )
        else:
            startTime = time.time()
            errors = []
            # store the list in batches
            for i in range(0, total, batchSize):
                recordBatch = listOfDicts[i : i + batchSize]
                batchErrors = self.insertListOfDictsBatch(
                    recordBatch,
                    entityType,
                    primaryKey,
                    prefixes,
                    batchIndex=i,
                    total=total,
                    startTime=startTime,
                )
                errors.extend(batchErrors)
            if self.profile:
                print(
                    "insertListOfDicts for %9d records in %6.1f secs"
                    % (len(listOfDicts), time.time() - startTime),
                    flush=True,
                )
            return errors

    def insertListOfDictsBatch(
        self,
        listOfDicts,
        entityType,
        primaryKey,
        prefixes,
        title="batch",
        batchIndex=None,
        total=None,
        startTime=None,
    ):
        """
        insert a Batch part of listOfDicts

        Args:
            listOfDicts(list): the records of this batch
            entityType(string): the entityType to use as a subject/type prefix
            primaryKey(string): the name of the primary key attribute to use
            prefixes(string): any PREFIX statements to be used
            title(string): the title to display for the profiling (if any)
            batchIndex(int): the start index of the current batch
            total(int): the total number of records for all batches
            startTime(float): the start of the batch processing

        Return:
            a list of errors which should be empty on full success
        """
        errors = []
        size = len(listOfDicts)
        if batchIndex is None:
            batchIndex = 0
        batchStartTime = time.time()
        if startTime is None:
            startTime = batchStartTime
        rdfprefix = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
        insertCommand = f"{rdfprefix}{prefixes}\nINSERT DATA {{\n"
        # fix: predefine index so the error report below does not raise a
        # NameError when listOfDicts is empty
        index = -1
        for index, record in enumerate(listOfDicts):
            if primaryKey not in record:
                errors.append(f"missing primary key {primaryKey} in record {index}")
            else:
                primaryValue = record[primaryKey]
                if primaryValue is None:
                    errors.append(
                        f"primary key {primaryKey} value is None in record {index}"
                    )
                else:
                    encodedPrimaryValue = self.getLocalName(primaryValue)
                    tSubject = f"{entityType}__{encodedPrimaryValue}"
                    insertCommand += f'  {tSubject} rdf:type "{entityType}".\n'
                    for keyValue in record.items():
                        key, value = keyValue
                        # convert key if necessary
                        key = self.getLocalName(key)
                        valueType = type(value)
                        if self.debug:
                            print("%s(%s)=%s" % (key, valueType, value))
                        tPredicate = f"{entityType}_{key}"
                        tObject = value
                        if valueType == str:
                            escapedString = self.controlEscape(value)
                            tObject = '"%s"' % escapedString
                        elif valueType == int:
                            if self.typedLiterals:
                                tObject = (
                                    '"%d"^^<http://www.w3.org/2001/XMLSchema#integer>'
                                    % value
                                )
                            pass
                        elif valueType == float:
                            if self.typedLiterals:
                                tObject = (
                                    '"%s"^^<http://www.w3.org/2001/XMLSchema#decimal>'
                                    % value
                                )
                            pass
                        elif valueType == bool:
                            pass
                        elif valueType == datetime.date:
                            # dates are always typed - independent of typedLiterals
                            tObject = (
                                '"%s"^^<http://www.w3.org/2001/XMLSchema#date>' % value
                            )
                            pass
                        elif valueType == datetime.datetime:
                            tObject = (
                                '"%s"^^<http://www.w3.org/2001/XMLSchema#dateTime>'
                                % value
                            )
                            pass
                        else:
                            errors.append(
                                "can't handle type %s in record %d" % (valueType, index)
                            )
                            tObject = None
                        if tObject is not None:
                            insertRecord = "  %s %s %s.\n" % (
                                tSubject,
                                tPredicate,
                                tObject,
                            )
                            insertCommand += insertRecord
        insertCommand += "\n}"
        if self.debug:
            print(insertCommand, flush=True)
        response, ex = self.insert(insertCommand)
        if response is None and ex is not None:
            errors.append("%s for record %d" % (str(ex), index))
        if self.profile:
            print(
                "%7s for %9d - %9d of %9d %s in %6.1f s -> %6.1f s"
                % (
                    title,
                    batchIndex + 1,
                    batchIndex + size,
                    total,
                    entityType,
                    time.time() - batchStartTime,
                    time.time() - startTime,
                ),
                flush=True,
            )
        return errors

    # the ASCII control characters 0x00-0x1f which need escaping in literals
    controlChars = [chr(c) for c in range(0x20)]

    @staticmethod
    def controlEscape(s):
        """
        escape control characters

        see https://stackoverflow.com/a/9778992/1497139

        Args:
            s(str): the string to escape

        Returns:
            str: the escaped string with control chars backslash-escaped
                and double quotes escaped
        """
        escaped = "".join(
            [
                (
                    c.encode("unicode_escape").decode("ascii")
                    if c in SPARQL.controlChars
                    else c
                )
                for c in s
            ]
        )
        escaped = escaped.replace('"', '\\"')
        return escaped

    def query(self, queryString, method=POST):
        """
        get a list of results for the given query

        Args:
            queryString(string): the SPARQL query to execute
            method(string): the method eg. POST to use

        Returns:
            list: list of bindings, or None for a non-JSON (HTML) success response

        Raises:
            Exception: if an HTML response does not indicate success
        """
        queryResult = self.rawQuery(queryString, method=method)
        if self.debug:
            print(queryString)
        if hasattr(queryResult, "info"):
            if "content-type" in queryResult.info():
                ct = queryResult.info()["content-type"]
                if "text/html" in ct:
                    response = queryResult.response.read().decode()
                    if "Success" not in response:
                        # fix: the original raised a plain tuple which is a
                        # TypeError in Python 3 - raise a proper Exception
                        raise Exception(f"{queryString} failed: {response}")
                return None
        jsonResult = queryResult.convert()
        return self.getResults(jsonResult)

    def queryAsListOfDicts(
        self,
        queryString,
        fixNone: bool = False,
        sampleCount: int = None,
        param_dict: dict = None,
    ):
        """
        Get a list of dicts for the given query (to allow round-trip results for insertListOfDicts)

        Args:
            queryString (str): the SPARQL query to execute
            fixNone (bool): if True add None values for empty columns in Dict
            sampleCount (int): the number of samples to check
            param_dict (dict): dictionary of parameter names and values to be applied to the query

        Returns:
            list: a list of Dicts

        Raises:
            Exception: If the query requires parameters but they are not provided
        """
        params = Params(queryString)
        queryString = params.apply_parameters_with_check(param_dict)

        records = self.query(queryString, method=self.method)
        listOfDicts = self.asListOfDicts(
            records, fixNone=fixNone, sampleCount=sampleCount
        )
        return listOfDicts

    @staticmethod
    def strToDatetime(value, debug=False):
        """
        convert a string to a datetime

        Args:
            value(str): the value to convert
            debug(bool): if True print parse errors

        Returns:
            datetime: the datetime, or None if parsing fails
        """
        dateFormat = "%Y-%m-%d %H:%M:%S.%f"
        # ISO timestamps such as 2021-01-01T00:00:00Z use a different format
        if "T" in value and "Z" in value:
            dateFormat = "%Y-%m-%dT%H:%M:%SZ"
        dt = None
        try:
            dt = datetime.datetime.strptime(value, dateFormat)
        except ValueError as ve:
            if debug:
                print(str(ve))
        return dt

    def asListOfDicts(self, records, fixNone: bool = False, sampleCount: int = None):
        """
        convert SPARQL result back to python native

        Args:
            records(list): the list of bindings
            fixNone(bool): if True add None values for empty columns in Dict
            sampleCount(int): the number of samples to check

        Returns:
            list: a list of Dicts
        """
        resultList = []
        fields = None
        if fixNone:
            fields = LOD.getFields(records, sampleCount)
        for record in records:
            resultDict = {}
            for keyValue in record.items():
                key, value = keyValue
                datatype = value.datatype
                if datatype is not None:
                    if datatype == "http://www.w3.org/2001/XMLSchema#integer":
                        resultValue = int(value.value)
                    elif datatype == "http://www.w3.org/2001/XMLSchema#decimal":
                        resultValue = float(value.value)
                    elif datatype == "http://www.w3.org/2001/XMLSchema#boolean":
                        resultValue = value.value in ["TRUE", "true"]
                    elif datatype == "http://www.w3.org/2001/XMLSchema#date":
                        dt = datetime.datetime.strptime(value.value, "%Y-%m-%d")
                        resultValue = dt.date()
                    elif datatype == "http://www.w3.org/2001/XMLSchema#dateTime":
                        dt = SPARQL.strToDatetime(value.value, debug=self.debug)
                        resultValue = dt
                    else:
                        # unsupported datatype - keep the raw string value
                        resultValue = value.value
                else:
                    resultValue = value.value
                resultDict[key] = resultValue
            if fixNone:
                for field in fields:
                    if field not in resultDict:
                        resultDict[field] = None
            resultList.append(resultDict)
        return resultList

    def printErrors(self, errors):
        """
        print the given list of errors

        Args:
            errors(list): a list of error strings

        Returns:
            boolean: True if the list is empty else false
        """
        if len(errors) > 0:
            print("ERRORS:")
            for error in errors:
                print(error, flush=True, file=stderr)
            return True
        else:
            return False

__init__(url, mode='query', debug=False, isFuseki=False, typedLiterals=False, profile=False, agent='PyLodStorage', method='POST', calls_per_minute=None)

Construct a SPARQL wrapper

Parameters:

Name Type Description Default
url string

the base URL of the endpoint - the mode query/update is going to be appended

required
mode string

'query' or 'update'

'query'
debug bool

True if debugging is to be activated

False
typedLiterals bool

True if INSERT should be done with typedLiterals

False
profile boolean

True if profiling / timing information should be displayed

False
agent string

the User agent to use

'PyLodStorage'
method string

the HTTP method to be used 'POST' or 'GET'

'POST'
Source code in lodstorage/sparql.py
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def __init__(
    self,
    url,
    mode="query",
    debug=False,
    isFuseki=False,
    typedLiterals=False,
    profile=False,
    agent="PyLodStorage",
    method="POST",
    calls_per_minute: int = None,
):
    """
    Construct a SPARQL wrapper

    Args:
        url (string): the base URL of the endpoint - the mode query/update is going to be appended
        mode (string): 'query' or 'update'
        debug (bool): True if debugging is to be activated
        typedLiterals (bool): True if INSERT should be done with typedLiterals
        profile (boolean): True if profiling / timing information should be displayed
        agent (string): the User agent to use
        method (string): the HTTP method to be used 'POST' or 'GET'
    """
    if isFuseki:
        self.url = f"{url}/{mode}"
    else:
        self.url = url
    self.mode = mode
    self.debug = debug
    self.typedLiterals = typedLiterals
    self.profile = profile
    self.sparql = SPARQLWrapper2(url)
    self.method = method
    self.sparql.agent = agent
    self.rate_limiter = RateLimiter(calls_per_minute=calls_per_minute)

addAuthentication(username, password, method='BASIC')

Add Http Authentication credentials to the sparql wrapper Args: username: name of the user password: password of the user method: HTTP Authentication method

Source code in lodstorage/sparql.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def addAuthentication(self, username: str, password: str, method: str = "BASIC"):
    """
    Add Http Authentication credentials to the sparql wrapper
    Args:
        username: name of the user
        password: password of the user
        method: HTTP Authentication method
    """
    if method:
        self.sparql.setHTTPAuth(method)

    if username and password:
        self.sparql.setCredentials(username, password)

asListOfDicts(records, fixNone=False, sampleCount=None)

convert SPARQL result back to python native

Parameters:

Name Type Description Default
record(list)

the list of bindings

required
fixNone(bool)

if True add None values for empty columns in Dict

required
sampleCount(int)

the number of samples to check

required

Returns:

Name Type Description
list

a list of Dicts

Source code in lodstorage/sparql.py
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
def asListOfDicts(self, records, fixNone: bool = False, sampleCount: int = None):
    """
    convert SPARQL result back to python native

    Args:
        record(list): the list of bindings
        fixNone(bool): if True add None values for empty columns in Dict
        sampleCount(int): the number of samples to check

    Returns:
        list: a list of Dicts
    """
    resultList = []
    fields = None
    if fixNone:
        fields = LOD.getFields(records, sampleCount)
    for record in records:
        resultDict = {}
        for keyValue in record.items():
            key, value = keyValue
            datatype = value.datatype
            if datatype is not None:
                if datatype == "http://www.w3.org/2001/XMLSchema#integer":
                    resultValue = int(value.value)
                elif datatype == "http://www.w3.org/2001/XMLSchema#decimal":
                    resultValue = float(value.value)
                elif datatype == "http://www.w3.org/2001/XMLSchema#boolean":
                    resultValue = value.value in ["TRUE", "true"]
                elif datatype == "http://www.w3.org/2001/XMLSchema#date":
                    dt = datetime.datetime.strptime(value.value, "%Y-%m-%d")
                    resultValue = dt.date()
                elif datatype == "http://www.w3.org/2001/XMLSchema#dateTime":
                    dt = SPARQL.strToDatetime(value.value, debug=self.debug)
                    resultValue = dt
                else:
                    # unsupported datatype
                    resultValue = value.value
            else:
                resultValue = value.value
            resultDict[key] = resultValue
        if fixNone:
            for field in fields:
                if not field in resultDict:
                    resultDict[field] = None
        resultList.append(resultDict)
    return resultList

controlEscape(s) staticmethod

escape control characters

see https://stackoverflow.com/a/9778992/1497139

Source code in lodstorage/sparql.py
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
@staticmethod
def controlEscape(s):
    """
    escape control characters

    see https://stackoverflow.com/a/9778992/1497139
    """
    escaped = "".join(
        [
            (
                c.encode("unicode_escape").decode("ascii")
                if c in SPARQL.controlChars
                else c
            )
            for c in s
        ]
    )
    escaped = escaped.replace('"', '\\"')
    return escaped

fix_comments(query_string)

make sure broken SPARQLWrapper will find comments

Source code in lodstorage/sparql.py
186
187
188
189
190
191
192
def fix_comments(self, query_string: str) -> str:
    """
    make sure broken SPARQLWrapper will find comments
    """
    if query_string is None:
        return None
    return "#\n" + query_string

fromEndpointConf(endpointConf) classmethod

create a SPARQL endpoint from the given EndpointConfiguration

Parameters:

Name Type Description Default
endpointConf Endpoint

the endpoint configuration to be used

required
Source code in lodstorage/sparql.py
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
@classmethod
def fromEndpointConf(cls, endpointConf) -> "SPARQL":
    """
    create a SPARQL endpoint from the given EndpointConfiguration

    Args:
        endpointConf (Endpoint): the endpoint configuration to be used
    """
    if not endpointConf:
        raise ValueError("endpointConf must be specified")
    sparql = SPARQL(
        url=endpointConf.endpoint,
        method=endpointConf.method,
        calls_per_minute=endpointConf.calls_per_minute,
    )
    if hasattr(endpointConf, "auth"):
        authMethod = None
        if endpointConf.auth == "BASIC":
            authMethod = "BASIC"
        elif endpointConf.auth == "DIGEST":
            authMethod = "DIGEST"
        sparql.addAuthentication(
            endpointConf.user, endpointConf.password, method=authMethod
        )
    return sparql

getFirst(qLod, attr)

get the column attr of the first row of the given qLod list

Parameters:

Name Type Description Default
qLod(list)

the list of dicts (returned by a query)

required
attr(str)

the attribute to retrieve

required

Returns:

Name Type Description
object

the value

Source code in lodstorage/sparql.py
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
def getFirst(self, qLod: list, attr: str):
    """
    get the column attr of the first row of the given qLod list

    Args:
        qLod(list): the list of dicts (returned by a query)
        attr(str): the attribute to retrieve

    Returns:
        object: the value
    """
    if len(qLod) == 1 and attr in qLod[0]:
        value = qLod[0][attr]
        return value
    raise Exception(f"getFirst for attribute {attr} failed for {qLod}")

getLocalName(name)

retrieve valid localname from a string based primary key https://www.w3.org/TR/sparql11-query/#prefNames

Parameters:

Name Type Description Default
name(string)

the name to convert

required

Returns:

Name Type Description
string

a valid local name

Source code in lodstorage/sparql.py
284
285
286
287
288
289
290
291
292
293
294
295
296
def getLocalName(self, name):
    """
    retrieve valid localname from a string based primary key
    https://www.w3.org/TR/sparql11-query/#prefNames

    Args:
        name(string): the name to convert

    Returns:
        string: a valid local name
    """
    localName = "".join(ch for ch in name if ch.isalnum())
    return localName

getResults(jsonResult)

get the result from the given jsonResult

Parameters:

Name Type Description Default
jsonResult

the JSON encoded result

required

Returns:

Name Type Description
list

the list of bindings

Source code in lodstorage/sparql.py
248
249
250
251
252
253
254
255
256
257
258
def getResults(self, jsonResult):
    """
    get the result from the given jsonResult

    Args:
        jsonResult: the JSON encoded result

    Returns:
        list: the list of bindings
    """
    return jsonResult.bindings

getValue(sparqlQuery, attr)

get the value for the given SPARQL query using the given attr

Parameters:

Name Type Description Default
sparql(SPARQL)

the SPARQL endpoint to get the value for

required
sparqlQuery(str)

the SPARQL query to run

required
attr(str)

the attribute to get

required
Source code in lodstorage/sparql.py
194
195
196
197
198
199
200
201
202
203
204
205
206
def getValue(self, sparqlQuery: str, attr: str):
    """
    get the single value for the given SPARQL query using the given attr

    Args:
        sparqlQuery(str): the SPARQL query to run
        attr(str): the attribute to get the value for

    Returns:
        object: the value of attr in the single result row

    Raises:
        Exception: if the query does not return exactly one
            row containing attr (raised by getFirst)
    """
    if self.debug:
        print(sparqlQuery)
    qLod = self.queryAsListOfDicts(sparqlQuery)
    return self.getFirst(qLod, attr)

getValues(sparqlQuery, attrList)

get Values for the given sparqlQuery and attribute list

Parameters:

Name Type Description Default
sparqlQuery(str)

the SPARQL query to run

required
attrList(list)

the list of attributes

required
Source code in lodstorage/sparql.py
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
def getValues(self, sparqlQuery: str, attrList: list):
    """
    get the values for the given sparqlQuery and attribute list

    Args:
        sparqlQuery(str): the SPARQL query to run
        attrList(list): the list of attributes to extract from the
            single expected result record

    Returns:
        tuple: the values of the given attributes in attrList order

    Raises:
        Exception: if the query does not return exactly one record,
            or if an attribute is missing in the result record
    """
    if self.debug:
        print(sparqlQuery)
    qLod = self.queryAsListOfDicts(sparqlQuery)
    # exactly one record is expected - same contract as getFirst
    if len(qLod) != 1:
        msg = f"getValues for {attrList} failed for {qLod}"
        raise Exception(msg)
    record = qLod[0]
    # validate all requested attributes before extracting
    for attr in attrList:
        if attr not in record:
            msg = f"getValues failed for attribute {attr} which is missing in result record {record}"
            raise Exception(msg)
    values = tuple(record[attr] for attr in attrList)
    return values

insert(insertCommand)

run an insert

Parameters:

Name Type Description Default
insertCommand(string)

the SPARQL INSERT command

required

Returns:

Type Description

a response

Source code in lodstorage/sparql.py
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
def insert(self, insertCommand):
    """
    run an insert

    Args:
        insertCommand(string): the SPARQL INSERT command

    Returns:
        tuple(response, exception): the raw query response (None on
        failure) and the exception that occurred (None on success)
    """
    self.sparql.setRequestMethod(POSTDIRECTLY)
    response = None
    exception = None
    try:
        response = self.rawQuery(insertCommand, method=POST)
        # see https://github.com/RDFLib/sparqlwrapper/issues/159#issuecomment-674523696
        # dummy read the body
        response.response.read()
    except Exception as ex:
        exception = ex
        if self.debug:
            print(ex)
    return response, exception

insertListOfDicts(listOfDicts, entityType, primaryKey, prefixes, limit=None, batchSize=None, profile=False)

insert the given list of dicts mapping datatypes

Parameters:

Name Type Description Default
entityType(string)

the entityType to use as a

required
primaryKey(string)

the name of the primary key attribute to use

required
prefix(string)

any PREFIX statements to be used

required
limit(int)

maximum number of records to insert

required
batchSize(int)

number of records to send per request

required
Return

a list of errors which should be empty on full success

datatype mapping according to https://www.w3.org/TR/xmlschema-2/#built-in-datatypes

mapped from https://docs.python.org/3/library/stdtypes.html

compare to https://www.w3.org/2001/sw/rdb2rdf/directGraph/ http://www.bobdc.com/blog/json2rdf/ https://www.w3.org/TR/json-ld11-api/#data-round-tripping https://stackoverflow.com/questions/29030231/json-to-rdf-xml-file-in-python

Source code in lodstorage/sparql.py
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
def insertListOfDicts(
    self,
    listOfDicts,
    entityType,
    primaryKey,
    prefixes,
    limit=None,
    batchSize=None,
    profile=False,
):
    """
    insert the given list of dicts mapping datatypes

    Args:
        listOfDicts(list): the list of record dicts to insert
        entityType(string): the entityType to use as a type and subject prefix
        primaryKey(string): the name of the primary key attribute to use
        prefixes(string): any PREFIX statements to be used
        limit(int): maximum number of records to insert (default: all)
        batchSize(int): number of records to send per request
            (default: one single request for all records)
        profile(bool): NOTE(review): currently ignored - profiling output
            is gated on self.profile instead; confirm intended

    Return:
        a list of errors which should be empty on full success

    datatype mapping according to
    https://www.w3.org/TR/xmlschema-2/#built-in-datatypes

    mapped from
    https://docs.python.org/3/library/stdtypes.html

    compare to
    https://www.w3.org/2001/sw/rdb2rdf/directGraph/
    http://www.bobdc.com/blog/json2rdf/
    https://www.w3.org/TR/json-ld11-api/#data-round-tripping
    https://stackoverflow.com/questions/29030231/json-to-rdf-xml-file-in-python
    """
    if limit is not None:
        listOfDicts = listOfDicts[:limit]
    else:
        limit = len(listOfDicts)
    total = len(listOfDicts)
    if batchSize is None:
        # single batch covering all records
        return self.insertListOfDictsBatch(
            listOfDicts, entityType, primaryKey, prefixes, total=total
        )
    else:
        startTime = time.time()
        errors = []
        # store the list in batches
        for i in range(0, total, batchSize):
            recordBatch = listOfDicts[i : i + batchSize]
            batchErrors = self.insertListOfDictsBatch(
                recordBatch,
                entityType,
                primaryKey,
                prefixes,
                batchIndex=i,
                total=total,
                startTime=startTime,
            )
            errors.extend(batchErrors)
        # NOTE(review): gated on self.profile, not the profile parameter
        if self.profile:
            print(
                "insertListOfDicts for %9d records in %6.1f secs"
                % (len(listOfDicts), time.time() - startTime),
                flush=True,
            )
        return errors

insertListOfDictsBatch(listOfDicts, entityType, primaryKey, prefixes, title='batch', batchIndex=None, total=None, startTime=None)

insert a Batch part of listOfDicts

Parameters:

Name Type Description Default
entityType(string)

the entityType to use as a

required
primaryKey(string)

the name of the primary key attribute to use

required
prefix(string)

any PREFIX statements to be used

required
title(string)

the title to display for the profiling (if any)

required
batchIndex(int)

the start index of the current batch

required
total(int)

the total number of records for all batches

required
startTime(datetime)

the start of the batch processing

required
Return

a list of errors which should be empty on full success

Source code in lodstorage/sparql.py
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
def insertListOfDictsBatch(
    self,
    listOfDicts,
    entityType,
    primaryKey,
    prefixes,
    title="batch",
    batchIndex=None,
    total=None,
    startTime=None,
):
    """
    insert a Batch part of listOfDicts

    Args:
        listOfDicts(list): the record dicts of this batch
        entityType(string): the entityType to use as a type and subject prefix
        primaryKey(string): the name of the primary key attribute to use
        prefixes(string): any PREFIX statements to be used
        title(string): the title to display for the profiling (if any)
        batchIndex(int): the start index of the current batch
        total(int): the total number of records for all batches
        startTime(datetime): the start of the batch processing

    Return:
        a list of errors which should be empty on full success
    """
    errors = []
    size = len(listOfDicts)
    if batchIndex is None:
        batchIndex = 0
    batchStartTime = time.time()
    if startTime is None:
        startTime = batchStartTime
    rdfprefix = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
    # build one INSERT DATA command containing triples for all records of the batch
    insertCommand = f"{rdfprefix}{prefixes}\nINSERT DATA {{\n"
    for index, record in enumerate(listOfDicts):
        if not primaryKey in record:
            errors.append(f"missing primary key {primaryKey} in record {index}")
        else:
            primaryValue = record[primaryKey]
            if primaryValue is None:
                errors.append(
                    f"primary key {primaryKey} value is None in record {index}"
                )
            else:
                # subject = entityType plus sanitized primary key value
                encodedPrimaryValue = self.getLocalName(primaryValue)
                tSubject = f"{entityType}__{encodedPrimaryValue}"
                insertCommand += f'  {tSubject} rdf:type "{entityType}".\n'
                for keyValue in record.items():
                    key, value = keyValue
                    # convert key if necessary
                    key = self.getLocalName(key)
                    valueType = type(value)
                    if self.debug:
                        print("%s(%s)=%s" % (key, valueType, value))
                    tPredicate = f"{entityType}_{key}"
                    tObject = value
                    if valueType == str:
                        escapedString = self.controlEscape(value)
                        tObject = '"%s"' % escapedString
                    elif valueType == int:
                        # plain literal unless typed literals are requested
                        if self.typedLiterals:
                            tObject = (
                                '"%d"^^<http://www.w3.org/2001/XMLSchema#integer>'
                                % value
                            )
                        pass
                    elif valueType == float:
                        if self.typedLiterals:
                            tObject = (
                                '"%s"^^<http://www.w3.org/2001/XMLSchema#decimal>'
                                % value
                            )
                        pass
                    elif valueType == bool:
                        # booleans are passed through unquoted
                        pass
                    elif valueType == datetime.date:
                        # dates are always typed, regardless of self.typedLiterals
                        # if self.typedLiterals:
                        tObject = (
                            '"%s"^^<http://www.w3.org/2001/XMLSchema#date>' % value
                        )
                        pass
                    elif valueType == datetime.datetime:
                        tObject = (
                            '"%s"^^<http://www.w3.org/2001/XMLSchema#dateTime>'
                            % value
                        )
                        pass
                    else:
                        errors.append(
                            "can't handle type %s in record %d" % (valueType, index)
                        )
                        tObject = None
                    if tObject is not None:
                        insertRecord = "  %s %s %s.\n" % (
                            tSubject,
                            tPredicate,
                            tObject,
                        )
                        insertCommand += insertRecord
    insertCommand += "\n}"
    if self.debug:
        print(insertCommand, flush=True)
    response, ex = self.insert(insertCommand)
    if response is None and ex is not None:
        # NOTE(review): if listOfDicts is empty, index is unbound here
        # (NameError); the error is also attributed to the last record
        # only - confirm intended
        errors.append("%s for record %d" % (str(ex), index))
    if self.profile:
        print(
            "%7s for %9d - %9d of %9d %s in %6.1f s -> %6.1f s"
            % (
                title,
                batchIndex + 1,
                batchIndex + size,
                total,
                entityType,
                time.time() - batchStartTime,
                time.time() - startTime,
            ),
            flush=True,
        )
    return errors

post_query_direct(query, rdf_format='n3', timeout=60)

Fetch raw RDF response via direct HTTP POST.

Parameters:

Name Type Description Default
query str

SPARQL CONSTRUCT query

required
rdf_format str

RDF format label (e.g. 'turtle', 'rdf-xml', 'json-ld', 'n3')

'n3'
timeout int

timeout in seconds (default: 60)

60

Returns:

Type Description
str

Raw RDF content as string

Source code in lodstorage/sparql.py
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
def post_query_direct(
    self, query: str, rdf_format: str = "n3", timeout: int = 60
) -> str:
    """
    Fetch raw RDF response via direct HTTP POST.

    Args:
        query: SPARQL CONSTRUCT query
        rdf_format: RDF format label (e.g. 'turtle', 'rdf-xml', 'json-ld', 'n3')
        timeout: timeout in seconds (default: 60)

    Returns:
        Raw RDF content as string

    Raises:
        Exception if HTTP request fails
    """
    # resolve the format label to its MIME type for content negotiation
    fmt = RdfFormat.by_label(rdf_format)
    response = requests.post(
        self.url,
        data={"query": query},
        headers={"Accept": fmt.mime_type},
        timeout=timeout,
    )
    if response.status_code != 200:
        raise Exception(f"HTTP {response.status_code}: {response.text}")
    return response.text.strip()

printErrors(errors)

print the given list of errors

Parameters:

Name Type Description Default
errors(list)

a list of error strings

required

Returns:

Name Type Description
boolean

True if the list is non-empty else False

Source code in lodstorage/sparql.py
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
def printErrors(self, errors):
    """
    print the given list of errors to stderr

    Args:
        errors(list): a list of error strings

    Returns:
        boolean: True if any errors were printed, False if the list is empty
    """
    # guard clause: nothing to report
    if not errors:
        return False
    print("ERRORS:")
    for error in errors:
        print(error, flush=True, file=stderr)
    return True

query(queryString, method=POST)

get a list of results for the given query

Parameters:

Name Type Description Default
queryString(string)

the SPARQL query to execute

required
method(string)

the method eg. POST to use

required

Returns:

Name Type Description
list

list of bindings

Source code in lodstorage/sparql.py
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
def query(self, queryString, method=POST):
    """
    get a list of results for the given query

    Args:
        queryString(string): the SPARQL query to execute
        method(string): the method eg. POST to use

    Returns:
        list: list of bindings, or None for non-JSON (e.g. HTML success)
        responses

    Raises:
        Exception: if an HTML response is returned that does not
            indicate success
    """
    queryResult = self.rawQuery(queryString, method=method)
    if self.debug:
        print(queryString)
    if hasattr(queryResult, "info"):
        if "content-type" in queryResult.info():
            ct = queryResult.info()["content-type"]
            if "text/html" in ct:
                response = queryResult.response.read().decode()
                if "Success" not in response:
                    # fix: the original raised a bare tuple which is a
                    # TypeError at runtime, masking the real error message
                    raise Exception(f"{queryString} failed: {response}")
            return None
    jsonResult = queryResult.convert()
    return self.getResults(jsonResult)

queryAsListOfDicts(queryString, fixNone=False, sampleCount=None, param_dict=None)

Get a list of dicts for the given query (to allow round-trip results for insertListOfDicts)

Parameters:

Name Type Description Default
queryString str

the SPARQL query to execute

required
fixNone bool

if True add None values for empty columns in Dict

False
sampleCount int

the number of samples to check

None
param_dict dict

dictionary of parameter names and values to be applied to the query

None

Returns:

Name Type Description
list

a list of Dicts

Raises:

Type Description
Exception

If the query requires parameters but they are not provided

Source code in lodstorage/sparql.py
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
def queryAsListOfDicts(
    self,
    queryString,
    fixNone: bool = False,
    sampleCount: int = None,
    param_dict: dict = None,
):
    """
    Get a list of dicts for the given query (to allow round-trip results for insertListOfDicts)

    Args:
        queryString (str): the SPARQL query to execute
        fixNone (bool): if True add None values for empty columns in Dict
        sampleCount (int): the number of samples to check
        param_dict (dict): dictionary of parameter names and values to be applied to the query

    Returns:
        list: a list of Dicts

    Raises:
        Exception: If the query requires parameters but they are not provided
    """
    # substitute query parameters; raises if required ones are missing
    params = Params(queryString)
    applied_query = params.apply_parameters_with_check(param_dict)
    # run the query and convert the raw bindings to plain dicts
    bindings = self.query(applied_query, method=self.method)
    lod = self.asListOfDicts(bindings, fixNone=fixNone, sampleCount=sampleCount)
    return lod

rawQuery(queryString, method=POST)

query with the given query string

Parameters:

Name Type Description Default
queryString(str)

the SPARQL query to be performed

required
method(str)

POST or GET - POST is mandatory for update queries

required

Returns: list: the raw query result as bindings

Source code in lodstorage/sparql.py
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
def rawQuery(self, queryString: str, method=POST):
    """
    query with the given query string

    Args:
        queryString(str): the SPARQL query to be performed
        method(str): POST or GET - POST is mandatory for update queries

    Returns:
        list: the raw query result as bindings
    """
    # normalize comments before handing the query to the endpoint wrapper
    fixed_query = self.fix_comments(queryString)
    self.sparql.setQuery(fixed_query)
    self.sparql.method = method
    return self.sparql.query()

strToDatetime(value, debug=False) staticmethod

convert a string to a datetime Args: value(str): the value to convert Returns: datetime: the datetime

Source code in lodstorage/sparql.py
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
@staticmethod
def strToDatetime(value, debug=False):
    """
    convert a string to a datetime
    Args:
        value(str): the value to convert
    Returns:
        datetime: the datetime
    """
    dateFormat = "%Y-%m-%d %H:%M:%S.%f"
    if "T" in value and "Z" in value:
        dateFormat = "%Y-%m-%dT%H:%M:%SZ"
    dt = None
    try:
        dt = datetime.datetime.strptime(value, dateFormat)
    except ValueError as ve:
        if debug:
            print(str(ve))
    return dt

test_query(query='SELECT * WHERE { ?s ?p ?o } LIMIT 1', expected_bindings=1)

Check if the SPARQL endpoint is available using a standard SPARQL query.

Parameters:

Name Type Description Default
query str

the SPARQL query to use for testing

'SELECT * WHERE { ?s ?p ?o } LIMIT 1'

Returns:

Type Description
Exception

Exception if the endpoint fails

Source code in lodstorage/sparql.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def test_query(
    self,
    query: str = "SELECT * WHERE { ?s ?p ?o } LIMIT 1",
    expected_bindings: int = 1,
) -> Exception:
    """
    Check if the SPARQL endpoint is available using a standard SPARQL query.

    Args:
        query (str): the SPARQL query to use for testing
        expected_bindings (int): the expected number of bindings

    Returns:
        Exception if the endpoint fails, None on success
    """
    result = None
    try:
        bindings = self.rawQuery(query, method=self.method).bindings
        if len(bindings) != expected_bindings:
            raise Exception(
                f"SPARQL query {query} returned {len(bindings)} bindings instead of {expected_bindings}"
            )
    except Exception as ex:
        # any failure (connection or unexpected count) is returned, not raised
        result = ex
    return result

sql

Created on 2020-08-24

@author: wf

EntityInfo

Bases: object

holds entity meta Info

:ivar name(string): entity name = table name

:ivar primaryKey(string): the name of the primary key column

:ivar typeMap(dict): maps column names to python types

:ivar debug(boolean): True if debug information should be shown

Source code in lodstorage/sql.py
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
class EntityInfo(object):
    """
    holds entity meta Info

    :ivar name(string): entity name = table name

    :ivar primaryKey(string): the name of the primary key column

    :ivar typeMap(dict): maps column names to python types

    :ivar sqlTypeMap(dict): maps column names to SQL types

    :ivar debug(boolean): True if debug information should be shown

    """

    def __init__(self, sampleRecords, name, primaryKey=None, quiet=False, debug=False):
        """
        construct me from the given name and primary key

        Args:
           sampleRecords(list): a list of sample record dicts
           name(string): the name of the entity
           primaryKey(string): the name of the primary key column
           quiet(boolean): if True suppress all print messages
           debug(boolean): True if debug information should be shown
        """
        self.sampleRecords = sampleRecords
        self.name = name
        self.primaryKey = primaryKey
        self.quiet = quiet
        self.debug = debug
        self.typeMap = {}
        self.sqlTypeMap = {}
        # deriving the DDL also populates typeMap/sqlTypeMap used below
        self.createTableCmd = self.getCreateTableCmd(sampleRecords)
        self.dropTableCmd = "DROP TABLE IF EXISTS %s" % self.name
        self.insertCmd = self.getInsertCmd()

    def getCreateTableCmd(self, sampleRecords):
        """
        get the CREATE TABLE DDL command for the given sample records

        Args:
            sampleRecords(list): a list of Dicts of sample Records

        Returns:
            string: CREATE TABLE DDL command for this entity info

        Example:

        .. code-block:: sql

            CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN)

        """
        ddlCmd = "CREATE TABLE %s(" % self.name
        delim = ""
        # derive a python and SQL type per column from the sample records
        for sampleRecord in sampleRecords:
            for key, value in sampleRecord.items():
                sqlType = None
                valueType = None
                if value is None:
                    # only warn for a single sample record; with more records
                    # another record may still provide a non-None value
                    if len(sampleRecords) == 1:
                        msg = f"Warning sampleRecord column {key} is None - using TEXT as type"
                        if not self.quiet:
                            print(msg)
                        valueType = str
                else:
                    valueType = type(value)
                if valueType == str:
                    sqlType = "TEXT"
                elif valueType == int:
                    sqlType = "INTEGER"
                elif valueType == float:
                    sqlType = "FLOAT"
                elif valueType == bool:
                    sqlType = "BOOLEAN"
                elif valueType == datetime.date:
                    sqlType = "DATE"
                elif valueType == datetime.datetime:
                    sqlType = "TIMESTAMP"
                else:
                    msg = f"warning: unsupported type {valueType} for column {key}"
                    if not self.quiet:
                        print(msg)
                if sqlType is not None and valueType is not None:
                    # the first type seen for a column wins (see addType)
                    self.addType(key, valueType, sqlType)
        # emit one column definition per collected column
        for key, sqlType in self.sqlTypeMap.items():
            is_primary = " PRIMARY KEY" if key == self.primaryKey else ""
            ddl_col = f"{delim}{key} {sqlType}{is_primary}"
            ddlCmd += ddl_col
            delim = ","
        ddlCmd += ")"
        if self.debug and not self.quiet:
            print(ddlCmd)
        return ddlCmd

    def getInsertCmd(self, replace: bool = False) -> str:
        """
        get the INSERT command for this entityInfo

        Args:
             replace(bool): if True allow replace for insert

        Returns:
            str: the INSERT INTO SQL command for this entityInfo e.g.

        Example:

        .. code-block:: sql

            INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge) values (?,?,?,?,?,?).

        """
        # named-style placeholders (:col) matching the record dict keys
        columns = ",".join(self.typeMap.keys())
        placeholders = ":" + ",:".join(self.typeMap.keys())
        replaceClause = " OR REPLACE" if replace else ""
        insertCmd = f"INSERT{replaceClause} INTO {self.name} ({columns}) values ({placeholders})"
        if self.debug and not self.quiet:
            print(insertCmd)
        return insertCmd

    def addType(self, column, valueType, sqlType):
        """
        add the python type for the given column to the typeMap

        Args:
           column(string): the name of the column

           valueType(type): the python type of the column

           sqlType(string): the SQL type of the column
        """
        # only the first type registered for a column is kept
        if not column in self.typeMap:
            self.typeMap[column] = valueType
            self.sqlTypeMap[column] = sqlType

    def fixDates(self, resultList):
        """
        fix date entries in the given resultList by parsing the date content e.g.
        converting '1926-04-21' back to datetime.date(1926, 4, 21)

        Args:
            resultList(list): the list of records to be fixed
        """
        for record in resultList:
            for key, valueType in self.typeMap.items():
                if valueType == datetime.date:
                    # NOTE(review): assumes each record contains key and the
                    # value is a '%Y-%m-%d' string - KeyError/ValueError otherwise
                    dt = datetime.datetime.strptime(record[key], "%Y-%m-%d")
                    dateValue = dt.date()
                    record[key] = dateValue

__init__(sampleRecords, name, primaryKey=None, quiet=False, debug=False)

construct me from the given name and primary key

Parameters:

Name Type Description Default
sampleRecords

a list of sample record dicts

required
name(string)

the name of the entity

required
primaryKey(string)

the name of the primary key column

required
quiet(boolean)

if True suppress all print messages

required
debug(boolean)

True if debug information should be shown

required
Source code in lodstorage/sql.py
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
def __init__(self, sampleRecords, name, primaryKey=None, quiet=False, debug=False):
    """
    construct me from the given name and primary key

    Args:
       sampleRecords(list): a list of sample record dicts
       name(string): the name of the entity
       primaryKey(string): the name of the primary key column
       quiet(boolean): if True suppress all print messages
       debug(boolean): True if debug information should be shown
    """
    self.sampleRecords = sampleRecords
    self.name = name
    self.primaryKey = primaryKey
    self.quiet = quiet
    self.debug = debug
    self.typeMap = {}
    self.sqlTypeMap = {}
    # deriving the DDL also populates typeMap/sqlTypeMap used below
    self.createTableCmd = self.getCreateTableCmd(sampleRecords)
    self.dropTableCmd = "DROP TABLE IF EXISTS %s" % self.name
    self.insertCmd = self.getInsertCmd()

addType(column, valueType, sqlType)

add the python type for the given column to the typeMap

Parameters:

Name Type Description Default
column(string)

the name of the column

required
valueType(type)

the python type of the column

required
Source code in lodstorage/sql.py
625
626
627
628
629
630
631
632
633
634
635
636
def addType(self, column, valueType, sqlType):
    """
    add the python type for the given column to the typeMap

    Args:
       column(string): the name of the column

       valueType(type): the python type of the column

       sqlType(string): the SQL type of the column
    """
    # only the first type registered for a column is kept
    if not column in self.typeMap:
        self.typeMap[column] = valueType
        self.sqlTypeMap[column] = sqlType

fixDates(resultList)

fix date entries in the given resultList by parsing the date content e.g. converting '1926-04-21' back to datetime.date(1926, 4, 21)

Parameters:

Name Type Description Default
resultList(list)

the list of records to be fixed

required
Source code in lodstorage/sql.py
638
639
640
641
642
643
644
645
646
647
648
649
650
651
def fixDates(self, resultList):
    """
    fix date entries in the given resultList by parsing the date content e.g.
    converting '1926-04-21' back to datetime.date(1926, 4, 21)

    Args:
        resultList(list): the list of records to be fixed
    """
    for record in resultList:
        for key, valueType in self.typeMap.items():
            if valueType == datetime.date:
                # NOTE(review): assumes each record contains key and the
                # value is a '%Y-%m-%d' string - KeyError/ValueError otherwise
                dt = datetime.datetime.strptime(record[key], "%Y-%m-%d")
                dateValue = dt.date()
                record[key] = dateValue

getCreateTableCmd(sampleRecords)

get the CREATE TABLE DDL command for the given sample records

Parameters:

Name Type Description Default
sampleRecords(list)

a list of Dicts of sample Records

required

Returns:

Name Type Description
string

CREATE TABLE DDL command for this entity info

Example:

.. code-block:: sql

CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN)
Source code in lodstorage/sql.py
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
def getCreateTableCmd(self, sampleRecords):
    """
    get the CREATE TABLE DDL command for the given sample records

    Args:
        sampleRecords(list): a list of Dicts of sample Records

    Returns:
        string: CREATE TABLE DDL command for this entity info

    Example:

    .. code-block:: sql

        CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN)

    """
    ddlCmd = "CREATE TABLE %s(" % self.name
    delim = ""
    # derive a python and SQL type per column from the sample records
    for sampleRecord in sampleRecords:
        for key, value in sampleRecord.items():
            sqlType = None
            valueType = None
            if value is None:
                # only warn for a single sample record; with more records
                # another record may still provide a non-None value
                if len(sampleRecords) == 1:
                    msg = f"Warning sampleRecord column {key} is None - using TEXT as type"
                    if not self.quiet:
                        print(msg)
                    valueType = str
            else:
                valueType = type(value)
            if valueType == str:
                sqlType = "TEXT"
            elif valueType == int:
                sqlType = "INTEGER"
            elif valueType == float:
                sqlType = "FLOAT"
            elif valueType == bool:
                sqlType = "BOOLEAN"
            elif valueType == datetime.date:
                sqlType = "DATE"
            elif valueType == datetime.datetime:
                sqlType = "TIMESTAMP"
            else:
                msg = f"warning: unsupported type {valueType} for column {key}"
                if not self.quiet:
                    print(msg)
            if sqlType is not None and valueType is not None:
                # the first type seen for a column wins (see addType)
                self.addType(key, valueType, sqlType)
    # emit one column definition per collected column
    for key, sqlType in self.sqlTypeMap.items():
        is_primary = " PRIMARY KEY" if key == self.primaryKey else ""
        ddl_col = f"{delim}{key} {sqlType}{is_primary}"
        ddlCmd += ddl_col
        delim = ","
    ddlCmd += ")"
    if self.debug and not self.quiet:
        print(ddlCmd)
    return ddlCmd

getInsertCmd(replace=False)

get the INSERT command for this entityInfo

Parameters:

Name Type Description Default
replace(bool)

if True allow replace for insert

required

Returns:

Name Type Description
str str

the INSERT INTO SQL command for this entityInfo e.g.

Example:

.. code-block:: sql

INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge) values (?,?,?,?,?,?).
Source code in lodstorage/sql.py
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
def getInsertCmd(self, replace: bool = False) -> str:
    """
    get the INSERT command for this entityInfo

    Args:
         replace(bool): if True allow replace for insert

    Returns:
        str: the INSERT INTO SQL command for this entityInfo e.g.

    Example:

    .. code-block:: sql

        INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge) values (?,?,?,?,?,?).

    """
    fields = list(self.typeMap.keys())
    columnClause = ",".join(fields)
    # named-style placeholders: one ":<column>" per field
    valueClause = ",".join(f":{field}" for field in fields)
    replaceClause = " OR REPLACE" if replace else ""
    insertCmd = f"INSERT{replaceClause} INTO {self.name} ({columnClause}) values ({valueClause})"
    if self.debug and not self.quiet:
        print(insertCmd)
    return insertCmd

SQLDB

Bases: object

Structured Query Language Database wrapper

:ivar dbname(string): name of the database
:ivar debug(boolean): True if debug info should be provided
:ivar errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)

Source code in lodstorage/sql.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
class SQLDB(object):
    """
    Structured Query Language Database wrapper

    :ivar dbname(string): name of the database
    :ivar debug(boolean): True if debug info should be provided
    :ivar errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
    """

    # special dbname for an in-memory sqlite database
    RAM = ":memory:"

    def __init__(
        self,
        dbname: str = ":memory:",
        connection=None,
        check_same_thread=True,
        timeout=5,
        debug=False,
        errorDebug=False,
    ):
        """
        Construct me for the given dbname and debug

        Args:

           dbname(string): name of the database - default is a RAM based database
           connection(Connection): an optional connection to be reused
           check_same_thread(boolean): True if object handling needs to be on the same thread see https://stackoverflow.com/a/48234567/1497139
           timeout(float): number of seconds for connection timeout
           debug(boolean): if True switch on debug
           errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
        """
        self.dbname = dbname
        self.debug = debug
        self.errorDebug = errorDebug
        # install sqlite3 API compatibility fixes (project helper)
        SQLiteApiFixer.install(lenient=debug)
        if connection is None:
            self.c = sqlite3.connect(
                dbname,
                detect_types=sqlite3.PARSE_DECLTYPES,
                check_same_thread=check_same_thread,
                timeout=timeout,
            )
        else:
            self.c = connection

    def logError(self, msg):
        """
        log the given error message to stderr

        Args:
            msg(str): the error message to display
        """
        print(msg, file=sys.stderr, flush=True)

    def close(self):
        """close my connection"""
        self.c.close()

    def execute(self, ddlCmd):
        """
        execute the given Data Definition Command

        Args:
            ddlCmd(string): e.g. a CREATE TABLE or CREATE View command
        """
        self.c.execute(ddlCmd)

    def createTable4EntityInfo(self, entityInfo, withDrop=False, withCreate=True):
        """
        Create a table based on the provided EntityInfo.

        Args:
            entityInfo (EntityInfo): The EntityInfo object containing table metadata.
            withDrop (bool): If True, drop the existing table before creation.
            withCreate (bool): If True, execute the CREATE TABLE command.

        Returns:
            EntityInfo: The provided EntityInfo object.

        Raises:
            Exception: if the CREATE TABLE command fails
        """
        if withDrop:
            self.c.execute(entityInfo.dropTableCmd)
        if withCreate:
            try:
                self.c.execute(entityInfo.createTableCmd)
            except sqlite3.OperationalError as oe:
                # chain the original error so the sqlite3 cause is preserved
                raise Exception(
                    f"createTable failed with error {oe} for {entityInfo.createTableCmd}"
                ) from oe
        return entityInfo

    def createTable(
        self,
        listOfRecords,
        entityName: str,
        primaryKey: str = None,
        withCreate: bool = True,
        withDrop: bool = False,
        sampleRecordCount=1,
        failIfTooFew=True,
    ):
        """
        Derive Data Definition Language CREATE TABLE command from list of Records by examining first record
        as defining sample record and execute DDL command.

        Args:
            listOfRecords (list): A list of Dicts.
            entityName (str): The entity / table name to use.
            primaryKey (str): The key/column to use as a primary key.
            withDrop (bool): True if the existing Table should be dropped.
            withCreate (bool): True if the create Table command should be executed.
            sampleRecordCount (int): Number of sample records expected and to be inspected (negative means all records).
            failIfTooFew (bool): Raise an Exception if too few sample records, else warn only.

        Returns:
            EntityInfo: Meta data information for the created table.
        """
        recordCount = len(listOfRecords)
        # a negative sampleRecordCount means: inspect all records
        if sampleRecordCount < 0:
            sampleRecordCount = recordCount
        if recordCount < sampleRecordCount:
            msg = f"only {recordCount}/{sampleRecordCount} of needed sample records to createTable available"
            if failIfTooFew:
                raise Exception(msg)
            elif self.debug:
                self.logError(msg)

        sampleRecords = listOfRecords[:sampleRecordCount]
        entityInfo = EntityInfo(sampleRecords, entityName, primaryKey, debug=self.debug)

        return self.createTable4EntityInfo(entityInfo, withDrop, withCreate)

    def getDebugInfo(self, record, index, executeMany):
        """
        get the debug info for the given record at the given index depending on the state of executeMany

        Args:
            record(dict): the record to show
            index(int): the index of the record
            executeMany(boolean): if True the record may be valid else not
        """
        debugInfo = ""
        if not executeMany:
            # shall we show the details of the record (which might be a security risk)
            if self.errorDebug:
                # show details of record
                debugInfo = "\nrecord  #%d=%s" % (index, repr(record))
            else:
                # show only index
                debugInfo = "\nrecord #%d" % index
        return debugInfo

    def store(
        self, listOfRecords, entityInfo, executeMany=False, fixNone=False, replace=False
    ):
        """
        store the given list of records based on the given entityInfo

        Args:

           listOfRecords(list): the list of Dicts to be stored
           entityInfo(EntityInfo): the meta data to be used for storing
           executeMany(bool): if True the insert command is done with many/all records at once
           fixNone(bool): if True make sure empty columns in the listOfDict are filled with "None" values
           replace(bool): if True allow replace for insert
        """
        insertCmd = entityInfo.getInsertCmd(replace=replace)
        record = None
        index = 0
        try:
            if executeMany:
                if fixNone:
                    LOD.setNone4List(listOfRecords, entityInfo.typeMap.keys())
                self.c.executemany(insertCmd, listOfRecords)
            else:
                for record in listOfRecords:
                    index += 1
                    if fixNone:
                        LOD.setNone(record, entityInfo.typeMap.keys())
                    self.c.execute(insertCmd, record)
            self.c.commit()
        except sqlite3.ProgrammingError as pe:
            msg = pe.args[0]
            if "You did not supply a value for binding" in msg:
                if ":" in msg:
                    # sqlite now returns the parameter name not the number
                    # You did not supply a value for binding parameter :type.
                    # the capture group already excludes the leading colon
                    columnName = re.findall(r":([a-zA-Z][a-zA-Z0-9_]*)", msg)[0]
                else:
                    # pre python 3.10
                    # You did not supply a value for binding 2.
                    columnIndex = int(re.findall(r"\d+", msg)[0])
                    columnName = list(entityInfo.typeMap.keys())[columnIndex - 1]
                debugInfo = self.getDebugInfo(record, index, executeMany)
                # chain the cause so the sqlite3 traceback is preserved
                raise Exception(
                    "%s\nfailed: no value supplied for column '%s'%s"
                    % (insertCmd, columnName, debugInfo)
                ) from pe
            else:
                raise
        except sqlite3.InterfaceError as ie:
            msg = ie.args[0]
            if "Error binding parameter" in msg:
                columnName = re.findall(r":[_a-zA-Z]\w*", msg)[0]
                debugInfo = self.getDebugInfo(record, index, executeMany)
                raise Exception(
                    "%s\nfailed: error binding column '%s'%s"
                    % (insertCmd, columnName, debugInfo)
                ) from ie
            else:
                raise
        except Exception as ex:
            debugInfo = self.getDebugInfo(record, index, executeMany)
            msg = "%s\nfailed:%s%s" % (insertCmd, str(ex), debugInfo)
            raise Exception(msg) from ex

    def queryGen(self, sqlQuery, params=None):
        """
        run the given sqlQuery as a generator for dicts

        Args:

            sqlQuery(string): the SQL query to be executed
            params(tuple): the query params, if any

        Returns:
            a generator of dicts
        """
        if self.debug:
            print(sqlQuery)
            if params is not None:
                print(params)
        # https://stackoverflow.com/a/13735506/1497139
        cur = self.c.cursor()
        try:
            if params is not None:
                query = cur.execute(sqlQuery, params)
            else:
                query = cur.execute(sqlQuery)
            colname = [d[0] for d in query.description]
            try:
                # loop over all rows
                for row in query:
                    record = dict(zip(colname, row))
                    yield record
            except Exception as ex:
                # keep the original best-effort behavior: log and stop iterating
                msg = str(ex)
                self.logError(msg)
        finally:
            # make sure the cursor is closed even if execute() raised
            cur.close()

    def query(self, sqlQuery, params=None):
        """
        run the given sqlQuery and return a list of Dicts

        Args:

            sqlQuery(string): the SQL query to be executed
            params(tuple): the query params, if any

        Returns:
            list: a list of Dicts
        """
        resultList = []
        for record in self.queryGen(sqlQuery, params):
            resultList.append(record)
        return resultList

    def queryAll(self, entityInfo, fixDates=True):
        """
        query all records for the given entityInfo's table

        Args:
           entityInfo(EntityInfo): the entity info of the table to query
           fixDates(boolean): True if date entries should be returned as such and not as strings
        """
        sqlQuery = "SELECT * FROM %s" % entityInfo.name
        resultList = self.query(sqlQuery)
        if fixDates:
            entityInfo.fixDates(resultList)
        return resultList

    def getTableList(self, tableType="table"):
        """
        get the schema information from this database

        Args:
            tableType(str): table or view

        Returns:
            list: a list as derived from PRAGMA table_info
        """
        tableQuery = f"SELECT name FROM sqlite_master WHERE type='{tableType}'"
        tableList = self.query(tableQuery)
        for table in tableList:
            tableName = table["name"]
            columnQuery = f"PRAGMA table_info('{tableName}')"
            columns = self.query(columnQuery)
            table["columns"] = columns
        return tableList

    def getTableDict(self, tableType="table"):
        """
        get the schema information from this database as a dict

        Args:
            tableType(str): table or view

        Returns:
            dict: Lookup map of tables with columns also being converted to dict
        """
        tableDict = {}
        for table in self.getTableList(tableType=tableType):
            colDict = {}
            for col in table["columns"]:
                colDict[col["name"]] = col
            table["columns"] = colDict
            tableDict[table["name"]] = table
        return tableDict

    def restoreProgress(self, status, remaining, total):
        """progress callback for restore operations"""
        self.progress("Restore", status, remaining, total)

    def backupProgress(self, status, remaining, total):
        """progress callback for backup operations"""
        self.progress("Backup", status, remaining, total)

    def progress(self, action, status, remaining, total):
        """
        show progress
        """
        # guard against division by zero for an empty database
        percent = (total - remaining) / total * 100 if total else 100.0
        print(
            "%s %s at %5.0f%%"
            % (
                action,
                "... " if status == 0 else "done",
                percent,
            )
        )

    def backup(
        self,
        backupDB,
        action="Backup",
        profile=False,
        showProgress: int = 200,
        doClose=True,
    ):
        """
        create backup of this SQLDB to the given backup db

        see https://stackoverflow.com/a/59042442/1497139

        Args:
            backupDB(string): the path to the backupdb or SQLDB.RAM for in memory
            action(string): the action to display
            profile(boolean): True if timing information shall be shown
            showProgress(int): show progress at each showProgress page (0=show no progress)
            doClose(boolean): if True close the backup connection and return None, else return the open connection
        """
        if sys.version_info <= (3, 6):
            raise Exception(
                "backup via stdlibrary not available in python <=3.6 use copyToDB instead"
            )
        startTime = time.time()
        bck = sqlite3.connect(backupDB)
        if showProgress > 0:
            if action == "Restore":
                progress = self.restoreProgress
            else:
                progress = self.backupProgress
        else:
            progress = None
        with bck:
            self.c.backup(bck, pages=showProgress, progress=progress)
        elapsed = time.time() - startTime
        if profile:
            print("%s to %s took %5.1f s" % (action, backupDB, elapsed))
        if doClose:
            bck.close()
            return None
        else:
            return bck

    def showDump(self, dump, limit=10):
        """
        show the given dump up to the given limit of lines

        Args:
            dump(string): the SQL dump to show
            limit(int): the maximum number of lines to display
        """
        s = io.StringIO(dump)
        # print at most limit lines (previously limit+1 lines were shown)
        for index, line in enumerate(s):
            if index >= limit:
                break
            print(line)

    def executeDump(
        self, connection, dump, title, maxErrors=100, errorDisplayLimit=12, profile=True
    ):
        """
        execute the given dump for the given connection

        Args:
            connection(Connection): the sqlite3 connection to use
            dump(string): the SQL commands for the dump
            title(string): the title of the dump
            maxErrors(int): maximum number of errors to be tolerated before stopping and doing a rollback
            errorDisplayLimit(int): maximum number of error messages to print
            profile(boolean): True if profiling information should be shown
        Returns:
            a list of errors
        """
        if self.debug:
            self.showDump(dump)
        startTime = time.time()
        if profile:
            print("dump of %s has size %4.1f MB" % (title, len(dump) / 1024 / 1024))
        errors = []
        index = 0
        # fixes https://github.com/WolfgangFahl/ProceedingsTitleParser/issues/37
        for line in dump.split(";\n"):
            try:
                connection.execute(line)
            except sqlite3.OperationalError as soe:
                msg = "SQL error %s in line %d:\n\t%s" % (soe, index, line)
                errors.append(msg)
                if len(errors) <= errorDisplayLimit:
                    print(msg)
                if len(errors) >= maxErrors:
                    connection.execute("ROLLBACK;")
                    break

            index = index + 1
        if profile:
            print(
                "finished executing dump %s with %d lines and %d errors in %5.1f s"
                % (title, index, len(errors), time.time() - startTime)
            )
        return errors

    def copyTo(self, copyDB, profile=True):
        """
        copy my content to another database

        Args:

           copyDB(Connection): the target database
           profile(boolean): if True show profile information
        """
        startTime = time.time()
        dump = "\n".join(self.c.iterdump())
        if profile:
            print(
                "finished getting dump of %s in %5.1f s"
                % (self.dbname, time.time() - startTime)
            )
        dumpErrors = self.executeDump(copyDB.c, dump, self.dbname, profile=profile)
        return dumpErrors

    @staticmethod
    def restore(backupDB, restoreDB, profile=False, showProgress=200, debug=False):
        """
        restore the restoreDB from the given backup DB

        Args:
            backupDB(string): path to the backupDB e.g. backup.db
            restoreDB(string): path to the restoreDB or in Memory SQLDB.RAM
            profile(boolean): True if timing information should be shown
            showProgress(int): show progress at each showProgress page (0=show no progress)
            debug(boolean): if True switch on debug for the restored database

        Returns:
            SQLDB: the restored database wrapper
        """
        backupSQLDB = SQLDB(backupDB)
        connection = backupSQLDB.backup(
            restoreDB,
            action="Restore",
            profile=profile,
            showProgress=showProgress,
            doClose=False,
        )
        # the backup source connection is no longer needed - avoid leaking it
        backupSQLDB.close()
        restoreSQLDB = SQLDB(restoreDB, connection=connection, debug=debug)
        return restoreSQLDB

__init__(dbname=':memory:', connection=None, check_same_thread=True, timeout=5, debug=False, errorDebug=False)

Construct me for the given dbname and debug

Args:

dbname(string): name of the database - default is a RAM based database connection(Connection): an optional connection to be reused check_same_thread(boolean): True if object handling needs to be on the same thread see https://stackoverflow.com/a/48234567/1497139 timeout(float): number of seconds for connection timeout debug(boolean): if True switch on debug errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)

Source code in lodstorage/sql.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def __init__(
    self,
    dbname: str = ":memory:",
    connection=None,
    check_same_thread=True,
    timeout=5,
    debug=False,
    errorDebug=False,
):
    """
    Construct me for the given dbname and debug

    Args:

       dbname(string): name of the database - default is a RAM based database
       connection(Connection): an optional connection to be reused
       check_same_thread(boolean): True if object handling needs to be on the same thread see https://stackoverflow.com/a/48234567/1497139
       timeout(float): number of seconds for connection timeout
       debug(boolean): if True switch on debug
       errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
    """
    self.dbname = dbname
    self.debug = debug
    self.errorDebug = errorDebug
    # install sqlite3 API compatibility fixes (project helper)
    SQLiteApiFixer.install(lenient=debug)
    # reuse the supplied connection if given, otherwise open a fresh one
    if connection is not None:
        self.c = connection
    else:
        self.c = sqlite3.connect(
            dbname,
            detect_types=sqlite3.PARSE_DECLTYPES,
            check_same_thread=check_same_thread,
            timeout=timeout,
        )

backup(backupDB, action='Backup', profile=False, showProgress=200, doClose=True)

create backup of this SQLDB to the given backup db

see https://stackoverflow.com/a/59042442/1497139

Parameters:

Name Type Description Default
backupDB(string)

the path to the backupdb or SQLDB.RAM for in memory

required
action(string)

the action to display

required
profile(boolean)

True if timing information shall be shown

required
showProgress(int)

show progress at each showProgress page (0=show no progress)

required
Source code in lodstorage/sql.py
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
def backup(
    self,
    backupDB,
    action="Backup",
    profile=False,
    showProgress: int = 200,
    doClose=True,
):
    """
    create backup of this SQLDB to the given backup db

    see https://stackoverflow.com/a/59042442/1497139

    Args:
        backupDB(string): the path to the backupdb or SQLDB.RAM for in memory
        action(string): the action to display
        profile(boolean): True if timing information shall be shown
        showProgress(int): show progress at each showProgress page (0=show no progress)
        doClose(boolean): if True close the backup connection and return None, else return it
    """
    if sys.version_info <= (3, 6):
        raise Exception(
            "backup via stdlibrary not available in python <=3.6 use copyToDB instead"
        )
    started = time.time()
    target = sqlite3.connect(backupDB)
    # pick the progress callback matching the action; None switches progress off
    progress = None
    if showProgress > 0:
        progress = self.restoreProgress if action == "Restore" else self.backupProgress
    with target:
        self.c.backup(target, pages=showProgress, progress=progress)
    elapsed = time.time() - started
    if profile:
        print("%s to %s took %5.1f s" % (action, backupDB, elapsed))
    if not doClose:
        return target
    target.close()
    return None

close()

close my connection

Source code in lodstorage/sql.py
75
76
77
def close(self):
    """shut down the underlying sqlite3 connection"""
    self.c.close()

copyTo(copyDB, profile=True)

copy my content to another database

Args:

copyDB(Connection): the target database profile(boolean): if True show profile information

Source code in lodstorage/sql.py
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
def copyTo(self, copyDB, profile=True):
    """
    copy my content to another database

    Args:

       copyDB(Connection): the target database
       profile(boolean): if True show profile information
    """
    started = time.time()
    # serialize my entire content into one SQL dump string
    dump = "\n".join(self.c.iterdump())
    if profile:
        print(
            "finished getting dump of %s in %5.1f s"
            % (self.dbname, time.time() - started)
        )
    # replay the dump on the target database and collect any errors
    return self.executeDump(copyDB.c, dump, self.dbname, profile=profile)

createTable(listOfRecords, entityName, primaryKey=None, withCreate=True, withDrop=False, sampleRecordCount=1, failIfTooFew=True)

Derive Data Definition Language CREATE TABLE command from list of Records by examining first record as defining sample record and execute DDL command.

Parameters:

Name Type Description Default
listOfRecords list

A list of Dicts.

required
entityName str

The entity / table name to use.

required
primaryKey str

The key/column to use as a primary key.

None
withDrop bool

True if the existing Table should be dropped.

False
withCreate bool

True if the create Table command should be executed.

True
sampleRecordCount int

Number of sample records expected and to be inspected.

1
failIfTooFew bool

Raise an Exception if too few sample records, else warn only.

True

Returns:

Name Type Description
EntityInfo

Meta data information for the created table.

Source code in lodstorage/sql.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
def createTable(
    self,
    listOfRecords,
    entityName: str,
    primaryKey: str = None,
    withCreate: bool = True,
    withDrop: bool = False,
    sampleRecordCount=1,
    failIfTooFew=True,
):
    """
    Derive Data Definition Language CREATE TABLE command from list of Records by examining first record
    as defining sample record and execute DDL command.

    Args:
        listOfRecords (list): A list of Dicts.
        entityName (str): The entity / table name to use.
        primaryKey (str): The key/column to use as a primary key.
        withDrop (bool): True if the existing Table should be dropped.
        withCreate (bool): True if the create Table command should be executed.
        sampleRecordCount (int): Number of sample records expected and to be inspected.
        failIfTooFew (bool): Raise an Exception if too few sample records, else warn only.

    Returns:
        EntityInfo: Meta data information for the created table.
    """
    recordCount = len(listOfRecords)
    # a negative sampleRecordCount means: inspect all records
    if sampleRecordCount < 0:
        sampleRecordCount = recordCount
    if recordCount < sampleRecordCount:
        msg = f"only {recordCount}/{sampleRecordCount} of needed sample records to createTable available"
        if failIfTooFew:
            raise Exception(msg)
        if self.debug:
            self.logError(msg)
    entityInfo = EntityInfo(
        listOfRecords[:sampleRecordCount], entityName, primaryKey, debug=self.debug
    )
    return self.createTable4EntityInfo(entityInfo, withDrop, withCreate)

createTable4EntityInfo(entityInfo, withDrop=False, withCreate=True)

Create a table based on the provided EntityInfo.

Parameters:

Name Type Description Default
entityInfo EntityInfo

The EntityInfo object containing table metadata.

required
withDrop bool

If True, drop the existing table before creation.

False
withCreate bool

If True, execute the CREATE TABLE command.

True

Returns:

Name Type Description
EntityInfo

The provided EntityInfo object.

Source code in lodstorage/sql.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def createTable4EntityInfo(self, entityInfo, withDrop=False, withCreate=True):
    """
    Create a table based on the provided EntityInfo.

    Args:
        entityInfo (EntityInfo): The EntityInfo object containing table metadata.
        withDrop (bool): If True, drop the existing table before creation.
        withCreate (bool): If True, execute the CREATE TABLE command.

    Returns:
        EntityInfo: The provided EntityInfo object.

    Raises:
        Exception: if the CREATE TABLE command fails
    """
    if withDrop:
        self.c.execute(entityInfo.dropTableCmd)
    if withCreate:
        try:
            self.c.execute(entityInfo.createTableCmd)
        except sqlite3.OperationalError as oe:
            # chain the original error so the sqlite3 cause is preserved
            raise Exception(
                f"createTable failed with error {oe} for {entityInfo.createTableCmd}"
            ) from oe
    return entityInfo

execute(ddlCmd)

execute the given Data Definition Command

Parameters:

Name Type Description Default
ddlCmd(string)

e.g. a CREATE TABLE or CREATE View command

required
Source code in lodstorage/sql.py
79
80
81
82
83
84
85
86
def execute(self, ddlCmd):
    """
    execute the given Data Definition Command

    Args:
        ddlCmd(string): e.g. a CREATE TABLE or CREATE View command
    """
    # delegate straight to the sqlite3 connection
    self.c.execute(ddlCmd)

executeDump(connection, dump, title, maxErrors=100, errorDisplayLimit=12, profile=True)

execute the given dump for the given connection

Parameters:

Name Type Description Default
connection(Connection)

the sqlite3 connection to use

required
dump(string)

the SQL commands for the dump

required
title(string)

the title of the dump

required
maxErrors(int)

maximum number of errors to be tolerated before stopping and doing a rollback

required
profile(boolean)

True if profiling information should be shown

required

Returns: a list of errors

Source code in lodstorage/sql.py
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
def executeDump(
    self, connection, dump, title, maxErrors=100, errorDisplayLimit=12, profile=True
):
    """
    execute the given dump for the given connection

    Args:
        connection(Connection): the sqlite3 connection to use
        dump(string): the SQL commands for the dump
        title(string): the title of the dump
        maxErrors(int): maximum number of errors to be tolerated before stopping and doing a rollback
        errorDisplayLimit(int): maximum number of error messages to print
        profile(boolean): True if profiling information should be shown
    Returns:
        a list of errors
    """
    if self.debug:
        self.showDump(dump)
    started = time.time()
    if profile:
        print("dump of %s has size %4.1f MB" % (title, len(dump) / 1024 / 1024))
    errors = []
    lineCount = 0
    # split on ';\n' - fixes https://github.com/WolfgangFahl/ProceedingsTitleParser/issues/37
    for statement in dump.split(";\n"):
        try:
            connection.execute(statement)
        except sqlite3.OperationalError as soe:
            msg = "SQL error %s in line %d:\n\t%s" % (soe, lineCount, statement)
            errors.append(msg)
            if len(errors) <= errorDisplayLimit:
                print(msg)
            if len(errors) >= maxErrors:
                # too many errors: undo everything and stop
                connection.execute("ROLLBACK;")
                break
        lineCount += 1
    if profile:
        print(
            "finished executing dump %s with %d lines and %d errors in %5.1f s"
            % (title, lineCount, len(errors), time.time() - started)
        )
    return errors

getDebugInfo(record, index, executeMany)

get the debug info for the given record at the given index depending on the state of executeMany

Parameters:

Name Type Description Default
record(dict)

the record to show

required
index(int)

the index of the record

required
executeMany(boolean)

if True the record may be valid else not

required
Source code in lodstorage/sql.py
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def getDebugInfo(self, record, index, executeMany):
    """
    get the debug info for the given record at the given index depending on the state of executeMany

    Args:
        record(dict): the record to show
        index(int): the index of the record
        executeMany(boolean): if True the record may be valid else not
    """
    if executeMany:
        # in executeMany mode no single record can be blamed
        return ""
    # shall we show the details of the record (which might be a security risk)?
    if self.errorDebug:
        # full record details
        return "\nrecord  #%d=%s" % (index, repr(record))
    # index only - no potentially sensitive data revealed
    return "\nrecord #%d" % index

getTableDict(tableType='table')

get the schema information from this database as a dict

Parameters:

Name Type Description Default
tableType(str)

table or view

required

Returns:

Name Type Description
dict

Lookup map of tables with columns also being converted to dict

Source code in lodstorage/sql.py
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
def getTableDict(self, tableType="table"):
    """
    get the schema information from this database as a dict

    Args:
        tableType(str): table or view

    Returns:
        dict: Lookup map of tables with columns also being converted to dict
    """
    tableDict = {}
    for table in self.getTableList(tableType=tableType):
        # index the column records of this table by their column name
        table["columns"] = {col["name"]: col for col in table["columns"]}
        tableDict[table["name"]] = table
    return tableDict

getTableList(tableType='table')

get the schema information from this database

Parameters:

Name Type Description Default
tableType(str)

table or view

required
Return

list: a list as derived from PRAGMA table_info

Source code in lodstorage/sql.py
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
def getTableList(self, tableType="table"):
    """
    get the schema information from this database

    Args:
        tableType(str): table or view

    Return:
        list: a list as derived from PRAGMA table_info
    """
    tableList = self.query(f"SELECT name FROM sqlite_master WHERE type='{tableType}'")
    # attach the column metadata to each table record
    for table in tableList:
        table["columns"] = self.query(f"PRAGMA table_info('{table['name']}')")
    return tableList

logError(msg)

log the given error message to stderr

Parameters:

Name Type Description Default
msg(str)

the error message to display

required
Source code in lodstorage/sql.py
66
67
68
69
70
71
72
73
def logError(self, msg):
    """
    log the given error message to stderr

    Args:
        msg(str): the error message to display
    """
    # flush immediately so the error shows up even when output is buffered
    print(msg, file=sys.stderr, flush=True)

progress(action, status, remaining, total)

show progress

Source code in lodstorage/sql.py
346
347
348
349
350
351
352
353
354
355
356
357
def progress(self, action, status, remaining, total):
    """
    show progress

    Args:
        action(str): the name of the action being reported
        status(int): rendered as "... " when 0, otherwise as "done"
        remaining(int): number of units still to be processed
        total(int): total number of units

    NOTE(review): signature matches the sqlite3 backup progress callback -
    presumably used there; confirm against callers.
    """
    state = "... " if status == 0 else "done"
    percent = (total - remaining) / total * 100
    print("%s %s at %5.0f%%" % (action, state, percent))

query(sqlQuery, params=None)

run the given sqlQuery and return a list of Dicts

Args:

sqlQuery(string): the SQL query to be executed
params(tuple): the query params, if any

Returns:

Name Type Description
list

a list of Dicts

Source code in lodstorage/sql.py
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
def query(self, sqlQuery, params=None):
    """
    run the given sqlQuery and return a list of Dicts

    Args:
        sqlQuery(string): the SQL query to be executed
        params(tuple): the query params, if any

    Returns:
        list: a list of Dicts
    """
    # materialize the generator provided by queryGen
    return list(self.queryGen(sqlQuery, params))

queryAll(entityInfo, fixDates=True)

query all records for the given entityName/tableName

Parameters:

Name Type Description Default
entityName(string)

name of the entity/table to query

required
fixDates(boolean)

True if date entries should be returned as such and not as strings

required
Source code in lodstorage/sql.py
288
289
290
291
292
293
294
295
296
297
298
299
300
def queryAll(self, entityInfo, fixDates=True):
    """
    query all records for the given entity/table

    Args:
       entityInfo(EntityInfo): entity metadata supplying the table name and date fixing
       fixDates(boolean): True if date entries should be returned as such and not as strings
    """
    resultList = self.query("SELECT * FROM %s" % entityInfo.name)
    if fixDates:
        # let the entity metadata convert date strings back to date objects
        entityInfo.fixDates(resultList)
    return resultList

queryGen(sqlQuery, params=None)

run the given sqlQuery as a generator of dicts

Args:

sqlQuery(string): the SQL query to be executed
params(tuple): the query params, if any

Returns:

Type Description

a generator of dicts

Source code in lodstorage/sql.py
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
def queryGen(self, sqlQuery, params=None):
    """
    run the given sqlQuery as a generator of dicts

    Args:
        sqlQuery(string): the SQL query to be executed
        params(tuple): the query params, if any

    Returns:
        a generator of dicts
    """
    if self.debug:
        print(sqlQuery)
        if params is not None:
            print(params)
    # see https://stackoverflow.com/a/13735506/1497139
    cur = self.c.cursor()
    if params is None:
        query = cur.execute(sqlQuery)
    else:
        query = cur.execute(sqlQuery, params)
    colnames = [meta[0] for meta in query.description]
    try:
        # yield each row as a dict keyed by column name
        for row in query:
            yield dict(zip(colnames, row))
    except Exception as ex:
        # report but do not raise - the generator simply ends early
        self.logError(str(ex))
    cur.close()

restore(backupDB, restoreDB, profile=False, showProgress=200, debug=False) staticmethod

restore the restoreDB from the given backup DB

Parameters:

Name Type Description Default
backupDB(string)

path to the backupDB e.g. backup.db

required
restoreDB(string)

path to the restoreDB or in Memory SQLDB.RAM

required
profile(boolean)

True if timing information should be shown

required
showProgress(int)

show progress at each showProgress page (0=show no progress)

required
Source code in lodstorage/sql.py
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
@staticmethod
def restore(backupDB, restoreDB, profile=False, showProgress=200, debug=False):
    """
    restore the restoreDB from the given backup DB

    Args:
        backupDB(string): path to the backupDB e.g. backup.db
        restoreDB(string): path to the restoreDB or in Memory SQLDB.RAM
        profile(boolean): True if timing information should be shown
        showProgress(int): show progress at each showProgress page (0=show no progress)
        debug(boolean): True if debugging information should be shown

    Returns:
        SQLDB: a SQLDB wrapping the restored database
    """
    # run the backup machinery in the "Restore" direction and keep the
    # connection open so it can be wrapped into a new SQLDB instance
    source = SQLDB(backupDB)
    connection = source.backup(
        restoreDB,
        action="Restore",
        profile=profile,
        showProgress=showProgress,
        doClose=False,
    )
    return SQLDB(restoreDB, connection=connection, debug=debug)

showDump(dump, limit=10)

show the given dump up to the given limit

Parameters:

Name Type Description Default
dump(string)

the SQL dump to show

required
limit(int)

the maximum number of lines to display

required
Source code in lodstorage/sql.py
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
def showDump(self, dump, limit=10):
    """
    show the given dump up to the given limit of lines

    Args:
        dump(string): the SQL dump to show
        limit(int): the maximum number of lines to display
    """
    s = io.StringIO(dump)
    for index, line in enumerate(s):
        # stop once limit lines have been shown
        # (previously `index <= limit` displayed limit+1 lines - off by one)
        if index >= limit:
            break
        print(line)

store(listOfRecords, entityInfo, executeMany=False, fixNone=False, replace=False)

store the given list of records based on the given entityInfo

Args:

listOfRecords(list): the list of Dicts to be stored entityInfo(EntityInfo): the meta data to be used for storing executeMany(bool): if True the insert command is done with many/all records at once fixNone(bool): if True make sure empty columns in the listOfDict are filled with "None" values replace(bool): if True allow replace for insert

Source code in lodstorage/sql.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
def store(
    self, listOfRecords, entityInfo, executeMany=False, fixNone=False, replace=False
):
    """
    store the given list of records based on the given entityInfo

    Args:
       listOfRecords(list): the list of Dicts to be stored
       entityInfo(EntityInfo): the meta data to be used for storing
       executeMany(bool): if True the insert command is done with many/all records at once
       fixNone(bool): if True make sure empty columns in the listOfDict are filled with "None" values
       replace(bool): if True allow replace for insert

    Raises:
        Exception: with the insert command, offending column name and debug
            info when a binding value is missing or cannot be bound, or with
            the original error message for any other failure
    """
    insertCmd = entityInfo.getInsertCmd(replace=replace)
    # remember the record and index being processed for error reporting
    record = None
    index = 0
    try:
        if executeMany:
            if fixNone:
                # fill missing columns with None for all records at once
                LOD.setNone4List(listOfRecords, entityInfo.typeMap.keys())
            self.c.executemany(insertCmd, listOfRecords)
        else:
            for record in listOfRecords:
                index += 1
                if fixNone:
                    # fill missing columns of this single record with None
                    LOD.setNone(record, entityInfo.typeMap.keys())
                self.c.execute(insertCmd, record)
        self.c.commit()
    except sqlite3.ProgrammingError as pe:
        msg = pe.args[0]
        if "You did not supply a value for binding" in msg:
            if ":" in msg:
                # sqlite now returns the parameter name not the number
                # You did not supply a value for binding parameter :type.
                columnName = re.findall(r":([a-zA-Z][a-zA-Z0-9_]*)", msg)[0]
                columnName = columnName.replace(":", "")
            else:
                # pre python 3.10
                # You did not supply a value for binding 2.
                # binding numbers are 1-based, typeMap order matches them
                columnIndex = int(re.findall(r"\d+", msg)[0])
                columnName = list(entityInfo.typeMap.keys())[columnIndex - 1]
            debugInfo = self.getDebugInfo(record, index, executeMany)
            raise Exception(
                "%s\nfailed: no value supplied for column '%s'%s"
                % (insertCmd, columnName, debugInfo)
            )
        else:
            raise pe
    except sqlite3.InterfaceError as ie:
        msg = ie.args[0]
        if "Error binding parameter" in msg:
            # extract the offending named parameter e.g. ":name"
            columnName = re.findall(r":[_a-zA-Z]\w*", msg)[0]
            debugInfo = self.getDebugInfo(record, index, executeMany)
            raise Exception(
                "%s\nfailed: error binding column '%s'%s"
                % (insertCmd, columnName, debugInfo)
            )
        else:
            raise ie
    except Exception as ex:
        # any other failure: re-raise with insert command and debug info
        debugInfo = self.getDebugInfo(record, index, executeMany)
        msg = "%s\nfailed:%s%s" % (insertCmd, str(ex), debugInfo)
        raise Exception(msg)

sqlite_api

Created on 2024-08-24

@author: wf

DatetimeAdapter

Class for converting date and time formats with optional lenient error handling.

Source code in lodstorage/sqlite_api.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class DatetimeAdapter:
    """Class for converting date and time formats with optional lenient error handling."""

    def __init__(self, lenient: bool = False):
        """Initialize with optional lenient error handling."""
        self.lenient = lenient

    def _handle_input(self, val: bytes) -> str:
        """Validate and decode the input bytes into string."""
        if isinstance(val, bytes):
            return val.decode()
        raise TypeError("Input must be a byte string.")

    def _handle_error(self, error: Exception, val: bytes):
        """Handle errors based on the lenient mode."""
        if not self.lenient:
            raise error
        # lenient mode: warn and signal failure with None
        logging.warning(f"Failed to convert {val}: {error}")
        return None

    def convert_date(self, val: bytes) -> datetime.date:
        """Convert ISO 8601 date byte string to a datetime.date object."""
        try:
            return datetime.date.fromisoformat(self._handle_input(val))
        except Exception as e:
            return self._handle_error(e, val)

    def convert_datetime(self, val: bytes) -> datetime.datetime:
        """Convert ISO 8601 datetime byte string to a datetime.datetime object."""
        try:
            return datetime.datetime.fromisoformat(self._handle_input(val))
        except Exception as e:
            return self._handle_error(e, val)

    def convert_timestamp(self, val: bytes) -> datetime.datetime:
        """Convert Unix epoch timestamp byte string to a datetime.datetime object."""
        try:
            decoded = self._handle_input(val)
        except Exception as e:
            return self._handle_error(e, val)
        try:
            # the epoch value is expected in microseconds
            return datetime.datetime.fromtimestamp(float(decoded) / 10**6)
        except ValueError:
            # not a number - fall back to ISO 8601 parsing
            try:
                return datetime.datetime.fromisoformat(decoded)
            except Exception as e:
                return self._handle_error(e, val)
        except Exception as e:
            return self._handle_error(e, val)

__init__(lenient=False)

Initialize with optional lenient error handling.

Source code in lodstorage/sqlite_api.py
15
16
17
def __init__(self, lenient: bool = False):
    """Initialize with optional lenient error handling."""
    self.lenient = lenient

convert_date(val)

Convert ISO 8601 date byte string to a datetime.date object.

Source code in lodstorage/sqlite_api.py
33
34
35
36
37
38
39
40
def convert_date(self, val: bytes) -> datetime.date:
    """Convert ISO 8601 date byte string to a datetime.date object."""
    try:
        decoded_date = self._handle_input(val)
        dt = datetime.date.fromisoformat(decoded_date)
        return dt
    except Exception as e:
        return self._handle_error(e, val)

convert_datetime(val)

Convert ISO 8601 datetime byte string to a datetime.datetime object.

Source code in lodstorage/sqlite_api.py
42
43
44
45
46
47
48
def convert_datetime(self, val: bytes) -> datetime.datetime:
    """Convert ISO 8601 datetime byte string to a datetime.datetime object."""
    try:
        decoded_datetime = self._handle_input(val)
        return datetime.datetime.fromisoformat(decoded_datetime)
    except Exception as e:
        return self._handle_error(e, val)

convert_timestamp(val)

Convert Unix epoch timestamp byte string to a datetime.datetime object.

Source code in lodstorage/sqlite_api.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def convert_timestamp(self, val: bytes) -> datetime.datetime:
    """Convert Unix epoch timestamp byte string to a datetime.datetime object."""
    try:
        decoded_string = self._handle_input(val)
        timestamp_float = float(decoded_string) / 10**6
        dt = datetime.datetime.fromtimestamp(timestamp_float)
        return dt
    except ValueError as _ve:
        try:
            dt = datetime.datetime.fromisoformat(decoded_string)
            return dt
        except Exception as e:
            return self._handle_error(e, val)
    except Exception as e:
        return self._handle_error(e, val)

SQLiteApiFixer

Class to register SQLite adapters and converters using a DatetimeAdapter instance.

Source code in lodstorage/sqlite_api.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
class SQLiteApiFixer:
    """
    Class to register SQLite adapters
    and converters using a DatetimeAdapter instance.
    """

    _instance = None  # singleton instance

    def __init__(self, lenient: bool = True):
        """Private constructor to initialize the singleton instance."""
        self.adapter = DatetimeAdapter(lenient=lenient)
        self.register_converters()
        self.register_adapters()

    @classmethod
    def install(cls, lenient: bool = True):
        """Install the singleton instance and register SQLite adapters and converters."""
        if cls._instance is None:
            cls._instance = cls(lenient=lenient)
        return cls._instance

    def register_adapters(self):
        """Register the necessary SQLite adapters."""
        adapters = (
            (datetime.date, self.adapt_date_iso),
            (datetime.datetime, self.adapt_datetime_iso),
            (bool, self.adapt_boolean),
        )
        for python_type, adapt_function in adapters:
            sqlite3.register_adapter(python_type, adapt_function)

    def register_converters(self):
        """Register the necessary SQLite converters."""
        converters = (
            ("date", self.adapter.convert_date),
            ("datetime", self.adapter.convert_datetime),
            ("timestamp", self.adapter.convert_timestamp),
            ("boolean", self.convert_boolean),
        )
        for type_name, convert_function in converters:
            sqlite3.register_converter(type_name, convert_function)

    @staticmethod
    def adapt_date_iso(val: datetime.date):
        """Adapt datetime.date to ISO 8601 date."""
        return val.isoformat()

    @staticmethod
    def adapt_datetime_iso(val: datetime.datetime):
        """Adapt datetime.datetime to timezone-naive ISO 8601 date."""
        return val.isoformat()

    @staticmethod
    def adapt_boolean(val: bool):
        """Adapt boolean to int."""
        return int(bool(val))

    @staticmethod
    def convert_boolean(val: bytes):
        """Convert 0 or 1 to boolean."""
        return bool(int(val))

__init__(lenient=True)

Private constructor to initialize the singleton instance.

Source code in lodstorage/sqlite_api.py
75
76
77
78
79
def __init__(self, lenient: bool = True):
    """Private constructor to initialize the singleton instance."""
    self.adapter = DatetimeAdapter(lenient=lenient)
    self.register_converters()
    self.register_adapters()

adapt_boolean(val) staticmethod

Adapt boolean to int.

Source code in lodstorage/sqlite_api.py
111
112
113
114
@staticmethod
def adapt_boolean(val: bool):
    """Adapt boolean to int."""
    return 1 if val else 0

adapt_date_iso(val) staticmethod

Adapt datetime.date to ISO 8601 date.

Source code in lodstorage/sqlite_api.py
101
102
103
104
@staticmethod
def adapt_date_iso(val: datetime.date):
    """Adapt datetime.date to ISO 8601 date."""
    return val.isoformat()

adapt_datetime_iso(val) staticmethod

Adapt datetime.datetime to timezone-naive ISO 8601 date.

Source code in lodstorage/sqlite_api.py
106
107
108
109
@staticmethod
def adapt_datetime_iso(val: datetime.datetime):
    """Adapt datetime.datetime to timezone-naive ISO 8601 date."""
    return val.isoformat()

convert_boolean(val) staticmethod

Convert 0 or 1 to boolean.

Source code in lodstorage/sqlite_api.py
116
117
118
119
@staticmethod
def convert_boolean(val: bytes):
    """Convert 0 or 1 to boolean."""
    return bool(int(val))

install(lenient=True) classmethod

Install the singleton instance and register SQLite adapters and converters.

Source code in lodstorage/sqlite_api.py
81
82
83
84
85
86
@classmethod
def install(cls, lenient: bool = True):
    """Install the singleton instance and register SQLite adapters and converters."""
    if cls._instance is None:
        cls._instance = cls(lenient=lenient)
    return cls._instance

register_adapters()

Register the necessary SQLite adapters.

Source code in lodstorage/sqlite_api.py
88
89
90
91
92
def register_adapters(self):
    """Register the necessary SQLite adapters."""
    sqlite3.register_adapter(datetime.date, self.adapt_date_iso)
    sqlite3.register_adapter(datetime.datetime, self.adapt_datetime_iso)
    sqlite3.register_adapter(bool, self.adapt_boolean)

register_converters()

Register the necessary SQLite converters.

Source code in lodstorage/sqlite_api.py
94
95
96
97
98
99
def register_converters(self):
    """Register the necessary SQLite converters."""
    sqlite3.register_converter("date", self.adapter.convert_date)
    sqlite3.register_converter("datetime", self.adapter.convert_datetime)
    sqlite3.register_converter("timestamp", self.adapter.convert_timestamp)
    sqlite3.register_converter("boolean", self.convert_boolean)

storageconfig

Created on 2020-08-29

@author: wf

StorageConfig

Bases: object

a storage configuration

Source code in lodstorage/storageconfig.py
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
class StorageConfig(object):
    """
    a storage configuration
    """

    def getCachePath(self, ensureExists=True) -> str:
        """
        get the path to the default cache directory

        Args:
            ensureExists(bool): if True create the cache directory if it does not exist

        Returns:
            str: the path to the cache directory
        """
        cachedir = f"{self.cacheRootDir}/.{self.cacheDirName}"
        if ensureExists:
            # exist_ok avoids a race condition when two processes
            # create the cache directory at the same time
            os.makedirs(cachedir, exist_ok=True)
        return cachedir

    def __init__(
        self,
        mode=StoreMode.SQL,
        cacheRootDir: str = None,
        cacheDirName: str = "lodstorage",
        cacheFile=None,
        withShowProgress=True,
        profile=True,
        debug=False,
        errorDebug=True,
    ):
        """
        Constructor

        Args:
            mode(StoreMode): the storage mode e.g. sql
            cacheRootDir(str): the cache root directory to use - if None the home directory will be used
            cacheDirName(str): the name of the cache directory (prefixed with a dot)
            cacheFile(string): the common cacheFile to use (if any)
            withShowProgress(boolean): True if progress should be shown
            profile(boolean): True if timing / profiling information should be shown
            debug(boolean): True if debugging information should be shown
            errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
        """
        if cacheRootDir is None:
            self.cacheRootDir = str(Path.home())
        else:
            self.cacheRootDir = cacheRootDir
        self.cacheDirName = cacheDirName
        self.mode = mode
        self.cacheFile = cacheFile
        self.profile = profile
        self.withShowProgress = withShowProgress
        self.debug = debug
        self.errorDebug = errorDebug

    @staticmethod
    def getDefault(debug=False):
        """get the default storage configuration (SQL)"""
        return StorageConfig.getSQL(debug)

    @staticmethod
    def getSQL(debug=False):
        """get a SQL storage configuration"""
        config = StorageConfig(mode=StoreMode.SQL, debug=debug)
        config.tableName = None
        return config

    @staticmethod
    def getJSON(debug=False):
        """get a JSON storage configuration"""
        config = StorageConfig(mode=StoreMode.JSON, debug=debug)
        return config

    @staticmethod
    def getJsonPickle(debug=False):
        """get a JsonPickle storage configuration"""
        config = StorageConfig(mode=StoreMode.JSONPICKLE, debug=debug)
        return config

    @staticmethod
    def getSPARQL(prefix, endpoint, host, debug=False):
        """
        get a SPARQL storage configuration

        Args:
            prefix(str): the SPARQL prefix declarations to use
            endpoint(str): the SPARQL endpoint to use
            host(str): the host to use
            debug(bool): True if debugging information should be shown
        """
        config = StorageConfig(mode=StoreMode.SPARQL, debug=debug)
        config.prefix = prefix
        config.host = host
        config.endpoint = endpoint
        return config

    @staticmethod
    def getYaml(debug=False):
        """get a YAML storage configuration"""
        config = StorageConfig(mode=StoreMode.YAML, debug=debug)
        return config

__init__(mode=StoreMode.SQL, cacheRootDir=None, cacheDirName='lodstorage', cacheFile=None, withShowProgress=True, profile=True, debug=False, errorDebug=True)

Constructor

Parameters:

Name Type Description Default
mode(StoreMode)

the storage mode e.g. sql

required
cacheRootDir(str)

the cache root directory to use - if None the home directory will be used

required
cacheFile(string)

the common cacheFile to use (if any)

required
withShowProgress(boolean)

True if progress should be shown

required
profile(boolean)

True if timing / profiling information should be shown

required
debug(boolean)

True if debugging information should be shown

required
errorDebug(boolean)

True if debug info should be provided on errors (should not be used for production since it might reveal data)

required
Source code in lodstorage/storageconfig.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def __init__(
    self,
    mode=StoreMode.SQL,
    cacheRootDir: str = None,
    cacheDirName: str = "lodstorage",
    cacheFile=None,
    withShowProgress=True,
    profile=True,
    debug=False,
    errorDebug=True,
):
    """
    Constructor

    Args:
        mode(StoreMode): the storage mode e.g. sql
        cacheRootDir(str): the cache root directory to use - if None the home directory will be used
        cacheFile(string): the common cacheFile to use (if any)
        withShowProgress(boolean): True if progress should be shown
        profile(boolean): True if timing / profiling information should be shown
        debug(boolean): True if debugging information should be shown
        errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
    """
    if cacheRootDir is None:
        home = str(Path.home())
        self.cacheRootDir = f"{home}"
    else:
        self.cacheRootDir = cacheRootDir
    self.cacheDirName = cacheDirName
    self.mode = mode
    self.cacheFile = cacheFile
    self.profile = profile
    self.withShowProgress = withShowProgress
    self.debug = debug
    self.errorDebug = errorDebug

getCachePath(ensureExists=True)

get the path to the default cache

Parameters:

Name Type Description Default
ensureExists(bool)

True if the cache directory should be created if it does not exist

required
Source code in lodstorage/storageconfig.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
def getCachePath(self, ensureExists=True) -> str:
    """
    get the path to the default cache

    Args:
        name(str): the name of the cache to use
    """

    cachedir = f"{self.cacheRootDir}/.{self.cacheDirName}"

    if ensureExists:
        if not os.path.exists(cachedir):
            os.makedirs(cachedir)
    return cachedir

StoreMode

Bases: Enum

possible supported storage modes

Source code in lodstorage/storageconfig.py
12
13
14
15
16
17
18
19
20
21
class StoreMode(Enum):
    """
    possible supported storage modes
    """

    JSONPICKLE = 1  # JSON Pickle
    JSON = 2  # plain JSON
    SQL = 3  # SQL database
    SPARQL = 4  # SPARQL endpoint
    YAML = 5  # YAML

sync

Created on 2023-12-27

@author: wf

Sync

A class to help with synchronization between two sets of data, each represented as a list of dictionaries.

Source code in lodstorage/sync.py
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
class Sync:
    """
    A class to help with synchronization between two sets of data, each represented as a list of dictionaries.
    """

    def __init__(self, pair: SyncPair):
        """
        Initialize the Sync class with the given Synchronization Pair.

        Args:
            pair(SyncPair): the pair of data sources to synchronize
        """
        self.pair = pair
        # accepted symbols/aliases for directions and sides
        self.directions = ["←", "↔", "→"]
        self.sides = {"left": ["←", "l", "left"], "right": ["→", "r", "right"]}
        self.sync_dict = self._create_sync_dict()

    def handle_direction_error(self, direction: str):
        """
        raise a ValueError for the given invalid direction

        Args:
            direction (str): the invalid direction

        Raises:
            ValueError: always
        """
        invalid_direction_msg = (
            f"Invalid direction '{direction}'. Use {', '.join(self.directions)}."
        )
        raise ValueError(invalid_direction_msg)

    def handle_side_error(self, side: str):
        """
        raise a ValueError for the given invalid side

        Args:
            side (str): the invalid side

        Raises:
            ValueError: always
        """
        invalid_side_msg = f"Invalid side '{side}'. Use {', '.join(self.sides['left'])} for left or {', '.join(self.sides['right'])} for right."
        raise ValueError(invalid_side_msg)

    def _create_sync_dict(self) -> dict:
        """
        Create a dictionary representing the synchronization state between left and right data sources.
        """
        l_keys = {d[self.pair.l_key] for d in self.pair.l_data if self.pair.l_key in d}
        r_keys = {d[self.pair.r_key] for d in self.pair.r_data if self.pair.r_key in d}

        sync_dict = {
            "←": r_keys - l_keys,  # Present in right but not in left
            "↔": l_keys.intersection(r_keys),  # Present in both
            "→": l_keys - r_keys,  # Present in left but not in right
        }
        return sync_dict

    def get_record_by_pkey(self, side: str, pkey: str) -> Optional[Dict[str, Any]]:
        """
        Retrieves a record by primary key from the appropriate data source as specified by direction.

        Args:
            side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
            pkey (str): The primary key of the record to retrieve.

        Returns:
            Optional[Dict[str, Any]]: The record if found, otherwise None.
        """
        record = None
        if side in self.sides["left"]:  # retrieve from left
            record = self.pair.l_by_pkey.get(pkey)
        elif side in self.sides["right"]:  # retrieve from right
            record = self.pair.r_by_pkey.get(pkey)
        else:
            self.handle_side_error(side)
        return record

    def get_record_by_key(self, side: str, key: str) -> dict:
        """
        Retrieves a record by the given unique key from the appropriate data source as specified by direction.

        Args:
            side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
            key (str): The unique key of the record to retrieve.

        Returns:
            Optional[Dict[str, Any]]: The record if found, otherwise None.

        Raises:
            ValueError: If the provided direction is invalid.
        """
        record = None
        # use the shared side alias lists for consistency with get_record_by_pkey
        # (previously the alias lists were duplicated here as literals)
        if side in self.sides["left"]:
            record = next(
                (item for item in self.pair.l_data if item[self.pair.l_key] == key),
                None,
            )
        elif side in self.sides["right"]:
            record = next(
                (item for item in self.pair.r_data if item[self.pair.r_key] == key),
                None,
            )
        else:
            self.handle_side_error(side)
        return record

    def get_keys(self, direction: str) -> set:
        """
        Get the keys for a given direction of synchronization.

        Raises:
            ValueError: If the provided direction is invalid.
        """
        if direction in self.sync_dict:
            return self.sync_dict[direction]
        else:
            self.handle_direction_error(direction)

    def status_table(self, tablefmt: str = "grid") -> str:
        """
        Create a table representing the synchronization status.

        Args:
            tablefmt (str): the tabulate table format to use

        Returns:
            str: the synchronization status rendered via tabulate
        """
        total_records = sum(len(keys) for keys in self.sync_dict.values())
        if total_records == 0:  # Avoid division by zero
            total_records = 1

        table_data = []
        for direction, keys in self.sync_dict.items():
            num_records = len(keys)
            percentage = (num_records / total_records) * 100
            table_data.append(
                {
                    "left": self.pair.l_name,
                    "↔": direction,
                    "right": self.pair.r_name,
                    "#": num_records,
                    "%": f"{percentage:7.2f}%",
                }
            )

        markup = tabulate(
            table_data,
            headers="keys",
            tablefmt=tablefmt,
            colalign=("right", "center", "left", "right", "right"),
        )
        return markup

__init__(pair)

Initialize the Sync class with the given Synchronization Pair.

Source code in lodstorage/sync.py
66
67
68
69
70
71
72
73
def __init__(self, pair: SyncPair):
    """
    Initialize the Sync class with the given Synchronization Pair.

    Args:
        pair (SyncPair): the left/right data sources to be kept in sync
    """
    self.pair = pair
    # precompute the key sets per direction (see _create_sync_dict)
    self.sync_dict = self._create_sync_dict()
    self.directions = ["←", "↔", "→"]
    # accepted aliases for addressing each side of the pair
    self.sides = {"left": ["←", "l", "left"], "right": ["→", "r", "right"]}

get_keys(direction)

Get the keys for a given direction of synchronization.

Source code in lodstorage/sync.py
148
149
150
151
152
153
154
155
def get_keys(self, direction: str) -> set:
    """
    Return the set of keys recorded for the given synchronization direction.
    """
    # unknown directions are delegated to the error handler
    if direction not in self.sync_dict:
        self.handle_direction_error(direction)
        return None
    return self.sync_dict[direction]

get_record_by_key(side, key)

Retrieves a record by the given unique key from the appropriate data source as specified by direction.

Parameters:

Name Type Description Default
side str

The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.

required
key str

The unique key of the record to retrieve.

required

Returns:

Type Description
dict

Optional[Dict[str, Any]]: The record if found, otherwise None.

Raises:

Type Description
ValueError

If the provided direction is invalid.

Source code in lodstorage/sync.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def get_record_by_key(self, side: str, key: str) -> dict:
    """
    Retrieves a record by the given unique key from the appropriate data source as specified by direction.

    Args:
        side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
        key (str): The unique key of the record to retrieve.

    Returns:
        Optional[Dict[str, Any]]: The record if found, otherwise None.

    Raises:
        ValueError: If the provided direction is invalid.
    """
    # resolve the alias to the data list and key field of the chosen side
    if side in ["←", "l", "left"]:
        rows, key_field = self.pair.l_data, self.pair.l_key
    elif side in ["→", "r", "right"]:
        rows, key_field = self.pair.r_data, self.pair.r_key
    else:
        self.handle_side_error(side)
        return None
    # linear scan for the first matching record
    for row in rows:
        if row[key_field] == key:
            return row
    return None

get_record_by_pkey(side, pkey)

Retrieves a record by primary key from the appropriate data source as specified by direction.

Parameters:

Name Type Description Default
side str

The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.

required
pkey str

The primary key of the record to retrieve.

required

Returns:

Type Description
Optional[Dict[str, Any]]

Optional[Dict[str, Any]]: The record if found, otherwise None.

Source code in lodstorage/sync.py
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def get_record_by_pkey(self, side: str, pkey: str) -> Optional[Dict[str, Any]]:
    """
    Retrieves a record by primary key from the appropriate data source as specified by direction.

    Args:
        side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
        pkey (str): The primary key of the record to retrieve.

    Returns:
        Optional[Dict[str, Any]]: The record if found, otherwise None.
    """
    # pick the precomputed pkey lookup dictionary for the requested side
    if side in self.sides["left"]:
        lookup = self.pair.l_by_pkey
    elif side in self.sides["right"]:
        lookup = self.pair.r_by_pkey
    else:
        self.handle_side_error(side)
        return None
    return lookup.get(pkey)

status_table(tablefmt='grid')

Create a table representing the synchronization status.

Source code in lodstorage/sync.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
def status_table(self, tablefmt: str = "grid") -> str:
    """
    Create a table representing the synchronization status.
    """
    # guard against division by zero when no keys exist at all
    denominator = sum(len(keys) for keys in self.sync_dict.values()) or 1

    table_data = [
        {
            "left": self.pair.l_name,
            "↔": direction,
            "right": self.pair.r_name,
            "#": len(keys),
            "%": f"{(len(keys) / denominator) * 100:7.2f}%",
        }
        for direction, keys in self.sync_dict.items()
    ]

    return tabulate(
        table_data,
        headers="keys",
        tablefmt=tablefmt,
        colalign=("right", "center", "left", "right", "right"),
    )

SyncPair dataclass

A class to represent a pair of data sources for synchronization.

Attributes: title (str): The title of the synchronization pair. l_name (str): Name of the left data source (e.g., 'local'). r_name (str): Name of the right data source (e.g., 'wikidata'). l_data (List[Dict[str, Any]]): A list of dictionaries from the left data source. r_data (List[Dict[str, Any]]): A list of dictionaries from the right data source. l_key (str): The field name in the left data source dictionaries used as a unique identifier for synchronization. r_key (str): The field name in the right data source dictionaries used as a unique identifier for synchronization. l_pkey(str): the primary key field of the left data source r_pkey(str): the primary key field of the right data source

Example usage: l_data = [{'id_l': '1', 'value': 'a'}, {'id_l': '2', 'value': 'b'}] r_data = [{'id_r': '2', 'value': 'b'}, {'id_r': '3', 'value': 'c'}] pair = SyncPair("Title", "local", "wikidata", l_data, r_data, 'id_l', 'id_r') sync = Sync(pair) print(sync.status_table())

Source code in lodstorage/sync.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
@dataclass
class SyncPair:
    """
    A pair of data sources to be synchronized.

    Attributes:
        title (str): The title of the synchronization pair.
        l_name (str): Name of the left data source (e.g., 'local').
        r_name (str): Name of the right data source (e.g., 'wikidata').
        l_data (List[Dict[str, Any]]): A list of dictionaries from the left data source.
        r_data (List[Dict[str, Any]]): A list of dictionaries from the right data source.
        l_key (str): The field name in the left data source dictionaries used as a unique identifier for synchronization.
        r_key (str): The field name in the right data source dictionaries used as a unique identifier for synchronization.
        l_pkey (str): the primary key field of the left data source
        r_pkey (str): the primary key field of the right data source

    Example usage:
        l_data = [{'id_l': '1', 'value': 'a'}, {'id_l': '2', 'value': 'b'}]
        r_data = [{'id_r': '2', 'value': 'b'}, {'id_r': '3', 'value': 'c'}]
        pair = SyncPair("Title", "local", "wikidata", l_data, r_data, 'id_l', 'id_r')
        sync = Sync(pair)
        print(sync.status_table())
    """

    title: str
    l_name: str
    r_name: str
    l_data: List[Dict[str, Any]]
    r_data: List[Dict[str, Any]]
    l_key: str
    r_key: str
    l_pkey: Optional[str] = None
    r_pkey: Optional[str] = None
    # lookup dictionaries for O(1) access by primary key, built in __post_init__
    l_by_pkey: Dict[str, Dict[str, Any]] = field(init=False)
    r_by_pkey: Dict[str, Dict[str, Any]] = field(init=False)

    def __post_init__(self):
        # fall back to the sync key fields when no explicit primary keys were given
        if self.l_pkey is None:
            self.l_pkey = self.l_key
        if self.r_pkey is None:
            self.r_pkey = self.r_key
        # index each side by its primary key; records lacking the field are skipped
        self.l_by_pkey = {
            record[self.l_pkey]: record
            for record in self.l_data
            if self.l_pkey in record
        }
        self.r_by_pkey = {
            record[self.r_pkey]: record
            for record in self.r_data
            if self.r_pkey in record
        }

tabulateCounter

Created on 2021-06-13

@author: wf

TabulateCounter

Bases: object

helper for tabulating Counters

Source code in lodstorage/tabulateCounter.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
class TabulateCounter(object):
    """
    helper for tabulating Counters
    """

    def __init__(self, counter):
        """
        Constructor

        Args:
            counter (Counter): the Counter whose entries will be tabulated
        """
        self.counter = counter

    def mostCommonTable(
        self, headers=None, tablefmt="pretty", limit=50
    ):
        """
        get the most common Table

        Args:
            headers (list): the column headers; defaults to ["#", "key", "count", "%"]
            tablefmt (str): the tabulate table format
            limit (int): maximum number of bins to include

        Returns:
            str: the tabulated most-common entries preceded by a total summary row
        """
        # avoid a shared mutable default argument
        if headers is None:
            headers = ["#", "key", "count", "%"]
        bins = len(self.counter.keys())
        limit = min(bins, limit)
        total = sum(self.counter.values())
        # guard against division by zero for an empty or all-zero counter
        denominator = total if total else 1
        binTable = [("total", bins, total)]
        for i, bintuple in enumerate(self.counter.most_common(limit)):
            key, count = bintuple
            binTable.append((i + 1, key, count, count / denominator * 100.0))

        table = tabulate(binTable, headers=headers, tablefmt=tablefmt, floatfmt=".2f")
        return table

__init__(counter)

Constructor

Source code in lodstorage/tabulateCounter.py
15
16
17
18
19
def __init__(self, counter):
    """
    Constructor

    Args:
        counter (Counter): the Counter whose entries will be tabulated
    """
    self.counter = counter

mostCommonTable(headers=['#', 'key', 'count', '%'], tablefmt='pretty', limit=50)

get the most common Table

Source code in lodstorage/tabulateCounter.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def mostCommonTable(
    self, headers=["#", "key", "count", "%"], tablefmt="pretty", limit=50
):
    """
    get the most common Table

    Args:
        headers (list): the column headers to use
        tablefmt (str): the tabulate table format
        limit (int): maximum number of bins to include

    Returns:
        str: the tabulated most-common entries preceded by a total summary row
    """
    bins = len(self.counter.keys())
    limit = min(bins, limit)
    total = sum(self.counter.values())
    # guard against division by zero for an empty or all-zero counter
    denominator = total if total else 1
    binTable = [("total", bins, total)]
    for i, bintuple in enumerate(self.counter.most_common(limit)):
        key, count = bintuple
        binTable.append((i + 1, key, count, count / denominator * 100.0))

    table = tabulate(binTable, headers=headers, tablefmt=tablefmt, floatfmt=".2f")
    return table

uml

Created on 2020-09-04

@author: wf

UML

Bases: object

UML diagrams via plantuml

Source code in lodstorage/uml.py
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
class UML(object):
    """
    UML diagrams via plantuml

    """

    skinparams = """
' BITPlan Corporate identity skin params
' Copyright (c) 2015-2020 BITPlan GmbH
' see http://wiki.bitplan.com/PlantUmlSkinParams#BITPlanCI
' skinparams generated by com.bitplan.restmodelmanager
skinparam note {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam component {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam package {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam usecase {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam activity {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam classAttribute {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam interface {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam class {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam object {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
hide Circle
' end of skinparams '
"""

    def __init__(self, debug=False):
        """
        Constructor
        Args:
            debug(boolean): True if debug information should be shown
        """
        self.debug = debug

    def tableListToPlantUml(
        self, tableList, title=None, packageName=None, generalizeTo=None, withSkin=True
    ):
        """
        convert tableList to PlantUml notation

        Args:
            tableList(list): the tableList list of Dicts from getTableList() to convert
            title(string): optional title to be added
            packageName(string): optional packageName to be added
            generalizeTo(string): optional name of a general table to be derived
            withSkin(boolean): if True add default BITPlan skin parameters

        Returns:
            string: the Plantuml notation for the entities in columns of the given tablelist
        """
        uml = ""
        indent = ""
        inherit = ""
        if title is not None:
            uml += "title\n%s\nend title\n" % title
        if packageName is not None:
            uml += "package %s {\n" % packageName
            indent = "  "
        if generalizeTo is not None:
            generalTable = Schema.getGeneral(tableList, generalizeTo)
            for table in tableList:
                inherit += "%s%s <|-- %s\n" % (indent, generalizeTo, table["name"])
            tableList.insert(0, generalTable)
        for table in tableList:
            classUml = ""
            sortedColumns = sorted(table["columns"], key=lambda col: col["name"])
            for col in sortedColumns:
                mandatory = "*" if col["notnull"] == 1 else ""
                pk = " <<PK>>" if col["pk"] == 1 else ""
                colName = col["name"].strip()
                colType = col["type"].strip()
                if "link" in col:
                    colName = col["link"]
                colUml = f"{indent} {mandatory}{colName} : {colType}{pk}\n"
                classUml += colUml
            tableName = table["name"]
            if "notes" in table:
                uml += "Note top of %s\n%s\nEnd note\n" % (tableName, table["notes"])
            uml += f"{indent}class {tableName} << Entity >> {{\n{classUml}{indent}}}\n"
        uml += inherit
        if packageName is not None:
            uml += "}\n"
        if withSkin:
            uml += UML.skinparams
        return uml

    def mergeSchema(
        self,
        schemaManager,
        tableList,
        title=None,
        packageName=None,
        generalizeTo=None,
        withSkin=True,
    ):
        """
        merge Schema and tableList to PlantUml notation

        Args:
            schemaManager(SchemaManager): a schema manager to be used
            tableList(list): the tableList list of Dicts from getTableList() to convert
            title(string): optional title to be added
            packageName(string): optional packageName to be added
            generalizeTo(string): optional name of a general table to be derived
            withSkin(boolean): if True add default BITPlan skin parameters

        Returns:
            string: the Plantuml notation for the entities in columns of the given tablelist

        """
        if schemaManager is not None:
            for table in tableList:
                if "schema" in table:
                    schema = schemaManager.schemasByName[table["schema"]]
                    url = "%s/%s" % (schemaManager.baseUrl, schema.name)
                    url = url.replace(" ", "_")  # mediawiki
                    instanceNote = ""
                    if "instances" in table:
                        instanceNote = "\n%d instances " % (table["instances"])
                    table["notes"] = """[[%s %s]]%s""" % (
                        url,
                        schema.name,
                        instanceNote,
                    )
                    for col in table["columns"]:
                        colName = col["name"]
                        if colName in schema.propsByName:
                            prop = schema.propsByName[colName]
                            if prop.iri is not None:
                                tooltip = ""
                                if prop.definition is not None:
                                    tooltip = "{%s}" % prop.definition
                                col["link"] = "[[%s%s %s]]" % (
                                    prop.iri,
                                    tooltip,
                                    colName,
                                )
                                col["special"] = True  # keep column even if generalized
                    pass
        plantuml = self.tableListToPlantUml(
            tableList,
            title=title,
            packageName=packageName,
            generalizeTo=generalizeTo,
            withSkin=withSkin,
        )
        return plantuml

__init__(debug=False)

Constructor. Args: debug (boolean) — True if debug information should be shown.

Source code in lodstorage/uml.py
 97
 98
 99
100
101
102
103
def __init__(self, debug: bool = False):
    """
    Constructor
    Args:
        debug(boolean): True if debug information should be shown
    """
    self.debug = debug

mergeSchema(schemaManager, tableList, title=None, packageName=None, generalizeTo=None, withSkin=True)

merge Schema and tableList to PlantUml notation

Parameters:

Name Type Description Default
schemaManager(SchemaManager)

a schema manager to be used

required
tableList(list)

the tableList list of Dicts from getTableList() to convert

required
title(string)

optional title to be added

required
packageName(string)

optional packageName to be added

required
generalizeTo(string)

optional name of a general table to be derived

required
withSkin(boolean)

if True add default BITPlan skin parameters

required

Returns:

Name Type Description
string

the Plantuml notation for the entities in columns of the given tablelist

Source code in lodstorage/uml.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
def mergeSchema(
    self,
    schemaManager,
    tableList,
    title=None,
    packageName=None,
    generalizeTo=None,
    withSkin=True,
):
    """
    merge Schema and tableList to PlantUml notation

    Args:
        schemaManager(SchemaManager): a schema manager to be used
        tableList(list): the tableList list of Dicts from getTableList() to convert
        title(string): optional title to be added
        packageName(string): optional packageName to be added
        generalizeTo(string): optional name of a general table to be derived
        withSkin(boolean): if True add default BITPlan skin parameters

    Returns:
        string: the Plantuml notation for the entities in columns of the given tablelist

    """
    if schemaManager is not None:
        # enrich each table with notes and column links derived from its schema
        for table in tableList:
            if "schema" in table:
                schema = schemaManager.schemasByName[table["schema"]]
                url = "%s/%s" % (schemaManager.baseUrl, schema.name)
                url = url.replace(" ", "_")  # mediawiki
                instanceNote = ""
                if "instances" in table:
                    instanceNote = "\n%d instances " % (table["instances"])
                table["notes"] = """[[%s %s]]%s""" % (
                    url,
                    schema.name,
                    instanceNote,
                )
                for col in table["columns"]:
                    colName = col["name"]
                    if colName in schema.propsByName:
                        prop = schema.propsByName[colName]
                        if prop.iri is not None:
                            tooltip = ""
                            if prop.definition is not None:
                                tooltip = "{%s}" % prop.definition
                            col["link"] = "[[%s%s %s]]" % (
                                prop.iri,
                                tooltip,
                                colName,
                            )
                            col["special"] = True  # keep column even if generalized
                pass
    # delegate the actual diagram generation to tableListToPlantUml
    plantuml = self.tableListToPlantUml(
        tableList,
        title=title,
        packageName=packageName,
        generalizeTo=generalizeTo,
        withSkin=withSkin,
    )
    return plantuml

tableListToPlantUml(tableList, title=None, packageName=None, generalizeTo=None, withSkin=True)

convert tableList to PlantUml notation

Parameters:

Name Type Description Default
tableList(list)

the tableList list of Dicts from getTableList() to convert

required
title(string)

optional title to be added

required
packageName(string)

optional packageName to be added

required
generalizeTo(string)

optional name of a general table to be derived

required
withSkin(boolean)

if True add default BITPlan skin parameters

required

Returns:

Name Type Description
string

the Plantuml notation for the entities in columns of the given tablelist

Source code in lodstorage/uml.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def tableListToPlantUml(
    self, tableList, title=None, packageName=None, generalizeTo=None, withSkin=True
):
    """
    convert tableList to PlantUml notation

    Args:
        tableList(list): the tableList list of Dicts from getTableList() to convert
        title(string): optional title to be added
        packageName(string): optional packageName to be added
        generalizeTo(string): optional name of a general table to be derived
        withSkin(boolean): if True add default BITPlan skin parameters

    Returns:
        string: the Plantuml notation for the entities in columns of the given tablelist
    """
    parts = []
    indent = "  " if packageName is not None else ""
    inherit = ""
    if title is not None:
        parts.append("title\n%s\nend title\n" % title)
    if packageName is not None:
        parts.append("package %s {\n" % packageName)
    if generalizeTo is not None:
        # derive a common base table and draw inheritance arrows to it;
        # NOTE: this mutates the caller's tableList by inserting the base table
        generalTable = Schema.getGeneral(tableList, generalizeTo)
        for table in tableList:
            inherit += "%s%s <|-- %s\n" % (indent, generalizeTo, table["name"])
        tableList.insert(0, generalTable)
    for table in tableList:
        classUml = ""
        # render columns alphabetically; "*" marks NOT NULL, <<PK>> the primary key
        for col in sorted(table["columns"], key=lambda c: c["name"]):
            mandatory = "*" if col["notnull"] == 1 else ""
            pk = " <<PK>>" if col["pk"] == 1 else ""
            colName = col["link"] if "link" in col else col["name"].strip()
            colType = col["type"].strip()
            classUml += f"{indent} {mandatory}{colName} : {colType}{pk}\n"
        tableName = table["name"]
        if "notes" in table:
            parts.append("Note top of %s\n%s\nEnd note\n" % (tableName, table["notes"]))
        parts.append(f"{indent}class {tableName} << Entity >> {{\n{classUml}{indent}}}\n")
    parts.append(inherit)
    if packageName is not None:
        parts.append("}\n")
    if withSkin:
        parts.append(UML.skinparams)
    return "".join(parts)

version

Created on 2022-03-06

@author: wf

Version

Bases: object

Version handling for pyLoDStorage

Source code in lodstorage/version.py
10
11
12
13
14
15
16
17
18
19
class Version(object):
    """
    Version handling for pyLoDStorage
    """

    # project metadata, e.g. for command line version display
    name = "pylodstorage"
    # single source of truth: the package's __version__ attribute
    version = lodstorage.__version__
    date = "2020-09-10"
    updated = "2025-12-01"
    description = "python List of Dict (Table) Storage library"

yaml_path

Created on 2025-11-23

@author: wf

YamlPath

provide paths for loading configuration or data files by checking a provided path and, optionally, a user-specific location (~/.pylodstorage).

Source code in lodstorage/yaml_path.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
class YamlPath:
    """
    provide path to loading configuration or data files by checking:
    - a provided path or an optional user-specific location (~/.pylodstorage).
    """

    @classmethod
    def getSamplePath(cls, yamlFileName: str) -> str:
        """
        Get the path to the sample YAML file usually located in ../sampledata relative to this script.

        Args:
            yamlFileName (str): The name of the YAML file.

        Returns:
            str: The absolute path to the sample file.
        """
        script_dir = os.path.dirname(__file__)
        return os.path.abspath(
            os.path.join(script_dir, "..", "sampledata", yamlFileName)
        )

    @classmethod
    def getDefaultPath(cls, yamlFileName: str) -> str:
        """
        Get the path to the YAML file in the default user home location (.pylodstorage).

        Args:
            yamlFileName (str): The name of the YAML file.

        Returns:
            str: The full path to the file in the user's home directory.
        """
        return f"{Path.home()}/.pylodstorage/{yamlFileName}"

    @classmethod
    def getPaths(
        cls, yamlFileName: str, yamlPath: str = None, with_default: bool = True
    ):
        """
        Get a list of YAML file paths to be used for loading configuration/data.

        Args:
            yamlFileName (str): The name of the YAML file.
            yamlPath (str, optional): The full path to read from. Defaults to None (uses getSamplePath).
            with_default (bool, optional): Whether to include paths from the default location .pylodstorage in the Home directory. Defaults to True.

        Returns:
            list: A list of file paths found.
        """
        primary = yamlPath if yamlPath is not None else cls.getSamplePath(yamlFileName)
        paths = [primary]
        if with_default:
            # the home directory copy is only added if it actually exists
            home_candidate = cls.getDefaultPath(yamlFileName)
            if os.path.isfile(home_candidate):
                paths.append(home_candidate)
        return paths

getDefaultPath(yamlFileName) classmethod

Get the path to the YAML file in the default user home location (.pylodstorage).

Parameters:

Name Type Description Default
yamlFileName str

The name of the YAML file.

required

Returns:

Name Type Description
str str

The full path to the file in the user's home directory.

Source code in lodstorage/yaml_path.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
@classmethod
def getDefaultPath(cls, yamlFileName: str) -> str:
    """
    Get the path to the YAML file in the default user home location (.pylodstorage).

    Args:
        yamlFileName (str): The name of the YAML file.

    Returns:
        str: The full path to the file in the user's home directory.
    """
    return f"{Path.home()}/.pylodstorage/{yamlFileName}"

getPaths(yamlFileName, yamlPath=None, with_default=True) classmethod

Get a list of YAML file paths to be used for loading configuration/data.

Parameters:

Name Type Description Default
yamlFileName str

The name of the YAML file.

required
yamlPath str

The full path to read from. Defaults to None (uses getSamplePath).

None
with_default bool

Whether to include paths from the default location .pylodstorage in the Home directory. Defaults to True.

True

Returns:

Name Type Description
list

A list of file paths found.

Source code in lodstorage/yaml_path.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
@classmethod
def getPaths(
    cls, yamlFileName: str, yamlPath: str = None, with_default: bool = True
):
    """
    Get a list of YAML file paths to be used for loading configuration/data.

    Args:
        yamlFileName (str): The name of the YAML file.
        yamlPath (str, optional): The full path to read from. Defaults to None (uses getSamplePath).
        with_default (bool, optional): Whether to include paths from the default location .pylodstorage in the Home directory. Defaults to True.

    Returns:
        list: A list of file paths found.
    """
    primary = yamlPath if yamlPath is not None else cls.getSamplePath(yamlFileName)
    paths = [primary]
    if with_default:
        # the home directory copy is only added if it actually exists
        home_candidate = cls.getDefaultPath(yamlFileName)
        if os.path.isfile(home_candidate):
            paths.append(home_candidate)
    return paths

getSamplePath(yamlFileName) classmethod

Get the path to the sample YAML file usually located in ../sampledata relative to this script.

Parameters:

Name Type Description Default
yamlFileName str

The name of the YAML file.

required

Returns:

Name Type Description
str str

The absolute path to the sample file.

Source code in lodstorage/yaml_path.py
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
@classmethod
def getSamplePath(cls, yamlFileName: str) -> str:
    """
    Get the path to the sample YAML file usually located in ../sampledata relative to this script.

    Args:
        yamlFileName (str): The name of the YAML file.

    Returns:
        str: The absolute path to the sample file.
    """
    script_dir = os.path.dirname(__file__)
    return os.path.abspath(
        os.path.join(script_dir, "..", "sampledata", yamlFileName)
    )