Skip to content

pyLodStorage API Documentation

cache

Created on 2024-03-09

@author: wf

refactored from https://github.com/WolfgangFahl/pyCEURmake/blob/main/ceurws/utils/json_cache.py by Tim Holzheim

Cache

Represents cache metadata and its file extension.

Attributes:

Name Type Description
name str

The name of the cache.

extension str

The file extension for the cache (e.g., 'json', 'csv').

size int

The size of the cache file in bytes.

count Optional[int]

Optional; the number of items in the cache, if applicable.

count_attr str

the name of the attribute to determine the number of items, if applicable

last_accessed datetime

Optional; the last accessed timestamp of the cache.

Source code in lodstorage/cache.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
@lod_storable
class Cache:
    """
    Represents cache metadata and its file extension.

    Attributes:
        name: The name of the cache.
        extension: The file extension for the cache including the leading dot
            (e.g., '.json', '.csv') - it is concatenated directly to the name.
        size: The size of the cache file in bytes.
        count: Optional; the number of items in the cache, if applicable.
        count_attr: the name of the attribute to determine the number of items, if applicable
        last_accessed: Optional; the last accessed timestamp of the cache.
    """

    name: str
    extension: str
    # fix: None is not a valid str - declare as Optional
    count_attr: Optional[str] = None
    count: Optional[int] = None

    def set_path(self, base_path: str):
        """
        Set my path based on the given base_path and ensure the parent directory is created.

        Args:
            base_path (str): The base path where the directory should be created.
        """
        # extension must carry its own leading dot (e.g. ".json")
        self.path = Path(f"{base_path}/{self.name}{self.extension}")
        # Ensure parent directory is created
        self.path.parent.mkdir(parents=True, exist_ok=True)

    @property
    def is_stored(self) -> bool:
        """Determines if the cache file exists and is not empty.

        Note: a file of exactly one byte is treated as empty.
        """
        return self.path.is_file() and self.path.stat().st_size > 1

    @property
    def size(self) -> int:
        """The size of the cache file in bytes, or 0 if the file does not exist."""
        cache_size = os.path.getsize(self.path) if os.path.isfile(self.path) else 0
        return cache_size

    @property
    def last_accessed(self) -> Optional[datetime]:
        """The modification timestamp of the cache file, or None if it does not exist.

        Note: this uses the file's *modification* time (getmtime), not its
        access time - despite the property name.
        """
        cache_last_accessed = (
            datetime.fromtimestamp(os.path.getmtime(self.path))
            if os.path.isfile(self.path)
            else None
        )
        return cache_last_accessed

is_stored: bool property

Determines if the cache file exists and is not empty.

set_path(base_path)

Set my path based on the given base_path and ensure the parent directory is created.

Parameters:

Name Type Description Default
base_path str

The base path where the directory should be created.

required
Source code in lodstorage/cache.py
40
41
42
43
44
45
46
47
48
49
def set_path(self, base_path: str):
    """
    Derive and store my cache file path under base_path, creating the
    parent directory if necessary.

    Args:
        base_path (str): The base path where the directory should be created.
    """
    cache_file = Path(f"{base_path}/{self.name}{self.extension}")
    self.path = cache_file
    # make sure the directory that will hold the cache file exists
    cache_file.parent.mkdir(parents=True, exist_ok=True)

CacheManager

Manages multiple cache files with various extensions.

Attributes:

Name Type Description
name str

The name used for the base directory where cache files are stored.

caches Dict[str, Cache]

A dictionary to track each cache's metadata.

Source code in lodstorage/cache.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
@lod_storable
class CacheManager:
    """Manages multiple cache files with various extensions.

    Attributes:
        name: The name used for the base directory where cache files are stored.
        caches: A dictionary to track each cache's metadata, keyed by "<name><ext>".
    """

    name: str
    caches: Dict[str, Cache] = field(default_factory=dict)

    def __post_init__(self):
        # base directory is resolved lazily in base_path()
        self.base_dir = None

    def base_path(self) -> str:
        """Fetches the base path for this cache manager and ensures the directory exists.

        Returns:
            The base path - a hidden directory named after self.name under
            base_dir (defaulting to the user's home directory).
        """
        if self.base_dir is None:
            self.base_dir = os.path.expanduser("~")
        base_path = os.path.join(self.base_dir, f".{self.name}")
        os.makedirs(base_path, exist_ok=True)
        return base_path

    def get_cache_by_name(self, lod_name, ext=".json") -> Cache:
        """
        Retrieves or creates a cache object by name and extension.

        Args:
            lod_name (str): The name of the cache to retrieve or create.
            ext (str): The file extension for the cache.

        Returns:
            Cache: An existing or newly created Cache object.
        """
        cache_key = lod_name + ext
        # fix: membership must be tested with the full key (name + extension);
        # the previous check `lod_name in self.caches` never matched, so a
        # fresh Cache was created on every call
        if cache_key in self.caches:
            cache = self.caches[cache_key]
        else:
            cache = Cache(lod_name, ext)
            self.caches[cache_key] = cache
        base_path = self.base_path()
        cache.set_path(base_path)
        return cache

    def load(
        self,
        lod_name: str,
        ext: str = ".json",
        cls: Optional[Type["YamlAble"]] = None,
        count_attr: Optional[str] = None,
    ) -> Union[List, Dict, None]:
        """
        Load data from a cache file. This method supports JSON and, if a relevant class is provided, other formats like YAML.

        Args:
            lod_name (str): The name of the list of dicts or class instances to read from cache.
            ext (str): The extension of the cache file, indicating the format (default is ".json").
            cls (Optional[Type[YamlAble]]): The class type for deserialization. This class must have
                load_from_json_file() or load_from_yaml_file() class methods, depending on the file extension.
            count_attr (Optional[str]): the name of the attribute of the loaded result used to update cache.count

        Returns:
            Union[List, Dict, None]: A list of dicts, a list of class instances, a single dict, or None if the cache is not stored.

        Raises:
            ValueError: if the extension is unsupported, or YAML loading is requested without a suitable cls.
        """
        cache = self.get_cache_by_name(lod_name, ext)
        cache.count_attr = count_attr
        result = None
        if cache.is_stored:
            if ext == ".json":
                # fix: check for the JSON loader; the previous code probed
                # for load_from_yaml_file and then called load_from_json_file
                if cls and hasattr(cls, "load_from_json_file"):
                    result = cls.load_from_json_file(cache.path)
                else:
                    with open(cache.path, encoding="utf-8") as json_file:
                        result = orjson.loads(json_file.read())
            elif ext == ".yaml":
                if cls and hasattr(cls, "load_from_yaml_file"):
                    result = cls.load_from_yaml_file(cache.path)
                else:
                    raise ValueError(
                        "YAML deserialization requires a cls parameter that is a subclass of YamlAble."
                    )
            else:
                raise ValueError(f"Unsupported file extension {ext} for loading.")

            # Dynamic count update based on count_attr if applicable
            if count_attr and hasattr(result, count_attr):
                cache.count = len(getattr(result, count_attr))
            elif isinstance(result, list):
                cache.count = len(result)

        return result

    def store(
        self,
        cache_name: str,
        data_to_store: Union[List, Dict],
        ext: str = ".json",
        count_attr: Optional[str] = None,
    ) -> Cache:
        """
        Stores data into a cache file, handling serialization based on the specified file extension.
        Supports JSON and YAML formats, and custom serialization for classes that provide specific
        serialization methods.

        Args:
            cache_name (str): The identifier for the cache where the data will be stored.
            data_to_store (Union[List, Dict]): The data to be stored in the cache. This can be a list of dictionaries,
                a single dictionary, or instances of data classes providing the serialization methods below.
            ext (str): The file extension indicating the serialization format (e.g., '.json', '.yaml').
                Defaults to '.json'.
            count_attr (Optional[str]): the name of the attribute of data_to_store used to update cache.count

        Returns:
            Cache: the cache metadata object for the stored data.

        Raises:
            ValueError: If the file extension is unsupported or if required serialization methods are missing.
        """
        cache = self.get_cache_by_name(cache_name, ext)
        cache.count_attr = count_attr
        # note: get_cache_by_name has already set the cache path -
        # the previous extra cache.set_path(self.base_path()) call was redundant

        if ext == ".json":
            if isinstance(data_to_store, list):
                # lists of dicts are serialized directly with orjson
                json_str = orjson.dumps(data_to_store, option=orjson.OPT_INDENT_2)
                with cache.path.open("wb") as json_file:
                    json_file.write(json_str)
            else:
                # non-list payloads must provide their own serializer
                if hasattr(data_to_store, "save_to_json_file"):
                    data_to_store.save_to_json_file(str(cache.path))
                else:
                    raise ValueError(
                        "JSON serialization requires a 'save_to_json_file' method"
                    )
        elif ext == ".yaml":
            if hasattr(data_to_store, "save_to_yaml_file"):
                data_to_store.save_to_yaml_file(str(cache.path))
            else:
                raise ValueError(
                    "YAML serialization requires a 'save_to_yaml_file' method."
                )
        else:
            raise ValueError(f"Unsupported file extension {ext}.")

        # Update cache metadata post storing
        if count_attr and hasattr(data_to_store, count_attr):
            cache.count = len(getattr(data_to_store, count_attr))
        elif isinstance(data_to_store, list):
            cache.count = len(data_to_store)

        return cache

base_path()

Fetches the base path for this cache manager.

Parameters:

None — this method takes no arguments; the path is derived from
`self.base_dir` (defaulting to the user's home directory) and `self.name`.

Returns:

Type Description
str

The base path

Source code in lodstorage/cache.py
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def base_path(self) -> str:
    """Fetches the base path for this cache manager and ensures the directory exists.

    Returns:
        str: The base path - a hidden directory named after self.name under
        self.base_dir (defaulting to the user's home directory).
    """
    # resolve the base directory lazily on first use
    if self.base_dir is None:
        self.base_dir = os.path.expanduser("~")
    base_path = os.path.join(self.base_dir, f".{self.name}")
    os.makedirs(base_path, exist_ok=True)
    return base_path

get_cache_by_name(lod_name, ext='.json')

Retrieves or creates a cache object by name and extension.

Parameters:

Name Type Description Default
cache_name str

The name of the cache to retrieve or create.

required
ext str

The file extension for the cache.

'.json'

Returns:

Name Type Description
Cache Cache

An existing or newly created Cache object.

Source code in lodstorage/cache.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def get_cache_by_name(self, lod_name, ext=".json") -> Cache:
    """
    Retrieves or creates a cache object by name and extension.

    Args:
        cache_name (str): The name of the cache to retrieve or create.
        ext (str): The file extension for the cache.

    Returns:
        Cache: An existing or newly created Cache object.
    """
    if lod_name in self.caches:
        cache = self.caches[lod_name + ext]
    else:
        cache = Cache(lod_name, ext)
        self.caches[lod_name + ext] = cache
    base_path = self.base_path()
    cache.set_path(base_path)
    return cache

load(lod_name, ext='.json', cls=None, count_attr=None)

Load data from a cache file. This method supports JSON and, if a relevant class is provided, other formats like YAML.

Parameters:

Name Type Description Default
lod_name str

The name of the list of dicts or class instances to read from cache.

required
ext str

The extension of the cache file, indicating the format (default is ".json").

'.json'
cls Optional[Type[YamlAble]]

The class type for deserialization. This class must have from_json() or from_yaml() class methods for deserialization, depending on the file extension.

None
count_attr(str)

the name of the attribute of the loaded result used to update `cache.count`

required

Returns: Union[List, Dict, None]: A list of dicts, a list of class instances, a single dict, or None if the cache is not stored.

Source code in lodstorage/cache.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def load(
    self,
    lod_name: str,
    ext: str = ".json",
    cls: Optional[Type["YamlAble"]] = None,
    count_attr: Optional[str] = None,
) -> Union[List, Dict, None]:
    """
    Load data from a cache file. This method supports JSON and, if a relevant class is provided, other formats like YAML.

    Args:
        lod_name (str): The name of the list of dicts or class instances to read from cache.
        ext (str): The extension of the cache file, indicating the format (default is ".json").
        cls (Optional[Type[YamlAble]]): The class type for deserialization. This class must have
            load_from_json_file() or load_from_yaml_file() class methods, depending on the file extension.
        count_attr (Optional[str]): the name of the attribute of the loaded result used to update cache.count

    Returns:
        Union[List, Dict, None]: A list of dicts, a list of class instances, a single dict, or None if the cache is not stored.

    Raises:
        ValueError: if the extension is unsupported, or YAML loading is requested without a suitable cls.
    """
    cache = self.get_cache_by_name(lod_name, ext)
    cache.count_attr = count_attr
    result = None
    if cache.is_stored:
        if ext == ".json":
            # fix: check for the JSON loader; the previous code probed
            # for load_from_yaml_file and then called load_from_json_file
            if cls and hasattr(cls, "load_from_json_file"):
                result = cls.load_from_json_file(cache.path)
            else:
                with open(cache.path, encoding="utf-8") as json_file:
                    result = orjson.loads(json_file.read())
        elif ext == ".yaml":
            if cls and hasattr(cls, "load_from_yaml_file"):
                result = cls.load_from_yaml_file(cache.path)
            else:
                raise ValueError(
                    "YAML deserialization requires a cls parameter that is a subclass of YamlAble."
                )
        else:
            raise ValueError(f"Unsupported file extension {ext} for loading.")

        # Dynamic count update based on count_attr if applicable
        if count_attr and hasattr(result, count_attr):
            cache.count = len(getattr(result, count_attr))
        elif isinstance(result, list):
            cache.count = len(result)

    return result

store(cache_name, data_to_store, ext='.json', count_attr=None)

Stores data into a cache file, handling serialization based on the specified file extension. Supports JSON and YAML formats, and custom serialization for classes that provide specific serialization methods.

Parameters:

Name Type Description Default
cache_name str

The identifier for the cache where the data will be stored.

required
data_to_store Union[List, Dict]

The data to be stored in the cache. This can be a list of dictionaries, a single dictionary, or instances of data classes if cls is provided.

required
ext str

The file extension indicating the serialization format (e.g., '.json', '.yaml'). Defaults to '.json'.

'.json'
count_attr(str)

the name of the attribute of `data_to_store` used to update `cache.count`

required

Raises:

Type Description
ValueError

If the file extension is unsupported or if required methods for serialization are not implemented in cls.

Source code in lodstorage/cache.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
def store(
    self,
    cache_name: str,
    data_to_store: Union[List, Dict],
    ext: str = ".json",
    count_attr: str = None,
) -> Cache:
    """
    Stores data into a cache file, handling serialization based on the specified file extension.
    Supports JSON and YAML formats, and custom serialization for classes that provide specific
    serialization methods.

    Args:
        cache_name (str): The identifier for the cache where the data will be stored.
        data_to_store (Union[List, Dict]): The data to be stored in the cache. This can be a list of dictionaries,
                                           a single dictionary, or instances of data classes if `cls` is provided.
        ext (str): The file extension indicating the serialization format (e.g., '.json', '.yaml').
                   Defaults to '.json'.
        count_attr(str): the name of attribute data_to_store for updating the cache.count s

    Raises:
        ValueError: If the file extension is unsupported or if required methods for serialization are not implemented in `cls`.
    """
    cache = self.get_cache_by_name(cache_name, ext)
    cache.count_attr = count_attr
    cache.set_path(self.base_path())

    if ext == ".json":
        # Check if  cls has a method `save_to_json_file`
        # that accepts a file path and data to store
        if isinstance(data_to_store, list):
            json_str = orjson.dumps(data_to_store, option=orjson.OPT_INDENT_2)
            with cache.path.open("wb") as json_file:
                json_file.write(json_str)
        else:
            if hasattr(data_to_store, "save_to_json_file"):
                data_to_store.save_to_json_file(str(cache.path))
            else:
                raise ValueError(
                    "JSON serialization requires a 'save_to_json_file' method"
                )
    elif ext == ".yaml":
        if hasattr(data_to_store, "save_to_yaml_file"):
            # Assuming cls has a method `save_to_yaml_file` that accepts a file path and data to store
            data_to_store.save_to_yaml_file(str(cache.path))
        else:
            raise ValueError(
                "YAML serialization requires a 'save_to_yaml_file' method."
            )
    else:
        raise ValueError(f"Unsupported file extension {ext}.")

    # Update cache metadata post storing
    if count_attr and hasattr(data_to_store, count_attr):
        cache.count = len(getattr(data_to_store, count_attr))
    elif isinstance(data_to_store, list):
        cache.count = len(data_to_store)

    return cache

docstring_parser

Created on 2024-01-21

@author: wf

DocstringParser

A Python docstring parser.

Source code in lodstorage/docstring_parser.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
class DocstringParser:
    """
    A Python docstring parser.

    Builds a pyparsing grammar that extracts a class description and the
    entries of an optional "Attributes:" section whose lines have the
    form ``name (type): description``.
    """

    def __init__(self):
        # Define basic elements
        identifier = Word(alphas, alphanums + "_")
        # type names may also contain dots and brackets, e.g. Optional[int]
        type_identifier = Word(alphas, alphanums + "_.[]")
        description = restOfLine

        # Define patterns for capturing attributes
        # the "Attributes:" marker itself is suppressed from the results
        attribute_start = Suppress(Literal("Attributes:"))
        # a single attribute line: name(type): description
        self.attribute = Group(
            identifier("name")
            + Suppress("(")
            + Optional(type_identifier("type"))
            + Suppress("):")
            + description("description")
        )

        # Define pattern for class docstring
        # first line is the description, optionally followed by attributes
        class_docstring = restOfLine("class_description") + Optional(
            attribute_start + OneOrMore(self.attribute)("attributes")
        )

        # Updated class_docstring pattern to correctly handle multi-line class descriptions
        # NOTE(review): this appends a second optional description+attributes
        # clause to the pattern above, so both clauses can contribute to the
        # "class_description" and "attributes" result names; presumably
        # intended to gather extra description lines before "Attributes:" -
        # verify against multi-line docstrings
        self.class_docstring = class_docstring + Optional(
            OneOrMore(~attribute_start + restOfLine)("class_description")
            + attribute_start
            + OneOrMore(self.attribute)("attributes")
        )

    def parse(self, docstring: str):
        """
        Parse the given docstring.

        Returns a tuple of (class_description, attributes) where attributes
        maps each attribute name to a dict with "type" and "description".
        Raises a pyparsing ParseException if the docstring does not match
        the grammar (parseAll=True).
        """
        result = self.class_docstring.parseString(docstring, parseAll=True)
        class_description = " ".join(result.class_description).strip()
        attributes = {
            attr.name: {"type": attr.type, "description": attr.description.strip()}
            for attr in result.attributes
        }
        return class_description, attributes

parse(docstring)

Parse the given docstring.

Source code in lodstorage/docstring_parser.py
52
53
54
55
56
57
58
59
60
61
62
def parse(self, docstring: str):
    """
    Parse the given docstring.
    """
    parsed = self.class_docstring.parseString(docstring, parseAll=True)
    # the grammar may collect several description fragments - join them
    class_description = " ".join(parsed.class_description).strip()
    attributes = {}
    for attr in parsed.attributes:
        attributes[attr.name] = {
            "type": attr.type,
            "description": attr.description.strip(),
        }
    return class_description, attributes

entity

Created on 2020-08-19

@author: wf

EntityManager

Bases: YamlAbleMixin, JsonPickleMixin, JSONAbleList

generic entity manager

Source code in lodstorage/entity.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
class EntityManager(YamlAbleMixin, JsonPickleMixin, JSONAbleList):
    """
    generic entity manager

    Manages a list of entities of a given class and persists/restores
    them via one of the StoreMode backends:
    JSON, JSONPICKLE, SQL (sqlite3) or SPARQL.
    """

    def __init__(
        self,
        name,
        entityName,
        entityPluralName: str,
        listName: str = None,
        clazz=None,
        tableName: str = None,
        primaryKey: str = None,
        config=None,
        handleInvalidListTypes=False,
        filterInvalidListTypes=False,
        listSeparator="⇹",
        debug=False,
    ):
        """
        Constructor

        Args:
            name(string): name of this eventManager
            entityName(string): entityType to be managed e.g. Country
            entityPluralName(string): plural of the entityType e.g. Countries
            listName(string): name of the list of entities; defaults to entityPluralName
            clazz(class): the class of the entities to be managed
            tableName(string): name of the SQL table to use; defaults to entityName
            primaryKey(string): the name of the primary key attribute/column, if any
            config(StorageConfig): the configuration to be used if None a default configuration will be used
            handleInvalidListTypes(bool): True if invalidListTypes should be converted or filtered
            filterInvalidListTypes(bool): True if invalidListTypes should be deleted
            listSeparator(str): the symbol to use as a list separator
            debug(boolean): override debug setting when default of config is used via config=None
        """
        self.name = name
        self.entityName = entityName
        self.entityPluralName = entityPluralName
        if listName is None:
            listName = entityPluralName
        if tableName is None:
            tableName = entityName
        self.primaryKey = primaryKey
        if config is None:
            config = StorageConfig.getDefault()
            if debug:
                config.debug = debug
        self.config = config
        super(EntityManager, self).__init__(
            listName=listName,
            clazz=clazz,
            tableName=tableName,
            handleInvalidListTypes=handleInvalidListTypes,
            filterInvalidListTypes=filterInvalidListTypes,
        )
        cacheFile = self.getCacheFile(config=config, mode=config.mode)
        self.showProgress(
            "Creating %smanager(%s) for %s using cache %s"
            % (self.entityName, config.mode, self.name, cacheFile)
        )
        if config.mode is StoreMode.SPARQL:
            if config.endpoint is None:
                raise Exception("no endpoint set for mode sparql")
            self.endpoint = config.endpoint
            self.sparql = SPARQL(
                config.endpoint, debug=config.debug, profile=config.profile
            )
        elif config.mode is StoreMode.SQL:
            self.executeMany = False  # may be True when issues are fixed
        self.listSeparator = listSeparator

    def storeMode(self):
        """
        return my store mode

        Returns:
            StoreMode: the mode of my StorageConfig
        """
        return self.config.mode

    def showProgress(self, msg):
        """display a progress message

        Args:
          msg(string): the message to display
        """
        # only print when progress display is enabled in the configuration
        if self.config.withShowProgress:
            print(msg, flush=True)

    def getCacheFile(self, config=None, mode=StoreMode.SQL):
        """
        get the cache file for this event manager
        Args:
            config(StorageConfig): if None get the cache for my mode
            mode(StoreMode): the storeMode to use

        Returns:
            str: the path of the cache file for file based modes; for SPARQL a
            descriptive pseudo path (there is no local file)
        """
        if config is None:
            config = self.config
        cachedir = config.getCachePath()
        # an explicitly configured cacheFile always wins
        if config.cacheFile is not None:
            return config.cacheFile
        """ get the path to the file for my cached data """
        if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
            extension = f".{mode.name.lower()}"
            cachepath = f"{cachedir}/{self.name}-{self.listName}{extension}"
        elif mode is StoreMode.SPARQL:
            # fixed typo: was "SPAQRL" - this string is display-only
            cachepath = f"SPARQL {self.name}:{config.endpoint}"
        elif mode is StoreMode.SQL:
            cachepath = f"{cachedir}/{self.name}.db"
        else:
            cachepath = f"undefined cachepath for StoreMode {mode}"
        return cachepath

    def removeCacheFile(self):
        """remove my cache file (only applicable for the file based JSON modes)"""
        mode = self.config.mode
        if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
            cacheFile = self.getCacheFile(mode=mode)
            if os.path.isfile(cacheFile):
                os.remove(cacheFile)

    def getSQLDB(self, cacheFile):
        """
        get the SQL database for the given cacheFile

        Args:
            cacheFile(string): the file to get the SQL db from

        Returns:
            SQLDB: the SQL database (also kept as self.sqldb)
        """
        config = self.config
        sqldb = self.sqldb = SQLDB(
            cacheFile, debug=config.debug, errorDebug=config.errorDebug
        )
        return sqldb

    def initSQLDB(
        self,
        sqldb,
        listOfDicts=None,
        withCreate: bool = True,
        withDrop: bool = True,
        sampleRecordCount=-1,
    ):
        """
        initialize my sql DB

        Args:
            sqldb(SQLDB): the SQL database to initialize
            listOfDicts(list): the list of dicts to analyze for type information;
                if None derive type samples from my entity class
            withDrop(boolean): true if the existing Table should be dropped
            withCreate(boolean): true if the create Table command should be executed - false if only the entityInfo should be returned
            sampleRecordCount(int): the number of records to analyze for type information
        Return:
            EntityInfo: the entity information such as CREATE Table command
        """
        if listOfDicts is None:
            listOfDicts = JSONAble.getJsonTypeSamplesForClass(self.clazz)
        entityInfo = sqldb.createTable(
            listOfDicts,
            self.tableName,
            primaryKey=self.primaryKey,
            withCreate=withCreate,
            withDrop=withDrop,
            sampleRecordCount=sampleRecordCount,
        )
        return entityInfo

    def setNone(self, record, fields):
        """
        make sure the given fields in the given record are set to none
        Args:
            record(dict): the record to work on
            fields(list): the list of fields to set to None
        """
        LOD.setNone(record, fields)

    def isCached(self):
        """check whether there is a file containing cached
        data for me

        Returns:
            bool: True if cached data is available for my store mode
        """
        result = False
        config = self.config
        mode = self.config.mode
        if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
            result = os.path.isfile(self.getCacheFile(config=self.config, mode=mode))
        elif mode is StoreMode.SPARQL:
            # @FIXME - make abstract
            query = (
                config.prefix
                + """
SELECT  ?source (COUNT(?source) AS ?sourcecount)
WHERE { 
   ?event cr:Event_source ?source.
}
GROUP by ?source
"""
            )
            sourceCountList = self.sparql.queryAsListOfDicts(query)
            for sourceCount in sourceCountList:
                source = sourceCount["source"]
                recordCount = sourceCount["sourcecount"]
                # heuristic: consider cached when my source has >100 records
                if source == self.name and recordCount > 100:
                    result = True
        elif mode is StoreMode.SQL:
            cacheFile = self.getCacheFile(config=self.config, mode=StoreMode.SQL)
            if os.path.isfile(cacheFile):
                sqlQuery = f"SELECT COUNT(*) AS count FROM {self.tableName}"
                try:
                    sqlDB = self.getSQLDB(cacheFile)
                    countResults = sqlDB.query(sqlQuery)
                    countResult = countResults[0]
                    count = countResult["count"]
                    result = count >= 0
                except Exception as ex:
                    msg = str(ex)
                    # NOTE(review): self.debug is presumably provided by a mixin
                    # or should be self.config.debug - confirm before changing
                    if self.debug:
                        print(msg, file=sys.stderr)
                        sys.stderr.flush()
                    # e.g. sqlite3.OperationalError: no such table: Event_crossref
                    pass
        else:
            # fixed: was self.mode which is never set and would raise AttributeError
            raise Exception("unsupported mode %s" % self.config.mode)
        return result

    def fromCache(
        self,
        force: bool = False,
        getListOfDicts=None,
        append=False,
        sampleRecordCount=-1,
    ):
        """
        get my entries from the cache or from the callback provided

        Args:
            force(bool): force ignoring the cache
            getListOfDicts(callable): a function to call for getting the data
            append(bool): True if records should be appended
            sampleRecordCount(int): the number of records to analyze for type information

        Returns:
            the list of Dicts and as a side effect setting self.cacheFile

        Raises:
            Exception: if the cache misses and no getListOfDicts callback is available
        """
        if not self.isCached() or force:
            startTime = time.time()
            self.showProgress(f"getting {self.entityPluralName} for {self.name} ...")
            if getListOfDicts is None:
                # fall back to a getListOfDicts method if my subclass provides one
                if hasattr(self, "getListOfDicts"):
                    getListOfDicts = self.getListOfDicts
                else:
                    raise Exception(
                        "from Cache failed and no secondary cache via getListOfDicts specified"
                    )
            listOfDicts = getListOfDicts()
            duration = time.time() - startTime
            self.showProgress(
                f"got {len(listOfDicts)} {self.entityPluralName} in {duration:5.1f} s"
            )
            self.cacheFile = self.storeLoD(
                listOfDicts, append=append, sampleRecordCount=sampleRecordCount
            )
            self.setListFromLoD(listOfDicts)
        else:
            # fromStore also sets self.cacheFile
            listOfDicts = self.fromStore()
        return listOfDicts

    def fromStore(self, cacheFile=None, setList: bool = True) -> list:
        """
        restore me from the store
        Args:
            cacheFile(String): the cacheFile to use if None use the pre configured cachefile
            setList(bool): if True set my list with the data from the cache file

        Returns:
            list: list of dicts or JSON entitymanager

        Raises:
            Exception: if my store mode is not supported
        """
        startTime = time.time()
        if cacheFile is None:
            cacheFile = self.getCacheFile(config=self.config, mode=self.config.mode)
        self.cacheFile = cacheFile
        self.showProgress(
            "reading %s for %s from cache %s"
            % (self.entityPluralName, self.name, cacheFile)
        )
        mode = self.config.mode
        if mode is StoreMode.JSONPICKLE:
            JSONem = JsonPickleMixin.readJsonPickle(cacheFile)
            if self.clazz is not None:
                listOfDicts = JSONem.getLoD()
            else:
                listOfDicts = JSONem.getList()
        elif mode is StoreMode.JSON:
            listOfDicts = self.readLodFromJsonFile(cacheFile)
            pass
        elif mode is StoreMode.SPARQL:
            # @FIXME make abstract
            eventQuery = (
                """
PREFIX cr: <http://cr.bitplan.com/>
SELECT ?eventId ?acronym ?series ?title ?year ?country ?city ?startDate ?endDate ?url ?source WHERE { 
   OPTIONAL { ?event cr:Event_eventId ?eventId. }
   OPTIONAL { ?event cr:Event_acronym ?acronym. }
   OPTIONAL { ?event cr:Event_series ?series. }
   OPTIONAL { ?event cr:Event_title ?title. }
   OPTIONAL { ?event cr:Event_year ?year.  }
   OPTIONAL { ?event cr:Event_country ?country. }
   OPTIONAL { ?event cr:Event_city ?city. }
   OPTIONAL { ?event cr:Event_startDate ?startDate. }
   OPTIONAL { ?event cr:Event_endDate ?endDate. }
   OPTIONAL { ?event cr:Event_url ?url. }
   ?event cr:Event_source ?source FILTER(?source='%s').
}
"""
                % self.name
            )
            listOfDicts = self.sparql.queryAsListOfDicts(eventQuery)
        elif mode is StoreMode.SQL:
            sqlQuery = "SELECT * FROM %s" % self.tableName
            sqlDB = self.getSQLDB(cacheFile)
            listOfDicts = sqlDB.query(sqlQuery)
            sqlDB.close()
            pass
        else:
            # fixed: was self.mode which is never set and would raise AttributeError
            raise Exception("unsupported store mode %s" % self.config.mode)

        self.showProgress(
            "read %d %s from %s in %5.1f s"
            % (
                len(listOfDicts),
                self.entityPluralName,
                self.name,
                time.time() - startTime,
            )
        )
        if setList:
            self.setListFromLoD(listOfDicts)
        return listOfDicts

    def getLoD(self):
        """
        Return the LoD of the entities in the list

        Return:
            list: a list of Dicts

        """
        lod = []
        for entity in self.getList():
            # TODO - optionally filter by samples
            lod.append(entity.__dict__)
        return lod

    def store(
        self,
        limit=10000000,
        batchSize=250,
        append=False,
        fixNone=True,
        sampleRecordCount=-1,
        replace: bool = False,
    ) -> str:
        """
        store my list of dicts

        Args:
            limit(int): maximum number of records to store per batch
            batchSize(int): size of batch for storing
            append(bool): True if records should be appended
            fixNone(bool): if True make sure the dicts are filled with None references for each record
            sampleRecordCount(int): the number of records to analyze for type information
            replace(bool): if True allow replace for insert

        Return:
            str: The cachefile being used
        """
        lod = self.getLoD()
        return self.storeLoD(
            lod,
            limit=limit,
            batchSize=batchSize,
            append=append,
            fixNone=fixNone,
            sampleRecordCount=sampleRecordCount,
            replace=replace,
        )

    def storeLoD(
        self,
        listOfDicts,
        limit=10000000,
        batchSize=250,
        cacheFile=None,
        append=False,
        fixNone=True,
        # NOTE(review): default 1 is inconsistent with the -1 (all records)
        # default used elsewhere; kept as-is since direct callers may rely on it
        sampleRecordCount=1,
        replace: bool = False,
    ) -> str:
        """
        store my entities

        Args:
            listOfDicts(list): the list of dicts to store
            limit(int): maximum number of records to store
            batchSize(int): size of batch for storing
            cacheFile(string): the name of the storage e.g path to JSON or sqlite3 file
            append(bool): True if records should be appended
            fixNone(bool): if True make sure the dicts are filled with None references for each record
            sampleRecordCount(int): the number of records to analyze for type information
            replace(bool): if True allow replace for insert
        Return:
            str: The cachefile being used

        Raises:
            Exception: if my store mode is not supported
        """
        config = self.config
        mode = config.mode
        if self.handleInvalidListTypes:
            LOD.handleListTypes(
                lod=listOfDicts,
                doFilter=self.filterInvalidListTypes,
                separator=self.listSeparator,
            )
        if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
            if cacheFile is None:
                cacheFile = self.getCacheFile(config=self.config, mode=mode)
            self.showProgress(
                f"storing {len(listOfDicts)} {self.entityPluralName} for {self.name} to cache {cacheFile}"
            )
            if mode is StoreMode.JSONPICKLE:
                self.writeJsonPickle(cacheFile)
            if mode is StoreMode.JSON:
                self.storeToJsonFile(cacheFile)
                pass
        elif mode is StoreMode.SPARQL:
            startTime = time.time()
            msg = f"storing {len(listOfDicts)} {self.entityPluralName} to {self.config.mode} ({self.config.endpoint})"
            self.showProgress(msg)
            # @ FIXME make abstract /configurable
            entityType = "cr:Event"
            prefixes = self.config.prefix
            self.sparql.insertListOfDicts(
                listOfDicts,
                entityType,
                self.primaryKey,
                prefixes,
                limit=limit,
                batchSize=batchSize,
            )
            self.showProgress(
                "store for %s done after %5.1f secs"
                % (self.name, time.time() - startTime)
            )
        elif mode is StoreMode.SQL:
            startTime = time.time()
            if cacheFile is None:
                cacheFile = self.getCacheFile(config=self.config, mode=self.config.mode)
            sqldb = self.getSQLDB(cacheFile)
            self.showProgress(
                "storing %d %s for %s to %s:%s"
                % (
                    len(listOfDicts),
                    self.entityPluralName,
                    self.name,
                    config.mode,
                    cacheFile,
                )
            )
            # appending re-uses the existing table; otherwise recreate it
            if append:
                withDrop = False
                withCreate = False
            else:
                withDrop = True
                withCreate = True
            entityInfo = self.initSQLDB(
                sqldb,
                listOfDicts,
                withCreate=withCreate,
                withDrop=withDrop,
                sampleRecordCount=sampleRecordCount,
            )
            self.sqldb.store(
                listOfDicts,
                entityInfo,
                executeMany=self.executeMany,
                fixNone=fixNone,
                replace=replace,
            )
            self.showProgress(
                "store for %s done after %5.1f secs"
                % (self.name, time.time() - startTime)
            )
        else:
            # fixed: was self.mode which is never set and would raise AttributeError
            raise Exception(f"unsupported store mode {self.config.mode}")
        return cacheFile

__init__(name, entityName, entityPluralName, listName=None, clazz=None, tableName=None, primaryKey=None, config=None, handleInvalidListTypes=False, filterInvalidListTypes=False, listSeparator='⇹', debug=False)

Constructor

Parameters:

Name Type Description Default
name(string)

name of this eventManager

required
entityName(string)

entityType to be managed e.g. Country

required
entityPluralName(string)

plural of the entityType e.g. Countries

required
config(StorageConfig)

the configuration to be used if None a default configuration will be used

required
handleInvalidListTypes(bool)

True if invalidListTypes should be converted or filtered

required
filterInvalidListTypes(bool)

True if invalidListTypes should be deleted

required
listSeparator(str)

the symbol to use as a list separator

required
debug(boolean)

override debug setting when default of config is used via config=None

required
Source code in lodstorage/entity.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def __init__(
    self,
    name,
    entityName,
    entityPluralName: str,
    listName: str = None,
    clazz=None,
    tableName: str = None,
    primaryKey: str = None,
    config=None,
    handleInvalidListTypes=False,
    filterInvalidListTypes=False,
    listSeparator="⇹",
    debug=False,
):
    """
    Constructor

    Args:
        name(string): name of this eventManager
        entityName(string): entityType to be managed e.g. Country
        entityPluralName(string): plural of the the entityType e.g. Countries
        config(StorageConfig): the configuration to be used if None a default configuration will be used
        handleInvalidListTypes(bool): True if invalidListTypes should be converted or filtered
        filterInvalidListTypes(bool): True if invalidListTypes should be deleted
        listSeparator(str): the symbol to use as a list separator
        debug(boolean): override debug setting when default of config is used via config=None
    """
    self.name = name
    self.entityName = entityName
    self.entityPluralName = entityPluralName
    if listName is None:
        listName = entityPluralName
    if tableName is None:
        tableName = entityName
    self.primaryKey = primaryKey
    if config is None:
        config = StorageConfig.getDefault()
        if debug:
            config.debug = debug
    self.config = config
    super(EntityManager, self).__init__(
        listName=listName,
        clazz=clazz,
        tableName=tableName,
        handleInvalidListTypes=handleInvalidListTypes,
        filterInvalidListTypes=filterInvalidListTypes,
    )
    cacheFile = self.getCacheFile(config=config, mode=config.mode)
    self.showProgress(
        "Creating %smanager(%s) for %s using cache %s"
        % (self.entityName, config.mode, self.name, cacheFile)
    )
    if config.mode is StoreMode.SPARQL:
        if config.endpoint is None:
            raise Exception("no endpoint set for mode sparql")
        self.endpoint = config.endpoint
        self.sparql = SPARQL(
            config.endpoint, debug=config.debug, profile=config.profile
        )
    elif config.mode is StoreMode.SQL:
        self.executeMany = False  # may be True when issues are fixed
    self.listSeparator = listSeparator

fromCache(force=False, getListOfDicts=None, append=False, sampleRecordCount=-1)

get my entries from the cache or from the callback provided

Parameters:

Name Type Description Default
force(bool)

force ignoring the cache

required
getListOfDicts(callable)

a function to call for getting the data

required
append(bool)

True if records should be appended

required
sampleRecordCount(int)

the number of records to analyze for type information

required

Returns:

Type Description

the list of Dicts and as a side effect setting self.cacheFile

Source code in lodstorage/entity.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
def fromCache(
    self,
    force: bool = False,
    getListOfDicts=None,
    append=False,
    sampleRecordCount=-1,
):
    """
    get my entries from the cache or from the callback provided

    Args:
        force(bool): force ignoring the cache
        getListOfDicts(callable): a function to call for getting the data
        append(bool): True if records should be appended
        sampleRecordCount(int): the number of records to analyze for type information

    Returns:
        the list of Dicts and as a side effect setting self.cacheFile
    """
    if not self.isCached() or force:
        startTime = time.time()
        self.showProgress(f"getting {self.entityPluralName} for {self.name} ...")
        if getListOfDicts is None:
            if hasattr(self, "getListOfDicts"):
                getListOfDicts = self.getListOfDicts
            else:
                raise Exception(
                    "from Cache failed and no secondary cache via getListOfDicts specified"
                )
        listOfDicts = getListOfDicts()
        duration = time.time() - startTime
        self.showProgress(
            f"got {len(listOfDicts)} {self.entityPluralName} in {duration:5.1f} s"
        )
        self.cacheFile = self.storeLoD(
            listOfDicts, append=append, sampleRecordCount=sampleRecordCount
        )
        self.setListFromLoD(listOfDicts)
    else:
        # fromStore also sets self.cacheFile
        listOfDicts = self.fromStore()
    return listOfDicts

fromStore(cacheFile=None, setList=True)

restore me from the store Args: cacheFile(String): the cacheFile to use if None use the pre configured cachefile setList(bool): if True set my list with the data from the cache file

Returns:

Name Type Description
list list

list of dicts or JSON entitymanager

Source code in lodstorage/entity.py
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
    def fromStore(self, cacheFile=None, setList: bool = True) -> list:
        """
        restore me from the store
        Args:
            cacheFile(String): the cacheFile to use if None use the pre configured cachefile
            setList(bool): if True set my list with the data from the cache file

        Returns:
            list: list of dicts or JSON entitymanager
        """
        startTime = time.time()
        if cacheFile is None:
            cacheFile = self.getCacheFile(config=self.config, mode=self.config.mode)
        self.cacheFile = cacheFile
        self.showProgress(
            "reading %s for %s from cache %s"
            % (self.entityPluralName, self.name, cacheFile)
        )
        mode = self.config.mode
        if mode is StoreMode.JSONPICKLE:
            JSONem = JsonPickleMixin.readJsonPickle(cacheFile)
            if self.clazz is not None:
                listOfDicts = JSONem.getLoD()
            else:
                listOfDicts = JSONem.getList()
        elif mode is StoreMode.JSON:
            listOfDicts = self.readLodFromJsonFile(cacheFile)
            pass
        elif mode is StoreMode.SPARQL:
            # @FIXME make abstract
            eventQuery = (
                """
PREFIX cr: <http://cr.bitplan.com/>
SELECT ?eventId ?acronym ?series ?title ?year ?country ?city ?startDate ?endDate ?url ?source WHERE { 
   OPTIONAL { ?event cr:Event_eventId ?eventId. }
   OPTIONAL { ?event cr:Event_acronym ?acronym. }
   OPTIONAL { ?event cr:Event_series ?series. }
   OPTIONAL { ?event cr:Event_title ?title. }
   OPTIONAL { ?event cr:Event_year ?year.  }
   OPTIONAL { ?event cr:Event_country ?country. }
   OPTIONAL { ?event cr:Event_city ?city. }
   OPTIONAL { ?event cr:Event_startDate ?startDate. }
   OPTIONAL { ?event cr:Event_endDate ?endDate. }
   OPTIONAL { ?event cr:Event_url ?url. }
   ?event cr:Event_source ?source FILTER(?source='%s').
}
"""
                % self.name
            )
            listOfDicts = self.sparql.queryAsListOfDicts(eventQuery)
        elif mode is StoreMode.SQL:
            sqlQuery = "SELECT * FROM %s" % self.tableName
            sqlDB = self.getSQLDB(cacheFile)
            listOfDicts = sqlDB.query(sqlQuery)
            sqlDB.close()
            pass
        else:
            raise Exception("unsupported store mode %s" % self.mode)

        self.showProgress(
            "read %d %s from %s in %5.1f s"
            % (
                len(listOfDicts),
                self.entityPluralName,
                self.name,
                time.time() - startTime,
            )
        )
        if setList:
            self.setListFromLoD(listOfDicts)
        return listOfDicts

getCacheFile(config=None, mode=StoreMode.SQL)

get the cache file for this event manager Args: config(StorageConfig): if None get the cache for my mode mode(StoreMode): the storeMode to use

Source code in lodstorage/entity.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def getCacheFile(self, config=None, mode=StoreMode.SQL):
    """
    get the cache file for this event manager
    Args:
        config(StorageConfig): if None get the cache for my mode
        mode(StoreMode): the storeMode to use
    """
    if config is None:
        config = self.config
    cachedir = config.getCachePath()
    if config.cacheFile is not None:
        return config.cacheFile
    """ get the path to the file for my cached data """
    if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
        extension = f".{mode.name.lower()}"
        cachepath = f"{cachedir}/{self.name}-{self.listName}{extension}"
    elif mode is StoreMode.SPARQL:
        cachepath = f"SPAQRL {self.name}:{config.endpoint}"
    elif mode is StoreMode.SQL:
        cachepath = f"{cachedir}/{self.name}.db"
    else:
        cachepath = f"undefined cachepath for StoreMode {mode}"
    return cachepath

getLoD()

Return the LoD of the entities in the list

Return

list: a list of Dicts

Source code in lodstorage/entity.py
350
351
352
353
354
355
356
357
358
359
360
361
362
def getLoD(self):
    """
    Return the LoD of the entities in the list

    Return:
        list: a list of Dicts

    """
    lod = []
    for entity in self.getList():
        # TODO - optionally filter by samples
        lod.append(entity.__dict__)
    return lod

getSQLDB(cacheFile)

get the SQL database for the given cacheFile

Parameters:

Name Type Description Default
cacheFile(string)

the file to get the SQL db from

required
Source code in lodstorage/entity.py
135
136
137
138
139
140
141
142
143
144
145
146
def getSQLDB(self, cacheFile):
    """
    get the SQL database for the given cacheFile

    Args:
        cacheFile(string): the file to get the SQL db from
    """
    config = self.config
    sqldb = self.sqldb = SQLDB(
        cacheFile, debug=config.debug, errorDebug=config.errorDebug
    )
    return sqldb

initSQLDB(sqldb, listOfDicts=None, withCreate=True, withDrop=True, sampleRecordCount=-1)

initialize my sql DB

Parameters:

Name Type Description Default
listOfDicts(list)

the list of dicts to analyze for type information

required
withDrop(boolean)

true if the existing Table should be dropped

required
withCreate(boolean)

true if the create Table command should be executed - false if only the entityInfo should be returned

required
sampleRecordCount(int)

the number of records to analyze for type information

required

Return: EntityInfo: the entity information such as CREATE Table command

Source code in lodstorage/entity.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def initSQLDB(
    self,
    sqldb,
    listOfDicts=None,
    withCreate: bool = True,
    withDrop: bool = True,
    sampleRecordCount=-1,
):
    """
    initialize my sql DB

    Args:
        listOfDicts(list): the list of dicts to analyze for type information
        withDrop(boolean): true if the existing Table should be dropped
        withCreate(boolean): true if the create Table command should be executed - false if only the entityInfo should be returned
        sampleRecordCount(int): the number of records to analyze for type information
    Return:
        EntityInfo: the entity information such as CREATE Table command
    """
    if listOfDicts is None:
        listOfDicts = JSONAble.getJsonTypeSamplesForClass(self.clazz)
    entityInfo = sqldb.createTable(
        listOfDicts,
        self.tableName,
        primaryKey=self.primaryKey,
        withCreate=withCreate,
        withDrop=withDrop,
        sampleRecordCount=sampleRecordCount,
    )
    return entityInfo

isCached()

check whether there is a file containing cached data for me

Source code in lodstorage/entity.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
    def isCached(self):
        """check whether there is a file containing cached
        data for me"""
        result = False
        config = self.config
        mode = self.config.mode
        if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
            result = os.path.isfile(self.getCacheFile(config=self.config, mode=mode))
        elif mode is StoreMode.SPARQL:
            # @FIXME - make abstract
            query = (
                config.prefix
                + """
SELECT  ?source (COUNT(?source) AS ?sourcecount)
WHERE { 
   ?event cr:Event_source ?source.
}
GROUP by ?source
"""
            )
            sourceCountList = self.sparql.queryAsListOfDicts(query)
            for sourceCount in sourceCountList:
                source = sourceCount["source"]
                recordCount = sourceCount["sourcecount"]
                if source == self.name and recordCount > 100:
                    result = True
        elif mode is StoreMode.SQL:
            cacheFile = self.getCacheFile(config=self.config, mode=StoreMode.SQL)
            if os.path.isfile(cacheFile):
                sqlQuery = f"SELECT COUNT(*) AS count FROM {self.tableName}"
                try:
                    sqlDB = self.getSQLDB(cacheFile)
                    countResults = sqlDB.query(sqlQuery)
                    countResult = countResults[0]
                    count = countResult["count"]
                    result = count >= 0
                except Exception as ex:
                    msg = str(ex)
                    if self.debug:
                        print(msg, file=sys.stderr)
                        sys.stderr.flush()
                    # e.g. sqlite3.OperationalError: no such table: Event_crossref
                    pass
        else:
            raise Exception("unsupported mode %s" % self.mode)
        return result

removeCacheFile()

remove my cache file

Source code in lodstorage/entity.py
127
128
129
130
131
132
133
def removeCacheFile(self):
    """
    delete my cache file from disk - only applies to the
    file based store modes (JSON and JSONPICKLE)
    """
    storeMode = self.config.mode
    # nothing to remove for non file based store modes
    if storeMode is not StoreMode.JSON and storeMode is not StoreMode.JSONPICKLE:
        return
    cachePath = self.getCacheFile(mode=storeMode)
    if os.path.isfile(cachePath):
        os.remove(cachePath)

setNone(record, fields)

make sure the given fields in the given record are set to none Args: record(dict): the record to work on fields(list): the list of fields to set to None

Source code in lodstorage/entity.py
179
180
181
182
183
184
185
186
def setNone(self, record, fields):
    """
    make sure the given fields in the given record are set to none

    Delegates to the LOD helper so missing fields are filled with None.

    Args:
        record(dict): the record to work on
        fields(list): the list of fields to set to None
    """
    LOD.setNone(record, fields)

showProgress(msg)

display a progress message

Parameters:

Name Type Description Default
msg(string)

the message to display

required
Source code in lodstorage/entity.py
 94
 95
 96
 97
 98
 99
100
101
def showProgress(self, msg):
    """
    print a progress message if progress display is enabled in my config

    Args:
      msg(string): the message to display
    """
    showIt = self.config.withShowProgress
    if showIt:
        # flush so progress is visible immediately
        print(msg, flush=True)

store(limit=10000000, batchSize=250, append=False, fixNone=True, sampleRecordCount=-1, replace=False)

store my list of dicts

Parameters:

Name Type Description Default
limit(int)

maximum number of records to store per batch

required
batchSize(int)

size of batch for storing

required
append(bool)

True if records should be appended

required
fixNone(bool)

if True make sure the dicts are filled with None references for each record

required
sampleRecordCount(int)

the number of records to analyze for type information

required
replace(bool)

if True allow replace for insert

required
Return

str: The cachefile being used

Source code in lodstorage/entity.py
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
def store(
    self,
    limit=10000000,
    batchSize=250,
    append=False,
    fixNone=True,
    sampleRecordCount=-1,
    replace: bool = False,
) -> str:
    """
    fetch my list of dicts and delegate the storing to storeLoD

    Args:
        limit(int): maximum number of records to store per batch
        batchSize(int): size of batch for storing
        append(bool): True if records should be appended
        fixNone(bool): if True make sure the dicts are filled with None references for each record
        sampleRecordCount(int): the number of records to analyze for type information
        replace(bool): if True allow replace for insert

    Return:
        str: The cachefile being used
    """
    records = self.getLoD()
    cacheFile = self.storeLoD(
        records,
        limit=limit,
        batchSize=batchSize,
        append=append,
        fixNone=fixNone,
        sampleRecordCount=sampleRecordCount,
        replace=replace,
    )
    return cacheFile

storeLoD(listOfDicts, limit=10000000, batchSize=250, cacheFile=None, append=False, fixNone=True, sampleRecordCount=1, replace=False)

store my entities

Parameters:

Name Type Description Default
listOfDicts(list)

the list of dicts to store

required
limit(int)

maximum number of records to store

required
batchSize(int)

size of batch for storing

required
cacheFile(string)

the name of the storage e.g path to JSON or sqlite3 file

required
append(bool)

True if records should be appended

required
fixNone(bool)

if True make sure the dicts are filled with None references for each record

required
sampleRecordCount(int)

the number of records to analyze for type information

required
replace(bool)

if True allow replace for insert

required

Return: str: The cachefile being used

Source code in lodstorage/entity.py
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
def storeLoD(
    self,
    listOfDicts,
    limit=10000000,
    batchSize=250,
    cacheFile=None,
    append=False,
    fixNone=True,
    sampleRecordCount=1,
    replace: bool = False,
) -> str:
    """
    store my entities

    Args:
        listOfDicts(list): the list of dicts to store
        limit(int): maximum number of records to store
        batchSize(int): size of batch for storing
        cacheFile(string): the name of the storage e.g path to JSON or sqlite3 file
        append(bool): True if records should be appended
        fixNone(bool): if True make sure the dicts are filled with None references for each record
        sampleRecordCount(int): the number of records to analyze for type information
        replace(bool): if True allow replace for insert
    Return:
        str: The cachefile being used
    """
    config = self.config
    mode = config.mode
    # optionally normalize/filter list-typed cell values before storing
    if self.handleInvalidListTypes:
        LOD.handleListTypes(
            lod=listOfDicts,
            doFilter=self.filterInvalidListTypes,
            separator=self.listSeparator,
        )
    if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
        # file based storage
        if cacheFile is None:
            cacheFile = self.getCacheFile(config=self.config, mode=mode)
        self.showProgress(
            f"storing {len(listOfDicts)} {self.entityPluralName} for {self.name} to cache {cacheFile}"
        )
        if mode is StoreMode.JSONPICKLE:
            self.writeJsonPickle(cacheFile)
        if mode is StoreMode.JSON:
            self.storeToJsonFile(cacheFile)
            pass
    elif mode is StoreMode.SPARQL:
        startTime = time.time()
        msg = f"storing {len(listOfDicts)} {self.entityPluralName} to {self.config.mode} ({self.config.endpoint})"
        self.showProgress(msg)
        # @ FIXME make abstract /configurable
        # NOTE(review): entityType is hard-coded to "cr:Event"
        entityType = "cr:Event"
        prefixes = self.config.prefix
        self.sparql.insertListOfDicts(
            listOfDicts,
            entityType,
            self.primaryKey,
            prefixes,
            limit=limit,
            batchSize=batchSize,
        )
        self.showProgress(
            "store for %s done after %5.1f secs"
            % (self.name, time.time() - startTime)
        )
    elif mode is StoreMode.SQL:
        startTime = time.time()
        if cacheFile is None:
            cacheFile = self.getCacheFile(config=self.config, mode=self.config.mode)
        sqldb = self.getSQLDB(cacheFile)
        self.showProgress(
            "storing %d %s for %s to %s:%s"
            % (
                len(listOfDicts),
                self.entityPluralName,
                self.name,
                config.mode,
                cacheFile,
            )
        )
        # append keeps the existing table; otherwise drop and recreate it
        if append:
            withDrop = False
            withCreate = False
        else:
            withDrop = True
            withCreate = True
        entityInfo = self.initSQLDB(
            sqldb,
            listOfDicts,
            withCreate=withCreate,
            withDrop=withDrop,
            sampleRecordCount=sampleRecordCount,
        )
        self.sqldb.store(
            listOfDicts,
            entityInfo,
            executeMany=self.executeMany,
            fixNone=fixNone,
            replace=replace,
        )
        self.showProgress(
            "store for %s done after %5.1f secs"
            % (self.name, time.time() - startTime)
        )
    else:
        # NOTE(review): uses self.mode while the method otherwise reads
        # self.config.mode - verify that self.mode exists
        raise Exception(f"unsupported store mode {self.mode}")
    return cacheFile

storeMode()

return my store mode

Source code in lodstorage/entity.py
88
89
90
91
92
def storeMode(self):
    """
    Returns:
        the store mode taken from my configuration
    """
    mode = self.config.mode
    return mode

jsonable

This module has a class JSONAble for serialization of tables/list of dicts to and from JSON encoding

Created on 2020-09-03

@author: wf

JSONAble

Bases: object

mixin to allow classes to be JSON serializable see

  • https://stackoverflow.com/questions/3768895/how-to-make-a-class-json-serializable
Source code in lodstorage/jsonable.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
class JSONAble(object):
    """
    mixin to allow classes to be JSON serializable see

    - https://stackoverflow.com/questions/3768895/how-to-make-a-class-json-serializable

    """

    def __init__(self):
        """
        Constructor
        """

    @classmethod
    def getPluralname(cls):
        """return the plural name of this class by appending an 's'"""
        return "%ss" % cls.__name__

    @staticmethod
    def singleQuoteToDoubleQuote(singleQuoted, useRegex=False):
        """
        convert a single quoted string to a double quoted one

        Args:
            singleQuoted (str): a single quoted string e.g.

                .. highlight:: json

                {'cities': [{'name': "Upper Hell's Gate"}]}

            useRegex (boolean): True if a regular expression shall be used for matching

        Returns:
            string: the double quoted version of the string

        Note:
            see
            - https://stackoverflow.com/questions/55600788/python-replace-single-quotes-with-double-quotes-but-leave-ones-within-double-q

        """
        if useRegex:
            doubleQuoted = JSONAble.singleQuoteToDoubleQuoteUsingRegex(singleQuoted)
        else:
            doubleQuoted = JSONAble.singleQuoteToDoubleQuoteUsingBracketLoop(
                singleQuoted
            )
        return doubleQuoted

    @staticmethod
    def singleQuoteToDoubleQuoteUsingRegex(singleQuoted):
        """
        convert a single quoted string to a double quoted one using a regular expression

        Args:
            singleQuoted(string): a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}
        Returns:
            string: the double quoted version of the string
        Note:
            see https://stackoverflow.com/a/50257217/1497139
        """
        doubleQuoted = JSONAbleSettings.singleQuoteRegex.sub('"', singleQuoted)
        return doubleQuoted

    @staticmethod
    def singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted):
        """
        convert a single quoted string to a double quoted one using a character
        loop that tracks the quoting state

        Args:
            singleQuoted(string): a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}
        Returns:
            string: the double quoted version of the string
        Note:
            see https://stackoverflow.com/a/63862387/1497139

        """
        cList = list(singleQuoted)
        inDouble = False
        inSingle = False
        for i, c in enumerate(cList):
            # print ("%d:%s %r %r" %(i,c,inSingle,inDouble))
            if c == "'":
                # only rewrite single quotes outside of double quoted regions
                if not inDouble:
                    inSingle = not inSingle
                    cList[i] = '"'
            elif c == '"':
                inDouble = not inDouble
                inSingle = False
        doubleQuoted = "".join(cList)
        return doubleQuoted

    def getJsonTypeSamples(self):
        """
        get the type samples of my class - or, for a JSONAbleList without its
        own samples, of the class of my list elements

        Returns:
            list: a list of sample dicts, or None if no samples are available
        """
        if hasattr(self, "__class__"):
            cls = self.__class__
            if isinstance(self, JSONAbleList) and not hasattr(cls, "getSamples"):
                cls = self.clazz
            return JSONAble.getJsonTypeSamplesForClass(cls)
        return None

    @staticmethod
    def getJsonTypeSamplesForClass(cls):
        """
        return the type samples for the given class

        Return:
            list: a list of dict that specify the types by example,
            or None if the class has no callable "getSamples" method
        """
        if hasattr(cls, "getSamples"):
            getSamples = getattr(cls, "getSamples")
            if callable(getSamples):
                return getSamples()
        return None

    @staticmethod
    def readJsonFromFile(jsonFilePath):
        """
        read json string from the given jsonFilePath

        Args:
            jsonFilePath(string): the path of the file where to read the result from

        Returns:
            the JSON string read from the file
        """
        with open(jsonFilePath, "r") as jsonFile:
            jsonStr = jsonFile.read()
        return jsonStr

    @staticmethod
    def storeJsonToFile(jsonStr, jsonFilePath):
        """
        store the given json string to the given jsonFilePath

        Args:
            jsonStr(string): the string to store
            jsonFilePath(string): the path of the file where to store the result

        """
        with open(jsonFilePath, "w") as jsonFile:
            jsonFile.write(jsonStr)

    def checkExtension(self, jsonFile: str, extension: str = ".json") -> str:
        """
        make sure the jsonFile has the given extension e.g. ".json"

        Args:
            jsonFile(str): the jsonFile name - potentially without ".json" suffix
            extension(str): the extension to enforce e.g. ".json"

        Returns:
            str: the jsonFile name with ".json" as an extension guaranteed
        """
        if not jsonFile.endswith(extension):
            jsonFile = f"{jsonFile}{extension}"
        return jsonFile

    def storeToJsonFile(
        self, jsonFile: str, extension: str = ".json", limitToSampleFields: bool = False
    ):
        """
        store me to the given jsonFile

        Args:
            jsonFile(str): the JSON file name (optionally without extension)
            extension(str): the extension to use if not part of the jsonFile name
            limitToSampleFields(bool): If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.
        """
        jsonFile = self.checkExtension(jsonFile, extension)
        JSONAble.storeJsonToFile(self.toJSON(limitToSampleFields), jsonFile)

    def restoreFromJsonFile(self, jsonFile: str):
        """
        restore me from the given jsonFile

        Args:
            jsonFile(string): the jsonFile to restore me from
        """
        jsonFile = self.checkExtension(jsonFile)
        jsonStr = JSONAble.readJsonFromFile(jsonFile)
        self.fromJson(jsonStr)

    def fromJson(self, jsonStr):
        """
        initialize me from the given JSON string

        Args:
            jsonStr(str): the JSON string
        """
        jsonMap = json.loads(jsonStr)
        self.fromDict(jsonMap)

    def fromDict(self, data: dict):
        """
        initialize me from the given data by setting one attribute per entry

        Args:
            data(dict): the dictionary to initialize me from
        """
        # https://stackoverflow.com/questions/38987/how-do-i-merge-two-dictionaries-in-a-single-expression-in-python-taking-union-o
        for key in data.keys():
            value = data[key]
            setattr(self, key, value)

    def toJsonAbleValue(self, v):
        """
        return the JSON able value of the given value v

        Args:
            v(object): the value to convert

        Returns:
            the __dict__ of an object, the ISO format of a date/datetime,
            or an empty string if no conversion is known
        """
        # objects have __dict__ hash tables which can be JSON-converted
        if hasattr(v, "__dict__"):
            return v.__dict__
        elif isinstance(v, datetime.datetime):
            return v.isoformat()
        elif isinstance(v, datetime.date):
            return v.isoformat()
        else:
            return ""

    def toJSON(self, limitToSampleFields: bool = False):
        """
        serialize me to a JSON string

        Args:
            limitToSampleFields(bool): If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.

        Returns:
            a recursive JSON dump of the dicts of my objects
        """
        data = {}
        if limitToSampleFields:
            samples = self.getJsonTypeSamples()
            sampleFields = LOD.getFields(samples)
            if isinstance(self, JSONAbleList):
                limitedRecords = []
                for record in self.__dict__[self.listName]:
                    limitedRecord = {}
                    for key, value in record.__dict__.items():
                        if key in sampleFields:
                            limitedRecord[key] = value
                    limitedRecords.append(limitedRecord)
                data[self.listName] = limitedRecords
            else:
                for key, value in self.__dict__.items():
                    if key in sampleFields:
                        data[key] = value
        else:
            data = self
        jsonStr = json.dumps(
            data,
            default=lambda v: self.toJsonAbleValue(v),
            sort_keys=True,
            indent=JSONAbleSettings.indent,
        )
        return jsonStr

    def getJSONValue(self, v):
        """
        get the value of the given v as JSON

        Args:
            v(object): the value to get

        Returns:
            the value, making sure objects are returned as dicts
        """
        if hasattr(v, "asJSON"):
            return v.asJSON(asString=False)
        elif type(v) is dict:
            return self.reprDict(v)
        elif type(v) is list:
            vlist = []
            for vitem in v:
                vlist.append(self.getJSONValue(vitem))
            return vlist
        elif isinstance(v, datetime.datetime):
            return v.isoformat()
        elif isinstance(v, datetime.date):
            return v.isoformat()
        elif isinstance(v, bool):
            # convert True,False to -> true,false
            return str(v).lower()
        else:
            return v

    def reprDict(self, srcDict):
        """
        get the given srcDict as new dict with fields being converted with getJSONValue

        Args:
            srcDict(dict): the source dictionary

        Returns:
            dict: the converted dictionary
        """
        d = dict()
        for a, v in srcDict.items():
            d[a] = self.getJSONValue(v)
        return d

    def asJSON(self, asString=True, data=None):
        """
        recursively return my dict elements

        Args:
            asString(boolean): if True return my result as a string
            data(dict): optional dict to convert instead of my __dict__
        """
        if data is None:
            data = self.__dict__
        jsonDict = self.reprDict(data)
        if asString:
            jsonStr = str(jsonDict)
            jsonStr = JSONAble.singleQuoteToDoubleQuote(jsonStr)
            return jsonStr
        return jsonDict

__init__()

Constructor

Source code in lodstorage/jsonable.py
37
38
39
40
def __init__(self):
    """
    Constructor - the mixin needs no instance state of its own
    """

asJSON(asString=True, data=None)

recursively return my dict elements

Parameters:

Name Type Description Default
asString(boolean)

if True return my result as a string

required
Source code in lodstorage/jsonable.py
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
def asJSON(self, asString=True, data=None):
    """
    convert my attributes (or the given data dict) to JSON

    Args:
        asString(boolean): if True return my result as a string
        data(dict): optional dict to convert instead of my __dict__
    """
    source = self.__dict__ if data is None else data
    jsonDict = self.reprDict(source)
    if not asString:
        return jsonDict
    jsonStr = JSONAble.singleQuoteToDoubleQuote(str(jsonDict))
    return jsonStr

checkExtension(jsonFile, extension='.json')

make sure the jsonFile has the given extension e.g. ".json"

Parameters:

Name Type Description Default
jsonFile(str)

the jsonFile name - potentially without ".json" suffix

required

Returns:

Name Type Description
str str

the jsonFile name with ".json" as an extension guaranteed

Source code in lodstorage/jsonable.py
174
175
176
177
178
179
180
181
182
183
184
185
186
def checkExtension(self, jsonFile: str, extension: str = ".json") -> str:
    """
    guarantee that the given file name carries the given extension

    Args:
        jsonFile(str): the jsonFile name - potentially without ".json" suffix
        extension(str): the extension to append when missing

    Returns:
        str: the jsonFile name with the extension guaranteed
    """
    hasExtension = jsonFile.endswith(extension)
    return jsonFile if hasExtension else f"{jsonFile}{extension}"

fromDict(data)

initialize me from the given data

Parameters:

Name Type Description Default
data(dict)

the dictionary to initialize me from

required
Source code in lodstorage/jsonable.py
223
224
225
226
227
228
229
230
231
232
233
def fromDict(self, data: dict):
    """
    set one attribute on me per entry of the given dict

    Args:
        data(dict): the dictionary to initialize me from
    """
    for key, value in data.items():
        setattr(self, key, value)

fromJson(jsonStr)

initialize me from the given JSON string

Parameters:

Name Type Description Default
jsonStr(str)

the JSON string

required
Source code in lodstorage/jsonable.py
213
214
215
216
217
218
219
220
221
def fromJson(self, jsonStr):
    """
    parse the given JSON string and initialize me from the resulting dict

    Args:
        jsonStr(str): the JSON string
    """
    data = json.loads(jsonStr)
    self.fromDict(data)

getJSONValue(v)

get the value of the given v as JSON

Parameters:

Name Type Description Default
v(object)

the value to get

required

Returns:

Type Description

the value, making sure objects are returned as dicts

Source code in lodstorage/jsonable.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
def getJSONValue(self, v):
    """
    convert the given value v to its JSON representation

    Args:
        v(object): the value to get

    Returns:
        the value, making sure objects are returned as dicts
    """
    if hasattr(v, "asJSON"):
        return v.asJSON(asString=False)
    if type(v) is dict:
        return self.reprDict(v)
    if type(v) is list:
        return [self.getJSONValue(item) for item in v]
    # datetime is a subclass of date; both are rendered in ISO format
    if isinstance(v, (datetime.datetime, datetime.date)):
        return v.isoformat()
    if isinstance(v, bool):
        # convert True,False to -> true,false
        return str(v).lower()
    return v

getJsonTypeSamples()

does my class provide a "getSamples" method?

Source code in lodstorage/jsonable.py
121
122
123
124
125
126
127
128
129
130
def getJsonTypeSamples(self):
    """
    get type samples via my class - or, for a JSONAbleList without its
    own samples, via the class of my list elements
    """
    if not hasattr(self, "__class__"):
        return None
    cls = self.__class__
    if isinstance(self, JSONAbleList) and not hasattr(cls, "getSamples"):
        cls = self.clazz
    return JSONAble.getJsonTypeSamplesForClass(cls)

getJsonTypeSamplesForClass() staticmethod

return the type samples for the given class

Return

list: a list of dict that specify the types by example

Source code in lodstorage/jsonable.py
132
133
134
135
136
137
138
139
140
141
142
143
144
@staticmethod
def getJsonTypeSamplesForClass(cls):
    """
    return the type samples for the given class

    Return:
        list: a list of dict that specify the types by example
    """
    if hasattr(cls, "getSamples"):
        getSamples = getattr(cls, "getSamples")
        if callable(getSamples):
            return getSamples()
    return None

readJsonFromFile(jsonFilePath) staticmethod

read json string from the given jsonFilePath

Parameters:

Name Type Description Default
jsonFilePath(string)

the path of the file where to read the result from

required

Returns:

Type Description

the JSON string read from the file

Source code in lodstorage/jsonable.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
@staticmethod
def readJsonFromFile(jsonFilePath):
    """
    read json string from the given jsonFilePath

    Args:
        jsonFilePath(string): the path of the file where to read the result from

    Returns:
        the JSON string read from the file
    """
    with open(jsonFilePath, "r") as jsonFile:
        jsonStr = jsonFile.read()
    return jsonStr

reprDict(srcDict)

get the given srcDict as new dict with fields being converted with getJSONValue

Parameters:

Name Type Description Default
srcDict(dict)

the source dictionary

required

Returns dict: the converted dictionary

Source code in lodstorage/jsonable.py
316
317
318
319
320
321
322
323
324
325
326
327
328
329
def reprDict(self, srcDict):
    """
    build a new dict from srcDict with every value converted via getJSONValue

    Args:
        srcDict(dict): the source dictionary

    Returns:
        dict: the converted dictionary
    """
    return {key: self.getJSONValue(value) for key, value in srcDict.items()}

restoreFromJsonFile(jsonFile)

restore me from the given jsonFile

Parameters:

Name Type Description Default
jsonFile(string)

the jsonFile to restore me from

required
Source code in lodstorage/jsonable.py
202
203
204
205
206
207
208
209
210
211
def restoreFromJsonFile(self, jsonFile: str):
    """
    load my state from the given jsonFile

    Args:
        jsonFile(string): the jsonFile to restore me from
    """
    jsonFilePath = self.checkExtension(jsonFile)
    jsonStr = JSONAble.readJsonFromFile(jsonFilePath)
    self.fromJson(jsonStr)

singleQuoteToDoubleQuote(singleQuoted, useRegex=False) staticmethod

convert a single quoted string to a double quoted one

Parameters:

Name Type Description Default
singleQuoted str

a single quoted string e.g.

.. highlight:: json

{'cities': [{'name': "Upper Hell's Gate"}]}

required
useRegex boolean

True if a regular expression shall be used for matching

False

Returns:

Name Type Description
string

the double quoted version of the string

Note

see - https://stackoverflow.com/questions/55600788/python-replace-single-quotes-with-double-quotes-but-leave-ones-within-double-q

Source code in lodstorage/jsonable.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
@staticmethod
def singleQuoteToDoubleQuote(singleQuoted, useRegex=False):
    """
    convert a single quoted string to a double quoted one

    Args:
        singleQuoted (str): a single quoted string e.g.

            .. highlight:: json

            {'cities': [{'name': "Upper Hell's Gate"}]}

        useRegex (boolean): True if a regular expression shall be used for matching

    Returns:
        string: the double quoted version of the string

    Note:
        see
        - https://stackoverflow.com/questions/55600788/python-replace-single-quotes-with-double-quotes-but-leave-ones-within-double-q

    """
    if useRegex:
        return JSONAble.singleQuoteToDoubleQuoteUsingRegex(singleQuoted)
    return JSONAble.singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted)

singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted) staticmethod

convert a single quoted string to a double quoted one using a bracket loop

Parameters:

Name Type Description Default
singleQuoted(string)

a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}

required
useRegex(boolean)

True if a regular expression shall be used for matching

required

Returns: string: the double quoted version of the string e.g. Note: see https://stackoverflow.com/a/63862387/1497139

Source code in lodstorage/jsonable.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
@staticmethod
def singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted):
    """
    convert a single quoted string to a double quoted one using a regular expression

    Args:
        singleQuoted(string): a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}
        useRegex(boolean): True if a regular expression shall be used for matching
    Returns:
        string: the double quoted version of the string e.g.
    Note:
        see https://stackoverflow.com/a/63862387/1497139

    """
    cList = list(singleQuoted)
    inDouble = False
    inSingle = False
    for i, c in enumerate(cList):
        # print ("%d:%s %r %r" %(i,c,inSingle,inDouble))
        if c == "'":
            if not inDouble:
                inSingle = not inSingle
                cList[i] = '"'
        elif c == '"':
            inDouble = not inDouble
            inSingle = False
    doubleQuoted = "".join(cList)
    return doubleQuoted

singleQuoteToDoubleQuoteUsingRegex(singleQuoted) staticmethod

convert a single quoted string to a double quoted one using a regular expression

Parameters:

Name Type Description Default
singleQuoted(string)

a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}

required
useRegex(boolean)

True if a regular expression shall be used for matching

required

Returns: string: the double quoted version of the string e.g. Note: see https://stackoverflow.com/a/50257217/1497139

Source code in lodstorage/jsonable.py
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
@staticmethod
def singleQuoteToDoubleQuoteUsingRegex(singleQuoted):
    """
    convert a single quoted string to a double quoted one using a regular expression

    Args:
        singleQuoted(string): a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}
    Returns:
        string: the double quoted version of the string
    Note:
        see https://stackoverflow.com/a/50257217/1497139
    """
    return JSONAbleSettings.singleQuoteRegex.sub('"', singleQuoted)

storeJsonToFile(jsonStr, jsonFilePath) staticmethod

store the given json string to the given jsonFilePath

Parameters:

Name Type Description Default
jsonStr(string)

the string to store

required
jsonFilePath(string)

the path of the file where to store the result

required
Source code in lodstorage/jsonable.py
161
162
163
164
165
166
167
168
169
170
171
172
@staticmethod
def storeJsonToFile(jsonStr, jsonFilePath):
    """
    store the given json string to the given jsonFilePath

    Args:
        jsonStr(string): the string to store
        jsonFilePath(string): the path of the file where to store the result

    """
    with open(jsonFilePath, "w") as jsonFile:
        jsonFile.write(jsonStr)

storeToJsonFile(jsonFile, extension='.json', limitToSampleFields=False)

store me to the given jsonFile

Parameters:

Name Type Description Default
jsonFile(str)

the JSON file name (optionally without extension)

required
extension(str)

the extension to use if not part of the jsonFile name

required
limitToSampleFields(bool)

If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.

required
Source code in lodstorage/jsonable.py
188
189
190
191
192
193
194
195
196
197
198
199
200
def storeToJsonFile(
    self, jsonFile: str, extension: str = ".json", limitToSampleFields: bool = False
):
    """
    store me to the given jsonFile

    Args:
        jsonFile(str): the JSON file name (optionally without extension)
        extension(str): the extension to use if not part of the jsonFile name
        limitToSampleFields(bool): If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.
    """
    # make sure the file name carries the expected extension
    jsonFile = self.checkExtension(jsonFile, extension)
    JSONAble.storeJsonToFile(self.toJSON(limitToSampleFields), jsonFile)

toJSON(limitToSampleFields=False)

Parameters:

Name Type Description Default
limitToSampleFields(bool)

If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.

required

Returns:

Type Description

a recursive JSON dump of the dicts of my objects

Source code in lodstorage/jsonable.py
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
def toJSON(self, limitToSampleFields: bool = False):
    """
    serialize me to a JSON string

    Args:
        limitToSampleFields(bool): If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.

    Returns:
        a recursive JSON dump of the dicts of my objects
    """
    data = {}
    if limitToSampleFields:
        samples = self.getJsonTypeSamples()
        sampleFields = LOD.getFields(samples)
        if isinstance(self, JSONAbleList):
            # list container: serialize only the records of my list
            # attribute, restricted to the sampled fields
            limitedRecords = []
            for record in self.__dict__[self.listName]:
                limitedRecord = {}
                for key, value in record.__dict__.items():
                    if key in sampleFields:
                        limitedRecord[key] = value
                limitedRecords.append(limitedRecord)
            data[self.listName] = limitedRecords
        else:
            # plain object: keep only the sampled attributes
            for key, value in self.__dict__.items():
                if key in sampleFields:
                    data[key] = value
    else:
        # serialize all attributes; toJsonAbleValue converts
        # non-JSON-native values
        data = self
    jsonStr = json.dumps(
        data,
        default=lambda v: self.toJsonAbleValue(v),
        sort_keys=True,
        indent=JSONAbleSettings.indent,
    )
    return jsonStr

toJsonAbleValue(v)

return the JSON able value of the given value v Args: v(object): the value to convert

Source code in lodstorage/jsonable.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def toJsonAbleValue(self, v):
    """
    return the JSON able value of the given value v

    Args:
        v(object): the value to convert
    """
    # objects carrying a __dict__ hash table are serialized via it
    if hasattr(v, "__dict__"):
        return v.__dict__
    # date and datetime values are rendered in ISO 8601 format
    if isinstance(v, (datetime.datetime, datetime.date)):
        return v.isoformat()
    # anything else is not serializable - map it to an empty string
    return ""

JSONAbleList

Bases: JSONAble

Container class

Source code in lodstorage/jsonable.py
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
class JSONAbleList(JSONAble):
    """
    Container class

    holds a list of dicts - or, in ORM mode, a list of instances of the
    configured clazz - under the attribute named by listName
    """

    def __init__(
        self,
        listName: str = None,
        clazz=None,
        tableName: str = None,
        initList: bool = True,
        handleInvalidListTypes=False,
        filterInvalidListTypes=False,
    ):
        """
        Constructor

        Args:
            listName(str): the name of the list attribute to be used for storing the List
            clazz(class): a class to be used for Object relational mapping (if any)
            tableName(str): the name of the "table" to be used
            initList(bool): True if the list should be initialized
            handleInvalidListTypes(bool): True if invalidListTypes should be converted or filtered
            filterInvalidListTypes(bool): True if invalidListTypes should be deleted
        """
        self.clazz = clazz
        self.handleInvalidListTypes = handleInvalidListTypes
        self.filterInvalidListTypes = filterInvalidListTypes
        if listName is None:
            if self.clazz is not None:
                listName = self.clazz.getPluralname()
            else:
                # fix: derive the default list name from the class name;
                # the previous self.__class__.name would raise an
                # AttributeError since ordinary classes have no "name"
                # attribute - __name__ is the intended lookup
                listName = self.__class__.__name__.lower()
        self.listName = listName
        if tableName is None:
            self.tableName = listName
        else:
            self.tableName = tableName
        if initList:
            self.__dict__[self.listName] = []

    def getList(self):
        """
        get my list
        """
        return self.__dict__[self.listName]

    def setListFromLoD(self, lod: list) -> list:
        """
        set my list from the given list of dicts

        Args:
            lod(list): a raw record list of dicts

        Returns:
            list: a list of dicts if no clazz is set
                otherwise a list of objects
        """
        # non OO mode
        if self.clazz is None:
            result = lod
            self.__dict__[self.listName] = result
        else:
            # ORM mode
            # TODO - handle errors
            self.fromLoD(lod, append=False)
        return self.getList()

    def getLoDfromJson(self, jsonStr: str, types=None, listName: str = None):
        """
        get a list of Dicts from the given JSON String

        Args:
            jsonStr(str): the JSON string
            types(Types): optional type information to be fixed after parsing
            listName(str): if set, accept a top-level JSON list directly
        Returns:
            list: a list of dicts

        Raises:
            Exception: if the JSON is a dict without an entry for my listName
        """
        # read a data structure from the given JSON string
        lodOrDict = json.loads(jsonStr)
        # it should be a list only of dict with my list
        if not isinstance(lodOrDict, dict) and listName is not None:
            lod = lodOrDict
        else:
            if self.listName in lodOrDict:
                # get the relevant list of dicts
                lod = lodOrDict[self.listName]
            else:
                msg = f"invalid JSON for getLoD from Json\nexpecting a list of dicts or a dict with '{self.listName}' as list\nfound a dict with keys: {lodOrDict.keys()} instead"
                raise Exception(msg)
        if types is not None:
            types.fixTypes(lod, self.listName)
        return lod

    def fromLoD(self, lod, append: bool = True, debug: bool = False):
        """
        load my entityList from the given list of dicts

        Args:
            lod(list): the list of dicts to load
            append(bool): if True append to my existing entries
            debug(bool): if True print conversion errors

        Return:
            list: a list of errors (if any)

        """
        errors = []
        entityList = self.getList()
        if not append:
            # clear in place so external references stay valid
            del entityList[:]
        if self.handleInvalidListTypes:
            LOD.handleListTypes(lod=lod, doFilter=self.filterInvalidListTypes)

        for record in lod:
            # call the constructor to get a new instance
            try:
                entity = self.clazz()
                entity.fromDict(record)
                entityList.append(entity)
            except Exception as ex:
                # collect the failing record together with its error
                error = {self.listName: record, "error": ex}
                errors.append(error)
                if debug:
                    print(error)
        return errors

    def getLookup(self, attrName: str, withDuplicates: bool = False):
        """
        create a lookup dictionary by the given attribute name

        Args:
            attrName(str): the attribute to lookup
            withDuplicates(bool): whether to retain single values or lists

        Return:
            a dictionary for lookup or a tuple dictionary,list of duplicates depending on withDuplicates
        """
        return LOD.getLookup(self.getList(), attrName, withDuplicates)

    def getJsonData(self):
        """
        get my Jsondata - a dict mapping my list name to my list
        """
        jsonData = {self.listName: self.__dict__[self.listName]}
        return jsonData

    def toJsonAbleValue(self, v):
        """
        make sure we don't store our meta information
        clazz, tableName and listName but just the list we are holding
        """
        if v == self:
            return self.getJsonData()
        else:
            return super().toJsonAbleValue(v)

    def fromJson(self, jsonStr, types=None):
        """
        initialize me from the given JSON string

        Args:
            jsonStr(str): the JSON string
            types(Types): optional type information to be fixed after parsing
        """
        lod = self.getLoDfromJson(jsonStr, types, listName=self.listName)
        self.setListFromLoD(lod)

    def asJSON(self, asString=True):
        """
        return my list payload as JSON

        Args:
            asString(bool): if True return a JSON string else raw data
        """
        jsonData = self.getJsonData()
        return super().asJSON(asString, data=jsonData)

    def restoreFromJsonFile(self, jsonFile: str) -> list:
        """
        read my list of dicts from the given JSON file and restore it
        """
        lod = self.readLodFromJsonFile(jsonFile)
        return self.setListFromLoD(lod)

    def restoreFromJsonStr(self, jsonStr: str) -> list:
        """
        restore me from the given jsonStr

        Args:
            jsonStr(str): the json string to restore me from
        """
        lod = self.readLodFromJsonStr(jsonStr)
        return self.setListFromLoD(lod)

    def readLodFromJsonFile(self, jsonFile: str, extension: str = ".json"):
        """
        read the list of dicts from the given jsonFile

        Args:
            jsonFile(string): the jsonFile to read from
            extension(string): the extension to append if not part of jsonFile

        Returns:
            list: a list of dicts
        """
        jsonFile = self.checkExtension(jsonFile, extension)
        jsonStr = JSONAble.readJsonFromFile(jsonFile)
        lod = self.readLodFromJsonStr(jsonStr)
        return lod

    def readLodFromJsonStr(self, jsonStr) -> list:
        """
        read a list of dicts from the given JSON string

        Args:
            jsonStr(str): the JSON string to parse

        Returns:
            list: a list of dicts
        """
        # derive type information from samples so that e.g. dates
        # can be restored from their string representation
        if self.clazz is None:
            typeSamples = self.getJsonTypeSamples()
        else:
            typeSamples = self.clazz.getSamples()
        if typeSamples is None:
            types = None
        else:
            types = Types(
                self.listName, warnOnUnsupportedTypes=not self.handleInvalidListTypes
            )
            types.getTypes(self.listName, typeSamples, len(typeSamples))
        lod = self.getLoDfromJson(jsonStr, types, listName=self.listName)
        return lod

__init__(listName=None, clazz=None, tableName=None, initList=True, handleInvalidListTypes=False, filterInvalidListTypes=False)

Constructor

Parameters:

Name Type Description Default
listName(str)

the name of the list attribute to be used for storing the List

required
clazz(class)

a class to be used for Object relational mapping (if any)

required
tableName(str)

the name of the "table" to be used

required
initList(bool)

True if the list should be initialized

required
handleInvalidListTypes(bool)

True if invalidListTypes should be converted or filtered

required
filterInvalidListTypes(bool)

True if invalidListTypes should be deleted

required
Source code in lodstorage/jsonable.py
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
def __init__(
    self,
    listName: str = None,
    clazz=None,
    tableName: str = None,
    initList: bool = True,
    handleInvalidListTypes=False,
    filterInvalidListTypes=False,
):
    """
    Constructor

    Args:
        listName(str): the name of the list attribute to be used for storing the List
        clazz(class): a class to be used for Object relational mapping (if any)
        tableName(str): the name of the "table" to be used
        initList(bool): True if the list should be initialized
        handleInvalidListTypes(bool): True if invalidListTypes should be converted or filtered
        filterInvalidListTypes(bool): True if invalidListTypes should be deleted
    """
    self.clazz = clazz
    self.handleInvalidListTypes = handleInvalidListTypes
    self.filterInvalidListTypes = filterInvalidListTypes
    if listName is None:
        if self.clazz is not None:
            listName = self.clazz.getPluralname()
        else:
            # fix: use __name__ - self.__class__.name would raise an
            # AttributeError for ordinary classes
            listName = self.__class__.__name__.lower()
    self.listName = listName
    if tableName is None:
        self.tableName = listName
    else:
        self.tableName = tableName
    if initList:
        self.__dict__[self.listName] = []

fromJson(jsonStr, types=None)

initialize me from the given JSON string

Parameters:

Name Type Description Default
jsonStr(str)

the JSON string

required
fixType(Types)

the types to be fixed

required
Source code in lodstorage/jsonable.py
504
505
506
507
508
509
510
511
512
513
def fromJson(self, jsonStr, types=None):
    """
    initialize me from the given JSON string

    Args:
        jsonStr(str): the JSON string
        types(Types): optional type information to be fixed after parsing
    """
    lod = self.getLoDfromJson(jsonStr, types, listName=self.listName)
    self.setListFromLoD(lod)

fromLoD(lod, append=True, debug=False)

load my entityList from the given list of dicts

Parameters:

Name Type Description Default
lod(list)

the list of dicts to load

required
append(bool)

if True append to my existing entries

required
Return

list: a list of errors (if any)

Source code in lodstorage/jsonable.py
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
def fromLoD(self, lod, append: bool = True, debug: bool = False):
    """
    load my entityList from the given list of dicts

    Args:
        lod(list): the list of dicts to load
        append(bool): if True append to my existing entries
        debug(bool): if True print conversion errors

    Return:
        list: a list of errors (if any)

    """
    errors = []
    entityList = self.getList()
    if not append:
        # clear in place so external references stay valid
        del entityList[:]
    if self.handleInvalidListTypes:
        LOD.handleListTypes(lod=lod, doFilter=self.filterInvalidListTypes)

    for record in lod:
        # call the constructor to get a new instance
        try:
            entity = self.clazz()
            entity.fromDict(record)
            entityList.append(entity)
        except Exception as ex:
            # collect the failing record together with its error
            error = {self.listName: record, "error": ex}
            errors.append(error)
            if debug:
                print(error)
    return errors

getJsonData()

get my Jsondata

Source code in lodstorage/jsonable.py
487
488
489
490
491
492
def getJsonData(self):
    """
    return my JSON payload: a dict mapping my list name to my list
    """
    payload = {self.listName: self.__dict__[self.listName]}
    return payload

getList()

get my list

Source code in lodstorage/jsonable.py
389
390
391
392
393
def getList(self):
    """
    return the list of records I am holding
    """
    entries = self.__dict__[self.listName]
    return entries

getLoDfromJson(jsonStr, types=None, listName=None)

get a list of Dicts from the given JSON String

Parameters:

Name Type Description Default
jsonStr(str)

the JSON string

required
fixType(Types)

the types to be fixed

required

Returns: list: a list of dicts

Source code in lodstorage/jsonable.py
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
def getLoDfromJson(self, jsonStr: str, types=None, listName: str = None):
    """
    get a list of Dicts from the given JSON String

    Args:
        jsonStr(str): the JSON string
        types(Types): optional type information to be fixed after parsing
        listName(str): if set, accept a top-level JSON list directly
    Returns:
        list: a list of dicts

    Raises:
        Exception: if the JSON is a dict without an entry for my listName
    """
    # read a data structure from the given JSON string
    lodOrDict = json.loads(jsonStr)
    # it should be a list only of dict with my list
    if not isinstance(lodOrDict, dict) and listName is not None:
        lod = lodOrDict
    else:
        # NOTE(review): the lookup uses self.listName, not the listName
        # parameter - confirm this asymmetry is intended
        if self.listName in lodOrDict:
            # get the relevant list of dicts
            lod = lodOrDict[self.listName]
        else:
            msg = f"invalid JSON for getLoD from Json\nexpecting a list of dicts or a dict with '{self.listName}' as list\nfound a dict with keys: {lodOrDict.keys()} instead"
            raise Exception(msg)
    if types is not None:
        types.fixTypes(lod, self.listName)
    return lod

getLookup(attrName, withDuplicates=False)

create a lookup dictionary by the given attribute name

Parameters:

Name Type Description Default
attrName(str)

the attribute to lookup

required
withDuplicates(bool)

whether to retain single values or lists

required
Return

a dictionary for lookup or a tuple dictionary,list of duplicates depending on withDuplicates

Source code in lodstorage/jsonable.py
474
475
476
477
478
479
480
481
482
483
484
485
def getLookup(self, attrName: str, withDuplicates: bool = False):
    """
    create a lookup dictionary by the given attribute name

    Args:
        attrName(str): the attribute to lookup
        withDuplicates(bool): whether to retain single values or lists

    Return:
        a dictionary for lookup or a tuple dictionary,list of duplicates depending on withDuplicates
    """
    # delegate to the LOD helper working on my current list
    return LOD.getLookup(self.getList(), attrName, withDuplicates)

readLodFromJsonFile(jsonFile, extension='.json')

read the list of dicts from the given jsonFile

Parameters:

Name Type Description Default
jsonFile(string)

the jsonFile to read from

required

Returns:

Name Type Description
list

a list of dicts

Source code in lodstorage/jsonable.py
536
537
538
539
540
541
542
543
544
545
546
547
548
549
def readLodFromJsonFile(self, jsonFile: str, extension: str = ".json"):
    """
    read the list of dicts from the given jsonFile

    Args:
        jsonFile(string): the jsonFile to read from
        extension(string): the extension to append if not part of jsonFile

    Returns:
        list: a list of dicts
    """
    jsonFile = self.checkExtension(jsonFile, extension)
    jsonStr = JSONAble.readJsonFromFile(jsonFile)
    lod = self.readLodFromJsonStr(jsonStr)
    return lod

readLodFromJsonStr(jsonStr)

restore me from the given jsonStr

Parameters:

Name Type Description Default
jsonStr(string)

the JSON string to parse

required
Source code in lodstorage/jsonable.py
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
def readLodFromJsonStr(self, jsonStr) -> list:
    """
    read a list of dicts from the given JSON string

    Args:
        jsonStr(str): the JSON string to parse

    Returns:
        list: a list of dicts
    """
    # derive type information from samples so that e.g. dates
    # can be restored from their string representation
    if self.clazz is None:
        typeSamples = self.getJsonTypeSamples()
    else:
        typeSamples = self.clazz.getSamples()
    if typeSamples is None:
        types = None
    else:
        types = Types(
            self.listName, warnOnUnsupportedTypes=not self.handleInvalidListTypes
        )
        types.getTypes(self.listName, typeSamples, len(typeSamples))
    lod = self.getLoDfromJson(jsonStr, types, listName=self.listName)
    return lod

restoreFromJsonFile(jsonFile)

read my list of dicts and restore it

Source code in lodstorage/jsonable.py
519
520
521
522
523
524
def restoreFromJsonFile(self, jsonFile: str) -> list:
    """
    read my list of dicts from the given JSON file and restore it

    Args:
        jsonFile(str): the JSON file to read from

    Returns:
        list: the restored list
    """
    lod = self.readLodFromJsonFile(jsonFile)
    return self.setListFromLoD(lod)

restoreFromJsonStr(jsonStr)

restore me from the given jsonStr

Parameters:

Name Type Description Default
jsonStr(str)

the json string to restore me from

required
Source code in lodstorage/jsonable.py
526
527
528
529
530
531
532
533
534
def restoreFromJsonStr(self, jsonStr: str) -> list:
    """
    restore me from the given jsonStr

    Args:
        jsonStr(str): the json string to restore me from

    Returns:
        list: the restored list
    """
    lod = self.readLodFromJsonStr(jsonStr)
    return self.setListFromLoD(lod)

setListFromLoD(lod)

set my list from the given list of dicts

Returns:

Name Type Description
list list

a list of dicts if no clazz is set otherwise a list of objects

Source code in lodstorage/jsonable.py
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def setListFromLoD(self, lod: list) -> list:
    """
    set my list from the given list of dicts

    Args:
        lod(list): a raw record list of dicts

    Returns:
        list: a list of dicts if no clazz is set
            otherwise a list of objects
    """
    # non OO mode
    if self.clazz is None:
        result = lod
        self.__dict__[self.listName] = result
    else:
        # ORM mode
        # TODO - handle errors
        self.fromLoD(lod, append=False)
    return self.getList()

toJsonAbleValue(v)

make sure we don't store our meta information clazz, tableName and listName but just the list we are holding

Source code in lodstorage/jsonable.py
494
495
496
497
498
499
500
501
502
def toJsonAbleValue(self, v):
    """
    make sure we don't store our meta information
    clazz, tableName and listName but just the list we are holding

    Args:
        v(object): the value to convert
    """
    if v == self:
        # serialize myself as just my list payload
        return self.getJsonData()
    else:
        return super().toJsonAbleValue(v)

JSONAbleSettings

settings for JSONAble - put in a separate class so they would not be serialized

Source code in lodstorage/jsonable.py
15
16
17
18
19
20
21
22
23
24
25
26
class JSONAbleSettings:
    """
    Settings for JSONAble - kept in a separate class so that they are
    not serialized along with instance data.
    """

    # number of spaces used when indenting JSON output
    indent = 4

    # regular expression for converting single quotes to double quotes:
    # matches a single quote that is not preceded by a backslash
    # see https://stackoverflow.com/a/50257217/1497139
    singleQuoteRegex = re.compile(r"(?<!\\)'")

indent = 4 class-attribute instance-attribute

regular expression to be used for conversion from singleQuote to doubleQuote see https://stackoverflow.com/a/50257217/1497139

Types

Bases: JSONAble

Types

holds entity meta Info

:ivar name(string): entity name = table name

Source code in lodstorage/jsonable.py
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
class Types(JSONAble):
    """
    Types

    holds entity meta Info

    :ivar name(string): entity name = table name
    """

    # mapping from type names (as stored in typeMap) to python types
    typeName2Type = {
        "bool": bool,
        "date": datetime.date,
        "datetime": datetime.datetime,
        "float": float,
        "int": int,
        "str": str,
    }

    def __init__(self, name: str, warnOnUnsupportedTypes=True, debug=False):
        """
        Constructor

        Args:
            name(str): the name of the type map
            warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type
            debug(bool): if True - debugging information should be shown
        """
        self.name = name
        self.warnOnUnsupportedTypes = warnOnUnsupportedTypes
        self.debug = debug
        # map: listName -> field name -> python type name
        self.typeMap = {}

    @staticmethod
    def forTable(
        instance, listName: str, warnOnUnsupportedTypes: bool = True, debug=False
    ):
        """
        get the types for the list of Dicts (table) in the given instance with the given listName
        Args:
            instance(object): the instance to inspect
            listName(string): the list of dicts to inspect
            warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type
            debug(bool): True if debugging information should be shown

        Returns:
            Types: a types object
        """
        clazz = type(instance)
        types = Types(
            clazz.__name__, warnOnUnsupportedTypes=warnOnUnsupportedTypes, debug=debug
        )
        types.getTypes(listName, instance.__dict__[listName])
        return types

    def addType(self, listName, field, valueType):
        """
        add the python type for the given field to the typeMap

        Args:
           listName(string): the name of the list of the field
           field(string): the name of the field

           valueType(type): the python type of the field
        """
        if listName not in self.typeMap:
            self.typeMap[listName] = {}
        typeMap = self.typeMap[listName]
        # first type seen for a field wins - later records do not override
        if not field in typeMap:
            typeMap[field] = valueType

    def getTypes(self, listName: str, sampleRecords: list, limit: int = 10):
        """
        determine the types for the given sample records

        Args:
            listName(str): the name of the list
            sampleRecords(list): a list of items
            limit(int): the maximum number of items to check
        """
        for sampleRecord in sampleRecords[:limit]:
            items = sampleRecord.items()
            self.getTypesForItems(listName, items, warnOnNone=len(sampleRecords) == 1)

    def getTypesForItems(self, listName: str, items: list, warnOnNone: bool = False):
        """
        get the types for the given items
        side effect is setting my types

        Args:
            listName(str): the name of the list
            items(list): a list of items
            warnOnNone(bool): if TRUE warn if an item value is None

        """
        for key, value in items:
            valueType = None
            if value is None:
                # NOTE(review): the str fallback is only applied when
                # debug is active - for None values without debug no
                # type is recorded at all; confirm this is intended
                if warnOnNone and self.debug:
                    print(
                        f"Warning sampleRecord field {key} is None - using string as type"
                    )
                    valueType = str
            else:
                valueType = type(value)
            # only the scalar types in typeName2Type are supported
            if valueType == str:
                pass
            elif valueType == int:
                pass
            elif valueType == float:
                pass
            elif valueType == bool:
                pass
            elif valueType == datetime.date:
                pass
            elif valueType == datetime.datetime:
                pass
            else:
                if valueType is not None:
                    msg = f"warning: unsupported type {str(valueType)} for field {key}"
                    if self.debug and self.warnOnUnsupportedTypes:
                        print(msg)
            if valueType is not None:
                self.addType(listName, key, valueType.__name__)

    def fixTypes(self, lod: list, listName: str):
        """
        fix the types in the given data structure

        Args:
            lod(list): a list of dicts
            listName(str): the types to lookup by list name
        """
        # NOTE(review): the loop variable shadows the listName parameter,
        # so all type maps are applied regardless of the given listName -
        # confirm whether this is intended
        for listName in self.typeMap:
            self.fixListOfDicts(self.typeMap[listName], lod)

    def getType(self, typeName):
        """
        get the type for the given type name

        Returns:
            type: the python type or None if the name is unsupported
        """
        if typeName in Types.typeName2Type:
            return Types.typeName2Type[typeName]
        else:
            if self.debug:
                print("Warning unsupported type %s" % typeName)
            return None

    def fixListOfDicts(self, typeMap, listOfDicts):
        """
        fix the type in the given list of Dicts
        """
        for record in listOfDicts:
            for keyValue in record.items():
                key, value = keyValue
                if value is None:
                    record[key] = None
                elif key in typeMap:
                    valueType = self.getType(typeMap[key])
                    # convert string representations back to python values
                    if valueType == bool:
                        if type(value) == str:
                            b = value in ["True", "TRUE", "true"]
                        else:
                            b = value
                        record[key] = b
                    elif valueType == datetime.date:
                        # assumes value is an ISO date string - TODO confirm
                        dt = datetime.datetime.strptime(value, "%Y-%m-%d")
                        record[key] = dt.date()
                    elif valueType == datetime.datetime:
                        # see https://stackoverflow.com/questions/127803/how-do-i-parse-an-iso-8601-formatted-date
                        if isinstance(value, str):
                            if sys.version_info >= (3, 7):
                                dtime = datetime.datetime.fromisoformat(value)
                            else:
                                dtime = datetime.datetime.strptime(
                                    value, "%Y-%m-%dT%H:%M:%S.%f"
                                )
                        else:
                            # TODO: error handling
                            dtime = None
                        record[key] = dtime

__init__(name, warnOnUnsupportedTypes=True, debug=False)

Constructor

Parameters:

Name Type Description Default
name(str)

the name of the type map

required
warnOnUnsupportedTypes(bool)

if TRUE warn if an item value has an unsupported type

required
debug(bool)

if True - debugging information should be shown

required
Source code in lodstorage/jsonable.py
591
592
593
594
595
596
597
598
599
600
601
602
603
def __init__(self, name: str, warnOnUnsupportedTypes=True, debug=False):
    """
    Constructor

    Args:
        name(str): the name of the type map
        warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type
        debug(bool): if True - debugging information should be shown
    """
    self.name = name
    self.warnOnUnsupportedTypes = warnOnUnsupportedTypes
    self.debug = debug
    # map: listName -> field name -> python type name
    self.typeMap = {}

addType(listName, field, valueType)

add the python type for the given field to the typeMap

Parameters:

Name Type Description Default
listName(string)

the name of the list of the field

required
field(string)

the name of the field

required
valueType(type)

the python type of the field

required
Source code in lodstorage/jsonable.py
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
def addType(self, listName, field, valueType):
    """
    register the python type for the given field in my typeMap

    Args:
       listName(string): the name of the list of the field
       field(string): the name of the field

       valueType(type): the python type of the field
    """
    # the first type recorded for a field wins
    fieldMap = self.typeMap.setdefault(listName, {})
    if field not in fieldMap:
        fieldMap[field] = valueType

fixListOfDicts(typeMap, listOfDicts)

fix the type in the given list of Dicts

Source code in lodstorage/jsonable.py
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
def fixListOfDicts(self, typeMap, listOfDicts):
    """
    fix the types in the given list of dicts (in place)

    Args:
        typeMap(dict): maps field names to type names
        listOfDicts(list): the records whose values are converted to the
            registered python types
    """
    for record in listOfDicts:
        for key, value in record.items():
            if value is None:
                record[key] = None
            elif key in typeMap:
                valueType = self.getType(typeMap[key])
                if valueType == bool:
                    # fix: use isinstance instead of an exact type() comparison
                    if isinstance(value, str):
                        record[key] = value in ["True", "TRUE", "true"]
                    else:
                        record[key] = value
                elif valueType == datetime.date:
                    dt = datetime.datetime.strptime(value, "%Y-%m-%d")
                    record[key] = dt.date()
                elif valueType == datetime.datetime:
                    if isinstance(value, str):
                        # fromisoformat is available since Python 3.7 so the
                        # former sys.version_info fallback was dead code
                        record[key] = datetime.datetime.fromisoformat(value)
                    else:
                        # TODO: error handling for non-string datetime values
                        record[key] = None

fixTypes(lod, listName)

fix the types in the given data structure

Parameters:

Name Type Description Default
lod(list)

a list of dicts

required
listName(str)

the types to lookup by list name

required
Source code in lodstorage/jsonable.py
697
698
699
700
701
702
703
704
705
706
def fixTypes(self, lod: list, listName: str):
    """
    fix the types in the given data structure

    Args:
        lod(list): a list of dicts
        listName(str): the name of the list whose registered types should be applied
    """
    # fix: the listName parameter used to be shadowed by the loop variable,
    # so every registered type map was applied to lod regardless of the
    # given listName; now only the matching type map is used
    fieldTypeMap = self.typeMap.get(listName)
    if fieldTypeMap is not None:
        self.fixListOfDicts(fieldTypeMap, lod)

forTable(instance, listName, warnOnUnsupportedTypes=True, debug=False) staticmethod

get the types for the list of Dicts (table) in the given instance with the given listName Args: instance(object): the instance to inspect listName(string): the list of dicts to inspect warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type debug(bool): True if debugging information should be shown

Returns:

Name Type Description
Types

a types object

Source code in lodstorage/jsonable.py
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
@staticmethod
def forTable(
    instance, listName: str, warnOnUnsupportedTypes: bool = True, debug=False
):
    """
    get the types for the list of dicts (table) in the given instance
    with the given listName

    Args:
        instance(object): the instance to inspect
        listName(string): the list of dicts to inspect
        warnOnUnsupportedTypes(bool): if True warn if an item value has an unsupported type
        debug(bool): True if debugging information should be shown

    Returns:
        Types: a types object
    """
    # the table (list of dicts) is expected as an attribute of the instance
    sampleRecords = instance.__dict__[listName]
    types = Types(
        type(instance).__name__,
        warnOnUnsupportedTypes=warnOnUnsupportedTypes,
        debug=debug,
    )
    types.getTypes(listName, sampleRecords)
    return types

getType(typeName)

get the type for the given type name

Source code in lodstorage/jsonable.py
708
709
710
711
712
713
714
715
716
717
def getType(self, typeName):
    """
    get the python type for the given type name

    Returns:
        type: the mapped python type or None for unsupported type names
    """
    valueType = Types.typeName2Type.get(typeName)
    if valueType is None and self.debug:
        print("Warning unsupported type %s" % typeName)
    return valueType

getTypes(listName, sampleRecords, limit=10)

determine the types for the given sample records

Parameters:

Name Type Description Default
listName(str)

the name of the list

required
sampleRecords(list)

a list of items

required
limit(int)

the maximum number of items to check

required
Source code in lodstorage/jsonable.py
643
644
645
646
647
648
649
650
651
652
653
654
def getTypes(self, listName: str, sampleRecords: list, limit: int = 10):
    """
    determine the types for the given sample records

    Args:
        listName(str): the name of the list
        sampleRecords(list): a list of items
        limit(int): the maximum number of items to check
    """
    # with exactly one sample there is no further record that could
    # reveal the type of a None-valued field - so warn in that case
    warnOnNone = len(sampleRecords) == 1
    for sampleRecord in sampleRecords[:limit]:
        self.getTypesForItems(listName, sampleRecord.items(), warnOnNone=warnOnNone)

getTypesForItems(listName, items, warnOnNone=False)

get the types for the given items; the side effect is setting my types

Parameters:

Name Type Description Default
listName(str)

the name of the list

required
items(list)

a list of items

required
warnOnNone(bool)

if TRUE warn if an item value is None

required
Source code in lodstorage/jsonable.py
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
def getTypesForItems(self, listName: str, items: list, warnOnNone: bool = False):
    """
    get the types for the given items;
    side effect is registering the types in my typeMap

    Args:
        listName(str): the name of the list
        items(list): a list of (key, value) pairs
        warnOnNone(bool): if True use str as the fallback type for None values
            (and warn in debug mode)
    """
    # the types this type map knows how to handle downstream
    supportedTypes = (str, int, float, bool, datetime.date, datetime.datetime)
    for key, value in items:
        if value is None:
            if not warnOnNone:
                # skip - another sample record may reveal the type
                continue
            if self.debug:
                print(
                    f"Warning sampleRecord field {key} is None - using string as type"
                )
            # fix: the str fallback used to be nested inside the debug
            # conditional so it only took effect with debug enabled;
            # it now applies whenever warnOnNone is set
            valueType = str
        else:
            valueType = type(value)
            if valueType not in supportedTypes:
                # unsupported types are still registered but flagged
                if self.debug and self.warnOnUnsupportedTypes:
                    msg = f"warning: unsupported type {str(valueType)} for field {key}"
                    print(msg)
        self.addType(listName, key, valueType.__name__)

jsonpicklemixin

JsonPickleMixin

Bases: object

allow reading and writing derived objects from a jsonpickle file

Source code in lodstorage/jsonpicklemixin.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
class JsonPickleMixin(object):
    """
    allow reading and writing derived objects from a jsonpickle file
    """

    # class level debug flag - set to True to trace file I/O
    debug = False

    @staticmethod
    def checkExtension(jsonFile: str, extension: str = ".json") -> str:
        """
        make sure the jsonFile has the given extension e.g. ".json"

        Args:
            jsonFile(str): the jsonFile name - potentially without ".json" suffix
            extension(str): the file extension to guarantee

        Returns:
            str: the jsonFile name with the given extension guaranteed
        """
        if not jsonFile.endswith(extension):
            jsonFile = f"{jsonFile}{extension}"
        return jsonFile

    @staticmethod
    def readJsonPickle(jsonFileName, extension=".jsonpickle"):
        """
        read an object from the given jsonpickle file

        Args:
            jsonFileName(str): name of the file (optionally without extension postfix)
            extension(str): default file extension

        Returns:
            the decoded object or None if the file does not exist
        """
        jsonFileName = JsonPickleMixin.checkExtension(jsonFileName, extension)
        # guard clause: nothing to read if there is no such file
        if not os.path.isfile(jsonFileName):
            return None
        if JsonPickleMixin.debug:
            print("reading %s" % (jsonFileName))
        # fix: renamed local "json" to avoid shadowing the stdlib module name
        with open(jsonFileName) as jsonFile:
            jsonText = jsonFile.read()
        result = jsonpickle.decode(jsonText)
        if JsonPickleMixin.debug:
            print(jsonText)
            print(result)
        return result

    def asJsonPickle(self) -> str:
        """
        convert me to JSON

        Returns:
            str: a JSON String with my JSON representation
        """
        return jsonpickle.encode(self)

    def writeJsonPickle(self, jsonFileName: str, extension: str = ".jsonpickle"):
        """
        write me to the json file with the given name (optionally without postfix)

        Args:
            jsonFileName(str): name of the file (optionally without extension postfix)
            extension(str): default file extension
        """
        jsonFileName = JsonPickleMixin.checkExtension(jsonFileName, extension)
        jsonText = self.asJsonPickle()
        if JsonPickleMixin.debug:
            print("writing %s" % (jsonFileName))
            print(jsonText)
            print(self)
        # fix: use a context manager so the file handle is closed even on errors
        with open(jsonFileName, "w") as jsonFile:
            jsonFile.write(jsonText)

asJsonPickle()

convert me to JSON

Returns:

Name Type Description
str str

a JSON String with my JSON representation

Source code in lodstorage/jsonpicklemixin.py
52
53
54
55
56
57
58
59
60
def asJsonPickle(self) -> str:
    """
    convert me to JSON

    Returns:
        str: a JSON String with my JSON representation
    """
    # delegate the serialization entirely to jsonpickle
    return jsonpickle.encode(self)

checkExtension(jsonFile, extension='.json') staticmethod

make sure the jsonFile has the given extension e.g. ".json"

Parameters:

Name Type Description Default
jsonFile(str)

the jsonFile name - potentially without ".json" suffix

required

Returns:

Name Type Description
str str

the jsonFile name with ".json" as an extension guaranteed

Source code in lodstorage/jsonpicklemixin.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
@staticmethod
def checkExtension(jsonFile: str, extension: str = ".json") -> str:
    """
    guarantee the given file name carries the given extension

    Args:
        jsonFile(str): the file name, possibly lacking the extension
        extension(str): the extension to enforce, e.g. ".json"

    Returns:
        str: the file name ending in the given extension
    """
    return jsonFile if jsonFile.endswith(extension) else f"{jsonFile}{extension}"

readJsonPickle(jsonFileName, extension='.jsonpickle') staticmethod

Parameters:

Name Type Description Default
jsonFileName(str)

name of the file (optionally without ".json" postfix)

required
extension(str)

default file extension

required
Source code in lodstorage/jsonpicklemixin.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
@staticmethod
def readJsonPickle(jsonFileName, extension=".jsonpickle"):
    """
    read an object from the given jsonpickle file

    Args:
        jsonFileName(str): name of the file (optionally without extension postfix)
        extension(str): default file extension

    Returns:
        the decoded object or None if no such file exists
    """
    jsonFileName = JsonPickleMixin.checkExtension(jsonFileName, extension)
    # guard clause: nothing to read if there is no such file
    if not os.path.isfile(jsonFileName):
        return None
    if JsonPickleMixin.debug:
        print("reading %s" % (jsonFileName))
    # fix: renamed local "json" to avoid shadowing the stdlib module name
    with open(jsonFileName) as jsonFile:
        jsonText = jsonFile.read()
    result = jsonpickle.decode(jsonText)
    if JsonPickleMixin.debug:
        print(jsonText)
        print(result)
    return result

writeJsonPickle(jsonFileName, extension='.jsonpickle')

write me to the json file with the given name (optionally without postfix)

Parameters:

Name Type Description Default
jsonFileName(str)

name of the file (optionally without ".json" postfix)

required
extension(str)

default file extension

required
Source code in lodstorage/jsonpicklemixin.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def writeJsonPickle(self, jsonFileName: str, extension: str = ".jsonpickle"):
    """
    write me to the json file with the given name (optionally without postfix)

    Args:
        jsonFileName(str): name of the file (optionally without extension postfix)
        extension(str): default file extension
    """
    jsonFileName = JsonPickleMixin.checkExtension(jsonFileName, extension)
    jsonText = self.asJsonPickle()
    if JsonPickleMixin.debug:
        print("writing %s" % (jsonFileName))
        print(jsonText)
        print(self)
    # fix: use a context manager so the file handle is closed even on errors
    with open(jsonFileName, "w") as jsonFile:
        jsonFile.write(jsonText)

linkml

Created on 2024-01-28

@author: wf

Class

Represents a class in the LinkML schema.

Source code in lodstorage/linkml.py
26
27
28
29
30
31
32
33
@lod_storable
class Class:
    """
    Represents a class in the LinkML schema.

    Attributes:
        description: human readable description of the class
        slots: the slots (fields/properties) belonging to the class
    """

    description: str
    # NOTE(review): LinkMLGen.gen_schema appends slot *names* (strings) to
    # this list although it is declared as List[Slot] - confirm the intent
    slots: List[Slot]

PythonTypes

python type handling

Source code in lodstorage/linkml.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
class PythonTypes:
    """
    python type handling

    provides lookups from python types to LinkML ranges and
    to RDF (XSD) datatypes
    """

    # Define a mapping from Python types to LinkML ranges
    to_linkml_ranges = {
        str: "string",
        int: "integer",
        float: "float",
        bool: "boolean",
        list: "list",
        dict: "dictionary",
    }
    # Mapping from Python types to RDF (XSD) datatypes
    to_rdf_datatypes = {
        str: XSD.string,
        int: XSD.integer,
        float: XSD.float,
        bool: XSD.boolean,
        # Add more mappings if needed
    }

    @classmethod
    def get_linkml_range(cls, ptype: Type) -> str:
        """
        Determines the LinkML range for a given Python type.

        Args:
            ptype (Type): The Python type for which the LinkML range is required.

        Returns:
            str: The corresponding LinkML range as a string. Defaults to "string" if the type is not found.
        """
        linkml_range = cls.to_linkml_ranges.get(ptype)
        if linkml_range is None:
            # unmapped types fall back to the most generic range
            linkml_range = "string"
        return linkml_range

    @classmethod
    def get_rdf_datatype(cls, ptype: Type) -> Optional[XSD]:
        """
        Determines the RDF (XSD) datatype for a given Python type.

        Args:
            ptype (Type): The Python type for which the RDF (XSD) datatype is required.

        Returns:
            XSD: The corresponding RDF (XSD) datatype. Returns None if the type is not found.
        """
        rdf_datatype = cls.to_rdf_datatypes.get(ptype)
        return rdf_datatype

get_linkml_range(ptype) classmethod

Determines the LinkML range for a given Python type.

Parameters:

Name Type Description Default
ptype Type

The Python type for which the LinkML range is required.

required

Returns:

Name Type Description
str str

The corresponding LinkML range as a string. Defaults to "string" if the type is not found.

Source code in lodstorage/linkml.py
113
114
115
116
117
118
119
120
121
122
123
124
@classmethod
def get_linkml_range(cls, ptype: Type) -> str:
    """
    Determines the LinkML range for a given Python type.

    Args:
        ptype (Type): The Python type for which the LinkML range is required.

    Returns:
        str: The corresponding LinkML range as a string. Defaults to "string" if the type is not found.
    """
    return cls.to_linkml_ranges.get(ptype, "string")

get_rdf_datatype(ptype) classmethod

Determines the RDF (XSD) datatype for a given Python type.

Parameters:

Name Type Description Default
ptype Type

The Python type for which the RDF (XSD) datatype is required.

required

Returns:

Name Type Description
XSD Optional[XSD]

The corresponding RDF (XSD) datatype. Returns None if the type is not found.

Source code in lodstorage/linkml.py
126
127
128
129
130
131
132
133
134
135
136
137
@classmethod
def get_rdf_datatype(cls, ptype: Type) -> Optional[XSD]:
    """
    Determines the RDF (XSD) datatype for a given Python type.

    Args:
        ptype (Type): The Python type for which the RDF (XSD) datatype is required.

    Returns:
        XSD: The corresponding RDF (XSD) datatype. Returns None if the type is not found.
    """
    return cls.to_rdf_datatypes.get(ptype)

Schema

Represents the entire LinkML schema.

Source code in lodstorage/linkml.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
@lod_storable
class Schema:
    """
    Represents the entire LinkML schema.

    Attributes:
        name: the schema name
        id: the schema identifier (usually a URI)
        description: human readable description of the schema
        title: optional title; defaults to name (see __post_init__)
        version: optional schema version
        license: optional license string
        default_prefix: optional default prefix
        prefixes: prefix -> expansion mapping
        imports: the list of imported schemas
        default_range: the range used when a slot declares none
        classes: class name -> Class definition
        slots: slot name -> Slot definition
        types: type name -> type definition
    """

    name: str
    id: str
    description: str
    title: Optional[str] = None
    version: Optional[str] = None
    license: Optional[str] = None

    default_prefix: Optional[str] = None

    prefixes: Dict[str, str] = field(default_factory=dict)
    imports: List[str] = field(default_factory=list)
    default_range: str = "string"
    classes: Dict[str, Class] = field(default_factory=dict)
    slots: Dict[str, Slot] = field(default_factory=dict)
    types: Dict[str, Type] = field(default_factory=dict)

    def __post_init__(self):
        # default the title to the schema name if none was given
        if not self.title:
            self.title = self.name

Slot

Represents a slot in the LinkML schema, equivalent to a field or property.

Source code in lodstorage/linkml.py
14
15
16
17
18
19
20
21
22
23
@lod_storable
class Slot:
    """
    Represents a slot in the LinkML schema, equivalent to a field or property.

    Attributes:
        description: human readable description of the slot
        range: the LinkML range (value type) of the slot
        multivalued: True if the slot holds multiple values
        identifier: True if the slot serves as the identifier of its class
    """

    description: str
    range: str = "string"
    multivalued: bool = False
    identifier: bool = False

linkml_gen

Created on 2024-01-21

@author: wf

LinkMLGen

Class for generating LinkML YAML schema from Python data models using dataclasses.

Source code in lodstorage/linkml_gen.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
class LinkMLGen:
    """
    Class for generating LinkML YAML schema from Python data models using dataclasses.
    """

    def __init__(self, schema: Schema):
        """
        Initialize the LinkMLGen.

        Args:
            schema (Schema): The LinkML schema to be generated.
        """
        self.schema = schema

    def gen_schema(self, data_model_class) -> Schema:
        """
        Generate LinkML schema elements for the given dataclass.

        Walks the fields of the dataclass, derives a LinkML range for each
        field (recursing into nested dataclasses for list/dict content
        types) and registers the resulting class and slots on self.schema.

        Args:
            data_model_class: a dataclass (the class itself, not an instance)

        Returns:
            Schema: the schema with the class and its slots added
        """
        # Use DocstringParser to extract class description
        parser = DocstringParser()
        class_description, doc_attributes = parser.parse(data_model_class.__doc__)

        class_name = data_model_class.__name__
        new_class = Class(description=class_description, slots=[])

        # Iterate over the fields of the dataclass
        for field_info in fields(data_model_class):
            attr_name = field_info.name
            attr_type = field_info.type

            # Handle Optional and List types
            is_optional = False
            is_list = False
            content_type = None
            # __origin__ is only present on typing generics like Optional/List/Dict
            if hasattr(attr_type, "__origin__"):
                if attr_type.__origin__ is Union and type(None) in attr_type.__args__:
                    is_optional = True
                    attr_type = [t for t in attr_type.__args__ if t is not type(None)][
                        0
                    ]  # unwrap Optional type
                elif attr_type.__origin__ is list:
                    is_list = True
                    content_type = attr_type.__args__[0]  # unwrap List type
                elif attr_type.__origin__ is dict:
                    # Assuming dictionary values are of interest, keys are strings
                    content_type = attr_type.__args__[
                        1
                    ]  # unwrap Dict type, focusing on value type
 
            # Check and handle nested dataclasses for lists or dicts
            if is_dataclass(content_type):
                # Recursive call to handle nested dataclass
                self.gen_schema(content_type)
                # Set the range to the name of the dataclass
                linkml_range = (
                    content_type.__name__
                )  # Use the name of the dataclass as the range
            elif is_list:
                # If it's a list, get the LinkML range for the base type
                # Use self.get_linkml_range to ensure consistent type mapping
                linkml_range = PythonTypes.get_linkml_range(content_type)
            else:
                # For non-list and non-dataclass types, use self.get_linkml_range for consistent type mapping
                linkml_range = PythonTypes.get_linkml_range(attr_type)

            # Extract description from doc_attributes
            description = doc_attributes.get(attr_name, {}).get(
                "description", f"{attr_name} - missing description"
            )

            # Create a new slot for the field
            new_slot = Slot(
                description=description, range=linkml_range, multivalued=is_list
            )
            self.schema.slots[attr_name] = new_slot
            new_class.slots.append(attr_name)

        self.schema.classes[class_name] = new_class
        return self.schema

    def gen_schema_from_instance(self, data_model_instance) -> Schema:
        """
        Generate a LinkML YAML schema from a Python data model using dataclasses.

        Args:
            data_model_instance: An instance of the Python data model.

        Returns:
            Schema: The LinkML schema generated from the data model.
        """
        # Use DocstringParser to extract class description and attributes
        parser = DocstringParser()
        class_description, doc_attributes = parser.parse(data_model_instance.__doc__)

        class_name = data_model_instance.__class__.__name__
        new_class = Class(description=class_description, slots=[])

        for field_info in fields(data_model_instance):
            attr_name = field_info.name
            attr_type = field_info.type

            # Extract field type/range
            linkml_range = PythonTypes.get_linkml_range(attr_type)

            # Check values for multivalued and type consistency
            attr_value = getattr(data_model_instance, attr_name)
            multivalued, actual_type = self.check_value(attr_value)

            # Ensure documentation, declaration, and value type are consistent
            # NOTE(review): this raises ValueError on any mismatch or missing doc
            self.ensure_consistency(
                attr_name, linkml_range, actual_type, doc_attributes
            )

            # Prepare slot
            description = doc_attributes.get(attr_name, {}).get(
                "description", f"{attr_name} - missing description"
            )
            if attr_name not in self.schema.slots:
                new_slot = Slot(
                    description=description, range=linkml_range, multivalued=multivalued
                )
                self.schema.slots[attr_name] = new_slot
                new_class.slots.append(attr_name)

            if multivalued:
                # recursive call if type of list or dict is a dataclass
                if hasattr(attr_type, "__args__"):
                    content_type = attr_type.__args__[
                        0
                    ]  # Get the declared content type
                    if is_dataclass(content_type):
                        self.gen_schema(content_type)

        self.schema.classes[class_name] = new_class
        return self.schema

    def check_value(self, value):
        """
        Check whether the given value is multivalued and determine its type name.

        Args:
            value: the attribute value to inspect

        Returns:
            tuple(bool, str): (multivalued, name of the value's python type)
        """
        # strings and bytes are iterable but count as scalar values here
        multivalued = isinstance(value, (Iterable, Mapping)) and not isinstance(
            value, (str, bytes)
        )
        value_type = type(value).__name__
        return multivalued, value_type

    def ensure_consistency(self, name, declared_type, actual_type, doc_attributes):
        """
        Ensure documentation, declaration and actual value type agree.

        Args:
            name: the attribute name
            declared_type: the LinkML range derived from the declaration
            actual_type: the type name observed on the actual value
            doc_attributes: attribute documentation parsed from the docstring

        Raises:
            ValueError: on a type mismatch or missing documentation
        """
        # Adjust this method to handle complex types like list, dict, etc.

        # Check if the actual type is a list or dict, and if so, get the type of its elements
        if actual_type == "list" or actual_type == "dict":
            # You may need a more complex logic here to handle lists of custom dataclasses
            # For simplicity, let's assume it's a list of strings for now
            actual_type = "string"

        # Now compare the adjusted actual type with the declared type
        if declared_type != actual_type:
            raise ValueError(
                f"Type mismatch for '{name}': declared as '{declared_type}', actual type is '{actual_type}'"
            )

        # Check for documentation
        if name not in doc_attributes:
            raise ValueError(f"Missing documentation for field '{name}'")

__init__(schema)

Initialize the LinkMLGen.

Parameters:

Name Type Description Default
schema Schema

The LinkML schema to be generated.

required
Source code in lodstorage/linkml_gen.py
20
21
22
23
24
25
26
27
def __init__(self, schema: Schema):
    """
    Initialize the LinkMLGen.

    Args:
        schema (Schema): The LinkML schema to be generated.
    """
    # the schema under construction - gen_schema* methods add to it
    self.schema = schema

gen_schema_from_instance(data_model_instance)

Generate a LinkML YAML schema from a Python data model using dataclasses.

Parameters:

Name Type Description Default
data_model_instance

An instance of the Python data model.

required

Returns:

Name Type Description
Schema Schema

The LinkML schema generated from the data model.

Source code in lodstorage/linkml_gen.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def gen_schema_from_instance(self, data_model_instance) -> Schema:
    """
    Generate a LinkML YAML schema from a Python data model using dataclasses.

    Args:
        data_model_instance: An instance of the Python data model.

    Returns:
        Schema: The LinkML schema generated from the data model.

    Raises:
        ValueError: via ensure_consistency when declared and actual types
            disagree or an attribute lacks documentation
    """
    # Use DocstringParser to extract class description and attributes
    parser = DocstringParser()
    class_description, doc_attributes = parser.parse(data_model_instance.__doc__)

    class_name = data_model_instance.__class__.__name__
    new_class = Class(description=class_description, slots=[])

    for field_info in fields(data_model_instance):
        attr_name = field_info.name
        attr_type = field_info.type

        # Extract field type/range
        linkml_range = PythonTypes.get_linkml_range(attr_type)

        # Check values for multivalued and type consistency
        attr_value = getattr(data_model_instance, attr_name)
        multivalued, actual_type = self.check_value(attr_value)

        # Ensure documentation, declaration, and value type are consistent
        self.ensure_consistency(
            attr_name, linkml_range, actual_type, doc_attributes
        )

        # Prepare slot - only register a slot name once on the schema
        description = doc_attributes.get(attr_name, {}).get(
            "description", f"{attr_name} - missing description"
        )
        if attr_name not in self.schema.slots:
            new_slot = Slot(
                description=description, range=linkml_range, multivalued=multivalued
            )
            self.schema.slots[attr_name] = new_slot
            new_class.slots.append(attr_name)

        if multivalued:
            # recursive call if type of list or dict is a dataclass
            if hasattr(attr_type, "__args__"):
                content_type = attr_type.__args__[
                    0
                ]  # Get the declared content type
                if is_dataclass(content_type):
                    self.gen_schema(content_type)

    self.schema.classes[class_name] = new_class
    return self.schema

lod

Created on 2021-01-31

@author: wf

LOD

Bases: object

list of Dict aka Table

Source code in lodstorage/lod.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
class LOD(object):
    """
    list of Dict aka Table

    static helpers for handling tabular data represented
    as a list of dictionaries (LoD)
    """

    def __init__(self, name):
        """
        Constructor

        Args:
            name(str): the name of this list of dicts
        """
        self.name = name

    @staticmethod
    def getFields(listOfDicts, sampleCount: int = None):
        """
        get the aggregated list of fields/keys for the given list of dicts

        Args:
            listOfDicts(list): the records to inspect
            sampleCount(int): the number of records to sample; if None all records are inspected

        Returns:
            list: the field names in order of first appearance,
            or None if listOfDicts is None
        """
        if listOfDicts is None:
            return None
        if sampleCount is None:
            sampleCount = len(listOfDicts)
        fields = []
        # fix: sampleCount was previously computed but ignored -
        # limit the inspection to the requested number of records
        for row in listOfDicts[:sampleCount]:
            # generalized: any non-dict row (e.g. a JSONAble instance)
            # is inspected via its attribute dictionary
            if not isinstance(row, dict):
                row = vars(row)
            for key in row.keys():
                if key not in fields:
                    fields.append(key)
        return fields

    @staticmethod
    def setNone4List(listOfDicts, fields):
        """
        set the given fields to None for the records in the given listOfDicts
        if they are not set

        Args:
            listOfDicts(list): the list of records to work on
            fields(list): the list of fields to set to None
        """
        for record in listOfDicts:
            LOD.setNone(record, fields)

    @staticmethod
    def setNone(record, fields):
        """
        make sure the given fields in the given record are set to None

        Args:
            record(dict): the record to work on
            fields(list): the list of fields to set to None
        """
        for field in fields:
            if field not in record:
                record[field] = None

    # see https://stackoverflow.com/questions/33542997/python-intersection-of-2-lists-of-dictionaries/33543164
    @staticmethod
    def sortKey(d, key=None):
        """
        get the sort key for the given dict d with the given key

        Args:
            d(dict): the dict to get the sort key for
            key(str): the key to use; if None a hash of all items is used

        Returns:
            the value of the given key or a hash of all items
        """
        if key is None:
            # https://stackoverflow.com/a/60765557/1497139
            return hash(tuple(d.items()))
        else:
            return d[key]

    @staticmethod
    def intersect(listOfDict1, listOfDict2, key=None):
        """
        get the intersection of the two lists of Dicts by the given key

        Args:
            listOfDict1(list): the first list of dicts
            listOfDict2(list): the second list of dicts
            key(str): the key to compare by; if None whole records are compared

        Returns:
            list: the records of listOfDict1 that also appear in listOfDict2
        """
        i1 = iter(sorted(listOfDict1, key=lambda k: LOD.sortKey(k, key)))
        i2 = iter(sorted(listOfDict2, key=lambda k: LOD.sortKey(k, key)))
        lr = []
        # fix: the initial next() calls are now inside the try block so that
        # an empty input list no longer raises an uncaught StopIteration
        try:
            c1 = next(i1)
            c2 = next(i2)
            while True:
                val1 = LOD.sortKey(c1, key)
                val2 = LOD.sortKey(c2, key)
                if val1 < val2:
                    c1 = next(i1)
                elif val1 > val2:
                    c2 = next(i2)
                else:
                    lr.append(c1)
                    c1 = next(i1)
                    c2 = next(i2)
        except StopIteration:
            pass
        return lr

    @staticmethod
    def addLookup(lookup, duplicates, record, value, withDuplicates: bool):
        """
        add a single lookup result

        Args:
            lookup(dict): the lookup map
            duplicates(list): the list of duplicates
            record(dict): the current record
            value(object): the current value to lookup
            withDuplicates(bool): if True duplicates are allowed and lists are returned;
            if False a separate duplicates list is created
        """
        if value in lookup:
            if withDuplicates:
                lookupResult = lookup[value]
                lookupResult.append(record)
            else:
                duplicates.append(record)
                return
        else:
            if withDuplicates:
                lookupResult = [record]
            else:
                lookupResult = record
        lookup[value] = lookupResult

    @staticmethod
    def getLookup(lod: list, attrName: str, withDuplicates: bool = False):
        """
        create a lookup dictionary by the given attribute name for the given list of dicts

        Args:
            lod(list): the list of dicts to get the lookup dictionary for
            attrName(str): the attribute to lookup
            withDuplicates(bool): whether to retain single values or lists

        Return:
            a lookup dictionary if withDuplicates is True,
            otherwise a (lookup, duplicates) tuple
        """
        lookup = {}
        duplicates = []
        for record in lod:
            value = None
            if isinstance(record, dict):
                if attrName in record:
                    value = record[attrName]
            else:
                if hasattr(record, attrName):
                    value = getattr(record, attrName)
            if value is not None:
                if isinstance(value, list):
                    # a list value creates one lookup entry per list element
                    for listValue in value:
                        LOD.addLookup(
                            lookup, duplicates, record, listValue, withDuplicates
                        )
                else:
                    LOD.addLookup(lookup, duplicates, record, value, withDuplicates)
        if withDuplicates:
            return lookup
        else:
            return lookup, duplicates

    @classmethod
    def handleListTypes(cls, lod, doFilter=False, separator=","):
        """
        handle list types in the given list of dicts

        Args:
            cls: this class
            lod(list): a list of dicts
            doFilter(bool): True if records containing list value items should be filtered
            separator(str): the separator to use when converting lists
        """
        # iterate in reverse so deletions do not shift pending indices
        # see https://stackoverflow.com/a/1207485/1497139
        for i in range(len(lod) - 1, -1, -1):
            record = lod[i]
            if isinstance(record, dict):
                for key in record:
                    value = record[key]
                    if isinstance(value, list):
                        if doFilter:
                            del lod[i]
                            # fix: stop processing this record - it has been removed;
                            # continuing the key loop could delete further records
                            break
                        else:
                            newValue = separator.join(filter(None, value))
                            record[key] = newValue

    @staticmethod
    def filterFields(lod: list, fields: list, reverse: bool = False):
        """
        filter the given LoD with the given list of fields by either limiting the LoD to the
        fields or removing the fields contained in the list depending on the state of the
        reverse parameter

        Args:
            lod(list): list of dicts from which the fields should be excluded
            fields(list): list of fields that should be excluded from the lod
            reverse(bool): If True limit dict to the list of given fields. Otherwise exclude the fields from the dict.

        Returns:
            LoD
        """
        res = []
        for record in lod:
            if reverse:
                recordReduced = {d: record[d] for d in record if d in fields}
            else:
                recordReduced = {d: record[d] for d in record if d not in fields}
            res.append(recordReduced)
        return res

__init__(name)

Constructor

Source code in lodstorage/lod.py
13
14
15
16
17
18
def __init__(self, name):
    """
    Constructor
    """
    self.name = name
    pass

addLookup(lookup, duplicates, record, value, withDuplicates) staticmethod

add a single lookup result

Parameters:

Name Type Description Default
lookup(dict)

the lookup map

required
duplicates(list)

the list of duplicates

required
record(dict)

the current record

required
value(object)

the current value to lookup

required
withDuplicates(bool)

if True duplicates are allowed and lists are returned; if False a separate duplicates list is created

required
Source code in lodstorage/lod.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
@staticmethod
def addLookup(lookup, duplicates, record, value, withDuplicates: bool):
    """
    add a single lookup result

    Args:
        lookup(dict): the lookup map
        duplicates(list): the list of duplicates
        record(dict): the current record
        value(object): the current value to lookup
        withDuplicates(bool): if True duplicates should be allowed and lists returned if False a separate duplicates
        list is created
    """
    if value in lookup:
        if withDuplicates:
            lookupResult = lookup[value]
            lookupResult.append(record)
        else:
            duplicates.append(record)
            return
    else:
        if withDuplicates:
            lookupResult = [record]
        else:
            lookupResult = record
    lookup[value] = lookupResult

filterFields(lod, fields, reverse=False) staticmethod

filter the given LoD with the given list of fields by either limiting the LoD to the fields or removing the fields contained in the list depending on the state of the reverse parameter

Parameters:

Name Type Description Default
lod(list)

list of dicts from which the fields should be excluded

required
fields(list)

list of fields that should be excluded from the lod

required
reverse(bool)

If True limit dict to the list of given fields. Otherwise exclude the fields from the dict.

required

Returns:

Type Description

LoD

Source code in lodstorage/lod.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
@staticmethod
def filterFields(lod: list, fields: list, reverse: bool = False):
    """
    filter the given LoD with the given list of fields by either limiting the LoD to the fields or removing the
    fields contained in the list depending on the state of the reverse parameter

    Args:
        lod(list): list of dicts from which the fields should be excluded
        fields(list): list of fields that should be excluded from the lod
        reverse(bool): If True limit dict to the list of given fields. Otherwise exclude the fields from the dict.

    Returns:
        LoD
    """
    res = []
    for record in lod:
        if reverse:
            recordReduced = {d: record[d] for d in record if d in fields}
        else:
            recordReduced = {d: record[d] for d in record if d not in fields}
        res.append(recordReduced)
    return res

getLookup(lod, attrName, withDuplicates=False) staticmethod

create a lookup dictionary by the given attribute name for the given list of dicts

Parameters:

Name Type Description Default
lod(list)

the list of dicts to get the lookup dictionary for

required
attrName(str)

the attribute to lookup

required
withDuplicates(bool)

whether to retain single values or lists

required
Return

a dictionary for lookup

Source code in lodstorage/lod.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
@staticmethod
def getLookup(lod: list, attrName: str, withDuplicates: bool = False):
    """
    create a lookup dictionary by the given attribute name for the given list of dicts

    Args:
        lod(list): the list of dicts to get the lookup dictionary for
        attrName(str): the attribute to lookup
        withDuplicates(bool): whether to retain single values or lists

    Return:
        a dictionary for lookup
    """
    lookup = {}
    duplicates = []
    for record in lod:
        value = None
        if isinstance(record, dict):
            if attrName in record:
                value = record[attrName]
        else:
            if hasattr(record, attrName):
                value = getattr(record, attrName)
        if value is not None:
            if isinstance(value, list):
                for listValue in value:
                    LOD.addLookup(
                        lookup, duplicates, record, listValue, withDuplicates
                    )
            else:
                LOD.addLookup(lookup, duplicates, record, value, withDuplicates)
    if withDuplicates:
        return lookup
    else:
        return lookup, duplicates

handleListTypes(lod, doFilter=False, separator=',') classmethod

handle list types in the given list of dicts

Parameters:

Name Type Description Default
cls

this class

required
lod(list)

a list of dicts

required
doFilter(bool)

True if records containing lists value items should be filtered

required
separator(str)

the separator to use when converting lists

required
Source code in lodstorage/lod.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
@classmethod
def handleListTypes(cls, lod, doFilter=False, separator=","):
    """
    handle list types in the given list of dicts

    Args:
        cls: this class
        lod(list): a list of dicts
        doFilter(bool): True if records containing lists value items should be filtered
        separator(str): the separator to use when converting lists
    """
    # see https://stackoverflow.com/a/1207485/1497139
    for i in range(len(lod) - 1, -1, -1):
        record = lod[i]
        if isinstance(record, dict):
            for key in record:
                value = record[key]
                if isinstance(value, list):
                    if doFilter:
                        del lod[i]
                        continue
                    else:
                        newValue = separator.join(filter(None, value))
                        record[key] = newValue

intersect(listOfDict1, listOfDict2, key=None) staticmethod

get the intersection of the two lists of Dicts by the given key

Source code in lodstorage/lod.py
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
@staticmethod
def intersect(listOfDict1, listOfDict2, key=None):
    """
    get the  intersection of the two lists of Dicts by the given key
    """
    i1 = iter(sorted(listOfDict1, key=lambda k: LOD.sortKey(k, key)))
    i2 = iter(sorted(listOfDict2, key=lambda k: LOD.sortKey(k, key)))
    c1 = next(i1)
    c2 = next(i2)
    lr = []
    while True:
        try:
            val1 = LOD.sortKey(c1, key)
            val2 = LOD.sortKey(c2, key)
            if val1 < val2:
                c1 = next(i1)
            elif val1 > val2:
                c2 = next(i2)
            else:
                lr.append(c1)
                c1 = next(i1)
                c2 = next(i2)
        except StopIteration:
            break
    return lr

setNone(record, fields) staticmethod

make sure the given fields in the given record are set to none Args: record(dict): the record to work on fields(list): the list of fields to set to None

Source code in lodstorage/lod.py
49
50
51
52
53
54
55
56
57
58
59
@staticmethod
def setNone(record, fields):
    """
    make sure the given fields in the given record are set to none
    Args:
        record(dict): the record to work on
        fields(list): the list of fields to set to None
    """
    for field in fields:
        if not field in record:
            record[field] = None

setNone4List(listOfDicts, fields) staticmethod

set the given fields to None for the records in the given listOfDicts if they are not set Args: listOfDicts(list): the list of records to work on fields(list): the list of fields to set to None

Source code in lodstorage/lod.py
37
38
39
40
41
42
43
44
45
46
47
@staticmethod
def setNone4List(listOfDicts, fields):
    """
    set the given fields to None for the records in the given listOfDicts
    if they are not set
    Args:
        listOfDicts(list): the list of records to work on
        fields(list): the list of fields to set to None
    """
    for record in listOfDicts:
        LOD.setNone(record, fields)

sortKey(d, key=None) staticmethod

get the sort key for the given dict d with the given key

Source code in lodstorage/lod.py
65
66
67
68
69
70
71
72
@staticmethod
def sortKey(d, key=None):
    """get the sort key for the given dict d with the given key"""
    if key is None:
        # https://stackoverflow.com/a/60765557/1497139
        return hash(tuple(d.items()))
    else:
        return d[key]

lod_csv

CSV

Bases: LOD

helper for converting data in csv format to list of dicts (LoD) and vice versa

Source code in lodstorage/lod_csv.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
class CSV(LOD):
    """
    helper for converting data in csv format to list of dicts (LoD) and vice versa
    """

    @staticmethod
    def restoreFromCSVFile(
        filePath: str, headerNames: list = None, withPostfix: bool = False
    ):
        """
        restore LOD from given csv file

        Args:
            filePath(str): file name
            headerNames(list): Names of the headers that should be used. If None it is assumed that the header is given.
            withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

        Returns:
            list of dicts (LoD) containing the content of the given csv file
        """
        if not withPostfix:
            filePath += ".csv"
        csvStr = CSV.readFile(filePath)
        lod = CSV.fromCSV(csvStr, headerNames)
        return lod

    @staticmethod
    def fromCSV(
        csvString: str,
        fields: list = None,
        delimiter=",",
        quoting=csv.QUOTE_NONNUMERIC,
        **kwargs
    ):
        """
        convert given csv string to list of dicts (LOD)

        Args:
            csvString(str): csv string that should be converted to LOD
            fields(list): Names of the headers that should be used. If None it is assumed that the header is given.
            delimiter(str): the field delimiter to use
            quoting: the csv quoting mode to use
            kwargs: further csv dialect parameters

        Returns:
            list of dicts (LoD) containing the content of the given csv string
        """
        csvStream = io.StringIO(csvString)
        reader = csv.DictReader(
            csvStream, fieldnames=fields, delimiter=delimiter, quoting=quoting, **kwargs
        )
        lod = list(reader)
        CSV.fixTypes(lod)
        return lod

    @staticmethod
    def storeToCSVFile(lod: list, filePath: str, withPostfix: bool = False):
        """
        converts the given lod to CSV and stores it in the given file.

        Args:
            lod(list): lod that should be converted to csv file
            filePath(str): file name the csv should be stored to
            withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.
        """
        if not withPostfix:
            filePath += ".csv"
        csvStr = CSV.toCSV(lod)
        CSV.writeFile(csvStr, filePath)

    @staticmethod
    def toCSV(
        lod: list,
        includeFields: list = None,
        excludeFields: list = None,
        delimiter=",",
        quoting=csv.QUOTE_NONNUMERIC,
        **kwargs
    ):
        """
        converts the given lod to CSV string.
        For details about the csv dialect parameters see https://docs.python.org/3/library/csv.html#csv-fmt-params

        Args:
            lod(list): lod that should be converted to csv string
            includeFields(list): list of fields that should be included in the csv (positive list)
            excludeFields(list): list of fields that should be excluded from the csv (negative list)
            kwargs: csv dialect parameters

        Returns:
            csv string of the given lod
        """
        # fix: an empty (but not None) lod previously raised an IndexError on lod[0]
        if lod is None or len(lod) == 0:
            return ""
        # generalized: any non-dict records (e.g. JSONAble instances or dataclasses)
        # are converted via their attribute dictionaries
        if not isinstance(lod[0], dict):
            lod = [vars(d) for d in lod]
        if excludeFields is not None:
            lod = LOD.filterFields(lod, excludeFields)
        if includeFields is None:
            fields = LOD.getFields(lod)
        else:
            fields = includeFields
            lod = LOD.filterFields(lod, includeFields, reverse=True)
        csvStream = io.StringIO()
        dict_writer = csv.DictWriter(
            csvStream, fieldnames=fields, delimiter=delimiter, quoting=quoting, **kwargs
        )
        dict_writer.writeheader()
        dict_writer.writerows(lod)
        csvString = csvStream.getvalue()
        return csvString

    @staticmethod
    def readFile(filename: str) -> str:
        """
        Reads the given filename and returns it as string

        Args:
            filename: Name of the file that should be returned as string

        Returns:
            Content of the file as string
        """
        with open(filename, "r") as file:
            content = file.read()
        return content

    @staticmethod
    def writeFile(content: str, filename: str) -> None:
        """
        Write the given str to the given filename

        Args:
            content(str): string that should be written into the file
            filename: Name of the file the given str should be written to
        """
        with open(filename, "w") as file:
            file.write(content)

    @staticmethod
    def fixTypes(lod: list):
        """
        fixes the types of the given LoD.

        Args:
            lod(list): the list of dicts to fix in place
        """
        for record in lod:
            for key, value in record.items():
                # fix empty csv value: "cell1,,cell3" converts the second value to empty string instead of None
                if value == "":
                    record[key] = None

fixTypes(lod) staticmethod

fixes the types of the given LoD.

Source code in lodstorage/lod_csv.py
145
146
147
148
149
150
151
152
153
154
155
@staticmethod
def fixTypes(lod: list):
    """
    fixes the types of the given LoD.

    """
    for record in lod:
        for key, value in record.items():
            # fix empty csv value: "cell1,,cell3" converts the second value to empty string instead of None
            if value == "":
                record[key] = None

fromCSV(csvString, fields=None, delimiter=',', quoting=csv.QUOTE_NONNUMERIC, **kwargs) staticmethod

convert given csv string to list of dicts (LOD)

Parameters:

Name Type Description Default
csvStr(str)

csv string that should be converted to LOD

required
headerNames(list)

Names of the headers that should be used. If None it is assumed that the header is given.

required

Returns:

Type Description

list of dicts (LoD) containing the content of the given csv string

Source code in lodstorage/lod_csv.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
@staticmethod
def fromCSV(
    csvString: str,
    fields: list = None,
    delimiter=",",
    quoting=csv.QUOTE_NONNUMERIC,
    **kwargs
):
    """
    convert given csv string to list of dicts (LOD)

    Args:
        csvStr(str): csv string that should be converted to LOD
        headerNames(list): Names of the headers that should be used. If None it is assumed that the header is given.

    Returns:
        list of dicts (LoD) containing the content of the given csv string
    """
    csvStream = io.StringIO(csvString)
    reader = csv.DictReader(
        csvStream, fieldnames=fields, delimiter=delimiter, quoting=quoting, **kwargs
    )
    lod = list(reader)
    CSV.fixTypes(lod)
    return lod

readFile(filename) staticmethod

Reads the given filename and returns it as string Args: filename: Name of the file that should be returned as string

Returns:

Type Description
str

Content of the file as string

Source code in lodstorage/lod_csv.py
118
119
120
121
122
123
124
125
126
127
128
129
130
@staticmethod
def readFile(filename: str) -> str:
    """
    Reads the given filename and returns it as string
    Args:
        filename: Name of the file that should be returned as string

    Returns:
        Content of the file as string
    """
    with open(filename, "r") as file:
        content = file.read()
    return content

restoreFromCSVFile(filePath, headerNames=None, withPostfix=False) staticmethod

restore LOD from given csv file

Parameters:

Name Type Description Default
filePath(str)

file name

required
headerNames(list)

Names of the headers that should be used. If None it is assumed that the header is given.

required
withPostfix(bool)

If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

required

Returns:

Type Description

list of dicts (LoD) containing the content of the given csv file

Source code in lodstorage/lod_csv.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
@staticmethod
def restoreFromCSVFile(
    filePath: str, headerNames: list = None, withPostfix: bool = False
):
    """
    restore LOD from given csv file

    Args:
        filePath(str): file name
        headerNames(list): Names of the headers that should be used. If None it is assumed that the header is given.
        withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

    Returns:
        list of dicts (LoD) containing the content of the given csv file
    """
    if not withPostfix:
        filePath += ".csv"
    csvStr = CSV.readFile(filePath)
    lod = CSV.fromCSV(csvStr, headerNames)
    return lod

storeToCSVFile(lod, filePath, withPostfix=False) staticmethod

converts the given lod to CSV file.

Parameters:

Name Type Description Default
lod(list)

lod that should be converted to csv file

required
filePath(str)

file name the csv should be stored to

required
withPostfix(bool)

If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

required

Returns: csv string of the given lod

Source code in lodstorage/lod_csv.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
@staticmethod
def storeToCSVFile(lod: list, filePath: str, withPostfix: bool = False):
    """
    converts the given lod to CSV file.

    Args:
        lod(list): lod that should be converted to csv file
        filePath(str): file name the csv should be stored to
        withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.
    Returns:
        csv string of the given lod
    """
    if not withPostfix:
        filePath += ".csv"
    csvStr = CSV.toCSV(lod)
    CSV.writeFile(csvStr, filePath)

toCSV(lod, includeFields=None, excludeFields=None, delimiter=',', quoting=csv.QUOTE_NONNUMERIC, **kwargs) staticmethod

converts the given lod to CSV string. For details about the csv dialect parameters see https://docs.python.org/3/library/csv.html#csv-fmt-params

Parameters:

Name Type Description Default
lod(list)

lod that should be converted to csv string

required
includeFields(list)

list of fields that should be included in the csv (positive list)

required
excludeFields(list)

list of fields that should be excluded from the csv (negative list)

required
kwargs

csv dialect parameters

{}

Returns: csv string of the given lod

Source code in lodstorage/lod_csv.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
@staticmethod
def toCSV(
    lod: list,
    includeFields: list = None,
    excludeFields: list = None,
    delimiter=",",
    quoting=csv.QUOTE_NONNUMERIC,
    **kwargs
):
    """
    converts the given lod to CSV string.
    For details about the csv dialect parameters see https://docs.python.org/3/library/csv.html#csv-fmt-params

    Args:
        lod(list): lod that should be converted to csv string
        includeFields(list): list of fields that should be included in the csv (positive list)
        excludeFields(list): list of fields that should be excluded from the csv (negative list)
        kwargs: csv dialect parameters
    Returns:
        csv string of the given lod
    """
    if lod is None:
        return ""
    if isinstance(lod[0], JSONAble):
        lod = [vars(d) for d in lod]
    if excludeFields is not None:
        lod = LOD.filterFields(lod, excludeFields)
    if includeFields is None:
        fields = LOD.getFields(lod)
    else:
        fields = includeFields
        lod = LOD.filterFields(lod, includeFields, reverse=True)
    csvStream = io.StringIO()
    dict_writer = csv.DictWriter(
        csvStream, fieldnames=fields, delimiter=delimiter, quoting=quoting, **kwargs
    )
    dict_writer.writeheader()
    dict_writer.writerows(lod)
    csvString = csvStream.getvalue()
    return csvString

writeFile(content, filename) staticmethod

Write the given str to the given filename Args: content(str): string that should be written into the file filename: Name of the file the given str should be written to Returns: Nothing

Source code in lodstorage/lod_csv.py
132
133
134
135
136
137
138
139
140
141
142
143
@staticmethod
def writeFile(content: str, filename: str) -> str:
    """
    Write the given str to the given filename
    Args:
        content(str): string that should be written into the file
        filename: Name of the file the given str should be written to
    Returns:
        Nothing
    """
    with open(filename, "w") as file:
        file.write(content)

mwTable

Created on 2020-08-21

@author: wf

MediaWikiTable

Bases: object

helper for https://www.mediawiki.org/wiki/Help:Tables

Source code in lodstorage/mwTable.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
class MediaWikiTable(object):
    """
    helper for https://www.mediawiki.org/wiki/Help:Tables
    """

    def __init__(
        self, wikiTable=True, colFormats=None, sortable=True, withNewLines=False
    ):
        """
        Constructor

        Args:
            wikiTable(bool): if True use the "wikitable" css class
            colFormats(dict): optional map of column key to format string
            sortable(bool): if True use the "sortable" css class
            withNewLines(bool): if True emit one cell per line
        """
        self.colFormats = colFormats
        self.withNewLines = withNewLines
        # assemble the css class attribute of the table start markup
        tableClass = "wikitable" if wikiTable else ""
        classDelim = " " if wikiTable else ""
        sortClass = "sortable" if sortable else ""
        self.start = '{|class="%s%s%s"\n' % (tableClass, classDelim, sortClass)
        self.header = None
        self.content = ""
        self.end = "\n|}\n"

    def addHeader(self, record):
        """
        add the given record as a "sample" header
        """
        if self.withNewLines:
            headerStart = "|+"
            firstColDelim = "\n!"
            colDelim = firstColDelim
        else:
            headerStart = "|+\n"
            firstColDelim = "!"
            colDelim = "!!"
        parts = [headerStart]
        for index, key in enumerate(record.keys()):
            delim = firstColDelim if index == 0 else colDelim
            parts.append("%s%s" % (delim, key))
        self.header = "".join(parts)

    def addRow4Dict(self, record):
        """
        add a table row for the given record dict,
        deriving the header from it if none has been set yet
        """
        if self.header is None:
            self.addHeader(record)
        if self.withNewLines:
            rowStart = "\n|-"
            colDelim = "\n|"
        else:
            rowStart = "\n|-\n"
            colDelim = "||"
        cells = [rowStart]
        for key, value in record.items():
            # use the configured column format if available, plain "%s" otherwise
            colFormat = "%s"
            if self.colFormats is not None and key in self.colFormats:
                colFormat = self.colFormats[key]
            cells.append(("%s" + colFormat) % (colDelim, value))
        self.content += "".join(cells)

    def fromListOfDicts(self, listOfDicts):
        """
        add a table row for each record in the given list of dicts
        """
        for row in listOfDicts:
            self.addRow4Dict(row)

    def noneReplace(self, value):
        """
        return an empty string for None, the value itself otherwise
        """
        if value is None:
            return ""
        return value

    def asWikiMarkup(self):
        """
        convert me to MediaWiki markup

        Returns:
            string: the MediaWiki markup for this table
        """
        sections = (self.start, self.header, self.content, self.end)
        return "".join(self.noneReplace(section) for section in sections)

__init__(wikiTable=True, colFormats=None, sortable=True, withNewLines=False)

Constructor

Source code in lodstorage/mwTable.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def __init__(
    self, wikiTable=True, colFormats=None, sortable=True, withNewLines=False
):
    """
    Constructor

    Args:
        wikiTable(bool): if True use the "wikitable" CSS class
        colFormats(dict): optional per-column printf-style formats
        sortable(bool): if True make the table sortable
        withNewLines(bool): if True use the one-cell-per-line markup variant
    """
    self.colFormats = colFormats
    # assemble the CSS class list for the table start tag
    css_classes = ""
    if wikiTable:
        # note: a trailing space remains when sortable is False,
        # matching the historical markup exactly
        css_classes = "wikitable "
    if sortable:
        css_classes += "sortable"
    self.start = '{|class="%s"\n' % css_classes
    self.header = None
    self.content = ""
    self.end = "\n|}\n"
    self.withNewLines = withNewLines

addHeader(record)

add the given record as a "sample" header

Source code in lodstorage/mwTable.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def addHeader(self, record):
    """
    add the given record as a "sample" header

    Args:
        record(dict): the record whose keys become the column headers
    """
    if self.withNewLines:
        start, first_delim, other_delim = "|+", "\n!", "\n!"
    else:
        start, first_delim, other_delim = "|+\n", "!", "!!"
    cells = []
    for index, column in enumerate(record.keys()):
        delim = first_delim if index == 0 else other_delim
        cells.append("%s%s" % (delim, column))
    self.header = start + "".join(cells)

asWikiMarkup()

convert me to MediaWiki markup

Returns:

Name Type Description
string

the MediaWiki Markup for this table

Source code in lodstorage/mwTable.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def asWikiMarkup(self):
    """
    convert me to MediaWiki markup

    Returns:
        string: the MediaWiki markup for this table
    """
    parts = (self.start, self.header, self.content, self.end)
    return "".join(self.noneReplace(part) for part in parts)

params

Created on 2024-05-06

@author: wf

Params

parameter handling

Source code in lodstorage/params.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
class Params:
    """
    parameter handling for {{param}} style templates in queries
    """

    def __init__(self, query: str, illegal_chars: str = """"[;<>&|]"'"""):
        """
        constructor

        Args:
            query(str): the query to analyze for parameters
            illegal_chars: chars that may not be in the values
        """
        self.illegal_chars = illegal_chars
        self.query = query
        # a parameter is a single word wrapped in double curly braces
        self.pattern = re.compile(r"{{\s*(\w+)\s*}}")
        self.params = self.pattern.findall(query)
        self.params_dict = {param: "" for param in self.params}
        self.has_params = len(self.params) > 0

    def set(self, params_dict: Dict):
        """
        set my params

        Args:
            params_dict(Dict): maps parameter names to their values
        """
        self.params_dict = params_dict

    def audit(self) -> None:
        """
        Audit the usage of parameters in the query.

        Raises:
            ValueError: If potentially malicious values are detected in the parameter dictionary.
        """
        for param, value in self.params_dict.items():
            for char in self.illegal_chars:
                if char in value:
                    raise ValueError(
                        f"Potentially malicious value detected for parameter '{param}'"
                    )

    def apply_parameters(self) -> str:
        """
        Replace Jinja templates in the query with corresponding parameter values.

        Returns:
            str: The query with Jinja templates replaced by parameter values.
        """
        self.audit()
        query = self.query
        for param, value in self.params_dict.items():
            pattern = re.compile(r"{{\s*" + re.escape(param) + r"\s*\}\}")
            # use a callable replacement so backslashes and group references
            # (e.g. r"\1") in the value are inserted literally instead of
            # being interpreted as re.sub template escapes
            query = pattern.sub(lambda _match: value, query)
        return query

__init__(query, illegal_chars='"[;<>&|]"\'')

constructor

Parameters:

Name Type Description Default
query(str)

the query to analyze for parameters

required
illegal_chars str

chars that may not be in the values

'"[;<>&|]"\''
Source code in lodstorage/params.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def __init__(self, query: str, illegal_chars: str = """"[;<>&|]"'"""):
    """
    constructor

    Args:
        query(str): the query to analyze for parameters
        illegal_chars: chars that may not be in the values
    """
    self.illegal_chars = illegal_chars
    self.query = query
    # a parameter is a single word wrapped in double curly braces
    self.pattern = re.compile(r"{{\s*(\w+)\s*}}")
    found = self.pattern.findall(query)
    self.params = found
    self.params_dict = dict.fromkeys(found, "")
    self.has_params = bool(found)

apply_parameters()

Replace Jinja templates in the query with corresponding parameter values.

Returns:

Name Type Description
str str

The query with Jinja templates replaced by parameter values.

Source code in lodstorage/params.py
51
52
53
54
55
56
57
58
59
60
61
62
63
def apply_parameters(self) -> str:
    """
    Replace Jinja templates in the query with corresponding parameter values.

    Returns:
        str: The query with Jinja templates replaced by parameter values.
    """
    self.audit()
    query = self.query
    for param, value in self.params_dict.items():
        pattern = re.compile(r"{{\s*" + re.escape(param) + r"\s*\}\}")
        # a callable replacement keeps backslashes in the value literal -
        # a plain string replacement would interpret r"\1" as a group reference
        query = pattern.sub(lambda _match: value, query)
    return query

audit()

Audit the usage of parameters in the query.

Raises:

Type Description
ValueError

If potentially malicious values are detected in the parameter dictionary.

Source code in lodstorage/params.py
37
38
39
40
41
42
43
44
45
46
47
48
49
def audit(self) -> None:
    """
    Audit the usage of parameters in the query.

    Raises:
        ValueError: If potentially malicious values are detected in the parameter dictionary.
    """
    for param, value in self.params_dict.items():
        # reject the value as soon as any forbidden char shows up in it
        if any(char in value for char in self.illegal_chars):
            raise ValueError(
                f"Potentially malicious value detected for parameter '{param}'"
            )

set(params_dict)

set my params

Source code in lodstorage/params.py
31
32
33
34
35
def set(self, params_dict: Dict):
    """
    set my params

    Args:
        params_dict(Dict): the parameter name to value mapping to use
    """
    # replace the whole mapping; no merge with previous values
    self.params_dict = params_dict

StoreDictKeyPair

Bases: Action

Custom argparse action to store key-value pairs as a dictionary.

This class implements an argparse action to parse and store command-line arguments in the form of key-value pairs. The pairs should be separated by a comma and each key-value pair should be separated by an equals sign.

Example

--option key1=value1,key2=value2,key3=value3

Reference

https://stackoverflow.com/a/42355279/1497139

Source code in lodstorage/params.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
class StoreDictKeyPair(argparse.Action):
    """
    Custom argparse action to store key-value pairs as a dictionary.

    This class implements an argparse action to parse and store command-line
    arguments in the form of key-value pairs. The pairs should be separated by
    a comma and each key-value pair should be separated by an equals sign.

    Example:
        --option key1=value1,key2=value2,key3=value3

    Reference:
        https://stackoverflow.com/a/42355279/1497139
    """

    def __call__(
        self,
        _parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: str,
        _option_string: Optional[str] = None,
    ) -> None:
        """
        Parse key-value pairs and store them as a dictionary in the namespace.

        Args:
            parser (argparse.ArgumentParser): The argument parser object.
            namespace (argparse.Namespace): The namespace to store the parsed values.
            values (str): The string containing key-value pairs separated by commas.
            option_string (Optional[str]): The option string, if provided.
        """
        my_dict = {}
        for kv in values.split(","):
            # split only on the first '=' so values may themselves contain '='
            # (e.g. URLs with query strings); the bare split crashed on those
            k, v = kv.split("=", 1)
            my_dict[k] = v
        setattr(namespace, self.dest, my_dict)

__call__(_parser, namespace, values, _option_string=None)

Parse key-value pairs and store them as a dictionary in the namespace.

Parameters:

Name Type Description Default
parser ArgumentParser

The argument parser object.

required
namespace Namespace

The namespace to store the parsed values.

required
values str

The string containing key-value pairs separated by commas.

required
option_string Optional[str]

The option string, if provided.

required
Source code in lodstorage/params.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def __call__(
    self,
    _parser: argparse.ArgumentParser,
    namespace: argparse.Namespace,
    values: str,
    _option_string: Optional[str] = None,
) -> None:
    """
    Parse key-value pairs and store them as a dictionary in the namespace.

    Args:
        parser (argparse.ArgumentParser): The argument parser object.
        namespace (argparse.Namespace): The namespace to store the parsed values.
        values (str): The string containing key-value pairs separated by commas.
        option_string (Optional[str]): The option string, if provided.
    """
    my_dict = {}
    for kv in values.split(","):
        # split only on the first '=' so values may themselves contain '='
        k, v = kv.split("=", 1)
        my_dict[k] = v
    setattr(namespace, self.dest, my_dict)

plot

Created on 2020-07-05

@author: wf

Plot

Bases: object

create Plot based on counters see https://stackoverflow.com/questions/19198920/using-counter-in-python-to-build-histogram

Source code in lodstorage/plot.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
class Plot(object):
    """
    create Plot based on counters
    see https://stackoverflow.com/questions/19198920/using-counter-in-python-to-build-histogram
    """

    def __init__(
        self,
        valueList,
        title,
        xlabel=None,
        ylabel=None,
        gformat=".png",
        fontsize=12,
        plotdir=None,
        debug=False,
    ):
        """
        Constructor

        Args:
            valueList(list): the values to be counted and plotted
            title(str): the plot title, also used as the file name stem when saving
            xlabel(str): optional x-axis label
            ylabel(str): optional y-axis label
            gformat(str): graphics file extension used when saving
            fontsize(int): font size for the title
            plotdir(str): optional target directory; defaults to ../plots/ relative to this module
            debug(bool): if True show debug output
        """
        self.counter = Counter(valueList)
        self.valueList = valueList
        self.title = title
        self.xlabel = xlabel
        self.ylabel = ylabel
        self.fontsize = fontsize
        self.gformat = gformat
        self.debug = debug
        path = os.path.dirname(__file__)
        if plotdir is not None:
            self.plotdir = plotdir
        else:
            self.plotdir = path + "/../plots/"
        # ensure the target directory exists in both cases;
        # previously only the default directory was created, so saving
        # to a caller-supplied non-existing plotdir failed
        os.makedirs(self.plotdir, exist_ok=True)

    def titleMe(self):
        """set my title and labels"""
        plt.title(self.title, fontsize=self.fontsize)
        if self.xlabel is not None:
            plt.xlabel(self.xlabel)
        if self.ylabel is not None:
            plt.ylabel(self.ylabel)

    def showMe(self, mode="show", close=True):
        """show me in the given mode ("show" displays, anything else saves)"""
        if mode == "show":
            plt.show()
        else:
            plt.savefig(self.plotdir + self.title + self.gformat)
        if close:
            plt.close()

    def barchart(self, mode="show"):
        """barchart based histogram for the given counter"""
        labels, values = zip(*self.counter.items())
        indexes = np.arange(len(labels))
        width = 1
        self.titleMe()
        plt.bar(indexes, values, width)
        # center tick labels under the bars
        plt.xticks(indexes + width * 0.5, labels)
        plt.yticks(np.arange(1, max(values) + 1, step=1))
        self.showMe(mode)

    def showDebug(self):
        """print the value list and counter internals"""
        print("   value  list: ", self.valueList)
        print("counter  items: ", self.counter.items())
        print("counter values: ", self.counter.values())
        print("counter   keys: ", self.counter.keys())

    def hist(self, mode="show"):
        """create histogram for the given counter"""
        if self.debug:
            self.showDebug()
        self.titleMe()
        # see https://stackoverflow.com/a/2162045/1497139
        # one bin per distinct value keeps bars aligned with the data
        plt.hist(self.valueList, bins=len(self.counter.keys()))
        self.showMe(mode)

__init__(valueList, title, xlabel=None, ylabel=None, gformat='.png', fontsize=12, plotdir=None, debug=False)

Constructor

Source code in lodstorage/plot.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    valueList,
    title,
    xlabel=None,
    ylabel=None,
    gformat=".png",
    fontsize=12,
    plotdir=None,
    debug=False,
):
    """
    Constructor

    Args:
        valueList(list): the values to be counted and plotted
        title(str): the plot title, also used as the file name stem when saving
        xlabel(str): optional x-axis label
        ylabel(str): optional y-axis label
        gformat(str): graphics file extension used when saving
        fontsize(int): font size for the title
        plotdir(str): optional target directory
        debug(bool): if True show debug output
    """
    self.counter = Counter(valueList)
    self.valueList = valueList
    self.title = title
    self.xlabel = xlabel
    self.ylabel = ylabel
    self.fontsize = fontsize
    self.gformat = gformat
    self.debug = debug
    module_dir = os.path.dirname(__file__)
    if plotdir is None:
        # default to a plots directory next to the package and create it;
        # NOTE(review): a caller-supplied plotdir is NOT created here
        self.plotdir = module_dir + "/../plots/"
        os.makedirs(self.plotdir, exist_ok=True)
    else:
        self.plotdir = plotdir

barchart(mode='show')

barchart based histogram for the given counter

Source code in lodstorage/plot.py
65
66
67
68
69
70
71
72
73
74
def barchart(self, mode="show"):
    """barchart based histogram for the given counter"""
    # one bar per distinct value; labels and heights come from the Counter
    labels, values = zip(*self.counter.items())
    indexes = np.arange(len(labels))
    width = 1
    self.titleMe()
    plt.bar(indexes, values, width)
    # center the tick labels under the bars
    plt.xticks(indexes + width * 0.5, labels)
    # integer y ticks from 1 up to the highest count
    plt.yticks(np.arange(1, max(values) + 1, step=1))
    self.showMe(mode)

hist(mode='show')

create histogram for the given counter

Source code in lodstorage/plot.py
82
83
84
85
86
87
88
89
90
def hist(self, mode="show"):
    """create histogram for the given counter"""
    # optionally dump the counter internals first
    if self.debug:
        self.showDebug()
    self.titleMe()
    # see https://stackoverflow.com/a/2162045/1497139
    # one bin per distinct value keeps the bars aligned with the data
    plt.hist(self.valueList, bins=len(self.counter.keys()))
    self.showMe(mode)
    pass

showMe(mode='show', close=True)

show me in the given mode

Source code in lodstorage/plot.py
56
57
58
59
60
61
62
63
def showMe(self, mode="show", close=True):
    """show me in the given mode"""
    if mode != "show":
        # persist to the plot directory using the title as file name stem
        target = self.plotdir + self.title + self.gformat
        plt.savefig(target)
    else:
        plt.show()
    if close:
        plt.close()

titleMe()

set my title and labels

Source code in lodstorage/plot.py
48
49
50
51
52
53
54
def titleMe(self):
    """set my title and labels"""
    plt.title(self.title, fontsize=self.fontsize)
    # only set axis labels that were actually provided
    for label, apply_label in ((self.xlabel, plt.xlabel), (self.ylabel, plt.ylabel)):
        if label is not None:
            apply_label(label)

prefixes

Created on 2024-03-02

@author: wf

Prefixes

Handles the generation of standard SPARQL prefix declarations for queries. This utility class simplifies the inclusion of common prefixes used in SPARQL queries by providing a method to generate the necessary PREFIX lines based on a list of prefix keys.

The class supports a wide range of prefixes relevant to Wikidata and general RDF/SPARQL usage, including RDF, RDFS, Wikibase, Schema.org, and more. It aims to reduce redundancy and improve clarity in SPARQL query construction by centralizing prefix management.

Methods:

Name Description
getPrefixes

Generates SPARQL PREFIX lines for a given list of prefix keys.

Source code in lodstorage/prefixes.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
class Prefixes:
    """
    Handles the generation of standard SPARQL prefix declarations for queries.
    This utility class simplifies the inclusion of common prefixes used in SPARQL
    queries by providing a method to generate the necessary PREFIX lines based on
    a list of prefix keys.

    The class supports a wide range of prefixes relevant to Wikidata and general RDF/SPARQL
    usage, including RDF, RDFS, Wikibase, Schema.org, and more. It aims to reduce redundancy
    and improve clarity in SPARQL query construction by centralizing prefix management.

    Methods:
        getPrefixes(prefixes): Generates SPARQL PREFIX lines for a given list of prefix keys.
    """

    # maps prefix keys to their URIs
    # see also https://www.wikidata.org/wiki/EntitySchema:E49
    PREFIX_MAP = {
        "bd": "<http://www.bigdata.com/rdf#>",
        "cc": "<http://creativecommons.org/ns#>",
        "dct": "<http://purl.org/dc/terms/>",
        "geo": "<http://www.opengis.net/ont/geosparql#>",
        "ontolex": "<http://www.w3.org/ns/lemon/ontolex#>",
        "owl": "<http://www.w3.org/2002/07/owl#>",
        "p": "<http://www.wikidata.org/prop/>",
        "pq": "<http://www.wikidata.org/prop/qualifier/>",
        "pqn": "<http://www.wikidata.org/prop/qualifier/value-normalized/>",
        "pqv": "<http://www.wikidata.org/prop/qualifier/value/>",
        "pr": "<http://www.wikidata.org/prop/reference/>",
        "prn": "<http://www.wikidata.org/prop/reference/value-normalized/>",
        "prov": "<http://www.w3.org/ns/prov#>",
        "prv": "<http://www.wikidata.org/prop/reference/value/>",
        "ps": "<http://www.wikidata.org/prop/statement/>",
        "psn": "<http://www.wikidata.org/prop/statement/value-normalized/>",
        "psv": "<http://www.wikidata.org/prop/statement/value/>",
        "rdf": "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
        "rdfs": "<http://www.w3.org/2000/01/rdf-schema#>",
        "schema": "<http://schema.org/>",
        "skos": "<http://www.w3.org/2004/02/skos/core#>",
        "wd": "<http://www.wikidata.org/entity/>",
        "wdata": "<http://www.wikidata.org/wiki/Special:EntityData/>",
        "wdno": "<http://www.wikidata.org/prop/novalue/>",
        "wdref": "<http://www.wikidata.org/reference/>",
        "wds": "<http://www.wikidata.org/entity/statement/>",
        "wdt": "<http://www.wikidata.org/prop/direct/>",
        "wdtn": "<http://www.wikidata.org/prop/direct-normalized/>",
        "wdv": "<http://www.wikidata.org/value/>",
        "wikibase": "<http://wikiba.se/ontology#>",
        "xsd": "<http://www.w3.org/2001/XMLSchema#>",
    }

    @classmethod
    def getPrefixes(cls, prefixes=None) -> str:
        """Generates SPARQL PREFIX lines for a given list of prefix keys.

        This method looks up URIs for the specified prefixes from a predefined map and constructs
        PREFIX lines suitable for inclusion at the beginning of a SPARQL query. It allows for easy
        and flexible specification of the prefixes needed for a particular query.

        Args:
            prefixes (list of str): A list of prefix keys for which PREFIX lines should be generated.
                Defaults to a common set of prefixes used in Wikidata queries.
                (The mutable list default was replaced by a None sentinel to avoid
                sharing one list object across calls.)

        Returns:
            str: A string containing the SPARQL PREFIX lines for the specified prefixes, each ending
                with a newline character. If a prefix key is not recognized, it is ignored.

        Example:
            >>> Prefixes.getPrefixes(["wd", "wdt"])
            'PREFIX wd: <http://www.wikidata.org/entity/>\\nPREFIX wdt: <http://www.wikidata.org/prop/direct/>\\n'
        """
        if prefixes is None:
            prefixes = ["rdf", "rdfs", "schema", "wd", "wdt", "wikibase", "xsd"]
        return "".join(
            f"PREFIX {prefix}: {cls.PREFIX_MAP[prefix]}\n"
            for prefix in prefixes
            if prefix in cls.PREFIX_MAP
        )

getPrefixes(prefixes=['rdf', 'rdfs', 'schema', 'wd', 'wdt', 'wikibase', 'xsd']) classmethod

Generates SPARQL PREFIX lines for a given list of prefix keys.

    This method looks up URIs for the specified prefixes from a predefined map and constructs
    PREFIX lines suitable for inclusion at the beginning of a SPARQL query. It allows for easy
    and flexible specification of the prefixes needed for a particular query.

    Args:
        prefixes (list of str): A list of prefix keys for which PREFIX lines should be generated.
            Defaults to a common set of prefixes used in Wikidata queries.

    Returns:
        str: A string containing the SPARQL PREFIX lines for the specified prefixes, each ending
            with a newline character. If a prefix key is not recognized, it is ignored.

    Example:
        >>> Prefixes.getPrefixes(["wd", "wdt"])
    'PREFIX wd: <http://www.wikidata.org/entity/>\nPREFIX wdt: <http://www.wikidata.org/prop/direct/>\n'

Source code in lodstorage/prefixes.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
@classmethod
def getPrefixes(
    cls, prefixes=["rdf", "rdfs", "schema", "wd", "wdt", "wikibase", "xsd"]
) -> str:
    """Generates SPARQL PREFIX lines for a given list of prefix keys.

    Each known prefix key is resolved against a predefined map of URIs
    and rendered as a 'PREFIX key: <uri>' line; unknown keys are ignored.

    Args:
        prefixes (list of str): A list of prefix keys for which PREFIX lines should be generated.
            Defaults to a common set of prefixes used in Wikidata queries.

    Returns:
        str: The SPARQL PREFIX lines for the specified prefixes, each ending with a newline.
    """
    # see also https://www.wikidata.org/wiki/EntitySchema:E49
    prefixMap = {
        "bd": "<http://www.bigdata.com/rdf#>",
        "cc": "<http://creativecommons.org/ns#>",
        "dct": "<http://purl.org/dc/terms/>",
        "geo": "<http://www.opengis.net/ont/geosparql#>",
        "ontolex": "<http://www.w3.org/ns/lemon/ontolex#>",
        "owl": "<http://www.w3.org/2002/07/owl#>",
        "p": "<http://www.wikidata.org/prop/>",
        "pq": "<http://www.wikidata.org/prop/qualifier/>",
        "pqn": "<http://www.wikidata.org/prop/qualifier/value-normalized/>",
        "pqv": "<http://www.wikidata.org/prop/qualifier/value/>",
        "pr": "<http://www.wikidata.org/prop/reference/>",
        "prn": "<http://www.wikidata.org/prop/reference/value-normalized/>",
        "prov": "<http://www.w3.org/ns/prov#>",
        "prv": "<http://www.wikidata.org/prop/reference/value/>",
        "ps": "<http://www.wikidata.org/prop/statement/>",
        "psn": "<http://www.wikidata.org/prop/statement/value-normalized/>",
        "psv": "<http://www.wikidata.org/prop/statement/value/>",
        "rdf": "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
        "rdfs": "<http://www.w3.org/2000/01/rdf-schema#>",
        "schema": "<http://schema.org/>",
        "skos": "<http://www.w3.org/2004/02/skos/core#>",
        "wd": "<http://www.wikidata.org/entity/>",
        "wdata": "<http://www.wikidata.org/wiki/Special:EntityData/>",
        "wdno": "<http://www.wikidata.org/prop/novalue/>",
        "wdref": "<http://www.wikidata.org/reference/>",
        "wds": "<http://www.wikidata.org/entity/statement/>",
        "wdt": "<http://www.wikidata.org/prop/direct/>",
        "wdtn": "<http://www.wikidata.org/prop/direct-normalized/>",
        "wdv": "<http://www.wikidata.org/value/>",
        "wikibase": "<http://wikiba.se/ontology#>",
        "xsd": "<http://www.w3.org/2001/XMLSchema#>",
    }
    return "".join(
        f"PREFIX {prefix}: {prefixMap[prefix]}\n"
        for prefix in prefixes
        if prefix in prefixMap
    )

profiler

Created on 2022-11-18

@author: wf

Profiler

simple profiler

Source code in lodstorage/profiler.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
class Profiler:
    """
    simple profiler measuring wall-clock time between start() and time()
    """

    def __init__(self, msg, profile=True, with_start: bool = True):
        """
        construct me with the given msg and profile active flag

        Args:
            msg(str): the message to show if profiling is active
            profile(bool): True if messages should be shown
            with_start(bool): if True start timing immediately
        """
        self.msg = msg
        self.profile = profile
        if with_start:
            self.start()

    def start(self):
        """
        start profiling
        """
        # remember when we started so time() can compute the elapsed seconds
        self.starttime = time.time()
        if not self.profile:
            return
        print(f"Starting {self.msg} ...")

    def time(self, extraMsg=""):
        """
        time the action and print if profile is active
        """
        now = time.time()
        duration = now - self.starttime
        if self.profile:
            print(f"{self.msg}{extraMsg} took {duration:5.1f} s")
        return duration

__init__(msg, profile=True, with_start=True)

construct me with the given msg and profile active flag

Parameters:

Name Type Description Default
msg(str)

the message to show if profiling is active

required
profile(bool)

True if messages should be shown

required
Source code in lodstorage/profiler.py
14
15
16
17
18
19
20
21
22
23
24
25
def __init__(self, msg, profile=True, with_start: bool = True):
    """
    construct me with the given msg and profile active flag

    Args:
        msg(str): the message to show if profiling is active
        profile(bool): True if messages should be shown
        with_start(bool): if True start timing immediately
    """
    self.msg = msg
    self.profile = profile
    # optionally begin timing right away
    if with_start:
        self.start()

start()

start profiling

Source code in lodstorage/profiler.py
27
28
29
30
31
32
33
def start(self):
    """
    start profiling
    """
    # remember when we started so time() can compute the elapsed seconds
    self.starttime = time.time()
    if not self.profile:
        return
    print(f"Starting {self.msg} ...")

time(extraMsg='')

time the action and print if profile is active

Source code in lodstorage/profiler.py
35
36
37
38
39
40
41
42
def time(self, extraMsg=""):
    """
    time the action and print if profile is active
    """
    elapsed = time.time() - self.starttime
    if self.profile:
        print(f"{self.msg}{extraMsg} took {elapsed:5.1f} s")
    return elapsed

query

Created on 2020-08-22

@author: wf

Endpoint

Bases: JSONAble

a query endpoint

Source code in lodstorage/query.py
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
class Endpoint(JSONAble):
    """
    a query endpoint
    """

    @staticmethod
    def getSamples():
        # two representative endpoint configurations
        samples = [
            {
                "name": "wikidata",
                "lang": "sparql",
                "endpoint": "https://query.wikidata.org/sparql",
                "website": "https://query.wikidata.org/",
                "database": "blazegraph",
                "method": "POST",
                "prefixes": "PREFIX bd: <http://www.bigdata.com/rdf#>\nPREFIX cc: <http://creativecommons.org/ns#>",
            },
            {
                "name": "dbis-jena",
                "lang": "sparql",
                "endpoint": "https://confident.dbis.rwth-aachen.de/jena/",
                "website": "https://confident.dbis.rwth-aachen.de",
                "auth": "BASIC",
                "user": "secret",
                "password": "#not public - example not usable for access#",
            },
        ]
        return samples

    @classmethod
    def getDefault(cls):
        # the first sample (wikidata) serves as the default configuration
        default_conf = Endpoint()
        default_conf.fromDict(Endpoint.getSamples()[0])
        return default_conf

    def __init__(self):
        """
        constructor for setting defaults
        """
        self.method = "POST"
        self.lang = "SPARQL"

    def __str__(self):
        """
        Returns:
            str: a string representation of this Endpoint
        """
        return f"{self.name}:{self.website}:{self.endpoint}({self.method})"

__init__()

constructor for setting defaults

Source code in lodstorage/query.py
746
747
748
749
750
751
def __init__(self):
    """
    constructor for setting defaults
    """
    # default to POST requests against SPARQL endpoints
    self.method = "POST"
    self.lang = "SPARQL"

__str__()

Returns:

Name Type Description
str

a string representation of this Endpoint

Source code in lodstorage/query.py
753
754
755
756
757
758
759
def __str__(self):
    """
    Returns:
        str: a string representation of this Endpoint
    """
    return f"{self.name}:{self.website}:{self.endpoint}({self.method})"

EndpointManager

Bases: object

manages a set of SPARQL endpoints

Source code in lodstorage/query.py
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
class EndpointManager(object):
    """
    manages a set of SPARQL endpoints
    """

    @staticmethod
    def getEndpoints(
        endpointPath: str = None, lang: str = None, with_default: bool = True
    ):
        """
        get the endpoints for the given endpointPath

        Args:
            endpointPath(str): the path to the yaml file with the endpoint configurations
            lang(str): if lang is given filter by the given language
            with_default(bool): if True include the default endpoints
        """
        yaml_paths = YamlPath.getPaths(
            "endpoints.yaml", endpointPath, with_default=with_default
        )
        endpoints = {}
        for yaml_path in yaml_paths:
            with open(yaml_path, "r") as stream:
                endpoint_records = yaml.safe_load(stream)
                for name, record in endpoint_records.items():
                    # skip records whose language does not match the filter
                    if lang is not None and record["lang"] != lang:
                        continue
                    endpoint = Endpoint()
                    endpoint.fromDict({"name": name, **record})
                    endpoints[name] = endpoint
        return endpoints

    @staticmethod
    def getEndpointNames(endpointPath=None, lang: str = None) -> list:
        """
        Returns a list of all available endpoint names
        Args:
            endpointPath(str): the path to the yaml file with the endpoint configurations
            lang(str): if lang is given filter by the given language

        """
        available = EndpointManager.getEndpoints(endpointPath, lang=lang)
        return list(available.keys())

getEndpointNames(endpointPath=None, lang=None) staticmethod

Returns a list of all available endpoint names Args: endpointPath(str): the path to the yaml file with the endpoint configurations lang(str): if lang is given filter by the given language

Source code in lodstorage/query.py
698
699
700
701
702
703
704
705
706
707
708
@staticmethod
def getEndpointNames(endpointPath=None, lang: str = None) -> list:
    """
    Returns a list of all available endpoint names
    Args:
        endpointPath(str): the path to the yaml file with the endpoint configurations
        lang(str): if lang is given filter by the given language

    """
    available = EndpointManager.getEndpoints(endpointPath, lang=lang)
    return [name for name in available]

getEndpoints(endpointPath=None, lang=None, with_default=True) staticmethod

get the endpoints for the given endpointPath

Parameters:

Name Type Description Default
endpointPath(str)

the path to the yaml file with the endpoint configurations

required
lang(str)

if lang is given filter by the given language

required
with_default(bool)

if True include the default endpoints

required
Source code in lodstorage/query.py
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
@staticmethod
def getEndpoints(
    endpointPath: str = None, lang: str = None, with_default: bool = True
):
    """
    get the endpoints for the given endpointPath

    Args:
        endpointPath(str): the path to the yaml file with the endpoint configurations
        lang(str): if lang is given filter by the given language
        with_default(bool): if True include the default endpoints
    """
    yaml_paths = YamlPath.getPaths(
        "endpoints.yaml", endpointPath, with_default=with_default
    )
    endpoints = {}
    for yaml_path in yaml_paths:
        with open(yaml_path, "r") as stream:
            endpoint_records = yaml.safe_load(stream)
            for name, record in endpoint_records.items():
                # skip records whose language does not match the filter
                if lang is not None and record["lang"] != lang:
                    continue
                endpoint = Endpoint()
                endpoint.fromDict({"name": name, **record})
                endpoints[name] = endpoint
    return endpoints

Format

Bases: Enum

the supported formats for the results to be delivered

Source code in lodstorage/query.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class Format(Enum):
    """
    enumeration of the output formats supported for delivering results
    """

    csv = "csv"
    json = "json"
    html = "html"
    xml = "xml"
    tsv = "tsv"
    latex = "latex"
    mediawiki = "mediawiki"
    raw = "raw"
    github = "github"

    def __str__(self) -> str:
        # the enum value doubles as the display name
        return self.value

Query

Bases: object

a Query e.g. for SPARQL

Source code in lodstorage/query.py
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
class Query(object):
    """a Query e.g. for SPARQL"""

    def __init__(
        self,
        name: str,
        query: str,
        lang="sparql",
        endpoint: str = None,
        database: str = "blazegraph",
        title: str = None,
        description: str = None,
        limit: int = None,
        prefixes=None,
        tryItUrl: str = None,
        formats: list = None,
        debug=False,
    ):
        """
        constructor
        Args:
            name(string): the name/label of the query
            query(string): the native Query text e.g. in SPARQL
            lang(string): the language of the query e.g. SPARQL
            endpoint(string): the endpoint url to use
            database(string): the type of database e.g. "blazegraph"
            title(string): the header/title of the query
            description(string): the description of the query
            limit(int): the limit of the query default: None
            prefixes(list): list of prefixes to be resolved
            tryItUrl(str): the url of a "tryit" webpage
            formats(list): key,value pairs of ValueFormatters to be applied
            debug(boolean): true if debug mode should be switched on
        """
        self.name = name
        self.query = query
        self.lang = lang
        self.endpoint = endpoint
        self.database = database
        self.tryItUrl = tryItUrl

        # fall back to the query name when no explicit title is given
        self.title = title = name if title is None else title
        self.description = "" if description is None else description
        self.limit = limit
        self.prefixes = prefixes
        self.debug = debug
        self.formats = formats
        self.formatCallBacks = []

    def __str__(self):
        """
        return a line-per-attribute representation of all non-None attributes
        """
        queryStr = "\n".join(
            [
                f"{key}:{value}"
                for key, value in self.__dict__.items()
                if value is not None
            ]
        )
        return f"{queryStr}"

    def addFormatCallBack(self, callback):
        """
        register a callback to be run by preFormatWithCallBacks

        Args:
            callback: a callable taking (record, key, value, tablefmt)
        """
        self.formatCallBacks.append(callback)

    def preFormatWithCallBacks(self, lod, tablefmt: str):
        """
        run the configured call backs to pre-format the given list of dicts for the given tableformat

        Args:
            lod(list): the list of dicts to handle
            tablefmt(str): the table format (according to tabulate) to apply

        """
        for record in lod:
            for key in record.keys():
                value = record[key]
                if value is not None:
                    for formatCallBack in self.formatCallBacks:
                        formatCallBack(record, key, value, tablefmt)

    def formatWithValueFormatters(self, lod, tablefmt: str):
        """
        format the given list of Dicts with the ValueFormatters
        """
        # is there anything to do?
        if self.formats is None:
            # no
            return
        # get the value Formatters that might apply here
        valueFormatters = ValueFormatter.getFormats()
        formatsToApply = {}
        for valueFormatSpec in self.formats:
            # a spec is "<key>:<formatName>" e.g. president:wikidata
            # NOTE(review): a spec without ":" raises IndexError here
            parts = valueFormatSpec.split(":")
            # e.g. president:wikidata
            keytoformat = parts[0]
            formatName = parts[1]
            if formatName in valueFormatters:
                formatsToApply[keytoformat] = valueFormatters[formatName]
        for record in lod:
            for keytoformat in formatsToApply:
                valueFormatter = formatsToApply[keytoformat]
                # format all key values
                if keytoformat == "*":
                    for key in record:
                        valueFormatter.applyFormat(record, key, tablefmt)
                # or just a selected one
                elif keytoformat in record:
                    valueFormatter.applyFormat(record, keytoformat, tablefmt)
            pass

    def getTryItUrl(self, baseurl: str, database: str = "blazegraph"):
        """
        return the "try it!" url for the given baseurl

        Args:
            baseurl(str): the baseurl to use

        Returns:
            str: the "try it!" url for the given query
        """
        # https://stackoverflow.com/a/9345102/1497139
        quoted = urllib.parse.quote(str(self.query))
        # blazegraph expects the query as a url fragment, other engines as a query parameter
        if database == "blazegraph":
            delim = "/#"
        else:
            delim = "?query="
        url = f"{baseurl}{delim}{quoted}"
        return url

    def getLink(self, url, title, tablefmt):
        """
        convert the given url and title to a link for the given tablefmt

        Args:
            url(str): the url to convert
            title(str): the title to show
            tablefmt(str): the table format to use
        """
        # create a safe url
        if url is None:
            return ""
        # default: plain "title:url" for formats without link markup
        markup = f"{title}:{url}"
        if tablefmt == "mediawiki":
            markup = f"[{url} {title}]"
        elif tablefmt == "github":
            markup = f"[{title}]({url})"
        elif tablefmt == "latex":
            markup = r"\href{%s}{%s}" % (url, title)
        return markup

    def prefixToLink(self, lod: list, prefix: str, tablefmt: str):
        """
        convert url prefixes to link according to the given table format
        TODO - refactor as preFormat callback

        Args:
            lod(list): the list of dicts to convert
            prefix(str): the prefix to strip
            tablefmt(str): the tabulate tableformat to use

        """
        for record in lod:
            for key in record.keys():
                value = record[key]
                if (
                    value is not None
                    and isinstance(value, str)
                    and value.startswith(prefix)
                ):
                    # strip the prefix and url-decode the remainder for display
                    item = value.replace(prefix, "")
                    uqitem = urllib.parse.unquote(item)
                    if tablefmt == "latex":
                        link = uqitem
                    else:
                        link = self.getLink(value, uqitem, tablefmt)
                    record[key] = link

    def asYaml(self):
        """
        serialize me to YAML markup

        Returns:
            str: the YAML markup
        """
        yamlMarkup = yaml.dump(self)
        return yamlMarkup

    def asWikiSourceMarkup(self):
        """
        convert me to Mediawiki markup for syntax highlighting using the "source" tag


        Returns:
            string: the Markup
        """
        markup = "<source lang='%s'>\n%s\n</source>\n" % (self.lang, self.query)
        return markup

    def asWikiMarkup(self, listOfDicts):
        """
        convert the given listOfDicts result to MediaWiki markup

        Args:
            listOfDicts(list): the list of Dicts to convert to MediaWiki markup

        Returns:
            string: the markup
        """
        if self.debug:
            print(listOfDicts)
        mwTable = MediaWikiTable()
        mwTable.fromListOfDicts(listOfDicts)
        markup = mwTable.asWikiMarkup()
        return markup

    def documentQueryResult(
        self,
        qlod: list,
        limit=None,
        tablefmt: str = "mediawiki",
        tryItUrl: str = None,
        withSourceCode=True,
        **kwArgs,
    ):
        """
        document the given query results - note that a copy of the whole list is going to be created for being able to format

        Args:
            qlod: the list of dicts result
            limit(int): the maximum number of records to display in result tabulate
            tablefmt(str): the table format to use
            tryItUrl: the "try it!" url to show
            withSourceCode(bool): if True document the source code

        Return:
            str: the documentation tabular text for the given parameters
        """
        sourceCode = self.query
        tryItMarkup = ""
        sourceCodeHeader = ""
        resultHeader = ""
        title = self.title
        # work on a (possibly truncated) deep copy so formatting does not modify qlod
        if limit is not None:
            lod = copy.deepcopy(qlod[:limit])
        else:
            lod = copy.deepcopy(qlod)
        self.preFormatWithCallBacks(lod, tablefmt=tablefmt)
        self.formatWithValueFormatters(lod, tablefmt=tablefmt)
        result = tabulate(lod, headers="keys", tablefmt=tablefmt, **kwArgs)
        if tryItUrl is None and hasattr(self, "tryItUrl"):
            tryItUrl = self.tryItUrl
        # choose title and result header markup per table format
        if tablefmt == "github":
            title = f"## {self.title}"
            resultHeader = "## result"
        elif tablefmt == "mediawiki":
            title = f"== {self.title} =="
            resultHeader = "=== result ==="
        elif tablefmt == "latex":
            resultHeader = ""
            result = r"""\begin{table}
            \caption{%s}
            \label{tab:%s}
            %s
            \end{table}
            """ % (
                self.title,
                self.name,
                result,
            )
        else:
            title = f"{self.title}"
            resultHeader = "result:"
        # optionally document the query source code with a "try it!" link
        if withSourceCode:
            tryItUrlEncoded = self.getTryItUrl(tryItUrl, self.database)
            tryItMarkup = self.getLink(tryItUrlEncoded, "try it!", tablefmt)
            if tablefmt == "github":
                sourceCodeHeader = "### query"
                sourceCode = f"""```{self.lang}
{self.query}
```"""
            elif tablefmt == "mediawiki":
                sourceCodeHeader = "=== query ==="
                sourceCode = f"""<source lang='{self.lang}'>
{self.query}
</source>
"""
            elif tablefmt == "latex":
                sourceCodeHeader = (
                    r"see query listing \ref{listing:%s} and result table \ref{tab:%s}"
                    % (self.name, self.name)
                )
                sourceCode = r"""\begin{listing}[ht]
\caption{%s}
\label{listing:%s}
\begin{minted}{%s}
%s
\end{minted}
%s
\end{listing}
""" % (
                    self.title,
                    self.name,
                    self.lang.lower(),
                    self.query,
                    tryItMarkup,
                )
            else:
                sourceCodeHeader = "query:"
                sourceCode = f"{self.query}"
        # "try it!" links are only shown for SPARQL queries
        if self.lang != "sparql":
            tryItMarkup = ""
        queryResultDocumentation = QueryResultDocumentation(
            query=self,
            title=title,
            tablefmt=tablefmt,
            tryItMarkup=tryItMarkup,
            sourceCodeHeader=sourceCodeHeader,
            sourceCode=sourceCode,
            resultHeader=resultHeader,
            result=result,
        )
        return queryResultDocumentation

__init__(name, query, lang='sparql', endpoint=None, database='blazegraph', title=None, description=None, limit=None, prefixes=None, tryItUrl=None, formats=None, debug=False)

constructor Args: name(string): the name/label of the query query(string): the native Query text e.g. in SPARQL lang(string): the language of the query e.g. SPARQL endpoint(string): the endpoint url to use database(string): the type of database e.g. "blazegraph" title(string): the header/title of the query description(string): the description of the query limit(int): the limit of the query default: None prefixes(list): list of prefixes to be resolved tryItUrl(str): the url of a "tryit" webpage formats(list): key,value pairs of ValueFormatters to be applied debug(boolean): true if debug mode should be switched on

Source code in lodstorage/query.py
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
def __init__(
    self,
    name: str,
    query: str,
    lang="sparql",
    endpoint: str = None,
    database: str = "blazegraph",
    title: str = None,
    description: str = None,
    limit: int = None,
    prefixes=None,
    tryItUrl: str = None,
    formats: list = None,
    debug=False,
):
    """
    Initialize the query.

    Args:
        name(string): the name/label of the query
        query(string): the native Query text e.g. in SPARQL
        lang(string): the language of the query e.g. SPARQL
        endpoint(string): the endpoint url to use
        database(string): the type of database e.g. "blazegraph"
        title(string): the header/title of the query
        description(string): the description of the query
        limit(int): the limit of the query default: None
        prefixes(list): list of prefixes to be resolved
        tryItUrl(str): the url of a "tryit" webpage
        formats(list): key,value pairs of ValueFormatters to be applied
        debug(boolean): true if debug mode should be switched on
    """
    self.name = name
    self.query = query
    self.lang = lang
    self.endpoint = endpoint
    self.database = database
    self.tryItUrl = tryItUrl
    # the name doubles as the title unless one is given explicitly
    if title is None:
        title = name
    self.title = title
    self.description = description if description is not None else ""
    self.limit = limit
    self.prefixes = prefixes
    self.debug = debug
    self.formats = formats
    self.formatCallBacks = []

asWikiMarkup(listOfDicts)

convert the given listOfDicts result to MediaWiki markup

Parameters:

Name Type Description Default
listOfDicts(list)

the list of Dicts to convert to MediaWiki markup

required

Returns:

Name Type Description
string

the markup

Source code in lodstorage/query.py
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
def asWikiMarkup(self, listOfDicts):
    """
    render the given listOfDicts result as a MediaWiki table

    Args:
        listOfDicts(list): the list of Dicts to convert to MediaWiki markup

    Returns:
        string: the markup
    """
    if self.debug:
        print(listOfDicts)
    wiki_table = MediaWikiTable()
    wiki_table.fromListOfDicts(listOfDicts)
    return wiki_table.asWikiMarkup()

asWikiSourceMarkup()

convert me to Mediawiki markup for syntax highlighting using the "source" tag

Returns:

Name Type Description
string

the Markup

Source code in lodstorage/query.py
465
466
467
468
469
470
471
472
473
474
def asWikiSourceMarkup(self):
    """
    render my query as MediaWiki markup wrapped in a "source" tag
    for syntax highlighting

    Returns:
        string: the Markup
    """
    return f"<source lang='{self.lang}'>\n{self.query}\n</source>\n"

documentQueryResult(qlod, limit=None, tablefmt='mediawiki', tryItUrl=None, withSourceCode=True, **kwArgs)

document the given query results - note that a copy of the whole list is going to be created for being able to format

Parameters:

Name Type Description Default
qlod list

the list of dicts result

required
limit(int)

the maximum number of records to display in result tabulate

required
tablefmt(str)

the table format to use

required
tryItUrl str

the "try it!" url to show

None
withSourceCode(bool)

if True document the source code

required
Return

str: the documentation tabular text for the given parameters

Source code in lodstorage/query.py
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
    def documentQueryResult(
        self,
        qlod: list,
        limit=None,
        tablefmt: str = "mediawiki",
        tryItUrl: str = None,
        withSourceCode=True,
        **kwArgs,
    ):
        """
        document the given query results - note that a copy of the whole list is going to be created for being able to format

        Args:
            qlod: the list of dicts result
            limit(int): the maximum number of records to display in result tabulate
            tablefmt(str): the table format to use
            tryItUrl: the "try it!" url to show
            withSourceCode(bool): if True document the source code

        Return:
            str: the documentation tabular text for the given parameters
        """
        sourceCode = self.query
        tryItMarkup = ""
        sourceCodeHeader = ""
        resultHeader = ""
        title = self.title
        # work on a (possibly truncated) deep copy so formatting does not modify qlod
        if limit is not None:
            lod = copy.deepcopy(qlod[:limit])
        else:
            lod = copy.deepcopy(qlod)
        self.preFormatWithCallBacks(lod, tablefmt=tablefmt)
        self.formatWithValueFormatters(lod, tablefmt=tablefmt)
        result = tabulate(lod, headers="keys", tablefmt=tablefmt, **kwArgs)
        if tryItUrl is None and hasattr(self, "tryItUrl"):
            tryItUrl = self.tryItUrl
        # choose title and result header markup per table format
        if tablefmt == "github":
            title = f"## {self.title}"
            resultHeader = "## result"
        elif tablefmt == "mediawiki":
            title = f"== {self.title} =="
            resultHeader = "=== result ==="
        elif tablefmt == "latex":
            resultHeader = ""
            result = r"""\begin{table}
            \caption{%s}
            \label{tab:%s}
            %s
            \end{table}
            """ % (
                self.title,
                self.name,
                result,
            )
        else:
            title = f"{self.title}"
            resultHeader = "result:"
        # optionally document the query source code with a "try it!" link
        if withSourceCode:
            tryItUrlEncoded = self.getTryItUrl(tryItUrl, self.database)
            tryItMarkup = self.getLink(tryItUrlEncoded, "try it!", tablefmt)
            if tablefmt == "github":
                sourceCodeHeader = "### query"
                sourceCode = f"""```{self.lang}
{self.query}
```"""
            elif tablefmt == "mediawiki":
                sourceCodeHeader = "=== query ==="
                sourceCode = f"""<source lang='{self.lang}'>
{self.query}
</source>
"""
            elif tablefmt == "latex":
                sourceCodeHeader = (
                    r"see query listing \ref{listing:%s} and result table \ref{tab:%s}"
                    % (self.name, self.name)
                )
                sourceCode = r"""\begin{listing}[ht]
\caption{%s}
\label{listing:%s}
\begin{minted}{%s}
%s
\end{minted}
%s
\end{listing}
""" % (
                    self.title,
                    self.name,
                    self.lang.lower(),
                    self.query,
                    tryItMarkup,
                )
            else:
                sourceCodeHeader = "query:"
                sourceCode = f"{self.query}"
        # "try it!" links are only shown for SPARQL queries
        if self.lang != "sparql":
            tryItMarkup = ""
        queryResultDocumentation = QueryResultDocumentation(
            query=self,
            title=title,
            tablefmt=tablefmt,
            tryItMarkup=tryItMarkup,
            sourceCodeHeader=sourceCodeHeader,
            sourceCode=sourceCode,
            resultHeader=resultHeader,
            result=result,
        )
        return queryResultDocumentation

formatWithValueFormatters(lod, tablefmt)

format the given list of Dicts with the ValueFormatters

Source code in lodstorage/query.py
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
def formatWithValueFormatters(self, lod, tablefmt: str):
    """
    format the given list of Dicts with the ValueFormatters

    Args:
        lod(list): the list of dicts to format in place
        tablefmt(str): the target table format
    """
    # is there anything to do?
    if self.formats is None:
        # no
        return
    # get the value Formatters that might apply here
    valueFormatters = ValueFormatter.getFormats()
    formatsToApply = {}
    for valueFormatSpec in self.formats:
        # a spec has the shape "<key>:<formatName>" e.g. president:wikidata
        parts = valueFormatSpec.split(":")
        if len(parts) < 2:
            # fix: ignore malformed specs instead of raising IndexError
            continue
        keytoformat = parts[0]
        formatName = parts[1]
        if formatName in valueFormatters:
            formatsToApply[keytoformat] = valueFormatters[formatName]
    for record in lod:
        for keytoformat, valueFormatter in formatsToApply.items():
            # "*" formats all key values
            if keytoformat == "*":
                for key in record:
                    valueFormatter.applyFormat(record, key, tablefmt)
            # or just a selected one
            elif keytoformat in record:
                valueFormatter.applyFormat(record, keytoformat, tablefmt)

convert the given url and title to a link for the given tablefmt

Parameters:

Name Type Description Default
url(str)

the url to convert

required
title(str)

the title to show

required
tablefmt(str)

the table format to use

required
Source code in lodstorage/query.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
def getLink(self, url, title, tablefmt):
    """
    convert the given url and title to a link for the given tablefmt

    Args:
        url(str): the url to convert
        title(str): the title to show
        tablefmt(str): the table format to use
    """
    # nothing to link when there is no url
    if url is None:
        return ""
    # per-format link markup; plain "title:url" is the fallback
    markup_by_format = {
        "mediawiki": f"[{url} {title}]",
        "github": f"[{title}]({url})",
        "latex": r"\href{%s}{%s}" % (url, title),
    }
    return markup_by_format.get(tablefmt, f"{title}:{url}")

getTryItUrl(baseurl, database='blazegraph')

return the "try it!" url for the given baseurl

Parameters:

Name Type Description Default
baseurl(str)

the baseurl to use

required

Returns:

Name Type Description
str

the "try it!" url for the given query

Source code in lodstorage/query.py
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
def getTryItUrl(self, baseurl: str, database: str = "blazegraph"):
    """
    return the "try it!" url for the given baseurl

    Args:
        baseurl(str): the baseurl to use

    Returns:
        str: the "try it!" url for the given query
    """
    # https://stackoverflow.com/a/9345102/1497139
    encoded_query = urllib.parse.quote(str(self.query))
    # blazegraph takes the query as a url fragment, other engines as a query parameter
    delim = "/#" if database == "blazegraph" else "?query="
    return f"{baseurl}{delim}{encoded_query}"

preFormatWithCallBacks(lod, tablefmt)

run the configured call backs to pre-format the given list of dicts for the given tableformat

Parameters:

Name Type Description Default
lod(list)

the list of dicts to handle

required
tablefmt(str)

the table format (according to tabulate) to apply

required
Source code in lodstorage/query.py
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
def preFormatWithCallBacks(self, lod, tablefmt: str):
    """
    run the configured callbacks to pre-format the given list of dicts
    for the given table format

    Args:
        lod(list): the list of dicts to handle
        tablefmt(str): the table format (according to tabulate) to apply
    """
    for record in lod:
        for key, value in record.items():
            # None values are never passed to callbacks
            if value is None:
                continue
            for callback in self.formatCallBacks:
                callback(record, key, value, tablefmt)

convert url prefixes to link according to the given table format TODO - refactor as preFormat callback

Parameters:

Name Type Description Default
lod(list)

the list of dicts to convert

required
prefix(str)

the prefix to strip

required
tablefmt(str)

the tabulate tableformat to use

required
Source code in lodstorage/query.py
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
def prefixToLink(self, lod: list, prefix: str, tablefmt: str):
    """
    convert url prefixes to links according to the given table format
    TODO - refactor as preFormat callback

    Args:
        lod(list): the list of dicts to convert
        prefix(str): the prefix to strip
        tablefmt(str): the tabulate tableformat to use
    """
    for record in lod:
        for key, value in record.items():
            # only string values carrying the prefix are converted
            if not (isinstance(value, str) and value.startswith(prefix)):
                continue
            decoded_item = urllib.parse.unquote(value.replace(prefix, ""))
            if tablefmt == "latex":
                record[key] = decoded_item
            else:
                record[key] = self.getLink(value, decoded_item, tablefmt)

QueryManager

Bases: object

manages pre packaged Queries

Source code in lodstorage/query.py
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
class QueryManager(object):
    """
    manages pre packaged Queries
    """

    def __init__(
        self, lang: str = None, debug=False, queriesPath=None, with_default: bool = True
    ):
        """
        Constructor
        Args:
            lang(str): the language to use for the queries sql or sparql
            queriesPath(str): the path of the yaml file to load queries from
            debug(bool): True if debug information should be shown
            with_default(bool): if True also load the default yaml file
        """
        if lang is None:
            lang = "sql"
        self.queriesByName = {}
        self.lang = lang
        self.debug = debug
        queries = QueryManager.getQueries(
            queriesPath=queriesPath, with_default=with_default
        )
        for name, queryDict in queries.items():
            # only instantiate queries that have a variant in my language
            if self.lang in queryDict:
                queryText = queryDict.pop(self.lang)
                for qformat in ["sparql", "sql"]:  # drop not needed query variants
                    if qformat in queryDict:
                        queryDict.pop(qformat)
                query = Query(
                    name=name,
                    query=queryText,
                    lang=self.lang,
                    **queryDict,
                    debug=self.debug,
                )
                self.queriesByName[name] = query

    @staticmethod
    def getQueries(queriesPath=None, with_default: bool = True):
        """
        get the queries for the given queries Path

        Args:
            queriesPath(str): the path of the yaml file to load queries from
            with_default(bool): if True also load the default yaml file

        Returns:
            dict: the raw query records by name
        """
        queriesPaths = YamlPath.getPaths(
            "queries.yaml", queriesPath, with_default=with_default
        )
        queries = {}
        # fix: use a distinct loop variable so the queriesPath parameter
        # is not shadowed inside the loop
        for lQueriesPath in queriesPaths:
            if os.path.isfile(lQueriesPath):
                with open(lQueriesPath, "r") as stream:
                    lqueries = yaml.safe_load(stream)
                    for key in lqueries:
                        queries[key] = lqueries[key]
        return queries

__init__(lang=None, debug=False, queriesPath=None, with_default=True)

Constructor Args: lang(str): the language to use for the queries sql or sparql queriesPath(str): the path of the yaml file to load queries from debug(bool): True if debug information should be shown with_default(bool): if True also load the default yaml file

Source code in lodstorage/query.py
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
def __init__(
    self, lang: str = None, debug=False, queriesPath=None, with_default: bool = True
):
    """
    Construct the query manager.

    Args:
        lang(str): the language to use for the queries sql or sparql
        queriesPath(str): the path of the yaml file to load queries from
        debug(bool): True if debug information should be shown
        with_default(bool): if True also load the default yaml file
    """
    self.queriesByName = {}
    self.lang = "sql" if lang is None else lang
    self.debug = debug
    query_records = QueryManager.getQueries(
        queriesPath=queriesPath, with_default=with_default
    )
    for query_name, query_record in query_records.items():
        # skip queries without a variant in my language
        if self.lang not in query_record:
            continue
        query_text = query_record.pop(self.lang)
        # drop the query variants that are not needed
        for variant in ("sparql", "sql"):
            query_record.pop(variant, None)
        self.queriesByName[query_name] = Query(
            name=query_name,
            query=query_text,
            lang=self.lang,
            **query_record,
            debug=self.debug,
        )

getQueries(queriesPath=None, with_default=True) staticmethod

get the queries for the given queries Path

Parameters:

Name Type Description Default
queriesPath(str)

the path of the yaml file to load queries from

required
with_default(bool)

if True also load the default yaml file

required
Source code in lodstorage/query.py
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
@staticmethod
def getQueries(queriesPath=None, with_default: bool = True):
    """
    get the queries for the given queries Path

    Args:
        queriesPath(str): the path of the yaml file to load queries from
        with_default(bool): if True also load the default yaml file

    Returns:
        dict: the raw query records by name
    """
    queriesPaths = YamlPath.getPaths(
        "queries.yaml", queriesPath, with_default=with_default
    )
    queries = {}
    # fix: use a distinct loop variable so the queriesPath parameter
    # is not shadowed inside the loop
    for lQueriesPath in queriesPaths:
        if os.path.isfile(lQueriesPath):
            with open(lQueriesPath, "r") as stream:
                lqueries = yaml.safe_load(stream)
                for key in lqueries:
                    queries[key] = lqueries[key]
    return queries

QueryResultDocumentation

documentation of a query result

Source code in lodstorage/query.py
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
class QueryResultDocumentation:
    """
    documentation of a query result
    """

    def __init__(
        self,
        query,
        title: str,
        tablefmt: str,
        tryItMarkup: str,
        sourceCodeHeader: str,
        sourceCode: str,
        resultHeader: str,
        result: str,
    ):
        """
        constructor

        Args:
            query(Query): the query to be documented
            title(str): the title markup
            tablefmt(str): the tableformat that has been used
            tryItMarkup(str): the "try it!" markup to show
            sourceCodeHeader(str): the header title to use for the sourceCode
            sourceCode(str): the sourceCode
            resultHeader(str): the header title to use for the result
            result(str): the result markup e.g. the result table

        """
        self.query = query
        self.title = title
        self.tablefmt = tablefmt
        # prefix with a newline so the markup starts on its own line
        self.tryItMarkup = f"\n{tryItMarkup}"
        self.sourceCodeHeader = sourceCodeHeader
        self.sourceCode = sourceCode
        self.resultHeader = resultHeader
        self.result = result

    @staticmethod
    def uniCode2Latex(text: str, withConvert: bool = False) -> str:
        """
        converts unicode text to latex and
        fixes UTF-8 chars for latex in a certain range:
            ₀:$_0$ ... ₉:$_9$

        see https://github.com/phfaist/pylatexenc/issues/72

        Args:
            text(str): the string to fix
            withConvert(bool): if unicode to latex library conversion should be used

        Returns:
            str: latex presentation of UTF-8 char
        """
        # map the unicode subscript digits U+2080..U+2089 to latex math subscripts
        for code in range(8320, 8330):
            text = text.replace(chr(code), f"$_{code-8320}$")
        if withConvert:
            latex = unicode_to_latex(text)
            # workaround {\textbackslash} being returned
            # latex=latex.replace("{\\textbackslash}",'\\')
            text = latex
        return text

    def __str__(self):
        """
        simple string representation

        Returns:
            str: the text representation as delivered by asText
        """
        return self.asText()

    def asText(self):
        """
        return my text representation

        Returns:
            str: description, sourceCodeHeader, sourceCode, tryIt link and result table
        """
        text = f"{self.title}\n{self.query.description}\n{self.sourceCodeHeader}\n{self.sourceCode}{self.tryItMarkup}\n{self.resultHeader}\n{self.result}"
        # latex output needs the unicode subscript digits translated
        fixedStr = (
            self.uniCode2Latex(text) if self.tablefmt.lower() == "latex" else text
        )
        return fixedStr

__init__(query, title, tablefmt, tryItMarkup, sourceCodeHeader, sourceCode, resultHeader, result)

constructor

Parameters:

Name Type Description Default
query(Query)

the query to be documented

required
title(str)

the title markup

required
tablefmt(str)

the tableformat that has been used

required
tryItMarkup str

the "try it!" markup to show

required
sourceCodeHeader(str)

the header title to use for the sourceCode

required
sourceCode(str)

the sourceCode

required
resultHeader(str)

the header title to use for the result

required
result(str)

the result markup e.g. the result table

required
Source code in lodstorage/query.py
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
def __init__(
    self,
    query,
    title: str,
    tablefmt: str,
    tryItMarkup: str,
    sourceCodeHeader: str,
    sourceCode: str,
    resultHeader: str,
    result: str,
):
    """
    constructor

    Args:
        query(Query): the query to be documented
        title(str): the title markup
        tablefmt(str): the tableformat that has been used
        tryItMarkup(str): the "try it!" markup to show
        sourceCodeHeader(str): the header title to use for the sourceCode
        sourceCode(str): the sourceCode
        resultHeader(str): the header title to use for the result
        result(str): the result markup e.g. the result table

    """
    self.query = query
    self.title = title
    self.tablefmt = tablefmt
    # prefix with a newline so the markup starts on its own line
    self.tryItMarkup = f"\n{tryItMarkup}"
    self.sourceCodeHeader = sourceCodeHeader
    self.sourceCode = sourceCode
    self.resultHeader = resultHeader
    self.result = result

__str__()

simple string representation

Source code in lodstorage/query.py
266
267
268
269
270
def __str__(self):
    """
    simple string representation

    Returns:
        str: the text representation as delivered by asText
    """
    text = self.asText()
    return text

asText()

return my text representation

Returns:

Name Type Description
str

description, sourceCodeHeader, sourceCode, tryIt link and result table

Source code in lodstorage/query.py
272
273
274
275
276
277
278
279
280
281
282
283
def asText(self):
    """
    return my text representation

    Returns:
        str: description, sourceCodeHeader, sourceCode, tryIt link and result table
    """
    # tryItMarkup already starts with a newline, so no separator before it
    body = f"{self.sourceCode}{self.tryItMarkup}"
    text = f"{self.title}\n{self.query.description}\n{self.sourceCodeHeader}\n{body}\n{self.resultHeader}\n{self.result}"
    if self.tablefmt.lower() == "latex":
        # latex output needs the unicode subscript digits translated
        return self.uniCode2Latex(text)
    return text

uniCode2Latex(text, withConvert=False) staticmethod

converts unicode text to latex and fixes UTF-8 chars for latex in a certain range: ₀:$_0$ ... ₉:$_9$

see https://github.com/phfaist/pylatexenc/issues/72

Parameters:

Name Type Description Default
text(str)

the string to fix

required
withConvert(bool)

if unicode to latex library conversion should be used

required
Returns

str: latex presentation of UTF-8 char

Source code in lodstorage/query.py
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
@staticmethod
def uniCode2Latex(text: str, withConvert: bool = False) -> str:
    """
    converts unicode text to latex and
    fixes UTF-8 chars for latex in a certain range:
        ₀:$_0$ ... ₉:$_9$

    see https://github.com/phfaist/pylatexenc/issues/72

    Args:
        text(str): the string to fix
        withConvert(bool): if unicode to latex library conversion should be used

    Returns:
        str: latex presentation of UTF-8 char
    """
    # map the unicode subscript digits U+2080..U+2089 to latex math subscripts
    for code in range(8320, 8330):
        text = text.replace(chr(code), f"$_{code-8320}$")
    if withConvert:
        latex = unicode_to_latex(text)
        # workaround {\textbackslash} being returned
        # latex=latex.replace("{\\textbackslash}",'\\')
        text = latex
    return text

QuerySyntaxHighlight

Syntax highlighting for queries with pygments

Source code in lodstorage/query.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
class QuerySyntaxHighlight:
    """
    Syntax highlighting for queries with pygments
    """

    def __init__(self, query, highlightFormat: str = "html"):
        """
        construct me for the given query and highlightFormat

        Args:
            query(Query): the query to do the syntax highlighting for
            highlightFormat(str): the highlight format to be used
        """
        self.query = query
        self.highlightFormat = highlightFormat
        # pick a pygments lexer matching the query language e.g. sql or sparql
        self.lexer = get_lexer_by_name(self.query.lang)
        # select the pygments formatter for the requested output format;
        # for any other format no formatter attribute is set
        formatter_classes = {"html": HtmlFormatter, "latex": LatexFormatter}
        if self.highlightFormat in formatter_classes:
            self.formatter = formatter_classes[self.highlightFormat]()

    def highlight(self):
        """
        Returns:
            str: the result of the syntax highlighting with pygments
        """
        rendered = highlight(self.query.query, self.lexer, self.formatter)
        return rendered

__init__(query, highlightFormat='html')

construct me for the given query and highlightFormat

Parameters:

Name Type Description Default
query(Query)

the query to do the syntax highlighting for

required
highlightFormat(str)

the highlight format to be used

required
Source code in lodstorage/query.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def __init__(self, query, highlightFormat: str = "html"):
    """
    construct me for the given query and highlightFormat

    Args:
        query(Query): the query to do the syntax highlighting for
        highlightFormat(str): the highlight format to be used - "html" or "latex"
    """
    self.query = query
    self.highlightFormat = highlightFormat
    # pick a pygments lexer matching the query language e.g. sql or sparql
    self.lexer = get_lexer_by_name(self.query.lang)
    if self.highlightFormat == "html":
        self.formatter = HtmlFormatter()
    elif self.highlightFormat == "latex":
        self.formatter = LatexFormatter()
    # note: for any other highlightFormat no formatter attribute is set

highlight()

Returns:

Name Type Description
str

the result of the syntax highlighting with pygments

Source code in lodstorage/query.py
193
194
195
196
197
198
199
def highlight(self):
    """
    Returns:
        str: the result of the syntax highlighting with pygments
    """
    # delegate to pygments with the lexer and formatter chosen at construction
    rendered = highlight(self.query.query, self.lexer, self.formatter)
    return rendered

ValueFormatter

a value Formatter

Source code in lodstorage/query.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
class ValueFormatter:
    """
    a value Formatter
    """

    home = str(Path.home())
    # additional endpoints from users endpoint configuration
    formatsPath = f"{os.path.dirname(__file__)}/../sampledata/formats.yaml"
    valueFormats = None

    def __init__(
        self,
        name: str,
        formatString: str,
        regexps: list = None,
    ):
        """
        constructor

        Args:
            name(str): the name of this formatter
            formatString(str): the format String to use
            regexps(list): the regular expressions to apply
        """
        self.name = name
        # guard against None so applyFormat can safely use len() and iterate
        self.regexps = regexps if regexps is not None else []
        self.formatString = formatString

    @classmethod
    def fromDict(cls, name: str, record: dict):
        """
        create a ValueFormatter from the given dict

        Args:
            name(str): the name of the formatter
            record(dict): a record with a "format" entry and optional "regexps"

        Returns:
            ValueFormatter: the value formatter for the given record
        """
        regexps = record.get("regexps", [])
        # use cls so subclasses create instances of their own type
        vf = cls(name=name, formatString=record["format"], regexps=regexps)
        return vf

    @classmethod
    def getFormats(cls, formatsPath: str = None) -> dict:
        """
        get the available ValueFormatters

        Args:
            formatsPath(str): the path to the yaml file to read the format specs from
        Returns:
            dict: a map for ValueFormatters by formatter Name
        """
        # lazy initialization - read the yaml file(s) only once
        if cls.valueFormats is None:
            valueFormats = {}
            formatPaths = YamlPath.getPaths("formats.yaml", formatsPath)
            for formatPath in formatPaths:
                with open(formatPath, "r", encoding="utf-8") as stream:
                    valueFormatRecords = yaml.safe_load(stream)
                    for valueFormatKey, valueFormatRecord in valueFormatRecords.items():
                        valueFormats[valueFormatKey] = ValueFormatter.fromDict(
                            name=valueFormatKey, record=valueFormatRecord
                        )
            cls.valueFormats = valueFormats
        return cls.valueFormats

    def applyFormat(self, record, key, resultFormat: Format):
        """
        apply the given format to the given record

        Args:
            record(dict): the record to handle
            key(str): the property key
            resultFormat(str): the resultFormat Style to apply
        """
        if key in record:
            value = record[key]
            if value is not None and isinstance(value, str):
                # if there are no regular expressions specified always format
                doformat = len(self.regexps) == 0
                for regexp in self.regexps:
                    try:
                        vmatch = re.match(regexp, value)
                        if vmatch:
                            # we found a match and will format it if the value is not none
                            doformat = True
                            value = vmatch.group("value")
                    except Exception as ex:
                        print(
                            f"ValueFormatter: {self.name}\nInvalid regular expression:{regexp}\n{str(ex)}",
                            file=sys.stderr,
                        )
                if value is not None and doformat:
                    link = self.formatString.format(value=value)
                    newValue = None
                    if resultFormat == "github":
                        newValue = f"[{value}]({link})"
                    elif resultFormat == "mediawiki":
                        newValue = f"[{link} {value}]"
                    elif resultFormat == "latex":
                        # double backslash avoids the invalid "\h" escape warning
                        newValue = f"\\href{{{link}}}{{{value}}}"
                    if newValue is not None:
                        record[key] = newValue

__init__(name, formatString, regexps=None)

constructor

Parameters:

Name Type Description Default
formatString(str)

the format String to use

required
regexps(list)

the regular expressions to apply

required
Source code in lodstorage/query.py
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
def __init__(
    self,
    name: str,
    formatString: str,
    regexps: list = None,
):
    """
    constructor

    Args:
        name(str): the name of this formatter
        formatString(str): the format String to use
        regexps(list): the regular expressions to apply
    """
    self.name = name
    # guard against None so len()/iteration over regexps is always safe
    self.regexps = regexps if regexps is not None else []
    self.formatString = formatString

applyFormat(record, key, resultFormat)

apply the given format to the given record

Parameters:

Name Type Description Default
record(dict)

the record to handle

required
key(str)

the property key

required
resultFormat(str)

the resultFormat Style to apply

required
Source code in lodstorage/query.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def applyFormat(self, record, key, resultFormat: Format):
    """
    apply the given format to the given record

    Args:
        record(dict): the record to handle
        key(str): the property key
        resultFormat(str): the resultFormat Style to apply
    """
    # guard clauses: nothing to do for missing keys or non-string values
    if key not in record:
        return
    value = record[key]
    if value is None or not isinstance(value, str):
        return
    regexps = self.regexps or []  # tolerate a None regexps attribute
    # if there are no regular expressions specified always format
    doformat = len(regexps) == 0
    for regexp in regexps:
        try:
            vmatch = re.match(regexp, value)
            if vmatch:
                # we found a match and will format it if the value is not none
                doformat = True
                value = vmatch.group("value")
        except Exception as ex:
            print(
                f"ValueFormatter: {self.name}\nInvalid regular expression:{regexp}\n{str(ex)}",
                file=sys.stderr,
            )
    if value is not None and doformat:
        link = self.formatString.format(value=value)
        newValue = None
        if resultFormat == "github":
            newValue = f"[{value}]({link})"
        elif resultFormat == "mediawiki":
            newValue = f"[{link} {value}]"
        elif resultFormat == "latex":
            # double backslash avoids the invalid "\h" escape warning
            newValue = f"\\href{{{link}}}{{{value}}}"
        if newValue is not None:
            record[key] = newValue

fromDict(name, record) classmethod

create a ValueFormatter from the given dict

Source code in lodstorage/query.py
 98
 99
100
101
102
103
104
105
106
107
108
@classmethod
def fromDict(cls, name: str, record: dict):
    """
    create a ValueFormatter from the given dict

    Args:
        name(str): the name of the formatter
        record(dict): a record with a "format" entry and optional "regexps"

    Returns:
        ValueFormatter: the value formatter for the given record
    """
    regexps = record.get("regexps", [])
    # use cls instead of a hard coded class name so subclasses work
    vf = cls(name=name, formatString=record["format"], regexps=regexps)
    return vf

getFormats(formatsPath=None) classmethod

get the available ValueFormatters

Parameters:

Name Type Description Default
formatsPath(str)

the path to the yaml file to read the format specs from

required

Returns: dict: a map for ValueFormatters by formatter Name

Source code in lodstorage/query.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
@classmethod
def getFormats(cls, formatsPath: str = None) -> dict:
    """
    get the available ValueFormatters

    Args:
        formatsPath(str): the path to the yaml file to read the format specs from
    Returns:
        dict: a map for ValueFormatters by formatter Name
    """
    # lazy initialization - read the yaml file(s) only once per process
    if cls.valueFormats is None:
        loaded_formats = {}
        for format_path in YamlPath.getPaths("formats.yaml", formatsPath):
            with open(format_path, "r", encoding="utf-8") as stream:
                format_records = yaml.safe_load(stream)
                for format_key, format_record in format_records.items():
                    loaded_formats[format_key] = ValueFormatter.fromDict(
                        name=format_key, record=format_record
                    )
        cls.valueFormats = loaded_formats
    return cls.valueFormats

YamlPath

Source code in lodstorage/query.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
class YamlPath:
    """
    helper to determine the yaml file paths to read configuration from
    """

    @staticmethod
    def getPaths(yamlFileName: str, yamlPath: str = None, with_default: bool = True):
        """
        get the yaml file paths to read from

        Args:
            yamlFileName (str): The name of the YAML file to read from (if any) - legacy way to specify name
            yamlPath (str, optional): The full path to read from. Defaults to None.
            with_default (bool, optional): Whether to include paths from the default location .pylodstorage in the Home directory. Defaults to True.

        Returns:
            list: the list of yaml paths
        """
        if yamlPath is None:
            # fall back to the sample data shipped alongside the package
            yamlPath = f"{os.path.dirname(__file__)}/../sampledata/{yamlFileName}"
        paths = [yamlPath]
        if with_default:
            # additional yamls from users yaml configuration
            user_home = str(Path.home())
            user_yaml = f"{user_home}/.pylodstorage/{yamlFileName}"
            if os.path.isfile(user_yaml):
                paths.append(user_yaml)
        return paths

getPaths(yamlFileName, yamlPath=None, with_default=True) staticmethod

Parameters:

Name Type Description Default
yamlFileName str

The name of the YAML file to read from (if any) - legacy way to specify name

required
yamlPath str

The full path to read from. Defaults to None.

None
with_default bool

Whether to include paths from the default location .pylodstorage in the Home directory. Defaults to True.

True
Source code in lodstorage/query.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
@staticmethod
def getPaths(yamlFileName: str, yamlPath: str = None, with_default: bool = True):
    """
    Args:
        yamlFileName (str): The name of the YAML file to read from if (any) - legacy way to specify name
        yamlPath (str, optional): The full path to read from. Defaults to None.
        with_default (bool, optional): Whether to include paths from the default location .pylodstorage in the Home directory. Defaults to True.

    """
    if yamlPath is None:
        yamlPath = f"{os.path.dirname(__file__)}/../sampledata/{yamlFileName}"
    yamlPaths = [yamlPath]
    if with_default:
        home = str(Path.home())
        # additional yamls from users yaml configuration
        homepath = f"{home}/.pylodstorage/{yamlFileName}"
        if os.path.isfile(homepath):
            yamlPaths.append(homepath)
    return yamlPaths

querymain

Created on 2022-02-13

@author: wf

QueryMain

Commandline handler

Source code in lodstorage/querymain.py
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
class QueryMain:
    """
    Commandline handler
    """

    @classmethod
    def main(cls, args):
        """
        command line activation with parsed args

        Args:
            args: the parsed command line arguments
        """
        debug = args.debug
        endpoints = EndpointManager.getEndpoints(args.endpointPath)
        qm = QueryManager(lang=args.language, debug=debug, queriesPath=args.queriesPath)
        query = None
        queryCode = args.query
        formats = None
        # preload ValueFormatter
        ValueFormatter.getFormats(args.formatsPath)
        if args.list:
            # list the names and titles of all configured queries
            for name, query in qm.queriesByName.items():
                print(f"{name}:{query.title}")
        elif args.listEndpoints:
            # list endpoints
            for endpoint in endpoints.values():
                if hasattr(endpoint, "lang") and endpoint.lang == args.language:
                    print(endpoint)

        elif args.queryName is not None:
            # run a named query from the query manager
            if debug or args.showQuery:
                print(f"named query {args.queryName}:")
            if args.queryName not in qm.queriesByName:
                raise Exception(f"named query {args.queryName} not available")
            query = qm.queriesByName[args.queryName]
            # the command line limit only applies if the query defines none
            if query.limit is None and args.limit is not None:
                query.limit = args.limit
            formats = query.formats
            queryCode = query.query
            if debug or args.showQuery:
                if hasattr(query, "description") and query.description is not None:
                    print(query.description)
        if query is None:
            # ad hoc query given directly on the command line or via a file
            name = "?"
            if queryCode is None and args.queryFile is not None:
                queryFilePath = Path(args.queryFile)
                queryCode = queryFilePath.read_text()
                name = queryFilePath.stem
            query = Query(name="?", query=queryCode, lang=args.language)

        if queryCode:
            # substitute query parameters if the query declares any
            params = Params(query.query)
            if params.has_params:
                if not args.params:
                    raise Exception(f"{query.name} needs parameters")
                else:
                    params.set(args.params)
                    query.query = params.apply_parameters()
                    queryCode = query.query
            if debug or args.showQuery:
                print(f"{args.language}:\n{query.query}")
            endpointConf = Endpoint()
            endpointConf.method = "POST"
            if args.endpointName:
                endpointConf = endpoints.get(args.endpointName)
                query.tryItUrl = endpointConf.website
                query.database = endpointConf.database
            else:
                endpointConf.endpoint = query.endpoint
            if args.method:
                endpointConf.method = args.method
            if query.limit:
                # replace an existing LIMIT clause or append a new one
                if "limit" in queryCode or "LIMIT" in queryCode:
                    queryCode = re.sub(
                        r"(limit|LIMIT)\s+(\d+)", f"LIMIT {query.limit}", queryCode
                    )
                else:
                    queryCode += f"\nLIMIT {query.limit}"
            if args.language == "sparql":
                sparql = SPARQL.fromEndpointConf(endpointConf)
                if args.prefixes and endpointConf is not None:
                    queryCode = f"{endpointConf.prefixes}\n{queryCode}"
                if args.raw:
                    # raw mode: print the endpoint response as-is and stop
                    qres = cls.rawQuery(
                        endpointConf,
                        query=query.query,
                        resultFormat=args.format,
                        mimeType=args.mimeType,
                    )
                    print(qres)
                    return
                if "wikidata" in args.endpointName and formats is None:
                    formats = ["*:wikidata"]
                qlod = sparql.queryAsListOfDicts(queryCode)
            elif args.language == "sql":
                sqlDB = SQLDB(endpointConf.endpoint)
                qlod = sqlDB.query(queryCode)
            else:
                raise Exception(f"language {args.language} not known/supported")
            # render the query result in the requested output format
            if args.format is Format.csv:
                csv = CSV.toCSV(qlod)
                print(csv)
            elif args.format in [Format.latex, Format.github, Format.mediawiki]:
                doc = query.documentQueryResult(
                    qlod, tablefmt=str(args.format), floatfmt=".0f"
                )
                docstr = doc.asText()
                print(docstr)
            elif args.format in [Format.json] or args.format is None:  # set as default
                # https://stackoverflow.com/a/36142844/1497139
                print(json.dumps(qlod, indent=2, sort_keys=True, default=str))
            elif args.format in [Format.xml]:
                lod2xml = Lod2Xml(qlod)
                xml = lod2xml.asXml()
                print(xml)

            else:
                raise Exception(f"format {args.format} not supported yet")

    @staticmethod
    def rawQuery(endpointConf, query, resultFormat, mimeType, timeout: float = 10.0):
        """
        returns raw result of the endpoint

        Args:
            endpointConf: EndPoint
            query(str): query
            resultFormat(str): format of the result
            mimeType(str): mimeType
            timeout(float): timeout in seconds

        Returns:
            raw result of the query
        """
        params = {"query": query, "format": resultFormat}
        payload = {}
        if mimeType:
            headers = {"Accept": mimeType}
        else:
            headers = {}
        endpoint = endpointConf.endpoint
        method = endpointConf.method
        response = requests.request(
            method,
            endpoint,
            headers=headers,
            data=payload,
            params=params,
            timeout=timeout,
        )
        return response.text

main(args) classmethod

command line activation with parsed args

Parameters:

Name Type Description Default
args(list)

the command line arguments

required
Source code in lodstorage/querymain.py
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
@classmethod
def main(cls, args):
    """
    command line activation with parsed args

    Args:
        args: the parsed command line arguments
    """
    debug = args.debug
    endpoints = EndpointManager.getEndpoints(args.endpointPath)
    qm = QueryManager(lang=args.language, debug=debug, queriesPath=args.queriesPath)
    query = None
    queryCode = args.query
    formats = None
    # preload ValueFormatter
    ValueFormatter.getFormats(args.formatsPath)
    if args.list:
        # list the names and titles of all configured queries
        for name, query in qm.queriesByName.items():
            print(f"{name}:{query.title}")
    elif args.listEndpoints:
        # list endpoints
        for endpoint in endpoints.values():
            if hasattr(endpoint, "lang") and endpoint.lang == args.language:
                print(endpoint)

    elif args.queryName is not None:
        # run a named query from the query manager
        if debug or args.showQuery:
            print(f"named query {args.queryName}:")
        if args.queryName not in qm.queriesByName:
            raise Exception(f"named query {args.queryName} not available")
        query = qm.queriesByName[args.queryName]
        # the command line limit only applies if the query defines none
        if query.limit is None and args.limit is not None:
            query.limit = args.limit
        formats = query.formats
        queryCode = query.query
        if debug or args.showQuery:
            if hasattr(query, "description") and query.description is not None:
                print(query.description)
    if query is None:
        # ad hoc query given directly on the command line or via a file
        name = "?"
        if queryCode is None and args.queryFile is not None:
            queryFilePath = Path(args.queryFile)
            queryCode = queryFilePath.read_text()
            name = queryFilePath.stem
        query = Query(name="?", query=queryCode, lang=args.language)

    if queryCode:
        # substitute query parameters if the query declares any
        params = Params(query.query)
        if params.has_params:
            if not args.params:
                raise Exception(f"{query.name} needs parameters")
            else:
                params.set(args.params)
                query.query = params.apply_parameters()
                queryCode = query.query
        if debug or args.showQuery:
            print(f"{args.language}:\n{query.query}")
        endpointConf = Endpoint()
        endpointConf.method = "POST"
        if args.endpointName:
            endpointConf = endpoints.get(args.endpointName)
            query.tryItUrl = endpointConf.website
            query.database = endpointConf.database
        else:
            endpointConf.endpoint = query.endpoint
        if args.method:
            endpointConf.method = args.method
        if query.limit:
            # replace an existing LIMIT clause or append a new one
            if "limit" in queryCode or "LIMIT" in queryCode:
                queryCode = re.sub(
                    r"(limit|LIMIT)\s+(\d+)", f"LIMIT {query.limit}", queryCode
                )
            else:
                queryCode += f"\nLIMIT {query.limit}"
        if args.language == "sparql":
            sparql = SPARQL.fromEndpointConf(endpointConf)
            if args.prefixes and endpointConf is not None:
                queryCode = f"{endpointConf.prefixes}\n{queryCode}"
            if args.raw:
                # raw mode: print the endpoint response as-is and stop
                qres = cls.rawQuery(
                    endpointConf,
                    query=query.query,
                    resultFormat=args.format,
                    mimeType=args.mimeType,
                )
                print(qres)
                return
            if "wikidata" in args.endpointName and formats is None:
                formats = ["*:wikidata"]
            qlod = sparql.queryAsListOfDicts(queryCode)
        elif args.language == "sql":
            sqlDB = SQLDB(endpointConf.endpoint)
            qlod = sqlDB.query(queryCode)
        else:
            raise Exception(f"language {args.language} not known/supported")
        # render the query result in the requested output format
        if args.format is Format.csv:
            csv = CSV.toCSV(qlod)
            print(csv)
        elif args.format in [Format.latex, Format.github, Format.mediawiki]:
            doc = query.documentQueryResult(
                qlod, tablefmt=str(args.format), floatfmt=".0f"
            )
            docstr = doc.asText()
            print(docstr)
        elif args.format in [Format.json] or args.format is None:  # set as default
            # https://stackoverflow.com/a/36142844/1497139
            print(json.dumps(qlod, indent=2, sort_keys=True, default=str))
        elif args.format in [Format.xml]:
            lod2xml = Lod2Xml(qlod)
            xml = lod2xml.asXml()
            print(xml)

        else:
            raise Exception(f"format {args.format} not supported yet")

rawQuery(endpointConf, query, resultFormat, mimeType, timeout=10.0) staticmethod

returns raw result of the endpoint

Parameters:

Name Type Description Default
endpointConf

EndPoint

required
query(str)

query

required
resultFormat(str)

format of the result

required
mimeType(str)

mimeType

required
timeout(float)

timeout in seconds

required

Returns:

Type Description

raw result of the query

Source code in lodstorage/querymain.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
@staticmethod
def rawQuery(endpointConf, query, resultFormat, mimeType, timeout: float = 10.0):
    """
    return the raw query result of the given endpoint

    Args:
        endpointConf (EndPoint): the endpoint configuration supplying the url and HTTP method
        query (str): the query to run
        resultFormat (str): format of the result passed as the "format" request parameter
        mimeType (str): MIME type to request via the Accept header; may be empty/None
        timeout (float): request timeout in seconds, default: 10.0

    Returns:
        str: the raw text of the HTTP response
    """
    params = {"query": query, "format": resultFormat}
    payload = {}
    # request the given MIME type via content negotiation when specified
    if mimeType:
        headers = {"Accept": mimeType}
    else:
        headers = {}
    endpoint = endpointConf.endpoint
    method = endpointConf.method
    response = requests.request(
        method,
        endpoint,
        headers=headers,
        data=payload,
        params=params,
        timeout=timeout,
    )
    return response.text

main(argv=None, lang=None)

main program.

commandline access to List of Dicts / Linked Open Data Queries

Source code in lodstorage/querymain.py
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
def main(argv=None, lang=None):  # IGNORE:C0111
    """
    main program.

    commandline access to List of Dicts / Linked Open Data Queries

    Args:
        argv (list): command line arguments; defaults to sys.argv[1:]
        lang (str): preset query language ("sparql" or "sql"); when None a
            required -l/--language option is added to the argument parser

    Returns:
        int: exit code - 1 on keyboard interrupt, 2 on error, None on success
    """
    if argv is None:
        argv = sys.argv[1:]

    program_name = os.path.basename(__file__)
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = "%%(prog)s %s (%s)" % (
        program_version,
        program_build_date,
    )
    program_shortdesc = (
        "commandline query of endpoints in diverse languages such as SPARQL/SQL"
    )
    user_name = "Wolfgang Fahl"
    program_license = """%s

  Created by %s on %s.
  Copyright 2020-2024 Wolfgang Fahl. All rights reserved.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE
""" % (
        program_shortdesc,
        user_name,
        str(__date__),
    )

    # defined before the try block so the error handler below can safely
    # inspect it even when argument parsing itself raised the exception
    args = None
    try:
        # Setup argument parser
        parser = ArgumentParser(
            description=program_license, formatter_class=RawDescriptionHelpFormatter
        )
        parser.add_argument(
            "-d",
            "--debug",
            dest="debug",
            action="store_true",
            help="set debug [default: %(default)s]",
        )
        parser.add_argument(
            "-ep",
            "--endpointPath",
            default=None,
            help="path to yaml file to configure endpoints to use for queries",
        )
        parser.add_argument(
            "-fp",
            "--formatsPath",
            default=ValueFormatter.formatsPath,
            help="path to yaml file to configure formats to use for query result documentation",
        )
        parser.add_argument(
            "-en",
            "--endpointName",
            default="wikidata",
            help=f"Name of the endpoint to use for queries. Available by default: {EndpointManager.getEndpointNames()}",
        )
        parser.add_argument("--method", help="method to be used for SPARQL queries")
        parser.add_argument("-f", "--format", type=Format, choices=list(Format))
        parser.add_argument(
            "-li",
            "--list",
            action="store_true",
            help="show the list of available queries",
        )
        parser.add_argument(
            "--limit", type=int, default=None, help="set limit parameter of query"
        )
        parser.add_argument(
            "--params",
            action=StoreDictKeyPair,
            help="query parameters as Key-value pairs in the format key1=value1,key2=value2",
        )
        parser.add_argument(
            "-le",
            "--listEndpoints",
            action="store_true",
            help="show the list of available endpoints",
        )
        parser.add_argument(
            "-m", "--mimeType", help="MIME-type to use for the raw query"
        )
        parser.add_argument(
            "-p",
            "--prefixes",
            action="store_true",
            help="add predefined prefixes for endpoint",
        )
        parser.add_argument(
            "-sq", "--showQuery", action="store_true", help="show the query"
        )
        parser.add_argument(
            "-qp", "--queriesPath", help="path to YAML file with query definitions"
        )
        parser.add_argument("-q", "--query", help="the query to run")
        parser.add_argument("-qf", "--queryFile", help="the query file to run")
        parser.add_argument("-qn", "--queryName", help="run a named query")
        parser.add_argument(
            "-raw",
            action="store_true",
            help="return the raw query result from the endpoint. (MIME type defined over -f or -m)",
        )
        parser.add_argument(
            "-V", "--version", action="version", version=program_version_message
        )
        # only ask for the language when it was not preset by the caller
        if lang is None:
            parser.add_argument(
                "-l", "--language", help="the query language to use", required=True
            )
        args = parser.parse_args(argv)
        if lang is not None:
            args.language = lang
        QueryMain.main(args)

    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 1
    except Exception as e:
        if DEBUG:
            # re-raise preserving the original traceback
            raise
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        # args stays None when parsing itself failed - guard before access
        if args is not None and args.debug:
            print(traceback.format_exc())
        return 2

mainSPARQL(argv=None)

commandline for SPARQL queries

Source code in lodstorage/querymain.py
201
202
203
204
205
def mainSPARQL(argv=None):
    """
    command line entry point for SPARQL queries

    Args:
        argv (list): command line arguments; defaults to sys.argv[1:]
    """
    main(argv=argv, lang="sparql")

mainSQL(argv=None)

commandline for SQL queries

Source code in lodstorage/querymain.py
194
195
196
197
198
def mainSQL(argv=None):
    """
    command line entry point for SQL queries

    Args:
        argv (list): command line arguments; defaults to sys.argv[1:]
    """
    main(argv=argv, lang="sql")

rdf

Created on 2024-01-27

@author: wf, using ChatGPT-4 prompting

RDFDumper

A class to convert instances of data models (based on a LinkML schema) into an RDF graph.

Source code in lodstorage/rdf.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
class RDFDumper:
    """
    A class to convert instances of data models (based on a LinkML schema) into an RDF graph.
    """

    def __init__(self, schema: Schema, instance: object):
        """
        Initialize the RDFDumper.

        Args:
            schema (Schema): The LinkML schema defining the structure of the data models.
            instance (object): The instance of the data model to be converted into RDF.
        """
        self.schema = schema
        self.instance = instance
        self.graph = Graph()
        # one rdflib Namespace per prefix declared in the schema
        self.namespaces = {
            prefix: Namespace(uri) for prefix, uri in schema.prefixes.items()
        }

    def convert_to_rdf(self):
        """
        Converts the provided instance into RDF triples based on the LinkML schema.

        Instances whose class name is not declared in the schema are silently ignored.
        """
        # Process the instance data according to its class in the schema
        instance_class = self.instance.__class__.__name__
        if instance_class in self.schema.classes:
            self.process_class(instance_class, self.instance)

    def serialize(self, rdf_format: str = "turtle") -> str:
        """
        Serializes the RDF graph into a string representation in the specified format.

        Args:
            rdf_format (str): The serialization format (e.g., 'turtle', 'xml', 'json-ld').

        Returns:
            str: The serialized RDF graph.
        """
        return self.graph.serialize(format=rdf_format)

    def value_iterator(self, value: Any):
        """
        Iterates over values in a mapping or iterable.

        Args:
            value: The value to iterate over. It can be a mapping, iterable, or a single value.

        Yields:
            Tuples of (key, value) from the input value. For single values, key is None.
        """
        if isinstance(value, Mapping):
            yield from value.items()
        # str/bytes are iterable but are treated as single scalar values
        elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
            yield from ((None, v) for v in value)
        else:
            yield (None, value)

    def process_class(self, class_name: str, instance_data: object):
        """
        Add the RDF triples for the given instance of the named schema class.

        Recurses into nested instances that expose a truthy 'identifier' attribute.
        Side effect: sets self.base_uri, which get_instance_uri relies on.

        Args:
            class_name (str): name of the class as declared in the schema.
            instance_data (object): the dataclass instance to convert.
        """
        # Get the base namespace URI
        self.base_uri = self.namespaces[self.schema.default_prefix]
        # get the class object
        # class_obj = self.schema.classes[class_name]
        # Construct class_uri using the namespace and class_name with a separator
        class_uri = URIRef(f"{self.base_uri}:{class_name}")

        # Create a unique URI or a Blank Node for the instance
        instance_uri = self.get_instance_uri(instance_data)

        # Type the instance with its class
        self.graph.add((instance_uri, RDF.type, class_uri))

        # loop over all fields of the instance data
        for field_info in fields(instance_data):
            slot_name = field_info.name
            # assure we only work on fields defined
            # in our schema
            slot_obj = self.schema.slots.get(slot_name)
            if not slot_obj:
                continue

            # Combine the namespace with the slot name to form the field URI
            field_uri = URIRef(f"{self.base_uri}:{slot_name}")
            field_value = getattr(instance_data, slot_name, None)

            # Use value_iterator to handle different types of values
            for key, item in self.value_iterator(field_value):
                if key is not None:
                    # Handle as a mapping
                    key_uri = URIRef(self.namespaces[self.schema.default_prefix][key])
                    self.graph.add((instance_uri, field_uri, key_uri))
                    self.graph.add(
                        (key_uri, RDF.value, self.convert_to_literal(item, slot_obj))
                    )
                else:
                    # Handle as a single value or an item from an iterable
                    # Check if item has an 'identifier' property
                    if hasattr(item, "identifier") and getattr(item, "identifier"):
                        # nested instance: link to it and recurse
                        item_uri = self.get_instance_uri(item)
                        self.graph.add((instance_uri, field_uri, item_uri))
                        self.process_class(item.__class__.__name__, item)
                    else:
                        self.graph.add(
                            (
                                instance_uri,
                                field_uri,
                                self.convert_to_literal(item, slot_obj),
                            )
                        )

    def get_instance_uri(self, instance_data):
        """
        Generates a URI for an instance. If the instance has an 'identifier' property, it uses that as part of the URI.
        Otherwise, it generates or retrieves a unique URI.

        NOTE(review): depends on self.base_uri which is only set by process_class.
        """
        if hasattr(instance_data, "identifier") and getattr(
            instance_data, "identifier"
        ):
            identifier = getattr(instance_data, "identifier")
            return URIRef(f"{self.base_uri}:{identifier}")
        else:
            # Fallback to a blank node if no identifier is found
            return BNode()

    def convert_to_literal(self, value, slot_obj):
        """
        Converts a value to an RDFLib Literal with appropriate datatype.

        Args:
            value: The value to be converted.
            slot_obj: The slot object containing information about the field (currently unused).

        Returns:
            An RDFLib Literal with the value and appropriate datatype.
        """
        # Determine the datatype based on the Python type of the value
        datatype = PythonTypes.get_rdf_datatype(type(value))

        # Create and return the literal
        return Literal(value, datatype=datatype)

__init__(schema, instance)

Initialize the RDFDumper.

Parameters:

Name Type Description Default
schema Schema

The LinkML schema defining the structure of the data models.

required
instance object

The instance of the data model to be converted into RDF.

required
Source code in lodstorage/rdf.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def __init__(self, schema: Schema, instance: object):
    """
    Create an RDFDumper for the given schema and instance.

    Args:
        schema (Schema): LinkML schema describing the data model structure.
        instance (object): data model instance to be converted into RDF.
    """
    self.schema = schema
    self.instance = instance
    self.graph = Graph()
    # map each schema prefix to an rdflib Namespace
    self.namespaces = {}
    for ns_prefix, ns_uri in schema.prefixes.items():
        self.namespaces[ns_prefix] = Namespace(ns_uri)

convert_to_literal(value, slot_obj)

Converts a value to an RDFLib Literal with appropriate datatype.

Parameters:

Name Type Description Default
value

The value to be converted.

required
slot_obj

The slot object containing information about the field.

required

Returns:

Type Description

An RDFLib Literal with the value and appropriate datatype.

Source code in lodstorage/rdf.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def convert_to_literal(self, value, slot_obj):
    """
    Wrap a plain Python value in an RDFLib Literal, deriving the RDF
    datatype from the value's Python type.

    Args:
        value: the value to be wrapped.
        slot_obj: slot metadata for the field (currently unused).

    Returns:
        Literal: the typed RDF literal.
    """
    rdf_datatype = PythonTypes.get_rdf_datatype(type(value))
    literal = Literal(value, datatype=rdf_datatype)
    return literal

convert_to_rdf()

Converts the provided instance into RDF triples based on the LinkML schema.

Source code in lodstorage/rdf.py
36
37
38
39
40
41
42
43
def convert_to_rdf(self):
    """
    Convert the stored instance into RDF triples as driven by the schema.

    Instances whose class is not declared in the schema are ignored.
    """
    class_name = type(self.instance).__name__
    if class_name in self.schema.classes:
        self.process_class(class_name, self.instance)

get_instance_uri(instance_data)

Generates a URI for an instance. If the instance has an 'identifier' property, it uses that as part of the URI. Otherwise, it generates or retrieves a unique URI.

Source code in lodstorage/rdf.py
126
127
128
129
130
131
132
133
134
135
136
137
138
def get_instance_uri(self, instance_data):
    """
    Build a URI for an instance: when the instance exposes a truthy
    'identifier' attribute it becomes part of the URI, otherwise a
    blank node is returned.
    """
    identifier = getattr(instance_data, "identifier", None)
    if identifier:
        return URIRef(f"{self.base_uri}:{identifier}")
    # no usable identifier - fall back to an anonymous node
    return BNode()

serialize(rdf_format='turtle')

Serializes the RDF graph into a string representation in the specified format.

Parameters:

Name Type Description Default
format str

The serialization format (e.g., 'turtle', 'xml', 'json-ld').

required

Returns:

Name Type Description
str str

The serialized RDF graph.

Source code in lodstorage/rdf.py
45
46
47
48
49
50
51
52
53
54
55
def serialize(self, rdf_format: str = "turtle") -> str:
    """
    Serializes the RDF graph into a string representation in the specified format.

    Args:
        format (str): The serialization format (e.g., 'turtle', 'xml', 'json-ld').

    Returns:
        str: The serialized RDF graph.
    """
    return self.graph.serialize(format=rdf_format)

value_iterator(value)

Iterates over values in a mapping or iterable.

Parameters:

Name Type Description Default
value Any

The value to iterate over. It can be a mapping, iterable, or a single value.

required

Yields:

Type Description

Tuples of (key, value) from the input value. For single values, key is None.

Source code in lodstorage/rdf.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def value_iterator(self, value: Any):
    """
    Yield (key, value) pairs from an arbitrary value.

    Mappings yield their items; non-string iterables yield one
    (None, element) pair per element; anything else - including
    str and bytes, which are deliberately treated as scalars -
    yields a single (None, value) pair.
    """
    if isinstance(value, Mapping):
        for pair in value.items():
            yield pair
    elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
        for element in value:
            yield (None, element)
    else:
        yield (None, value)

sample

Created on 2020-08-24

@author: wf

Royal

Bases: JSONAble

i am a single Royal

Source code in lodstorage/sample.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
class Royal(JSONAble):
    """
    i am a single Royal
    """

    @classmethod
    def getSamples(cls):
        """
        Return a list of sample royal records as dicts with the
        calculated fields age, ofAge and lastmodified added.
        """
        listOfDicts = [
            {
                "name": "Elizabeth Alexandra Mary Windsor",
                "born": Sample.dob("1926-04-21"),
                "numberInLine": 0,
                "wikidataurl": "https://www.wikidata.org/wiki/Q9682",
            },
            {
                "name": "Charles, Prince of Wales",
                "born": Sample.dob("1948-11-14"),
                "numberInLine": 1,
                "wikidataurl": "https://www.wikidata.org/wiki/Q43274",
            },
            {
                "name": "George of Cambridge",
                "born": Sample.dob("2013-07-22"),
                "numberInLine": 3,
                "wikidataurl": "https://www.wikidata.org/wiki/Q1359041",
            },
            {
                "name": "Harry Duke of Sussex",
                "born": Sample.dob("1984-09-15"),
                "numberInLine": 6,
                "wikidataurl": "https://www.wikidata.org/wiki/Q152316",
            },
        ]
        today = date.today()
        for person in listOfDicts:
            born = person["born"]
            # approximate age in years using the mean Gregorian year length
            age = (today - born).days / 365.2425
            person["age"] = age
            person["ofAge"] = age >= 18
            person["lastmodified"] = datetime.now()
        return listOfDicts

    def __repr__(self):
        # compact representation e.g. "Royal:<name>:<born>"
        text = self.__class__.__name__
        attrs = ["name", "born"]
        delim = ":"
        for attr in attrs:
            if hasattr(self, attr):
                value = getattr(self, attr)
                text += f"{delim}{value}"
                delim = ":"
        return text

Royals

Bases: JSONAbleList

a non ORM Royals list

Source code in lodstorage/sample.py
76
77
78
79
80
81
82
83
84
85
86
class Royals(JSONAbleList):
    """
    a non ORM Royals list
    """

    def __init__(self, load=False):
        """
        Construct the list wrapper.

        Args:
            load (bool): when True fill self.royals with sample records,
                otherwise leave it as None
        """
        super(Royals, self).__init__("royals", clazz=None)
        if load:
            self.royals = Royal.getSamples()
        else:
            self.royals = None

Sample

Bases: object

Sample dataset generator

Source code in lodstorage/sample.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
class Sample(object):
    """
    Sample dataset generator
    """

    # class-level cache for the city list fetched by getCities
    cityList = None

    def __init__(self):
        """
        Constructor
        """

    @staticmethod
    def getSample(size):
        """
        Return a list of `size` simple records with a pkey and cindex field.
        """
        listOfDicts = []
        for index in range(size):
            listOfDicts.append({"pkey": "index%d" % index, "cindex": index})
        return listOfDicts

    @staticmethod
    def getCountries():
        """
        Fetch and return a country list from a public gist (network access required).
        """
        countryJsonUrl = "https://gist.githubusercontent.com/erdem/8c7d26765831d0f9a8c62f02782ae00d/raw/248037cd701af0a4957cce340dabb0fd04e38f4c/countries.json"
        with urllib.request.urlopen(countryJsonUrl) as url:
            countryList = json.loads(url.read().decode())
        return countryList

    @staticmethod
    def getCities():
        """
        get a list of cities

        Fetched once from a public JSON source and cached in Sample.cityList;
        each city gets a synthetic cityId of the form "<country>-<name>".
        """
        if Sample.cityList is None:
            cityJsonUrl = "https://raw.githubusercontent.com/lutangar/cities.json/master/cities.json"
            with urllib.request.urlopen(cityJsonUrl) as url:
                Sample.cityList = json.loads(url.read().decode())
            for city in Sample.cityList:
                city["cityId"] = "%s-%s" % (city["country"], city["name"])
        return Sample.cityList

    @staticmethod
    def dob(isoDateString):
        """get the date of birth from the given ISO date string (YYYY-MM-DD)"""
        # if sys.version_info >= (3, 7):
        #    dt=datetime.fromisoformat(isoDateString)
        # else:
        dt = datetime.strptime(isoDateString, "%Y-%m-%d")
        return dt.date()

    @staticmethod
    def getRoyals():
        """Return the sample royal records as a list of dicts."""
        return Royal.getSamples()

    @staticmethod
    def getRoyalsInstances():
        """Return the sample royals as Royal instances."""
        lod = Royal.getSamples()
        royals = []
        for record in lod:
            royal = Royal()
            royal.fromDict(record)
            royals.append(royal)
        return royals

__init__()

Constructor

Source code in lodstorage/sample.py
20
21
22
23
def __init__(self):
    """
    Constructor - Sample only offers static helpers, so there is
    no instance state to initialize.
    """

dob(isoDateString) staticmethod

get the date of birth from the given ISO date string

Source code in lodstorage/sample.py
52
53
54
55
56
57
58
59
@staticmethod
def dob(isoDateString):
    """get the date of birth from the given iso date state"""
    # if sys.version_info >= (3, 7):
    #    dt=datetime.fromisoformat(isoDateString)
    # else:
    dt = datetime.strptime(isoDateString, "%Y-%m-%d")
    return dt.date()

getCities() staticmethod

get a list of cities

Source code in lodstorage/sample.py
39
40
41
42
43
44
45
46
47
48
49
50
@staticmethod
def getCities():
    """
    get a list of cities

    Fetched once from a public JSON source (network access required) and
    cached in Sample.cityList; each city gets a synthetic cityId of the
    form "<country>-<name>".
    """
    if Sample.cityList is None:
        cityJsonUrl = "https://raw.githubusercontent.com/lutangar/cities.json/master/cities.json"
        with urllib.request.urlopen(cityJsonUrl) as url:
            Sample.cityList = json.loads(url.read().decode())
        for city in Sample.cityList:
            city["cityId"] = "%s-%s" % (city["country"], city["name"])
    return Sample.cityList

sample2

Created on 2024-01-21

@author: wf

Countries

Represents a collection of country instances.

Attributes:

Name Type Description
countries List[Country]

A list of Country instances.

Source code in lodstorage/sample2.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
@lod_storable
class Countries:
    """
    Represents a collection of country instances.

    Attributes:
        countries (List[Country]): A list of Country instances.
    """

    countries: List[Country]

    @classmethod
    def get_countries_erdem(cls) -> "Countries":
        """
        get Erdem Ozkol's country list

        Fetched from a public gist (network access required).
        """
        countries_json_url = "https://gist.githubusercontent.com/erdem/8c7d26765831d0f9a8c62f02782ae00d/raw/248037cd701af0a4957cce340dabb0fd04e38f4c/countries.json"
        # read_from_url / from_dict are presumably provided by the
        # lod_storable decorator - NOTE(review): confirm against lodstorage.yamlable
        json_str = cls.read_from_url(countries_json_url)
        countries_list = json.loads(json_str)
        countries_dict = {"countries": countries_list}
        instance = cls.from_dict(countries_dict)
        return instance

    @classmethod
    def get_samples(cls) -> dict[str, "Countries"]:
        """
        Returns a dictionary of named samples
        for 'specification by example' style
        requirements management.

        Returns:
            dict: A dictionary with keys as sample names
            and values as `Countries` instances.
        """
        samples = {"country list provided by Erdem Ozkol": cls.get_countries_erdem()}
        return samples

get_countries_erdem() classmethod

get Erdem Ozkol's country list

Source code in lodstorage/sample2.py
182
183
184
185
186
187
188
189
190
191
192
@classmethod
def get_countries_erdem(cls) -> "Countries":
    """
    Fetch and parse Erdem Ozkol's public country list (network access required).
    """
    countries_json_url = "https://gist.githubusercontent.com/erdem/8c7d26765831d0f9a8c62f02782ae00d/raw/248037cd701af0a4957cce340dabb0fd04e38f4c/countries.json"
    raw_json = cls.read_from_url(countries_json_url)
    country_records = json.loads(raw_json)
    return cls.from_dict({"countries": country_records})

get_samples() classmethod

Returns a dictionary of named samples for 'specification by example' style requirements management.

Returns:

Name Type Description
dict dict[str, Countries]

A dictionary with keys as sample names

dict[str, Countries]

and values as Countries instances.

Source code in lodstorage/sample2.py
194
195
196
197
198
199
200
201
202
203
204
205
206
@classmethod
def get_samples(cls) -> dict[str, "Countries"]:
    """
    Provide named example instances for
    'specification by example' style requirements management.

    Returns:
        dict: mapping of sample name to `Countries` instance.
    """
    sample_name = "country list provided by Erdem Ozkol"
    return {sample_name: cls.get_countries_erdem()}

Country

Represents a country with its details.

Attributes:

Name Type Description
name str

The name of the country.

country_code str

The country code.

capital Optional[str]

The capital city of the country.

timezones List[str]

List of timezones in the country.

latlng List[float]

Latitude and longitude of the country.

Source code in lodstorage/sample2.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
@lod_storable
class Country:
    """
    Represents a country with its details.

    Attributes:
        name (str): The name of the country.
        country_code (str): The country code.
        capital (Optional[str]): The capital city of the country.
        timezones (List[str]): List of timezones in the country.
        latlng (List[float]): Latitude and longitude of the country.
    """

    name: str
    country_code: str
    capital: Optional[str] = None
    # mutable defaults need default_factory to avoid shared state
    timezones: List[str] = field(default_factory=list)
    latlng: List[float] = field(default_factory=list)

Royal

Represents a member of the royal family, with various personal details.

Attributes:

Name Type Description
name str

The full name of the royal member.

wikidata_id str

The Wikidata identifier associated with the royal member.

number_in_line Optional[int]

The number in line to succession, if applicable.

born_iso_date Optional[str]

The ISO date of birth.

died_iso_date Optional[str]

The ISO date of death, if deceased.

last_modified_iso str

ISO timestamp of the last modification.

age Optional[int]

The age of the royal member.

of_age Optional[bool]

Indicates whether the member is of legal age.

wikidata_url Optional[str]

URL to the Wikidata page of the member.

Source code in lodstorage/sample2.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
@lod_storable
class Royal:
    """
    Represents a member of the royal family, with various personal details.

    Attributes:
        name (str): The full name of the royal member.
        wikidata_id (str): The Wikidata identifier associated with the royal member.
        number_in_line (Optional[int]): The number in line to succession, if applicable.
        born_iso_date (Optional[str]): The ISO date of birth.
        died_iso_date (Optional[str]): The ISO date of death, if deceased.
        last_modified_iso (str): ISO timestamp of the last modification.
        age (Optional[int]): The age of the royal member.
        of_age (Optional[bool]): Indicates whether the member is of legal age.
        wikidata_url (Optional[str]): URL to the Wikidata page of the member.
    """

    name: str
    wikidata_id: str
    number_in_line: Optional[int] = None
    born_iso_date: Optional[str] = None
    died_iso_date: Optional[str] = None
    last_modified_iso: str = field(init=False)
    # NOTE(review): field(init=None) - None is falsy so these behave like
    # init=False; init=False was presumably intended - confirm and fix upstream
    age: Optional[int] = field(init=None)
    of_age: Optional[bool] = field(init=None)
    wikidata_url: Optional[str] = field(init=None)

    def __post_init__(self):
        """
        init calculated fields
        """
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # also self.lastmodified is not a declared dataclass field
        self.lastmodified = datetime.utcnow()
        self.last_modified_iso = self.lastmodified.strftime("%Y-%m-%dT%H:%M:%SZ")
        end_date = self.died if self.died else date.today()
        # approximate age in years using the mean Gregorian year length
        self.age = int((end_date - self.born).days / 365.2425)
        self.of_age = self.age >= 18
        if self.wikidata_id:
            self.wikidata_url = f"https://www.wikidata.org/wiki/{self.wikidata_id}"

    @property
    def identifier(self) -> str:
        """
        Generates a unique identifier for the Royal instance.
        The identifier is a combination of a slugified name and the Wikidata ID (if available).
        """
        slugified_name = slugify(self.name, lowercase=False, regex_pattern=r"[^\w\-]")
        if self.wikidata_id:
            return f"{slugified_name}-{self.wikidata_id}"
        return slugified_name

    @property
    def born(self) -> date:
        """Return the date of birth from the ISO date string."""
        born_date = DateConvert.iso_date_to_datetime(self.born_iso_date)
        return born_date

    @property
    def died(self) -> Optional[date]:
        """Return the date of death from the ISO date string, if available."""
        died_date = DateConvert.iso_date_to_datetime(self.died_iso_date)
        return died_date

born: date property

Return the date of birth from the ISO date string.

died: Optional[date] property

Return the date of death from the ISO date string, if available.

identifier: str property

Generates a unique identifier for the Royal instance. The identifier is a combination of a slugified name and the Wikidata ID (if available).

__post_init__()

init calculated fields

Source code in lodstorage/sample2.py
43
44
45
46
47
48
49
50
51
52
53
def __post_init__(self):
    """
    init calculated fields
    """
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
    # also self.lastmodified is not a declared dataclass field
    self.lastmodified = datetime.utcnow()
    self.last_modified_iso = self.lastmodified.strftime("%Y-%m-%dT%H:%M:%SZ")
    # age relative to date of death when deceased, otherwise to today
    end_date = self.died if self.died else date.today()
    # approximate age in years using the mean Gregorian year length
    self.age = int((end_date - self.born).days / 365.2425)
    self.of_age = self.age >= 18
    if self.wikidata_id:
        self.wikidata_url = f"https://www.wikidata.org/wiki/{self.wikidata_id}"

Royals

Represents a collection of Royal family members.

Attributes:

Name Type Description
members List[Royal]

A list of Royal family members.

Source code in lodstorage/sample2.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
@lod_storable
class Royals:
    """
    Represents a collection of Royal family members.

    Attributes:
        members (List[Royal]): A list of Royal family members.
    """

    members: List[Royal] = field(default_factory=list)

    @classmethod
    def get_samples(cls) -> dict[str, "Royals"]:
        """
        Returns a dictionary of named samples
        for 'specification by example' style
        requirements management.

        Returns:
            dict: A dictionary with keys as sample names and values as `Royals` instances.
        """
        # declarative table of the sample heirs - one dict per Royal
        heir_specs = [
            dict(
                name="Elizabeth Alexandra Mary Windsor",
                born_iso_date="1926-04-21",
                died_iso_date="2022-09-08",
                wikidata_id="Q9682",
            ),
            dict(
                name="Charles III of the United Kingdom",
                born_iso_date="1948-11-14",
                number_in_line=0,
                wikidata_id="Q43274",
            ),
            dict(
                name="William, Duke of Cambridge",
                born_iso_date="1982-06-21",
                number_in_line=1,
                wikidata_id="Q36812",
            ),
            dict(
                name="Prince George of Wales",
                born_iso_date="2013-07-22",
                number_in_line=2,
                wikidata_id="Q13590412",
            ),
            dict(
                name="Princess Charlotte of Wales",
                born_iso_date="2015-05-02",
                number_in_line=3,
                wikidata_id="Q18002970",
            ),
            dict(
                name="Prince Louis of Wales",
                born_iso_date="2018-04-23",
                number_in_line=4,
                wikidata_id="Q38668629",
            ),
            dict(
                name="Harry Duke of Sussex",
                born_iso_date="1984-09-15",
                number_in_line=5,
                wikidata_id="Q152316",
            ),
        ]
        heirs = [Royal(**spec) for spec in heir_specs]
        samples = {"QE2 heirs up to number in line 5": Royals(members=heirs)}
        return samples

get_samples() classmethod

Returns a dictionary of named samples for 'specification by example' style requirements management.

Returns:

Name Type Description
dict dict[str, Royals]

A dictionary with keys as sample names and values as Royals instances.

Source code in lodstorage/sample2.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
@classmethod
def get_samples(cls) -> dict[str, "Royals"]:
    """
    Returns a dictionary of named samples
    for 'specification by example' style
    requirements management.

    Returns:
        dict: A dictionary with keys as sample names and values as `Royals` instances.
    """
    # keyword-argument records for each sample Royal
    records = [
        dict(
            name="Elizabeth Alexandra Mary Windsor",
            born_iso_date="1926-04-21",
            died_iso_date="2022-09-08",
            wikidata_id="Q9682",
        ),
        dict(
            name="Charles III of the United Kingdom",
            born_iso_date="1948-11-14",
            number_in_line=0,
            wikidata_id="Q43274",
        ),
        dict(
            name="William, Duke of Cambridge",
            born_iso_date="1982-06-21",
            number_in_line=1,
            wikidata_id="Q36812",
        ),
        dict(
            name="Prince George of Wales",
            born_iso_date="2013-07-22",
            number_in_line=2,
            wikidata_id="Q13590412",
        ),
        dict(
            name="Princess Charlotte of Wales",
            born_iso_date="2015-05-02",
            number_in_line=3,
            wikidata_id="Q18002970",
        ),
        dict(
            name="Prince Louis of Wales",
            born_iso_date="2018-04-23",
            number_in_line=4,
            wikidata_id="Q38668629",
        ),
        dict(
            name="Harry Duke of Sussex",
            born_iso_date="1984-09-15",
            number_in_line=5,
            wikidata_id="Q152316",
        ),
    ]
    member_list = [Royal(**record) for record in records]
    return {"QE2 heirs up to number in line 5": Royals(members=member_list)}

Sample

Sample dataset provider

Source code in lodstorage/sample2.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
class Sample:
    """
    Sample dataset provider
    """

    @staticmethod
    def get(dataset_name: str):
        """
        Get the given sample dataset name
        """
        # guard-clause dispatch over the known dataset names
        if dataset_name == "royals":
            return Royals.get_samples()
        if dataset_name == "countries":
            return Countries.get_samples()
        raise ValueError("Unknown dataset name")

get(dataset_name) staticmethod

Get the given sample dataset name

Source code in lodstorage/sample2.py
214
215
216
217
218
219
220
221
222
223
224
225
226
@staticmethod
def get(dataset_name: str):
    """
    Get the given sample dataset name
    """
    # map dataset names to their sample provider callables
    providers = {
        "royals": Royals.get_samples,
        "countries": Countries.get_samples,
    }
    provider = providers.get(dataset_name)
    if provider is None:
        raise ValueError("Unknown dataset name")
    return provider()

schema

Created on 2021-01-26

@author: wf

Schema

Bases: object

a relational Schema

Source code in lodstorage/schema.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class Schema(object):
    """
    a relational Schema
    """

    def __init__(self, name: str, title: str):
        """
        Constructor

        Args:
            name(str): the name of the schema
            title(str): the title of the schema
        """
        self.name = name
        self.title = title
        # properties are registered later by name
        self.propsByName = {}

    @staticmethod
    def generalizeColumn(tableList, colName: str):
        """
        remove the column with the given name from all tables in the tablelist and
        return it

        Args:
            tableList(list): a list of Tables
            colName(string): the name of the column to generalize

        Returns:
            dict: the column record having been generalized and removed,
            or None if no column with the given name was found
        """
        gCol = None
        for table in tableList:
            # iterate over a copy - removing from a list while iterating it
            # would silently skip the element following the removed one
            for col in list(table["columns"]):
                if col["name"] == colName:
                    gCol = col.copy()
                    # no linking yet @FIXME - will need this later
                    if "link" in gCol:
                        gCol.pop("link")
                    # is generalization protected for this column?
                    if "special" not in col or not col["special"]:
                        table["columns"].remove(col)
        return gCol

    @staticmethod
    def getGeneral(tableList, name: str, debug: bool = False):
        """
        derive a general table from the given table list

        Args:
            tableList(list): a list of tables
            name(str): name of the general table
            debug(bool): True if column names should be shown

        Returns:
            a table dict for the generalized table
        """
        general = {"name": name, "columns": []}
        colCount = Counter()
        for table in tableList:
            for col in table["columns"]:
                columnId = "%s.%s" % (col["name"], col["type"])
                if debug:
                    print(columnId)
                colCount[columnId] += 1
        # a column is general if its name.type pair occurs in every table
        for columnId, count in colCount.items():
            if count == len(tableList):
                colName = columnId.split(".")[0]
                generalCol = Schema.generalizeColumn(tableList, colName)
                general["columns"].append(generalCol)
        return general

    @staticmethod
    def getGeneralViewDDL(tableList, name: str, debug=False) -> str:
        """
        get the DDL statement to create a general view

        Args:
            tableList: the list of tables
            name(str): the name of the view
            debug(bool): True if debug should be set

        Returns:
            str: the CREATE VIEW DDL statement (a UNION over all tables)
        """
        general = Schema.getGeneral(tableList, name, debug)
        cols = ""
        delim = ""
        for col in general["columns"]:
            cols += "%s%s" % (delim, col["name"])
            delim = ","
        ddl = "CREATE VIEW %s AS \n" % name
        delim = ""
        for table in tableList:
            ddl += "%s  SELECT %s FROM %s" % (delim, cols, table["name"])
            delim = "\nUNION\n"
        return ddl

__init__(name, title)

Constructor

Parameters:

Name Type Description Default
name(str)

the name of the schema

required
title(str)

the title of the schema

required
Source code in lodstorage/schema.py
37
38
39
40
41
42
43
44
45
46
47
def __init__(self, name: str, title: str):
    """
    Constructor

    Args:
        name(str): the name of the schema
        title(str): the title of the schema
    """
    # properties are registered later by name
    self.propsByName = {}
    self.name = name
    self.title = title

generalizeColumn(tableList, colName) staticmethod

remove the column with the given name from all tables in the tablelist and return it

Parameters:

Name Type Description Default
tableList(list)

a list of Tables

required
colName(string)

the name of the column to generalize

required

Returns:

Name Type Description
string

the column having been generalized and removed

Source code in lodstorage/schema.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
@staticmethod
def generalizeColumn(tableList, colName: str):
    """
    remove the column with the given name from all tables in the tablelist and
    return it

    Args:
        tableList(list): a list of Tables
        colName(string): the name of the column to generalize

    Returns:
        string: the column having been generalized and removed
    """
    gCol = None
    for table in tableList:
        for col in table["columns"]:
            if col["name"] == colName:
                gCol = col.copy()
                # no linking yet @FIXME - will need this later
                if "link" in gCol:
                    gCol.pop("link")
                # is generalization protected for this column?
                if not "special" in col or not col["special"]:
                    table["columns"].remove(col)
    return gCol

getGeneral(tableList, name, debug=False) staticmethod

derive a general table from the given table list Args: tableList(list): a list of tables name(str): name of the general table debug(bool): True if column names should be shown

Returns:

Type Description

a table dict for the generalized table

Source code in lodstorage/schema.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
@staticmethod
def getGeneral(tableList, name: str, debug: bool = False):
    """
    derive a general table from the given table list

    Args:
        tableList(list): a list of tables
        name(str): name of the general table
        debug(bool): True if column names should be shown

    Returns:
        a table dict for the generalized table
    """
    general = {"name": name, "columns": []}
    colCount = Counter()
    for table in tableList:
        for col in table["columns"]:
            columnId = f"{col['name']}.{col['type']}"
            if debug:
                print(columnId)
            colCount[columnId] += 1
    # a column is general if its name.type pair occurs in every table
    for columnId, count in colCount.items():
        if count == len(tableList):
            colName = columnId.split(".")[0]
            generalCol = Schema.generalizeColumn(tableList, colName)
            general["columns"].append(generalCol)
    return general

getGeneralViewDDL(tableList, name, debug=False) staticmethod

get the DDL statement to create a general view

Parameters:

Name Type Description Default
tableList

the list of tables

required
name(str)

the name of the view

required
debug(bool)

True if debug should be set

required
Source code in lodstorage/schema.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
@staticmethod
def getGeneralViewDDL(tableList, name: str, debug=False) -> str:
    """
    get the DDL statement to create a general view

    Args:
        tableList: the list of tables
        name(str): the name of the view
        debug(bool): True if debug should be set
    """
    general = Schema.getGeneral(tableList, name, debug)
    # comma-separated list of the generalized column names
    cols = ",".join(col["name"] for col in general["columns"])
    selects = ["  SELECT %s FROM %s" % (cols, table["name"]) for table in tableList]
    return "CREATE VIEW %s AS \n" % name + "\nUNION\n".join(selects)

SchemaManager

Bases: object

a manager for schemas

Source code in lodstorage/schema.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
class SchemaManager(
    object,
):
    """
    a manager for schemas
    """

    def __init__(self, schemaDefs=None, baseUrl: str = None):
        """
        constructor
            Args:
                schemaDefs(dict): a dictionary of schema names
                baseUrl(str): the base url to use for links
        """
        self.baseUrl = baseUrl
        defs = {} if schemaDefs is None else schemaDefs
        # one Schema per definition, keyed by its name
        self.schemasByName = {key: Schema(key, title) for key, title in defs.items()}

__init__(schemaDefs=None, baseUrl=None)

constructor Args: schemaDefs(dict): a dictionary of schema names baseUrl(str): the base url to use for links

Source code in lodstorage/schema.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def __init__(self, schemaDefs=None, baseUrl: str = None):
    """
    constructor
        Args:
            schemaDefs(dict): a dictionary of schema names
            baseUrl(str): the base url to use for links
    """
    self.baseUrl = baseUrl
    self.schemasByName = {}
    # register one Schema per definition, keyed by its name
    for key, name in (schemaDefs or {}).items():
        self.schemasByName[key] = Schema(key, name)

sparql

Created on 2020-08-14

@author: wf

SPARQL

Bases: object

wrapper for SPARQL e.g. Apache Jena, Virtuoso, Blazegraph

:ivar url: full endpoint url (including mode) :ivar mode: 'query' or 'update' :ivar debug: True if debugging is active :ivar typedLiterals: True if INSERT should be done with typedLiterals :ivar profile(boolean): True if profiling / timing information should be displayed :ivar sparql: the SPARQLWrapper2 instance to be used :ivar method(str): the HTTP method to be used 'POST' or 'GET'

Source code in lodstorage/sparql.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
class SPARQL(object):
    """
    wrapper for SPARQL e.g. Apache Jena, Virtuoso, Blazegraph

    :ivar url: full endpoint url (including mode)
    :ivar mode: 'query' or 'update'
    :ivar debug: True if debugging is active
    :ivar typedLiterals: True if INSERT should be done with typedLiterals
    :ivar profile(boolean): True if profiling / timing information should be displayed
    :ivar sparql: the SPARQLWrapper2 instance to be used
    :ivar method(str): the HTTP method to be used 'POST' or 'GET'
    """

    def __init__(
        self,
        url,
        mode="query",
        debug=False,
        isFuseki=False,
        typedLiterals=False,
        profile=False,
        agent="PyLodStorage",
        method="POST",
    ):
        """
        Construct a SPARQL wrapper

        Args:
            url(string): the base URL of the endpoint - the mode query/update is going to be appended
            mode(string): 'query' or 'update'
            debug(bool): True if debugging is to be activated
            isFuseki(bool): True if the endpoint is an Apache Jena Fuseki server -
                the mode is then appended to the url as a path segment
            typedLiterals(bool): True if INSERT should be done with typedLiterals
            profile(boolean): True if profiling / timing information should be displayed
            agent(string): the User agent to use
            method(string): the HTTP method to be used 'POST' or 'GET'
        """
        if isFuseki:
            # Fuseki serves query and update on separate service paths
            self.url = f"{url}/{mode}"
        else:
            self.url = url
        self.mode = mode
        self.debug = debug
        self.typedLiterals = typedLiterals
        self.profile = profile
        # NOTE(review): the wrapper is built from the base url, not self.url -
        # so for Fuseki the '/{mode}' suffix is not applied here; confirm intended
        self.sparql = SPARQLWrapper2(url)
        self.method = method
        self.sparql.agent = agent

    @classmethod
    def fromEndpointConf(cls, endpointConf) -> "SPARQL":
        """
        create a SPARQL endpoint from the given EndpointConfiguration

        Args:
            endpointConf(Endpoint): the endpoint configuration to be used
        """
        sparql = SPARQL(url=endpointConf.endpoint, method=endpointConf.method)
        if hasattr(endpointConf, "auth"):
            # map the configured auth scheme name to the SPARQLWrapper constant;
            # unknown schemes yield None, matching SPARQLWrapper's default handling
            auth_map = {"BASIC": BASIC, "DIGEST": DIGEST}
            authMethod = auth_map.get(endpointConf.auth)
            sparql.addAuthentication(
                endpointConf.user, endpointConf.passwd, method=authMethod
            )
        return sparql

    def addAuthentication(
        self, username: str, password: str, method: Union[BASIC, DIGEST] = BASIC
    ):
        """
        Add HTTP Authentication credentials to the sparql wrapper

        Args:
            username(str): name of the user
            password(str): password of the user
            method: HTTP Authentication method - a SPARQLWrapper constant
                (BASIC by default, or DIGEST)
        """
        self.sparql.setHTTPAuth(method)
        self.sparql.setCredentials(username, password)

    def rawQuery(self, queryString, method=POST):
        """
        query with the given query string

        Args:
            queryString(string): the SPARQL query to be performed
            method(string): POST or GET - POST is mandatory for update queries

        Returns:
            the raw query result
        """
        # work around SPARQLWrapper's comment handling before submitting
        patched_query = self.fix_comments(queryString)
        self.sparql.setQuery(patched_query)
        self.sparql.method = method
        return self.sparql.query()

    def fix_comments(self, query_string: str) -> str:
        """
        make sure broken SPARQLWrapper will find comments

        prepends a comment line so SPARQLWrapper's query-type
        detection is not confused by leading comments
        """
        return None if query_string is None else "#\n" + query_string

    def getValue(self, sparqlQuery: str, attr: str):
        """
        get the value for the given SPARQL query using the given attr

        Args:
            sparqlQuery(str): the SPARQL query to run
            attr(str): the attribute to get

        Returns:
            object: the single value of the attribute

        Raises:
            Exception: if the query does not return exactly one record
                containing the attribute (see getFirst)
        """
        if self.debug:
            print(sparqlQuery)
        qLod = self.queryAsListOfDicts(sparqlQuery)
        return self.getFirst(qLod, attr)

    def getValues(self, sparqlQuery: str, attrList: list) -> tuple:
        """
        get the tuple of values for the given SPARQL query and attribute list

        Args:
            sparqlQuery(str): the SPARQL query to run
            attrList(list): the list of attributes to extract from the single result record

        Returns:
            tuple: the values in attrList order

        Raises:
            Exception: if the query does not return exactly one record
                or an attribute is missing from the record
        """
        if self.debug:
            print(sparqlQuery)
        qLod = self.queryAsListOfDicts(sparqlQuery)
        if len(qLod) != 1:
            msg = f"getValues for {attrList} failed for {qLod}"
            raise Exception(msg)
        record = qLod[0]
        values = ()
        for attr in attrList:
            if attr not in record:
                msg = f"getValues failed for attribute {attr} which is missing in result record {record}"
                raise Exception(msg)
            values += (record[attr],)
        return values

    def getFirst(self, qLod: list, attr: str):
        """
        get the column attr of the first row of the given qLod list

        Args:
            qLod(list): the list of dicts (returned by a query)
            attr(str): the attribute to retrieve

        Returns:
            object: the value
        """
        # the query must have yielded exactly one record holding the attribute
        if len(qLod) != 1 or attr not in qLod[0]:
            raise Exception(f"getFirst for attribute {attr} failed for {qLod}")
        return qLod[0][attr]

    def getResults(self, jsonResult):
        """
        get the result from the given jsonResult

        Args:
            jsonResult: the JSON encoded result
                (a SPARQLWrapper result object exposing .bindings)

        Returns:
            list: the list of bindings
        """
        return jsonResult.bindings

    def insert(self, insertCommand):
        """
        run an insert

        Args:
            insertCommand(string): the SPARQL INSERT command

        Returns:
            tuple: (response, exception) - response is None if an exception
            occurred; exception is None on success
        """
        # INSERT bodies must be posted directly rather than url-encoded
        self.sparql.setRequestMethod(POSTDIRECTLY)
        response = None
        exception = None
        try:
            response = self.rawQuery(insertCommand, method=POST)
            # see https://github.com/RDFLib/sparqlwrapper/issues/159#issuecomment-674523696
            # dummy read the body
            response.response.read()
        except Exception as ex:
            # best-effort: errors are reported to the caller via the tuple,
            # not raised
            exception = ex
            if self.debug:
                print(ex)
        return response, exception

    def getLocalName(self, name):
        """
        retrieve valid localname from a string based primary key
        https://www.w3.org/TR/sparql11-query/#prefNames

        Args:
            name(string): the name to convert

        Returns:
            string: a valid local name (alphanumeric characters only)
        """
        return "".join(filter(str.isalnum, name))

    def insertListOfDicts(
        self,
        listOfDicts,
        entityType,
        primaryKey,
        prefixes,
        limit=None,
        batchSize=None,
        profile=False,
    ):
        """
        insert the given list of dicts mapping datatypes

        Args:
            listOfDicts(list): the list of record dicts to insert
            entityType(string): the entityType to use as a prefix for subjects and predicates
            primaryKey(string): the name of the primary key attribute to use
            prefixes(string): any PREFIX statements to be used
            limit(int): maximum number of records to insert
            batchSize(int): number of records to send per request
            profile(bool): NOTE(review): this parameter is currently unused -
                the code below checks self.profile instead; confirm intended

        Return:
            a list of errors which should be empty on full success

        datatype maping according to
        https://www.w3.org/TR/xmlschema-2/#built-in-datatypes

        mapped from
        https://docs.python.org/3/library/stdtypes.html

        compare to
        https://www.w3.org/2001/sw/rdb2rdf/directGraph/
        http://www.bobdc.com/blog/json2rdf/
        https://www.w3.org/TR/json-ld11-api/#data-round-tripping
        https://stackoverflow.com/questions/29030231/json-to-rdf-xml-file-in-python
        """
        if limit is not None:
            listOfDicts = listOfDicts[:limit]
        else:
            limit = len(listOfDicts)
        total = len(listOfDicts)
        if batchSize is None:
            # no batching - insert everything in a single request
            return self.insertListOfDictsBatch(
                listOfDicts, entityType, primaryKey, prefixes, total=total
            )
        else:
            startTime = time.time()
            errors = []
            # store the list in batches
            for i in range(0, total, batchSize):
                recordBatch = listOfDicts[i : i + batchSize]
                batchErrors = self.insertListOfDictsBatch(
                    recordBatch,
                    entityType,
                    primaryKey,
                    prefixes,
                    batchIndex=i,
                    total=total,
                    startTime=startTime,
                )
                errors.extend(batchErrors)
            if self.profile:
                print(
                    "insertListOfDicts for %9d records in %6.1f secs"
                    % (len(listOfDicts), time.time() - startTime),
                    flush=True,
                )
            return errors

    def insertListOfDictsBatch(
        self,
        listOfDicts,
        entityType,
        primaryKey,
        prefixes,
        title="batch",
        batchIndex=None,
        total=None,
        startTime=None,
    ):
        """
        insert a Batch part of listOfDicts

        Args:
            listOfDicts(list): the batch of record dicts to insert
            entityType(string): the entityType to use as a prefix for subjects and predicates
            primaryKey(string): the name of the primary key attribute to use
            prefixes(string): any PREFIX statements to be used
            title(string): the title to display for the profiling (if any)
            batchIndex(int): the start index of the current batch
            total(int): the total number of records for all batches
            startTime(datetime): the start of the batch processing

        Return:
            a list of errors which should be empty on full success
        """
        errors = []
        size = len(listOfDicts)
        if batchIndex is None:
            batchIndex = 0
        batchStartTime = time.time()
        if startTime is None:
            startTime = batchStartTime
        rdfprefix = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
        insertCommand = f"{rdfprefix}{prefixes}\nINSERT DATA {{\n"
        for index, record in enumerate(listOfDicts):
            if not primaryKey in record:
                errors.append(f"missing primary key {primaryKey} in record {index}")
            else:
                primaryValue = record[primaryKey]
                if primaryValue is None:
                    errors.append(
                        f"primary key {primaryKey} value is None in record {index}"
                    )
                else:
                    # subject = entityType plus the sanitized primary key value
                    encodedPrimaryValue = self.getLocalName(primaryValue)
                    tSubject = f"{entityType}__{encodedPrimaryValue}"
                    insertCommand += f'  {tSubject} rdf:type "{entityType}".\n'
                    for keyValue in record.items():
                        key, value = keyValue
                        # convert key if necessary
                        key = self.getLocalName(key)
                        valueType = type(value)
                        if self.debug:
                            print("%s(%s)=%s" % (key, valueType, value))
                        tPredicate = f"{entityType}_{key}"
                        tObject = value
                        # map python types to RDF terms - see the datatype
                        # mapping references in insertListOfDicts' docstring
                        if valueType == str:
                            escapedString = self.controlEscape(value)
                            tObject = '"%s"' % escapedString
                        elif valueType == int:
                            if self.typedLiterals:
                                tObject = (
                                    '"%d"^^<http://www.w3.org/2001/XMLSchema#integer>'
                                    % value
                                )
                            pass
                        elif valueType == float:
                            if self.typedLiterals:
                                tObject = (
                                    '"%s"^^<http://www.w3.org/2001/XMLSchema#decimal>'
                                    % value
                                )
                            pass
                        elif valueType == bool:
                            pass
                        elif valueType == datetime.date:
                            # dates are always typed, regardless of self.typedLiterals
                            # if self.typedLiterals:
                            tObject = (
                                '"%s"^^<http://www.w3.org/2001/XMLSchema#date>' % value
                            )
                            pass
                        elif valueType == datetime.datetime:
                            tObject = (
                                '"%s"^^<http://www.w3.org/2001/XMLSchema#dateTime>'
                                % value
                            )
                            pass
                        else:
                            errors.append(
                                "can't handle type %s in record %d" % (valueType, index)
                            )
                            tObject = None
                        if tObject is not None:
                            insertRecord = "  %s %s %s.\n" % (
                                tSubject,
                                tPredicate,
                                tObject,
                            )
                            insertCommand += insertRecord
        insertCommand += "\n}"
        if self.debug:
            print(insertCommand, flush=True)
        response, ex = self.insert(insertCommand)
        if response is None and ex is not None:
            # NOTE(review): 'index' is the loop variable of the record loop above -
            # if listOfDicts is empty this line raises a NameError; confirm callers
            # never pass an empty batch
            errors.append("%s for record %d" % (str(ex), index))
        if self.profile:
            print(
                "%7s for %9d - %9d of %9d %s in %6.1f s -> %6.1f s"
                % (
                    title,
                    batchIndex + 1,
                    batchIndex + size,
                    total,
                    entityType,
                    time.time() - batchStartTime,
                    time.time() - startTime,
                ),
                flush=True,
            )
        return errors

    # ASCII control characters (0x00-0x1F) that must be escaped in SPARQL string literals
    controlChars = [chr(c) for c in range(0x20)]

    @staticmethod
    def controlEscape(s):
        """
        escape control characters

        see https://stackoverflow.com/a/9778992/1497139
        """
        escaped = "".join(
            [
                c.encode("unicode_escape").decode("ascii")
                if c in SPARQL.controlChars
                else c
                for c in s
            ]
        )
        escaped = escaped.replace('"', '\\"')
        return escaped

    def query(self, queryString, method=POST):
        """
        get a list of results for the given query

        Args:
            queryString(string): the SPARQL query to execute
            method(string): the method eg. POST to use

        Returns:
            list: list of bindings, or None for an HTML (non result-set) response

        Raises:
            Exception: if an HTML response is returned that does not indicate success
        """
        queryResult = self.rawQuery(queryString, method=method)
        if self.debug:
            print(queryString)
        # some endpoints (e.g. update operations) answer with an HTML page
        # instead of a SPARQL result set
        if hasattr(queryResult, "info"):
            if "content-type" in queryResult.info():
                ct = queryResult.info()["content-type"]
                if "text/html" in ct:
                    response = queryResult.response.read().decode()
                    if "Success" not in response:
                        # bug fix: previously `raise ("%s failed: %s", response)`
                        # tried to raise a tuple, which itself raises
                        # "TypeError: exceptions must derive from BaseException"
                        raise Exception("query failed: %s" % response)
                return None
        jsonResult = queryResult.convert()
        return self.getResults(jsonResult)

    def queryAsListOfDicts(
        self, queryString, fixNone: bool = False, sampleCount: int = None
    ):
        """
        run the given query and convert the bindings to a list of dicts
        (to allow round-trip results for insertListOfDicts)

        Args:
            queryString(string): the SPARQL query to execute
            fixNone(bool): if True add None values for empty columns in Dict
            sampleCount(int): the number of samples to check

        Returns:
            list: a list of dicts
        """
        bindings = self.query(queryString, method=self.method)
        lod = self.asListOfDicts(bindings, fixNone=fixNone, sampleCount=sampleCount)
        return lod

    @staticmethod
    def strToDatetime(value, debug=False):
        """
        convert a string to a datetime
        Args:
            value(str): the value to convert
        Returns:
            datetime: the datetime
        """
        dateFormat = "%Y-%m-%d %H:%M:%S.%f"
        if "T" in value and "Z" in value:
            dateFormat = "%Y-%m-%dT%H:%M:%SZ"
        dt = None
        try:
            dt = datetime.datetime.strptime(value, dateFormat)
        except ValueError as ve:
            if debug:
                print(str(ve))
        return dt

    def asListOfDicts(self, records, fixNone: bool = False, sampleCount: int = None):
        """
        convert SPARQL result back to python native

        Args:
            record(list): the list of bindings
            fixNone(bool): if True add None values for empty columns in Dict
            sampleCount(int): the number of samples to check

        Returns:
            list: a list of Dicts
        """
        resultList = []
        fields = None
        if fixNone:
            fields = LOD.getFields(records, sampleCount)
        for record in records:
            resultDict = {}
            for keyValue in record.items():
                key, value = keyValue
                datatype = value.datatype
                if datatype is not None:
                    if datatype == "http://www.w3.org/2001/XMLSchema#integer":
                        resultValue = int(value.value)
                    elif datatype == "http://www.w3.org/2001/XMLSchema#decimal":
                        resultValue = float(value.value)
                    elif datatype == "http://www.w3.org/2001/XMLSchema#boolean":
                        resultValue = value.value in ["TRUE", "true"]
                    elif datatype == "http://www.w3.org/2001/XMLSchema#date":
                        dt = datetime.datetime.strptime(value.value, "%Y-%m-%d")
                        resultValue = dt.date()
                    elif datatype == "http://www.w3.org/2001/XMLSchema#dateTime":
                        dt = SPARQL.strToDatetime(value.value, debug=self.debug)
                        resultValue = dt
                    else:
                        # unsupported datatype
                        resultValue = value.value
                else:
                    resultValue = value.value
                resultDict[key] = resultValue
            if fixNone:
                for field in fields:
                    if not field in resultDict:
                        resultDict[field] = None
            resultList.append(resultDict)
        return resultList

    def printErrors(self, errors):
        """
        print the given list of errors

        Args:
            errors(list): a list of error strings

        Returns:
            boolean: True if the list is empty else false
        """
        if len(errors) > 0:
            print("ERRORS:")
            for error in errors:
                print(error, flush=True, file=stderr)
            return True
        else:
            return False

__init__(url, mode='query', debug=False, isFuseki=False, typedLiterals=False, profile=False, agent='PyLodStorage', method='POST')

Constructor of a SPARQL wrapper

Parameters:

Name Type Description Default
url(string)

the base URL of the endpoint - the mode query/update is going to be appended

required
mode(string)

'query' or 'update'

required
debug(bool)

True if debugging is to be activated

required
typedLiterals(bool)

True if INSERT should be done with typedLiterals

required
profile(boolean)

True if profiling / timing information should be displayed

required
agent(string)

the User agent to use

required
method(string)

the HTTP method to be used 'POST' or 'GET'

required
Source code in lodstorage/sparql.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def __init__(
    self,
    url,
    mode="query",
    debug=False,
    isFuseki=False,
    typedLiterals=False,
    profile=False,
    agent="PyLodStorage",
    method="POST",
):
    """
    Constructor a SPARQL wrapper

    Args:
        url(string): the base URL of the endpoint - the mode query/update is going to be appended
        mode(string): 'query' or 'update'
        debug(bool): True if debugging is to be activated
        typedLiterals(bool): True if INSERT should be done with typedLiterals
        profile(boolean): True if profiling / timing information should be displayed
        agent(string): the User agent to use
        method(string): the HTTP method to be used 'POST' or 'GET'
    """
    if isFuseki:
        self.url = f"{url}/{mode}"
    else:
        self.url = url
    self.mode = mode
    self.debug = debug
    self.typedLiterals = typedLiterals
    self.profile = profile
    self.sparql = SPARQLWrapper2(url)
    self.method = method
    self.sparql.agent = agent

addAuthentication(username, password, method=BASIC)

Add Http Authentication credentials to the sparql wrapper Args: username: name of the user password: password of the user method: HTTP Authentication method

Source code in lodstorage/sparql.py
85
86
87
88
89
90
91
92
93
94
95
96
def addAuthentication(
    self, username: str, password: str, method: Union[BASIC, DIGEST] = BASIC
):
    """
    Add Http Authentication credentials to the sparql wrapper
    Args:
        username: name of the user
        password: password of the user
        method: HTTP Authentication method
    """
    self.sparql.setHTTPAuth(method)
    self.sparql.setCredentials(username, password)

asListOfDicts(records, fixNone=False, sampleCount=None)

convert SPARQL result back to python native

Parameters:

Name Type Description Default
record(list)

the list of bindings

required
fixNone(bool)

if True add None values for empty columns in Dict

required
sampleCount(int)

the number of samples to check

required

Returns:

Name Type Description
list

a list of Dicts

Source code in lodstorage/sparql.py
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
def asListOfDicts(self, records, fixNone: bool = False, sampleCount: int = None):
    """
    convert SPARQL result back to python native

    Args:
        record(list): the list of bindings
        fixNone(bool): if True add None values for empty columns in Dict
        sampleCount(int): the number of samples to check

    Returns:
        list: a list of Dicts
    """
    resultList = []
    fields = None
    if fixNone:
        fields = LOD.getFields(records, sampleCount)
    for record in records:
        resultDict = {}
        for keyValue in record.items():
            key, value = keyValue
            datatype = value.datatype
            if datatype is not None:
                if datatype == "http://www.w3.org/2001/XMLSchema#integer":
                    resultValue = int(value.value)
                elif datatype == "http://www.w3.org/2001/XMLSchema#decimal":
                    resultValue = float(value.value)
                elif datatype == "http://www.w3.org/2001/XMLSchema#boolean":
                    resultValue = value.value in ["TRUE", "true"]
                elif datatype == "http://www.w3.org/2001/XMLSchema#date":
                    dt = datetime.datetime.strptime(value.value, "%Y-%m-%d")
                    resultValue = dt.date()
                elif datatype == "http://www.w3.org/2001/XMLSchema#dateTime":
                    dt = SPARQL.strToDatetime(value.value, debug=self.debug)
                    resultValue = dt
                else:
                    # unsupported datatype
                    resultValue = value.value
            else:
                resultValue = value.value
            resultDict[key] = resultValue
        if fixNone:
            for field in fields:
                if not field in resultDict:
                    resultDict[field] = None
        resultList.append(resultDict)
    return resultList

controlEscape(s) staticmethod

escape control characters

see https://stackoverflow.com/a/9778992/1497139

Source code in lodstorage/sparql.py
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
@staticmethod
def controlEscape(s):
    """
    escape control characters

    see https://stackoverflow.com/a/9778992/1497139
    """
    escaped = "".join(
        [
            c.encode("unicode_escape").decode("ascii")
            if c in SPARQL.controlChars
            else c
            for c in s
        ]
    )
    escaped = escaped.replace('"', '\\"')
    return escaped

fix_comments(query_string)

make sure broken SPARQLWrapper will find comments

Source code in lodstorage/sparql.py
114
115
116
117
118
119
120
def fix_comments(self, query_string: str) -> str:
    """
    make sure broken SPARQLWrapper will find comments
    """
    if query_string is None:
        return None
    return "#\n" + query_string

fromEndpointConf(endpointConf) classmethod

create a SPARQL endpoint from the given EndpointConfiguration

Parameters:

Name Type Description Default
endpointConf(Endpoint)

the endpoint configuration to be used

required
Source code in lodstorage/sparql.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
@classmethod
def fromEndpointConf(cls, endpointConf) -> "SPARQL":
    """
    create a SPARQL endpoint from the given EndpointConfiguration

    Args:
        endpointConf(Endpoint): the endpoint configuration to be used
    """
    sparql = SPARQL(url=endpointConf.endpoint, method=endpointConf.method)
    if hasattr(endpointConf, "auth"):
        authMethod = None
        if endpointConf.auth == "BASIC":
            authMethod = BASIC
        elif endpointConf.auth == "DIGEST":
            authMethod = DIGEST
        sparql.addAuthentication(
            endpointConf.user, endpointConf.passwd, method=authMethod
        )
    return sparql

getFirst(qLod, attr)

get the column attr of the first row of the given qLod list

Parameters:

Name Type Description Default
qLod(list)

the list of dicts (returned by a query)

required
attr(str)

the attribute to retrieve

required

Returns:

Name Type Description
object

the value

Source code in lodstorage/sparql.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def getFirst(self, qLod: list, attr: str):
    """
    get the column attr of the first row of the given qLod list

    Args:
        qLod(list): the list of dicts (returned by a query)
        attr(str): the attribute to retrieve

    Returns:
        object: the value
    """
    if len(qLod) == 1 and attr in qLod[0]:
        value = qLod[0][attr]
        return value
    raise Exception(f"getFirst for attribute {attr} failed for {qLod}")

getLocalName(name)

retrieve valid localname from a string based primary key https://www.w3.org/TR/sparql11-query/#prefNames

Parameters:

Name Type Description Default
name(string)

the name to convert

required

Returns:

Name Type Description
string

a valid local name

Source code in lodstorage/sparql.py
212
213
214
215
216
217
218
219
220
221
222
223
224
def getLocalName(self, name):
    """
    retrieve valid localname from a string based primary key
    https://www.w3.org/TR/sparql11-query/#prefNames

    Args:
        name(string): the name to convert

    Returns:
        string: a valid local name
    """
    localName = "".join(ch for ch in name if ch.isalnum())
    return localName

getResults(jsonResult)

get the result from the given jsonResult

Parameters:

Name Type Description Default
jsonResult

the JSON encoded result

required

Returns:

Name Type Description
list

the list of bindings

Source code in lodstorage/sparql.py
176
177
178
179
180
181
182
183
184
185
186
def getResults(self, jsonResult):
    """
    get the result from the given jsonResult

    Args:
        jsonResult: the JSON encoded result

    Returns:
        list: the list of bindings
    """
    return jsonResult.bindings

getValue(sparqlQuery, attr)

get the value for the given SPARQL query using the given attr

Parameters:

Name Type Description Default
sparql(SPARQL)

the SPARQL endpoint to get the value for

required
sparqlQuery(str)

the SPARQL query to run

required
attr(str)

the attribute to get

required
Source code in lodstorage/sparql.py
122
123
124
125
126
127
128
129
130
131
132
133
134
def getValue(self, sparqlQuery: str, attr: str):
    """
    get the value for the given SPARQL query using the given attr

    Args:
        sparql(SPARQL): the SPARQL endpoint to ge the value for
        sparqlQuery(str): the SPARQL query to run
        attr(str): the attribute to get
    """
    if self.debug:
        print(sparqlQuery)
    qLod = self.queryAsListOfDicts(sparqlQuery)
    return self.getFirst(qLod, attr)

getValues(sparqlQuery, attrList)

get Values for the given sparqlQuery and attribute list

Parameters:

Name Type Description Default
sparqlQuery(str)

the query which did not return any values

required
attrList(list)

the list of attributes

required
Source code in lodstorage/sparql.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def getValues(self, sparqlQuery: str, attrList: list):
    """
    get Values for the given sparlQuery and attribute list

    Args:
        sparqlQuery(str): the query which did not return any values
        attrList(list): the list of attributes
    """
    if self.debug:
        print(sparqlQuery)
    qLod = self.queryAsListOfDicts(sparqlQuery)
    if not (len(qLod) == 1):
        msg = f"getValues for {attrList} failed for {qLod}"
        raise Exception(msg)
    record = qLod[0]
    values = ()
    for attr in attrList:
        if not attr in record:
            msg = f"getValues failed for attribute {attr} which is missing in result record {record}"
            raise Exception(msg)
        recordTuple = (record[attr],)
        values += recordTuple
    return values

insert(insertCommand)

run an insert

Parameters:

Name Type Description Default
insertCommand(string)

the SPARQL INSERT command

required

Returns:

Type Description

a response

Source code in lodstorage/sparql.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
def insert(self, insertCommand):
    """
    run an insert

    Args:
        insertCommand(string): the SPARQL INSERT command

    Returns:
        a response
    """
    self.sparql.setRequestMethod(POSTDIRECTLY)
    response = None
    exception = None
    try:
        response = self.rawQuery(insertCommand, method=POST)
        # see https://github.com/RDFLib/sparqlwrapper/issues/159#issuecomment-674523696
        # dummy read the body
        response.response.read()
    except Exception as ex:
        exception = ex
        if self.debug:
            print(ex)
    return response, exception

insertListOfDicts(listOfDicts, entityType, primaryKey, prefixes, limit=None, batchSize=None, profile=False)

insert the given list of dicts mapping datatypes

Parameters:

Name Type Description Default
entityType(string)

the entityType to use as a

required
primaryKey(string)

the name of the primary key attribute to use

required
prefix(string)

any PREFIX statements to be used

required
limit(int)

maximum number of records to insert

required
batchSize(int)

number of records to send per request

required
Return

a list of errors which should be empty on full success

datatype mapping according to https://www.w3.org/TR/xmlschema-2/#built-in-datatypes

mapped from https://docs.python.org/3/library/stdtypes.html

compare to https://www.w3.org/2001/sw/rdb2rdf/directGraph/ http://www.bobdc.com/blog/json2rdf/ https://www.w3.org/TR/json-ld11-api/#data-round-tripping https://stackoverflow.com/questions/29030231/json-to-rdf-xml-file-in-python

Source code in lodstorage/sparql.py
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
def insertListOfDicts(
    self,
    listOfDicts,
    entityType,
    primaryKey,
    prefixes,
    limit=None,
    batchSize=None,
    profile=False,
):
    """
    insert the given list of dicts mapping datatypes

    Args:
        entityType(string): the entityType to use as a
        primaryKey(string): the name of the primary key attribute to use
        prefix(string): any PREFIX statements to be used
        limit(int): maximum number of records to insert
        batchSize(int): number of records to send per request

    Return:
        a list of errors which should be empty on full success

    datatype maping according to
    https://www.w3.org/TR/xmlschema-2/#built-in-datatypes

    mapped from
    https://docs.python.org/3/library/stdtypes.html

    compare to
    https://www.w3.org/2001/sw/rdb2rdf/directGraph/
    http://www.bobdc.com/blog/json2rdf/
    https://www.w3.org/TR/json-ld11-api/#data-round-tripping
    https://stackoverflow.com/questions/29030231/json-to-rdf-xml-file-in-python
    """
    if limit is not None:
        listOfDicts = listOfDicts[:limit]
    else:
        limit = len(listOfDicts)
    total = len(listOfDicts)
    if batchSize is None:
        return self.insertListOfDictsBatch(
            listOfDicts, entityType, primaryKey, prefixes, total=total
        )
    else:
        startTime = time.time()
        errors = []
        # store the list in batches
        for i in range(0, total, batchSize):
            recordBatch = listOfDicts[i : i + batchSize]
            batchErrors = self.insertListOfDictsBatch(
                recordBatch,
                entityType,
                primaryKey,
                prefixes,
                batchIndex=i,
                total=total,
                startTime=startTime,
            )
            errors.extend(batchErrors)
        if self.profile:
            print(
                "insertListOfDicts for %9d records in %6.1f secs"
                % (len(listOfDicts), time.time() - startTime),
                flush=True,
            )
        return errors

insertListOfDictsBatch(listOfDicts, entityType, primaryKey, prefixes, title='batch', batchIndex=None, total=None, startTime=None)

insert a Batch part of listOfDicts

Parameters:

Name Type Description Default
entityType(string)

the entityType to use as a

required
primaryKey(string)

the name of the primary key attribute to use

required
prefix(string)

any PREFIX statements to be used

required
title(string)

the title to display for the profiling (if any)

required
batchIndex(int)

the start index of the current batch

required
total(int)

the total number of records for all batches

required
starttime(datetime)

the start of the batch processing

required
Return

a list of errors which should be empty on full success

Source code in lodstorage/sparql.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def insertListOfDictsBatch(
    self,
    listOfDicts,
    entityType,
    primaryKey,
    prefixes,
    title="batch",
    batchIndex=None,
    total=None,
    startTime=None,
):
    """
    insert a Batch part of listOfDicts

    Args:
        entityType(string): the entityType to use as a
        primaryKey(string): the name of the primary key attribute to use
        prefix(string): any PREFIX statements to be used
        title(string): the title to display for the profiling (if any)
        batchIndex(int): the start index of the current batch
        total(int): the total number of records for all batches
        starttime(datetime): the start of the batch processing

    Return:
        a list of errors which should be empty on full success
    """
    errors = []
    size = len(listOfDicts)
    if batchIndex is None:
        batchIndex = 0
    batchStartTime = time.time()
    if startTime is None:
        startTime = batchStartTime
    rdfprefix = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
    insertCommand = f"{rdfprefix}{prefixes}\nINSERT DATA {{\n"
    for index, record in enumerate(listOfDicts):
        if not primaryKey in record:
            errors.append(f"missing primary key {primaryKey} in record {index}")
        else:
            primaryValue = record[primaryKey]
            if primaryValue is None:
                errors.append(
                    f"primary key {primaryKey} value is None in record {index}"
                )
            else:
                encodedPrimaryValue = self.getLocalName(primaryValue)
                tSubject = f"{entityType}__{encodedPrimaryValue}"
                insertCommand += f'  {tSubject} rdf:type "{entityType}".\n'
                for keyValue in record.items():
                    key, value = keyValue
                    # convert key if necessary
                    key = self.getLocalName(key)
                    valueType = type(value)
                    if self.debug:
                        print("%s(%s)=%s" % (key, valueType, value))
                    tPredicate = f"{entityType}_{key}"
                    tObject = value
                    if valueType == str:
                        escapedString = self.controlEscape(value)
                        tObject = '"%s"' % escapedString
                    elif valueType == int:
                        if self.typedLiterals:
                            tObject = (
                                '"%d"^^<http://www.w3.org/2001/XMLSchema#integer>'
                                % value
                            )
                        pass
                    elif valueType == float:
                        if self.typedLiterals:
                            tObject = (
                                '"%s"^^<http://www.w3.org/2001/XMLSchema#decimal>'
                                % value
                            )
                        pass
                    elif valueType == bool:
                        pass
                    elif valueType == datetime.date:
                        # if self.typedLiterals:
                        tObject = (
                            '"%s"^^<http://www.w3.org/2001/XMLSchema#date>' % value
                        )
                        pass
                    elif valueType == datetime.datetime:
                        tObject = (
                            '"%s"^^<http://www.w3.org/2001/XMLSchema#dateTime>'
                            % value
                        )
                        pass
                    else:
                        errors.append(
                            "can't handle type %s in record %d" % (valueType, index)
                        )
                        tObject = None
                    if tObject is not None:
                        insertRecord = "  %s %s %s.\n" % (
                            tSubject,
                            tPredicate,
                            tObject,
                        )
                        insertCommand += insertRecord
    insertCommand += "\n}"
    if self.debug:
        print(insertCommand, flush=True)
    response, ex = self.insert(insertCommand)
    if response is None and ex is not None:
        errors.append("%s for record %d" % (str(ex), index))
    if self.profile:
        print(
            "%7s for %9d - %9d of %9d %s in %6.1f s -> %6.1f s"
            % (
                title,
                batchIndex + 1,
                batchIndex + size,
                total,
                entityType,
                time.time() - batchStartTime,
                time.time() - startTime,
            ),
            flush=True,
        )
    return errors

printErrors(errors)

print the given list of errors

Parameters:

Name Type Description Default
errors(list)

a list of error strings

required

Returns:

Name Type Description
boolean

True if the list is empty else false

Source code in lodstorage/sparql.py
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
def printErrors(self, errors):
    """
    print the given list of errors

    Args:
        errors(list): a list of error strings

    Returns:
        boolean: True if the list is empty else false
    """
    if len(errors) > 0:
        print("ERRORS:")
        for error in errors:
            print(error, flush=True, file=stderr)
        return True
    else:
        return False

query(queryString, method=POST)

get a list of results for the given query

Parameters:

Name Type Description Default
queryString(string)

the SPARQL query to execute

required
method(string)

the method eg. POST to use

required

Returns:

Name Type Description
list

list of bindings

Source code in lodstorage/sparql.py
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
def query(self, queryString, method=POST):
    """
    get a list of results for the given query

    Args:
        queryString(string): the SPARQL query to execute
        method(string): the method eg. POST to use

    Returns:
        list: list of bindings
    """
    queryResult = self.rawQuery(queryString, method=method)
    if self.debug:
        print(queryString)
    if hasattr(queryResult, "info"):
        if "content-type" in queryResult.info():
            ct = queryResult.info()["content-type"]
            if "text/html" in ct:
                response = queryResult.response.read().decode()
                if not "Success" in response:
                    raise ("%s failed: %s", response)
            return None
    jsonResult = queryResult.convert()
    return self.getResults(jsonResult)

queryAsListOfDicts(queryString, fixNone=False, sampleCount=None)

get a list of dicts for the given query (to allow round-trip results for insertListOfDicts)

Parameters:

Name Type Description Default
queryString(string)

the SPARQL query to execute

required
fixNone(bool)

if True add None values for empty columns in Dict

required
sampleCount(int)

the number of samples to check

required

Returns:

Name Type Description
list

a list of Dicts

Source code in lodstorage/sparql.py
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
def queryAsListOfDicts(
    self, queryString, fixNone: bool = False, sampleCount: int = None
):
    """
    get a list of dicts for the given query (to allow round-trip results for insertListOfDicts)

    Args:
        queryString(string): the SPARQL query to execute
        fixNone(bool): if True add None values for empty columns in Dict
        sampleCount(int): the number of samples to check

    Returns:
        list: a list ofDicts
    """
    records = self.query(queryString, method=self.method)
    listOfDicts = self.asListOfDicts(
        records, fixNone=fixNone, sampleCount=sampleCount
    )
    return listOfDicts

rawQuery(queryString, method=POST)

query with the given query string

Parameters:

Name Type Description Default
queryString(string)

the SPARQL query to be performed

required
method(string)

POST or GET - POST is mandatory for update queries

required

Returns: list: the raw query result as bindings

Source code in lodstorage/sparql.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def rawQuery(self, queryString, method=POST):
    """
    query with the given query string

    Args:
        queryString(string): the SPARQL query to be performed
        method(string): POST or GET - POST is mandatory for update queries
    Returns:
        list: the raw query result as bindings
    """
    queryString = self.fix_comments(queryString)
    self.sparql.setQuery(queryString)
    self.sparql.method = method
    queryResult = self.sparql.query()
    return queryResult

strToDatetime(value, debug=False) staticmethod

convert a string to a datetime Args: value(str): the value to convert Returns: datetime: the datetime

Source code in lodstorage/sparql.py
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
@staticmethod
def strToDatetime(value, debug=False):
    """
    convert a string to a datetime
    Args:
        value(str): the value to convert
    Returns:
        datetime: the datetime
    """
    dateFormat = "%Y-%m-%d %H:%M:%S.%f"
    if "T" in value and "Z" in value:
        dateFormat = "%Y-%m-%dT%H:%M:%SZ"
    dt = None
    try:
        dt = datetime.datetime.strptime(value, dateFormat)
    except ValueError as ve:
        if debug:
            print(str(ve))
    return dt

sql

Created on 2020-08-24

@author: wf

DatetimeAdapter

Singleton class for converting date and time formats with optional lenient error handling.

Attributes:

Name Type Description
lenient bool

Whether to handle conversion errors leniently, returning None and logging a warning.

Source code in lodstorage/sql.py
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
class DatetimeAdapter:
    """Singleton class for converting date and time formats with optional lenient error handling.

    Attributes:
        lenient (bool): Whether to handle conversion errors leniently, returning None and logging a warning.
    """

    # shared singleton instance - see __new__
    _instance = None

    def __new__(cls, lenient: bool = False):
        """Ensure only one instance of the adapter exists.

        Note:
            the lenient flag is only honored when the singleton is first
            created; use set_lenient() to change it afterwards.

        Args:
            lenient (bool): If True, the adapter will not raise exceptions on conversion failures.

        Returns:
            DatetimeAdapter: The singleton instance of the adapter.
        """
        if cls._instance is None:
            cls._instance = super(DatetimeAdapter, cls).__new__(cls)
            cls._instance.lenient = lenient
        return cls._instance

    def _handle_input(self, val: bytes) -> str:
        """Validate and decode the input bytes into string.

        Args:
            val (bytes): The bytes input to validate and decode.

        Returns:
            str: The decoded string from bytes.

        Raises:
            TypeError: If the input is not bytes.
            UnicodeDecodeError: If the bytes are not valid UTF-8.
        """
        if not isinstance(val, bytes):
            raise TypeError("Input must be a byte string.")
        return val.decode()

    def _handle_error(self, error: Exception, val: bytes):
        """Handle errors based on the lenient mode.

        Args:
            error (Exception): The exception that was raised.
            val (bytes): The input value that caused the error.

        Returns:
            None: If lenient mode is True and an error occurs.

        Raises:
            Exception: If lenient mode is False and an error occurs.
        """
        if self.lenient:
            logging.warning(f"Failed to convert {val}: {error}")
            return None
        raise error

    def convert_date(self, val: bytes) -> datetime.date:
        """Convert ISO 8601 date byte string to a datetime.date object.

        Args:
            val (bytes): The ISO 8601 date string in bytes.

        Returns:
            datetime.date: The converted date object.
        """
        try:
            decoded_date = self._handle_input(val)
            return datetime.date.fromisoformat(decoded_date)
        except Exception as e:
            return self._handle_error(e, val)

    def convert_datetime(self, val: bytes) -> datetime.datetime:
        """Convert ISO 8601 datetime byte string to a datetime.datetime object.

        Args:
            val (bytes): The ISO 8601 datetime string in bytes.

        Returns:
            datetime.datetime: The converted datetime object.
        """
        try:
            decoded_datetime = self._handle_input(val)
            return datetime.datetime.fromisoformat(decoded_datetime)
        except Exception as e:
            return self._handle_error(e, val)

    def convert_timestamp(self, val: bytes) -> datetime.datetime:
        """Convert Unix epoch timestamp byte string to a datetime.datetime object.

        Falls back to ISO 8601 parsing when the value is not numeric.

        Args:
            val (bytes): The Unix epoch timestamp (in microseconds) in bytes.

        Returns:
            datetime.datetime: The converted datetime object.
        """
        # decode in its own try block: UnicodeDecodeError is a ValueError
        # subclass and previously fell into the numeric-fallback branch
        # where 'decoded_string' was unbound, raising a spurious NameError
        try:
            decoded_string = self._handle_input(val)
        except Exception as e:
            return self._handle_error(e, val)
        try:
            # interpret the value as microseconds since the Unix epoch
            timestamp_float = float(decoded_string) / 10**6
            return datetime.datetime.fromtimestamp(timestamp_float)
        except ValueError:
            try:
                # not numeric - try to parse it as a datetime string
                return datetime.datetime.fromisoformat(decoded_string)
            except Exception as e:
                return self._handle_error(e, val)
        except Exception as e:
            return self._handle_error(e, val)

    def set_lenient(self, lenient: bool):
        """Set the lenient mode of the adapter.

        Args:
            lenient (bool): True to enable lenient mode, False to disable it.
        """
        self.lenient = lenient

__new__(lenient=False)

Ensure only one instance of the adapter exists.

Parameters:

Name Type Description Default
lenient bool

If True, the adapter will not raise exceptions on conversion failures.

False

Returns:

Name Type Description
DatetimeAdapter

The singleton instance of the adapter.

Source code in lodstorage/sql.py
693
694
695
696
697
698
699
700
701
702
703
704
705
def __new__(cls, lenient: bool = False):
    """Create or return the singleton adapter instance.

    Args:
        lenient (bool): If True, the adapter will not raise exceptions on
            conversion failures; only honored when the singleton is first created.

    Returns:
        DatetimeAdapter: The singleton instance of the adapter.
    """
    # lazily create the one shared instance
    if cls._instance is not None:
        return cls._instance
    instance = super(DatetimeAdapter, cls).__new__(cls)
    instance.lenient = lenient
    cls._instance = instance
    return cls._instance

convert_date(val)

Convert ISO 8601 date byte string to a datetime.date object.

Parameters:

Name Type Description Default
val bytes

The ISO 8601 date string in bytes.

required

Returns:

Type Description
date

datetime.date: The converted date object.

Source code in lodstorage/sql.py
742
743
744
745
746
747
748
749
750
751
752
753
754
755
def convert_date(self, val: bytes) -> datetime.date:
    """Convert ISO 8601 date byte string to a datetime.date object.

    Args:
        val (bytes): The ISO 8601 date string in bytes.

    Returns:
        datetime.date: The converted date object.
    """
    try:
        # decode then parse; any failure is delegated to the error handler
        return datetime.date.fromisoformat(self._handle_input(val))
    except Exception as conversion_error:
        return self._handle_error(conversion_error, val)

convert_datetime(val)

Convert ISO 8601 datetime byte string to a datetime.datetime object.

Parameters:

Name Type Description Default
val bytes

The ISO 8601 datetime string in bytes.

required

Returns:

Type Description
datetime

datetime.datetime: The converted datetime object.

Source code in lodstorage/sql.py
757
758
759
760
761
762
763
764
765
766
767
768
769
770
def convert_datetime(self, val: bytes) -> datetime.datetime:
    """Convert ISO 8601 datetime byte string to a datetime.datetime object.

    Args:
        val (bytes): The ISO 8601 datetime string in bytes.

    Returns:
        datetime.datetime: The converted datetime object.
    """
    try:
        # decode then parse; any failure is delegated to the error handler
        return datetime.datetime.fromisoformat(self._handle_input(val))
    except Exception as conversion_error:
        return self._handle_error(conversion_error, val)

convert_timestamp(val)

Convert Unix epoch timestamp byte string to a datetime.datetime object.

Parameters:

Name Type Description Default
val bytes

The Unix epoch timestamp in bytes.

required

Returns:

Type Description
datetime

datetime.datetime: The converted datetime object.

Source code in lodstorage/sql.py
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
def convert_timestamp(self, val: bytes) -> datetime.datetime:
    """Convert Unix epoch timestamp byte string to a datetime.datetime object.

    Falls back to ISO 8601 parsing when the value is not numeric.

    Args:
        val (bytes): The Unix epoch timestamp (in microseconds) in bytes.

    Returns:
        datetime.datetime: The converted datetime object.
    """
    # decode in its own try block: UnicodeDecodeError is a ValueError
    # subclass and previously fell into the numeric-fallback branch where
    # 'decoded_string' was unbound, raising a spurious NameError
    try:
        decoded_string = self._handle_input(val)
    except Exception as e:
        return self._handle_error(e, val)
    try:
        # interpret the value as microseconds since the Unix epoch
        timestamp_float = float(decoded_string) / 10**6
        return datetime.datetime.fromtimestamp(timestamp_float)
    except ValueError:
        try:
            # not numeric - try to parse it as a datetime string
            return datetime.datetime.fromisoformat(decoded_string)
        except Exception as e:
            return self._handle_error(e, val)
    except Exception as e:
        return self._handle_error(e, val)

set_lenient(lenient)

Set the lenient mode of the adapter.

Parameters:

Name Type Description Default
lenient bool

True to enable lenient mode, False to disable it.

required
Source code in lodstorage/sql.py
795
796
797
798
799
800
801
def set_lenient(self, lenient: bool):
    """Switch the adapter's lenient error-handling mode.

    Args:
        lenient (bool): True to enable lenient mode, False to disable it.
    """
    # lenient mode makes conversion failures return None instead of raising
    self.lenient = lenient

EntityInfo

Bases: object

holds entity meta Info

:ivar name(string): entity name = table name

:ivar primaryKey(string): the name of the primary key column

:ivar typeMap(dict): maps column names to python types

:ivar debug(boolean): True if debug information should be shown

Source code in lodstorage/sql.py
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
class EntityInfo(object):
    """
    holds entity meta Info

    :ivar name(string): entity name = table name

    :ivar primaryKey(string): the name of the primary key column

    :ivar typeMap(dict): maps column names to python types

    :ivar debug(boolean): True if debug information should be shown

    """

    # maps the supported python types to their SQL column types;
    # shared table so DDL generation has a single source of truth
    SQL_TYPE_MAP = {
        str: "TEXT",
        int: "INTEGER",
        float: "FLOAT",
        bool: "BOOLEAN",
        datetime.date: "DATE",
        datetime.datetime: "TIMESTAMP",
    }

    def __init__(self, sampleRecords, name, primaryKey=None, debug=False):
        """
        construct me from the given sample records, name and primary key

        Args:
           sampleRecords(list): a list of dicts of sample records used to derive the column types
           name(string): the name of the entity
           primaryKey(string): the name of the primary key column
           debug(boolean): True if debug information should be shown
        """
        self.sampleRecords = sampleRecords
        self.name = name
        self.primaryKey = primaryKey
        self.debug = debug
        self.typeMap = {}
        self.sqlTypeMap = {}
        # derive the DDL and DML commands from the sample records
        self.createTableCmd = self.getCreateTableCmd(sampleRecords)
        self.dropTableCmd = "DROP TABLE IF EXISTS %s" % self.name
        self.insertCmd = self.getInsertCmd()

    def getCreateTableCmd(self, sampleRecords):
        """
        get the CREATE TABLE DDL command for the given sample records

        Args:
            sampleRecords(list): a list of Dicts of sample Records

        Returns:
            string: CREATE TABLE DDL command for this entity info

        Example:

        .. code-block:: sql

            CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN)

        """
        ddlCmd = "CREATE TABLE %s(" % self.name
        delim = ""
        for sampleRecord in sampleRecords:
            for key, value in sampleRecord.items():
                valueType = None
                if value is None:
                    # with a single sample record there is no second chance to
                    # determine the type - default the column to TEXT
                    if len(sampleRecords) == 1:
                        print(
                            "Warning sampleRecord column %s is None - using TEXT as type"
                            % key
                        )
                        valueType = str
                else:
                    valueType = type(value)
                sqlType = (
                    self.SQL_TYPE_MAP.get(valueType) if valueType is not None else None
                )
                if valueType is not None and sqlType is None:
                    msg = "warning: unsupported type %s for column %s " % (
                        str(valueType),
                        key,
                    )
                    print(msg)
                if sqlType is not None and valueType is not None:
                    self.addType(key, valueType, sqlType)
        for key, sqlType in self.sqlTypeMap.items():
            ddlCmd += "%s%s %s%s" % (
                delim,
                key,
                sqlType,
                " PRIMARY KEY" if key == self.primaryKey else "",
            )
            delim = ","
        ddlCmd += ")"
        if self.debug:
            print(ddlCmd)
        return ddlCmd

    def getInsertCmd(self, replace: bool = False) -> str:
        """
        get the INSERT command for this entityInfo

        Args:
             replace(bool): if True allow replace for insert

        Returns:
            str: the INSERT INTO SQL command for this entityInfo e.g.

        Example:

        .. code-block:: sql

            INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge) values (?,?,?,?,?,?).

        """
        columns = ",".join(self.typeMap.keys())
        placeholders = ":" + ",:".join(self.typeMap.keys())
        replaceClause = " OR REPLACE" if replace else ""
        insertCmd = f"INSERT{replaceClause} INTO {self.name} ({columns}) values ({placeholders})"
        if self.debug:
            print(insertCmd)
        return insertCmd

    def addType(self, column, valueType, sqlType):
        """
        add the python type for the given column to the typeMap

        Args:
           column(string): the name of the column
           valueType(type): the python type of the column
           sqlType(string): the SQL type of the column
        """
        # the first sample value seen for a column decides its type
        if column not in self.typeMap:
            self.typeMap[column] = valueType
            self.sqlTypeMap[column] = sqlType

    def fixDates(self, resultList):
        """
        fix date entries in the given resultList by parsing the date content e.g.
        converting '1926-04-21' back to datetime.date(1926, 4, 21)

        Args:
            resultList(list): the list of records to be fixed
        """
        for record in resultList:
            for key, valueType in self.typeMap.items():
                if valueType == datetime.date:
                    dt = datetime.datetime.strptime(record[key], "%Y-%m-%d")
                    record[key] = dt.date()

__init__(sampleRecords, name, primaryKey=None, debug=False)

construct me from the given name and primary key

Parameters:

Name Type Description Default
name(string)

the name of the entity

required
primaryKey(string)

the name of the primary key column

required
debug(boolean)

True if debug information should be shown

required
Source code in lodstorage/sql.py
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
def __init__(self, sampleRecords, name, primaryKey=None, debug=False):
    """
    construct me from the given sample records, name and primary key

    Args:
       sampleRecords(list): a list of dicts of sample records used to derive the column types
       name(string): the name of the entity
       primaryKey(string): the name of the primary key column
       debug(boolean): True if debug information should be shown
    """
    self.sampleRecords = sampleRecords
    self.name = name
    self.primaryKey = primaryKey
    self.debug = debug
    # column name -> python type / SQL type lookups, filled by addType
    self.typeMap = {}
    self.sqlTypeMap = {}
    # derive the DDL and DML commands from the sample records
    self.createTableCmd = self.getCreateTableCmd(sampleRecords)
    self.dropTableCmd = "DROP TABLE IF EXISTS %s" % self.name
    self.insertCmd = self.getInsertCmd()

addType(column, valueType, sqlType)

add the python type for the given column to the typeMap

Parameters:

Name Type Description Default
column(string)

the name of the column

required
valueType(type)

the python type of the column

required
Source code in lodstorage/sql.py
627
628
629
630
631
632
633
634
635
636
637
638
def addType(self, column, valueType, sqlType):
    """
    add the python type for the given column to the typeMap

    Args:
       column(string): the name of the column
       valueType(type): the python type of the column
       sqlType(string): the SQL type of the column
    """
    # the first sample value seen for a column decides its type;
    # use the idiomatic 'not in' membership test
    if column not in self.typeMap:
        self.typeMap[column] = valueType
        self.sqlTypeMap[column] = sqlType

fixDates(resultList)

fix date entries in the given resultList by parsing the date content e.g. converting '1926-04-21' back to datetime.date(1926, 4, 21)

Parameters:

Name Type Description Default
resultList(list)

the list of records to be fixed

required
Source code in lodstorage/sql.py
640
641
642
643
644
645
646
647
648
649
650
651
652
653
def fixDates(self, resultList):
    """
    fix date entries in the given resultList by parsing the date content e.g.
    converting '1926-04-21' back to datetime.date(1926, 4, 21)

    Args:
        resultList(list): the list of records to be fixed
    """
    # determine the date columns once, then convert them in every record
    dateColumns = [
        key for key, valueType in self.typeMap.items() if valueType == datetime.date
    ]
    for record in resultList:
        for key in dateColumns:
            parsed = datetime.datetime.strptime(record[key], "%Y-%m-%d")
            record[key] = parsed.date()

getCreateTableCmd(sampleRecords)

get the CREATE TABLE DDL command for the given sample records

Parameters:

Name Type Description Default
sampleRecords(list)

a list of Dicts of sample Records

required

Returns:

Name Type Description
string

CREATE TABLE DDL command for this entity info

Example:

.. code-block:: sql

CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN)
Source code in lodstorage/sql.py
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
def getCreateTableCmd(self, sampleRecords):
    """
    get the CREATE TABLE DDL command for the given sample records

    Args:
        sampleRecords(list): a list of Dicts of sample Records

    Returns:
        string: CREATE TABLE DDL command for this entity info

    Example:

    .. code-block:: sql

        CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN)

    """
    # lookup table from python type to SQL column type
    typeLookup = {
        str: "TEXT",
        int: "INTEGER",
        float: "FLOAT",
        bool: "BOOLEAN",
        datetime.date: "DATE",
        datetime.datetime: "TIMESTAMP",
    }
    for sampleRecord in sampleRecords:
        for key, value in sampleRecord.items():
            valueType = None
            if value is None:
                # a single sample record gives no second chance to infer
                # the type - default the column to TEXT
                if len(sampleRecords) == 1:
                    print(
                        "Warning sampleRecord column %s is None - using TEXT as type"
                        % key
                    )
                    valueType = str
            else:
                valueType = type(value)
            sqlType = typeLookup.get(valueType) if valueType is not None else None
            if valueType is not None and sqlType is None:
                msg = "warning: unsupported type %s for column %s " % (
                    str(valueType),
                    key,
                )
                print(msg)
            if sqlType is not None and valueType is not None:
                self.addType(key, valueType, sqlType)
    # assemble the column definitions in insertion order
    columnDefs = []
    for key, sqlType in self.sqlTypeMap.items():
        primaryClause = " PRIMARY KEY" if key == self.primaryKey else ""
        columnDefs.append("%s %s%s" % (key, sqlType, primaryClause))
    ddlCmd = "CREATE TABLE %s(%s)" % (self.name, ",".join(columnDefs))
    if self.debug:
        print(ddlCmd)
    return ddlCmd

getInsertCmd(replace=False)

get the INSERT command for this entityInfo

Parameters:

Name Type Description Default
replace(bool)

if True allow replace for insert

required

Returns:

Name Type Description
str str

the INSERT INTO SQL command for this entityInfo, e.g.

Example:

.. code-block:: sql

INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge) values (?,?,?,?,?,?).
Source code in lodstorage/sql.py
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
def getInsertCmd(self, replace: bool = False) -> str:
    """
    get the INSERT command for this entityInfo

    Args:
         replace(bool): if True allow replace for insert

    Returns:
        str: the INSERT INTO SQL command for this entityInfo e.g.

    Example:

    .. code-block:: sql

        INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge) values (?,?,?,?,?,?).

    """
    columnNames = list(self.typeMap.keys())
    columns = ",".join(columnNames)
    # named-parameter placeholders :col1,:col2,...
    placeholders = ":" + ",:".join(columnNames)
    replaceClause = " OR REPLACE" if replace else ""
    insertCmd = (
        f"INSERT{replaceClause} INTO {self.name} ({columns}) values ({placeholders})"
    )
    if self.debug:
        print(insertCmd)
    return insertCmd

SQLDB

Bases: object

Structured Query Language Database wrapper

:ivar dbname(string): name of the database :ivar debug(boolean): True if debug info should be provided :ivar errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)

Source code in lodstorage/sql.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
class SQLDB(object):
    """
    Structured Query Language Database wrapper

    :ivar dbname(string): name of the database
    :ivar debug(boolean): True if debug info should be provided
    :ivar errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
    """

    RAM = ":memory:"

    def __init__(
        self,
        dbname: str = ":memory:",
        connection=None,
        check_same_thread=True,
        timeout=5,
        debug=False,
        errorDebug=False,
    ):
        """
        Construct me for the given dbname and debug

        Args:

           dbname(string): name of the database - default is a RAM based database
           connection(Connection): an optional connection to be reused
           check_same_thread(boolean): True if object handling needs to be on the same thread see https://stackoverflow.com/a/48234567/1497139
           timeout(float): number of seconds for connection timeout
           debug(boolean): if True switch on debug
           errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
        """
        self.dbname = dbname
        self.debug = debug
        self.errorDebug = errorDebug
        if connection is None:
            self.c = sqlite3.connect(
                dbname,
                detect_types=sqlite3.PARSE_DECLTYPES,
                check_same_thread=check_same_thread,
                timeout=timeout,
            )
        else:
            # reuse the supplied connection e.g. from a backup/restore
            self.c = connection

    def logError(self, msg):
        """
        log the given error message to stderr

        Args:
            msg(str): the error message to display
        """
        # flush so the message is visible immediately even when buffered
        print(msg, file=sys.stderr, flush=True)

    def close(self):
        """close my connection"""
        self.c.close()

    def execute(self, ddlCmd):
        """
        execute the given Data Definition Command

        Args:
            ddlCmd(string): e.g. a CREATE TABLE or CREATE View command
        """
        self.c.execute(ddlCmd)

    def createTable4EntityInfo(self, entityInfo, withDrop=False, withCreate=True):
        """
        Create a table based on the provided EntityInfo.

        Args:
            entityInfo (EntityInfo): The EntityInfo object containing table metadata.
            withDrop (bool): If True, drop the existing table before creation.
            withCreate (bool): If True, execute the CREATE TABLE command.

        Returns:
            EntityInfo: The provided EntityInfo object.
        """
        if withDrop:
            self.c.execute(entityInfo.dropTableCmd)
        if withCreate:
            try:
                self.c.execute(entityInfo.createTableCmd)
            except sqlite3.OperationalError as oe:
                # chain the original error so the full traceback is preserved
                raise Exception(
                    f"createTable failed with error {oe} for {entityInfo.createTableCmd}"
                ) from oe
        return entityInfo

    def createTable(
        self,
        listOfRecords,
        entityName: str,
        primaryKey: str = None,
        withCreate: bool = True,
        withDrop: bool = False,
        sampleRecordCount=1,
        failIfTooFew=True,
    ):
        """
        Derive Data Definition Language CREATE TABLE command from list of Records by examining first record
        as defining sample record and execute DDL command.

        Args:
            listOfRecords (list): A list of Dicts.
            entityName (str): The entity / table name to use.
            primaryKey (str): The key/column to use as a primary key.
            withDrop (bool): True if the existing Table should be dropped.
            withCreate (bool): True if the create Table command should be executed.
            sampleRecordCount (int): Number of sample records expected and to be inspected.
            failIfTooFew (bool): Raise an Exception if too few sample records, else warn only.

        Returns:
            EntityInfo: Meta data information for the created table.
        """
        l = len(listOfRecords)
        # a negative sample count means: inspect all records
        if sampleRecordCount < 0:
            sampleRecordCount = l
        if l < sampleRecordCount:
            msg = f"only {l}/{sampleRecordCount} of needed sample records to createTable available"
            if failIfTooFew:
                raise Exception(msg)
            elif self.debug:
                self.logError(msg)

        sampleRecords = listOfRecords[:sampleRecordCount]
        entityInfo = EntityInfo(sampleRecords, entityName, primaryKey, debug=self.debug)

        return self.createTable4EntityInfo(entityInfo, withDrop, withCreate)

    def getDebugInfo(self, record, index, executeMany):
        """
        get the debug info for the given record at the given index depending on the state of executeMany

        Args:
            record(dict): the record to show
            index(int): the index of the record
            executeMany(boolean): if True the record may be valid else not
        """
        debugInfo = ""
        if not executeMany:
            # shall we show the details of the record (which might be a security risk)
            if self.errorDebug:
                # show details of record
                debugInfo = "\nrecord  #%d=%s" % (index, repr(record))
            else:
                # show only index
                debugInfo = "\nrecord #%d" % index
        return debugInfo

    def store(
        self, listOfRecords, entityInfo, executeMany=False, fixNone=False, replace=False
    ):
        """
        store the given list of records based on the given entityInfo

        Args:

           listOfRecords(list): the list of Dicts to be stored
           entityInfo(EntityInfo): the meta data to be used for storing
           executeMany(bool): if True the insert command is done with many/all records at once
           fixNone(bool): if True make sure empty columns in the listOfDict are filled with "None" values
           replace(bool): if True allow replace for insert
        """
        insertCmd = entityInfo.getInsertCmd(replace=replace)
        record = None
        index = 0
        try:
            if executeMany:
                if fixNone:
                    LOD.setNone4List(listOfRecords, entityInfo.typeMap.keys())
                self.c.executemany(insertCmd, listOfRecords)
            else:
                for record in listOfRecords:
                    index += 1
                    if fixNone:
                        LOD.setNone(record, entityInfo.typeMap.keys())
                    self.c.execute(insertCmd, record)
            self.c.commit()
        except sqlite3.ProgrammingError as pe:
            msg = pe.args[0]
            if "You did not supply a value for binding" in msg:
                if ":" in msg:
                    # sqlite now returns the parameter name not the number
                    # You did not supply a value for binding parameter :type.
                    # the regex group already excludes the leading colon
                    columnName = re.findall(r":([a-zA-Z][a-zA-Z0-9_]*)", msg)[0]
                else:
                    # pre python 3.10
                    # You did not supply a value for binding 2.
                    columnIndex = int(re.findall(r"\d+", msg)[0])
                    columnName = list(entityInfo.typeMap.keys())[columnIndex - 1]
                debugInfo = self.getDebugInfo(record, index, executeMany)
                raise Exception(
                    "%s\nfailed: no value supplied for column '%s'%s"
                    % (insertCmd, columnName, debugInfo)
                ) from pe
            else:
                raise pe
        except sqlite3.InterfaceError as ie:
            msg = ie.args[0]
            if "Error binding parameter" in msg:
                columnName = re.findall(r":[_a-zA-Z]\w*", msg)[0]
                debugInfo = self.getDebugInfo(record, index, executeMany)
                raise Exception(
                    "%s\nfailed: error binding column '%s'%s"
                    % (insertCmd, columnName, debugInfo)
                ) from ie
            else:
                raise ie
        except Exception as ex:
            debugInfo = self.getDebugInfo(record, index, executeMany)
            msg = "%s\nfailed:%s%s" % (insertCmd, str(ex), debugInfo)
            raise Exception(msg) from ex

    def queryGen(self, sqlQuery, params=None):
        """
        run the given sqlQuery as a generator for dicts

        Args:

            sqlQuery(string): the SQL query to be executed
            params(tuple): the query params, if any

        Returns:
            a generator of dicts
        """
        if self.debug:
            print(sqlQuery)
            if params is not None:
                print(params)
        # https://stackoverflow.com/a/13735506/1497139
        cur = self.c.cursor()
        try:
            if params is not None:
                query = cur.execute(sqlQuery, params)
            else:
                query = cur.execute(sqlQuery)
            colname = [d[0] for d in query.description]
            try:
                # loop over all rows
                for row in query:
                    record = dict(zip(colname, row))
                    yield record
            except Exception as ex:
                msg = str(ex)
                self.logError(msg)
        finally:
            # always release the cursor - also when the generator is
            # abandoned before being exhausted (GeneratorExit) or the
            # query itself fails
            cur.close()

    def query(self, sqlQuery, params=None):
        """
        run the given sqlQuery and return a list of Dicts

        Args:

            sqlQuery(string): the SQL query to be executed
            params(tuple): the query params, if any

        Returns:
            list: a list of Dicts
        """
        resultList = []
        for record in self.queryGen(sqlQuery, params):
            resultList.append(record)
        return resultList

    def queryAll(self, entityInfo, fixDates=True):
        """
        query all records for the given entityInfo's entity/table

        Args:
           entityInfo(EntityInfo): meta data of the entity/table to query
           fixDates(boolean): True if date entries should be returned as such and not as strings
        """
        sqlQuery = "SELECT * FROM %s" % entityInfo.name
        resultList = self.query(sqlQuery)
        if fixDates:
            entityInfo.fixDates(resultList)
        return resultList

    def getTableList(self, tableType="table"):
        """
        get the schema information from this database

        Args:
            tableType(str): table or view

        Return:
            list: a list as derived from PRAGMA table_info
        """
        tableQuery = f"SELECT name FROM sqlite_master WHERE type='{tableType}'"
        tableList = self.query(tableQuery)
        for table in tableList:
            tableName = table["name"]
            columnQuery = f"PRAGMA table_info('{tableName}')"
            columns = self.query(columnQuery)
            table["columns"] = columns
        return tableList

    def getTableDict(self, tableType="table"):
        """
        get the schema information from this database as a dict

        Args:
            tableType(str): table or view

        Returns:
            dict: Lookup map of tables with columns also being converted to dict
        """
        tableDict = {}
        for table in self.getTableList(tableType=tableType):
            colDict = {}
            for col in table["columns"]:
                colDict[col["name"]] = col
            table["columns"] = colDict
            tableDict[table["name"]] = table
        return tableDict

    def restoreProgress(self, status, remaining, total):
        self.progress("Restore", status, remaining, total)

    def backupProgress(self, status, remaining, total):
        self.progress("Backup", status, remaining, total)

    def progress(self, action, status, remaining, total):
        """
        show progress

        Args:
            action(str): the action to display e.g. Backup or Restore
            status(int): 0 while pages remain to be copied
            remaining(int): the number of pages still to be copied
            total(int): the total number of pages
        """
        # guard against division by zero for an empty page count
        percent = (total - remaining) / total * 100 if total else 100
        print(
            "%s %s at %5.0f%%"
            % (
                action,
                "... " if status == 0 else "done",
                percent,
            )
        )

    def backup(
        self,
        backupDB,
        action="Backup",
        profile=False,
        showProgress: int = 200,
        doClose=True,
    ):
        """
        create backup of this SQLDB to the given backup db

        see https://stackoverflow.com/a/59042442/1497139

        Args:
            backupDB(string): the path to the backupdb or SQLDB.RAM for in memory
            action(string): the action to display
            profile(boolean): True if timing information shall be shown
            showProgress(int): show progress at each showProgress page (0=show no progress)
            doClose(boolean): if True close the backup connection and return None else return the open connection

        Returns:
            Connection: the backup connection if doClose is False, None otherwise
        """
        # Connection.backup is only available from python 3.7 on
        if sys.version_info <= (3, 6):
            raise Exception(
                "backup via stdlibrary not available in python <=3.6 use copyToDB instead"
            )
        startTime = time.time()
        bck = sqlite3.connect(backupDB)
        if showProgress > 0:
            # pick the progress callback matching the action
            if action == "Restore":
                progress = self.restoreProgress
            else:
                progress = self.backupProgress
        else:
            progress = None
        with bck:
            self.c.backup(bck, pages=showProgress, progress=progress)
        elapsed = time.time() - startTime
        if profile:
            print("%s to %s took %5.1f s" % (action, backupDB, elapsed))
        if doClose:
            bck.close()
            return None
        else:
            return bck

    def showDump(self, dump, limit=10):
        """
        show the given dump up to the given limit

        Args:
            dump(string): the SQL dump to show
            limit(int): the maximum number of lines to display
        """
        s = io.StringIO(dump)
        index = 0
        for line in s:
            if index <= limit:
                print(line)
                index += 1
            else:
                break

    def executeDump(
        self, connection, dump, title, maxErrors=100, errorDisplayLimit=12, profile=True
    ):
        """
        execute the given dump for the given connection

        Args:
            connection(Connection): the sqlite3 connection to use
            dump(string): the SQL commands for the dump
            title(string): the title of the dump
            maxErrors(int): maximum number of errors to be tolerated before stopping and doing a rollback
            errorDisplayLimit(int): maximum number of error messages to print
            profile(boolean): True if profiling information should be shown
        Returns:
            a list of errors
        """
        if self.debug:
            self.showDump(dump)
        startTime = time.time()
        if profile:
            print("dump of %s has size %4.1f MB" % (title, len(dump) / 1024 / 1024))
        errors = []
        index = 0
        # execute statement by statement instead of using executescript
        # fixes https://github.com/WolfgangFahl/ProceedingsTitleParser/issues/37
        for line in dump.split(";\n"):
            try:
                connection.execute(line)
            except sqlite3.OperationalError as soe:
                msg = "SQL error %s in line %d:\n\t%s" % (soe, index, line)
                errors.append(msg)
                # only print the first errorDisplayLimit error messages
                if len(errors) <= errorDisplayLimit:
                    print(msg)
                # too many errors - give up and roll back
                if len(errors) >= maxErrors:
                    connection.execute("ROLLBACK;")
                    break

            index = index + 1
        if profile:
            print(
                "finished executing dump %s with %d lines and %d errors in %5.1f s"
                % (title, index, len(errors), time.time() - startTime)
            )
        return errors

    def copyTo(self, copyDB, profile=True):
        """
        copy my content to another database

        Args:

           copyDB(Connection): the target database
           profile(boolean): if True show profile information
        """
        startTime = time.time()
        dump = "\n".join(self.c.iterdump())
        # cursor.executescript(dump)
        if profile:
            print(
                "finished getting dump of %s in %5.1f s"
                % (self.dbname, time.time() - startTime)
            )
        dumpErrors = self.executeDump(copyDB.c, dump, self.dbname, profile=profile)
        return dumpErrors

    @staticmethod
    def restore(backupDB, restoreDB, profile=False, showProgress=200, debug=False):
        """
        restore the restoreDB from the given backup DB

        Args:
            backupDB(string): path to the backupDB e.g. backup.db
            restoreDB(string): path to the restoreDB or in Memory SQLDB.RAM
            profile(boolean): True if timing information should be shown
            showProgress(int): show progress at each showProgress page (0=show no progress)

        Returns:
            SQLDB: the restored database wrapper
        """
        backupSQLDB = SQLDB(backupDB)
        connection = backupSQLDB.backup(
            restoreDB,
            action="Restore",
            profile=profile,
            showProgress=showProgress,
            doClose=False,
        )
        restoreSQLDB = SQLDB(restoreDB, connection=connection, debug=debug)
        return restoreSQLDB

__init__(dbname=':memory:', connection=None, check_same_thread=True, timeout=5, debug=False, errorDebug=False)

Construct me for the given dbname and debug

Args:

dbname(string): name of the database - default is a RAM based database connection(Connection): an optional connection to be reused check_same_thread(boolean): True if object handling needs to be on the same thread see https://stackoverflow.com/a/48234567/1497139 timeout(float): number of seconds for connection timeout debug(boolean): if True switch on debug errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)

Source code in lodstorage/sql.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def __init__(
    self,
    dbname: str = ":memory:",
    connection=None,
    check_same_thread=True,
    timeout=5,
    debug=False,
    errorDebug=False,
):
    """
    Construct me for the given dbname and debug

    Args:

       dbname(string): name of the database - default is a RAM based database
       connection(Connection): an optional connection to be reused
       check_same_thread(boolean): True if object handling needs to be on the same thread see https://stackoverflow.com/a/48234567/1497139
       timeout(float): number of seconds for connection timeout
       debug(boolean): if True switch on debug
       errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
    """
    self.dbname = dbname
    self.debug = debug
    self.errorDebug = errorDebug
    # open a fresh sqlite3 connection unless one was supplied
    if connection is None:
        self.c = sqlite3.connect(
            dbname,
            detect_types=sqlite3.PARSE_DECLTYPES,
            check_same_thread=check_same_thread,
            timeout=timeout,
        )
    else:
        # reuse the given connection e.g. one returned by backup/restore
        self.c = connection

backup(backupDB, action='Backup', profile=False, showProgress=200, doClose=True)

create backup of this SQLDB to the given backup db

see https://stackoverflow.com/a/59042442/1497139

Parameters:

Name Type Description Default
backupDB(string)

the path to the backupdb or SQLDB.RAM for in memory

required
action(string)

the action to display

required
profile(boolean)

True if timing information shall be shown

required
showProgress(int)

show progress at each showProgress page (0=show no progress)

required
Source code in lodstorage/sql.py
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
def backup(
    self,
    backupDB,
    action="Backup",
    profile=False,
    showProgress: int = 200,
    doClose=True,
):
    """
    create backup of this SQLDB to the given backup db

    see https://stackoverflow.com/a/59042442/1497139

    Args:
        backupDB(string): the path to the backupdb or SQLDB.RAM for in memory
        action(string): the action to display
        profile(boolean): True if timing information shall be shown
        showProgress(int): show progress at each showProgress page (0=show no progress)
        doClose(boolean): if True close the backup connection and return None else return the open connection

    Returns:
        Connection: the backup connection if doClose is False, None otherwise
    """
    # Connection.backup is only available from python 3.7 on
    if sys.version_info <= (3, 6):
        raise Exception(
            "backup via stdlibrary not available in python <=3.6 use copyToDB instead"
        )
    startTime = time.time()
    bck = sqlite3.connect(backupDB)
    if showProgress > 0:
        # choose the progress callback matching the action label
        if action == "Restore":
            progress = self.restoreProgress
        else:
            progress = self.backupProgress
    else:
        progress = None
    with bck:
        self.c.backup(bck, pages=showProgress, progress=progress)
    elapsed = time.time() - startTime
    if profile:
        print("%s to %s took %5.1f s" % (action, backupDB, elapsed))
    if doClose:
        bck.close()
        return None
    else:
        return bck

close()

close my connection

Source code in lodstorage/sql.py
73
74
75
def close(self):
    """close my connection"""
    connection = self.c
    connection.close()

copyTo(copyDB, profile=True)

copy my content to another database

Args:

copyDB(Connection): the target database profile(boolean): if True show profile information

Source code in lodstorage/sql.py
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
def copyTo(self, copyDB, profile=True):
    """
    copy my content to another database

    Args:

       copyDB(Connection): the target database
       profile(boolean): if True show profile information
    """
    startTime = time.time()
    # serialize my whole content to SQL statements
    statements = self.c.iterdump()
    dump = "\n".join(statements)
    if profile:
        elapsed = time.time() - startTime
        print(
            "finished getting dump of %s in %5.1f s" % (self.dbname, elapsed)
        )
    # replay the dump statement by statement on the target connection
    dumpErrors = self.executeDump(copyDB.c, dump, self.dbname, profile=profile)
    return dumpErrors

createTable(listOfRecords, entityName, primaryKey=None, withCreate=True, withDrop=False, sampleRecordCount=1, failIfTooFew=True)

Derive Data Definition Language CREATE TABLE command from list of Records by examining first record as defining sample record and execute DDL command.

Parameters:

Name Type Description Default
listOfRecords list

A list of Dicts.

required
entityName str

The entity / table name to use.

required
primaryKey str

The key/column to use as a primary key.

None
withDrop bool

True if the existing Table should be dropped.

False
withCreate bool

True if the create Table command should be executed.

True
sampleRecordCount int

Number of sample records expected and to be inspected.

1
failIfTooFew bool

Raise an Exception if too few sample records, else warn only.

True

Returns:

Name Type Description
EntityInfo

Meta data information for the created table.

Source code in lodstorage/sql.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
def createTable(
    self,
    listOfRecords,
    entityName: str,
    primaryKey: str = None,
    withCreate: bool = True,
    withDrop: bool = False,
    sampleRecordCount=1,
    failIfTooFew=True,
):
    """
    Derive Data Definition Language CREATE TABLE command from list of Records by examining first record
    as defining sample record and execute DDL command.

    Args:
        listOfRecords (list): A list of Dicts.
        entityName (str): The entity / table name to use.
        primaryKey (str): The key/column to use as a primary key.
        withDrop (bool): True if the existing Table should be dropped.
        withCreate (bool): True if the create Table command should be executed.
        sampleRecordCount (int): Number of sample records expected and to be inspected.
        failIfTooFew (bool): Raise an Exception if too few sample records, else warn only.

    Returns:
        EntityInfo: Meta data information for the created table.
    """
    l = len(listOfRecords)
    # a negative count means: sample every available record
    if sampleRecordCount < 0:
        sampleRecordCount = l
    if l < sampleRecordCount:
        msg = f"only {l}/{sampleRecordCount} of needed sample records to createTable available"
        if failIfTooFew:
            raise Exception(msg)
        if self.debug:
            self.logError(msg)
    # derive the schema from the sample records
    entityInfo = EntityInfo(
        listOfRecords[:sampleRecordCount], entityName, primaryKey, debug=self.debug
    )
    return self.createTable4EntityInfo(entityInfo, withDrop, withCreate)

createTable4EntityInfo(entityInfo, withDrop=False, withCreate=True)

Create a table based on the provided EntityInfo.

Parameters:

Name Type Description Default
entityInfo EntityInfo

The EntityInfo object containing table metadata.

required
withDrop bool

If True, drop the existing table before creation.

False
withCreate bool

If True, execute the CREATE TABLE command.

True

Returns:

Name Type Description
EntityInfo

The provided EntityInfo object.

Source code in lodstorage/sql.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def createTable4EntityInfo(self, entityInfo, withDrop=False, withCreate=True):
    """
    Create a table based on the provided EntityInfo.

    Args:
        entityInfo (EntityInfo): The EntityInfo object containing table metadata.
        withDrop (bool): If True, drop the existing table before creation.
        withCreate (bool): If True, execute the CREATE TABLE command.

    Returns:
        EntityInfo: The provided EntityInfo object.

    Raises:
        Exception: if the CREATE TABLE command fails
    """
    if withDrop:
        # remove any pre-existing table first
        self.c.execute(entityInfo.dropTableCmd)
    if withCreate:
        try:
            self.c.execute(entityInfo.createTableCmd)
        except sqlite3.OperationalError as oe:
            # chain the original error so the traceback is preserved
            raise Exception(
                f"createTable failed with error {oe} for {entityInfo.createTableCmd}"
            ) from oe
    return entityInfo

execute(ddlCmd)

execute the given Data Definition Command

Parameters:

Name Type Description Default
ddlCmd(string)

e.g. a CREATE TABLE or CREATE View command

required
Source code in lodstorage/sql.py
77
78
79
80
81
82
83
84
def execute(self, ddlCmd):
    """
    execute the given Data Definition Command

    Args:
        ddlCmd(string): e.g. a CREATE TABLE or CREATE View command
    """
    # run the command directly on my connection
    connection = self.c
    connection.execute(ddlCmd)

executeDump(connection, dump, title, maxErrors=100, errorDisplayLimit=12, profile=True)

execute the given dump for the given connection

Parameters:

Name Type Description Default
connection(Connection)

the sqlite3 connection to use

required
dump(string)

the SQL commands for the dump

required
title(string)

the title of the dump

required
maxErrors(int)

maximum number of errors to be tolerated before stopping and doing a rollback

required
profile(boolean)

True if profiling information should be shown

required

Returns: a list of errors

Source code in lodstorage/sql.py
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
def executeDump(
    self, connection, dump, title, maxErrors=100, errorDisplayLimit=12, profile=True
):
    """
    execute the given dump for the given connection

    Args:
        connection(Connection): the sqlite3 connection to use
        dump(string): the SQL commands for the dump
        title(string): the title of the dump
        maxErrors(int): maximum number of errors to be tolerated before stopping and doing a rollback
        errorDisplayLimit(int): maximum number of error messages to print
        profile(boolean): True if profiling information should be shown
    Returns:
        a list of errors
    """
    if self.debug:
        self.showDump(dump)
    startTime = time.time()
    if profile:
        print("dump of %s has size %4.1f MB" % (title, len(dump) / 1024 / 1024))
    errors = []
    index = 0
    # execute statement by statement instead of using executescript
    # fixes https://github.com/WolfgangFahl/ProceedingsTitleParser/issues/37
    for line in dump.split(";\n"):
        try:
            connection.execute(line)
        except sqlite3.OperationalError as soe:
            msg = "SQL error %s in line %d:\n\t%s" % (soe, index, line)
            errors.append(msg)
            # only print the first errorDisplayLimit error messages
            if len(errors) <= errorDisplayLimit:
                print(msg)
            # too many errors - give up and roll back
            if len(errors) >= maxErrors:
                connection.execute("ROLLBACK;")
                break

        index = index + 1
    if profile:
        print(
            "finished executing dump %s with %d lines and %d errors in %5.1f s"
            % (title, index, len(errors), time.time() - startTime)
        )
    return errors

getDebugInfo(record, index, executeMany)

get the debug info for the given record at the given index depending on the state of executeMany

Parameters:

Name Type Description Default
record(dict)

the record to show

required
index(int)

the index of the record

required
executeMany(boolean)

if True the record may be valid else not

required
Source code in lodstorage/sql.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
def getDebugInfo(self, record, index, executeMany):
    """
    get the debug info for the given record at the given index depending on the state of executeMany

    Args:
        record(dict): the record to show
        index(int): the index of the record
        executeMany(boolean): if True the record may be valid else not
    """
    if executeMany:
        # in executeMany mode a single record is not meaningful
        return ""
    if self.errorDebug:
        # show the full record - may expose sensitive data, so this
        # should only be switched on outside production
        return "\nrecord  #%d=%s" % (index, repr(record))
    # show only the index of the offending record
    return "\nrecord #%d" % index

getTableDict(tableType='table')

get the schema information from this database as a dict

Parameters:

Name Type Description Default
tableType(str)

table or view

required

Returns:

Name Type Description
dict

Lookup map of tables with columns also being converted to dict

Source code in lodstorage/sql.py
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
def getTableDict(self, tableType="table"):
    """
    get the schema information from this database as a dict

    Args:
        tableType(str): table or view

    Returns:
        dict: Lookup map of tables with columns also being converted to dict
    """
    tableDict = {}
    for table in self.getTableList(tableType=tableType):
        # convert the column list to a lookup by column name
        table["columns"] = {col["name"]: col for col in table["columns"]}
        tableDict[table["name"]] = table
    return tableDict

getTableList(tableType='table')

get the schema information from this database

Parameters:

Name Type Description Default
tableType(str)

table or view

required
Return

list: a list as derived from PRAGMA table_info

Source code in lodstorage/sql.py
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
def getTableList(self, tableType="table"):
    """
    get the schema information from this database

    Args:
        tableType(str): table or view

    Return:
        list: a list as derived from PRAGMA table_info
    """
    tableQuery = f"SELECT name FROM sqlite_master WHERE type='{tableType}'"
    tableList = self.query(tableQuery)
    # enrich every table record with its column metadata
    for table in tableList:
        tableName = table["name"]
        table["columns"] = self.query(f"PRAGMA table_info('{tableName}')")
    return tableList

logError(msg)

log the given error message to stderr

Parameters:

Name Type Description Default
msg(str)

the error message to display

required
Source code in lodstorage/sql.py
64
65
66
67
68
69
70
71
def logError(self, msg):
    """
    log the given error message to stderr

    Args:
        msg(str): the error message to display
    """
    # write to stderr and flush so the message is visible immediately
    sys.stderr.write(f"{msg}\n")
    sys.stderr.flush()

progress(action, status, remaining, total)

show progress

Source code in lodstorage/sql.py
344
345
346
347
348
349
350
351
352
353
354
355
def progress(self, action, status, remaining, total):
    """
    show progress
    """
    print(
        "%s %s at %5.0f%%"
        % (
            action,
            "... " if status == 0 else "done",
            (total - remaining) / total * 100,
        )
    )

query(sqlQuery, params=None)

run the given sqlQuery and return a list of Dicts

Args:

sqlQuery(string): the SQL query to be executed
params(tuple): the query params, if any

Returns:

Name Type Description
list

a list of Dicts

Source code in lodstorage/sql.py
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
def query(self, sqlQuery, params=None):
    """
    run the given sqlQuery and return a list of dicts

    Args:

        sqlQuery(string): the SQL query to be executed
        params(tuple): the query params, if any

    Returns:
        list: a list of dicts - one per result row
    """
    # materialize the generator provided by queryGen
    return list(self.queryGen(sqlQuery, params))

queryAll(entityInfo, fixDates=True)

query all records for the given entityName/tableName

Parameters:

Name Type Description Default
entityName(string)

name of the entity/table to query

required
fixDates(boolean)

True if date entries should be returned as such and not as strings

required
Source code in lodstorage/sql.py
286
287
288
289
290
291
292
293
294
295
296
297
298
def queryAll(self, entityInfo, fixDates=True):
    """
    query all records of the table described by the given entityInfo

    Args:
       entityInfo(EntityInfo): the metadata of the entity/table to query
       fixDates(boolean): True if date entries should be returned as such and not as strings

    Returns:
        list: all records of the table as a list of dicts
    """
    resultList = self.query("SELECT * FROM %s" % entityInfo.name)
    if fixDates:
        # convert date strings back to date objects in place
        entityInfo.fixDates(resultList)
    return resultList

queryGen(sqlQuery, params=None)

run the given sqlQuery as a generator for dicts

Args:

sqlQuery(string): the SQL query to be executed
params(tuple): the query params, if any

Returns:

Type Description

a generator of dicts

Source code in lodstorage/sql.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
def queryGen(self, sqlQuery, params=None):
    """
    run the given sqlQuery as a generator of dicts

    Args:

        sqlQuery(string): the SQL query to be executed
        params(tuple): the query params, if any

    Returns:
        a generator of dicts - one per result row

    Raises:
        sqlite3.Error: if executing the query itself fails; errors while
        iterating the rows are logged via logError and end the iteration
    """
    if self.debug:
        print(sqlQuery)
        if params is not None:
            print(params)
    # https://stackoverflow.com/a/13735506/1497139
    cur = self.c.cursor()
    try:
        if params is not None:
            query = cur.execute(sqlQuery, params)
        else:
            query = cur.execute(sqlQuery)
        colname = [d[0] for d in query.description]
        try:
            # loop over all rows
            for row in query:
                record = dict(zip(colname, row))
                yield record
        except Exception as ex:
            # keep the original best-effort behavior:
            # log row-iteration errors and stop yielding
            msg = str(ex)
            self.logError(msg)
    finally:
        # fixed: previously the cursor leaked when execute() raised,
        # since close() was only reached on the happy path
        cur.close()

restore(backupDB, restoreDB, profile=False, showProgress=200, debug=False) staticmethod

restore the restoreDB from the given backup DB

Parameters:

Name Type Description Default
backupDB(string)

path to the backupDB e.g. backup.db

required
restoreDB(string)

path to the restoreDB or in Memory SQLDB.RAM

required
profile(boolean)

True if timing information should be shown

required
showProgress(int)

show progress at each showProgress page (0=show no progress)

required
Source code in lodstorage/sql.py
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
@staticmethod
def restore(backupDB, restoreDB, profile=False, showProgress=200, debug=False):
    """
    restore the restoreDB from the given backup DB

    Args:
        backupDB(string): path to the backupDB e.g. backup.db
        restoreDB(string): path to the restoreDB or in Memory SQLDB.RAM
        profile(boolean): True if timing information should be shown
        showProgress(int): show progress at each showProgress page (0=show no progress)
        debug(boolean): True if debugging information should be shown

    Returns:
        SQLDB: a wrapper around the restored database
    """
    sourceDB = SQLDB(backupDB)
    # run the backup mechanism in the restore direction and keep the
    # connection open so it can be handed to the restored wrapper
    connection = sourceDB.backup(
        restoreDB,
        action="Restore",
        profile=profile,
        showProgress=showProgress,
        doClose=False,
    )
    restoredDB = SQLDB(restoreDB, connection=connection, debug=debug)
    return restoredDB

showDump(dump, limit=10)

show the given dump up to the given limit

Parameters:

Name Type Description Default
dump(string)

the SQL dump to show

required
limit(int)

the maximum number of lines to display

required
Source code in lodstorage/sql.py
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
def showDump(self, dump, limit=10):
    """
    show the given dump up to the given limit of lines

    Args:
        dump(string): the SQL dump to show
        limit(int): the maximum number of lines to display
    """
    # fixed off-by-one: the previous `index <= limit` check
    # printed limit+1 lines instead of limit
    for index, line in enumerate(io.StringIO(dump)):
        if index >= limit:
            break
        print(line)

store(listOfRecords, entityInfo, executeMany=False, fixNone=False, replace=False)

store the given list of records based on the given entityInfo

Args:

listOfRecords(list): the list of Dicts to be stored
entityInfo(EntityInfo): the meta data to be used for storing
executeMany(bool): if True the insert command is done with many/all records at once
fixNone(bool): if True make sure empty columns in the listOfDict are filled with "None" values
replace(bool): if True allow replace for insert

Source code in lodstorage/sql.py
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
def store(
    self, listOfRecords, entityInfo, executeMany=False, fixNone=False, replace=False
):
    """
    store the given list of records based on the given entityInfo

    Args:

       listOfRecords(list): the list of Dicts to be stored
       entityInfo(EntityInfo): the meta data to be used for storing
       executeMany(bool): if True the insert command is done with many/all records at once
       fixNone(bool): if True make sure empty columns in the listOfDict are filled with "None" values
       replace(bool): if True allow replace for insert

    Raises:
        Exception: on binding/value errors, enriched with the insert
        command, the offending column name and optional debug info
    """
    insertCmd = entityInfo.getInsertCmd(replace=replace)
    # track the current record and its 1-based index for error reporting
    record = None
    index = 0
    try:
        if executeMany:
            if fixNone:
                # fill missing columns with None for all records at once
                LOD.setNone4List(listOfRecords, entityInfo.typeMap.keys())
            self.c.executemany(insertCmd, listOfRecords)
        else:
            for record in listOfRecords:
                index += 1
                if fixNone:
                    LOD.setNone(record, entityInfo.typeMap.keys())
                self.c.execute(insertCmd, record)
        self.c.commit()
    except sqlite3.ProgrammingError as pe:
        msg = pe.args[0]
        if "You did not supply a value for binding" in msg:
            # extract the column that has no value - the message format
            # differs between Python/sqlite versions
            if ":" in msg:
                # sqlite now returns the parameter name not the number
                # You did not supply a value for binding parameter :type.
                columnName = re.findall(r":([a-zA-Z][a-zA-Z0-9_]*)", msg)[0]
                columnName = columnName.replace(":", "")
            else:
                # pre python 3.10
                # You did not supply a value for binding 2.
                columnIndex = int(re.findall(r"\d+", msg)[0])
                # binding numbers are 1-based - map back to the column name
                columnName = list(entityInfo.typeMap.keys())[columnIndex - 1]
            debugInfo = self.getDebugInfo(record, index, executeMany)
            raise Exception(
                "%s\nfailed: no value supplied for column '%s'%s"
                % (insertCmd, columnName, debugInfo)
            )
        else:
            raise pe
    except sqlite3.InterfaceError as ie:
        msg = ie.args[0]
        if "Error binding parameter" in msg:
            # e.g. Error binding parameter :name - probably unsupported type
            columnName = re.findall(r":[_a-zA-Z]\w*", msg)[0]
            debugInfo = self.getDebugInfo(record, index, executeMany)
            raise Exception(
                "%s\nfailed: error binding column '%s'%s"
                % (insertCmd, columnName, debugInfo)
            )
        else:
            raise ie
    except Exception as ex:
        # generic fallback: wrap any other error with insert command context
        debugInfo = self.getDebugInfo(record, index, executeMany)
        msg = "%s\nfailed:%s%s" % (insertCmd, str(ex), debugInfo)
        raise Exception(msg)

adapt_boolean(val)

Adapt boolean to int

Source code in lodstorage/sql.py
672
673
674
def adapt_boolean(val: bool):
    """Adapt a boolean to an int (1 for truthy, 0 for falsy) for SQLite storage"""
    return int(bool(val))

adapt_date_iso(val)

Adapt datetime.date to ISO 8601 date.

Source code in lodstorage/sql.py
657
658
659
def adapt_date_iso(val: datetime.date):
    """Adapt datetime.date to an ISO 8601 date string."""
    iso_date = val.isoformat()
    return iso_date

adapt_datetime_epoch(val)

Adapt datetime.datetime to Unix timestamp.

Source code in lodstorage/sql.py
667
668
669
def adapt_datetime_epoch(val: datetime.datetime):
    """Adapt datetime.datetime to microseconds since the Unix epoch."""
    seconds = val.timestamp()
    # scale the epoch seconds to microseconds
    return float(seconds) * 1_000_000

adapt_datetime_iso(val)

Adapt datetime.datetime to timezone-naive ISO 8601 date.

Source code in lodstorage/sql.py
662
663
664
def adapt_datetime_iso(val: datetime.datetime):
    """Adapt datetime.datetime to an ISO 8601 string (no timezone is added)."""
    iso_value = val.isoformat()
    return iso_value

convert_boolean(val)

Convert 0 or 1 to boolean

Source code in lodstorage/sql.py
823
824
825
826
827
def convert_boolean(val: bytes):
    """
    Convert a stored 0/1 value back to a boolean
    """
    return int(val) == 1

convert_date(val)

Convert byte string to date using the DatetimeAdapter.

Source code in lodstorage/sql.py
805
806
807
808
def convert_date(val: bytes) -> datetime.date:
    """Convert a byte string to a date via a DatetimeAdapter instance."""
    date_value = DatetimeAdapter().convert_date(val)
    return date_value

convert_datetime(val)

Convert byte string to datetime using the DatetimeAdapter.

Source code in lodstorage/sql.py
811
812
813
814
def convert_datetime(val: bytes) -> datetime.datetime:
    """Convert a byte string to a datetime via a DatetimeAdapter instance."""
    datetime_value = DatetimeAdapter().convert_datetime(val)
    return datetime_value

convert_timestamp(val)

Convert byte string to timestamp using the DatetimeAdapter.

Source code in lodstorage/sql.py
817
818
819
820
def convert_timestamp(val: bytes) -> datetime.datetime:
    """Convert a byte string to a timestamp via a DatetimeAdapter instance."""
    timestamp_value = DatetimeAdapter().convert_timestamp(val)
    return timestamp_value

sql_cache

Created on 2024-03-16

@author: wf

Cached

Manage cached entities.

Source code in lodstorage/sql_cache.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
class Cached:
    """
    Manage cached entities.

    Records are fetched via SPARQL as a list of dicts, validated into
    instances of the given class and persisted in a local SQL database
    which acts as the cache.

    Note:
        self.lod is only set by fetch_from_local() or get_lod(); calling
        to_entities()/store() before one of those will fail.
    """

    def __init__(
        self,
        clazz: Type[Any],
        sparql: SPARQL,
        sql_db: str,
        query_name: str,
        max_errors: int = 0,
        debug: bool = False,
    ):
        """
        Initializes the Manager with class reference, SPARQL endpoint URL, SQL database connection string,
        query name, and an optional debug flag.

        Args:
            clazz (Type[Any]): The class reference for the type of objects managed by this manager.
            sparql (SPARQL): a SPARQL endpoint.
            sql_db (str): The connection string for the SQL database.
                NOTE(review): despite the str annotation this is used as an
                object with .engine and .get_session() - looks like a SqlDB
                instance; confirm and fix the annotation.
            query_name (str): The name of the query to be executed.
            max_errors (int): maximum number of tolerated validation errors. Defaults to 0.
            debug (bool, optional): Flag to enable debug mode. Defaults to False.
        """
        self.clazz = clazz
        self.sparql = sparql
        self.sql_db = sql_db
        self.query_name = query_name
        self.max_errors = max_errors
        self.debug = debug
        # state filled by fetch_from_local/get_lod/to_entities
        self.entities = []
        self.errors = []
        self.fetched = False
        # Ensure the table for the class exists
        clazz.metadata.create_all(self.sql_db.engine)

    def fetch_or_query(self, qm, force_query=False) -> List[Dict]:
        """
        Fetches data from the local cache if available.
        If the data is not in the cache or if force_query is True,
        it queries via SPARQL and caches the results.

        Args:
            qm (QueryManager): The query manager object used for making SPARQL queries.
            force_query (bool, optional): A flag to force querying via SPARQL even if the data exists in the local cache. Defaults to False.
        Returns:
            List: list of records from the SQL database
        """
        if not force_query and self.check_local_cache():
            lod = self.fetch_from_local()
        else:
            lod = self.get_lod(qm)
            # NOTE(review): store() is called without forwarding a
            # max_errors value, so self.max_errors applies here
            self.store()
        return lod

    def check_local_cache(self) -> bool:
        """
        Checks if there is data in the local cache (SQL database).

        Returns:
            bool: True if there is at least one record in the local SQL cache table
        """
        with self.sql_db.get_session() as session:
            # only the first row is needed to decide whether data exists
            result = session.exec(select(self.clazz)).first()
            return result is not None

    def fetch_from_local(self) -> List[Dict]:
        """
        Fetches data from the local SQL database as list of dicts and entities.

        Returns:
            List[Dict]: List of records from the SQL database in dictionary form.
        """
        profiler = Profiler(f"fetch {self.query_name} from local", profile=self.debug)
        with self.sql_db.get_session() as session:
            self.entities = session.exec(select(self.clazz)).all()
            # keep a list-of-dicts view alongside the entity instances
            self.lod = [entity.dict() for entity in self.entities]
            if self.debug:
                print(f"Loaded {len(self.entities)} records from local cache")
        profiler.time()
        return self.lod

    def get_lod(self, qm: QueryManager) -> List[Dict]:
        """
        Fetches data using the SPARQL query specified by my query_name.

        Args:
            qm (QueryManager): The query manager object used for making SPARQL queries.

        Returns:
            List[Dict]: A list of dictionaries representing the data fetched.
        """
        profiler = Profiler(
            f"fetch {self.query_name} from SPARQL endpoint {self.sparql.url}",
            profile=self.debug,
        )
        query = qm.queriesByName[self.query_name]
        self.lod = self.sparql.queryAsListOfDicts(query.query)
        profiler.time()
        if self.debug:
            print(f"Found {len(self.lod)} records for {self.query_name}")
        return self.lod

    def to_entities(self, max_errors: int = None, cached: bool = True) -> List[Any]:
        """
        Converts records fetched from the LOD into entity instances, applying validation.

        Args:
            max_errors (int, optional): Maximum allowed validation errors. Defaults to self.max_errors.
            cached(bool): if True use existing entries
        Returns:
            List[Any]: A list of entity instances that have passed validation.

        Raises:
            Exception: if more than max_errors records fail validation
        """
        if not cached:
            # restart conversion from scratch
            self.entities = []
            self.errors = []
        elif self.fetched:
            # reuse the previously converted entities
            return self.entities

        error_records = []
        if max_errors is None:
            max_errors = self.max_errors
        for record in self.lod:
            try:
                entity = self.clazz.model_validate(record)
                self.entities.append(entity)
            except Exception as e:
                # collect errors and the offending records for reporting
                self.errors.append(e)
                error_records.append(record)
        error_count = len(self.errors)
        if error_count > max_errors:
            msg = f"found {error_count} errors > maximum allowed {max_errors} errors"
            if self.debug:
                print(msg)
                for i, e in enumerate(self.errors):
                    print(f"{i}:{str(e)} for \n{error_records[i]}")
            raise Exception(msg)
        self.fetched = True
        return self.entities

    def store(self, max_errors: int = None) -> List[Any]:
        """
        Stores the fetched data into the local SQL database.

        Args:
            max_errors (int, optional): Maximum allowed validation errors. Defaults to 0.

        Returns:
            List[Any]: A list of entity instances that were stored in the database.

        """
        profiler = Profiler(f"store {self.query_name}", profile=self.debug)
        # re-validate from self.lod, ignoring any previously converted entities
        self.to_entities(max_errors=max_errors, cached=False)
        with self.sql_db.get_session() as session:
            session.add_all(self.entities)
            session.commit()
            if self.debug:
                print(f"Stored {len(self.entities)} records in local cache")
        profiler.time()
        return self.entities

__init__(clazz, sparql, sql_db, query_name, max_errors=0, debug=False)

Initializes the Manager with class reference, SPARQL endpoint URL, SQL database connection string, query name, and an optional debug flag.

Parameters:

Name Type Description Default
clazz Type[Any]

The class reference for the type of objects managed by this manager.

required
sparql SPARQL

a SPARQL endpoint.

required
sql_db str

The connection string for the SQL database.

required
query_name str

The name of the query to be executed.

required
debug bool

Flag to enable debug mode. Defaults to False.

False
Source code in lodstorage/sql_cache.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def __init__(
    self,
    clazz: Type[Any],
    sparql: SPARQL,
    sql_db: str,
    query_name: str,
    max_errors: int = 0,
    debug: bool = False,
):
    """
    Create a cache manager for the given entity class.

    Args:
        clazz (Type[Any]): The class reference for the type of objects managed by this manager.
        sparql (SPARQL): a SPARQL endpoint.
        sql_db (str): The connection string for the SQL database.
            NOTE(review): used as an object with .engine and .get_session() -
            looks like a SqlDB instance; confirm the annotation.
        query_name (str): The name of the query to be executed.
        max_errors (int): maximum number of tolerated validation errors. Defaults to 0.
        debug (bool, optional): Flag to enable debug mode. Defaults to False.
    """
    # configuration
    self.clazz = clazz
    self.sparql = sparql
    self.sql_db = sql_db
    self.query_name = query_name
    self.max_errors = max_errors
    self.debug = debug
    # state filled by later fetch/query calls
    self.entities = []
    self.errors = []
    self.fetched = False
    # Ensure the table for the class exists
    clazz.metadata.create_all(self.sql_db.engine)

check_local_cache()

Checks if there is data in the local cache (SQL database).

Returns:

Name Type Description
bool bool

True if there is at least one record in the local SQL cache table

Source code in lodstorage/sql_cache.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def check_local_cache(self) -> bool:
    """
    Checks whether the local SQL cache table already holds data.

    Returns:
        bool: True if there is at least one record in the local SQL cache table
    """
    with self.sql_db.get_session() as session:
        # fetching just the first row is enough to decide
        first_record = session.exec(select(self.clazz)).first()
    return first_record is not None

fetch_from_local()

Fetches data from the local SQL database as list of dicts and entities.

Returns:

Type Description
List[Dict]

List[Dict]: List of records from the SQL database in dictionary form.

Source code in lodstorage/sql_cache.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def fetch_from_local(self) -> List[Dict]:
    """
    Fetches data from the local SQL database as list of dicts and entities.

    Returns:
        List[Dict]: List of records from the SQL database in dictionary form.
    """
    profiler = Profiler(f"fetch {self.query_name} from local", profile=self.debug)
    with self.sql_db.get_session() as session:
        self.entities = session.exec(select(self.clazz)).all()
        # keep a list-of-dicts view alongside the entity instances
        self.lod = [entity_obj.dict() for entity_obj in self.entities]
    if self.debug:
        print(f"Loaded {len(self.entities)} records from local cache")
    profiler.time()
    return self.lod

fetch_or_query(qm, force_query=False)

Fetches data from the local cache if available. If the data is not in the cache or if force_query is True, it queries via SPARQL and caches the results.

Parameters:

Name Type Description Default
qm QueryManager

The query manager object used for making SPARQL queries.

required
force_query bool

A flag to force querying via SPARQL even if the data exists in the local cache. Defaults to False.

False

Returns: List: list of records from the SQL database

Source code in lodstorage/sql_cache.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def fetch_or_query(self, qm, force_query=False) -> List[Dict]:
    """
    Get the records either from the local cache or via SPARQL.

    When force_query is False and the local cache holds data, the cached
    copy is returned; otherwise the SPARQL endpoint is queried and the
    result is stored in the cache.

    Args:
        qm (QueryManager): The query manager object used for making SPARQL queries.
        force_query (bool, optional): A flag to force querying via SPARQL even if the data exists in the local cache. Defaults to False.
    Returns:
        List: list of records from the SQL database
    """
    use_cache = not force_query and self.check_local_cache()
    if use_cache:
        return self.fetch_from_local()
    lod = self.get_lod(qm)
    self.store()
    return lod

get_lod(qm)

Fetches data using the SPARQL query specified by my query_name.

Parameters:

Name Type Description Default
qm QueryManager

The query manager object used for making SPARQL queries.

required

Returns:

Type Description
List[Dict]

List[Dict]: A list of dictionaries representing the data fetched.

Source code in lodstorage/sql_cache.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def get_lod(self, qm: QueryManager) -> List[Dict]:
    """
    Run the SPARQL query registered under my query_name and keep the result.

    Args:
        qm (QueryManager): The query manager object used for making SPARQL queries.

    Returns:
        List[Dict]: A list of dictionaries representing the data fetched.
    """
    profiler = Profiler(
        f"fetch {self.query_name} from SPARQL endpoint {self.sparql.url}",
        profile=self.debug,
    )
    named_query = qm.queriesByName[self.query_name]
    self.lod = self.sparql.queryAsListOfDicts(named_query.query)
    profiler.time()
    if self.debug:
        print(f"Found {len(self.lod)} records for {self.query_name}")
    return self.lod

store(max_errors=None)

Stores the fetched data into the local SQL database.

Parameters:

Name Type Description Default
max_errors int

Maximum allowed validation errors. Defaults to 0.

None

Returns:

Type Description
List[Any]

List[Any]: A list of entity instances that were stored in the database.

Source code in lodstorage/sql_cache.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def store(self, max_errors: int = None) -> List[Any]:
    """
    Persist the fetched records to the local SQL database.

    Args:
        max_errors (int, optional): Maximum allowed validation errors. Defaults to 0.

    Returns:
        List[Any]: A list of entity instances that were stored in the database.
    """
    profiler = Profiler(f"store {self.query_name}", profile=self.debug)
    # re-validate from self.lod, discarding previously converted entities
    self.to_entities(max_errors=max_errors, cached=False)
    with self.sql_db.get_session() as session:
        session.add_all(self.entities)
        session.commit()
    if self.debug:
        print(f"Stored {len(self.entities)} records in local cache")
    profiler.time()
    return self.entities

to_entities(max_errors=None, cached=True)

Converts records fetched from the LOD into entity instances, applying validation.

Parameters:

Name Type Description Default
max_errors int

Maximum allowed validation errors. Defaults to 0.

None
cached(bool)

if True use existing entries

required

Returns: List[Any]: A list of entity instances that have passed validation.

Source code in lodstorage/sql_cache.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
def to_entities(self, max_errors: int = None, cached: bool = True) -> List[Any]:
    """
    Converts the fetched list-of-dict records into validated entity instances.

    Args:
        max_errors (int, optional): Maximum allowed validation errors; defaults to self.max_errors.
        cached(bool): if True reuse already converted entities

    Returns:
        List[Any]: A list of entity instances that have passed validation.

    Raises:
        Exception: if more than max_errors records fail validation
    """
    if cached and self.fetched:
        # reuse the previously converted entities
        return self.entities
    if not cached:
        # restart conversion from scratch
        self.entities = []
        self.errors = []

    failed_records = []
    max_errors = self.max_errors if max_errors is None else max_errors
    for record in self.lod:
        try:
            self.entities.append(self.clazz.model_validate(record))
        except Exception as validation_error:
            # collect the error and the offending record for reporting
            self.errors.append(validation_error)
            failed_records.append(record)
    error_count = len(self.errors)
    if error_count > max_errors:
        msg = f"found {error_count} errors > maximum allowed {max_errors} errors"
        if self.debug:
            print(msg)
            for i, e in enumerate(self.errors):
                print(f"{i}:{str(e)} for \n{failed_records[i]}")
        raise Exception(msg)
    self.fetched = True
    return self.entities

SqlDB

general SQL database access using SQL Alchemy

Source code in lodstorage/sql_cache.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
class SqlDB:
    """
    general SQL database access using SQL Alchemy
    """

    def __init__(self, sqlite_file_path: str, debug: bool = False):
        """
        Create the engine for the given sqlite file.

        Args:
            sqlite_file_path (str): the path to the sqlite database file
            debug (bool): if True echo SQL statements
        """
        self.debug = debug
        # sessions are created per call, so allow cross-thread usage
        self.engine = create_engine(
            f"sqlite:///{sqlite_file_path}",
            echo=debug,
            connect_args={"check_same_thread": False},
        )

    def get_session(self) -> Session:
        """
        Provide a session for database operations.

        Returns:
            Session: A SQLAlchemy Session object bound to the engine for database operations.
        """
        session = Session(bind=self.engine)
        return session

get_session()

Provide a session for database operations.

Returns:

Name Type Description
Session Session

A SQLAlchemy Session object bound to the engine for database operations.

Source code in lodstorage/sql_cache.py
26
27
28
29
30
31
32
33
def get_session(self) -> Session:
    """
    Provide a session for database operations.

    Returns:
        Session: A SQLAlchemy Session object bound to the engine for database operations.
    """
    session = Session(bind=self.engine)
    return session

storageconfig

Created on 2020-08-29

@author: wf

StorageConfig

Bases: object

a storage configuration

Source code in lodstorage/storageconfig.py
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
class StorageConfig(object):
    """
    a storage configuration
    """

    def __init__(
        self,
        mode=StoreMode.SQL,
        cacheRootDir: str = None,
        cacheDirName: str = "lodstorage",
        cacheFile=None,
        withShowProgress=True,
        profile=True,
        debug=False,
        errorDebug=True,
    ):
        """
        Constructor

        Args:
            mode(StoreMode): the storage mode e.g. sql
            cacheRootDir(str): the cache root directory to use - if None the home directory will be used
            cacheDirName(str): the name of the cache directory (a dot is prepended)
            cacheFile(string): the common cacheFile to use (if any)
            withShowProgress(boolean): True if progress should be shown
            profile(boolean): True if timing / profiling information should be shown
            debug(boolean): True if debugging information should be shown
            errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
        """
        # fall back to the user's home directory when no root is given
        if cacheRootDir is None:
            self.cacheRootDir = str(Path.home())
        else:
            self.cacheRootDir = cacheRootDir
        self.cacheDirName = cacheDirName
        self.mode = mode
        self.cacheFile = cacheFile
        self.profile = profile
        self.withShowProgress = withShowProgress
        self.debug = debug
        self.errorDebug = errorDebug

    def getCachePath(self, ensureExists=True) -> str:
        """
        get the path to the default cache directory

        Args:
            ensureExists(bool): if True create the directory if needed

        Returns:
            str: the path of the cache directory
        """
        cachedir = f"{self.cacheRootDir}/.{self.cacheDirName}"
        if ensureExists:
            os.makedirs(cachedir, exist_ok=True)
        return cachedir

    @staticmethod
    def getDefault(debug=False):
        """get the default storage configuration (SQL)"""
        return StorageConfig.getSQL(debug)

    @staticmethod
    def getSQL(debug=False):
        """get an SQL storage configuration"""
        config = StorageConfig(mode=StoreMode.SQL, debug=debug)
        config.tableName = None
        return config

    @staticmethod
    def getJSON(debug=False):
        """get a JSON storage configuration"""
        return StorageConfig(mode=StoreMode.JSON, debug=debug)

    @staticmethod
    def getJsonPickle(debug=False):
        """get a JsonPickle storage configuration"""
        return StorageConfig(mode=StoreMode.JSONPICKLE, debug=debug)

    @staticmethod
    def getSPARQL(prefix, endpoint, host, debug=False):
        """get a SPARQL storage configuration for the given endpoint"""
        config = StorageConfig(mode=StoreMode.SPARQL, debug=debug)
        config.prefix = prefix
        config.host = host
        config.endpoint = endpoint
        return config

    @staticmethod
    def getYaml(debug=False):
        """get a YAML storage configuration"""
        return StorageConfig(mode=StoreMode.YAML, debug=debug)

__init__(mode=StoreMode.SQL, cacheRootDir=None, cacheDirName='lodstorage', cacheFile=None, withShowProgress=True, profile=True, debug=False, errorDebug=True)

Constructor

Parameters:

Name Type Description Default
mode(StoreMode)

the storage mode e.g. sql

required
cacheRootDir(str)

the cache root directory to use - if None the home directory will be used

required
cacheFile(string)

the common cacheFile to use (if any)

required
withShowProgress(boolean)

True if progress should be shown

required
profile(boolean)

True if timing / profiling information should be shown

required
debug(boolean)

True if debugging information should be shown

required
errorDebug(boolean)

True if debug info should be provided on errors (should not be used for production since it might reveal data)

required
Source code in lodstorage/storageconfig.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def __init__(
    self,
    mode=StoreMode.SQL,
    cacheRootDir: str = None,
    cacheDirName: str = "lodstorage",
    cacheFile=None,
    withShowProgress=True,
    profile=True,
    debug=False,
    errorDebug=True,
):
    """
    Constructor

    Args:
        mode(StoreMode): the storage mode e.g. sql
        cacheRootDir(str): the cache root directory to use - if None the home directory will be used
        cacheDirName(str): the name of the cache directory below the cache root - default: "lodstorage"
        cacheFile(string): the common cacheFile to use (if any)
        withShowProgress(boolean): True if progress should be shown
        profile(boolean): True if timing / profiling information should be shown
        debug(boolean): True if debugging information should be shown
        errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
    """
    if cacheRootDir is None:
        # fall back to the user's home directory
        self.cacheRootDir = str(Path.home())
    else:
        self.cacheRootDir = cacheRootDir
    self.cacheDirName = cacheDirName
    self.mode = mode
    self.cacheFile = cacheFile
    self.profile = profile
    self.withShowProgress = withShowProgress
    self.debug = debug
    self.errorDebug = errorDebug

getCachePath(ensureExists=True)

get the path to the default cache

Parameters:

Name Type Description Default
name(str)

the name of the cache to use

required
Source code in lodstorage/storageconfig.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def getCachePath(self, ensureExists=True) -> str:
    """
    get the path to the cache directory

    Args:
        ensureExists (bool): if True create the cache directory if it does not exist yet

    Returns:
        str: the path of the cache directory derived from cacheRootDir and cacheDirName
    """
    # hidden directory below the cache root, e.g. ~/.lodstorage
    cachedir = f"{self.cacheRootDir}/.{self.cacheDirName}"
    if ensureExists:
        # exist_ok avoids a race between an existence check and creation
        os.makedirs(cachedir, exist_ok=True)
    return cachedir

StoreMode

Bases: Enum

possible supported storage modes

Source code in lodstorage/storageconfig.py
11
12
13
14
15
16
17
18
19
20
class StoreMode(Enum):
    """
    possible supported storage modes
    """

    JSONPICKLE = 1  # JSON Pickle
    JSON = 2  # plain JSON
    SQL = 3  # SQL database
    SPARQL = 4  # SPARQL endpoint
    YAML = 5  # YAML files

sync

Created on 2023-12-27

@author: wf

Sync

A class to help with synchronization between two sets of data, each represented as a list of dictionaries.

Source code in lodstorage/sync.py
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
class Sync:
    """
    A class to help with synchronization between two sets of data, each represented as a list of dictionaries.
    """

    def __init__(self, pair: SyncPair):
        """
        Initialize the Sync class with the given Synchronization Pair.

        Args:
            pair (SyncPair): the pair of data sources to synchronize
        """
        self.pair = pair
        # valid direction symbols and the aliases accepted for each side
        self.directions = ["←", "↔", "→"]
        self.sides = {"left": ["←", "l", "left"], "right": ["→", "r", "right"]}
        # precompute which keys need to flow in which direction
        self.sync_dict = self._create_sync_dict()

    def handle_direction_error(self, direction: str):
        """
        Raise a ValueError for an invalid synchronization direction.

        Args:
            direction (str): the offending direction value

        Raises:
            ValueError: always
        """
        invalid_direction_msg = (
            f"Invalid direction '{direction}'. Use {', '.join(self.directions)}."
        )
        raise ValueError(invalid_direction_msg)

    def handle_side_error(self, side: str):
        """
        Raise a ValueError for an invalid side specification.

        Args:
            side (str): the offending side value

        Raises:
            ValueError: always
        """
        invalid_side_msg = f"Invalid side '{side}'. Use {', '.join(self.sides['left'])} for left or {', '.join(self.sides['right'])} for right."
        raise ValueError(invalid_side_msg)

    def _create_sync_dict(self) -> dict:
        """
        Create a dictionary representing the synchronization state between left and right data sources.

        Returns:
            dict: maps each direction symbol to the set of keys present on the corresponding side(s)
        """
        # records without the sync key are ignored
        l_keys = {d[self.pair.l_key] for d in self.pair.l_data if self.pair.l_key in d}
        r_keys = {d[self.pair.r_key] for d in self.pair.r_data if self.pair.r_key in d}

        sync_dict = {
            "←": r_keys - l_keys,  # Present in right but not in left
            "↔": l_keys.intersection(r_keys),  # Present in both
            "→": l_keys - r_keys,  # Present in left but not in right
        }
        return sync_dict

    def get_record_by_pkey(self, side: str, pkey: str) -> Optional[Dict[str, Any]]:
        """
        Retrieves a record by primary key from the appropriate data source as specified by direction.

        Args:
            side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
            pkey (str): The primary key of the record to retrieve.

        Returns:
            Optional[Dict[str, Any]]: The record if found, otherwise None.
        """
        record = None
        if side in self.sides["left"]:  # retrieve from left
            record = self.pair.l_by_pkey.get(pkey)
        elif side in self.sides["right"]:  # retrieve from right
            record = self.pair.r_by_pkey.get(pkey)
        else:
            self.handle_side_error(side)
        return record

    def get_record_by_key(self, side: str, key: str) -> dict:
        """
        Retrieves a record by the given unique key from the appropriate data source as specified by direction.

        Args:
            side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
            key (str): The unique key of the record to retrieve.

        Returns:
            Optional[Dict[str, Any]]: The record if found, otherwise None.

        Raises:
            ValueError: If the provided direction is invalid.
        """
        record = None
        # use the shared side alias table for consistency with get_record_by_pkey
        if side in self.sides["left"]:
            record = next(
                (item for item in self.pair.l_data if item[self.pair.l_key] == key),
                None,
            )
        elif side in self.sides["right"]:
            record = next(
                (item for item in self.pair.r_data if item[self.pair.r_key] == key),
                None,
            )
        else:
            self.handle_side_error(side)
        return record

    def get_keys(self, direction: str) -> set:
        """
        Get the keys for a given direction of synchronization.

        Args:
            direction (str): one of "←", "↔" or "→"

        Returns:
            set: the keys for the given direction

        Raises:
            ValueError: If the provided direction is invalid.
        """
        if direction in self.sync_dict:
            return self.sync_dict[direction]
        else:
            self.handle_direction_error(direction)

    def status_table(self, tablefmt: str = "grid") -> str:
        """
        Create a table representing the synchronization status.

        Args:
            tablefmt (str): the tabulate table format to use

        Returns:
            str: the tabulated status markup
        """
        total_records = sum(len(keys) for keys in self.sync_dict.values())
        if total_records == 0:  # Avoid division by zero
            total_records = 1

        table_data = []
        for direction, keys in self.sync_dict.items():
            num_records = len(keys)
            percentage = (num_records / total_records) * 100
            table_data.append(
                {
                    "left": self.pair.l_name,
                    "↔": direction,
                    "right": self.pair.r_name,
                    "#": num_records,
                    "%": f"{percentage:7.2f}%",
                }
            )

        markup = tabulate(
            table_data,
            headers="keys",
            tablefmt=tablefmt,
            colalign=("right", "center", "left", "right", "right"),
        )
        return markup

__init__(pair)

Initialize the Sync class with the given Synchronization Pair.

Source code in lodstorage/sync.py
66
67
68
69
70
71
72
73
def __init__(self, pair: SyncPair):
    """
    Initialize the Sync class with the given Synchronization Pair.

    Args:
        pair (SyncPair): the pair of data sources to keep in sync
    """
    self.pair = pair
    # valid direction symbols and the aliases accepted for each side
    self.directions = ["←", "↔", "→"]
    self.sides = {"left": ["←", "l", "left"], "right": ["→", "r", "right"]}
    # precompute which keys need to flow in which direction
    self.sync_dict = self._create_sync_dict()

get_keys(direction)

Get the keys for a given direction of synchronization.

Source code in lodstorage/sync.py
148
149
150
151
152
153
154
155
def get_keys(self, direction: str) -> set:
    """
    Get the keys for a given direction of synchronization.

    Args:
        direction (str): one of "←", "↔" or "→"

    Returns:
        set: the keys for the given direction
    """
    try:
        return self.sync_dict[direction]
    except KeyError:
        # unknown direction - delegate to the error handler (raises ValueError)
        self.handle_direction_error(direction)

get_record_by_key(side, key)

Retrieves a record by the given unique key from the appropriate data source as specified by direction.

Parameters:

Name Type Description Default
side str

The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.

required
key str

The unique key of the record to retrieve.

required

Returns:

Type Description
dict

Optional[Dict[str, Any]]: The record if found, otherwise None.

Raises:

Type Description
ValueError

If the provided direction is invalid.

Source code in lodstorage/sync.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def get_record_by_key(self, side: str, key: str) -> dict:
    """
    Retrieves a record by the given unique key from the appropriate data source as specified by direction.

    Args:
        side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
        key (str): The unique key of the record to retrieve.

    Returns:
        Optional[Dict[str, Any]]: The record if found, otherwise None.

    Raises:
        ValueError: If the provided direction is invalid.
    """
    record = None
    # use the shared side alias table for consistency with get_record_by_pkey
    if side in self.sides["left"]:
        record = next(
            (item for item in self.pair.l_data if item[self.pair.l_key] == key),
            None,
        )
    elif side in self.sides["right"]:
        record = next(
            (item for item in self.pair.r_data if item[self.pair.r_key] == key),
            None,
        )
    else:
        self.handle_side_error(side)
    return record

get_record_by_pkey(side, pkey)

Retrieves a record by primary key from the appropriate data source as specified by direction.

Parameters:

Name Type Description Default
side str

The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.

required
pkey str

The primary key of the record to retrieve.

required

Returns:

Type Description
Optional[Dict[str, Any]]

Optional[Dict[str, Any]]: The record if found, otherwise None.

Source code in lodstorage/sync.py
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def get_record_by_pkey(self, side: str, pkey: str) -> Optional[Dict[str, Any]]:
    """
    Retrieves a record by primary key from the appropriate data source as specified by direction.

    Args:
        side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
        pkey (str): The primary key of the record to retrieve.

    Returns:
        Optional[Dict[str, Any]]: The record if found, otherwise None.
    """
    if side in self.sides["left"]:
        # left-hand primary key lookup
        return self.pair.l_by_pkey.get(pkey)
    if side in self.sides["right"]:
        # right-hand primary key lookup
        return self.pair.r_by_pkey.get(pkey)
    # neither alias list matched - report the invalid side (raises ValueError)
    self.handle_side_error(side)

status_table(tablefmt='grid')

Create a table representing the synchronization status.

Source code in lodstorage/sync.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
def status_table(self, tablefmt: str = "grid") -> str:
    """
    Create a table representing the synchronization status.

    Args:
        tablefmt (str): the tabulate table format to use

    Returns:
        str: the tabulated status markup
    """
    # fall back to 1 to avoid division by zero when there are no records at all
    total_records = sum(len(keys) for keys in self.sync_dict.values()) or 1

    table_data = [
        {
            "left": self.pair.l_name,
            "↔": direction,
            "right": self.pair.r_name,
            "#": len(keys),
            "%": f"{(len(keys) / total_records) * 100:7.2f}%",
        }
        for direction, keys in self.sync_dict.items()
    ]

    markup = tabulate(
        table_data,
        headers="keys",
        tablefmt=tablefmt,
        colalign=("right", "center", "left", "right", "right"),
    )
    return markup

SyncPair dataclass

A class to represent a pair of data sources for synchronization.

Attributes: title (str): The title of the synchronization pair. l_name (str): Name of the left data source (e.g., 'local'). r_name (str): Name of the right data source (e.g., 'wikidata'). l_data (List[Dict[str, Any]]): A list of dictionaries from the left data source. r_data (List[Dict[str, Any]]): A list of dictionaries from the right data source. l_key (str): The field name in the left data source dictionaries used as a unique identifier for synchronization. r_key (str): The field name in the right data source dictionaries used as a unique identifier for synchronization. l_pkey(str): the primary key field of the left data source r_pkey(str): the primary key field of the right data source

Example usage: l_data = [{'id_l': '1', 'value': 'a'}, {'id_l': '2', 'value': 'b'}] r_data = [{'id_r': '2', 'value': 'b'}, {'id_r': '3', 'value': 'c'}] pair = SyncPair("Title", "local", "wikidata", l_data, r_data, 'id_l', 'id_r') sync = Sync(pair) print(sync.status_table())

Source code in lodstorage/sync.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
@dataclass
class SyncPair:
    """
    A class to represent a pair of data sources for synchronization.

    Attributes:
        title (str): The title of the synchronization pair.
        l_name (str): Name of the left data source (e.g., 'local').
        r_name (str): Name of the right data source (e.g., 'wikidata').
        l_data (List[Dict[str, Any]]): A list of dictionaries from the left data source.
        r_data (List[Dict[str, Any]]): A list of dictionaries from the right data source.
        l_key (str): The field name in the left data source dictionaries used as a unique identifier for synchronization.
        r_key (str): The field name in the right data source dictionaries used as a unique identifier for synchronization.
        l_pkey (str): the primary key field of the left data source
        r_pkey (str): the primary key field of the right data source

    Example usage:
        l_data = [{'id_l': '1', 'value': 'a'}, {'id_l': '2', 'value': 'b'}]
        r_data = [{'id_r': '2', 'value': 'b'}, {'id_r': '3', 'value': 'c'}]
        pair = SyncPair("Title", "local", "wikidata", l_data, r_data, 'id_l', 'id_r')
        sync = Sync(pair)
        print(sync.status_table())
    """

    title: str
    l_name: str
    r_name: str
    l_data: List[Dict[str, Any]]
    r_data: List[Dict[str, Any]]
    l_key: str
    r_key: str
    l_pkey: Optional[str] = None
    r_pkey: Optional[str] = None
    # lookup dictionaries for quick access by primary key; built in __post_init__
    l_by_pkey: Dict[str, Dict[str, Any]] = field(init=False)
    r_by_pkey: Dict[str, Dict[str, Any]] = field(init=False)

    def __post_init__(self):
        """
        Derive the primary key fields and build the per-side lookup dictionaries.
        """
        # fall back to the sync keys when no explicit primary keys are given
        if self.l_pkey is None:
            self.l_pkey = self.l_key
        if self.r_pkey is None:
            self.r_pkey = self.r_key
        # records lacking the primary key field are skipped
        self.l_by_pkey = {
            record[self.l_pkey]: record
            for record in self.l_data
            if self.l_pkey in record
        }
        self.r_by_pkey = {
            record[self.r_pkey]: record
            for record in self.r_data
            if self.r_pkey in record
        }

tabulateCounter

Created on 2021-06-13

@author: wf

TabulateCounter

Bases: object

helper for tabulating Counters

Source code in lodstorage/tabulateCounter.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class TabulateCounter(object):
    """
    helper for tabulating Counters
    """

    def __init__(self, counter):
        """
        Constructor

        Args:
            counter (Counter): the counter to tabulate - must support most_common()
        """
        self.counter = counter

    def mostCommonTable(
        self, headers=("#", "key", "count", "%"), tablefmt="pretty", limit=50
    ):
        """
        get the most common Table

        Args:
            headers (Sequence[str]): the column headers to use
            tablefmt (str): the tabulate table format
            limit (int): the maximum number of bins to show

        Returns:
            str: the tabulated text
        """
        bins = len(self.counter.keys())
        limit = min(bins, limit)
        total = sum(self.counter.values())
        # first row is a summary row; note it has no percentage column
        binTable = [("total", bins, total)]
        for i, (key, count) in enumerate(self.counter.most_common(limit)):
            binTable.append((i + 1, key, count, count / total * 100.0))

        table = tabulate(binTable, headers=headers, tablefmt=tablefmt, floatfmt=".2f")
        return table

__init__(counter)

Constructor

Source code in lodstorage/tabulateCounter.py
14
15
16
17
18
def __init__(self, counter):
    """
    Constructor

    Args:
        counter (Counter): the counter to tabulate - must support most_common()
    """
    self.counter = counter

mostCommonTable(headers=['#', 'key', 'count', '%'], tablefmt='pretty', limit=50)

get the most common Table

Source code in lodstorage/tabulateCounter.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def mostCommonTable(
    self, headers=("#", "key", "count", "%"), tablefmt="pretty", limit=50
):
    """
    get the most common Table

    Args:
        headers (Sequence[str]): the column headers to use
        tablefmt (str): the tabulate table format
        limit (int): the maximum number of bins to show

    Returns:
        str: the tabulated text
    """
    bins = len(self.counter.keys())
    limit = min(bins, limit)
    total = sum(self.counter.values())
    # first row is a summary row; note it has no percentage column
    binTable = [("total", bins, total)]
    for i, (key, count) in enumerate(self.counter.most_common(limit)):
        binTable.append((i + 1, key, count, count / total * 100.0))

    table = tabulate(binTable, headers=headers, tablefmt=tablefmt, floatfmt=".2f")
    return table

uml

Created on 2020-09-04

@author: wf

UML

Bases: object

UML diagrams via plantuml

Source code in lodstorage/uml.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
class UML(object):
    """
    UML diagrams via plantuml

    """

    # verbatim PlantUML skin parameter snippet appended to generated diagrams
    # when withSkin is True - do not edit without checking rendered output
    skinparams = """
' BITPlan Corporate identity skin params
' Copyright (c) 2015-2020 BITPlan GmbH
' see http://wiki.bitplan.com/PlantUmlSkinParams#BITPlanCI
' skinparams generated by com.bitplan.restmodelmanager
skinparam note {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam component {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam package {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam usecase {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam activity {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam classAttribute {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam interface {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam class {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam object {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
hide Circle
' end of skinparams '
"""

    def __init__(self, debug=False):
        """
        Constructor
        Args:
            debug(boolean): True if debug information should be shown
        """
        self.debug = debug

    def tableListToPlantUml(
        self, tableList, title=None, packageName=None, generalizeTo=None, withSkin=True
    ):
        """
        convert tableList to PlantUml notation

        Args:
            tableList(list): the tableList list of Dicts from getTableList() to convert
            title(string): optional title to be added
            packageName(string): optional packageName to be added
            generalizeTo(string): optional name of a general table to be derived
            withSkin(boolean): if True add default BITPlan skin parameters

        Returns:
            string: the Plantuml notation for the entities in columns of the given tablelist
        """
        uml = ""
        indent = ""
        inherit = ""
        if title is not None:
            uml += "title\n%s\nend title\n" % title
        if packageName is not None:
            uml += "package %s {\n" % packageName
            indent = "  "
        if generalizeTo is not None:
            # derive a general table and draw inheritance arrows from it to all tables
            # NOTE: this mutates the caller's tableList by inserting the general table
            generalTable = Schema.getGeneral(tableList, generalizeTo)
            for table in tableList:
                inherit += "%s%s <|-- %s\n" % (indent, generalizeTo, table["name"])
            tableList.insert(0, generalTable)
        for table in tableList:
            colUml = ""
            # render columns in alphabetical order by name
            sortedColumns = sorted(table["columns"], key=lambda col: col["name"])
            for col in sortedColumns:
                mandatory = "*" if col["notnull"] == 1 else ""  # "*" marks NOT NULL
                pk = "<<PK>>" if col["pk"] == 1 else ""  # mark primary key columns
                colName = col["name"]
                colType = col["type"]
                if "link" in col:
                    # link markup replaces the plain column name
                    colName = col["link"]
                colUml += "%s %s%s : %s %s\n" % (
                    indent,
                    mandatory,
                    colName,
                    colType,
                    pk,
                )
            tableName = table["name"]
            if "notes" in table:
                uml += "Note top of %s\n%s\nEnd note\n" % (tableName, table["notes"])
            uml += "%sclass %s << Entity >> {\n%s%s}\n" % (
                indent,
                tableName,
                colUml,
                indent,
            )
        uml += inherit
        if packageName is not None:
            uml += "}\n"
        if withSkin:
            uml += UML.skinparams
        return uml

    def mergeSchema(
        self,
        schemaManager,
        tableList,
        title=None,
        packageName=None,
        generalizeTo=None,
        withSkin=True,
    ):
        """
        merge Schema and tableList to PlantUml notation

        Args:
            schemaManager(SchemaManager): a schema manager to be used
            tableList(list): the tableList list of Dicts from getTableList() to convert
            title(string): optional title to be added
            packageName(string): optional packageName to be added
            generalizeTo(string): optional name of a general table to be derived
            withSkin(boolean): if True add default BITPlan skin parameters

        Returns:
            string: the Plantuml notation for the entities in columns of the given tablelist

        """
        if schemaManager is not None:
            # enrich each table that references a schema with wiki link notes
            for table in tableList:
                if "schema" in table:
                    schema = schemaManager.schemasByName[table["schema"]]
                    url = "%s/%s" % (schemaManager.baseUrl, schema.name)
                    url = url.replace(" ", "_")  # mediawiki
                    instanceNote = ""
                    if "instances" in table:
                        instanceNote = "\n%d instances " % (table["instances"])
                    # note linking the table to its schema page ([[url label]] syntax)
                    table["notes"] = """[[%s %s]]%s""" % (
                        url,
                        schema.name,
                        instanceNote,
                    )
                    for col in table["columns"]:
                        colName = col["name"]
                        if colName in schema.propsByName:
                            prop = schema.propsByName[colName]
                            if prop.iri is not None:
                                tooltip = ""
                                if prop.definition is not None:
                                    tooltip = "{%s}" % prop.definition
                                # render the column as a link to its property IRI
                                col["link"] = "[[%s%s %s]]" % (
                                    prop.iri,
                                    tooltip,
                                    colName,
                                )
                                col["special"] = True  # keep column even if generalized
                    pass
        plantuml = self.tableListToPlantUml(
            tableList,
            title=title,
            packageName=packageName,
            generalizeTo=generalizeTo,
            withSkin=withSkin,
        )
        return plantuml

__init__(debug=False)

Constructor. Args: debug (boolean): True if debug information should be shown

Source code in lodstorage/uml.py
 96
 97
 98
 99
100
101
102
def __init__(self, debug=False):
    """
    Constructor
    Args:
        debug(boolean): True if debug information should be shown
    """
    # debug flag controlling verbose output
    self.debug = debug

mergeSchema(schemaManager, tableList, title=None, packageName=None, generalizeTo=None, withSkin=True)

merge Schema and tableList to PlantUml notation

Parameters:

Name Type Description Default
schemaManager(SchemaManager)

a schema manager to be used

required
tableList(list)

the tableList list of Dicts from getTableList() to convert

required
title(string)

optional title to be added

required
packageName(string)

optional packageName to be added

required
generalizeTo(string)

optional name of a general table to be derived

required
withSkin(boolean)

if True add default BITPlan skin parameters

required

Returns:

Name Type Description
string

the Plantuml notation for the entities in columns of the given tablelist

Source code in lodstorage/uml.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def mergeSchema(
    self,
    schemaManager,
    tableList,
    title=None,
    packageName=None,
    generalizeTo=None,
    withSkin=True,
):
    """
    merge Schema and tableList to PlantUml notation

    Args:
        schemaManager(SchemaManager): a schema manager to be used
        tableList(list): the tableList list of Dicts from getTableList() to convert
        title(string): optional title to be added
        packageName(string): optional packageName to be added
        generalizeTo(string): optional name of a general table to be derived
        withSkin(boolean): if True add default BITPlan skin parameters

    Returns:
        string: the Plantuml notation for the entities in columns of the given tablelist

    """
    if schemaManager is not None:
        # enrich each table that references a schema with wiki link notes
        for table in tableList:
            if "schema" in table:
                schema = schemaManager.schemasByName[table["schema"]]
                url = "%s/%s" % (schemaManager.baseUrl, schema.name)
                url = url.replace(" ", "_")  # mediawiki
                instanceNote = ""
                if "instances" in table:
                    instanceNote = "\n%d instances " % (table["instances"])
                # note linking the table to its schema page ([[url label]] syntax)
                table["notes"] = """[[%s %s]]%s""" % (
                    url,
                    schema.name,
                    instanceNote,
                )
                for col in table["columns"]:
                    colName = col["name"]
                    if colName in schema.propsByName:
                        prop = schema.propsByName[colName]
                        if prop.iri is not None:
                            tooltip = ""
                            if prop.definition is not None:
                                tooltip = "{%s}" % prop.definition
                            # render the column as a link to its property IRI
                            col["link"] = "[[%s%s %s]]" % (
                                prop.iri,
                                tooltip,
                                colName,
                            )
                            col["special"] = True  # keep column even if generalized
                pass
    plantuml = self.tableListToPlantUml(
        tableList,
        title=title,
        packageName=packageName,
        generalizeTo=generalizeTo,
        withSkin=withSkin,
    )
    return plantuml

tableListToPlantUml(tableList, title=None, packageName=None, generalizeTo=None, withSkin=True)

convert tableList to PlantUml notation

Parameters:

Name Type Description Default
tableList(list)

the tableList list of Dicts from getTableList() to convert

required
title(string)

optional title to be added

required
packageName(string)

optional packageName to be added

required
generalizeTo(string)

optional name of a general table to be derived

required
withSkin(boolean)

if True add default BITPlan skin parameters

required

Returns:

Name Type Description
string

the Plantuml notation for the entities in columns of the given tablelist

Source code in lodstorage/uml.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def tableListToPlantUml(
    self, tableList, title=None, packageName=None, generalizeTo=None, withSkin=True
):
    """
    convert tableList to PlantUml notation

    Args:
        tableList(list): the tableList list of Dicts from getTableList() to convert
        title(string): optional title to be added
        packageName(string): optional packageName to be added
        generalizeTo(string): optional name of a general table to be derived
        withSkin(boolean): if True add default BITPlan skin parameters

    Returns:
        string: the Plantuml notation for the entities in columns of the given tablelist
    """
    uml = ""
    indent = ""
    inherit = ""
    if title is not None:
        uml += "title\n%s\nend title\n" % title
    if packageName is not None:
        uml += "package %s {\n" % packageName
        indent = "  "
    if generalizeTo is not None:
        # derive a general table and draw inheritance arrows from it to all tables
        # NOTE: this mutates the caller's tableList by inserting the general table
        generalTable = Schema.getGeneral(tableList, generalizeTo)
        for table in tableList:
            inherit += "%s%s <|-- %s\n" % (indent, generalizeTo, table["name"])
        tableList.insert(0, generalTable)
    for table in tableList:
        colUml = ""
        # render columns in alphabetical order by name
        sortedColumns = sorted(table["columns"], key=lambda col: col["name"])
        for col in sortedColumns:
            mandatory = "*" if col["notnull"] == 1 else ""  # "*" marks NOT NULL
            pk = "<<PK>>" if col["pk"] == 1 else ""  # mark primary key columns
            colName = col["name"]
            colType = col["type"]
            if "link" in col:
                # link markup replaces the plain column name
                colName = col["link"]
            colUml += "%s %s%s : %s %s\n" % (
                indent,
                mandatory,
                colName,
                colType,
                pk,
            )
        tableName = table["name"]
        if "notes" in table:
            uml += "Note top of %s\n%s\nEnd note\n" % (tableName, table["notes"])
        uml += "%sclass %s << Entity >> {\n%s%s}\n" % (
            indent,
            tableName,
            colUml,
            indent,
        )
    uml += inherit
    if packageName is not None:
        uml += "}\n"
    if withSkin:
        uml += UML.skinparams
    return uml

version

Created on 2022-03-06

@author: wf

Version

Bases: object

Version handling for pyLoDStorage

Source code in lodstorage/version.py
 9
10
11
12
13
14
15
16
17
18
class Version(object):
    """
    Version handling for pyLoDStorage
    """

    # distribution name as published on PyPI
    name = "pylodstorage"
    # version string taken from the installed package (single source of truth)
    version = lodstorage.__version__
    # initial release date of the project
    date = "2020-09-10"
    # date of the most recent update
    updated = "2024-08-02"
    # short human-readable description of the package
    description = "python List of Dict (Table) Storage library"

xml

Created on 2022-06-20

see https://github.com/tyleradams/json-toolkit https://stackoverflow.com/questions/36021526/converting-an-array-dict-to-xml-in-python

@author: tyleradams @author: wf

Lod2Xml

convert a list of dicts to XML

Source code in lodstorage/xml.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class Lod2Xml:
    """
    convert a list of dicts to XML
    """

    def __init__(
        self, lod, root: str = "root", node_name: callable = (lambda x: "node")
    ):
        """
        construct me with the given list of dicts

        Args:
            lod (list): the list of dicts to convert to XML
            root (str): the name of the root node
            node_name (callable): function that derives the XML element name for each item
        """
        self.lod = lod
        self.root = root
        self.item_name = node_name

    def asXml(self, pretty: bool = True):
        """
        convert result to XML

        Args:
            pretty (bool): if True pretty print the result

        Returns:
            the XML serialization of my list of dicts; pretty-printed text when
            pretty is True, otherwise the raw dicttoxml output
        """
        # attr_type=False suppresses per-element type attributes in the output
        xml = dicttoxml(
            self.lod, custom_root=self.root, item_func=self.item_name, attr_type=False
        )
        if pretty:
            dom = parseString(xml)
            prettyXml = dom.toprettyxml()
        else:
            prettyXml = xml
        return prettyXml

__init__(lod, root='root', node_name=lambda x: 'node')

construct me with the given list of dicts

Parameters:

Name Type Description Default
lod list

the list of dicts to convert to XML

required
root str

the name of the root node

'root'
item_name func

the function to use to calculate node names

required
Source code in lodstorage/xml.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def __init__(
    self, lod, root: str = "root", node_name: callable = (lambda x: "node")
):
    """
    construct me with the given list of dicts

    Args:
        lod (list): the list of dicts to convert to XML
        root (str): the name of the root node
        node_name (callable): function that derives the XML element name for each item
    """
    self.lod = lod
    self.root = root
    self.item_name = node_name

asXml(pretty=True)

convert result to XML

Parameters:

Name Type Description Default
pretty bool

if True pretty print the result

True
Source code in lodstorage/xml.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def asXml(self, pretty: bool = True):
    """
    convert my list of dicts to XML

    Args:
        pretty (bool): if True pretty print the result

    Returns:
        the XML serialization; pretty-printed text when pretty is True,
        otherwise the raw dicttoxml output
    """
    # attr_type=False avoids type attributes on every element
    raw_xml = dicttoxml(
        self.lod,
        custom_root=self.root,
        item_func=self.item_name,
        attr_type=False,
    )
    if not pretty:
        return raw_xml
    document = parseString(raw_xml)
    return document.toprettyxml()

yamlable

Created on 2023-12-08, Extended on 2023-12-16 and 2024-01-25

@author: wf, ChatGPT

Prompts for the development and extension of the 'YamlAble' class within the 'yamlable' module:

  1. Develop 'YamlAble' class in 'yamlable' module. It should convert dataclass instances to/from YAML.
  2. Implement methods for YAML block scalar style and exclude None values in 'YamlAble' class.
  3. Add functionality to remove None values from dataclass instances before YAML conversion.
  4. Ensure 'YamlAble' processes only dataclass instances, with error handling for non-dataclass objects.
  5. Extend 'YamlAble' for JSON serialization and deserialization.
  6. Add methods for saving/loading dataclass instances to/from YAML and JSON files in 'YamlAble'.
  7. Implement loading of dataclass instances from URLs for both YAML and JSON in 'YamlAble'.
  8. Write tests for 'YamlAble' within the pyLodStorage context. Use 'samples 2' example from pyLoDStorage https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/sample2.py as a reference.
  9. Ensure tests cover YAML/JSON serialization, deserialization, and file I/O operations, using the sample-based approach.
  10. Use Google-style docstrings, comments, and type hints in 'YamlAble' class and tests.
  11. Adhere to instructions and seek clarification for any uncertainties.
  12. Add @lod_storable annotation support that will automatically YamlAble support and add @dataclass and @dataclass_json prerequisite behavior to a class

DateConvert

date converter

Source code in lodstorage/yamlable.py
76
77
78
79
80
81
82
83
84
class DateConvert:
    """
    date converter
    """

    @classmethod
    def iso_date_to_datetime(cls, iso_date: str) -> datetime.date:
        """
        Convert an ISO-8601 date string (YYYY-MM-DD) to a date object.

        A falsy input (None or empty string) yields None.
        """
        if not iso_date:
            return None
        return datetime.strptime(iso_date, "%Y-%m-%d").date()

YamlAble

Bases: Generic[T]

An extended YAML handler class for converting dataclass objects to and from YAML format, and handling loading from and saving to files and URLs.

Source code in lodstorage/yamlable.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
class YamlAble(Generic[T]):
    """
    An extended YAML handler class for converting dataclass objects to and from YAML format,
    and handling loading from and saving to files and URLs.
    """

    def _yaml_setup(self):
        """
        Initializes the YamlAble handler, setting up custom representers and preparing it for various operations.

        Raises:
            ValueError: if I am not a dataclass instance
        """
        if not is_dataclass(self):
            raise ValueError("I must be a dataclass instance.")
        if not hasattr(self, "_yaml_dumper"):
            # use a private Dumper subclass so the custom representers do not
            # leak into the globally shared yaml.Dumper class and affect
            # unrelated yaml.dump callers in the same process
            class IsolatedDumper(yaml.Dumper):
                pass

            IsolatedDumper.ignore_aliases = lambda *_args: True
            IsolatedDumper.add_representer(type(None), self.represent_none)
            IsolatedDumper.add_representer(str, self.represent_literal)
            self._yaml_dumper = IsolatedDumper

    def represent_none(self, _, __) -> yaml.Node:
        """
        Custom representer for ignoring None values in the YAML output.

        PyYAML invokes representers as fn(dumper, data), so `_` is the active
        Dumper instance and `__` is the None value being represented.
        """
        # represent_scalar must be called on the Dumper *instance* supplied by
        # PyYAML; calling it via the Dumper class (as before) raised a
        # TypeError as soon as a None value actually reached the dumper
        dumper = _
        return dumper.represent_scalar("tag:yaml.org,2002:null", "")

    def represent_literal(self, dumper: yaml.Dumper, data: str) -> yaml.Node:
        """
        Custom representer for block scalar style for strings.
        """
        # multi-line strings are emitted in literal block style ('|')
        if "\n" in data:
            return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
        return dumper.represent_scalar("tag:yaml.org,2002:str", data)

    def to_yaml(
        self,
        ignore_none: bool = True,
        ignore_underscore: bool = True,
        allow_unicode: bool = True,
        sort_keys: bool = False,
    ) -> str:
        """
        Converts this dataclass object to a YAML string, with options to omit None values and/or underscore-prefixed variables,
        and using block scalar style for strings.

        Args:
            ignore_none: Flag to indicate whether None values should be removed from the YAML output.
            ignore_underscore: Flag to indicate whether attributes starting with an underscore should be excluded from the YAML output.
            allow_unicode: Flag to indicate whether to allow unicode characters in the output.
            sort_keys: Flag to indicate whether to sort the dictionary keys in the output.

        Returns:
            A string representation of the dataclass object in YAML format.
        """
        obj_dict = asdict(self)
        self._yaml_setup()
        clean_dict = self.remove_ignored_values(
            obj_dict, ignore_none, ignore_underscore
        )
        yaml_str = yaml.dump(
            clean_dict,
            Dumper=self._yaml_dumper,
            default_flow_style=False,
            allow_unicode=allow_unicode,
            sort_keys=sort_keys,
        )
        return yaml_str

    @classmethod
    def from_yaml(cls: Type[T], yaml_str: str) -> T:
        """
        Deserializes a YAML string to a dataclass instance.

        Args:
            yaml_str (str): A string containing YAML formatted data.

        Returns:
            T: An instance of the dataclass.
        """
        data: dict[str, Any] = yaml.safe_load(yaml_str)
        # from_dict is provided by the dataclass_json mixin applied via @lod_storable
        instance: T = cls.from_dict(data)
        return instance

    @classmethod
    def load_from_yaml_file(cls: Type[T], filename: str) -> T:
        """
        Loads a dataclass instance from a YAML file.

        Args:
            filename (str): The path to the YAML file.

        Returns:
            T: An instance of the dataclass.
        """
        # read explicitly as UTF-8 for consistency with save_to_yaml_file
        # and the JSON load/save counterparts
        with open(filename, "r", encoding="utf-8") as file:
            yaml_str: str = file.read()
        instance: T = cls.from_yaml(yaml_str)
        return instance

    @classmethod
    def load_from_yaml_url(cls: Type[T], url: str) -> T:
        """
        Loads a dataclass instance from a YAML string obtained from a URL.

        Args:
            url (str): The URL pointing to the YAML data.

        Returns:
            T: An instance of the dataclass.
        """
        yaml_str: str = cls.read_from_url(url)
        instance: T = cls.from_yaml(yaml_str)
        return instance

    def save_to_yaml_file(self, filename: str):
        """
        Saves the current dataclass instance to a YAML file.

        Args:
            filename (str): The path where the YAML file will be saved.
        """
        yaml_content: str = self.to_yaml()
        with open(filename, "w", encoding="utf-8") as file:
            file.write(yaml_content)

    @classmethod
    def load_from_json_file(cls: Type[T], filename: Union[str, Path]) -> T:
        """
        Loads a dataclass instance from a JSON file.

        Args:
            filename (str): The path to the JSON file.

        Returns:
            T: An instance of the dataclass.
        """
        with open(filename, "r", encoding="utf-8") as file:
            json_str: str = file.read()
        instance: T = cls.from_json(json_str)
        return instance

    @classmethod
    def load_from_json_url(cls: Type[T], url: str) -> T:
        """
        Loads a dataclass instance from a JSON string obtained from a URL.

        Args:
            url (str): The URL pointing to the JSON data.

        Returns:
            T: An instance of the dataclass.
        """
        json_str: str = cls.read_from_url(url)
        instance: T = cls.from_json(json_str)
        return instance

    def save_to_json_file(self, filename: str, **kwargs):
        """
        Saves the current dataclass instance to a JSON file.

        Args:
            filename (str): The path where the JSON file will be saved.
            **kwargs: Additional keyword arguments for the `to_json` method.
        """
        json_content: str = self.to_json(**kwargs)
        with open(filename, "w", encoding="utf-8") as file:
            file.write(json_content)

    @classmethod
    def read_from_url(cls, url: str) -> str:
        """
        Helper method to fetch content from a URL.

        Raises:
            Exception: if the HTTP status code is not 200
        """
        with urllib.request.urlopen(url) as response:
            if response.status == 200:
                return response.read().decode()
            else:
                raise Exception(f"Unable to load data from URL: {url}")

    @classmethod
    def remove_ignored_values(
        cls,
        value: Any,
        ignore_none: bool = True,
        ignore_underscore: bool = False,
        ignore_empty: bool = True,
    ) -> Any:
        """
        Recursively removes specified types of values from a dictionary or list.
        By default, it removes keys with None values. Optionally, it can also remove keys starting with an underscore.

        Args:
            value: The value to process (dictionary, list, or other).
            ignore_none: Flag to indicate whether None values should be removed.
            ignore_underscore: Flag to indicate whether keys starting with an underscore should be removed.
            ignore_empty: Flag to indicate whether empty collections should be removed.
        """

        def is_valid(v):
            """Check if the value is valid based on the specified flags."""
            if ignore_none and v is None:
                return False
            if ignore_empty:
                if isinstance(v, Mapping) and not v:
                    return False  # Empty dictionary
                if (
                    isinstance(v, Iterable)
                    and not isinstance(v, (str, bytes))
                    and not v
                ):
                    return (
                        False  # Empty list, set, tuple, etc., but not string or bytes
                    )
            return True

        if isinstance(value, Mapping):
            value = {
                k: YamlAble.remove_ignored_values(
                    v, ignore_none, ignore_underscore, ignore_empty
                )
                for k, v in value.items()
                if is_valid(v) and (not ignore_underscore or not k.startswith("_"))
            }
        elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
            # any non-string iterable (tuple, set, ...) is normalized to a list
            value = [
                YamlAble.remove_ignored_values(
                    v, ignore_none, ignore_underscore, ignore_empty
                )
                for v in value
                if is_valid(v)
            ]
        return value

    @classmethod
    def from_dict2(cls: Type[T], data: dict) -> T:
        """
        Creates an instance of a dataclass from a dictionary, typically used in deserialization.

        Returns None for an empty or missing dictionary.
        """
        if not data:
            return None
        # from_dict is provided by the third-party dacite library
        instance = from_dict(data_class=cls, data=data)
        return instance

from_dict2(data) classmethod

Creates an instance of a dataclass from a dictionary, typically used in deserialization.

Source code in lodstorage/yamlable.py
318
319
320
321
322
323
324
325
326
@classmethod
def from_dict2(cls: Type[T], data: dict) -> T:
    """
    Creates an instance of a dataclass from a dictionary, typically used in deserialization.

    Returns None for an empty or missing dictionary.
    """
    if data:
        # delegate to dacite's from_dict for the actual construction
        return from_dict(data_class=cls, data=data)
    return None

from_yaml(yaml_str) classmethod

Deserializes a YAML string to a dataclass instance.

Parameters:

Name Type Description Default
yaml_str str

A string containing YAML formatted data.

required

Returns:

Name Type Description
T T

An instance of the dataclass.

Source code in lodstorage/yamlable.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
@classmethod
def from_yaml(cls: Type[T], yaml_str: str) -> T:
    """
    Deserializes a YAML string to a dataclass instance.

    Args:
        yaml_str (str): A string containing YAML formatted data.

    Returns:
        T: An instance of the dataclass.
    """
    # parse with the safe loader, then delegate to the dataclass factory
    parsed: dict[str, Any] = yaml.safe_load(yaml_str)
    return cls.from_dict(parsed)

load_from_json_file(filename) classmethod

Loads a dataclass instance from a JSON file.

Parameters:

Name Type Description Default
filename str

The path to the JSON file.

required

Returns:

Name Type Description
T T

An instance of the dataclass.

Source code in lodstorage/yamlable.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
@classmethod
def load_from_json_file(cls: Type[T], filename: Union[str, Path]) -> T:
    """
    Loads a dataclass instance from a JSON file.

    Args:
        filename: The path to the JSON file.

    Returns:
        T: An instance of the dataclass.
    """
    # Path.read_text handles open/close and decodes as UTF-8
    json_str: str = Path(filename).read_text(encoding="utf-8")
    return cls.from_json(json_str)

load_from_json_url(url) classmethod

Loads a dataclass instance from a JSON string obtained from a URL.

Parameters:

Name Type Description Default
url str

The URL pointing to the JSON data.

required

Returns:

Name Type Description
T T

An instance of the dataclass.

Source code in lodstorage/yamlable.py
226
227
228
229
230
231
232
233
234
235
236
237
238
239
@classmethod
def load_from_json_url(cls: Type[T], url: str) -> T:
    """
    Loads a dataclass instance from a JSON string obtained from a URL.

    Args:
        url (str): The URL pointing to the JSON data.

    Returns:
        T: An instance of the dataclass.
    """
    # fetch first, then delegate to the dataclass-json deserializer
    payload: str = cls.read_from_url(url)
    return cls.from_json(payload)

load_from_yaml_file(filename) classmethod

Loads a dataclass instance from a YAML file.

Parameters:

Name Type Description Default
filename str

The path to the YAML file.

required

Returns:

Name Type Description
T T

An instance of the dataclass.

Source code in lodstorage/yamlable.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
@classmethod
def load_from_yaml_file(cls: Type[T], filename: str) -> T:
    """
    Loads a dataclass instance from a YAML file.

    Args:
        filename (str): The path to the YAML file.

    Returns:
        T: An instance of the dataclass.
    """
    # read explicitly as UTF-8 for consistency with save_to_yaml_file
    # and the JSON load/save counterparts (previously the platform default
    # encoding was used, which breaks round-trips on e.g. Windows/cp1252)
    with open(filename, "r", encoding="utf-8") as file:
        yaml_str: str = file.read()
    instance: T = cls.from_yaml(yaml_str)
    return instance

load_from_yaml_url(url) classmethod

Loads a dataclass instance from a YAML string obtained from a URL.

Parameters:

Name Type Description Default
url str

The URL pointing to the YAML data.

required

Returns:

Name Type Description
T T

An instance of the dataclass.

Source code in lodstorage/yamlable.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
@classmethod
def load_from_yaml_url(cls: Type[T], url: str) -> T:
    """
    Loads a dataclass instance from a YAML string obtained from a URL.

    Args:
        url (str): The URL pointing to the YAML data.

    Returns:
        T: An instance of the dataclass.
    """
    # fetch first, then reuse the plain YAML deserialization path
    payload: str = cls.read_from_url(url)
    return cls.from_yaml(payload)

read_from_url(url) classmethod

Helper method to fetch content from a URL.

Source code in lodstorage/yamlable.py
253
254
255
256
257
258
259
260
261
262
@classmethod
def read_from_url(cls, url: str) -> str:
    """
    Helper method to fetch content from a URL.

    Raises:
        Exception: if the HTTP status code is not 200
    """
    with urllib.request.urlopen(url) as response:
        if response.status != 200:
            raise Exception(f"Unable to load data from URL: {url}")
        return response.read().decode()

remove_ignored_values(value, ignore_none=True, ignore_underscore=False, ignore_empty=True) classmethod

Recursively removes specified types of values from a dictionary or list. By default, it removes keys with None values. Optionally, it can also remove keys starting with an underscore.

Parameters:

Name Type Description Default
value Any

The value to process (dictionary, list, or other).

required
ignore_none bool

Flag to indicate whether None values should be removed.

True
ignore_underscore bool

Flag to indicate whether keys starting with an underscore should be removed.

False
ignore_empty bool

Flag to indicate whether empty collections should be removed.

True
Source code in lodstorage/yamlable.py
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
@classmethod
def remove_ignored_values(
    cls,
    value: Any,
    ignore_none: bool = True,
    ignore_underscore: bool = False,
    ignore_empty: bool = True,
) -> Any:
    """
    Recursively removes specified types of values from a dictionary or list.
    By default, it removes keys with None values. Optionally, it can also remove keys starting with an underscore.

    Args:
        value: The value to process (dictionary, list, or other).
        ignore_none: Flag to indicate whether None values should be removed.
        ignore_underscore: Flag to indicate whether keys starting with an underscore should be removed.
        ignore_empty: Flag to indicate whether empty collections should be removed.
    """

    def keep(candidate) -> bool:
        """decide whether the candidate value survives the filtering"""
        if ignore_none and candidate is None:
            return False
        if ignore_empty and not candidate:
            # drop empty dicts and empty non-string iterables;
            # empty strings and bytes are kept
            if isinstance(candidate, Mapping):
                return False
            if isinstance(candidate, Iterable) and not isinstance(
                candidate, (str, bytes)
            ):
                return False
        return True

    recurse = YamlAble.remove_ignored_values
    if isinstance(value, Mapping):
        filtered = {}
        for key, item in value.items():
            if not keep(item):
                continue
            if ignore_underscore and key.startswith("_"):
                continue
            filtered[key] = recurse(item, ignore_none, ignore_underscore, ignore_empty)
        return filtered
    if isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
        # any non-string iterable (tuple, set, ...) is normalized to a list
        return [
            recurse(item, ignore_none, ignore_underscore, ignore_empty)
            for item in value
            if keep(item)
        ]
    return value

represent_literal(dumper, data)

Custom representer for block scalar style for strings.

Source code in lodstorage/yamlable.py
111
112
113
114
115
116
117
def represent_literal(self, dumper: yaml.Dumper, data: str) -> yaml.Node:
    """
    Custom representer for block scalar style for strings.

    Multi-line strings are emitted in literal block style ('|');
    single-line strings as plain scalars.
    """
    style = "|" if "\n" in data else None
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)

represent_none(_, __)

Custom representer for ignoring None values in the YAML output.

Source code in lodstorage/yamlable.py
105
106
107
108
109
def represent_none(self, _, __) -> yaml.Node:
    """
    Custom representer for ignoring None values in the YAML output.

    PyYAML invokes representers as fn(dumper, data), so `_` is the active
    Dumper instance and `__` is the None value being represented.
    """
    # represent_scalar must be called on the Dumper *instance* supplied by
    # PyYAML; calling it via the Dumper class stored in self._yaml_dumper
    # (as before) raised a TypeError as soon as a None value actually
    # reached the dumper
    dumper = _
    return dumper.represent_scalar("tag:yaml.org,2002:null", "")

save_to_json_file(filename, **kwargs)

Saves the current dataclass instance to a JSON file.

Parameters:

Name Type Description Default
filename str

The path where the JSON file will be saved.

required
**kwargs

Additional keyword arguments for the to_json method.

{}
Source code in lodstorage/yamlable.py
241
242
243
244
245
246
247
248
249
250
251
def save_to_json_file(self, filename: str, **kwargs):
    """
    Saves the current dataclass instance to a JSON file.

    Args:
        filename (str): The path where the JSON file will be saved.
        **kwargs: Additional keyword arguments for the `to_json` method.
    """
    payload: str = self.to_json(**kwargs)
    # Path.write_text opens, writes as UTF-8 and closes in one call
    Path(filename).write_text(payload, encoding="utf-8")

save_to_yaml_file(filename)

Saves the current dataclass instance to a YAML file.

Parameters:

Name Type Description Default
filename str

The path where the YAML file will be saved.

required
Source code in lodstorage/yamlable.py
199
200
201
202
203
204
205
206
207
208
def save_to_yaml_file(self, filename: str):
    """
    Saves the current dataclass instance to a YAML file.

    Args:
        filename (str): The path where the YAML file will be saved.
    """
    with open(filename, "w", encoding="utf-8") as yaml_file:
        yaml_file.write(self.to_yaml())

to_yaml(ignore_none=True, ignore_underscore=True, allow_unicode=True, sort_keys=False)

Converts this dataclass object to a YAML string, with options to omit None values and/or underscore-prefixed variables, and using block scalar style for strings.

Parameters:

Name Type Description Default
ignore_none bool

Flag to indicate whether None values should be removed from the YAML output.

True
ignore_underscore bool

Flag to indicate whether attributes starting with an underscore should be excluded from the YAML output.

True
allow_unicode bool

Flag to indicate whether to allow unicode characters in the output.

True
sort_keys bool

Flag to indicate whether to sort the dictionary keys in the output.

False

Returns:

Type Description
str

A string representation of the dataclass object in YAML format.

Source code in lodstorage/yamlable.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def to_yaml(
    self,
    ignore_none: bool = True,
    ignore_underscore: bool = True,
    allow_unicode: bool = True,
    sort_keys: bool = False,
) -> str:
    """
    Converts this dataclass object to a YAML string, with options to omit None values and/or underscore-prefixed variables,
    and using block scalar style for strings.

    Args:
        ignore_none: Flag to indicate whether None values should be removed from the YAML output.
        ignore_underscore: Flag to indicate whether attributes starting with an underscore should be excluded from the YAML output.
        allow_unicode: Flag to indicate whether to allow unicode characters in the output.
        sort_keys: Flag to indicate whether to sort the dictionary keys in the output.

    Returns:
        A string representation of the dataclass object in YAML format.
    """
    # snapshot my fields first, then make sure the custom dumper exists
    raw_dict = asdict(self)
    self._yaml_setup()
    filtered = self.remove_ignored_values(raw_dict, ignore_none, ignore_underscore)
    return yaml.dump(
        filtered,
        Dumper=self._yaml_dumper,
        default_flow_style=False,
        allow_unicode=allow_unicode,
        sort_keys=sort_keys,
    )

lod_storable(cls)

Decorator to make a class LoDStorable by inheriting from YamlAble. This decorator also ensures the class is a dataclass and has JSON serialization/deserialization capabilities.

Source code in lodstorage/yamlable.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def lod_storable(cls):
    """
    Decorator to make a class LoDStorable by
    inheriting from YamlAble.
    This decorator also ensures the class is a
    dataclass and has JSON serialization/deserialization
    capabilities.
    """
    cls = dataclass(cls)  # Apply the @dataclass decorator
    cls = dataclass_json(cls)  # Apply the @dataclass_json decorator

    class LoDStorable(YamlAble, cls):
        """
        decorator class
        """

        __qualname__ = cls.__qualname__
        pass

    # impersonate the decorated class so repr, pickling and introspection
    # point at the original definition instead of this wrapper;
    # __module__ was previously left as this module's name, which breaks
    # pickling of decorated instances
    LoDStorable.__name__ = cls.__name__
    LoDStorable.__doc__ = cls.__doc__
    LoDStorable.__module__ = cls.__module__

    return LoDStorable

yamlablemixin

YamlAbleMixin

Bases: object

allow reading and writing derived objects from a yaml file

Source code in lodstorage/yamlablemixin.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
class YamlAbleMixin(object):
    """allow reading and writing derived objects from a yaml file"""

    # set to True to trace read/write operations on the console
    debug = False

    # read me from a yaml file
    @staticmethod
    def readYaml(name):
        """
        read an object from the YAML file for the given name

        Args:
            name (str): the file name or basename (".yaml" is appended if missing)

        Returns:
            the deserialized object or None if there is no such file
        """
        yamlFile = name
        if not yamlFile.endswith(".yaml"):
            yamlFile = yamlFile + ".yaml"
        # is there a yamlFile for the given name
        if os.path.isfile(yamlFile):
            # read explicitly as UTF-8 for consistency with writeYaml,
            # which writes UTF-8 (previously the platform default was used)
            with io.open(yamlFile, "r", encoding="utf-8") as stream:
                if YamlAbleMixin.debug:
                    print("reading %s" % (yamlFile))
                # SECURITY: yaml.Loader can instantiate arbitrary Python
                # objects - this is required to round-trip writeYaml output,
                # so only use readYaml on trusted files
                result = yaml.load(stream, Loader=yaml.Loader)
                if YamlAbleMixin.debug:
                    print(result)
                return result
        else:
            return None

    # write me to my yaml file
    def writeYaml(self, name):
        """
        write me to the YAML file for the given name

        Args:
            name (str): the file name or basename (".yaml" is appended if missing)
        """
        yamlFile = name
        if not yamlFile.endswith(".yaml"):
            yamlFile = yamlFile + ".yaml"
        with io.open(yamlFile, "w", encoding="utf-8") as stream:
            yaml.dump(self, stream)
            if YamlAbleMixin.debug:
                print(yaml.dump(self))