Skip to content

pyLodStorage API Documentation

cache

Created on 2024-03-09

@author: wf

refactored from https://github.com/WolfgangFahl/pyCEURmake/blob/main/ceurws/utils/json_cache.py by Tim Holzheim

Cache

Represents cache metadata and its file extension.

Attributes:

Name Type Description
name str

The name of the cache.

extension str

The file extension for the cache (e.g., 'json', 'csv').

size int

The size of the cache file in bytes.

count Optional[int]

Optional; the number of items in the cache, if applicable.

count_attr str

the name of the attribute to determine the number of items, if applicable

last_accessed datetime

Optional; the last accessed timestamp of the cache.

Source code in lodstorage/cache.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
@lod_storable
class Cache:
    """
    Represents cache metadata and its file extension.

    Attributes:
        name: The name of the cache.
        extension: The file extension for the cache including the leading dot
            (e.g., '.json', '.csv') - it is concatenated directly to the name.
        size: The size of the cache file in bytes.
        count: Optional; the number of items in the cache, if applicable.
        count_attr: the name of the attribute to determine the number of items, if applicable
        last_accessed: Optional; the last accessed timestamp of the cache.
    """

    name: str
    extension: str
    # fix: None is not a valid str - declare as Optional
    count_attr: Optional[str] = None
    count: Optional[int] = None

    def set_path(self, base_path: str):
        """
        Set my path based on the given base_path and ensure the parent directory is created.

        Args:
            base_path (str): The base path where the directory should be created.
        """
        # extension must carry its own leading dot (e.g. ".json")
        self.path = Path(f"{base_path}/{self.name}{self.extension}")
        # Ensure parent directory is created
        self.path.parent.mkdir(parents=True, exist_ok=True)

    @property
    def is_stored(self) -> bool:
        """Determines if the cache file exists and is not empty.

        Note: a file of exactly one byte is treated as empty.
        """
        return self.path.is_file() and self.path.stat().st_size > 1

    @property
    def size(self) -> int:
        """The size of the cache file in bytes, or 0 if the file does not exist."""
        cache_size = os.path.getsize(self.path) if os.path.isfile(self.path) else 0
        return cache_size

    @property
    def last_accessed(self) -> Optional[datetime]:
        """The modification timestamp of the cache file, or None if it does not exist.

        Note: this uses the file's *modification* time (getmtime), not its
        access time - despite the property name.
        """
        cache_last_accessed = (
            datetime.fromtimestamp(os.path.getmtime(self.path))
            if os.path.isfile(self.path)
            else None
        )
        return cache_last_accessed

is_stored: bool property

Determines if the cache file exists and is not empty.

set_path(base_path)

Set my path based on the given base_path and ensure the parent directory is created.

Parameters:

Name Type Description Default
base_path str

The base path where the directory should be created.

required
Source code in lodstorage/cache.py
40
41
42
43
44
45
46
47
48
49
def set_path(self, base_path: str):
    """
    Derive and store my cache file path under base_path, creating the
    parent directory if necessary.

    Args:
        base_path (str): The base path where the directory should be created.
    """
    cache_file = Path(f"{base_path}/{self.name}{self.extension}")
    self.path = cache_file
    # make sure the directory that will hold the cache file exists
    cache_file.parent.mkdir(parents=True, exist_ok=True)

CacheManager

Manages multiple cache files with various extensions.

Attributes:

Name Type Description
name str

The name used for the base directory where cache files are stored.

caches Dict[str, Cache]

A dictionary to track each cache's metadata.

Source code in lodstorage/cache.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
@lod_storable
class CacheManager:
    """Manages multiple cache files with various extensions.

    Attributes:
        name: The name used for the base directory where cache files are stored.
        caches: A dictionary to track each cache's metadata, keyed by "<name><ext>".
    """

    name: str
    caches: Dict[str, Cache] = field(default_factory=dict)

    def __post_init__(self):
        # base directory is resolved lazily in base_path()
        self.base_dir = None

    def base_path(self) -> str:
        """Fetches the base path for this cache manager and ensures the directory exists.

        Returns:
            The base path - a hidden directory named after self.name under
            base_dir (defaulting to the user's home directory).
        """
        if self.base_dir is None:
            self.base_dir = os.path.expanduser("~")
        base_path = os.path.join(self.base_dir, f".{self.name}")
        os.makedirs(base_path, exist_ok=True)
        return base_path

    def get_cache_by_name(self, lod_name, ext=".json") -> Cache:
        """
        Retrieves or creates a cache object by name and extension.

        Args:
            lod_name (str): The name of the cache to retrieve or create.
            ext (str): The file extension for the cache.

        Returns:
            Cache: An existing or newly created Cache object.
        """
        cache_key = lod_name + ext
        # fix: membership must be tested with the full key (name + extension);
        # the previous check `lod_name in self.caches` never matched, so a
        # fresh Cache was created on every call
        if cache_key in self.caches:
            cache = self.caches[cache_key]
        else:
            cache = Cache(lod_name, ext)
            self.caches[cache_key] = cache
        base_path = self.base_path()
        cache.set_path(base_path)
        return cache

    def load(
        self,
        lod_name: str,
        ext: str = ".json",
        cls: Optional[Type["YamlAble"]] = None,
        count_attr: Optional[str] = None,
    ) -> Union[List, Dict, None]:
        """
        Load data from a cache file. This method supports JSON and, if a relevant class is provided, other formats like YAML.

        Args:
            lod_name (str): The name of the list of dicts or class instances to read from cache.
            ext (str): The extension of the cache file, indicating the format (default is ".json").
            cls (Optional[Type[YamlAble]]): The class type for deserialization. This class must have
                load_from_json_file() or load_from_yaml_file() class methods, depending on the file extension.
            count_attr (Optional[str]): the name of the attribute of the loaded result used to update cache.count

        Returns:
            Union[List, Dict, None]: A list of dicts, a list of class instances, a single dict, or None if the cache is not stored.

        Raises:
            ValueError: if the extension is unsupported, or YAML loading is requested without a suitable cls.
        """
        cache = self.get_cache_by_name(lod_name, ext)
        cache.count_attr = count_attr
        result = None
        if cache.is_stored:
            if ext == ".json":
                # fix: check for the JSON loader; the previous code probed
                # for load_from_yaml_file and then called load_from_json_file
                if cls and hasattr(cls, "load_from_json_file"):
                    result = cls.load_from_json_file(cache.path)
                else:
                    with open(cache.path, encoding="utf-8") as json_file:
                        result = orjson.loads(json_file.read())
            elif ext == ".yaml":
                if cls and hasattr(cls, "load_from_yaml_file"):
                    result = cls.load_from_yaml_file(cache.path)
                else:
                    raise ValueError(
                        "YAML deserialization requires a cls parameter that is a subclass of YamlAble."
                    )
            else:
                raise ValueError(f"Unsupported file extension {ext} for loading.")

            # Dynamic count update based on count_attr if applicable
            if count_attr and hasattr(result, count_attr):
                cache.count = len(getattr(result, count_attr))
            elif isinstance(result, list):
                cache.count = len(result)

        return result

    def store(
        self,
        cache_name: str,
        data_to_store: Union[List, Dict],
        ext: str = ".json",
        count_attr: Optional[str] = None,
    ) -> Cache:
        """
        Stores data into a cache file, handling serialization based on the specified file extension.
        Supports JSON and YAML formats, and custom serialization for classes that provide specific
        serialization methods.

        Args:
            cache_name (str): The identifier for the cache where the data will be stored.
            data_to_store (Union[List, Dict]): The data to be stored in the cache. This can be a list of dictionaries,
                a single dictionary, or instances of data classes providing the serialization methods below.
            ext (str): The file extension indicating the serialization format (e.g., '.json', '.yaml').
                Defaults to '.json'.
            count_attr (Optional[str]): the name of the attribute of data_to_store used to update cache.count

        Returns:
            Cache: the cache metadata object for the stored data.

        Raises:
            ValueError: If the file extension is unsupported or if required serialization methods are missing.
        """
        cache = self.get_cache_by_name(cache_name, ext)
        cache.count_attr = count_attr
        # note: get_cache_by_name has already set the cache path -
        # the previous extra cache.set_path(self.base_path()) call was redundant

        if ext == ".json":
            if isinstance(data_to_store, list):
                # lists of dicts are serialized directly with orjson
                json_str = orjson.dumps(data_to_store, option=orjson.OPT_INDENT_2)
                with cache.path.open("wb") as json_file:
                    json_file.write(json_str)
            else:
                # non-list payloads must provide their own serializer
                if hasattr(data_to_store, "save_to_json_file"):
                    data_to_store.save_to_json_file(str(cache.path))
                else:
                    raise ValueError(
                        "JSON serialization requires a 'save_to_json_file' method"
                    )
        elif ext == ".yaml":
            if hasattr(data_to_store, "save_to_yaml_file"):
                data_to_store.save_to_yaml_file(str(cache.path))
            else:
                raise ValueError(
                    "YAML serialization requires a 'save_to_yaml_file' method."
                )
        else:
            raise ValueError(f"Unsupported file extension {ext}.")

        # Update cache metadata post storing
        if count_attr and hasattr(data_to_store, count_attr):
            cache.count = len(getattr(data_to_store, count_attr))
        elif isinstance(data_to_store, list):
            cache.count = len(data_to_store)

        return cache

base_path()

Fetches the base path for this cache manager.

Parameters:

None — this method takes no arguments; the path is derived from
`self.base_dir` (defaulting to the user's home directory) and `self.name`.

Returns:

Type Description
str

The base path

Source code in lodstorage/cache.py
86
87
88
89
90
91
92
93
94
95
96
97
98
99
def base_path(self) -> str:
    """Fetches the base path for this cache manager and ensures the directory exists.

    Returns:
        str: The base path - a hidden directory named after self.name under
        self.base_dir (defaulting to the user's home directory).
    """
    # resolve the base directory lazily on first use
    if self.base_dir is None:
        self.base_dir = os.path.expanduser("~")
    base_path = os.path.join(self.base_dir, f".{self.name}")
    os.makedirs(base_path, exist_ok=True)
    return base_path

get_cache_by_name(lod_name, ext='.json')

Retrieves or creates a cache object by name and extension.

Parameters:

Name Type Description Default
cache_name str

The name of the cache to retrieve or create.

required
ext str

The file extension for the cache.

'.json'

Returns:

Name Type Description
Cache Cache

An existing or newly created Cache object.

Source code in lodstorage/cache.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
def get_cache_by_name(self, lod_name, ext=".json") -> Cache:
    """
    Retrieves or creates a cache object by name and extension.

    Args:
        cache_name (str): The name of the cache to retrieve or create.
        ext (str): The file extension for the cache.

    Returns:
        Cache: An existing or newly created Cache object.
    """
    if lod_name in self.caches:
        cache = self.caches[lod_name + ext]
    else:
        cache = Cache(lod_name, ext)
        self.caches[lod_name + ext] = cache
    base_path = self.base_path()
    cache.set_path(base_path)
    return cache

load(lod_name, ext='.json', cls=None, count_attr=None)

Load data from a cache file. This method supports JSON and, if a relevant class is provided, other formats like YAML.

Parameters:

Name Type Description Default
lod_name str

The name of the list of dicts or class instances to read from cache.

required
ext str

The extension of the cache file, indicating the format (default is ".json").

'.json'
cls Optional[Type[YamlAble]]

The class type for deserialization. This class must have from_json() or from_yaml() class methods for deserialization, depending on the file extension.

None
count_attr(str)

the name of the attribute of the loaded result used to update `cache.count`

required

Returns: Union[List, Dict, None]: A list of dicts, a list of class instances, a single dict, or None if the cache is not stored.

Source code in lodstorage/cache.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def load(
    self,
    lod_name: str,
    ext: str = ".json",
    cls: Optional[Type["YamlAble"]] = None,
    count_attr: Optional[str] = None,
) -> Union[List, Dict, None]:
    """
    Load data from a cache file. This method supports JSON and, if a relevant class is provided, other formats like YAML.

    Args:
        lod_name (str): The name of the list of dicts or class instances to read from cache.
        ext (str): The extension of the cache file, indicating the format (default is ".json").
        cls (Optional[Type[YamlAble]]): The class type for deserialization. This class must have
            load_from_json_file() or load_from_yaml_file() class methods, depending on the file extension.
        count_attr (Optional[str]): the name of the attribute of the loaded result used to update cache.count

    Returns:
        Union[List, Dict, None]: A list of dicts, a list of class instances, a single dict, or None if the cache is not stored.

    Raises:
        ValueError: if the extension is unsupported, or YAML loading is requested without a suitable cls.
    """
    cache = self.get_cache_by_name(lod_name, ext)
    cache.count_attr = count_attr
    result = None
    if cache.is_stored:
        if ext == ".json":
            # fix: check for the JSON loader; the previous code probed
            # for load_from_yaml_file and then called load_from_json_file
            if cls and hasattr(cls, "load_from_json_file"):
                result = cls.load_from_json_file(cache.path)
            else:
                with open(cache.path, encoding="utf-8") as json_file:
                    result = orjson.loads(json_file.read())
        elif ext == ".yaml":
            if cls and hasattr(cls, "load_from_yaml_file"):
                result = cls.load_from_yaml_file(cache.path)
            else:
                raise ValueError(
                    "YAML deserialization requires a cls parameter that is a subclass of YamlAble."
                )
        else:
            raise ValueError(f"Unsupported file extension {ext} for loading.")

        # Dynamic count update based on count_attr if applicable
        if count_attr and hasattr(result, count_attr):
            cache.count = len(getattr(result, count_attr))
        elif isinstance(result, list):
            cache.count = len(result)

    return result

store(cache_name, data_to_store, ext='.json', count_attr=None)

Stores data into a cache file, handling serialization based on the specified file extension. Supports JSON and YAML formats, and custom serialization for classes that provide specific serialization methods.

Parameters:

Name Type Description Default
cache_name str

The identifier for the cache where the data will be stored.

required
data_to_store Union[List, Dict]

The data to be stored in the cache. This can be a list of dictionaries, a single dictionary, or instances of data classes if cls is provided.

required
ext str

The file extension indicating the serialization format (e.g., '.json', '.yaml'). Defaults to '.json'.

'.json'
count_attr(str)

the name of the attribute of `data_to_store` used to update `cache.count`

required

Raises:

Type Description
ValueError

If the file extension is unsupported or if required methods for serialization are not implemented in cls.

Source code in lodstorage/cache.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
def store(
    self,
    cache_name: str,
    data_to_store: Union[List, Dict],
    ext: str = ".json",
    count_attr: str = None,
) -> Cache:
    """
    Stores data into a cache file, handling serialization based on the specified file extension.
    Supports JSON and YAML formats, and custom serialization for classes that provide specific
    serialization methods.

    Args:
        cache_name (str): The identifier for the cache where the data will be stored.
        data_to_store (Union[List, Dict]): The data to be stored in the cache. This can be a list of dictionaries,
                                           a single dictionary, or instances of data classes if `cls` is provided.
        ext (str): The file extension indicating the serialization format (e.g., '.json', '.yaml').
                   Defaults to '.json'.
        count_attr(str): the name of attribute data_to_store for updating the cache.count s

    Raises:
        ValueError: If the file extension is unsupported or if required methods for serialization are not implemented in `cls`.
    """
    cache = self.get_cache_by_name(cache_name, ext)
    cache.count_attr = count_attr
    cache.set_path(self.base_path())

    if ext == ".json":
        # Check if  cls has a method `save_to_json_file`
        # that accepts a file path and data to store
        if isinstance(data_to_store, list):
            json_str = orjson.dumps(data_to_store, option=orjson.OPT_INDENT_2)
            with cache.path.open("wb") as json_file:
                json_file.write(json_str)
        else:
            if hasattr(data_to_store, "save_to_json_file"):
                data_to_store.save_to_json_file(str(cache.path))
            else:
                raise ValueError(
                    "JSON serialization requires a 'save_to_json_file' method"
                )
    elif ext == ".yaml":
        if hasattr(data_to_store, "save_to_yaml_file"):
            # Assuming cls has a method `save_to_yaml_file` that accepts a file path and data to store
            data_to_store.save_to_yaml_file(str(cache.path))
        else:
            raise ValueError(
                "YAML serialization requires a 'save_to_yaml_file' method."
            )
    else:
        raise ValueError(f"Unsupported file extension {ext}.")

    # Update cache metadata post storing
    if count_attr and hasattr(data_to_store, count_attr):
        cache.count = len(getattr(data_to_store, count_attr))
    elif isinstance(data_to_store, list):
        cache.count = len(data_to_store)

    return cache

docstring_parser

Created on 2024-01-21

@author: wf

DocstringParser

A Python docstring parser.

Source code in lodstorage/docstring_parser.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
class DocstringParser:
    """
    A Python docstring parser.

    Builds a pyparsing grammar that extracts a class description and the
    entries of an optional "Attributes:" section whose lines have the
    form ``name (type): description``.
    """

    def __init__(self):
        # Define basic elements
        identifier = Word(alphas, alphanums + "_")
        # type names may also contain dots and brackets, e.g. Optional[int]
        type_identifier = Word(alphas, alphanums + "_.[]")
        description = restOfLine

        # Define patterns for capturing attributes
        # the "Attributes:" marker itself is suppressed from the results
        attribute_start = Suppress(Literal("Attributes:"))
        # a single attribute line: name(type): description
        self.attribute = Group(
            identifier("name")
            + Suppress("(")
            + Optional(type_identifier("type"))
            + Suppress("):")
            + description("description")
        )

        # Define pattern for class docstring
        # first line is the description, optionally followed by attributes
        class_docstring = restOfLine("class_description") + Optional(
            attribute_start + OneOrMore(self.attribute)("attributes")
        )

        # Updated class_docstring pattern to correctly handle multi-line class descriptions
        # NOTE(review): this appends a second optional description+attributes
        # clause to the pattern above, so both clauses can contribute to the
        # "class_description" and "attributes" result names; presumably
        # intended to gather extra description lines before "Attributes:" -
        # verify against multi-line docstrings
        self.class_docstring = class_docstring + Optional(
            OneOrMore(~attribute_start + restOfLine)("class_description")
            + attribute_start
            + OneOrMore(self.attribute)("attributes")
        )

    def parse(self, docstring: str):
        """
        Parse the given docstring.

        Returns a tuple of (class_description, attributes) where attributes
        maps each attribute name to a dict with "type" and "description".
        Raises a pyparsing ParseException if the docstring does not match
        the grammar (parseAll=True).
        """
        result = self.class_docstring.parseString(docstring, parseAll=True)
        class_description = " ".join(result.class_description).strip()
        attributes = {
            attr.name: {"type": attr.type, "description": attr.description.strip()}
            for attr in result.attributes
        }
        return class_description, attributes

parse(docstring)

Parse the given docstring.

Source code in lodstorage/docstring_parser.py
52
53
54
55
56
57
58
59
60
61
62
def parse(self, docstring: str):
    """
    Parse the given docstring.
    """
    parsed = self.class_docstring.parseString(docstring, parseAll=True)
    # the grammar may collect several description fragments - join them
    class_description = " ".join(parsed.class_description).strip()
    attributes = {}
    for attr in parsed.attributes:
        attributes[attr.name] = {
            "type": attr.type,
            "description": attr.description.strip(),
        }
    return class_description, attributes

entity

Created on 2020-08-19

@author: wf

EntityManager

Bases: YamlAbleMixin, JsonPickleMixin, JSONAbleList

generic entity manager

Source code in lodstorage/entity.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
class EntityManager(YamlAbleMixin, JsonPickleMixin, JSONAbleList):
    """
    generic entity manager

    Manages a list of entities of a given class and persists/restores
    them via one of the StoreMode backends:
    JSON, JSONPICKLE, SQL (sqlite3) or SPARQL.
    """

    def __init__(
        self,
        name,
        entityName,
        entityPluralName: str,
        listName: str = None,
        clazz=None,
        tableName: str = None,
        primaryKey: str = None,
        config=None,
        handleInvalidListTypes=False,
        filterInvalidListTypes=False,
        listSeparator="⇹",
        debug=False,
    ):
        """
        Constructor

        Args:
            name(string): name of this eventManager
            entityName(string): entityType to be managed e.g. Country
            entityPluralName(string): plural of the entityType e.g. Countries
            listName(string): name of the list of entities; defaults to entityPluralName
            clazz(class): the class of the entities to be managed
            tableName(string): name of the SQL table to use; defaults to entityName
            primaryKey(string): the name of the primary key attribute/column, if any
            config(StorageConfig): the configuration to be used if None a default configuration will be used
            handleInvalidListTypes(bool): True if invalidListTypes should be converted or filtered
            filterInvalidListTypes(bool): True if invalidListTypes should be deleted
            listSeparator(str): the symbol to use as a list separator
            debug(boolean): override debug setting when default of config is used via config=None
        """
        self.name = name
        self.entityName = entityName
        self.entityPluralName = entityPluralName
        if listName is None:
            listName = entityPluralName
        if tableName is None:
            tableName = entityName
        self.primaryKey = primaryKey
        if config is None:
            config = StorageConfig.getDefault()
            if debug:
                config.debug = debug
        self.config = config
        super(EntityManager, self).__init__(
            listName=listName,
            clazz=clazz,
            tableName=tableName,
            handleInvalidListTypes=handleInvalidListTypes,
            filterInvalidListTypes=filterInvalidListTypes,
        )
        cacheFile = self.getCacheFile(config=config, mode=config.mode)
        self.showProgress(
            "Creating %smanager(%s) for %s using cache %s"
            % (self.entityName, config.mode, self.name, cacheFile)
        )
        if config.mode is StoreMode.SPARQL:
            if config.endpoint is None:
                raise Exception("no endpoint set for mode sparql")
            self.endpoint = config.endpoint
            self.sparql = SPARQL(
                config.endpoint, debug=config.debug, profile=config.profile
            )
        elif config.mode is StoreMode.SQL:
            self.executeMany = False  # may be True when issues are fixed
        self.listSeparator = listSeparator

    def storeMode(self):
        """
        return my store mode

        Returns:
            StoreMode: the mode of my StorageConfig
        """
        return self.config.mode

    def showProgress(self, msg):
        """display a progress message

        Args:
          msg(string): the message to display
        """
        # only print when progress display is enabled in the configuration
        if self.config.withShowProgress:
            print(msg, flush=True)

    def getCacheFile(self, config=None, mode=StoreMode.SQL):
        """
        get the cache file for this event manager
        Args:
            config(StorageConfig): if None get the cache for my mode
            mode(StoreMode): the storeMode to use

        Returns:
            str: the path of the cache file for file based modes; for SPARQL a
            descriptive pseudo path (there is no local file)
        """
        if config is None:
            config = self.config
        cachedir = config.getCachePath()
        # an explicitly configured cacheFile always wins
        if config.cacheFile is not None:
            return config.cacheFile
        """ get the path to the file for my cached data """
        if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
            extension = f".{mode.name.lower()}"
            cachepath = f"{cachedir}/{self.name}-{self.listName}{extension}"
        elif mode is StoreMode.SPARQL:
            # fixed typo: was "SPAQRL" - this string is display-only
            cachepath = f"SPARQL {self.name}:{config.endpoint}"
        elif mode is StoreMode.SQL:
            cachepath = f"{cachedir}/{self.name}.db"
        else:
            cachepath = f"undefined cachepath for StoreMode {mode}"
        return cachepath

    def removeCacheFile(self):
        """remove my cache file (only applicable for the file based JSON modes)"""
        mode = self.config.mode
        if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
            cacheFile = self.getCacheFile(mode=mode)
            if os.path.isfile(cacheFile):
                os.remove(cacheFile)

    def getSQLDB(self, cacheFile):
        """
        get the SQL database for the given cacheFile

        Args:
            cacheFile(string): the file to get the SQL db from

        Returns:
            SQLDB: the SQL database (also kept as self.sqldb)
        """
        config = self.config
        sqldb = self.sqldb = SQLDB(
            cacheFile, debug=config.debug, errorDebug=config.errorDebug
        )
        return sqldb

    def initSQLDB(
        self,
        sqldb,
        listOfDicts=None,
        withCreate: bool = True,
        withDrop: bool = True,
        sampleRecordCount=-1,
    ):
        """
        initialize my sql DB

        Args:
            sqldb(SQLDB): the SQL database to initialize
            listOfDicts(list): the list of dicts to analyze for type information;
                if None derive type samples from my entity class
            withDrop(boolean): true if the existing Table should be dropped
            withCreate(boolean): true if the create Table command should be executed - false if only the entityInfo should be returned
            sampleRecordCount(int): the number of records to analyze for type information
        Return:
            EntityInfo: the entity information such as CREATE Table command
        """
        if listOfDicts is None:
            listOfDicts = JSONAble.getJsonTypeSamplesForClass(self.clazz)
        entityInfo = sqldb.createTable(
            listOfDicts,
            self.tableName,
            primaryKey=self.primaryKey,
            withCreate=withCreate,
            withDrop=withDrop,
            sampleRecordCount=sampleRecordCount,
        )
        return entityInfo

    def setNone(self, record, fields):
        """
        make sure the given fields in the given record are set to none
        Args:
            record(dict): the record to work on
            fields(list): the list of fields to set to None
        """
        LOD.setNone(record, fields)

    def isCached(self):
        """check whether there is a file containing cached
        data for me

        Returns:
            bool: True if cached data is available for my store mode
        """
        result = False
        config = self.config
        mode = self.config.mode
        if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
            result = os.path.isfile(self.getCacheFile(config=self.config, mode=mode))
        elif mode is StoreMode.SPARQL:
            # @FIXME - make abstract
            query = (
                config.prefix
                + """
SELECT  ?source (COUNT(?source) AS ?sourcecount)
WHERE { 
   ?event cr:Event_source ?source.
}
GROUP by ?source
"""
            )
            sourceCountList = self.sparql.queryAsListOfDicts(query)
            for sourceCount in sourceCountList:
                source = sourceCount["source"]
                recordCount = sourceCount["sourcecount"]
                # heuristic: consider cached when my source has >100 records
                if source == self.name and recordCount > 100:
                    result = True
        elif mode is StoreMode.SQL:
            cacheFile = self.getCacheFile(config=self.config, mode=StoreMode.SQL)
            if os.path.isfile(cacheFile):
                sqlQuery = f"SELECT COUNT(*) AS count FROM {self.tableName}"
                try:
                    sqlDB = self.getSQLDB(cacheFile)
                    countResults = sqlDB.query(sqlQuery)
                    countResult = countResults[0]
                    count = countResult["count"]
                    result = count >= 0
                except Exception as ex:
                    msg = str(ex)
                    # NOTE(review): self.debug is presumably provided by a mixin
                    # or should be self.config.debug - confirm before changing
                    if self.debug:
                        print(msg, file=sys.stderr)
                        sys.stderr.flush()
                    # e.g. sqlite3.OperationalError: no such table: Event_crossref
                    pass
        else:
            # fixed: was self.mode which is never set and would raise AttributeError
            raise Exception("unsupported mode %s" % self.config.mode)
        return result

    def fromCache(
        self,
        force: bool = False,
        getListOfDicts=None,
        append=False,
        sampleRecordCount=-1,
    ):
        """
        get my entries from the cache or from the callback provided

        Args:
            force(bool): force ignoring the cache
            getListOfDicts(callable): a function to call for getting the data
            append(bool): True if records should be appended
            sampleRecordCount(int): the number of records to analyze for type information

        Returns:
            the list of Dicts and as a side effect setting self.cacheFile

        Raises:
            Exception: if the cache misses and no getListOfDicts callback is available
        """
        if not self.isCached() or force:
            startTime = time.time()
            self.showProgress(f"getting {self.entityPluralName} for {self.name} ...")
            if getListOfDicts is None:
                # fall back to a getListOfDicts method if my subclass provides one
                if hasattr(self, "getListOfDicts"):
                    getListOfDicts = self.getListOfDicts
                else:
                    raise Exception(
                        "from Cache failed and no secondary cache via getListOfDicts specified"
                    )
            listOfDicts = getListOfDicts()
            duration = time.time() - startTime
            self.showProgress(
                f"got {len(listOfDicts)} {self.entityPluralName} in {duration:5.1f} s"
            )
            self.cacheFile = self.storeLoD(
                listOfDicts, append=append, sampleRecordCount=sampleRecordCount
            )
            self.setListFromLoD(listOfDicts)
        else:
            # fromStore also sets self.cacheFile
            listOfDicts = self.fromStore()
        return listOfDicts

    def fromStore(self, cacheFile=None, setList: bool = True) -> list:
        """
        restore me from the store
        Args:
            cacheFile(String): the cacheFile to use if None use the pre configured cachefile
            setList(bool): if True set my list with the data from the cache file

        Returns:
            list: list of dicts or JSON entitymanager

        Raises:
            Exception: if my store mode is not supported
        """
        startTime = time.time()
        if cacheFile is None:
            cacheFile = self.getCacheFile(config=self.config, mode=self.config.mode)
        self.cacheFile = cacheFile
        self.showProgress(
            "reading %s for %s from cache %s"
            % (self.entityPluralName, self.name, cacheFile)
        )
        mode = self.config.mode
        if mode is StoreMode.JSONPICKLE:
            JSONem = JsonPickleMixin.readJsonPickle(cacheFile)
            if self.clazz is not None:
                listOfDicts = JSONem.getLoD()
            else:
                listOfDicts = JSONem.getList()
        elif mode is StoreMode.JSON:
            listOfDicts = self.readLodFromJsonFile(cacheFile)
            pass
        elif mode is StoreMode.SPARQL:
            # @FIXME make abstract
            eventQuery = (
                """
PREFIX cr: <http://cr.bitplan.com/>
SELECT ?eventId ?acronym ?series ?title ?year ?country ?city ?startDate ?endDate ?url ?source WHERE { 
   OPTIONAL { ?event cr:Event_eventId ?eventId. }
   OPTIONAL { ?event cr:Event_acronym ?acronym. }
   OPTIONAL { ?event cr:Event_series ?series. }
   OPTIONAL { ?event cr:Event_title ?title. }
   OPTIONAL { ?event cr:Event_year ?year.  }
   OPTIONAL { ?event cr:Event_country ?country. }
   OPTIONAL { ?event cr:Event_city ?city. }
   OPTIONAL { ?event cr:Event_startDate ?startDate. }
   OPTIONAL { ?event cr:Event_endDate ?endDate. }
   OPTIONAL { ?event cr:Event_url ?url. }
   ?event cr:Event_source ?source FILTER(?source='%s').
}
"""
                % self.name
            )
            listOfDicts = self.sparql.queryAsListOfDicts(eventQuery)
        elif mode is StoreMode.SQL:
            sqlQuery = "SELECT * FROM %s" % self.tableName
            sqlDB = self.getSQLDB(cacheFile)
            listOfDicts = sqlDB.query(sqlQuery)
            sqlDB.close()
            pass
        else:
            # fixed: was self.mode which is never set and would raise AttributeError
            raise Exception("unsupported store mode %s" % self.config.mode)

        self.showProgress(
            "read %d %s from %s in %5.1f s"
            % (
                len(listOfDicts),
                self.entityPluralName,
                self.name,
                time.time() - startTime,
            )
        )
        if setList:
            self.setListFromLoD(listOfDicts)
        return listOfDicts

    def getLoD(self):
        """
        Return the LoD of the entities in the list

        Return:
            list: a list of Dicts

        """
        lod = []
        for entity in self.getList():
            # TODO - optionally filter by samples
            lod.append(entity.__dict__)
        return lod

    def store(
        self,
        limit=10000000,
        batchSize=250,
        append=False,
        fixNone=True,
        sampleRecordCount=-1,
        replace: bool = False,
    ) -> str:
        """
        store my list of dicts

        Args:
            limit(int): maximum number of records to store per batch
            batchSize(int): size of batch for storing
            append(bool): True if records should be appended
            fixNone(bool): if True make sure the dicts are filled with None references for each record
            sampleRecordCount(int): the number of records to analyze for type information
            replace(bool): if True allow replace for insert

        Return:
            str: The cachefile being used
        """
        lod = self.getLoD()
        return self.storeLoD(
            lod,
            limit=limit,
            batchSize=batchSize,
            append=append,
            fixNone=fixNone,
            sampleRecordCount=sampleRecordCount,
            replace=replace,
        )

    def storeLoD(
        self,
        listOfDicts,
        limit=10000000,
        batchSize=250,
        cacheFile=None,
        append=False,
        fixNone=True,
        # NOTE(review): default 1 is inconsistent with the -1 (all records)
        # default used elsewhere; kept as-is since direct callers may rely on it
        sampleRecordCount=1,
        replace: bool = False,
    ) -> str:
        """
        store my entities

        Args:
            listOfDicts(list): the list of dicts to store
            limit(int): maximum number of records to store
            batchSize(int): size of batch for storing
            cacheFile(string): the name of the storage e.g path to JSON or sqlite3 file
            append(bool): True if records should be appended
            fixNone(bool): if True make sure the dicts are filled with None references for each record
            sampleRecordCount(int): the number of records to analyze for type information
            replace(bool): if True allow replace for insert
        Return:
            str: The cachefile being used

        Raises:
            Exception: if my store mode is not supported
        """
        config = self.config
        mode = config.mode
        if self.handleInvalidListTypes:
            LOD.handleListTypes(
                lod=listOfDicts,
                doFilter=self.filterInvalidListTypes,
                separator=self.listSeparator,
            )
        if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
            if cacheFile is None:
                cacheFile = self.getCacheFile(config=self.config, mode=mode)
            self.showProgress(
                f"storing {len(listOfDicts)} {self.entityPluralName} for {self.name} to cache {cacheFile}"
            )
            if mode is StoreMode.JSONPICKLE:
                self.writeJsonPickle(cacheFile)
            if mode is StoreMode.JSON:
                self.storeToJsonFile(cacheFile)
                pass
        elif mode is StoreMode.SPARQL:
            startTime = time.time()
            msg = f"storing {len(listOfDicts)} {self.entityPluralName} to {self.config.mode} ({self.config.endpoint})"
            self.showProgress(msg)
            # @ FIXME make abstract /configurable
            entityType = "cr:Event"
            prefixes = self.config.prefix
            self.sparql.insertListOfDicts(
                listOfDicts,
                entityType,
                self.primaryKey,
                prefixes,
                limit=limit,
                batchSize=batchSize,
            )
            self.showProgress(
                "store for %s done after %5.1f secs"
                % (self.name, time.time() - startTime)
            )
        elif mode is StoreMode.SQL:
            startTime = time.time()
            if cacheFile is None:
                cacheFile = self.getCacheFile(config=self.config, mode=self.config.mode)
            sqldb = self.getSQLDB(cacheFile)
            self.showProgress(
                "storing %d %s for %s to %s:%s"
                % (
                    len(listOfDicts),
                    self.entityPluralName,
                    self.name,
                    config.mode,
                    cacheFile,
                )
            )
            # appending re-uses the existing table; otherwise recreate it
            if append:
                withDrop = False
                withCreate = False
            else:
                withDrop = True
                withCreate = True
            entityInfo = self.initSQLDB(
                sqldb,
                listOfDicts,
                withCreate=withCreate,
                withDrop=withDrop,
                sampleRecordCount=sampleRecordCount,
            )
            self.sqldb.store(
                listOfDicts,
                entityInfo,
                executeMany=self.executeMany,
                fixNone=fixNone,
                replace=replace,
            )
            self.showProgress(
                "store for %s done after %5.1f secs"
                % (self.name, time.time() - startTime)
            )
        else:
            # fixed: was self.mode which is never set and would raise AttributeError
            raise Exception(f"unsupported store mode {self.config.mode}")
        return cacheFile

__init__(name, entityName, entityPluralName, listName=None, clazz=None, tableName=None, primaryKey=None, config=None, handleInvalidListTypes=False, filterInvalidListTypes=False, listSeparator='⇹', debug=False)

Constructor

Parameters:

Name Type Description Default
name(string)

name of this eventManager

required
entityName(string)

entityType to be managed e.g. Country

required
entityPluralName(string)

plural of the entityType e.g. Countries

required
config(StorageConfig)

the configuration to be used if None a default configuration will be used

required
handleInvalidListTypes(bool)

True if invalidListTypes should be converted or filtered

required
filterInvalidListTypes(bool)

True if invalidListTypes should be deleted

required
listSeparator(str)

the symbol to use as a list separator

required
debug(boolean)

override debug setting when default of config is used via config=None

required
Source code in lodstorage/entity.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
def __init__(
    self,
    name,
    entityName,
    entityPluralName: str,
    listName: str = None,
    clazz=None,
    tableName: str = None,
    primaryKey: str = None,
    config=None,
    handleInvalidListTypes=False,
    filterInvalidListTypes=False,
    listSeparator="⇹",
    debug=False,
):
    """
    Constructor

    Args:
        name(string): name of this eventManager
        entityName(string): entityType to be managed e.g. Country
        entityPluralName(string): plural of the the entityType e.g. Countries
        config(StorageConfig): the configuration to be used if None a default configuration will be used
        handleInvalidListTypes(bool): True if invalidListTypes should be converted or filtered
        filterInvalidListTypes(bool): True if invalidListTypes should be deleted
        listSeparator(str): the symbol to use as a list separator
        debug(boolean): override debug setting when default of config is used via config=None
    """
    self.name = name
    self.entityName = entityName
    self.entityPluralName = entityPluralName
    if listName is None:
        listName = entityPluralName
    if tableName is None:
        tableName = entityName
    self.primaryKey = primaryKey
    if config is None:
        config = StorageConfig.getDefault()
        if debug:
            config.debug = debug
    self.config = config
    super(EntityManager, self).__init__(
        listName=listName,
        clazz=clazz,
        tableName=tableName,
        handleInvalidListTypes=handleInvalidListTypes,
        filterInvalidListTypes=filterInvalidListTypes,
    )
    cacheFile = self.getCacheFile(config=config, mode=config.mode)
    self.showProgress(
        "Creating %smanager(%s) for %s using cache %s"
        % (self.entityName, config.mode, self.name, cacheFile)
    )
    if config.mode is StoreMode.SPARQL:
        if config.endpoint is None:
            raise Exception("no endpoint set for mode sparql")
        self.endpoint = config.endpoint
        self.sparql = SPARQL(
            config.endpoint, debug=config.debug, profile=config.profile
        )
    elif config.mode is StoreMode.SQL:
        self.executeMany = False  # may be True when issues are fixed
    self.listSeparator = listSeparator

fromCache(force=False, getListOfDicts=None, append=False, sampleRecordCount=-1)

get my entries from the cache or from the callback provided

Parameters:

Name Type Description Default
force(bool)

force ignoring the cache

required
getListOfDicts(callable)

a function to call for getting the data

required
append(bool)

True if records should be appended

required
sampleRecordCount(int)

the number of records to analyze for type information

required

Returns:

Type Description

the list of Dicts and as a side effect setting self.cacheFile

Source code in lodstorage/entity.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
def fromCache(
    self,
    force: bool = False,
    getListOfDicts=None,
    append=False,
    sampleRecordCount=-1,
):
    """
    get my entries from the cache or from the callback provided

    Args:
        force(bool): force ignoring the cache
        getListOfDicts(callable): a function to call for getting the data
        append(bool): True if records should be appended
        sampleRecordCount(int): the number of records to analyze for type information

    Returns:
        the list of Dicts and as a side effect setting self.cacheFile
    """
    if not self.isCached() or force:
        startTime = time.time()
        self.showProgress(f"getting {self.entityPluralName} for {self.name} ...")
        if getListOfDicts is None:
            if hasattr(self, "getListOfDicts"):
                getListOfDicts = self.getListOfDicts
            else:
                raise Exception(
                    "from Cache failed and no secondary cache via getListOfDicts specified"
                )
        listOfDicts = getListOfDicts()
        duration = time.time() - startTime
        self.showProgress(
            f"got {len(listOfDicts)} {self.entityPluralName} in {duration:5.1f} s"
        )
        self.cacheFile = self.storeLoD(
            listOfDicts, append=append, sampleRecordCount=sampleRecordCount
        )
        self.setListFromLoD(listOfDicts)
    else:
        # fromStore also sets self.cacheFile
        listOfDicts = self.fromStore()
    return listOfDicts

fromStore(cacheFile=None, setList=True)

restore me from the store Args: cacheFile(String): the cacheFile to use if None use the pre configured cachefile setList(bool): if True set my list with the data from the cache file

Returns:

Name Type Description
list list

list of dicts or JSON entitymanager

Source code in lodstorage/entity.py
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
    def fromStore(self, cacheFile=None, setList: bool = True) -> list:
        """
        restore me from the store
        Args:
            cacheFile(String): the cacheFile to use if None use the pre configured cachefile
            setList(bool): if True set my list with the data from the cache file

        Returns:
            list: list of dicts or JSON entitymanager
        """
        startTime = time.time()
        if cacheFile is None:
            cacheFile = self.getCacheFile(config=self.config, mode=self.config.mode)
        self.cacheFile = cacheFile
        self.showProgress(
            "reading %s for %s from cache %s"
            % (self.entityPluralName, self.name, cacheFile)
        )
        mode = self.config.mode
        if mode is StoreMode.JSONPICKLE:
            JSONem = JsonPickleMixin.readJsonPickle(cacheFile)
            if self.clazz is not None:
                listOfDicts = JSONem.getLoD()
            else:
                listOfDicts = JSONem.getList()
        elif mode is StoreMode.JSON:
            listOfDicts = self.readLodFromJsonFile(cacheFile)
            pass
        elif mode is StoreMode.SPARQL:
            # @FIXME make abstract
            eventQuery = (
                """
PREFIX cr: <http://cr.bitplan.com/>
SELECT ?eventId ?acronym ?series ?title ?year ?country ?city ?startDate ?endDate ?url ?source WHERE { 
   OPTIONAL { ?event cr:Event_eventId ?eventId. }
   OPTIONAL { ?event cr:Event_acronym ?acronym. }
   OPTIONAL { ?event cr:Event_series ?series. }
   OPTIONAL { ?event cr:Event_title ?title. }
   OPTIONAL { ?event cr:Event_year ?year.  }
   OPTIONAL { ?event cr:Event_country ?country. }
   OPTIONAL { ?event cr:Event_city ?city. }
   OPTIONAL { ?event cr:Event_startDate ?startDate. }
   OPTIONAL { ?event cr:Event_endDate ?endDate. }
   OPTIONAL { ?event cr:Event_url ?url. }
   ?event cr:Event_source ?source FILTER(?source='%s').
}
"""
                % self.name
            )
            listOfDicts = self.sparql.queryAsListOfDicts(eventQuery)
        elif mode is StoreMode.SQL:
            sqlQuery = "SELECT * FROM %s" % self.tableName
            sqlDB = self.getSQLDB(cacheFile)
            listOfDicts = sqlDB.query(sqlQuery)
            sqlDB.close()
            pass
        else:
            raise Exception("unsupported store mode %s" % self.mode)

        self.showProgress(
            "read %d %s from %s in %5.1f s"
            % (
                len(listOfDicts),
                self.entityPluralName,
                self.name,
                time.time() - startTime,
            )
        )
        if setList:
            self.setListFromLoD(listOfDicts)
        return listOfDicts

getCacheFile(config=None, mode=StoreMode.SQL)

get the cache file for this event manager Args: config(StorageConfig): if None get the cache for my mode mode(StoreMode): the storeMode to use

Source code in lodstorage/entity.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def getCacheFile(self, config=None, mode=StoreMode.SQL):
    """
    get the cache file for this event manager
    Args:
        config(StorageConfig): if None get the cache for my mode
        mode(StoreMode): the storeMode to use
    """
    if config is None:
        config = self.config
    cachedir = config.getCachePath()
    if config.cacheFile is not None:
        return config.cacheFile
    """ get the path to the file for my cached data """
    if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
        extension = f".{mode.name.lower()}"
        cachepath = f"{cachedir}/{self.name}-{self.listName}{extension}"
    elif mode is StoreMode.SPARQL:
        cachepath = f"SPAQRL {self.name}:{config.endpoint}"
    elif mode is StoreMode.SQL:
        cachepath = f"{cachedir}/{self.name}.db"
    else:
        cachepath = f"undefined cachepath for StoreMode {mode}"
    return cachepath

getLoD()

Return the LoD of the entities in the list

Return

list: a list of Dicts

Source code in lodstorage/entity.py
350
351
352
353
354
355
356
357
358
359
360
361
362
def getLoD(self):
    """
    Return the LoD of the entities in the list

    Return:
        list: a list of Dicts

    """
    lod = []
    for entity in self.getList():
        # TODO - optionally filter by samples
        lod.append(entity.__dict__)
    return lod

getSQLDB(cacheFile)

get the SQL database for the given cacheFile

Parameters:

Name Type Description Default
cacheFile(string)

the file to get the SQL db from

required
Source code in lodstorage/entity.py
135
136
137
138
139
140
141
142
143
144
145
146
def getSQLDB(self, cacheFile):
    """
    get the SQL database for the given cacheFile

    Args:
        cacheFile(string): the file to get the SQL db from
    """
    config = self.config
    sqldb = self.sqldb = SQLDB(
        cacheFile, debug=config.debug, errorDebug=config.errorDebug
    )
    return sqldb

initSQLDB(sqldb, listOfDicts=None, withCreate=True, withDrop=True, sampleRecordCount=-1)

initialize my sql DB

Parameters:

Name Type Description Default
listOfDicts(list)

the list of dicts to analyze for type information

required
withDrop(boolean)

true if the existing Table should be dropped

required
withCreate(boolean)

true if the create Table command should be executed - false if only the entityInfo should be returned

required
sampleRecordCount(int)

the number of records to analyze for type information

required

Return: EntityInfo: the entity information such as CREATE Table command

Source code in lodstorage/entity.py
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def initSQLDB(
    self,
    sqldb,
    listOfDicts=None,
    withCreate: bool = True,
    withDrop: bool = True,
    sampleRecordCount=-1,
):
    """
    initialize my sql DB

    Args:
        listOfDicts(list): the list of dicts to analyze for type information
        withDrop(boolean): true if the existing Table should be dropped
        withCreate(boolean): true if the create Table command should be executed - false if only the entityInfo should be returned
        sampleRecordCount(int): the number of records to analyze for type information
    Return:
        EntityInfo: the entity information such as CREATE Table command
    """
    if listOfDicts is None:
        listOfDicts = JSONAble.getJsonTypeSamplesForClass(self.clazz)
    entityInfo = sqldb.createTable(
        listOfDicts,
        self.tableName,
        primaryKey=self.primaryKey,
        withCreate=withCreate,
        withDrop=withDrop,
        sampleRecordCount=sampleRecordCount,
    )
    return entityInfo

isCached()

check whether there is a file containing cached data for me

Source code in lodstorage/entity.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
    def isCached(self):
        """check whether there is a file containing cached
        data for me"""
        result = False
        config = self.config
        mode = self.config.mode
        if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
            result = os.path.isfile(self.getCacheFile(config=self.config, mode=mode))
        elif mode is StoreMode.SPARQL:
            # @FIXME - make abstract
            query = (
                config.prefix
                + """
SELECT  ?source (COUNT(?source) AS ?sourcecount)
WHERE { 
   ?event cr:Event_source ?source.
}
GROUP by ?source
"""
            )
            sourceCountList = self.sparql.queryAsListOfDicts(query)
            for sourceCount in sourceCountList:
                source = sourceCount["source"]
                recordCount = sourceCount["sourcecount"]
                if source == self.name and recordCount > 100:
                    result = True
        elif mode is StoreMode.SQL:
            cacheFile = self.getCacheFile(config=self.config, mode=StoreMode.SQL)
            if os.path.isfile(cacheFile):
                sqlQuery = f"SELECT COUNT(*) AS count FROM {self.tableName}"
                try:
                    sqlDB = self.getSQLDB(cacheFile)
                    countResults = sqlDB.query(sqlQuery)
                    countResult = countResults[0]
                    count = countResult["count"]
                    result = count >= 0
                except Exception as ex:
                    msg = str(ex)
                    if self.debug:
                        print(msg, file=sys.stderr)
                        sys.stderr.flush()
                    # e.g. sqlite3.OperationalError: no such table: Event_crossref
                    pass
        else:
            raise Exception("unsupported mode %s" % self.mode)
        return result

removeCacheFile()

remove my cache file

Source code in lodstorage/entity.py
127
128
129
130
131
132
133
def removeCacheFile(self):
    """
    delete my cache file from disk - only applies to the
    file based store modes (JSON and JSONPICKLE)
    """
    storeMode = self.config.mode
    # nothing to remove for non file based store modes
    if storeMode is not StoreMode.JSON and storeMode is not StoreMode.JSONPICKLE:
        return
    cachePath = self.getCacheFile(mode=storeMode)
    if os.path.isfile(cachePath):
        os.remove(cachePath)

setNone(record, fields)

make sure the given fields in the given record are set to none Args: record(dict): the record to work on fields(list): the list of fields to set to None

Source code in lodstorage/entity.py
179
180
181
182
183
184
185
186
def setNone(self, record, fields):
    """
    make sure the given fields in the given record are set to none

    Delegates to the LOD helper so missing fields are filled with None.

    Args:
        record(dict): the record to work on
        fields(list): the list of fields to set to None
    """
    LOD.setNone(record, fields)

showProgress(msg)

display a progress message

Parameters:

Name Type Description Default
msg(string)

the message to display

required
Source code in lodstorage/entity.py
 94
 95
 96
 97
 98
 99
100
101
def showProgress(self, msg):
    """
    print a progress message if progress display is enabled in my config

    Args:
      msg(string): the message to display
    """
    showIt = self.config.withShowProgress
    if showIt:
        # flush so progress is visible immediately
        print(msg, flush=True)

store(limit=10000000, batchSize=250, append=False, fixNone=True, sampleRecordCount=-1, replace=False)

store my list of dicts

Parameters:

Name Type Description Default
limit(int)

maximum number of records to store per batch

required
batchSize(int)

size of batch for storing

required
append(bool)

True if records should be appended

required
fixNone(bool)

if True make sure the dicts are filled with None references for each record

required
sampleRecordCount(int)

the number of records to analyze for type information

required
replace(bool)

if True allow replace for insert

required
Return

str: The cachefile being used

Source code in lodstorage/entity.py
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
def store(
    self,
    limit=10000000,
    batchSize=250,
    append=False,
    fixNone=True,
    sampleRecordCount=-1,
    replace: bool = False,
) -> str:
    """
    fetch my list of dicts and delegate the storing to storeLoD

    Args:
        limit(int): maximum number of records to store per batch
        batchSize(int): size of batch for storing
        append(bool): True if records should be appended
        fixNone(bool): if True make sure the dicts are filled with None references for each record
        sampleRecordCount(int): the number of records to analyze for type information
        replace(bool): if True allow replace for insert

    Return:
        str: The cachefile being used
    """
    records = self.getLoD()
    cacheFile = self.storeLoD(
        records,
        limit=limit,
        batchSize=batchSize,
        append=append,
        fixNone=fixNone,
        sampleRecordCount=sampleRecordCount,
        replace=replace,
    )
    return cacheFile

storeLoD(listOfDicts, limit=10000000, batchSize=250, cacheFile=None, append=False, fixNone=True, sampleRecordCount=1, replace=False)

store my entities

Parameters:

Name Type Description Default
listOfDicts(list)

the list of dicts to store

required
limit(int)

maximum number of records to store

required
batchSize(int)

size of batch for storing

required
cacheFile(string)

the name of the storage e.g path to JSON or sqlite3 file

required
append(bool)

True if records should be appended

required
fixNone(bool)

if True make sure the dicts are filled with None references for each record

required
sampleRecordCount(int)

the number of records to analyze for type information

required
replace(bool)

if True allow replace for insert

required

Return: str: The cachefile being used

Source code in lodstorage/entity.py
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
def storeLoD(
    self,
    listOfDicts,
    limit=10000000,
    batchSize=250,
    cacheFile=None,
    append=False,
    fixNone=True,
    sampleRecordCount=1,
    replace: bool = False,
) -> str:
    """
    store my entities

    Args:
        listOfDicts(list): the list of dicts to store
        limit(int): maximum number of records to store
        batchSize(int): size of batch for storing
        cacheFile(string): the name of the storage e.g path to JSON or sqlite3 file
        append(bool): True if records should be appended
        fixNone(bool): if True make sure the dicts are filled with None references for each record
        sampleRecordCount(int): the number of records to analyze for type information
        replace(bool): if True allow replace for insert
    Return:
        str: The cachefile being used
    """
    config = self.config
    mode = config.mode
    # optionally normalize/filter list-typed cell values before storing
    if self.handleInvalidListTypes:
        LOD.handleListTypes(
            lod=listOfDicts,
            doFilter=self.filterInvalidListTypes,
            separator=self.listSeparator,
        )
    if mode is StoreMode.JSON or mode is StoreMode.JSONPICKLE:
        # file based storage
        if cacheFile is None:
            cacheFile = self.getCacheFile(config=self.config, mode=mode)
        self.showProgress(
            f"storing {len(listOfDicts)} {self.entityPluralName} for {self.name} to cache {cacheFile}"
        )
        if mode is StoreMode.JSONPICKLE:
            self.writeJsonPickle(cacheFile)
        if mode is StoreMode.JSON:
            self.storeToJsonFile(cacheFile)
            pass
    elif mode is StoreMode.SPARQL:
        startTime = time.time()
        msg = f"storing {len(listOfDicts)} {self.entityPluralName} to {self.config.mode} ({self.config.endpoint})"
        self.showProgress(msg)
        # @ FIXME make abstract /configurable
        # NOTE(review): entityType is hard-coded to "cr:Event"
        entityType = "cr:Event"
        prefixes = self.config.prefix
        self.sparql.insertListOfDicts(
            listOfDicts,
            entityType,
            self.primaryKey,
            prefixes,
            limit=limit,
            batchSize=batchSize,
        )
        self.showProgress(
            "store for %s done after %5.1f secs"
            % (self.name, time.time() - startTime)
        )
    elif mode is StoreMode.SQL:
        startTime = time.time()
        if cacheFile is None:
            cacheFile = self.getCacheFile(config=self.config, mode=self.config.mode)
        sqldb = self.getSQLDB(cacheFile)
        self.showProgress(
            "storing %d %s for %s to %s:%s"
            % (
                len(listOfDicts),
                self.entityPluralName,
                self.name,
                config.mode,
                cacheFile,
            )
        )
        # append keeps the existing table; otherwise drop and recreate it
        if append:
            withDrop = False
            withCreate = False
        else:
            withDrop = True
            withCreate = True
        entityInfo = self.initSQLDB(
            sqldb,
            listOfDicts,
            withCreate=withCreate,
            withDrop=withDrop,
            sampleRecordCount=sampleRecordCount,
        )
        self.sqldb.store(
            listOfDicts,
            entityInfo,
            executeMany=self.executeMany,
            fixNone=fixNone,
            replace=replace,
        )
        self.showProgress(
            "store for %s done after %5.1f secs"
            % (self.name, time.time() - startTime)
        )
    else:
        # NOTE(review): uses self.mode while the method otherwise reads
        # self.config.mode - verify that self.mode exists
        raise Exception(f"unsupported store mode {self.mode}")
    return cacheFile

storeMode()

return my store mode

Source code in lodstorage/entity.py
88
89
90
91
92
def storeMode(self):
    """
    Returns:
        the store mode taken from my configuration
    """
    mode = self.config.mode
    return mode

jsonable

This module has a class JSONAble for serialization of tables/list of dicts to and from JSON encoding

Created on 2020-09-03

@author: wf

JSONAble

Bases: object

mixin to allow classes to be JSON serializable see

  • https://stackoverflow.com/questions/3768895/how-to-make-a-class-json-serializable
Source code in lodstorage/jsonable.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
class JSONAble(object):
    """
    mixin to allow classes to be JSON serializable see

    - https://stackoverflow.com/questions/3768895/how-to-make-a-class-json-serializable

    """

    def __init__(self):
        """
        Constructor
        """

    @classmethod
    def getPluralname(cls):
        """return the plural name of this class by appending an 's'"""
        return "%ss" % cls.__name__

    @staticmethod
    def singleQuoteToDoubleQuote(singleQuoted, useRegex=False):
        """
        convert a single quoted string to a double quoted one

        Args:
            singleQuoted (str): a single quoted string e.g.

                .. highlight:: json

                {'cities': [{'name': "Upper Hell's Gate"}]}

            useRegex (boolean): True if a regular expression shall be used for matching

        Returns:
            string: the double quoted version of the string

        Note:
            see
            - https://stackoverflow.com/questions/55600788/python-replace-single-quotes-with-double-quotes-but-leave-ones-within-double-q

        """
        if useRegex:
            doubleQuoted = JSONAble.singleQuoteToDoubleQuoteUsingRegex(singleQuoted)
        else:
            doubleQuoted = JSONAble.singleQuoteToDoubleQuoteUsingBracketLoop(
                singleQuoted
            )
        return doubleQuoted

    @staticmethod
    def singleQuoteToDoubleQuoteUsingRegex(singleQuoted):
        """
        convert a single quoted string to a double quoted one using a regular expression

        Args:
            singleQuoted(string): a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}
        Returns:
            string: the double quoted version of the string
        Note:
            see https://stackoverflow.com/a/50257217/1497139
        """
        doubleQuoted = JSONAbleSettings.singleQuoteRegex.sub('"', singleQuoted)
        return doubleQuoted

    @staticmethod
    def singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted):
        """
        convert a single quoted string to a double quoted one using a character
        loop that tracks the quoting state

        Args:
            singleQuoted(string): a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}
        Returns:
            string: the double quoted version of the string
        Note:
            see https://stackoverflow.com/a/63862387/1497139

        """
        cList = list(singleQuoted)
        inDouble = False
        inSingle = False
        for i, c in enumerate(cList):
            # print ("%d:%s %r %r" %(i,c,inSingle,inDouble))
            if c == "'":
                # only rewrite single quotes outside of double quoted regions
                if not inDouble:
                    inSingle = not inSingle
                    cList[i] = '"'
            elif c == '"':
                inDouble = not inDouble
                inSingle = False
        doubleQuoted = "".join(cList)
        return doubleQuoted

    def getJsonTypeSamples(self):
        """
        get the type samples of my class - or, for a JSONAbleList without its
        own samples, of the class of my list elements

        Returns:
            list: a list of sample dicts, or None if no samples are available
        """
        if hasattr(self, "__class__"):
            cls = self.__class__
            if isinstance(self, JSONAbleList) and not hasattr(cls, "getSamples"):
                cls = self.clazz
            return JSONAble.getJsonTypeSamplesForClass(cls)
        return None

    @staticmethod
    def getJsonTypeSamplesForClass(cls):
        """
        return the type samples for the given class

        Return:
            list: a list of dict that specify the types by example,
            or None if the class has no callable "getSamples" method
        """
        if hasattr(cls, "getSamples"):
            getSamples = getattr(cls, "getSamples")
            if callable(getSamples):
                return getSamples()
        return None

    @staticmethod
    def readJsonFromFile(jsonFilePath):
        """
        read json string from the given jsonFilePath

        Args:
            jsonFilePath(string): the path of the file where to read the result from

        Returns:
            the JSON string read from the file
        """
        with open(jsonFilePath, "r") as jsonFile:
            jsonStr = jsonFile.read()
        return jsonStr

    @staticmethod
    def storeJsonToFile(jsonStr, jsonFilePath):
        """
        store the given json string to the given jsonFilePath

        Args:
            jsonStr(string): the string to store
            jsonFilePath(string): the path of the file where to store the result

        """
        with open(jsonFilePath, "w") as jsonFile:
            jsonFile.write(jsonStr)

    def checkExtension(self, jsonFile: str, extension: str = ".json") -> str:
        """
        make sure the jsonFile has the given extension e.g. ".json"

        Args:
            jsonFile(str): the jsonFile name - potentially without ".json" suffix
            extension(str): the extension to enforce e.g. ".json"

        Returns:
            str: the jsonFile name with ".json" as an extension guaranteed
        """
        if not jsonFile.endswith(extension):
            jsonFile = f"{jsonFile}{extension}"
        return jsonFile

    def storeToJsonFile(
        self, jsonFile: str, extension: str = ".json", limitToSampleFields: bool = False
    ):
        """
        store me to the given jsonFile

        Args:
            jsonFile(str): the JSON file name (optionally without extension)
            extension(str): the extension to use if not part of the jsonFile name
            limitToSampleFields(bool): If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.
        """
        jsonFile = self.checkExtension(jsonFile, extension)
        JSONAble.storeJsonToFile(self.toJSON(limitToSampleFields), jsonFile)

    def restoreFromJsonFile(self, jsonFile: str):
        """
        restore me from the given jsonFile

        Args:
            jsonFile(string): the jsonFile to restore me from
        """
        jsonFile = self.checkExtension(jsonFile)
        jsonStr = JSONAble.readJsonFromFile(jsonFile)
        self.fromJson(jsonStr)

    def fromJson(self, jsonStr):
        """
        initialize me from the given JSON string

        Args:
            jsonStr(str): the JSON string
        """
        jsonMap = json.loads(jsonStr)
        self.fromDict(jsonMap)

    def fromDict(self, data: dict):
        """
        initialize me from the given data by setting one attribute per entry

        Args:
            data(dict): the dictionary to initialize me from
        """
        # https://stackoverflow.com/questions/38987/how-do-i-merge-two-dictionaries-in-a-single-expression-in-python-taking-union-o
        for key in data.keys():
            value = data[key]
            setattr(self, key, value)

    def toJsonAbleValue(self, v):
        """
        return the JSON able value of the given value v

        Args:
            v(object): the value to convert

        Returns:
            the __dict__ of an object, the ISO format of a date/datetime,
            or an empty string if no conversion is known
        """
        # objects have __dict__ hash tables which can be JSON-converted
        if hasattr(v, "__dict__"):
            return v.__dict__
        elif isinstance(v, datetime.datetime):
            return v.isoformat()
        elif isinstance(v, datetime.date):
            return v.isoformat()
        else:
            return ""

    def toJSON(self, limitToSampleFields: bool = False):
        """
        serialize me to a JSON string

        Args:
            limitToSampleFields(bool): If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.

        Returns:
            a recursive JSON dump of the dicts of my objects
        """
        data = {}
        if limitToSampleFields:
            samples = self.getJsonTypeSamples()
            sampleFields = LOD.getFields(samples)
            if isinstance(self, JSONAbleList):
                limitedRecords = []
                for record in self.__dict__[self.listName]:
                    limitedRecord = {}
                    for key, value in record.__dict__.items():
                        if key in sampleFields:
                            limitedRecord[key] = value
                    limitedRecords.append(limitedRecord)
                data[self.listName] = limitedRecords
            else:
                for key, value in self.__dict__.items():
                    if key in sampleFields:
                        data[key] = value
        else:
            data = self
        jsonStr = json.dumps(
            data,
            default=lambda v: self.toJsonAbleValue(v),
            sort_keys=True,
            indent=JSONAbleSettings.indent,
        )
        return jsonStr

    def getJSONValue(self, v):
        """
        get the value of the given v as JSON

        Args:
            v(object): the value to get

        Returns:
            the value, making sure objects are returned as dicts
        """
        if hasattr(v, "asJSON"):
            return v.asJSON(asString=False)
        elif type(v) is dict:
            return self.reprDict(v)
        elif type(v) is list:
            vlist = []
            for vitem in v:
                vlist.append(self.getJSONValue(vitem))
            return vlist
        elif isinstance(v, datetime.datetime):
            return v.isoformat()
        elif isinstance(v, datetime.date):
            return v.isoformat()
        elif isinstance(v, bool):
            # convert True,False to -> true,false
            return str(v).lower()
        else:
            return v

    def reprDict(self, srcDict):
        """
        get the given srcDict as new dict with fields being converted with getJSONValue

        Args:
            srcDict(dict): the source dictionary

        Returns:
            dict: the converted dictionary
        """
        d = dict()
        for a, v in srcDict.items():
            d[a] = self.getJSONValue(v)
        return d

    def asJSON(self, asString=True, data=None):
        """
        recursively return my dict elements

        Args:
            asString(boolean): if True return my result as a string
            data(dict): optional dict to convert instead of my __dict__
        """
        if data is None:
            data = self.__dict__
        jsonDict = self.reprDict(data)
        if asString:
            jsonStr = str(jsonDict)
            jsonStr = JSONAble.singleQuoteToDoubleQuote(jsonStr)
            return jsonStr
        return jsonDict

__init__()

Constructor

Source code in lodstorage/jsonable.py
37
38
39
40
def __init__(self):
    """
    Constructor - the mixin needs no instance state of its own
    """

asJSON(asString=True, data=None)

recursively return my dict elements

Parameters:

Name Type Description Default
asString(boolean)

if True return my result as a string

required
Source code in lodstorage/jsonable.py
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
def asJSON(self, asString=True, data=None):
    """
    convert my attributes (or the given data dict) to JSON

    Args:
        asString(boolean): if True return my result as a string
        data(dict): optional dict to convert instead of my __dict__
    """
    source = self.__dict__ if data is None else data
    jsonDict = self.reprDict(source)
    if not asString:
        return jsonDict
    jsonStr = JSONAble.singleQuoteToDoubleQuote(str(jsonDict))
    return jsonStr

checkExtension(jsonFile, extension='.json')

make sure the jsonFile has the given extension e.g. ".json"

Parameters:

Name Type Description Default
jsonFile(str)

the jsonFile name - potentially without ".json" suffix

required

Returns:

Name Type Description
str str

the jsonFile name with ".json" as an extension guaranteed

Source code in lodstorage/jsonable.py
174
175
176
177
178
179
180
181
182
183
184
185
186
def checkExtension(self, jsonFile: str, extension: str = ".json") -> str:
    """
    guarantee that the given file name carries the given extension

    Args:
        jsonFile(str): the jsonFile name - potentially without ".json" suffix
        extension(str): the extension to append when missing

    Returns:
        str: the jsonFile name with the extension guaranteed
    """
    hasExtension = jsonFile.endswith(extension)
    return jsonFile if hasExtension else f"{jsonFile}{extension}"

fromDict(data)

initialize me from the given data

Parameters:

Name Type Description Default
data(dict)

the dictionary to initialize me from

required
Source code in lodstorage/jsonable.py
223
224
225
226
227
228
229
230
231
232
233
def fromDict(self, data: dict):
    """
    set one attribute on me per entry of the given dict

    Args:
        data(dict): the dictionary to initialize me from
    """
    for key, value in data.items():
        setattr(self, key, value)

fromJson(jsonStr)

initialize me from the given JSON string

Parameters:

Name Type Description Default
jsonStr(str)

the JSON string

required
Source code in lodstorage/jsonable.py
213
214
215
216
217
218
219
220
221
def fromJson(self, jsonStr):
    """
    parse the given JSON string and initialize me from the resulting dict

    Args:
        jsonStr(str): the JSON string
    """
    data = json.loads(jsonStr)
    self.fromDict(data)

getJSONValue(v)

get the value of the given v as JSON

Parameters:

Name Type Description Default
v(object)

the value to get

required

Returns:

Type Description

the value, making sure objects are returned as dicts

Source code in lodstorage/jsonable.py
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
def getJSONValue(self, v):
    """
    convert the given value v to its JSON representation

    Args:
        v(object): the value to get

    Returns:
        the value, making sure objects are returned as dicts
    """
    if hasattr(v, "asJSON"):
        return v.asJSON(asString=False)
    if type(v) is dict:
        return self.reprDict(v)
    if type(v) is list:
        return [self.getJSONValue(item) for item in v]
    # datetime is a subclass of date; both are rendered in ISO format
    if isinstance(v, (datetime.datetime, datetime.date)):
        return v.isoformat()
    if isinstance(v, bool):
        # convert True,False to -> true,false
        return str(v).lower()
    return v

getJsonTypeSamples()

does my class provide a "getSamples" method?

Source code in lodstorage/jsonable.py
121
122
123
124
125
126
127
128
129
130
def getJsonTypeSamples(self):
    """
    get type samples via my class - or, for a JSONAbleList without its
    own samples, via the class of my list elements
    """
    if not hasattr(self, "__class__"):
        return None
    cls = self.__class__
    if isinstance(self, JSONAbleList) and not hasattr(cls, "getSamples"):
        cls = self.clazz
    return JSONAble.getJsonTypeSamplesForClass(cls)

getJsonTypeSamplesForClass() staticmethod

return the type samples for the given class

Return

list: a list of dict that specify the types by example

Source code in lodstorage/jsonable.py
132
133
134
135
136
137
138
139
140
141
142
143
144
@staticmethod
def getJsonTypeSamplesForClass(cls):
    """
    return the type samples for the given class

    Return:
        list: a list of dict that specify the types by example
    """
    if hasattr(cls, "getSamples"):
        getSamples = getattr(cls, "getSamples")
        if callable(getSamples):
            return getSamples()
    return None

readJsonFromFile(jsonFilePath) staticmethod

read json string from the given jsonFilePath

Parameters:

Name Type Description Default
jsonFilePath(string)

the path of the file where to read the result from

required

Returns:

Type Description

the JSON string read from the file

Source code in lodstorage/jsonable.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
@staticmethod
def readJsonFromFile(jsonFilePath):
    """
    read json string from the given jsonFilePath

    Args:
        jsonFilePath(string): the path of the file where to read the result from

    Returns:
        the JSON string read from the file
    """
    with open(jsonFilePath, "r") as jsonFile:
        jsonStr = jsonFile.read()
    return jsonStr

reprDict(srcDict)

get the given srcDict as new dict with fields being converted with getJSONValue

Parameters:

Name Type Description Default
srcDict(dict)

the source dictionary

required

Returns dict: the converted dictionary

Source code in lodstorage/jsonable.py
316
317
318
319
320
321
322
323
324
325
326
327
328
329
def reprDict(self, srcDict):
    """
    build a new dict from srcDict with every value converted via getJSONValue

    Args:
        srcDict(dict): the source dictionary

    Returns:
        dict: the converted dictionary
    """
    return {key: self.getJSONValue(value) for key, value in srcDict.items()}

restoreFromJsonFile(jsonFile)

restore me from the given jsonFile

Parameters:

Name Type Description Default
jsonFile(string)

the jsonFile to restore me from

required
Source code in lodstorage/jsonable.py
202
203
204
205
206
207
208
209
210
211
def restoreFromJsonFile(self, jsonFile: str):
    """
    load my state from the given jsonFile

    Args:
        jsonFile(string): the jsonFile to restore me from
    """
    jsonFilePath = self.checkExtension(jsonFile)
    jsonStr = JSONAble.readJsonFromFile(jsonFilePath)
    self.fromJson(jsonStr)

singleQuoteToDoubleQuote(singleQuoted, useRegex=False) staticmethod

convert a single quoted string to a double quoted one

Parameters:

Name Type Description Default
singleQuoted str

a single quoted string e.g.

.. highlight:: json

{'cities': [{'name': "Upper Hell's Gate"}]}

required
useRegex boolean

True if a regular expression shall be used for matching

False

Returns:

Name Type Description
string

the double quoted version of the string

Note

see - https://stackoverflow.com/questions/55600788/python-replace-single-quotes-with-double-quotes-but-leave-ones-within-double-q

Source code in lodstorage/jsonable.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
@staticmethod
def singleQuoteToDoubleQuote(singleQuoted, useRegex=False):
    """
    convert a single quoted string to a double quoted one

    Args:
        singleQuoted (str): a single quoted string e.g.

            .. highlight:: json

            {'cities': [{'name': "Upper Hell's Gate"}]}

        useRegex (boolean): True if a regular expression shall be used for matching

    Returns:
        string: the double quoted version of the string

    Note:
        see
        - https://stackoverflow.com/questions/55600788/python-replace-single-quotes-with-double-quotes-but-leave-ones-within-double-q

    """
    if useRegex:
        return JSONAble.singleQuoteToDoubleQuoteUsingRegex(singleQuoted)
    return JSONAble.singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted)

singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted) staticmethod

convert a single quoted string to a double quoted one using a bracket loop

Parameters:

Name Type Description Default
singleQuoted(string)

a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}

required
useRegex(boolean)

True if a regular expression shall be used for matching

required

Returns: string: the double quoted version of the string e.g. Note: see https://stackoverflow.com/a/63862387/1497139

Source code in lodstorage/jsonable.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
@staticmethod
def singleQuoteToDoubleQuoteUsingBracketLoop(singleQuoted):
    """
    convert a single quoted string to a double quoted one using a regular expression

    Args:
        singleQuoted(string): a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}
        useRegex(boolean): True if a regular expression shall be used for matching
    Returns:
        string: the double quoted version of the string e.g.
    Note:
        see https://stackoverflow.com/a/63862387/1497139

    """
    cList = list(singleQuoted)
    inDouble = False
    inSingle = False
    for i, c in enumerate(cList):
        # print ("%d:%s %r %r" %(i,c,inSingle,inDouble))
        if c == "'":
            if not inDouble:
                inSingle = not inSingle
                cList[i] = '"'
        elif c == '"':
            inDouble = not inDouble
            inSingle = False
    doubleQuoted = "".join(cList)
    return doubleQuoted

singleQuoteToDoubleQuoteUsingRegex(singleQuoted) staticmethod

convert a single quoted string to a double quoted one using a regular expression

Parameters:

Name Type Description Default
singleQuoted(string)

a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}

required
useRegex(boolean)

True if a regular expression shall be used for matching

required

Returns: string: the double quoted version of the string e.g. Note: see https://stackoverflow.com/a/50257217/1497139

Source code in lodstorage/jsonable.py
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
@staticmethod
def singleQuoteToDoubleQuoteUsingRegex(singleQuoted):
    """
    convert a single quoted string to a double quoted one using a regular expression

    Args:
        singleQuoted(string): a single quoted string e.g. {'cities': [{'name': "Upper Hell's Gate"}]}
    Returns:
        string: the double quoted version of the string
    Note:
        see https://stackoverflow.com/a/50257217/1497139
    """
    return JSONAbleSettings.singleQuoteRegex.sub('"', singleQuoted)

storeJsonToFile(jsonStr, jsonFilePath) staticmethod

store the given json string to the given jsonFilePath

Parameters:

Name Type Description Default
jsonStr(string)

the string to store

required
jsonFilePath(string)

the path of the file where to store the result

required
Source code in lodstorage/jsonable.py
161
162
163
164
165
166
167
168
169
170
171
172
@staticmethod
def storeJsonToFile(jsonStr, jsonFilePath):
    """
    store the given json string to the given jsonFilePath

    Args:
        jsonStr(string): the string to store
        jsonFilePath(string): the path of the file where to store the result

    """
    with open(jsonFilePath, "w") as jsonFile:
        jsonFile.write(jsonStr)

storeToJsonFile(jsonFile, extension='.json', limitToSampleFields=False)

store me to the given jsonFile

Parameters:

Name Type Description Default
jsonFile(str)

the JSON file name (optionally without extension)

required
extension(str)

the extension to use if not part of the jsonFile name

required
limitToSampleFields(bool)

If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.

required
Source code in lodstorage/jsonable.py
188
189
190
191
192
193
194
195
196
197
198
199
200
def storeToJsonFile(
    self, jsonFile: str, extension: str = ".json", limitToSampleFields: bool = False
):
    """
    store me to the given jsonFile

    Args:
        jsonFile(str): the JSON file name (optionally without extension)
        extension(str): the extension to use if not part of the jsonFile name
        limitToSampleFields(bool): If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.
    """
    # make sure the file name carries the expected extension
    jsonFile = self.checkExtension(jsonFile, extension)
    JSONAble.storeJsonToFile(self.toJSON(limitToSampleFields), jsonFile)

toJSON(limitToSampleFields=False)

Parameters:

Name Type Description Default
limitToSampleFields(bool)

If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.

required

Returns:

Type Description

a recursive JSON dump of the dicts of my objects

Source code in lodstorage/jsonable.py
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
def toJSON(self, limitToSampleFields: bool = False):
    """
    serialize me to a JSON string

    Args:
        limitToSampleFields(bool): If True the returned JSON is limited to the attributes/fields that are present in the samples. Otherwise all attributes of the object will be included. Default is False.

    Returns:
        a recursive JSON dump of the dicts of my objects
    """
    data = {}
    if limitToSampleFields:
        samples = self.getJsonTypeSamples()
        sampleFields = LOD.getFields(samples)
        if isinstance(self, JSONAbleList):
            # list container: serialize only the records of my list
            # attribute, restricted to the sampled fields
            limitedRecords = []
            for record in self.__dict__[self.listName]:
                limitedRecord = {}
                for key, value in record.__dict__.items():
                    if key in sampleFields:
                        limitedRecord[key] = value
                limitedRecords.append(limitedRecord)
            data[self.listName] = limitedRecords
        else:
            # plain object: keep only the sampled attributes
            for key, value in self.__dict__.items():
                if key in sampleFields:
                    data[key] = value
    else:
        # serialize all attributes; toJsonAbleValue converts
        # non-JSON-native values
        data = self
    jsonStr = json.dumps(
        data,
        default=lambda v: self.toJsonAbleValue(v),
        sort_keys=True,
        indent=JSONAbleSettings.indent,
    )
    return jsonStr

toJsonAbleValue(v)

return the JSON able value of the given value v Args: v(object): the value to convert

Source code in lodstorage/jsonable.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def toJsonAbleValue(self, v):
    """
    return the JSON able value of the given value v

    Args:
        v(object): the value to convert
    """
    # objects carrying a __dict__ hash table are serialized via it
    if hasattr(v, "__dict__"):
        return v.__dict__
    # date and datetime values are rendered in ISO 8601 format
    if isinstance(v, (datetime.datetime, datetime.date)):
        return v.isoformat()
    # anything else is not serializable - map it to an empty string
    return ""

JSONAbleList

Bases: JSONAble

Container class

Source code in lodstorage/jsonable.py
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
class JSONAbleList(JSONAble):
    """
    Container class

    holds a list of dicts - or, in ORM mode, a list of instances of the
    configured clazz - under the attribute named by listName
    """

    def __init__(
        self,
        listName: str = None,
        clazz=None,
        tableName: str = None,
        initList: bool = True,
        handleInvalidListTypes=False,
        filterInvalidListTypes=False,
    ):
        """
        Constructor

        Args:
            listName(str): the name of the list attribute to be used for storing the List
            clazz(class): a class to be used for Object relational mapping (if any)
            tableName(str): the name of the "table" to be used
            initList(bool): True if the list should be initialized
            handleInvalidListTypes(bool): True if invalidListTypes should be converted or filtered
            filterInvalidListTypes(bool): True if invalidListTypes should be deleted
        """
        self.clazz = clazz
        self.handleInvalidListTypes = handleInvalidListTypes
        self.filterInvalidListTypes = filterInvalidListTypes
        if listName is None:
            if self.clazz is not None:
                listName = self.clazz.getPluralname()
            else:
                # fix: derive the default list name from the class name;
                # the previous self.__class__.name would raise an
                # AttributeError since ordinary classes have no "name"
                # attribute - __name__ is the intended lookup
                listName = self.__class__.__name__.lower()
        self.listName = listName
        if tableName is None:
            self.tableName = listName
        else:
            self.tableName = tableName
        if initList:
            self.__dict__[self.listName] = []

    def getList(self):
        """
        get my list
        """
        return self.__dict__[self.listName]

    def setListFromLoD(self, lod: list) -> list:
        """
        set my list from the given list of dicts

        Args:
            lod(list): a raw record list of dicts

        Returns:
            list: a list of dicts if no clazz is set
                otherwise a list of objects
        """
        # non OO mode
        if self.clazz is None:
            result = lod
            self.__dict__[self.listName] = result
        else:
            # ORM mode
            # TODO - handle errors
            self.fromLoD(lod, append=False)
        return self.getList()

    def getLoDfromJson(self, jsonStr: str, types=None, listName: str = None):
        """
        get a list of Dicts from the given JSON String

        Args:
            jsonStr(str): the JSON string
            types(Types): optional type information to be fixed after parsing
            listName(str): if set, accept a top-level JSON list directly
        Returns:
            list: a list of dicts

        Raises:
            Exception: if the JSON is a dict without an entry for my listName
        """
        # read a data structure from the given JSON string
        lodOrDict = json.loads(jsonStr)
        # it should be a list only of dict with my list
        if not isinstance(lodOrDict, dict) and listName is not None:
            lod = lodOrDict
        else:
            if self.listName in lodOrDict:
                # get the relevant list of dicts
                lod = lodOrDict[self.listName]
            else:
                msg = f"invalid JSON for getLoD from Json\nexpecting a list of dicts or a dict with '{self.listName}' as list\nfound a dict with keys: {lodOrDict.keys()} instead"
                raise Exception(msg)
        if types is not None:
            types.fixTypes(lod, self.listName)
        return lod

    def fromLoD(self, lod, append: bool = True, debug: bool = False):
        """
        load my entityList from the given list of dicts

        Args:
            lod(list): the list of dicts to load
            append(bool): if True append to my existing entries
            debug(bool): if True print conversion errors

        Return:
            list: a list of errors (if any)

        """
        errors = []
        entityList = self.getList()
        if not append:
            # clear in place so external references stay valid
            del entityList[:]
        if self.handleInvalidListTypes:
            LOD.handleListTypes(lod=lod, doFilter=self.filterInvalidListTypes)

        for record in lod:
            # call the constructor to get a new instance
            try:
                entity = self.clazz()
                entity.fromDict(record)
                entityList.append(entity)
            except Exception as ex:
                # collect the failing record together with its error
                error = {self.listName: record, "error": ex}
                errors.append(error)
                if debug:
                    print(error)
        return errors

    def getLookup(self, attrName: str, withDuplicates: bool = False):
        """
        create a lookup dictionary by the given attribute name

        Args:
            attrName(str): the attribute to lookup
            withDuplicates(bool): whether to retain single values or lists

        Return:
            a dictionary for lookup or a tuple dictionary,list of duplicates depending on withDuplicates
        """
        return LOD.getLookup(self.getList(), attrName, withDuplicates)

    def getJsonData(self):
        """
        get my Jsondata - a dict mapping my list name to my list
        """
        jsonData = {self.listName: self.__dict__[self.listName]}
        return jsonData

    def toJsonAbleValue(self, v):
        """
        make sure we don't store our meta information
        clazz, tableName and listName but just the list we are holding
        """
        if v == self:
            return self.getJsonData()
        else:
            return super().toJsonAbleValue(v)

    def fromJson(self, jsonStr, types=None):
        """
        initialize me from the given JSON string

        Args:
            jsonStr(str): the JSON string
            types(Types): optional type information to be fixed after parsing
        """
        lod = self.getLoDfromJson(jsonStr, types, listName=self.listName)
        self.setListFromLoD(lod)

    def asJSON(self, asString=True):
        """
        return my list payload as JSON

        Args:
            asString(bool): if True return a JSON string else raw data
        """
        jsonData = self.getJsonData()
        return super().asJSON(asString, data=jsonData)

    def restoreFromJsonFile(self, jsonFile: str) -> list:
        """
        read my list of dicts from the given JSON file and restore it
        """
        lod = self.readLodFromJsonFile(jsonFile)
        return self.setListFromLoD(lod)

    def restoreFromJsonStr(self, jsonStr: str) -> list:
        """
        restore me from the given jsonStr

        Args:
            jsonStr(str): the json string to restore me from
        """
        lod = self.readLodFromJsonStr(jsonStr)
        return self.setListFromLoD(lod)

    def readLodFromJsonFile(self, jsonFile: str, extension: str = ".json"):
        """
        read the list of dicts from the given jsonFile

        Args:
            jsonFile(string): the jsonFile to read from
            extension(string): the extension to append if not part of jsonFile

        Returns:
            list: a list of dicts
        """
        jsonFile = self.checkExtension(jsonFile, extension)
        jsonStr = JSONAble.readJsonFromFile(jsonFile)
        lod = self.readLodFromJsonStr(jsonStr)
        return lod

    def readLodFromJsonStr(self, jsonStr) -> list:
        """
        read a list of dicts from the given JSON string

        Args:
            jsonStr(str): the JSON string to parse

        Returns:
            list: a list of dicts
        """
        # derive type information from samples so that e.g. dates
        # can be restored from their string representation
        if self.clazz is None:
            typeSamples = self.getJsonTypeSamples()
        else:
            typeSamples = self.clazz.getSamples()
        if typeSamples is None:
            types = None
        else:
            types = Types(
                self.listName, warnOnUnsupportedTypes=not self.handleInvalidListTypes
            )
            types.getTypes(self.listName, typeSamples, len(typeSamples))
        lod = self.getLoDfromJson(jsonStr, types, listName=self.listName)
        return lod

__init__(listName=None, clazz=None, tableName=None, initList=True, handleInvalidListTypes=False, filterInvalidListTypes=False)

Constructor

Parameters:

Name Type Description Default
listName(str)

the name of the list attribute to be used for storing the List

required
clazz(class)

a class to be used for Object relational mapping (if any)

required
tableName(str)

the name of the "table" to be used

required
initList(bool)

True if the list should be initialized

required
handleInvalidListTypes(bool)

True if invalidListTypes should be converted or filtered

required
filterInvalidListTypes(bool)

True if invalidListTypes should be deleted

required
Source code in lodstorage/jsonable.py
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
def __init__(
    self,
    listName: str = None,
    clazz=None,
    tableName: str = None,
    initList: bool = True,
    handleInvalidListTypes=False,
    filterInvalidListTypes=False,
):
    """
    Constructor

    Args:
        listName(str): the name of the list attribute to be used for storing the List
        clazz(class): a class to be used for Object relational mapping (if any)
        tableName(str): the name of the "table" to be used
        initList(bool): True if the list should be initialized
        handleInvalidListTypes(bool): True if invalidListTypes should be converted or filtered
        filterInvalidListTypes(bool): True if invalidListTypes should be deleted
    """
    self.clazz = clazz
    self.handleInvalidListTypes = handleInvalidListTypes
    self.filterInvalidListTypes = filterInvalidListTypes
    if listName is None:
        if self.clazz is not None:
            listName = self.clazz.getPluralname()
        else:
            # fix: use __name__ - self.__class__.name would raise an
            # AttributeError for ordinary classes
            listName = self.__class__.__name__.lower()
    self.listName = listName
    if tableName is None:
        self.tableName = listName
    else:
        self.tableName = tableName
    if initList:
        self.__dict__[self.listName] = []

fromJson(jsonStr, types=None)

initialize me from the given JSON string

Parameters:

Name Type Description Default
jsonStr(str)

the JSON string

required
fixType(Types)

the types to be fixed

required
Source code in lodstorage/jsonable.py
504
505
506
507
508
509
510
511
512
513
def fromJson(self, jsonStr, types=None):
    """
    initialize me from the given JSON string

    Args:
        jsonStr(str): the JSON string
        types(Types): optional type information to be fixed after parsing
    """
    lod = self.getLoDfromJson(jsonStr, types, listName=self.listName)
    self.setListFromLoD(lod)

fromLoD(lod, append=True, debug=False)

load my entityList from the given list of dicts

Parameters:

Name Type Description Default
lod(list)

the list of dicts to load

required
append(bool)

if True append to my existing entries

required
Return

list: a list of errors (if any)

Source code in lodstorage/jsonable.py
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
def fromLoD(self, lod, append: bool = True, debug: bool = False):
    """
    load my entityList from the given list of dicts

    Args:
        lod(list): the list of dicts to load
        append(bool): if True append to my existing entries
        debug(bool): if True print conversion errors

    Return:
        list: a list of errors (if any)

    """
    errors = []
    entityList = self.getList()
    if not append:
        # clear in place so external references stay valid
        del entityList[:]
    if self.handleInvalidListTypes:
        LOD.handleListTypes(lod=lod, doFilter=self.filterInvalidListTypes)

    for record in lod:
        # call the constructor to get a new instance
        try:
            entity = self.clazz()
            entity.fromDict(record)
            entityList.append(entity)
        except Exception as ex:
            # collect the failing record together with its error
            error = {self.listName: record, "error": ex}
            errors.append(error)
            if debug:
                print(error)
    return errors

getJsonData()

get my Jsondata

Source code in lodstorage/jsonable.py
487
488
489
490
491
492
def getJsonData(self):
    """
    return my JSON payload: a dict mapping my list name to my list
    """
    payload = {self.listName: self.__dict__[self.listName]}
    return payload

getList()

get my list

Source code in lodstorage/jsonable.py
389
390
391
392
393
def getList(self):
    """
    return the list of records I am holding
    """
    entries = self.__dict__[self.listName]
    return entries

getLoDfromJson(jsonStr, types=None, listName=None)

get a list of Dicts from the given JSON String

Parameters:

Name Type Description Default
jsonStr(str)

the JSON string

required
fixType(Types)

the types to be fixed

required

Returns: list: a list of dicts

Source code in lodstorage/jsonable.py
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
def getLoDfromJson(self, jsonStr: str, types=None, listName: str = None):
    """
    get a list of Dicts from the given JSON String

    Args:
        jsonStr(str): the JSON string
        types(Types): optional type information to be fixed after parsing
        listName(str): if set, accept a top-level JSON list directly
    Returns:
        list: a list of dicts

    Raises:
        Exception: if the JSON is a dict without an entry for my listName
    """
    # read a data structure from the given JSON string
    lodOrDict = json.loads(jsonStr)
    # it should be a list only of dict with my list
    if not isinstance(lodOrDict, dict) and listName is not None:
        lod = lodOrDict
    else:
        # NOTE(review): the lookup uses self.listName, not the listName
        # parameter - confirm this asymmetry is intended
        if self.listName in lodOrDict:
            # get the relevant list of dicts
            lod = lodOrDict[self.listName]
        else:
            msg = f"invalid JSON for getLoD from Json\nexpecting a list of dicts or a dict with '{self.listName}' as list\nfound a dict with keys: {lodOrDict.keys()} instead"
            raise Exception(msg)
    if types is not None:
        types.fixTypes(lod, self.listName)
    return lod

getLookup(attrName, withDuplicates=False)

create a lookup dictionary by the given attribute name

Parameters:

Name Type Description Default
attrName(str)

the attribute to lookup

required
withDuplicates(bool)

whether to retain single values or lists

required
Return

a dictionary for lookup or a tuple dictionary,list of duplicates depending on withDuplicates

Source code in lodstorage/jsonable.py
474
475
476
477
478
479
480
481
482
483
484
485
def getLookup(self, attrName: str, withDuplicates: bool = False):
    """
    create a lookup dictionary by the given attribute name

    Args:
        attrName(str): the attribute to lookup
        withDuplicates(bool): whether to retain single values or lists

    Return:
        a dictionary for lookup or a tuple dictionary,list of duplicates depending on withDuplicates
    """
    # delegate to the LOD helper working on my current list
    return LOD.getLookup(self.getList(), attrName, withDuplicates)

readLodFromJsonFile(jsonFile, extension='.json')

read the list of dicts from the given jsonFile

Parameters:

Name Type Description Default
jsonFile(string)

the jsonFile to read from

required

Returns:

Name Type Description
list

a list of dicts

Source code in lodstorage/jsonable.py
536
537
538
539
540
541
542
543
544
545
546
547
548
549
def readLodFromJsonFile(self, jsonFile: str, extension: str = ".json"):
    """
    read the list of dicts from the given jsonFile

    Args:
        jsonFile(string): the jsonFile to read from
        extension(string): the extension to append if not part of jsonFile

    Returns:
        list: a list of dicts
    """
    jsonFile = self.checkExtension(jsonFile, extension)
    jsonStr = JSONAble.readJsonFromFile(jsonFile)
    lod = self.readLodFromJsonStr(jsonStr)
    return lod

readLodFromJsonStr(jsonStr)

restore me from the given jsonStr

Parameters:

Name Type Description Default
jsonStr(string)

the JSON string to parse

required
Source code in lodstorage/jsonable.py
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
def readLodFromJsonStr(self, jsonStr) -> list:
    """
    read a list of dicts from the given JSON string

    Args:
        jsonStr(str): the JSON string to parse

    Returns:
        list: a list of dicts
    """
    # derive type information from samples so that e.g. dates
    # can be restored from their string representation
    if self.clazz is None:
        typeSamples = self.getJsonTypeSamples()
    else:
        typeSamples = self.clazz.getSamples()
    if typeSamples is None:
        types = None
    else:
        types = Types(
            self.listName, warnOnUnsupportedTypes=not self.handleInvalidListTypes
        )
        types.getTypes(self.listName, typeSamples, len(typeSamples))
    lod = self.getLoDfromJson(jsonStr, types, listName=self.listName)
    return lod

restoreFromJsonFile(jsonFile)

read my list of dicts and restore it

Source code in lodstorage/jsonable.py
519
520
521
522
523
524
def restoreFromJsonFile(self, jsonFile: str) -> list:
    """
    read my list of dicts from the given JSON file and restore it

    Args:
        jsonFile(str): the JSON file to read from

    Returns:
        list: the restored list
    """
    lod = self.readLodFromJsonFile(jsonFile)
    return self.setListFromLoD(lod)

restoreFromJsonStr(jsonStr)

restore me from the given jsonStr

Parameters:

Name Type Description Default
jsonStr(str)

the json string to restore me from

required
Source code in lodstorage/jsonable.py
526
527
528
529
530
531
532
533
534
def restoreFromJsonStr(self, jsonStr: str) -> list:
    """
    restore me from the given jsonStr

    Args:
        jsonStr(str): the json string to restore me from

    Returns:
        list: the restored list
    """
    lod = self.readLodFromJsonStr(jsonStr)
    return self.setListFromLoD(lod)

setListFromLoD(lod)

set my list from the given list of dicts

Returns:

Name Type Description
list list

a list of dicts if no clazz is set otherwise a list of objects

Source code in lodstorage/jsonable.py
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def setListFromLoD(self, lod: list) -> list:
    """
    set my list from the given list of dicts

    Args:
        lod(list): a raw record list of dicts

    Returns:
        list: a list of dicts if no clazz is set
            otherwise a list of objects
    """
    # non OO mode
    if self.clazz is None:
        result = lod
        self.__dict__[self.listName] = result
    else:
        # ORM mode
        # TODO - handle errors
        self.fromLoD(lod, append=False)
    return self.getList()

toJsonAbleValue(v)

make sure we don't store our meta information clazz, tableName and listName but just the list we are holding

Source code in lodstorage/jsonable.py
494
495
496
497
498
499
500
501
502
def toJsonAbleValue(self, v):
    """
    make sure we don't store our meta information
    clazz, tableName and listName but just the list we are holding

    Args:
        v(object): the value to convert
    """
    if v == self:
        # serialize myself as just my list payload
        return self.getJsonData()
    else:
        return super().toJsonAbleValue(v)

JSONAbleSettings

settings for JSONAble - put in a separate class so they would not be serialized

Source code in lodstorage/jsonable.py
15
16
17
18
19
20
21
22
23
24
25
26
class JSONAbleSettings:
    """
    Settings for JSONAble - kept in a separate class so that they are
    not serialized along with instance data.
    """

    # number of spaces used when indenting JSON output
    indent = 4

    # regular expression for converting single quotes to double quotes:
    # matches a single quote that is not preceded by a backslash
    # see https://stackoverflow.com/a/50257217/1497139
    singleQuoteRegex = re.compile(r"(?<!\\)'")

indent = 4 class-attribute instance-attribute

regular expression to be used for conversion from singleQuote to doubleQuote see https://stackoverflow.com/a/50257217/1497139

Types

Bases: JSONAble

Types

holds entity meta Info

:ivar name(string): entity name = table name

Source code in lodstorage/jsonable.py
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
class Types(JSONAble):
    """
    Types

    holds entity meta Info

    :ivar name(string): entity name = table name
    """

    # mapping from type names (as stored in typeMap) to python types
    typeName2Type = {
        "bool": bool,
        "date": datetime.date,
        "datetime": datetime.datetime,
        "float": float,
        "int": int,
        "str": str,
    }

    def __init__(self, name: str, warnOnUnsupportedTypes=True, debug=False):
        """
        Constructor

        Args:
            name(str): the name of the type map
            warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type
            debug(bool): if True - debugging information should be shown
        """
        self.name = name
        self.warnOnUnsupportedTypes = warnOnUnsupportedTypes
        self.debug = debug
        # map: listName -> field name -> python type name
        self.typeMap = {}

    @staticmethod
    def forTable(
        instance, listName: str, warnOnUnsupportedTypes: bool = True, debug=False
    ):
        """
        get the types for the list of Dicts (table) in the given instance with the given listName
        Args:
            instance(object): the instance to inspect
            listName(string): the list of dicts to inspect
            warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type
            debug(bool): True if debugging information should be shown

        Returns:
            Types: a types object
        """
        clazz = type(instance)
        types = Types(
            clazz.__name__, warnOnUnsupportedTypes=warnOnUnsupportedTypes, debug=debug
        )
        types.getTypes(listName, instance.__dict__[listName])
        return types

    def addType(self, listName, field, valueType):
        """
        add the python type for the given field to the typeMap

        Args:
           listName(string): the name of the list of the field
           field(string): the name of the field

           valueType(type): the python type of the field
        """
        if listName not in self.typeMap:
            self.typeMap[listName] = {}
        typeMap = self.typeMap[listName]
        # first type seen for a field wins - later records do not override
        if not field in typeMap:
            typeMap[field] = valueType

    def getTypes(self, listName: str, sampleRecords: list, limit: int = 10):
        """
        determine the types for the given sample records

        Args:
            listName(str): the name of the list
            sampleRecords(list): a list of items
            limit(int): the maximum number of items to check
        """
        for sampleRecord in sampleRecords[:limit]:
            items = sampleRecord.items()
            self.getTypesForItems(listName, items, warnOnNone=len(sampleRecords) == 1)

    def getTypesForItems(self, listName: str, items: list, warnOnNone: bool = False):
        """
        get the types for the given items
        side effect is setting my types

        Args:
            listName(str): the name of the list
            items(list): a list of items
            warnOnNone(bool): if TRUE warn if an item value is None

        """
        for key, value in items:
            valueType = None
            if value is None:
                # NOTE(review): the str fallback is only applied when
                # debug is active - for None values without debug no
                # type is recorded at all; confirm this is intended
                if warnOnNone and self.debug:
                    print(
                        f"Warning sampleRecord field {key} is None - using string as type"
                    )
                    valueType = str
            else:
                valueType = type(value)
            # only the scalar types in typeName2Type are supported
            if valueType == str:
                pass
            elif valueType == int:
                pass
            elif valueType == float:
                pass
            elif valueType == bool:
                pass
            elif valueType == datetime.date:
                pass
            elif valueType == datetime.datetime:
                pass
            else:
                if valueType is not None:
                    msg = f"warning: unsupported type {str(valueType)} for field {key}"
                    if self.debug and self.warnOnUnsupportedTypes:
                        print(msg)
            if valueType is not None:
                self.addType(listName, key, valueType.__name__)

    def fixTypes(self, lod: list, listName: str):
        """
        fix the types in the given data structure

        Args:
            lod(list): a list of dicts
            listName(str): the types to lookup by list name
        """
        # NOTE(review): the loop variable shadows the listName parameter,
        # so all type maps are applied regardless of the given listName -
        # confirm whether this is intended
        for listName in self.typeMap:
            self.fixListOfDicts(self.typeMap[listName], lod)

    def getType(self, typeName):
        """
        get the type for the given type name

        Returns:
            type: the python type or None if the name is unsupported
        """
        if typeName in Types.typeName2Type:
            return Types.typeName2Type[typeName]
        else:
            if self.debug:
                print("Warning unsupported type %s" % typeName)
            return None

    def fixListOfDicts(self, typeMap, listOfDicts):
        """
        fix the type in the given list of Dicts
        """
        for record in listOfDicts:
            for keyValue in record.items():
                key, value = keyValue
                if value is None:
                    record[key] = None
                elif key in typeMap:
                    valueType = self.getType(typeMap[key])
                    # convert string representations back to python values
                    if valueType == bool:
                        if type(value) == str:
                            b = value in ["True", "TRUE", "true"]
                        else:
                            b = value
                        record[key] = b
                    elif valueType == datetime.date:
                        # assumes value is an ISO date string - TODO confirm
                        dt = datetime.datetime.strptime(value, "%Y-%m-%d")
                        record[key] = dt.date()
                    elif valueType == datetime.datetime:
                        # see https://stackoverflow.com/questions/127803/how-do-i-parse-an-iso-8601-formatted-date
                        if isinstance(value, str):
                            if sys.version_info >= (3, 7):
                                dtime = datetime.datetime.fromisoformat(value)
                            else:
                                dtime = datetime.datetime.strptime(
                                    value, "%Y-%m-%dT%H:%M:%S.%f"
                                )
                        else:
                            # TODO: error handling
                            dtime = None
                        record[key] = dtime

__init__(name, warnOnUnsupportedTypes=True, debug=False)

Constructor

Parameters:

Name Type Description Default
name(str)

the name of the type map

required
warnOnUnsupportedTypes(bool)

if TRUE warn if an item value has an unsupported type

required
debug(bool)

if True - debugging information should be shown

required
Source code in lodstorage/jsonable.py
591
592
593
594
595
596
597
598
599
600
601
602
603
def __init__(self, name: str, warnOnUnsupportedTypes=True, debug=False):
    """
    Constructor

    Args:
        name(str): the name of the type map
        warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type
        debug(bool): if True - debugging information should be shown
    """
    self.name = name
    self.warnOnUnsupportedTypes = warnOnUnsupportedTypes
    self.debug = debug
    # map: listName -> field name -> python type name
    self.typeMap = {}

addType(listName, field, valueType)

add the python type for the given field to the typeMap

Parameters:

Name Type Description Default
listName(string)

the name of the list of the field

required
field(string)

the name of the field

required
valueType(type)

the python type of the field

required
Source code in lodstorage/jsonable.py
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
def addType(self, listName, field, valueType):
    """
    register the python type for the given field in my typeMap

    Args:
       listName(string): the name of the list of the field
       field(string): the name of the field

       valueType(type): the python type of the field
    """
    # the first type recorded for a field wins
    fieldMap = self.typeMap.setdefault(listName, {})
    if field not in fieldMap:
        fieldMap[field] = valueType

fixListOfDicts(typeMap, listOfDicts)

fix the type in the given list of Dicts

Source code in lodstorage/jsonable.py
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
def fixListOfDicts(self, typeMap, listOfDicts):
    """
    fix the types in the given list of dicts (in place)

    Args:
        typeMap(dict): maps field names to type names
        listOfDicts(list): the records whose values are converted to the
            registered python types
    """
    for record in listOfDicts:
        for key, value in record.items():
            if value is None:
                record[key] = None
            elif key in typeMap:
                valueType = self.getType(typeMap[key])
                if valueType == bool:
                    # fix: use isinstance instead of an exact type() comparison
                    if isinstance(value, str):
                        record[key] = value in ["True", "TRUE", "true"]
                    else:
                        record[key] = value
                elif valueType == datetime.date:
                    dt = datetime.datetime.strptime(value, "%Y-%m-%d")
                    record[key] = dt.date()
                elif valueType == datetime.datetime:
                    if isinstance(value, str):
                        # fromisoformat is available since Python 3.7 so the
                        # former sys.version_info fallback was dead code
                        record[key] = datetime.datetime.fromisoformat(value)
                    else:
                        # TODO: error handling for non-string datetime values
                        record[key] = None

fixTypes(lod, listName)

fix the types in the given data structure

Parameters:

Name Type Description Default
lod(list)

a list of dicts

required
listName(str)

the types to lookup by list name

required
Source code in lodstorage/jsonable.py
697
698
699
700
701
702
703
704
705
706
def fixTypes(self, lod: list, listName: str):
    """
    fix the types in the given data structure

    Args:
        lod(list): a list of dicts
        listName(str): the name of the list whose registered types should be applied
    """
    # fix: the listName parameter used to be shadowed by the loop variable,
    # so every registered type map was applied to lod regardless of the
    # given listName; now only the matching type map is used
    fieldTypeMap = self.typeMap.get(listName)
    if fieldTypeMap is not None:
        self.fixListOfDicts(fieldTypeMap, lod)

forTable(instance, listName, warnOnUnsupportedTypes=True, debug=False) staticmethod

get the types for the list of Dicts (table) in the given instance with the given listName Args: instance(object): the instance to inspect listName(string): the list of dicts to inspect warnOnUnsupportedTypes(bool): if TRUE warn if an item value has an unsupported type debug(bool): True if debugging information should be shown

Returns:

Name Type Description
Types

a types object

Source code in lodstorage/jsonable.py
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
@staticmethod
def forTable(
    instance, listName: str, warnOnUnsupportedTypes: bool = True, debug=False
):
    """
    get the types for the list of dicts (table) in the given instance
    with the given listName

    Args:
        instance(object): the instance to inspect
        listName(string): the list of dicts to inspect
        warnOnUnsupportedTypes(bool): if True warn if an item value has an unsupported type
        debug(bool): True if debugging information should be shown

    Returns:
        Types: a types object
    """
    # the table (list of dicts) is expected as an attribute of the instance
    sampleRecords = instance.__dict__[listName]
    types = Types(
        type(instance).__name__,
        warnOnUnsupportedTypes=warnOnUnsupportedTypes,
        debug=debug,
    )
    types.getTypes(listName, sampleRecords)
    return types

getType(typeName)

get the type for the given type name

Source code in lodstorage/jsonable.py
708
709
710
711
712
713
714
715
716
717
def getType(self, typeName):
    """
    get the python type for the given type name

    Returns:
        type: the mapped python type or None for unsupported type names
    """
    valueType = Types.typeName2Type.get(typeName)
    if valueType is None and self.debug:
        print("Warning unsupported type %s" % typeName)
    return valueType

getTypes(listName, sampleRecords, limit=10)

determine the types for the given sample records

Parameters:

Name Type Description Default
listName(str)

the name of the list

required
sampleRecords(list)

a list of items

required
limit(int)

the maximum number of items to check

required
Source code in lodstorage/jsonable.py
643
644
645
646
647
648
649
650
651
652
653
654
def getTypes(self, listName: str, sampleRecords: list, limit: int = 10):
    """
    determine the types for the given sample records

    Args:
        listName(str): the name of the list
        sampleRecords(list): a list of items
        limit(int): the maximum number of items to check
    """
    # with exactly one sample there is no further record that could
    # reveal the type of a None-valued field - so warn in that case
    warnOnNone = len(sampleRecords) == 1
    for sampleRecord in sampleRecords[:limit]:
        self.getTypesForItems(listName, sampleRecord.items(), warnOnNone=warnOnNone)

getTypesForItems(listName, items, warnOnNone=False)

get the types for the given items; the side effect is setting my types

Parameters:

Name Type Description Default
listName(str)

the name of the list

required
items(list)

a list of items

required
warnOnNone(bool)

if TRUE warn if an item value is None

required
Source code in lodstorage/jsonable.py
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
def getTypesForItems(self, listName: str, items: list, warnOnNone: bool = False):
    """
    get the types for the given items;
    side effect is registering the types in my typeMap

    Args:
        listName(str): the name of the list
        items(list): a list of (key, value) pairs
        warnOnNone(bool): if True use str as the fallback type for None values
            (and warn in debug mode)
    """
    # the types this type map knows how to handle downstream
    supportedTypes = (str, int, float, bool, datetime.date, datetime.datetime)
    for key, value in items:
        if value is None:
            if not warnOnNone:
                # skip - another sample record may reveal the type
                continue
            if self.debug:
                print(
                    f"Warning sampleRecord field {key} is None - using string as type"
                )
            # fix: the str fallback used to be nested inside the debug
            # conditional so it only took effect with debug enabled;
            # it now applies whenever warnOnNone is set
            valueType = str
        else:
            valueType = type(value)
            if valueType not in supportedTypes:
                # unsupported types are still registered but flagged
                if self.debug and self.warnOnUnsupportedTypes:
                    msg = f"warning: unsupported type {str(valueType)} for field {key}"
                    print(msg)
        self.addType(listName, key, valueType.__name__)

jsonpicklemixin

JsonPickleMixin

Bases: object

allow reading and writing derived objects from a jsonpickle file

Source code in lodstorage/jsonpicklemixin.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
class JsonPickleMixin(object):
    """
    allow reading and writing derived objects from a jsonpickle file
    """

    # class level debug flag - set to True to trace file I/O
    debug = False

    @staticmethod
    def checkExtension(jsonFile: str, extension: str = ".json") -> str:
        """
        make sure the jsonFile has the given extension e.g. ".json"

        Args:
            jsonFile(str): the jsonFile name - potentially without ".json" suffix
            extension(str): the file extension to guarantee

        Returns:
            str: the jsonFile name with the given extension guaranteed
        """
        if not jsonFile.endswith(extension):
            jsonFile = f"{jsonFile}{extension}"
        return jsonFile

    @staticmethod
    def readJsonPickle(jsonFileName, extension=".jsonpickle"):
        """
        read an object from the given jsonpickle file

        Args:
            jsonFileName(str): name of the file (optionally without extension postfix)
            extension(str): default file extension

        Returns:
            the decoded object or None if the file does not exist
        """
        jsonFileName = JsonPickleMixin.checkExtension(jsonFileName, extension)
        # guard clause: nothing to read if there is no such file
        if not os.path.isfile(jsonFileName):
            return None
        if JsonPickleMixin.debug:
            print("reading %s" % (jsonFileName))
        # fix: renamed local "json" to avoid shadowing the stdlib module name
        with open(jsonFileName) as jsonFile:
            jsonText = jsonFile.read()
        result = jsonpickle.decode(jsonText)
        if JsonPickleMixin.debug:
            print(jsonText)
            print(result)
        return result

    def asJsonPickle(self) -> str:
        """
        convert me to JSON

        Returns:
            str: a JSON String with my JSON representation
        """
        return jsonpickle.encode(self)

    def writeJsonPickle(self, jsonFileName: str, extension: str = ".jsonpickle"):
        """
        write me to the json file with the given name (optionally without postfix)

        Args:
            jsonFileName(str): name of the file (optionally without extension postfix)
            extension(str): default file extension
        """
        jsonFileName = JsonPickleMixin.checkExtension(jsonFileName, extension)
        jsonText = self.asJsonPickle()
        if JsonPickleMixin.debug:
            print("writing %s" % (jsonFileName))
            print(jsonText)
            print(self)
        # fix: use a context manager so the file handle is closed even on errors
        with open(jsonFileName, "w") as jsonFile:
            jsonFile.write(jsonText)

asJsonPickle()

convert me to JSON

Returns:

Name Type Description
str str

a JSON String with my JSON representation

Source code in lodstorage/jsonpicklemixin.py
52
53
54
55
56
57
58
59
60
def asJsonPickle(self) -> str:
    """
    convert me to JSON

    Returns:
        str: a JSON String with my JSON representation
    """
    # delegate the serialization entirely to jsonpickle
    return jsonpickle.encode(self)

checkExtension(jsonFile, extension='.json') staticmethod

make sure the jsonFile has the given extension e.g. ".json"

Parameters:

Name Type Description Default
jsonFile(str)

the jsonFile name - potentially without ".json" suffix

required

Returns:

Name Type Description
str str

the jsonFile name with ".json" as an extension guaranteed

Source code in lodstorage/jsonpicklemixin.py
14
15
16
17
18
19
20
21
22
23
24
25
26
27
@staticmethod
def checkExtension(jsonFile: str, extension: str = ".json") -> str:
    """
    guarantee the given file name carries the given extension

    Args:
        jsonFile(str): the file name, possibly lacking the extension
        extension(str): the extension to enforce, e.g. ".json"

    Returns:
        str: the file name ending in the given extension
    """
    return jsonFile if jsonFile.endswith(extension) else f"{jsonFile}{extension}"

readJsonPickle(jsonFileName, extension='.jsonpickle') staticmethod

Parameters:

Name Type Description Default
jsonFileName(str)

name of the file (optionally without ".json" postfix)

required
extension(str)

default file extension

required
Source code in lodstorage/jsonpicklemixin.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
@staticmethod
def readJsonPickle(jsonFileName, extension=".jsonpickle"):
    """
    read an object from the given jsonpickle file

    Args:
        jsonFileName(str): name of the file (optionally without extension postfix)
        extension(str): default file extension

    Returns:
        the decoded object or None if no such file exists
    """
    jsonFileName = JsonPickleMixin.checkExtension(jsonFileName, extension)
    # guard clause: nothing to read if there is no such file
    if not os.path.isfile(jsonFileName):
        return None
    if JsonPickleMixin.debug:
        print("reading %s" % (jsonFileName))
    # fix: renamed local "json" to avoid shadowing the stdlib module name
    with open(jsonFileName) as jsonFile:
        jsonText = jsonFile.read()
    result = jsonpickle.decode(jsonText)
    if JsonPickleMixin.debug:
        print(jsonText)
        print(result)
    return result

writeJsonPickle(jsonFileName, extension='.jsonpickle')

write me to the json file with the given name (optionally without postfix)

Parameters:

Name Type Description Default
jsonFileName(str)

name of the file (optionally without ".json" postfix)

required
extension(str)

default file extension

required
Source code in lodstorage/jsonpicklemixin.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
def writeJsonPickle(self, jsonFileName: str, extension: str = ".jsonpickle"):
    """
    write me to the json file with the given name (optionally without postfix)

    Args:
        jsonFileName(str): name of the file (optionally without extension postfix)
        extension(str): default file extension
    """
    jsonFileName = JsonPickleMixin.checkExtension(jsonFileName, extension)
    jsonText = self.asJsonPickle()
    if JsonPickleMixin.debug:
        print("writing %s" % (jsonFileName))
        print(jsonText)
        print(self)
    # fix: use a context manager so the file handle is closed even on errors
    with open(jsonFileName, "w") as jsonFile:
        jsonFile.write(jsonText)

linkml

Created on 2024-01-28

@author: wf

Class

Represents a class in the LinkML schema.

Source code in lodstorage/linkml.py
26
27
28
29
30
31
32
33
@lod_storable
class Class:
    """
    Represents a class in the LinkML schema.

    Attributes:
        description: human readable description of the class
        slots: the slots (fields/properties) belonging to the class
    """

    description: str
    # NOTE(review): LinkMLGen.gen_schema appends slot *names* (strings) to
    # this list although it is declared as List[Slot] - confirm the intent
    slots: List[Slot]

PythonTypes

python type handling

Source code in lodstorage/linkml.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
class PythonTypes:
    """
    python type handling

    provides lookups from python types to LinkML ranges and
    to RDF (XSD) datatypes
    """

    # Define a mapping from Python types to LinkML ranges
    to_linkml_ranges = {
        str: "string",
        int: "integer",
        float: "float",
        bool: "boolean",
        list: "list",
        dict: "dictionary",
    }
    # Mapping from Python types to RDF (XSD) datatypes
    to_rdf_datatypes = {
        str: XSD.string,
        int: XSD.integer,
        float: XSD.float,
        bool: XSD.boolean,
        # Add more mappings if needed
    }

    @classmethod
    def get_linkml_range(cls, ptype: Type) -> str:
        """
        Determines the LinkML range for a given Python type.

        Args:
            ptype (Type): The Python type for which the LinkML range is required.

        Returns:
            str: The corresponding LinkML range as a string. Defaults to "string" if the type is not found.
        """
        linkml_range = cls.to_linkml_ranges.get(ptype)
        if linkml_range is None:
            # unmapped types fall back to the most generic range
            linkml_range = "string"
        return linkml_range

    @classmethod
    def get_rdf_datatype(cls, ptype: Type) -> Optional[XSD]:
        """
        Determines the RDF (XSD) datatype for a given Python type.

        Args:
            ptype (Type): The Python type for which the RDF (XSD) datatype is required.

        Returns:
            XSD: The corresponding RDF (XSD) datatype. Returns None if the type is not found.
        """
        rdf_datatype = cls.to_rdf_datatypes.get(ptype)
        return rdf_datatype

get_linkml_range(ptype) classmethod

Determines the LinkML range for a given Python type.

Parameters:

Name Type Description Default
ptype Type

The Python type for which the LinkML range is required.

required

Returns:

Name Type Description
str str

The corresponding LinkML range as a string. Defaults to "string" if the type is not found.

Source code in lodstorage/linkml.py
113
114
115
116
117
118
119
120
121
122
123
124
@classmethod
def get_linkml_range(cls, ptype: Type) -> str:
    """
    Determines the LinkML range for a given Python type.

    Args:
        ptype (Type): The Python type for which the LinkML range is required.

    Returns:
        str: The corresponding LinkML range as a string. Defaults to "string" if the type is not found.
    """
    return cls.to_linkml_ranges.get(ptype, "string")

get_rdf_datatype(ptype) classmethod

Determines the RDF (XSD) datatype for a given Python type.

Parameters:

Name Type Description Default
ptype Type

The Python type for which the RDF (XSD) datatype is required.

required

Returns:

Name Type Description
XSD Optional[XSD]

The corresponding RDF (XSD) datatype. Returns None if the type is not found.

Source code in lodstorage/linkml.py
126
127
128
129
130
131
132
133
134
135
136
137
@classmethod
def get_rdf_datatype(cls, ptype: Type) -> Optional[XSD]:
    """
    Determines the RDF (XSD) datatype for a given Python type.

    Args:
        ptype (Type): The Python type for which the RDF (XSD) datatype is required.

    Returns:
        XSD: The corresponding RDF (XSD) datatype. Returns None if the type is not found.
    """
    return cls.to_rdf_datatypes.get(ptype)

Schema

Represents the entire LinkML schema.

Source code in lodstorage/linkml.py
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
@lod_storable
class Schema:
    """
    Represents the entire LinkML schema.

    Attributes:
        name: the schema name
        id: the schema identifier (usually a URI)
        description: human readable description of the schema
        title: optional title; defaults to name (see __post_init__)
        version: optional schema version
        license: optional license string
        default_prefix: optional default prefix
        prefixes: prefix -> expansion mapping
        imports: the list of imported schemas
        default_range: the range used when a slot declares none
        classes: class name -> Class definition
        slots: slot name -> Slot definition
        types: type name -> type definition
    """

    name: str
    id: str
    description: str
    title: Optional[str] = None
    version: Optional[str] = None
    license: Optional[str] = None

    default_prefix: Optional[str] = None

    prefixes: Dict[str, str] = field(default_factory=dict)
    imports: List[str] = field(default_factory=list)
    default_range: str = "string"
    classes: Dict[str, Class] = field(default_factory=dict)
    slots: Dict[str, Slot] = field(default_factory=dict)
    types: Dict[str, Type] = field(default_factory=dict)

    def __post_init__(self):
        # default the title to the schema name if none was given
        if not self.title:
            self.title = self.name

Slot

Represents a slot in the LinkML schema, equivalent to a field or property.

Source code in lodstorage/linkml.py
14
15
16
17
18
19
20
21
22
23
@lod_storable
class Slot:
    """
    Represents a slot in the LinkML schema, equivalent to a field or property.

    Attributes:
        description: human readable description of the slot
        range: the LinkML range (value type) of the slot
        multivalued: True if the slot holds multiple values
        identifier: True if the slot serves as the identifier of its class
    """

    description: str
    range: str = "string"
    multivalued: bool = False
    identifier: bool = False

linkml_gen

Created on 2024-01-21

@author: wf

LinkMLGen

Class for generating LinkML YAML schema from Python data models using dataclasses.

Source code in lodstorage/linkml_gen.py
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
class LinkMLGen:
    """
    Class for generating LinkML YAML schema from Python data models using dataclasses.
    """

    def __init__(self, schema: Schema):
        """
        Initialize the LinkMLGen.

        Args:
            schema (Schema): The LinkML schema to be generated.
        """
        self.schema = schema

    def gen_schema(self, data_model_class) -> Schema:
        """
        Generate LinkML schema elements for the given dataclass.

        Walks the fields of the dataclass, derives a LinkML range for each
        field (recursing into nested dataclasses for list/dict content
        types) and registers the resulting class and slots on self.schema.

        Args:
            data_model_class: a dataclass (the class itself, not an instance)

        Returns:
            Schema: the schema with the class and its slots added
        """
        # Use DocstringParser to extract class description
        parser = DocstringParser()
        class_description, doc_attributes = parser.parse(data_model_class.__doc__)

        class_name = data_model_class.__name__
        new_class = Class(description=class_description, slots=[])

        # Iterate over the fields of the dataclass
        for field_info in fields(data_model_class):
            attr_name = field_info.name
            attr_type = field_info.type

            # Handle Optional and List types
            is_optional = False
            is_list = False
            content_type = None
            # __origin__ is only present on typing generics like Optional/List/Dict
            if hasattr(attr_type, "__origin__"):
                if attr_type.__origin__ is Union and type(None) in attr_type.__args__:
                    is_optional = True
                    attr_type = [t for t in attr_type.__args__ if t is not type(None)][
                        0
                    ]  # unwrap Optional type
                elif attr_type.__origin__ is list:
                    is_list = True
                    content_type = attr_type.__args__[0]  # unwrap List type
                elif attr_type.__origin__ is dict:
                    # Assuming dictionary values are of interest, keys are strings
                    content_type = attr_type.__args__[
                        1
                    ]  # unwrap Dict type, focusing on value type
 
            # Check and handle nested dataclasses for lists or dicts
            if is_dataclass(content_type):
                # Recursive call to handle nested dataclass
                self.gen_schema(content_type)
                # Set the range to the name of the dataclass
                linkml_range = (
                    content_type.__name__
                )  # Use the name of the dataclass as the range
            elif is_list:
                # If it's a list, get the LinkML range for the base type
                # Use self.get_linkml_range to ensure consistent type mapping
                linkml_range = PythonTypes.get_linkml_range(content_type)
            else:
                # For non-list and non-dataclass types, use self.get_linkml_range for consistent type mapping
                linkml_range = PythonTypes.get_linkml_range(attr_type)

            # Extract description from doc_attributes
            description = doc_attributes.get(attr_name, {}).get(
                "description", f"{attr_name} - missing description"
            )

            # Create a new slot for the field
            new_slot = Slot(
                description=description, range=linkml_range, multivalued=is_list
            )
            self.schema.slots[attr_name] = new_slot
            new_class.slots.append(attr_name)

        self.schema.classes[class_name] = new_class
        return self.schema

    def gen_schema_from_instance(self, data_model_instance) -> Schema:
        """
        Generate a LinkML YAML schema from a Python data model using dataclasses.

        Args:
            data_model_instance: An instance of the Python data model.

        Returns:
            Schema: The LinkML schema generated from the data model.
        """
        # Use DocstringParser to extract class description and attributes
        parser = DocstringParser()
        class_description, doc_attributes = parser.parse(data_model_instance.__doc__)

        class_name = data_model_instance.__class__.__name__
        new_class = Class(description=class_description, slots=[])

        for field_info in fields(data_model_instance):
            attr_name = field_info.name
            attr_type = field_info.type

            # Extract field type/range
            linkml_range = PythonTypes.get_linkml_range(attr_type)

            # Check values for multivalued and type consistency
            attr_value = getattr(data_model_instance, attr_name)
            multivalued, actual_type = self.check_value(attr_value)

            # Ensure documentation, declaration, and value type are consistent
            # NOTE(review): this raises ValueError on any mismatch or missing doc
            self.ensure_consistency(
                attr_name, linkml_range, actual_type, doc_attributes
            )

            # Prepare slot
            description = doc_attributes.get(attr_name, {}).get(
                "description", f"{attr_name} - missing description"
            )
            if attr_name not in self.schema.slots:
                new_slot = Slot(
                    description=description, range=linkml_range, multivalued=multivalued
                )
                self.schema.slots[attr_name] = new_slot
                new_class.slots.append(attr_name)

            if multivalued:
                # recursive call if type of list or dict is a dataclass
                if hasattr(attr_type, "__args__"):
                    content_type = attr_type.__args__[
                        0
                    ]  # Get the declared content type
                    if is_dataclass(content_type):
                        self.gen_schema(content_type)

        self.schema.classes[class_name] = new_class
        return self.schema

    def check_value(self, value):
        """
        Check whether the given value is multivalued and determine its type name.

        Args:
            value: the attribute value to inspect

        Returns:
            tuple(bool, str): (multivalued, name of the value's python type)
        """
        # strings and bytes are iterable but count as scalar values here
        multivalued = isinstance(value, (Iterable, Mapping)) and not isinstance(
            value, (str, bytes)
        )
        value_type = type(value).__name__
        return multivalued, value_type

    def ensure_consistency(self, name, declared_type, actual_type, doc_attributes):
        """
        Ensure documentation, declaration and actual value type agree.

        Args:
            name: the attribute name
            declared_type: the LinkML range derived from the declaration
            actual_type: the type name observed on the actual value
            doc_attributes: attribute documentation parsed from the docstring

        Raises:
            ValueError: on a type mismatch or missing documentation
        """
        # Adjust this method to handle complex types like list, dict, etc.

        # Check if the actual type is a list or dict, and if so, get the type of its elements
        if actual_type == "list" or actual_type == "dict":
            # You may need a more complex logic here to handle lists of custom dataclasses
            # For simplicity, let's assume it's a list of strings for now
            actual_type = "string"

        # Now compare the adjusted actual type with the declared type
        if declared_type != actual_type:
            raise ValueError(
                f"Type mismatch for '{name}': declared as '{declared_type}', actual type is '{actual_type}'"
            )

        # Check for documentation
        if name not in doc_attributes:
            raise ValueError(f"Missing documentation for field '{name}'")

__init__(schema)

Initialize the LinkMLGen.

Parameters:

Name Type Description Default
schema Schema

The LinkML schema to be generated.

required
Source code in lodstorage/linkml_gen.py
20
21
22
23
24
25
26
27
def __init__(self, schema: Schema):
    """
    Initialize the LinkMLGen.

    Args:
        schema (Schema): The LinkML schema to be generated.
    """
    # the schema under construction - gen_schema* methods add to it
    self.schema = schema

gen_schema_from_instance(data_model_instance)

Generate a LinkML YAML schema from a Python data model using dataclasses.

Parameters:

Name Type Description Default
data_model_instance

An instance of the Python data model.

required

Returns:

Name Type Description
Schema Schema

The LinkML schema generated from the data model.

Source code in lodstorage/linkml_gen.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def gen_schema_from_instance(self, data_model_instance) -> Schema:
    """
    Generate a LinkML YAML schema from a Python data model using dataclasses.

    Args:
        data_model_instance: An instance of the Python data model.

    Returns:
        Schema: The LinkML schema generated from the data model.

    Raises:
        ValueError: via ensure_consistency when declared and actual types
            disagree or an attribute lacks documentation
    """
    # Use DocstringParser to extract class description and attributes
    parser = DocstringParser()
    class_description, doc_attributes = parser.parse(data_model_instance.__doc__)

    class_name = data_model_instance.__class__.__name__
    new_class = Class(description=class_description, slots=[])

    for field_info in fields(data_model_instance):
        attr_name = field_info.name
        attr_type = field_info.type

        # Extract field type/range
        linkml_range = PythonTypes.get_linkml_range(attr_type)

        # Check values for multivalued and type consistency
        attr_value = getattr(data_model_instance, attr_name)
        multivalued, actual_type = self.check_value(attr_value)

        # Ensure documentation, declaration, and value type are consistent
        self.ensure_consistency(
            attr_name, linkml_range, actual_type, doc_attributes
        )

        # Prepare slot - only register a slot name once on the schema
        description = doc_attributes.get(attr_name, {}).get(
            "description", f"{attr_name} - missing description"
        )
        if attr_name not in self.schema.slots:
            new_slot = Slot(
                description=description, range=linkml_range, multivalued=multivalued
            )
            self.schema.slots[attr_name] = new_slot
            new_class.slots.append(attr_name)

        if multivalued:
            # recursive call if type of list or dict is a dataclass
            if hasattr(attr_type, "__args__"):
                content_type = attr_type.__args__[
                    0
                ]  # Get the declared content type
                if is_dataclass(content_type):
                    self.gen_schema(content_type)

    self.schema.classes[class_name] = new_class
    return self.schema

lod

Created on 2021-01-31

@author: wf

LOD

Bases: object

list of Dict aka Table

Source code in lodstorage/lod.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
class LOD(object):
    """
    list of Dict aka Table

    static helpers for handling tabular data represented
    as a list of dictionaries (LoD)
    """

    def __init__(self, name):
        """
        Constructor

        Args:
            name(str): the name of this list of dicts
        """
        self.name = name

    @staticmethod
    def getFields(listOfDicts, sampleCount: int = None):
        """
        get the aggregated list of fields/keys for the given list of dicts

        Args:
            listOfDicts(list): the records to inspect
            sampleCount(int): the number of records to sample; if None all records are inspected

        Returns:
            list: the field names in order of first appearance,
            or None if listOfDicts is None
        """
        if listOfDicts is None:
            return None
        if sampleCount is None:
            sampleCount = len(listOfDicts)
        fields = []
        # fix: sampleCount was previously computed but ignored -
        # limit the inspection to the requested number of records
        for row in listOfDicts[:sampleCount]:
            # generalized: any non-dict row (e.g. a JSONAble instance)
            # is inspected via its attribute dictionary
            if not isinstance(row, dict):
                row = vars(row)
            for key in row.keys():
                if key not in fields:
                    fields.append(key)
        return fields

    @staticmethod
    def setNone4List(listOfDicts, fields):
        """
        set the given fields to None for the records in the given listOfDicts
        if they are not set

        Args:
            listOfDicts(list): the list of records to work on
            fields(list): the list of fields to set to None
        """
        for record in listOfDicts:
            LOD.setNone(record, fields)

    @staticmethod
    def setNone(record, fields):
        """
        make sure the given fields in the given record are set to None

        Args:
            record(dict): the record to work on
            fields(list): the list of fields to set to None
        """
        for field in fields:
            if field not in record:
                record[field] = None

    # see https://stackoverflow.com/questions/33542997/python-intersection-of-2-lists-of-dictionaries/33543164
    @staticmethod
    def sortKey(d, key=None):
        """
        get the sort key for the given dict d with the given key

        Args:
            d(dict): the dict to get the sort key for
            key(str): the key to use; if None a hash of all items is used

        Returns:
            the value of the given key or a hash of all items
        """
        if key is None:
            # https://stackoverflow.com/a/60765557/1497139
            return hash(tuple(d.items()))
        else:
            return d[key]

    @staticmethod
    def intersect(listOfDict1, listOfDict2, key=None):
        """
        get the intersection of the two lists of Dicts by the given key

        Args:
            listOfDict1(list): the first list of dicts
            listOfDict2(list): the second list of dicts
            key(str): the key to compare by; if None whole records are compared

        Returns:
            list: the records of listOfDict1 that also appear in listOfDict2
        """
        i1 = iter(sorted(listOfDict1, key=lambda k: LOD.sortKey(k, key)))
        i2 = iter(sorted(listOfDict2, key=lambda k: LOD.sortKey(k, key)))
        lr = []
        # fix: the initial next() calls are now inside the try block so that
        # an empty input list no longer raises an uncaught StopIteration
        try:
            c1 = next(i1)
            c2 = next(i2)
            while True:
                val1 = LOD.sortKey(c1, key)
                val2 = LOD.sortKey(c2, key)
                if val1 < val2:
                    c1 = next(i1)
                elif val1 > val2:
                    c2 = next(i2)
                else:
                    lr.append(c1)
                    c1 = next(i1)
                    c2 = next(i2)
        except StopIteration:
            pass
        return lr

    @staticmethod
    def addLookup(lookup, duplicates, record, value, withDuplicates: bool):
        """
        add a single lookup result

        Args:
            lookup(dict): the lookup map
            duplicates(list): the list of duplicates
            record(dict): the current record
            value(object): the current value to lookup
            withDuplicates(bool): if True duplicates are allowed and lists are returned;
            if False a separate duplicates list is created
        """
        if value in lookup:
            if withDuplicates:
                lookupResult = lookup[value]
                lookupResult.append(record)
            else:
                duplicates.append(record)
                return
        else:
            if withDuplicates:
                lookupResult = [record]
            else:
                lookupResult = record
        lookup[value] = lookupResult

    @staticmethod
    def getLookup(lod: list, attrName: str, withDuplicates: bool = False):
        """
        create a lookup dictionary by the given attribute name for the given list of dicts

        Args:
            lod(list): the list of dicts to get the lookup dictionary for
            attrName(str): the attribute to lookup
            withDuplicates(bool): whether to retain single values or lists

        Return:
            a lookup dictionary if withDuplicates is True,
            otherwise a (lookup, duplicates) tuple
        """
        lookup = {}
        duplicates = []
        for record in lod:
            value = None
            if isinstance(record, dict):
                if attrName in record:
                    value = record[attrName]
            else:
                if hasattr(record, attrName):
                    value = getattr(record, attrName)
            if value is not None:
                if isinstance(value, list):
                    # a list value creates one lookup entry per list element
                    for listValue in value:
                        LOD.addLookup(
                            lookup, duplicates, record, listValue, withDuplicates
                        )
                else:
                    LOD.addLookup(lookup, duplicates, record, value, withDuplicates)
        if withDuplicates:
            return lookup
        else:
            return lookup, duplicates

    @classmethod
    def handleListTypes(cls, lod, doFilter=False, separator=","):
        """
        handle list types in the given list of dicts

        Args:
            cls: this class
            lod(list): a list of dicts
            doFilter(bool): True if records containing list value items should be filtered
            separator(str): the separator to use when converting lists
        """
        # iterate in reverse so deletions do not shift pending indices
        # see https://stackoverflow.com/a/1207485/1497139
        for i in range(len(lod) - 1, -1, -1):
            record = lod[i]
            if isinstance(record, dict):
                for key in record:
                    value = record[key]
                    if isinstance(value, list):
                        if doFilter:
                            del lod[i]
                            # fix: stop processing this record - it has been removed;
                            # continuing the key loop could delete further records
                            break
                        else:
                            newValue = separator.join(filter(None, value))
                            record[key] = newValue

    @staticmethod
    def filterFields(lod: list, fields: list, reverse: bool = False):
        """
        filter the given LoD with the given list of fields by either limiting the LoD to the
        fields or removing the fields contained in the list depending on the state of the
        reverse parameter

        Args:
            lod(list): list of dicts from which the fields should be excluded
            fields(list): list of fields that should be excluded from the lod
            reverse(bool): If True limit dict to the list of given fields. Otherwise exclude the fields from the dict.

        Returns:
            LoD
        """
        res = []
        for record in lod:
            if reverse:
                recordReduced = {d: record[d] for d in record if d in fields}
            else:
                recordReduced = {d: record[d] for d in record if d not in fields}
            res.append(recordReduced)
        return res

__init__(name)

Constructor

Source code in lodstorage/lod.py
13
14
15
16
17
18
def __init__(self, name):
    """
    Constructor
    """
    self.name = name
    pass

addLookup(lookup, duplicates, record, value, withDuplicates) staticmethod

add a single lookup result

Parameters:

Name Type Description Default
lookup(dict)

the lookup map

required
duplicates(list)

the list of duplicates

required
record(dict)

the current record

required
value(object)

the current value to lookup

required
withDuplicates(bool)

if True duplicates are allowed and lists are returned; if False a separate duplicates list is created

required
Source code in lodstorage/lod.py
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
@staticmethod
def addLookup(lookup, duplicates, record, value, withDuplicates: bool):
    """
    add a single lookup result

    Args:
        lookup(dict): the lookup map
        duplicates(list): the list of duplicates
        record(dict): the current record
        value(object): the current value to lookup
        withDuplicates(bool): if True duplicates should be allowed and lists returned if False a separate duplicates
        list is created
    """
    if value in lookup:
        if withDuplicates:
            lookupResult = lookup[value]
            lookupResult.append(record)
        else:
            duplicates.append(record)
            return
    else:
        if withDuplicates:
            lookupResult = [record]
        else:
            lookupResult = record
    lookup[value] = lookupResult

filterFields(lod, fields, reverse=False) staticmethod

filter the given LoD with the given list of fields by either limiting the LoD to the fields or removing the fields contained in the list depending on the state of the reverse parameter

Parameters:

Name Type Description Default
lod(list)

list of dicts from which the fields should be excluded

required
fields(list)

list of fields that should be excluded from the lod

required
reverse(bool)

If True limit dict to the list of given fields. Otherwise exclude the fields from the dict.

required

Returns:

Type Description

LoD

Source code in lodstorage/lod.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
@staticmethod
def filterFields(lod: list, fields: list, reverse: bool = False):
    """
    filter the given LoD with the given list of fields by either limiting the LoD to the fields or removing the
    fields contained in the list depending on the state of the reverse parameter

    Args:
        lod(list): list of dicts from which the fields should be excluded
        fields(list): list of fields that should be excluded from the lod
        reverse(bool): If True limit dict to the list of given fields. Otherwise exclude the fields from the dict.

    Returns:
        LoD
    """
    res = []
    for record in lod:
        if reverse:
            recordReduced = {d: record[d] for d in record if d in fields}
        else:
            recordReduced = {d: record[d] for d in record if d not in fields}
        res.append(recordReduced)
    return res

getLookup(lod, attrName, withDuplicates=False) staticmethod

create a lookup dictionary by the given attribute name for the given list of dicts

Parameters:

Name Type Description Default
lod(list)

the list of dicts to get the lookup dictionary for

required
attrName(str)

the attribute to lookup

required
withDuplicates(bool)

whether to retain single values or lists

required
Return

a dictionary for lookup

Source code in lodstorage/lod.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
@staticmethod
def getLookup(lod: list, attrName: str, withDuplicates: bool = False):
    """
    create a lookup dictionary by the given attribute name for the given list of dicts

    Args:
        lod(list): the list of dicts to get the lookup dictionary for
        attrName(str): the attribute to lookup
        withDuplicates(bool): whether to retain single values or lists

    Return:
        a dictionary for lookup
    """
    lookup = {}
    duplicates = []
    for record in lod:
        value = None
        if isinstance(record, dict):
            if attrName in record:
                value = record[attrName]
        else:
            if hasattr(record, attrName):
                value = getattr(record, attrName)
        if value is not None:
            if isinstance(value, list):
                for listValue in value:
                    LOD.addLookup(
                        lookup, duplicates, record, listValue, withDuplicates
                    )
            else:
                LOD.addLookup(lookup, duplicates, record, value, withDuplicates)
    if withDuplicates:
        return lookup
    else:
        return lookup, duplicates

handleListTypes(lod, doFilter=False, separator=',') classmethod

handle list types in the given list of dicts

Parameters:

Name Type Description Default
cls

this class

required
lod(list)

a list of dicts

required
doFilter(bool)

True if records containing lists value items should be filtered

required
separator(str)

the separator to use when converting lists

required
Source code in lodstorage/lod.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
@classmethod
def handleListTypes(cls, lod, doFilter=False, separator=","):
    """
    handle list types in the given list of dicts

    Args:
        cls: this class
        lod(list): a list of dicts
        doFilter(bool): True if records containing lists value items should be filtered
        separator(str): the separator to use when converting lists
    """
    # see https://stackoverflow.com/a/1207485/1497139
    for i in range(len(lod) - 1, -1, -1):
        record = lod[i]
        if isinstance(record, dict):
            for key in record:
                value = record[key]
                if isinstance(value, list):
                    if doFilter:
                        del lod[i]
                        continue
                    else:
                        newValue = separator.join(filter(None, value))
                        record[key] = newValue

intersect(listOfDict1, listOfDict2, key=None) staticmethod

get the intersection of the two lists of Dicts by the given key

Source code in lodstorage/lod.py
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
@staticmethod
def intersect(listOfDict1, listOfDict2, key=None):
    """
    get the  intersection of the two lists of Dicts by the given key
    """
    i1 = iter(sorted(listOfDict1, key=lambda k: LOD.sortKey(k, key)))
    i2 = iter(sorted(listOfDict2, key=lambda k: LOD.sortKey(k, key)))
    c1 = next(i1)
    c2 = next(i2)
    lr = []
    while True:
        try:
            val1 = LOD.sortKey(c1, key)
            val2 = LOD.sortKey(c2, key)
            if val1 < val2:
                c1 = next(i1)
            elif val1 > val2:
                c2 = next(i2)
            else:
                lr.append(c1)
                c1 = next(i1)
                c2 = next(i2)
        except StopIteration:
            break
    return lr

setNone(record, fields) staticmethod

make sure the given fields in the given record are set to none Args: record(dict): the record to work on fields(list): the list of fields to set to None

Source code in lodstorage/lod.py
49
50
51
52
53
54
55
56
57
58
59
@staticmethod
def setNone(record, fields):
    """
    make sure the given fields in the given record are set to none
    Args:
        record(dict): the record to work on
        fields(list): the list of fields to set to None
    """
    for field in fields:
        if not field in record:
            record[field] = None

setNone4List(listOfDicts, fields) staticmethod

set the given fields to None for the records in the given listOfDicts if they are not set Args: listOfDicts(list): the list of records to work on fields(list): the list of fields to set to None

Source code in lodstorage/lod.py
37
38
39
40
41
42
43
44
45
46
47
@staticmethod
def setNone4List(listOfDicts, fields):
    """
    set the given fields to None for the records in the given listOfDicts
    if they are not set
    Args:
        listOfDicts(list): the list of records to work on
        fields(list): the list of fields to set to None
    """
    for record in listOfDicts:
        LOD.setNone(record, fields)

sortKey(d, key=None) staticmethod

get the sort key for the given dict d with the given key

Source code in lodstorage/lod.py
65
66
67
68
69
70
71
72
@staticmethod
def sortKey(d, key=None):
    """get the sort key for the given dict d with the given key"""
    if key is None:
        # https://stackoverflow.com/a/60765557/1497139
        return hash(tuple(d.items()))
    else:
        return d[key]

lod_csv

CSV

Bases: LOD

helper for converting data in csv format to list of dicts (LoD) and vice versa

Source code in lodstorage/lod_csv.py
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
class CSV(LOD):
    """
    helper for converting data in csv format to list of dicts (LoD) and vice versa
    """

    @staticmethod
    def restoreFromCSVFile(
        filePath: str, headerNames: list = None, withPostfix: bool = False
    ):
        """
        restore LOD from given csv file

        Args:
            filePath(str): file name
            headerNames(list): Names of the headers that should be used. If None it is assumed that the header is given.
            withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

        Returns:
            list of dicts (LoD) containing the content of the given csv file
        """
        if not withPostfix:
            filePath += ".csv"
        csvStr = CSV.readFile(filePath)
        lod = CSV.fromCSV(csvStr, headerNames)
        return lod

    @staticmethod
    def fromCSV(
        csvString: str,
        fields: list = None,
        delimiter=",",
        quoting=csv.QUOTE_NONNUMERIC,
        **kwargs
    ):
        """
        convert given csv string to list of dicts (LOD)

        Args:
            csvString(str): csv string that should be converted to LOD
            fields(list): Names of the headers that should be used. If None it is assumed that the header is given.
            delimiter(str): the field delimiter to use
            quoting: the csv quoting mode to use
            kwargs: further csv dialect parameters

        Returns:
            list of dicts (LoD) containing the content of the given csv string
        """
        csvStream = io.StringIO(csvString)
        reader = csv.DictReader(
            csvStream, fieldnames=fields, delimiter=delimiter, quoting=quoting, **kwargs
        )
        lod = list(reader)
        CSV.fixTypes(lod)
        return lod

    @staticmethod
    def storeToCSVFile(lod: list, filePath: str, withPostfix: bool = False):
        """
        converts the given lod to CSV and stores it in the given file.

        Args:
            lod(list): lod that should be converted to csv file
            filePath(str): file name the csv should be stored to
            withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.
        """
        if not withPostfix:
            filePath += ".csv"
        csvStr = CSV.toCSV(lod)
        CSV.writeFile(csvStr, filePath)

    @staticmethod
    def toCSV(
        lod: list,
        includeFields: list = None,
        excludeFields: list = None,
        delimiter=",",
        quoting=csv.QUOTE_NONNUMERIC,
        **kwargs
    ):
        """
        converts the given lod to CSV string.
        For details about the csv dialect parameters see https://docs.python.org/3/library/csv.html#csv-fmt-params

        Args:
            lod(list): lod that should be converted to csv string
            includeFields(list): list of fields that should be included in the csv (positive list)
            excludeFields(list): list of fields that should be excluded from the csv (negative list)
            kwargs: csv dialect parameters

        Returns:
            csv string of the given lod
        """
        # fix: an empty (but not None) lod previously raised an IndexError on lod[0]
        if lod is None or len(lod) == 0:
            return ""
        # generalized: any non-dict records (e.g. JSONAble instances or dataclasses)
        # are converted via their attribute dictionaries
        if not isinstance(lod[0], dict):
            lod = [vars(d) for d in lod]
        if excludeFields is not None:
            lod = LOD.filterFields(lod, excludeFields)
        if includeFields is None:
            fields = LOD.getFields(lod)
        else:
            fields = includeFields
            lod = LOD.filterFields(lod, includeFields, reverse=True)
        csvStream = io.StringIO()
        dict_writer = csv.DictWriter(
            csvStream, fieldnames=fields, delimiter=delimiter, quoting=quoting, **kwargs
        )
        dict_writer.writeheader()
        dict_writer.writerows(lod)
        csvString = csvStream.getvalue()
        return csvString

    @staticmethod
    def readFile(filename: str) -> str:
        """
        Reads the given filename and returns it as string

        Args:
            filename: Name of the file that should be returned as string

        Returns:
            Content of the file as string
        """
        with open(filename, "r") as file:
            content = file.read()
        return content

    @staticmethod
    def writeFile(content: str, filename: str) -> None:
        """
        Write the given str to the given filename

        Args:
            content(str): string that should be written into the file
            filename: Name of the file the given str should be written to
        """
        with open(filename, "w") as file:
            file.write(content)

    @staticmethod
    def fixTypes(lod: list):
        """
        fixes the types of the given LoD.

        Args:
            lod(list): the list of dicts to fix in place
        """
        for record in lod:
            for key, value in record.items():
                # fix empty csv value: "cell1,,cell3" converts the second value to empty string instead of None
                if value == "":
                    record[key] = None

fixTypes(lod) staticmethod

fixes the types of the given LoD.

Source code in lodstorage/lod_csv.py
145
146
147
148
149
150
151
152
153
154
155
@staticmethod
def fixTypes(lod: list):
    """
    fixes the types of the given LoD.

    """
    for record in lod:
        for key, value in record.items():
            # fix empty csv value: "cell1,,cell3" converts the second value to empty string instead of None
            if value == "":
                record[key] = None

fromCSV(csvString, fields=None, delimiter=',', quoting=csv.QUOTE_NONNUMERIC, **kwargs) staticmethod

convert given csv string to list of dicts (LOD)

Parameters:

Name Type Description Default
csvStr(str)

csv string that should be converted to LOD

required
headerNames(list)

Names of the headers that should be used. If None it is assumed that the header is given.

required

Returns:

Type Description

list of dicts (LoD) containing the content of the given csv string

Source code in lodstorage/lod_csv.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
@staticmethod
def fromCSV(
    csvString: str,
    fields: list = None,
    delimiter=",",
    quoting=csv.QUOTE_NONNUMERIC,
    **kwargs
):
    """
    convert given csv string to list of dicts (LOD)

    Args:
        csvStr(str): csv string that should be converted to LOD
        headerNames(list): Names of the headers that should be used. If None it is assumed that the header is given.

    Returns:
        list of dicts (LoD) containing the content of the given csv string
    """
    csvStream = io.StringIO(csvString)
    reader = csv.DictReader(
        csvStream, fieldnames=fields, delimiter=delimiter, quoting=quoting, **kwargs
    )
    lod = list(reader)
    CSV.fixTypes(lod)
    return lod

readFile(filename) staticmethod

Reads the given filename and returns it as string Args: filename: Name of the file that should be returned as string

Returns:

Type Description
str

Content of the file as string

Source code in lodstorage/lod_csv.py
118
119
120
121
122
123
124
125
126
127
128
129
130
@staticmethod
def readFile(filename: str) -> str:
    """
    Reads the given filename and returns it as string
    Args:
        filename: Name of the file that should be returned as string

    Returns:
        Content of the file as string
    """
    with open(filename, "r") as file:
        content = file.read()
    return content

restoreFromCSVFile(filePath, headerNames=None, withPostfix=False) staticmethod

restore LOD from given csv file

Parameters:

Name Type Description Default
filePath(str)

file name

required
headerNames(list)

Names of the headers that should be used. If None it is assumed that the header is given.

required
withPostfix(bool)

If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

required

Returns:

Type Description

list of dicts (LoD) containing the content of the given csv file

Source code in lodstorage/lod_csv.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
@staticmethod
def restoreFromCSVFile(
    filePath: str, headerNames: list = None, withPostfix: bool = False
):
    """
    restore LOD from given csv file

    Args:
        filePath(str): file name
        headerNames(list): Names of the headers that should be used. If None it is assumed that the header is given.
        withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

    Returns:
        list of dicts (LoD) containing the content of the given csv file
    """
    if not withPostfix:
        filePath += ".csv"
    csvStr = CSV.readFile(filePath)
    lod = CSV.fromCSV(csvStr, headerNames)
    return lod

storeToCSVFile(lod, filePath, withPostfix=False) staticmethod

converts the given lod to CSV file.

Parameters:

Name Type Description Default
lod(list)

lod that should be converted to csv file

required
filePath(str)

file name the csv should be stored to

required
withPostfix(bool)

If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.

required

Returns: csv string of the given lod

Source code in lodstorage/lod_csv.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
@staticmethod
def storeToCSVFile(lod: list, filePath: str, withPostfix: bool = False):
    """
    converts the given lod to CSV file.

    Args:
        lod(list): lod that should be converted to csv file
        filePath(str): file name the csv should be stored to
        withPostfix(bool): If False the file type is appended to given filePath. Otherwise file type MUST be given with filePath.
    Returns:
        csv string of the given lod
    """
    if not withPostfix:
        filePath += ".csv"
    csvStr = CSV.toCSV(lod)
    CSV.writeFile(csvStr, filePath)

toCSV(lod, includeFields=None, excludeFields=None, delimiter=',', quoting=csv.QUOTE_NONNUMERIC, **kwargs) staticmethod

converts the given lod to CSV string. For details about the csv dialect parameters see https://docs.python.org/3/library/csv.html#csv-fmt-params

Parameters:

Name Type Description Default
lod(list)

lod that should be converted to csv string

required
includeFields(list)

list of fields that should be included in the csv (positive list)

required
excludeFields(list)

list of fields that should be excluded from the csv (negative list)

required
kwargs

csv dialect parameters

{}

Returns: csv string of the given lod

Source code in lodstorage/lod_csv.py
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
@staticmethod
def toCSV(
    lod: list,
    includeFields: list = None,
    excludeFields: list = None,
    delimiter=",",
    quoting=csv.QUOTE_NONNUMERIC,
    **kwargs
):
    """
    converts the given lod to CSV string.
    For details about the csv dialect parameters see https://docs.python.org/3/library/csv.html#csv-fmt-params

    Args:
        lod(list): lod that should be converted to csv string
        includeFields(list): list of fields that should be included in the csv (positive list)
        excludeFields(list): list of fields that should be excluded from the csv (negative list)
        kwargs: csv dialect parameters
    Returns:
        csv string of the given lod
    """
    if lod is None:
        return ""
    if isinstance(lod[0], JSONAble):
        lod = [vars(d) for d in lod]
    if excludeFields is not None:
        lod = LOD.filterFields(lod, excludeFields)
    if includeFields is None:
        fields = LOD.getFields(lod)
    else:
        fields = includeFields
        lod = LOD.filterFields(lod, includeFields, reverse=True)
    csvStream = io.StringIO()
    dict_writer = csv.DictWriter(
        csvStream, fieldnames=fields, delimiter=delimiter, quoting=quoting, **kwargs
    )
    dict_writer.writeheader()
    dict_writer.writerows(lod)
    csvString = csvStream.getvalue()
    return csvString

writeFile(content, filename) staticmethod

Write the given str to the given filename Args: content(str): string that should be written into the file filename: Name of the file the given str should be written to Returns: Nothing

Source code in lodstorage/lod_csv.py
132
133
134
135
136
137
138
139
140
141
142
143
@staticmethod
def writeFile(content: str, filename: str) -> str:
    """
    Write the given str to the given filename
    Args:
        content(str): string that should be written into the file
        filename: Name of the file the given str should be written to
    Returns:
        Nothing
    """
    with open(filename, "w") as file:
        file.write(content)

mwTable

Created on 2020-08-21

@author: wf

MediaWikiTable

Bases: object

helper for https://www.mediawiki.org/wiki/Help:Tables

Source code in lodstorage/mwTable.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
class MediaWikiTable(object):
    """
    helper for https://www.mediawiki.org/wiki/Help:Tables
    """

    def __init__(
        self, wikiTable=True, colFormats=None, sortable=True, withNewLines=False
    ):
        """
        Constructor

        Args:
            wikiTable(bool): if True use the "wikitable" css class
            colFormats(dict): optional map of column key to format string
            sortable(bool): if True use the "sortable" css class
            withNewLines(bool): if True emit one cell per line
        """
        self.colFormats = colFormats
        self.withNewLines = withNewLines
        # assemble the css class attribute of the table start markup
        tableClass = "wikitable" if wikiTable else ""
        classDelim = " " if wikiTable else ""
        sortClass = "sortable" if sortable else ""
        self.start = '{|class="%s%s%s"\n' % (tableClass, classDelim, sortClass)
        self.header = None
        self.content = ""
        self.end = "\n|}\n"

    def addHeader(self, record):
        """
        add the given record as a "sample" header
        """
        if self.withNewLines:
            headerStart = "|+"
            firstColDelim = "\n!"
            colDelim = firstColDelim
        else:
            headerStart = "|+\n"
            firstColDelim = "!"
            colDelim = "!!"
        parts = [headerStart]
        for index, key in enumerate(record.keys()):
            delim = firstColDelim if index == 0 else colDelim
            parts.append("%s%s" % (delim, key))
        self.header = "".join(parts)

    def addRow4Dict(self, record):
        """
        add a table row for the given record dict,
        deriving the header from it if none has been set yet
        """
        if self.header is None:
            self.addHeader(record)
        if self.withNewLines:
            rowStart = "\n|-"
            colDelim = "\n|"
        else:
            rowStart = "\n|-\n"
            colDelim = "||"
        cells = [rowStart]
        for key, value in record.items():
            # use the configured column format if available, plain "%s" otherwise
            colFormat = "%s"
            if self.colFormats is not None and key in self.colFormats:
                colFormat = self.colFormats[key]
            cells.append(("%s" + colFormat) % (colDelim, value))
        self.content += "".join(cells)

    def fromListOfDicts(self, listOfDicts):
        """
        add a table row for each record in the given list of dicts
        """
        for row in listOfDicts:
            self.addRow4Dict(row)

    def noneReplace(self, value):
        """
        return an empty string for None, the value itself otherwise
        """
        if value is None:
            return ""
        return value

    def asWikiMarkup(self):
        """
        convert me to MediaWiki markup

        Returns:
            string: the MediaWiki markup for this table
        """
        sections = (self.start, self.header, self.content, self.end)
        return "".join(self.noneReplace(section) for section in sections)

__init__(wikiTable=True, colFormats=None, sortable=True, withNewLines=False)

Constructor

Source code in lodstorage/mwTable.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def __init__(
    self, wikiTable=True, colFormats=None, sortable=True, withNewLines=False
):
    """
    Constructor

    Args:
        wikiTable(bool): if True use the "wikitable" CSS class
        colFormats(dict): optional per-column printf-style formats
        sortable(bool): if True make the table sortable
        withNewLines(bool): if True use the one-cell-per-line markup variant
    """
    self.colFormats = colFormats
    # assemble the CSS class list for the table start tag
    css_classes = ""
    if wikiTable:
        # note: a trailing space remains when sortable is False,
        # matching the historical markup exactly
        css_classes = "wikitable "
    if sortable:
        css_classes += "sortable"
    self.start = '{|class="%s"\n' % css_classes
    self.header = None
    self.content = ""
    self.end = "\n|}\n"
    self.withNewLines = withNewLines

addHeader(record)

add the given record as a "sample" header

Source code in lodstorage/mwTable.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
def addHeader(self, record):
    """
    add the given record as a "sample" header

    Args:
        record(dict): the record whose keys become the column headers
    """
    if self.withNewLines:
        start, first_delim, other_delim = "|+", "\n!", "\n!"
    else:
        start, first_delim, other_delim = "|+\n", "!", "!!"
    cells = []
    for index, column in enumerate(record.keys()):
        delim = first_delim if index == 0 else other_delim
        cells.append("%s%s" % (delim, column))
    self.header = start + "".join(cells)

asWikiMarkup()

convert me to MediaWiki markup

Returns:

Name Type Description
string

the MediaWiki Markup for this table

Source code in lodstorage/mwTable.py
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def asWikiMarkup(self):
    """
    convert me to MediaWiki markup

    Returns:
        string: the MediaWiki markup for this table
    """
    parts = (self.start, self.header, self.content, self.end)
    return "".join(self.noneReplace(part) for part in parts)

params

Created on 2024-05-06

@author: wf

Params

parameter handling

Source code in lodstorage/params.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
class Params:
    """
    parameter handling for {{param}} style templates in queries
    """

    def __init__(self, query: str, illegal_chars: str = """"[;<>&|]"'"""):
        """
        constructor

        Args:
            query(str): the query to analyze for parameters
            illegal_chars: chars that may not be in the values
        """
        self.illegal_chars = illegal_chars
        self.query = query
        # a parameter is a single word wrapped in double curly braces
        self.pattern = re.compile(r"{{\s*(\w+)\s*}}")
        self.params = self.pattern.findall(query)
        self.params_dict = {param: "" for param in self.params}
        self.has_params = len(self.params) > 0

    def set(self, params_dict: Dict):
        """
        set my params

        Args:
            params_dict(Dict): maps parameter names to their values
        """
        self.params_dict = params_dict

    def audit(self) -> None:
        """
        Audit the usage of parameters in the query.

        Raises:
            ValueError: If potentially malicious values are detected in the parameter dictionary.
        """
        for param, value in self.params_dict.items():
            for char in self.illegal_chars:
                if char in value:
                    raise ValueError(
                        f"Potentially malicious value detected for parameter '{param}'"
                    )

    def apply_parameters(self) -> str:
        """
        Replace Jinja templates in the query with corresponding parameter values.

        Returns:
            str: The query with Jinja templates replaced by parameter values.
        """
        self.audit()
        query = self.query
        for param, value in self.params_dict.items():
            pattern = re.compile(r"{{\s*" + re.escape(param) + r"\s*\}\}")
            # use a callable replacement so backslashes and group references
            # (e.g. r"\1") in the value are inserted literally instead of
            # being interpreted as re.sub template escapes
            query = pattern.sub(lambda _match: value, query)
        return query

__init__(query, illegal_chars='"[;<>&|]"\'')

constructor

Parameters:

Name Type Description Default
query(str)

the query to analyze for parameters

required
illegal_chars str

chars that may not be in the values

'"[;<>&|]"\''
Source code in lodstorage/params.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def __init__(self, query: str, illegal_chars: str = """"[;<>&|]"'"""):
    """
    constructor

    Args:
        query(str): the query to analyze for parameters
        illegal_chars: chars that may not be in the values
    """
    self.illegal_chars = illegal_chars
    self.query = query
    # a parameter is a single word wrapped in double curly braces
    self.pattern = re.compile(r"{{\s*(\w+)\s*}}")
    found = self.pattern.findall(query)
    self.params = found
    self.params_dict = dict.fromkeys(found, "")
    self.has_params = bool(found)

apply_parameters()

Replace Jinja templates in the query with corresponding parameter values.

Returns:

Name Type Description
str str

The query with Jinja templates replaced by parameter values.

Source code in lodstorage/params.py
51
52
53
54
55
56
57
58
59
60
61
62
63
def apply_parameters(self) -> str:
    """
    Replace Jinja templates in the query with corresponding parameter values.

    Returns:
        str: The query with Jinja templates replaced by parameter values.
    """
    self.audit()
    query = self.query
    for param, value in self.params_dict.items():
        pattern = re.compile(r"{{\s*" + re.escape(param) + r"\s*\}\}")
        # a callable replacement keeps backslashes in the value literal -
        # a plain string replacement would interpret r"\1" as a group reference
        query = pattern.sub(lambda _match: value, query)
    return query

audit()

Audit the usage of parameters in the query.

Raises:

Type Description
ValueError

If potentially malicious values are detected in the parameter dictionary.

Source code in lodstorage/params.py
37
38
39
40
41
42
43
44
45
46
47
48
49
def audit(self) -> None:
    """
    Audit the usage of parameters in the query.

    Raises:
        ValueError: If potentially malicious values are detected in the parameter dictionary.
    """
    for param, value in self.params_dict.items():
        # reject the value as soon as any forbidden char shows up in it
        if any(char in value for char in self.illegal_chars):
            raise ValueError(
                f"Potentially malicious value detected for parameter '{param}'"
            )

set(params_dict)

set my params

Source code in lodstorage/params.py
31
32
33
34
35
def set(self, params_dict: Dict):
    """
    set my params

    Args:
        params_dict(Dict): the parameter name to value mapping to use
    """
    # replace the whole mapping; no merge with previous values
    self.params_dict = params_dict

StoreDictKeyPair

Bases: Action

Custom argparse action to store key-value pairs as a dictionary.

This class implements an argparse action to parse and store command-line arguments in the form of key-value pairs. The pairs should be separated by a comma and each key-value pair should be separated by an equals sign.

Example

--option key1=value1,key2=value2,key3=value3

Reference

https://stackoverflow.com/a/42355279/1497139

Source code in lodstorage/params.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
class StoreDictKeyPair(argparse.Action):
    """
    Custom argparse action to store key-value pairs as a dictionary.

    This class implements an argparse action to parse and store command-line
    arguments in the form of key-value pairs. The pairs should be separated by
    a comma and each key-value pair should be separated by an equals sign.

    Example:
        --option key1=value1,key2=value2,key3=value3

    Reference:
        https://stackoverflow.com/a/42355279/1497139
    """

    def __call__(
        self,
        _parser: argparse.ArgumentParser,
        namespace: argparse.Namespace,
        values: str,
        _option_string: Optional[str] = None,
    ) -> None:
        """
        Parse key-value pairs and store them as a dictionary in the namespace.

        Args:
            parser (argparse.ArgumentParser): The argument parser object.
            namespace (argparse.Namespace): The namespace to store the parsed values.
            values (str): The string containing key-value pairs separated by commas.
            option_string (Optional[str]): The option string, if provided.
        """
        my_dict = {}
        for kv in values.split(","):
            # split only on the first '=' so values may themselves contain '='
            # (e.g. URLs with query strings); the bare split crashed on those
            k, v = kv.split("=", 1)
            my_dict[k] = v
        setattr(namespace, self.dest, my_dict)

__call__(_parser, namespace, values, _option_string=None)

Parse key-value pairs and store them as a dictionary in the namespace.

Parameters:

Name Type Description Default
parser ArgumentParser

The argument parser object.

required
namespace Namespace

The namespace to store the parsed values.

required
values str

The string containing key-value pairs separated by commas.

required
option_string Optional[str]

The option string, if provided.

required
Source code in lodstorage/params.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def __call__(
    self,
    _parser: argparse.ArgumentParser,
    namespace: argparse.Namespace,
    values: str,
    _option_string: Optional[str] = None,
) -> None:
    """
    Parse key-value pairs and store them as a dictionary in the namespace.

    Args:
        parser (argparse.ArgumentParser): The argument parser object.
        namespace (argparse.Namespace): The namespace to store the parsed values.
        values (str): The string containing key-value pairs separated by commas.
        option_string (Optional[str]): The option string, if provided.
    """
    my_dict = {}
    for kv in values.split(","):
        # split only on the first '=' so values may themselves contain '='
        k, v = kv.split("=", 1)
        my_dict[k] = v
    setattr(namespace, self.dest, my_dict)

plot

Created on 2020-07-05

@author: wf

Plot

Bases: object

create Plot based on counters see https://stackoverflow.com/questions/19198920/using-counter-in-python-to-build-histogram

Source code in lodstorage/plot.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
class Plot(object):
    """
    create Plot based on counters
    see https://stackoverflow.com/questions/19198920/using-counter-in-python-to-build-histogram
    """

    def __init__(
        self,
        valueList,
        title,
        xlabel=None,
        ylabel=None,
        gformat=".png",
        fontsize=12,
        plotdir=None,
        debug=False,
    ):
        """
        Constructor

        Args:
            valueList(list): the values to be counted and plotted
            title(str): the plot title, also used as the file name stem when saving
            xlabel(str): optional x-axis label
            ylabel(str): optional y-axis label
            gformat(str): graphics file extension used when saving
            fontsize(int): font size for the title
            plotdir(str): optional target directory; defaults to ../plots/ relative to this module
            debug(bool): if True show debug output
        """
        self.counter = Counter(valueList)
        self.valueList = valueList
        self.title = title
        self.xlabel = xlabel
        self.ylabel = ylabel
        self.fontsize = fontsize
        self.gformat = gformat
        self.debug = debug
        path = os.path.dirname(__file__)
        if plotdir is not None:
            self.plotdir = plotdir
        else:
            self.plotdir = path + "/../plots/"
        # ensure the target directory exists in both cases;
        # previously only the default directory was created, so saving
        # to a caller-supplied non-existing plotdir failed
        os.makedirs(self.plotdir, exist_ok=True)

    def titleMe(self):
        """set my title and labels"""
        plt.title(self.title, fontsize=self.fontsize)
        if self.xlabel is not None:
            plt.xlabel(self.xlabel)
        if self.ylabel is not None:
            plt.ylabel(self.ylabel)

    def showMe(self, mode="show", close=True):
        """show me in the given mode ("show" displays, anything else saves)"""
        if mode == "show":
            plt.show()
        else:
            plt.savefig(self.plotdir + self.title + self.gformat)
        if close:
            plt.close()

    def barchart(self, mode="show"):
        """barchart based histogram for the given counter"""
        labels, values = zip(*self.counter.items())
        indexes = np.arange(len(labels))
        width = 1
        self.titleMe()
        plt.bar(indexes, values, width)
        # center tick labels under the bars
        plt.xticks(indexes + width * 0.5, labels)
        plt.yticks(np.arange(1, max(values) + 1, step=1))
        self.showMe(mode)

    def showDebug(self):
        """print the value list and counter internals"""
        print("   value  list: ", self.valueList)
        print("counter  items: ", self.counter.items())
        print("counter values: ", self.counter.values())
        print("counter   keys: ", self.counter.keys())

    def hist(self, mode="show"):
        """create histogram for the given counter"""
        if self.debug:
            self.showDebug()
        self.titleMe()
        # see https://stackoverflow.com/a/2162045/1497139
        # one bin per distinct value keeps bars aligned with the data
        plt.hist(self.valueList, bins=len(self.counter.keys()))
        self.showMe(mode)

__init__(valueList, title, xlabel=None, ylabel=None, gformat='.png', fontsize=12, plotdir=None, debug=False)

Constructor

Source code in lodstorage/plot.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
def __init__(
    self,
    valueList,
    title,
    xlabel=None,
    ylabel=None,
    gformat=".png",
    fontsize=12,
    plotdir=None,
    debug=False,
):
    """
    Constructor

    Args:
        valueList(list): the values to be counted and plotted
        title(str): the plot title, also used as the file name stem when saving
        xlabel(str): optional x-axis label
        ylabel(str): optional y-axis label
        gformat(str): graphics file extension used when saving
        fontsize(int): font size for the title
        plotdir(str): optional target directory
        debug(bool): if True show debug output
    """
    self.counter = Counter(valueList)
    self.valueList = valueList
    self.title = title
    self.xlabel = xlabel
    self.ylabel = ylabel
    self.fontsize = fontsize
    self.gformat = gformat
    self.debug = debug
    module_dir = os.path.dirname(__file__)
    if plotdir is None:
        # default to a plots directory next to the package and create it;
        # NOTE(review): a caller-supplied plotdir is NOT created here
        self.plotdir = module_dir + "/../plots/"
        os.makedirs(self.plotdir, exist_ok=True)
    else:
        self.plotdir = plotdir

barchart(mode='show')

barchart based histogram for the given counter

Source code in lodstorage/plot.py
65
66
67
68
69
70
71
72
73
74
def barchart(self, mode="show"):
    """barchart based histogram for the given counter"""
    # one bar per distinct value; labels and heights come from the Counter
    labels, values = zip(*self.counter.items())
    indexes = np.arange(len(labels))
    width = 1
    self.titleMe()
    plt.bar(indexes, values, width)
    # center the tick labels under the bars
    plt.xticks(indexes + width * 0.5, labels)
    # integer y ticks from 1 up to the highest count
    plt.yticks(np.arange(1, max(values) + 1, step=1))
    self.showMe(mode)

hist(mode='show')

create histogram for the given counter

Source code in lodstorage/plot.py
82
83
84
85
86
87
88
89
90
def hist(self, mode="show"):
    """create histogram for the given counter"""
    # optionally dump the counter internals first
    if self.debug:
        self.showDebug()
    self.titleMe()
    # see https://stackoverflow.com/a/2162045/1497139
    # one bin per distinct value keeps the bars aligned with the data
    plt.hist(self.valueList, bins=len(self.counter.keys()))
    self.showMe(mode)
    pass

showMe(mode='show', close=True)

show me in the given mode

Source code in lodstorage/plot.py
56
57
58
59
60
61
62
63
def showMe(self, mode="show", close=True):
    """show me in the given mode"""
    if mode != "show":
        # persist to the plot directory using the title as file name stem
        target = self.plotdir + self.title + self.gformat
        plt.savefig(target)
    else:
        plt.show()
    if close:
        plt.close()

titleMe()

set my title and labels

Source code in lodstorage/plot.py
48
49
50
51
52
53
54
def titleMe(self):
    """set my title and labels"""
    plt.title(self.title, fontsize=self.fontsize)
    # only set axis labels that were actually provided
    for label, apply_label in ((self.xlabel, plt.xlabel), (self.ylabel, plt.ylabel)):
        if label is not None:
            apply_label(label)

prefixes

Created on 2024-03-02

@author: wf

Prefixes

Handles the generation of standard SPARQL prefix declarations for queries. This utility class simplifies the inclusion of common prefixes used in SPARQL queries by providing a method to generate the necessary PREFIX lines based on a list of prefix keys.

The class supports a wide range of prefixes relevant to Wikidata and general RDF/SPARQL usage, including RDF, RDFS, Wikibase, Schema.org, and more. It aims to reduce redundancy and improve clarity in SPARQL query construction by centralizing prefix management.

Methods:

Name Description
getPrefixes

Generates SPARQL PREFIX lines for a given list of prefix keys.

Source code in lodstorage/prefixes.py
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
class Prefixes:
    """
    Handles the generation of standard SPARQL prefix declarations for queries.
    This utility class simplifies the inclusion of common prefixes used in SPARQL
    queries by providing a method to generate the necessary PREFIX lines based on
    a list of prefix keys.

    The class supports a wide range of prefixes relevant to Wikidata and general RDF/SPARQL
    usage, including RDF, RDFS, Wikibase, Schema.org, and more. It aims to reduce redundancy
    and improve clarity in SPARQL query construction by centralizing prefix management.

    Methods:
        getPrefixes(prefixes): Generates SPARQL PREFIX lines for a given list of prefix keys.
    """

    # maps prefix keys to their URIs
    # see also https://www.wikidata.org/wiki/EntitySchema:E49
    PREFIX_MAP = {
        "bd": "<http://www.bigdata.com/rdf#>",
        "cc": "<http://creativecommons.org/ns#>",
        "dct": "<http://purl.org/dc/terms/>",
        "geo": "<http://www.opengis.net/ont/geosparql#>",
        "ontolex": "<http://www.w3.org/ns/lemon/ontolex#>",
        "owl": "<http://www.w3.org/2002/07/owl#>",
        "p": "<http://www.wikidata.org/prop/>",
        "pq": "<http://www.wikidata.org/prop/qualifier/>",
        "pqn": "<http://www.wikidata.org/prop/qualifier/value-normalized/>",
        "pqv": "<http://www.wikidata.org/prop/qualifier/value/>",
        "pr": "<http://www.wikidata.org/prop/reference/>",
        "prn": "<http://www.wikidata.org/prop/reference/value-normalized/>",
        "prov": "<http://www.w3.org/ns/prov#>",
        "prv": "<http://www.wikidata.org/prop/reference/value/>",
        "ps": "<http://www.wikidata.org/prop/statement/>",
        "psn": "<http://www.wikidata.org/prop/statement/value-normalized/>",
        "psv": "<http://www.wikidata.org/prop/statement/value/>",
        "rdf": "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
        "rdfs": "<http://www.w3.org/2000/01/rdf-schema#>",
        "schema": "<http://schema.org/>",
        "skos": "<http://www.w3.org/2004/02/skos/core#>",
        "wd": "<http://www.wikidata.org/entity/>",
        "wdata": "<http://www.wikidata.org/wiki/Special:EntityData/>",
        "wdno": "<http://www.wikidata.org/prop/novalue/>",
        "wdref": "<http://www.wikidata.org/reference/>",
        "wds": "<http://www.wikidata.org/entity/statement/>",
        "wdt": "<http://www.wikidata.org/prop/direct/>",
        "wdtn": "<http://www.wikidata.org/prop/direct-normalized/>",
        "wdv": "<http://www.wikidata.org/value/>",
        "wikibase": "<http://wikiba.se/ontology#>",
        "xsd": "<http://www.w3.org/2001/XMLSchema#>",
    }

    @classmethod
    def getPrefixes(cls, prefixes=None) -> str:
        """Generates SPARQL PREFIX lines for a given list of prefix keys.

        This method looks up URIs for the specified prefixes from a predefined map and constructs
        PREFIX lines suitable for inclusion at the beginning of a SPARQL query. It allows for easy
        and flexible specification of the prefixes needed for a particular query.

        Args:
            prefixes (list of str): A list of prefix keys for which PREFIX lines should be generated.
                Defaults to a common set of prefixes used in Wikidata queries.
                (The mutable list default was replaced by a None sentinel to avoid
                sharing one list object across calls.)

        Returns:
            str: A string containing the SPARQL PREFIX lines for the specified prefixes, each ending
                with a newline character. If a prefix key is not recognized, it is ignored.

        Example:
            >>> Prefixes.getPrefixes(["wd", "wdt"])
            'PREFIX wd: <http://www.wikidata.org/entity/>\\nPREFIX wdt: <http://www.wikidata.org/prop/direct/>\\n'
        """
        if prefixes is None:
            prefixes = ["rdf", "rdfs", "schema", "wd", "wdt", "wikibase", "xsd"]
        return "".join(
            f"PREFIX {prefix}: {cls.PREFIX_MAP[prefix]}\n"
            for prefix in prefixes
            if prefix in cls.PREFIX_MAP
        )

getPrefixes(prefixes=['rdf', 'rdfs', 'schema', 'wd', 'wdt', 'wikibase', 'xsd']) classmethod

Generates SPARQL PREFIX lines for a given list of prefix keys.

    This method looks up URIs for the specified prefixes from a predefined map and constructs
    PREFIX lines suitable for inclusion at the beginning of a SPARQL query. It allows for easy
    and flexible specification of the prefixes needed for a particular query.

    Args:
        prefixes (list of str): A list of prefix keys for which PREFIX lines should be generated.
            Defaults to a common set of prefixes used in Wikidata queries.

    Returns:
        str: A string containing the SPARQL PREFIX lines for the specified prefixes, each ending
            with a newline character. If a prefix key is not recognized, it is ignored.

    Example:
        >>> Prefixes.getPrefixes(["wd", "wdt"])
    'PREFIX wd: <http://www.wikidata.org/entity/>\nPREFIX wdt: <http://www.wikidata.org/prop/direct/>\n'

Source code in lodstorage/prefixes.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
@classmethod
def getPrefixes(
    cls, prefixes=["rdf", "rdfs", "schema", "wd", "wdt", "wikibase", "xsd"]
) -> str:
    """Generates SPARQL PREFIX lines for a given list of prefix keys.

    Each known prefix key is resolved against a predefined map of URIs
    and rendered as a 'PREFIX key: <uri>' line; unknown keys are ignored.

    Args:
        prefixes (list of str): A list of prefix keys for which PREFIX lines should be generated.
            Defaults to a common set of prefixes used in Wikidata queries.

    Returns:
        str: The SPARQL PREFIX lines for the specified prefixes, each ending with a newline.
    """
    # see also https://www.wikidata.org/wiki/EntitySchema:E49
    prefixMap = {
        "bd": "<http://www.bigdata.com/rdf#>",
        "cc": "<http://creativecommons.org/ns#>",
        "dct": "<http://purl.org/dc/terms/>",
        "geo": "<http://www.opengis.net/ont/geosparql#>",
        "ontolex": "<http://www.w3.org/ns/lemon/ontolex#>",
        "owl": "<http://www.w3.org/2002/07/owl#>",
        "p": "<http://www.wikidata.org/prop/>",
        "pq": "<http://www.wikidata.org/prop/qualifier/>",
        "pqn": "<http://www.wikidata.org/prop/qualifier/value-normalized/>",
        "pqv": "<http://www.wikidata.org/prop/qualifier/value/>",
        "pr": "<http://www.wikidata.org/prop/reference/>",
        "prn": "<http://www.wikidata.org/prop/reference/value-normalized/>",
        "prov": "<http://www.w3.org/ns/prov#>",
        "prv": "<http://www.wikidata.org/prop/reference/value/>",
        "ps": "<http://www.wikidata.org/prop/statement/>",
        "psn": "<http://www.wikidata.org/prop/statement/value-normalized/>",
        "psv": "<http://www.wikidata.org/prop/statement/value/>",
        "rdf": "<http://www.w3.org/1999/02/22-rdf-syntax-ns#>",
        "rdfs": "<http://www.w3.org/2000/01/rdf-schema#>",
        "schema": "<http://schema.org/>",
        "skos": "<http://www.w3.org/2004/02/skos/core#>",
        "wd": "<http://www.wikidata.org/entity/>",
        "wdata": "<http://www.wikidata.org/wiki/Special:EntityData/>",
        "wdno": "<http://www.wikidata.org/prop/novalue/>",
        "wdref": "<http://www.wikidata.org/reference/>",
        "wds": "<http://www.wikidata.org/entity/statement/>",
        "wdt": "<http://www.wikidata.org/prop/direct/>",
        "wdtn": "<http://www.wikidata.org/prop/direct-normalized/>",
        "wdv": "<http://www.wikidata.org/value/>",
        "wikibase": "<http://wikiba.se/ontology#>",
        "xsd": "<http://www.w3.org/2001/XMLSchema#>",
    }
    return "".join(
        f"PREFIX {prefix}: {prefixMap[prefix]}\n"
        for prefix in prefixes
        if prefix in prefixMap
    )

profiler

Created on 2022-11-18

@author: wf

Profiler

simple profiler

Source code in lodstorage/profiler.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
class Profiler:
    """
    simple profiler measuring wall-clock time between start() and time()
    """

    def __init__(self, msg, profile=True, with_start: bool = True):
        """
        construct me with the given msg and profile active flag

        Args:
            msg(str): the message to show if profiling is active
            profile(bool): True if messages should be shown
            with_start(bool): if True start timing immediately
        """
        self.msg = msg
        self.profile = profile
        if with_start:
            self.start()

    def start(self):
        """
        start profiling
        """
        # remember when we started so time() can compute the elapsed seconds
        self.starttime = time.time()
        if not self.profile:
            return
        print(f"Starting {self.msg} ...")

    def time(self, extraMsg=""):
        """
        time the action and print if profile is active
        """
        now = time.time()
        duration = now - self.starttime
        if self.profile:
            print(f"{self.msg}{extraMsg} took {duration:5.1f} s")
        return duration

__init__(msg, profile=True, with_start=True)

construct me with the given msg and profile active flag

Parameters:

Name Type Description Default
msg(str)

the message to show if profiling is active

required
profile(bool)

True if messages should be shown

required
Source code in lodstorage/profiler.py
14
15
16
17
18
19
20
21
22
23
24
25
def __init__(self, msg, profile=True, with_start: bool = True):
    """
    construct me with the given msg and profile active flag

    Args:
        msg(str): the message to show if profiling is active
        profile(bool): True if messages should be shown
        with_start(bool): if True start timing immediately
    """
    self.msg = msg
    self.profile = profile
    # optionally begin timing right away
    if with_start:
        self.start()

start()

start profiling

Source code in lodstorage/profiler.py
27
28
29
30
31
32
33
def start(self):
    """
    start profiling
    """
    # remember when we started so time() can compute the elapsed seconds
    self.starttime = time.time()
    if not self.profile:
        return
    print(f"Starting {self.msg} ...")

time(extraMsg='')

time the action and print if profile is active

Source code in lodstorage/profiler.py
35
36
37
38
39
40
41
42
def time(self, extraMsg=""):
    """
    time the action and print if profile is active
    """
    elapsed = time.time() - self.starttime
    if self.profile:
        print(f"{self.msg}{extraMsg} took {elapsed:5.1f} s")
    return elapsed

query

Created on 2020-08-22

@author: wf

Endpoint

Bases: JSONAble

a query endpoint

Source code in lodstorage/query.py
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
class Endpoint(JSONAble):
    """
    a query endpoint
    """

    @staticmethod
    def getSamples():
        # two representative endpoint configurations
        samples = [
            {
                "name": "wikidata",
                "lang": "sparql",
                "endpoint": "https://query.wikidata.org/sparql",
                "website": "https://query.wikidata.org/",
                "database": "blazegraph",
                "method": "POST",
                "prefixes": "PREFIX bd: <http://www.bigdata.com/rdf#>\nPREFIX cc: <http://creativecommons.org/ns#>",
            },
            {
                "name": "dbis-jena",
                "lang": "sparql",
                "endpoint": "https://confident.dbis.rwth-aachen.de/jena/",
                "website": "https://confident.dbis.rwth-aachen.de",
                "auth": "BASIC",
                "user": "secret",
                "password": "#not public - example not usable for access#",
            },
        ]
        return samples

    @classmethod
    def getDefault(cls):
        # the first sample (wikidata) serves as the default configuration
        default_conf = Endpoint()
        default_conf.fromDict(Endpoint.getSamples()[0])
        return default_conf

    def __init__(self):
        """
        constructor for setting defaults
        """
        self.method = "POST"
        self.lang = "SPARQL"

    def __str__(self):
        """
        Returns:
            str: a string representation of this Endpoint
        """
        return f"{self.name}:{self.website}:{self.endpoint}({self.method})"

__init__()

constructor for setting defaults

Source code in lodstorage/query.py
746
747
748
749
750
751
def __init__(self):
    """
    constructor for setting defaults
    """
    # default to POST requests against SPARQL endpoints
    self.method = "POST"
    self.lang = "SPARQL"

__str__()

Returns:

Name Type Description
str

a string representation of this Endpoint

Source code in lodstorage/query.py
753
754
755
756
757
758
759
def __str__(self):
    """
    Returns:
        str: a string representation of this Endpoint
    """
    return f"{self.name}:{self.website}:{self.endpoint}({self.method})"

EndpointManager

Bases: object

manages a set of SPARQL endpoints

Source code in lodstorage/query.py
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
class EndpointManager(object):
    """
    manages a set of SPARQL endpoints
    """

    @staticmethod
    def getEndpoints(
        endpointPath: str = None, lang: str = None, with_default: bool = True
    ):
        """
        get the endpoints for the given endpointPath

        Args:
            endpointPath(str): the path to the yaml file with the endpoint configurations
            lang(str): if lang is given filter by the given language
            with_default(bool): if True include the default endpoints
        """
        yaml_paths = YamlPath.getPaths(
            "endpoints.yaml", endpointPath, with_default=with_default
        )
        endpoints = {}
        for yaml_path in yaml_paths:
            with open(yaml_path, "r") as stream:
                endpoint_records = yaml.safe_load(stream)
                for name, record in endpoint_records.items():
                    # skip records whose language does not match the filter
                    if lang is not None and record["lang"] != lang:
                        continue
                    endpoint = Endpoint()
                    endpoint.fromDict({"name": name, **record})
                    endpoints[name] = endpoint
        return endpoints

    @staticmethod
    def getEndpointNames(endpointPath=None, lang: str = None) -> list:
        """
        Returns a list of all available endpoint names
        Args:
            endpointPath(str): the path to the yaml file with the endpoint configurations
            lang(str): if lang is given filter by the given language

        """
        available = EndpointManager.getEndpoints(endpointPath, lang=lang)
        return list(available.keys())

getEndpointNames(endpointPath=None, lang=None) staticmethod

Returns a list of all available endpoint names Args: endpointPath(str): the path to the yaml file with the endpoint configurations lang(str): if lang is given filter by the given language

Source code in lodstorage/query.py
698
699
700
701
702
703
704
705
706
707
708
@staticmethod
def getEndpointNames(endpointPath=None, lang: str = None) -> list:
    """
    Returns a list of all available endpoint names
    Args:
        endpointPath(str): the path to the yaml file with the endpoint configurations
        lang(str): if lang is given filter by the given language

    """
    available = EndpointManager.getEndpoints(endpointPath, lang=lang)
    return [name for name in available]

getEndpoints(endpointPath=None, lang=None, with_default=True) staticmethod

get the endpoints for the given endpointPath

Parameters:

Name Type Description Default
endpointPath(str)

the path to the yaml file with the endpoint configurations

required
lang(str)

if lang is given filter by the given language

required
with_default(bool)

if True include the default endpoints

required
Source code in lodstorage/query.py
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
@staticmethod
def getEndpoints(
    endpointPath: str = None, lang: str = None, with_default: bool = True
):
    """
    get the endpoints for the given endpointPath

    Args:
        endpointPath(str): the path to the yaml file with the endpoint configurations
        lang(str): if lang is given filter by the given language
        with_default(bool): if True include the default endpoints
    """
    yaml_paths = YamlPath.getPaths(
        "endpoints.yaml", endpointPath, with_default=with_default
    )
    endpoints = {}
    for yaml_path in yaml_paths:
        with open(yaml_path, "r") as stream:
            endpoint_records = yaml.safe_load(stream)
            for name, record in endpoint_records.items():
                # skip records whose language does not match the filter
                if lang is not None and record["lang"] != lang:
                    continue
                endpoint = Endpoint()
                endpoint.fromDict({"name": name, **record})
                endpoints[name] = endpoint
    return endpoints

Format

Bases: Enum

the supported formats for the results to be delivered

Source code in lodstorage/query.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class Format(Enum):
    """
    enumeration of the output formats supported for delivering results
    """

    csv = "csv"
    json = "json"
    html = "html"
    xml = "xml"
    tsv = "tsv"
    latex = "latex"
    mediawiki = "mediawiki"
    raw = "raw"
    github = "github"

    def __str__(self) -> str:
        # the enum value doubles as the display name
        return self.value

Query

Bases: object

a Query e.g. for SPARQL

Source code in lodstorage/query.py
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
class Query(object):
    """a Query e.g. for SPARQL"""

    def __init__(
        self,
        name: str,
        query: str,
        lang="sparql",
        endpoint: str = None,
        database: str = "blazegraph",
        title: str = None,
        description: str = None,
        limit: int = None,
        prefixes=None,
        tryItUrl: str = None,
        formats: list = None,
        debug=False,
    ):
        """
        constructor
        Args:
            name(string): the name/label of the query
            query(string): the native Query text e.g. in SPARQL
            lang(string): the language of the query e.g. SPARQL
            endpoint(string): the endpoint url to use
            database(string): the type of database e.g. "blazegraph"
            title(string): the header/title of the query
            description(string): the description of the query
            limit(int): the limit of the query default: None
            prefixes(list): list of prefixes to be resolved
            tryItUrl(str): the url of a "tryit" webpage
            formats(list): key,value pairs of ValueFormatters to be applied
            debug(boolean): true if debug mode should be switched on
        """
        self.name = name
        self.query = query
        self.lang = lang
        self.endpoint = endpoint
        self.database = database
        self.tryItUrl = tryItUrl

        # fall back to the query name when no explicit title is given
        self.title = title = name if title is None else title
        self.description = "" if description is None else description
        self.limit = limit
        self.prefixes = prefixes
        self.debug = debug
        self.formats = formats
        self.formatCallBacks = []

    def __str__(self):
        """
        return a line-per-attribute representation of all non-None attributes
        """
        queryStr = "\n".join(
            [
                f"{key}:{value}"
                for key, value in self.__dict__.items()
                if value is not None
            ]
        )
        return f"{queryStr}"

    def addFormatCallBack(self, callback):
        """
        register a callback to be run by preFormatWithCallBacks

        Args:
            callback: a callable taking (record, key, value, tablefmt)
        """
        self.formatCallBacks.append(callback)

    def preFormatWithCallBacks(self, lod, tablefmt: str):
        """
        run the configured call backs to pre-format the given list of dicts for the given tableformat

        Args:
            lod(list): the list of dicts to handle
            tablefmt(str): the table format (according to tabulate) to apply

        """
        for record in lod:
            for key in record.keys():
                value = record[key]
                if value is not None:
                    for formatCallBack in self.formatCallBacks:
                        formatCallBack(record, key, value, tablefmt)

    def formatWithValueFormatters(self, lod, tablefmt: str):
        """
        format the given list of Dicts with the ValueFormatters
        """
        # is there anything to do?
        if self.formats is None:
            # no
            return
        # get the value Formatters that might apply here
        valueFormatters = ValueFormatter.getFormats()
        formatsToApply = {}
        for valueFormatSpec in self.formats:
            # a spec is "<key>:<formatName>" e.g. president:wikidata
            # NOTE(review): a spec without ":" raises IndexError here
            parts = valueFormatSpec.split(":")
            # e.g. president:wikidata
            keytoformat = parts[0]
            formatName = parts[1]
            if formatName in valueFormatters:
                formatsToApply[keytoformat] = valueFormatters[formatName]
        for record in lod:
            for keytoformat in formatsToApply:
                valueFormatter = formatsToApply[keytoformat]
                # format all key values
                if keytoformat == "*":
                    for key in record:
                        valueFormatter.applyFormat(record, key, tablefmt)
                # or just a selected one
                elif keytoformat in record:
                    valueFormatter.applyFormat(record, keytoformat, tablefmt)
            pass

    def getTryItUrl(self, baseurl: str, database: str = "blazegraph"):
        """
        return the "try it!" url for the given baseurl

        Args:
            baseurl(str): the baseurl to use

        Returns:
            str: the "try it!" url for the given query
        """
        # https://stackoverflow.com/a/9345102/1497139
        quoted = urllib.parse.quote(str(self.query))
        # blazegraph expects the query as a url fragment, other engines as a query parameter
        if database == "blazegraph":
            delim = "/#"
        else:
            delim = "?query="
        url = f"{baseurl}{delim}{quoted}"
        return url

    def getLink(self, url, title, tablefmt):
        """
        convert the given url and title to a link for the given tablefmt

        Args:
            url(str): the url to convert
            title(str): the title to show
            tablefmt(str): the table format to use
        """
        # create a safe url
        if url is None:
            return ""
        # default: plain "title:url" for formats without link markup
        markup = f"{title}:{url}"
        if tablefmt == "mediawiki":
            markup = f"[{url} {title}]"
        elif tablefmt == "github":
            markup = f"[{title}]({url})"
        elif tablefmt == "latex":
            markup = r"\href{%s}{%s}" % (url, title)
        return markup

    def prefixToLink(self, lod: list, prefix: str, tablefmt: str):
        """
        convert url prefixes to link according to the given table format
        TODO - refactor as preFormat callback

        Args:
            lod(list): the list of dicts to convert
            prefix(str): the prefix to strip
            tablefmt(str): the tabulate tableformat to use

        """
        for record in lod:
            for key in record.keys():
                value = record[key]
                if (
                    value is not None
                    and isinstance(value, str)
                    and value.startswith(prefix)
                ):
                    # strip the prefix and url-decode the remainder for display
                    item = value.replace(prefix, "")
                    uqitem = urllib.parse.unquote(item)
                    if tablefmt == "latex":
                        link = uqitem
                    else:
                        link = self.getLink(value, uqitem, tablefmt)
                    record[key] = link

    def asYaml(self):
        """
        serialize me to YAML markup

        Returns:
            str: the YAML markup
        """
        yamlMarkup = yaml.dump(self)
        return yamlMarkup

    def asWikiSourceMarkup(self):
        """
        convert me to Mediawiki markup for syntax highlighting using the "source" tag


        Returns:
            string: the Markup
        """
        markup = "<source lang='%s'>\n%s\n</source>\n" % (self.lang, self.query)
        return markup

    def asWikiMarkup(self, listOfDicts):
        """
        convert the given listOfDicts result to MediaWiki markup

        Args:
            listOfDicts(list): the list of Dicts to convert to MediaWiki markup

        Returns:
            string: the markup
        """
        if self.debug:
            print(listOfDicts)
        mwTable = MediaWikiTable()
        mwTable.fromListOfDicts(listOfDicts)
        markup = mwTable.asWikiMarkup()
        return markup

    def documentQueryResult(
        self,
        qlod: list,
        limit=None,
        tablefmt: str = "mediawiki",
        tryItUrl: str = None,
        withSourceCode=True,
        **kwArgs,
    ):
        """
        document the given query results - note that a copy of the whole list is going to be created for being able to format

        Args:
            qlod: the list of dicts result
            limit(int): the maximum number of records to display in result tabulate
            tablefmt(str): the table format to use
            tryItUrl: the "try it!" url to show
            withSourceCode(bool): if True document the source code

        Return:
            str: the documentation tabular text for the given parameters
        """
        sourceCode = self.query
        tryItMarkup = ""
        sourceCodeHeader = ""
        resultHeader = ""
        title = self.title
        # work on a (possibly truncated) deep copy so formatting does not modify qlod
        if limit is not None:
            lod = copy.deepcopy(qlod[:limit])
        else:
            lod = copy.deepcopy(qlod)
        self.preFormatWithCallBacks(lod, tablefmt=tablefmt)
        self.formatWithValueFormatters(lod, tablefmt=tablefmt)
        result = tabulate(lod, headers="keys", tablefmt=tablefmt, **kwArgs)
        if tryItUrl is None and hasattr(self, "tryItUrl"):
            tryItUrl = self.tryItUrl
        # choose title and result header markup per table format
        if tablefmt == "github":
            title = f"## {self.title}"
            resultHeader = "## result"
        elif tablefmt == "mediawiki":
            title = f"== {self.title} =="
            resultHeader = "=== result ==="
        elif tablefmt == "latex":
            resultHeader = ""
            result = r"""\begin{table}
            \caption{%s}
            \label{tab:%s}
            %s
            \end{table}
            """ % (
                self.title,
                self.name,
                result,
            )
        else:
            title = f"{self.title}"
            resultHeader = "result:"
        # optionally document the query source code with a "try it!" link
        if withSourceCode:
            tryItUrlEncoded = self.getTryItUrl(tryItUrl, self.database)
            tryItMarkup = self.getLink(tryItUrlEncoded, "try it!", tablefmt)
            if tablefmt == "github":
                sourceCodeHeader = "### query"
                sourceCode = f"""```{self.lang}
{self.query}
```"""
            elif tablefmt == "mediawiki":
                sourceCodeHeader = "=== query ==="
                sourceCode = f"""<source lang='{self.lang}'>
{self.query}
</source>
"""
            elif tablefmt == "latex":
                sourceCodeHeader = (
                    r"see query listing \ref{listing:%s} and result table \ref{tab:%s}"
                    % (self.name, self.name)
                )
                sourceCode = r"""\begin{listing}[ht]
\caption{%s}
\label{listing:%s}
\begin{minted}{%s}
%s
\end{minted}
%s
\end{listing}
""" % (
                    self.title,
                    self.name,
                    self.lang.lower(),
                    self.query,
                    tryItMarkup,
                )
            else:
                sourceCodeHeader = "query:"
                sourceCode = f"{self.query}"
        # "try it!" links are only shown for SPARQL queries
        if self.lang != "sparql":
            tryItMarkup = ""
        queryResultDocumentation = QueryResultDocumentation(
            query=self,
            title=title,
            tablefmt=tablefmt,
            tryItMarkup=tryItMarkup,
            sourceCodeHeader=sourceCodeHeader,
            sourceCode=sourceCode,
            resultHeader=resultHeader,
            result=result,
        )
        return queryResultDocumentation

__init__(name, query, lang='sparql', endpoint=None, database='blazegraph', title=None, description=None, limit=None, prefixes=None, tryItUrl=None, formats=None, debug=False)

constructor Args: name(string): the name/label of the query query(string): the native Query text e.g. in SPARQL lang(string): the language of the query e.g. SPARQL endpoint(string): the endpoint url to use database(string): the type of database e.g. "blazegraph" title(string): the header/title of the query description(string): the description of the query limit(int): the limit of the query default: None prefixes(list): list of prefixes to be resolved tryItUrl(str): the url of a "tryit" webpage formats(list): key,value pairs of ValueFormatters to be applied debug(boolean): true if debug mode should be switched on

Source code in lodstorage/query.py
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
def __init__(
    self,
    name: str,
    query: str,
    lang="sparql",
    endpoint: str = None,
    database: str = "blazegraph",
    title: str = None,
    description: str = None,
    limit: int = None,
    prefixes=None,
    tryItUrl: str = None,
    formats: list = None,
    debug=False,
):
    """
    Initialize the query.

    Args:
        name(string): the name/label of the query
        query(string): the native Query text e.g. in SPARQL
        lang(string): the language of the query e.g. SPARQL
        endpoint(string): the endpoint url to use
        database(string): the type of database e.g. "blazegraph"
        title(string): the header/title of the query
        description(string): the description of the query
        limit(int): the limit of the query default: None
        prefixes(list): list of prefixes to be resolved
        tryItUrl(str): the url of a "tryit" webpage
        formats(list): key,value pairs of ValueFormatters to be applied
        debug(boolean): true if debug mode should be switched on
    """
    self.name = name
    self.query = query
    self.lang = lang
    self.endpoint = endpoint
    self.database = database
    self.tryItUrl = tryItUrl
    # the name doubles as the title unless one is given explicitly
    if title is None:
        title = name
    self.title = title
    self.description = description if description is not None else ""
    self.limit = limit
    self.prefixes = prefixes
    self.debug = debug
    self.formats = formats
    self.formatCallBacks = []

asWikiMarkup(listOfDicts)

convert the given listOfDicts result to MediaWiki markup

Parameters:

Name Type Description Default
listOfDicts(list)

the list of Dicts to convert to MediaWiki markup

required

Returns:

Name Type Description
string

the markup

Source code in lodstorage/query.py
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
def asWikiMarkup(self, listOfDicts):
    """
    render the given listOfDicts result as a MediaWiki table

    Args:
        listOfDicts(list): the list of Dicts to convert to MediaWiki markup

    Returns:
        string: the markup
    """
    if self.debug:
        print(listOfDicts)
    wiki_table = MediaWikiTable()
    wiki_table.fromListOfDicts(listOfDicts)
    return wiki_table.asWikiMarkup()

asWikiSourceMarkup()

convert me to Mediawiki markup for syntax highlighting using the "source" tag

Returns:

Name Type Description
string

the Markup

Source code in lodstorage/query.py
465
466
467
468
469
470
471
472
473
474
def asWikiSourceMarkup(self):
    """
    render my query as MediaWiki markup wrapped in a "source" tag
    for syntax highlighting

    Returns:
        string: the Markup
    """
    return f"<source lang='{self.lang}'>\n{self.query}\n</source>\n"

documentQueryResult(qlod, limit=None, tablefmt='mediawiki', tryItUrl=None, withSourceCode=True, **kwArgs)

document the given query results - note that a copy of the whole list is going to be created for being able to format

Parameters:

Name Type Description Default
qlod list

the list of dicts result

required
limit(int)

the maximum number of records to display in result tabulate

required
tablefmt(str)

the table format to use

required
tryItUrl str

the "try it!" url to show

None
withSourceCode(bool)

if True document the source code

required
Return

str: the documentation tabular text for the given parameters

Source code in lodstorage/query.py
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
    def documentQueryResult(
        self,
        qlod: list,
        limit=None,
        tablefmt: str = "mediawiki",
        tryItUrl: str = None,
        withSourceCode=True,
        **kwArgs,
    ):
        """
        document the given query results - note that a copy of the whole list is going to be created for being able to format

        Args:
            qlod: the list of dicts result
            limit(int): the maximum number of records to display in result tabulate
            tablefmt(str): the table format to use
            tryItUrl: the "try it!" url to show
            withSourceCode(bool): if True document the source code

        Return:
            str: the documentation tabular text for the given parameters
        """
        sourceCode = self.query
        tryItMarkup = ""
        sourceCodeHeader = ""
        resultHeader = ""
        title = self.title
        # work on a (possibly truncated) deep copy so formatting does not modify qlod
        if limit is not None:
            lod = copy.deepcopy(qlod[:limit])
        else:
            lod = copy.deepcopy(qlod)
        self.preFormatWithCallBacks(lod, tablefmt=tablefmt)
        self.formatWithValueFormatters(lod, tablefmt=tablefmt)
        result = tabulate(lod, headers="keys", tablefmt=tablefmt, **kwArgs)
        if tryItUrl is None and hasattr(self, "tryItUrl"):
            tryItUrl = self.tryItUrl
        # choose title and result header markup per table format
        if tablefmt == "github":
            title = f"## {self.title}"
            resultHeader = "## result"
        elif tablefmt == "mediawiki":
            title = f"== {self.title} =="
            resultHeader = "=== result ==="
        elif tablefmt == "latex":
            resultHeader = ""
            result = r"""\begin{table}
            \caption{%s}
            \label{tab:%s}
            %s
            \end{table}
            """ % (
                self.title,
                self.name,
                result,
            )
        else:
            title = f"{self.title}"
            resultHeader = "result:"
        # optionally document the query source code with a "try it!" link
        if withSourceCode:
            tryItUrlEncoded = self.getTryItUrl(tryItUrl, self.database)
            tryItMarkup = self.getLink(tryItUrlEncoded, "try it!", tablefmt)
            if tablefmt == "github":
                sourceCodeHeader = "### query"
                sourceCode = f"""```{self.lang}
{self.query}
```"""
            elif tablefmt == "mediawiki":
                sourceCodeHeader = "=== query ==="
                sourceCode = f"""<source lang='{self.lang}'>
{self.query}
</source>
"""
            elif tablefmt == "latex":
                sourceCodeHeader = (
                    r"see query listing \ref{listing:%s} and result table \ref{tab:%s}"
                    % (self.name, self.name)
                )
                sourceCode = r"""\begin{listing}[ht]
\caption{%s}
\label{listing:%s}
\begin{minted}{%s}
%s
\end{minted}
%s
\end{listing}
""" % (
                    self.title,
                    self.name,
                    self.lang.lower(),
                    self.query,
                    tryItMarkup,
                )
            else:
                sourceCodeHeader = "query:"
                sourceCode = f"{self.query}"
        # "try it!" links are only shown for SPARQL queries
        if self.lang != "sparql":
            tryItMarkup = ""
        queryResultDocumentation = QueryResultDocumentation(
            query=self,
            title=title,
            tablefmt=tablefmt,
            tryItMarkup=tryItMarkup,
            sourceCodeHeader=sourceCodeHeader,
            sourceCode=sourceCode,
            resultHeader=resultHeader,
            result=result,
        )
        return queryResultDocumentation

formatWithValueFormatters(lod, tablefmt)

format the given list of Dicts with the ValueFormatters

Source code in lodstorage/query.py
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
def formatWithValueFormatters(self, lod, tablefmt: str):
    """
    format the given list of Dicts with the ValueFormatters

    Args:
        lod(list): the list of dicts to format in place
        tablefmt(str): the target table format
    """
    # is there anything to do?
    if self.formats is None:
        # no
        return
    # get the value Formatters that might apply here
    valueFormatters = ValueFormatter.getFormats()
    formatsToApply = {}
    for valueFormatSpec in self.formats:
        # a spec has the shape "<key>:<formatName>" e.g. president:wikidata
        parts = valueFormatSpec.split(":")
        if len(parts) < 2:
            # fix: ignore malformed specs instead of raising IndexError
            continue
        keytoformat = parts[0]
        formatName = parts[1]
        if formatName in valueFormatters:
            formatsToApply[keytoformat] = valueFormatters[formatName]
    for record in lod:
        for keytoformat, valueFormatter in formatsToApply.items():
            # "*" formats all key values
            if keytoformat == "*":
                for key in record:
                    valueFormatter.applyFormat(record, key, tablefmt)
            # or just a selected one
            elif keytoformat in record:
                valueFormatter.applyFormat(record, keytoformat, tablefmt)

convert the given url and title to a link for the given tablefmt

Parameters:

Name Type Description Default
url(str)

the url to convert

required
title(str)

the title to show

required
tablefmt(str)

the table format to use

required
Source code in lodstorage/query.py
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
def getLink(self, url, title, tablefmt):
    """
    convert the given url and title to a link for the given tablefmt

    Args:
        url(str): the url to convert
        title(str): the title to show
        tablefmt(str): the table format to use
    """
    # nothing to link when there is no url
    if url is None:
        return ""
    # per-format link markup; plain "title:url" is the fallback
    markup_by_format = {
        "mediawiki": f"[{url} {title}]",
        "github": f"[{title}]({url})",
        "latex": r"\href{%s}{%s}" % (url, title),
    }
    return markup_by_format.get(tablefmt, f"{title}:{url}")

getTryItUrl(baseurl, database='blazegraph')

return the "try it!" url for the given baseurl

Parameters:

Name Type Description Default
baseurl(str)

the baseurl to use

required

Returns:

Name Type Description
str

the "try it!" url for the given query

Source code in lodstorage/query.py
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
def getTryItUrl(self, baseurl: str, database: str = "blazegraph"):
    """
    return the "try it!" url for the given baseurl

    Args:
        baseurl(str): the baseurl to use

    Returns:
        str: the "try it!" url for the given query
    """
    # https://stackoverflow.com/a/9345102/1497139
    encoded_query = urllib.parse.quote(str(self.query))
    # blazegraph takes the query as a url fragment, other engines as a query parameter
    delim = "/#" if database == "blazegraph" else "?query="
    return f"{baseurl}{delim}{encoded_query}"

preFormatWithCallBacks(lod, tablefmt)

run the configured call backs to pre-format the given list of dicts for the given tableformat

Parameters:

Name Type Description Default
lod(list)

the list of dicts to handle

required
tablefmt(str)

the table format (according to tabulate) to apply

required
Source code in lodstorage/query.py
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
def preFormatWithCallBacks(self, lod, tablefmt: str):
    """
    run the configured callbacks to pre-format the given list of dicts
    for the given table format

    Args:
        lod(list): the list of dicts to handle
        tablefmt(str): the table format (according to tabulate) to apply
    """
    for record in lod:
        for key, value in record.items():
            # None values are never passed to callbacks
            if value is None:
                continue
            for callback in self.formatCallBacks:
                callback(record, key, value, tablefmt)

convert url prefixes to link according to the given table format TODO - refactor as preFormat callback

Parameters:

Name Type Description Default
lod(list)

the list of dicts to convert

required
prefix(str)

the prefix to strip

required
tablefmt(str)

the tabulate tableformat to use

required
Source code in lodstorage/query.py
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
def prefixToLink(self, lod: list, prefix: str, tablefmt: str):
    """
    convert url prefixes to links according to the given table format
    TODO - refactor as preFormat callback

    Args:
        lod(list): the list of dicts to convert
        prefix(str): the prefix to strip
        tablefmt(str): the tabulate tableformat to use
    """
    for record in lod:
        for key, value in record.items():
            # only string values carrying the prefix are converted
            if not (isinstance(value, str) and value.startswith(prefix)):
                continue
            decoded_item = urllib.parse.unquote(value.replace(prefix, ""))
            if tablefmt == "latex":
                record[key] = decoded_item
            else:
                record[key] = self.getLink(value, decoded_item, tablefmt)

QueryManager

Bases: object

manages pre packaged Queries

Source code in lodstorage/query.py
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
class QueryManager(object):
    """
    manages pre packaged Queries
    """

    def __init__(
        self, lang: str = None, debug=False, queriesPath=None, with_default: bool = True
    ):
        """
        Constructor
        Args:
            lang(str): the language to use for the queries sql or sparql
            queriesPath(str): the path of the yaml file to load queries from
            debug(bool): True if debug information should be shown
            with_default(bool): if True also load the default yaml file
        """
        if lang is None:
            lang = "sql"
        self.queriesByName = {}
        self.lang = lang
        self.debug = debug
        queries = QueryManager.getQueries(
            queriesPath=queriesPath, with_default=with_default
        )
        for name, queryDict in queries.items():
            # only instantiate queries that have a variant in my language
            if self.lang in queryDict:
                queryText = queryDict.pop(self.lang)
                for qformat in ["sparql", "sql"]:  # drop not needed query variants
                    if qformat in queryDict:
                        queryDict.pop(qformat)
                query = Query(
                    name=name,
                    query=queryText,
                    lang=self.lang,
                    **queryDict,
                    debug=self.debug,
                )
                self.queriesByName[name] = query

    @staticmethod
    def getQueries(queriesPath=None, with_default: bool = True):
        """
        get the queries for the given queries Path

        Args:
            queriesPath(str): the path of the yaml file to load queries from
            with_default(bool): if True also load the default yaml file

        Returns:
            dict: the raw query records by name
        """
        queriesPaths = YamlPath.getPaths(
            "queries.yaml", queriesPath, with_default=with_default
        )
        queries = {}
        # fix: use a distinct loop variable so the queriesPath parameter
        # is not shadowed inside the loop
        for lQueriesPath in queriesPaths:
            if os.path.isfile(lQueriesPath):
                with open(lQueriesPath, "r") as stream:
                    lqueries = yaml.safe_load(stream)
                    for key in lqueries:
                        queries[key] = lqueries[key]
        return queries

__init__(lang=None, debug=False, queriesPath=None, with_default=True)

Constructor Args: lang(str): the language to use for the queries sql or sparql queriesPath(str): the path of the yaml file to load queries from debug(bool): True if debug information should be shown with_default(bool): if True also load the default yaml file

Source code in lodstorage/query.py
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
def __init__(
    self, lang: str = None, debug=False, queriesPath=None, with_default: bool = True
):
    """
    Construct the query manager.

    Args:
        lang(str): the language to use for the queries sql or sparql
        queriesPath(str): the path of the yaml file to load queries from
        debug(bool): True if debug information should be shown
        with_default(bool): if True also load the default yaml file
    """
    self.queriesByName = {}
    self.lang = "sql" if lang is None else lang
    self.debug = debug
    query_records = QueryManager.getQueries(
        queriesPath=queriesPath, with_default=with_default
    )
    for query_name, query_record in query_records.items():
        # skip queries without a variant in my language
        if self.lang not in query_record:
            continue
        query_text = query_record.pop(self.lang)
        # drop the query variants that are not needed
        for variant in ("sparql", "sql"):
            query_record.pop(variant, None)
        self.queriesByName[query_name] = Query(
            name=query_name,
            query=query_text,
            lang=self.lang,
            **query_record,
            debug=self.debug,
        )

getQueries(queriesPath=None, with_default=True) staticmethod

get the queries for the given queries Path

Parameters:

Name Type Description Default
queriesPath(str)

the path of the yaml file to load queries from

required
with_default(bool)

if True also load the default yaml file

required
Source code in lodstorage/query.py
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
@staticmethod
def getQueries(queriesPath=None, with_default: bool = True):
    """
    get the queries for the given queries Path

    Args:
        queriesPath(str): the path of the yaml file to load queries from
        with_default(bool): if True also load the default yaml file

    Returns:
        dict: the raw query records by name
    """
    queriesPaths = YamlPath.getPaths(
        "queries.yaml", queriesPath, with_default=with_default
    )
    queries = {}
    # fix: use a distinct loop variable so the queriesPath parameter
    # is not shadowed inside the loop
    for lQueriesPath in queriesPaths:
        if os.path.isfile(lQueriesPath):
            with open(lQueriesPath, "r") as stream:
                lqueries = yaml.safe_load(stream)
                for key in lqueries:
                    queries[key] = lqueries[key]
    return queries

QueryResultDocumentation

documentation of a query result

Source code in lodstorage/query.py
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
class QueryResultDocumentation:
    """
    documentation of a query result
    """

    def __init__(
        self,
        query,
        title: str,
        tablefmt: str,
        tryItMarkup: str,
        sourceCodeHeader: str,
        sourceCode: str,
        resultHeader: str,
        result: str,
    ):
        """
        constructor

        Args:
            query(Query): the query to be documented
            title(str): the title markup
            tablefmt(str): the tableformat that has been used
            tryItMarkup(str): the "try it!" markup to show
            sourceCodeHeader(str): the header title to use for the sourceCode
            sourceCode(str): the sourceCode
            resultHeader(str): the header title to use for the result
            result(str): the result markup e.g. the result table

        """
        self.query = query
        self.title = title
        self.tablefmt = tablefmt
        # prefix with a newline so the markup starts on its own line
        self.tryItMarkup = f"\n{tryItMarkup}"
        self.sourceCodeHeader = sourceCodeHeader
        self.sourceCode = sourceCode
        self.resultHeader = resultHeader
        self.result = result

    @staticmethod
    def uniCode2Latex(text: str, withConvert: bool = False) -> str:
        """
        converts unicode text to latex and
        fixes UTF-8 chars for latex in a certain range:
            ₀:$_0$ ... ₉:$_9$

        see https://github.com/phfaist/pylatexenc/issues/72

        Args:
            text(str): the string to fix
            withConvert(bool): if unicode to latex library conversion should be used

        Returns:
            str: latex presentation of UTF-8 char
        """
        # map the unicode subscript digits U+2080..U+2089 to latex math subscripts
        for code in range(8320, 8330):
            text = text.replace(chr(code), f"$_{code-8320}$")
        if withConvert:
            latex = unicode_to_latex(text)
            # workaround {\textbackslash} being returned
            # latex=latex.replace("{\\textbackslash}",'\\')
            text = latex
        return text

    def __str__(self):
        """
        simple string representation

        Returns:
            str: the text representation as delivered by asText
        """
        return self.asText()

    def asText(self):
        """
        return my text representation

        Returns:
            str: description, sourceCodeHeader, sourceCode, tryIt link and result table
        """
        text = f"{self.title}\n{self.query.description}\n{self.sourceCodeHeader}\n{self.sourceCode}{self.tryItMarkup}\n{self.resultHeader}\n{self.result}"
        # latex output needs the unicode subscript digits translated
        fixedStr = (
            self.uniCode2Latex(text) if self.tablefmt.lower() == "latex" else text
        )
        return fixedStr

__init__(query, title, tablefmt, tryItMarkup, sourceCodeHeader, sourceCode, resultHeader, result)

constructor

Parameters:

Name Type Description Default
query(Query)

the query to be documented

required
title(str)

the title markup

required
tablefmt(str)

the tableformat that has been used

required
tryItMarkup str

the "try it!" markup to show

required
sourceCodeHeader(str)

the header title to use for the sourceCode

required
sourceCode(str)

the sourceCode

required
resultHeader(str)

the header title to use for the result

required
result(str)

the result markup e.g. the result table

required
Source code in lodstorage/query.py
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
def __init__(
    self,
    query,
    title: str,
    tablefmt: str,
    tryItMarkup: str,
    sourceCodeHeader: str,
    sourceCode: str,
    resultHeader: str,
    result: str,
):
    """
    constructor

    Args:
        query(Query): the query to be documented
        title(str): the title markup
        tablefmt(str): the tableformat that has been used
        tryItMarkup(str): the "try it!" markup to show
        sourceCodeHeader(str): the header title to use for the sourceCode
        sourceCode(str): the sourceCode
        resultHeader(str): the header title to use for the result
        result(str): the result markup e.g. the result table

    """
    self.query = query
    self.title = title
    self.tablefmt = tablefmt
    # prefix with a newline so the markup starts on its own line
    self.tryItMarkup = f"\n{tryItMarkup}"
    self.sourceCodeHeader = sourceCodeHeader
    self.sourceCode = sourceCode
    self.resultHeader = resultHeader
    self.result = result

__str__()

simple string representation

Source code in lodstorage/query.py
266
267
268
269
270
def __str__(self):
    """
    simple string representation

    Returns:
        str: the text representation as delivered by asText
    """
    text = self.asText()
    return text

asText()

return my text representation

Returns:

Name Type Description
str

description, sourceCodeHeader, sourceCode, tryIt link and result table

Source code in lodstorage/query.py
272
273
274
275
276
277
278
279
280
281
282
283
def asText(self):
    """
    return my text representation

    Returns:
        str: description, sourceCodeHeader, sourceCode, tryIt link and result table
    """
    # tryItMarkup already starts with a newline, so no separator before it
    body = f"{self.sourceCode}{self.tryItMarkup}"
    text = f"{self.title}\n{self.query.description}\n{self.sourceCodeHeader}\n{body}\n{self.resultHeader}\n{self.result}"
    if self.tablefmt.lower() == "latex":
        # latex output needs the unicode subscript digits translated
        return self.uniCode2Latex(text)
    return text

uniCode2Latex(text, withConvert=False) staticmethod

converts unicode text to latex and fixes UTF-8 chars for latex in a certain range: ₀:$_0$ ... ₉:$_9$

see https://github.com/phfaist/pylatexenc/issues/72

Parameters:

Name Type Description Default
text(str)

the string to fix

required
withConvert(bool)

if unicode to latex library conversion should be used

required
Returns

str: latex presentation of UTF-8 char

Source code in lodstorage/query.py
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
@staticmethod
def uniCode2Latex(text: str, withConvert: bool = False) -> str:
    """
    converts unicode text to latex and
    fixes UTF-8 chars for latex in a certain range:
        ₀:$_0$ ... ₉:$_9$

    see https://github.com/phfaist/pylatexenc/issues/72

    Args:
        text(str): the string to fix
        withConvert(bool): if unicode to latex library conversion should be used

    Returns:
        str: latex presentation of UTF-8 char
    """
    # map the unicode subscript digits U+2080..U+2089 to latex math subscripts
    for code in range(8320, 8330):
        text = text.replace(chr(code), f"$_{code-8320}$")
    if withConvert:
        latex = unicode_to_latex(text)
        # workaround {\textbackslash} being returned
        # latex=latex.replace("{\\textbackslash}",'\\')
        text = latex
    return text

QuerySyntaxHighlight

Syntax highlighting for queries with pygments

Source code in lodstorage/query.py
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
class QuerySyntaxHighlight:
    """
    Syntax highlighting for queries with pygments
    """

    def __init__(self, query, highlightFormat: str = "html"):
        """
        construct me for the given query and highlightFormat

        Args:
            query(Query): the query to do the syntax highlighting for
            highlightFormat(str): the highlight format to be used
        """
        self.query = query
        self.highlightFormat = highlightFormat
        # pick a pygments lexer matching the query language e.g. sql or sparql
        self.lexer = get_lexer_by_name(self.query.lang)
        # select the pygments formatter for the requested output format;
        # for any other format no formatter attribute is set
        formatter_classes = {"html": HtmlFormatter, "latex": LatexFormatter}
        if self.highlightFormat in formatter_classes:
            self.formatter = formatter_classes[self.highlightFormat]()

    def highlight(self):
        """
        Returns:
            str: the result of the syntax highlighting with pygments
        """
        rendered = highlight(self.query.query, self.lexer, self.formatter)
        return rendered

__init__(query, highlightFormat='html')

construct me for the given query and highlightFormat

Parameters:

Name Type Description Default
query(Query)

the query to do the syntax highlighting for

required
highlightFormat(str)

the highlight format to be used

required
Source code in lodstorage/query.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def __init__(self, query, highlightFormat: str = "html"):
    """
    construct me for the given query and highlightFormat

    Args:
        query(Query): the query to do the syntax highlighting for
        highlightFormat(str): the highlight format to be used - "html" or "latex"
    """
    self.query = query
    self.highlightFormat = highlightFormat
    # pick a pygments lexer matching the query language e.g. sql or sparql
    self.lexer = get_lexer_by_name(self.query.lang)
    if self.highlightFormat == "html":
        self.formatter = HtmlFormatter()
    elif self.highlightFormat == "latex":
        self.formatter = LatexFormatter()
    # note: for any other highlightFormat no formatter attribute is set

highlight()

Returns:

Name Type Description
str

the result of the syntax highlighting with pygments

Source code in lodstorage/query.py
193
194
195
196
197
198
199
def highlight(self):
    """
    Returns:
        str: the result of the syntax highlighting with pygments
    """
    # delegate to pygments with the lexer and formatter chosen at construction
    rendered = highlight(self.query.query, self.lexer, self.formatter)
    return rendered

ValueFormatter

a value Formatter

Source code in lodstorage/query.py
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
class ValueFormatter:
    """
    a value Formatter
    """

    home = str(Path.home())
    # additional endpoints from users endpoint configuration
    formatsPath = f"{os.path.dirname(__file__)}/../sampledata/formats.yaml"
    valueFormats = None

    def __init__(
        self,
        name: str,
        formatString: str,
        regexps: list = None,
    ):
        """
        constructor

        Args:
            name(str): the name of this formatter
            formatString(str): the format String to use
            regexps(list): the regular expressions to apply
        """
        self.name = name
        # guard against None so applyFormat can safely use len() and iterate
        self.regexps = regexps if regexps is not None else []
        self.formatString = formatString

    @classmethod
    def fromDict(cls, name: str, record: dict):
        """
        create a ValueFormatter from the given dict

        Args:
            name(str): the name of the formatter
            record(dict): a record with a "format" entry and optional "regexps"

        Returns:
            ValueFormatter: the value formatter for the given record
        """
        regexps = record.get("regexps", [])
        # use cls so subclasses create instances of their own type
        vf = cls(name=name, formatString=record["format"], regexps=regexps)
        return vf

    @classmethod
    def getFormats(cls, formatsPath: str = None) -> dict:
        """
        get the available ValueFormatters

        Args:
            formatsPath(str): the path to the yaml file to read the format specs from
        Returns:
            dict: a map for ValueFormatters by formatter Name
        """
        # lazy initialization - read the yaml file(s) only once
        if cls.valueFormats is None:
            valueFormats = {}
            formatPaths = YamlPath.getPaths("formats.yaml", formatsPath)
            for formatPath in formatPaths:
                with open(formatPath, "r", encoding="utf-8") as stream:
                    valueFormatRecords = yaml.safe_load(stream)
                    for valueFormatKey, valueFormatRecord in valueFormatRecords.items():
                        valueFormats[valueFormatKey] = ValueFormatter.fromDict(
                            name=valueFormatKey, record=valueFormatRecord
                        )
            cls.valueFormats = valueFormats
        return cls.valueFormats

    def applyFormat(self, record, key, resultFormat: Format):
        """
        apply the given format to the given record

        Args:
            record(dict): the record to handle
            key(str): the property key
            resultFormat(str): the resultFormat Style to apply
        """
        if key in record:
            value = record[key]
            if value is not None and isinstance(value, str):
                # if there are no regular expressions specified always format
                doformat = len(self.regexps) == 0
                for regexp in self.regexps:
                    try:
                        vmatch = re.match(regexp, value)
                        if vmatch:
                            # we found a match and will format it if the value is not none
                            doformat = True
                            value = vmatch.group("value")
                    except Exception as ex:
                        print(
                            f"ValueFormatter: {self.name}\nInvalid regular expression:{regexp}\n{str(ex)}",
                            file=sys.stderr,
                        )
                if value is not None and doformat:
                    link = self.formatString.format(value=value)
                    newValue = None
                    if resultFormat == "github":
                        newValue = f"[{value}]({link})"
                    elif resultFormat == "mediawiki":
                        newValue = f"[{link} {value}]"
                    elif resultFormat == "latex":
                        # double backslash avoids the invalid "\h" escape warning
                        newValue = f"\\href{{{link}}}{{{value}}}"
                    if newValue is not None:
                        record[key] = newValue

__init__(name, formatString, regexps=None)

constructor

Parameters:

Name Type Description Default
formatString(str)

the format String to use

required
regexps(list)

the regular expressions to apply

required
Source code in lodstorage/query.py
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
def __init__(
    self,
    name: str,
    formatString: str,
    regexps: list = None,
):
    """
    constructor

    Args:
        name(str): the name of this formatter
        formatString(str): the format String to use
        regexps(list): the regular expressions to apply
    """
    self.name = name
    # guard against None so len()/iteration over regexps is always safe
    self.regexps = regexps if regexps is not None else []
    self.formatString = formatString

applyFormat(record, key, resultFormat)

apply the given format to the given record

Parameters:

Name Type Description Default
record(dict)

the record to handle

required
key(str)

the property key

required
resultFormat(str)

the resultFormat Style to apply

required
Source code in lodstorage/query.py
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def applyFormat(self, record, key, resultFormat: Format):
    """
    apply the given format to the given record

    Args:
        record(dict): the record to handle
        key(str): the property key
        resultFormat(str): the resultFormat Style to apply
    """
    # guard clauses: nothing to do for missing keys or non-string values
    if key not in record:
        return
    value = record[key]
    if value is None or not isinstance(value, str):
        return
    regexps = self.regexps or []  # tolerate a None regexps attribute
    # if there are no regular expressions specified always format
    doformat = len(regexps) == 0
    for regexp in regexps:
        try:
            vmatch = re.match(regexp, value)
            if vmatch:
                # we found a match and will format it if the value is not none
                doformat = True
                value = vmatch.group("value")
        except Exception as ex:
            print(
                f"ValueFormatter: {self.name}\nInvalid regular expression:{regexp}\n{str(ex)}",
                file=sys.stderr,
            )
    if value is not None and doformat:
        link = self.formatString.format(value=value)
        newValue = None
        if resultFormat == "github":
            newValue = f"[{value}]({link})"
        elif resultFormat == "mediawiki":
            newValue = f"[{link} {value}]"
        elif resultFormat == "latex":
            # double backslash avoids the invalid "\h" escape warning
            newValue = f"\\href{{{link}}}{{{value}}}"
        if newValue is not None:
            record[key] = newValue

fromDict(name, record) classmethod

create a ValueFormatter from the given dict

Source code in lodstorage/query.py
 98
 99
100
101
102
103
104
105
106
107
108
@classmethod
def fromDict(cls, name: str, record: dict):
    """
    create a ValueFormatter from the given dict

    Args:
        name(str): the name of the formatter
        record(dict): a record with a "format" entry and optional "regexps"

    Returns:
        ValueFormatter: the value formatter for the given record
    """
    regexps = record.get("regexps", [])
    # use cls instead of a hard coded class name so subclasses work
    vf = cls(name=name, formatString=record["format"], regexps=regexps)
    return vf

getFormats(formatsPath=None) classmethod

get the available ValueFormatters

Parameters:

Name Type Description Default
formatsPath(str)

the path to the yaml file to read the format specs from

required

Returns: dict: a map for ValueFormatters by formatter Name

Source code in lodstorage/query.py
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
@classmethod
def getFormats(cls, formatsPath: str = None) -> dict:
    """
    get the available ValueFormatters

    Args:
        formatsPath(str): the path to the yaml file to read the format specs from
    Returns:
        dict: a map for ValueFormatters by formatter Name
    """
    # lazy initialization - read the yaml file(s) only once per process
    if cls.valueFormats is None:
        loaded_formats = {}
        for format_path in YamlPath.getPaths("formats.yaml", formatsPath):
            with open(format_path, "r", encoding="utf-8") as stream:
                format_records = yaml.safe_load(stream)
                for format_key, format_record in format_records.items():
                    loaded_formats[format_key] = ValueFormatter.fromDict(
                        name=format_key, record=format_record
                    )
        cls.valueFormats = loaded_formats
    return cls.valueFormats

YamlPath

Source code in lodstorage/query.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
class YamlPath:
    """
    helper to determine the yaml file paths to read configuration from
    """

    @staticmethod
    def getPaths(yamlFileName: str, yamlPath: str = None, with_default: bool = True):
        """
        get the yaml file paths to read from

        Args:
            yamlFileName (str): The name of the YAML file to read from (if any) - legacy way to specify name
            yamlPath (str, optional): The full path to read from. Defaults to None.
            with_default (bool, optional): Whether to include paths from the default location .pylodstorage in the Home directory. Defaults to True.

        Returns:
            list: the list of yaml paths
        """
        if yamlPath is None:
            # fall back to the sample data shipped alongside the package
            yamlPath = f"{os.path.dirname(__file__)}/../sampledata/{yamlFileName}"
        paths = [yamlPath]
        if with_default:
            # additional yamls from users yaml configuration
            user_home = str(Path.home())
            user_yaml = f"{user_home}/.pylodstorage/{yamlFileName}"
            if os.path.isfile(user_yaml):
                paths.append(user_yaml)
        return paths

getPaths(yamlFileName, yamlPath=None, with_default=True) staticmethod

Parameters:

Name Type Description Default
yamlFileName str

The name of the YAML file to read from (if any) - legacy way to specify name

required
yamlPath str

The full path to read from. Defaults to None.

None
with_default bool

Whether to include paths from the default location .pylodstorage in the Home directory. Defaults to True.

True
Source code in lodstorage/query.py
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
@staticmethod
def getPaths(yamlFileName: str, yamlPath: str = None, with_default: bool = True):
    """
    Args:
        yamlFileName (str): The name of the YAML file to read from if (any) - legacy way to specify name
        yamlPath (str, optional): The full path to read from. Defaults to None.
        with_default (bool, optional): Whether to include paths from the default location .pylodstorage in the Home directory. Defaults to True.

    """
    if yamlPath is None:
        yamlPath = f"{os.path.dirname(__file__)}/../sampledata/{yamlFileName}"
    yamlPaths = [yamlPath]
    if with_default:
        home = str(Path.home())
        # additional yamls from users yaml configuration
        homepath = f"{home}/.pylodstorage/{yamlFileName}"
        if os.path.isfile(homepath):
            yamlPaths.append(homepath)
    return yamlPaths

querymain

Created on 2022-02-13

@author: wf

QueryMain

Commandline handler

Source code in lodstorage/querymain.py
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
class QueryMain:
    """
    Commandline handler
    """

    @classmethod
    def main(cls, args):
        """
        command line activation with parsed args

        Args:
            args: the parsed command line arguments
        """
        debug = args.debug
        endpoints = EndpointManager.getEndpoints(args.endpointPath)
        qm = QueryManager(lang=args.language, debug=debug, queriesPath=args.queriesPath)
        query = None
        queryCode = args.query
        formats = None
        # preload ValueFormatter
        ValueFormatter.getFormats(args.formatsPath)
        if args.list:
            # list the names and titles of all configured queries
            for name, query in qm.queriesByName.items():
                print(f"{name}:{query.title}")
        elif args.listEndpoints:
            # list endpoints
            for endpoint in endpoints.values():
                if hasattr(endpoint, "lang") and endpoint.lang == args.language:
                    print(endpoint)

        elif args.queryName is not None:
            # run a named query from the query manager
            if debug or args.showQuery:
                print(f"named query {args.queryName}:")
            if args.queryName not in qm.queriesByName:
                raise Exception(f"named query {args.queryName} not available")
            query = qm.queriesByName[args.queryName]
            # the command line limit only applies if the query defines none
            if query.limit is None and args.limit is not None:
                query.limit = args.limit
            formats = query.formats
            queryCode = query.query
            if debug or args.showQuery:
                if hasattr(query, "description") and query.description is not None:
                    print(query.description)
        if query is None:
            # ad hoc query given directly on the command line or via a file
            name = "?"
            if queryCode is None and args.queryFile is not None:
                queryFilePath = Path(args.queryFile)
                queryCode = queryFilePath.read_text()
                name = queryFilePath.stem
            query = Query(name="?", query=queryCode, lang=args.language)

        if queryCode:
            # substitute query parameters if the query declares any
            params = Params(query.query)
            if params.has_params:
                if not args.params:
                    raise Exception(f"{query.name} needs parameters")
                else:
                    params.set(args.params)
                    query.query = params.apply_parameters()
                    queryCode = query.query
            if debug or args.showQuery:
                print(f"{args.language}:\n{query.query}")
            endpointConf = Endpoint()
            endpointConf.method = "POST"
            if args.endpointName:
                endpointConf = endpoints.get(args.endpointName)
                query.tryItUrl = endpointConf.website
                query.database = endpointConf.database
            else:
                endpointConf.endpoint = query.endpoint
            if args.method:
                endpointConf.method = args.method
            if query.limit:
                # replace an existing LIMIT clause or append a new one
                if "limit" in queryCode or "LIMIT" in queryCode:
                    queryCode = re.sub(
                        r"(limit|LIMIT)\s+(\d+)", f"LIMIT {query.limit}", queryCode
                    )
                else:
                    queryCode += f"\nLIMIT {query.limit}"
            if args.language == "sparql":
                sparql = SPARQL.fromEndpointConf(endpointConf)
                if args.prefixes and endpointConf is not None:
                    queryCode = f"{endpointConf.prefixes}\n{queryCode}"
                if args.raw:
                    # raw mode: print the endpoint response as-is and stop
                    qres = cls.rawQuery(
                        endpointConf,
                        query=query.query,
                        resultFormat=args.format,
                        mimeType=args.mimeType,
                    )
                    print(qres)
                    return
                if "wikidata" in args.endpointName and formats is None:
                    formats = ["*:wikidata"]
                qlod = sparql.queryAsListOfDicts(queryCode)
            elif args.language == "sql":
                sqlDB = SQLDB(endpointConf.endpoint)
                qlod = sqlDB.query(queryCode)
            else:
                raise Exception(f"language {args.language} not known/supported")
            # render the query result in the requested output format
            if args.format is Format.csv:
                csv = CSV.toCSV(qlod)
                print(csv)
            elif args.format in [Format.latex, Format.github, Format.mediawiki]:
                doc = query.documentQueryResult(
                    qlod, tablefmt=str(args.format), floatfmt=".0f"
                )
                docstr = doc.asText()
                print(docstr)
            elif args.format in [Format.json] or args.format is None:  # set as default
                # https://stackoverflow.com/a/36142844/1497139
                print(json.dumps(qlod, indent=2, sort_keys=True, default=str))
            elif args.format in [Format.xml]:
                lod2xml = Lod2Xml(qlod)
                xml = lod2xml.asXml()
                print(xml)

            else:
                raise Exception(f"format {args.format} not supported yet")

    @staticmethod
    def rawQuery(endpointConf, query, resultFormat, mimeType, timeout: float = 10.0):
        """
        returns raw result of the endpoint

        Args:
            endpointConf: EndPoint
            query(str): query
            resultFormat(str): format of the result
            mimeType(str): mimeType
            timeout(float): timeout in seconds

        Returns:
            raw result of the query
        """
        params = {"query": query, "format": resultFormat}
        payload = {}
        if mimeType:
            headers = {"Accept": mimeType}
        else:
            headers = {}
        endpoint = endpointConf.endpoint
        method = endpointConf.method
        response = requests.request(
            method,
            endpoint,
            headers=headers,
            data=payload,
            params=params,
            timeout=timeout,
        )
        return response.text

main(args) classmethod

command line activation with parsed args

Parameters:

Name Type Description Default
args(list)

the command line arguments

required
Source code in lodstorage/querymain.py
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
@classmethod
def main(cls, args):
    """
    command line activation with parsed args

    Args:
        args: the parsed command line arguments
    """
    debug = args.debug
    endpoints = EndpointManager.getEndpoints(args.endpointPath)
    qm = QueryManager(lang=args.language, debug=debug, queriesPath=args.queriesPath)
    query = None
    queryCode = args.query
    formats = None
    # preload ValueFormatter
    ValueFormatter.getFormats(args.formatsPath)
    if args.list:
        # list the names and titles of all configured queries
        for name, query in qm.queriesByName.items():
            print(f"{name}:{query.title}")
    elif args.listEndpoints:
        # list endpoints
        for endpoint in endpoints.values():
            if hasattr(endpoint, "lang") and endpoint.lang == args.language:
                print(endpoint)

    elif args.queryName is not None:
        # run a named query from the query manager
        if debug or args.showQuery:
            print(f"named query {args.queryName}:")
        if args.queryName not in qm.queriesByName:
            raise Exception(f"named query {args.queryName} not available")
        query = qm.queriesByName[args.queryName]
        # the command line limit only applies if the query defines none
        if query.limit is None and args.limit is not None:
            query.limit = args.limit
        formats = query.formats
        queryCode = query.query
        if debug or args.showQuery:
            if hasattr(query, "description") and query.description is not None:
                print(query.description)
    if query is None:
        # ad hoc query given directly on the command line or via a file
        name = "?"
        if queryCode is None and args.queryFile is not None:
            queryFilePath = Path(args.queryFile)
            queryCode = queryFilePath.read_text()
            name = queryFilePath.stem
        query = Query(name="?", query=queryCode, lang=args.language)

    if queryCode:
        # substitute query parameters if the query declares any
        params = Params(query.query)
        if params.has_params:
            if not args.params:
                raise Exception(f"{query.name} needs parameters")
            else:
                params.set(args.params)
                query.query = params.apply_parameters()
                queryCode = query.query
        if debug or args.showQuery:
            print(f"{args.language}:\n{query.query}")
        endpointConf = Endpoint()
        endpointConf.method = "POST"
        if args.endpointName:
            endpointConf = endpoints.get(args.endpointName)
            query.tryItUrl = endpointConf.website
            query.database = endpointConf.database
        else:
            endpointConf.endpoint = query.endpoint
        if args.method:
            endpointConf.method = args.method
        if query.limit:
            # replace an existing LIMIT clause or append a new one
            if "limit" in queryCode or "LIMIT" in queryCode:
                queryCode = re.sub(
                    r"(limit|LIMIT)\s+(\d+)", f"LIMIT {query.limit}", queryCode
                )
            else:
                queryCode += f"\nLIMIT {query.limit}"
        if args.language == "sparql":
            sparql = SPARQL.fromEndpointConf(endpointConf)
            if args.prefixes and endpointConf is not None:
                queryCode = f"{endpointConf.prefixes}\n{queryCode}"
            if args.raw:
                # raw mode: print the endpoint response as-is and stop
                qres = cls.rawQuery(
                    endpointConf,
                    query=query.query,
                    resultFormat=args.format,
                    mimeType=args.mimeType,
                )
                print(qres)
                return
            if "wikidata" in args.endpointName and formats is None:
                formats = ["*:wikidata"]
            qlod = sparql.queryAsListOfDicts(queryCode)
        elif args.language == "sql":
            sqlDB = SQLDB(endpointConf.endpoint)
            qlod = sqlDB.query(queryCode)
        else:
            raise Exception(f"language {args.language} not known/supported")
        # render the query result in the requested output format
        if args.format is Format.csv:
            csv = CSV.toCSV(qlod)
            print(csv)
        elif args.format in [Format.latex, Format.github, Format.mediawiki]:
            doc = query.documentQueryResult(
                qlod, tablefmt=str(args.format), floatfmt=".0f"
            )
            docstr = doc.asText()
            print(docstr)
        elif args.format in [Format.json] or args.format is None:  # set as default
            # https://stackoverflow.com/a/36142844/1497139
            print(json.dumps(qlod, indent=2, sort_keys=True, default=str))
        elif args.format in [Format.xml]:
            lod2xml = Lod2Xml(qlod)
            xml = lod2xml.asXml()
            print(xml)

        else:
            raise Exception(f"format {args.format} not supported yet")

rawQuery(endpointConf, query, resultFormat, mimeType, timeout=10.0) staticmethod

returns raw result of the endpoint

Parameters:

Name Type Description Default
endpointConf

EndPoint

required
query(str)

query

required
resultFormat(str)

format of the result

required
mimeType(str)

mimeType

required
timeout(float)

timeout in seconds

required

Returns:

Type Description

raw result of the query

Source code in lodstorage/querymain.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
@staticmethod
def rawQuery(endpointConf, query, resultFormat, mimeType, timeout: float = 10.0):
    """
    return the raw query result of the given endpoint

    Args:
        endpointConf (EndPoint): the endpoint configuration supplying the url and HTTP method
        query (str): the query to run
        resultFormat (str): format of the result passed as the "format" request parameter
        mimeType (str): MIME type to request via the Accept header; may be empty/None
        timeout (float): request timeout in seconds, default: 10.0

    Returns:
        str: the raw text of the HTTP response
    """
    params = {"query": query, "format": resultFormat}
    payload = {}
    # request the given MIME type via content negotiation when specified
    if mimeType:
        headers = {"Accept": mimeType}
    else:
        headers = {}
    endpoint = endpointConf.endpoint
    method = endpointConf.method
    response = requests.request(
        method,
        endpoint,
        headers=headers,
        data=payload,
        params=params,
        timeout=timeout,
    )
    return response.text

main(argv=None, lang=None)

main program.

commandline access to List of Dicts / Linked Open Data Queries

Source code in lodstorage/querymain.py
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
def main(argv=None, lang=None):  # IGNORE:C0111
    """
    main program.

    commandline access to List of Dicts / Linked Open Data Queries

    Args:
        argv (list): command line arguments; defaults to sys.argv[1:]
        lang (str): preset query language ("sparql" or "sql"); when None a
            required -l/--language option is added to the argument parser

    Returns:
        int: exit code - 1 on keyboard interrupt, 2 on error, None on success
    """
    if argv is None:
        argv = sys.argv[1:]

    program_name = os.path.basename(__file__)
    program_version = "v%s" % __version__
    program_build_date = str(__updated__)
    program_version_message = "%%(prog)s %s (%s)" % (
        program_version,
        program_build_date,
    )
    program_shortdesc = (
        "commandline query of endpoints in diverse languages such as SPARQL/SQL"
    )
    user_name = "Wolfgang Fahl"
    program_license = """%s

  Created by %s on %s.
  Copyright 2020-2024 Wolfgang Fahl. All rights reserved.

  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0

  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.

USAGE
""" % (
        program_shortdesc,
        user_name,
        str(__date__),
    )

    # defined before the try block so the error handler below can safely
    # inspect it even when argument parsing itself raised the exception
    args = None
    try:
        # Setup argument parser
        parser = ArgumentParser(
            description=program_license, formatter_class=RawDescriptionHelpFormatter
        )
        parser.add_argument(
            "-d",
            "--debug",
            dest="debug",
            action="store_true",
            help="set debug [default: %(default)s]",
        )
        parser.add_argument(
            "-ep",
            "--endpointPath",
            default=None,
            help="path to yaml file to configure endpoints to use for queries",
        )
        parser.add_argument(
            "-fp",
            "--formatsPath",
            default=ValueFormatter.formatsPath,
            help="path to yaml file to configure formats to use for query result documentation",
        )
        parser.add_argument(
            "-en",
            "--endpointName",
            default="wikidata",
            help=f"Name of the endpoint to use for queries. Available by default: {EndpointManager.getEndpointNames()}",
        )
        parser.add_argument("--method", help="method to be used for SPARQL queries")
        parser.add_argument("-f", "--format", type=Format, choices=list(Format))
        parser.add_argument(
            "-li",
            "--list",
            action="store_true",
            help="show the list of available queries",
        )
        parser.add_argument(
            "--limit", type=int, default=None, help="set limit parameter of query"
        )
        parser.add_argument(
            "--params",
            action=StoreDictKeyPair,
            help="query parameters as Key-value pairs in the format key1=value1,key2=value2",
        )
        parser.add_argument(
            "-le",
            "--listEndpoints",
            action="store_true",
            help="show the list of available endpoints",
        )
        parser.add_argument(
            "-m", "--mimeType", help="MIME-type to use for the raw query"
        )
        parser.add_argument(
            "-p",
            "--prefixes",
            action="store_true",
            help="add predefined prefixes for endpoint",
        )
        parser.add_argument(
            "-sq", "--showQuery", action="store_true", help="show the query"
        )
        parser.add_argument(
            "-qp", "--queriesPath", help="path to YAML file with query definitions"
        )
        parser.add_argument("-q", "--query", help="the query to run")
        parser.add_argument("-qf", "--queryFile", help="the query file to run")
        parser.add_argument("-qn", "--queryName", help="run a named query")
        parser.add_argument(
            "-raw",
            action="store_true",
            help="return the raw query result from the endpoint. (MIME type defined over -f or -m)",
        )
        parser.add_argument(
            "-V", "--version", action="version", version=program_version_message
        )
        # only ask for the language when it was not preset by the caller
        if lang is None:
            parser.add_argument(
                "-l", "--language", help="the query language to use", required=True
            )
        args = parser.parse_args(argv)
        if lang is not None:
            args.language = lang
        QueryMain.main(args)

    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 1
    except Exception as e:
        if DEBUG:
            # re-raise preserving the original traceback
            raise
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        # args stays None when parsing itself failed - guard before access
        if args is not None and args.debug:
            print(traceback.format_exc())
        return 2

mainSPARQL(argv=None)

commandline for SPARQL queries

Source code in lodstorage/querymain.py
201
202
203
204
205
def mainSPARQL(argv=None):
    """
    command line entry point for SPARQL queries

    Args:
        argv (list): command line arguments; defaults to sys.argv[1:]
    """
    main(argv=argv, lang="sparql")

mainSQL(argv=None)

commandline for SQL queries

Source code in lodstorage/querymain.py
194
195
196
197
198
def mainSQL(argv=None):
    """
    command line entry point for SQL queries

    Args:
        argv (list): command line arguments; defaults to sys.argv[1:]
    """
    main(argv=argv, lang="sql")

rdf

Created on 2024-01-27

@author: wf, using ChatGPT-4 prompting

RDFDumper

A class to convert instances of data models (based on a LinkML schema) into an RDF graph.

Source code in lodstorage/rdf.py
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
class RDFDumper:
    """
    A class to convert instances of data models (based on a LinkML schema) into an RDF graph.
    """

    def __init__(self, schema: Schema, instance: object):
        """
        Initialize the RDFDumper.

        Args:
            schema (Schema): The LinkML schema defining the structure of the data models.
            instance (object): The instance of the data model to be converted into RDF.
        """
        self.schema = schema
        self.instance = instance
        self.graph = Graph()
        # one rdflib Namespace per prefix declared in the schema
        self.namespaces = {
            prefix: Namespace(uri) for prefix, uri in schema.prefixes.items()
        }

    def convert_to_rdf(self):
        """
        Converts the provided instance into RDF triples based on the LinkML schema.

        Instances whose class name is not declared in the schema are silently ignored.
        """
        # Process the instance data according to its class in the schema
        instance_class = self.instance.__class__.__name__
        if instance_class in self.schema.classes:
            self.process_class(instance_class, self.instance)

    def serialize(self, rdf_format: str = "turtle") -> str:
        """
        Serializes the RDF graph into a string representation in the specified format.

        Args:
            rdf_format (str): The serialization format (e.g., 'turtle', 'xml', 'json-ld').

        Returns:
            str: The serialized RDF graph.
        """
        return self.graph.serialize(format=rdf_format)

    def value_iterator(self, value: Any):
        """
        Iterates over values in a mapping or iterable.

        Args:
            value: The value to iterate over. It can be a mapping, iterable, or a single value.

        Yields:
            Tuples of (key, value) from the input value. For single values, key is None.
        """
        if isinstance(value, Mapping):
            yield from value.items()
        # str/bytes are iterable but are treated as single scalar values
        elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
            yield from ((None, v) for v in value)
        else:
            yield (None, value)

    def process_class(self, class_name: str, instance_data: object):
        """
        Add the RDF triples for the given instance of the named schema class.

        Recurses into nested instances that expose a truthy 'identifier' attribute.
        Side effect: sets self.base_uri, which get_instance_uri relies on.

        Args:
            class_name (str): name of the class as declared in the schema.
            instance_data (object): the dataclass instance to convert.
        """
        # Get the base namespace URI
        self.base_uri = self.namespaces[self.schema.default_prefix]
        # get the class object
        # class_obj = self.schema.classes[class_name]
        # Construct class_uri using the namespace and class_name with a separator
        class_uri = URIRef(f"{self.base_uri}:{class_name}")

        # Create a unique URI or a Blank Node for the instance
        instance_uri = self.get_instance_uri(instance_data)

        # Type the instance with its class
        self.graph.add((instance_uri, RDF.type, class_uri))

        # loop over all fields of the instance data
        for field_info in fields(instance_data):
            slot_name = field_info.name
            # assure we only work on fields defined
            # in our schema
            slot_obj = self.schema.slots.get(slot_name)
            if not slot_obj:
                continue

            # Combine the namespace with the slot name to form the field URI
            field_uri = URIRef(f"{self.base_uri}:{slot_name}")
            field_value = getattr(instance_data, slot_name, None)

            # Use value_iterator to handle different types of values
            for key, item in self.value_iterator(field_value):
                if key is not None:
                    # Handle as a mapping
                    key_uri = URIRef(self.namespaces[self.schema.default_prefix][key])
                    self.graph.add((instance_uri, field_uri, key_uri))
                    self.graph.add(
                        (key_uri, RDF.value, self.convert_to_literal(item, slot_obj))
                    )
                else:
                    # Handle as a single value or an item from an iterable
                    # Check if item has an 'identifier' property
                    if hasattr(item, "identifier") and getattr(item, "identifier"):
                        # nested instance: link to it and recurse
                        item_uri = self.get_instance_uri(item)
                        self.graph.add((instance_uri, field_uri, item_uri))
                        self.process_class(item.__class__.__name__, item)
                    else:
                        self.graph.add(
                            (
                                instance_uri,
                                field_uri,
                                self.convert_to_literal(item, slot_obj),
                            )
                        )

    def get_instance_uri(self, instance_data):
        """
        Generates a URI for an instance. If the instance has an 'identifier' property, it uses that as part of the URI.
        Otherwise, it generates or retrieves a unique URI.

        NOTE(review): depends on self.base_uri which is only set by process_class.
        """
        if hasattr(instance_data, "identifier") and getattr(
            instance_data, "identifier"
        ):
            identifier = getattr(instance_data, "identifier")
            return URIRef(f"{self.base_uri}:{identifier}")
        else:
            # Fallback to a blank node if no identifier is found
            return BNode()

    def convert_to_literal(self, value, slot_obj):
        """
        Converts a value to an RDFLib Literal with appropriate datatype.

        Args:
            value: The value to be converted.
            slot_obj: The slot object containing information about the field (currently unused).

        Returns:
            An RDFLib Literal with the value and appropriate datatype.
        """
        # Determine the datatype based on the Python type of the value
        datatype = PythonTypes.get_rdf_datatype(type(value))

        # Create and return the literal
        return Literal(value, datatype=datatype)

__init__(schema, instance)

Initialize the RDFDumper.

Parameters:

Name Type Description Default
schema Schema

The LinkML schema defining the structure of the data models.

required
instance object

The instance of the data model to be converted into RDF.

required
Source code in lodstorage/rdf.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def __init__(self, schema: Schema, instance: object):
    """
    Create an RDFDumper for the given schema and instance.

    Args:
        schema (Schema): LinkML schema describing the data model structure.
        instance (object): data model instance to be converted into RDF.
    """
    self.schema = schema
    self.instance = instance
    self.graph = Graph()
    # map each schema prefix to an rdflib Namespace
    self.namespaces = {}
    for ns_prefix, ns_uri in schema.prefixes.items():
        self.namespaces[ns_prefix] = Namespace(ns_uri)

convert_to_literal(value, slot_obj)

Converts a value to an RDFLib Literal with appropriate datatype.

Parameters:

Name Type Description Default
value

The value to be converted.

required
slot_obj

The slot object containing information about the field.

required

Returns:

Type Description

An RDFLib Literal with the value and appropriate datatype.

Source code in lodstorage/rdf.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
def convert_to_literal(self, value, slot_obj):
    """
    Wrap a plain Python value in an RDFLib Literal, deriving the RDF
    datatype from the value's Python type.

    Args:
        value: the value to be wrapped.
        slot_obj: slot metadata for the field (currently unused).

    Returns:
        Literal: the typed RDF literal.
    """
    rdf_datatype = PythonTypes.get_rdf_datatype(type(value))
    literal = Literal(value, datatype=rdf_datatype)
    return literal

convert_to_rdf()

Converts the provided instance into RDF triples based on the LinkML schema.

Source code in lodstorage/rdf.py
36
37
38
39
40
41
42
43
def convert_to_rdf(self):
    """
    Convert the stored instance into RDF triples as driven by the schema.

    Instances whose class is not declared in the schema are ignored.
    """
    class_name = type(self.instance).__name__
    if class_name in self.schema.classes:
        self.process_class(class_name, self.instance)

get_instance_uri(instance_data)

Generates a URI for an instance. If the instance has an 'identifier' property, it uses that as part of the URI. Otherwise, it generates or retrieves a unique URI.

Source code in lodstorage/rdf.py
126
127
128
129
130
131
132
133
134
135
136
137
138
def get_instance_uri(self, instance_data):
    """
    Build a URI for an instance: when the instance exposes a truthy
    'identifier' attribute it becomes part of the URI, otherwise a
    blank node is returned.
    """
    identifier = getattr(instance_data, "identifier", None)
    if identifier:
        return URIRef(f"{self.base_uri}:{identifier}")
    # no usable identifier - fall back to an anonymous node
    return BNode()

serialize(rdf_format='turtle')

Serializes the RDF graph into a string representation in the specified format.

Parameters:

Name Type Description Default
format str

The serialization format (e.g., 'turtle', 'xml', 'json-ld').

required

Returns:

Name Type Description
str str

The serialized RDF graph.

Source code in lodstorage/rdf.py
45
46
47
48
49
50
51
52
53
54
55
def serialize(self, rdf_format: str = "turtle") -> str:
    """
    Serializes the RDF graph into a string representation in the specified format.

    Args:
        format (str): The serialization format (e.g., 'turtle', 'xml', 'json-ld').

    Returns:
        str: The serialized RDF graph.
    """
    return self.graph.serialize(format=rdf_format)

value_iterator(value)

Iterates over values in a mapping or iterable.

Parameters:

Name Type Description Default
value Any

The value to iterate over. It can be a mapping, iterable, or a single value.

required

Yields:

Type Description

Tuples of (key, value) from the input value. For single values, key is None.

Source code in lodstorage/rdf.py
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def value_iterator(self, value: Any):
    """
    Yield (key, value) pairs from an arbitrary value.

    Mappings yield their items; non-string iterables yield one
    (None, element) pair per element; anything else - including
    str and bytes, which are deliberately treated as scalars -
    yields a single (None, value) pair.
    """
    if isinstance(value, Mapping):
        for pair in value.items():
            yield pair
    elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
        for element in value:
            yield (None, element)
    else:
        yield (None, value)

sample

Created on 2020-08-24

@author: wf

Royal

Bases: JSONAble

i am a single Royal

Source code in lodstorage/sample.py
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
class Royal(JSONAble):
    """
    i am a single Royal
    """

    @classmethod
    def getSamples(cls):
        """
        Return a list of sample royal records as dicts with the
        calculated fields age, ofAge and lastmodified added.
        """
        listOfDicts = [
            {
                "name": "Elizabeth Alexandra Mary Windsor",
                "born": Sample.dob("1926-04-21"),
                "numberInLine": 0,
                "wikidataurl": "https://www.wikidata.org/wiki/Q9682",
            },
            {
                "name": "Charles, Prince of Wales",
                "born": Sample.dob("1948-11-14"),
                "numberInLine": 1,
                "wikidataurl": "https://www.wikidata.org/wiki/Q43274",
            },
            {
                "name": "George of Cambridge",
                "born": Sample.dob("2013-07-22"),
                "numberInLine": 3,
                "wikidataurl": "https://www.wikidata.org/wiki/Q1359041",
            },
            {
                "name": "Harry Duke of Sussex",
                "born": Sample.dob("1984-09-15"),
                "numberInLine": 6,
                "wikidataurl": "https://www.wikidata.org/wiki/Q152316",
            },
        ]
        today = date.today()
        for person in listOfDicts:
            born = person["born"]
            # approximate age in years using the mean Gregorian year length
            age = (today - born).days / 365.2425
            person["age"] = age
            person["ofAge"] = age >= 18
            person["lastmodified"] = datetime.now()
        return listOfDicts

    def __repr__(self):
        # compact representation e.g. "Royal:<name>:<born>"
        text = self.__class__.__name__
        attrs = ["name", "born"]
        delim = ":"
        for attr in attrs:
            if hasattr(self, attr):
                value = getattr(self, attr)
                text += f"{delim}{value}"
                delim = ":"
        return text

Royals

Bases: JSONAbleList

a non ORM Royals list

Source code in lodstorage/sample.py
76
77
78
79
80
81
82
83
84
85
86
class Royals(JSONAbleList):
    """
    a non ORM Royals list
    """

    def __init__(self, load=False):
        """
        Construct the list wrapper.

        Args:
            load (bool): when True fill self.royals with sample records,
                otherwise leave it as None
        """
        super(Royals, self).__init__("royals", clazz=None)
        if load:
            self.royals = Royal.getSamples()
        else:
            self.royals = None

Sample

Bases: object

Sample dataset generator

Source code in lodstorage/sample.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
class Sample(object):
    """
    Sample dataset generator
    """

    # class-level cache for the city list fetched by getCities
    cityList = None

    def __init__(self):
        """
        Constructor
        """

    @staticmethod
    def getSample(size):
        """
        Return a list of `size` simple records with a pkey and cindex field.
        """
        listOfDicts = []
        for index in range(size):
            listOfDicts.append({"pkey": "index%d" % index, "cindex": index})
        return listOfDicts

    @staticmethod
    def getCountries():
        """
        Fetch and return a country list from a public gist (network access required).
        """
        countryJsonUrl = "https://gist.githubusercontent.com/erdem/8c7d26765831d0f9a8c62f02782ae00d/raw/248037cd701af0a4957cce340dabb0fd04e38f4c/countries.json"
        with urllib.request.urlopen(countryJsonUrl) as url:
            countryList = json.loads(url.read().decode())
        return countryList

    @staticmethod
    def getCities():
        """
        get a list of cities

        Fetched once from a public JSON source and cached in Sample.cityList;
        each city gets a synthetic cityId of the form "<country>-<name>".
        """
        if Sample.cityList is None:
            cityJsonUrl = "https://raw.githubusercontent.com/lutangar/cities.json/master/cities.json"
            with urllib.request.urlopen(cityJsonUrl) as url:
                Sample.cityList = json.loads(url.read().decode())
            for city in Sample.cityList:
                city["cityId"] = "%s-%s" % (city["country"], city["name"])
        return Sample.cityList

    @staticmethod
    def dob(isoDateString):
        """get the date of birth from the given ISO date string (YYYY-MM-DD)"""
        # if sys.version_info >= (3, 7):
        #    dt=datetime.fromisoformat(isoDateString)
        # else:
        dt = datetime.strptime(isoDateString, "%Y-%m-%d")
        return dt.date()

    @staticmethod
    def getRoyals():
        """Return the sample royal records as a list of dicts."""
        return Royal.getSamples()

    @staticmethod
    def getRoyalsInstances():
        """Return the sample royals as Royal instances."""
        lod = Royal.getSamples()
        royals = []
        for record in lod:
            royal = Royal()
            royal.fromDict(record)
            royals.append(royal)
        return royals

__init__()

Constructor

Source code in lodstorage/sample.py
20
21
22
23
def __init__(self):
    """
    Constructor - Sample only offers static helpers, so there is
    no instance state to initialize.
    """

dob(isoDateString) staticmethod

get the date of birth from the given ISO date string

Source code in lodstorage/sample.py
52
53
54
55
56
57
58
59
@staticmethod
def dob(isoDateString):
    """get the date of birth from the given iso date state"""
    # if sys.version_info >= (3, 7):
    #    dt=datetime.fromisoformat(isoDateString)
    # else:
    dt = datetime.strptime(isoDateString, "%Y-%m-%d")
    return dt.date()

getCities() staticmethod

get a list of cities

Source code in lodstorage/sample.py
39
40
41
42
43
44
45
46
47
48
49
50
@staticmethod
def getCities():
    """
    get a list of cities

    Fetched once from a public JSON source (network access required) and
    cached in Sample.cityList; each city gets a synthetic cityId of the
    form "<country>-<name>".
    """
    if Sample.cityList is None:
        cityJsonUrl = "https://raw.githubusercontent.com/lutangar/cities.json/master/cities.json"
        with urllib.request.urlopen(cityJsonUrl) as url:
            Sample.cityList = json.loads(url.read().decode())
        for city in Sample.cityList:
            city["cityId"] = "%s-%s" % (city["country"], city["name"])
    return Sample.cityList

sample2

Created on 2024-01-21

@author: wf

Countries

Represents a collection of country instances.

Attributes:

Name Type Description
countries List[Country]

A list of Country instances.

Source code in lodstorage/sample2.py
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
@lod_storable
class Countries:
    """
    Represents a collection of country instances.

    Attributes:
        countries (List[Country]): A list of Country instances.
    """

    countries: List[Country]

    @classmethod
    def get_countries_erdem(cls) -> "Countries":
        """
        get Erdem Ozkol's country list

        Fetched from a public gist (network access required).
        """
        countries_json_url = "https://gist.githubusercontent.com/erdem/8c7d26765831d0f9a8c62f02782ae00d/raw/248037cd701af0a4957cce340dabb0fd04e38f4c/countries.json"
        # read_from_url / from_dict are presumably provided by the
        # lod_storable decorator - NOTE(review): confirm against lodstorage.yamlable
        json_str = cls.read_from_url(countries_json_url)
        countries_list = json.loads(json_str)
        countries_dict = {"countries": countries_list}
        instance = cls.from_dict(countries_dict)
        return instance

    @classmethod
    def get_samples(cls) -> dict[str, "Countries"]:
        """
        Returns a dictionary of named samples
        for 'specification by example' style
        requirements management.

        Returns:
            dict: A dictionary with keys as sample names
            and values as `Countries` instances.
        """
        samples = {"country list provided by Erdem Ozkol": cls.get_countries_erdem()}
        return samples

get_countries_erdem() classmethod

get Erdem Ozkol's country list

Source code in lodstorage/sample2.py
182
183
184
185
186
187
188
189
190
191
192
@classmethod
def get_countries_erdem(cls) -> "Countries":
    """
    Fetch and parse Erdem Ozkol's public country list (network access required).
    """
    countries_json_url = "https://gist.githubusercontent.com/erdem/8c7d26765831d0f9a8c62f02782ae00d/raw/248037cd701af0a4957cce340dabb0fd04e38f4c/countries.json"
    raw_json = cls.read_from_url(countries_json_url)
    country_records = json.loads(raw_json)
    return cls.from_dict({"countries": country_records})

get_samples() classmethod

Returns a dictionary of named samples for 'specification by example' style requirements management.

Returns:

Name Type Description
dict dict[str, Countries]

A dictionary with keys as sample names

dict[str, Countries]

and values as Countries instances.

Source code in lodstorage/sample2.py
194
195
196
197
198
199
200
201
202
203
204
205
206
@classmethod
def get_samples(cls) -> dict[str, "Countries"]:
    """
    Provide named example instances for
    'specification by example' style requirements management.

    Returns:
        dict: mapping of sample name to `Countries` instance.
    """
    sample_name = "country list provided by Erdem Ozkol"
    return {sample_name: cls.get_countries_erdem()}

Country

Represents a country with its details.

Attributes:

Name Type Description
name str

The name of the country.

country_code str

The country code.

capital Optional[str]

The capital city of the country.

timezones List[str]

List of timezones in the country.

latlng List[float]

Latitude and longitude of the country.

Source code in lodstorage/sample2.py
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
@lod_storable
class Country:
    """
    Represents a country with its details.

    Attributes:
        name (str): The name of the country.
        country_code (str): The country code.
        capital (Optional[str]): The capital city of the country.
        timezones (List[str]): List of timezones in the country.
        latlng (List[float]): Latitude and longitude of the country.
    """

    name: str
    country_code: str
    capital: Optional[str] = None
    # mutable defaults need default_factory to avoid shared state
    timezones: List[str] = field(default_factory=list)
    latlng: List[float] = field(default_factory=list)

Royal

Represents a member of the royal family, with various personal details.

Attributes:

Name Type Description
name str

The full name of the royal member.

wikidata_id str

The Wikidata identifier associated with the royal member.

number_in_line Optional[int]

The number in line to succession, if applicable.

born_iso_date Optional[str]

The ISO date of birth.

died_iso_date Optional[str]

The ISO date of death, if deceased.

last_modified_iso str

ISO timestamp of the last modification.

age Optional[int]

The age of the royal member.

of_age Optional[bool]

Indicates whether the member is of legal age.

wikidata_url Optional[str]

URL to the Wikidata page of the member.

Source code in lodstorage/sample2.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
@lod_storable
class Royal:
    """
    Represents a member of the royal family, with various personal details.

    Attributes:
        name (str): The full name of the royal member.
        wikidata_id (str): The Wikidata identifier associated with the royal member.
        number_in_line (Optional[int]): The number in line to succession, if applicable.
        born_iso_date (Optional[str]): The ISO date of birth.
        died_iso_date (Optional[str]): The ISO date of death, if deceased.
        last_modified_iso (str): ISO timestamp of the last modification.
        age (Optional[int]): The age of the royal member.
        of_age (Optional[bool]): Indicates whether the member is of legal age.
        wikidata_url (Optional[str]): URL to the Wikidata page of the member.
    """

    name: str
    wikidata_id: str
    number_in_line: Optional[int] = None
    born_iso_date: Optional[str] = None
    died_iso_date: Optional[str] = None
    last_modified_iso: str = field(init=False)
    # NOTE(review): field(init=None) - None is falsy so these behave like
    # init=False; init=False was presumably intended - confirm and fix upstream
    age: Optional[int] = field(init=None)
    of_age: Optional[bool] = field(init=None)
    wikidata_url: Optional[str] = field(init=None)

    def __post_init__(self):
        """
        init calculated fields
        """
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
        # also self.lastmodified is not a declared dataclass field
        self.lastmodified = datetime.utcnow()
        self.last_modified_iso = self.lastmodified.strftime("%Y-%m-%dT%H:%M:%SZ")
        end_date = self.died if self.died else date.today()
        # approximate age in years using the mean Gregorian year length
        self.age = int((end_date - self.born).days / 365.2425)
        self.of_age = self.age >= 18
        if self.wikidata_id:
            self.wikidata_url = f"https://www.wikidata.org/wiki/{self.wikidata_id}"

    @property
    def identifier(self) -> str:
        """
        Generates a unique identifier for the Royal instance.
        The identifier is a combination of a slugified name and the Wikidata ID (if available).
        """
        slugified_name = slugify(self.name, lowercase=False, regex_pattern=r"[^\w\-]")
        if self.wikidata_id:
            return f"{slugified_name}-{self.wikidata_id}"
        return slugified_name

    @property
    def born(self) -> date:
        """Return the date of birth from the ISO date string."""
        born_date = DateConvert.iso_date_to_datetime(self.born_iso_date)
        return born_date

    @property
    def died(self) -> Optional[date]:
        """Return the date of death from the ISO date string, if available."""
        died_date = DateConvert.iso_date_to_datetime(self.died_iso_date)
        return died_date

born: date property

Return the date of birth from the ISO date string.

died: Optional[date] property

Return the date of death from the ISO date string, if available.

identifier: str property

Generates a unique identifier for the Royal instance. The identifier is a combination of a slugified name and the Wikidata ID (if available).

__post_init__()

init calculated fields

Source code in lodstorage/sample2.py
43
44
45
46
47
48
49
50
51
52
53
def __post_init__(self):
    """
    init calculated fields
    """
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
    # also self.lastmodified is not a declared dataclass field
    self.lastmodified = datetime.utcnow()
    self.last_modified_iso = self.lastmodified.strftime("%Y-%m-%dT%H:%M:%SZ")
    # age relative to date of death when deceased, otherwise to today
    end_date = self.died if self.died else date.today()
    # approximate age in years using the mean Gregorian year length
    self.age = int((end_date - self.born).days / 365.2425)
    self.of_age = self.age >= 18
    if self.wikidata_id:
        self.wikidata_url = f"https://www.wikidata.org/wiki/{self.wikidata_id}"

Royals

Represents a collection of Royal family members.

Attributes:

Name Type Description
members List[Royal]

A list of Royal family members.

Source code in lodstorage/sample2.py
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
@lod_storable
class Royals:
    """
    Represents a collection of Royal family members.

    Attributes:
        members (List[Royal]): A list of Royal family members.
    """

    members: List[Royal] = field(default_factory=list)

    @classmethod
    def get_samples(cls) -> dict[str, "Royals"]:
        """
        Returns a dictionary of named samples
        for 'specification by example' style
        requirements management.

        Returns:
            dict: A dictionary with keys as sample names and values as `Royals` instances.
        """
        # declarative table of the sample heirs - one dict per Royal
        heir_specs = [
            dict(
                name="Elizabeth Alexandra Mary Windsor",
                born_iso_date="1926-04-21",
                died_iso_date="2022-09-08",
                wikidata_id="Q9682",
            ),
            dict(
                name="Charles III of the United Kingdom",
                born_iso_date="1948-11-14",
                number_in_line=0,
                wikidata_id="Q43274",
            ),
            dict(
                name="William, Duke of Cambridge",
                born_iso_date="1982-06-21",
                number_in_line=1,
                wikidata_id="Q36812",
            ),
            dict(
                name="Prince George of Wales",
                born_iso_date="2013-07-22",
                number_in_line=2,
                wikidata_id="Q13590412",
            ),
            dict(
                name="Princess Charlotte of Wales",
                born_iso_date="2015-05-02",
                number_in_line=3,
                wikidata_id="Q18002970",
            ),
            dict(
                name="Prince Louis of Wales",
                born_iso_date="2018-04-23",
                number_in_line=4,
                wikidata_id="Q38668629",
            ),
            dict(
                name="Harry Duke of Sussex",
                born_iso_date="1984-09-15",
                number_in_line=5,
                wikidata_id="Q152316",
            ),
        ]
        heirs = [Royal(**spec) for spec in heir_specs]
        samples = {"QE2 heirs up to number in line 5": Royals(members=heirs)}
        return samples

get_samples() classmethod

Returns a dictionary of named samples for 'specification by example' style requirements management.

Returns:

Name Type Description
dict dict[str, Royals]

A dictionary with keys as sample names and values as Royals instances.

Source code in lodstorage/sample2.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
@classmethod
def get_samples(cls) -> dict[str, "Royals"]:
    """
    Returns a dictionary of named samples
    for 'specification by example' style
    requirements management.

    Returns:
        dict: A dictionary with keys as sample names and values as `Royals` instances.
    """
    # keyword-argument records for each sample Royal
    records = [
        dict(
            name="Elizabeth Alexandra Mary Windsor",
            born_iso_date="1926-04-21",
            died_iso_date="2022-09-08",
            wikidata_id="Q9682",
        ),
        dict(
            name="Charles III of the United Kingdom",
            born_iso_date="1948-11-14",
            number_in_line=0,
            wikidata_id="Q43274",
        ),
        dict(
            name="William, Duke of Cambridge",
            born_iso_date="1982-06-21",
            number_in_line=1,
            wikidata_id="Q36812",
        ),
        dict(
            name="Prince George of Wales",
            born_iso_date="2013-07-22",
            number_in_line=2,
            wikidata_id="Q13590412",
        ),
        dict(
            name="Princess Charlotte of Wales",
            born_iso_date="2015-05-02",
            number_in_line=3,
            wikidata_id="Q18002970",
        ),
        dict(
            name="Prince Louis of Wales",
            born_iso_date="2018-04-23",
            number_in_line=4,
            wikidata_id="Q38668629",
        ),
        dict(
            name="Harry Duke of Sussex",
            born_iso_date="1984-09-15",
            number_in_line=5,
            wikidata_id="Q152316",
        ),
    ]
    member_list = [Royal(**record) for record in records]
    return {"QE2 heirs up to number in line 5": Royals(members=member_list)}

Sample

Sample dataset provider

Source code in lodstorage/sample2.py
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
class Sample:
    """
    Sample dataset provider
    """

    @staticmethod
    def get(dataset_name: str):
        """
        Get the given sample dataset name
        """
        # guard-clause dispatch over the known dataset names
        if dataset_name == "royals":
            return Royals.get_samples()
        if dataset_name == "countries":
            return Countries.get_samples()
        raise ValueError("Unknown dataset name")

get(dataset_name) staticmethod

Get the given sample dataset name

Source code in lodstorage/sample2.py
214
215
216
217
218
219
220
221
222
223
224
225
226
@staticmethod
def get(dataset_name: str):
    """
    Get the given sample dataset name
    """
    # map dataset names to their sample provider callables
    providers = {
        "royals": Royals.get_samples,
        "countries": Countries.get_samples,
    }
    provider = providers.get(dataset_name)
    if provider is None:
        raise ValueError("Unknown dataset name")
    return provider()

schema

Created on 2021-01-26

@author: wf

Schema

Bases: object

a relational Schema

Source code in lodstorage/schema.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
class Schema(object):
    """
    a relational Schema
    """

    def __init__(self, name: str, title: str):
        """
        Constructor

        Args:
            name(str): the name of the schema
            title(str): the title of the schema
        """
        self.name = name
        self.title = title
        # properties are registered later by name
        self.propsByName = {}

    @staticmethod
    def generalizeColumn(tableList, colName: str):
        """
        remove the column with the given name from all tables in the tablelist and
        return it

        Args:
            tableList(list): a list of Tables
            colName(string): the name of the column to generalize

        Returns:
            dict: the column record having been generalized and removed,
            or None if no column with the given name was found
        """
        gCol = None
        for table in tableList:
            # iterate over a copy - removing from a list while iterating it
            # would silently skip the element following the removed one
            for col in list(table["columns"]):
                if col["name"] == colName:
                    gCol = col.copy()
                    # no linking yet @FIXME - will need this later
                    if "link" in gCol:
                        gCol.pop("link")
                    # is generalization protected for this column?
                    if "special" not in col or not col["special"]:
                        table["columns"].remove(col)
        return gCol

    @staticmethod
    def getGeneral(tableList, name: str, debug: bool = False):
        """
        derive a general table from the given table list

        Args:
            tableList(list): a list of tables
            name(str): name of the general table
            debug(bool): True if column names should be shown

        Returns:
            a table dict for the generalized table
        """
        general = {"name": name, "columns": []}
        colCount = Counter()
        for table in tableList:
            for col in table["columns"]:
                columnId = "%s.%s" % (col["name"], col["type"])
                if debug:
                    print(columnId)
                colCount[columnId] += 1
        # a column is general if its name.type pair occurs in every table
        for columnId, count in colCount.items():
            if count == len(tableList):
                colName = columnId.split(".")[0]
                generalCol = Schema.generalizeColumn(tableList, colName)
                general["columns"].append(generalCol)
        return general

    @staticmethod
    def getGeneralViewDDL(tableList, name: str, debug=False) -> str:
        """
        get the DDL statement to create a general view

        Args:
            tableList: the list of tables
            name(str): the name of the view
            debug(bool): True if debug should be set

        Returns:
            str: the CREATE VIEW DDL statement (a UNION over all tables)
        """
        general = Schema.getGeneral(tableList, name, debug)
        cols = ""
        delim = ""
        for col in general["columns"]:
            cols += "%s%s" % (delim, col["name"])
            delim = ","
        ddl = "CREATE VIEW %s AS \n" % name
        delim = ""
        for table in tableList:
            ddl += "%s  SELECT %s FROM %s" % (delim, cols, table["name"])
            delim = "\nUNION\n"
        return ddl

__init__(name, title)

Constructor

Parameters:

Name Type Description Default
name(str)

the name of the schema

required
title(str)

the title of the schema

required
Source code in lodstorage/schema.py
37
38
39
40
41
42
43
44
45
46
47
def __init__(self, name: str, title: str):
    """
    Constructor

    Args:
        name(str): the name of the schema
        title(str): the title of the schema
    """
    # properties are registered later by name
    self.propsByName = {}
    self.name = name
    self.title = title

generalizeColumn(tableList, colName) staticmethod

remove the column with the given name from all tables in the tablelist and return it

Parameters:

Name Type Description Default
tableList(list)

a list of Tables

required
colName(string)

the name of the column to generalize

required

Returns:

Name Type Description
string

the column having been generalized and removed

Source code in lodstorage/schema.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
@staticmethod
def generalizeColumn(tableList, colName: str):
    """
    remove the column with the given name from all tables in the tablelist and
    return it

    Args:
        tableList(list): a list of Tables
        colName(string): the name of the column to generalize

    Returns:
        string: the column having been generalized and removed
    """
    gCol = None
    for table in tableList:
        for col in table["columns"]:
            if col["name"] == colName:
                gCol = col.copy()
                # no linking yet @FIXME - will need this later
                if "link" in gCol:
                    gCol.pop("link")
                # is generalization protected for this column?
                if not "special" in col or not col["special"]:
                    table["columns"].remove(col)
    return gCol

getGeneral(tableList, name, debug=False) staticmethod

derive a general table from the given table list Args: tableList(list): a list of tables name(str): name of the general table debug(bool): True if column names should be shown

Returns:

Type Description

a table dict for the generalized table

Source code in lodstorage/schema.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
@staticmethod
def getGeneral(tableList, name: str, debug: bool = False):
    """
    derive a general table from the given table list

    Args:
        tableList(list): a list of tables
        name(str): name of the general table
        debug(bool): True if column names should be shown

    Returns:
        a table dict for the generalized table
    """
    general = {"name": name, "columns": []}
    colCount = Counter()
    for table in tableList:
        for col in table["columns"]:
            columnId = f"{col['name']}.{col['type']}"
            if debug:
                print(columnId)
            colCount[columnId] += 1
    # a column is general if its name.type pair occurs in every table
    for columnId, count in colCount.items():
        if count == len(tableList):
            colName = columnId.split(".")[0]
            generalCol = Schema.generalizeColumn(tableList, colName)
            general["columns"].append(generalCol)
    return general

getGeneralViewDDL(tableList, name, debug=False) staticmethod

get the DDL statement to create a general view

Parameters:

Name Type Description Default
tableList

the list of tables

required
name(str)

the name of the view

required
debug(bool)

True if debug should be set

required
Source code in lodstorage/schema.py
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
@staticmethod
def getGeneralViewDDL(tableList, name: str, debug=False) -> str:
    """
    get the DDL statement to create a general view

    Args:
        tableList: the list of tables
        name(str): the name of the view
        debug(bool): True if debug should be set
    """
    general = Schema.getGeneral(tableList, name, debug)
    # comma-separated list of the generalized column names
    cols = ",".join(col["name"] for col in general["columns"])
    selects = ["  SELECT %s FROM %s" % (cols, table["name"]) for table in tableList]
    return "CREATE VIEW %s AS \n" % name + "\nUNION\n".join(selects)

SchemaManager

Bases: object

a manager for schemas

Source code in lodstorage/schema.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
class SchemaManager(
    object,
):
    """
    a manager for schemas
    """

    def __init__(self, schemaDefs=None, baseUrl: str = None):
        """
        constructor
            Args:
                schemaDefs(dict): a dictionary of schema names
                baseUrl(str): the base url to use for links
        """
        self.baseUrl = baseUrl
        defs = {} if schemaDefs is None else schemaDefs
        # one Schema per definition, keyed by its name
        self.schemasByName = {key: Schema(key, title) for key, title in defs.items()}

__init__(schemaDefs=None, baseUrl=None)

constructor Args: schemaDefs(dict): a dictionary of schema names baseUrl(str): the base url to use for links

Source code in lodstorage/schema.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def __init__(self, schemaDefs=None, baseUrl: str = None):
    """
    constructor
        Args:
            schemaDefs(dict): a dictionary of schema names
            baseUrl(str): the base url to use for links
    """
    self.baseUrl = baseUrl
    self.schemasByName = {}
    # register one Schema per definition, keyed by its name
    for key, name in (schemaDefs or {}).items():
        self.schemasByName[key] = Schema(key, name)

sparql

Created on 2020-08-14

@author: wf

SPARQL

Bases: object

wrapper for SPARQL e.g. Apache Jena, Virtuoso, Blazegraph

:ivar url: full endpoint url (including mode) :ivar mode: 'query' or 'update' :ivar debug: True if debugging is active :ivar typedLiterals: True if INSERT should be done with typedLiterals :ivar profile(boolean): True if profiling / timing information should be displayed :ivar sparql: the SPARQLWrapper2 instance to be used :ivar method(str): the HTTP method to be used 'POST' or 'GET'

Source code in lodstorage/sparql.py
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
class SPARQL(object):
    """
    wrapper for SPARQL e.g. Apache Jena, Virtuoso, Blazegraph

    :ivar url: full endpoint url (including mode)
    :ivar mode: 'query' or 'update'
    :ivar debug: True if debugging is active
    :ivar typedLiterals: True if INSERT should be done with typedLiterals
    :ivar profile(boolean): True if profiling / timing information should be displayed
    :ivar sparql: the SPARQLWrapper2 instance to be used
    :ivar method(str): the HTTP method to be used 'POST' or 'GET'
    """

    def __init__(
        self,
        url,
        mode="query",
        debug=False,
        isFuseki=False,
        typedLiterals=False,
        profile=False,
        agent="PyLodStorage",
        method="POST",
    ):
        """
        Construct a SPARQL wrapper

        Args:
            url(string): the base URL of the endpoint - the mode query/update is going to be appended
            mode(string): 'query' or 'update'
            debug(bool): True if debugging is to be activated
            isFuseki(bool): True if the endpoint is an Apache Jena Fuseki server -
                the mode is then appended to the url as a path segment
            typedLiterals(bool): True if INSERT should be done with typedLiterals
            profile(boolean): True if profiling / timing information should be displayed
            agent(string): the User agent to use
            method(string): the HTTP method to be used 'POST' or 'GET'
        """
        if isFuseki:
            # Fuseki serves query and update on separate service paths
            self.url = f"{url}/{mode}"
        else:
            self.url = url
        self.mode = mode
        self.debug = debug
        self.typedLiterals = typedLiterals
        self.profile = profile
        # NOTE(review): the wrapper is built from the base url, not self.url -
        # so for Fuseki the '/{mode}' suffix is not applied here; confirm intended
        self.sparql = SPARQLWrapper2(url)
        self.method = method
        self.sparql.agent = agent

    @classmethod
    def fromEndpointConf(cls, endpointConf) -> "SPARQL":
        """
        create a SPARQL endpoint from the given EndpointConfiguration

        Args:
            endpointConf(Endpoint): the endpoint configuration to be used
        """
        sparql = SPARQL(url=endpointConf.endpoint, method=endpointConf.method)
        if hasattr(endpointConf, "auth"):
            # map the configured auth scheme name to the SPARQLWrapper constant;
            # unknown schemes yield None, matching SPARQLWrapper's default handling
            auth_map = {"BASIC": BASIC, "DIGEST": DIGEST}
            authMethod = auth_map.get(endpointConf.auth)
            sparql.addAuthentication(
                endpointConf.user, endpointConf.passwd, method=authMethod
            )
        return sparql

    def addAuthentication(
        self, username: str, password: str, method: Union[BASIC, DIGEST] = BASIC
    ):
        """
        Add HTTP Authentication credentials to the sparql wrapper

        Args:
            username(str): name of the user
            password(str): password of the user
            method: HTTP Authentication method - a SPARQLWrapper constant
                (BASIC by default, or DIGEST)
        """
        self.sparql.setHTTPAuth(method)
        self.sparql.setCredentials(username, password)

    def rawQuery(self, queryString, method=POST):
        """
        query with the given query string

        Args:
            queryString(string): the SPARQL query to be performed
            method(string): POST or GET - POST is mandatory for update queries

        Returns:
            the raw query result
        """
        # work around SPARQLWrapper's comment handling before submitting
        patched_query = self.fix_comments(queryString)
        self.sparql.setQuery(patched_query)
        self.sparql.method = method
        return self.sparql.query()

    def fix_comments(self, query_string: str) -> str:
        """
        make sure broken SPARQLWrapper will find comments

        prepends a comment line so SPARQLWrapper's query-type
        detection is not confused by leading comments
        """
        return None if query_string is None else "#\n" + query_string

    def getValue(self, sparqlQuery: str, attr: str):
        """
        get the value for the given SPARQL query using the given attr

        Args:
            sparqlQuery(str): the SPARQL query to run
            attr(str): the attribute to get

        Returns:
            object: the single value of the attribute

        Raises:
            Exception: if the query does not return exactly one record
                containing the attribute (see getFirst)
        """
        if self.debug:
            print(sparqlQuery)
        qLod = self.queryAsListOfDicts(sparqlQuery)
        return self.getFirst(qLod, attr)

    def getValues(self, sparqlQuery: str, attrList: list) -> tuple:
        """
        get the tuple of values for the given SPARQL query and attribute list

        Args:
            sparqlQuery(str): the SPARQL query to run
            attrList(list): the list of attributes to extract from the single result record

        Returns:
            tuple: the values in attrList order

        Raises:
            Exception: if the query does not return exactly one record
                or an attribute is missing from the record
        """
        if self.debug:
            print(sparqlQuery)
        qLod = self.queryAsListOfDicts(sparqlQuery)
        if len(qLod) != 1:
            msg = f"getValues for {attrList} failed for {qLod}"
            raise Exception(msg)
        record = qLod[0]
        values = ()
        for attr in attrList:
            if attr not in record:
                msg = f"getValues failed for attribute {attr} which is missing in result record {record}"
                raise Exception(msg)
            values += (record[attr],)
        return values

    def getFirst(self, qLod: list, attr: str):
        """
        get the column attr of the first row of the given qLod list

        Args:
            qLod(list): the list of dicts (returned by a query)
            attr(str): the attribute to retrieve

        Returns:
            object: the value
        """
        # the query must have yielded exactly one record holding the attribute
        if len(qLod) != 1 or attr not in qLod[0]:
            raise Exception(f"getFirst for attribute {attr} failed for {qLod}")
        return qLod[0][attr]

    def getResults(self, jsonResult):
        """
        get the result from the given jsonResult

        Args:
            jsonResult: the JSON encoded result
                (a SPARQLWrapper result object exposing .bindings)

        Returns:
            list: the list of bindings
        """
        return jsonResult.bindings

    def insert(self, insertCommand):
        """
        run an insert

        Args:
            insertCommand(string): the SPARQL INSERT command

        Returns:
            tuple: (response, exception) - response is None if an exception
            occurred; exception is None on success
        """
        # INSERT bodies must be posted directly rather than url-encoded
        self.sparql.setRequestMethod(POSTDIRECTLY)
        response = None
        exception = None
        try:
            response = self.rawQuery(insertCommand, method=POST)
            # see https://github.com/RDFLib/sparqlwrapper/issues/159#issuecomment-674523696
            # dummy read the body
            response.response.read()
        except Exception as ex:
            # best-effort: errors are reported to the caller via the tuple,
            # not raised
            exception = ex
            if self.debug:
                print(ex)
        return response, exception

    def getLocalName(self, name):
        """
        retrieve valid localname from a string based primary key
        https://www.w3.org/TR/sparql11-query/#prefNames

        Args:
            name(string): the name to convert

        Returns:
            string: a valid local name (alphanumeric characters only)
        """
        return "".join(filter(str.isalnum, name))

    def insertListOfDicts(
        self,
        listOfDicts,
        entityType,
        primaryKey,
        prefixes,
        limit=None,
        batchSize=None,
        profile=False,
    ):
        """
        insert the given list of dicts mapping datatypes

        Args:
            listOfDicts(list): the list of record dicts to insert
            entityType(string): the entityType to use as a prefix for subjects and predicates
            primaryKey(string): the name of the primary key attribute to use
            prefixes(string): any PREFIX statements to be used
            limit(int): maximum number of records to insert
            batchSize(int): number of records to send per request
            profile(bool): NOTE(review): this parameter is currently unused -
                the code below checks self.profile instead; confirm intended

        Return:
            a list of errors which should be empty on full success

        datatype maping according to
        https://www.w3.org/TR/xmlschema-2/#built-in-datatypes

        mapped from
        https://docs.python.org/3/library/stdtypes.html

        compare to
        https://www.w3.org/2001/sw/rdb2rdf/directGraph/
        http://www.bobdc.com/blog/json2rdf/
        https://www.w3.org/TR/json-ld11-api/#data-round-tripping
        https://stackoverflow.com/questions/29030231/json-to-rdf-xml-file-in-python
        """
        if limit is not None:
            listOfDicts = listOfDicts[:limit]
        else:
            limit = len(listOfDicts)
        total = len(listOfDicts)
        if batchSize is None:
            # no batching - insert everything in a single request
            return self.insertListOfDictsBatch(
                listOfDicts, entityType, primaryKey, prefixes, total=total
            )
        else:
            startTime = time.time()
            errors = []
            # store the list in batches
            for i in range(0, total, batchSize):
                recordBatch = listOfDicts[i : i + batchSize]
                batchErrors = self.insertListOfDictsBatch(
                    recordBatch,
                    entityType,
                    primaryKey,
                    prefixes,
                    batchIndex=i,
                    total=total,
                    startTime=startTime,
                )
                errors.extend(batchErrors)
            if self.profile:
                print(
                    "insertListOfDicts for %9d records in %6.1f secs"
                    % (len(listOfDicts), time.time() - startTime),
                    flush=True,
                )
            return errors

    def insertListOfDictsBatch(
        self,
        listOfDicts,
        entityType,
        primaryKey,
        prefixes,
        title="batch",
        batchIndex=None,
        total=None,
        startTime=None,
    ):
        """
        insert a Batch part of listOfDicts

        Args:
            listOfDicts(list): the batch of record dicts to insert
            entityType(string): the entityType to use as a prefix for subjects and predicates
            primaryKey(string): the name of the primary key attribute to use
            prefixes(string): any PREFIX statements to be used
            title(string): the title to display for the profiling (if any)
            batchIndex(int): the start index of the current batch
            total(int): the total number of records for all batches
            startTime(datetime): the start of the batch processing

        Return:
            a list of errors which should be empty on full success
        """
        errors = []
        size = len(listOfDicts)
        if batchIndex is None:
            batchIndex = 0
        batchStartTime = time.time()
        if startTime is None:
            startTime = batchStartTime
        rdfprefix = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
        insertCommand = f"{rdfprefix}{prefixes}\nINSERT DATA {{\n"
        for index, record in enumerate(listOfDicts):
            if not primaryKey in record:
                errors.append(f"missing primary key {primaryKey} in record {index}")
            else:
                primaryValue = record[primaryKey]
                if primaryValue is None:
                    errors.append(
                        f"primary key {primaryKey} value is None in record {index}"
                    )
                else:
                    # subject = entityType plus the sanitized primary key value
                    encodedPrimaryValue = self.getLocalName(primaryValue)
                    tSubject = f"{entityType}__{encodedPrimaryValue}"
                    insertCommand += f'  {tSubject} rdf:type "{entityType}".\n'
                    for keyValue in record.items():
                        key, value = keyValue
                        # convert key if necessary
                        key = self.getLocalName(key)
                        valueType = type(value)
                        if self.debug:
                            print("%s(%s)=%s" % (key, valueType, value))
                        tPredicate = f"{entityType}_{key}"
                        tObject = value
                        # map python types to RDF terms - see the datatype
                        # mapping references in insertListOfDicts' docstring
                        if valueType == str:
                            escapedString = self.controlEscape(value)
                            tObject = '"%s"' % escapedString
                        elif valueType == int:
                            if self.typedLiterals:
                                tObject = (
                                    '"%d"^^<http://www.w3.org/2001/XMLSchema#integer>'
                                    % value
                                )
                            pass
                        elif valueType == float:
                            if self.typedLiterals:
                                tObject = (
                                    '"%s"^^<http://www.w3.org/2001/XMLSchema#decimal>'
                                    % value
                                )
                            pass
                        elif valueType == bool:
                            pass
                        elif valueType == datetime.date:
                            # dates are always typed, regardless of self.typedLiterals
                            # if self.typedLiterals:
                            tObject = (
                                '"%s"^^<http://www.w3.org/2001/XMLSchema#date>' % value
                            )
                            pass
                        elif valueType == datetime.datetime:
                            tObject = (
                                '"%s"^^<http://www.w3.org/2001/XMLSchema#dateTime>'
                                % value
                            )
                            pass
                        else:
                            errors.append(
                                "can't handle type %s in record %d" % (valueType, index)
                            )
                            tObject = None
                        if tObject is not None:
                            insertRecord = "  %s %s %s.\n" % (
                                tSubject,
                                tPredicate,
                                tObject,
                            )
                            insertCommand += insertRecord
        insertCommand += "\n}"
        if self.debug:
            print(insertCommand, flush=True)
        response, ex = self.insert(insertCommand)
        if response is None and ex is not None:
            # NOTE(review): 'index' is the loop variable of the record loop above -
            # if listOfDicts is empty this line raises a NameError; confirm callers
            # never pass an empty batch
            errors.append("%s for record %d" % (str(ex), index))
        if self.profile:
            print(
                "%7s for %9d - %9d of %9d %s in %6.1f s -> %6.1f s"
                % (
                    title,
                    batchIndex + 1,
                    batchIndex + size,
                    total,
                    entityType,
                    time.time() - batchStartTime,
                    time.time() - startTime,
                ),
                flush=True,
            )
        return errors

    # ASCII control characters (0x00-0x1F) that must be escaped in SPARQL string literals
    controlChars = [chr(c) for c in range(0x20)]

    @staticmethod
    def controlEscape(s):
        """
        escape control characters

        see https://stackoverflow.com/a/9778992/1497139
        """
        escaped = "".join(
            [
                c.encode("unicode_escape").decode("ascii")
                if c in SPARQL.controlChars
                else c
                for c in s
            ]
        )
        escaped = escaped.replace('"', '\\"')
        return escaped

    def query(self, queryString, method=POST):
        """
        get a list of results for the given query

        Args:
            queryString(string): the SPARQL query to execute
            method(string): the method eg. POST to use

        Returns:
            list: list of bindings, or None for an HTML (non result-set) response

        Raises:
            Exception: if an HTML response is returned that does not indicate success
        """
        queryResult = self.rawQuery(queryString, method=method)
        if self.debug:
            print(queryString)
        # some endpoints (e.g. update operations) answer with an HTML page
        # instead of a SPARQL result set
        if hasattr(queryResult, "info"):
            if "content-type" in queryResult.info():
                ct = queryResult.info()["content-type"]
                if "text/html" in ct:
                    response = queryResult.response.read().decode()
                    if "Success" not in response:
                        # bug fix: previously `raise ("%s failed: %s", response)`
                        # tried to raise a tuple, which itself raises
                        # "TypeError: exceptions must derive from BaseException"
                        raise Exception("query failed: %s" % response)
                return None
        jsonResult = queryResult.convert()
        return self.getResults(jsonResult)

    def queryAsListOfDicts(
        self, queryString, fixNone: bool = False, sampleCount: int = None
    ):
        """
        run the given query and convert the bindings to a list of dicts
        (to allow round-trip results for insertListOfDicts)

        Args:
            queryString(string): the SPARQL query to execute
            fixNone(bool): if True add None values for empty columns in Dict
            sampleCount(int): the number of samples to check

        Returns:
            list: a list of dicts
        """
        bindings = self.query(queryString, method=self.method)
        lod = self.asListOfDicts(bindings, fixNone=fixNone, sampleCount=sampleCount)
        return lod

    @staticmethod
    def strToDatetime(value, debug=False):
        """
        convert a string to a datetime
        Args:
            value(str): the value to convert
        Returns:
            datetime: the datetime
        """
        dateFormat = "%Y-%m-%d %H:%M:%S.%f"
        if "T" in value and "Z" in value:
            dateFormat = "%Y-%m-%dT%H:%M:%SZ"
        dt = None
        try:
            dt = datetime.datetime.strptime(value, dateFormat)
        except ValueError as ve:
            if debug:
                print(str(ve))
        return dt

    def asListOfDicts(self, records, fixNone: bool = False, sampleCount: int = None):
        """
        convert SPARQL result back to python native

        Args:
            record(list): the list of bindings
            fixNone(bool): if True add None values for empty columns in Dict
            sampleCount(int): the number of samples to check

        Returns:
            list: a list of Dicts
        """
        resultList = []
        fields = None
        if fixNone:
            fields = LOD.getFields(records, sampleCount)
        for record in records:
            resultDict = {}
            for keyValue in record.items():
                key, value = keyValue
                datatype = value.datatype
                if datatype is not None:
                    if datatype == "http://www.w3.org/2001/XMLSchema#integer":
                        resultValue = int(value.value)
                    elif datatype == "http://www.w3.org/2001/XMLSchema#decimal":
                        resultValue = float(value.value)
                    elif datatype == "http://www.w3.org/2001/XMLSchema#boolean":
                        resultValue = value.value in ["TRUE", "true"]
                    elif datatype == "http://www.w3.org/2001/XMLSchema#date":
                        dt = datetime.datetime.strptime(value.value, "%Y-%m-%d")
                        resultValue = dt.date()
                    elif datatype == "http://www.w3.org/2001/XMLSchema#dateTime":
                        dt = SPARQL.strToDatetime(value.value, debug=self.debug)
                        resultValue = dt
                    else:
                        # unsupported datatype
                        resultValue = value.value
                else:
                    resultValue = value.value
                resultDict[key] = resultValue
            if fixNone:
                for field in fields:
                    if not field in resultDict:
                        resultDict[field] = None
            resultList.append(resultDict)
        return resultList

    def printErrors(self, errors):
        """
        print the given list of errors

        Args:
            errors(list): a list of error strings

        Returns:
            boolean: True if the list is empty else false
        """
        if len(errors) > 0:
            print("ERRORS:")
            for error in errors:
                print(error, flush=True, file=stderr)
            return True
        else:
            return False

__init__(url, mode='query', debug=False, isFuseki=False, typedLiterals=False, profile=False, agent='PyLodStorage', method='POST')

Constructor of a SPARQL wrapper

Parameters:

Name Type Description Default
url(string)

the base URL of the endpoint - the mode query/update is going to be appended

required
mode(string)

'query' or 'update'

required
debug(bool)

True if debugging is to be activated

required
typedLiterals(bool)

True if INSERT should be done with typedLiterals

required
profile(boolean)

True if profiling / timing information should be displayed

required
agent(string)

the User agent to use

required
method(string)

the HTTP method to be used 'POST' or 'GET'

required
Source code in lodstorage/sparql.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def __init__(
    self,
    url,
    mode="query",
    debug=False,
    isFuseki=False,
    typedLiterals=False,
    profile=False,
    agent="PyLodStorage",
    method="POST",
):
    """
    Constructor a SPARQL wrapper

    Args:
        url(string): the base URL of the endpoint - the mode query/update is going to be appended
        mode(string): 'query' or 'update'
        debug(bool): True if debugging is to be activated
        typedLiterals(bool): True if INSERT should be done with typedLiterals
        profile(boolean): True if profiling / timing information should be displayed
        agent(string): the User agent to use
        method(string): the HTTP method to be used 'POST' or 'GET'
    """
    if isFuseki:
        self.url = f"{url}/{mode}"
    else:
        self.url = url
    self.mode = mode
    self.debug = debug
    self.typedLiterals = typedLiterals
    self.profile = profile
    self.sparql = SPARQLWrapper2(url)
    self.method = method
    self.sparql.agent = agent

addAuthentication(username, password, method=BASIC)

Add Http Authentication credentials to the sparql wrapper Args: username: name of the user password: password of the user method: HTTP Authentication method

Source code in lodstorage/sparql.py
85
86
87
88
89
90
91
92
93
94
95
96
def addAuthentication(
    self, username: str, password: str, method: Union[BASIC, DIGEST] = BASIC
):
    """
    Add Http Authentication credentials to the sparql wrapper
    Args:
        username: name of the user
        password: password of the user
        method: HTTP Authentication method
    """
    self.sparql.setHTTPAuth(method)
    self.sparql.setCredentials(username, password)

asListOfDicts(records, fixNone=False, sampleCount=None)

convert SPARQL result back to python native

Parameters:

Name Type Description Default
record(list)

the list of bindings

required
fixNone(bool)

if True add None values for empty columns in Dict

required
sampleCount(int)

the number of samples to check

required

Returns:

Name Type Description
list

a list of Dicts

Source code in lodstorage/sparql.py
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
def asListOfDicts(self, records, fixNone: bool = False, sampleCount: int = None):
    """
    convert SPARQL result back to python native

    Args:
        record(list): the list of bindings
        fixNone(bool): if True add None values for empty columns in Dict
        sampleCount(int): the number of samples to check

    Returns:
        list: a list of Dicts
    """
    resultList = []
    fields = None
    if fixNone:
        fields = LOD.getFields(records, sampleCount)
    for record in records:
        resultDict = {}
        for keyValue in record.items():
            key, value = keyValue
            datatype = value.datatype
            if datatype is not None:
                if datatype == "http://www.w3.org/2001/XMLSchema#integer":
                    resultValue = int(value.value)
                elif datatype == "http://www.w3.org/2001/XMLSchema#decimal":
                    resultValue = float(value.value)
                elif datatype == "http://www.w3.org/2001/XMLSchema#boolean":
                    resultValue = value.value in ["TRUE", "true"]
                elif datatype == "http://www.w3.org/2001/XMLSchema#date":
                    dt = datetime.datetime.strptime(value.value, "%Y-%m-%d")
                    resultValue = dt.date()
                elif datatype == "http://www.w3.org/2001/XMLSchema#dateTime":
                    dt = SPARQL.strToDatetime(value.value, debug=self.debug)
                    resultValue = dt
                else:
                    # unsupported datatype
                    resultValue = value.value
            else:
                resultValue = value.value
            resultDict[key] = resultValue
        if fixNone:
            for field in fields:
                if not field in resultDict:
                    resultDict[field] = None
        resultList.append(resultDict)
    return resultList

controlEscape(s) staticmethod

escape control characters

see https://stackoverflow.com/a/9778992/1497139

Source code in lodstorage/sparql.py
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
@staticmethod
def controlEscape(s):
    """
    escape control characters

    see https://stackoverflow.com/a/9778992/1497139
    """
    escaped = "".join(
        [
            c.encode("unicode_escape").decode("ascii")
            if c in SPARQL.controlChars
            else c
            for c in s
        ]
    )
    escaped = escaped.replace('"', '\\"')
    return escaped

fix_comments(query_string)

make sure broken SPARQLWrapper will find comments

Source code in lodstorage/sparql.py
114
115
116
117
118
119
120
def fix_comments(self, query_string: str) -> str:
    """
    make sure broken SPARQLWrapper will find comments
    """
    if query_string is None:
        return None
    return "#\n" + query_string

fromEndpointConf(endpointConf) classmethod

create a SPARQL endpoint from the given EndpointConfiguration

Parameters:

Name Type Description Default
endpointConf(Endpoint)

the endpoint configuration to be used

required
Source code in lodstorage/sparql.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
@classmethod
def fromEndpointConf(cls, endpointConf) -> "SPARQL":
    """
    create a SPARQL endpoint from the given EndpointConfiguration

    Args:
        endpointConf(Endpoint): the endpoint configuration to be used
    """
    sparql = SPARQL(url=endpointConf.endpoint, method=endpointConf.method)
    if hasattr(endpointConf, "auth"):
        authMethod = None
        if endpointConf.auth == "BASIC":
            authMethod = BASIC
        elif endpointConf.auth == "DIGEST":
            authMethod = DIGEST
        sparql.addAuthentication(
            endpointConf.user, endpointConf.passwd, method=authMethod
        )
    return sparql

getFirst(qLod, attr)

get the column attr of the first row of the given qLod list

Parameters:

Name Type Description Default
qLod(list)

the list of dicts (returned by a query)

required
attr(str)

the attribute to retrieve

required

Returns:

Name Type Description
object

the value

Source code in lodstorage/sparql.py
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def getFirst(self, qLod: list, attr: str):
    """
    get the column attr of the first row of the given qLod list

    Args:
        qLod(list): the list of dicts (returned by a query)
        attr(str): the attribute to retrieve

    Returns:
        object: the value
    """
    if len(qLod) == 1 and attr in qLod[0]:
        value = qLod[0][attr]
        return value
    raise Exception(f"getFirst for attribute {attr} failed for {qLod}")

getLocalName(name)

retrieve valid localname from a string based primary key https://www.w3.org/TR/sparql11-query/#prefNames

Parameters:

Name Type Description Default
name(string)

the name to convert

required

Returns:

Name Type Description
string

a valid local name

Source code in lodstorage/sparql.py
212
213
214
215
216
217
218
219
220
221
222
223
224
def getLocalName(self, name):
    """
    retrieve valid localname from a string based primary key
    https://www.w3.org/TR/sparql11-query/#prefNames

    Args:
        name(string): the name to convert

    Returns:
        string: a valid local name
    """
    localName = "".join(ch for ch in name if ch.isalnum())
    return localName

getResults(jsonResult)

get the result from the given jsonResult

Parameters:

Name Type Description Default
jsonResult

the JSON encoded result

required

Returns:

Name Type Description
list

the list of bindings

Source code in lodstorage/sparql.py
176
177
178
179
180
181
182
183
184
185
186
def getResults(self, jsonResult):
    """
    get the result from the given jsonResult

    Args:
        jsonResult: the JSON encoded result

    Returns:
        list: the list of bindings
    """
    return jsonResult.bindings

getValue(sparqlQuery, attr)

get the value for the given SPARQL query using the given attr

Parameters:

Name Type Description Default
sparql(SPARQL)

the SPARQL endpoint to get the value for

required
sparqlQuery(str)

the SPARQL query to run

required
attr(str)

the attribute to get

required
Source code in lodstorage/sparql.py
122
123
124
125
126
127
128
129
130
131
132
133
134
def getValue(self, sparqlQuery: str, attr: str):
    """
    get the value for the given SPARQL query using the given attr

    Args:
        sparql(SPARQL): the SPARQL endpoint to ge the value for
        sparqlQuery(str): the SPARQL query to run
        attr(str): the attribute to get
    """
    if self.debug:
        print(sparqlQuery)
    qLod = self.queryAsListOfDicts(sparqlQuery)
    return self.getFirst(qLod, attr)

getValues(sparqlQuery, attrList)

get Values for the given sparqlQuery and attribute list

Parameters:

Name Type Description Default
sparqlQuery(str)

the query which did not return any values

required
attrList(list)

the list of attributes

required
Source code in lodstorage/sparql.py
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
def getValues(self, sparqlQuery: str, attrList: list):
    """
    get Values for the given sparlQuery and attribute list

    Args:
        sparqlQuery(str): the query which did not return any values
        attrList(list): the list of attributes
    """
    if self.debug:
        print(sparqlQuery)
    qLod = self.queryAsListOfDicts(sparqlQuery)
    if not (len(qLod) == 1):
        msg = f"getValues for {attrList} failed for {qLod}"
        raise Exception(msg)
    record = qLod[0]
    values = ()
    for attr in attrList:
        if not attr in record:
            msg = f"getValues failed for attribute {attr} which is missing in result record {record}"
            raise Exception(msg)
        recordTuple = (record[attr],)
        values += recordTuple
    return values

insert(insertCommand)

run an insert

Parameters:

Name Type Description Default
insertCommand(string)

the SPARQL INSERT command

required

Returns:

Type Description

a response

Source code in lodstorage/sparql.py
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
def insert(self, insertCommand):
    """
    run an insert

    Args:
        insertCommand(string): the SPARQL INSERT command

    Returns:
        a response
    """
    self.sparql.setRequestMethod(POSTDIRECTLY)
    response = None
    exception = None
    try:
        response = self.rawQuery(insertCommand, method=POST)
        # see https://github.com/RDFLib/sparqlwrapper/issues/159#issuecomment-674523696
        # dummy read the body
        response.response.read()
    except Exception as ex:
        exception = ex
        if self.debug:
            print(ex)
    return response, exception

insertListOfDicts(listOfDicts, entityType, primaryKey, prefixes, limit=None, batchSize=None, profile=False)

insert the given list of dicts mapping datatypes

Parameters:

Name Type Description Default
entityType(string)

the entityType to use as a

required
primaryKey(string)

the name of the primary key attribute to use

required
prefix(string)

any PREFIX statements to be used

required
limit(int)

maximum number of records to insert

required
batchSize(int)

number of records to send per request

required
Return

a list of errors which should be empty on full success

datatype mapping according to https://www.w3.org/TR/xmlschema-2/#built-in-datatypes

mapped from https://docs.python.org/3/library/stdtypes.html

compare to https://www.w3.org/2001/sw/rdb2rdf/directGraph/ http://www.bobdc.com/blog/json2rdf/ https://www.w3.org/TR/json-ld11-api/#data-round-tripping https://stackoverflow.com/questions/29030231/json-to-rdf-xml-file-in-python

Source code in lodstorage/sparql.py
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
def insertListOfDicts(
    self,
    listOfDicts,
    entityType,
    primaryKey,
    prefixes,
    limit=None,
    batchSize=None,
    profile=False,
):
    """
    insert the given list of dicts mapping datatypes

    Args:
        entityType(string): the entityType to use as a
        primaryKey(string): the name of the primary key attribute to use
        prefix(string): any PREFIX statements to be used
        limit(int): maximum number of records to insert
        batchSize(int): number of records to send per request

    Return:
        a list of errors which should be empty on full success

    datatype maping according to
    https://www.w3.org/TR/xmlschema-2/#built-in-datatypes

    mapped from
    https://docs.python.org/3/library/stdtypes.html

    compare to
    https://www.w3.org/2001/sw/rdb2rdf/directGraph/
    http://www.bobdc.com/blog/json2rdf/
    https://www.w3.org/TR/json-ld11-api/#data-round-tripping
    https://stackoverflow.com/questions/29030231/json-to-rdf-xml-file-in-python
    """
    if limit is not None:
        listOfDicts = listOfDicts[:limit]
    else:
        limit = len(listOfDicts)
    total = len(listOfDicts)
    if batchSize is None:
        return self.insertListOfDictsBatch(
            listOfDicts, entityType, primaryKey, prefixes, total=total
        )
    else:
        startTime = time.time()
        errors = []
        # store the list in batches
        for i in range(0, total, batchSize):
            recordBatch = listOfDicts[i : i + batchSize]
            batchErrors = self.insertListOfDictsBatch(
                recordBatch,
                entityType,
                primaryKey,
                prefixes,
                batchIndex=i,
                total=total,
                startTime=startTime,
            )
            errors.extend(batchErrors)
        if self.profile:
            print(
                "insertListOfDicts for %9d records in %6.1f secs"
                % (len(listOfDicts), time.time() - startTime),
                flush=True,
            )
        return errors

insertListOfDictsBatch(listOfDicts, entityType, primaryKey, prefixes, title='batch', batchIndex=None, total=None, startTime=None)

insert a Batch part of listOfDicts

Parameters:

Name Type Description Default
entityType(string)

the entityType to use as a

required
primaryKey(string)

the name of the primary key attribute to use

required
prefix(string)

any PREFIX statements to be used

required
title(string)

the title to display for the profiling (if any)

required
batchIndex(int)

the start index of the current batch

required
total(int)

the total number of records for all batches

required
starttime(datetime)

the start of the batch processing

required
Return

a list of errors which should be empty on full success

Source code in lodstorage/sparql.py
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def insertListOfDictsBatch(
    self,
    listOfDicts,
    entityType,
    primaryKey,
    prefixes,
    title="batch",
    batchIndex=None,
    total=None,
    startTime=None,
):
    """
    insert a Batch part of listOfDicts

    Args:
        entityType(string): the entityType to use as a
        primaryKey(string): the name of the primary key attribute to use
        prefix(string): any PREFIX statements to be used
        title(string): the title to display for the profiling (if any)
        batchIndex(int): the start index of the current batch
        total(int): the total number of records for all batches
        starttime(datetime): the start of the batch processing

    Return:
        a list of errors which should be empty on full success
    """
    errors = []
    size = len(listOfDicts)
    if batchIndex is None:
        batchIndex = 0
    batchStartTime = time.time()
    if startTime is None:
        startTime = batchStartTime
    rdfprefix = "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n"
    insertCommand = f"{rdfprefix}{prefixes}\nINSERT DATA {{\n"
    for index, record in enumerate(listOfDicts):
        if not primaryKey in record:
            errors.append(f"missing primary key {primaryKey} in record {index}")
        else:
            primaryValue = record[primaryKey]
            if primaryValue is None:
                errors.append(
                    f"primary key {primaryKey} value is None in record {index}"
                )
            else:
                encodedPrimaryValue = self.getLocalName(primaryValue)
                tSubject = f"{entityType}__{encodedPrimaryValue}"
                insertCommand += f'  {tSubject} rdf:type "{entityType}".\n'
                for keyValue in record.items():
                    key, value = keyValue
                    # convert key if necessary
                    key = self.getLocalName(key)
                    valueType = type(value)
                    if self.debug:
                        print("%s(%s)=%s" % (key, valueType, value))
                    tPredicate = f"{entityType}_{key}"
                    tObject = value
                    if valueType == str:
                        escapedString = self.controlEscape(value)
                        tObject = '"%s"' % escapedString
                    elif valueType == int:
                        if self.typedLiterals:
                            tObject = (
                                '"%d"^^<http://www.w3.org/2001/XMLSchema#integer>'
                                % value
                            )
                        pass
                    elif valueType == float:
                        if self.typedLiterals:
                            tObject = (
                                '"%s"^^<http://www.w3.org/2001/XMLSchema#decimal>'
                                % value
                            )
                        pass
                    elif valueType == bool:
                        pass
                    elif valueType == datetime.date:
                        # if self.typedLiterals:
                        tObject = (
                            '"%s"^^<http://www.w3.org/2001/XMLSchema#date>' % value
                        )
                        pass
                    elif valueType == datetime.datetime:
                        tObject = (
                            '"%s"^^<http://www.w3.org/2001/XMLSchema#dateTime>'
                            % value
                        )
                        pass
                    else:
                        errors.append(
                            "can't handle type %s in record %d" % (valueType, index)
                        )
                        tObject = None
                    if tObject is not None:
                        insertRecord = "  %s %s %s.\n" % (
                            tSubject,
                            tPredicate,
                            tObject,
                        )
                        insertCommand += insertRecord
    insertCommand += "\n}"
    if self.debug:
        print(insertCommand, flush=True)
    response, ex = self.insert(insertCommand)
    if response is None and ex is not None:
        errors.append("%s for record %d" % (str(ex), index))
    if self.profile:
        print(
            "%7s for %9d - %9d of %9d %s in %6.1f s -> %6.1f s"
            % (
                title,
                batchIndex + 1,
                batchIndex + size,
                total,
                entityType,
                time.time() - batchStartTime,
                time.time() - startTime,
            ),
            flush=True,
        )
    return errors

printErrors(errors)

print the given list of errors

Parameters:

Name Type Description Default
errors(list)

a list of error strings

required

Returns:

Name Type Description
boolean

True if the list is empty else false

Source code in lodstorage/sparql.py
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
def printErrors(self, errors):
    """
    print the given list of errors

    Args:
        errors(list): a list of error strings

    Returns:
        boolean: True if the list is empty else false
    """
    if len(errors) > 0:
        print("ERRORS:")
        for error in errors:
            print(error, flush=True, file=stderr)
        return True
    else:
        return False

query(queryString, method=POST)

get a list of results for the given query

Parameters:

Name Type Description Default
queryString(string)

the SPARQL query to execute

required
method(string)

the method eg. POST to use

required

Returns:

Name Type Description
list

list of bindings

Source code in lodstorage/sparql.py
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
def query(self, queryString, method=POST):
    """
    get a list of results for the given query

    Args:
        queryString(string): the SPARQL query to execute
        method(string): the method eg. POST to use

    Returns:
        list: list of bindings
    """
    queryResult = self.rawQuery(queryString, method=method)
    if self.debug:
        print(queryString)
    if hasattr(queryResult, "info"):
        if "content-type" in queryResult.info():
            ct = queryResult.info()["content-type"]
            if "text/html" in ct:
                response = queryResult.response.read().decode()
                if not "Success" in response:
                    raise ("%s failed: %s", response)
            return None
    jsonResult = queryResult.convert()
    return self.getResults(jsonResult)

queryAsListOfDicts(queryString, fixNone=False, sampleCount=None)

get a list of dicts for the given query (to allow round-trip results for insertListOfDicts)

Parameters:

Name Type Description Default
queryString(string)

the SPARQL query to execute

required
fixNone(bool)

if True add None values for empty columns in Dict

required
sampleCount(int)

the number of samples to check

required

Returns:

Name Type Description
list

a list of Dicts

Source code in lodstorage/sparql.py
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
def queryAsListOfDicts(
    self, queryString, fixNone: bool = False, sampleCount: int = None
):
    """
    get a list of dicts for the given query (to allow round-trip results for insertListOfDicts)

    Args:
        queryString(string): the SPARQL query to execute
        fixNone(bool): if True add None values for empty columns in Dict
        sampleCount(int): the number of samples to check

    Returns:
        list: a list ofDicts
    """
    records = self.query(queryString, method=self.method)
    listOfDicts = self.asListOfDicts(
        records, fixNone=fixNone, sampleCount=sampleCount
    )
    return listOfDicts

rawQuery(queryString, method=POST)

query with the given query string

Parameters:

Name Type Description Default
queryString(string)

the SPARQL query to be performed

required
method(string)

POST or GET - POST is mandatory for update queries

required

Returns: list: the raw query result as bindings

Source code in lodstorage/sparql.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def rawQuery(self, queryString, method=POST):
    """
    query with the given query string

    Args:
        queryString(string): the SPARQL query to be performed
        method(string): POST or GET - POST is mandatory for update queries
    Returns:
        list: the raw query result as bindings
    """
    queryString = self.fix_comments(queryString)
    self.sparql.setQuery(queryString)
    self.sparql.method = method
    queryResult = self.sparql.query()
    return queryResult

strToDatetime(value, debug=False) staticmethod

convert a string to a datetime Args: value(str): the value to convert Returns: datetime: the datetime

Source code in lodstorage/sparql.py
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
@staticmethod
def strToDatetime(value, debug=False):
    """
    convert a string to a datetime
    Args:
        value(str): the value to convert
    Returns:
        datetime: the datetime
    """
    dateFormat = "%Y-%m-%d %H:%M:%S.%f"
    if "T" in value and "Z" in value:
        dateFormat = "%Y-%m-%dT%H:%M:%SZ"
    dt = None
    try:
        dt = datetime.datetime.strptime(value, dateFormat)
    except ValueError as ve:
        if debug:
            print(str(ve))
    return dt

sql

Created on 2020-08-24

@author: wf

DatetimeAdapter

Singleton class for converting date and time formats with optional lenient error handling.

Attributes:

Name Type Description
lenient bool

Whether to handle conversion errors leniently, returning None and logging a warning.

Source code in lodstorage/sql.py
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
class DatetimeAdapter:
    """Singleton class for converting date and time formats with optional lenient error handling.

    Attributes:
        lenient (bool): Whether to handle conversion errors leniently, returning None and logging a warning.
    """

    # shared singleton instance - see __new__
    _instance = None

    def __new__(cls, lenient: bool = False):
        """Ensure only one instance of the adapter exists.

        Note:
            the lenient flag is only honored when the singleton is first
            created; use set_lenient() to change it afterwards.

        Args:
            lenient (bool): If True, the adapter will not raise exceptions on conversion failures.

        Returns:
            DatetimeAdapter: The singleton instance of the adapter.
        """
        if cls._instance is None:
            cls._instance = super(DatetimeAdapter, cls).__new__(cls)
            cls._instance.lenient = lenient
        return cls._instance

    def _handle_input(self, val: bytes) -> str:
        """Validate and decode the input bytes into string.

        Args:
            val (bytes): The bytes input to validate and decode.

        Returns:
            str: The decoded string from bytes.

        Raises:
            TypeError: If the input is not bytes.
            UnicodeDecodeError: If the bytes are not valid UTF-8.
        """
        if not isinstance(val, bytes):
            raise TypeError("Input must be a byte string.")
        return val.decode()

    def _handle_error(self, error: Exception, val: bytes):
        """Handle errors based on the lenient mode.

        Args:
            error (Exception): The exception that was raised.
            val (bytes): The input value that caused the error.

        Returns:
            None: If lenient mode is True and an error occurs.

        Raises:
            Exception: If lenient mode is False and an error occurs.
        """
        if self.lenient:
            logging.warning(f"Failed to convert {val}: {error}")
            return None
        raise error

    def convert_date(self, val: bytes) -> datetime.date:
        """Convert ISO 8601 date byte string to a datetime.date object.

        Args:
            val (bytes): The ISO 8601 date string in bytes.

        Returns:
            datetime.date: The converted date object.
        """
        try:
            decoded_date = self._handle_input(val)
            return datetime.date.fromisoformat(decoded_date)
        except Exception as e:
            return self._handle_error(e, val)

    def convert_datetime(self, val: bytes) -> datetime.datetime:
        """Convert ISO 8601 datetime byte string to a datetime.datetime object.

        Args:
            val (bytes): The ISO 8601 datetime string in bytes.

        Returns:
            datetime.datetime: The converted datetime object.
        """
        try:
            decoded_datetime = self._handle_input(val)
            return datetime.datetime.fromisoformat(decoded_datetime)
        except Exception as e:
            return self._handle_error(e, val)

    def convert_timestamp(self, val: bytes) -> datetime.datetime:
        """Convert Unix epoch timestamp byte string to a datetime.datetime object.

        Falls back to ISO 8601 parsing when the value is not numeric.

        Args:
            val (bytes): The Unix epoch timestamp (in microseconds) in bytes.

        Returns:
            datetime.datetime: The converted datetime object.
        """
        # decode in its own try block: UnicodeDecodeError is a ValueError
        # subclass and previously fell into the numeric-fallback branch
        # where 'decoded_string' was unbound, raising a spurious NameError
        try:
            decoded_string = self._handle_input(val)
        except Exception as e:
            return self._handle_error(e, val)
        try:
            # interpret the value as microseconds since the Unix epoch
            timestamp_float = float(decoded_string) / 10**6
            return datetime.datetime.fromtimestamp(timestamp_float)
        except ValueError:
            try:
                # not numeric - try to parse it as a datetime string
                return datetime.datetime.fromisoformat(decoded_string)
            except Exception as e:
                return self._handle_error(e, val)
        except Exception as e:
            return self._handle_error(e, val)

    def set_lenient(self, lenient: bool):
        """Set the lenient mode of the adapter.

        Args:
            lenient (bool): True to enable lenient mode, False to disable it.
        """
        self.lenient = lenient

__new__(lenient=False)

Ensure only one instance of the adapter exists.

Parameters:

Name Type Description Default
lenient bool

If True, the adapter will not raise exceptions on conversion failures.

False

Returns:

Name Type Description
DatetimeAdapter

The singleton instance of the adapter.

Source code in lodstorage/sql.py
693
694
695
696
697
698
699
700
701
702
703
704
705
def __new__(cls, lenient: bool = False):
    """Create or return the singleton adapter instance.

    Args:
        lenient (bool): If True, the adapter will not raise exceptions on
            conversion failures; only honored when the singleton is first created.

    Returns:
        DatetimeAdapter: The singleton instance of the adapter.
    """
    # lazily create the one shared instance
    if cls._instance is not None:
        return cls._instance
    instance = super(DatetimeAdapter, cls).__new__(cls)
    instance.lenient = lenient
    cls._instance = instance
    return cls._instance

convert_date(val)

Convert ISO 8601 date byte string to a datetime.date object.

Parameters:

Name Type Description Default
val bytes

The ISO 8601 date string in bytes.

required

Returns:

Type Description
date

datetime.date: The converted date object.

Source code in lodstorage/sql.py
742
743
744
745
746
747
748
749
750
751
752
753
754
755
def convert_date(self, val: bytes) -> datetime.date:
    """Convert ISO 8601 date byte string to a datetime.date object.

    Args:
        val (bytes): The ISO 8601 date string in bytes.

    Returns:
        datetime.date: The converted date object.
    """
    try:
        # decode then parse; any failure is delegated to the error handler
        return datetime.date.fromisoformat(self._handle_input(val))
    except Exception as conversion_error:
        return self._handle_error(conversion_error, val)

convert_datetime(val)

Convert ISO 8601 datetime byte string to a datetime.datetime object.

Parameters:

Name Type Description Default
val bytes

The ISO 8601 datetime string in bytes.

required

Returns:

Type Description
datetime

datetime.datetime: The converted datetime object.

Source code in lodstorage/sql.py
757
758
759
760
761
762
763
764
765
766
767
768
769
770
def convert_datetime(self, val: bytes) -> datetime.datetime:
    """Convert ISO 8601 datetime byte string to a datetime.datetime object.

    Args:
        val (bytes): The ISO 8601 datetime string in bytes.

    Returns:
        datetime.datetime: The converted datetime object.
    """
    try:
        # decode then parse; any failure is delegated to the error handler
        return datetime.datetime.fromisoformat(self._handle_input(val))
    except Exception as conversion_error:
        return self._handle_error(conversion_error, val)

convert_timestamp(val)

Convert Unix epoch timestamp byte string to a datetime.datetime object.

Parameters:

Name Type Description Default
val bytes

The Unix epoch timestamp in bytes.

required

Returns:

Type Description
datetime

datetime.datetime: The converted datetime object.

Source code in lodstorage/sql.py
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
def convert_timestamp(self, val: bytes) -> datetime.datetime:
    """Convert Unix epoch timestamp byte string to a datetime.datetime object.

    Falls back to ISO 8601 parsing when the value is not numeric.

    Args:
        val (bytes): The Unix epoch timestamp (in microseconds) in bytes.

    Returns:
        datetime.datetime: The converted datetime object.
    """
    # decode in its own try block: UnicodeDecodeError is a ValueError
    # subclass and previously fell into the numeric-fallback branch where
    # 'decoded_string' was unbound, raising a spurious NameError
    try:
        decoded_string = self._handle_input(val)
    except Exception as e:
        return self._handle_error(e, val)
    try:
        # interpret the value as microseconds since the Unix epoch
        timestamp_float = float(decoded_string) / 10**6
        return datetime.datetime.fromtimestamp(timestamp_float)
    except ValueError:
        try:
            # not numeric - try to parse it as a datetime string
            return datetime.datetime.fromisoformat(decoded_string)
        except Exception as e:
            return self._handle_error(e, val)
    except Exception as e:
        return self._handle_error(e, val)

set_lenient(lenient)

Set the lenient mode of the adapter.

Parameters:

Name Type Description Default
lenient bool

True to enable lenient mode, False to disable it.

required
Source code in lodstorage/sql.py
795
796
797
798
799
800
801
def set_lenient(self, lenient: bool):
    """Switch the adapter's lenient error-handling mode.

    Args:
        lenient (bool): True to enable lenient mode, False to disable it.
    """
    # lenient mode makes conversion failures return None instead of raising
    self.lenient = lenient

EntityInfo

Bases: object

holds entity meta Info

:ivar name(string): entity name = table name

:ivar primaryKey(string): the name of the primary key column

:ivar typeMap(dict): maps column names to python types

:ivar debug(boolean): True if debug information should be shown

Source code in lodstorage/sql.py
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
class EntityInfo(object):
    """
    holds entity meta Info

    :ivar name(string): entity name = table name

    :ivar primaryKey(string): the name of the primary key column

    :ivar typeMap(dict): maps column names to python types

    :ivar debug(boolean): True if debug information should be shown

    """

    # maps the supported python types to their SQL column types;
    # shared table so DDL generation has a single source of truth
    SQL_TYPE_MAP = {
        str: "TEXT",
        int: "INTEGER",
        float: "FLOAT",
        bool: "BOOLEAN",
        datetime.date: "DATE",
        datetime.datetime: "TIMESTAMP",
    }

    def __init__(self, sampleRecords, name, primaryKey=None, debug=False):
        """
        construct me from the given sample records, name and primary key

        Args:
           sampleRecords(list): a list of dicts of sample records used to derive the column types
           name(string): the name of the entity
           primaryKey(string): the name of the primary key column
           debug(boolean): True if debug information should be shown
        """
        self.sampleRecords = sampleRecords
        self.name = name
        self.primaryKey = primaryKey
        self.debug = debug
        self.typeMap = {}
        self.sqlTypeMap = {}
        # derive the DDL and DML commands from the sample records
        self.createTableCmd = self.getCreateTableCmd(sampleRecords)
        self.dropTableCmd = "DROP TABLE IF EXISTS %s" % self.name
        self.insertCmd = self.getInsertCmd()

    def getCreateTableCmd(self, sampleRecords):
        """
        get the CREATE TABLE DDL command for the given sample records

        Args:
            sampleRecords(list): a list of Dicts of sample Records

        Returns:
            string: CREATE TABLE DDL command for this entity info

        Example:

        .. code-block:: sql

            CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN)

        """
        ddlCmd = "CREATE TABLE %s(" % self.name
        delim = ""
        for sampleRecord in sampleRecords:
            for key, value in sampleRecord.items():
                valueType = None
                if value is None:
                    # with a single sample record there is no second chance to
                    # determine the type - default the column to TEXT
                    if len(sampleRecords) == 1:
                        print(
                            "Warning sampleRecord column %s is None - using TEXT as type"
                            % key
                        )
                        valueType = str
                else:
                    valueType = type(value)
                sqlType = (
                    self.SQL_TYPE_MAP.get(valueType) if valueType is not None else None
                )
                if valueType is not None and sqlType is None:
                    msg = "warning: unsupported type %s for column %s " % (
                        str(valueType),
                        key,
                    )
                    print(msg)
                if sqlType is not None and valueType is not None:
                    self.addType(key, valueType, sqlType)
        for key, sqlType in self.sqlTypeMap.items():
            ddlCmd += "%s%s %s%s" % (
                delim,
                key,
                sqlType,
                " PRIMARY KEY" if key == self.primaryKey else "",
            )
            delim = ","
        ddlCmd += ")"
        if self.debug:
            print(ddlCmd)
        return ddlCmd

    def getInsertCmd(self, replace: bool = False) -> str:
        """
        get the INSERT command for this entityInfo

        Args:
             replace(bool): if True allow replace for insert

        Returns:
            str: the INSERT INTO SQL command for this entityInfo e.g.

        Example:

        .. code-block:: sql

            INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge) values (?,?,?,?,?,?).

        """
        columns = ",".join(self.typeMap.keys())
        placeholders = ":" + ",:".join(self.typeMap.keys())
        replaceClause = " OR REPLACE" if replace else ""
        insertCmd = f"INSERT{replaceClause} INTO {self.name} ({columns}) values ({placeholders})"
        if self.debug:
            print(insertCmd)
        return insertCmd

    def addType(self, column, valueType, sqlType):
        """
        add the python type for the given column to the typeMap

        Args:
           column(string): the name of the column
           valueType(type): the python type of the column
           sqlType(string): the SQL type of the column
        """
        # the first sample value seen for a column decides its type
        if column not in self.typeMap:
            self.typeMap[column] = valueType
            self.sqlTypeMap[column] = sqlType

    def fixDates(self, resultList):
        """
        fix date entries in the given resultList by parsing the date content e.g.
        converting '1926-04-21' back to datetime.date(1926, 4, 21)

        Args:
            resultList(list): the list of records to be fixed
        """
        for record in resultList:
            for key, valueType in self.typeMap.items():
                if valueType == datetime.date:
                    dt = datetime.datetime.strptime(record[key], "%Y-%m-%d")
                    record[key] = dt.date()

__init__(sampleRecords, name, primaryKey=None, debug=False)

construct me from the given name and primary key

Parameters:

Name Type Description Default
name(string)

the name of the entity

required
primaryKey(string)

the name of the primary key column

required
debug(boolean)

True if debug information should be shown

required
Source code in lodstorage/sql.py
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
def __init__(self, sampleRecords, name, primaryKey=None, debug=False):
    """
    construct me from the given sample records, name and primary key

    Args:
       sampleRecords(list): a list of dicts of sample records used to derive the column types
       name(string): the name of the entity
       primaryKey(string): the name of the primary key column
       debug(boolean): True if debug information should be shown
    """
    self.sampleRecords = sampleRecords
    self.name = name
    self.primaryKey = primaryKey
    self.debug = debug
    # column name -> python type / SQL type lookups, filled by addType
    self.typeMap = {}
    self.sqlTypeMap = {}
    # derive the DDL and DML commands from the sample records
    self.createTableCmd = self.getCreateTableCmd(sampleRecords)
    self.dropTableCmd = "DROP TABLE IF EXISTS %s" % self.name
    self.insertCmd = self.getInsertCmd()

addType(column, valueType, sqlType)

add the python type for the given column to the typeMap

Parameters:

Name Type Description Default
column(string)

the name of the column

required
valueType(type)

the python type of the column

required
Source code in lodstorage/sql.py
627
628
629
630
631
632
633
634
635
636
637
638
def addType(self, column, valueType, sqlType):
    """
    add the python type for the given column to the typeMap

    Args:
       column(string): the name of the column
       valueType(type): the python type of the column
       sqlType(string): the SQL type of the column
    """
    # the first sample value seen for a column decides its type;
    # use the idiomatic 'not in' membership test
    if column not in self.typeMap:
        self.typeMap[column] = valueType
        self.sqlTypeMap[column] = sqlType

fixDates(resultList)

fix date entries in the given resultList by parsing the date content e.g. converting '1926-04-21' back to datetime.date(1926, 4, 21)

Parameters:

Name Type Description Default
resultList(list)

the list of records to be fixed

required
Source code in lodstorage/sql.py
640
641
642
643
644
645
646
647
648
649
650
651
652
653
def fixDates(self, resultList):
    """
    fix date entries in the given resultList by parsing the date content e.g.
    converting '1926-04-21' back to datetime.date(1926, 4, 21)

    Args:
        resultList(list): the list of records to be fixed
    """
    # determine the date columns once, then convert them in every record
    dateColumns = [
        key for key, valueType in self.typeMap.items() if valueType == datetime.date
    ]
    for record in resultList:
        for key in dateColumns:
            parsed = datetime.datetime.strptime(record[key], "%Y-%m-%d")
            record[key] = parsed.date()

getCreateTableCmd(sampleRecords)

get the CREATE TABLE DDL command for the given sample records

Parameters:

Name Type Description Default
sampleRecords(list)

a list of Dicts of sample Records

required

Returns:

Name Type Description
string

CREATE TABLE DDL command for this entity info

Example:

.. code-block:: sql

CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN)
Source code in lodstorage/sql.py
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
def getCreateTableCmd(self, sampleRecords):
    """
    get the CREATE TABLE DDL command for the given sample records

    Args:
        sampleRecords(list): a list of Dicts of sample Records

    Returns:
        string: CREATE TABLE DDL command for this entity info

    Example:

    .. code-block:: sql

        CREATE TABLE Person(name TEXT PRIMARY KEY,born DATE,numberInLine INTEGER,wikidataurl TEXT,age FLOAT,ofAge BOOLEAN)

    """
    # lookup table from python type to SQL column type
    typeLookup = {
        str: "TEXT",
        int: "INTEGER",
        float: "FLOAT",
        bool: "BOOLEAN",
        datetime.date: "DATE",
        datetime.datetime: "TIMESTAMP",
    }
    for sampleRecord in sampleRecords:
        for key, value in sampleRecord.items():
            valueType = None
            if value is None:
                # a single sample record gives no second chance to infer
                # the type - default the column to TEXT
                if len(sampleRecords) == 1:
                    print(
                        "Warning sampleRecord column %s is None - using TEXT as type"
                        % key
                    )
                    valueType = str
            else:
                valueType = type(value)
            sqlType = typeLookup.get(valueType) if valueType is not None else None
            if valueType is not None and sqlType is None:
                msg = "warning: unsupported type %s for column %s " % (
                    str(valueType),
                    key,
                )
                print(msg)
            if sqlType is not None and valueType is not None:
                self.addType(key, valueType, sqlType)
    # assemble the column definitions in insertion order
    columnDefs = []
    for key, sqlType in self.sqlTypeMap.items():
        primaryClause = " PRIMARY KEY" if key == self.primaryKey else ""
        columnDefs.append("%s %s%s" % (key, sqlType, primaryClause))
    ddlCmd = "CREATE TABLE %s(%s)" % (self.name, ",".join(columnDefs))
    if self.debug:
        print(ddlCmd)
    return ddlCmd

getInsertCmd(replace=False)

get the INSERT command for this entityInfo

Parameters:

Name Type Description Default
replace(bool)

if True allow replace for insert

required

Returns:

Name Type Description
str str

the INSERT INTO SQL command for this entityInfo, e.g.

Example:

.. code-block:: sql

INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge) values (?,?,?,?,?,?).
Source code in lodstorage/sql.py
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
def getInsertCmd(self, replace: bool = False) -> str:
    """
    get the INSERT command for this entityInfo

    Args:
         replace(bool): if True allow replace for insert

    Returns:
        str: the INSERT INTO SQL command for this entityInfo e.g.

    Example:

    .. code-block:: sql

        INSERT INTO Person (name,born,numberInLine,wikidataurl,age,ofAge) values (?,?,?,?,?,?).

    """
    columnNames = list(self.typeMap.keys())
    columns = ",".join(columnNames)
    # named-parameter placeholders :col1,:col2,...
    placeholders = ":" + ",:".join(columnNames)
    replaceClause = " OR REPLACE" if replace else ""
    insertCmd = (
        f"INSERT{replaceClause} INTO {self.name} ({columns}) values ({placeholders})"
    )
    if self.debug:
        print(insertCmd)
    return insertCmd

SQLDB

Bases: object

Structured Query Language Database wrapper

:ivar dbname(string): name of the database :ivar debug(boolean): True if debug info should be provided :ivar errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)

Source code in lodstorage/sql.py
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
class SQLDB(object):
    """
    Structured Query Language Database wrapper

    :ivar dbname(string): name of the database
    :ivar debug(boolean): True if debug info should be provided
    :ivar errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
    """

    RAM = ":memory:"

    def __init__(
        self,
        dbname: str = ":memory:",
        connection=None,
        check_same_thread=True,
        timeout=5,
        debug=False,
        errorDebug=False,
    ):
        """
        Construct me for the given dbname and debug

        Args:

           dbname(string): name of the database - default is a RAM based database
           connection(Connection): an optional connection to be reused
           check_same_thread(boolean): True if object handling needs to be on the same thread see https://stackoverflow.com/a/48234567/1497139
           timeout(float): number of seconds for connection timeout
           debug(boolean): if True switch on debug
           errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
        """
        self.dbname = dbname
        self.debug = debug
        self.errorDebug = errorDebug
        if connection is None:
            self.c = sqlite3.connect(
                dbname,
                detect_types=sqlite3.PARSE_DECLTYPES,
                check_same_thread=check_same_thread,
                timeout=timeout,
            )
        else:
            # reuse the supplied connection e.g. from a backup/restore
            self.c = connection

    def logError(self, msg):
        """
        log the given error message to stderr

        Args:
            msg(str): the error message to display
        """
        # flush so the message is visible immediately even when buffered
        print(msg, file=sys.stderr, flush=True)

    def close(self):
        """close my connection"""
        self.c.close()

    def execute(self, ddlCmd):
        """
        execute the given Data Definition Command

        Args:
            ddlCmd(string): e.g. a CREATE TABLE or CREATE View command
        """
        self.c.execute(ddlCmd)

    def createTable4EntityInfo(self, entityInfo, withDrop=False, withCreate=True):
        """
        Create a table based on the provided EntityInfo.

        Args:
            entityInfo (EntityInfo): The EntityInfo object containing table metadata.
            withDrop (bool): If True, drop the existing table before creation.
            withCreate (bool): If True, execute the CREATE TABLE command.

        Returns:
            EntityInfo: The provided EntityInfo object.
        """
        if withDrop:
            self.c.execute(entityInfo.dropTableCmd)
        if withCreate:
            try:
                self.c.execute(entityInfo.createTableCmd)
            except sqlite3.OperationalError as oe:
                # chain the original error so the full traceback is preserved
                raise Exception(
                    f"createTable failed with error {oe} for {entityInfo.createTableCmd}"
                ) from oe
        return entityInfo

    def createTable(
        self,
        listOfRecords,
        entityName: str,
        primaryKey: str = None,
        withCreate: bool = True,
        withDrop: bool = False,
        sampleRecordCount=1,
        failIfTooFew=True,
    ):
        """
        Derive Data Definition Language CREATE TABLE command from list of Records by examining first record
        as defining sample record and execute DDL command.

        Args:
            listOfRecords (list): A list of Dicts.
            entityName (str): The entity / table name to use.
            primaryKey (str): The key/column to use as a primary key.
            withDrop (bool): True if the existing Table should be dropped.
            withCreate (bool): True if the create Table command should be executed.
            sampleRecordCount (int): Number of sample records expected and to be inspected.
            failIfTooFew (bool): Raise an Exception if too few sample records, else warn only.

        Returns:
            EntityInfo: Meta data information for the created table.
        """
        l = len(listOfRecords)
        # a negative sample count means: inspect all records
        if sampleRecordCount < 0:
            sampleRecordCount = l
        if l < sampleRecordCount:
            msg = f"only {l}/{sampleRecordCount} of needed sample records to createTable available"
            if failIfTooFew:
                raise Exception(msg)
            elif self.debug:
                self.logError(msg)

        sampleRecords = listOfRecords[:sampleRecordCount]
        entityInfo = EntityInfo(sampleRecords, entityName, primaryKey, debug=self.debug)

        return self.createTable4EntityInfo(entityInfo, withDrop, withCreate)

    def getDebugInfo(self, record, index, executeMany):
        """
        get the debug info for the given record at the given index depending on the state of executeMany

        Args:
            record(dict): the record to show
            index(int): the index of the record
            executeMany(boolean): if True the record may be valid else not
        """
        debugInfo = ""
        if not executeMany:
            # shall we show the details of the record (which might be a security risk)
            if self.errorDebug:
                # show details of record
                debugInfo = "\nrecord  #%d=%s" % (index, repr(record))
            else:
                # show only index
                debugInfo = "\nrecord #%d" % index
        return debugInfo

    def store(
        self, listOfRecords, entityInfo, executeMany=False, fixNone=False, replace=False
    ):
        """
        store the given list of records based on the given entityInfo

        Args:

           listOfRecords(list): the list of Dicts to be stored
           entityInfo(EntityInfo): the meta data to be used for storing
           executeMany(bool): if True the insert command is done with many/all records at once
           fixNone(bool): if True make sure empty columns in the listOfDict are filled with "None" values
           replace(bool): if True allow replace for insert
        """
        insertCmd = entityInfo.getInsertCmd(replace=replace)
        record = None
        index = 0
        try:
            if executeMany:
                if fixNone:
                    LOD.setNone4List(listOfRecords, entityInfo.typeMap.keys())
                self.c.executemany(insertCmd, listOfRecords)
            else:
                for record in listOfRecords:
                    index += 1
                    if fixNone:
                        LOD.setNone(record, entityInfo.typeMap.keys())
                    self.c.execute(insertCmd, record)
            self.c.commit()
        except sqlite3.ProgrammingError as pe:
            msg = pe.args[0]
            if "You did not supply a value for binding" in msg:
                if ":" in msg:
                    # sqlite now returns the parameter name not the number
                    # You did not supply a value for binding parameter :type.
                    # the regex group already excludes the leading colon
                    columnName = re.findall(r":([a-zA-Z][a-zA-Z0-9_]*)", msg)[0]
                else:
                    # pre python 3.10
                    # You did not supply a value for binding 2.
                    columnIndex = int(re.findall(r"\d+", msg)[0])
                    columnName = list(entityInfo.typeMap.keys())[columnIndex - 1]
                debugInfo = self.getDebugInfo(record, index, executeMany)
                raise Exception(
                    "%s\nfailed: no value supplied for column '%s'%s"
                    % (insertCmd, columnName, debugInfo)
                ) from pe
            else:
                raise pe
        except sqlite3.InterfaceError as ie:
            msg = ie.args[0]
            if "Error binding parameter" in msg:
                columnName = re.findall(r":[_a-zA-Z]\w*", msg)[0]
                debugInfo = self.getDebugInfo(record, index, executeMany)
                raise Exception(
                    "%s\nfailed: error binding column '%s'%s"
                    % (insertCmd, columnName, debugInfo)
                ) from ie
            else:
                raise ie
        except Exception as ex:
            debugInfo = self.getDebugInfo(record, index, executeMany)
            msg = "%s\nfailed:%s%s" % (insertCmd, str(ex), debugInfo)
            raise Exception(msg) from ex

    def queryGen(self, sqlQuery, params=None):
        """
        run the given sqlQuery as a generator for dicts

        Args:

            sqlQuery(string): the SQL query to be executed
            params(tuple): the query params, if any

        Returns:
            a generator of dicts
        """
        if self.debug:
            print(sqlQuery)
            if params is not None:
                print(params)
        # https://stackoverflow.com/a/13735506/1497139
        cur = self.c.cursor()
        try:
            if params is not None:
                query = cur.execute(sqlQuery, params)
            else:
                query = cur.execute(sqlQuery)
            colname = [d[0] for d in query.description]
            try:
                # loop over all rows
                for row in query:
                    record = dict(zip(colname, row))
                    yield record
            except Exception as ex:
                msg = str(ex)
                self.logError(msg)
        finally:
            # always release the cursor - also when the generator is
            # abandoned before being exhausted (GeneratorExit) or the
            # query itself fails
            cur.close()

    def query(self, sqlQuery, params=None):
        """
        run the given sqlQuery and return a list of Dicts

        Args:

            sqlQuery(string): the SQL query to be executed
            params(tuple): the query params, if any

        Returns:
            list: a list of Dicts
        """
        resultList = []
        for record in self.queryGen(sqlQuery, params):
            resultList.append(record)
        return resultList

    def queryAll(self, entityInfo, fixDates=True):
        """
        query all records for the given entityInfo's entity/table

        Args:
           entityInfo(EntityInfo): meta data of the entity/table to query
           fixDates(boolean): True if date entries should be returned as such and not as strings
        """
        sqlQuery = "SELECT * FROM %s" % entityInfo.name
        resultList = self.query(sqlQuery)
        if fixDates:
            entityInfo.fixDates(resultList)
        return resultList

    def getTableList(self, tableType="table"):
        """
        get the schema information from this database

        Args:
            tableType(str): table or view

        Return:
            list: a list as derived from PRAGMA table_info
        """
        tableQuery = f"SELECT name FROM sqlite_master WHERE type='{tableType}'"
        tableList = self.query(tableQuery)
        for table in tableList:
            tableName = table["name"]
            columnQuery = f"PRAGMA table_info('{tableName}')"
            columns = self.query(columnQuery)
            table["columns"] = columns
        return tableList

    def getTableDict(self, tableType="table"):
        """
        get the schema information from this database as a dict

        Args:
            tableType(str): table or view

        Returns:
            dict: Lookup map of tables with columns also being converted to dict
        """
        tableDict = {}
        for table in self.getTableList(tableType=tableType):
            colDict = {}
            for col in table["columns"]:
                colDict[col["name"]] = col
            table["columns"] = colDict
            tableDict[table["name"]] = table
        return tableDict

    def restoreProgress(self, status, remaining, total):
        self.progress("Restore", status, remaining, total)

    def backupProgress(self, status, remaining, total):
        self.progress("Backup", status, remaining, total)

    def progress(self, action, status, remaining, total):
        """
        show progress

        Args:
            action(str): the action to display e.g. Backup or Restore
            status(int): 0 while pages remain to be copied
            remaining(int): the number of pages still to be copied
            total(int): the total number of pages
        """
        # guard against division by zero for an empty page count
        percent = (total - remaining) / total * 100 if total else 100
        print(
            "%s %s at %5.0f%%"
            % (
                action,
                "... " if status == 0 else "done",
                percent,
            )
        )

    def backup(
        self,
        backupDB,
        action="Backup",
        profile=False,
        showProgress: int = 200,
        doClose=True,
    ):
        """
        create backup of this SQLDB to the given backup db

        see https://stackoverflow.com/a/59042442/1497139

        Args:
            backupDB(string): the path to the backupdb or SQLDB.RAM for in memory
            action(string): the action to display
            profile(boolean): True if timing information shall be shown
            showProgress(int): show progress at each showProgress page (0=show no progress)
            doClose(boolean): if True close the backup connection and return None else return the open connection

        Returns:
            Connection: the backup connection if doClose is False, None otherwise
        """
        # Connection.backup is only available from python 3.7 on
        if sys.version_info <= (3, 6):
            raise Exception(
                "backup via stdlibrary not available in python <=3.6 use copyToDB instead"
            )
        startTime = time.time()
        bck = sqlite3.connect(backupDB)
        if showProgress > 0:
            # pick the progress callback matching the action
            if action == "Restore":
                progress = self.restoreProgress
            else:
                progress = self.backupProgress
        else:
            progress = None
        with bck:
            self.c.backup(bck, pages=showProgress, progress=progress)
        elapsed = time.time() - startTime
        if profile:
            print("%s to %s took %5.1f s" % (action, backupDB, elapsed))
        if doClose:
            bck.close()
            return None
        else:
            return bck

    def showDump(self, dump, limit=10):
        """
        show the given dump up to the given limit

        Args:
            dump(string): the SQL dump to show
            limit(int): the maximum number of lines to display
        """
        s = io.StringIO(dump)
        index = 0
        for line in s:
            if index <= limit:
                print(line)
                index += 1
            else:
                break

    def executeDump(
        self, connection, dump, title, maxErrors=100, errorDisplayLimit=12, profile=True
    ):
        """
        execute the given dump for the given connection

        Args:
            connection(Connection): the sqlite3 connection to use
            dump(string): the SQL commands for the dump
            title(string): the title of the dump
            maxErrors(int): maximum number of errors to be tolerated before stopping and doing a rollback
            errorDisplayLimit(int): maximum number of error messages to print
            profile(boolean): True if profiling information should be shown
        Returns:
            a list of errors
        """
        if self.debug:
            self.showDump(dump)
        startTime = time.time()
        if profile:
            print("dump of %s has size %4.1f MB" % (title, len(dump) / 1024 / 1024))
        errors = []
        index = 0
        # execute statement by statement instead of using executescript
        # fixes https://github.com/WolfgangFahl/ProceedingsTitleParser/issues/37
        for line in dump.split(";\n"):
            try:
                connection.execute(line)
            except sqlite3.OperationalError as soe:
                msg = "SQL error %s in line %d:\n\t%s" % (soe, index, line)
                errors.append(msg)
                # only print the first errorDisplayLimit error messages
                if len(errors) <= errorDisplayLimit:
                    print(msg)
                # too many errors - give up and roll back
                if len(errors) >= maxErrors:
                    connection.execute("ROLLBACK;")
                    break

            index = index + 1
        if profile:
            print(
                "finished executing dump %s with %d lines and %d errors in %5.1f s"
                % (title, index, len(errors), time.time() - startTime)
            )
        return errors

    def copyTo(self, copyDB, profile=True):
        """
        copy my content to another database

        Args:

           copyDB(Connection): the target database
           profile(boolean): if True show profile information
        """
        startTime = time.time()
        dump = "\n".join(self.c.iterdump())
        # cursor.executescript(dump)
        if profile:
            print(
                "finished getting dump of %s in %5.1f s"
                % (self.dbname, time.time() - startTime)
            )
        dumpErrors = self.executeDump(copyDB.c, dump, self.dbname, profile=profile)
        return dumpErrors

    @staticmethod
    def restore(backupDB, restoreDB, profile=False, showProgress=200, debug=False):
        """
        restore the restoreDB from the given backup DB

        Args:
            backupDB(string): path to the backupDB e.g. backup.db
            restoreDB(string): path to the restoreDB or in Memory SQLDB.RAM
            profile(boolean): True if timing information should be shown
            showProgress(int): show progress at each showProgress page (0=show no progress)

        Returns:
            SQLDB: the restored database wrapper
        """
        backupSQLDB = SQLDB(backupDB)
        connection = backupSQLDB.backup(
            restoreDB,
            action="Restore",
            profile=profile,
            showProgress=showProgress,
            doClose=False,
        )
        restoreSQLDB = SQLDB(restoreDB, connection=connection, debug=debug)
        return restoreSQLDB

__init__(dbname=':memory:', connection=None, check_same_thread=True, timeout=5, debug=False, errorDebug=False)

Construct me for the given dbname and debug

Args:

dbname(string): name of the database - default is a RAM based database connection(Connection): an optional connection to be reused check_same_thread(boolean): True if object handling needs to be on the same thread see https://stackoverflow.com/a/48234567/1497139 timeout(float): number of seconds for connection timeout debug(boolean): if True switch on debug errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)

Source code in lodstorage/sql.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def __init__(
    self,
    dbname: str = ":memory:",
    connection=None,
    check_same_thread=True,
    timeout=5,
    debug=False,
    errorDebug=False,
):
    """
    Construct me for the given dbname and debug

    Args:

       dbname(string): name of the database - default is a RAM based database
       connection(Connection): an optional connection to be reused
       check_same_thread(boolean): True if object handling needs to be on the same thread see https://stackoverflow.com/a/48234567/1497139
       timeout(float): number of seconds for connection timeout
       debug(boolean): if True switch on debug
       errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
    """
    self.dbname = dbname
    self.debug = debug
    self.errorDebug = errorDebug
    # open a fresh sqlite3 connection unless one was supplied
    if connection is None:
        self.c = sqlite3.connect(
            dbname,
            detect_types=sqlite3.PARSE_DECLTYPES,
            check_same_thread=check_same_thread,
            timeout=timeout,
        )
    else:
        # reuse the given connection e.g. one returned by backup/restore
        self.c = connection

backup(backupDB, action='Backup', profile=False, showProgress=200, doClose=True)

create backup of this SQLDB to the given backup db

see https://stackoverflow.com/a/59042442/1497139

Parameters:

Name Type Description Default
backupDB(string)

the path to the backupdb or SQLDB.RAM for in memory

required
action(string)

the action to display

required
profile(boolean)

True if timing information shall be shown

required
showProgress(int)

show progress at each showProgress page (0=show no progress)

required
Source code in lodstorage/sql.py
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
def backup(
    self,
    backupDB,
    action="Backup",
    profile=False,
    showProgress: int = 200,
    doClose=True,
):
    """
    create backup of this SQLDB to the given backup db

    see https://stackoverflow.com/a/59042442/1497139

    Args:
        backupDB(string): the path to the backupdb or SQLDB.RAM for in memory
        action(string): the action to display
        profile(boolean): True if timing information shall be shown
        showProgress(int): show progress at each showProgress page (0=show no progress)
        doClose(boolean): if True close the backup connection and return None else return the open connection

    Returns:
        Connection: the backup connection if doClose is False, None otherwise
    """
    # Connection.backup is only available from python 3.7 on
    if sys.version_info <= (3, 6):
        raise Exception(
            "backup via stdlibrary not available in python <=3.6 use copyToDB instead"
        )
    startTime = time.time()
    bck = sqlite3.connect(backupDB)
    if showProgress > 0:
        # choose the progress callback matching the action label
        if action == "Restore":
            progress = self.restoreProgress
        else:
            progress = self.backupProgress
    else:
        progress = None
    with bck:
        self.c.backup(bck, pages=showProgress, progress=progress)
    elapsed = time.time() - startTime
    if profile:
        print("%s to %s took %5.1f s" % (action, backupDB, elapsed))
    if doClose:
        bck.close()
        return None
    else:
        return bck

close()

close my connection

Source code in lodstorage/sql.py
73
74
75
def close(self):
    """close my connection"""
    connection = self.c
    connection.close()

copyTo(copyDB, profile=True)

copy my content to another database

Args:

copyDB(Connection): the target database profile(boolean): if True show profile information

Source code in lodstorage/sql.py
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
def copyTo(self, copyDB, profile=True):
    """
    copy my content to another database

    Args:

       copyDB(Connection): the target database
       profile(boolean): if True show profile information
    """
    startTime = time.time()
    # serialize my whole content to SQL statements
    statements = self.c.iterdump()
    dump = "\n".join(statements)
    if profile:
        elapsed = time.time() - startTime
        print(
            "finished getting dump of %s in %5.1f s" % (self.dbname, elapsed)
        )
    # replay the dump statement by statement on the target connection
    dumpErrors = self.executeDump(copyDB.c, dump, self.dbname, profile=profile)
    return dumpErrors

createTable(listOfRecords, entityName, primaryKey=None, withCreate=True, withDrop=False, sampleRecordCount=1, failIfTooFew=True)

Derive Data Definition Language CREATE TABLE command from list of Records by examining first record as defining sample record and execute DDL command.

Parameters:

Name Type Description Default
listOfRecords list

A list of Dicts.

required
entityName str

The entity / table name to use.

required
primaryKey str

The key/column to use as a primary key.

None
withDrop bool

True if the existing Table should be dropped.

False
withCreate bool

True if the create Table command should be executed.

True
sampleRecordCount int

Number of sample records expected and to be inspected.

1
failIfTooFew bool

Raise an Exception if too few sample records, else warn only.

True

Returns:

Name Type Description
EntityInfo

Meta data information for the created table.

Source code in lodstorage/sql.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
def createTable(
    self,
    listOfRecords,
    entityName: str,
    primaryKey: str = None,
    withCreate: bool = True,
    withDrop: bool = False,
    sampleRecordCount=1,
    failIfTooFew=True,
):
    """
    Derive Data Definition Language CREATE TABLE command from list of Records by examining first record
    as defining sample record and execute DDL command.

    Args:
        listOfRecords (list): A list of Dicts.
        entityName (str): The entity / table name to use.
        primaryKey (str): The key/column to use as a primary key.
        withDrop (bool): True if the existing Table should be dropped.
        withCreate (bool): True if the create Table command should be executed.
        sampleRecordCount (int): Number of sample records expected and to be inspected.
        failIfTooFew (bool): Raise an Exception if too few sample records, else warn only.

    Returns:
        EntityInfo: Meta data information for the created table.
    """
    l = len(listOfRecords)
    # a negative count means: sample every available record
    if sampleRecordCount < 0:
        sampleRecordCount = l
    if l < sampleRecordCount:
        msg = f"only {l}/{sampleRecordCount} of needed sample records to createTable available"
        if failIfTooFew:
            raise Exception(msg)
        if self.debug:
            self.logError(msg)
    # derive the schema from the sample records
    entityInfo = EntityInfo(
        listOfRecords[:sampleRecordCount], entityName, primaryKey, debug=self.debug
    )
    return self.createTable4EntityInfo(entityInfo, withDrop, withCreate)

createTable4EntityInfo(entityInfo, withDrop=False, withCreate=True)

Create a table based on the provided EntityInfo.

Parameters:

Name Type Description Default
entityInfo EntityInfo

The EntityInfo object containing table metadata.

required
withDrop bool

If True, drop the existing table before creation.

False
withCreate bool

If True, execute the CREATE TABLE command.

True

Returns:

Name Type Description
EntityInfo

The provided EntityInfo object.

Source code in lodstorage/sql.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
def createTable4EntityInfo(self, entityInfo, withDrop=False, withCreate=True):
    """
    Create a table based on the provided EntityInfo.

    Args:
        entityInfo (EntityInfo): The EntityInfo object containing table metadata.
        withDrop (bool): If True, drop the existing table before creation.
        withCreate (bool): If True, execute the CREATE TABLE command.

    Returns:
        EntityInfo: The provided EntityInfo object.

    Raises:
        Exception: if the CREATE TABLE command fails
    """
    if withDrop:
        # remove any pre-existing table first
        self.c.execute(entityInfo.dropTableCmd)
    if withCreate:
        try:
            self.c.execute(entityInfo.createTableCmd)
        except sqlite3.OperationalError as oe:
            # chain the original error so the traceback is preserved
            raise Exception(
                f"createTable failed with error {oe} for {entityInfo.createTableCmd}"
            ) from oe
    return entityInfo

execute(ddlCmd)

execute the given Data Definition Command

Parameters:

Name Type Description Default
ddlCmd(string)

e.g. a CREATE TABLE or CREATE View command

required
Source code in lodstorage/sql.py
77
78
79
80
81
82
83
84
def execute(self, ddlCmd):
    """
    execute the given Data Definition Command

    Args:
        ddlCmd(string): e.g. a CREATE TABLE or CREATE View command
    """
    # run the command directly on my connection
    connection = self.c
    connection.execute(ddlCmd)

executeDump(connection, dump, title, maxErrors=100, errorDisplayLimit=12, profile=True)

execute the given dump for the given connection

Parameters:

Name Type Description Default
connection(Connection)

the sqlite3 connection to use

required
dump(string)

the SQL commands for the dump

required
title(string)

the title of the dump

required
maxErrors(int)

maximum number of errors to be tolerated before stopping and doing a rollback

required
profile(boolean)

True if profiling information should be shown

required

Returns: a list of errors

Source code in lodstorage/sql.py
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
def executeDump(
    self, connection, dump, title, maxErrors=100, errorDisplayLimit=12, profile=True
):
    """
    execute the given dump for the given connection

    Args:
        connection(Connection): the sqlite3 connection to use
        dump(string): the SQL commands for the dump
        title(string): the title of the dump
        maxErrors(int): maximum number of errors to be tolerated before stopping and doing a rollback
        errorDisplayLimit(int): maximum number of error messages to print
        profile(boolean): True if profiling information should be shown
    Returns:
        a list of errors
    """
    if self.debug:
        self.showDump(dump)
    startTime = time.time()
    if profile:
        print("dump of %s has size %4.1f MB" % (title, len(dump) / 1024 / 1024))
    errors = []
    index = 0
    # execute statement by statement instead of using executescript
    # fixes https://github.com/WolfgangFahl/ProceedingsTitleParser/issues/37
    for line in dump.split(";\n"):
        try:
            connection.execute(line)
        except sqlite3.OperationalError as soe:
            msg = "SQL error %s in line %d:\n\t%s" % (soe, index, line)
            errors.append(msg)
            # only print the first errorDisplayLimit error messages
            if len(errors) <= errorDisplayLimit:
                print(msg)
            # too many errors - give up and roll back
            if len(errors) >= maxErrors:
                connection.execute("ROLLBACK;")
                break

        index = index + 1
    if profile:
        print(
            "finished executing dump %s with %d lines and %d errors in %5.1f s"
            % (title, index, len(errors), time.time() - startTime)
        )
    return errors

getDebugInfo(record, index, executeMany)

get the debug info for the given record at the given index depending on the state of executeMany

Parameters:

Name Type Description Default
record(dict)

the record to show

required
index(int)

the index of the record

required
executeMany(boolean)

if True the record may be valid else not

required
Source code in lodstorage/sql.py
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
def getDebugInfo(self, record, index, executeMany):
    """
    get the debug info for the given record at the given index depending on the state of executeMany

    Args:
        record(dict): the record to show
        index(int): the index of the record
        executeMany(boolean): if True the record may be valid else not
    """
    if executeMany:
        # in executeMany mode a single record is not meaningful
        return ""
    if self.errorDebug:
        # show the full record - may expose sensitive data, so this
        # should only be switched on outside production
        return "\nrecord  #%d=%s" % (index, repr(record))
    # show only the index of the offending record
    return "\nrecord #%d" % index

getTableDict(tableType='table')

get the schema information from this database as a dict

Parameters:

Name Type Description Default
tableType(str)

table or view

required

Returns:

Name Type Description
dict

Lookup map of tables with columns also being converted to dict

Source code in lodstorage/sql.py
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
def getTableDict(self, tableType="table"):
    """
    get the schema information from this database as a dict

    Args:
        tableType(str): table or view

    Returns:
        dict: Lookup map of tables with columns also being converted to dict
    """
    tableDict = {}
    for table in self.getTableList(tableType=tableType):
        # convert the column list to a lookup by column name
        table["columns"] = {col["name"]: col for col in table["columns"]}
        tableDict[table["name"]] = table
    return tableDict

getTableList(tableType='table')

get the schema information from this database

Parameters:

Name Type Description Default
tableType(str)

table or view

required
Return

list: a list as derived from PRAGMA table_info

Source code in lodstorage/sql.py
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
def getTableList(self, tableType="table"):
    """
    get the schema information from this database

    Args:
        tableType(str): table or view

    Return:
        list: a list as derived from PRAGMA table_info
    """
    tableQuery = f"SELECT name FROM sqlite_master WHERE type='{tableType}'"
    tableList = self.query(tableQuery)
    # enrich every table record with its column metadata
    for table in tableList:
        tableName = table["name"]
        table["columns"] = self.query(f"PRAGMA table_info('{tableName}')")
    return tableList

logError(msg)

log the given error message to stderr

Parameters:

Name Type Description Default
msg(str)

the error message to display

required
Source code in lodstorage/sql.py
64
65
66
67
68
69
70
71
def logError(self, msg):
    """
    log the given error message to stderr

    Args:
        msg(str): the error message to display
    """
    # write to stderr and flush so the message is visible immediately
    sys.stderr.write(f"{msg}\n")
    sys.stderr.flush()

progress(action, status, remaining, total)

show progress

Source code in lodstorage/sql.py
344
345
346
347
348
349
350
351
352
353
354
355
def progress(self, action, status, remaining, total):
    """
    show progress
    """
    print(
        "%s %s at %5.0f%%"
        % (
            action,
            "... " if status == 0 else "done",
            (total - remaining) / total * 100,
        )
    )

query(sqlQuery, params=None)

run the given sqlQuery and return a list of Dicts

Args:

sqlQuery(string): the SQL query to be executed
params(tuple): the query params, if any

Returns:

Name Type Description
list

a list of Dicts

Source code in lodstorage/sql.py
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
def query(self, sqlQuery, params=None):
    """
    run the given sqlQuery and return a list of dicts

    Args:

        sqlQuery(string): the SQL query to be executed
        params(tuple): the query params, if any

    Returns:
        list: a list of dicts - one per result row
    """
    # materialize the generator provided by queryGen
    return list(self.queryGen(sqlQuery, params))

queryAll(entityInfo, fixDates=True)

query all records for the given entityName/tableName

Parameters:

Name Type Description Default
entityName(string)

name of the entity/table to query

required
fixDates(boolean)

True if date entries should be returned as such and not as strings

required
Source code in lodstorage/sql.py
286
287
288
289
290
291
292
293
294
295
296
297
298
def queryAll(self, entityInfo, fixDates=True):
    """
    query all records of the table described by the given entityInfo

    Args:
       entityInfo(EntityInfo): the metadata of the entity/table to query
       fixDates(boolean): True if date entries should be returned as such and not as strings

    Returns:
        list: all records of the table as a list of dicts
    """
    resultList = self.query("SELECT * FROM %s" % entityInfo.name)
    if fixDates:
        # convert date strings back to date objects in place
        entityInfo.fixDates(resultList)
    return resultList

queryGen(sqlQuery, params=None)

run the given sqlQuery as a generator for dicts

Args:

sqlQuery(string): the SQL query to be executed
params(tuple): the query params, if any

Returns:

Type Description

a generator of dicts

Source code in lodstorage/sql.py
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
def queryGen(self, sqlQuery, params=None):
    """
    run the given sqlQuery as a generator of dicts

    Args:

        sqlQuery(string): the SQL query to be executed
        params(tuple): the query params, if any

    Returns:
        a generator of dicts - one per result row

    Raises:
        sqlite3.Error: if executing the query itself fails; errors while
        iterating the rows are logged via logError and end the iteration
    """
    if self.debug:
        print(sqlQuery)
        if params is not None:
            print(params)
    # https://stackoverflow.com/a/13735506/1497139
    cur = self.c.cursor()
    try:
        if params is not None:
            query = cur.execute(sqlQuery, params)
        else:
            query = cur.execute(sqlQuery)
        colname = [d[0] for d in query.description]
        try:
            # loop over all rows
            for row in query:
                record = dict(zip(colname, row))
                yield record
        except Exception as ex:
            # keep the original best-effort behavior:
            # log row-iteration errors and stop yielding
            msg = str(ex)
            self.logError(msg)
    finally:
        # fixed: previously the cursor leaked when execute() raised,
        # since close() was only reached on the happy path
        cur.close()

restore(backupDB, restoreDB, profile=False, showProgress=200, debug=False) staticmethod

restore the restoreDB from the given backup DB

Parameters:

Name Type Description Default
backupDB(string)

path to the backupDB e.g. backup.db

required
restoreDB(string)

path to the restoreDB or in Memory SQLDB.RAM

required
profile(boolean)

True if timing information should be shown

required
showProgress(int)

show progress at each showProgress page (0=show no progress)

required
Source code in lodstorage/sql.py
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
@staticmethod
def restore(backupDB, restoreDB, profile=False, showProgress=200, debug=False):
    """
    restore the restoreDB from the given backup DB

    Args:
        backupDB(string): path to the backupDB e.g. backup.db
        restoreDB(string): path to the restoreDB or in Memory SQLDB.RAM
        profile(boolean): True if timing information should be shown
        showProgress(int): show progress at each showProgress page (0=show no progress)
        debug(boolean): True if debugging information should be shown

    Returns:
        SQLDB: a wrapper around the restored database
    """
    sourceDB = SQLDB(backupDB)
    # run the backup mechanism in the restore direction and keep the
    # connection open so it can be handed to the restored wrapper
    connection = sourceDB.backup(
        restoreDB,
        action="Restore",
        profile=profile,
        showProgress=showProgress,
        doClose=False,
    )
    restoredDB = SQLDB(restoreDB, connection=connection, debug=debug)
    return restoredDB

showDump(dump, limit=10)

show the given dump up to the given limit

Parameters:

Name Type Description Default
dump(string)

the SQL dump to show

required
limit(int)

the maximum number of lines to display

required
Source code in lodstorage/sql.py
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
def showDump(self, dump, limit=10):
    """
    show the given dump up to the given limit of lines

    Args:
        dump(string): the SQL dump to show
        limit(int): the maximum number of lines to display
    """
    # fixed off-by-one: the previous `index <= limit` check
    # printed limit+1 lines instead of limit
    for index, line in enumerate(io.StringIO(dump)):
        if index >= limit:
            break
        print(line)

store(listOfRecords, entityInfo, executeMany=False, fixNone=False, replace=False)

store the given list of records based on the given entityInfo

Args:

listOfRecords(list): the list of Dicts to be stored
entityInfo(EntityInfo): the meta data to be used for storing
executeMany(bool): if True the insert command is done with many/all records at once
fixNone(bool): if True make sure empty columns in the listOfDict are filled with "None" values
replace(bool): if True allow replace for insert

Source code in lodstorage/sql.py
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
def store(
    self, listOfRecords, entityInfo, executeMany=False, fixNone=False, replace=False
):
    """
    store the given list of records based on the given entityInfo

    Args:

       listOfRecords(list): the list of Dicts to be stored
       entityInfo(EntityInfo): the meta data to be used for storing
       executeMany(bool): if True the insert command is done with many/all records at once
       fixNone(bool): if True make sure empty columns in the listOfDict are filled with "None" values
       replace(bool): if True allow replace for insert

    Raises:
        Exception: on binding/value errors, enriched with the insert
        command, the offending column name and optional debug info
    """
    insertCmd = entityInfo.getInsertCmd(replace=replace)
    # track the current record and its 1-based index for error reporting
    record = None
    index = 0
    try:
        if executeMany:
            if fixNone:
                # fill missing columns with None for all records at once
                LOD.setNone4List(listOfRecords, entityInfo.typeMap.keys())
            self.c.executemany(insertCmd, listOfRecords)
        else:
            for record in listOfRecords:
                index += 1
                if fixNone:
                    LOD.setNone(record, entityInfo.typeMap.keys())
                self.c.execute(insertCmd, record)
        self.c.commit()
    except sqlite3.ProgrammingError as pe:
        msg = pe.args[0]
        if "You did not supply a value for binding" in msg:
            # extract the column that has no value - the message format
            # differs between Python/sqlite versions
            if ":" in msg:
                # sqlite now returns the parameter name not the number
                # You did not supply a value for binding parameter :type.
                columnName = re.findall(r":([a-zA-Z][a-zA-Z0-9_]*)", msg)[0]
                columnName = columnName.replace(":", "")
            else:
                # pre python 3.10
                # You did not supply a value for binding 2.
                columnIndex = int(re.findall(r"\d+", msg)[0])
                # binding numbers are 1-based - map back to the column name
                columnName = list(entityInfo.typeMap.keys())[columnIndex - 1]
            debugInfo = self.getDebugInfo(record, index, executeMany)
            raise Exception(
                "%s\nfailed: no value supplied for column '%s'%s"
                % (insertCmd, columnName, debugInfo)
            )
        else:
            raise pe
    except sqlite3.InterfaceError as ie:
        msg = ie.args[0]
        if "Error binding parameter" in msg:
            # e.g. Error binding parameter :name - probably unsupported type
            columnName = re.findall(r":[_a-zA-Z]\w*", msg)[0]
            debugInfo = self.getDebugInfo(record, index, executeMany)
            raise Exception(
                "%s\nfailed: error binding column '%s'%s"
                % (insertCmd, columnName, debugInfo)
            )
        else:
            raise ie
    except Exception as ex:
        # generic fallback: wrap any other error with insert command context
        debugInfo = self.getDebugInfo(record, index, executeMany)
        msg = "%s\nfailed:%s%s" % (insertCmd, str(ex), debugInfo)
        raise Exception(msg)

adapt_boolean(val)

Adapt boolean to int

Source code in lodstorage/sql.py
672
673
674
def adapt_boolean(val: bool):
    """Adapt a boolean to an int (1 for truthy, 0 for falsy) for SQLite storage"""
    return int(bool(val))

adapt_date_iso(val)

Adapt datetime.date to ISO 8601 date.

Source code in lodstorage/sql.py
657
658
659
def adapt_date_iso(val: datetime.date):
    """Adapt datetime.date to an ISO 8601 date string."""
    iso_date = val.isoformat()
    return iso_date

adapt_datetime_epoch(val)

Adapt datetime.datetime to Unix timestamp.

Source code in lodstorage/sql.py
667
668
669
def adapt_datetime_epoch(val: datetime.datetime):
    """Adapt datetime.datetime to microseconds since the Unix epoch."""
    seconds = val.timestamp()
    # scale the epoch seconds to microseconds
    return float(seconds) * 1_000_000

adapt_datetime_iso(val)

Adapt datetime.datetime to timezone-naive ISO 8601 date.

Source code in lodstorage/sql.py
662
663
664
def adapt_datetime_iso(val: datetime.datetime):
    """Adapt datetime.datetime to an ISO 8601 string (no timezone is added)."""
    iso_value = val.isoformat()
    return iso_value

convert_boolean(val)

Convert 0 or 1 to boolean

Source code in lodstorage/sql.py
823
824
825
826
827
def convert_boolean(val: bytes):
    """
    Convert a stored 0/1 value back to a boolean
    """
    return int(val) == 1

convert_date(val)

Convert byte string to date using the DatetimeAdapter.

Source code in lodstorage/sql.py
805
806
807
808
def convert_date(val: bytes) -> datetime.date:
    """Convert a byte string to a date via a DatetimeAdapter instance."""
    date_value = DatetimeAdapter().convert_date(val)
    return date_value

convert_datetime(val)

Convert byte string to datetime using the DatetimeAdapter.

Source code in lodstorage/sql.py
811
812
813
814
def convert_datetime(val: bytes) -> datetime.datetime:
    """Convert a byte string to a datetime via a DatetimeAdapter instance."""
    datetime_value = DatetimeAdapter().convert_datetime(val)
    return datetime_value

convert_timestamp(val)

Convert byte string to timestamp using the DatetimeAdapter.

Source code in lodstorage/sql.py
817
818
819
820
def convert_timestamp(val: bytes) -> datetime.datetime:
    """Convert a byte string to a timestamp via a DatetimeAdapter instance."""
    timestamp_value = DatetimeAdapter().convert_timestamp(val)
    return timestamp_value

sql_cache

Created on 2024-03-16

@author: wf

Cached

Manage cached entities.

Source code in lodstorage/sql_cache.py
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
class Cached:
    """
    Manage cached entities.

    Records are fetched via SPARQL as a list of dicts, validated into
    instances of the given class and persisted in a local SQL database
    which acts as the cache.

    Note:
        self.lod is only set by fetch_from_local() or get_lod(); calling
        to_entities()/store() before one of those will fail.
    """

    def __init__(
        self,
        clazz: Type[Any],
        sparql: SPARQL,
        sql_db: str,
        query_name: str,
        max_errors: int = 0,
        debug: bool = False,
    ):
        """
        Initializes the Manager with class reference, SPARQL endpoint URL, SQL database connection string,
        query name, and an optional debug flag.

        Args:
            clazz (Type[Any]): The class reference for the type of objects managed by this manager.
            sparql (SPARQL): a SPARQL endpoint.
            sql_db (str): The connection string for the SQL database.
                NOTE(review): despite the str annotation this is used as an
                object with .engine and .get_session() - looks like a SqlDB
                instance; confirm and fix the annotation.
            query_name (str): The name of the query to be executed.
            max_errors (int): maximum number of tolerated validation errors. Defaults to 0.
            debug (bool, optional): Flag to enable debug mode. Defaults to False.
        """
        self.clazz = clazz
        self.sparql = sparql
        self.sql_db = sql_db
        self.query_name = query_name
        self.max_errors = max_errors
        self.debug = debug
        # state filled by fetch_from_local/get_lod/to_entities
        self.entities = []
        self.errors = []
        self.fetched = False
        # Ensure the table for the class exists
        clazz.metadata.create_all(self.sql_db.engine)

    def fetch_or_query(self, qm, force_query=False) -> List[Dict]:
        """
        Fetches data from the local cache if available.
        If the data is not in the cache or if force_query is True,
        it queries via SPARQL and caches the results.

        Args:
            qm (QueryManager): The query manager object used for making SPARQL queries.
            force_query (bool, optional): A flag to force querying via SPARQL even if the data exists in the local cache. Defaults to False.
        Returns:
            List: list of records from the SQL database
        """
        if not force_query and self.check_local_cache():
            lod = self.fetch_from_local()
        else:
            lod = self.get_lod(qm)
            # NOTE(review): store() is called without forwarding a
            # max_errors value, so self.max_errors applies here
            self.store()
        return lod

    def check_local_cache(self) -> bool:
        """
        Checks if there is data in the local cache (SQL database).

        Returns:
            bool: True if there is at least one record in the local SQL cache table
        """
        with self.sql_db.get_session() as session:
            # only the first row is needed to decide whether data exists
            result = session.exec(select(self.clazz)).first()
            return result is not None

    def fetch_from_local(self) -> List[Dict]:
        """
        Fetches data from the local SQL database as list of dicts and entities.

        Returns:
            List[Dict]: List of records from the SQL database in dictionary form.
        """
        profiler = Profiler(f"fetch {self.query_name} from local", profile=self.debug)
        with self.sql_db.get_session() as session:
            self.entities = session.exec(select(self.clazz)).all()
            # keep a list-of-dicts view alongside the entity instances
            self.lod = [entity.dict() for entity in self.entities]
            if self.debug:
                print(f"Loaded {len(self.entities)} records from local cache")
        profiler.time()
        return self.lod

    def get_lod(self, qm: QueryManager) -> List[Dict]:
        """
        Fetches data using the SPARQL query specified by my query_name.

        Args:
            qm (QueryManager): The query manager object used for making SPARQL queries.

        Returns:
            List[Dict]: A list of dictionaries representing the data fetched.
        """
        profiler = Profiler(
            f"fetch {self.query_name} from SPARQL endpoint {self.sparql.url}",
            profile=self.debug,
        )
        query = qm.queriesByName[self.query_name]
        self.lod = self.sparql.queryAsListOfDicts(query.query)
        profiler.time()
        if self.debug:
            print(f"Found {len(self.lod)} records for {self.query_name}")
        return self.lod

    def to_entities(self, max_errors: int = None, cached: bool = True) -> List[Any]:
        """
        Converts records fetched from the LOD into entity instances, applying validation.

        Args:
            max_errors (int, optional): Maximum allowed validation errors. Defaults to self.max_errors.
            cached(bool): if True use existing entries
        Returns:
            List[Any]: A list of entity instances that have passed validation.

        Raises:
            Exception: if more than max_errors records fail validation
        """
        if not cached:
            # restart conversion from scratch
            self.entities = []
            self.errors = []
        elif self.fetched:
            # reuse the previously converted entities
            return self.entities

        error_records = []
        if max_errors is None:
            max_errors = self.max_errors
        for record in self.lod:
            try:
                entity = self.clazz.model_validate(record)
                self.entities.append(entity)
            except Exception as e:
                # collect errors and the offending records for reporting
                self.errors.append(e)
                error_records.append(record)
        error_count = len(self.errors)
        if error_count > max_errors:
            msg = f"found {error_count} errors > maximum allowed {max_errors} errors"
            if self.debug:
                print(msg)
                for i, e in enumerate(self.errors):
                    print(f"{i}:{str(e)} for \n{error_records[i]}")
            raise Exception(msg)
        self.fetched = True
        return self.entities

    def store(self, max_errors: int = None) -> List[Any]:
        """
        Stores the fetched data into the local SQL database.

        Args:
            max_errors (int, optional): Maximum allowed validation errors. Defaults to 0.

        Returns:
            List[Any]: A list of entity instances that were stored in the database.

        """
        profiler = Profiler(f"store {self.query_name}", profile=self.debug)
        # re-validate from self.lod, ignoring any previously converted entities
        self.to_entities(max_errors=max_errors, cached=False)
        with self.sql_db.get_session() as session:
            session.add_all(self.entities)
            session.commit()
            if self.debug:
                print(f"Stored {len(self.entities)} records in local cache")
        profiler.time()
        return self.entities

__init__(clazz, sparql, sql_db, query_name, max_errors=0, debug=False)

Initializes the Manager with class reference, SPARQL endpoint URL, SQL database connection string, query name, and an optional debug flag.

Parameters:

Name Type Description Default
clazz Type[Any]

The class reference for the type of objects managed by this manager.

required
sparql SPARQL

a SPARQL endpoint.

required
sql_db str

The connection string for the SQL database.

required
query_name str

The name of the query to be executed.

required
debug bool

Flag to enable debug mode. Defaults to False.

False
Source code in lodstorage/sql_cache.py
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
def __init__(
    self,
    clazz: Type[Any],
    sparql: SPARQL,
    sql_db: str,
    query_name: str,
    max_errors: int = 0,
    debug: bool = False,
):
    """
    Create a cache manager for the given entity class.

    Args:
        clazz (Type[Any]): The class reference for the type of objects managed by this manager.
        sparql (SPARQL): a SPARQL endpoint.
        sql_db (str): The connection string for the SQL database.
            NOTE(review): used as an object with .engine and .get_session() -
            looks like a SqlDB instance; confirm the annotation.
        query_name (str): The name of the query to be executed.
        max_errors (int): maximum number of tolerated validation errors. Defaults to 0.
        debug (bool, optional): Flag to enable debug mode. Defaults to False.
    """
    # configuration
    self.clazz = clazz
    self.sparql = sparql
    self.sql_db = sql_db
    self.query_name = query_name
    self.max_errors = max_errors
    self.debug = debug
    # state filled by later fetch/query calls
    self.entities = []
    self.errors = []
    self.fetched = False
    # Ensure the table for the class exists
    clazz.metadata.create_all(self.sql_db.engine)

check_local_cache()

Checks if there is data in the local cache (SQL database).

Returns:

Name Type Description
bool bool

True if there is at least one record in the local SQL cache table

Source code in lodstorage/sql_cache.py
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def check_local_cache(self) -> bool:
    """
    Checks whether the local SQL cache table already holds data.

    Returns:
        bool: True if there is at least one record in the local SQL cache table
    """
    with self.sql_db.get_session() as session:
        # fetching just the first row is enough to decide
        first_record = session.exec(select(self.clazz)).first()
    return first_record is not None

fetch_from_local()

Fetches data from the local SQL database as list of dicts and entities.

Returns:

Type Description
List[Dict]

List[Dict]: List of records from the SQL database in dictionary form.

Source code in lodstorage/sql_cache.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def fetch_from_local(self) -> List[Dict]:
    """
    Fetches data from the local SQL database as list of dicts and entities.

    Returns:
        List[Dict]: List of records from the SQL database in dictionary form.
    """
    profiler = Profiler(f"fetch {self.query_name} from local", profile=self.debug)
    with self.sql_db.get_session() as session:
        self.entities = session.exec(select(self.clazz)).all()
        # keep a list-of-dicts view alongside the entity instances
        self.lod = [entity_obj.dict() for entity_obj in self.entities]
    if self.debug:
        print(f"Loaded {len(self.entities)} records from local cache")
    profiler.time()
    return self.lod

fetch_or_query(qm, force_query=False)

Fetches data from the local cache if available. If the data is not in the cache or if force_query is True, it queries via SPARQL and caches the results.

Parameters:

Name Type Description Default
qm QueryManager

The query manager object used for making SPARQL queries.

required
force_query bool

A flag to force querying via SPARQL even if the data exists in the local cache. Defaults to False.

False

Returns: List: list of records from the SQL database

Source code in lodstorage/sql_cache.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def fetch_or_query(self, qm, force_query=False) -> List[Dict]:
    """
    Get the records either from the local cache or via SPARQL.

    When force_query is False and the local cache holds data, the cached
    copy is returned; otherwise the SPARQL endpoint is queried and the
    result is stored in the cache.

    Args:
        qm (QueryManager): The query manager object used for making SPARQL queries.
        force_query (bool, optional): A flag to force querying via SPARQL even if the data exists in the local cache. Defaults to False.
    Returns:
        List: list of records from the SQL database
    """
    use_cache = not force_query and self.check_local_cache()
    if use_cache:
        return self.fetch_from_local()
    lod = self.get_lod(qm)
    self.store()
    return lod

get_lod(qm)

Fetches data using the SPARQL query specified by my query_name.

Parameters:

Name Type Description Default
qm QueryManager

The query manager object used for making SPARQL queries.

required

Returns:

Type Description
List[Dict]

List[Dict]: A list of dictionaries representing the data fetched.

Source code in lodstorage/sql_cache.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
def get_lod(self, qm: QueryManager) -> List[Dict]:
    """
    Run the SPARQL query registered under my query_name and keep the result.

    Args:
        qm (QueryManager): The query manager object used for making SPARQL queries.

    Returns:
        List[Dict]: A list of dictionaries representing the data fetched.
    """
    profiler = Profiler(
        f"fetch {self.query_name} from SPARQL endpoint {self.sparql.url}",
        profile=self.debug,
    )
    named_query = qm.queriesByName[self.query_name]
    self.lod = self.sparql.queryAsListOfDicts(named_query.query)
    profiler.time()
    if self.debug:
        print(f"Found {len(self.lod)} records for {self.query_name}")
    return self.lod

store(max_errors=None)

Stores the fetched data into the local SQL database.

Parameters:

Name Type Description Default
max_errors int

Maximum allowed validation errors. Defaults to 0.

None

Returns:

Type Description
List[Any]

List[Any]: A list of entity instances that were stored in the database.

Source code in lodstorage/sql_cache.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
def store(self, max_errors: int = None) -> List[Any]:
    """
    Persist the fetched records to the local SQL database.

    Args:
        max_errors (int, optional): Maximum allowed validation errors. Defaults to 0.

    Returns:
        List[Any]: A list of entity instances that were stored in the database.
    """
    profiler = Profiler(f"store {self.query_name}", profile=self.debug)
    # re-validate from self.lod, discarding previously converted entities
    self.to_entities(max_errors=max_errors, cached=False)
    with self.sql_db.get_session() as session:
        session.add_all(self.entities)
        session.commit()
    if self.debug:
        print(f"Stored {len(self.entities)} records in local cache")
    profiler.time()
    return self.entities

to_entities(max_errors=None, cached=True)

Converts records fetched from the LOD into entity instances, applying validation.

Parameters:

Name Type Description Default
max_errors int

Maximum allowed validation errors. Defaults to 0.

None
cached(bool)

if True use existing entries

required

Returns: List[Any]: A list of entity instances that have passed validation.

Source code in lodstorage/sql_cache.py
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
def to_entities(self, max_errors: int = None, cached: bool = True) -> List[Any]:
    """
    Converts the fetched list-of-dict records into validated entity instances.

    Args:
        max_errors (int, optional): Maximum allowed validation errors; defaults to self.max_errors.
        cached(bool): if True reuse already converted entities

    Returns:
        List[Any]: A list of entity instances that have passed validation.

    Raises:
        Exception: if more than max_errors records fail validation
    """
    if cached and self.fetched:
        # reuse the previously converted entities
        return self.entities
    if not cached:
        # restart conversion from scratch
        self.entities = []
        self.errors = []

    failed_records = []
    max_errors = self.max_errors if max_errors is None else max_errors
    for record in self.lod:
        try:
            self.entities.append(self.clazz.model_validate(record))
        except Exception as validation_error:
            # collect the error and the offending record for reporting
            self.errors.append(validation_error)
            failed_records.append(record)
    error_count = len(self.errors)
    if error_count > max_errors:
        msg = f"found {error_count} errors > maximum allowed {max_errors} errors"
        if self.debug:
            print(msg)
            for i, e in enumerate(self.errors):
                print(f"{i}:{str(e)} for \n{failed_records[i]}")
        raise Exception(msg)
    self.fetched = True
    return self.entities

SqlDB

general SQL database access using SQL Alchemy

Source code in lodstorage/sql_cache.py
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
class SqlDB:
    """
    general SQL database access using SQL Alchemy
    """

    def __init__(self, sqlite_file_path: str, debug: bool = False):
        """
        Create the engine for the given sqlite file.

        Args:
            sqlite_file_path (str): the path to the sqlite database file
            debug (bool): if True echo SQL statements
        """
        self.debug = debug
        # sessions are created per call, so allow cross-thread usage
        self.engine = create_engine(
            f"sqlite:///{sqlite_file_path}",
            echo=debug,
            connect_args={"check_same_thread": False},
        )

    def get_session(self) -> Session:
        """
        Provide a session for database operations.

        Returns:
            Session: A SQLAlchemy Session object bound to the engine for database operations.
        """
        session = Session(bind=self.engine)
        return session

get_session()

Provide a session for database operations.

Returns:

Name Type Description
Session Session

A SQLAlchemy Session object bound to the engine for database operations.

Source code in lodstorage/sql_cache.py
26
27
28
29
30
31
32
33
def get_session(self) -> Session:
    """
    Provide a session for database operations.

    Returns:
        Session: A SQLAlchemy Session object bound to the engine for database operations.
    """
    session = Session(bind=self.engine)
    return session

storageconfig

Created on 2020-08-29

@author: wf

StorageConfig

Bases: object

a storage configuration

Source code in lodstorage/storageconfig.py
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
class StorageConfig(object):
    """
    a storage configuration
    """

    def __init__(
        self,
        mode=StoreMode.SQL,
        cacheRootDir: str = None,
        cacheDirName: str = "lodstorage",
        cacheFile=None,
        withShowProgress=True,
        profile=True,
        debug=False,
        errorDebug=True,
    ):
        """
        Constructor

        Args:
            mode(StoreMode): the storage mode e.g. sql
            cacheRootDir(str): the cache root directory to use - if None the home directory will be used
            cacheDirName(str): the name of the cache directory (a dot is prepended)
            cacheFile(string): the common cacheFile to use (if any)
            withShowProgress(boolean): True if progress should be shown
            profile(boolean): True if timing / profiling information should be shown
            debug(boolean): True if debugging information should be shown
            errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
        """
        # fall back to the user's home directory when no root is given
        if cacheRootDir is None:
            self.cacheRootDir = str(Path.home())
        else:
            self.cacheRootDir = cacheRootDir
        self.cacheDirName = cacheDirName
        self.mode = mode
        self.cacheFile = cacheFile
        self.profile = profile
        self.withShowProgress = withShowProgress
        self.debug = debug
        self.errorDebug = errorDebug

    def getCachePath(self, ensureExists=True) -> str:
        """
        get the path to the default cache directory

        Args:
            ensureExists(bool): if True create the directory if needed

        Returns:
            str: the path of the cache directory
        """
        cachedir = f"{self.cacheRootDir}/.{self.cacheDirName}"
        if ensureExists:
            os.makedirs(cachedir, exist_ok=True)
        return cachedir

    @staticmethod
    def getDefault(debug=False):
        """get the default storage configuration (SQL)"""
        return StorageConfig.getSQL(debug)

    @staticmethod
    def getSQL(debug=False):
        """get an SQL storage configuration"""
        config = StorageConfig(mode=StoreMode.SQL, debug=debug)
        config.tableName = None
        return config

    @staticmethod
    def getJSON(debug=False):
        """get a JSON storage configuration"""
        return StorageConfig(mode=StoreMode.JSON, debug=debug)

    @staticmethod
    def getJsonPickle(debug=False):
        """get a JsonPickle storage configuration"""
        return StorageConfig(mode=StoreMode.JSONPICKLE, debug=debug)

    @staticmethod
    def getSPARQL(prefix, endpoint, host, debug=False):
        """get a SPARQL storage configuration for the given endpoint"""
        config = StorageConfig(mode=StoreMode.SPARQL, debug=debug)
        config.prefix = prefix
        config.host = host
        config.endpoint = endpoint
        return config

    @staticmethod
    def getYaml(debug=False):
        """get a YAML storage configuration"""
        return StorageConfig(mode=StoreMode.YAML, debug=debug)

__init__(mode=StoreMode.SQL, cacheRootDir=None, cacheDirName='lodstorage', cacheFile=None, withShowProgress=True, profile=True, debug=False, errorDebug=True)

Constructor

Parameters:

Name Type Description Default
mode(StoreMode)

the storage mode e.g. sql

required
cacheRootDir(str)

the cache root directory to use - if None the home directory will be used

required
cacheFile(string)

the common cacheFile to use (if any)

required
withShowProgress(boolean)

True if progress should be shown

required
profile(boolean)

True if timing / profiling information should be shown

required
debug(boolean)

True if debugging information should be shown

required
errorDebug(boolean)

True if debug info should be provided on errors (should not be used for production since it might reveal data)

required
Source code in lodstorage/storageconfig.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def __init__(
    self,
    mode=StoreMode.SQL,
    cacheRootDir: str = None,
    cacheDirName: str = "lodstorage",
    cacheFile=None,
    withShowProgress=True,
    profile=True,
    debug=False,
    errorDebug=True,
):
    """
    Constructor

    Args:
        mode(StoreMode): the storage mode e.g. sql
        cacheRootDir(str): the cache root directory to use - if None the home directory will be used
        cacheDirName(str): the name of the cache directory below the cache root - default: "lodstorage"
        cacheFile(string): the common cacheFile to use (if any)
        withShowProgress(boolean): True if progress should be shown
        profile(boolean): True if timing / profiling information should be shown
        debug(boolean): True if debugging information should be shown
        errorDebug(boolean): True if debug info should be provided on errors (should not be used for production since it might reveal data)
    """
    if cacheRootDir is None:
        # fall back to the user's home directory
        self.cacheRootDir = str(Path.home())
    else:
        self.cacheRootDir = cacheRootDir
    self.cacheDirName = cacheDirName
    self.mode = mode
    self.cacheFile = cacheFile
    self.profile = profile
    self.withShowProgress = withShowProgress
    self.debug = debug
    self.errorDebug = errorDebug

getCachePath(ensureExists=True)

get the path to the default cache

Parameters:

Name Type Description Default
name(str)

the name of the cache to use

required
Source code in lodstorage/storageconfig.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def getCachePath(self, ensureExists=True) -> str:
    """
    get the path to the cache directory

    Args:
        ensureExists (bool): if True create the cache directory if it does not exist yet

    Returns:
        str: the path of the cache directory derived from cacheRootDir and cacheDirName
    """
    # hidden directory below the cache root, e.g. ~/.lodstorage
    cachedir = f"{self.cacheRootDir}/.{self.cacheDirName}"
    if ensureExists:
        # exist_ok avoids a race between an existence check and creation
        os.makedirs(cachedir, exist_ok=True)
    return cachedir

StoreMode

Bases: Enum

possible supported storage modes

Source code in lodstorage/storageconfig.py
11
12
13
14
15
16
17
18
19
20
class StoreMode(Enum):
    """
    possible supported storage modes
    """

    JSONPICKLE = 1  # JSON Pickle
    JSON = 2  # plain JSON
    SQL = 3  # SQL database
    SPARQL = 4  # SPARQL endpoint
    YAML = 5  # YAML files

sync

Created on 2023-12-27

@author: wf

Sync

A class to help with synchronization between two sets of data, each represented as a list of dictionaries.

Source code in lodstorage/sync.py
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
class Sync:
    """
    A class to help with synchronization between two sets of data, each represented as a list of dictionaries.
    """

    def __init__(self, pair: SyncPair):
        """
        Initialize the Sync class with the given Synchronization Pair.

        Args:
            pair (SyncPair): the pair of data sources to synchronize
        """
        self.pair = pair
        # valid direction symbols and the aliases accepted for each side
        self.directions = ["←", "↔", "→"]
        self.sides = {"left": ["←", "l", "left"], "right": ["→", "r", "right"]}
        # precompute which keys need to flow in which direction
        self.sync_dict = self._create_sync_dict()

    def handle_direction_error(self, direction: str):
        """
        Raise a ValueError for an invalid synchronization direction.

        Args:
            direction (str): the offending direction value

        Raises:
            ValueError: always
        """
        invalid_direction_msg = (
            f"Invalid direction '{direction}'. Use {', '.join(self.directions)}."
        )
        raise ValueError(invalid_direction_msg)

    def handle_side_error(self, side: str):
        """
        Raise a ValueError for an invalid side specification.

        Args:
            side (str): the offending side value

        Raises:
            ValueError: always
        """
        invalid_side_msg = f"Invalid side '{side}'. Use {', '.join(self.sides['left'])} for left or {', '.join(self.sides['right'])} for right."
        raise ValueError(invalid_side_msg)

    def _create_sync_dict(self) -> dict:
        """
        Create a dictionary representing the synchronization state between left and right data sources.

        Returns:
            dict: maps each direction symbol to the set of keys present on the corresponding side(s)
        """
        # records without the sync key are ignored
        l_keys = {d[self.pair.l_key] for d in self.pair.l_data if self.pair.l_key in d}
        r_keys = {d[self.pair.r_key] for d in self.pair.r_data if self.pair.r_key in d}

        sync_dict = {
            "←": r_keys - l_keys,  # Present in right but not in left
            "↔": l_keys.intersection(r_keys),  # Present in both
            "→": l_keys - r_keys,  # Present in left but not in right
        }
        return sync_dict

    def get_record_by_pkey(self, side: str, pkey: str) -> Optional[Dict[str, Any]]:
        """
        Retrieves a record by primary key from the appropriate data source as specified by direction.

        Args:
            side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
            pkey (str): The primary key of the record to retrieve.

        Returns:
            Optional[Dict[str, Any]]: The record if found, otherwise None.
        """
        record = None
        if side in self.sides["left"]:  # retrieve from left
            record = self.pair.l_by_pkey.get(pkey)
        elif side in self.sides["right"]:  # retrieve from right
            record = self.pair.r_by_pkey.get(pkey)
        else:
            self.handle_side_error(side)
        return record

    def get_record_by_key(self, side: str, key: str) -> dict:
        """
        Retrieves a record by the given unique key from the appropriate data source as specified by direction.

        Args:
            side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
            key (str): The unique key of the record to retrieve.

        Returns:
            Optional[Dict[str, Any]]: The record if found, otherwise None.

        Raises:
            ValueError: If the provided direction is invalid.
        """
        record = None
        # use the shared side alias table for consistency with get_record_by_pkey
        if side in self.sides["left"]:
            record = next(
                (item for item in self.pair.l_data if item[self.pair.l_key] == key),
                None,
            )
        elif side in self.sides["right"]:
            record = next(
                (item for item in self.pair.r_data if item[self.pair.r_key] == key),
                None,
            )
        else:
            self.handle_side_error(side)
        return record

    def get_keys(self, direction: str) -> set:
        """
        Get the keys for a given direction of synchronization.

        Args:
            direction (str): one of "←", "↔" or "→"

        Returns:
            set: the keys for the given direction

        Raises:
            ValueError: If the provided direction is invalid.
        """
        if direction in self.sync_dict:
            return self.sync_dict[direction]
        else:
            self.handle_direction_error(direction)

    def status_table(self, tablefmt: str = "grid") -> str:
        """
        Create a table representing the synchronization status.

        Args:
            tablefmt (str): the tabulate table format to use

        Returns:
            str: the tabulated status markup
        """
        total_records = sum(len(keys) for keys in self.sync_dict.values())
        if total_records == 0:  # Avoid division by zero
            total_records = 1

        table_data = []
        for direction, keys in self.sync_dict.items():
            num_records = len(keys)
            percentage = (num_records / total_records) * 100
            table_data.append(
                {
                    "left": self.pair.l_name,
                    "↔": direction,
                    "right": self.pair.r_name,
                    "#": num_records,
                    "%": f"{percentage:7.2f}%",
                }
            )

        markup = tabulate(
            table_data,
            headers="keys",
            tablefmt=tablefmt,
            colalign=("right", "center", "left", "right", "right"),
        )
        return markup

__init__(pair)

Initialize the Sync class with the given Synchronization Pair.

Source code in lodstorage/sync.py
66
67
68
69
70
71
72
73
def __init__(self, pair: SyncPair):
    """
    Initialize the Sync class with the given Synchronization Pair.

    Args:
        pair (SyncPair): the pair of data sources to keep in sync
    """
    self.pair = pair
    # valid direction symbols and the aliases accepted for each side
    self.directions = ["←", "↔", "→"]
    self.sides = {"left": ["←", "l", "left"], "right": ["→", "r", "right"]}
    # precompute which keys need to flow in which direction
    self.sync_dict = self._create_sync_dict()

get_keys(direction)

Get the keys for a given direction of synchronization.

Source code in lodstorage/sync.py
148
149
150
151
152
153
154
155
def get_keys(self, direction: str) -> set:
    """
    Get the keys for a given direction of synchronization.

    Args:
        direction (str): one of "←", "↔" or "→"

    Returns:
        set: the keys for the given direction
    """
    try:
        return self.sync_dict[direction]
    except KeyError:
        # unknown direction - delegate to the error handler (raises ValueError)
        self.handle_direction_error(direction)

get_record_by_key(side, key)

Retrieves a record by the given unique key from the appropriate data source as specified by direction.

Parameters:

Name Type Description Default
side str

The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.

required
key str

The unique key of the record to retrieve.

required

Returns:

Type Description
dict

Optional[Dict[str, Any]]: The record if found, otherwise None.

Raises:

Type Description
ValueError

If the provided direction is invalid.

Source code in lodstorage/sync.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def get_record_by_key(self, side: str, key: str) -> dict:
    """
    Retrieves a record by the given unique key from the appropriate data source as specified by direction.

    Args:
        side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
        key (str): The unique key of the record to retrieve.

    Returns:
        Optional[Dict[str, Any]]: The record if found, otherwise None.

    Raises:
        ValueError: If the provided direction is invalid.
    """
    record = None
    # use the shared side alias table for consistency with get_record_by_pkey
    if side in self.sides["left"]:
        record = next(
            (item for item in self.pair.l_data if item[self.pair.l_key] == key),
            None,
        )
    elif side in self.sides["right"]:
        record = next(
            (item for item in self.pair.r_data if item[self.pair.r_key] == key),
            None,
        )
    else:
        self.handle_side_error(side)
    return record

get_record_by_pkey(side, pkey)

Retrieves a record by primary key from the appropriate data source as specified by direction.

Parameters:

Name Type Description Default
side str

The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.

required
pkey str

The primary key of the record to retrieve.

required

Returns:

Type Description
Optional[Dict[str, Any]]

Optional[Dict[str, Any]]: The record if found, otherwise None.

Source code in lodstorage/sync.py
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def get_record_by_pkey(self, side: str, pkey: str) -> Optional[Dict[str, Any]]:
    """
    Retrieves a record by primary key from the appropriate data source as specified by direction.

    Args:
        side (str): The side of data source, "←","l" or "left" for left and "→","r" or "right" for right.
        pkey (str): The primary key of the record to retrieve.

    Returns:
        Optional[Dict[str, Any]]: The record if found, otherwise None.
    """
    if side in self.sides["left"]:
        # left-hand primary key lookup
        return self.pair.l_by_pkey.get(pkey)
    if side in self.sides["right"]:
        # right-hand primary key lookup
        return self.pair.r_by_pkey.get(pkey)
    # neither alias list matched - report the invalid side (raises ValueError)
    self.handle_side_error(side)

status_table(tablefmt='grid')

Create a table representing the synchronization status.

Source code in lodstorage/sync.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
def status_table(self, tablefmt: str = "grid") -> str:
    """
    Create a table representing the synchronization status.

    Args:
        tablefmt (str): the tabulate table format to use

    Returns:
        str: the tabulated status markup
    """
    # fall back to 1 to avoid division by zero when there are no records at all
    total_records = sum(len(keys) for keys in self.sync_dict.values()) or 1

    table_data = [
        {
            "left": self.pair.l_name,
            "↔": direction,
            "right": self.pair.r_name,
            "#": len(keys),
            "%": f"{(len(keys) / total_records) * 100:7.2f}%",
        }
        for direction, keys in self.sync_dict.items()
    ]

    markup = tabulate(
        table_data,
        headers="keys",
        tablefmt=tablefmt,
        colalign=("right", "center", "left", "right", "right"),
    )
    return markup

SyncPair dataclass

A class to represent a pair of data sources for synchronization.

Attributes: title (str): The title of the synchronization pair. l_name (str): Name of the left data source (e.g., 'local'). r_name (str): Name of the right data source (e.g., 'wikidata'). l_data (List[Dict[str, Any]]): A list of dictionaries from the left data source. r_data (List[Dict[str, Any]]): A list of dictionaries from the right data source. l_key (str): The field name in the left data source dictionaries used as a unique identifier for synchronization. r_key (str): The field name in the right data source dictionaries used as a unique identifier for synchronization. l_pkey(str): the primary key field of the left data source r_pkey(str): the primary key field of the right data source

Example usage: l_data = [{'id_l': '1', 'value': 'a'}, {'id_l': '2', 'value': 'b'}] r_data = [{'id_r': '2', 'value': 'b'}, {'id_r': '3', 'value': 'c'}] pair = SyncPair("Title", "local", "wikidata", l_data, r_data, 'id_l', 'id_r') sync = Sync(pair) print(sync.status_table())

Source code in lodstorage/sync.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
@dataclass
class SyncPair:
    """
    A class to represent a pair of data sources for synchronization.

    Attributes:
        title (str): The title of the synchronization pair.
        l_name (str): Name of the left data source (e.g., 'local').
        r_name (str): Name of the right data source (e.g., 'wikidata').
        l_data (List[Dict[str, Any]]): A list of dictionaries from the left data source.
        r_data (List[Dict[str, Any]]): A list of dictionaries from the right data source.
        l_key (str): The field name in the left data source dictionaries used as a unique identifier for synchronization.
        r_key (str): The field name in the right data source dictionaries used as a unique identifier for synchronization.
        l_pkey (str): the primary key field of the left data source
        r_pkey (str): the primary key field of the right data source

    Example usage:
        l_data = [{'id_l': '1', 'value': 'a'}, {'id_l': '2', 'value': 'b'}]
        r_data = [{'id_r': '2', 'value': 'b'}, {'id_r': '3', 'value': 'c'}]
        pair = SyncPair("Title", "local", "wikidata", l_data, r_data, 'id_l', 'id_r')
        sync = Sync(pair)
        print(sync.status_table())
    """

    title: str
    l_name: str
    r_name: str
    l_data: List[Dict[str, Any]]
    r_data: List[Dict[str, Any]]
    l_key: str
    r_key: str
    l_pkey: Optional[str] = None
    r_pkey: Optional[str] = None
    # lookup dictionaries for quick access by primary key; built in __post_init__
    l_by_pkey: Dict[str, Dict[str, Any]] = field(init=False)
    r_by_pkey: Dict[str, Dict[str, Any]] = field(init=False)

    def __post_init__(self):
        """
        Derive the primary key fields and build the per-side lookup dictionaries.
        """
        # fall back to the sync keys when no explicit primary keys are given
        if self.l_pkey is None:
            self.l_pkey = self.l_key
        if self.r_pkey is None:
            self.r_pkey = self.r_key
        # records lacking the primary key field are skipped
        self.l_by_pkey = {
            record[self.l_pkey]: record
            for record in self.l_data
            if self.l_pkey in record
        }
        self.r_by_pkey = {
            record[self.r_pkey]: record
            for record in self.r_data
            if self.r_pkey in record
        }

tabulateCounter

Created on 2021-06-13

@author: wf

TabulateCounter

Bases: object

helper for tabulating Counters

Source code in lodstorage/tabulateCounter.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
class TabulateCounter(object):
    """
    helper for tabulating Counters
    """

    def __init__(self, counter):
        """
        Constructor

        Args:
            counter (Counter): the counter to tabulate - must support most_common()
        """
        self.counter = counter

    def mostCommonTable(
        self, headers=("#", "key", "count", "%"), tablefmt="pretty", limit=50
    ):
        """
        get the most common Table

        Args:
            headers (Sequence[str]): the column headers to use
            tablefmt (str): the tabulate table format
            limit (int): the maximum number of bins to show

        Returns:
            str: the tabulated text
        """
        bins = len(self.counter.keys())
        limit = min(bins, limit)
        total = sum(self.counter.values())
        # first row is a summary row; note it has no percentage column
        binTable = [("total", bins, total)]
        for i, (key, count) in enumerate(self.counter.most_common(limit)):
            binTable.append((i + 1, key, count, count / total * 100.0))

        table = tabulate(binTable, headers=headers, tablefmt=tablefmt, floatfmt=".2f")
        return table

__init__(counter)

Constructor

Source code in lodstorage/tabulateCounter.py
14
15
16
17
18
def __init__(self, counter):
    """
    Constructor

    Args:
        counter (Counter): the counter to tabulate - must support most_common()
    """
    self.counter = counter

mostCommonTable(headers=['#', 'key', 'count', '%'], tablefmt='pretty', limit=50)

get the most common Table

Source code in lodstorage/tabulateCounter.py
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
def mostCommonTable(
    self, headers=("#", "key", "count", "%"), tablefmt="pretty", limit=50
):
    """
    get the most common Table

    Args:
        headers (Sequence[str]): the column headers to use
        tablefmt (str): the tabulate table format
        limit (int): the maximum number of bins to show

    Returns:
        str: the tabulated text
    """
    bins = len(self.counter.keys())
    limit = min(bins, limit)
    total = sum(self.counter.values())
    # first row is a summary row; note it has no percentage column
    binTable = [("total", bins, total)]
    for i, (key, count) in enumerate(self.counter.most_common(limit)):
        binTable.append((i + 1, key, count, count / total * 100.0))

    table = tabulate(binTable, headers=headers, tablefmt=tablefmt, floatfmt=".2f")
    return table

uml

Created on 2020-09-04

@author: wf

UML

Bases: object

UML diagrams via plantuml

Source code in lodstorage/uml.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
class UML(object):
    """
    UML diagrams via plantuml

    """

    # verbatim PlantUML skin parameter snippet appended to generated diagrams
    # when withSkin is True - do not edit without checking rendered output
    skinparams = """
' BITPlan Corporate identity skin params
' Copyright (c) 2015-2020 BITPlan GmbH
' see http://wiki.bitplan.com/PlantUmlSkinParams#BITPlanCI
' skinparams generated by com.bitplan.restmodelmanager
skinparam note {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam component {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam package {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam usecase {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam activity {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam classAttribute {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam interface {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam class {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
skinparam object {
  BackGroundColor #FFFFFF
  FontSize 12
  ArrowColor #FF8000
  BorderColor #FF8000
  FontColor black
  FontName Technical
}
hide Circle
' end of skinparams '
"""

    def __init__(self, debug=False):
        """
        Constructor
        Args:
            debug(boolean): True if debug information should be shown
        """
        self.debug = debug

    def tableListToPlantUml(
        self, tableList, title=None, packageName=None, generalizeTo=None, withSkin=True
    ):
        """
        convert tableList to PlantUml notation

        Args:
            tableList(list): the tableList list of Dicts from getTableList() to convert
            title(string): optional title to be added
            packageName(string): optional packageName to be added
            generalizeTo(string): optional name of a general table to be derived
            withSkin(boolean): if True add default BITPlan skin parameters

        Returns:
            string: the Plantuml notation for the entities in columns of the given tablelist
        """
        uml = ""
        indent = ""
        inherit = ""
        if title is not None:
            uml += "title\n%s\nend title\n" % title
        if packageName is not None:
            uml += "package %s {\n" % packageName
            indent = "  "
        if generalizeTo is not None:
            # derive a general table and draw inheritance arrows from it to all tables
            # NOTE: this mutates the caller's tableList by inserting the general table
            generalTable = Schema.getGeneral(tableList, generalizeTo)
            for table in tableList:
                inherit += "%s%s <|-- %s\n" % (indent, generalizeTo, table["name"])
            tableList.insert(0, generalTable)
        for table in tableList:
            colUml = ""
            # render columns in alphabetical order by name
            sortedColumns = sorted(table["columns"], key=lambda col: col["name"])
            for col in sortedColumns:
                mandatory = "*" if col["notnull"] == 1 else ""  # "*" marks NOT NULL
                pk = "<<PK>>" if col["pk"] == 1 else ""  # mark primary key columns
                colName = col["name"]
                colType = col["type"]
                if "link" in col:
                    # link markup replaces the plain column name
                    colName = col["link"]
                colUml += "%s %s%s : %s %s\n" % (
                    indent,
                    mandatory,
                    colName,
                    colType,
                    pk,
                )
            tableName = table["name"]
            if "notes" in table:
                uml += "Note top of %s\n%s\nEnd note\n" % (tableName, table["notes"])
            uml += "%sclass %s << Entity >> {\n%s%s}\n" % (
                indent,
                tableName,
                colUml,
                indent,
            )
        uml += inherit
        if packageName is not None:
            uml += "}\n"
        if withSkin:
            uml += UML.skinparams
        return uml

    def mergeSchema(
        self,
        schemaManager,
        tableList,
        title=None,
        packageName=None,
        generalizeTo=None,
        withSkin=True,
    ):
        """
        merge Schema and tableList to PlantUml notation

        Args:
            schemaManager(SchemaManager): a schema manager to be used
            tableList(list): the tableList list of Dicts from getTableList() to convert
            title(string): optional title to be added
            packageName(string): optional packageName to be added
            generalizeTo(string): optional name of a general table to be derived
            withSkin(boolean): if True add default BITPlan skin parameters

        Returns:
            string: the Plantuml notation for the entities in columns of the given tablelist

        """
        if schemaManager is not None:
            # enrich each table that references a schema with wiki link notes
            for table in tableList:
                if "schema" in table:
                    schema = schemaManager.schemasByName[table["schema"]]
                    url = "%s/%s" % (schemaManager.baseUrl, schema.name)
                    url = url.replace(" ", "_")  # mediawiki
                    instanceNote = ""
                    if "instances" in table:
                        instanceNote = "\n%d instances " % (table["instances"])
                    # note linking the table to its schema page ([[url label]] syntax)
                    table["notes"] = """[[%s %s]]%s""" % (
                        url,
                        schema.name,
                        instanceNote,
                    )
                    for col in table["columns"]:
                        colName = col["name"]
                        if colName in schema.propsByName:
                            prop = schema.propsByName[colName]
                            if prop.iri is not None:
                                tooltip = ""
                                if prop.definition is not None:
                                    tooltip = "{%s}" % prop.definition
                                # render the column as a link to its property IRI
                                col["link"] = "[[%s%s %s]]" % (
                                    prop.iri,
                                    tooltip,
                                    colName,
                                )
                                col["special"] = True  # keep column even if generalized
                    pass
        plantuml = self.tableListToPlantUml(
            tableList,
            title=title,
            packageName=packageName,
            generalizeTo=generalizeTo,
            withSkin=withSkin,
        )
        return plantuml

__init__(debug=False)

Constructor. Args: debug (boolean): True if debug information should be shown

Source code in lodstorage/uml.py
 96
 97
 98
 99
100
101
102
def __init__(self, debug=False):
    """
    Constructor
    Args:
        debug(boolean): True if debug information should be shown
    """
    # debug flag controlling verbose output
    self.debug = debug

mergeSchema(schemaManager, tableList, title=None, packageName=None, generalizeTo=None, withSkin=True)

merge Schema and tableList to PlantUml notation

Parameters:

Name Type Description Default
schemaManager(SchemaManager)

a schema manager to be used

required
tableList(list)

the tableList list of Dicts from getTableList() to convert

required
title(string)

optional title to be added

required
packageName(string)

optional packageName to be added

required
generalizeTo(string)

optional name of a general table to be derived

required
withSkin(boolean)

if True add default BITPlan skin parameters

required

Returns:

Name Type Description
string

the Plantuml notation for the entities in columns of the given tablelist

Source code in lodstorage/uml.py
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def mergeSchema(
    self,
    schemaManager,
    tableList,
    title=None,
    packageName=None,
    generalizeTo=None,
    withSkin=True,
):
    """
    merge Schema and tableList to PlantUml notation

    Args:
        schemaManager(SchemaManager): a schema manager to be used
        tableList(list): the tableList list of Dicts from getTableList() to convert
        title(string): optional title to be added
        packageName(string): optional packageName to be added
        generalizeTo(string): optional name of a general table to be derived
        withSkin(boolean): if True add default BITPlan skin parameters

    Returns:
        string: the Plantuml notation for the entities in columns of the given tablelist

    """
    if schemaManager is not None:
        # enrich each table that references a schema with wiki link notes
        for table in tableList:
            if "schema" in table:
                schema = schemaManager.schemasByName[table["schema"]]
                url = "%s/%s" % (schemaManager.baseUrl, schema.name)
                url = url.replace(" ", "_")  # mediawiki
                instanceNote = ""
                if "instances" in table:
                    instanceNote = "\n%d instances " % (table["instances"])
                # note linking the table to its schema page ([[url label]] syntax)
                table["notes"] = """[[%s %s]]%s""" % (
                    url,
                    schema.name,
                    instanceNote,
                )
                for col in table["columns"]:
                    colName = col["name"]
                    if colName in schema.propsByName:
                        prop = schema.propsByName[colName]
                        if prop.iri is not None:
                            tooltip = ""
                            if prop.definition is not None:
                                tooltip = "{%s}" % prop.definition
                            # render the column as a link to its property IRI
                            col["link"] = "[[%s%s %s]]" % (
                                prop.iri,
                                tooltip,
                                colName,
                            )
                            col["special"] = True  # keep column even if generalized
                pass
    plantuml = self.tableListToPlantUml(
        tableList,
        title=title,
        packageName=packageName,
        generalizeTo=generalizeTo,
        withSkin=withSkin,
    )
    return plantuml

tableListToPlantUml(tableList, title=None, packageName=None, generalizeTo=None, withSkin=True)

convert tableList to PlantUml notation

Parameters:

Name Type Description Default
tableList(list)

the tableList list of Dicts from getTableList() to convert

required
title(string)

optional title to be added

required
packageName(string)

optional packageName to be added

required
generalizeTo(string)

optional name of a general table to be derived

required
withSkin(boolean)

if True add default BITPlan skin parameters

required

Returns:

Name Type Description
string

the Plantuml notation for the entities in columns of the given tablelist

Source code in lodstorage/uml.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def tableListToPlantUml(
    self, tableList, title=None, packageName=None, generalizeTo=None, withSkin=True
):
    """
    convert tableList to PlantUml notation

    Args:
        tableList(list): the tableList list of Dicts from getTableList() to convert
        title(string): optional title to be added
        packageName(string): optional packageName to be added
        generalizeTo(string): optional name of a general table to be derived
        withSkin(boolean): if True add default BITPlan skin parameters

    Returns:
        string: the Plantuml notation for the entities in columns of the given tablelist
    """
    uml = ""
    indent = ""
    inherit = ""
    if title is not None:
        uml += "title\n%s\nend title\n" % title
    if packageName is not None:
        uml += "package %s {\n" % packageName
        indent = "  "
    if generalizeTo is not None:
        # derive a general table and draw inheritance arrows from it to all tables
        # NOTE: this mutates the caller's tableList by inserting the general table
        generalTable = Schema.getGeneral(tableList, generalizeTo)
        for table in tableList:
            inherit += "%s%s <|-- %s\n" % (indent, generalizeTo, table["name"])
        tableList.insert(0, generalTable)
    for table in tableList:
        colUml = ""
        # render columns in alphabetical order by name
        sortedColumns = sorted(table["columns"], key=lambda col: col["name"])
        for col in sortedColumns:
            mandatory = "*" if col["notnull"] == 1 else ""  # "*" marks NOT NULL
            pk = "<<PK>>" if col["pk"] == 1 else ""  # mark primary key columns
            colName = col["name"]
            colType = col["type"]
            if "link" in col:
                # link markup replaces the plain column name
                colName = col["link"]
            colUml += "%s %s%s : %s %s\n" % (
                indent,
                mandatory,
                colName,
                colType,
                pk,
            )
        tableName = table["name"]
        if "notes" in table:
            uml += "Note top of %s\n%s\nEnd note\n" % (tableName, table["notes"])
        uml += "%sclass %s << Entity >> {\n%s%s}\n" % (
            indent,
            tableName,
            colUml,
            indent,
        )
    uml += inherit
    if packageName is not None:
        uml += "}\n"
    if withSkin:
        uml += UML.skinparams
    return uml

version

Created on 2022-03-06

@author: wf

Version

Bases: object

Version handling for pyLoDStorage

Source code in lodstorage/version.py
 9
10
11
12
13
14
15
16
17
18
class Version(object):
    """
    Version handling for pyLoDStorage
    """

    # distribution name as published on PyPI
    name = "pylodstorage"
    # version string taken from the installed package (single source of truth)
    version = lodstorage.__version__
    # initial release date of the project
    date = "2020-09-10"
    # date of the most recent update
    updated = "2024-08-02"
    # short human-readable description of the package
    description = "python List of Dict (Table) Storage library"

xml

Created on 2022-06-20

see https://github.com/tyleradams/json-toolkit https://stackoverflow.com/questions/36021526/converting-an-array-dict-to-xml-in-python

@author: tyleradams @author: wf

Lod2Xml

convert a list of dicts to XML

Source code in lodstorage/xml.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
class Lod2Xml:
    """
    convert a list of dicts to XML
    """

    def __init__(
        self, lod, root: str = "root", node_name: callable = (lambda x: "node")
    ):
        """
        construct me with the given list of dicts

        Args:
            lod (list): the list of dicts to convert to XML
            root (str): the name of the root node
            node_name (callable): function that derives the XML element name for each item
        """
        self.lod = lod
        self.root = root
        self.item_name = node_name

    def asXml(self, pretty: bool = True):
        """
        convert result to XML

        Args:
            pretty (bool): if True pretty print the result

        Returns:
            the XML serialization of my list of dicts; pretty-printed text when
            pretty is True, otherwise the raw dicttoxml output
        """
        # attr_type=False suppresses per-element type attributes in the output
        xml = dicttoxml(
            self.lod, custom_root=self.root, item_func=self.item_name, attr_type=False
        )
        if pretty:
            dom = parseString(xml)
            prettyXml = dom.toprettyxml()
        else:
            prettyXml = xml
        return prettyXml

__init__(lod, root='root', node_name=lambda x: 'node')

construct me with the given list of dicts

Parameters:

Name Type Description Default
lod list

the list of dicts to convert to XML

required
root str

the name of the root node

'root'
item_name func

the function to use to calculate node names

required
Source code in lodstorage/xml.py
21
22
23
24
25
26
27
28
29
30
31
32
33
34
def __init__(
    self, lod, root: str = "root", node_name: callable = (lambda x: "node")
):
    """
    construct me with the given list of dicts

    Args:
        lod (list): the list of dicts to convert to XML
        root (str): the name of the root node
        node_name (callable): function that derives the XML element name for each item
    """
    self.lod = lod
    self.root = root
    self.item_name = node_name

asXml(pretty=True)

convert result to XML

Parameters:

Name Type Description Default
pretty bool

if True pretty print the result

True
Source code in lodstorage/xml.py
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def asXml(self, pretty: bool = True):
    """
    convert my list of dicts to XML

    Args:
        pretty (bool): if True pretty print the result

    Returns:
        the XML serialization; pretty-printed text when pretty is True,
        otherwise the raw dicttoxml output
    """
    # attr_type=False avoids type attributes on every element
    raw_xml = dicttoxml(
        self.lod,
        custom_root=self.root,
        item_func=self.item_name,
        attr_type=False,
    )
    if not pretty:
        return raw_xml
    document = parseString(raw_xml)
    return document.toprettyxml()

yamlable

Created on 2023-12-08, Extended on 2023-12-16 and 2024-01-25

@author: wf, ChatGPT

Prompts for the development and extension of the 'YamlAble' class within the 'yamlable' module:

  1. Develop 'YamlAble' class in 'yamlable' module. It should convert dataclass instances to/from YAML.
  2. Implement methods for YAML block scalar style and exclude None values in 'YamlAble' class.
  3. Add functionality to remove None values from dataclass instances before YAML conversion.
  4. Ensure 'YamlAble' processes only dataclass instances, with error handling for non-dataclass objects.
  5. Extend 'YamlAble' for JSON serialization and deserialization.
  6. Add methods for saving/loading dataclass instances to/from YAML and JSON files in 'YamlAble'.
  7. Implement loading of dataclass instances from URLs for both YAML and JSON in 'YamlAble'.
  8. Write tests for 'YamlAble' within the pyLodStorage context. Use 'samples 2' example from pyLoDStorage https://github.com/WolfgangFahl/pyLoDStorage/blob/master/lodstorage/sample2.py as a reference.
  9. Ensure tests cover YAML/JSON serialization, deserialization, and file I/O operations, using the sample-based approach.
  10. Use Google-style docstrings, comments, and type hints in 'YamlAble' class and tests.
  11. Adhere to instructions and seek clarification for any uncertainties.
  12. Add @lod_storable annotation support that will automatically YamlAble support and add @dataclass and @dataclass_json prerequisite behavior to a class

DateConvert

date converter

Source code in lodstorage/yamlable.py
76
77
78
79
80
81
82
83
84
class DateConvert:
    """
    date converter
    """

    @classmethod
    def iso_date_to_datetime(cls, iso_date: str) -> datetime.date:
        """
        Convert an ISO-8601 date string (YYYY-MM-DD) to a date object.

        A falsy input (None or empty string) yields None.
        """
        if not iso_date:
            return None
        return datetime.strptime(iso_date, "%Y-%m-%d").date()

YamlAble

Bases: Generic[T]

An extended YAML handler class for converting dataclass objects to and from YAML format, and handling loading from and saving to files and URLs.

Source code in lodstorage/yamlable.py
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
class YamlAble(Generic[T]):
    """
    An extended YAML handler class for converting dataclass objects to and from YAML format,
    and handling loading from and saving to files and URLs.
    """

    def _yaml_setup(self):
        """
        Initializes the YamlAble handler, setting up custom representers and preparing it for various operations.

        Raises:
            ValueError: if I am not a dataclass instance
        """
        if not is_dataclass(self):
            raise ValueError("I must be a dataclass instance.")
        if not hasattr(self, "_yaml_dumper"):
            # use a private Dumper subclass so the custom representers do not
            # leak into the globally shared yaml.Dumper class and affect
            # unrelated yaml.dump callers in the same process
            class IsolatedDumper(yaml.Dumper):
                pass

            IsolatedDumper.ignore_aliases = lambda *_args: True
            IsolatedDumper.add_representer(type(None), self.represent_none)
            IsolatedDumper.add_representer(str, self.represent_literal)
            self._yaml_dumper = IsolatedDumper

    def represent_none(self, _, __) -> yaml.Node:
        """
        Custom representer for ignoring None values in the YAML output.

        PyYAML invokes representers as fn(dumper, data), so `_` is the active
        Dumper instance and `__` is the None value being represented.
        """
        # represent_scalar must be called on the Dumper *instance* supplied by
        # PyYAML; calling it via the Dumper class (as before) raised a
        # TypeError as soon as a None value actually reached the dumper
        dumper = _
        return dumper.represent_scalar("tag:yaml.org,2002:null", "")

    def represent_literal(self, dumper: yaml.Dumper, data: str) -> yaml.Node:
        """
        Custom representer for block scalar style for strings.
        """
        # multi-line strings are emitted in literal block style ('|')
        if "\n" in data:
            return dumper.represent_scalar("tag:yaml.org,2002:str", data, style="|")
        return dumper.represent_scalar("tag:yaml.org,2002:str", data)

    def to_yaml(
        self,
        ignore_none: bool = True,
        ignore_underscore: bool = True,
        allow_unicode: bool = True,
        sort_keys: bool = False,
    ) -> str:
        """
        Converts this dataclass object to a YAML string, with options to omit None values and/or underscore-prefixed variables,
        and using block scalar style for strings.

        Args:
            ignore_none: Flag to indicate whether None values should be removed from the YAML output.
            ignore_underscore: Flag to indicate whether attributes starting with an underscore should be excluded from the YAML output.
            allow_unicode: Flag to indicate whether to allow unicode characters in the output.
            sort_keys: Flag to indicate whether to sort the dictionary keys in the output.

        Returns:
            A string representation of the dataclass object in YAML format.
        """
        obj_dict = asdict(self)
        self._yaml_setup()
        clean_dict = self.remove_ignored_values(
            obj_dict, ignore_none, ignore_underscore
        )
        yaml_str = yaml.dump(
            clean_dict,
            Dumper=self._yaml_dumper,
            default_flow_style=False,
            allow_unicode=allow_unicode,
            sort_keys=sort_keys,
        )
        return yaml_str

    @classmethod
    def from_yaml(cls: Type[T], yaml_str: str) -> T:
        """
        Deserializes a YAML string to a dataclass instance.

        Args:
            yaml_str (str): A string containing YAML formatted data.

        Returns:
            T: An instance of the dataclass.
        """
        data: dict[str, Any] = yaml.safe_load(yaml_str)
        # from_dict is provided by the dataclass_json mixin applied via @lod_storable
        instance: T = cls.from_dict(data)
        return instance

    @classmethod
    def load_from_yaml_file(cls: Type[T], filename: str) -> T:
        """
        Loads a dataclass instance from a YAML file.

        Args:
            filename (str): The path to the YAML file.

        Returns:
            T: An instance of the dataclass.
        """
        # read explicitly as UTF-8 for consistency with save_to_yaml_file
        # and the JSON load/save counterparts
        with open(filename, "r", encoding="utf-8") as file:
            yaml_str: str = file.read()
        instance: T = cls.from_yaml(yaml_str)
        return instance

    @classmethod
    def load_from_yaml_url(cls: Type[T], url: str) -> T:
        """
        Loads a dataclass instance from a YAML string obtained from a URL.

        Args:
            url (str): The URL pointing to the YAML data.

        Returns:
            T: An instance of the dataclass.
        """
        yaml_str: str = cls.read_from_url(url)
        instance: T = cls.from_yaml(yaml_str)
        return instance

    def save_to_yaml_file(self, filename: str):
        """
        Saves the current dataclass instance to a YAML file.

        Args:
            filename (str): The path where the YAML file will be saved.
        """
        yaml_content: str = self.to_yaml()
        with open(filename, "w", encoding="utf-8") as file:
            file.write(yaml_content)

    @classmethod
    def load_from_json_file(cls: Type[T], filename: Union[str, Path]) -> T:
        """
        Loads a dataclass instance from a JSON file.

        Args:
            filename (str): The path to the JSON file.

        Returns:
            T: An instance of the dataclass.
        """
        with open(filename, "r", encoding="utf-8") as file:
            json_str: str = file.read()
        instance: T = cls.from_json(json_str)
        return instance

    @classmethod
    def load_from_json_url(cls: Type[T], url: str) -> T:
        """
        Loads a dataclass instance from a JSON string obtained from a URL.

        Args:
            url (str): The URL pointing to the JSON data.

        Returns:
            T: An instance of the dataclass.
        """
        json_str: str = cls.read_from_url(url)
        instance: T = cls.from_json(json_str)
        return instance

    def save_to_json_file(self, filename: str, **kwargs):
        """
        Saves the current dataclass instance to a JSON file.

        Args:
            filename (str): The path where the JSON file will be saved.
            **kwargs: Additional keyword arguments for the `to_json` method.
        """
        json_content: str = self.to_json(**kwargs)
        with open(filename, "w", encoding="utf-8") as file:
            file.write(json_content)

    @classmethod
    def read_from_url(cls, url: str) -> str:
        """
        Helper method to fetch content from a URL.

        Raises:
            Exception: if the HTTP status code is not 200
        """
        with urllib.request.urlopen(url) as response:
            if response.status == 200:
                return response.read().decode()
            else:
                raise Exception(f"Unable to load data from URL: {url}")

    @classmethod
    def remove_ignored_values(
        cls,
        value: Any,
        ignore_none: bool = True,
        ignore_underscore: bool = False,
        ignore_empty: bool = True,
    ) -> Any:
        """
        Recursively removes specified types of values from a dictionary or list.
        By default, it removes keys with None values. Optionally, it can also remove keys starting with an underscore.

        Args:
            value: The value to process (dictionary, list, or other).
            ignore_none: Flag to indicate whether None values should be removed.
            ignore_underscore: Flag to indicate whether keys starting with an underscore should be removed.
            ignore_empty: Flag to indicate whether empty collections should be removed.
        """

        def is_valid(v):
            """Check if the value is valid based on the specified flags."""
            if ignore_none and v is None:
                return False
            if ignore_empty:
                if isinstance(v, Mapping) and not v:
                    return False  # Empty dictionary
                if (
                    isinstance(v, Iterable)
                    and not isinstance(v, (str, bytes))
                    and not v
                ):
                    return (
                        False  # Empty list, set, tuple, etc., but not string or bytes
                    )
            return True

        if isinstance(value, Mapping):
            value = {
                k: YamlAble.remove_ignored_values(
                    v, ignore_none, ignore_underscore, ignore_empty
                )
                for k, v in value.items()
                if is_valid(v) and (not ignore_underscore or not k.startswith("_"))
            }
        elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
            # any non-string iterable (tuple, set, ...) is normalized to a list
            value = [
                YamlAble.remove_ignored_values(
                    v, ignore_none, ignore_underscore, ignore_empty
                )
                for v in value
                if is_valid(v)
            ]
        return value

    @classmethod
    def from_dict2(cls: Type[T], data: dict) -> T:
        """
        Creates an instance of a dataclass from a dictionary, typically used in deserialization.

        Returns None for an empty or missing dictionary.
        """
        if not data:
            return None
        # from_dict is provided by the third-party dacite library
        instance = from_dict(data_class=cls, data=data)
        return instance

from_dict2(data) classmethod

Creates an instance of a dataclass from a dictionary, typically used in deserialization.

Source code in lodstorage/yamlable.py
318
319
320
321
322
323
324
325
326
@classmethod
def from_dict2(cls: Type[T], data: dict) -> T:
    """
    Creates an instance of a dataclass from a dictionary, typically used in deserialization.

    Returns None for an empty or missing dictionary.
    """
    if data:
        # delegate to dacite's from_dict for the actual construction
        return from_dict(data_class=cls, data=data)
    return None

from_yaml(yaml_str) classmethod

Deserializes a YAML string to a dataclass instance.

Parameters:

Name Type Description Default
yaml_str str

A string containing YAML formatted data.

required

Returns:

Name Type Description
T T

An instance of the dataclass.

Source code in lodstorage/yamlable.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
@classmethod
def from_yaml(cls: Type[T], yaml_str: str) -> T:
    """
    Deserializes a YAML string to a dataclass instance.

    Args:
        yaml_str (str): A string containing YAML formatted data.

    Returns:
        T: An instance of the dataclass.
    """
    # parse with the safe loader, then delegate to the dataclass factory
    parsed: dict[str, Any] = yaml.safe_load(yaml_str)
    return cls.from_dict(parsed)

load_from_json_file(filename) classmethod

Loads a dataclass instance from a JSON file.

Parameters:

Name Type Description Default
filename str

The path to the JSON file.

required

Returns:

Name Type Description
T T

An instance of the dataclass.

Source code in lodstorage/yamlable.py
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
@classmethod
def load_from_json_file(cls: Type[T], filename: Union[str, Path]) -> T:
    """
    Loads a dataclass instance from a JSON file.

    Args:
        filename: The path to the JSON file.

    Returns:
        T: An instance of the dataclass.
    """
    # Path.read_text handles open/close and decodes as UTF-8
    json_str: str = Path(filename).read_text(encoding="utf-8")
    return cls.from_json(json_str)

load_from_json_url(url) classmethod

Loads a dataclass instance from a JSON string obtained from a URL.

Parameters:

Name Type Description Default
url str

The URL pointing to the JSON data.

required

Returns:

Name Type Description
T T

An instance of the dataclass.

Source code in lodstorage/yamlable.py
226
227
228
229
230
231
232
233
234
235
236
237
238
239
@classmethod
def load_from_json_url(cls: Type[T], url: str) -> T:
    """
    Loads a dataclass instance from a JSON string obtained from a URL.

    Args:
        url (str): The URL pointing to the JSON data.

    Returns:
        T: An instance of the dataclass.
    """
    # fetch first, then delegate to the dataclass-json deserializer
    payload: str = cls.read_from_url(url)
    return cls.from_json(payload)

load_from_yaml_file(filename) classmethod

Loads a dataclass instance from a YAML file.

Parameters:

Name Type Description Default
filename str

The path to the YAML file.

required

Returns:

Name Type Description
T T

An instance of the dataclass.

Source code in lodstorage/yamlable.py
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
@classmethod
def load_from_yaml_file(cls: Type[T], filename: str) -> T:
    """
    Loads a dataclass instance from a YAML file.

    Args:
        filename (str): The path to the YAML file.

    Returns:
        T: An instance of the dataclass.
    """
    # read explicitly as UTF-8 for consistency with save_to_yaml_file
    # and the JSON load/save counterparts (previously the platform default
    # encoding was used, which breaks round-trips on e.g. Windows/cp1252)
    with open(filename, "r", encoding="utf-8") as file:
        yaml_str: str = file.read()
    instance: T = cls.from_yaml(yaml_str)
    return instance

load_from_yaml_url(url) classmethod

Loads a dataclass instance from a YAML string obtained from a URL.

Parameters:

Name Type Description Default
url str

The URL pointing to the YAML data.

required

Returns:

Name Type Description
T T

An instance of the dataclass.

Source code in lodstorage/yamlable.py
184
185
186
187
188
189
190
191
192
193
194
195
196
197
@classmethod
def load_from_yaml_url(cls: Type[T], url: str) -> T:
    """
    Loads a dataclass instance from a YAML string obtained from a URL.

    Args:
        url (str): The URL pointing to the YAML data.

    Returns:
        T: An instance of the dataclass.
    """
    # fetch first, then reuse the plain YAML deserialization path
    payload: str = cls.read_from_url(url)
    return cls.from_yaml(payload)

read_from_url(url) classmethod

Helper method to fetch content from a URL.

Source code in lodstorage/yamlable.py
253
254
255
256
257
258
259
260
261
262
@classmethod
def read_from_url(cls, url: str) -> str:
    """
    Helper method to fetch content from a URL.

    Raises:
        Exception: if the HTTP status code is not 200
    """
    with urllib.request.urlopen(url) as response:
        if response.status != 200:
            raise Exception(f"Unable to load data from URL: {url}")
        return response.read().decode()

remove_ignored_values(value, ignore_none=True, ignore_underscore=False, ignore_empty=True) classmethod

Recursively removes specified types of values from a dictionary or list. By default, it removes keys with None values. Optionally, it can also remove keys starting with an underscore.

Parameters:

Name Type Description Default
value Any

The value to process (dictionary, list, or other).

required
ignore_none bool

Flag to indicate whether None values should be removed.

True
ignore_underscore bool

Flag to indicate whether keys starting with an underscore should be removed.

False
ignore_empty bool

Flag to indicate whether empty collections should be removed.

True
Source code in lodstorage/yamlable.py
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
@classmethod
def remove_ignored_values(
    cls,
    value: Any,
    ignore_none: bool = True,
    ignore_underscore: bool = False,
    ignore_empty: bool = True,
) -> Any:
    """
    Recursively removes specified types of values from a dictionary or list.
    By default, it removes keys with None values. Optionally, it can also remove keys starting with an underscore.

    Args:
        value: The value to process (dictionary, list, or other).
        ignore_none: Flag to indicate whether None values should be removed.
        ignore_underscore: Flag to indicate whether keys starting with an underscore should be removed.
        ignore_empty: Flag to indicate whether empty collections should be removed.
    """

    def keep(candidate) -> bool:
        """decide whether the candidate value survives the filtering"""
        if ignore_none and candidate is None:
            return False
        if ignore_empty and not candidate:
            # drop empty dicts and empty non-string iterables;
            # empty strings and bytes are kept
            if isinstance(candidate, Mapping):
                return False
            if isinstance(candidate, Iterable) and not isinstance(
                candidate, (str, bytes)
            ):
                return False
        return True

    recurse = YamlAble.remove_ignored_values
    if isinstance(value, Mapping):
        filtered = {}
        for key, item in value.items():
            if not keep(item):
                continue
            if ignore_underscore and key.startswith("_"):
                continue
            filtered[key] = recurse(item, ignore_none, ignore_underscore, ignore_empty)
        return filtered
    if isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
        # any non-string iterable (tuple, set, ...) is normalized to a list
        return [
            recurse(item, ignore_none, ignore_underscore, ignore_empty)
            for item in value
            if keep(item)
        ]
    return value

represent_literal(dumper, data)

Custom representer for block scalar style for strings.

Source code in lodstorage/yamlable.py
111
112
113
114
115
116
117
def represent_literal(self, dumper: yaml.Dumper, data: str) -> yaml.Node:
    """
    Custom representer for block scalar style for strings.

    Multi-line strings are emitted in literal block style ('|');
    single-line strings as plain scalars.
    """
    style = "|" if "\n" in data else None
    return dumper.represent_scalar("tag:yaml.org,2002:str", data, style=style)

represent_none(_, __)

Custom representer for ignoring None values in the YAML output.

Source code in lodstorage/yamlable.py
105
106
107
108
109
def represent_none(self, _, __) -> yaml.Node:
    """
    Custom representer for ignoring None values in the YAML output.

    PyYAML invokes representers as fn(dumper, data), so `_` is the active
    Dumper instance and `__` is the None value being represented.
    """
    # represent_scalar must be called on the Dumper *instance* supplied by
    # PyYAML; calling it via the Dumper class stored in self._yaml_dumper
    # (as before) raised a TypeError as soon as a None value actually
    # reached the dumper
    dumper = _
    return dumper.represent_scalar("tag:yaml.org,2002:null", "")

save_to_json_file(filename, **kwargs)

Saves the current dataclass instance to a JSON file.

Parameters:

Name Type Description Default
filename str

The path where the JSON file will be saved.

required
**kwargs

Additional keyword arguments for the to_json method.

{}
Source code in lodstorage/yamlable.py
241
242
243
244
245
246
247
248
249
250
251
def save_to_json_file(self, filename: str, **kwargs):
    """
    Saves the current dataclass instance to a JSON file.

    Args:
        filename (str): The path where the JSON file will be saved.
        **kwargs: Additional keyword arguments for the `to_json` method.
    """
    payload: str = self.to_json(**kwargs)
    # Path.write_text opens, writes as UTF-8 and closes in one call
    Path(filename).write_text(payload, encoding="utf-8")

save_to_yaml_file(filename)

Saves the current dataclass instance to a YAML file.

Parameters:

Name Type Description Default
filename str

The path where the YAML file will be saved.

required
Source code in lodstorage/yamlable.py
199
200
201
202
203
204
205
206
207
208
def save_to_yaml_file(self, filename: str):
    """
    Saves the current dataclass instance to a YAML file.

    Args:
        filename (str): The path where the YAML file will be saved.
    """
    with open(filename, "w", encoding="utf-8") as yaml_file:
        yaml_file.write(self.to_yaml())

to_yaml(ignore_none=True, ignore_underscore=True, allow_unicode=True, sort_keys=False)

Converts this dataclass object to a YAML string, with options to omit None values and/or underscore-prefixed variables, and using block scalar style for strings.

Parameters:

Name Type Description Default
ignore_none bool

Flag to indicate whether None values should be removed from the YAML output.

True
ignore_underscore bool

Flag to indicate whether attributes starting with an underscore should be excluded from the YAML output.

True
allow_unicode bool

Flag to indicate whether to allow unicode characters in the output.

True
sort_keys bool

Flag to indicate whether to sort the dictionary keys in the output.

False

Returns:

Type Description
str

A string representation of the dataclass object in YAML format.

Source code in lodstorage/yamlable.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
def to_yaml(
    self,
    ignore_none: bool = True,
    ignore_underscore: bool = True,
    allow_unicode: bool = True,
    sort_keys: bool = False,
) -> str:
    """
    Converts this dataclass object to a YAML string, with options to omit None values and/or underscore-prefixed variables,
    and using block scalar style for strings.

    Args:
        ignore_none: Flag to indicate whether None values should be removed from the YAML output.
        ignore_underscore: Flag to indicate whether attributes starting with an underscore should be excluded from the YAML output.
        allow_unicode: Flag to indicate whether to allow unicode characters in the output.
        sort_keys: Flag to indicate whether to sort the dictionary keys in the output.

    Returns:
        A string representation of the dataclass object in YAML format.
    """
    # snapshot my fields first, then make sure the custom dumper exists
    raw_dict = asdict(self)
    self._yaml_setup()
    filtered = self.remove_ignored_values(raw_dict, ignore_none, ignore_underscore)
    return yaml.dump(
        filtered,
        Dumper=self._yaml_dumper,
        default_flow_style=False,
        allow_unicode=allow_unicode,
        sort_keys=sort_keys,
    )

lod_storable(cls)

Decorator to make a class LoDStorable by inheriting from YamlAble. This decorator also ensures the class is a dataclass and has JSON serialization/deserialization capabilities.

Source code in lodstorage/yamlable.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def lod_storable(cls):
    """
    Decorator to make a class LoDStorable by
    inheriting from YamlAble.
    This decorator also ensures the class is a
    dataclass and has JSON serialization/deserialization
    capabilities.
    """
    cls = dataclass(cls)  # Apply the @dataclass decorator
    cls = dataclass_json(cls)  # Apply the @dataclass_json decorator

    class LoDStorable(YamlAble, cls):
        """
        decorator class
        """

        __qualname__ = cls.__qualname__
        pass

    # impersonate the decorated class so repr, pickling and introspection
    # point at the original definition instead of this wrapper;
    # __module__ was previously left as this module's name, which breaks
    # pickling of decorated instances
    LoDStorable.__name__ = cls.__name__
    LoDStorable.__doc__ = cls.__doc__
    LoDStorable.__module__ = cls.__module__

    return LoDStorable

yamlablemixin

YamlAbleMixin

Bases: object

allow reading and writing derived objects from a yaml file

Source code in lodstorage/yamlablemixin.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
class YamlAbleMixin(object):
    """allow reading and writing derived objects from a yaml file"""

    # set to True to trace read/write operations on the console
    debug = False

    # read me from a yaml file
    @staticmethod
    def readYaml(name):
        """
        read an object from the YAML file for the given name

        Args:
            name (str): the file name or basename (".yaml" is appended if missing)

        Returns:
            the deserialized object or None if there is no such file
        """
        yamlFile = name
        if not yamlFile.endswith(".yaml"):
            yamlFile = yamlFile + ".yaml"
        # is there a yamlFile for the given name
        if os.path.isfile(yamlFile):
            # read explicitly as UTF-8 for consistency with writeYaml,
            # which writes UTF-8 (previously the platform default was used)
            with io.open(yamlFile, "r", encoding="utf-8") as stream:
                if YamlAbleMixin.debug:
                    print("reading %s" % (yamlFile))
                # SECURITY: yaml.Loader can instantiate arbitrary Python
                # objects - this is required to round-trip writeYaml output,
                # so only use readYaml on trusted files
                result = yaml.load(stream, Loader=yaml.Loader)
                if YamlAbleMixin.debug:
                    print(result)
                return result
        else:
            return None

    # write me to my yaml file
    def writeYaml(self, name):
        """
        write me to the YAML file for the given name

        Args:
            name (str): the file name or basename (".yaml" is appended if missing)
        """
        yamlFile = name
        if not yamlFile.endswith(".yaml"):
            yamlFile = yamlFile + ".yaml"
        with io.open(yamlFile, "w", encoding="utf-8") as stream:
            yaml.dump(self, stream)
            if YamlAbleMixin.debug:
                print(yaml.dump(self))