pySemanticSlides API Documentation

`doi`

Created on 2023-02-12

@author: wf

`DOI` `dataclass`

get DOI data

Source code in slides/doi.py

@dataclass
class DOI:
    """
    get DOI data

    Wraps a single DOI and fetches its metadata by requesting
    https://doi.org/<doi> with different ``Accept`` headers.
    """
    # the DOI to look up; appended verbatim to https://doi.org/
    doi: str
    # if True the fetched metadata is printed to stdout
    debug: bool = False

    def debug_dump(self, d: dict):
        """
        dump the given dict if debug mode is on

        Args:
            d(dict): the dictionary to dump
        """
        if self.debug:
            print(json.dumps(d, indent=2))

    def fetchMeta(self, headers: dict) -> str:
        """
        get the metadata for my doi

        Args:
            headers(dict): the HTTP request headers to use; the ``Accept``
                header selects the format of the returned metadata

        Returns:
            str: the decoded response body according to the given headers
        """
        url = f"https://doi.org/{self.doi}"
        req = urllib.request.Request(url, headers=headers)
        # the context manager closes the HTTP response even if reading fails
        with urllib.request.urlopen(req) as response:
            # fall back to utf-8 when the response declares no charset
            encoding = response.headers.get_content_charset('utf-8')
            content = response.read()
        return content.decode(encoding)

    def fetchBibtexMeta(self) -> str:
        """
        get the meta data for my doi by requesting the BibTeX
        representation for the doi

        Returns:
            str: the BibTeX text as returned by fetchMeta

        """
        headers = {
            'Accept': 'application/x-bibtex; charset=utf-8'
        }
        text = self.fetchMeta(headers)
        # the result is plain text, so it is printed directly instead of
        # going through debug_dump (which expects a dict)
        if self.debug:
            print(text)
        return text

    def fetchCiteprocMeta(self) -> dict:
        """
        get the meta data for my  doi by getting the Citeproc JSON
        result for the doi

        see https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html

        Returns:
            dict: the parsed JSON metadata
        """
        headers = {
            'Accept': 'application/vnd.citationstyles.csl+json; charset=utf-8'
        }
        text = self.fetchMeta(headers)
        json_data = json.loads(text)
        self.debug_dump(json_data)
        return json_data

    def fetchBibTexDict(self) -> dict:
        """
        get a latex BibTexDict for my doi

        Returns:
            dict: the first parsed BibTeX entry with bibliographic metadata
                in bibtex latex format, or None if no entry was parsed
        """
        meta_bibtex = self.fetchBibtexMeta()
        bd = bibtexparser.loads(meta_bibtex)
        btex = None
        if bd.entries:
            # only the first entry is used
            btex = bd.entries[0]
            self.debug_dump(btex)
        return btex

    def fetchPlainTextBibTexDict(self) -> dict:
        """
        get a plain text BibTexDict for my doi

        Returns:
            dict: a dict with bibliographic metadata in bibtex utf-8
                (no latex) format, or None if no entry was parsed
        """
        btex = self.fetchBibTexDict()
        if btex:
            ln2t = LatexNodes2Text()
            # replace every value in place by its latex-free text; only
            # existing keys are reassigned, so the dict size does not change
            for key, latex in btex.items():
                btex[key] = ln2t.latex_to_text(latex)
            self.debug_dump(btex)
        return btex

`debug_dump(d)`

dump the given dict if debug mode is on

Parameters:

Name	Type	Description	Default
`d(dict)`		the dictionary to dump	required

Source code in slides/doi.py

def debug_dump(self,d:dict):
    """
    pretty print the given dict as indented JSON when debug mode is on

    Args:
        d(dict): the dictionary to dump
    """
    if not self.debug:
        return
    dumped=json.dumps(d,indent=2)
    print(dumped)

`fetchBibTexDict()`

get a latex BibTexDict for my doi

Returns:

Name	Type	Description
`dict`	`dict`	a dict with bibliographic metadata in bibtex latex format

Source code in slides/doi.py

def fetchBibTexDict(self)->dict:
    """
    fetch the BibTeX record for my doi and parse it into a dict

    Returns:
        dict: the first parsed entry with bibliographic metadata in
            bibtex latex format, or None if nothing was parsed
    """
    bibtex_text=self.fetchBibtexMeta()
    database=bibtexparser.loads(bibtex_text)
    if not database.entries:
        return None
    # only the first entry is of interest
    first_entry=database.entries[0]
    self.debug_dump(first_entry)
    return first_entry

`fetchBibtexMeta()`

get the metadata for my doi by getting the BibTeX result for the doi

Returns:

Name	Type	Description
`dict`	`dict`	metadata

Source code in slides/doi.py

def fetchBibtexMeta(self)->str:
    """
    get the meta data for my doi by requesting the BibTeX
    representation for the doi

    Returns:
        str: the BibTeX text as returned by fetchMeta

    """
    headers= {
        'Accept': 'application/x-bibtex; charset=utf-8'
    }
    text=self.fetchMeta(headers)
    # the result is plain text, so it is printed directly instead of
    # being dumped as JSON
    if self.debug:
        print(text)
    return text

`fetchCiteprocMeta()`

get the meta data for my doi by getting the Citeproc JSON result for the doi

see https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html

Returns:

Name	Type	Description
`dict`	`dict`	metadata

Source code in slides/doi.py

def fetchCiteprocMeta(self)->dict:
    """
    request the Citeproc JSON representation of my doi and parse it

    see https://citeproc-js.readthedocs.io/en/latest/csl-json/markup.html

    Returns:
        dict: the parsed JSON metadata
    """
    accept='application/vnd.citationstyles.csl+json; charset=utf-8'
    response_text=self.fetchMeta({'Accept': accept})
    citeproc=json.loads(response_text)
    self.debug_dump(citeproc)
    return citeproc

`fetchMeta(headers)`

get the metadata for my doi

Parameters:

Name	Type	Description	Default
`headers(dict)`		the headers to use	required

Returns:

Name	Type	Description
`dict`	`dict`	the metadata according to the given headers

Source code in slides/doi.py

def fetchMeta(self,headers:dict)->str:
    """
    get the metadata for my doi

    Args:
        headers(dict): the HTTP request headers to use; the ``Accept``
            header selects the format of the returned metadata

    Returns:
        str: the decoded response body according to the given headers
    """
    url=f"https://doi.org/{self.doi}"
    req=urllib.request.Request(url,headers=headers)
    # the context manager closes the HTTP response even if reading fails
    with urllib.request.urlopen(req) as response:
        # fall back to utf-8 when the response declares no charset
        encoding = response.headers.get_content_charset('utf-8')
        content = response.read()
    text = content.decode(encoding)
    return text

`fetchPlainTextBibTexDict()`

get a plain text BibTexDict for my doi

Returns:

Name	Type	Description
`dict`	`dict`	a dict with bibliographic metadata in bibtex utf-8 (no latex) format

Source code in slides/doi.py

def fetchPlainTextBibTexDict(self)->dict:
    """
    fetch the BibTeX dict for my doi and convert all values to plain text

    Returns:
        dict: a dict with bibliographic metadata in bibtex utf-8 (no latex)
            format; a missing or empty result is returned unchanged
    """
    entry=self.fetchBibTexDict()
    if not entry:
        return entry
    converter=LatexNodes2Text()
    # overwrite each value in place with its latex-free counterpart
    for field,latex_value in entry.items():
        entry[field]=converter.latex_to_text(latex_value)
    self.debug_dump(entry)
    return entry

`keyvalue_parser`

Created on 2023-02-14

@author: wf

`BaseKeyValueParser`

general KeyValue Parser

Source code in slides/keyvalue_parser.py

class BaseKeyValueParser():
    """
    general KeyValue Parser

    Base class holding the configuration, the collected error messages and
    the keyword -> Keydef lookup shared by the concrete parsers.
    """

    def __init__(self,config:KeyValueParserConfig):
        """
        constructor

        Args:
            config(KeyValueParserConfig): the configuration to use
        """
        self.config=config
        # error messages collected via add_error
        self.errors=[]
        # keyword -> Keydef lookup, filled by setKeydefs
        self.keydefs_by_keyword={}

    def setKeydefs(self,keydefs:typing.List[Keydef]):
        """
        set my key definitions

        Args:
             keydefs(List[Keydef]): a list of keyword definitions
        """
        self.keydefs_by_keyword=Keydef.as_dict(keydefs)

    def add_error(self,error_msg:str):
        """
        add the given error to my list of errors

        Args:
            error_msg(str): the error to add
        """
        if self.config.debug:
            print(error_msg)
        self.errors.append(error_msg)

    def handleErrors(self,text:str):
        """
        handle my errors with respect to the given text to parse

        Args:
            text(str): the text that was parsed

        Raises:
            Exception: if errors were collected and the configuration
                does not ask to ignore them
        """
        # without the self.errors check every successful parse would
        # raise a "failed with 0 errors" exception in strict mode
        if self.errors and not self.config.ignore_errors:
            error_str="\n".join(self.errors)
            raise Exception(f"key/value parsing of {text} failed with {len(self.errors)} errors:\n{error_str}")

    def getStrippedValues(self,value_list)->list:
        """
        strip all values in the given value list

        Args:
            value_list(list): the strings to strip

        Returns:
            list: a new list of stripped strings, or the given list itself
                if stripping is switched off in the configuration
        """
        if not self.config.strip:
            return value_list
        return [value.strip() for value in value_list]

`init(config)`

constructor

Parameters:

Name	Type	Description	Default
`config(KeyValueParserConfig)`		the configuration to use	required

Source code in slides/keyvalue_parser.py

def __init__(self,config:KeyValueParserConfig):
    """
    create a parser with no errors and no key definitions yet

    Args:
        config(KeyValueParserConfig): the configuration to use
    """
    # keyword lookup and error list both start out empty
    self.keydefs_by_keyword={}
    self.errors=[]
    self.config=config

`add_error(error_msg)`

add the given error to my list of errors

Parameters:

Name	Type	Description	Default
`error_msg(str)`		the error to add	required

Source code in slides/keyvalue_parser.py

def add_error(self,error_msg:str):
    """
    record the given error message, echoing it first in debug mode

    Args:
        error_msg(str): the error to add
    """
    show_error=self.config.debug
    if show_error:
        print(error_msg)
    self.errors.append(error_msg)

`getStrippedValues(value_list)`

strip all values in the given value list

Source code in slides/keyvalue_parser.py

def getStrippedValues(self,value_list)->list:
    """
    return the given values with surrounding whitespace removed

    If stripping is switched off in the configuration the given list
    itself is returned, otherwise a new list.
    """
    if self.config.strip:
        return [value.strip() for value in value_list]
    return value_list

`handleErrors(text)`

handle my errors with respect to the given text to parse

Source code in slides/keyvalue_parser.py

def handleErrors(self,text:str):
    """
    handle my errors with respect to the given text to parse

    Args:
        text(str): the text that was parsed

    Raises:
        Exception: if errors were collected and the configuration
            does not ask to ignore them
    """
    # without the self.errors check every successful parse would
    # raise a "failed with 0 errors" exception in strict mode
    if self.errors and not self.config.ignore_errors:
        error_str="\n".join(self.errors)
        raise Exception(f"key/value parsing of {text} failed with {len(self.errors)} errors:\n{error_str}")

`setKeydefs(keydefs)`

set my key definitions

Parameters:

Name	Type	Description	Default
`keydefs(List[Keydef])`		a list of keyword definitions	required

Source code in slides/keyvalue_parser.py

def setKeydefs(self,keydefs:typing.List[Keydef]):
    """
    replace my keyword lookup with one built from the given definitions

    Args:
         keydefs(List[Keydef]): a list of keyword definitions
    """
    by_keyword=Keydef.as_dict(keydefs)
    self.keydefs_by_keyword=by_keyword

`KeyValueParser`

Bases: BaseKeyValueParser

Key Value Parser (which won't handle all details properly) see https://stackoverflow.com/a/75270267/1497139

Source code in slides/keyvalue_parser.py

class KeyValueParser(BaseKeyValueParser):
    """
    Key Value Parser (which won't handle all details properly)
    see https://stackoverflow.com/a/75270267/1497139

    Uses a pyparsing grammar that is built in setKeydefs; getKeyValues
    reads self.g_grammar, so setKeydefs has to be called first.
    """

    def __init__(self,config:KeyValueParserConfig):
        """
        constructor

        Args:
            config(KeyValueParserConfig): the configuration to use
        """
        BaseKeyValueParser.__init__(self, config)  
        # pick the default whitespace characters depending on the record
        # delimiter so that a newline record delimiter is not treated as whitespace
        # NOTE(review): this is set on pp.ParserElement itself, so it presumably
        # affects parser elements created elsewhere afterwards - confirm
        if config.record_delim=="\n":
            pp.ParserElement.setDefaultWhitespaceChars("\t")
        else:
            pp.ParserElement.setDefaultWhitespaceChars("\n")
        pass


    def setKeydefs(self,keydefs:typing.List[Keydef]):
        """
        overwrite how to set my key definitions

        Besides storing the key definitions this builds the pyparsing
        grammar self.g_grammar from the delimiters in my configuration.

        Args:
             keydefs(List[Keydef]): a list of keyword definitions
        """
        BaseKeyValueParser.setKeydefs(self,keydefs)
        # set local variable from config
        record_delim=self.config.record_delim
        key_value_delim=self.config.key_value_delim
        value_delim=self.config.value_delim
        quote=self.config.quote
        #
        # initialize grammar
        # 
        # valid keys are alphas
        g_key = pp.Word(pp.alphas)
        # items may not have record or value delimiters or must be quoted
        g_item = pp.OneOrMore(pp.Word(pp.printables+" "+self.config.unicode_chars, excludeChars=record_delim+value_delim+quote) | pp.QuotedString(quote_char=quote))
        # a value is a value_delim delimited list of items
        g_value = pp.delimited_list(g_item, delim=value_delim)
        # the key/value delimiter is matched but suppressed from the result
        l_key_value_sep = pp.Suppress(pp.Literal(key_value_delim))
        g_key_value = g_key + l_key_value_sep + g_value
        # the complete text is a record_delim delimited list of key/value pairs
        self.g_grammar = pp.delimited_list(g_key_value, delim=record_delim)

        # map a defined keyword to its key, leave other keywords unchanged
        g_key.add_parse_action(lambda x: 
            self.keydefs_by_keyword[x[0]].key if x[0] in self.keydefs_by_keyword else x
        )
        # wrap multiple items into one nested list so that they stay
        # together as a single value
        g_value.add_parse_action(lambda x: 
            [x] if len(x) > 1 else x
        )
        # turn each key/value match into a (key, value) tuple;
        # nested parse results are converted to a plain list
        g_key_value.add_parse_action(lambda x: 
            (x[0], x[1].as_list()) if isinstance(x[1],pp.ParseResults) else (x[0], x[1])
        )
        pass

    def getKeyValues(self,text:str)->dict:
        """
        get key/value pairs from the given text using the configured keys definition

        Parse failures are recorded via add_error and then passed to
        handleErrors, which decides whether an exception is raised.

        Args:
            text(str): the text to parse

        Returns:
            dict: the resulting key-value pairs; empty if text is empty
        """
        # start with a clean error list for every call
        self.errors=[]
        key_values = dict()
        if text:         
            try:
                for k,v in self.g_grammar.parse_string(text, parse_all=True):
                    if self.config.strip:
                        # list values are stripped element by element
                        if isinstance(v,list):
                            v=self.getStrippedValues(v)
                        else:
                            v=v.strip()
                    key_values[k] = v
            except Exception as ex:
                # keep the full traceback in the recorded error message
                tb = traceback.format_exc()
                error_msg=f"parsing {text} failed: \n{str(ex)}\n{tb}"
                self.add_error(error_msg)
            self.handleErrors(text)
        return key_values

`init(config)`

constructor

Parameters:

Name	Type	Description	Default
`config(KeyValueParserConfig)`		the configuration to use	required

Source code in slides/keyvalue_parser.py

def __init__(self,config:KeyValueParserConfig):
    """
    initialize the base parser and select the pyparsing default
    whitespace characters depending on the record delimiter

    Args:
        config(KeyValueParserConfig): the configuration to use
    """
    BaseKeyValueParser.__init__(self, config)
    # tab if records are newline delimited, newline otherwise
    whitespace_chars="\t" if config.record_delim=="\n" else "\n"
    pp.ParserElement.setDefaultWhitespaceChars(whitespace_chars)

`getKeyValues(text)`

get key/value pairs from the given text using the configured keys definition

Parameters:

Name	Type	Description	Default
`text(str)`		the text to parse	required

Returns:

Name	Type	Description
`dict`	`dict`	the resulting key-value pairs

Source code in slides/keyvalue_parser.py

def getKeyValues(self,text:str)->dict:
    """
    parse the given text with my grammar and collect the key/value pairs

    Parse failures are recorded via add_error and then passed to
    handleErrors.

    Args:
        text(str): the text to parse

    Returns:
        dict: the resulting key-value pairs; empty if text is empty
    """
    self.errors=[]
    key_values={}
    if not text:
        return key_values
    try:
        parsed=self.g_grammar.parse_string(text, parse_all=True)
        for key,value in parsed:
            if self.config.strip:
                # lists are stripped element by element
                value=self.getStrippedValues(value) if isinstance(value,list) else value.strip()
            key_values[key]=value
    except Exception as ex:
        tb = traceback.format_exc()
        error_msg=f"parsing {text} failed: \n{str(ex)}\n{tb}"
        self.add_error(error_msg)
    self.handleErrors(text)
    return key_values

`setKeydefs(keydefs)`

overwrite how to set my key definitions

Parameters:

Name	Type	Description	Default
`keydefs(List[Keydef])`		a list of keyword definitions	required

Source code in slides/keyvalue_parser.py

def setKeydefs(self,keydefs:typing.List[Keydef]):
    """
    overwrite how to set my key definitions

    Besides storing the key definitions this builds the pyparsing
    grammar self.g_grammar from the delimiters in my configuration.

    Args:
         keydefs(List[Keydef]): a list of keyword definitions
    """
    BaseKeyValueParser.setKeydefs(self,keydefs)
    # set local variable from config
    record_delim=self.config.record_delim
    key_value_delim=self.config.key_value_delim
    value_delim=self.config.value_delim
    quote=self.config.quote
    #
    # initialize grammar
    # 
    # valid keys are alphas
    g_key = pp.Word(pp.alphas)
    # items may not have record or value delimiters or must be quoted
    g_item = pp.OneOrMore(pp.Word(pp.printables+" "+self.config.unicode_chars, excludeChars=record_delim+value_delim+quote) | pp.QuotedString(quote_char=quote))
    # a value is a value_delim delimited list of items
    g_value = pp.delimited_list(g_item, delim=value_delim)
    # the key/value delimiter is matched but suppressed from the result
    l_key_value_sep = pp.Suppress(pp.Literal(key_value_delim))
    g_key_value = g_key + l_key_value_sep + g_value
    # the complete text is a record_delim delimited list of key/value pairs
    self.g_grammar = pp.delimited_list(g_key_value, delim=record_delim)

    # map a defined keyword to its key, leave other keywords unchanged
    g_key.add_parse_action(lambda x: 
        self.keydefs_by_keyword[x[0]].key if x[0] in self.keydefs_by_keyword else x
    )
    # wrap multiple items into one nested list so that they stay
    # together as a single value
    g_value.add_parse_action(lambda x: 
        [x] if len(x) > 1 else x
    )
    # turn each key/value match into a (key, value) tuple;
    # nested parse results are converted to a plain list
    g_key_value.add_parse_action(lambda x: 
        (x[0], x[1].as_list()) if isinstance(x[1],pp.ParseResults) else (x[0], x[1])
    )
    pass

`KeyValueParserConfig` `dataclass`

a configuration for a key/value Parser

Source code in slides/keyvalue_parser.py

@dataclass
class KeyValueParserConfig():
    """
    settings shared by the key/value parsers
    """
    # separates a key from its value(s)
    key_value_delim: str = ":"
    # separates one key/value record from the next
    record_delim: str = "•"
    # separates the elements of a list value
    value_delim: str = ","
    # quote character for quoted strings
    quote: str = "\'"
    # non-ASCII characters accepted in addition to the printable ones
    unicode_chars: str = "•→–"
    # strip surrounding whitespace from values
    strip: bool = True
    # collect errors instead of raising an exception
    ignore_errors: bool = True
    # report keywords without a key definition as errors
    defined_keys_only: bool = False
    # print errors as they are added
    debug: bool = False

`KeyValueSplitParser`

Bases: BaseKeyValueParser

Key / Value Parser

see https://stackoverflow.com/questions/75266188/pyparsing-syntax-tree-from-named-value-list/75270267#75270267

Source code in slides/keyvalue_parser.py

class KeyValueSplitParser(BaseKeyValueParser):
    """
    Key / Value Parser based on the quote aware Split helper

    see https://stackoverflow.com/questions/75266188/pyparsing-syntax-tree-from-named-value-list/75270267#75270267
    """

    def getKeyValues(self,text:str)->dict:
        """
        split the given text into records, each record into key and value
        and - for list keys - the value into its elements

        Args:
            text(str): the text to parse

        Returns:
            dict: the resulting key-value pairs
        """
        self.errors=[]
        result={}
        if not text:
            return result
        cfg=self.config
        try:
            record_splitter=Split(delim=cfg.record_delim,unicode_chars=cfg.unicode_chars)
            records=record_splitter.split(text)
        except Exception as rsplit_ex:
            self.add_error(f"record split failed {rsplit_ex}")
            records=[]
        for record in records:
            pair_splitter=Split(delim=cfg.key_value_delim,unicode_chars=cfg.unicode_chars)
            key_values=pair_splitter.split(record)
            if len(key_values)!=2:
                self.add_error(f"{key_values} has {len(key_values)}) elements but should have two")
                continue
            keyword=key_values[0].strip()
            values_str=key_values[1]
            if keyword in self.keydefs_by_keyword:
                keydef=self.keydefs_by_keyword[keyword]
                # map keyword to key
                key=keydef.key
                list_splitter=Split(delim=cfg.value_delim,unicode_chars=cfg.unicode_chars,keep_quotes=False)
                if keydef.has_list:
                    # the value is a list of stripped elements
                    value=self.getStrippedValues(list_splitter.split(values_str))
                else:
                    value=values_str
            else:
                # undefined keywords are reported if requested but still kept
                if cfg.defined_keys_only:
                    self.add_error(f"undefined keyword {keyword}")
                key=keyword
                value=values_str
            if cfg.strip and isinstance(value,str):
                value=value.strip()
            result[key]=value
        self.handleErrors(text)
        return result

`getKeyValues(text)`

get key/value pairs from the given text using the configured keys definition

Parameters:

Name	Type	Description	Default
`text(str)`		the text to parse	required

Returns:

Name	Type	Description
`dict`	`dict`	the resulting key-value pairs

Source code in slides/keyvalue_parser.py

def getKeyValues(self,text:str)->dict:
    """
    split the given text into records, each record into key and value
    and - for list keys - the value into its elements

    Args:
        text(str): the text to parse

    Returns:
        dict: the resulting key-value pairs
    """
    self.errors=[]
    result={}
    if not text:
        return result
    cfg=self.config
    try:
        record_splitter=Split(delim=cfg.record_delim,unicode_chars=cfg.unicode_chars)
        records=record_splitter.split(text)
    except Exception as rsplit_ex:
        self.add_error(f"record split failed {rsplit_ex}")
        records=[]
    for record in records:
        pair_splitter=Split(delim=cfg.key_value_delim,unicode_chars=cfg.unicode_chars)
        key_values=pair_splitter.split(record)
        if len(key_values)!=2:
            self.add_error(f"{key_values} has {len(key_values)}) elements but should have two")
            continue
        keyword=key_values[0].strip()
        values_str=key_values[1]
        if keyword in self.keydefs_by_keyword:
            keydef=self.keydefs_by_keyword[keyword]
            # map keyword to key
            key=keydef.key
            list_splitter=Split(delim=cfg.value_delim,unicode_chars=cfg.unicode_chars,keep_quotes=False)
            if keydef.has_list:
                # the value is a list of stripped elements
                value=self.getStrippedValues(list_splitter.split(values_str))
            else:
                value=values_str
        else:
            # undefined keywords are reported if requested but still kept
            if cfg.defined_keys_only:
                self.add_error(f"undefined keyword {keyword}")
            key=keyword
            value=values_str
        if cfg.strip and isinstance(value,str):
            value=value.strip()
        result[key]=value
    self.handleErrors(text)
    return result

`Keydef` `dataclass`

a key definition

Source code in slides/keyvalue_parser.py

@dataclass
class Keydef():
    """
    definition of a keyword and the key it is mapped to
    """
    # the keyword as it appears in the text to parse
    keyword: str
    # the key used for the keyword in the result
    key: str
    # True if the value of this key is a list of values
    has_list: bool=False

    @classmethod
    def as_dict(cls,keydefs:typing.List['Keydef'])->typing.Dict[str,'Keydef']:
        """
        index the given key definitions by their keyword

        Args:
            keydefs(list): the list of key defs

        Returns:
            dict: a dict keyword -> Keydef; for duplicate keywords the
                last definition wins

        """
        return {keydef.keyword: keydef for keydef in keydefs}

`as_dict(keydefs)` `classmethod`

convert the given list of keydefs to a dict by keyword

Parameters:

Name	Type	Description	Default
`keydefs(list)`		the list of key defs	required

Returns:

Name	Type	Description
`dict`	`Dict[str, Keydef]`	a dict keyword -> Keydef

Source code in slides/keyvalue_parser.py

@classmethod
def as_dict(cls,keydefs:typing.List['Keydef'])->typing.Dict[str,'Keydef']:
    """
    index the given key definitions by their keyword

    Args:
        keydefs(list): the list of key defs

    Returns:
        dict: a dict keyword -> Keydef; for duplicate keywords the
            last definition wins

    """
    return {keydef.keyword: keydef for keydef in keydefs}

`SimpleKeyValueParser`

Bases: BaseKeyValueParser

a simple key value parser (which won't handle quote properly)

Source code in slides/keyvalue_parser.py

class SimpleKeyValueParser(BaseKeyValueParser):
    """
    a simple key value parser (which won't handle quote properly)
    """

    def getKeyValues(self,text:str)->dict:
        """
        get key/value pairs from the given text using the configured keys definition

        The text is split with plain str.split, so delimiters inside
        quotes are not protected. Problems are recorded via add_error and
        handed to handleErrors once all records have been processed.

        Args:
            text(str): the text to parse

        Returns:
            dict: the resulting key-value pairs
        """
        result={}
        self.errors=[]
        if not text:
            return result
        config=self.config
        for key_value in text.split(config.record_delim):
            parts=key_value.split(config.key_value_delim)
            if len(parts)<2:
                # no delimiter: there is no value to unpack, so the record
                # is always skipped (indexing parts[1] would fail)
                self.add_error(f"missing key_value delimiter '{config.key_value_delim}' in {key_value}")
                continue
            if len(parts)>2:
                # as before, parsing stops at the first record with too many parts
                self.add_error(f"notes syntax error: {key_value} has {len(parts)} elements but should have two")
                break
            # parsed key and value
            pkey,value=parts
            pkey=pkey.strip()
            if config.strip:
                value=value.strip()
            if pkey in self.keydefs_by_keyword:
                keydef=self.keydefs_by_keyword[pkey]
                key=keydef.key
                if keydef.has_list:
                    # value is a list
                    value=self.getStrippedValues(value.split(config.value_delim))
            elif config.defined_keys_only:
                # do not store the value under a stale or unbound key
                self.add_error(f"undefined key {pkey}")
                continue
            else:
                key=pkey
            result[key]=value
        # report once after the loop (also reached after break) and only
        # if something actually went wrong
        if self.errors:
            self.handleErrors(text)
        return result

`getKeyValues(text)`

get key/value pairs from the given text using the configured keys definition

Parameters:

Name	Type	Description	Default
`text(str)`		the text to parse	required

Returns:

Name	Type	Description
`dict`	`dict`	the resulting key-value pairs

Source code in slides/keyvalue_parser.py

def getKeyValues(self,text:str)->dict:
    """
    get key/value pairs from the given text using the configured keys definition

    The text is split with plain str.split, so delimiters inside
    quotes are not protected. Problems are recorded via add_error and
    handed to handleErrors once all records have been processed.

    Args:
        text(str): the text to parse

    Returns:
        dict: the resulting key-value pairs
    """
    result={}
    self.errors=[]
    if not text:
        return result
    config=self.config
    for key_value in text.split(config.record_delim):
        parts=key_value.split(config.key_value_delim)
        if len(parts)<2:
            # no delimiter: there is no value to unpack, so the record
            # is always skipped (indexing parts[1] would fail)
            self.add_error(f"missing key_value delimiter '{config.key_value_delim}' in {key_value}")
            continue
        if len(parts)>2:
            # as before, parsing stops at the first record with too many parts
            self.add_error(f"notes syntax error: {key_value} has {len(parts)} elements but should have two")
            break
        # parsed key and value
        pkey,value=parts
        pkey=pkey.strip()
        if config.strip:
            value=value.strip()
        if pkey in self.keydefs_by_keyword:
            keydef=self.keydefs_by_keyword[pkey]
            key=keydef.key
            if keydef.has_list:
                # value is a list
                value=self.getStrippedValues(value.split(config.value_delim))
        elif config.defined_keys_only:
            # do not store the value under a stale or unbound key
            self.add_error(f"undefined key {pkey}")
            continue
        else:
            key=pkey
        result[key]=value
    # report once after the loop (also reached after break) and only
    # if something actually went wrong
    if self.errors:
        self.handleErrors(text)
    return result

`Split`

quoted string splitter

Source code in slides/keyvalue_parser.py

class Split():
    """
    quoted string splitter

    Splits a text at a delimiter using a pyparsing grammar that is built
    in the constructor; quoted strings are matched as a whole.
    """

    def __init__(self,delim:str=',',quote:str="'",unicode_chars:str="•→–",keep_quotes:bool=True):
        """
        constructor

        Args:
            delim(str): the delimiter char, default comma
            quote(str): the quote char, default single quote
            unicode_chars(str): unicode characters to allow
            keep_quotes(bool): if True keep the quoted strings if False remove quotes

        """
        self.delim=delim
        self.quote=quote
        self.keep_quotes=keep_quotes
        # no character is skipped as whitespace
        # NOTE(review): this is set on pp.ParserElement itself, so it presumably
        # affects parser elements created elsewhere afterwards - confirm
        pp.ParserElement.setDefaultWhitespaceChars("")
        self.g_quoted=pp.QuotedString(quote_char=quote)
        # a value is a sequence of unquoted words (without delimiter and quote
        # characters) and quoted strings
        self.g_value = pp.OneOrMore(pp.Word(pp.printables+unicode_chars+" ", excludeChars=delim+quote) | self.g_quoted)
        # put the quotes back around a quoted string if keep_quotes is set
        self.g_quoted.add_parse_action(lambda x:
            f"{quote}{x[0]}{quote}" if self.keep_quotes else f"{x[0]}"
        )
        # join the parts of a value into a single string
        self.g_value.add_parse_action(lambda x: 
            "".join(x) if len(x) > 1 else x
        )  
        self.g_split = pp.delimited_list(self.g_value, delim=delim)
        pass

    def split(self,text:str,)->list:
        """
        split the given text with my delim acknowledging my quote char for quoted strings

        Args:
            text(str): the text to split

        Returns:
            list: a list of strings
        """
        # parse_all=True requires the whole text to match the grammar
        parse_result=self.g_split.parse_string(text, parse_all=True)
        result_list=parse_result.asList()
        return result_list

`init(delim=',', quote="'", unicode_chars='•→–', keep_quotes=True)`

constructor

Parameters:

Name	Description	Default
`delim(str)`	the delimiter char, default comma	required
`quote(str)`	the quote char, default single quote	required
`unicode_chars(str)`	unicode characters to allow	required
`keep_quotes(bool)`	if True keep the quotes around quoted strings, if False remove them	required

Source code in slides/keyvalue_parser.py

def __init__(self,delim:str=',',quote:str="'",unicode_chars:str="•→–",keep_quotes:bool=True):
    """
    constructor

    Builds the pyparsing grammar self.g_split that is used by split.

    Args:
        delim(str): the delimiter char, default comma
        quote(str): the quote char, default single quote
        unicode_chars(str): unicode characters to allow
        keep_quotes(bool): if True keep the quoted strings if False remove quotes

    """
    self.delim=delim
    self.quote=quote
    self.keep_quotes=keep_quotes
    # no character is skipped as whitespace
    # NOTE(review): this is set on pp.ParserElement itself, so it presumably
    # affects parser elements created elsewhere afterwards - confirm
    pp.ParserElement.setDefaultWhitespaceChars("")
    self.g_quoted=pp.QuotedString(quote_char=quote)
    # a value is a sequence of unquoted words (without delimiter and quote
    # characters) and quoted strings
    self.g_value = pp.OneOrMore(pp.Word(pp.printables+unicode_chars+" ", excludeChars=delim+quote) | self.g_quoted)
    # put the quotes back around a quoted string if keep_quotes is set
    self.g_quoted.add_parse_action(lambda x:
        f"{quote}{x[0]}{quote}" if self.keep_quotes else f"{x[0]}"
    )
    # join the parts of a value into a single string
    self.g_value.add_parse_action(lambda x: 
        "".join(x) if len(x) > 1 else x
    )  
    self.g_split = pp.delimited_list(self.g_value, delim=delim)
    pass

`split(text)`

split the given text with my delim acknowledging my quote char for quoted strings

Parameters:

Name	Type	Description	Default
`text(str)`		the text to split	required

Returns:

Name	Type	Description
`list`	`list`	a list of strings

Source code in slides/keyvalue_parser.py

def split(self,text:str,)->list:
    """
    split the given text at my delimiter, honoring my quote char for
    quoted strings

    Args:
        text(str): the text to split

    Returns:
        list: a list of strings
    """
    # the whole text has to match the grammar (parse_all)
    return self.g_split.parse_string(text, parse_all=True).asList()

`semslides`

Created on 2023-02-23

@author: wf

`SemSlides`

a semantic mediawiki for slides

Source code in slides/semslides.py

class SemSlides():
    """
    a semantic mediawiki for slides
    """

    def __init__(self, args):
        """
        create me from the given command line arguments

        Args:
            args(Args): my command line arguments
        """
        self.args = args

    @classmethod
    def getArgParser(cls, version_msg) -> ArgumentParser:
        """
        Set up the command line argument parser; the parser description
        is taken from Version.description

        Args:
            version_msg(str): the version message

        Returns:
            ArgumentParser: the argument parser
        """
        parser = ArgumentParser(
            description=Version.description,
            formatter_class=RawDescriptionHelpFormatter,
        )
        parser.add_argument(
            "-a",
            "--about",
            help="show about info [default: %(default)s]",
            action="store_true",
        )
        parser.add_argument(
            "--context",
            default="MetaModel",
            help="context to generate from [default: %(default)s]",
        )
        parser.add_argument(
            "-d",
            "--debug",
            dest="debug",
            action="store_true",
            help="show debug info",
        )
        parser.add_argument(
            "-V",
            "--version",
            action="version",
            version=version_msg,
        )
        parser.add_argument(
            "--wikiId",
            default="wiki",
            help="id of the wiki to generate for [default: %(default)s]",
        )
        return parser

`init(args)`

constructor

Parameters:

Name	Type	Description	Default
`args(Args)`		my command line arguments	required

Source code in slides/semslides.py

def __init__(self, args):
    """
    create me from the given command line arguments

    Args:
        args(Args): my command line arguments
    """
    self.args = args

`getArgParser(version_msg)` `classmethod`

Setup command line argument parser

Parameters:

Name	Type	Description	Default
`description(str)`		the description	required
`version_msg(str)`		the version message	required

Returns:

Name	Type	Description
`ArgumentParser`	`ArgumentParser`	the argument parser

Source code in slides/semslides.py

@classmethod
def getArgParser(cls, version_msg) -> ArgumentParser:
    """
    Set up the command line argument parser; the parser description
    is taken from Version.description

    Args:
        version_msg(str): the version message

    Returns:
        ArgumentParser: the argument parser
    """
    parser = ArgumentParser(
        description=Version.description,
        formatter_class=RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "-a",
        "--about",
        help="show about info [default: %(default)s]",
        action="store_true",
    )
    parser.add_argument(
        "--context",
        default="MetaModel",
        help="context to generate from [default: %(default)s]",
    )
    parser.add_argument(
        "-d",
        "--debug",
        dest="debug",
        action="store_true",
        help="show debug info",
    )
    parser.add_argument(
        "-V",
        "--version",
        action="version",
        version=version_msg,
    )
    parser.add_argument(
        "--wikiId",
        default="wiki",
        help="id of the wiki to generate for [default: %(default)s]",
    )
    return parser

`main(argv=None)`

main routine

Source code in slides/semslides.py

def main(argv=None):
    '''
    main routine

    Args:
        argv(list): the command line arguments including the program
            name as first element; defaults to sys.argv

    Returns:
        int: 1 on keyboard interrupt, 2 on any other exception,
        None otherwise
    '''
    if argv is None:
        argv = sys.argv
    # take the program name from the argument vector actually in use,
    # not unconditionally from sys.argv
    program_name = os.path.basename(argv[0] if argv else sys.argv[0])
    # tracebacks are shown only if --debug was given; until the arguments
    # are parsed there is nothing to go by, so start with False
    debug = False
    try:
        program_version_message = f'{program_name} (v{Version.version},{Version.updated})'
        parser = SemSlides.getArgParser(program_version_message)
        args = parser.parse_args(argv[1:])
        debug = args.debug
        # the instance is created but not used any further here
        semSlides = SemSlides(args)
        if args.about:
            print(program_version_message)
            print(f"see {Version.doc_url}")
            webbrowser.open(Version.doc_url)
    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 1
    except Exception as e:
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        if debug:
            print(traceback.format_exc())
        return 2

`slidewalker`

Created on 2022-04-07

@author: wf

`PPT`

Bases: object

PowerPoint Presentation with lecture

Source code in slides/slidewalker.py

class PPT(object):
    '''
    PowerPoint Presentation with lecture
    '''

    def __init__(self,filepath,pageHeight=297):
        '''
        Constructor

        Args:
            filepath(str): the path of the presentation file
            pageHeight(int): stored as self.pageHeight

        Raises:
            Exception: if filepath is not an existing file
        '''
        self.filepath=filepath
        self.basename=os.path.basename(filepath)
        self.pageHeight=pageHeight
        if not os.path.isfile(filepath):
            raise Exception("%s does not exist" % filepath)
        self.prs=None
        self.error=None
        self.slides=[]
        # the metadata is filled by open(); it is predefined here so that
        # summary() and asDict() also work before the presentation is opened
        self.author=None
        self.created=None
        self.title=None

    def summary(self)->str:
        '''
        show a summary of the given lecture

        Returns:
            str: the error and file path if opening failed, otherwise
            title, author, creation date and file name
        '''
        if self.error:
            summary=f"error: {self.error} at {self.filepath}"
        else:
            # the "lecture" attribute is not set in this class - it is only
            # reported if something else has attached it
            if hasattr(self, "lecture"):
                summary=f"{self.title}({len(self.lecture)} lecture)/{self.author}/{self.created}  {self.basename}"
            else:
                summary=f"{self.title}/{self.author}/{self.created}  {self.basename}"
        return summary

    def asDict(self)->dict:
        """
        convert me to a dict

        Returns:
            dict: summary
        """
        if self.error:
            summary={"error":str(self.error),"path":self.filepath}
        else:
            summary={"title":self.title,"author":self.author,"created":self.created,"path":self.filepath}
        return summary

    def open(self):
        '''
        open my presentation and read author, creation date and title;
        any exception is stored in self.error instead of being raised
        '''
        try:
            self.prs = Presentation(self.filepath)
            self.author=self.prs.core_properties.author
            self.created=self.prs.core_properties.created
            self.title=self.prs.core_properties.title
        except Exception as ex:
            self.error=ex

    def getSlides(self, excludeHiddenSlides:bool=False,runDelim:str=None):
        '''
        get my slides

        Args:
            excludeHiddenSlides(bool): if True exclude hidden Slides
            runDelim(str): the delimiter for text runs; defaults to
                Slide.defaultRunDelim

        Returns:
            list: my slides (also available as self.slides)
        '''
        if runDelim is None:
            runDelim=Slide.defaultRunDelim
        if self.prs is None:
            self.open()
        if not self.error:
            # start from scratch so that calling getSlides more than once
            # does not add the same slides again
            self.slides=[]
            page=0
            pdf_page=0
            for slide in self.prs.slides:
                # page counts every slide, pdf_page only the slides kept
                page+=1
                if excludeHiddenSlides:
                    if slide._element.get('show') == '0':
                        # slide is hidden → go to next slide
                        continue
                pdf_page += 1
                pptSlide=Slide(self,slide,page=page,pdf_page=pdf_page,runDelim=runDelim)
                self.slides.append(pptSlide)
        return self.slides

`init(filepath, pageHeight=297)`

Constructor

Source code in slides/slidewalker.py

def __init__(self,filepath,pageHeight=297):
    '''
    Constructor

    Args:
        filepath(str): the path of the presentation file
        pageHeight(int): stored as self.pageHeight

    Raises:
        Exception: if filepath is not an existing file
    '''
    self.filepath=filepath
    self.basename=os.path.basename(filepath)
    self.pageHeight=pageHeight
    if not os.path.isfile(filepath):
        raise Exception("%s does not exist" % filepath)
    self.prs=None
    self.error=None
    self.slides=[]
    # the metadata is filled by open(); it is predefined here so that
    # summary() and asDict() also work before the presentation is opened
    self.author=None
    self.created=None
    self.title=None

`asDict()`

convert me to a dict

Returns:

Name	Type	Description
`dict`	`dict`	summary

Source code in slides/slidewalker.py

def asDict(self)->dict:
    """
    give a dict representation of me

    Returns:
        dict: error and path if there was an error, otherwise
        title, author, created and path
    """
    if not self.error:
        return {
            "title": self.title,
            "author": self.author,
            "created": self.created,
            "path": self.filepath,
        }
    return {"error": str(self.error), "path": self.filepath}

`getSlides(excludeHiddenSlides=False, runDelim=None)`

get my slides

Parameters:

Name	Type	Description	Default
`excludeHiddenSlides(bool)`		if True exclude hidden Slides	required

Source code in slides/slidewalker.py

def getSlides(self, excludeHiddenSlides:bool=False,runDelim:str=None):
    '''
    get my slides

    Args:
        excludeHiddenSlides(bool): if True exclude hidden Slides
        runDelim(str): the delimiter for text runs; defaults to
            Slide.defaultRunDelim

    Returns:
        list: my slides (also available as self.slides)
    '''
    if runDelim is None:
        runDelim=Slide.defaultRunDelim
    if self.prs is None:
        self.open()
    if not self.error:
        # start from scratch so that calling getSlides more than once
        # does not add the same slides again
        self.slides=[]
        page=0
        pdf_page=0
        for slide in self.prs.slides:
            # page counts every slide, pdf_page only the slides kept
            page+=1
            if excludeHiddenSlides:
                if slide._element.get('show') == '0':
                    # slide is hidden → go to next slide
                    continue
            pdf_page += 1
            pptSlide=Slide(self,slide,page=page,pdf_page=pdf_page,runDelim=runDelim)
            self.slides.append(pptSlide)
    return self.slides

`open()`

open my presentation

Source code in slides/slidewalker.py

def open(self):
    '''
    load my presentation and pick up author, creation date and title
    from its core properties; a failure is kept in self.error
    '''
    try:
        self.prs = Presentation(self.filepath)
        properties = self.prs.core_properties
        self.author = properties.author
        self.created = properties.created
        self.title = properties.title
    except Exception as problem:
        self.error = problem

`summary()`

show a summary of the given lecture

Source code in slides/slidewalker.py

def summary(self)->str:
    '''
    give a one line summary of me

    Returns:
        str: the error and file path if there was an error, otherwise
        title, author, creation date and file name
    '''
    if self.error:
        return f"error: {self.error} at {self.filepath}"
    if not hasattr(self, "lecture"):
        return f"{self.title}/{self.author}/{self.created}  {self.basename}"
    # the size of the optional "lecture" attribute is part of the summary
    return f"{self.title}({len(self.lecture)} lecture)/{self.author}/{self.created}  {self.basename}"

`Slide`

Bases: object

a single slide

Source code in slides/slidewalker.py

class Slide(object):
    '''
    a single slide
    '''
    defaultRunDelim=""

    def __init__(self,ppt,slide,page,pdf_page,runDelim:str=None):
        """
        constructor

        Args:
            ppt: the presentation this slide belongs to
            slide: the underlying slide object
            page: the page number of the slide
            pdf_page: the page number when hidden slides are not counted
            runDelim(str): the delimiter for text runs; defaults to
                Slide.defaultRunDelim
        """
        self.ppt=ppt
        self.slide=slide
        self.page=page
        self.pdf_page=pdf_page
        self.name=slide.name
        self.title=None
        if runDelim is None:
            runDelim=Slide.defaultRunDelim
        self.runDelim=runDelim
        # https://stackoverflow.com/a/40821359/1497139
        if slide.shapes.title:
            self.title=slide.shapes.title.text
        # fall back to the slide name if there is no title
        if self.title is None:
            self.title=self.name

    def asDict(self):
        """
        convert me to a dict

        Returns:
            dict: page, pdf_page, title, name, text and notes
        """
        summary={
            "page": self.page,
            "pdf_page": self.pdf_page,
            "title": self.title,
            "name": self.name,
            "text": self.getText(),
            "notes": self.getNotes()
        }
        return summary

    def summary(self):
        """
        Returns:
            str: page number, name and title of this slide
        """
        text=(f"{self.page:3d}({self.name}):{self.title}")
        return text

    def getMM(self,emu):
        """
        get the millimeter value of the given length

        Args:
            emu: a length offering an mm attribute, or None

        Returns:
            the mm value, 0 if emu is None
        """
        # https://startbigthinksmall.wordpress.com/2010/01/04/points-inches-and-emus-measuring-units-in-office-open-xml/
        if emu is None:
            return 0
        else:
            return emu.mm

    def getText4Shapes(self,shapes,yRange,runDelim:str=None):
        """
        get the text for the given shapes in the given yRange using the given
        run delimiter

        Args:
            shapes: the shapes to get the text from
            yRange: the YRange the top of a shape has to be in, None for no limit
            runDelim(str): the delimiter for text runs

        Returns:
            list: one string per accepted shape that has a text frame;
            never None
        """
        # lines will be populated with a list of strings,
        # one for each "line"  in presentation
        lines = []
        if runDelim is None:
            runDelim=self.runDelim
        y=None
        for shape in shapes:
            if not shape.has_text_frame:
                continue
            # all runs of all paragraphs of the shape form one line
            line=runDelim.join(
                run.text
                for paragraph in shape.text_frame.paragraphs
                for run in paragraph.runs
            )
            y=self.getMM(shape.top)
            if y and YRange.isIn(yRange,y):
                lines.append(line)

        # kept for backward compatible output: an empty closing line is
        # added if the last text shape was accepted
        if y and YRange.isIn(yRange,y):
            lines.append("")
        # always return the list - also if there was no accepted shape
        return lines

    def getText(self,yRange=None):
        '''
        get the text in the given yRange

        Args:
            yRange: the YRange to limit the text to, None for no limit

        Return:
            list: the text lines of this slide
        '''
        text=self.getText4Shapes(self.slide.shapes,yRange,runDelim=self.runDelim)
        return text

    def getNotes(self,yRange=None,useShapes:bool=False)->str:
        """
        get the notes

        Args:
            yRange: the YRange to limit the text to (only used with useShapes)
            useShapes(bool): if True collect the text from the shapes of the
                notes slide (the result is then a list of lines)

        Return:
            str: the notes for this slide
        """
        text=""
        if self.slide.has_notes_slide:
            notes_slide=self.slide.notes_slide
            if useShapes:
                text=self.getText4Shapes(notes_slide.shapes,yRange,runDelim=self.runDelim)
            elif notes_slide.notes_text_frame:
                text=notes_slide.notes_text_frame.text
        return text

    def getLayoutName(self)->str:
        '''
        get the layoutName of this slide
        '''
        layoutName=self.slide.slide_layout.name
        return layoutName

`init(ppt, slide, page, pdf_page, runDelim=None)`

constructor

Source code in slides/slidewalker.py

def __init__(self,ppt,slide,page,pdf_page,runDelim:str=None):
    """
    create me for the given slide of the given presentation;
    my title is the text of the title shape or - if there is none - my name
    """
    self.ppt = ppt
    self.slide = slide
    self.page = page
    self.pdf_page = pdf_page
    self.name = slide.name
    self.runDelim = Slide.defaultRunDelim if runDelim is None else runDelim
    # https://stackoverflow.com/a/40821359/1497139
    self.title = slide.shapes.title.text if slide.shapes.title else None
    if self.title is None:
        self.title = self.name

`getLayoutName()`

get the layoutName of this slide

Source code in slides/slidewalker.py

def getLayoutName(self)->str:
    '''
    Returns:
        str: the name of the slide layout of this slide
    '''
    return self.slide.slide_layout.name

`getNotes(yRange=None, useShapes=False)`

get the notes

Return

str: the notes for this slide

Source code in slides/slidewalker.py

def getNotes(self,yRange=None,useShapes:bool=False)->str:
    """
    get the notes

    Args:
        yRange: passed on to getText4Shapes if useShapes is set
        useShapes(bool): if True get the text from the shapes of the
            notes slide via getText4Shapes

    Return:
        str: the notes for this slide, empty if there are none
    """
    if not self.slide.has_notes_slide:
        return ""
    notesSlide = self.slide.notes_slide
    if useShapes:
        return self.getText4Shapes(notesSlide.shapes, yRange, runDelim=self.runDelim)
    notesFrame = notesSlide.notes_text_frame
    if notesFrame:
        return notesFrame.text
    return ""

`getText(yRange=None)`

get the text in the given yRange

Parameters:

Name	Type	Description	Default
`yRange`			`None`

Return

list: the text lines of this slide

Source code in slides/slidewalker.py

def getText(self,yRange=None):
    '''
    get the text of my shapes in the given yRange

    Args:
        yRange: passed on to getText4Shapes

    Return:
        the result of getText4Shapes for my slide's shapes
        using my run delimiter
    '''
    return self.getText4Shapes(self.slide.shapes, yRange, runDelim=self.runDelim)

`getText4Shapes(shapes, yRange, runDelim=None)`

get the text for the given shapes in the given yRange using the given run delimiter

Parameters:

Name	Description	Default
`shapes`		required
`yRange`		required
`runDelim(str)`	the delimiter for text runs	required

Source code in slides/slidewalker.py

def getText4Shapes(self,shapes,yRange,runDelim:str=None):
    """
    get the text for the given shapes in the given yRange using the given
    run delimiter

    Args:
        shapes: the shapes to get the text from
        yRange: the YRange the top of a shape has to be in, None for no limit
        runDelim(str): the delimiter for text runs

    Returns:
        list: one string per accepted shape that has a text frame;
        never None
    """
    # lines will be populated with a list of strings,
    # one for each "line"  in presentation
    lines = []
    if runDelim is None:
        runDelim=self.runDelim
    y=None
    for shape in shapes:
        if not shape.has_text_frame:
            continue
        # all runs of all paragraphs of the shape form one line
        line=runDelim.join(
            run.text
            for paragraph in shape.text_frame.paragraphs
            for run in paragraph.runs
        )
        y=self.getMM(shape.top)
        if y and YRange.isIn(yRange,y):
            lines.append(line)

    # kept for backward compatible output: an empty closing line is
    # added if the last text shape was accepted
    if y and YRange.isIn(yRange,y):
        lines.append("")
    # always return the list - also if there was no accepted shape
    return lines

`SlideWalker`

Bases: object

get meta information for all powerpoint presentations in a certain folder

Source code in slides/slidewalker.py

class SlideWalker(object):
    '''
    get meta information for all powerpoint presentations in a certain folder
    '''

    def __init__(self, rootFolder:str, debug:bool=False):
        '''
        Constructor

        Args:
            rootFolder(str): the path to the root folder of the analysis
            debug(bool): if True switch on debugging
        '''
        self.rootFolder=rootFolder
        self.debug=debug

    def asCsv(self,listOfDicts:list,fieldNames:list=None)->str:
        ''' convert the given list of dicts to CSV
        see https://stackoverflow.com/a/9157370/1497139

        Args:
            listOfDicts(list): the table to convert
            fieldNames(list): the column names to use; if None the keys
                of all records are used in the order of first appearance

        Returns:
            str: the CSV formatted result
        '''
        output=io.StringIO()
        if fieldNames is None:
            # a dict keeps the insertion order - so the column order is
            # the same on every run (a set would not guarantee that)
            fieldNames=list(dict.fromkeys(
                key for record in listOfDicts for key in record
            ))
        writer=csv.DictWriter(output,fieldnames=fieldNames,quoting=csv.QUOTE_NONNUMERIC)
        writer.writeheader()
        for record in listOfDicts:
            writer.writerow(record)
        return output.getvalue()

    def yieldPowerPointFiles(self,verbose:bool=False):
        """
        generate  my power point files

        Presentations that can not be opened are skipped.

        Args:
            verbose(bool): if True show information about the processing
        """
        pptxFiles=self.findFiles(self.rootFolder, ".pptx")
        if verbose:
            print(f"found {len(pptxFiles)} powerpoint files")
        for pptxFile in pptxFiles:
            if verbose:
                print(f"Extracting data from {pptxFile}")
            ppt=PPT(pptxFile)
            ppt.open()
            if not ppt.error:
                yield ppt

    def yieldSlides(self,ppt,verbose:bool, excludeHiddenSlides:bool=False, runDelim:str=None,slideDetails:bool=False):
        """
        yield all slides

        Args:
            ppt: the presentation to get the slides from
            verbose(bool): if True print details on stdout
            excludeHiddenSlides(bool): If True hidden lecture will be excluded and also ignored in the page counting
            runDelim(str): the delimiter to use for powerpoint slide text
            slideDetails(bool): the slide summaries are only printed if
                verbose and slideDetails are both set
        """
        ppt.getSlides(excludeHiddenSlides=excludeHiddenSlides,runDelim=runDelim)
        for slide in ppt.slides:
            if verbose and slideDetails:
                print(slide.summary())
            yield slide

    def dumpInfo(self,outputFormat:str, excludeHiddenSlides:bool=False, runDelim:str=None, slideDetails:bool=False):
        '''
        dump information about the lecture in the given format

        Args:
            outputFormat(str): csv, json or txt; with "lod" nothing is
                printed and the collected information is returned
            excludeHiddenSlides(bool): If True hidden lecture will be excluded and also ignored in the page counting
            runDelim(str): the delimiter to use for powerpoint slide text
            slideDetails(bool): passed on to yieldSlides

        Returns:
            dict: the information by file name if outputFormat is "lod",
            None otherwise
        '''
        info={}
        csvRecords=[]
        verbose=self.debug or outputFormat=="txt"
        for ppt in self.yieldPowerPointFiles(verbose):
            pptSummary=ppt.asDict()
            if verbose:
                print (f"{ppt.summary()}")
            slideSummary=[]
            for slide in self.yieldSlides(ppt,verbose, excludeHiddenSlides, runDelim,slideDetails=slideDetails):
                slideRecord=slide.asDict()
                csvRecord = OrderedDict()
                csvRecord["basename"]=ppt.basename
                csvRecord["page"]=slideRecord["page"]
                csvRecord["name"]=slideRecord["name"]
                # the csv title has all whitespace removed
                title=''.join(slideRecord["title"].split())
                csvRecord["title"]=title
                csvRecords.append(csvRecord)
                slideSummary.append(slideRecord)
            pptSummary["slides"]=slideSummary
            info[ppt.basename]=pptSummary
        if outputFormat=="json":
            #
            # avoid the windows horror story
            # https://stackoverflow.com/questions/9233027/unicodedecodeerror-charmap-codec-cant-decode-byte-x-in-position-y-character
            # https://stackoverflow.com/a/18337754/1497139
            jsonStr=json.dumps(info,indent=2,default=str,ensure_ascii=False).encode('utf8')
            print(jsonStr.decode("utf-8"))
        elif outputFormat=="csv":
            sortedCsvRecords=sorted(csvRecords, key = lambda row: (row["basename"], int(row["page"])))
            csvText=self.asCsv(sortedCsvRecords,["basename","page","name","title"])
            print(csvText)
        elif outputFormat=="lod":
            return info

    def dumpInfoToString(self,outputFormat:str, excludeHiddenSlides:bool=True):
        """
        dump information about the presentations in the given format

        Args:
            outputFormat(str): csv, json or txt
            excludeHiddenSlides(bool): If True hidden lecture will be excluded and also ignored in the page counting

        Returns:
            str: everything dumpInfo has printed to stdout
        """
        f = StringIO()
        with redirect_stdout(f):
            self.dumpInfo(outputFormat, excludeHiddenSlides=excludeHiddenSlides)
        stdout = f.getvalue()
        return stdout

    def findFiles(self,path:str,ext:str)->list:
        '''
        find Files with the given extension in the given path

        Files with a name starting with "~$" are ignored.

        Args:
            path(str): the path to start with
            ext(str): the extension to search for

        Returns:
            list: a list of files found
        '''
        foundFiles=[]
        for root, _dirs, files in os.walk(path, topdown=False):
            for name in files:
                if name.endswith(ext) and not name.startswith("~$"):
                    filepath=os.path.join(root, name)
                    foundFiles.append(filepath)
        return foundFiles

`init(rootFolder, debug=False)`

Constructor

Parameters:

Name	Type	Description	Default
`rootFolder(str)`		the path to the root folder of the analysis	required
`debug(bool)`		if True switch on debugging	required

Source code in slides/slidewalker.py

def __init__(self, rootFolder:str, debug:bool=False):
    '''
    create me for the given root folder

    Args:
        rootFolder(str): the path to the root folder of the analysis
        debug(bool): if True switch on debugging
    '''
    self.debug = debug
    self.rootFolder = rootFolder

`asCsv(listOfDicts, fieldNames=None)`

convert the given list of dicts to CSV see https://stackoverflow.com/a/9157370/1497139

Parameters:

Name	Type	Description	Default
`listOfDicts(list)`		the table to convert	required

Returns:

Name	Type	Description
`str`	`str`	the CSV formatted result

Source code in slides/slidewalker.py

def asCsv(self,listOfDicts:list,fieldNames:list=None)->str:
    ''' convert the given list of dicts to CSV
    see https://stackoverflow.com/a/9157370/1497139

    Args:
        listOfDicts(list): the table to convert
        fieldNames(list): the column names to use; if None the keys
            of all records are used in the order of first appearance

    Returns:
        str: the CSV formatted result
    '''
    output=io.StringIO()
    if fieldNames is None:
        # a dict keeps the insertion order - so the column order is
        # the same on every run (a set would not guarantee that)
        fieldNames=list(dict.fromkeys(
            key for record in listOfDicts for key in record
        ))
    writer=csv.DictWriter(output,fieldnames=fieldNames,quoting=csv.QUOTE_NONNUMERIC)
    writer.writeheader()
    for record in listOfDicts:
        writer.writerow(record)
    return output.getvalue()

`dumpInfo(outputFormat, excludeHiddenSlides=False, runDelim=None, slideDetails=False)`

dump information about the lecture in the given format

Parameters:

Name	Description	Default
`outputFormat(str)`	csv, json or txt	required
`excludeHiddenSlides(bool)`	If True hidden lecture will be excluded and also ignored in the page counting	required
`runDelim(str)`	the delimiter to use for powerpoint slide text	required

Source code in slides/slidewalker.py

def dumpInfo(self,outputFormat:str, excludeHiddenSlides:bool=False, runDelim:str=None, slideDetails:bool=False):
    '''
    collect the information about all presentations and their slides
    and dump it in the given format

    Args:
        outputFormat(str): csv, json or txt; "lod" returns the collected
            information instead of printing it
        excludeHiddenSlides(bool): If True hidden lecture will be excluded and also ignored in the page counting
        runDelim(str): the delimiter to use for powerpoint slide text
        slideDetails(bool): passed on to yieldSlides

    Returns:
        dict: the information by file name for "lod", None otherwise
    '''
    infoByFile = {}
    rows = []
    verbose = self.debug or outputFormat=="txt"
    for ppt in self.yieldPowerPointFiles(verbose):
        pptRecord = ppt.asDict()
        if verbose:
            print (f"{ppt.summary()}")
        slideRecords = []
        slides = self.yieldSlides(ppt, verbose, excludeHiddenSlides, runDelim, slideDetails=slideDetails)
        for slide in slides:
            slideRecord = slide.asDict()
            # the csv row holds the title with all whitespace removed
            rows.append(OrderedDict([
                ("basename", ppt.basename),
                ("page", slideRecord["page"]),
                ("name", slideRecord["name"]),
                ("title", "".join(slideRecord["title"].split())),
            ]))
            slideRecords.append(slideRecord)
        pptRecord["slides"] = slideRecords
        infoByFile[ppt.basename] = pptRecord
    if outputFormat == "lod":
        return infoByFile
    if outputFormat == "json":
        #
        # avoid the windows horror story
        # https://stackoverflow.com/questions/9233027/unicodedecodeerror-charmap-codec-cant-decode-byte-x-in-position-y-character
        # https://stackoverflow.com/a/18337754/1497139
        encoded = json.dumps(infoByFile, indent=2, default=str, ensure_ascii=False).encode('utf8')
        print(encoded.decode("utf-8"))
    elif outputFormat == "csv":
        def byFileAndPage(row):
            return (row["basename"], int(row["page"]))
        rows.sort(key=byFileAndPage)
        print(self.asCsv(rows, ["basename","page","name","title"]))

`dumpInfoToString(outputFormat, excludeHiddenSlides=True)`

dump information about the presentations in the given format

Parameters:

Name	Type	Description	Default
`outputFormat(str)`		csv, json or txt	required
`excludeHiddenSlides(bool)`		If True hidden lecture will be excluded and also ignored in the page counting	required

Source code in slides/slidewalker.py

def dumpInfoToString(self,outputFormat:str, excludeHiddenSlides:bool=True):
    """
    run dumpInfo and capture what it prints

    Args:
        outputFormat(str): csv, json or txt
        excludeHiddenSlides(bool): If True hidden lecture will be excluded and also ignored in the page counting

    Returns:
        str: the text dumpInfo has written to stdout
    """
    captured = StringIO()
    with redirect_stdout(captured):
        self.dumpInfo(outputFormat, excludeHiddenSlides=excludeHiddenSlides)
    return captured.getvalue()

`findFiles(path, ext)`

find Files with the given extension in the given path

Parameters:

Name	Type	Description	Default
`path(str)`		the path to start with	required
`ext(str)`		the extension to search for	required

Returns:

Name	Type	Description
`list`	`list`	a list of files found

Source code in slides/slidewalker.py

def findFiles(self,path:str,ext:str)->list:
    '''
    walk the given path bottom up and collect the files with the
    given extension; files with a name starting with "~$" are left out

    Args:
        path(str): the path to start with
        ext(str): the extension to search for

    Returns:
        list: a list of files found
    '''
    return [
        os.path.join(folder, fileName)
        for folder, _subFolders, fileNames in os.walk(path, topdown=False)
        for fileName in fileNames
        if fileName.endswith(ext) and not fileName.startswith("~$")
    ]

`yieldPowerPointFiles(verbose=False)`

generate my power point files

Parameters:

Name	Type	Description	Default
`verbose(bool)`		if True show information about the processing	required

Source code in slides/slidewalker.py

def yieldPowerPointFiles(self,verbose:bool=False):
    """
    open the .pptx files below my root folder one by one and yield
    each presentation that could be opened without an error

    Args:
        verbose(bool): if True show information about the processing
    """
    candidates = self.findFiles(self.rootFolder, ".pptx")
    if verbose:
        print(f"found {len(candidates)} powerpoint files")
    for candidate in candidates:
        if verbose:
            print(f"Extracting data from {candidate}")
        presentation = PPT(candidate)
        presentation.open()
        if presentation.error:
            # presentations that can not be opened are skipped
            continue
        yield presentation

`yieldSlides(ppt, verbose, excludeHiddenSlides=False, runDelim=None, slideDetails=False)`

yield all slides

Parameters:

Name	Description	Default
`verbose(bool)`	if True print details on stdout	required
`excludeHiddenSlides(bool)`	If True hidden lecture will be excluded and also ignored in the page counting	required
`runDelim(str)`	the delimiter to use for powerpoint slide text	required

Source code in slides/slidewalker.py

def yieldSlides(self,ppt,verbose:bool, excludeHiddenSlides:bool=False, runDelim:str=None,slideDetails:bool=False):
    """
    let the given presentation collect its slides and yield them one by one

    Args:
        ppt: the presentation to get the slides from
        verbose(bool): if True print details on stdout
        excludeHiddenSlides(bool): If True hidden lecture will be excluded and also ignored in the page counting
        runDelim(str): the delimiter to use for powerpoint slide text
        slideDetails(bool): the slide summary is only printed if verbose
            and slideDetails are both set
    """
    ppt.getSlides(excludeHiddenSlides=excludeHiddenSlides, runDelim=runDelim)
    showSummary = verbose and slideDetails
    for pptSlide in ppt.slides:
        if showSummary:
            print(pptSlide.summary())
        yield pptSlide

`YRange`

a Y range

Source code in slides/slidewalker.py

class YRange():
    '''
    a range of y values from minY to maxY (both inclusive)
    '''
    def __init__(self, minY=0, maxY=300):
        self.minY = minY
        self.maxY = maxY

    @staticmethod
    def isIn(yRange, y):
        '''
        check whether y is in the given range

        Args:
            yRange(YRange): the range to check against, None for no limit
            y: the y value to check

        Returns:
            bool: True if y is 0, if there is no range or if y is in the range
        '''
        if y == 0:
            return True
        if yRange is None:
            return True
        return yRange.minY <= y <= yRange.maxY

`main(argv=None)`

main routine

Source code in slides/slidewalker.py

def main(argv=None):
    '''
    main routine

    Args:
        argv(list): the command line arguments including the program
            name as first element; defaults to sys.argv

    Returns:
        int: 1 on keyboard interrupt, 2 on any other exception,
        None otherwise
    '''
    if argv is None:
        argv = sys.argv
    # take the program name from the argument vector actually in use,
    # not unconditionally from sys.argv
    program_name = os.path.basename(argv[0] if argv else sys.argv[0])
    program_version_message = f'{program_name} (v{Version.version},{Version.updated})'
    # predefined so that the exception handler below can check the debug
    # flag even if the failure happened before the arguments were parsed
    args = None
    try:
        parser = argparse.ArgumentParser(description='SlideWalker - get meta information for all powerpoint presentations in a certain folder')
        parser.add_argument("-a","--about",help="show about info [default: %(default)s]",action="store_true")
        parser.add_argument("-d", "--debug", dest="debug", action="store_true", help="show debug info")
        parser.add_argument("-f", "--format", default="json", help="output format to create: csv,json or txt (default: %(default)s)")
        # hidden slides are excluded unless this flag is given
        parser.add_argument("--includeHidden",action="store_true",help="include hidden slides (default: %(default)s)")
        parser.add_argument("--rd","--runDelimiter",dest="runDelim",help="text run delimiter (default: %(default)s) suggested: ＿↵•",default=Slide.defaultRunDelim)
        parser.add_argument("--rootPath",default=".")
        parser.add_argument('-V', '--version', action='version', version=program_version_message)
        args = parser.parse_args(argv[1:])
        if args.about:
            print(program_version_message)
            print(f"see {Version.doc_url}")
            webbrowser.open(Version.doc_url)
        else:
            sw=SlideWalker(args.rootPath,args.debug)
            sw.dumpInfo(args.format,excludeHiddenSlides=not args.includeHidden,runDelim=args.runDelim)

    except KeyboardInterrupt:
        ### handle keyboard interrupt ###
        return 1
    except Exception as e:
        indent = len(program_name) * " "
        sys.stderr.write(program_name + ": " + repr(e) + "\n")
        sys.stderr.write(indent + "  for help use --help")
        if getattr(args, "debug", False):
            print(traceback.format_exc())
        return 2

`version`

Created on 2022-04-01

@author: wf

`Version`

Bases: object

Version handling for pySemanticSlides

Source code in slides/version.py

class Version(object):
    '''
    Version handling for pySemanticSlides

    Pure constants holder: all values are class attributes, so they are
    read as e.g. Version.version or Version.doc_url without instantiating.
    '''
    # project name, also interpolated into longDescription below
    name="pySemanticSlides"
    # one-line project summary, also interpolated into longDescription below
    description='generate Semantic Mediawiki for a set of powerpoint presentations with semantic annotations'
    # version string is taken from the slides package so there is a single source
    version=slides.__version__
    # creation date and date of last update (ISO format yyyy-mm-dd)
    date = '2023-02-14'
    updated = '2023-02-22'
    authors='Wolfgang Fahl'
    # documentation wiki page
    doc_url="https://wiki.bitplan.com/index.php/PySemanticSlides"
    # GitHub discussions page
    chat_url="https://github.com/WolfgangFahl/pySemanticSlides/discussions"
    # GitHub repository (NOTE(review): "cm" presumably means configuration management - confirm)
    cm_url="https://github.com/WolfgangFahl/pySemanticSlides"
    # plain literal: the text has no placeholders, so no f-string prefix is needed
    license='''Copyright 2020-2023 contributors. All rights reserved.
  Licensed under the Apache License 2.0
  http://www.apache.org/licenses/LICENSE-2.0
  Distributed on an "AS IS" basis without warranties
  or conditions of any kind, either express or implied.'''
    # multi-line summary composed from the attributes defined above
    longDescription=f"""{name} version {version}
{description}
  Created by {authors} on {date} last updated {updated}"""

pySemanticSlides API Documentation

doi

DOI dataclass

debug_dump(d)

fetchBibTexDict()

fetchBibtexMeta()

fetchCiteprocMeta()

fetchMeta(headers)

fetchPlainTextBibTexDict()

keyvalue_parser

BaseKeyValueParser

__init__(config)

add_error(error_msg)

getStrippedValues(value_list)

handleErrors(text)

setKeydefs(keydefs)

KeyValueParser

__init__(config)

getKeyValues(text)

setKeydefs(keydefs)

KeyValueParserConfig dataclass

KeyValueSplitParser

getKeyValues(text)

Keydef dataclass

as_dict(keydefs) classmethod

SimpleKeyValueParser

getKeyValues(text)

Split

__init__(delim=',', quote="'", unicode_chars='•→–', keep_quotes=True)

split(text)

semslides

SemSlides

__init__(args)

getArgParser(version_msg) classmethod

main(argv=None)

slidewalker

PPT

__init__(filepath, pageHeight=297)

asDict()

getSlides(excludeHiddenSlides=False, runDelim=None)

open()

summary()

Slide

__init__(ppt, slide, page, pdf_page, runDelim=None)

getLayoutName()

getNotes(yRange=None, useShapes=False)

getText(yRange=None)

getText4Shapes(shapes, yRange, runDelim=None)

SlideWalker

__init__(rootFolder, debug=False)

asCsv(listOfDicts, fieldNames=None)

dumpInfo(outputFormat, excludeHiddenSlides=False, runDelim=None, slideDetails=False)

dumpInfoToString(outputFormat, excludeHiddenSlides=True)

findFiles(path, ext)

yieldPowerPointFiles(verbose=False)

yieldSlides(ppt, verbose, excludeHiddenSlides=False, runDelim=None, slideDetails=False)

YRange

main(argv=None)

version

Version

`doi`

`DOI` `dataclass`

`debug_dump(d)`

`fetchBibTexDict()`

`fetchBibtexMeta()`

`fetchCiteprocMeta()`

`fetchMeta(headers)`

`fetchPlainTextBibTexDict()`

`keyvalue_parser`

`BaseKeyValueParser`

`__init__(config)`

`add_error(error_msg)`

`getStrippedValues(value_list)`

`handleErrors(text)`

`setKeydefs(keydefs)`

`KeyValueParser`

`__init__(config)`

`getKeyValues(text)`

`setKeydefs(keydefs)`

`KeyValueParserConfig` `dataclass`

`KeyValueSplitParser`

`getKeyValues(text)`

`Keydef` `dataclass`

`as_dict(keydefs)` `classmethod`

`SimpleKeyValueParser`

`getKeyValues(text)`

`Split`

`__init__(delim=',', quote="'", unicode_chars='•→–', keep_quotes=True)`

`split(text)`

`semslides`

`SemSlides`

`__init__(args)`

`getArgParser(version_msg)` `classmethod`

`main(argv=None)`

`slidewalker`

`PPT`

`__init__(filepath, pageHeight=297)`

`asDict()`

`getSlides(excludeHiddenSlides=False, runDelim=None)`

`open()`

`summary()`

`Slide`

`__init__(ppt, slide, page, pdf_page, runDelim=None)`

`getLayoutName()`

`getNotes(yRange=None, useShapes=False)`

`getText(yRange=None)`

`getText4Shapes(shapes, yRange, runDelim=None)`

`SlideWalker`

`__init__(rootFolder, debug=False)`

`asCsv(listOfDicts, fieldNames=None)`

`dumpInfo(outputFormat, excludeHiddenSlides=False, runDelim=None, slideDetails=False)`

`dumpInfoToString(outputFormat, excludeHiddenSlides=True)`

`findFiles(path, ext)`

`yieldPowerPointFiles(verbose=False)`

`yieldSlides(ppt, verbose, excludeHiddenSlides=False, runDelim=None, slideDetails=False)`

`YRange`

`main(argv=None)`

`version`

`Version`