# Source code for mdf_reader.data_models.code_tables

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""

This module has functions to manage data model
code table files and objects according to the
requirements of the data reader tool

"""

import sys
import json
import datetime
import numpy as np
import pandas as pd
import os
import glob
import shutil
from copy import deepcopy
from pandas.io.json.normalize import nested_to_record
import ast

#https://stackoverflow.com/questions/10756427/loop-through-all-nested-dictionary-values
#def print_nested(d):
#    if isinstance(d, dict):
#        for k, v in d.items():
#            print_nested(v)
#    elif hasattr(d, '__iter__') and not isinstance(d, str):
#        for item in d:
#            print_nested(item)
#    elif isinstance(d, str):
#        print(d)
#
#    else:
#        print(d)

# Directory containing this module; the library paths below are built from it
toolPath = os.path.dirname(os.path.abspath(__file__))
# 'lib' directory located next to this module
table_lib = os.path.join(toolPath,'lib')
# Directory searched for code table templates (see templates())
templates_path = os.path.join(table_lib,'templates','code_tables')


def read_table(table_path):
    """

    Reads a data model code table file to a dictionary.
    It completes the code table to the full complexity
    the data reader expects, by appending information
    on secondary keys and expanding range keys.

    If a file with the same base name as the code table and
    extension ``keys`` exists next to it, its contents are
    added to the table under the ``_keys`` entry.

    Arguments
    ---------
    table_path : str
        The file path of the code table.

    Returns
    -------
    dict
        Code table

    """

    with open(table_path) as fileObj:
        table = json.load(fileObj)
    # Add keys for nested code tables.
    # splitext only strips the extension of the file name itself, so dots in
    # directory names or extension-less file names do not corrupt the path.
    keys_path = os.path.splitext(table_path)[0] + '.keys'
    if os.path.isfile(keys_path):
        with open(keys_path) as fileObj:
            keys_spec = json.load(fileObj)
        # Keys and values are stored as strings in the file: turn them back
        # into python literals where possible
        table['_keys'] = {
            eval_dict_items(x): [ eval_dict_items(k) for k in y ]
            for x,y in keys_spec.items()
        }
    # Expand range keys (in place)
    expand_integer_range_key(table)

    return table

def templates():
    """

    Lists the name of the available code table templates

    The names are the base names, without the ``.json``
    extension, of the json files found in the templates
    directory.

    Returns
    -------
    list
        Code table template aliases

    """

    tables = glob.glob(os.path.join(templates_path,'*.json'))
    # splitext drops only the trailing '.json', so names containing dots
    # stay intact and match what copy_template() rebuilds as name + '.json'
    return [ os.path.splitext(os.path.basename(x))[0] for x in tables ]

def copy_template(table, out_dir = None,out_path = None):
    """

    Copies a code table template to an output
    file or path

    Problems (unknown template name, no destination given,
    output file missing after the copy) are reported by
    printing a message; the function always returns None.

    Parameters
    ----------
    table : str
        Code table template name to copy

    Keyword Arguments
    -----------------
    out_dir : str, opt
        Directory to copy code table file template to
    out_path : str, opt
        Full filename to copy code table file template to

    Either out_dir or out_path must be provided. If both
    are given, out_path takes precedence.


    """

    tables = templates()
    if table not in tables:
        print('copy_template ERROR:')
        print('\tRequested template {} must be a valid name.'.format(table))
        print('\tValid names are: {}'.format(", ".join(tables)))
        return

    # Without a destination os.path.join(None, ...) would fail with an
    # unhelpful TypeError: report it like the other usage errors instead
    if not out_path and not out_dir:
        print('copy_template ERROR:')
        print('\tEither out_dir or out_path must be provided')
        return

    table_path = os.path.join(templates_path,table + '.json')
    table_out = out_path if out_path else os.path.join(out_dir,table + '.json')
    shutil.copyfile(table_path,  table_out)
    if os.path.isfile(table_out):
        print('Code table template {0} copied to {1}'.format(table, table_out))
    else:
        print('copy_template ERROR:')
        print('\tError copying table template {0} to {1}'.format(table, table_out))
    return

def _parse_integer_range_key(key):
    """Parse a 'range_key(lower,upper[,step])' key to (lower, upper, step); print the error and return None on failure."""
    # Drop the leading 'range_key(' and the trailing ')'
    range_params = [ x.strip() for x in key[10:-1].split(",") ]
    try:
        lower = int(range_params[0])
    except ValueError as e:
        print("Lower bound parsing error in range key: ",key)
        print("Error is:")
        print(e)
        return None
    try:
        # 'yyyy' stands for the current year
        if range_params[1] == 'yyyy':
            upper = datetime.date.today().year
        else:
            upper = int(range_params[1])
    except (IndexError, ValueError) as e:
        # IndexError: the key has no upper bound at all
        print("Upper bound parsing error in range key: ",key)
        print("Error is:")
        print(e)
        return None
    step = 1
    if len(range_params) > 2:
        try:
            step = int(range_params[2])
            if step == 0:
                raise ValueError("range step must not be zero")
        except ValueError as e:
            print("Range step parsing error in range key: ",key)
            print("Error is:")
            print(e)
            return None
    return lower, upper, step


def expand_integer_range_key(d):
    """

    Expands, in place, the integer range keys of a
    (nested) dictionary.

    A key of the form ``range_key(lower,upper[,step])`` is
    replaced by one key per integer in the range, as a
    string, with upper included. Each new key gets its own
    deep copy of the value of the range key. ``upper`` can
    be ``yyyy``, meaning the current year. Nested
    dictionaries are processed recursively; anything that
    is not a dictionary is left untouched.

    Range keys that cannot be parsed are reported by
    printing a message and are left in the dictionary; the
    remaining keys are still processed.

    Arguments
    ---------
    d : dict
        Dictionary to expand. Modified in place.

    """
    if not isinstance(d, dict):
        return
    # Iterate over a snapshot: d is modified while looping
    for k,v in list(d.items()):
        # Expand the nested levels first, so that the copies made below for
        # a range key are already fully expanded
        expand_integer_range_key(v)
        if not (isinstance(k, str) and k.startswith('range_key')):
            continue
        range_params = _parse_integer_range_key(k)
        if range_params is None:
            continue
        lower, upper, step = range_params
        for i_range in range(lower,upper + 1,step):
            # Otherwise repetitions are linked and act as one!
            d[str(i_range)] = deepcopy(v)
        d.pop(k, None)


def eval_dict_items(item):
    """

    Evaluates a string as a python literal.

    Arguments
    ---------
    item : str
        String to evaluate

    Returns
    -------
    object
        The python literal (number, tuple, list...) the
        string represents, or the item itself, unchanged,
        if it cannot be evaluated as a literal.

    """
    try:
        return ast.literal_eval(item)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Not a literal (e.g. a plain word): keep it as it is
        return item

def _nested_key_paths(d, prefix):
    """List the key path (prefix + keys, as strings) to every non-dictionary value nested in d."""
    paths = []
    for k,v in d.items():
        path = prefix + [str(k)]
        if isinstance(v, dict):
            paths.extend(_nested_key_paths(v, path))
        else:
            paths.append(path)
    return paths


def table_keys(table):
    """

    Lists the keys of a code table.

    Arguments
    ---------
    table : dict
        Code table

    Returns
    -------
    list
        If the table has a non-empty ``_keys`` entry: one
        list of keys (as strings) per non-dictionary value
        in the table, giving the path of keys that leads to
        it. The ``_keys`` entry itself is not listed.
        Top-level entries that are not dictionaries come
        first, followed by the paths through the nested
        dictionaries.
        Otherwise: the plain list of the table keys.

    """
    if not table.get('_keys'):
        return list(table.keys())

    # Walk the table directly rather than flattening it to separator-joined
    # strings and splitting them back: that breaks for keys that happen to
    # contain the separator, and needs a copy of the whole table
    leaves = []
    nested = []
    for k,v in table.items():
        if k == '_keys':
            continue
        if isinstance(v, dict):
            nested.extend(_nested_key_paths(v, [str(k)]))
        else:
            leaves.append([str(k)])
    return leaves + nested


def get_nested(table,*args):
    """

    Gets the entry of a nested code table reached by
    following a sequence of keys.

    Arguments
    ---------
    table : dict
        Code table
    *args
        Keys to follow, one per nesting level, outermost
        first. They are gathered in a numpy array before
        the look up, so keys of mixed types are converted
        by numpy to a common type.

    Returns
    -------
    object
        The entry found, or None if any of the keys is
        missing or an intermediate entry is not a
        dictionary. With no keys, the table itself.

    """
    # TODO: add an optional parameter to select which item of the entry to
    # return (value, lower, etc...); this needs the items to be added to the
    # code tables, and the same option in table_value_from_keys
    z = np.array([*args])
    entry = table
    for key in z:
        try:
            entry = entry.get(key)
        except (AttributeError, TypeError):
            # Previous level was missing (None) or not a dictionary, or the
            # key cannot be used for a look up
            return None
    return entry

def table_value_from_keys(table,df):
    """

    Looks up, element-wise, the code table entries that
    correspond to the keys in a pandas object.

    Arguments
    ---------
    table : dict
        Code table
    df : pandas.DataFrame or pandas.Series
        Keys to look up. For a DataFrame, each column holds
        the keys of one nesting level of the table, in
        column order, and is converted to string before the
        look up. A Series is used as it is.

    Returns
    -------
    numpy.ndarray
        Result of the vectorized get_nested: the entry
        found for each element, see get_nested

    """
    # A nested get cannot be vectorized directly: vectorize the function
    # that wraps it
    v_nested_get = np.vectorize(get_nested)
    if isinstance(df, pd.DataFrame):
        # One positional argument per column. Indexing with the column label
        # itself works for any label type (str, int, tuple...)
        key_columns = [ df[x].astype(str) for x in df.columns ]
        return v_nested_get(table, *key_columns)
    # NOTE(review): the original comment says the string conversion should
    # also be applied here; left as it was, confirm before changing
    return v_nested_get(table,df)