Source code for mdf_reader.data_models.code_tables

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""

This module provides functions to manage data model
code table files and objects according to the
requirements of the data reader tool.

"""

import sys
import json
import datetime
import numpy as np
import pandas as pd
import os
import glob
import shutil
from copy import deepcopy
from pandas.io.json.normalize import nested_to_record
import ast

#https://stackoverflow.com/questions/10756427/loop-through-all-nested-dictionary-values
#def print_nested(d):
#    if isinstance(d, dict):
#        for k, v in d.items():
#            print_nested(v)
#    elif hasattr(d, '__iter__') and not isinstance(d, str):
#        for item in d:
#            print_nested(item)
#    elif isinstance(d, str):
#        print(d)
#
#    else:
#        print(d)

# Directory holding this module; packaged resources are resolved relative to it.
toolPath = os.path.split(os.path.abspath(__file__))[0]
# Root directory of the bundled library files.
table_lib = os.path.join(toolPath, 'lib')
# Directory with the code table templates shipped with the tool.
templates_path = os.path.join(toolPath, 'lib', 'templates', 'code_tables')
def read_table(table_path):
    """
    Reads a data model code table file to a dictionary.

    It completes the code table to the full complexity the data reader
    expects, by appending information on secondary keys and expanding
    range keys.

    Arguments
    ---------
    table_path : str
        The file path of the code table.

    Returns
    -------
    dict
        Code table. When a companion ``.keys`` file exists next to the
        table, its evaluated content is stored under the ``'_keys'`` entry.

    """
    # JSON text is UTF-8 by specification: do not rely on the locale default
    with open(table_path, encoding='utf-8') as fileObj:
        table = json.load(fileObj)

    # Add keys for nested code tables. The companion file shares the table
    # path with its extension replaced by '.keys'; splitext only looks at
    # the last path component, so dots in directory names are left alone.
    keys_path = os.path.splitext(table_path)[0] + '.keys'
    if os.path.isfile(keys_path):
        with open(keys_path, encoding='utf-8') as fileObj:
            keys_spec = json.load(fileObj)
        table['_keys'] = {}
        for x, y in keys_spec.items():
            # Keys and values are stored as strings in the file; turn them
            # back into Python literals where possible
            key = eval_dict_items(x)
            values = [eval_dict_items(k) for k in y]
            table['_keys'][key] = values

    # Expand range keys (in place)
    expand_integer_range_key(table)
    return table
def templates():
    """
    Lists the name of the available code table templates

    Returns
    -------
    list
        Code table template aliases

    """
    pattern = os.path.join(templates_path, '*.json')
    aliases = []
    for template_file in glob.glob(pattern):
        file_name = os.path.basename(template_file)
        # The alias is the part of the file name before its first dot
        aliases.append(file_name.partition(".")[0])
    return aliases
def copy_template(table, out_dir=None, out_path=None):
    """
    Copies a code table template to an output file or path

    Parameters
    ----------
    table : str
        Code table template name to copy

    Keyword Arguments
    -----------------
    out_dir : str, opt
        Directory to copy code table file template to
    out_path : str, opt
        Full filename to copy code table file template to

    Either out_dir or out_path must be provided. When both are given,
    out_path takes precedence.

    Returns
    -------
    None
        The outcome (success or error) is reported on standard output.

    """
    tables = templates()
    if table not in tables:
        print('copy_template ERROR:')
        print('\tRequested template {} must be a valid name.'.format(table))
        print('\tValid names are: {}'.format(", ".join(tables)))
        return

    # Without a destination os.path.join(None, ...) would raise a TypeError;
    # report it the same way as the other usage errors instead.
    if not out_path and out_dir is None:
        print('copy_template ERROR:')
        print('\tEither out_dir or out_path must be provided.')
        return

    table_path = os.path.join(templates_path, table + '.json')
    table_out = out_path if out_path else os.path.join(out_dir, table + '.json')
    shutil.copyfile(table_path, table_out)

    if os.path.isfile(table_out):
        print('Schema template {0} copied to {1}'.format(table, table_out))
    else:
        print('copy_template ERROR:')
        print('\tError copying table template {0} to {1}'.format(table, table_out))
    return
def _parse_range_key(key):
    """Parse ``range_key(lower,upper[,step])`` into ``(lower, upper, step)``.

    Prints a diagnostic and returns None when a parameter cannot be parsed.
    """
    # Drop the 'range_key(' prefix and the closing ')'
    range_params = [param.strip() for param in key[10:-1].split(",")]

    try:
        lower = int(range_params[0])
    except ValueError as e:
        print("Lower bound parsing error in range key: ", key)
        print("Error is:")
        print(e)
        return None

    try:
        # 'yyyy' stands for the current year
        if range_params[1] == 'yyyy':
            upper = datetime.date.today().year
        else:
            upper = int(range_params[1])
    except (IndexError, ValueError) as e:
        # IndexError: the key carries no upper bound at all
        print("Upper bound parsing error in range key: ", key)
        print("Error is:")
        print(e)
        return None

    step = 1
    if len(range_params) > 2:
        try:
            step = int(range_params[2])
            if step == 0:
                # range() rejects a zero step; report it as a parsing error
                raise ValueError("range step must not be zero")
        except ValueError as e:
            print("Range step parsing error in range key: ", key)
            print("Error is:")
            print(e)
            return None

    return lower, upper, step


def expand_integer_range_key(d):
    """
    Expands, in place, the integer range keys of a (nested) code table.

    Every key of the form ``range_key(lower,upper[,step])`` is replaced by
    one key per integer of the inclusive range ``lower..upper`` (as a
    string), each holding an independent deep copy of the original value.
    ``upper`` may be ``yyyy``, meaning the current year; ``step`` defaults
    to 1. Nested dictionaries are processed recursively. Anything that is
    not a dictionary is left untouched.

    Arguments
    ---------
    d : dict
        Code table (or any of its nested values). Modified in place.

    Returns
    -------
    None
        If a range key cannot be parsed, a message is printed and the
        expansion stops, leaving that key as it was.

    """
    if not isinstance(d, dict):
        return

    # Iterate over a snapshot of the keys: the dictionary is modified below
    for k in list(d):
        # Keys may be non-strings (e.g. evaluated '_keys' entries): skip them
        if not (isinstance(k, str) and k.startswith('range_key')):
            continue
        range_spec = _parse_range_key(k)
        if range_spec is None:
            return
        lower, upper, step = range_spec
        for i_range in range(lower, upper + 1, step):
            # Deep copy: otherwise repetitions are linked and act as one!
            d[str(i_range)] = deepcopy(d[k])
        d.pop(k, None)

    # Once this level is fully expanded, descend once into every value
    for v in d.values():
        expand_integer_range_key(v)
def eval_dict_items(item):
    """
    Evaluates a string as a Python literal, if possible.

    Arguments
    ---------
    item : str
        Text to evaluate (e.g. ``"1"`` or ``"('core','VS')"``).

    Returns
    -------
    object
        The literal value ``ast.literal_eval`` yields for ``item``, or
        ``item`` itself, unchanged, when it is not a valid Python literal.

    """
    try:
        return ast.literal_eval(item)
    # Only the errors literal_eval raises on malformed input are swallowed,
    # so that KeyboardInterrupt/SystemExit still propagate
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        return item
def table_keys(table):
    """
    Lists the keys of a code table.

    Arguments
    ---------
    table : dict
        Code table.

    Returns
    -------
    list
        If the table has a non-empty ``'_keys'`` entry, a list with one
        list of key components per entry of the flattened table (the
        ``'_keys'`` entry itself excluded). Otherwise, the list of the
        table's top level keys.

    """
    if not table.get('_keys'):
        return [*table.keys()]

    separator = '∿' # something hopefully not in keys...
    # Work on a copy so that the caller's table keeps its '_keys' entry
    _table = deepcopy(table)
    del _table['_keys']
    # Flatten the nested table: each resulting key is the path of nested
    # keys joined by the separator
    flattened = nested_to_record(_table, sep=separator)
    return [key_path.split(separator) for key_path in flattened]
def get_nested(table, *args):
    """
    Gets the value found in a nested code table by following a sequence
    of keys.

    Arguments
    ---------
    table : dict
        Code table (nested dictionaries).
    *args
        Keys to follow, outermost first. They are first gathered in a
        numpy array, so keys of mixed types are coerced to a common type
        (e.g. ``1`` and ``'a'`` are looked up as ``'1'`` and ``'a'``).

    Returns
    -------
    object
        The value reached after the last key, ``table`` itself if no keys
        are given, or None if the path cannot be followed to its end.

    """
    # TODO: allow selecting which item (value, lower, ...) of the matched
    # entry to retrieve, e.g. through an optional `param` argument. The code
    # tables and table_value_from_keys would have to be extended accordingly.
    z = np.array([*args])
    value = table
    try:
        for key in z:
            value = value.get(key)
    except Exception:
        # Best effort lookup: a missing intermediate level (None has no
        # .get) or an unusable key means there is no value to return
        return None
    return value
def table_value_from_keys(table, df):
    """
    Looks up, element-wise, the code table values that correspond to the
    keys in a DataFrame or Series.

    Arguments
    ---------
    table : dict
        Code table (possibly nested).
    df : pandas.DataFrame or pandas.Series
        Keys to look up. For a DataFrame, each column provides one level
        of nested keys, in column order, and is converted to str before
        the lookup. A Series is passed on as it is.

    Returns
    -------
    numpy.ndarray
        Result of applying ``get_nested`` to every row (DataFrame) or
        element (Series).

    """
    # A nested get cannot be vectorized directly: it is built in a function
    # (get_nested), which is then vectorized
    v_nested_get = np.vectorize(get_nested)
    if isinstance(df, pd.DataFrame):
        # One positional argument per column. Passing the columns directly
        # (rather than eval-ing a string built from their labels) also works
        # for string column labels.
        key_columns = [df[x].astype(str) for x in df.columns]
        return v_nested_get(table, *key_columns)
    # TODO: the str conversion applied to DataFrame columns is not applied
    # here (pending item noted by the original author)
    return v_nested_get(table, df)