Source code for mdf_reader.common.decoders


import numpy as np
import pandas as pd
import string

from .. import properties


#for importer, modname, ispkg in pkgutil.walk_packages(path=package.__path__,prefix=package.__name__+'.',onerror=lambda x: None):
#    print(modname.split(".")[-1])
# TO DECODE FROM OBJECT TO INTEGER
#
# Decodes input object type pd.series to a specified data type
#
# On missing data, integer results are promoted to a floating type to accommodate np.nan:
# Promotion dtype for storing NAs: integer is cast to float64
# (https://pandas.pydata.org/pandas-docs/version/0.22/gotchas.html#nan-integer-na-values-and-na-type-promotions)
#
# return base10.astype(self.dtype, casting = 'safe')
# safe casting specified, otherwise converts np.nan to some number depending on dtype.

# Lookup tables for signed-overpunch decoding, built once at import time
# instead of on every element.
# In TDF-11 there is a mix of overpunch and no overpunch, so plain digits are
# included in both tables.
#
# Character -> decimal digit (as a string) it stands for:
#   '0'-'9' -> themselves, 'A'-'I' -> 1..9, 'J'-'R' -> 1..9,
#   '{', '<', '}', '!' -> 0
_OVERPUNCH_NUMBER = {digit: digit for digit in string.digits}
_OVERPUNCH_NUMBER.update(
    {letter: str(i + 1) for i, letter in enumerate(string.ascii_uppercase[:9])}
)
_OVERPUNCH_NUMBER.update(
    {letter: str(i + 1) for i, letter in enumerate(string.ascii_uppercase[9:18])}
)
_OVERPUNCH_NUMBER.update({'{': '0', '<': '0', '}': '0', '!': '0'})

# Character -> sign factor it contributes:
#   digits, 'A'-'I', '{', '<' -> +1 ; 'J'-'R', '}', '!' -> -1
_OVERPUNCH_FACTOR = {digit: 1 for digit in string.digits}
_OVERPUNCH_FACTOR.update({letter: 1 for letter in string.ascii_uppercase[:9]})
_OVERPUNCH_FACTOR.update({letter: -1 for letter in string.ascii_uppercase[9:18]})
_OVERPUNCH_FACTOR.update({'}': -1, '!': -1, '{': 1, '<': 1})


def signed_overpunch_i(x):
    """Decode a single signed-overpunch encoded element to a number.

    Every character of ``x`` is translated to a decimal digit and to a sign
    factor (+1 or -1). The digits are concatenated and read as a base-10
    integer, which is then multiplied by the product of all sign factors.

    Parameters
    ----------
    x : str or float
        Encoded element. ``np.nan``, the empty string and whitespace-only
        strings are treated as missing data.

    Returns
    -------
    number
        The decoded signed value, or ``np.nan`` when ``x`` is missing or
        cannot be decoded. Undecodable input (e.g. a character that is not in
        the lookup tables, or a non-iterable value) is reported on stdout and
        never raises.
    """
    # Pre-bound so the diagnostics in the except branch can tell how far the
    # conversion got without needing a bare ``except`` around each print.
    n = None
    f = None
    try:
        # NaN is the only value expected here that is not equal to itself.
        if not x == x:
            return np.nan
        # Blanks are missing data, not a decoding error: return quietly.
        if isinstance(x, str) and not x.strip():
            return np.nan

        chars = list(x)
        # An unknown character maps to np.nan, which makes join() raise
        # TypeError: the element is then reported below and decoded as NaN.
        n = "".join([_OVERPUNCH_NUMBER.get(char, np.nan) for char in chars])
        f = np.prod([_OVERPUNCH_FACTOR.get(char, np.nan) for char in chars])

        # Empty input (no characters at all) has nothing to convert.
        if not n:
            return np.nan
        return f * int(n)
    except Exception as e:
        print('ERROR decoding element: {}'.format(x))
        print(e)
        print('Conversion sequence:')
        if n is not None:
            print('number base conversion: {}'.format(n))
        if f is not None:
            print('factor conversion: {}'.format(f))
        return np.nan
class df_decoders():
    """Collection of decoders that operate on a whole column of data.

    Each decoder takes an iterable of encoded elements and returns a
    ``pd.Series`` holding the decoded values, with missing or empty elements
    as ``np.nan``.
    """

    def __init__(self, dtype):
        # The requested dtype is accepted but not used: decoders always return
        # object-typed series, conversion to the actual type happens in the
        # converters only.
        self.dtype = 'object'

    def signed_overpunch(self, data):
        """Decode signed-overpunch elements of ``data``.

        ``signed_overpunch_i`` is applied element by element; the float
        results are returned as a new ``pd.Series`` of dtype ``self.dtype``.
        """
        decode_elements = np.vectorize(signed_overpunch_i, otypes=[float])
        decoded_values = decode_elements(data)
        return pd.Series(decoded_values, dtype=self.dtype)

    def base36(self, data):
        """Decode base-36 elements of ``data`` to base-10 strings.

        NaN and falsy elements (such as the empty string) become ``np.nan``;
        every other element is read as a base-36 integer and returned as its
        base-10 string representation.
        """
        decoded_values = []
        for element in data:
            # Caution: NaN must be filtered out before converting, because
            # int(str(np.nan), 36) ==> 30191
            if element == element and element:
                decoded_values.append(str(int(str(element), 36)))
            else:
                decoded_values.append(np.nan)
        return pd.Series(decoded_values, dtype=self.dtype)
# Registry of the available decoders, addressed as
# decoders[<encoding>][<data type>] -> bound df_decoders method.
decoders = {'signed_overpunch': {}, 'base36': {}}

# One entry per numeric data type declared in the package properties.
for dtype in properties.numeric_types:
    decoders['signed_overpunch'][dtype] = df_decoders(dtype).signed_overpunch
    decoders['base36'][dtype] = df_decoders(dtype).base36

# The 'key' data type is registered for both encodings as well.
decoders['signed_overpunch']['key'] = df_decoders('key').signed_overpunch
decoders['base36']['key'] = df_decoders('key').base36

## Now add the file format specific decoders
#import pkgutil
#import importlib
#from mdf_reader import fs_decoders
#package=fs_decoders
#for importer, modname, ispkg in pkgutil.walk_packages(path=package.__path__,prefix=package.__name__+'.',onerror=lambda x: None):
#    file_format = modname.split(".")[-1]
#    try:
#        file_format_decoders = importlib.import_module(modname, package=None).decoders
#        for decoder in file_format_decoders.keys():
#            decoders[".".join([file_format,decoder])] = file_format_decoders.get(decoder)
#    except Exception as e:
#        logging.error("Error loading {0} decoders: {1}".format(modname,e))
#