Source code for cdm.table_writer.table_writer

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 11 13:45:38 2019

Exports tables written in the C3S Climate Data Store Common Data Model (CDM) format to ascii files.
The format of the tables is contained in a python dictionary, stored as an attribute in a pandas.DataFrame
(or pd.io.parsers.TextFileReader).

This module uses a set of printer functions to "print" element values to a
string object before exporting them to a final ascii file.

Each of the CDM table elements has a data type (pseudo-sql, as defined in the CDM documentation) which determines
which printer function needs to be used.

Numeric data types are printed with a specific number of decimal places, defined in the data element attributes. This
can vary according to each CDM, element, imodel and mapping .json file. If this is not defined in the input attributes
of the imodel, the number of decimal places used comes from a default tool defined in properties.py.

@author: iregon
"""

import os
import pandas as pd
import numpy as np
from io import StringIO
from cdm import properties
from cdm.common import pandas_TextParser_hdlr
from cdm.common import logging_hdlr

# Directory that contains this module.
module_path = os.path.dirname(os.path.abspath(__file__))

# TODO: tell this to dave and delete them... put error messages in functions above

# Dispatch table: CDM (pseudo-sql) data type -> printer function used to
# render the values of an element of that type.
printers = {
    'int': print_integer,
    'numeric': print_float,
    'varchar': print_varchar,
    'timestamp with timezone': print_datetime,
    'int[]': print_integer_array,
    'numeric[]': print_float_array,
    'varchar[]': print_varchar_array,
    'timestamp with timezone[]': print_datetime_array,
}

# For each data type, the names of the element attributes that are looked up
# in the table attributes and forwarded to the printer as keyword arguments.
iprinters_kwargs = {
    'numeric': ['decimal_places'],
    'numeric[]': ['decimal_places'],
}
def table_to_ascii(table, table_atts, delimiter='|', null_label='null', cdm_complete=True, filename=None, full_table=True, log_level='INFO'):
    """
    Export a cdm table to an ascii file.

    Exports a table written in the C3S Climate Data Store Common Data Model
    (CDM) format to an ascii file. Every element listed in ``table_atts`` is
    rendered to text with the printer registered for its ``data_type`` in the
    module-level ``printers`` mapping, and the result is written with
    ``pandas.DataFrame.to_csv``.

    Rows with a missing ``observation_value`` are not exported. They are
    filtered into a new frame; the rows of the caller's ``table`` are not
    dropped in place.

    Parameters
    ----------
    table:
        pandas.DataFrame to export
    table_atts:
        attributes of the table stored as a python dictionary, keyed by
        element name. Each entry is itself a dictionary from which
        ``data_type`` is read, together with any printer keyword listed in
        ``iprinters_kwargs`` for that type (e.g. ``decimal_places``).
    delimiter:
        field separator of the output file. Default '|'
    null_label:
        text written for elements that are not present in ``table``; it is
        also passed on to the printers
    cdm_complete:
        if ``True`` (default) every element in ``table_atts`` is written; if
        ``False`` only the elements that are also columns of ``table``
    filename:
        path of the file to write. With the default ``None``, ``to_csv``
        returns the text instead of writing it and that text is discarded
        here, so nothing is saved
    full_table:
        not used by this function; kept so that existing calls keep working
    log_level:
        level of logging information to be saved

    Returns
    -------
    None. The table is saved as an ascii file. If there is nothing to export,
    a file containing only the header line is written.
    """
    logger = logging_hdlr.init_logger(__name__, level=log_level)

    # Materialise the element names once; a plain list is accepted everywhere
    # pandas expects column labels.
    elements = list(table_atts)

    if 'observation_value' in table:
        # Filter into a new frame rather than dropping rows in place, so the
        # caller's DataFrame keeps all of its rows.
        table = table.dropna(subset=['observation_value'])
        empty_table = len(table) == 0
    elif 'observation_value' in table_atts:
        # The table definition has observation values but the data has none.
        empty_table = True
    else:
        empty_table = len(table) == 0

    if empty_table:
        logger.warning('No observation values in table')
        ascii_table = pd.DataFrame(columns=elements, dtype='object')
        ascii_table.to_csv(filename, index=False, sep=delimiter, header=True, mode='w')
        return

    ascii_table = pd.DataFrame(index=table.index, columns=elements, dtype='object')
    for iele in elements:
        if iele not in table:
            # Element defined in the attributes but absent from the data.
            ascii_table[iele] = null_label
            continue

        element_atts = table_atts[iele]
        itype = element_atts.get('data_type')
        printer = printers.get(itype)
        if not printer:
            # The column keeps its initial missing values, which to_csv
            # writes as empty fields (its default na_rep), not as null_label.
            logger.error('No printer defined for element {}'.format(iele))
            continue

        # Only some data types take extra printer arguments (see
        # iprinters_kwargs); all others are called without any.
        kwargs = {x: element_atts.get(x) for x in (iprinters_kwargs.get(itype) or ())}
        ascii_table[iele] = printer(table[iele], null_label, **kwargs)

    if cdm_complete:
        columns_to_ascii = elements
    else:
        columns_to_ascii = [x for x in elements if x in table.columns]

    ascii_table.to_csv(filename, index=False, sep=delimiter, columns=columns_to_ascii, header=True, mode='w')
    return
def cdm_to_ascii(cdm, delimiter='|', null_label='null', cdm_complete=True, extension='psv', out_dir=None, suffix=None, prefix=None, log_level='INFO'):
    """
    Export every table of a cdm collection to its own ascii file.

    Loops over the tables in ``cdm`` and hands each one to ``table_to_ascii``.
    Each file is named ``<prefix>-<table>-<suffix>.<extension>``, where an
    empty or ``None`` prefix/suffix is simply left out of the name.

    Parameters
    ----------
    cdm:
        common data model tables to export, keyed by table name. For each
        table the entries ``'data'`` and ``'atts'`` are read and passed to
        ``table_to_ascii`` as the table and its attributes.
    delimiter:
        field separator, forwarded to ``table_to_ascii``. Default '|'
    null_label:
        how missing values are represented, forwarded to ``table_to_ascii``
    cdm_complete:
        forwarded to ``table_to_ascii``
    extension:
        file extension, given without the leading dot. Default 'psv'
    out_dir:
        directory where the ascii files are stored. If empty or ``None`` the
        bare file name is used as the path
    suffix:
        file name suffix
    prefix:
        file name prefix
    log_level:
        level of logging information

    Returns
    -------
    None. Saves the cdm tables as ascii files, one per table, with the given
    extension.
    """
    logger = logging_hdlr.init_logger(__name__, level=log_level)

    # The tables are passed on as they are, without copying. The original
    # author's note warns that the printers may modify the caller's data
    # frames, and that rows with an empty observation_value may be removed
    # from observation tables.
    dotted_extension = '.' + extension

    for table_name in cdm.keys():
        logger.info('Printing table {}'.format(table_name))

        # Join only the name parts that were actually provided.
        name_parts = [part for part in (prefix, table_name, suffix) if part]
        file_name = '-'.join(name_parts) + dotted_extension

        if out_dir:
            file_path = os.path.join(out_dir, file_name)
        else:
            file_path = file_name

        table_content = cdm[table_name]
        table_to_ascii(
            table_content['data'],
            table_content['atts'],
            delimiter=delimiter,
            null_label=null_label,
            cdm_complete=cdm_complete,
            filename=file_path,
            log_level=log_level,
        )
    return