# Source code for pytplot.importers.cdf_to_tplot

# Copyright 2020 Regents of the University of Colorado. All Rights Reserved.
# Released under the MIT license.
# This software was developed at the University of Colorado's Laboratory for
# Atmospheric and Space Physics.
# Verify current version before use at: https://github.com/MAVENSDC/PyTplot

import cdflib

# If the user has astropy installed, use the cdflib's CDFAstropy class for time conversion
# (Converting to unix time is much, much faster this way)
try:
    from cdflib.epochs_astropy import CDFAstropy as cdfepoch
except:
    from cdflib.epochs import CDFepoch as cdfepoch

import re
import numpy as np
import xarray as xr
from pytplot.store_data import store_data
from pytplot.tplot import tplot
from pytplot.options import options
import pytplot
import copy


def _scaled_delta(cdf_file, delta_var_name):
    """Read an epoch DELTA_PLUS_VAR/DELTA_MINUS_VAR variable from ``cdf_file``.

    If the delta variable carries an ``SI_CONVERSION`` (or, failing that,
    ``SI_CONV``) attribute, the values are cast to float and multiplied by
    the number preceding ``'>'`` in that attribute string.  Otherwise the
    values are returned as read.
    """
    delta = cdf_file.varget(delta_var_name)
    delta_atts = cdf_file.varattsget(delta_var_name)
    # 'SI_CONVERSION' takes precedence over the 'SI_CONV' spelling.
    for att_name in ('SI_CONVERSION', 'SI_CONV'):
        if att_name in delta_atts:
            # builtin float: the np.float alias was removed from NumPy
            factor = float(delta_atts[att_name].split('>')[0])
            return delta.astype(float) * factor
    return delta


def _read_depend(cdf_file, var_atts, depend_key, all_cdf_variables):
    """Return the data of the variable named by ``var_atts[depend_key]``.

    Returns ``None`` when the attribute is missing, when it names a variable
    that is not in the file, or when the variable holds string data.
    """
    if depend_key not in var_atts:
        return None
    depend_name = var_atts[depend_key]
    if depend_name not in all_cdf_variables:
        return None
    depend = np.array(cdf_file.varget(depend_name))
    # Ignore the depend types if they are strings
    if depend.dtype.type is np.str_:
        return None
    return depend


def cdf_to_tplot(filenames, varformat=None, get_support_data=False,
                 get_metadata=False, get_ignore_data=False,
                 string_encoding='ascii', prefix='', suffix='', plot=False,
                 merge=False, center_measurement=False, notplot=False,
                 varnames=[]):
    """
    This function will automatically create tplot variables from CDF files.
    In general, the files should be ISTP compliant for this importer to work.
    Each variable is read into a new tplot variable (a.k.a an xarray
    DataArray), and all associated file/variable metadata is read into the
    attrs dictionary.

    .. note::
        Variables must have an attribute named "VAR_TYPE" (or
        "PARAMETER_TYPE"); variables with neither are skipped.  If the
        attribute entry is "data" (or, when requested, "support_data",
        "metadata" or "ignore_data"), then they will be added as tplot
        variables.  Additionally, data variables should have attributes named
        "DEPEND_TIME" or "DEPEND_0" that describes which variable is x axis.
        If the data is 2D, then an attribute "DEPEND_1" must describe which
        variable contains the secondary axis.  Variables without
        "DEPEND_TIME"/"DEPEND_0" are treated as non-record-varying and are
        collected with only a 'y' entry.

    Parameters:
        filenames : str/list of str
            The file names and full paths of CDF files.  The caller's list is
            not modified; files are processed in sorted order.
        varformat : str
            The file variable formats to load into tplot.  Wildcard character
            "*" is accepted.  By default, all variables are loaded in.
        get_support_data: bool
            Data with an attribute "VAR_TYPE" with a value of "support_data"
            will be loaded into tplot.  By default, only loads in data with a
            "VAR_TYPE" attribute of "data".
        get_metadata: bool
            Data with an attribute "VAR_TYPE" with a value of "metadata"
            will be loaded into tplot.
        get_ignore_data: bool
            Data with an attribute "VAR_TYPE" with a value of "ignore_data"
            will be loaded into tplot.  By default, only loads in data with a
            "VAR_TYPE" attribute of "data".
        string_encoding: str
            Assigned to the ``string_encoding`` attribute of each opened
            ``cdflib.CDF`` object.
        prefix: str
            The tplot variable names will be given this prefix.  By default,
            no prefix is added.
        suffix: str
            The tplot variable names will be given this suffix.  By default,
            no suffix is added.
        plot: bool
            The data is plotted immediately after being generated.  All tplot
            variables generated from this function will be on the same plot.
        merge: bool
            If True, then data from different cdf files will be merged into
            a single pytplot variable.
        center_measurement: bool
            If True, the CDF epoch variables are time-shifted to the middle
            of the accumulation interval by their DELTA_PLUS_VAR and
            DELTA_MINUS_VAR variable attributes
        notplot: bool
            If True, then data are returned in a hash table instead of
            being stored in tplot variables (useful for debugging, and
            access to multi-dimensional data products)
        varnames: str or list of str
            Load these variables only.  If [] or ['*'], then load everything.

    Returns:
        List of tplot variables created (unless notplot keyword is used, in
        which case the dict of raw data keyed by variable name is returned).
        An empty list is returned if ``filenames`` is neither a str nor a
        list.
    """

    stored_variables = []
    epoch_cache = {}
    output_table = {}
    metadata = {}

    # NOTE: the mutable default for `varnames` is kept for interface
    # compatibility; it is only ever rebound here, never mutated.
    if not isinstance(varnames, list):
        varnames = [varnames]
    if '*' in varnames:
        varnames = []

    if isinstance(filenames, str):
        filenames = [filenames]
    elif not isinstance(filenames, list):
        print("Invalid filenames input.")
        return stored_variables
    # sorted() copies, so the caller's list is left in its original order
    filenames = sorted(filenames)

    wanted_var_types = ['data']
    if get_support_data:
        wanted_var_types.append('support_data')
    if get_metadata:
        wanted_var_types.append('metadata')
    if get_ignore_data:
        wanted_var_types.append('ignore_data')

    if varformat is None:
        varformat = "*"
    var_regex = re.compile(varformat.replace("*", ".*"))

    for filename in filenames:
        cdf_file = cdflib.CDF(filename)
        cdf_file.string_encoding = string_encoding
        cdf_info = cdf_file.cdf_info()
        all_cdf_variables = cdf_info['rVariables'] + cdf_info['zVariables']

        # User defined variables.
        if len(varnames) > 0:
            load_cdf_variables = [value for value in varnames
                                  if value in all_cdf_variables]
        else:
            load_cdf_variables = all_cdf_variables

        # Global attributes are best-effort: fall back to an empty dict.
        try:
            gatt = cdf_file.globalattsget()
        except Exception:
            gatt = {}

        for var in load_cdf_variables:
            if not var_regex.match(var):
                continue

            var_atts = cdf_file.varattsget(var)
            if 'VAR_TYPE' in var_atts:
                this_var_type = var_atts['VAR_TYPE'].lower()
            elif 'PARAMETER_TYPE' in var_atts:
                this_var_type = var_atts['PARAMETER_TYPE'].lower()
            else:
                # 'VAR_TYPE' and 'PARAMETER_TYPE' not found in the
                # variable attributes
                continue

            if this_var_type not in wanted_var_types:
                continue

            var_properties = cdf_file.varinq(var)

            # Find data name and if it is already in stored variables
            if 'TPLOT_NAME' in var_atts:
                var_name = prefix + var_atts['TPLOT_NAME'] + suffix
            else:
                var_name = prefix + var + suffix

            if "DEPEND_TIME" in var_atts:
                x_axis_var = var_atts["DEPEND_TIME"]
            elif "DEPEND_0" in var_atts:
                x_axis_var = var_atts["DEPEND_0"]
            else:
                # non-record varying variables (NRVs)
                # added by egrimes, 13Jan2021
                # here we assume if there isn't a DEPEND_TIME or DEPEND_0,
                # there are no other depends
                try:
                    ydata = cdf_file.varget(var)
                except Exception:
                    continue

                if ydata is None:
                    continue

                # since NRVs don't vary with time, they shouldn't vary
                # across files
                output_table[var_name] = {'y': ydata}
                continue

            data_type_description \
                = cdf_file.varinq(x_axis_var)['Data_Type_Description']

            epoch_key = filename + x_axis_var
            if epoch_key in epoch_cache:
                xdata = epoch_cache[epoch_key]
            else:
                # Skip variables with ValueErrors.
                try:
                    xdata = cdf_file.varget(x_axis_var)
                    epoch_var_atts = cdf_file.varattsget(x_axis_var)
                except ValueError:
                    continue

                # check for DELTA_PLUS_VAR/DELTA_MINUS_VAR attributes
                delta_time = 0.0
                if center_measurement:
                    delta_plus_var = 0.0
                    delta_minus_var = 0.0
                    if 'DELTA_PLUS_VAR' in epoch_var_atts:
                        delta_plus_var = _scaled_delta(
                            cdf_file, epoch_var_atts['DELTA_PLUS_VAR'])
                    if 'DELTA_MINUS_VAR' in epoch_var_atts:
                        delta_minus_var = _scaled_delta(
                            cdf_file, epoch_var_atts['DELTA_MINUS_VAR'])
                    # The deltas may be arrays, constants, or one of each;
                    # broadcasting covers all combinations, and two absent
                    # deltas leave the shift at 0.0.
                    delta_time = (delta_plus_var - delta_minus_var) / 2.0

                if ('CDF_TIME' in data_type_description) or \
                        ('CDF_EPOCH' in data_type_description):
                    # Apply the shift before caching so that the first
                    # variable on this epoch gets the same (centered) times
                    # as every later variable read from the cache.
                    xdata = np.array(cdfepoch.unixtime(xdata)) + delta_time
                    epoch_cache[epoch_key] = xdata

            try:
                ydata = cdf_file.varget(var)
            except Exception:
                continue

            if ydata is None:
                continue

            if "FILLVAL" in var_atts:
                data_type = var_properties['Data_Type_Description']
                if data_type in ('CDF_FLOAT', 'CDF_REAL4',
                                 'CDF_DOUBLE', 'CDF_REAL8'):
                    fill_mask = ydata == var_atts["FILLVAL"]
                    if ydata[fill_mask].size != 0:
                        ydata[fill_mask] = np.nan
                elif data_type[:7] == 'CDF_INT':
                    # NaN is only valid for floating point data
                    # but we still need to handle FILLVAL's for
                    # integer data, so we'll just set those to 0
                    ydata[ydata == var_atts["FILLVAL"]] = 0

            tplot_data = {'x': xdata, 'y': ydata}

            # Data may depend on other data in the CDF.
            depend_1 = _read_depend(cdf_file, var_atts, "DEPEND_1",
                                    all_cdf_variables)
            depend_2 = _read_depend(cdf_file, var_atts, "DEPEND_2",
                                    all_cdf_variables)
            depend_3 = _read_depend(cdf_file, var_atts, "DEPEND_3",
                                    all_cdf_variables)

            # Choose the tplot keys: v1/v2/v3 for multi-dimensional data,
            # plain 'v' when only one of DEPEND_1/DEPEND_2 is usable.
            if depend_1 is not None and depend_2 is not None:
                if depend_3 is not None:
                    named_depends = [('v1', depend_1), ('v2', depend_2),
                                     ('v3', depend_3)]
                else:
                    named_depends = [('v1', depend_1), ('v2', depend_2)]
            elif depend_1 is not None:
                named_depends = [('v', depend_1)]
            elif depend_2 is not None:
                named_depends = [('v', depend_2)]
            else:
                named_depends = []

            # 1-D depends do not vary with time, so they must not be
            # concatenated when the same variable shows up in a later file.
            nontime_varying_depends = []
            for depend_key, depend_values in named_depends:
                tplot_data[depend_key] = depend_values
                if len(depend_values.shape) == 1:
                    nontime_varying_depends.append(depend_key)

            metadata[var_name] = {
                'display_type': var_atts.get("DISPLAY_TYPE", "time_series"),
                'scale_type': var_atts.get("SCALE_TYP", "linear"),
                'var_attrs': var_atts,
                'file_name': filename,
                'global_attrs': gatt}

            # Check if the variable already exists in the for loop output
            if var_name not in output_table:
                output_table[var_name] = tplot_data
                continue

            # If it does, loop though the existing variable's
            # x,y,v,v2,v3,etc
            var_data = output_table[var_name]
            for output_var in var_data:
                if output_var in nontime_varying_depends:
                    continue
                new_values = tplot_data[output_var]
                old_values = var_data[output_var]
                if np.asarray(new_values).ndim == 0 \
                        and np.equal(new_values, None):
                    # If there is nothing in the new variable, then pass
                    pass
                elif np.asarray(old_values).ndim == 0 \
                        and np.equal(old_values, None):
                    # If there is nothing in the old variable, then replace
                    var_data[output_var] = new_values
                else:
                    # If they both have something, then concatenate
                    var_data[output_var] = np.concatenate(
                        (old_values, new_values))

    if notplot:
        return output_table

    for var_name in output_table.keys():
        # Remember the pre-existing variable (if any) so it can be merged
        # with the freshly stored one below.
        to_merge = False
        if var_name in pytplot.data_quants.keys() and merge:
            prev_data_quant = pytplot.data_quants[var_name]
            to_merge = True

        var_meta = metadata.get(var_name)

        try:
            attr_dict = {}
            if var_meta is not None:
                attr_dict["CDF"] = {
                    "VATT": var_meta['var_attrs'],
                    "GATT": var_meta['global_attrs'],
                    "FILENAME": var_meta['file_name'],
                }

                # extract the coordinate system, if available
                vatt_keys = list(attr_dict["CDF"]["VATT"].keys())
                vatt_lower = [k.lower() for k in vatt_keys]
                if 'coordinate_system' in vatt_lower:
                    coord_key = vatt_keys[
                        vatt_lower.index('coordinate_system')]
                    attr_dict['data_att'] = {
                        'coord_sys': attr_dict["CDF"]["VATT"][coord_key]}

            store_data(var_name, data=output_table[var_name],
                       attr_dict=attr_dict)
        except ValueError:
            continue

        if var_name not in stored_variables:
            stored_variables.append(var_name)

        if var_meta is not None:
            is_spectrogram = var_meta['display_type'] == "spectrogram"
            if is_spectrogram:
                options(var_name, 'spec', 1)
            if var_meta['scale_type'] == 'log':
                options(var_name, 'ylog', 1)

            var_attrs = var_meta.get('var_attrs')
            if var_attrs is not None:
                if var_attrs.get('LABLAXIS') is not None:
                    options(var_name, 'ytitle', var_attrs['LABLAXIS'])
                if var_attrs.get('UNITS') is not None:
                    units_label = '[' + var_attrs['UNITS'] + ']'
                    if is_spectrogram:
                        options(var_name, 'ztitle', units_label)
                    else:
                        options(var_name, 'ysubtitle', units_label)

            # Gather up all options in the variable attribute section,
            # toss them into options and see what sticks
            options(var_name, opt_dict=var_meta['var_attrs'])

        if to_merge is True:
            cur_data_quant = pytplot.data_quants[var_name]
            # keep the plot options of the newly stored variable
            plot_options = copy.deepcopy(cur_data_quant.attrs)
            pytplot.data_quants[var_name] = xr.concat(
                [prev_data_quant, cur_data_quant], dim='time').sortby('time')
            pytplot.data_quants[var_name].attrs = plot_options

    if plot:
        tplot(stored_variables)

    return stored_variables