Ken,

Here's a sample Python program that should make 'foo.nc' from your 'foo.csv'. Just call the function Write_netcdf with whatever input and output filenames you want. There's some basic error checking, but more might be needed depending on your data. Also attached is the resultant netCDF file.

-Sourish

On 3/18/19 2:57 PM, Ken Mankoff wrote:
> On 2019-03-18 at 13:12 -0700, Sourish Basu <Sourish.Basu@xxxxxxxxxxxx> wrote...
>> In your example dataset, there are five values for the time
>> coordinate. However, the values of x, y, lat, lon, and elev do not
>> seem to depend on the values of time. Is this true in general for your
>> data? If that's true (while still allowing x, y etc. to vary from year
>> to year, or file to file), that makes packaging even simpler.
> Correct. There are 6 header rows that *never* change: ID, lon, lat, x, y, elev.
> There is 1 index column that is date. Then the data that is a function of
> (ID, date) (or ((lon, lat), date), or ((x, y), date)) does change.
>
> -k.
Attachment:
foo.nc
Description: Cdf file
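To be explicit about the layout the script below assumes, here is a rough sketch of the CSV structure. The values are placeholders, not real data; only the shape matters: six header rows that never change, then one row per date.

ID,<id_1>,<id_2>,...,<id_N>
lon,<lon_1>,<lon_2>,...,<lon_N>
lat,<lat_1>,<lat_2>,...,<lat_N>
x,<x_1>,<x_2>,...,<x_N>
y,<y_1>,<y_2>,...,<y_N>
elev,<elev_1>,<elev_2>,...,<elev_N>
YYYY-MM-DD,<val_1>,<val_2>,...,<val_N>
YYYY-MM-DD,<val_1>,<val_2>,...,<val_N>
...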
from netCDF4 import Dataset
import numpy as np
from datetime import datetime

def Read_CSV(file_name):
    # empty dictionary in which to return everything
    ret_dict = {'var_names': [], 'var_values': {}, 'time_values': [], 'ret_array': None}
    # data types for the different 1D arrays (before the time variation starts)
    data_types = {'ID': np.int32, 'x': np.float32, 'y': np.float32,
                  'lat': np.float32, 'lon': np.float32, 'elev': np.float32}
    num_vars = len(data_types.keys())
    # read all the lines
    with open(file_name, 'r') as fid:
        all_lines = fid.readlines()
    # the next num_vars lines contain scalar arrays, whose names have to be the first column
    num_vals = None
    for i in range(num_vars):
        relevant_line = all_lines[i]
        key = relevant_line.split(',')[0]
        values = np.array([float(x) for x in relevant_line.split(',')[1:]], dtype=data_types[key])
        ret_dict['var_names'].append(key)
        ret_dict['var_values'][key] = values
        # basic check to ensure that all lines have the same number of values
        if num_vals is None:
            # this is the first line, so get the record length
            num_vals = len(values)
        else:
            # check if subsequent lines have the same record length
            if len(values) != num_vals:
                raise RuntimeError('%s has %i records, expected %i' % (key, len(values), num_vals))
    all_lines = all_lines[num_vars:]
    # all lines henceforth have YYYY-MM-DD (or is it YYYY-DD-MM? can't tell from the
    # provided file) as the first column; coding now assuming YYYY-MM-DD
    num_times = len(all_lines)
    ret_dict['ret_array'] = np.zeros((num_times, num_vals), np.float32)
    for i, line in enumerate(all_lines):
        time_val = datetime.strptime(line.split(',')[0], '%Y-%m-%d')
        var_val = np.array([float(x) for x in line.split(',')[1:]], dtype=np.float32)
        # check if the length of var_val matches the expected record length
        if len(var_val) != num_vals:
            raise RuntimeError('Time %s has %i records, expected %i'
                               % (time_val.strftime('%Y-%m-%d'), len(var_val), num_vals))
        ret_dict['time_values'].append(time_val)
        ret_dict['ret_array'][i] = var_val
    return ret_dict

def Write_netcdf(netcdf_file='foo.nc', csv_file='foo.csv'):
    data = Read_CSV(csv_file)
    # compression (optional)
    comp_dict = {'zlib': True, 'shuffle': True, 'complevel': 6}
    with Dataset(netcdf_file, 'w') as fid:
        # create the dimensions
        fid.createDimension('times', None)   # unlimited dimension
        fid.createDimension('record', None)  # unlimited dimension
        fid.createDimension('time_tuple', 3)
        # write the auxiliary variables
        for var_name in data['var_names']:
            var_values = data['var_values'][var_name]
            v = fid.createVariable(var_name, var_values.dtype, ('record',), **comp_dict)
            v[:] = var_values
        # write the time values
        v = fid.createVariable('date_components', np.int16, ('times', 'time_tuple'), **comp_dict)
        v[:] = np.array([d.timetuple()[:3] for d in data['time_values']], dtype=np.int16)
        # now write the 2D array of values
        v = fid.createVariable('data_values', data['ret_array'].dtype, ('times', 'record'), **comp_dict)
        v[:] = data['ret_array']
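A minimal way to run this and sanity-check the output would be something like the following (assuming the code above is saved as a module named csv_to_nc.py; the module name is arbitrary):

from netCDF4 import Dataset
from csv_to_nc import Write_netcdf   # hypothetical module name for the script above

# convert the CSV; these file names are just the function defaults
Write_netcdf(netcdf_file='foo.nc', csv_file='foo.csv')

# peek at the result to confirm the dimensions and variables look right
with Dataset('foo.nc', 'r') as nc:
    print(nc.dimensions.keys())    # expect: times, record, time_tuple
    print(nc.variables.keys())     # ID, lon, lat, x, y, elev, date_components, data_values
    print(nc.variables['data_values'][:].shape)    # (number of dates, number of records)

Running 'ncdump -h foo.nc' from the shell is another quick way to inspect the header.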