Since things are a little slow in this mail group, I thought
I'd post a little program written by Chuck Denham
that some of you may find useful.
tbl2cdf scans an ASCII flat file, and creates UNLIMITED dimensioned
netCDF variables from the data in each column. The variable name is
determined from the column heading, or optionally, from a header file.
The variable type is automatically determined to be the most general
type that describes the data, or may be specified by the user in the
header.
Example:
lat:float lon:float temp:double qual:short
41.23 -70.24332 12.322334224533 1
41.33 -70.25361 12.232334224533 1
41.43 -70.26352 12.322334224533 0
41.53 -70.23332 12.332334224533 0
See the comments in the source code for more details.
--
Rich Signell | rsignell@xxxxxxxxxxxxxxxxxx
U.S. Geological Survey | (508) 548-8700
Quissett Campus | "You need a license to dig clams...
Woods Hole, MA 02543 | ... but anybody can have kids."
--------------------tbl2cdf.c -----------------------------------
/*
* tbl2cdf: Convert a flat ascii table to a NetCDF file.
*
* Charles R. Denham
*
* This program scans a flat ascii table for the name and
* most general datatype of each column. It then creates
* and populates a NetCDF file with the data.
*
* Usage: tbl2cdf [options] tablefile NetCDFfile
*
* Options:
*
* -h hfile
* The headings to be used as variable
* names are contained in file hfile.
*
* -x bslfd (specify one or more)
* Exclude the given automated types.
* The procedure scans the excluded
* types from left to right. See data
* type abbreviations below.
*
* -x n
* Exclude automated numeric types; use char.
*
* Headings:
*
* Starting with the first token, variable names are
* read until the first newline, excepting that the
* backslash character can be used to continue the
* headings onto subsequent lines. Tokens are
* expected to be separated by white-space, consisting
* of blanks, tabs, and newlines.
*
* Data types:
*
* The preferred data type can be specified for each
* variable name, by appending a colon and a data
* type abbreviation to the variable name. Example:
* x:d signifies that variable x is to be stored
* as NC_DOUBLE (NetCDF double). The abbreviations
* are:
* b byte NC_BYTE
* c char NC_CHAR
* s short NC_SHORT
* l long NC_LONG
* f float NC_FLOAT
* d double NC_DOUBLE
* v void NC_IGNORE
*
* No variable is created for type NC_IGNORE.
*
* Data:
*
* Following the headings, the data tokens are scanned
* and a data type is assigned to each variable, capable
* of holding all the values in the column efficiently.
* The automatic types can be overridden by appending
* types to the headings, or by using
* the -x exclusionary option. The data tokens are
* expected to be separated by white-space, and each
* record may be written over several lines. Any
* backslash continuation characters are ignored.
* Every field of every record is expected to have
* a contiguous entry. No provision exists for
* representing generalized character strings as
* lists of words.
*
* Comment Lines:
*
* Blank lines and lines beginning with '#' are ignored.
*
*/
# include <ctype.h>
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
# include "netcdf.h"
void version(void);
/*
 * version: Print the compilation date and time on standard output.
 * Nothing is printed unless the compiler defines both __DATE__ and
 * __TIME__.
 */
void
version(void)
{
#if defined(__DATE__) && defined(__TIME__)
    /* Adjacent string literals are joined at compile time. */
    fputs("Version:\n\t" __DATE__ " " __TIME__ "\n", stdout);
#endif
}
# define MAX_STRING 2048 /* Size of the line buffer handed to fgets. */
# define MOD 200 /* Verbosity: a progress message is printed every MOD records. */
# define NC_SPECIAL 0 /* For NC_BYTE and NC_DOUBLE. NOTE(review): not referenced in the code visible here. */
/*
 * variable: Bookkeeping for one column of the table.  One record is
 * allocated per heading token and kept in main's var[] array.
 */
typedef struct _var *varptr;
typedef struct _var {
char name[MAX_NC_NAME]; /* Variable name, i.e. the heading without its ":type" suffix. */
int varid; /* Id returned by ncvardef for this column. */
nc_type datatype; /* Type given in the heading or inferred from the data. */
int locked; /* 1 when the heading supplied a recognised type; inference is skipped. */
int len; /* Length of the longest token seen in this column. */
int dimid; /* Id of the string-length dimension; only set for NC_CHAR columns. */
/*
 * NOTE(review): varptr is already a pointer to struct _var, so this
 * declares a pointer to a pointer.  The member is not used in the code
 * visible here -- confirm the intent before relying on it.
 */
varptr * next;
} variable;
/* Forward declarations for the helpers defined after main. */
void usage(void); /* Print the usage message. */
void version(void); /* Print the build date and time. */
int isnumeric(char *); /* Does the string parse as a number? */
nc_type numkind(double); /* Smallest NetCDF numeric type for a value. */
int findarg(int, char **, char *); /* Index of the first argument with a given prefix, or -1. */
/*
 * max: Larger of A and B.
 *
 * Every argument and the whole expansion are parenthesised so the macro
 * can be used inside a larger expression (e.g. 2 * max(a, b)).  Each
 * argument may be evaluated twice, so do not pass expressions with side
 * effects.
 */
# define max(A, B) (((A) > (B)) ? (A) : (B))
# define DELIMS " \t\n" /* Token separators handed to strtok. */
# define NO_WRITE "r" /* fopen mode used for the input files. */
# define HEADERS "-h" /* Command-line option naming the header file. */
# define EXCLUDES "-x" /* Command-line option listing excluded types. */
# define CONTINUATION "\\" /* A token consisting of one backslash ends the scan of the current line. */
# define POUND '#' /* Lines starting with this character are skipped. */
/* One-letter data type abbreviations used in headings and with -x. */
# define BYTE_TYPE 'b'
# define CHAR_TYPE 'c'
# define SHORT_TYPE 's'
# define LONG_TYPE 'l'
# define FLOAT_TYPE 'f'
# define DOUBLE_TYPE 'd'
# define NUMERIC_TYPE 'n' /* Only meaningful with -x: force NC_CHAR. */
# define VOID_TYPE 'v'
# define NC_IGNORE -1 /* Pseudo-type: no variable is created for the column. */
# define UNLIMITED "u" /* Unlimited dimension name. */
# define DIMENSION "dim_%d" /* Format for dimension name. */
int
main (
int argc,
char * argv[]
)
{
variable * var[MAX_NC_VARS];
char s[MAX_STRING];
char t[32];
char * p;
char * q;
char * r;
int cdfid;
int ndims;
int dimid[MAX_NC_DIMS];
int nvars;
int coord[2];
int count[2];
int maxlen;
double value;
char name[MAX_NC_NAME];
FILE * fp;
int pos;
int i, j, k, n;
int nrecords;
int status;
int flag;
int hfile, dfile, ofile;
int exclude;
if (argc < 3) {
version();
usage();
return (0);
}
/* Locate the filenames. */
ofile = argc - 1; /* Output filename. */
dfile = argc - 2; /* Data filename. */
hfile = dfile; /* Header filename. */
i = findarg(argc, argv, HEADERS);
if (i != -1) {
hfile = i + 1;
}
if (!strcmp(argv[hfile], argv[dfile])) {
hfile = dfile;
}
/* Are any data types to be excluded? */
exclude = findarg(argc, argv, EXCLUDES);
if (exclude != -1) {
exclude = exclude + 1;
}
/* Open the header file. */
fp = fopen(argv[hfile], NO_WRITE);
if (!fp) {
printf("fopen failure: %s\n", argv[hfile]);
return (0);
}
/*
* The first non-blank string contains the column headings.
* The headings line may contain continuations, symbolized
* by a back-slash ("\"), as in the C-Language.
*
*/
nvars = 0;
while ((fgets(s, MAX_STRING, fp)) != NULL) {
if (*s == POUND) {
continue;
}
flag = 0;
p = s;
while (q = strtok(p, DELIMS)) {
strcpy(name, q);
if (!strcmp(q, CONTINUATION)) {
flag = 1;
break;
}
else if (!isprint(*q)) {
break;
}
var[nvars] = (variable *) calloc(1, sizeof(variable));
if (r = strchr(q, ':')) {
var[nvars]->locked = 1;
switch(*(r+1)) {
case BYTE_TYPE:
var[nvars]->datatype = NC_BYTE;
break;
case CHAR_TYPE:
var[nvars]->datatype = NC_CHAR;
break;
case SHORT_TYPE:
var[nvars]->datatype = NC_SHORT;
break;
case LONG_TYPE:
var[nvars]->datatype = NC_LONG;
break;
case FLOAT_TYPE:
var[nvars]->datatype = NC_FLOAT;
break;
case DOUBLE_TYPE:
var[nvars]->datatype =
NC_DOUBLE;
break;
case VOID_TYPE:
var[nvars]->datatype =
NC_IGNORE;
break;
default:
var[nvars]->locked = 0;
}
name[r-q] = '\0';
}
strcpy(var[nvars]->name, name);
nvars++;
p = 0;
}
if (nvars > 0 && !flag) {
break;
}
}
printf("Number of variables: %d\n", nvars);
fflush(stdout);
/* Open the data file. */
if (dfile != hfile) {
fclose(fp);
fp = fopen(argv[dfile], NO_WRITE);
if (!fp) {
printf("fopen failure: %s\n", argv[dfile]);
return (0);
}
}
pos = ftell(fp); /* Data lie beyond here. */
maxlen = 0;
nrecords = 0;
/*
* Scan each data entry; determining minimal data type.
* Blank-lines are ignored, and each data record may be
* continued onto a subsequent line. No continuation
* character is expected, but a C-Language back-slash
* ("\") may be used if desired.
*
*/
i = 0;
while ((fgets(s, MAX_STRING, fp)) != NULL) {
if (*s == POUND) {
continue;
}
flag = 0;
p = s;
while ((q = strtok(p, DELIMS)) != NULL) {
p = (char *) NULL;
if (!strcmp(q, CONTINUATION)) {
flag = 1;
break;
}
else if (!isprint(*q)) {
break;
}
if (i >= nvars) {
i = 0;
nrecords++;
if ((nrecords % MOD) == 0) {
printf("Records scanned: %d\n",
nrecords);
fflush(stdout);
}
}
/* Maximum string length, in case everything goes to NC_CHAR. */
var[i]->len = max(var[i]->len, strlen(q));
maxlen = max(maxlen, var[i]->len);
if (!var[i]->locked) {
if (!isnumeric(q)) {
var[i]->datatype = NC_CHAR;
}
else {
value = atof(q);
k = numkind(value);
var[i]->datatype = max(k,
var[i]->datatype);
}
}
i++;
}
}
nrecords++;
printf("Total records scanned: %d\n", nrecords);
fflush(stdout);
/* Optional exclusion of some data types. */
if (exclude != -1) {
p = argv[exclude];
n = strlen(p);
for (i = 0; i < nvars; i++) {
if (!var[i]->locked) {
k = var[i]->datatype;
q = p;
for (j = 0; j < n; j++) {
switch(*q) {
case DOUBLE_TYPE:
if (k == NC_DOUBLE)
{
k = NC_FLOAT;
}
break;
case FLOAT_TYPE:
if (k == NC_FLOAT)
{
k = NC_DOUBLE;
}
break;
case BYTE_TYPE:
if (k == NC_BYTE)
{
k = NC_SHORT;
}
break;
case SHORT_TYPE:
if (k == NC_SHORT)
{
k = NC_LONG;
}
break;
case LONG_TYPE:
if (k == NC_LONG)
{
k = NC_DOUBLE;
}
case NUMERIC_TYPE:
k = NC_CHAR;
break;
}
q++;
}
var[i]->datatype = k;
}
}
}
/* Create the NetCDF file to receive the data. */
cdfid = nccreate(argv[ofile], NC_CLOBBER);
if (cdfid == -1) {
printf("nccreate failure: %s\n", argv[ofile]);
return (0);
}
status = ncdimdef(cdfid, UNLIMITED, NC_UNLIMITED);
for (i = 0; i < nvars; i++) {
if (var[i]->datatype == NC_CHAR) {
sprintf(name, DIMENSION, i);
var[i]->dimid = ncdimdef(cdfid, name, var[i]->len + 1);
}
}
dimid[0] = 0;
for (i = 0; i < nvars; i++) {
ndims = 1;
if (var[i]->datatype == NC_CHAR) {
ndims = 2;
dimid[1] = var[i]->dimid;
}
if (var[i]->datatype != NC_IGNORE) {
var[i]->varid = ncvardef(cdfid, var[i]->name,
var[i]->datatype, ndims, dimid);
}
}
status = ncendef(cdfid);
coord[0] = 0;
coord[1] = 0;
count[0] = 1;
count[1] = maxlen + 1;
status = fseek(fp, pos, 0);
/* exit(0); */
/* Scan the file again and stash the data into NetCDF. */
i = 0;
while ((fgets(s, MAX_STRING, fp)) != NULL) {
if (*s == POUND) {
continue;
}
flag = 0;
p = s;
while ((q = strtok(p, DELIMS)) != NULL) {
p = (char *) NULL;
if (!strcmp(q, CONTINUATION)) {
flag = 1;
break;
}
else if (!isprint(*q)) {
break;
}
if (i >= nvars) {
i = 0;
coord[0]++;
if ((coord[0] % MOD) == 0) {
printf("Records written: %d\n",
coord[0]);
fflush(stdout);
}
}
if (var[i]->datatype == NC_CHAR) {
count[1] = strlen(q) + 1;
ncvarput(cdfid, var[i]->varid, coord, count, q);
}
else if (var[i]->datatype != NC_IGNORE) {
r = (char *) NULL;
value = strtod(q, &r);
switch (var[i]->datatype) {
case NC_BYTE:
*((char *) t) = (char) value;
break;
case NC_SHORT:
*((short *) t) = (short) value;
break;
case NC_LONG:
*((long *) t) = (long) value;
break;
case NC_FLOAT:
*((float *) t) = (float) value;
break;
case NC_DOUBLE:
*((double *) t) = (double)
value;
break;
}
ncvarput(cdfid, var[i]->varid, coord, count, t);
}
i++;
}
}
/*
i = nvars;
while (i > 0) {
i--;
if(var[i]) {
free(var[i]);
}
}
*/
printf("Total records written: %d\n", coord[0] + 1);
fflush(stdout);
status = ncclose(cdfid);
fclose(fp);
return (0);
}
#define WHITE_SPACE " \t\n"
/*
 * isnumeric: True if the string is a number.
 *
 * s is parsed with strtod.  The result is 1 when at least one character
 * was converted and the conversion stopped at the end of the string or
 * at a blank, tab or newline; otherwise it is 0.  An empty or
 * all-blank string is therefore not numeric.
 */
int
isnumeric (
    char * s
)
{
    char * endp = (char *) NULL;

    (void) strtod(s, &endp);
    if (endp == s) {
        return (0);         /* strtod converted nothing. */
    }
    if ((*endp == '\0') || strchr(WHITE_SPACE, *endp)) {
        return (1);
    }
    return (0);
}
/*
 * numkind: Minimal NetCDF numeric type for a double precision value.
 *
 * Returns NC_BYTE, NC_SHORT or NC_LONG for a whole number that fits in
 * 8, 16 or 32 bits respectively, NC_FLOAT for any other value that
 * survives a round trip through float, and NC_DOUBLE for everything
 * else (including NaN and values beyond the float range).
 *
 * The range is checked before every narrowing conversion, because
 * converting an out-of-range double to an integer or float type is
 * undefined.  The integer limits are the NetCDF external sizes rather
 * than the host's C type widths, so a 64-bit long cannot classify a
 * value as NC_LONG that a 32-bit NC_LONG cannot store.
 */
nc_type
numkind (
    double value
)
{
    const double long_min = -2147483648.0;
    const double long_max = 2147483647.0;
    const double float_max = 3.4028234663852886e+38;
    long whole;
    float narrowed;

    if (value >= long_min && value <= long_max) {
        whole = (long) value;       /* In range: long has at least 32 bits. */
        if ((double) whole == value) {
            if (whole >= -128L && whole <= 127L) {
                return (NC_BYTE);
            }
            if (whole >= -32768L && whole <= 32767L) {
                return (NC_SHORT);
            }
            return (NC_LONG);
        }
    }
    if (value >= -float_max && value <= float_max) {
        narrowed = (float) value;
        if ((double) narrowed == value) {
            return (NC_FLOAT);
        }
    }
    return (NC_DOUBLE);
}
/*
 * findarg: Find the argument with the given prefix.
 *
 * Scans argv[0] through argv[argc - 1] and returns the index of the
 * first entry that starts with prefix, or -1 when there is none.
 */
int
findarg (
    int argc,
    char ** argv,
    char * prefix
)
{
    size_t plen;
    int idx;

    if (argc <= 0) {
        return (-1);
    }
    plen = strlen(prefix);
    idx = 0;
    while (idx < argc) {
        if (strncmp(argv[idx], prefix, plen) == 0) {
            return (idx);
        }
        idx++;
    }
    return (-1);
}
/* usage: Write the command-line summary to standard output. */
void
usage(void)
{
    static const char * const text[] = {
        "Usage:\n",
        "\ttbl2cdf [-x bslfdn] [-h hfile] dfile ofile\n",
        "\t\tbslfd: exclude these NetCDF data types:\n",
        "\t\t\tb = NC_BYTE, s = NC_SHORT, etc.; n = all numerics).\n",
        "\t\thfile: ascii header filename.\n",
        "\t\tdfile: ascii data table filename for input.\n",
        "\t\tofile: NetCDF filename for output.\n",
        "Purpose:\n",
        "\tConvert ascii table to NetCDF file.\n",
        "Example:\n",
        "\ttbl2cdf -h foo.head foo.dat foo.cdf\n"
    };
    size_t row;

    /* None of the lines contains a conversion, so fputs prints them verbatim. */
    for (row = 0; row < sizeof text / sizeof text[0]; row++) {
        fputs(text[row], stdout);
    }
}