tbl2cdf: converts ASCII table to netCDF

Since things are a little slow in this mail group, I thought
I'd post a little program written by Chuck Denham 
that some of you may find useful.

tbl2cdf scans an ASCII flat file, and creates UNLIMITED dimensioned
netCDF variables from the data in each column.  The variable name is
determined from the column heading, or optionally, from a header file.
The variable type is automatically determined to be the most general
type that describes the data, or may be specified by the user in the 
header.

Example:

lat:float    lon:float      temp:double          qual:short
 41.23        -70.24332     12.322334224533   1
 41.33        -70.25361     12.232334224533   1
 41.43        -70.26352     12.322334224533   0
 41.53        -70.23332     12.332334224533   0

See the comments in the source code for more details.
-- 
Rich Signell               |   rsignell@xxxxxxxxxxxxxxxxxx
U.S. Geological Survey     |   (508) 548-8700
Quissett Campus            |   "You need a license to dig clams...
Woods Hole, MA  02543      |      ... but anybody can have kids."

--------------------tbl2cdf.c -----------------------------------
/*
 *      tbl2cdf: Convert a flat ascii table to a NetCDF file.
 *
 *      Charles R. Denham
 *
 *      This program scans a flat ascii table for the name and
 *      most general datatype of each column.  It then creates
 *      and populates a NetCDF file with the data.
 *
 *      Usage:  tbl2cdf [options] tablefile NetCDFfile
 *
 *      Options:
 *
 *              -h hfile
 *                      The headings to be used as variable
 *                      names are contained in file hfile.
 *
 *              -x bslfd (specify one or more)
 *                      Exclude the given automated types.
 *                      The procedure scans the excluded
 *                      types from left to right.  See data
 *                      type abbreviations below.
 *
 *              -x n
 *                      Exclude automated numeric types; use char.
 *
 *      Headings:
 *
 *              Starting with the first token, variable names are
 *              read until the first newline, excepting that the
 *              backslash character can be used to continue the
 *              headings onto subsequent lines.  Tokens are
 *              expected to be separated by white-space, consisting
 *              of blanks, tabs, and newlines.
 *
 *      Data types:
 *
 *              The preferred data type can be specified for each
 *              variable name, by appending a colon and a data
 *              type abbreviation to the variable name.  Example:
 *              x:d signifies that variable x is to be stored
 *              as NC_DOUBLE (NetCDF double).  The abbreviations
 *              are:
 *                              b       byte    NC_BYTE
 *                              c       char    NC_CHAR
 *                              s       short   NC_SHORT
 *                              l       long    NC_LONG
 *                              f       float   NC_FLOAT
 *                              d       double  NC_DOUBLE
 *                              v       void    NC_IGNORE
 *
 *              No variable is created for type NC_IGNORE.
 *
 *      Data:
 *
 *              Following the headings, the data tokens are scanned
 *              and a data type is assigned to each variable, capable
 *              of holding all the values in the column efficiently.
 *              The automatic types can be overridden by appending
 *              types to the headings, or by using
 *              the -x exclusionary option.  The data tokens are
 *              expected to be separated by white-space, and each
 *              record may be written over several lines.  Any
 *              backslash continuation characters are ignored.
 *              Every field of every record is expected to have
 *              a contiguous entry.  No provision exists for
 *              representing generalized character strings as
 *              lists of words.
 *
 *      Comment Lines:
 *
 *              Blank lines and lines beginning with '#' are ignored.
 *              
 */

# include       <ctype.h>
# include       <stdio.h>
# include       <stdlib.h>
# include       <string.h>

# include       "netcdf.h"

void    version(void);

/*
 *      version: Print the date and time at which this file was compiled.
 *      Nothing is printed unless the compiler defines both __DATE__
 *      and __TIME__.
 */

void
version(void)

{
#if     defined(__DATE__) && defined(__TIME__)
        const char      *       built_on = __DATE__;
        const char      *       built_at = __TIME__;

        printf("Version:\n\t%s %s\n", built_on, built_at);
#endif
}
/*      Size of the line buffer that main passes to fgets.      */
# define        MAX_STRING              2048

/*      Verbosity: main prints a progress line every MOD records.       */
# define        MOD                             200

/*      For NC_BYTE and NC_DOUBLE.  Not referenced by the code visible in this file.    */
# define        NC_SPECIAL              0

/*      Pointer to a variable record.   */
typedef struct  _var    *varptr;

/*
 *      One table column, which main turns into one NetCDF variable.
 *      main allocates each record with calloc, so every field starts
 *      out as zero.
 */
typedef struct  _var    {
        char                    name[MAX_NC_NAME];      /*      Name taken from the column heading.     */
        int                             varid;          /*      Id returned by ncvardef.        */
        nc_type                 datatype;       /*      NetCDF type, or NC_IGNORE to skip the column.   */
        int                             locked;         /*      Nonzero if the heading carried a recognized type suffix.        */
        int                             len;            /*      Length of the longest token seen in the column. */
        int                             dimid;          /*      Id of the string-length dimension; set for NC_CHAR only.        */
        varptr          *       next;           /*      Not used by the code visible in this file.      */
}       variable;

/*      Forward declarations for the helpers defined after main.        */
void            usage(void);                            /*      Print the usage message.        */
void            version(void);                          /*      Print the compile date and time.        */
int                     isnumeric(char *);              /*      Nonzero if the token parses as a number.        */
nc_type         numkind(double);                        /*      Minimal numeric NetCDF type for a value.        */
int                     findarg(int, char **, char *);  /*      Index of the argument with a given prefix, or -1.       */

/*
 *      max: The larger of A and B.  Both the arguments and the whole
 *      expansion are parenthesized so that the macro can be used inside
 *      a larger expression.  Each argument may be evaluated twice, so
 *      do not pass expressions with side effects.
 */
# define        max(A, B)       (((A) > (B)) ? (A) : (B))

/*      Characters that separate tokens; passed to strtok.      */
# define        DELIMS                  " \t\n"

/*      fopen mode used for the header and data files.  */
# define        NO_WRITE                "r"

/*      Command-line option prefixes looked up with findarg.    */
# define        HEADERS                 "-h"
# define        EXCLUDES                "-x"

/*      A token equal to this string ends the scan of the current line. */
# define        CONTINUATION    "\\"

/*      Lines that begin with this character are skipped.       */
# define        POUND                   '#'

/*      Data type abbreviations used in heading suffixes and after -x.  */
# define        BYTE_TYPE               'b'
# define        CHAR_TYPE               'c'
# define        SHORT_TYPE              's'
# define        LONG_TYPE               'l'
# define        FLOAT_TYPE              'f'
# define        DOUBLE_TYPE             'd'
# define        NUMERIC_TYPE    'n'
# define        VOID_TYPE               'v'

/*      Pseudo-type for columns that get no NetCDF variable.    */
# define        NC_IGNORE               -1

/*      Unlimited dimension name.       */
# define        UNLIMITED               "u"
/*      Format for dimension name.      */
# define        DIMENSION               "dim_%d"

int
main    (
        int                     argc,
        char    *       argv[]
)

{
        variable        *       var[MAX_NC_VARS];
        char                    s[MAX_STRING];
        char                    t[32];
        char            *       p;
        char            *       q;
        char            *       r;
        
        int                     cdfid;
        int                     ndims;
        int                     dimid[MAX_NC_DIMS];
        int                     nvars;
        int                     coord[2];
        int                     count[2];
        int                     maxlen;
        double          value;
        char            name[MAX_NC_NAME];
        
        FILE    *       fp;
        int                     pos;
        
        int                     i, j, k, n;
        int                     nrecords;
        int                     status;
        int                     flag;
        
        int                     hfile, dfile, ofile;
        int                     exclude;
        
        if (argc < 3)   {
                version();
                usage();
                return (0);
        }
        
/*      Locate the filenames.   */
        
        ofile = argc - 1;       /*      Output filename.        */
        dfile = argc - 2;       /*      Data filename.  */
        hfile = dfile;          /*      Header filename.        */
        i = findarg(argc, argv, HEADERS);
        if (i != -1)    {
                hfile = i + 1;
        }
        if (!strcmp(argv[hfile], argv[dfile]))  {
                hfile = dfile;
        }
        
/*      Are any data types to be excluded?      */

        exclude = findarg(argc, argv, EXCLUDES);
        if (exclude != -1)      {
                exclude = exclude + 1;
        }
        
/*      Open the header file.   */
        
        fp = fopen(argv[hfile], NO_WRITE);
        if (!fp)        {
                printf("fopen failure: %s\n", argv[hfile]);
                return (0);
        }
        
/*
 *      The first non-blank string contains the column headings.
 *      The headings line may contain continuations, symbolized
 *      by a back-slash ("\"), as in the C-Language.
 *
 */
        
        nvars = 0;
        while ((fgets(s, MAX_STRING, fp)) != NULL)      {
                if (*s == POUND)        {
                        continue;
                }
                flag = 0;
                p = s;
                while (q = strtok(p, DELIMS))   {
                
                        strcpy(name, q);
                
                        if (!strcmp(q, CONTINUATION))   {
                                flag = 1;
                                break;
                        }
                        else if (!isprint(*q))  {
                                break;
                        }
                        
                        var[nvars] = (variable *) calloc(1, sizeof(variable));
                        if (r = strchr(q, ':')) {
                                var[nvars]->locked = 1;
                                switch(*(r+1))  {
                                        case BYTE_TYPE:
                                                var[nvars]->datatype = NC_BYTE;
                                                break;
                                        case CHAR_TYPE:
                                                var[nvars]->datatype = NC_CHAR;
                                                break;
                                        case SHORT_TYPE:
                                                var[nvars]->datatype = NC_SHORT;
                                                break;
                                        case LONG_TYPE:
                                                var[nvars]->datatype = NC_LONG;
                                                break;
                                        case FLOAT_TYPE:
                                                var[nvars]->datatype = NC_FLOAT;
                                                break;
                                        case DOUBLE_TYPE:
                                                var[nvars]->datatype = 
NC_DOUBLE;
                                                break;
                                        case VOID_TYPE:
                                                var[nvars]->datatype = 
NC_IGNORE;
                                                break;
                                        default:
                                                var[nvars]->locked = 0;
                                                
                                }
                                name[r-q] = '\0';
                        }
                        strcpy(var[nvars]->name, name);
                        nvars++;
                        p = 0;
                }
                if (nvars > 0 && !flag) {
                        break;
                }
        }
        
        printf("Number of variables:   %d\n", nvars);
        fflush(stdout);
        
/*      Open the data file.     */
        
        if (dfile != hfile)     {
                fclose(fp);
                fp = fopen(argv[dfile], NO_WRITE);
                if (!fp)        {
                        printf("fopen failure: %s\n", argv[dfile]);
                        return (0);
                }
        }
        
        pos = ftell(fp);        /*      Data lie beyond here.   */
        
        maxlen = 0;
        nrecords = 0;
        
/*
 *      Scan each data entry; determining minimal data type.
 *      Blank-lines are ignored, and each data record may be
 *      continued onto a subsequent line.  No continuation
 *      character is expected, but a C-Language back-slash
 *      ("\") may be used if desired.
 *
 */

        i = 0;

        while ((fgets(s, MAX_STRING, fp)) != NULL)      {
        
                if (*s == POUND)        {
                        continue;
                }
        
                flag = 0;
                p = s;
                
                while ((q = strtok(p, DELIMS)) != NULL) {
                
                        p = (char *) NULL;
                
                        if (!strcmp(q, CONTINUATION))   {
                                flag = 1;
                                break;
                        }
                        else if (!isprint(*q))  {
                                break;
                        }
                
                        if (i >= nvars) {
                                i = 0;
                                nrecords++;
                                if ((nrecords % MOD) == 0)      {
                                        printf("Records scanned: %d\n", 
nrecords);
                                        fflush(stdout);
                                }
                        }
                        
/*      Maximum string length, in case everything goes to NC_CHAR.      */

                        var[i]->len = max(var[i]->len, strlen(q));
                        maxlen = max(maxlen, var[i]->len);
                        
                        if (!var[i]->locked)    {
                                
                                if (!isnumeric(q))      {
                                        var[i]->datatype = NC_CHAR;
                                }
                                
                                else    {
                                        value = atof(q);
                                        k = numkind(value);
                                        var[i]->datatype = max(k, 
var[i]->datatype);
                                }
                        }
                        
                        i++;
                }
        }
        
        nrecords++;
        printf("Total records scanned: %d\n", nrecords);
        fflush(stdout);
        
/*      Optional exclusion of some data types.  */

        if (exclude != -1)      {
                p = argv[exclude];
                n = strlen(p);
                for (i = 0; i < nvars; i++)     {
                        if (!var[i]->locked)    {
                                k = var[i]->datatype;
                                q = p;
                                for (j = 0; j < n; j++) {
                                        switch(*q)      {
                                                case DOUBLE_TYPE:
                                                        if (k == NC_DOUBLE)     
{
                                                                k = NC_FLOAT;
                                                        }
                                                        break;
                                                case FLOAT_TYPE:
                                                        if (k == NC_FLOAT)      
{
                                                                k = NC_DOUBLE;
                                                        }
                                                        break;
                                                case BYTE_TYPE:
                                                        if (k == NC_BYTE)       
{
                                                                k = NC_SHORT;
                                                        }
                                                        break;
                                                case SHORT_TYPE:
                                                        if (k == NC_SHORT)      
{
                                                                k = NC_LONG;
                                                        }
                                                        break;
                                                case LONG_TYPE:
                                                        if (k == NC_LONG)       
{
                                                                k = NC_DOUBLE;
                                                        }
                                                case NUMERIC_TYPE:
                                                                k = NC_CHAR;
                                                        break;
                                        }
                                        q++;
                                }
                                var[i]->datatype = k;
                        }
                }
        }
        
/*      Create the NetCDF file to receive the data.     */
        
        cdfid = nccreate(argv[ofile], NC_CLOBBER);
        if (cdfid == -1)        {
                printf("nccreate failure: %s\n", argv[ofile]);
                return (0);
        }
        status = ncdimdef(cdfid, UNLIMITED, NC_UNLIMITED);
        for (i = 0; i < nvars; i++)     {
                if (var[i]->datatype == NC_CHAR)        {
                        sprintf(name, DIMENSION, i);
                        var[i]->dimid = ncdimdef(cdfid, name, var[i]->len + 1);
                }
        }
        dimid[0] = 0;
        for (i = 0; i < nvars; i++)     {
                ndims = 1;
                if (var[i]->datatype == NC_CHAR)        {
                        ndims = 2;
                        dimid[1] = var[i]->dimid;
                }
                if (var[i]->datatype != NC_IGNORE)      {
                        var[i]->varid = ncvardef(cdfid, var[i]->name,
                                                var[i]->datatype, ndims, dimid);
                }
        }
        status = ncendef(cdfid);
        
        coord[0] = 0;
        coord[1] = 0;
        count[0] = 1;
        count[1] = maxlen + 1;
        
        status = fseek(fp, pos, 0);
        
/*      exit(0);        */
        
/*      Scan the file again and stash the data into NetCDF.     */
        
        i = 0;
        
        while ((fgets(s, MAX_STRING, fp)) != NULL)      {
        
                if (*s == POUND)        {
                        continue;
                }
        
                flag = 0;
                p = s;
                
                while ((q = strtok(p, DELIMS)) != NULL) {
                
                        p = (char *) NULL;
                
                        if (!strcmp(q, CONTINUATION))   {
                                flag = 1;
                                break;
                        }
                        else if (!isprint(*q))  {
                                break;
                        }
                
                        if (i >= nvars) {
                                i = 0;
                                coord[0]++;
                                if ((coord[0] % MOD) == 0)      {
                                        printf("Records written: %d\n", 
coord[0]);
                                        fflush(stdout);
                                }
                        }
                        
                        if (var[i]->datatype == NC_CHAR)        {
                                count[1] = strlen(q) + 1;

                                ncvarput(cdfid, var[i]->varid, coord, count, q);

                        }
                        
                        else if (var[i]->datatype != NC_IGNORE) {
                        
                                r = (char *) NULL;

                                value = strtod(q, &r);
                                
                                switch (var[i]->datatype)       {
                                
                                        case NC_BYTE:
                                                *((char *) t) = (char) value;
                                                break;
                                        
                                        case NC_SHORT:
                                                *((short *) t) = (short) value;
                                                break;
                                                
                                        case NC_LONG:
                                                *((long *) t) = (long) value;
                                                break;
                                                
                                        case NC_FLOAT:
                                                *((float *) t) = (float) value;
                                                break;
                                                
                                        case NC_DOUBLE:
                                                *((double *) t) = (double) 
value;
                                                break;
                                }
                                
                                ncvarput(cdfid, var[i]->varid, coord, count, t);

                        }
                        
                        i++;
                }
        }
        
/*
        i = nvars;
        while (i > 0)   {
                i--;
                if(var[i])      {
                        free(var[i]);
                }
        }
*/
        
        printf("Total records written: %d\n", coord[0] + 1);
        fflush(stdout);
        
        status = ncclose(cdfid);
        fclose(fp);
        
        return (0);
}


/*
 *      isnumeric: True if the string is a number.
 *
 *      Returns 1 when strtod converts at least one character of s and
 *      the conversion stops at the end of the string or at white-space.
 *      Returns 0 otherwise, including for a null pointer, an empty
 *      string, and a string in which strtod finds nothing to convert.
 */

int
isnumeric       (
        char    *       s
)

{
        char    *       endp;

#define WHITE_SPACE     " \t\n"

        if (s == (char *) NULL) {
                return (0);
        }

        endp = s;
        (void) strtod(s, &endp);

/*      strtod leaves endp at s when it could not convert anything.     */

        if (endp == s)  {
                return (0);
        }

        if ((*endp == '\0') || strchr(WHITE_SPACE, *endp))      {
                return (1);
        }

        return (0);
}


/*
 *      numkind: Minimal numeric type for a double precision value.
 *
 *      Returns NC_BYTE, NC_SHORT or NC_LONG for a whole number, taking
 *      the narrowest of the three whose range holds it: 8, 16 and 32
 *      bits respectively.  Any other value yields NC_FLOAT if it
 *      survives a round trip through float unchanged, and NC_DOUBLE
 *      if it does not.
 *
 *      The ranges are tested on the double itself before any cast to
 *      an integer type, because converting an out-of-range double to
 *      an integer is undefined.  The NC_LONG range is fixed at 32 bits
 *      rather than taken from the width of the C long type.
 */

nc_type
numkind (
        double  value
)

{
        nc_type k;

        if (value >= -2147483648.0 && value <= 2147483647.0
                        && (double) (long) value == value)      {
                if (value >= -128.0 && value <= 127.0)  {
                        k = NC_BYTE;
                }
                else if (value >= -32768.0 && value <= 32767.0) {
                        k = NC_SHORT;
                }
                else    {
                        k = NC_LONG;
                }
        }
        else if ((double) (float) value == value)       {
                k = NC_FLOAT;
        }
        else    {
                k = NC_DOUBLE;          /*      Also reached for a NaN. */
        }

        return (k);
}


/*
 *      findarg: Index of the first argument that begins with the
 *      given prefix, or -1 if no argument does.  The search starts
 *      at argv[0].
 */

int
findarg (
        int                     argc,
        char    **      argv,
        char    *       prefix
)

{
        int                     k = 0;

        while (k < argc)        {
                if (strncmp(argv[k], prefix, strlen(prefix)) == 0)      {
                        return (k);
                }
                k++;
        }

        return (-1);
}


/*      usage: Print the command synopsis, purpose and an example.      */

void
usage(void)

{
        static const char       *       text[] = {
                "Usage:\n",
                "\ttbl2cdf [-x bslfdn] [-h hfile] dfile ofile\n",
                "\t\tbslfd: exclude these NetCDF data types:\n",
                "\t\t\tb = NC_BYTE, s = NC_SHORT, etc.; n = all numerics).\n",
                "\t\thfile: ascii header filename.\n",
                "\t\tdfile: ascii data table filename for input.\n",
                "\t\tofile: NetCDF filename for output.\n",
                "Purpose:\n",
                "\tConvert ascii table to NetCDF file.\n",
                "Example:\n",
                "\ttbl2cdf -h foo.head foo.dat foo.cdf\n"
        };
        size_t          k;

/*      The lines hold no conversion specifiers, so fputs prints them verbatim. */

        for (k = 0; k < sizeof(text) / sizeof(text[0]); k++)    {
                fputs(text[k], stdout);
        }
}


  • 1991 messages navigation, sorted by:
    1. Thread
    2. Subject
    3. Author
    4. Date
    5. ↑ Table Of Contents
  • Search the netcdfgroup archives: