Hello NetCDF-Group,
We are currently evaluating NetCDF as a format to store large amounts of
timeseries data. Since we need microsecond resolution for the time, I decided
to use the compound variable functionality of NetCDF4 to create the equivalent
of the standard Linux timeval data type.
To get a feeling for the use and performance of NetCDF, I wrote a small test
program to create a sample nc file with a selectable number of parameters and a
selectable number of time points (via the NUMVARS and DATALENGTH defines).
When I run the program with very small values like ten, there is no problem.
The program completes quickly and the nc file that is created can be viewed
with ncdump. It contains what I would expect it to contain (from the
documentation). However, if I increase the number of variables and/or data
points to more realistic values (hundreds of variables, thousands of data
points), the program's memory footprint keeps growing and growing until it uses
up essentially all of the machine's memory (this is on a 16GiB machine!),
resulting in very poor performance (with NUMVARS=100 and DATALENGTH=100000, it
grows to ca. 16GiB in the first three minutes, which is all the RAM I have.
After that the swapping starts and performance is really poor.). Looking at the
memory footprint with pmap, the large chunk of memory used is [ anon ], i.e.
dynamically allocated by malloc & co.
I am using the netcdf/hdf packages from the current Debian stable distribution
(which are netCDF 4.1.1 and HDF5 1.8.4).
I may be doing something really stupid here, but I am running out of ideas what
else to try. Any help is appreciated.
Regards,
Thorsten
PS: Below is the test code I used. On my Debian box it compiles with "gcc
netcdftest.c -lnetcdf -Wall"
#include <stdio.h>
#include <netcdf.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
#include <unistd.h>
#include <sys/time.h>
/* Exit status used when a netCDF call fails. */
#define ERRCODE 2
/*
 * Print the netCDF error text for status code e and terminate the program.
 * Wrapped in do { } while (0) so that "ERR(x);" expands to exactly one
 * statement and stays safe inside an unbraced if/else.
 */
#define ERR(e) do { printf ("Error: %s\n", nc_strerror(e)); exit (ERRCODE); } while (0)
/* Number of double-valued variables created in addition to "Time". */
#define NUMVARS 100
/* Number of records (time points) written to every variable. */
#define DATALENGTH 100000
/*
 * Timestamp record with two 64-bit integer members: seconds first,
 * microseconds second.
 */
struct nc_timeval {
    int64_t tv_sec;     /* seconds */
    int64_t tv_usec;    /* microseconds */
};
/* Book-keeping record for one data variable. */
struct pilVariable {
    char name[64];  /* variable name */
    char unit[64];  /* unit text */
    int  id;        /* numeric id of the variable */
};
struct nc_timeval mknctime (struct timeval mytimeofday) {
struct nc_timeval mynctime;
mynctime.tv_sec = (int64_t) mytimeofday.tv_sec;
mynctime.tv_usec = (int64_t) mytimeofday.tv_usec;
return(mynctime);
}
int main (void) {
int retval;
int ncid;
int ncvarid;
int ncdimid;
int nctimeid;
int i,k;
struct timeval mytimeofday;
struct nc_timeval mynctime;
struct pilVariable myVars[NUMVARS];
static size_t mystart = 1;
double value;
/* Create netCDF dataset, enter define mode */
retval = nc_create ("myNetCDF.nc",NC_NOCLOBBER|NC_NETCDF4,&ncid);
if (retval != NC_NOERR) {
ERR(retval);
}
/* Define compound type(s) */
retval = nc_def_compound (ncid,sizeof(struct
nc_timeval),"timeval",&ncvarid);
if (retval != NC_NOERR) {
ERR(retval);
}
retval = nc_insert_compound (ncid,ncvarid,"tv_sec",0,NC_INT64);
if (retval != NC_NOERR) {
ERR(retval);
}
retval = nc_insert_compound
(ncid,ncvarid,"tv_usec",sizeof(int64_t),NC_INT64);
if (retval != NC_NOERR) {
ERR(retval);
}
/* define dimensions: from name and length */
retval = nc_def_dim (ncid,"Time",NC_UNLIMITED,&ncdimid);
if (retval != NC_NOERR) {
ERR(retval);
}
/* define variables: from name, type, ... */
/* Time is special, all others are double */
retval = nc_def_var (ncid,"Time",ncvarid,1,&ncdimid,&nctimeid);
if (retval != NC_NOERR) {
ERR(retval);
}
retval = nc_put_att_text (ncid,nctimeid,"Unit",strlen("seconds since
epoch, microseconds"),"seconds since epoch, microseconds");
if (retval != NC_NOERR) {
ERR(retval);
}
for (i=0;i<NUMVARS;i++) {
snprintf(myVars[i].name,64,"Variable_%d",i);
snprintf(myVars[i].unit,64,"Unit_%d",i);
/* Create Variable */
retval = nc_def_var
(ncid,myVars[i].name,NC_DOUBLE,1,&ncdimid,&myVars[i].id);
if (retval != NC_NOERR) {
ERR(retval);
}
/* Create Unit Attribute */
retval = nc_put_att_text
(ncid,myVars[i].id,"Unit",strlen(myVars[i].unit),myVars[i].unit);
if (retval != NC_NOERR) {
ERR(retval);
}
}
/* put attribute: assign attribute values */
/* end definitions: leave define mode */
retval = nc_enddef (ncid);
if (retval != NC_NOERR) {
ERR(retval);
}
/* provide values for variables */
/* Time */
for (i=0;i<DATALENGTH;i++) {
gettimeofday(&mytimeofday,0);
mynctime = mknctime(mytimeofday);
mystart = (size_t) i;
retval = nc_put_var1 (ncid,nctimeid,&mystart,&mynctime);
if (retval != NC_NOERR) {
ERR(retval);
}
for (k=0;k<NUMVARS;k++) {
value = (double)i;
retval = nc_put_var1_double
(ncid,myVars[k].id,&mystart,&value);
if (retval != NC_NOERR) {
ERR(retval);
}
}
// nc_sync(ncid);
}
/* close: save new netCDF dataset */
retval = nc_close (ncid);
if (retval != NC_NOERR) {
ERR(retval);
}
return(0);
}
__________________________________________________
Pilatus Aircraft Ltd.
Thorsten Schmidt
Flight Test Engineer
P.O. Box 992
6371 Stans, Switzerland
phone: +41 41 619 68 06
fax: +41 41 619 65 99
e-mail: thorsten.schmidt@xxxxxxxxxxxxxxxxxxxx
website: www.pilatus-aircraft.com
Please consider the environment before printing this email!
***DISCLAIMER***
The information and any attachments (herein referred to as 'document')
transmitted is intended only for the person or entity to which it is addressed.
It may contain confidential and/or privileged information. If you are not the
intended recipient of this document, you are hereby notified that any
dissemination, distribution, copying, other use of, or taking any action in
reliance upon this document by persons or entities other than the intended
recipient is illegal and prohibited. If you have received this in error, please
immediately notify the sender, permanently delete the original from any
computer and/or system, and destroy any printout of the 'document'. We thank
you for your co-operation in this matter. Pilatus Aircraft Ltd.