Hi all-
I'm working on generating my first NetCDF files and have a question. The
files I'm creating seem to be far larger than I would have thought
necessary to hold the given data. I'm wondering if there is something I can
do to trim this down a bit.
Our data is simple time-series data (one unlimited dimension). Below is a
simple Java test program that generates a file with 10000 records, each of
which contains a 24-character timestamp string and three 2-byte values.
This gives a raw data requirement of 300000 bytes (10000 records x 30 bytes). The generated NetCDF file
is 2420656 bytes, or about 8x larger. Is this what is expected? In my
development with real data I'm seeing 7MB of data creating an 86MB NetCDF
file, etc. It seems to settle out at about 12x as the data sets grow, which
is still pretty onerous. Any insights or suggestions appreciated.
package gov.noaa.swpc.solarwind;
import org.joda.time.DateTime;
import ucar.ma2.ArrayShort;
import ucar.ma2.ArrayString;
import ucar.ma2.DataType;
import ucar.ma2.InvalidRangeException;
import ucar.nc2.*;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
public class TestGenFile {
public static void main(String[] args) {
DateTime startDate = new DateTime();
DateTime endDate = startDate.plusDays(1);
NetcdfFileWriter dataFile = null;
try {
try {
// define the file
String filePathName = "output.nc";
// delete the file if it already exists
Path path = FileSystems.getDefault().getPath(filePathName);
Files.deleteIfExists(path);
// enter definition mode for this NetCDF-4 file
dataFile =
NetcdfFileWriter.createNew(NetcdfFileWriter.Version.netcdf4, filePathName);
// create the root group
Group rootGroup = dataFile.addGroup(null, null);
// define the global attributes
dataFile.addGroupAttribute(rootGroup, new Attribute("startDate",
startDate.toString()));
dataFile.addGroupAttribute(rootGroup, new Attribute("endDate",
endDate.toString()));
// define dimensions, in this case only one: time
Dimension timeDim = dataFile.addUnlimitedDimension("time");
List<Dimension> dimList = new ArrayList<>();
dimList.add(timeDim);
// define variables
Variable time = dataFile.addVariable(rootGroup, "time",
DataType.STRING, dimList);
dataFile.addVariableAttribute(time, new Attribute("standard_name",
"time"));
Variable bx = dataFile.addVariable(rootGroup, "bx", DataType.SHORT,
dimList);
dataFile.addVariableAttribute(bx, new Attribute("long_name", "IMF
Bx"));
dataFile.addVariableAttribute(bx, new Attribute("units", "raw
counts"));
Variable by = dataFile.addVariable(rootGroup, "by", DataType.SHORT,
dimList);
dataFile.addVariableAttribute(by, new Attribute("long_name", "IMF
By"));
dataFile.addVariableAttribute(by, new Attribute("units", "raw
counts"));
Variable bz = dataFile.addVariable(rootGroup, "bz", DataType.SHORT,
dimList);
dataFile.addVariableAttribute(bz, new Attribute("long_name", "IMF
Bz"));
dataFile.addVariableAttribute(bz, new Attribute("units", "raw
counts"));
// create the file
dataFile.create();
// create 1-D arrays to hold data values (time is the dimension)
ArrayString timeArray = new ArrayString.D1(1);
ArrayShort.D1 bxArray = new ArrayShort.D1(1);
ArrayShort.D1 byArray = new ArrayShort.D1(1);
ArrayShort.D1 bzArray = new ArrayShort.D1(1);
int[] origin = new int[]{0};
// write the records to the file
for (int i = 0; i < 10000; i++) {
// load data into array variables
timeArray.setObject(timeArray.getIndex(), new
DateTime().toString());
bxArray.set(0, (short) i);
byArray.set(0, (short) (i * 2));
bzArray.set(0, (short) (i * 3));
origin[0] = i;
// write a record
dataFile.write(time, origin, timeArray);
dataFile.write(bx, origin, bxArray);
dataFile.write(by, origin, byArray);
dataFile.write(bz, origin, bzArray);
}
} finally {
if (null != dataFile) {
// close the file
dataFile.close();
}
}
} catch (IOException | InvalidRangeException e) {
e.printStackTrace();
}
}
}
thanks,
jeff
--
Jeff Johnson
DSCOVR Ground System Development
Space Weather Prediction Center
jeff.m.johnson@xxxxxxxx