h5dump -Hp ndb.BS_COMPRESS0.005000_Q1
HDF5 "ndb.BS_COMPRESS0.005000_Q1" {
GROUP "/" {
ATTRIBUTE "_NCProperties" {
DATATYPE H5T_STRING {
STRSIZE 57;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
DATASET "BSE_RESONANT_COMPRESSED1" {
DATATYPE H5T_IEEE_F32LE
DATASPACE SIMPLE { ( 24776792, 2 ) / ( 24776792, 2 ) }
STORAGE_LAYOUT {
CONTIGUOUS
SIZE 198214336
OFFSET 16158554547
}
FILTERS {
NONE
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE 9.96921e+36
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_EARLY
}
ATTRIBUTE "DIMENSION_LIST" {
DATATYPE H5T_VLEN { H5T_REFERENCE { H5T_STD_REF_OBJECT }}
DATASPACE SIMPLE { ( 2 ) / ( 2 ) }
}
}
DATASET "BSE_RESONANT_COMPRESSED1_DONE" {
DATATYPE H5T_STRING {
STRSIZE 1;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_UTF8;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 2025000000 ) / ( 2025000000 ) }
STORAGE_LAYOUT {
CONTIGUOUS
SIZE 2025000000
OFFSET 8100002379
}
FILTERS {
NONE
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE ""
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_EARLY
}
ATTRIBUTE "DIMENSION_LIST" {
DATATYPE H5T_VLEN { H5T_REFERENCE { H5T_STD_REF_OBJECT }}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
}
DATASET "BSE_RESONANT_COMPRESSED2_DONE" {
DATATYPE H5T_STRING {
STRSIZE 1;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_UTF8;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 2025000000 ) / ( 2025000000 ) }
STORAGE_LAYOUT {
CONTIGUOUS
SIZE 2025000000
OFFSET 10125006475
}
FILTERS {
NONE
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE ""
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_EARLY
}
ATTRIBUTE "DIMENSION_LIST" {
DATATYPE H5T_VLEN { H5T_REFERENCE { H5T_STD_REF_OBJECT }}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
}
DATASET "BSE_RESONANT_COMPRESSED3_DONE" {
DATATYPE H5T_STRING {
STRSIZE 1;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_UTF8;
CTYPE H5T_C_S1;
}
DATASPACE SIMPLE { ( 781887360 ) / ( 781887360 ) }
STORAGE_LAYOUT {
CONTIGUOUS
SIZE 781887360
OFFSET 15277557963
}
FILTERS {
NONE
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE ""
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_EARLY
}
ATTRIBUTE "DIMENSION_LIST" {
DATATYPE H5T_VLEN { H5T_REFERENCE { H5T_STD_REF_OBJECT }}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
}
DATASET "BS_K_compressed1" {
DATATYPE H5T_IEEE_F32BE
DATASPACE SIMPLE { ( 24776792 ) / ( 24776792 ) }
STORAGE_LAYOUT {
CONTIGUOUS
SIZE 99107168
OFFSET 16059447379
}
FILTERS {
NONE
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE H5D_FILL_VALUE_DEFAULT
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_EARLY
}
ATTRIBUTE "CLASS" {
DATATYPE H5T_STRING {
STRSIZE 16;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE "NAME" {
DATATYPE H5T_STRING {
STRSIZE 64;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE "REFERENCE_LIST" {
DATATYPE H5T_COMPOUND {
H5T_REFERENCE { H5T_STD_REF_OBJECT } "dataset";
H5T_STD_I32LE "dimension";
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
}
DATASET "BS_K_linearized1" {
DATATYPE H5T_IEEE_F32BE
DATASPACE SIMPLE { ( 2025000000 ) / ( 2025000000 ) }
STORAGE_LAYOUT {
CONTIGUOUS
SIZE 8100000000
OFFSET 2379
}
FILTERS {
NONE
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE H5D_FILL_VALUE_DEFAULT
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_EARLY
}
ATTRIBUTE "CLASS" {
DATATYPE H5T_STRING {
STRSIZE 16;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE "NAME" {
DATATYPE H5T_STRING {
STRSIZE 64;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE "REFERENCE_LIST" {
DATATYPE H5T_COMPOUND {
H5T_REFERENCE { H5T_STD_REF_OBJECT } "dataset";
H5T_STD_I32LE "dimension";
}
DATASPACE SIMPLE { ( 2 ) / ( 2 ) }
}
}
DATASET "BS_K_linearized2" {
DATATYPE H5T_IEEE_F32BE
DATASPACE SIMPLE { ( 781887360 ) / ( 781887360 ) }
STORAGE_LAYOUT {
CONTIGUOUS
SIZE 3127549440
OFFSET 12150006475
}
FILTERS {
NONE
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE H5D_FILL_VALUE_DEFAULT
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_EARLY
}
ATTRIBUTE "CLASS" {
DATATYPE H5T_STRING {
STRSIZE 16;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE "NAME" {
DATATYPE H5T_STRING {
STRSIZE 64;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE "REFERENCE_LIST" {
DATATYPE H5T_COMPOUND {
H5T_REFERENCE { H5T_STD_REF_OBJECT } "dataset";
H5T_STD_I32LE "dimension";
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
}
DATASET "complex" {
DATATYPE H5T_IEEE_F32BE
DATASPACE SIMPLE { ( 2 ) / ( 2 ) }
STORAGE_LAYOUT {
CONTIGUOUS
SIZE 8
OFFSET 16059447371
}
FILTERS {
NONE
}
FILLVALUE {
FILL_TIME H5D_FILL_TIME_IFSET
VALUE H5D_FILL_VALUE_DEFAULT
}
ALLOCATION_TIME {
H5D_ALLOC_TIME_EARLY
}
ATTRIBUTE "CLASS" {
DATATYPE H5T_STRING {
STRSIZE 16;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE "NAME" {
DATATYPE H5T_STRING {
STRSIZE 64;
STRPAD H5T_STR_NULLTERM;
CSET H5T_CSET_ASCII;
CTYPE H5T_C_S1;
}
DATASPACE SCALAR
}
ATTRIBUTE "REFERENCE_LIST" {
DATATYPE H5T_COMPOUND {
H5T_REFERENCE { H5T_STD_REF_OBJECT } "dataset";
H5T_STD_I32LE "dimension";
}
DATASPACE SIMPLE { ( 1 ) / ( 1 ) }
}
}
}
}
On Sat, May 2, 2020 at 5:55 PM +0200, "Wei-Keng Liao" <wkliao@xxxxxxxxxxxxxxxx>
wrote:
For HDF5 files, command “h5dump -Hp ndb.BS_COMPRESS0.005000_Q1” shows
the data chunk settings used by all datasets in the file.
Command “h5stat -Ss ndb.BS_COMPRESS0.005000_Q1” shows information about
free space, metadata, raw data, etc.
They may reveal why your file is abnormally big.
Most likely it is the chunk setting you used.
Wei-keng
> On May 1, 2020, at 6:40 PM, Davide Sangalli wrote:
>
> I also add
>
> ncvalidator ndb.BS_COMPRESS0.005000_Q1
> Error: Unknow file signature
> Expecting "CDF1", "CDF2", or "CDF5", but got "\211HDF"
> File "ndb.BS_COMPRESS0.005000_Q1" fails to conform with CDF file format
> specifications
>
> Best,
> D.
>
> On 02/05/20 01:26, Davide Sangalli wrote:
>> Output of ncdump -hs
>>
>> D.
>>
>> ncdump -hs BSK_2-5B_X59RL-50B_SP_bse-io/ndb.BS_COMPRESS0.005000_Q1
>>
>> netcdf ndb.BS_COMPRESS0 {
>> dimensions:
>> BS_K_linearized1 = 2025000000 ;
>> BS_K_linearized2 = 781887360 ;
>> complex = 2 ;
>> BS_K_compressed1 = 24776792 ;
>> variables:
>> char BSE_RESONANT_COMPRESSED1_DONE(BS_K_linearized1) ;
>> BSE_RESONANT_COMPRESSED1_DONE:_Storage = "contiguous" ;
>> char BSE_RESONANT_COMPRESSED2_DONE(BS_K_linearized1) ;
>> BSE_RESONANT_COMPRESSED2_DONE:_Storage = "contiguous" ;
>> char BSE_RESONANT_COMPRESSED3_DONE(BS_K_linearized2) ;
>> BSE_RESONANT_COMPRESSED3_DONE:_Storage = "contiguous" ;
>> float BSE_RESONANT_COMPRESSED1(BS_K_compressed1, complex) ;
>> BSE_RESONANT_COMPRESSED1:_Storage = "contiguous" ;
>> BSE_RESONANT_COMPRESSED1:_Endianness = "little" ;
>> // global attributes:
>> :_NCProperties =
>> "version=1|netcdflibversion=4.4.1.1|hdf5libversion=1.8.18" ;
>> :_SuperblockVersion = 0 ;
>> :_IsNetcdf4 = 1 ;
>> :_Format = "netCDF-4" ;
>>
>>
>>
>> On Sat, May 2, 2020 at 12:24 AM +0200, "Dave Allured - NOAA Affiliate"
>> wrote:
>>
>> I agree that you should expect the file size to be about 1 byte per stored
>> character. IMO the most likely explanation is that you have a netcdf-4 file
>> with inappropriately small chunk size. Another possibility is a 64-bit
>> offset file with crazy huge padding between file sections. This is very
>> unlikely, but I do not know what is inside your writer code.
>>
>> Diagnose, please. Ncdump -hs. If it is 64-bit offset, I think ncvalidator
>> can display the hidden pad sizes.
>>
>>
>> On Fri, May 1, 2020 at 3:37 PM Davide Sangalli wrote:
>> Dear all,
>> I'm a developer of a Fortran code which uses netcdf for I/O.
>>
>> In one of my runs I created a file with some huge arrays of characters.
>> The header of the file is the following:
>> netcdf ndb.BS_COMPRESS0 {
>> dimensions:
>> BS_K_linearized1 = 2025000000 ;
>> BS_K_linearized2 = 781887360 ;
>> variables:
>> char BSE_RESONANT_COMPRESSED1_DONE(BS_K_linearized1) ;
>> char BSE_RESONANT_COMPRESSED2_DONE(BS_K_linearized1) ;
>> char BSE_RESONANT_COMPRESSED3_DONE(BS_K_linearized2) ;
>> }
>>
>> The variable is declared as nf90_char which, according to the documentation,
>> should be 1 byte per element.
>> Thus I would expect the total size of the file to be 1
>> byte*(2*2025000000+781887360) ~ 4.5 GB
>> Instead the file size is 16059445323 bytes ~ 14.96 GB, i.e. 10.46 GB more
>> and a factor 3.33 bigger
>>
>> This happens consistently if I consider the file
>> netcdf ndb {
>> dimensions:
>> complex = 2 ;
>> BS_K_linearized1 = 2025000000 ;
>> BS_K_linearized2 = 781887360 ;
>> variables:
>> float BSE_RESONANT_LINEARIZED1(BS_K_linearized1, complex) ;
>> char BSE_RESONANT_LINEARIZED1_DONE(BS_K_linearized1) ;
>> float BSE_RESONANT_LINEARIZED2(BS_K_linearized1, complex) ;
>> char BSE_RESONANT_LINEARIZED2_DONE(BS_K_linearized1) ;
>> float BSE_RESONANT_LINEARIZED3(BS_K_linearized2, complex) ;
>> char BSE_RESONANT_LINEARIZED3_DONE(BS_K_linearized2) ;
>> }
>> The float component should weigh ~36 GB while the char component should be
>> identical to before, i.e. 4.5 GB for a total of 40.5 GB
>> The file is instead ~ 50.96 GB, i.e. again 10.46 GB bigger than
>> expected.
>>
>> Why ?
>>
>> My character variables are something like
>> "tnnnntnnnntnnnnnnnntnnnnnttnnnnnnnnnnnnnnnnt..."
>> but the file size is already like that just after the file creation, i.e.
>> before filling it.
>>
>> Some info about the library, compiled linking to HDF5 (hdf5-1.8.18), with
>> parallel IO support:
>> Name: netcdf
>> Description: NetCDF Client Library for C
>> URL: http://www.unidata.ucar.edu/netcdf
>> Version: 4.4.1.1
>> Libs: -L${libdir} -lnetcdf -ldl -lm
>> /nfs/data/bin/Yambo/gcc-8.1.0/openmpi-3.1.0/yambo_ext_libs/gfortran/mpifort/v4/parallel/lib/libhdf5hl_fortran.a
>>
>> /nfs/data/bin/Yambo/gcc-8.1.0/openmpi-3.1.0/yambo_ext_libs/gfortran/mpifort/v4/parallel/lib/libhdf5_fortran.a
>>
>> /nfs/data/bin/Yambo/gcc-8.1.0/openmpi-3.1.0/yambo_ext_libs/gfortran/mpifort/v4/parallel/lib/libhdf5_hl.a
>>
>> /nfs/data/bin/Yambo/gcc-8.1.0/openmpi-3.1.0/yambo_ext_libs/gfortran/mpifort/v4/parallel/lib/libhdf5.a
>> -lz -lm -ldl -lcurl
>> Cflags: -I${includedir}
>>
>> Name: netcdf-fortran
>> Description: NetCDF Client Library for Fortran
>> URL: http://www.unidata.ucar.edu/netcdf
>> Version: 4.4.4
>> Requires.private: netcdf > 4.1.1
>> Libs: -L${libdir} -lnetcdff
>> Libs.private: -L${libdir} -lnetcdff -lnetcdf
>> Cflags: -I${includedir}
>>
>> Best,
>> D.
>> --
>> Davide Sangalli, PhD
>> CNR-ISM, Division of Ultrafast Processes in Materials (FLASHit) and MaX
>> Centre
>> Area della Ricerca di Roma 1, 00016 Monterotondo Scalo, Italy
>> http://www.ism.cnr.it/en/davide-sangalli-cv/
>> http://www.max-centre.eu/
>
> _______________________________________________
> NOTE: All exchanges posted to Unidata maintained email lists are
> recorded in the Unidata inquiry tracking system and made publicly
> available through the web. Users who post to any of the lists we
> maintain are reminded to remove any personal information that they
> do not want to be made public.
>
>
> netcdfgroup mailing list
> netcdfgroup@xxxxxxxxxxxxxxxx
> For list information or to unsubscribe, visit:
> https://urldefense.com/v3/__https://www.unidata.ucar.edu/mailing_lists/__;!!Dq0X2DkFhyF93HkjWTBQKhk!GlMUXr2ZUUJOLFkvEP_YqN7UDZILtBBWb_Z5DVa2Mwi9UIg_yB2Hb7tJibyV8bgan4ku$
>