Re: [netcdfgroup] [netcdf-hdf] netcdf4 and OpenMP

Hi Henry,

On May 20, 2008, at 4:53 AM, Henry Butowsky wrote:

Hi all,

The NCO operator program ncbo (aka ncdiff) uses the OpenMP interface
to thread its work over the loop of variables in the input files.
The threading works fine with netcdf3 but not with netcdf4.
(The failure only occurs with netcdf4 files and the netcdf4 API;
 netcdf3 files with the netcdf4 API work.)

We are unsure why and are wondering if others have unexplained
problems with OpenMP code that utilizes the new netcdf4 library?

A simplified version of the code is below.
ncbo takes 3 file arguments - 2 input files and one output file
It then subtracts the variables in file 2 from the variables in file 1
and writes the results to the output file.
Each thread has its own file handles in_id_1 & in_id_2 for the input files.

A couple of notes:
nco_msa_var_get()  wraps the netcdf functions nco_get_vara_type().
nco_var_mtd_refresh() refreshes the variable structure with type, id,
 number of dimensions & missing value (if any)
nco_var_sbt() subtracts values in var_prc_2 from var_prc_1

Ideas/suggestions as to what is going wrong or how to debug this
problem would be much appreciated.

Are you using a threadsafe version of HDF5 (i.e. one that is configured with the "--enable-threadsafe" option)? It's also possible that netCDF-4 itself needs some locking mechanisms, but that's a question for Ed or Russ.

        Quincey



Regards.
Henry



/******************************************************************************************************/


 /* Simplified ncbo driver fragment: open the output file, open one handle per
    thread for each input file, then loop over the processed variables,
    reading each variable from both inputs, differencing them and writing the
    result to the output file. */

 /* Open output file */
 fl_out_tmp=nco_fl_out_open(fl_out,FORCE_APPEND,FORCE_OVERWRITE,fl_out_fmt,&out_id);

 /* Create one file handle per thread for input file 1
    NOTE(review): rcd is overwritten on every call and never inspected in this
    fragment - confirm that nco_open() reports failures itself */
 for(thr_idx=0;thr_idx<thr_nbr;thr_idx++)
   rcd=nco_open(fl_in_1,NC_NOWRITE,in_id_1_arr+thr_idx);

 /* Create one file handle per thread for input file 2 */
 for(thr_idx=0;thr_idx<thr_nbr;thr_idx++)
   rcd=nco_open(fl_in_2,NC_NOWRITE,in_id_2_arr+thr_idx);

#ifdef _OPENMP
 /* OpenMP notes:
    shared(): msk and wgt are not altered within loop
    private(): wgt_avg does not need initialization
    NOTE(review): msk, wgt and wgt_avg are not referenced anywhere in this
    loop - confirm whether these notes are stale
    The clause list is continued with backslashes because a #pragma ends at
    the first unescaped newline */
#pragma omp parallel for default(none) firstprivate(ddra_info) \
 private(idx,in_id_1,in_id_2,dmn_idx,dmn_jdx) \
 shared(dbg_lvl,dim_1,fl_in_1,fl_in_2,fl_out,flg_ddra,in_id_1_arr,in_id_2_arr,nbr_dmn_xtr_1,nbr_var_prc_1,nbr_var_prc_2,nco_op_typ,out_id,prg_nm,rcd,var_prc_1,var_prc_2,var_prc_out,lmt_all_lst,nbr_dmn_fl_1)
#endif /* _OPENMP */
 for(idx=0;idx<nbr_var_prc_1;idx++){
   int has_mss_val=False;
   /* Zero-initialized so that the by-value argument handed to nco_var_sbt()
      is never indeterminate when no missing value is present */
   ptr_unn mss_val={0};
   /* Index into the per-thread handle arrays; remains 0 when built without OpenMP */
   int thr_id=0;

#ifdef _OPENMP
   /* Guarded like the pragma above so the fragment does not depend on the
      OpenMP runtime when _OPENMP is undefined */
   thr_id=omp_get_thread_num();
#endif /* _OPENMP */

   /* Each thread reads through its own pair of input file handles */
   in_id_1=in_id_1_arr[thr_id];
   in_id_2=in_id_2_arr[thr_id];

   /* Find and set variable dmn_nbr, ID, mss_val, type in first file */
   (void)nco_var_mtd_refresh(in_id_1,var_prc_1[idx]);
   has_mss_val=var_prc_1[idx]->has_mss_val;

   /* Read hyperslab from first file */
   (void)nco_msa_var_get(in_id_1,var_prc_1[idx],lmt_all_lst,nbr_dmn_fl_1);

   /* Find and set variable dmn_nbr, ID, mss_val, type in second file */
   (void)nco_var_mtd_refresh(in_id_2,var_prc_2[idx]);

   /* Read hyperslab from second file */
   (void)nco_msa_var_get(in_id_2,var_prc_2[idx],lmt_all_lst,nbr_dmn_fl_1);

   /* Presumably conforms var_prc_2 to the in-memory type of var_prc_1;
      the returned structure replaces var_prc_2[idx] from here on */
   var_prc_2[idx]=nco_var_cnf_typ(var_prc_1[idx]->type,var_prc_2[idx]);

   /* Change missing_value of var_prc_2, if any, to missing_value of
      var_prc_1, if any */
   has_mss_val=nco_mss_val_cnf(var_prc_1[idx],var_prc_2[idx]);

   /* mss_val in fl_1, if any, overrides mss_val in fl_2 */
   if(has_mss_val) mss_val=var_prc_1[idx]->mss_val;

   /* Subtract file_2 from file_1 */
   (void)nco_var_sbt(var_prc_1[idx]->type,var_prc_1[idx]->sz,has_mss_val,mss_val,var_prc_2[idx]->val,var_prc_1[idx]->val);

   /* Second operand is no longer needed once the difference is computed */
   var_prc_2[idx]->val.vp=nco_free(var_prc_2[idx]->val.vp);

#ifdef _OPENMP
#pragma omp critical
#endif /* _OPENMP */
   { /* begin OpenMP critical */
     /* Copy result to output file and free workspace buffer */
     if(var_prc_1[idx]->nbr_dim == 0){
       (void)nco_put_var1(out_id,var_prc_out[idx]->id,var_prc_out[idx]->srt,var_prc_1[idx]->val.vp,var_prc_1[idx]->type);
     }else{ /* end if variable is scalar */
       (void)nco_put_vara(out_id,var_prc_out[idx]->id,var_prc_out[idx]->srt,var_prc_out[idx]->cnt,var_prc_1[idx]->val.vp,var_prc_1[idx]->type);
     } /* end else */
   } /* end OpenMP critical */
   var_prc_1[idx]->val.vp=nco_free(var_prc_1[idx]->val.vp);

 } /* end (OpenMP parallel for) loop over idx */


/******************************************************************************************************/



Variable structure


 /* var_sct: description of one variable together with the buffer that holds
    its current hyperslab. Each member is annotated above its declaration;
    the bracketed tag gives the unit or kind of the member. */
 typedef struct var_sct_tag{
   /* [sng] Variable name */
   char *nm;
   /* [id] Variable ID */
   int id;
   /* [id] File ID */
   int nc_id;
   /* [nbr] Number of dimensions of variable in input file */
   int nbr_dim;
   /* [enm] Type of variable in RAM */
   nc_type type;
   /* [enm] Type of variable on disk (never changes) */
   nc_type typ_dsk;
   /* [flg] Is this a record variable? */
   short is_rec_var;
   /* [flg] Is this a coordinate variable? */
   short is_crd_var;
   /* [nbr] Number of elements (NOT bytes) in hyperslab
      (NOT full size of variable in input file!) */
   long sz;
   /* [nbr] Number of elements in one record of hyperslab */
   long sz_rec;
   /* [nbr] Number of attributes */
   int nbr_att;
   /* [flg] Variable has duplicate copies of same dimension */
   int has_dpl_dmn;
   /* [flg] Is there a missing_value attribute? */
   int has_mss_val;
   /* [frc] Value of missing_value attribute, if any
      (mss_val stored in this structure must be same type as variable) */
   ptr_unn mss_val;
   /* [id] Dimension ID of associated coordinate, if any */
   int cid;
   /* [sng] Hint for printf()-style formatting */
   char fmt[5];
   /* [sct] Pointers to full dimension structures */
   dmn_sct **dim;
   /* [id] Contiguous vector of dimension IDs */
   int *dmn_id;
   /* [id] Contiguous vector of indices to start of hyperslab */
   long *srt;
   /* [id] Contiguous vector of indices to end of hyperslab */
   long *end;
   /* [id] Contiguous vector of lengths of hyperslab */
   long *cnt;
   /* [id] Contiguous vector of stride of hyperslab */
   long *srd;
   /* [bfr] Buffer to hold hyperslab */
   ptr_unn val;
   /* [nbr] Number of valid operations performed so far */
   long *tally;
   /* [sct] Cross-reference to associated variable structure
      (usually structure for variable on output)
      fxm: deprecate! TODO nco226 */
   struct var_sct_tag *xrf;
   /* [flg] Variable is packed on disk
      (valid scale_factor, add_offset, or both attributes exist) */
   int pck_dsk;
   /* [flg] Variable is packed in memory
      (valid scale_factor, add_offset, or both attributes exist) */
   int pck_ram;
   /* [flg] Valid scale_factor attribute exists */
   int has_scl_fct;
   /* [flg] Valid add_offset attribute exists */
   int has_add_fst;
   /* [frc] Value of scale_factor attribute of type typ_upk */
   ptr_unn scl_fct;
   /* [frc] Value of add_offset attribute of type typ_upk */
   ptr_unn add_fst;
   /* [enm] Type of variable when packed (on disk).
      typ_pck = typ_dsk except in cases where variable is packed in input
      file and unpacked in output file. */
   nc_type typ_pck;
   /* [enm] Type of variable when unpacked (expanded) (in memory) */
   nc_type typ_upk;
   /* [flg] Variable is still undefined (in first parser pass) */
   int undefined;
   /* [flg] Is this a fixed (non-processed) variable? */
   int is_fix_var;
 } var_sct; /* end var_sct_tag */





_______________________________________________
netcdf-hdf mailing list
netcdf-hdf@xxxxxxxxxxxxxxxx
For list information or to unsubscribe, visit: 
http://www.unidata.ucar.edu/mailing_lists/




  • 2008 messages navigation, sorted by:
    1. Thread
    2. Subject
    3. Author
    4. Date
    5. ↑ Table Of Contents
  • Search the netcdfgroup archives: