netCDF  4.3.0
 All Data Structures Files Functions Variables Typedefs Macros Groups Pages
nccopy.c
1 /*********************************************************************
2  * Copyright 2010, University Corporation for Atmospheric Research
3  * See netcdf/README file for copying and redistribution conditions.
4  * Thanks to Philippe Poilbarbe and Antonio S. Cofiño for
5  * compression additions.
6  * $Id: nccopy.c 400 2010-08-27 21:02:52Z russ $
7  *********************************************************************/
8 
9 #include "config.h" /* for USE_NETCDF4 macro */
10 #include <stdlib.h>
11 #ifdef HAVE_GETOPT_H
12 #include <getopt.h>
13 #endif
14 #ifdef HAVE_UNISTD_H
15 #include <unistd.h>
16 #endif
17 #include <string.h>
18 #include <netcdf.h>
19 #include "nciter.h"
20 #include "chunkspec.h"
21 #include "utils.h"
22 #include "dimmap.h"
23 #include "nccomps.h"
24 
25 #ifdef _MSC_VER
26 #include "XGetopt.h"
27 #define snprintf _snprintf
28 int opterr;
29 int optind;
30 #endif
31 
32 /* default bytes of memory we are willing to allocate for variable
33  * values during copy */
34 #define COPY_BUFFER_SIZE (5000000)
35 #define COPY_CHUNKCACHE_PREEMPTION (1.0f) /* for copying, can eject fully read chunks */
36 #define SAME_AS_INPUT (-1) /* default, if kind not specified */
37 #define CHUNK_THRESHOLD (8192) /* variables with fewer bytes don't get chunked */
38 
39 #ifndef USE_NETCDF4
40 #define NC_CLASSIC_MODEL 0x0100 /* Enforce classic model if netCDF-4 not available. */
41 #endif
42 
43 /* Global variables for command-line requests */
44 char *progname; /* for error messages */
45 static int option_kind = SAME_AS_INPUT;
46 static int option_deflate_level = -1; /* default, compress output only if input compressed */
47 static int option_shuffle_vars = NC_NOSHUFFLE; /* default, no shuffling on compression */
48 static int option_fix_unlimdims = 0; /* default, preserve unlimited dimensions */
49 static char* option_chunkspec = 0; /* default, no chunk specification */
50 static size_t option_copy_buffer_size = COPY_BUFFER_SIZE;
51 static size_t option_chunk_cache_size = CHUNK_CACHE_SIZE; /* default from config.h */
52 static size_t option_chunk_cache_nelems = CHUNK_CACHE_NELEMS; /* default from config.h */
53 static int option_read_diskless = 0; /* default, don't read input into memory on open */
54 static int option_write_diskless = 0; /* default, don't write output to diskless file */
55 static int option_min_chunk_bytes = CHUNK_THRESHOLD; /* default, don't chunk variable if prod of
56  * chunksizes of its dimensions is smaller
57  * than this */
58 static int option_nlgrps = 0; /* Number of groups specified with -g
59  * option on command line */
60 static char** option_lgrps = 0; /* list of group names specified with -g
61  * option on command line */
62 static idnode_t* option_grpids = 0; /* list of grpids matching list specified with -g option */
63 static bool_t option_grpstruct = false; /* if -g set, copy structure for non-selected groups */
64 static int option_nlvars = 0; /* Number of variables specified with -v * option on command line */
65 static char** option_lvars = 0; /* list of variable names specified with -v
66  * option on command line */
67 static bool_t option_varstruct = false; /* if -v set, copy structure for non-selected vars */
68 static int option_compute_chunkcaches = 0; /* default, don't try still flaky estimate of
69  * chunk cache for each variable */
70 
71 /* get group id in output corresponding to group igrp in input,
72  * given parent group id (or root group id) parid in output. */
73 static int
74 get_grpid(int igrp, int parid, int *ogrpp) {
75  int stat = NC_NOERR;
76  int ogid = parid; /* like igrp but in output file */
77 #ifdef USE_NETCDF4
78  int inparid;
79 
80  /* if not root group, get corresponding output groupid from group name */
81  stat = nc_inq_grp_parent(igrp, &inparid);
82  if(stat == NC_NOERR) { /* not root group */
83  char grpname[NC_MAX_NAME + 1];
84  NC_CHECK(nc_inq_grpname(igrp, grpname));
85  NC_CHECK(nc_inq_grp_ncid(parid, grpname, &ogid));
86  } else if(stat == NC_ENOGRP) { /* root group */
87  stat = NC_NOERR;
88  } else {
89  NC_CHECK(stat);
90  }
91 #endif /* USE_NETCDF4 */
92  *ogrpp = ogid;
93  return stat;
94 }
95 
96 /* Return size in bytes of a variable value */
97 static size_t
98 val_size(int grpid, int varid) {
99  nc_type vartype;
100  size_t value_size;
101  NC_CHECK(nc_inq_vartype(grpid, varid, &vartype));
102  NC_CHECK(nc_inq_type(grpid, vartype, NULL, &value_size));
103  return value_size;
104 }
105 
106 #ifdef USE_NETCDF4
107 /* Get parent id needed to define a new group from its full name in an
108  * open file identified by ncid. Assumes all intermediate groups are
109  * already defined. */
110 static int
111 nc_inq_parid(int ncid, const char *fullname, int *locidp) {
112  char *parent = strdup(fullname);
113  char *slash = "/"; /* groupname separator */
114  char *last_slash;
115  if(parent == NULL) {
116  return NC_ENOMEM; /* exits */
117  }
118  last_slash = strrchr(parent, '/');
119  if(last_slash == parent || last_slash == NULL) { /* parent is root */
120  free(parent);
121  parent = strdup(slash);
122  } else {
123  *last_slash = '\0'; /* truncate to get parent name */
124  }
125  NC_CHECK(nc_inq_grp_full_ncid(ncid, parent, locidp));
126  free(parent);
127  return NC_NOERR;
128 }
129 
130 /* Return size of chunk in bytes for a variable varid in a group igrp, or 0 if
131  * layout is contiguous */
132 static int
133 inq_var_chunksize(int igrp, int varid, size_t* chunksizep) {
134  int stat = NC_NOERR;
135  int ndims;
136  size_t *chunksizes;
137  int dim;
138  int contig = 1;
139  nc_type vartype;
140  size_t value_size;
141  size_t prod;
142 
143  NC_CHECK(nc_inq_vartype(igrp, varid, &vartype));
144  /* from type, get size in memory needed for each value */
145  NC_CHECK(nc_inq_type(igrp, vartype, NULL, &value_size));
146  prod = value_size;
147  NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
148  chunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t));
149  if(ndims > 0) {
150  NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, NULL));
151  }
152  if(contig == 1) {
153  *chunksizep = 0;
154  } else {
155  NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, chunksizes));
156  for(dim = 0; dim < ndims; dim++) {
157  prod *= chunksizes[dim];
158  }
159  *chunksizep = prod;
160  }
161  free(chunksizes);
162  return stat;
163 }
164 
165 /* Return estimated number of elems required in chunk cache and
166  * estimated size of chunk cache adequate to efficiently copy input
167  * variable ivarid to output variable ovarid, which may have different
168  * chunk size and shape */
169 static int
170 inq_var_chunking_params(int igrp, int ivarid, int ogrp, int ovarid,
171  size_t* chunkcache_sizep,
172  size_t *chunkcache_nelemsp,
173  float * chunkcache_preemptionp)
174 {
175  int stat = NC_NOERR;
176  int ndims;
177  size_t *ichunksizes, *ochunksizes;
178  int dim;
179  int icontig = 1, ocontig = 1;
180  nc_type vartype;
181  size_t value_size;
182  size_t prod, iprod, oprod;
183  size_t nelems;
184  *chunkcache_nelemsp = CHUNK_CACHE_NELEMS;
185  *chunkcache_sizep = CHUNK_CACHE_SIZE;
186  *chunkcache_preemptionp = COPY_CHUNKCACHE_PREEMPTION;
187 
188  NC_CHECK(nc_inq_varndims(igrp, ivarid, &ndims));
189  if(ndims > 0) {
190  NC_CHECK(nc_inq_var_chunking(igrp, ivarid, &icontig, NULL));
191  NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &ocontig, NULL));
192  }
193  if(icontig == 1 && ocontig == 1) { /* no chunking in input or output */
194  *chunkcache_nelemsp = 0;
195  *chunkcache_sizep = 0;
196  *chunkcache_preemptionp = 0;
197  return stat;
198  }
199 
200  NC_CHECK(nc_inq_vartype(igrp, ivarid, &vartype));
201  NC_CHECK(nc_inq_type(igrp, vartype, NULL, &value_size));
202  iprod = value_size;
203 
204  if(icontig == 0 && ocontig == 1) { /* chunking only in input */
205  *chunkcache_nelemsp = 1; /* read one input chunk at a time */
206  *chunkcache_sizep = iprod;
207  *chunkcache_preemptionp = 1.0f;
208  return stat;
209  }
210 
211  ichunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t));
212  if(icontig == 1) { /* if input contiguous, treat as if chunked on
213  * first dimension */
214  ichunksizes[0] = 1;
215  for(dim = 1; dim < ndims; dim++) {
216  ichunksizes[dim] = dim;
217  }
218  } else {
219  NC_CHECK(nc_inq_var_chunking(igrp, ivarid, &icontig, ichunksizes));
220  }
221 
222  /* now can assume chunking in both input and output */
223  ochunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t));
224  NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &ocontig, ochunksizes));
225 
226  nelems = 1;
227  oprod = value_size;
228  for(dim = 0; dim < ndims; dim++) {
229  nelems += 1 + (ichunksizes[dim] - 1) / ochunksizes[dim];
230  iprod *= ichunksizes[dim];
231  oprod *= ochunksizes[dim];
232  }
233  prod = iprod + oprod * (nelems - 1);
234  *chunkcache_nelemsp = nelems;
235  *chunkcache_sizep = prod;
236  free(ichunksizes);
237  free(ochunksizes);
238  return stat;
239 }
240 
241 /* Forward declaration, because copy_type, copy_vlen_type call each other */
242 static int copy_type(int igrp, nc_type typeid, int ogrp);
243 
244 /*
245  * copy a user-defined variable length type in the group igrp to the
246  * group ogrp
247  */
248 static int
249 copy_vlen_type(int igrp, nc_type itype, int ogrp)
250 {
251  int stat = NC_NOERR;
252  nc_type ibasetype;
253  nc_type obasetype; /* base type in target group */
254  char name[NC_MAX_NAME];
255  size_t size;
256  char basename[NC_MAX_NAME];
257  size_t basesize;
258  nc_type vlen_type;
259 
260  NC_CHECK(nc_inq_vlen(igrp, itype, name, &size, &ibasetype));
261  /* to get base type id in target group, use name of base type in
262  * source group */
263  NC_CHECK(nc_inq_type(igrp, ibasetype, basename, &basesize));
264  stat = nc_inq_typeid(ogrp, basename, &obasetype);
265  /* if no such type, create it now */
266  if(stat == NC_EBADTYPE) {
267  NC_CHECK(copy_type(igrp, ibasetype, ogrp));
268  stat = nc_inq_typeid(ogrp, basename, &obasetype);
269  }
270  NC_CHECK(stat);
271 
272  /* Now we know base type exists in output and we know its type id */
273  NC_CHECK(nc_def_vlen(ogrp, name, obasetype, &vlen_type));
274 
275  return stat;
276 }
277 
278 /*
279  * copy a user-defined opaque type in the group igrp to the group ogrp
280  */
281 static int
282 copy_opaque_type(int igrp, nc_type itype, int ogrp)
283 {
284  int stat = NC_NOERR;
285  nc_type otype;
286  char name[NC_MAX_NAME];
287  size_t size;
288 
289  NC_CHECK(nc_inq_opaque(igrp, itype, name, &size));
290  NC_CHECK(nc_def_opaque(ogrp, size, name, &otype));
291 
292  return stat;
293 }
294 
295 /*
296  * copy a user-defined enum type in the group igrp to the group ogrp
297  */
298 static int
299 copy_enum_type(int igrp, nc_type itype, int ogrp)
300 {
301  int stat = NC_NOERR;
302  nc_type otype;
303  nc_type basetype;
304  size_t basesize;
305  size_t nmembers;
306  char name[NC_MAX_NAME];
307  int i;
308 
309  NC_CHECK(nc_inq_enum(igrp, itype, name, &basetype, &basesize, &nmembers));
310  NC_CHECK(nc_def_enum(ogrp, basetype, name, &otype));
311  for(i = 0; i < nmembers; i++) { /* insert enum members */
312  char ename[NC_MAX_NAME];
313  long long val; /* large enough to hold any integer type */
314  NC_CHECK(nc_inq_enum_member(igrp, itype, i, ename, &val));
315  NC_CHECK(nc_insert_enum(ogrp, otype, ename, &val));
316  }
317  return stat;
318 }
319 
320 /*
321  * copy a user-defined compound type in the group igrp to the group ogrp
322  */
323 static int
324 copy_compound_type(int igrp, nc_type itype, int ogrp)
325 {
326  int stat = NC_NOERR;
327  char name[NC_MAX_NAME];
328  size_t size;
329  size_t nfields;
330  nc_type otype;
331  int fid;
332 
333  NC_CHECK(nc_inq_compound(igrp, itype, name, &size, &nfields));
334  NC_CHECK(nc_def_compound(ogrp, size, name, &otype));
335 
336  for (fid = 0; fid < nfields; fid++) {
337  char fname[NC_MAX_NAME];
338  char ftypename[NC_MAX_NAME];
339  size_t foff;
340  nc_type iftype, oftype;
341  int fndims;
342 
343  NC_CHECK(nc_inq_compound_field(igrp, itype, fid, fname, &foff, &iftype, &fndims, NULL));
344  /* type ids in source don't necessarily correspond to same
345  * typeids in destination, so look up destination typeid by using
346  * field type name */
347  NC_CHECK(nc_inq_type(igrp, iftype, ftypename, NULL));
348  NC_CHECK(nc_inq_typeid(ogrp, ftypename, &oftype));
349  if(fndims == 0) {
350  NC_CHECK(nc_insert_compound(ogrp, otype, fname, foff, oftype));
351  } else { /* field is array type */
352  int *fdimsizes;
353  fdimsizes = (int *) emalloc((fndims + 1) * sizeof(int));
354  stat = nc_inq_compound_field(igrp, itype, fid, NULL, NULL, NULL,
355  NULL, fdimsizes);
356  NC_CHECK(nc_insert_array_compound(ogrp, otype, fname, foff, oftype, fndims, fdimsizes));
357  free(fdimsizes);
358  }
359  }
360  return stat;
361 }
362 
363 
364 /*
365  * copy a user-defined type in the group igrp to the group ogrp
366  */
367 static int
368 copy_type(int igrp, nc_type typeid, int ogrp)
369 {
370  int stat = NC_NOERR;
371  nc_type type_class;
372 
373  NC_CHECK(nc_inq_user_type(igrp, typeid, NULL, NULL, NULL, NULL, &type_class));
374 
375  switch(type_class) {
376  case NC_VLEN:
377  NC_CHECK(copy_vlen_type(igrp, typeid, ogrp));
378  break;
379  case NC_OPAQUE:
380  NC_CHECK(copy_opaque_type(igrp, typeid, ogrp));
381  break;
382  case NC_ENUM:
383  NC_CHECK(copy_enum_type(igrp, typeid, ogrp));
384  break;
385  case NC_COMPOUND:
386  NC_CHECK(copy_compound_type(igrp, typeid, ogrp));
387  break;
388  default:
389  NC_CHECK(NC_EBADTYPE);
390  }
391  return stat;
392 }
393 
394 /* Copy a group and all its subgroups, recursively, from iroot to
395  * oroot, the ncids of input file and output file. This just creates
396  * all the groups in the destination, but doesn't copy anything that's
397  * in the groups yet. */
398 static int
399 copy_groups(int iroot, int oroot)
400 {
401  int stat = NC_NOERR;
402  int numgrps;
403  int *grpids;
404  int i;
405 
406  /* get total number of groups and their ids, including all descendants */
407  NC_CHECK(nc_inq_grps_full(iroot, &numgrps, NULL));
408  if(numgrps > 1) { /* there's always 1 root group */
409  grpids = emalloc(numgrps * sizeof(int));
410  NC_CHECK(nc_inq_grps_full(iroot, NULL, grpids));
411  /* create corresponding new groups in ogrp, except for root group */
412  for(i = 1; i < numgrps; i++) {
413  char *grpname_full;
414  char grpname[NC_MAX_NAME];
415  size_t len_name;
416  int ogid, oparid, iparid;
417  /* get full group name of input group */
418  NC_CHECK(nc_inq_grpname(grpids[i], grpname));
419  if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) {
420  NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, NULL));
421  grpname_full = emalloc(len_name + 1);
422  NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, grpname_full));
423  /* Make sure, the parent group is also wanted (root group is always wanted) */
424  NC_CHECK(nc_inq_parid(iroot, grpname_full, &iparid));
425  if (!option_grpstruct && !group_wanted(iparid, option_nlgrps, option_grpids)
426  && iparid != iroot) {
427  error("ERROR: trying to copy a group but not the parent: %s", grpname_full);
428  }
429  /* get id of parent group of corresponding group in output.
430  * Note that this exists, because nc_inq_groups returned
431  * grpids in preorder, so parents are always copied before
432  * their subgroups */
433  NC_CHECK(nc_inq_parid(oroot, grpname_full, &oparid));
434  NC_CHECK(nc_inq_grpname(grpids[i], grpname));
435  /* define corresponding group in output */
436  NC_CHECK(nc_def_grp(oparid, grpname, &ogid));
437  free(grpname_full);
438  }
439  }
440  free(grpids);
441  }
442  return stat;
443 }
444 
445 /*
446  * Copy the user-defined types in this group (igrp) and all its
447  * subgroups, recursively, to corresponding group in output (ogrp)
448  */
449 static int
450 copy_types(int igrp, int ogrp)
451 {
452  int stat = NC_NOERR;
453  int ntypes;
454  nc_type *types = NULL;
455  int numgrps;
456  int *grpids = NULL;
457  int i;
458 
459  NC_CHECK(nc_inq_typeids(igrp, &ntypes, NULL));
460 
461  if(ntypes > 0) {
462  types = (nc_type *) emalloc(ntypes * sizeof(nc_type));
463  NC_CHECK(nc_inq_typeids(igrp, &ntypes, types));
464  for (i = 0; i < ntypes; i++) {
465  NC_CHECK(copy_type(igrp, types[i], ogrp));
466  }
467  free(types);
468  }
469 
470  /* Copy types from subgroups */
471  NC_CHECK(nc_inq_grps(igrp, &numgrps, NULL));
472  if(numgrps > 0) {
473  grpids = (int *)emalloc(sizeof(int) * numgrps);
474  NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
475  for(i = 0; i < numgrps; i++) {
476  if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) {
477  int ogid;
478  /* get groupid in output corresponding to grpids[i] in
479  * input, given parent group (or root group) ogrp in
480  * output */
481  NC_CHECK(get_grpid(grpids[i], ogrp, &ogid));
482  NC_CHECK(copy_types(grpids[i], ogid));
483  }
484  }
485  free(grpids);
486  }
487  return stat;
488 }
489 
490 /* Copy all netCDF-4 specific variable properties such as chunking,
491  * endianness, deflation, checksumming, fill, etc. */
492 static int
493 copy_var_specials(int igrp, int varid, int ogrp, int o_varid)
494 {
495  int stat = NC_NOERR;
496  { /* handle chunking parameters */
497  int ndims;
498  NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
499  if (ndims > 0) { /* no chunking for scalar variables */
500  int contig = 0;
501  size_t *chunkp = (size_t *) emalloc(ndims * sizeof(size_t));
502  int *dimids = (int *) emalloc(ndims * sizeof(int));
503  int idim;
504  /* size of a chunk: product of dimension chunksizes and size of value */
505  size_t csprod = val_size(ogrp, o_varid);
506  int is_unlimited = 0;
507  NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, chunkp));
508  NC_CHECK(nc_inq_vardimid(igrp, varid, dimids));
509 
510  for(idim = 0; idim < ndims; idim++) {
511  int idimid = dimids[idim];
512  int odimid = dimmap_odimid(idimid);
513  size_t chunksize = chunkspec_size(idimid);
514  if(chunksize > 0) { /* found in chunkspec */
515  chunkp[idim] = chunksize;
516  }
517  csprod *= chunkp[idim];
518  if(dimmap_ounlim(odimid))
519  is_unlimited = 1;
520  }
521  /* Explicitly set chunking, even if default */
522  /* If product of chunksizes is too small and no unlimited
523  * dimensions used, don't chunk */
524  if ((csprod < option_min_chunk_bytes && !is_unlimited) || contig == 1) {
525  NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CONTIGUOUS, NULL));
526  } else {
527  NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CHUNKED, chunkp));
528  }
529  free(dimids);
530  free(chunkp);
531  }
532  }
533  { /* handle compression parameters, copying from input, overriding
534  * with command-line options */
535  int shuffle, deflate, deflate_level;
536  NC_CHECK(nc_inq_var_deflate(igrp, varid, &shuffle, &deflate, &deflate_level));
537  if(deflate_level == 0 && deflate == 1) /* TODO: why is this needed?? Bug in nc_inq_var_deflate? */
538  deflate = 0;
539  if(option_deflate_level >= 0) { /* change output compression, if requested */
540  deflate_level = option_deflate_level;
541  deflate=1;
542  }
543  if(shuffle==0 && option_shuffle_vars != 0) {
544  shuffle = option_shuffle_vars;
545  }
546  if(deflate != 0 || shuffle != 0) {
547  NC_CHECK(nc_def_var_deflate(ogrp, o_varid, shuffle, deflate_level > 0, deflate_level));
548  }
549  }
550  { /* handle checksum parameters */
551  int fletcher32 = 0;
552  NC_CHECK(nc_inq_var_fletcher32(igrp, varid, &fletcher32));
553  if(fletcher32 != 0) {
554  NC_CHECK(nc_def_var_fletcher32(ogrp, o_varid, fletcher32));
555  }
556  }
557  { /* handle endianness */
558  int endianness = 0;
559  NC_CHECK(nc_inq_var_endian(igrp, varid, &endianness));
560  if(endianness != NC_ENDIAN_NATIVE) { /* native is the default */
561  NC_CHECK(nc_def_var_endian(ogrp, o_varid, endianness));
562  }
563  }
564  return stat;
565 }
566 
567 /* Set output variable o_varid (in group ogrp) to use chunking
568  * specified on command line, only called for classic format input and
569  * netCDF-4 format output, so no existing chunk lengths to override. */
570 static int
571 set_var_chunked(int ogrp, int o_varid)
572 {
573  int stat = NC_NOERR;
574  int ndims;
575  int odim;
576  size_t chunk_threshold = CHUNK_THRESHOLD;
577 
578  if(chunkspec_ndims() == 0) /* no chunking specified on command line */
579  return stat;
580  NC_CHECK(nc_inq_varndims(ogrp, o_varid, &ndims));
581 
582  if (ndims > 0) { /* no chunking for scalar variables */
583  int chunked = 0;
584  int *dimids = (int *) emalloc(ndims * sizeof(int));
585  size_t varsize;
586  nc_type vartype;
587  size_t value_size;
588  int is_unlimited = 0;
589 
590  NC_CHECK(nc_inq_vardimid (ogrp, o_varid, dimids));
591  NC_CHECK(nc_inq_vartype(ogrp, o_varid, &vartype));
592  /* from type, get size in memory needed for each value */
593  NC_CHECK(nc_inq_type(ogrp, vartype, NULL, &value_size));
594  varsize = value_size;
595 
596  /* Determine if this variable should be chunked. A variable
597  * should be chunked if any of its dims are in command-line
598  * chunk spec. It will also be chunked if any of its
599  * dims are unlimited. */
600  for(odim = 0; odim < ndims; odim++) {
601  int odimid = dimids[odim];
602  int idimid = dimmap_idimid(odimid); /* corresponding dimid in input file */
603  if(dimmap_ounlim(odimid))
604  is_unlimited = 1;
605  if(idimid != -1) {
606  size_t chunksize = chunkspec_size(idimid); /* from chunkspec */
607  size_t dimlen;
608  NC_CHECK(nc_inq_dimlen(ogrp, odimid, &dimlen));
609  if( (chunksize > 0) || dimmap_ounlim(odimid)) {
610  chunked = 1;
611  }
612  varsize *= dimlen;
613  }
614  }
615  /* Don't chunk small variables that don't use an unlimited
616  * dimension. */
617  if(varsize < chunk_threshold && !is_unlimited)
618  chunked = 0;
619 
620  if(chunked) {
621  /* Allocate chunksizes and set defaults to dimsize for any
622  * dimensions not mentioned in chunkspec. */
623  size_t *chunkp = (size_t *) emalloc(ndims * sizeof(size_t));
624  for(odim = 0; odim < ndims; odim++) {
625  int odimid = dimids[odim];
626  int idimid = dimmap_idimid(odimid);
627  size_t chunksize = chunkspec_size(idimid);
628  if(chunksize > 0) {
629  chunkp[odim] = chunksize;
630  } else {
631  NC_CHECK(nc_inq_dimlen(ogrp, odimid, &chunkp[odim]));
632  }
633  }
634  NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CHUNKED, chunkp));
635  free(chunkp);
636  }
637  free(dimids);
638  }
639  return stat;
640 }
641 
642 /* Set variable to compression specified on command line */
643 static int
644 set_var_compressed(int ogrp, int o_varid)
645 {
646  int stat = NC_NOERR;
647  if (option_deflate_level >= 0) {
648  int deflate = 1;
649  NC_CHECK(nc_def_var_deflate(ogrp, o_varid, option_shuffle_vars, deflate, option_deflate_level));
650  }
651  return stat;
652 }
653 
/* Shrink the chunk cache of variable varid in group grp to a minimal
 * size.  This is not necessary, but will save some memory when
 * processing one variable at a time.  Only compiled when UNUSED is
 * defined. */
#ifdef UNUSED
static int
free_var_chunk_cache(int grp, int varid)
{
    int status = NC_NOERR;
    int format;
    int storage = 1;
    const size_t tiny_size = 1;
    const size_t tiny_nelems = 1;
    const float no_preemption = 0;

    NC_CHECK(nc_inq_format(grp, &format));
    if(format != NC_FORMAT_NETCDF4 && format != NC_FORMAT_NETCDF4_CLASSIC) {
        return status;          /* only netCDF-4 kinds are handled here */
    }
    NC_CHECK(nc_inq_var_chunking(grp, varid, &storage, NULL));
    if(storage == 0) {          /* chunked */
        NC_CHECK(nc_set_var_chunk_cache(grp, varid, tiny_size, tiny_nelems,
                                        no_preemption));
    }
    return status;
}
#endif
677 
678 #endif /* USE_NETCDF4 */
679 
680 /* Copy dimensions from group igrp to group ogrp, also associate input
681  * dimids with output dimids (they need not match, because the input
682  * dimensions may have been defined in a different order than we define
683  * the output dimensions here. */
684 static int
685 copy_dims(int igrp, int ogrp)
686 {
687  int stat = NC_NOERR;
688  int ndims;
689  int dgrp;
690 #ifdef USE_NETCDF4
691  int nunlims;
692  int *dimids;
693  int *unlimids;
694 #else
695  int unlimid;
696 #endif /* USE_NETCDF4 */
697 
698  NC_CHECK(nc_inq_ndims(igrp, &ndims));
699 
700 #ifdef USE_NETCDF4
701  /* In netCDF-4 files, dimids may not be sequential because they
702  * may be defined in various groups, and we are only looking at one
703  * group at a time. */
704  /* Find the dimension ids in this group, don't include parents. */
705  dimids = (int *) emalloc((ndims + 1) * sizeof(int));
706  NC_CHECK(nc_inq_dimids(igrp, NULL, dimids, 0));
707  /* Find the number of unlimited dimensions and get their IDs */
708  NC_CHECK(nc_inq_unlimdims(igrp, &nunlims, NULL));
709  unlimids = (int *) emalloc((nunlims + 1) * sizeof(int));
710  NC_CHECK(nc_inq_unlimdims(igrp, NULL, unlimids));
711 #else
712  NC_CHECK(nc_inq_unlimdim(igrp, &unlimid));
713 #endif /* USE_NETCDF4 */
714 
715  /* Copy each dimension to output, including unlimited dimension(s) */
716  for (dgrp = 0; dgrp < ndims; dgrp++) {
717  char name[NC_MAX_NAME];
718  size_t length;
719  int i_is_unlim;
720  int o_is_unlim;
721  int idimid, odimid;
722 #ifdef USE_NETCDF4
723  int uld;
724 #endif
725 
726  i_is_unlim = 0;
727 #ifdef USE_NETCDF4
728  idimid = dimids[dgrp];
729  for (uld = 0; uld < nunlims; uld++) {
730  if(idimid == unlimids[uld]) {
731  i_is_unlim = 1;
732  break;
733  }
734  }
735 #else
736  idimid = dgrp;
737  if(unlimid != -1 && (idimid == unlimid)) {
738  i_is_unlim = 1;
739  }
740 #endif /* USE_NETCDF4 */
741 
742  stat = nc_inq_dim(igrp, idimid, name, &length);
743  if (stat == NC_EDIMSIZE && sizeof(size_t) < 8) {
744  error("dimension \"%s\" requires 64-bit platform", name);
745  }
746  NC_CHECK(stat);
747  o_is_unlim = i_is_unlim;
748  if(i_is_unlim && !option_fix_unlimdims) {
749  NC_CHECK(nc_def_dim(ogrp, name, NC_UNLIMITED, &odimid));
750  } else {
751  NC_CHECK(nc_def_dim(ogrp, name, length, &odimid));
752  o_is_unlim = 0;
753  }
754  /* Store (idimid, odimid) mapping for later use, also whether unlimited */
755  dimmap_store(idimid, odimid, i_is_unlim, o_is_unlim);
756  }
757 #ifdef USE_NETCDF4
758  free(dimids);
759  free(unlimids);
760 #endif /* USE_NETCDF4 */
761  return stat;
762 }
763 
764 /* Copy the attributes for variable ivar in group igrp to variable
765  * ovar in group ogrp. Global (group) attributes are specified by
766  * using the varid NC_GLOBAL */
767 static int
768 copy_atts(int igrp, int ivar, int ogrp, int ovar)
769 {
770  int natts;
771  int iatt;
772  int stat = NC_NOERR;
773 
774  NC_CHECK(nc_inq_varnatts(igrp, ivar, &natts));
775 
776  for(iatt = 0; iatt < natts; iatt++) {
777  char name[NC_MAX_NAME];
778  NC_CHECK(nc_inq_attname(igrp, ivar, iatt, name));
779  NC_CHECK(nc_copy_att(igrp, ivar, name, ogrp, ovar));
780  }
781  return stat;
782 }
783 
784 /* copy the schema for a single variable in group igrp to group ogrp */
785 static int
786 copy_var(int igrp, int varid, int ogrp)
787 {
788  int stat = NC_NOERR;
789  int ndims;
790  int *idimids; /* ids of dims for input variable */
791  int *odimids; /* ids of dims for output variable */
792  char name[NC_MAX_NAME];
793  nc_type typeid, o_typeid;
794  int natts;
795  int i;
796  int o_varid;
797 
798  NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
799  idimids = (int *) emalloc((ndims + 1) * sizeof(int));
800  NC_CHECK(nc_inq_var(igrp, varid, name, &typeid, NULL, idimids, &natts));
801  o_typeid = typeid;
802 #ifdef USE_NETCDF4
803  if (typeid > NC_STRING) { /* user-defined type */
804  /* type ids in source don't necessarily correspond to same
805  * typeids in destination, so look up destination typeid by
806  * using type name */
807  char type_name[NC_MAX_NAME];
808  NC_CHECK(nc_inq_type(igrp, typeid, type_name, NULL));
809  NC_CHECK(nc_inq_typeid(ogrp, type_name, &o_typeid));
810  }
811 #endif /* USE_NETCDF4 */
812 
813  /* get the corresponding dimids in the output file */
814  odimids = (int *) emalloc((ndims + 1) * sizeof(int));
815  for(i = 0; i < ndims; i++) {
816  odimids[i] = dimmap_odimid(idimids[i]);
817  if(odimids[i] == -1) {
818  error("Oops, no dimension in output associated with input dimid %d", idimids[i]);
819  }
820  }
821 
822  /* define the output variable */
823  NC_CHECK(nc_def_var(ogrp, name, o_typeid, ndims, odimids, &o_varid));
824 
825  /* attach the variable attributes to the output variable */
826  NC_CHECK(copy_atts(igrp, varid, ogrp, o_varid));
827 #ifdef USE_NETCDF4
828  {
829  int inkind;
830  int outkind;
831  NC_CHECK(nc_inq_format(igrp, &inkind));
832  NC_CHECK(nc_inq_format(ogrp, &outkind));
833  if(outkind == NC_FORMAT_NETCDF4 || outkind == NC_FORMAT_NETCDF4_CLASSIC) {
834  if((inkind == NC_FORMAT_NETCDF4 || inkind == NC_FORMAT_NETCDF4_CLASSIC)) {
835  /* Copy all netCDF-4 specific variable properties such as
836  * chunking, endianness, deflation, checksumming, fill, etc. */
837  NC_CHECK(copy_var_specials(igrp, varid, ogrp, o_varid));
838  } else {
839  /* Set chunking if specified in command line option */
840  NC_CHECK(set_var_chunked(ogrp, o_varid));
841  }
842  /* Set compression if specified in command line option */
843  NC_CHECK(set_var_compressed(ogrp, o_varid));
844  }
845  }
846 #endif /* USE_NETCDF4 */
847  free(idimids);
848  free(odimids);
849  return stat;
850 }
851 
852 /* copy the schema for all the variables in group igrp to group ogrp */
853 static int
854 copy_vars(int igrp, int ogrp)
855 {
856  int stat = NC_NOERR;
857  int nvars;
858  int varid;
859 
860  int iv; /* variable number */
861  idnode_t* vlist = 0; /* list for vars specified with -v option */
862 
863  /*
864  * If any vars were specified with -v option, get list of
865  * associated variable ids relative to this group. Assume vars
866  * specified with syntax like "grp1/grp2/varname" or
867  * "/grp1/grp2/varname" if they are in groups.
868  */
869  vlist = newidlist(); /* list for vars specified with -v option */
870  for (iv=0; iv < option_nlvars; iv++) {
871  if(nc_inq_gvarid(igrp, option_lvars[iv], &varid) == NC_NOERR)
872  idadd(vlist, varid);
873  }
874 
875  NC_CHECK(nc_inq_nvars(igrp, &nvars));
876  for (varid = 0; varid < nvars; varid++) {
877  if (!option_varstruct && option_nlvars > 0 && ! idmember(vlist, varid))
878  continue;
879  NC_CHECK(copy_var(igrp, varid, ogrp));
880  }
881  free(vlist);
882  return stat;
883 }
884 
885 /* Copy the schema in a group and all its subgroups, recursively, from
886  * group igrp in input to parent group ogrp in destination. Use
887  * dimmap array to map input dimids to output dimids. */
888 static int
889 copy_schema(int igrp, int ogrp)
890 {
891  int stat = NC_NOERR;
892  int ogid; /* like igrp but in output file */
893 
894  /* get groupid in output corresponding to group igrp in input,
895  * given parent group (or root group) ogrp in output */
896  NC_CHECK(get_grpid(igrp, ogrp, &ogid));
897 
898  NC_CHECK(copy_dims(igrp, ogid));
899  NC_CHECK(copy_atts(igrp, NC_GLOBAL, ogid, NC_GLOBAL));
900  NC_CHECK(copy_vars(igrp, ogid));
901 #ifdef USE_NETCDF4
902  {
903  int numgrps;
904  int *grpids;
905  int i;
906  /* Copy schema from subgroups */
907  stat = nc_inq_grps(igrp, &numgrps, NULL);
908  grpids = (int *)emalloc((numgrps + 1) * sizeof(int));
909  NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
910 
911  for(i = 0; i < numgrps; i++) {
912  if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) {
913  NC_CHECK(copy_schema(grpids[i], ogid));
914  }
915  }
916  free(grpids);
917  }
918 #endif /* USE_NETCDF4 */
919  return stat;
920 }
921 
922 /* Return number of values for a variable varid in a group igrp */
923 static int
924 inq_nvals(int igrp, int varid, long long *nvalsp) {
925  int stat = NC_NOERR;
926  int ndims;
927  int *dimids;
928  int dim;
929  long long nvals = 1;
930 
931  NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
932  dimids = (int *) emalloc((ndims + 1) * sizeof(int));
933  NC_CHECK(nc_inq_vardimid (igrp, varid, dimids));
934  for(dim = 0; dim < ndims; dim++) {
935  size_t len;
936  NC_CHECK(nc_inq_dimlen(igrp, dimids[dim], &len));
937  nvals *= len;
938  }
939  if(nvalsp)
940  *nvalsp = nvals;
941  free(dimids);
942  return stat;
943 }
944 
945 /* Copy data from variable varid in group igrp to corresponding group
946  * ogrp. */
947 static int
948 copy_var_data(int igrp, int varid, int ogrp) {
949  int stat = NC_NOERR;
950  nc_type vartype;
951  long long nvalues; /* number of values for this variable */
952  size_t ntoget; /* number of values to access this iteration */
953  size_t value_size; /* size of a single value of this variable */
954  static void *buf = 0; /* buffer for the variable values */
955  char varname[NC_MAX_NAME];
956  int ovarid;
957  size_t *start;
958  size_t *count;
959  nciter_t *iterp; /* opaque structure for iteration status */
960  int do_realloc = 0;
961 #ifdef USE_NETCDF4
962  int okind;
963  size_t chunksize;
964 #endif
965 
966  NC_CHECK(inq_nvals(igrp, varid, &nvalues));
967  if(nvalues == 0)
968  return stat;
969  /* get corresponding output variable */
970  NC_CHECK(nc_inq_varname(igrp, varid, varname));
971  NC_CHECK(nc_inq_varid(ogrp, varname, &ovarid));
972  NC_CHECK(nc_inq_vartype(igrp, varid, &vartype));
973  value_size = val_size(igrp, varid);
974  if(value_size > option_copy_buffer_size) {
975  option_copy_buffer_size = value_size;
976  do_realloc = 1;
977  }
978 #ifdef USE_NETCDF4
979  NC_CHECK(nc_inq_format(ogrp, &okind));
980  if(okind == NC_FORMAT_NETCDF4 || okind == NC_FORMAT_NETCDF4_CLASSIC) {
981  /* if this variable chunked, set variable chunk cache size */
982  int contig = 1;
983  NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &contig, NULL));
984  if(contig == 0) { /* chunked */
985  if(option_compute_chunkcaches) {
986  /* Try to estimate variable-specific chunk cache,
987  * depending on specific size and shape of this
988  * variable's chunks. This doesn't work yet. */
989  size_t chunkcache_size, chunkcache_nelems;
990  float chunkcache_preemption;
991  NC_CHECK(inq_var_chunking_params(igrp, varid, ogrp, ovarid,
992  &chunkcache_size,
993  &chunkcache_nelems,
994  &chunkcache_preemption));
995  NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid,
996  chunkcache_size,
997  chunkcache_nelems,
998  chunkcache_preemption));
999  } else {
1000  /* by default, use same chunk cache for all chunked variables */
1001  NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid,
1002  option_chunk_cache_size,
1003  option_chunk_cache_nelems,
1004  COPY_CHUNKCACHE_PREEMPTION));
1005  }
1006  }
1007  }
1008  /* For chunked variables, option_copy_buffer_size must also be at least as large as
1009  * size of a chunk in input, otherwise resize it. */
1010  {
1011  NC_CHECK(inq_var_chunksize(igrp, varid, &chunksize));
1012  if(chunksize > option_copy_buffer_size) {
1013  option_copy_buffer_size = chunksize;
1014  do_realloc = 1;
1015  }
1016  }
1017 #endif /* USE_NETCDF4 */
1018  if(buf && do_realloc) {
1019  free(buf);
1020  buf = 0;
1021  }
1022  if(buf == 0) { /* first time or needs to grow */
1023  buf = emalloc(option_copy_buffer_size);
1024  memset((void*)buf,0,option_copy_buffer_size);
1025  }
1026 
1027  /* initialize variable iteration */
1028  NC_CHECK(nc_get_iter(igrp, varid, option_copy_buffer_size, &iterp));
1029 
1030  start = (size_t *) emalloc((iterp->rank + 1) * sizeof(size_t));
1031  count = (size_t *) emalloc((iterp->rank + 1) * sizeof(size_t));
1032  /* nc_next_iter() initializes start and count on first call,
1033  * changes start and count to iterate through whole variable on
1034  * subsequent calls. */
1035  while((ntoget = nc_next_iter(iterp, start, count)) > 0) {
1036  NC_CHECK(nc_get_vara(igrp, varid, start, count, buf));
1037  NC_CHECK(nc_put_vara(ogrp, ovarid, start, count, buf));
1038 #ifdef USE_NETCDF4
1039  /* we have to explicitly free values for strings and vlens */
1040  if(vartype == NC_STRING) {
1041  NC_CHECK(nc_free_string(ntoget, (char **)buf));
1042  } else if(vartype > NC_STRING) { /* user-defined type */
1043  nc_type vclass;
1044  NC_CHECK(nc_inq_user_type(igrp, vartype, NULL, NULL, NULL, NULL, &vclass));
1045  if(vclass == NC_VLEN) {
1046  NC_CHECK(nc_free_vlens(ntoget, (nc_vlen_t *)buf));
1047  }
1048  }
1049 #endif /* USE_NETCDF4 */
1050  } /* end main iteration loop */
1051 #ifdef USE_NETCDF4
1052  /* We're all done with this input and output variable, so if
1053  * either variable is chunked, free up its variable chunk cache */
1054  /* NC_CHECK(free_var_chunk_cache(igrp, varid)); */
1055  /* NC_CHECK(free_var_chunk_cache(ogrp, ovarid)); */
1056 #endif /* USE_NETCDF4 */
1057  free(start);
1058  free(count);
1059  NC_CHECK(nc_free_iter(iterp));
1060  return stat;
1061 }
1062 
1063 /* Copy data from variables in group igrp to variables in
1064  * corresponding group with parent ogrp, and all subgroups
1065  * recursively */
1066 static int
1067 copy_data(int igrp, int ogrp)
1068 {
1069  int stat = NC_NOERR;
1070  int ogid;
1071  int nvars;
1072  int varid;
1073 #ifdef USE_NETCDF4
1074  int numgrps;
1075  int *grpids;
1076  int i;
1077 #endif
1078 
1079  int iv; /* variable number */
1080  idnode_t* vlist = 0; /* list for vars specified with -v option */
1081 
1082  /*
1083  * If any vars were specified with -v option, get list of
1084  * associated variable ids relative to this group. Assume vars
1085  * specified with syntax like "grp1/grp2/varname" or
1086  * "/grp1/grp2/varname" if they are in groups.
1087  */
1088  vlist = newidlist(); /* list for vars specified with -v option */
1089  for (iv=0; iv < option_nlvars; iv++) {
1090  if(nc_inq_gvarid(igrp, option_lvars[iv], &varid) == NC_NOERR)
1091  idadd(vlist, varid);
1092  }
1093 
1094  /* get groupid in output corresponding to group igrp in input,
1095  * given parent group (or root group) ogrp in output */
1096  NC_CHECK(get_grpid(igrp, ogrp, &ogid));
1097 
1098  /* Copy data from this group */
1099  NC_CHECK(nc_inq_nvars(igrp, &nvars));
1100 
1101  for (varid = 0; varid < nvars; varid++) {
1102  if (option_nlvars > 0 && ! idmember(vlist, varid))
1103  continue;
1104  if (!group_wanted(igrp, option_nlgrps, option_grpids))
1105  continue;
1106  NC_CHECK(copy_var_data(igrp, varid, ogid));
1107  }
1108 #ifdef USE_NETCDF4
1109  /* Copy data from subgroups */
1110  stat = nc_inq_grps(igrp, &numgrps, NULL);
1111  grpids = (int *)emalloc((numgrps + 1) * sizeof(int));
1112  NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
1113 
1114  for(i = 0; i < numgrps; i++) {
1115  if (!option_grpstruct && !group_wanted(grpids[i], option_nlgrps, option_grpids))
1116  continue;
1117  NC_CHECK(copy_data(grpids[i], ogid));
1118  }
1119  free(grpids);
1120 #endif /* USE_NETCDF4 */
1121  return stat;
1122 }
1123 
/* Count total number of dimensions in ncid and all its descendant
 * subgroups.  Subgroups are only visited when built with USE_NETCDF4.
 *
 *   ncid  id of the file or group to start from
 *
 * Returns the accumulated dimension count. */
int
count_dims(int ncid) {
    int ndims;
    NC_CHECK(nc_inq_ndims(ncid, &ndims));
#ifdef USE_NETCDF4
    {
        int numgrps;
        NC_CHECK(nc_inq_grps(ncid, &numgrps, NULL));
        if(numgrps > 0) {
            int igrp;
            int *grpids = emalloc(numgrps * sizeof(int));
            NC_CHECK(nc_inq_grps(ncid, &numgrps, grpids));
            /* add the dimensions of every subgroup, recursively */
            for(igrp = 0; igrp < numgrps; igrp++) {
                ndims += count_dims(grpids[igrp]);
            }
            free(grpids);
        }
    }
#endif  /* USE_NETCDF4 */
    return ndims;
}
1144 
1145 /* Test if special case: netCDF-3 file with more than one record
1146  * variable. Performance can be very slow for this case when the disk
1147  * block size is large, there are many record variables, and a
1148  * record's worth of data for some variables is smaller than the disk
1149  * block size. In this case, copying the record variables a variable
1150  * at a time causes much rereading of record data, so instead we want
1151  * to copy data a record at a time. */
1152 static int
1153 nc3_special_case(int ncid, int kind) {
1154  if (kind == NC_FORMAT_CLASSIC || kind == NC_FORMAT_64BIT) {
1155  int recdimid = 0;
1156  NC_CHECK(nc_inq_unlimdim(ncid, &recdimid));
1157  if (recdimid != -1) { /* we have a record dimension */
1158  int nvars;
1159  int varid;
1160  NC_CHECK(nc_inq_nvars(ncid, &nvars));
1161  for (varid = 0; varid < nvars; varid++) {
1162  int *dimids = 0;
1163  int ndims;
1164  NC_CHECK( nc_inq_varndims(ncid, varid, &ndims) );
1165  if (ndims > 0) {
1166  int dimids0;
1167  dimids = (int *) emalloc((ndims + 1) * sizeof(int));
1168  NC_CHECK( nc_inq_vardimid(ncid, varid, dimids) );
1169  dimids0 = dimids[0];
1170  free(dimids);
1171  if(dimids0 == recdimid) {
1172  return 1; /* found a record variable */
1173  }
1174  }
1175  }
1176  }
1177  }
1178  return 0;
1179 }
1180 
1181 /* Classify variables in ncid as either fixed-size variables (with no
1182  * unlimited dimension) or as record variables (with an unlimited
1183  * dimension) */
1184 static int
1185 classify_vars(
1186  int ncid, /* netCDF ID */
1187  size_t *nf, /* for returning number of fixed-size variables */
1188  int **fvars, /* the array of fixed_size variable IDS, caller should free */
1189  size_t *nr, /* for returning number of record variables */
1190  int **rvars) /* the array of record variable IDs, caller should free */
1191 {
1192  int varid;
1193  int nvars;
1194  NC_CHECK(nc_inq_nvars(ncid, &nvars));
1195  *nf = 0;
1196  *fvars = (int *) emalloc(nvars * sizeof(int));
1197  *nr = 0;
1198  *rvars = (int *) emalloc(nvars * sizeof(int));
1199  for (varid = 0; varid < nvars; varid++) {
1200  if (isrecvar(ncid, varid)) {
1201  (*rvars)[*nr] = varid;
1202  (*nr)++;
1203  } else {
1204  (*fvars)[*nf] = varid;
1205  (*nf)++;
1206  }
1207  }
1208  return NC_NOERR;
1209 }
1210 
1211 /* Only called for classic format or 64-bit offset format files, to speed up special case */
1212 static int
1213 copy_fixed_size_data(int igrp, int ogrp, size_t nfixed_vars, int *fixed_varids) {
1214  size_t ivar;
1215  /* for each fixed-size variable, copy data */
1216  for (ivar = 0; ivar < nfixed_vars; ivar++) {
1217  int varid = fixed_varids[ivar];
1218  NC_CHECK(copy_var_data(igrp, varid, ogrp));
1219  }
1220  if (fixed_varids)
1221  free(fixed_varids);
1222  return NC_NOERR;
1223 }
1224 
1225 /* copy a record's worth of data for a variable from input to output */
1226 static int
1227 copy_rec_var_data(int ncid, /* input */
1228  int ogrp, /* output */
1229  int irec, /* record number */
1230  int varid, /* input variable id */
1231  int ovarid, /* output variable id */
1232  size_t *start, /* start indices for record data */
1233  size_t *count, /* edge lengths for record data */
1234  void *buf /* buffer large enough to hold data */
1235  )
1236 {
1237  NC_CHECK(nc_get_vara(ncid, varid, start, count, buf));
1238  NC_CHECK(nc_put_vara(ogrp, ovarid, start, count, buf));
1239  return NC_NOERR;
1240 }
1241 
1242 /* Only called for classic format or 64-bit offset format files, to speed up special case */
1243 static int
1244 copy_record_data(int ncid, int ogrp, size_t nrec_vars, int *rec_varids) {
1245  int unlimid;
1246  size_t nrecs = 0; /* how many records? */
1247  size_t irec;
1248  size_t ivar;
1249  void **buf; /* space for reading in data for each variable */
1250  int *rec_ovarids; /* corresponding varids in output */
1251  size_t **start;
1252  size_t **count;
1253  NC_CHECK(nc_inq_unlimdim(ncid, &unlimid));
1254  NC_CHECK(nc_inq_dimlen(ncid, unlimid, &nrecs));
1255  buf = (void **) emalloc(nrec_vars * sizeof(void *));
1256  rec_ovarids = (int *) emalloc(nrec_vars * sizeof(int));
1257  start = (size_t **) emalloc(nrec_vars * sizeof(size_t*));
1258  count = (size_t **) emalloc(nrec_vars * sizeof(size_t*));
1259  /* get space to hold one record's worth of data for each record variable */
1260  for (ivar = 0; ivar < nrec_vars; ivar++) {
1261  int varid;
1262  int ndims;
1263  int *dimids;
1264  size_t value_size;
1265  int dimid;
1266  int ii;
1267  size_t nvals;
1268  char varname[NC_MAX_NAME];
1269  varid = rec_varids[ivar];
1270  NC_CHECK(nc_inq_varndims(ncid, varid, &ndims));
1271  dimids = (int *) emalloc((1 + ndims) * sizeof(int));
1272  start[ivar] = (size_t *) emalloc(ndims * sizeof(size_t));
1273  count[ivar] = (size_t *) emalloc(ndims * sizeof(size_t));
1274  NC_CHECK(nc_inq_vardimid (ncid, varid, dimids));
1275  value_size = val_size(ncid, varid);
1276  nvals = 1;
1277  for(ii = 1; ii < ndims; ii++) { /* for rec size, don't include first record dimension */
1278  size_t dimlen;
1279  dimid = dimids[ii];
1280  NC_CHECK(nc_inq_dimlen(ncid, dimid, &dimlen));
1281  nvals *= dimlen;
1282  start[ivar][ii] = 0;
1283  count[ivar][ii] = dimlen;
1284  }
1285  start[ivar][0] = 0;
1286  count[ivar][0] = 1; /* 1 record */
1287  buf[ivar] = (void *) emalloc(nvals * value_size);
1288  NC_CHECK(nc_inq_varname(ncid, varid, varname));
1289  NC_CHECK(nc_inq_varid(ogrp, varname, &rec_ovarids[ivar]));
1290  if(dimids)
1291  free(dimids);
1292  }
1293 
1294  /* for each record, copy all variable data */
1295  for(irec = 0; irec < nrecs; irec++) {
1296  for (ivar = 0; ivar < nrec_vars; ivar++) {
1297  int varid, ovarid;
1298  varid = rec_varids[ivar];
1299  ovarid = rec_ovarids[ivar];
1300  start[ivar][0] = irec;
1301  NC_CHECK(copy_rec_var_data(ncid, ogrp, irec, varid, ovarid,
1302  start[ivar], count[ivar], buf[ivar]));
1303  }
1304  }
1305  for (ivar = 0; ivar < nrec_vars; ivar++) {
1306  if(start[ivar])
1307  free(start[ivar]);
1308  if(count[ivar])
1309  free(count[ivar]);
1310  }
1311  if(start)
1312  free(start);
1313  if(count)
1314  free(count);
1315  for (ivar = 0; ivar < nrec_vars; ivar++) {
1316  if(buf[ivar]) {
1317  free(buf[ivar]);
1318  }
1319  }
1320  if (rec_varids)
1321  free(rec_varids);
1322  if(buf)
1323  free(buf);
1324  if(rec_ovarids)
1325  free(rec_ovarids);
1326  return NC_NOERR;
1327 }
1328 
1329 /* copy infile to outfile using netCDF API
1330  */
1331 static int
1332 copy(char* infile, char* outfile)
1333 {
1334  int stat = NC_NOERR;
1335  int igrp, ogrp;
1336  int inkind, outkind;
1337  int open_mode = NC_NOWRITE;
1338  int create_mode = NC_CLOBBER;
1339  size_t ndims;
1340 
1341  if(option_read_diskless) {
1342  open_mode |= NC_DISKLESS;
1343  }
1344 
1345  NC_CHECK(nc_open(infile, open_mode, &igrp));
1346 
1347  NC_CHECK(nc_inq_format(igrp, &inkind));
1348 
1349 /* option_kind specifies which netCDF format for output:
1350  * -1 -> same as input,
1351  * 1 -> classic
1352  * 2 -> 64-bit offset
1353  * 3 -> netCDF-4,
1354  * 4 -> netCDF-4 classic model
1355  *
1356  * However, if compression or shuffling was specified and kind was -1,
1357  * kind is changed to format 4 that supports compression for input of
1358  * type 1 or 2.
1359  */
1360  outkind = option_kind;
1361  if (option_kind == SAME_AS_INPUT) { /* default, kind not specified */
1362  outkind = inkind;
1363  /* Deduce output kind if netCDF-4 features requested */
1364  if (inkind == NC_FORMAT_CLASSIC || inkind == NC_FORMAT_64BIT) {
1365  if (option_deflate_level > 0 ||
1366  option_shuffle_vars == NC_SHUFFLE ||
1367  option_chunkspec)
1368  {
1369  outkind = NC_FORMAT_NETCDF4_CLASSIC;
1370  }
1371  }
1372  }
1373 
1374 #ifdef USE_NETCDF4
1375  if(option_chunkspec) {
1376  /* Now that input is open, can parse option_chunkspec into binary
1377  * structure. */
1378  NC_CHECK(chunkspec_parse(igrp, option_chunkspec));
1379  }
1380 #endif /* USE_NETCDF4 */
1381 
1382  /* Check if any vars in -v don't exist */
1383  if(missing_vars(igrp, option_nlvars, option_lvars))
1384  return EXIT_FAILURE;
1385 
1386  if(option_nlgrps > 0) {
1387  if(inkind != NC_FORMAT_NETCDF4) {
1388  error("Group list (-g ...) only permitted for netCDF-4 file");
1389  return EXIT_FAILURE;
1390  }
1391  /* Check if any grps in -g don't exist */
1392  if(grp_matches(igrp, option_nlgrps, option_lgrps, option_grpids) == 0)
1393  return EXIT_FAILURE;
1394  }
1395 
1396  if(option_write_diskless)
1397  create_mode |= NC_WRITE | NC_DISKLESS; /* NC_WRITE persists diskless file on close */
1398  switch(outkind) {
1399  case NC_FORMAT_CLASSIC:
1400  /* nothing to do */
1401  break;
1402  case NC_FORMAT_64BIT:
1403  create_mode |= NC_64BIT_OFFSET;
1404  break;
1405 #ifdef USE_NETCDF4
1406  case NC_FORMAT_NETCDF4:
1407  create_mode |= NC_NETCDF4;
1408  break;
1410  create_mode |= NC_NETCDF4 | NC_CLASSIC_MODEL;
1411  break;
1412 #else
1413  case NC_FORMAT_NETCDF4:
1415  error("nccopy built with --disable-netcdf4, can't create netCDF-4 files");
1416  break;
1417 #endif /* USE_NETCDF4 */
1418  default:
1419  error("bad value (%d) for -k option\n", option_kind);
1420  break;
1421  }
1422  NC_CHECK(nc_create(outfile, create_mode, &ogrp));
1423  NC_CHECK(nc_set_fill(ogrp, NC_NOFILL, NULL));
1424 
1425 #ifdef USE_NETCDF4
1426  /* Because types in one group may depend on types in a different
1427  * group, need to create all groups before defining types */
1428  if(inkind == NC_FORMAT_NETCDF4) {
1429  NC_CHECK(copy_groups(igrp, ogrp));
1430  NC_CHECK(copy_types(igrp, ogrp));
1431  }
1432 #endif /* USE_NETCDF4 */
1433 
1434  ndims = count_dims(igrp);
1435  NC_CHECK(dimmap_init(ndims));
1436  NC_CHECK(copy_schema(igrp, ogrp));
1437  NC_CHECK(nc_enddef(ogrp));
1438 
1439  /* For performance, special case netCDF-3 input or output file with record
1440  * variables, to copy a record-at-a-time instead of a
1441  * variable-at-a-time. */
1442  /* TODO: check that these special cases work with -v option */
1443  if(nc3_special_case(igrp, inkind)) {
1444  size_t nfixed_vars, nrec_vars;
1445  int *fixed_varids;
1446  int *rec_varids;
1447  NC_CHECK(classify_vars(igrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids));
1448  NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids));
1449  NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids));
1450  } else if (nc3_special_case(ogrp, outkind)) {
1451  size_t nfixed_vars, nrec_vars;
1452  int *fixed_varids;
1453  int *rec_varids;
1454  /* classifies output vars, but returns input varids */
1455  NC_CHECK(classify_vars(ogrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids));
1456  NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids));
1457  NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids));
1458  } else {
1459  NC_CHECK(copy_data(igrp, ogrp)); /* recursive, to handle nested groups */
1460  }
1461 
1462  NC_CHECK(nc_close(igrp));
1463  NC_CHECK(nc_close(ogrp));
1464  return stat;
1465 }
1466 
/*
 * For non-negative numeric string with multiplier suffix K, M, G, T,
 * or P (or lower-case equivalent), return corresponding value
 * incorporating multiplier 1000, 1000000, 1.0e9, 1.0e12, or 1.0e15.
 * A string without a suffix is returned unscaled.
 *
 * Returns -1.0 for error: no number at the start of the string (for
 * example "" or "K"), a negative or NaN value, a conversion for which
 * strtod() sets errno, or an unknown suffix character.
 *
 * Only the first character after the number is examined; anything
 * following the suffix is ignored.
 */
static double
double_with_suffix(char *str) {
    double dval;
    char *suffix = 0;
    errno = 0;
    dval = strtod(str, &suffix);
    /* strtod() consumed nothing: there is no number to scale */
    if(suffix == str)
        return -1.0;
    /* !(dval >= 0) also rejects NaN, which "dval < 0" would let through */
    if(!(dval >= 0) || errno != 0)
        return -1.0;
    if(*suffix) {
        switch (*suffix) {
        case 'k': case 'K':
            dval *= 1000;
            break;
        case 'm': case 'M':
            dval *= 1000000;
            break;
        case 'g': case 'G':
            dval *= 1000000000;
            break;
        case 't': case 'T':
            dval *= 1.0e12;
            break;
        case 'p': case 'P':
            dval *= 1.0e15;
            break;
        default:
            dval = -1.0; /* error, suffix multiplier must be K, M, G, T, or P */
            break;
        }
    }
    return dval;
}
1504 
1505 static void
1506 usage(void)
1507 {
1508 #define USAGE "\
1509  [-k n] specify kind of netCDF format for output file, default same as input\n\
1510  1 classic, 2 64-bit offset, 3 netCDF-4, 4 netCDF-4 classic model\n\
1511  [-d n] set deflation compression level, default same as input (0=none 9=max)\n\
1512  [-s] add shuffle option to deflation compression\n\
1513  [-c chunkspec] specify chunking for dimensions, e.g. \"dim1/N1,dim2/N2,...\"\n\
1514  [-u] convert unlimited dimensions to fixed-size dimensions in output copy\n\
1515  [-w] write whole output file from diskless netCDF on close\n\
1516  [-v var1,...] include data for only listed variables, but definitions for all variables\n\
1517  [-V var1,...] include definitions and data for only listed variables\n\
1518  [-g grp1,...] include data for only variables in listed groups, but all definitions\n\
1519  [-G grp1,...] include definitions and data only for variables in listed groups\n\
1520  [-m n] set size in bytes of copy buffer, default is 5000000 bytes\n\
1521  [-h n] set size in bytes of chunk_cache for chunked variables\n\
1522  [-e n] set number of elements that chunk_cache can hold\n\
1523  [-r] read whole input file into diskless file on open (classic or 64-bit offset format only)\n\
1524  infile name of netCDF input file\n\
1525  outfile name for netCDF output file\n"
1526 
1527  /* Don't document this flaky option until it works better */
1528  /* [-x] use experimental computed estimates for variable-specific chunk caches\n\ */
1529 
1530  error("%s [-k n] [-d n] [-s] [-c chunkspec] [-u] [-w] [-[v|V] varlist] [-[g|G] grplist] [-m n] [-h n] [-e n] [-r] infile outfile\n%s",
1531  progname, USAGE);
1532 }
1533 
1836 int
1837 main(int argc, char**argv)
1838 {
1839  char* inputfile = NULL;
1840  char* outputfile = NULL;
1841  int c;
1842 
1843 /* table of formats for legal -k values */
1844  struct Kvalues {
1845  char* name;
1846  int kind;
1847  } legalkinds[] = {
1848  {"1", NC_FORMAT_CLASSIC},
1849  {"classic", NC_FORMAT_CLASSIC},
1850 
1851  /* The 64-bit offset kind (2) */
1852  {"2", NC_FORMAT_64BIT},
1853  {"64-bit-offset", NC_FORMAT_64BIT},
1854  {"64-bit offset", NC_FORMAT_64BIT},
1855 
1856  /* NetCDF-4 HDF5 format */
1857  {"3", NC_FORMAT_NETCDF4},
1858  {"hdf5", NC_FORMAT_NETCDF4},
1859  {"netCDF-4", NC_FORMAT_NETCDF4},
1860  {"netCDF4", NC_FORMAT_NETCDF4},
1861  {"enhanced", NC_FORMAT_NETCDF4},
1862 
1863  /* NetCDF-4 HDF5 format, but using only nc3 data model */
1865  {"hdf5-nc3", NC_FORMAT_NETCDF4_CLASSIC},
1866  {"netCDF-4 classic model", NC_FORMAT_NETCDF4_CLASSIC},
1867  {"netCDF4_classic", NC_FORMAT_NETCDF4_CLASSIC},
1868  {"enhanced-nc3", NC_FORMAT_NETCDF4_CLASSIC},
1869 
1870  /* null terminate*/
1871  {NULL,0}
1872  };
1873 
1874  opterr = 1;
1875  progname = argv[0];
1876 
1877  if (argc <= 1)
1878  {
1879  usage();
1880  }
1881 
1882  while ((c = getopt(argc, argv, "k:d:sum:c:h:e:rwxg:G:v:V:")) != -1) {
1883  switch(c) {
1884  case 'k': /* for specifying variant of netCDF format to be generated
1885  Possible values are:
1886  1 (=> classic 32 bit)
1887  2 (=> classic 64 bit offsets)
1888  3 (=> netCDF-4/HDF5)
1889  4 (=> classic, but stored in netCDF-4/HDF5 format)
1890  Also allow string versions of above
1891  "classic"
1892  "64-bit-offset"
1893  "64-bit offset"
1894  "enhanced" | "hdf5" | "netCDF-4"
1895  "enhanced-nc3" | "hdf5-nc3" | "netCDF-4 classic model"
1896  */
1897  {
1898  struct Kvalues* kvalue;
1899  char *kind_name = (char *) emalloc(strlen(optarg)+1);
1900  (void)strcpy(kind_name, optarg);
1901  for(kvalue=legalkinds;kvalue->name;kvalue++) {
1902  if(strcmp(kind_name,kvalue->name) == 0) {
1903  option_kind = kvalue->kind;
1904  break;
1905  }
1906  }
1907  if(kvalue->name == NULL) {
1908  error("invalid format: %s", kind_name);
1909  }
1910  }
1911  break;
1912  case 'd': /* non-default compression level specified */
1913  option_deflate_level = strtol(optarg, NULL, 10);
1914  if(option_deflate_level < 0 || option_deflate_level > 9) {
1915  error("invalid deflation level: %d", option_deflate_level);
1916  }
1917  break;
1918  case 's': /* shuffling, may improve compression */
1919  option_shuffle_vars = NC_SHUFFLE;
1920  break;
1921  case 'u': /* convert unlimited dimensions to fixed size */
1922  option_fix_unlimdims = 1;
1923  break;
1924  case 'm': /* non-default size of data copy buffer */
1925  {
1926  double dval = double_with_suffix(optarg); /* "K" for kilobytes. "M" for megabytes, ... */
1927  if(dval < 0)
1928  error("Suffix used for '-m' option value must be K, M, G, T, or P");
1929  option_copy_buffer_size = dval;
1930  break;
1931  }
1932  case 'h': /* non-default size of chunk cache */
1933  {
1934  double dval = double_with_suffix(optarg); /* "K" for kilobytes. "M" for megabytes, ... */
1935  if(dval < 0)
1936  error("Suffix used for '-h' option value must be K, M, G, T, or P");
1937  option_chunk_cache_size = dval;
1938  break;
1939  }
1940  case 'e': /* number of elements chunk cache can hold */
1941  {
1942  double dval = double_with_suffix(optarg); /* "K" for kilobytes. "M" for megabytes, ... */
1943  if(dval < 0 )
1944  error("Suffix used for '-e' option value must be K, M, G, T, or P");
1945  option_chunk_cache_nelems = (long)dval;
1946  break;
1947  }
1948  case 'r':
1949  option_read_diskless = 1; /* read into memory on open */
1950  break;
1951  case 'w':
1952  option_write_diskless = 1; /* write to memory, persist on close */
1953  break;
1954  case 'x': /* use experimental variable-specific chunk caches */
1955  option_compute_chunkcaches = 1;
1956  break;
1957  case 'c': /* optional chunking spec for each dimension in list */
1958  /* save chunkspec string for parsing later, once we know input ncid */
1959  option_chunkspec = strdup(optarg);
1960  break;
1961  case 'g': /* group names */
1962  /* make list of names of groups specified */
1963  make_lgrps (optarg, &option_nlgrps, &option_lgrps, &option_grpids);
1964  option_grpstruct = true;
1965  break;
1966  case 'G': /* group names */
1967  /* make list of names of groups specified */
1968  make_lgrps (optarg, &option_nlgrps, &option_lgrps, &option_grpids);
1969  option_grpstruct = false;
1970  break;
1971  case 'v': /* variable names */
1972  /* make list of names of variables specified */
1973  make_lvars (optarg, &option_nlvars, &option_lvars);
1974  option_varstruct = true;
1975  break;
1976  case 'V': /* variable names */
1977  /* make list of names of variables specified */
1978  make_lvars (optarg, &option_nlvars, &option_lvars);
1979  option_varstruct = false;
1980  break;
1981  default:
1982  usage();
1983  }
1984  }
1985  argc -= optind;
1986  argv += optind;
1987 
1988  if (argc != 2) {
1989  error("one input file and one output file required");
1990  }
1991  inputfile = argv[0];
1992  outputfile = argv[1];
1993 
1994  if(strcmp(inputfile, outputfile) == 0) {
1995  error("output would overwrite input");
1996  }
1997 
1998  if(copy(inputfile, outputfile) != NC_NOERR)
1999  exit(1);
2000  return 0;
2001 }
2002 END_OF_MAIN();

Generated on Tue Jul 9 2013 19:18:11 for netCDF. NetCDF is a Unidata library.