20 #include "chunkspec.h"
/* Default size of the copy buffer; initial value of option_copy_buffer_size. */
26 #define COPY_BUFFER_SIZE (5000000)
/* Preemption value used together with the chunk-cache defaults while copying. */
27 #define COPY_CHUNKCACHE_PREEMPTION (1.0f)
/* Sentinel value of option_kind; copy() tests for it when choosing the output kind. */
28 #define SAME_AS_INPUT (-1)
/* Initial value of chunk_threshold in set_var_chunked(), compared against a variable's size. */
29 #define CHUNK_THRESHOLD (1024)
/* NOTE(review): presumably mirrors the library's classic-model create flag -- confirm. */
32 #define NC_CLASSIC_MODEL 0x0100
/* Requested output format kind; SAME_AS_INPUT until main() stores a recognized kind. */
37 static int option_kind = SAME_AS_INPUT;
/* Requested deflate level; -1 means unset (the code tests ">= 0" before applying it). */
38 static int option_deflate_level = -1;
/* Non-zero when set by main(); tested in copy_dims() when deciding o_is_unlim. */
40 static int option_fix_unlimdims = 0;
/* Chunk specification string duplicated from optarg in main(); parsed in copy() when non-null. */
41 static char* option_chunkspec = 0;
/* Copy buffer size; may be raised in copy_var_data() to fit one value or one chunk. */
42 static size_t option_copy_buffer_size = COPY_BUFFER_SIZE;
/* Chunk cache size and element count; both can be overridden in main(). */
43 static size_t option_chunk_cache_size = CHUNK_CACHE_SIZE;
44 static size_t option_chunk_cache_nelems = CHUNK_CACHE_NELEMS;
/* When non-zero, copy_var_data() calls inq_var_chunking_params() for each variable. */
45 static int option_compute_chunkcaches = 0;
/* Diskless read/write flags, set to 1 in main() and tested in copy(). */
47 static int option_read_diskless = 0;
48 static int option_write_diskless = 0;
/*
 * Find the output group that corresponds to input group igrp.
 *   igrp  - input group id
 *   parid - id of the output group under which to look
 *   ogrpp - presumably receives the output group id (the store is not visible here)
 * Visible steps: query igrp's parent, read igrp's name, then look up a
 * group of that name under parid.
 */
53 get_grpid(
int igrp,
int parid,
int *ogrpp) {
/* The status is kept in stat rather than passed to NC_CHECK; how it is used is not visible here. */
60 stat = nc_inq_grp_parent(igrp, &inparid);
63 NC_CHECK(nc_inq_grpname(igrp, grpname));
64 NC_CHECK(nc_inq_grp_ncid(parid, grpname, &ogid));
/*
 * Resolve the group id of the parent of the group named by fullname.
 *   ncid     - id used as the starting point for the full-name lookup
 *   fullname - group path; it is duplicated and the copy is searched for its last '/'
 *   locidp   - receives the id returned by nc_inq_grp_full_ncid()
 */
81 nc_inq_parid(
int ncid,
const char *fullname,
int *locidp) {
83 char *parent = strdup(fullname);
89 last_slash = strrchr(parent,
'/');
/* The last slash is the first character of the copy. */
90 if(last_slash == parent) {
/* NOTE(review): parent is re-assigned here; unless the earlier strdup() result is
   freed in lines not shown, that buffer leaks -- confirm. */
92 parent = strdup(slash);
96 NC_CHECK(nc_inq_grp_full_ncid(ncid, parent, locidp));
/*
 * Compute a chunk size figure for variable varid in group igrp.
 *   chunksizep - presumably receives the result (the store is not visible here)
 * Visible steps: get the size of one value of the variable's type, allocate
 * an array of ndims + 1 chunk lengths, and multiply the per-dimension
 * chunk lengths into prod.
 */
104 inq_var_chunksize(
int igrp,
int varid,
size_t* chunksizep) {
116 NC_CHECK(
nc_inq_type(igrp, vartype, NULL, &value_size));
/* One extra element is allocated beyond ndims. */
119 chunksizes = (
size_t *) emalloc((ndims + 1) *
sizeof(size_t));
127 for(dim = 0; dim < ndims; dim++) {
128 prod *= chunksizes[dim];
/*
 * Choose chunk-cache parameters for copying input variable ivarid (in igrp)
 * to output variable ovarid (in ogrp).
 *   chunkcache_sizep       - receives the cache size
 *   chunkcache_nelemsp     - receives the cache element count
 *   chunkcache_preemptionp - receives the preemption value
 * The outputs start at the compile-time defaults and are overridden
 * depending on whether input and output are contiguous.
 */
141 inq_var_chunking_params(
int igrp,
int ivarid,
int ogrp,
int ovarid,
142 size_t* chunkcache_sizep,
143 size_t *chunkcache_nelemsp,
144 float * chunkcache_preemptionp)
148 size_t *ichunksizes, *ochunksizes;
150 int icontig = 1, ocontig = 1;
153 size_t prod, iprod, oprod;
/* Defaults, used unless one of the cases below applies. */
155 *chunkcache_nelemsp = CHUNK_CACHE_NELEMS;
156 *chunkcache_sizep = CHUNK_CACHE_SIZE;
157 *chunkcache_preemptionp = COPY_CHUNKCACHE_PREEMPTION;
/* Both sides contiguous: all three outputs are zeroed. */
164 if(icontig == 1 && ocontig == 1) {
165 *chunkcache_nelemsp = 0;
166 *chunkcache_sizep = 0;
167 *chunkcache_preemptionp = 0;
172 NC_CHECK(
nc_inq_type(igrp, vartype, NULL, &value_size));
/* Chunked input, contiguous output: one cache element of size iprod. */
175 if(icontig == 0 && ocontig == 1) {
176 *chunkcache_nelemsp = 1;
177 *chunkcache_sizep = iprod;
178 *chunkcache_preemptionp = 1.0f;
182 ichunksizes = (
size_t *) emalloc((ndims + 1) *
sizeof(size_t));
/* NOTE(review): each entry from index 1 on is set to its own dimension index,
   not to a dimension or chunk length -- confirm this is intended. */
186 for(dim = 1; dim < ndims; dim++) {
187 ichunksizes[dim] = dim;
194 ochunksizes = (
size_t *) emalloc((ndims + 1) *
sizeof(size_t));
/* Per dimension: add 1 + (input chunk length - 1) / output chunk length to nelems,
   and accumulate the products of input and output chunk lengths. */
199 for(dim = 0; dim < ndims; dim++) {
200 nelems += 1 + (ichunksizes[dim] - 1) / ochunksizes[dim];
201 iprod *= ichunksizes[dim];
202 oprod *= ochunksizes[dim];
204 prod = iprod + oprod * (nelems - 1);
205 *chunkcache_nelemsp = nelems;
206 *chunkcache_sizep = prod;
/* Forward declaration: copy_vlen_type() calls copy_type() before its definition. */
213 static int copy_type(
int igrp,
nc_type typeid,
int ogrp);
/*
 * Define in ogrp a variable-length type matching itype from igrp.
 * Visible steps: read the vlen's name, size and base type; read the base
 * type's name and size; copy the base type via copy_type() (the condition
 * guarding that call is not visible here); define the vlen in the output.
 */
220 copy_vlen_type(
int igrp,
nc_type itype,
int ogrp)
231 NC_CHECK(
nc_inq_vlen(igrp, itype, name, &size, &ibasetype));
234 NC_CHECK(
nc_inq_type(igrp, ibasetype, basename, &basesize));
238 NC_CHECK(copy_type(igrp, ibasetype, ogrp));
244 NC_CHECK(
nc_def_vlen(ogrp, name, obasetype, &vlen_type));
/*
 * Copy type itype from igrp to ogrp; called from copy_type().
 * Only the signature is visible here; presumably handles opaque types, per the name.
 */
253 copy_opaque_type(
int igrp,
nc_type itype,
int ogrp)
/*
 * Define in ogrp an enum type matching itype from igrp.
 * Reads the enum's name, base type, base size and member count, defines
 * the enum in the output with the same base type and name, then iterates
 * over the members (the loop body is not visible here).
 */
270 copy_enum_type(
int igrp,
nc_type itype,
int ogrp)
280 NC_CHECK(
nc_inq_enum(igrp, itype, name, &basetype, &basesize, &nmembers));
281 NC_CHECK(
nc_def_enum(ogrp, basetype, name, &otype));
282 for(i = 0; i < nmembers; i++) {
/*
 * Copy compound type itype from igrp to ogrp, field by field.
 * Visible steps per field: look up the field type's name and allocate an
 * array of fndims + 1 ints for the field's dimension sizes.
 */
295 copy_compound_type(
int igrp,
nc_type itype,
int ogrp)
307 for (fid = 0; fid < nfields; fid++) {
318 NC_CHECK(
nc_inq_type(igrp, iftype, ftypename, NULL));
324 fdimsizes = (
int *) emalloc((fndims + 1) *
sizeof(int));
/*
 * Copy user-defined type typeid from igrp to ogrp.
 * Queries the type's class, then calls one of the vlen, opaque, enum or
 * compound copy routines (the selection logic is not visible here).
 */
339 copy_type(
int igrp,
nc_type typeid,
int ogrp)
/* Only the class is requested; every other output argument is NULL. */
344 NC_CHECK(
nc_inq_user_type(igrp,
typeid, NULL, NULL, NULL, NULL, &type_class));
348 NC_CHECK(copy_vlen_type(igrp,
typeid, ogrp));
351 NC_CHECK(copy_opaque_type(igrp,
typeid, ogrp));
354 NC_CHECK(copy_enum_type(igrp,
typeid, ogrp));
357 NC_CHECK(copy_compound_type(igrp,
typeid, ogrp));
/*
 * Create in the output, starting from oroot, a group for each group id
 * returned by nc_inq_grps_full() on iroot, except the first.
 * For each one: get its full name, resolve the output parent from that
 * name, get its short name, and define the group under that parent.
 */
370 copy_groups(
int iroot,
int oroot)
/* First call gets the count, second call fills the id array. */
378 NC_CHECK(nc_inq_grps_full(iroot, &numgrps, NULL));
379 grpids = emalloc(numgrps *
sizeof(
int));
380 NC_CHECK(nc_inq_grps_full(iroot, NULL, grpids));
/* Starts at 1: entry 0 is skipped (presumably the root group -- confirm). */
382 for(i = 1; i < numgrps; i++) {
/* First call gets the name length, second call fills a buffer of len_name + 1 bytes. */
388 NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, NULL));
389 grpname_full = emalloc(len_name + 1);
390 NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, grpname_full));
395 NC_CHECK(nc_inq_parid(oroot, grpname_full, &oparid));
396 NC_CHECK(nc_inq_grpname(grpids[i], grpname));
398 NC_CHECK(nc_def_grp(oparid, grpname, &ogid));
/*
 * Copy the user-defined types of igrp into ogrp, then recurse into each
 * child group of igrp, using get_grpid() to find the matching output group.
 */
410 copy_types(
int igrp,
int ogrp)
/* First call gets the count, second call fills the types array. */
419 NC_CHECK(nc_inq_typeids(igrp, &ntypes, NULL));
423 NC_CHECK(nc_inq_typeids(igrp, &ntypes, types));
424 for (i = 0; i < ntypes; i++) {
425 NC_CHECK(copy_type(igrp, types[i], ogrp));
431 NC_CHECK(nc_inq_grps(igrp, &numgrps, NULL));
433 grpids = (
int *)emalloc(
sizeof(
int) * numgrps);
434 NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
435 for(i = 0; i < numgrps; i++) {
440 NC_CHECK(get_grpid(grpids[i], ogrp, &ogid));
441 NC_CHECK(copy_types(grpids[i], ogid));
/*
 * Apply storage settings to output variable o_varid in ogrp, based on
 * input variable varid in igrp and the command-line options.
 * Visible settings: chunking (contiguous or chunked), deflate and shuffle,
 * fletcher32 and endianness. The conditions selecting among them are only
 * partly visible here.
 */
451 copy_var_specials(
int igrp,
int varid,
int ogrp,
int o_varid)
461 NC_CHECK(nc_def_var_chunking(ogrp, o_varid,
NC_CONTIGUOUS, NULL));
463 size_t *chunkp = (
size_t *) emalloc(ndims *
sizeof(
size_t));
464 int *dimids = (
int *) emalloc(ndims *
sizeof(
int));
/* Override the chunk length for each dimension that has a positive chunkspec size. */
468 for(idim = 0; idim < ndims; idim++) {
469 int dimid = dimids[idim];
470 size_t chunksize = chunkspec_size(dimid);
/* chunkspec_size(dimid) is called a second time here although its result is already in chunksize. */
471 if(chunkspec_size(dimid) > 0) {
472 chunkp[idim] = chunksize;
476 NC_CHECK(nc_def_var_chunking(ogrp, o_varid,
NC_CHUNKED, chunkp));
484 int shuffle, deflate, deflate_level;
/* A level given on the command line replaces deflate_level. */
486 if(option_deflate_level >= 0) {
487 deflate_level = option_deflate_level;
/* Shuffle is enabled from the option only when not already set. */
490 if(shuffle==0 && option_shuffle_vars != 0) {
491 shuffle = option_shuffle_vars;
493 if(deflate != 0 || shuffle != 0) {
/* The deflate flag passed is (deflate_level > 0), so level 0 disables deflation. */
494 NC_CHECK(nc_def_var_deflate(ogrp, o_varid, shuffle, deflate_level > 0, deflate_level));
500 if(fletcher32 != 0) {
501 NC_CHECK(nc_def_var_fletcher32(ogrp, o_varid, fletcher32));
508 NC_CHECK(nc_def_var_endian(ogrp, o_varid, endianness));
/*
 * Set chunking on output variable o_varid in ogrp from the chunk
 * specification. Output dimension ids are mapped to input dimension ids
 * with dimmap_idimid() before chunkspec_size() is consulted.
 * The statements controlled by the visible tests are not shown here.
 */
518 set_var_chunked(
int ogrp,
int o_varid)
523 size_t chunk_threshold = CHUNK_THRESHOLD;
/* Tests whether the chunk specification has no dimensions. */
525 if(chunkspec_ndims() == 0)
531 int *dimids = (
int *) emalloc(ndims *
sizeof(
int));
535 int is_unlimited = 0;
/* varsize starts as the size of one value. */
540 NC_CHECK(
nc_inq_type(ogrp, vartype, NULL, &value_size));
541 varsize = value_size;
547 for(odim = 0; odim < ndims; odim++) {
548 int odimid = dimids[odim];
549 int idimid = dimmap_idimid(odimid);
550 if(dimmap_ounlim(odimid))
553 size_t chunksize = chunkspec_size(idimid);
556 if( (chunksize > 0) || dimmap_ounlim(odimid)) {
/* Tests for a variable below the threshold with is_unlimited still zero. */
564 if(varsize < chunk_threshold && !is_unlimited)
570 size_t *chunkp = (
size_t *) emalloc(ndims *
sizeof(
size_t));
/* Fill the per-dimension chunk lengths and define the chunking. */
571 for(odim = 0; odim < ndims; odim++) {
572 int odimid = dimids[odim];
573 int idimid = dimmap_idimid(odimid);
574 size_t chunksize = chunkspec_size(idimid);
576 chunkp[odim] = chunksize;
581 NC_CHECK(nc_def_var_chunking(ogrp, o_varid,
NC_CHUNKED, chunkp));
/*
 * Apply the command-line deflate level and shuffle setting to output
 * variable o_varid in ogrp. Only acts when a deflate level was given
 * (option_deflate_level >= 0).
 */
591 set_var_compressed(
int ogrp,
int o_varid)
594 if (option_deflate_level >= 0) {
596 NC_CHECK(nc_def_var_deflate(ogrp, o_varid, option_shuffle_vars, deflate, option_deflate_level));
/*
 * Operates on the chunk cache of variable varid in group grp.
 * Only the local values are visible here: size 1, one element, preemption 0.
 * Presumably these are passed to the library to shrink the cache -- confirm.
 */
606 free_var_chunk_cache(
int grp,
int varid)
609 size_t chunk_cache_size = 1;
610 size_t cache_nelems = 1;
611 float cache_preemp = 0;
/*
 * Define in ogrp the dimensions of igrp and record the mapping from each
 * input dimension id to its output dimension id with dimmap_store(),
 * together with the input and output unlimited flags.
 */
632 copy_dims(
int igrp,
int ogrp)
652 dimids = (
int *) emalloc((ndims + 1) *
sizeof(int));
/* Last argument 0: presumably excludes dimensions of parent groups -- confirm. */
653 NC_CHECK(nc_inq_dimids(igrp, NULL, dimids, 0));
656 unlimids = (
int *) emalloc((nunlims + 1) *
sizeof(int));
663 for (dgrp = 0; dgrp < ndims; dgrp++) {
675 idimid = dimids[dgrp];
/* Search the unlimited-dimension ids for this dimension. */
676 for (uld = 0; uld < nunlims; uld++) {
677 if(idimid == unlimids[uld]) {
684 if(unlimid != -1 && (idimid == unlimid)) {
689 stat =
nc_inq_dim(igrp, idimid, name, &length);
691 error(
"dimension \"%s\" requires 64-bit platform", name);
/* Output starts with the same unlimited flag as the input; the test that follows
   checks for an unlimited input with option_fix_unlimdims off. */
694 o_is_unlim = i_is_unlim;
695 if(i_is_unlim && !option_fix_unlimdims) {
698 NC_CHECK(
nc_def_dim(ogrp, name, length, &odimid));
702 dimmap_store(idimid, odimid, i_is_unlim, o_is_unlim);
/*
 * Copy attributes of ivar in igrp to ovar in ogrp.
 * Iterates natts times and copies each attribute by name with nc_copy_att().
 */
715 copy_atts(
int igrp,
int ivar,
int ogrp,
int ovar)
723 for(iatt = 0; iatt < natts; iatt++) {
726 NC_CHECK(nc_copy_att(igrp, ivar, name, ogrp, ovar));
/*
 * Define in ogrp a variable matching variable varid of igrp.
 * Reads the variable's name, type, dimension ids and attribute count;
 * maps each input dimension id to its output id; defines the variable;
 * copies its attributes; then applies storage settings through
 * copy_var_specials(), set_var_chunked() and set_var_compressed()
 * (the conditions guarding those calls are not visible here).
 */
733 copy_var(
int igrp,
int varid,
int ogrp)
746 idimids = (
int *) emalloc((ndims + 1) *
sizeof(int));
747 NC_CHECK(
nc_inq_var(igrp, varid, name, &
typeid, NULL, idimids, &natts));
755 NC_CHECK(
nc_inq_type(igrp,
typeid, type_name, NULL));
761 odimids = (
int *) emalloc((ndims + 1) *
sizeof(int));
/* dimmap_odimid() returning -1 means no output dimension was recorded for the input id. */
762 for(i = 0; i < ndims; i++) {
763 odimids[i] = dimmap_odimid(idimids[i]);
764 if(odimids[i] == -1) {
765 error(
"Oops, no dimension in output associated with input dimid %d", idimids[i]);
770 NC_CHECK(
nc_def_var(ogrp, name, o_typeid, ndims, odimids, &o_varid));
773 NC_CHECK(copy_atts(igrp, varid, ogrp, o_varid));
784 NC_CHECK(copy_var_specials(igrp, varid, ogrp, o_varid));
787 NC_CHECK(set_var_chunked(ogrp, o_varid));
790 NC_CHECK(set_var_compressed(ogrp, o_varid));
/*
 * Define in ogrp every variable of igrp by calling copy_var() for
 * variable ids 0 .. nvars-1.
 */
801 copy_vars(
int igrp,
int ogrp)
807 NC_CHECK(nc_inq_nvars(igrp, &nvars));
808 for (varid = 0; varid < nvars; varid++) {
809 NC_CHECK(copy_var(igrp, varid, ogrp));
/*
 * Copy the schema of igrp to the output: find the matching output group
 * via get_grpid(), copy dimensions and variables into it, then recurse
 * into each child group of igrp.
 */
818 copy_schema(
int igrp,
int ogrp)
825 NC_CHECK(get_grpid(igrp, ogrp, &ogid));
827 NC_CHECK(copy_dims(igrp, ogid));
829 NC_CHECK(copy_vars(igrp, ogid));
/* The count query stores its status in stat; how stat is checked is not visible here. */
836 stat = nc_inq_grps(igrp, &numgrps, NULL);
837 grpids = (
int *)emalloc((numgrps + 1) *
sizeof(int));
838 NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
840 for(i = 0; i < numgrps; i++) {
841 NC_CHECK(copy_schema(grpids[i], ogid));
/*
 * Compute a value count for variable varid in igrp.
 *   nvalsp - presumably receives the count (the store is not visible here)
 * Visible steps: allocate ndims + 1 dimension ids and loop over the
 * dimensions (the loop body is not visible here).
 */
851 inq_nvals(
int igrp,
int varid,
long long *nvalsp) {
859 dimids = (
int *) emalloc((ndims + 1) *
sizeof(int));
861 for(dim = 0; dim < ndims; dim++) {
/*
 * Copy the data of variable varid from igrp to the corresponding output
 * variable in ogrp, in pieces produced by an iterator sized to the copy
 * buffer.
 */
875 copy_var_data(
int igrp,
int varid,
int ogrp) {
/* The buffer is static, so it persists across calls. */
881 static void *buf = 0;
893 NC_CHECK(inq_nvals(igrp, varid, &nvalues));
/* Grow the configured buffer size so it can hold at least one value. */
901 NC_CHECK(
nc_inq_type(igrp, vartype, NULL, &value_size));
902 if(value_size > option_copy_buffer_size) {
903 option_copy_buffer_size = value_size;
/* With the compute option on, cache parameters come from inq_var_chunking_params();
   the later arguments show the option values and COPY_CHUNKCACHE_PREEMPTION being
   passed to a call whose name is not visible here. */
913 if(option_compute_chunkcaches) {
917 size_t chunkcache_size, chunkcache_nelems;
918 float chunkcache_preemption;
919 NC_CHECK(inq_var_chunking_params(igrp, varid, ogrp, ovarid,
922 &chunkcache_preemption));
926 chunkcache_preemption));
930 option_chunk_cache_size,
931 option_chunk_cache_nelems,
932 COPY_CHUNKCACHE_PREEMPTION));
/* Grow the configured buffer size so it can hold at least one chunk. */
939 NC_CHECK(inq_var_chunksize(igrp, varid, &chunksize));
940 if(chunksize > option_copy_buffer_size) {
941 option_copy_buffer_size = chunksize;
946 if(buf && do_realloc) {
/* The buffer is zero-filled after allocation. */
951 buf = emalloc(option_copy_buffer_size);
952 memset((
void*)buf,0,option_copy_buffer_size);
956 NC_CHECK(nc_get_iter(igrp, varid, option_copy_buffer_size, &iterp));
958 start = (
size_t *) emalloc((iterp->rank + 1) *
sizeof(size_t));
959 count = (
size_t *) emalloc((iterp->rank + 1) *
sizeof(size_t));
/* Each iteration yields a start/count pair; read that piece and write it to the same position. */
963 while((ntoget = nc_next_iter(iterp, start, count)) > 0) {
964 NC_CHECK(
nc_get_vara(igrp, varid, start, count, buf));
965 NC_CHECK(
nc_put_vara(ogrp, ovarid, start, count, buf));
987 NC_CHECK(nc_free_iter(iterp));
/*
 * Copy the data of all variables in igrp to the matching output group
 * (found via get_grpid()), then recurse into each child group of igrp.
 */
995 copy_data(
int igrp,
int ogrp)
1009 NC_CHECK(get_grpid(igrp, ogrp, &ogid));
1012 NC_CHECK(nc_inq_nvars(igrp, &nvars));
1014 for (varid = 0; varid < nvars; varid++) {
1015 NC_CHECK(copy_var_data(igrp, varid, ogid));
/* The count query stores its status in stat; how stat is checked is not visible here. */
1019 stat = nc_inq_grps(igrp, &numgrps, NULL);
1020 grpids = (
int *)emalloc((numgrps + 1) *
sizeof(int));
1021 NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
1023 for(i = 0; i < numgrps; i++) {
1024 NC_CHECK(copy_data(grpids[i], ogid));
/* Body fragment; the function header is not visible here (presumably count_dims(),
   which copy() calls -- confirm). It enumerates the group ids returned by
   nc_inq_grps_full() for ncid and adds each group's ndims_local to ndims. */
1039 NC_CHECK(nc_inq_grps_full(ncid, &numgrps, NULL));
1040 grpids = emalloc(numgrps *
sizeof(
int));
1041 NC_CHECK(nc_inq_grps_full(ncid, NULL, grpids));
1042 for(igrp = 0; igrp < numgrps; igrp++) {
1045 ndims += ndims_local;
/*
 * Test used by copy() to choose the fixed-size/record copy path.
 *   ncid - file or group to inspect
 *   kind - format kind of that file
 * Visible logic: when recdimid is not -1, walk the variables and compare
 * each variable's first dimension id with recdimid. What is returned is
 * not visible here.
 */
1059 nc3_special_case(
int ncid,
int kind) {
1063 if (recdimid != -1) {
1066 NC_CHECK(nc_inq_nvars(ncid, &nvars));
1067 for (varid = 0; varid < nvars; varid++) {
1073 dimids = (
int *) emalloc((ndims + 1) *
sizeof(int));
1075 dimids0 = dimids[0];
1077 if(dimids0 == recdimid) {
/* Body fragment; the function header is not visible here (presumably classify_vars(),
   which copy() calls -- confirm). Both output arrays are sized for nvars entries.
   Each variable id for which isrecvar() is true is stored in *rvars at index *nr,
   and an id is stored in *fvars at index *nf (presumably the else branch). */
1100 NC_CHECK(nc_inq_nvars(ncid, &nvars));
1102 *fvars = (
int *) emalloc(nvars *
sizeof(
int));
1104 *rvars = (
int *) emalloc(nvars *
sizeof(
int));
1105 for (varid = 0; varid < nvars; varid++) {
1106 if (isrecvar(ncid, varid)) {
1107 (*rvars)[*nr] = varid;
1110 (*fvars)[*nf] = varid;
/*
 * Copy the data of each listed variable from igrp to ogrp.
 *   nfixed_vars  - number of entries in fixed_varids
 *   fixed_varids - variable ids to copy, each passed to copy_var_data()
 */
1119 copy_fixed_size_data(
int igrp,
int ogrp,
size_t nfixed_vars,
int *fixed_varids) {
1122 for (ivar = 0; ivar < nfixed_vars; ivar++) {
1123 int varid = fixed_varids[ivar];
1124 NC_CHECK(copy_var_data(igrp, varid, ogrp));
/*
 * Copy one slab of a variable: read it from (ncid, varid) at start/count
 * into buf, then write buf to (ogrp, ovarid) at the same start/count.
 * Only the first parameter is visible here; the others are used in the body.
 */
1133 copy_rec_var_data(
int ncid,
1143 NC_CHECK(
nc_get_vara(ncid, varid, start, count, buf));
1144 NC_CHECK(
nc_put_vara(ogrp, ovarid, start, count, buf));
/*
 * Copy the listed variables from ncid to ogrp one record at a time.
 *   nrec_vars  - number of entries in rec_varids
 *   rec_varids - input variable ids to copy
 * A setup pass builds, per variable, a start/count pair, a data buffer
 * and the output variable id. The copy pass then loops records in the
 * outer loop and variables in the inner loop.
 */
1150 copy_record_data(
int ncid,
int ogrp,
size_t nrec_vars,
int *rec_varids) {
/* Per-variable arrays, indexed by ivar. */
1161 buf = (
void **) emalloc(nrec_vars *
sizeof(
void *));
1162 rec_ovarids = (
int *) emalloc(nrec_vars *
sizeof(
int));
1163 start = (
size_t **) emalloc(nrec_vars *
sizeof(
size_t*));
1164 count = (
size_t **) emalloc(nrec_vars *
sizeof(
size_t*));
1166 for (ivar = 0; ivar < nrec_vars; ivar++) {
1176 varid = rec_varids[ivar];
1178 dimids = (
int *) emalloc((1 + ndims) *
sizeof(int));
1179 start[ivar] = (
size_t *) emalloc(ndims *
sizeof(
size_t));
1180 count[ivar] = (
size_t *) emalloc(ndims *
sizeof(
size_t));
1183 NC_CHECK(
nc_inq_type(ncid, vartype, NULL, &value_size));
/* Dimensions after the first are taken whole: start 0, count dimlen. */
1185 for(ii = 1; ii < ndims; ii++) {
1190 start[ivar][ii] = 0;
1191 count[ivar][ii] = dimlen;
/* The buffer holds nvals values of value_size bytes each. */
1195 buf[ivar] = (
void *) emalloc(nvals * value_size);
/* The output variable is looked up by name. */
1197 NC_CHECK(
nc_inq_varid(ogrp, varname, &rec_ovarids[ivar]));
1203 for(irec = 0; irec < nrecs; irec++) {
1204 for (ivar = 0; ivar < nrec_vars; ivar++) {
1206 varid = rec_varids[ivar];
1207 ovarid = rec_ovarids[ivar];
/* The first dimension's start index selects the record. */
1208 start[ivar][0] = irec;
1209 NC_CHECK(copy_rec_var_data(ncid, ogrp, irec, varid, ovarid,
1210 start[ivar], count[ivar], buf[ivar]));
/* Two further per-variable loops follow; their bodies are not visible here
   (presumably cleanup -- confirm). */
1213 for (ivar = 0; ivar < nrec_vars; ivar++) {
1223 for (ivar = 0; ivar < nrec_vars; ivar++) {
/*
 * Copy netCDF file infile to outfile.
 * Visible sequence: open the input; choose the output kind; parse the
 * chunk specification if one was given; create the output; copy groups
 * and types; size the dimension map; copy the schema; copy the data,
 * either split into fixed-size and record variables or with copy_data().
 */
1240 copy(
char* infile,
char* outfile)
1244 int inkind, outkind;
1249 if(option_read_diskless) {
1253 NC_CHECK(
nc_open(infile, open_mode, &igrp));
/* The output kind starts as the option value; SAME_AS_INPUT is handled in the branch below. */
1268 outkind = option_kind;
1269 if (option_kind == SAME_AS_INPUT) {
1273 if (option_deflate_level > 0 ||
1283 if(option_chunkspec) {
1286 NC_CHECK(chunkspec_parse(igrp, option_chunkspec));
1290 if(option_write_diskless)
1309 error(
"nccopy built with --disable-netcdf4, can't create netCDF-4 files");
1313 error(
"bad value (%d) for -k option\n", option_kind);
1316 NC_CHECK(
nc_create(outfile, create_mode, &ogrp));
1323 NC_CHECK(copy_groups(igrp, ogrp));
1324 NC_CHECK(copy_types(igrp, ogrp));
/* The dimension map is sized from the dimension count before the schema copy fills it. */
1328 ndims = count_dims(igrp);
1329 NC_CHECK(dimmap_init(ndims));
1330 NC_CHECK(copy_schema(igrp, ogrp));
/* Special case on the input side: variables are classified from igrp. */
1336 if(nc3_special_case(igrp, inkind)) {
1337 size_t nfixed_vars, nrec_vars;
1340 NC_CHECK(classify_vars(igrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids));
1341 NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids));
1342 NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids));
1343 }
else if (nc3_special_case(ogrp, outkind)) {
/* Special case on the output side: variables are classified from ogrp,
   and the data is still read from igrp. */
1344 size_t nfixed_vars, nrec_vars;
1348 NC_CHECK(classify_vars(ogrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids));
1349 NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids));
1350 NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids));
/* Group-recursive copy (presumably the else branch -- confirm). */
1352 NC_CHECK(copy_data(igrp, ogrp));
1364 [-k n] specify kind of netCDF format for output file, default same as input\n\
1365 1 classic, 2 64-bit offset, 3 netCDF-4, 4 netCDF-4 classic model\n\
1366 [-d n] set deflation compression level, default same as input (0=none 9=max)\n\
1367 [-s] add shuffle option to deflation compression\n\
1368 [-c chunkspec] specify chunking for dimensions, e.g. \"dim1/N1,dim2/N2,...\"\n\
1369 [-u] convert unlimited dimensions to fixed-size dimensions in output copy\n\
1370 [-w] write whole output file from diskless netCDF on close\n\
1371 [-m n] set size in bytes of copy buffer, default is 5000000 bytes\n\
1372 [-h n] set size in bytes of chunk_cache for chunked variables\n\
1373 [-e n] set number of elements that chunk_cache can hold\n\
1374 [-r] read whole input file into diskless file on open (classic or 64-bit offset format only)\n\
1375 infile name of netCDF input file\n\
1376 outfile name for netCDF output file\n"
1381 error(
"%s [-k n] [-d n] [-s] [-c chunkspec] [-u] [-w] [-m n] [-h n] [-e n] [-r] infile outfile\n%s",
/*
 * Program entry point: parse the command-line options with getopt(),
 * require an input and an output file name, refuse identical names,
 * then call copy(). The case labels of the option switch are not
 * visible here.
 */
1635 main(
int argc,
char**argv)
1637 char* inputfile = NULL;
1638 char* outputfile = NULL;
/* Options k, d, m, c, h and e take an argument; s, u, r, w and x are flags. */
1680 while ((c = getopt(argc, argv,
"k:d:sum:c:h:e:rwx")) != -1) {
/* The kind name is copied, then matched against the name/kind pairs in legalkinds. */
1696 struct Kvalues* kvalue;
1697 char *kind_name = (
char *) emalloc(strlen(optarg)+1);
1698 (void)strcpy(kind_name, optarg);
1699 for(kvalue=legalkinds;kvalue->name;kvalue++) {
1700 if(strcmp(kind_name,kvalue->name) == 0) {
1701 option_kind = kvalue->kind;
/* Reaching the terminating entry (NULL name) means no kind matched. */
1705 if(kvalue->name == NULL) {
1706 error(
"invalid format: %s", kind_name);
/* The deflate level must be within 0..9. */
1711 option_deflate_level = strtol(optarg, NULL, 10);
1712 if(option_deflate_level < 0 || option_deflate_level > 9) {
1713 error(
"invalid deflation level: %d", option_deflate_level);
1720 option_fix_unlimdims = 1;
/* The copy buffer size is parsed as a double; suffix points at what follows the number. */
1726 dval = strtod(optarg, &suffix);
1742 error(
"If suffix used for '-m' option value, it must be K, M, G, or T: %c",
1746 option_copy_buffer_size = dval;
/* The chunk cache size is parsed the same way. */
1753 dval = strtod(optarg, &suffix);
1769 error(
"If suffix used for '-h' option value, it must be K, M, G, or T: %c",
1773 option_chunk_cache_size = dval;
/* NOTE(review): option_chunk_cache_nelems is size_t, so "<= 0" is only true for 0
   (a negative strtol result converts to a large value) and "%d" does not match
   the argument type -- confirm and fix. */
1777 option_chunk_cache_nelems = strtol(optarg, NULL, 10);
1778 if(option_chunk_cache_nelems <= 0) {
1779 error(
"invalid value for number of chunk cache elements: %d", option_chunk_cache_nelems);
1783 option_read_diskless = 1;
1786 option_write_diskless = 1;
1789 option_compute_chunkcaches = 1;
/* The chunk specification is duplicated here and parsed later in copy(). */
1794 option_chunkspec = strdup(optarg);
1805 error(
"one input file and one output file required");
/* argv[0] and argv[1] are read as the file names; presumably argv was advanced
   past the options in lines not shown -- confirm. */
1807 inputfile = argv[0];
1808 outputfile = argv[1];
/* Only the two name strings are compared. */
1810 if(strcmp(inputfile, outputfile) == 0) {
1811 error(
"output would overwrite input");
1814 if(copy(inputfile, outputfile) !=
NC_NOERR)