/[escript]/trunk/tools/netcdf.cc
ViewVC logotype

Contents of /trunk/tools/netcdf.cc

Parent Directory Parent Directory | Revision Log Revision Log


Revision 6497 - (show annotations)
Wed Feb 15 00:44:55 2017 UTC (4 years, 7 months ago) by jfenwick
File size: 14719 byte(s)
Reader for netcdf classic headers.


1
2 /*****************************************************************************
3 *
4 * Copyright (c) 2017 by The University of Queensland
5 * http://www.uq.edu.au
6 *
7 * Primary Business: Queensland, Australia
8 * Licensed under the Apache License, version 2.0
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 * Development 2012-2013 by School of Earth Sciences
13 * Development from 2014 by Centre for Geoscience Computing (GeoComp)
14 *
15 *****************************************************************************/
16
17
18
19 #include <iostream>
20 #include <fstream>
21 #include <string>
22 #include <list>
23 #include <vector>
24 #include <boost/lexical_cast.hpp>
25
26 /* From http://www.unidata.ucar.edu/software/netcdf/docs/file_format_specifications.html
27 The grammar for the header is:
28
29 header = magic numrecs dim_list gatt_list var_list
30 = CDF? followed by 4 bytes(which we don't care about)
31 We can just search for the relevant tags:
32
33
34 */
35
36 union esV
37 {
38 unsigned char b;
39 short s;
40 int i;
41 float f;
42 double d;
43 };
44
45
46 class esncdfValues
47 {
48 public:
49 char type;
50 std::string svalue;
51 std::vector<union esV> vec;
52 };
53
54 namespace
55 {
56 char NC_DIM_TAG=10;
57 char NC_VAR_TAG=11;
58 char NC_ATT_TAG=12;
59
60
61
62 enum nc_type
63 {
64 NC_BYTE=1,
65 NC_CHAR=2,
66 NC_SHORT=3,
67 NC_INT=4,
68 NC_FLOAT=5,
69 NC_DOUBLE=6
70 };
71
72 int offset;
73
74 char getChar(std::ifstream& ifs)
75 {
76 offset++;
77 return ifs.get();
78 }
79
80
81
82
83 typedef std::vector<std::pair<std::string, esncdfValues> > vsu;
84
85 }
86
87
88 using namespace std;
89 using namespace boost;
90
91
92
93
94
95
96
97
98 // Holds a list of name value pairs
99 class esncdfCollection
100 {
101 public:
102 void add(const std::string& k, const esncdfValues& v)
103 {
104 vec.push_back(std::pair<std::string, esncdfValues>(k,v));
105 }
106 void add_int(const std::string& k, int i)
107 {
108 esncdfValues v;
109 v.type=NC_INT;
110 union esV es;
111 es.i=i;
112 v.vec.push_back(es);
113 vec.push_back(std::pair<std::string, esncdfValues>(k,v));
114 }
115
116 /* void add(const std::string& k, const std::string& s)
117 {
118 esncdfValues u;
119 u.type=NC_CHAR;
120 u.svalue=s;
121 vec.push_back(std::pair<std::string, esncdfValues>(k,u));
122 } */
123 vsu vec;
124 // Here would be extra information regarding variables
125 std::string name;
126 std::string type;
127 };
128
129
130 class esncdfVariables
131 {
132 public:
133 std::vector<esncdfCollection> vars;
134 void add(const esncdfCollection& c)
135 {
136 vars.push_back(c);
137 }
138 };
139
140
141 class esncdfHeader
142 {
143 public:
144 esncdfCollection dimensions;
145 esncdfCollection attributes; // global attributes
146 esncdfVariables variables; // including specific attributes
147 };
148
149
150 bool read_list(std::ifstream& ifs, char t, bool offset32, esncdfCollection& coll);
151
152
153 bool gobble(ifstream& ifs, int s)
154 {
155 for (int i=0;i<s;++i)
156 {
157 getChar(ifs);
158 }
159 return ifs.good();
160 }
161
162
163 bool read_int(std::ifstream& ifs, int& res)
164 {
165 char buff32[4];
166 for (int i=0;i<4;++i)
167 {
168 buff32[i]=getChar(ifs);
169 }
170 if (!ifs)
171 {
172 return false;
173 }
174 res=0;
175 for (int i=0;i<4;++i)
176 {
177 res=(res << 8);
178 res+=(unsigned char)buff32[i];
179 }
180 return true;
181 }
182
183 bool read_item_header(std::ifstream& ifs, std::string& iname)
184 {
185 // format for name is 32 bit length followed by chars (padded to 4byte boundary)
186 int len=0;
187 if (!read_int(ifs,len))
188 {
189 return false;
190 }
191 for (int i=0;i<len;++i)
192 {
193 char c=getChar(ifs);
194 if (c!=0) // if anyone puts \x00 in their string
195 { // ...
196 iname+=c;
197 }
198 }
199 // now we need to skip padding
200 if (len%4!=0)
201 {
202 for (int i=0;i<4-len%4;++i)
203 {
204 char c=getChar(ifs);
205 }
206 }
207 if (!ifs)
208 {
209 return false;
210 }
211 return true;
212 }
213
214 bool read_item_dim(std::ifstream& ifs, std::string& iname, esncdfCollection& dims)
215 {
216 if (!read_item_header(ifs, iname))
217 {
218 return false;
219 }
220 int len=0;
221 if (!read_int(ifs, len))
222 {
223 return false;
224 }
225 dims.add_int(iname, len);
226 return true;
227 }
228
229 bool read_tag(std::ifstream& ifs, char& t)
230 {
231 for (int i=0;i<3;++i)
232 {
233 t=getChar(ifs);
234 if (t!=0)
235 {
236 return false;
237 }
238 }
239 t=getChar(ifs);
240 return ifs.good();
241 }
242
243 esncdfValues get_string_value(char tag, int len, char* bytes)
244 {
245 esncdfValues res;
246 res.type=tag;
247 switch (tag)
248 {
249 case NC_CHAR: res.svalue=std::string(bytes); break;
250 case NC_BYTE:
251 {
252 union esV e;
253 for (int i=0;i<len;++i)
254 {
255 e.b=bytes[i];
256 res.vec.push_back(e);
257 }
258 break;
259 }
260 case NC_SHORT:
261 {
262 union esV e;
263 for (int i=0;i<len;++i)
264 {
265 short v=(bytes[0] << 8)+bytes[1];
266 e.s=v;
267 res.vec.push_back(e);
268 }
269 break;
270 }
271 case NC_INT:
272 {
273 union esV e;
274 for (int i=0;i<len;++i)
275 {
276 int v=(bytes[0] << 24) +(bytes[1] << 16) + (bytes[2] << 8) +bytes[3];
277 e.i=v;
278 res.vec.push_back(e);
279 }
280 break;
281 }
282 case NC_FLOAT: // here we assume the correct floating point std
283 {
284 union esV e;
285 float f=-0.0;
286 if (reinterpret_cast<unsigned char*>(&f)[0]!=0) // big endian (yes we assume IEEE floating point
287 {
288 for (int i=0;i<len;++i)
289 {
290 float f=0; // assumes floats are 32bit!
291 unsigned char* v=reinterpret_cast<unsigned char*>(&f);
292 for (int j=0;j<4;++j)
293 {
294 v[j]=bytes[4*i+j];
295 }
296 e.f=f;
297 res.vec.push_back(e);
298 }
299 }
300 else
301 {
302 for (int i=0;i<len;++i)
303 {
304 float f=0; // assumes floats are 32bit!
305 unsigned char* v=reinterpret_cast<unsigned char*>(&f);
306 for (int j=0;j<4;++j)
307 {
308 v[j]=bytes[4*i+3-j];
309 }
310 e.f=f;
311 res.vec.push_back(e);
312 }
313 }
314 break;
315 }
316 case NC_DOUBLE:
317 {
318 union esV e;
319 double f=-0.0;
320 if (reinterpret_cast<unsigned char*>(&f)[0]!=0) // big endian (yes we assume IEEE floating point
321 {
322 for (int i=0;i<len;++i)
323 {
324 double f=0;
325 unsigned char* v=reinterpret_cast<unsigned char*>(&f);
326 for (int j=0;j<8;++j)
327 {
328 v[j]=bytes[8*i+j];
329 }
330 e.d=f;
331 res.vec.push_back(e);
332 }
333 }
334 else
335 {
336 for (int i=0;i<len;++i)
337 {
338 double f=0;
339 unsigned char* v=reinterpret_cast<unsigned char*>(&f);
340 for (int j=0;j<8;++j)
341 {
342 v[j]=bytes[8*i+7-j];
343 }
344 e.d=f;
345 res.vec.push_back(e);
346 }
347 }
348 break;
349 }
350
351
352 default:
353 res.type=NC_CHAR;
354 res.svalue="?????";
355 }
356 return res;
357 }
358
359 // structure for an attribute is:
360 // name nc_type nelems [values ...]
361 bool read_item_attr(std::ifstream& ifs, std::string& iname, esncdfCollection& coll)
362 {
363 if (!read_item_header(ifs, iname))
364 {
365 return false;
366 }
367 int len=0;
368 char tag;
369 if (!read_tag(ifs, tag))
370 {
371 return false;
372 }
373 if ((tag<NC_BYTE) || (tag>NC_DOUBLE)) // invalid type
374 {
375 return false;
376 }
377 if (!read_int(ifs, len))
378 {
379 return false;
380 }
381 // for now I'm just going to skip over the values
382 // to do that, I need to know how big each type is
383 int size=0;
384 switch (tag)
385 {
386 case NC_SHORT: size=2; break;
387 case NC_INT: size=4; break;
388 case NC_FLOAT: size=4; break;
389 case NC_DOUBLE: size=8; break;
390 default:
391 size=1;
392 }
393
394 int skip=len*size;
395 // now we need to skip padding
396 if (len*size%4!=0)
397 {
398 skip+=4-len*size%4;
399 }
400 char buff[skip+1];
401 buff[skip]=0;
402 for (int i=0;i<skip;++i)
403 {
404 buff[i]=getChar(ifs);
405 }
406 if (!ifs.good())
407 {
408 return false;
409 }
410 esncdfValues vs=get_string_value(tag, len, buff);
411 coll.add(iname, vs);
412 return true;
413 }
414
415
416
417
418
419 // structure for a variable is:
420 // name nelems [dimid ...] vatt_list nc_type vsize begin
421 bool read_item_var(std::ifstream& ifs, std::string& iname, bool offset32, esncdfVariables& vars)
422 {
423 if (!read_item_header(ifs, iname))
424 {
425 return false;
426 }
427 int dim=0;
428 if (!read_int(ifs, dim))
429 {
430 return false;
431 }
432 for (int i=0;i<dim;++i) // just disgarding these at the moment
433 {
434 int v;
435 if (!read_int(ifs, v))
436 {
437 return false;
438 }
439 }
440 esncdfCollection current;
441 current.name=iname;
442 if (!read_list(ifs, NC_ATT_TAG, offset32, current))
443 {
444 return false;
445 }
446 // now determine the type
447 char tag;
448 if (!read_tag(ifs, tag))
449 {
450 return false;
451 }
452 if ((tag<NC_BYTE) || (tag>NC_DOUBLE)) // invalid type
453 {
454 return false;
455 }
456 switch(tag)
457 {
458 case NC_CHAR: current.type="char"; break;
459 case NC_BYTE: current.type="byte";break;
460 case NC_SHORT: current.type="short";break;
461 case NC_INT: current.type="int";break;
462 case NC_FLOAT: current.type="float";break;
463 case NC_DOUBLE: current.type="double";break;
464 default:
465 current.type="????";
466 }
467 int len;
468 if (!read_int(ifs, len))
469 {
470 return false;
471 }
472 // now we need to get the offset of the variable in the file
473 // which we will promptly discard
474 int gobcount=offset32?4:8;
475 if (!gobble(ifs, gobcount))
476 {
477 return false;
478 }
479 vars.add(current);
480 return true;
481 }
482
483 // lists<T> are defined as: ABSENT | NC_DIMENSION nelems [T ...]
484 // where T in {dim, attr, var}
485 bool read_list_vars(std::ifstream& ifs, bool offset32, esncdfVariables& coll)
486 {
487 char b;
488 // we expect either 8 zeros indicating no list or the expected tag followed
489 // by list information
490 for (int i=0;i<3;++i)
491 {
492
493 if (!ifs || ((b=getChar(ifs))!=0))
494 {
495 return false;
496 }
497 }
498 b=getChar(ifs);
499 if (!ifs || (b!=0 && b!=NC_VAR_TAG)) // didn't get tag we expected
500 {
501 return false;
502 }
503 if (b==0) // expecting 4 more zeros
504 {
505 for (int i=0;i<4;++i)
506 {
507 b=getChar(ifs);
508 if (!ifs || b!=0)
509 {
510 return false;
511 }
512 }
513 return true; // This list is absent
514 }
515 // we must have seen the correct tag
516 // first thing will be number of items
517 int count=0;
518 if (!read_int(ifs, count))
519 {
520 return false;
521 }
522 for (int i=0;i<count;++i)
523 {
524 std::string itemname;
525 if (!read_item_var(ifs, itemname, offset32, coll))
526 {
527 return false;
528 }
529 }
530 return true;
531 }
532
533
534
535
536 // lists<T> are defined as: ABSENT | NC_DIMENSION nelems [T ...]
537 // where T in {dim, attr, var}
538 bool read_list(std::ifstream& ifs, char t, bool offset32, esncdfCollection& coll)
539 {
540 char b;
541 // we expect either 8 zeros indicating no list or the expected tag followed
542 // by list information
543 for (int i=0;i<3;++i)
544 {
545
546 if (!ifs || ((b=getChar(ifs))!=0))
547 {
548 return false;
549 }
550 }
551 b=getChar(ifs);
552 if (!ifs || (b!=0 && b!=t)) // didn't get tag we expected
553 {
554 return false;
555 }
556 if (b==0) // expecting 4 more zeros
557 {
558 for (int i=0;i<4;++i)
559 {
560 b=getChar(ifs);
561 if (!ifs || b!=0)
562 {
563 return false;
564 }
565 }
566 return true; // This list is absent
567 }
568 // we must have seen the correct tag
569 // first thing will be number of items
570 int count=0;
571 if (!read_int(ifs, count))
572 {
573 return false;
574 }
575 if (t==NC_DIM_TAG)
576 {
577 for (int i=0;i<count;++i)
578 {
579 std::string itemname;
580 if (!read_item_dim(ifs, itemname, coll))
581 {
582 return false;
583 }
584 }
585 }
586 else if (t==NC_ATT_TAG)
587 {
588 for (int i=0;i<count;++i)
589 {
590 std::string itemname;
591 if (!read_item_attr(ifs, itemname, coll))
592 {
593 return false;
594 }
595 }
596 }
597 else
598 {
599 return false;
600 }
601 return true;
602 }
603
604 bool read_header(const char* fname, esncdfHeader& header)
605 {
606 ifstream ifs(fname);
607 if (!ifs)
608 {
609 return false;
610 }
611 gobble(ifs,3); // skip over "CDF"
612 bool offset32=(getChar(ifs)==1);
613 gobble(ifs, 4);
614 if (!ifs)
615 {
616 return false;
617 }
618 if (!read_list(ifs, NC_DIM_TAG, offset32, header.dimensions))
619 {
620 return false;
621 }
622 cout << "Dimensions:\n";
623 cout << "Attributes:\n";
624 if (!read_list(ifs, NC_ATT_TAG, offset32, header.attributes))
625 {
626 return false;
627 }
628
629 cout << "Variables:\n";
630 if (!read_list_vars(ifs, offset32, header.variables))
631 {
632 return false;
633 }
634 return true;
635 }
636
637 void dump_Values(std::ostream& os, const esncdfValues& u)
638 {
639 if (u.type==NC_CHAR)
640 {
641 os << u.svalue;
642 }
643 else
644 {
645 for (int i=0;i<u.vec.size();++i)
646 {
647 switch (u.type)
648 {
649 case NC_BYTE:
650 {
651 unsigned char x=u.vec[i].b;
652 os << hex << (unsigned short)(x) << dec << " "; break;
653 }
654 case NC_SHORT: os << u.vec[i].s << " "; break;
655 case NC_INT: os << u.vec[i].i << " "; break;
656 case NC_FLOAT: os << u.vec[i].f << " "; break;
657 case NC_DOUBLE: os << u.vec[i].d << " "; break;
658 default:
659 os << "????";
660 }
661 }
662 }
663
664 }
665
666 void print_header(const esncdfHeader& h)
667 {
668 cout << "Dimensions:\n";
669 for (auto it=h.dimensions.vec.begin();it!=h.dimensions.vec.end();++it)
670 {
671 const esncdfValues& v=it->second;
672 cout << it->first << " = ";
673 dump_Values(cout, v);
674 cout << endl;
675 }
676 cout << "Attributes:\n";
677 for (auto it=h.attributes.vec.begin();it!=h.attributes.vec.end();++it)
678 {
679 const esncdfValues& v=it->second;
680 cout << it->first << " = ";
681 dump_Values(cout, v);
682 cout << endl;
683 }
684 cout << "Variables:\n";
685 for (auto it=h.variables.vars.begin();it!=h.variables.vars.end();++it)
686 {
687 cout << it->type << " " << it->name << ":\n";
688 for (auto jt=it->vec.begin();jt!=it->vec.end();++jt)
689 {
690 cout << " " << jt->first << " = ";
691 const esncdfValues& v=jt->second;
692 dump_Values(cout, v);
693 cout << endl;
694 }
695 }
696 }
697
698 int main(int argc, char** argv)
699 {
700 offset=-1;
701 if (argc==1)
702 {
703 std::cerr << "Usage: " << argv[0] << " file.nc\n";
704 return 1;
705 }
706 esncdfHeader header;
707 if (!read_header(argv[1], header))
708 {
709 std::cerr << "Failed\n";
710 }
711 cout << "-------------\n";
712 print_header(header);
713 return 0;
714
715 }

  ViewVC Help
Powered by ViewVC 1.1.26