/[escript]/trunk/downunder/py_src/datasources.py
ViewVC logotype

Contents of /trunk/downunder/py_src/datasources.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4097 - (show annotations)
Fri Dec 7 01:18:35 2012 UTC (6 years, 8 months ago) by caltinay
File MIME type: text/x-python
File size: 23010 byte(s)
Minor edits.

1
2 ##############################################################################
3 #
4 # Copyright (c) 2003-2012 by University of Queensland
5 # http://www.uq.edu.au
6 #
7 # Primary Business: Queensland, Australia
8 # Licensed under the Open Software License version 3.0
9 # http://www.opensource.org/licenses/osl-3.0.php
10 #
11 # Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 # Development since 2012 by School of Earth Sciences
13 #
14 ##############################################################################
15
16 """Data readers/providers for inversions"""
17
18 __copyright__="""Copyright (c) 2003-2012 by University of Queensland
19 http://www.uq.edu.au
20 Primary Business: Queensland, Australia"""
21 __license__="""Licensed under the Open Software License version 3.0
22 http://www.opensource.org/licenses/osl-3.0.php"""
23 __url__="https://launchpad.net/escript-finley"
24
25 __all__ = ['DataSource','ErMapperData','SyntheticData','SmoothAnomaly']
26
27 import logging
28 import numpy as np
29 from esys.escript import ReducedFunction
30 from esys.escript.linearPDEs import LinearSinglePDE
31 from esys.escript.util import *
32 import esys.escript.unitsSI as U
33 from esys.ripley import Brick, Rectangle, ripleycpp
34
35 try:
36 from scipy.io.netcdf import netcdf_file
37 __all__ += ['NetCdfData']
38 except:
39 pass
40
41 def LatLonToUTM(lon, lat, wkt_string=None):
42 """
43 Converts one or more longitude,latitude pairs to the corresponding x,y
44 coordinates in the Universal Transverse Mercator projection.
45
46 :note: If the ``pyproj`` module is not installed a warning is printed and
47 the input values are scaled by a constant and returned.
48 :note: If `wkt_string` is not given or invalid or the ``gdal`` module is
49 not available to convert the string, then the input values are
50 assumed to be given in the Clarke 1866 projection.
51
52 :param lon: longitude value(s)
53 :type lon: `float`, `list`, `tuple`, or ``numpy.array``
54 :param lat: latitude value(s)
55 :type lat: `float`, `list`, `tuple`, or ``numpy.array``
56 :rtype: ``numpy.array``
57 """
58
59 # not really optimal: if pyproj is not installed we return the input
60 # values scaled by a constant.
61 try:
62 import pyproj
63 except:
64 print("Warning, pyproj not available. Domain extents will be wrong")
65 return np.array(lon)*1000., np.array(lat)*1000.
66
67 # determine UTM zone from the input data
68 zone=int(np.median((np.floor((np.array(lon) + 180)/6) + 1) % 60))
69 try:
70 import osgeo.osr
71 srs = osgeo.osr.SpatialReference()
72 srs.ImportFromWkt(wkt_string)
73 p_src = pyproj.Proj(srs.ExportToProj4())
74 except:
75 p_src = pyproj.Proj('+proj=longlat +ellps=clrk66 +no_defs')
76 # we assume southern hemisphere here
77 p_dest = pyproj.Proj('+proj=utm +zone=%d +south +units=m'%zone)
78 x,y=pyproj.transform(p_src, p_dest, lon, lat)
79 return x,y
80
81
class DataSource(object):
    """
    Abstract base class for objects that provide survey data to the
    inversion process. Common functionality lives here; methods marked as
    such must be overwritten by subclasses. The class assumes 2D data which
    is mapped to a slice of a 3D domain - override the relevant methods for
    any other setup.
    """

    # identifiers for the supported kinds of survey data
    GRAVITY = 0
    MAGNETIC = 1

    def __init__(self):
        """
        Initializes the logger and sets the default subsampling factor.
        """
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)
        self.__subsampling_factor = 1

    def getDataExtents(self):
        """
        Returns a tuple of tuples ``( (x0, y0), (nx, ny), (dx, dy) )`` where

        - ``x0``, ``y0`` = coordinates of data origin
        - ``nx``, ``ny`` = number of data points in x and y
        - ``dx``, ``dy`` = spacing of data points in x and y

        This method must be implemented in subclasses.
        """
        raise NotImplementedError

    def getDataType(self):
        """
        Returns the type of survey data managed by this source, which
        subclasses must report as either `GRAVITY` or `MAGNETIC`.
        """
        raise NotImplementedError

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        Called by the `DomainBuilder` to retrieve the survey data as `Data`
        objects on the given domain. Subclasses should return one or more
        `Data` objects with survey data interpolated on the given ripley
        domain; the exact return type depends on the type of data.

        :param domain: the escript domain to use
        :type domain: `esys.escript.Domain`
        :param origin: the origin coordinates of the domain
        :type origin: ``tuple`` or ``list``
        :param NE: the number of domain elements in each dimension
        :type NE: ``tuple`` or ``list``
        :param spacing: the cell sizes (node spacing) in the domain
        :type spacing: ``tuple`` or ``list``
        """
        raise NotImplementedError

    def setSubsamplingFactor(self, f):
        """
        Sets the data subsampling factor (default=1). The factor applies in
        all dimensions, so for example a 2D dataset of 300 x 150 points is
        reduced to 150 x 75 points with a factor of 2. This becomes
        important when adding data of varying resolution to a
        `DomainBuilder`.
        """
        self.__subsampling_factor = f

    def getSubsamplingFactor(self):
        """
        Returns the subsampling factor previously set via
        `setSubsamplingFactor`.
        """
        return self.__subsampling_factor
155
156
157 ##############################################################################
class ErMapperData(DataSource):
    """
    Data Source for ER Mapper raster data.
    Note that this class only accepts a very specific type of ER Mapper data
    input and will raise an exception if other data is found.
    """
    def __init__(self, datatype, headerfile, datafile=None, altitude=0.):
        """
        :param datatype: type of data, must be `GRAVITY` or `MAGNETIC`
        :type datatype: ``int``
        :param headerfile: ER Mapper header file (usually ends in .ers)
        :type headerfile: ``str``
        :param datafile: ER Mapper binary data file name. If not supplied the
                         name of the header file without '.ers' is assumed
        :type datafile: ``str``
        :param altitude: altitude of measurements above ground in meters
        :type altitude: ``float``
        """
        super(ErMapperData,self).__init__()
        self.__headerfile=headerfile
        if datafile is None:
            # by convention the data file name is the header file name
            # minus the '.ers' suffix
            self.__datafile=headerfile[:-4]
        else:
            self.__datafile=datafile
        self.__altitude=altitude
        self.__datatype=datatype
        self.__readHeader()

    def __readHeader(self):
        """
        Parses the ERS header file and initializes grid origin, spacing,
        number of points, null value and data scale factor.

        :raises RuntimeError: if the header is invalid or mandatory entries
                              are missing
        """
        self.logger.debug("Checking Data Source: %s (header: %s)"%(self.__datafile, self.__headerfile))
        # use a context manager so the header file handle is always closed
        with open(self.__headerfile, 'r') as header:
            metadata=header.readlines()
        # parse metadata
        start=-1
        for i in range(len(metadata)):
            if metadata[i].strip() == 'DatasetHeader Begin':
                start=i+1
        if start==-1:
            raise RuntimeError('Invalid ERS file (DatasetHeader not found)')

        # flatten nested 'X Begin'/'X End' sections into a dictionary with
        # dotted keys, e.g. 'RasterInfo.CellType'
        md_dict={}
        section=[]
        for i in range(start, len(metadata)):
            line=metadata[i]
            if line[-6:].strip() == 'Begin':
                section.append(line[:-6].strip())
            elif line[-4:].strip() == 'End':
                if len(section)>0:
                    section.pop()
            else:
                vals=line.split('=')
                if len(vals)==2:
                    key = vals[0].strip()
                    value = vals[1].strip()
                    fullkey='.'.join(section+[key])
                    md_dict[fullkey]=value

        try:
            if md_dict['RasterInfo.CellType'] != 'IEEE4ByteReal':
                raise RuntimeError('Unsupported data type '+md_dict['RasterInfo.CellType'])
        except KeyError:
            self.logger.warn("Cell type not specified. Assuming IEEE4ByteReal.")

        try:
            NX = int(md_dict['RasterInfo.NrOfCellsPerLine'])
            NY = int(md_dict['RasterInfo.NrOfLines'])
        except (KeyError, ValueError):
            raise RuntimeError("Could not determine extents of data")

        try:
            maskval = float(md_dict['RasterInfo.NullCellValue'])
        except (KeyError, ValueError):
            # fall back to the conventional null value
            maskval = 99999

        try:
            spacingX = float(md_dict['RasterInfo.CellInfo.Xdimension'])
            spacingY = float(md_dict['RasterInfo.CellInfo.Ydimension'])
        except (KeyError, ValueError):
            raise RuntimeError("Could not determine cell dimensions")

        try:
            if md_dict['CoordinateSpace.CoordinateType']=='EN':
                originX = float(md_dict['RasterInfo.RegistrationCoord.Eastings'])
                originY = float(md_dict['RasterInfo.RegistrationCoord.Northings'])
            elif md_dict['CoordinateSpace.CoordinateType']=='LL':
                originX = float(md_dict['RasterInfo.RegistrationCoord.Longitude'])
                originY = float(md_dict['RasterInfo.RegistrationCoord.Latitude'])
            else:
                raise RuntimeError("Unknown CoordinateType")
        except (KeyError, ValueError, RuntimeError):
            # missing or unrecognized coordinate information is not fatal
            self.logger.warn("Could not determine coordinate origin. Setting to (0.0, 0.0)")
            originX,originY = 0.0, 0.0

        if 'GEODETIC' in md_dict['CoordinateSpace.Projection']:
            # it appears we have lat/lon coordinates so need to convert
            # origin and spacing. Try using gdal to get the wkt if available:
            try:
                from osgeo import gdal
                ds=gdal.Open(self.__headerfile)
                wkt=ds.GetProjection()
            except Exception:
                wkt='GEOGCS["GEOCENTRIC DATUM of AUSTRALIA",DATUM["GDA94",SPHEROID["GRS80",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]'
                self.logger.warn('GDAL not available or file read error, assuming GDA94 data')
            originX_UTM,originY_UTM=LatLonToUTM(originX, originY, wkt)
            op1X,op1Y=LatLonToUTM(originX+spacingX, originY+spacingY, wkt)
            # we are rounding to avoid interpolation issues
            spacingX=np.round(op1X-originX_UTM)
            spacingY=np.round(op1Y-originY_UTM)
            originX=np.round(originX_UTM)
            originY=np.round(originY_UTM)

        # data sets have origin in top-left corner so y runs top-down
        self.__dataorigin=[originX, originY]
        originY-=(NY-1)*spacingY
        self.__delta = [spacingX, spacingY]
        self.__maskval = maskval
        self.__nPts = [NX, NY]
        self.__origin = [originX, originY]
        if self.__datatype == self.GRAVITY:
            self.logger.info("Assuming gravity data scale is 1e-6 m/s^2.")
            self.__scalefactor = 1e-6
        else:
            self.logger.info("Assuming magnetic data units are 'nT'.")
            self.__scalefactor = 1e-9

    def getDataExtents(self):
        """
        returns ( (x0, y0), (nx, ny), (dx, dy) )
        """
        return (list(self.__origin), list(self.__nPts), list(self.__delta))

    def getDataType(self):
        """
        Returns the type of survey data (`GRAVITY` or `MAGNETIC`).
        """
        return self.__datatype

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        Reads the binary grid and returns the data together with an error
        margin, both interpolated on the given ripley domain. See
        `DataSource.getSurveyData` for parameter details.
        """
        nValues=self.__nPts
        # determine base location of this dataset within the domain
        first=[int((self.__origin[i]-origin[i])/spacing[i]) for i in range(len(self.__nPts))]
        if domain.getDim()==3:
            first.append(int((self.__altitude-origin[2])/spacing[2]))
            nValues=nValues+[1]

        data=ripleycpp._readBinaryGrid(self.__datafile,
                ReducedFunction(domain),
                first, nValues, (), self.__maskval)
        # sigma is non-zero wherever an actual (non-null) value was read
        sigma = whereNonZero(data-self.__maskval)
        data = data*self.__scalefactor
        sigma = sigma * 2. * self.__scalefactor
        return data, sigma
306
307
308 ##############################################################################
class NetCdfData(DataSource):
    """
    Data Source for gridded netCDF data that use CF/COARDS conventions.
    """
    def __init__(self, datatype, filename, altitude=0.):
        """
        :param filename: file name for survey data in netCDF format
        :type filename: ``str``
        :param datatype: type of data, must be `GRAVITY` or `MAGNETIC`
        :type datatype: ``int``
        :param altitude: altitude of measurements in meters
        :type altitude: ``float``
        """
        super(NetCdfData,self).__init__()
        self.__filename=filename
        if not datatype in [self.GRAVITY,self.MAGNETIC]:
            raise ValueError("Invalid value for datatype parameter")
        self.__datatype=datatype
        self.__altitude=altitude
        self.__readMetadata()

    def __readMetadata(self):
        """
        Determines grid extents, origin, spacing, the name of the data
        variable and the null value from the netCDF file.

        :raises RuntimeError: if mandatory information cannot be determined
        """
        self.logger.debug("Checking Data Source: %s"%self.__filename)
        f=netcdf_file(self.__filename, 'r')
        try:
            # number of data points in x (longitude) direction
            NX=0
            for n in ['lon','longitude','x']:
                if n in f.dimensions:
                    NX=f.dimensions[n]
                    break
            if NX==0:
                raise RuntimeError("Could not determine extents of data")
            # number of data points in y (latitude) direction
            NY=0
            for n in ['lat','latitude','y']:
                if n in f.dimensions:
                    NY=f.dimensions[n]
                    break
            if NY==0:
                raise RuntimeError("Could not determine extents of data")

            # find longitude and latitude variables
            lon_name=None
            for n in ['lon','longitude']:
                if n in f.variables:
                    lon_name=n
                    longitude=f.variables.pop(n)
                    break
            if lon_name is None:
                raise RuntimeError("Could not determine longitude variable")
            lat_name=None
            for n in ['lat','latitude']:
                if n in f.variables:
                    lat_name=n
                    latitude=f.variables.pop(n)
                    break
            if lat_name is None:
                raise RuntimeError("Could not determine latitude variable")

            # try to figure out data variable name. The coordinate variables
            # were popped above, so a single remaining variable must be it.
            data_name=None
            if len(f.variables)==1:
                data_name=list(f.variables.keys())[0]
            else:
                for n in list(f.variables.keys()):
                    dims=f.variables[n].dimensions
                    if (lat_name in dims) and (lon_name in dims):
                        data_name=n
                        break
            if data_name is None:
                raise RuntimeError("Could not determine data variable")

            # try to determine value for unused data
            if hasattr(f.variables[data_name], 'missing_value'):
                maskval = float(f.variables[data_name].missing_value)
            elif hasattr(f.variables[data_name], '_FillValue'):
                maskval = float(f.variables[data_name]._FillValue)
            else:
                self.logger.debug("missing_value attribute not found, using default.")
                maskval = 99999

            # try to determine units of data - this is disabled for now
            #if hasattr(f.variables[data_name], 'units'):
            #   units=f.variables[data_name].units
            if self.__datatype == self.GRAVITY:
                self.logger.info("Assuming gravity data scale is 1e-6 m/s^2.")
                self.__scalefactor = 1e-6
            else:
                self.logger.info("Assuming magnetic data units are 'nT'.")
                self.__scalefactor = 1e-9

            # see if there is a wkt string to convert coordinates
            try:
                wkt_string=f.variables[data_name].esri_pe_string
            except AttributeError:
                wkt_string=None

            # we don't trust actual_range & geospatial_lon_min/max since subset
            # data does not seem to have these fields updated.
            # Getting min/max from the arrays is obviously not very efficient but..
            #lon_range=longitude.actual_range
            #lat_range=latitude.actual_range
            #lon_range=[f.geospatial_lon_min,f.geospatial_lon_max]
            #lat_range=[f.geospatial_lat_min,f.geospatial_lat_max]
            lon_range=longitude.data.min(),longitude.data.max()
            lat_range=latitude.data.min(),latitude.data.max()
            if lon_range[1]<180:
                # values look like degrees so convert to UTM meters
                lon_range,lat_range=LatLonToUTM(lon_range, lat_range, wkt_string)
            lengths=[lon_range[1]-lon_range[0], lat_range[1]-lat_range[0]]
        finally:
            # close the file even when the metadata turned out to be invalid
            f.close()

        self.__nPts=[NX, NY]
        self.__origin=[lon_range[0],lat_range[0]]
        # we are rounding to avoid interpolation issues
        self.__delta=[np.round(lengths[i]/self.__nPts[i]) for i in range(2)]
        self.__data_name=data_name
        self.__maskval=maskval

    def getDataExtents(self):
        """
        returns ( (x0, y0), (nx, ny), (dx, dy) )
        """
        return (list(self.__origin), list(self.__nPts), list(self.__delta))

    def getDataType(self):
        """
        Returns the type of survey data (`GRAVITY` or `MAGNETIC`).
        """
        return self.__datatype

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        Reads the netCDF grid and returns the data together with an error
        margin, both interpolated on the given ripley domain. See
        `DataSource.getSurveyData` for parameter details.
        """
        nValues=self.__nPts
        # determine base location of this dataset within the domain
        first=[int((self.__origin[i]-origin[i])/spacing[i]) for i in range(len(self.__nPts))]
        if domain.getDim()==3:
            first.append(int((self.__altitude-origin[2])/spacing[2]))
            nValues=nValues+[1]

        data=ripleycpp._readNcGrid(self.__filename, self.__data_name,
                ReducedFunction(domain), first, nValues, (), self.__maskval)
        # sigma is non-zero wherever an actual (non-null) value was read
        sigma=whereNonZero(data-self.__maskval)
        data=data*self.__scalefactor
        sigma=sigma * 2. * self.__scalefactor
        return data, sigma
451
452
453 ##############################################################################
class SourceFeature(object):
    """
    A feature adds a density distribution to (parts of) a domain of a synthetic
    data source, for example a layer of a specific rock type or a simulated
    ore body.
    """
    def getValue(self, x):
        """
        Returns the value for the area covered by mask. It can be constant
        or a data object with spatial dependency.

        :param x: locations at which the value is requested
        """
        raise NotImplementedError

    def getMask(self, x):
        """
        Returns the mask of the area of interest for this feature. That is,
        mask is non-zero where the value returned by `getValue` should be
        applied, zero elsewhere.

        :param x: locations at which the mask is requested
        """
        raise NotImplementedError
473
class SmoothAnomaly(SourceFeature):
    """
    A box-shaped anomaly whose value decays smoothly away from the center.
    The value and mask are computed lazily and cached on first request.
    """
    def __init__(self, lx, ly, lz, x, y, depth, v_inner=None, v_outer=None):
        """
        :param lx: extent of the anomaly box in x direction
        :param ly: extent of the anomaly box in y direction (3D only)
        :param lz: extent of the anomaly box in z (vertical) direction
        :param x: x coordinate of the center of the box
        :param y: y coordinate of the center of the box (3D only)
        :param depth: depth of the center of the box below the surface
        :param v_inner: value at the center of the anomaly. If this or
                        `v_outer` is ``None`` the feature evaluates to 0.
        :param v_outer: value away from the center of the anomaly
        """
        self.x=x
        self.y=y
        self.lx=lx
        self.ly=ly
        self.lz=lz
        self.depth=depth
        self.v_inner=v_inner
        self.v_outer=v_outer
        # caches for the results of getValue() and getMask()
        self.value=None
        self.mask=None

    def getValue(self,x):
        # compute the value on first call only, then return the cached result
        if self.value is None:
            if self.v_outer is None or self.v_inner is None:
                self.value=0
            else:
                DIM=x.getDomain().getDim()
                # decay rate derived from the ratio of outer to inner value
                alpha=-log(abs(self.v_outer/self.v_inner))*4
                # Gaussian-type profile centered at (self.x, ..., -self.depth)
                value=exp(-alpha*((x[0]-self.x)/self.lx)**2)
                value=value*exp(-alpha*((x[DIM-1]+self.depth)/self.lz)**2)
                # never drop below the magnitude of the outer value
                self.value=maximum(abs(self.v_outer), abs(self.v_inner*value))
                if self.v_inner<0: self.value=-self.value

        return self.value

    def getMask(self, x):
        DIM=x.getDomain().getDim()
        # box of extent lx (by ly) by lz centered at x (,y) and depth below
        # the surface; x[DIM-1] is the vertical coordinate
        m=whereNonNegative(x[DIM-1]+self.depth+self.lz/2) * whereNonPositive(x[DIM-1]+self.depth-self.lz/2) \
            *whereNonNegative(x[0]-(self.x-self.lx/2)) * whereNonPositive(x[0]-(self.x+self.lx/2))
        if DIM>2:
            m*=whereNonNegative(x[1]-(self.y-self.ly/2)) * whereNonPositive(x[1]-(self.y+self.ly/2))
        self.mask = m
        return m
509
510 ##############################################################################
class SyntheticData(DataSource):
    """
    Data source that creates synthetic gravity or magnetic survey data by
    solving a forward problem for a set of `SourceFeature` objects.
    """
    def __init__(self, datatype, DIM, NE, l, features):
        """
        :param datatype: type of data to generate, `GRAVITY` or `MAGNETIC`
        :type datatype: ``int``
        :param DIM: dimensionality of the model domain (2 or 3)
        :type DIM: ``int``
        :param NE: number of elements in each data dimension
        :type NE: ``int``
        :param l: edge length of the data area
        :type l: ``float``
        :param features: features that define the density or susceptibility
                         distribution (see `SourceFeature`)
        :type features: ``list``
        """
        super(SyntheticData,self).__init__()
        if not datatype in [self.GRAVITY,self.MAGNETIC]:
            raise ValueError("Invalid value for datatype parameter")
        self.__datatype = datatype
        self._features = features
        # the survey data itself is (DIM-1)-dimensional
        self.__origin = [0]*(DIM-1)
        self.__nPts = [NE]*(DIM-1)
        self.__delta = [float(l)/NE]*(DIM-1)
        self.DIM=DIM
        self.NE=NE
        self.l=l

    def getDataExtents(self):
        """
        returns ( (x0, y0), (nx, ny), (dx, dy) )
        """
        return (list(self.__origin), list(self.__nPts), list(self.__delta))

    def getDataType(self):
        """
        Returns the type of survey data (`GRAVITY` or `MAGNETIC`).
        """
        return self.__datatype

    def getReferenceDensity(self):
        """
        Returns the reference density Data object that was used to generate
        the gravity anomaly data.
        """
        return self._rho

    def getReferenceSusceptibility(self):
        """
        Returns the reference magnetic susceptibility Data objects that was
        used to generate the magnetic field data.
        """
        return self._k

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        Solves the forward problem for the configured features on the given
        domain and returns the resulting data and error margin. See
        `DataSource.getSurveyData` for parameter details.
        """
        pde=LinearSinglePDE(domain)
        G=U.Gravitational_Constant
        m_psi_ref=0.
        x=domain.getX()
        DIM=domain.getDim()
        # constrain the potential at the top of the domain
        m_psi_ref=whereZero(x[DIM-1]-sup(x[DIM-1])) # + whereZero(x[DIM-1]-inf(x[DIM-1]))
        if self.getDataType()==DataSource.GRAVITY:
            # overlay the features into a single density distribution
            rho_ref=0.
            for f in self._features:
                m=f.getMask(x)
                rho_ref = rho_ref * (1-m) + f.getValue(x) * m
            self._rho=rho_ref
            pde.setValue(A=kronecker(domain), Y=-4*np.pi*G*rho_ref, q=m_psi_ref)
        else:
            # overlay the features into a susceptibility distribution
            k_ref=0.
            for f in self._features:
                m=f.getMask(x)
                k_ref = k_ref * (1-m) + f.getValue(x) * m
            self._k=k_ref
            B_b = self.getBackgroundMagneticField()
            pde.setValue(A=kronecker(domain), X=k_ref*B_b, q=m_psi_ref)
        pde.setSymmetryOn()
        psi_ref=pde.getSolution()
        del pde
        # the measured field is (minus) the gradient of the potential
        if self.getDataType()==DataSource.GRAVITY:
            data = -grad(psi_ref, ReducedFunction(domain))
        else:
            data = self._k*B_b-grad(psi_ref, ReducedFunction(domain))

        sigma=1.
        # limit mask to non-padding in horizontal area
        x=ReducedFunction(domain).getX()
        for i in range(self.DIM-1):
            sigma=sigma * wherePositive(x[i]) \
                    * whereNegative(x[i]-(sup(x[i])+inf(x[i])))
        # limit mask to one cell thickness at z=0
        sigma = sigma * whereNonNegative(x[self.DIM-1]) \
                * whereNonPositive(x[self.DIM-1]-spacing[self.DIM-1])
        return data,sigma

    def getBackgroundMagneticField(self):
        """
        Returns the background magnetic flux density as a ``numpy.array``,
        computed from a dipole model of the Earth's field at a hard-coded
        latitude.
        """
        #FIXME:
        latitude=-28.5
        # colatitude in radians
        theta = (90-latitude)/180.*np.pi
        # dipole field: B_0 = mu_0 * m / (4 pi R^3)
        B_0=U.Mu_0 * U.Magnetic_Dipole_Moment_Earth / (4 * np.pi * U.R_Earth**3)
        B_theta= B_0 * sin(theta)
        B_r= 2 * B_0 * cos(theta)
        if self.DIM<3:
            return np.array([0., -B_r])
        else:
            return np.array([-B_theta, 0., -B_r])
597

  ViewVC Help
Powered by ViewVC 1.1.26