/[escript]/trunk/downunder/py_src/datasources.py
ViewVC logotype

Contents of /trunk/downunder/py_src/datasources.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4060 - (show annotations)
Fri Nov 9 03:50:19 2012 UTC (6 years, 8 months ago) by caltinay
File MIME type: text/x-python
File size: 22950 byte(s)
-Overhaul of inversion data sources. Domain is now generated and managed by separate class.
-Removed UBCDataSource which was only used for testing.
-Updated example scripts.

1
2 ##############################################################################
3 #
4 # Copyright (c) 2003-2012 by University of Queensland
5 # http://www.uq.edu.au
6 #
7 # Primary Business: Queensland, Australia
8 # Licensed under the Open Software License version 3.0
9 # http://www.opensource.org/licenses/osl-3.0.php
10 #
11 # Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 # Development since 2012 by School of Earth Sciences
13 #
14 ##############################################################################
15
16 """Data readers/providers for inversions"""
17
18 __copyright__="""Copyright (c) 2003-2012 by University of Queensland
19 http://www.uq.edu.au
20 Primary Business: Queensland, Australia"""
21 __license__="""Licensed under the Open Software License version 3.0
22 http://www.opensource.org/licenses/osl-3.0.php"""
23 __url__="https://launchpad.net/escript-finley"
24
25 __all__ = ['DataSource','ErMapperData','SyntheticData','SmoothAnomaly']
26
27 import logging
28 import numpy as np
29 from esys.escript import ReducedFunction
30 from esys.escript.linearPDEs import LinearSinglePDE
31 from esys.escript.util import *
32 import esys.escript.unitsSI as U
33 from esys.ripley import Brick, Rectangle, ripleycpp
34
35 try:
36 from scipy.io.netcdf import netcdf_file
37 __all__ += ['NetCdfData']
38 except:
39 pass
40
41 def LatLonToUTM(lon, lat, wkt_string=None):
42 """
43 Converts one or more longitude,latitude pairs to the corresponding x,y
44 coordinates in the Universal Transverse Mercator projection.
45
46 :note: If the ``pyproj`` module is not installed a warning is printed and
47 the input values are scaled by a constant and returned.
48 :note: If `wkt_string` is not given or invalid or the ``gdal`` module is
49 not available to convert the string, then the input values are
50 assumed to be given in the Clarke 1866 projection.
51
52 :param lon: longitude value(s)
53 :type lon: `float`, `list`, `tuple`, or ``numpy.array``
54 :param lat: latitude value(s)
55 :type lat: `float`, `list`, `tuple`, or ``numpy.array``
56 :rtype: ``numpy.array``
57 """
58
59 # not really optimal: if pyproj is not installed we return the input
60 # values scaled by a constant.
61 try:
62 import pyproj
63 except:
64 print("Warning, pyproj not available. Domain extents will be wrong")
65 return np.array(lon)*1000., np.array(lat)*1000.
66
67 # determine UTM zone from the input data
68 zone=int(np.median((np.floor((np.array(lon) + 180)/6) + 1) % 60))
69 try:
70 import osgeo.osr
71 srs = osgeo.osr.SpatialReference()
72 srs.ImportFromWkt(wkt_string)
73 p_src = pyproj.Proj(srs.ExportToProj4())
74 except:
75 p_src = pyproj.Proj('+proj=longlat +ellps=clrk66 +no_defs')
76 # we assume southern hemisphere here
77 p_dest = pyproj.Proj('+proj=utm +zone=%d +south +units=m'%zone)
78 x,y=pyproj.transform(p_src, p_dest, lon, lat)
79 return x,y
80
81
class DataSource(object):
    """
    A class that provides survey data for the inversion process.
    This is an abstract base class that implements common functionality.
    Methods to be overwritten by subclasses are marked as such.
    This class assumes 2D data which is mapped to a slice of a 3D domain.
    For other setups override the `_createDomain` method.
    """

    # enumeration of the supported survey data types
    GRAVITY, MAGNETIC = 0, 1

    def __init__(self):
        """
        Constructor. Sets some defaults and initializes logger.
        """
        self.logger = logging.getLogger('inv.%s' % type(self).__name__)
        self.__subsampling_factor = 1

    def getDataExtents(self):
        """
        returns a tuple of tuples ``( (x0, y0), (nx, ny), (dx, dy) )``, where

        - ``x0``, ``y0`` = coordinates of data origin
        - ``nx``, ``ny`` = number of data points in x and y
        - ``dx``, ``dy`` = spacing of data points in x and y

        This method must be implemented in subclasses.
        """
        raise NotImplementedError

    def getDataType(self):
        """
        Returns the type of survey data managed by this source.
        Subclasses must return `GRAVITY` or `MAGNETIC` as appropriate.
        """
        raise NotImplementedError

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        This method is called by the `DomainBuilder` to retrieve the survey
        data as `Data` objects on the given domain.
        Subclasses should return one or more `Data` objects with survey data
        interpolated on the given ripley domain. The exact return type
        depends on the type of data.

        :param domain: the escript domain to use
        :type domain: `esys.escript.Domain`
        :param origin: the origin coordinates of the domain
        :type origin: ``tuple`` or ``list``
        :param NE: the number of domain elements in each dimension
        :type NE: ``tuple`` or ``list``
        :param spacing: the cell sizes (node spacing) in the domain
        :type spacing: ``tuple`` or ``list``
        """
        raise NotImplementedError

    def setSubsamplingFactor(self, f):
        """
        Sets the data subsampling factor (default=1).
        The factor is applied in all dimensions. For example a 2D dataset
        with 300 x 150 data points will be reduced to 150 x 75 when a
        subsampling factor of 2 is used.
        This becomes important when adding data of varying resolution to
        a `DomainBuilder`.
        """
        self.__subsampling_factor = f

    def getSubsamplingFactor(self):
        """
        Returns the subsampling factor that was set via `setSubsamplingFactor`
        (see there).
        """
        return self.__subsampling_factor
155
156
157 ##############################################################################
class ErMapperData(DataSource):
    """
    Data Source for ER Mapper raster data.
    Note that this class only accepts a very specific type of ER Mapper data
    input and will raise an exception if other data is found.
    """
    def __init__(self, datatype, headerfile, datafile=None, altitude=0.):
        """
        :param datatype: type of data, must be `GRAVITY` or `MAGNETIC`
        :type datatype: ``int``
        :param headerfile: ER Mapper header file (usually ends in .ers)
        :type headerfile: ``str``
        :param datafile: ER Mapper binary data file name. If not supplied the
                         name of the header file without '.ers' is assumed
        :type datafile: ``str``
        :param altitude: altitude of measurements above ground in meters
        :type altitude: ``float``
        """
        super(ErMapperData, self).__init__()
        # validate datatype up front (consistent with NetCdfData)
        if datatype not in [self.GRAVITY, self.MAGNETIC]:
            raise ValueError("Invalid value for datatype parameter")
        self.__headerfile = headerfile
        if datafile is None:
            # by ER Mapper convention the data file name is the header file
            # name without the '.ers' suffix
            self.__datafile = headerfile[:-4]
        else:
            self.__datafile = datafile
        # NOTE: this must be called __altitude since getSurveyData() reads
        # self.__altitude (the previous name __altOfData broke 3D domains)
        self.__altitude = altitude
        self.__datatype = datatype
        self.__readHeader()

    def __readHeader(self):
        """
        Parses the ER Mapper header file and populates the grid metadata:
        origin, number of points, spacing, null (mask) value and the scale
        factor used to convert raw values to SI units.

        :raises RuntimeError: if the header is invalid or mandatory fields
                              (extents, cell dimensions) are missing
        """
        self.logger.debug("Checking Data Source: %s (header: %s)"%(self.__datafile, self.__headerfile))
        with open(self.__headerfile, 'r') as f:
            metadata = f.readlines()
        # locate the start of the DatasetHeader section
        start = -1
        for i in range(len(metadata)):
            if metadata[i].strip() == 'DatasetHeader Begin':
                start = i + 1
        if start == -1:
            raise RuntimeError('Invalid ERS file (DatasetHeader not found)')

        # flatten the nested Begin/End sections into a dictionary with
        # dot-separated keys, e.g. 'RasterInfo.CellInfo.Xdimension'
        md_dict = {}
        section = []
        for i in range(start, len(metadata)):
            line = metadata[i]
            if line[-6:].strip() == 'Begin':
                section.append(line[:-6].strip())
            elif line[-4:].strip() == 'End':
                if len(section) > 0:
                    section.pop()
            else:
                vals = line.split('=')
                if len(vals) == 2:
                    key = vals[0].strip()
                    value = vals[1].strip()
                    fullkey = '.'.join(section + [key])
                    md_dict[fullkey] = value

        try:
            if md_dict['RasterInfo.CellType'] != 'IEEE4ByteReal':
                raise RuntimeError('Unsupported data type '+md_dict['RasterInfo.CellType'])
        except KeyError:
            self.logger.warning("Cell type not specified. Assuming IEEE4ByteReal.")

        try:
            NX = int(md_dict['RasterInfo.NrOfCellsPerLine'])
            NY = int(md_dict['RasterInfo.NrOfLines'])
        except (KeyError, ValueError):
            raise RuntimeError("Could not determine extents of data")

        try:
            maskval = float(md_dict['RasterInfo.NullCellValue'])
        except (KeyError, ValueError):
            maskval = 99999

        try:
            spacingX = float(md_dict['RasterInfo.CellInfo.Xdimension'])
            spacingY = float(md_dict['RasterInfo.CellInfo.Ydimension'])
        except (KeyError, ValueError):
            raise RuntimeError("Could not determine cell dimensions")

        try:
            coordtype = md_dict['CoordinateSpace.CoordinateType']
            if coordtype == 'EN':
                originX = float(md_dict['RasterInfo.RegistrationCoord.Eastings'])
                originY = float(md_dict['RasterInfo.RegistrationCoord.Northings'])
            elif coordtype == 'LL':
                originX = float(md_dict['RasterInfo.RegistrationCoord.Longitude'])
                originY = float(md_dict['RasterInfo.RegistrationCoord.Latitude'])
            else:
                # unknown type is treated like a missing one (see except)
                raise ValueError("Unknown CoordinateType")
        except (KeyError, ValueError):
            self.logger.warning("Could not determine coordinate origin. Setting to (0.0, 0.0)")
            originX, originY = 0.0, 0.0

        # use .get() so a header without a Projection entry does not crash
        if 'GEODETIC' in md_dict.get('CoordinateSpace.Projection', ''):
            # it appears we have lat/lon coordinates so need to convert
            # origin and spacing. Try using gdal to get the wkt if available:
            try:
                from osgeo import gdal
                ds = gdal.Open(self.__headerfile)
                wkt = ds.GetProjection()
            except Exception:
                wkt = 'GEOGCS["GEOCENTRIC DATUM of AUSTRALIA",DATUM["GDA94",SPHEROID["GRS80",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]'
                self.logger.warning('GDAL not available or file read error, assuming GDA94 data')
            originX_UTM, originY_UTM = LatLonToUTM(originX, originY, wkt)
            op1X, op1Y = LatLonToUTM(originX+spacingX, originY+spacingY, wkt)
            # we are rounding to avoid interpolation issues
            spacingX = np.round(op1X-originX_UTM)
            spacingY = np.round(op1Y-originY_UTM)
            originX = np.round(originX_UTM)
            originY = np.round(originY_UTM)

        # data sets have origin in top-left corner so y runs top-down
        self.__dataorigin = [originX, originY]
        originY -= (NY-1)*spacingY
        self.__delta = [spacingX, spacingY]
        self.__maskval = maskval
        self.__nPts = [NX, NY]
        self.__origin = [originX, originY]
        if self.__datatype == self.GRAVITY:
            self.logger.info("Assuming gravity data scale is 1e-6 m/s^2.")
            self.__scalefactor = 1e-6
        else:
            self.logger.info("Assuming magnetic data units are 'nT'.")
            self.__scalefactor = 1e-9

    def getDataExtents(self):
        """
        returns ( (x0, y0), (nx, ny), (dx, dy) )
        """
        return (list(self.__origin), list(self.__nPts), list(self.__delta))

    def getDataType(self):
        """
        Returns the type of survey data (`GRAVITY` or `MAGNETIC`).
        Previously missing; required by the `DataSource` interface and
        provided by the sibling `NetCdfData` class.
        """
        return self.__datatype

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        Returns the survey data and a sigma mask interpolated on the given
        ripley domain. See `DataSource.getSurveyData` for the parameters.
        """
        nValues = self.__nPts
        # determine base location of this dataset within the domain
        first = [int((self.__origin[i]-origin[i])/spacing[i]) for i in range(len(self.__nPts))]
        if domain.getDim() == 3:
            first.append(int((self.__altitude-origin[2])/spacing[2]))
            nValues = nValues + [1]

        data = ripleycpp._readBinaryGrid(self.__datafile,
                                         ReducedFunction(domain),
                                         first, nValues, (), self.__maskval)
        # sigma is non-zero exactly where real (non-null) data was read
        sigma = whereNonZero(data-self.__maskval)
        data = data*self.__scalefactor
        sigma = sigma*self.__scalefactor
        return data, sigma
303
304
305 ##############################################################################
class NetCdfData(DataSource):
    """
    Data Source for gridded netCDF data that use CF/COARDS conventions.
    """
    def __init__(self, datatype, filename, altitude=0.):
        """
        :param datatype: type of data, must be `GRAVITY` or `MAGNETIC`
        :type datatype: ``int``
        :param filename: file name for survey data in netCDF format
        :type filename: ``str``
        :param altitude: altitude of measurements in meters
        :type altitude: ``float``
        """
        super(NetCdfData, self).__init__()
        self.__filename = filename
        if datatype not in [self.GRAVITY, self.MAGNETIC]:
            raise ValueError("Invalid value for datatype parameter")
        self.__datatype = datatype
        self.__altitude = altitude
        self.__readMetadata()

    def __readMetadata(self):
        """
        Reads grid extents, the data variable name, the mask value and the
        coordinate ranges from the netCDF file and stores them on self.

        :raises RuntimeError: if the extents or the longitude/latitude/data
                              variables cannot be determined
        """
        self.logger.debug("Checking Data Source: %s"%self.__filename)
        f = netcdf_file(self.__filename, 'r')
        # determine the number of grid points from the dimension names
        NX = 0
        for n in ['lon','longitude','x']:
            if n in f.dimensions:
                NX = f.dimensions[n]
                break
        if NX == 0:
            raise RuntimeError("Could not determine extents of data")
        NY = 0
        for n in ['lat','latitude','y']:
            if n in f.dimensions:
                NY = f.dimensions[n]
                break
        if NY == 0:
            raise RuntimeError("Could not determine extents of data")

        # find longitude and latitude variables; popping them leaves only
        # candidate data variables behind in f.variables
        lon_name = None
        for n in ['lon','longitude']:
            if n in f.variables:
                lon_name = n
                longitude = f.variables.pop(n)
                break
        if lon_name is None:
            raise RuntimeError("Could not determine longitude variable")
        lat_name = None
        for n in ['lat','latitude']:
            if n in f.variables:
                lat_name = n
                latitude = f.variables.pop(n)
                break
        if lat_name is None:
            raise RuntimeError("Could not determine latitude variable")

        # try to figure out data variable name
        data_name = None
        if len(f.variables) == 1:
            # list() is required on Python 3 where keys() returns a view
            # that does not support indexing
            data_name = list(f.variables.keys())[0]
        else:
            for n in list(f.variables.keys()):
                dims = f.variables[n].dimensions
                if (lat_name in dims) and (lon_name in dims):
                    data_name = n
                    break
        if data_name is None:
            raise RuntimeError("Could not determine data variable")

        # try to determine value for unused data
        if hasattr(f.variables[data_name], 'missing_value'):
            maskval = float(f.variables[data_name].missing_value)
        elif hasattr(f.variables[data_name], '_FillValue'):
            maskval = float(f.variables[data_name]._FillValue)
        else:
            self.logger.debug("missing_value attribute not found, using default.")
            maskval = 99999

        # try to determine units of data - this is disabled for now
        #if hasattr(f.variables[data_name], 'units'):
        #   units=f.variables[data_name].units
        if self.__datatype == self.GRAVITY:
            self.logger.info("Assuming gravity data scale is 1e-6 m/s^2.")
            self.__scalefactor = 1e-6
        else:
            self.logger.info("Assuming magnetic data units are 'nT'.")
            self.__scalefactor = 1e-9

        # see if there is a wkt string to convert coordinates
        wkt_string = getattr(f.variables[data_name], 'esri_pe_string', None)

        # we don't trust actual_range & geospatial_lon_min/max since subset
        # data does not seem to have these fields updated.
        # Getting min/max from the arrays is obviously not very efficient but..
        #lon_range=longitude.actual_range
        #lat_range=latitude.actual_range
        #lon_range=[f.geospatial_lon_min,f.geospatial_lon_max]
        #lat_range=[f.geospatial_lat_min,f.geospatial_lat_max]
        lon_range = longitude.data.min(), longitude.data.max()
        lat_range = latitude.data.min(), latitude.data.max()
        lon_range, lat_range = LatLonToUTM(lon_range, lat_range, wkt_string)
        lengths = [lon_range[1]-lon_range[0], lat_range[1]-lat_range[0]]
        f.close()

        self.__nPts = [NX, NY]
        self.__origin = [lon_range[0], lat_range[0]]
        # we are rounding to avoid interpolation issues
        self.__delta = [np.round(lengths[i]/self.__nPts[i]) for i in range(2)]
        self.__data_name = data_name
        self.__maskval = maskval

    def getDataExtents(self):
        """
        returns ( (x0, y0), (nx, ny), (dx, dy) )
        """
        return (list(self.__origin), list(self.__nPts), list(self.__delta))

    def getDataType(self):
        """
        Returns the type of survey data (`GRAVITY` or `MAGNETIC`).
        """
        return self.__datatype

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        Returns the survey data and a sigma mask interpolated on the given
        ripley domain. See `DataSource.getSurveyData` for the parameters.
        """
        nValues = self.__nPts
        # determine base location of this dataset within the domain
        first = [int((self.__origin[i]-origin[i])/spacing[i]) for i in range(len(self.__nPts))]
        if domain.getDim() == 3:
            first.append(int((self.__altitude-origin[2])/spacing[2]))
            nValues = nValues + [1]

        data = ripleycpp._readNcGrid(self.__filename, self.__data_name,
                ReducedFunction(domain), first, nValues, (), self.__maskval)
        sigma = whereNonZero(data-self.__maskval)
        data = data*self.__scalefactor
        # NOTE(review): sigma is scaled by an extra factor 2 here, unlike
        # ErMapperData -- looks intentional but worth confirming
        sigma = sigma * 2. * self.__scalefactor
        return data, sigma
447
448
449 ##############################################################################
class SourceFeature(object):
    """
    A feature adds a density distribution to (parts of) a domain of a
    synthetic data source, for example a layer of a specific rock type
    or a simulated ore body.

    This is an interface class; subclasses must implement `getValue` and
    `getMask`.
    """

    def getValue(self):
        """
        Returns the value for the area covered by mask. It can be constant
        or a data object with spatial dependency.
        """
        raise NotImplementedError

    def getMask(self, x):
        """
        Returns the mask of the area of interest for this feature. That is,
        mask is non-zero where the density returned by getDensity() should be
        applied, zero elsewhere.
        """
        raise NotImplementedError
469
class SmoothAnomaly(SourceFeature):
    """
    A source feature in the shape of a box of size lx x ly x lz centered at
    horizontal position (x, y) at the given depth. Inside the box the value
    decays smoothly (Gaussian-like) away from the center.
    """
    def __init__(self, lx, ly, lz, x, y, depth, v_inner=None, v_outer=None):
        """
        :param lx: extent of the feature in x direction
        :param ly: extent of the feature in y direction (unused in 2D)
        :param lz: extent (thickness) of the feature in z direction
        :param x: x coordinate of the feature center
        :param y: y coordinate of the feature center (unused in 2D)
        :param depth: depth of the feature center below the surface
        :param v_inner: value at the center of the feature; if `None` the
                        feature evaluates to 0
        :param v_outer: value at the edge of the feature; if `None` the
                        feature evaluates to 0
        """
        self.x=x
        self.y=y
        self.lx=lx
        self.ly=ly
        self.lz=lz
        self.depth=depth
        self.v_inner=v_inner
        self.v_outer=v_outer
        # value and mask are computed lazily in getValue()/getMask()
        self.value=None
        self.mask=None

    def getValue(self,x):
        # the spatial value is computed once on first call and cached
        if self.value is None:
            if self.v_outer is None or self.v_inner is None:
                self.value=0
            else:
                DIM=x.getDomain().getDim()
                # decay rate of the Gaussian profile; presumably chosen so
                # the magnitude falls to about |v_outer| at the box edges
                # -- TODO confirm against original design notes
                alpha=-log(abs(self.v_outer/self.v_inner))*4
                value=exp(-alpha*((x[0]-self.x)/self.lx)**2)
                value=value*exp(-alpha*((x[DIM-1]+self.depth)/self.lz)**2)
                # floor the magnitude at |v_outer|, then restore the sign
                # of v_inner
                self.value=maximum(abs(self.v_outer), abs(self.v_inner*value))
                if self.v_inner<0: self.value=-self.value

        return self.value

    def getMask(self, x):
        # non-zero inside the box [x-lx/2, x+lx/2] x [y-ly/2, y+ly/2]
        # x [-depth-lz/2, -depth+lz/2] (y factor only applied in 3D)
        DIM=x.getDomain().getDim()
        m=whereNonNegative(x[DIM-1]+self.depth+self.lz/2) * whereNonPositive(x[DIM-1]+self.depth-self.lz/2) \
            *whereNonNegative(x[0]-(self.x-self.lx/2)) * whereNonPositive(x[0]-(self.x+self.lx/2))
        if DIM>2:
            m*=whereNonNegative(x[1]-(self.y-self.ly/2)) * whereNonPositive(x[1]-(self.y+self.ly/2))
        self.mask = m
        return m
505
506 ##############################################################################
class SyntheticData(DataSource):
    """
    Data source that produces synthetic gravity or magnetic survey data by
    forward-modelling a set of `SourceFeature` objects on the fly via a
    potential PDE solve.
    """
    def __init__(self, datatype, DIM, NE, l, features):
        """
        :param datatype: type of data to generate, `GRAVITY` or `MAGNETIC`
        :type datatype: ``int``
        :param DIM: dimensionality of the domain (2 or 3)
        :type DIM: ``int``
        :param NE: number of elements per dimension of the data region
        :type NE: ``int``
        :param l: side length of the data region
        :type l: ``float``
        :param features: features to forward-model
        :type features: ``list`` of `SourceFeature`
        :raises ValueError: if `datatype` is not `GRAVITY` or `MAGNETIC`
        """
        super(SyntheticData,self).__init__()
        if not datatype in [self.GRAVITY,self.MAGNETIC]:
            raise ValueError("Invalid value for datatype parameter")
        self.__datatype = datatype
        self._features = features
        # the data extents describe the (DIM-1)-dimensional survey surface
        self.__origin = [0]*(DIM-1)
        self.__nPts = [NE]*(DIM-1)
        self.__delta = [float(l)/NE]*(DIM-1)
        self.DIM=DIM
        self.NE=NE
        self.l=l

    def getDataExtents(self):
        """
        returns ( (x0, y0), (nx, ny), (dx, dy) )
        """
        return (list(self.__origin), list(self.__nPts), list(self.__delta))

    def getDataType(self):
        """
        Returns the type of survey data (`GRAVITY` or `MAGNETIC`).
        """
        return self.__datatype

    def getReferenceDensity(self):
        """
        Returns the reference density Data object that was used to generate
        the gravity anomaly data.
        """
        return self._rho

    def getReferenceSusceptibility(self):
        """
        Returns the reference magnetic susceptibility Data objects that was
        used to generate the magnetic field data.
        """
        return self._k

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        Forward-models the features on the given domain and returns the
        resulting field data together with a sigma mask marking where the
        data is considered valid.
        See `DataSource.getSurveyData` for the parameters.
        """
        pde=LinearSinglePDE(domain)
        G=U.Gravitational_Constant
        m_psi_ref=0.
        x=domain.getX()
        # fix the potential psi to zero on all domain boundaries
        for i in range(self.DIM):
            m_psi_ref=m_psi_ref + whereZero(x[i]-inf(x[i])) \
                    + whereZero(x[i]-sup(x[i]))

        if self.getDataType()==DataSource.GRAVITY:
            # combine the feature values into a density field; later
            # features override earlier ones where masks overlap
            rho_ref=0.
            for f in self._features:
                m=f.getMask(x)
                rho_ref = rho_ref * (1-m) + f.getValue(x) * m
            self._rho=rho_ref
            # Poisson equation for the gravity potential
            pde.setValue(A=kronecker(domain), Y=-4*np.pi*G*rho_ref, q=m_psi_ref)
        else:
            # combine the feature values into a susceptibility field
            k_ref=0.
            for f in self._features:
                m=f.getMask(x)
                k_ref = k_ref * (1-m) + f.getValue(x) * m
            self._k=k_ref
            B_b = self.getBackgroundMagneticField()
            pde.setValue(A=kronecker(domain), X=(1+k_ref)*B_b, q=m_psi_ref)
        pde.setSymmetryOn()
        psi_ref=pde.getSolution()
        del pde
        # the measured field is (minus) the gradient of the potential,
        # plus the background field in the magnetic case
        if self.getDataType()==DataSource.GRAVITY:
            data = -grad(psi_ref, ReducedFunction(domain))
        else:
            data = (1+self._k)*B_b-grad(psi_ref, ReducedFunction(domain))

        sigma=1.
        # limit mask to non-padding in horizontal area
        # NOTE(review): the upper bound uses sup(x)+inf(x) -- looks like it
        # excludes the padded region, but worth confirming the intent
        x=ReducedFunction(domain).getX()
        for i in range(self.DIM-1):
            sigma=sigma * wherePositive(x[i]) \
                    * whereNegative(x[i]-(sup(x[i])+inf(x[i])))
        # limit mask to one cell thickness at z=0
        sigma = sigma * whereNonNegative(x[self.DIM-1]) \
                * whereNonPositive(x[self.DIM-1]-spacing[self.DIM-1])
        return data,sigma

    def getBackgroundMagneticField(self):
        """
        Returns the Earth's background magnetic field as a ``numpy.array``
        (2 or 3 components depending on the domain dimensionality), using a
        dipole approximation at a hard-coded latitude.
        """
        #FIXME: latitude is hard-coded; should be a parameter
        latitude=-28.0
        theta = (90-latitude)/180.*np.pi
        B_0=U.Mu_0 * U.Magnetic_Dipole_Moment_Earth / (4 * np.pi * U.R_Earth**3)
        B_theta= B_0 * sin(theta)
        B_r= 2 * B_0 * cos(theta)
        if self.DIM<3:
            return np.array([0., -B_r])
        else:
            return np.array([-B_theta, 0., -B_r])
595

  ViewVC Help
Powered by ViewVC 1.1.26