/[escript]/trunk/downunder/py_src/datasources.py
ViewVC logotype

Contents of /trunk/downunder/py_src/datasources.py

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4079 - (show annotations)
Fri Nov 16 07:59:01 2012 UTC (6 years, 11 months ago) by gross
File MIME type: text/x-python
File size: 22988 byte(s)
some modifications to scaling in downunder. still not perfect.
1
2 ##############################################################################
3 #
4 # Copyright (c) 2003-2012 by University of Queensland
5 # http://www.uq.edu.au
6 #
7 # Primary Business: Queensland, Australia
8 # Licensed under the Open Software License version 3.0
9 # http://www.opensource.org/licenses/osl-3.0.php
10 #
11 # Development until 2012 by Earth Systems Science Computational Center (ESSCC)
12 # Development since 2012 by School of Earth Sciences
13 #
14 ##############################################################################
15
16 """Data readers/providers for inversions"""
17
18 __copyright__="""Copyright (c) 2003-2012 by University of Queensland
19 http://www.uq.edu.au
20 Primary Business: Queensland, Australia"""
21 __license__="""Licensed under the Open Software License version 3.0
22 http://www.opensource.org/licenses/osl-3.0.php"""
23 __url__="https://launchpad.net/escript-finley"
24
25 __all__ = ['DataSource','ErMapperData','SyntheticData','SmoothAnomaly']
26
27 import logging
28 import numpy as np
29 from esys.escript import ReducedFunction
30 from esys.escript.linearPDEs import LinearSinglePDE
31 from esys.escript.util import *
32 import esys.escript.unitsSI as U
33 from esys.ripley import Brick, Rectangle, ripleycpp
34
35 try:
36 from scipy.io.netcdf import netcdf_file
37 __all__ += ['NetCdfData']
38 except:
39 pass
40
41 def LatLonToUTM(lon, lat, wkt_string=None):
42 """
43 Converts one or more longitude,latitude pairs to the corresponding x,y
44 coordinates in the Universal Transverse Mercator projection.
45
46 :note: If the ``pyproj`` module is not installed a warning is printed and
47 the input values are scaled by a constant and returned.
48 :note: If `wkt_string` is not given or invalid or the ``gdal`` module is
49 not available to convert the string, then the input values are
50 assumed to be given in the Clarke 1866 projection.
51
52 :param lon: longitude value(s)
53 :type lon: `float`, `list`, `tuple`, or ``numpy.array``
54 :param lat: latitude value(s)
55 :type lat: `float`, `list`, `tuple`, or ``numpy.array``
56 :rtype: ``numpy.array``
57 """
58
59 # not really optimal: if pyproj is not installed we return the input
60 # values scaled by a constant.
61 try:
62 import pyproj
63 except:
64 print("Warning, pyproj not available. Domain extents will be wrong")
65 return np.array(lon)*1000., np.array(lat)*1000.
66
67 # determine UTM zone from the input data
68 zone=int(np.median((np.floor((np.array(lon) + 180)/6) + 1) % 60))
69 try:
70 import osgeo.osr
71 srs = osgeo.osr.SpatialReference()
72 srs.ImportFromWkt(wkt_string)
73 p_src = pyproj.Proj(srs.ExportToProj4())
74 except:
75 p_src = pyproj.Proj('+proj=longlat +ellps=clrk66 +no_defs')
76 # we assume southern hemisphere here
77 p_dest = pyproj.Proj('+proj=utm +zone=%d +south +units=m'%zone)
78 x,y=pyproj.transform(p_src, p_dest, lon, lat)
79 return x,y
80
81
class DataSource(object):
    """
    Supplies survey data to the inversion process.

    This abstract base class implements the functionality shared by all
    data sources; methods that subclasses are expected to overwrite are
    marked as such. 2D data is assumed and mapped to a slice of a 3D
    domain. Override the `_createDomain` method for other setups.
    """

    # identifiers for the supported kinds of survey data
    GRAVITY, MAGNETIC = 0, 1

    def __init__(self):
        """
        Sets up the instance logger and the default subsampling factor.
        """
        self.logger = logging.getLogger('inv.%s'%self.__class__.__name__)
        self.__subsampling_factor = 1

    def getDataExtents(self):
        """
        Returns a tuple of tuples ``( (x0, y0), (nx, ny), (dx, dy) )`` where

        - ``x0``, ``y0`` = coordinates of data origin
        - ``nx``, ``ny`` = number of data points in x and y
        - ``dx``, ``dy`` = spacing of data points in x and y

        Subclasses must implement this method.
        """
        raise NotImplementedError

    def getDataType(self):
        """
        Returns the type of survey data managed by this source, which is
        one of `GRAVITY` or `MAGNETIC`. Subclasses must implement this.
        """
        raise NotImplementedError

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        Called by the `DomainBuilder` to retrieve the survey data as `Data`
        objects on the given domain. Implementations should return one or
        more `Data` objects with survey data interpolated on the given
        ripley domain; the exact return type depends on the type of data.

        :param domain: the escript domain to use
        :type domain: `esys.escript.Domain`
        :param origin: the origin coordinates of the domain
        :type origin: ``tuple`` or ``list``
        :param NE: the number of domain elements in each dimension
        :type NE: ``tuple`` or ``list``
        :param spacing: the cell sizes (node spacing) in the domain
        :type spacing: ``tuple`` or ``list``
        """
        raise NotImplementedError

    def setSubsamplingFactor(self, f):
        """
        Sets the data subsampling factor (default=1), which is applied in
        every dimension: e.g. a 2D dataset of 300 x 150 points is reduced
        to 150 x 75 with a factor of 2. This matters when combining data
        of varying resolution in a `DomainBuilder`.
        """
        self.__subsampling_factor = f

    def getSubsamplingFactor(self):
        """
        Returns the current subsampling factor (see
        `setSubsamplingFactor`).
        """
        return self.__subsampling_factor
155
156
157 ##############################################################################
class ErMapperData(DataSource):
    """
    Data Source for ER Mapper raster data.
    Note that this class only accepts a very specific type of ER Mapper data
    input and will raise an exception if other data is found.
    """
    def __init__(self, datatype, headerfile, datafile=None, altitude=0.):
        """
        :param datatype: type of data, must be `GRAVITY` or `MAGNETIC`
        :type datatype: ``int``
        :param headerfile: ER Mapper header file (usually ends in .ers)
        :type headerfile: ``str``
        :param datafile: ER Mapper binary data file name. If not supplied the
                         name of the header file without '.ers' is assumed
        :type datafile: ``str``
        :param altitude: altitude of measurements above ground in meters
        :type altitude: ``float``
        """
        super(ErMapperData,self).__init__()
        self.__headerfile=headerfile
        if datafile is None:
            # default data file name: header file name minus '.ers'
            self.__datafile=headerfile[:-4]
        else:
            self.__datafile=datafile
        self.__altitude=altitude
        self.__datatype=datatype
        self.__readHeader()

    def __readHeader(self):
        """
        Parses the ER Mapper ASCII header to determine grid size, origin,
        cell spacing, null value and data scale factor, storing the results
        in private attributes for later use by `getSurveyData`.
        """
        self.logger.debug("Checking Data Source: %s (header: %s)"%(self.__datafile, self.__headerfile))
        metadata=open(self.__headerfile, 'r').readlines()
        # parse metadata: locate the line after 'DatasetHeader Begin'
        start=-1
        for i in range(len(metadata)):
            if metadata[i].strip() == 'DatasetHeader Begin':
                start=i+1
        if start==-1:
            raise RuntimeError('Invalid ERS file (DatasetHeader not found)')

        # flatten the nested 'X Begin'/'X End' sections into a flat dict
        # keyed by dotted section paths, e.g. 'RasterInfo.NrOfLines'
        md_dict={}
        section=[]
        for i in range(start, len(metadata)):
            line=metadata[i]
            if line[-6:].strip() == 'Begin':
                section.append(line[:-6].strip())
            elif line[-4:].strip() == 'End':
                if len(section)>0:
                    section.pop()
            else:
                vals=line.split('=')
                if len(vals)==2:
                    key = vals[0].strip()
                    value = vals[1].strip()
                    fullkey='.'.join(section+[key])
                    md_dict[fullkey]=value

        # only 4-byte IEEE floats are supported by the binary reader
        try:
            if md_dict['RasterInfo.CellType'] != 'IEEE4ByteReal':
                raise RuntimeError('Unsupported data type '+md_dict['RasterInfo.CellType'])
        except KeyError:
            self.logger.warn("Cell type not specified. Assuming IEEE4ByteReal.")

        # grid extents (number of cells in x and y)
        try:
            NX = int(md_dict['RasterInfo.NrOfCellsPerLine'])
            NY = int(md_dict['RasterInfo.NrOfLines'])
        except:
            raise RuntimeError("Could not determine extents of data")

        # value that marks missing/unused cells in the data file
        try:
            maskval = float(md_dict['RasterInfo.NullCellValue'])
        except:
            maskval = 99999

        # cell spacing in x and y
        try:
            spacingX = float(md_dict['RasterInfo.CellInfo.Xdimension'])
            spacingY = float(md_dict['RasterInfo.CellInfo.Ydimension'])
        except:
            raise RuntimeError("Could not determine cell dimensions")

        # registration coordinate: either eastings/northings ('EN') or
        # longitude/latitude ('LL')
        try:
            if md_dict['CoordinateSpace.CoordinateType']=='EN':
                originX = float(md_dict['RasterInfo.RegistrationCoord.Eastings'])
                originY = float(md_dict['RasterInfo.RegistrationCoord.Northings'])
            elif md_dict['CoordinateSpace.CoordinateType']=='LL':
                originX = float(md_dict['RasterInfo.RegistrationCoord.Longitude'])
                originY = float(md_dict['RasterInfo.RegistrationCoord.Latitude'])
            else:
                raise RuntimeError("Unknown CoordinateType")
        except:
            self.logger.warn("Could not determine coordinate origin. Setting to (0.0, 0.0)")
            originX,originY = 0.0, 0.0

        if 'GEODETIC' in md_dict['CoordinateSpace.Projection']:
            # it appears we have lat/lon coordinates so need to convert
            # origin and spacing. Try using gdal to get the wkt if available:
            try:
                from osgeo import gdal
                ds=gdal.Open(self.__headerfile)
                wkt=ds.GetProjection()
            except:
                wkt='GEOGCS["GEOCENTRIC DATUM of AUSTRALIA",DATUM["GDA94",SPHEROID["GRS80",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["degree",0.0174532925199433]]'
                self.logger.warn('GDAL not available or file read error, assuming GDA94 data')
            # convert origin and the point one cell away, take the
            # difference to obtain the spacing in meters
            originX_UTM,originY_UTM=LatLonToUTM(originX, originY, wkt)
            op1X,op1Y=LatLonToUTM(originX+spacingX, originY+spacingY, wkt)
            # we are rounding to avoid interpolation issues
            spacingX=np.round(op1X-originX_UTM)
            spacingY=np.round(op1Y-originY_UTM)
            originX=np.round(originX_UTM)
            originY=np.round(originY_UTM)

        # data sets have origin in top-left corner so y runs top-down;
        # shift the stored origin to the bottom-left corner
        self.__dataorigin=[originX, originY]
        originY-=(NY-1)*spacingY
        self.__delta = [spacingX, spacingY]
        self.__maskval = maskval
        self.__nPts = [NX, NY]
        self.__origin = [originX, originY]
        if self.__datatype == self.GRAVITY:
            self.logger.info("Assuming gravity data scale is 1e-6 m/s^2.")
            self.__scalefactor = 1e-6
        else:
            self.logger.info("Assuming magnetic data units are 'nT'.")
            self.__scalefactor = 1e-9

    def getDataExtents(self):
        """
        returns ( (x0, y0), (nx, ny), (dx, dy) )
        """
        return (list(self.__origin), list(self.__nPts), list(self.__delta))

    def getDataType(self):
        """
        Returns the data type set in the constructor (`GRAVITY` or
        `MAGNETIC`).
        """
        return self.__datatype

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        Reads the binary grid into a `Data` object on the given domain and
        returns it together with a sigma mask that is non-zero wherever a
        valid (non-null) data value is present.
        """
        nValues=self.__nPts
        # determine base location of this dataset within the domain
        first=[int((self.__origin[i]-origin[i])/spacing[i]) for i in range(len(self.__nPts))]
        if domain.getDim()==3:
            # place the 2D slice at the measurement altitude
            first.append(int((self.__altitude-origin[2])/spacing[2]))
            nValues=nValues+[1]

        data=ripleycpp._readBinaryGrid(self.__datafile,
                ReducedFunction(domain),
                first, nValues, (), self.__maskval)
        sigma = whereNonZero(data-self.__maskval)
        data = data*self.__scalefactor
        sigma = sigma * 2. * self.__scalefactor
        return data, sigma
306
307
308 ##############################################################################
class NetCdfData(DataSource):
    """
    Data Source for gridded netCDF data that use CF/COARDS conventions.
    """
    def __init__(self, datatype, filename, altitude=0.):
        """
        :param datatype: type of data, must be `GRAVITY` or `MAGNETIC`
        :type datatype: ``int``
        :param filename: file name for survey data in netCDF format
        :type filename: ``str``
        :param altitude: altitude of measurements in meters
        :type altitude: ``float``
        """
        super(NetCdfData,self).__init__()
        self.__filename=filename
        if not datatype in [self.GRAVITY,self.MAGNETIC]:
            raise ValueError("Invalid value for datatype parameter")
        self.__datatype=datatype
        self.__altitude=altitude
        self.__readMetadata()

    def __readMetadata(self):
        """
        Opens the netCDF file and determines grid extents, origin, cell
        spacing, the name of the data variable and the value that marks
        missing data, storing the results in private attributes.
        """
        self.logger.debug("Checking Data Source: %s"%self.__filename)
        f=netcdf_file(self.__filename, 'r')
        # grid size: look for commonly used dimension names for x and y
        NX=0
        for n in ['lon','longitude','x']:
            if n in f.dimensions:
                NX=f.dimensions[n]
                break
        if NX==0:
            raise RuntimeError("Could not determine extents of data")
        NY=0
        for n in ['lat','latitude','y']:
            if n in f.dimensions:
                NY=f.dimensions[n]
                break
        if NY==0:
            raise RuntimeError("Could not determine extents of data")

        # find longitude and latitude variables; they are popped from the
        # variable dict so the data variable search below cannot pick them
        lon_name=None
        for n in ['lon','longitude']:
            if n in f.variables:
                lon_name=n
                longitude=f.variables.pop(n)
                break
        if lon_name is None:
            raise RuntimeError("Could not determine longitude variable")
        lat_name=None
        for n in ['lat','latitude']:
            if n in f.variables:
                lat_name=n
                latitude=f.variables.pop(n)
                break
        if lat_name is None:
            raise RuntimeError("Could not determine latitude variable")

        # try to figure out data variable name: either the only remaining
        # variable, or the first one indexed by both latitude and longitude
        data_name=None
        if len(f.variables)==1:
            # keys() returns a view object in Python 3, so make it
            # indexable before taking the first element
            data_name=list(f.variables.keys())[0]
        else:
            for n in f.variables:
                dims=f.variables[n].dimensions
                if (lat_name in dims) and (lon_name in dims):
                    data_name=n
                    break
        if data_name is None:
            raise RuntimeError("Could not determine data variable")

        # try to determine value for unused data
        if hasattr(f.variables[data_name], 'missing_value'):
            maskval = float(f.variables[data_name].missing_value)
        elif hasattr(f.variables[data_name], '_FillValue'):
            maskval = float(f.variables[data_name]._FillValue)
        else:
            self.logger.debug("missing_value attribute not found, using default.")
            maskval = 99999

        # try to determine units of data - this is disabled for now
        #if hasattr(f.variables[data_name], 'units'):
        #   units=f.variables[data_name].units
        if self.__datatype == self.GRAVITY:
            self.logger.info("Assuming gravity data scale is 1e-6 m/s^2.")
            self.__scalefactor = 1e-6
        else:
            self.logger.info("Assuming magnetic data units are 'nT'.")
            self.__scalefactor = 1e-9

        # see if there is a wkt string to convert coordinates
        try:
            wkt_string=f.variables[data_name].esri_pe_string
        except Exception:
            wkt_string=None

        # we don't trust actual_range & geospatial_lon_min/max since subset
        # data does not seem to have these fields updated.
        # Getting min/max from the arrays is obviously not very efficient but..
        #lon_range=longitude.actual_range
        #lat_range=latitude.actual_range
        #lon_range=[f.geospatial_lon_min,f.geospatial_lon_max]
        #lat_range=[f.geospatial_lat_min,f.geospatial_lat_max]
        lon_range=longitude.data.min(),longitude.data.max()
        lat_range=latitude.data.min(),latitude.data.max()
        lon_range,lat_range=LatLonToUTM(lon_range, lat_range, wkt_string)
        lengths=[lon_range[1]-lon_range[0], lat_range[1]-lat_range[0]]
        f.close()

        self.__nPts=[NX, NY]
        self.__origin=[lon_range[0],lat_range[0]]
        # we are rounding to avoid interpolation issues
        self.__delta=[np.round(lengths[i]/self.__nPts[i]) for i in range(2)]
        #self.__wkt_string=wkt_string
        #self.__lon_name=lon_name
        #self.__lat_name=lat_name
        self.__data_name=data_name
        self.__maskval=maskval

    def getDataExtents(self):
        """
        returns ( (x0, y0), (nx, ny), (dx, dy) )
        """
        return (list(self.__origin), list(self.__nPts), list(self.__delta))

    def getDataType(self):
        """
        Returns the data type set in the constructor (`GRAVITY` or
        `MAGNETIC`).
        """
        return self.__datatype

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        Reads the netCDF grid into a `Data` object on the given domain and
        returns it together with a sigma mask that is non-zero wherever a
        valid (non-missing) data value is present.
        """
        nValues=self.__nPts
        # determine base location of this dataset within the domain
        first=[int((self.__origin[i]-origin[i])/spacing[i]) for i in range(len(self.__nPts))]
        if domain.getDim()==3:
            # place the 2D slice at the measurement altitude
            first.append(int((self.__altitude-origin[2])/spacing[2]))
            nValues=nValues+[1]

        data=ripleycpp._readNcGrid(self.__filename, self.__data_name,
                ReducedFunction(domain), first, nValues, (), self.__maskval)
        sigma=whereNonZero(data-self.__maskval)
        data=data*self.__scalefactor
        sigma=sigma * 2. * self.__scalefactor
        return data, sigma
450
451
452 ##############################################################################
class SourceFeature(object):
    """
    Abstract description of a feature that contributes a density
    distribution to (parts of) the domain of a synthetic data source —
    for instance a layer of a specific rock type or a simulated ore body.
    """

    def getValue(self):
        """
        Returns the value applied to the area covered by the mask. This
        may be a constant or a data object with spatial dependency.
        """
        raise NotImplementedError

    def getMask(self, x):
        """
        Returns the mask of the area of interest for this feature: it is
        non-zero wherever the density returned by getDensity() should be
        applied and zero everywhere else.
        """
        raise NotImplementedError
472
class SmoothAnomaly(SourceFeature):
    """
    A source feature describing a box-shaped region of size lx * ly * lz
    centered at (x, y) at the given depth, whose value falls off smoothly
    (a Gaussian profile) from v_inner inside to v_outer outside.
    """
    def __init__(self, lx, ly, lz, x, y, depth, v_inner=None, v_outer=None):
        # center coordinates of the anomaly in x and y
        self.x=x
        self.y=y
        # extents of the anomaly box in each dimension
        self.lx=lx
        self.ly=ly
        self.lz=lz
        # burial depth of the anomaly (positive down)
        self.depth=depth
        # inner and outer values; if either is None the value is 0
        self.v_inner=v_inner
        self.v_outer=v_outer
        # caches filled lazily by getValue()/getMask()
        self.value=None
        self.mask=None

    def getValue(self,x):
        # computes the value lazily on first call and caches it; note that
        # this overrides the base class getValue() with an extra argument
        if self.value is None:
            if self.v_outer is None or self.v_inner is None:
                self.value=0
            else:
                DIM=x.getDomain().getDim()
                # Gaussian falloff scaled so the profile reaches v_outer
                # at the edge of the box (alpha from the v_outer/v_inner
                # ratio)
                alpha=-log(abs(self.v_outer/self.v_inner))*4
                value=exp(-alpha*((x[0]-self.x)/self.lx)**2)
                value=value*exp(-alpha*((x[DIM-1]+self.depth)/self.lz)**2)
                self.value=maximum(abs(self.v_outer), abs(self.v_inner*value))
                # preserve the sign of the inner value
                if self.v_inner<0: self.value=-self.value

        return self.value

    def getMask(self, x):
        # box mask: non-zero inside the lx*ly*lz box centered at (x, y)
        # at the given depth; y-extent only applies in 3D
        DIM=x.getDomain().getDim()
        m=whereNonNegative(x[DIM-1]+self.depth+self.lz/2) * whereNonPositive(x[DIM-1]+self.depth-self.lz/2) \
            *whereNonNegative(x[0]-(self.x-self.lx/2)) * whereNonPositive(x[0]-(self.x+self.lx/2))
        if DIM>2:
            m*=whereNonNegative(x[1]-(self.y-self.ly/2)) * whereNonPositive(x[1]-(self.y+self.ly/2))
        self.mask = m
        return m
508
509 ##############################################################################
class SyntheticData(DataSource):
    """
    Data source that generates synthetic gravity or magnetic survey data by
    solving a PDE for a reference density/susceptibility built from a list
    of `SourceFeature` objects.
    """
    def __init__(self, datatype, DIM, NE, l, features):
        """
        :param datatype: type of data, must be `GRAVITY` or `MAGNETIC`
        :param DIM: dimensionality of the domain (2 or 3)
        :param NE: number of elements per horizontal dimension
        :param l: length of the domain per horizontal dimension
        :param features: list of `SourceFeature` objects defining anomalies
        """
        super(SyntheticData,self).__init__()
        if not datatype in [self.GRAVITY,self.MAGNETIC]:
            raise ValueError("Invalid value for datatype parameter")
        self.__datatype = datatype
        self._features = features
        # the (DIM-1)-dimensional data grid: origin, point count, spacing
        self.__origin = [0]*(DIM-1)
        self.__nPts = [NE]*(DIM-1)
        self.__delta = [float(l)/NE]*(DIM-1)
        self.DIM=DIM
        self.NE=NE
        self.l=l

    def getDataExtents(self):
        """
        returns ( (x0, y0), (nx, ny), (dx, dy) )
        """
        return (list(self.__origin), list(self.__nPts), list(self.__delta))

    def getDataType(self):
        """
        Returns the data type set in the constructor (`GRAVITY` or
        `MAGNETIC`).
        """
        return self.__datatype

    def getReferenceDensity(self):
        """
        Returns the reference density Data object that was used to generate
        the gravity anomaly data.
        """
        return self._rho

    def getReferenceSusceptibility(self):
        """
        Returns the reference magnetic susceptibility Data objects that was
        used to generate the magnetic field data.
        """
        return self._k

    def getSurveyData(self, domain, origin, NE, spacing):
        """
        Builds the reference property (density or susceptibility) from the
        features, solves a single Poisson-type PDE for the potential and
        returns the resulting field data together with a sigma mask that
        selects the one-cell-thick measurement layer at z=0.
        """
        pde=LinearSinglePDE(domain)
        G=U.Gravitational_Constant
        m_psi_ref=0.
        x=domain.getX()
        DIM=domain.getDim()
        # fix the potential at the top surface of the domain
        m_psi_ref=whereZero(x[DIM-1]-sup(x[DIM-1])) # + whereZero(x[DIM-1]-inf(x[DIM-1]))
        if self.getDataType()==DataSource.GRAVITY:
            # paint the features onto the density, later features overwrite
            # earlier ones where masks overlap
            rho_ref=0.
            for f in self._features:
                m=f.getMask(x)
                rho_ref = rho_ref * (1-m) + f.getValue(x) * m
            self._rho=rho_ref
            pde.setValue(A=kronecker(domain), Y=-4*np.pi*G*rho_ref, q=m_psi_ref)
        else:
            # same painting scheme for the susceptibility
            k_ref=0.
            for f in self._features:
                m=f.getMask(x)
                k_ref = k_ref * (1-m) + f.getValue(x) * m
            self._k=k_ref
            B_b = self.getBackgroundMagneticField()
            pde.setValue(A=kronecker(domain), X=(1+k_ref)*B_b, q=m_psi_ref)
        pde.setSymmetryOn()
        psi_ref=pde.getSolution()
        del pde
        if self.getDataType()==DataSource.GRAVITY:
            data = -grad(psi_ref, ReducedFunction(domain))
        else:
            data = (1+self._k)*B_b-grad(psi_ref, ReducedFunction(domain))

        sigma=1.
        # limit mask to non-padding in horizontal area
        # NOTE(review): the upper bound sup(x[i])+inf(x[i]) appears to
        # assume symmetric padding around the data region — verify
        x=ReducedFunction(domain).getX()
        for i in range(self.DIM-1):
            sigma=sigma * wherePositive(x[i]) \
                    * whereNegative(x[i]-(sup(x[i])+inf(x[i])))
        # limit mask to one cell thickness at z=0
        sigma = sigma * whereNonNegative(x[self.DIM-1]) \
                * whereNonPositive(x[self.DIM-1]-spacing[self.DIM-1])
        return data,sigma

    def getBackgroundMagneticField(self):
        """
        Returns the background magnetic field vector from a simple dipole
        model of the Earth's field.
        """
        #FIXME: latitude is hard-coded
        latitude=-28.0
        # colatitude in radians
        theta = (90-latitude)/180.*np.pi
        B_0=U.Mu_0 * U.Magnetic_Dipole_Moment_Earth / (4 * np.pi * U.R_Earth**3)
        B_theta= B_0 * sin(theta)
        B_r= 2 * B_0 * cos(theta)
        if self.DIM<3:
            return np.array([0., -B_r])
        else:
            return np.array([-B_theta, 0., -B_r])
596

  ViewVC Help
Powered by ViewVC 1.1.26