/[escript]/trunk/doc/inversion/DataSources.tex
ViewVC logotype

Contents of /trunk/doc/inversion/DataSources.tex

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4376 - (show annotations)
Mon Apr 22 08:44:45 2013 UTC (5 years, 11 months ago) by gross
File MIME type: application/x-tex
File size: 23349 byte(s)
domumentation update on coordinate systems - part I
1 \chapter{Data Sources}\label{Chp:ref:data sources}
2
3 At the source of every inversion is data in the form of gravity anomaly or
4 magnetic flux density values for at least a part of the region of interest.
5 These usually come from surveys and are preprocessed to correct for various
6 factors and distortions.
7 This chapter provides an overview of the classes related to data input for
8 inversions.
9
10 \section{Overview}
11 The inversion module comes with a number of classes that can read gridded
12 (raster) data on a 2-dimensional plane from file or provide artificial values
13 for testing purposes. These classes all derive from the abstract
14 \class{DataSource} class and override methods that return information about
15 the data and the values themselves.
16 The \class{DomainBuilder} class is responsible for creating an \escript domain
17 with a suitable grid spacing and spatial extents that include all data sources
18 attached to it (see Figure~\ref{fig:domainBuilder}).
19 %
20 \begin{figure}[ht]
21 \centering\includegraphics{domainbuilder}
22 \caption{\class{DataSource} instances are added to a \class{DomainBuilder}
23 which creates a suitable domain and \Data objects for the inversion}
24 \label{fig:domainBuilder}
25 \end{figure}
26 %
27 Notice that in the figure there are cells in the region of interest that are
28 not covered by any data source instance.
29 Ideally, all data sources used for an inversion have the same spatial resolution
30 and are spatially adjacent so that all cells have a value but this is not a
31 requirement.
32
33
34 \section{Domain Builder}\label{Chp:ref:domain builder}
35 Every inversion requires one \class{DomainBuilder} instance which creates and
36 holds a reference to the \escript domain as well as associated \Data objects for
37 the input data used for the inversion.
38 The class has the following public methods:
39
40 \begin{classdesc}{DomainBuilder}{\optional{dim=3, }\optional{reference_system=\None}}
41 Constructor for the domain builder. \member{dim} sets the dimensionality of the
42 target domain and must be 2 or 3. By default a 3-dimensional domain is created.
43 \member{reference_system} defines the reference coordinate system to be used, see Section~\ref{sec:ref:reference systems}.
44 If not present the Cartesian reference coordinate system is used.
45 \end{classdesc}
46
47 \begin{methoddesc}[DomainBuilder]{addSource}{source}
48 adds survey data \member{source} (a \class{DataSource} object) to the domain
49 builder. The dimensionality of the data must be less than or equal to the
50 domain dimensionality. The reference coordinate system of the \class{DataSource}
51 must match the reference coordinate system of this class.
52 The \member{source} must be defined for the same reference coordinate system to be used, see Section~\ref{sec:ref:reference systems}.
53 \end{methoddesc}
54
55 \begin{methoddesc}[DomainBuilder]{setVerticalExtents}{%
56 \optional{depth=40000.}%
57 \optional{, air_layer=10000.}%
58 \optional{, num_cells=25}}
59 sets the parameters for the vertical dimension of the domain. The parameter
60 \member{depth} specifies the thickness in meters of the subsurface layer
61 ($-x_2^{min}$ in Figure~\ref{fig:cartesianDomain}).
62 The default value of $40$ km is usually appropriate. Similarly, the
63 \member{air_layer} parameter defines the buffer zone thickness above the surface
64 ($x_2^{max}$ in Figure~\ref{fig:cartesianDomain}) which should be a few
65 kilometres to avoid artefacts in the inversion.
66 The number of elements (or cells) in the vertical dimension is set with the
67 \member{num_cells} parameter. Consider the size and resolution of your datasets,
68 the total vertical length (=\member{depth}+\member{air_layer}) and available
69 compute resources when setting this value.
70 \end{methoddesc}
71
72 \begin{methoddesc}[DomainBuilder]{setFractionalPadding}{%
73 \optional{pad_x=\None}%
74 \optional{, pad_y=\None}}
75 sets the amount of padding around the dataset as a fraction of the dataset side
76 lengths if the reference coordinate system is Cartesian.
77 For example, calling \member{setFractionalPadding(0.2, 0.1)} with a data source
78 of size $10000 \times 20000$ meters will result in the padded data set size $14000 \times 24000$ meters
79 (that is $10000 \times (1+2 \times 0.2)$ and $20000 \times (1+2 \times 0.1)$).
80 By default no padding is applied and \member{pad_y} is ignored for 2-dimensional
81 domains.
82 \end{methoddesc}
83
84 \begin{methoddesc}[DomainBuilder]{setFractionalPadding}{%
85 \optional{pad_lat=\None}%
86 \optional{, pad_lon=\None}}
87 sets the amount of padding around the dataset as a fraction of the dataset side
88 lengths if the reference coordinate system is not Cartesian.
89 For example, calling \member{setFractionalPadding(0.2, 0.1)} with a data source
90 of size $10 \times 20$ degree will result in the padded data set size $14 \times 24$ degree
91 (that is $10 \times (1+2 \times 0.2)$ and $20 \times (1+2 \times 0.1)$).
92 By default no padding is applied and \member{pad_lon} is ignored for 2-dimensional
93 domains.
94 \end{methoddesc}
95
96 \begin{methoddesc}[DomainBuilder]{setPadding}{%
97 \optional{pad_x=\None}%
98 \optional{, pad_y=\None}}
99 sets the amount of padding around the dataset in absolute length units.
100 The final domain size will be the length in x (in y) of the dataset plus twice
101 the value of \member{pad_x} (\member{pad_y}). The arguments must be non-negative.
102 By default no padding is applied and \member{pad_y} is ignored for 2-dimensional
103 domains. This function can be used for Cartesian reference coordinate system only.
104 \end{methoddesc}
105
106 \begin{methoddesc}[DomainBuilder]{setElementPadding}{%
107 \optional{pad_x=\None}%
108 \optional{, pad_y=\None}}
109 sets the amount of padding around the dataset in number of elements (cells),
110 if the reference coordinate system is Cartesian.
111 When the domain is constructed \member{pad_x} (\member{pad_y}) elements are
112 added on each side of the x- (y-) dimension in case of a Cartesian reference system. The arguments must be non-negative integers.
113 By default no padding is applied and \member{pad_y} is ignored for 2-dimensional
114 domains.
115 \end{methoddesc}
116
117 \begin{methoddesc}[DomainBuilder]{setElementPadding}{%
118 \optional{pad_lat=\None}%
119 \optional{, pad_lon=\None}}
120 sets the amount of padding around the dataset in number of elements (cells) if the the reference coordinate system is not Cartesian.
121 When the domain is constructed \member{pad_lat} (\member{pad_lon}) elements are
122 added on each side of the latitudinal (longitudinal) dimension. The arguments must be non-negative integers.
123 By default no padding is applied and \member{pad_lon} is ignored for 2-dimensional
124 domains.
125 \end{methoddesc}
126
127 \begin{methoddesc}[DomainBuilder]{fixDensityBelow}{%
128 \optional{depth=\None}}
129 defines the depth below which the density anomaly is fixed to zero.
130 This method is only useful for inversions that involve gravity data.
131 \end{methoddesc}
132
133 \begin{methoddesc}[DomainBuilder]{fixSusceptibilityBelow}{%
134 \optional{depth=\None}}
135 defines the depth below which the susceptibility anomaly is fixed to zero.
136 This method is only useful for inversions that involve magnetic data.
137 \end{methoddesc}
138
139 \begin{methoddesc}[DomainBuilder]{getGravitySurveys}{}
140 returns a list of all gravity surveys added to the domain builder. See
141 \member{getSurveys()} for more details.
142 \end{methoddesc}
143
144 \begin{methoddesc}[DomainBuilder]{getMagneticSurveys}{}
145 returns a list of all magnetic surveys added to the domain builder. See
146 \member{getSurveys()} for more details.
147 \end{methoddesc}
148
149 \begin{methoddesc}[DomainBuilder]{getSurveys}{datatype}
150 returns a list of surveys of type \member{datatype} available to this domain
151 builder. In the current implementation each survey is a tuple of two \Data
152 objects, the first containing anomaly values and the second standard error
153 values for the survey.
154 \end{methoddesc}
155
156 \begin{methoddesc}[DomainBuilder]{getDomain}{}
157 returns an \escript domain (see~\cite{ESCRIPT}) suitable for running inversions
158 on the attached data sources.
159 The first time this method is called the target parameters (such as resolution,
160 extents and number of elements) are computed, and the domain is created.
161 Subsequent calls return the same domain instance so calls to
162 \member{setPadding()}, \member{addSource()} and other methods that influence
163 the domain will fail once \member{getDomain()} is called the first time.
164 \end{methoddesc}
165
166 \begin{methoddesc}[DomainBuilder]{getReferenceSystem}{}
167 returns the reference coordinate system
168 \end{methoddesc}
169
170 \begin{methoddesc}[DomainBuilder]{setBackgroundMagneticFluxDensity}{B}
171 sets the background magnetic flux density $B=(B_{North},B_{East},B_{Vertical})$
172 which is required for magnetic inversions.
173 $B_{East}$ is ignored for 2-dimensional magnetic inversions.
174 \end{methoddesc}
175
176 \begin{methoddesc}[DomainBuilder]{getBackgroundMagneticFluxDensity}{}
177 returns the background magnetic flux density $B$ set via
178 \member{setBackgroundMagneticFluxDensity()} in a form suitable for the inversion.
179 There should be no need to call this method directly.
180 \end{methoddesc}
181
182 \begin{methoddesc}[DomainBuilder]{getSetDensityMask}{}
183 returns the density mask \Data object which is non-zero for cells that have a
184 fixed density value, zero otherwise.
185 There should be no need to call this method directly.
186 \end{methoddesc}
187
188 \begin{methoddesc}[DomainBuilder]{getSetSusceptibilityMask}{}
189 returns the susceptibility mask \Data object which is non-zero for cells that
190 have a fixed susceptibility value, zero otherwise.
191 There should be no need to call this method directly.
192 \end{methoddesc}
193
194 \section{\class{DataSource} Class}\label{sec:ref:DataSource}
195
196 Data sources added to a \class{DomainBuilder} must provide an implementation for
197 a few methods as described in the class template \class{DataSource} from
198 the \module{esys.downunder.datasources} module:
199
200 \begin{classdesc}{DataSource}{\optional{reference_system=\None}}
201 Base constructor which initializes members and should therefore be invoked by
202 subclasses. Subclasses may then use the member \member{logger} to print any
203 output. \member{reference_system} defines the reference coordinate system to be used, see Section~\ref{sec:ref:reference systems}.
204 If not present the Cartesian reference coordinate system is used.
205 \end{classdesc}
206
207 \begin{methoddesc}[DataSource]{getDataExtents}{}
208 This method should be implemented to return a tuple of tuples
209 ( (x0, y0), (nx, ny), (dx, dy) ), where (x0, y0) denote the UTM coordinates of
210 the data origin, (nx, ny) the number of data points, and (dx, dy) the spacing
211 of data points in a Cartesian reference system is used. Otherwise
212 the data origin and the spacing refer to latitudinal and longitudinal coordinates.
213 \end{methoddesc}
214
215 \begin{methoddesc}[DataSource]{getDataType}{}
216 Subclasses must return \class{DataSource}.\member{GRAVITY} or
217 \class{DataSource}.\member{MAGNETIC} depending on the type of data they provide.
218 \end{methoddesc}
219
220 \begin{methoddesc}[DataSource]{getSurveyData}{domain, origin, NE, spacing}
221 This method is called by the \class{DomainBuilder} to retrieve the actual survey
222 data in the form of \Data objects on the \member{domain}.
223 Data sources are responsible to map or interpolate their data onto the domain
224 which has been constructed to fit the data.
225 The domain \member{origin}, number of elements \member{NE} and element
226 \member{spacing} are provided as tuples or lists to aid with interpolation.
227 \end{methoddesc}
228
229 \begin{methoddesc}[DataSource]{getUtmZone}{}
230 Must be implemented to return the UTM zone that corresponds to the location of
231 this data set as returned by \member{getDataExtents}.
232 \end{methoddesc}
233
234 \begin{methoddesc}[DataSource]{setSubsamplingFactor}{factor}
235 Notifies the data source that data should be subsampled by \member{factor}.
236 This method does not need to be overwritten.
237 See \member{getSubsamplingFactor()} for an explanation.
238 \end{methoddesc}
239
240 \begin{methoddesc}[DataSource]{getSubsamplingFactor}{}
241 Returns the subsampling factor which was set by \member{setSubsamplingFactor()}
242 or $1$ which indicates that no subsampling is requested.
243 Data sources that support subsampling (or interleaving) of their data should use
244 this method to query the subsampling factor before returning surveys via
245 \member{getSurveyData}. If supported, the factor should be applied in all
246 dimensions. For example, a 2-dimensional dataset with 300 x 150 data points
247 should be reduced to 150 x 75 data points when the subsampling factor equals $2$.
248 Subsampling becomes important when the survey data resolution is too fine or
249 when using data with varying resolution in one inversion.
250 Note that data sources may choose to ignore the subsampling factor if they
251 don't support it.
252 \end{methoddesc}
253
254 \begin{methoddesc}[DataSource]{getReferenceSystem}{}
255 returns the reference coordinate system
256 \end{methoddesc}
257
258 \vspace{1em}\noindent The \module{esys.downunder.datasources} module contains the following helper
259 functions:
260
261 \begin{funcdesc}{LatLonToUTM}{longitude, latitude%
262 \optional{, wkt_string=\None}}
263 converts one or more (longitude,latitude) pairs to the corresponding (x,y)
264 coordinates in the \emph{Universal Transverse Mercator} (UTM) projection.
265 This function requires the \module{pyproj} module for conversion and the
266 \module{gdal} module to parse the \member{wkt_string} parameter if supplied.
267 The \member{wkt_string} parameter may describe the coordinate system used
268 for the input values as a \emph{Well-known Text} (WKT) string.
269 \end{funcdesc}
270
271 \subsection{ER Mapper Raster Data}\label{sec:ref:DataSource:ERM}
272 \emph{ER Mapper} files that contain 2-dimensional raster data may be used for
273 inversions through the \class{ErMapperData} class which is derived from
274 \class{DataSource}. Date are given in latitudinal and longitudinal coordinates and if
275 a Cartesian reference coordinate system is used are mapped using the appropriate (UTM) projection.
276 Generally, these datasets contain two files~\cite{ERMAPPER}, a header file and a data file.
277 The former usually has the \texttt{.ers} file extension and is a text file that
278 describes the data format, size, coordinate system used etc.
279 The data file usually has the same file name but no extension.
280 Note, that the current implementation may not work with all \emph{ER Mapper}
281 datasets. For example, the only cell type understood is \emph{IEEE4ByteReal}
282 at the moment.
283 To run inversions on a \emph{ER Mapper} dataset use the following constructor:
284 \begin{classdesc}{ErMapperData}{data_type, headerfile%
285 \optional{, datafile=\None}%
286 \optional{, altitude=0.}%
287 \optional{, error=\None}%
288 \optional{, scale_factor=\None}%
289 \optional{, null_value=\None}
290 \optional{, reference_system=\None}}
291 Creates a new data source from \emph{ER Mapper} data.
292 The parameter \member{data_type} must be one of
293 \class{DataSource}.\member{GRAVITY} or \class{DataSource}.\member{MAGNETIC}
294 depending on the type of data, \member{headerfile} is the name of the header
295 file while \member{datafile} specifies the name of the data file.
296 The parameter \member{datafile} can be left blank if the name is identical to
297 the header file except for the file extension.
298 The \member{altitude} parameter can be used to shift a 2-dimensional slice of
299 data vertically within a 3-dimensional domain.
300 Use \member{error} to set the (constant) measurement error with the same units
301 used by the measurements. By default a value of $2$ units is assumed which
302 equals $0.2 \; mgal$ or $2 \; nT$ depending on the data type.
303 Since ER Mapper files do not store any information about data units or scale
304 the \member{scale_factor} may be used to provide this information.
305 If not set, gravity data is assumed to be given in $\frac{\mu m}{sec^2}$ while
306 magnetic data is assumed to be given in $nT$.
307 Finally, the \member{null_value} parameter can be used to override the value
308 for the areas to be ignored (see Section~\ref{SEC:P1:GRAV:REMARK:DATAHOLES})
309 which is usually provided in the ER Mapper header file.
310 \member{reference_system} defines the reference coordinate system to be used, see Section~\ref{sec:ref:reference systems}.
311 If not present the Cartesian reference coordinate system is used. In the current implementation any
312 coordinate system specified in the file is ignored.
313 \end{classdesc}
314
315 \subsection{NetCDF Data}\label{sec:ref:DataSource:NETCDF}
316 The \class{NetCdfData} class from the \module{esys.downunder.datasources} module
317 provides the means to use data from \netcdf files~\cite{NETCDF} for inversion.
318 Currently, files that follow the \emph{Climate and Forecast (CF)}\footnote{%
319 \url{http://cf-pcmdi.llnl.gov/documents/cf-conventions/latest-cf-conventions-document-1}}
320 and/or the \emph{Cooperative Ocean/Atmosphere Research Data Service (COARDS)}\footnote{%
321 \url{http://ferret.wrc.noaa.gov/noaa_coop/coop_cdf_profile.html}} metadata
322 conventions are supported.
323 The example script \examplefile{create_netcdf.py} demonstrates how a compatible
324 file can be generated from within \Python (provided the \module{scipy} module
325 is available).
326 To plot such an input file including coordinates and legend using
327 \emph{matplotlib}~\cite{matplotlib} see the script \examplefile{show_netcdf.py}.
328 The interface to \class{NetCdfData} looks as follows:
329
330 \begin{classdesc}{NetCdfData}{datatype, filename%
331 \optional{, altitude=0.}%
332 \optional{, data_variable=\None}%
333 \optional{, error=\None}%
334 \optional{, scale_factor=\None}%
335 \optional{, null_value=\None}
336 \optional{, reference_system=\None}}
337 Creates a new data source from compatible \netcdf data.
338 The two required parameters are \member{datatype}, which must be one of
339 \class{DataSource}.\member{GRAVITY} or \class{DataSource}.\member{MAGNETIC}
340 depending on the type of data, and \member{filename} which is the name of the
341 file containing the data.
342 The \member{altitude} parameter can be used to shift a 2-dimensional slice of
343 data vertically within a 3-dimensional domain.
344 Set the \member{data_variable} parameter to the name of the \netcdf variable
345 that contains the measurements unless there is only one data variable in the
346 file in which case the parameter can be left empty.
347 Use \member{error} to set the (constant) measurement error with the same units
348 used by the measurements or the name of the \netcdf variable that contains this
349 information. By default a value of $2$ units is assumed which equals
350 $0.2 \; mgal$ or $2 \; nT$ depending on the data type.
351 The current implementation does not use the units attribute (if available)
352 within the \netcdf file. Use the \member{scale_factor} argument to provide this
353 information instead.
354 If not set, gravity data is assumed to be given in $\frac{\mu m}{sec^2}$ while
355 magnetic data is assumed to be given in $nT$.
356 Finally, the \member{null_value} parameter can be used to override the value
357 for the areas to be ignored (see Section~\ref{SEC:P1:GRAV:REMARK:DATAHOLES})
358 which is usually provided in the \netcdf file. \member{reference_system} defines the reference coordinate system to be used, see Section~\ref{sec:ref:reference systems}.
359 If not present the Cartesian reference coordinate system is used. In the current implementation any
360 coordinate system specified in the file is ignored.
361 \end{classdesc}
362
363 \subsection{Synthetic Data}
364 As a special case the \module{esys.downunder.datasources} module contains
365 classes to generate input data for inversions by solving a forward model with
366 user-defined reference data.
367 The main purpose of using synthetic data is to test the capabilities of the
368 inversion module or for tracking down problems.
369
370 The base class for synthetic data which is derived from \class{DataSource}
371 has the following interface:
372
373 \begin{classdesc}{SyntheticDataBase}{datatype%
374 \optional{, DIM=2}
375 \optional{, number_of_elements=10}
376 \optional{, length=1*U.km}def SphericalReferenceSystem(R=6378137.0*U.m):
377 \optional{, B_b=\None}
378 \optional{, data_offset=0}
379 \optional{, full_knowledge=\False}}
380 Base class to define reference data based on a given property distribution
381 (density or susceptibility). Data are collected from a square region of
382 vertical extent \member{length} on a grid with \member{number_of_elements}
383 cells in each direction.
384 The synthetic data are constructed by solving the appropriate forward problem.
385 Data can be sampled with an offset from the surface at $z=0$ or using the
386 entire subsurface region.
387 \end{classdesc}
388
389 \vspace{1em}\noindent The only additional method which needs to be implemented
390 in subclasses is
391 \begin{methoddesc}[SyntheticDataBase]{getReferenceProperty}{
392 \optional{domain=\None}}
393 Returns the reference \Data object that was used to generate the gravity or
394 susceptibility anomaly data. The \member{domain} argument must be present
395 when this method is called for the first time but not necessarily in
396 subsequent calls.
397 \end{methoddesc}
398
399 \vspace{1em}\noindent Two synthetic data providers are currently available.
400 The class \class{SyntheticData} defines synthetic gravity or magnetic anomaly
401 data based on a harmonic
402 \begin{equation}\label{eq:synthdata}
403 k=A\cdot sin\left(\pi \frac{n_D}{D} (z+\Delta z)\right) \cdot%
404 sin\left(\pi \frac{n_L}{L} (x - x_0)\right)
405 \end{equation}
406 where $A$ is the amplitude, $n_D$, $n_L$ denote the number of oscillations in
407 the vertical and lateral direction, respectively, while $D$ and $L$ is the
408 depth and side length for the data, respectively.
409 The second data provider class is \class{SyntheticFeatureData} which takes a
410 list of \class{SourceFeature} objects that define the anomaly and is thus more
411 generic.
412 The constructors are defined as follows:
413
414 \begin{classdesc}{SyntheticData}{datatype%
415 \optional{, n_length=1}
416 \optional{, n_depth=1}
417 \optional{, depth_offset=0.}
418 \optional{, depth=\None}
419 \optional{, amplitude=\None}
420 \optional{, DIM=2}
421 \optional{, number_of_elements=10}
422 \optional{, length=1*U.km}
423 \optional{, B_b=\None}
424 \optional{, data_offset=0}
425 \optional{, full_knowledge=\False}
426 \optional{, spherical=\False}}
427 The arguments \member{n_length}, \member{n_depth}, \member{depth_offset},
428 \member{depth}, and \member{amplitude} correspond to the respective symbols in
429 Equation~\ref{eq:synthdata}. The remaining arguments are passed to the parent
430 class (\class{SyntheticDataBase}) and described there.
431 If \member{depth} is not set the depth of the domain is used.
432 The argument \member{amplitude} may be left unset as well in which case a
433 default value is used ($200 \frac{kg}{m^3}$ for gravity and $0.1$ for magnetic
434 data).
435 \end{classdesc}
436
437 \begin{classdesc}{SyntheticFeatureData}{datatype, features%
438 \optional{, DIM=2}
439 \optional{, number_of_elements=10}
440 \optional{, length=1*U.km}
441 \optional{, B_b=\None}
442 \optional{, data_offset=0}
443 \optional{, full_knowledge=\False}
444 \optional{, spherical=\False}}
445 The only new argument is \member{features} which is a list of
446 \class{SourceFeature} objects to be included in the data preparation.
447 All other arguments are passed on to the parent class (see there).
448 \end{classdesc}
449
450 \begin{classdesc}{SourceFeature}{}
451 A feature adds a density/susceptibility distribution to (parts of) a domain of
452 a synthetic data source, for example a layer of a specific rock type or a
453 simulated ore body. This base class is empty and only provides the skeleton
454 for subclasses which need to implement the following two methods:
455 \end{classdesc}
456
457 \begin{methoddesc}[SourceFeature]{getValue}{}
458 Returns the value for the area covered by mask. It can be constant or a \Data
459 object with spatial dependency.
460 \end{methoddesc}
461
462 \begin{methoddesc}[SourceFeature]{getMask}{x}
463 Returns the mask of the region of interest for this feature. That is, mask is
464 non-zero where the value returned by \member{getValue()} should be applied,
465 zero elsewhere.
466 \end{methoddesc}
467

  ViewVC Help
Powered by ViewVC 1.1.26