/[escript]/branches/doubleplusgood/doc/inversion/DataSources.tex
ViewVC logotype

Contents of /branches/doubleplusgood/doc/inversion/DataSources.tex

Parent Directory Parent Directory | Revision Log Revision Log


Revision 4345 - (show annotations)
Fri Mar 29 07:09:41 2013 UTC (5 years, 11 months ago) by jfenwick
File MIME type: application/x-tex
File size: 20086 byte(s)
Spelling fixes
1 \chapter{Data Sources}\label{Chp:ref:data sources}
2
3 At the source of every inversion is data in the form of gravity anomaly or
4 magnetic flux density values for at least a part of the region of interest.
5 These usually come from surveys and are preprocessed to correct for various
6 factors and distortions.
7 This chapter provides an overview of the classes related to data input for
8 inversions.
9
10 \section{Overview}
11 The inversion module comes with a number of classes that can read gridded
12 (raster) data on a 2-dimensional plane from file or provide artificial values
13 for testing purposes. These classes all derive from the abstract
14 \class{DataSource} class and override methods that return information about
15 the data and the values themselves.
16 The \class{DomainBuilder} class is responsible for creating an \escript domain
17 with a suitable grid spacing and spatial extents that include all data sources
18 attached to it (see Figure~\ref{fig:domainBuilder}).
19 %
20 \begin{figure}[ht]
21 \centering\includegraphics{domainbuilder}
22 \caption{\class{DataSource} instances are added to a \class{DomainBuilder}
23 which creates a suitable domain and \Data objects for the inversion}
24 \label{fig:domainBuilder}
25 \end{figure}
26 %
27 Notice that in the figure there are cells in the region of interest that are
28 not covered by any data source instance.
29 Ideally, all data sources used for an inversion have the same spatial resolution
30 and are spatially adjacent so that all cells have a value but this is not a
31 requirement.
32
33
34 \section{Domain Builder}\label{Chp:ref:domain builder}
35 Every inversion requires one \class{DomainBuilder} instance which creates and
36 holds a reference to the \escript domain as well as associated \Data objects for
37 the input data used for the inversion.
38 The class has the following public methods:
39
40 \begin{classdesc}{DomainBuilder}{\optional{dim=3}}
41 Constructor for the domain builder. \member{dim} sets the dimensionality of the
42 target domain and must be 2 or 3. By default a 3-dimensional domain is created.
43 \end{classdesc}
44
45 \begin{methoddesc}[DomainBuilder]{addSource}{source}
46 adds survey data \member{source} (a \class{DataSource} object) to the domain
47 builder. The dimensionality of the data must be less than or equal to the
48 domain dimensionality.
49 \end{methoddesc}
50
51 \begin{methoddesc}[DomainBuilder]{setVerticalExtents}{%
52 \optional{depth=40000.}%
53 \optional{, air_layer=10000.}%
54 \optional{, num_cells=25}}
55 sets the parameters for the vertical dimension of the domain. The parameter
56 \member{depth} specifies the thickness in meters of the subsurface layer
57 ($-x_2^{min}$ in Figure~\ref{fig:cartesianDomain}).
58 The default value of $40$ km is usually appropriate. Similarly, the
59 \member{air_layer} parameter defines the buffer zone thickness above the surface
60 ($x_2^{max}$ in Figure~\ref{fig:cartesianDomain}) which should be a few
61 kilometres to avoid artefacts in the inversion.
62 The number of elements (or cells) in the vertical dimension is set with the
63 \member{num_cells} parameter. Consider the size and resolution of your datasets,
64 the total vertical length (=\member{depth}+\member{air_layer}) and available
65 compute resources when setting this value.
66 \end{methoddesc}
67
68 \begin{methoddesc}[DomainBuilder]{setFractionalPadding}{%
69 \optional{pad_x=\None}%
70 \optional{, pad_y=\None}}
71 sets the amount of padding around the dataset as a fraction of the dataset side
72 lengths.
73 For example, calling \member{setFractionalPadding(0.2, 0.1)} with a data source
74 of size $10 \times 20$ will result in the padded data set size $14 \times 24$
75 (that is $10 \times (1+2 \times 0.2)$ and $20 \times (1+2 \times 0.1)$).
76 By default no padding is applied and \member{pad_y} is ignored for 2-dimensional
77 domains.
78 \end{methoddesc}
79
80 \begin{methoddesc}[DomainBuilder]{setPadding}{%
81 \optional{pad_x=\None}%
82 \optional{, pad_y=\None}}
83 sets the amount of padding around the dataset in absolute length units.
84 The final domain size will be the length in x (in y) of the dataset plus twice
85 the value of \member{pad_x} (\member{pad_y}). The arguments must be non-negative.
86 By default no padding is applied and \member{pad_y} is ignored for 2-dimensional
87 domains.
88 \end{methoddesc}
89
90 \begin{methoddesc}[DomainBuilder]{setElementPadding}{%
91 \optional{pad_x=\None}%
92 \optional{, pad_y=\None}}
93 sets the amount of padding around the dataset in number of elements (cells).
94 When the domain is constructed \member{pad_x} (\member{pad_y}) elements are
95 added on each side of the x- (y-) dimension. The arguments must be non-negative.
96 By default no padding is applied and \member{pad_y} is ignored for 2-dimensional
97 domains.
98 \end{methoddesc}
99
100 \begin{methoddesc}[DomainBuilder]{fixDensityBelow}{%
101 \optional{depth=\None}}
102 defines the depth below which the density anomaly is fixed to zero.
103 This method is only useful for inversions that involve gravity data.
104 \end{methoddesc}
105
106 \begin{methoddesc}[DomainBuilder]{fixSusceptibilityBelow}{%
107 \optional{depth=\None}}
108 defines the depth below which the susceptibility anomaly is fixed to zero.
109 This method is only useful for inversions that involve magnetic data.
110 \end{methoddesc}
111
112 \begin{methoddesc}[DomainBuilder]{getGravitySurveys}{}
113 returns a list of all gravity surveys added to the domain builder. See
114 \member{getSurveys()} for more details.
115 \end{methoddesc}
116
117 \begin{methoddesc}[DomainBuilder]{getMagneticSurveys}{}
118 returns a list of all magnetic surveys added to the domain builder. See
119 \member{getSurveys()} for more details.
120 \end{methoddesc}
121
122 \begin{methoddesc}[DomainBuilder]{getSurveys}{datatype}
123 returns a list of surveys of type \member{datatype} available to this domain
124 builder. In the current implementation each survey is a tuple of two \Data
125 objects, the first containing anomaly values and the second standard error
126 values for the survey.
127 \end{methoddesc}
128
129 \begin{methoddesc}[DomainBuilder]{getDomain}{}
130 returns an \escript domain (see~\cite{ESCRIPT}) suitable for running inversions
131 on the attached data sources.
132 The first time this method is called the target parameters (such as resolution,
133 extents and number of elements) are computed, and the domain is created.
134 Subsequent calls return the same domain instance so calls to
135 \member{setPadding()}, \member{addSource()} and other methods that influence
136 the domain will fail once \member{getDomain()} is called the first time.
137 \end{methoddesc}
138
139 \begin{methoddesc}[DomainBuilder]{setBackgroundMagneticFluxDensity}{B}
140 sets the background magnetic flux density $B=(B_{North},B_{East},B_{Vertical})$
141 which is required for magnetic inversions.
142 $B_{East}$ is ignored for 2-dimensional magnetic inversions.
143 \end{methoddesc}
144
145 \begin{methoddesc}[DomainBuilder]{getBackgroundMagneticFluxDensity}{}
146 returns the background magnetic flux density $B$ set via
147 \member{setBackgroundMagneticFluxDensity()} in a form suitable for the inversion.
148 There should be no need to call this method directly.
149 \end{methoddesc}
150
151 \begin{methoddesc}[DomainBuilder]{getSetDensityMask}{}
152 returns the density mask \Data object which is non-zero for cells that have a
153 fixed density value, zero otherwise.
154 There should be no need to call this method directly.
155 \end{methoddesc}
156
157 \begin{methoddesc}[DomainBuilder]{getSetSusceptibilityMask}{}
158 returns the susceptibility mask \Data object which is non-zero for cells that
159 have a fixed susceptibility value, zero otherwise.
160 There should be no need to call this method directly.
161 \end{methoddesc}
162
163 \section{\class{DataSource} Class}\label{sec:ref:DataSource}
164
165 Data sources added to a \class{DomainBuilder} must provide an implementation for
166 a few methods as described in the class template \class{DataSource} from
167 the \module{esys.downunder.datasources} module:
168
169 \begin{classdesc}{DataSource}{}
170 Base constructor which initializes members and should therefore be invoked by
171 subclasses. Subclasses may then use the member \member{logger} to print any
172 output.
173 \end{classdesc}
174
175 \begin{methoddesc}[DataSource]{getDataExtents}{}
176 This method should be implemented to return a tuple of tuples
177 ( (x0, y0), (nx, ny), (dx, dy) ), where (x0, y0) denote the UTM coordinates of
178 the data origin, (nx, ny) the number of data points, and (dx, dy) the spacing
179 of data points.
180 \end{methoddesc}
181
182 \begin{methoddesc}[DataSource]{getDataType}{}
183 Subclasses must return \class{DataSource}.\member{GRAVITY} or
184 \class{DataSource}.\member{MAGNETIC} depending on the type of data they provide.
185 \end{methoddesc}
186
187 \begin{methoddesc}[DataSource]{getSurveyData}{domain, origin, NE, spacing}
188 This method is called by the \class{DomainBuilder} to retrieve the actual survey
189 data in the form of \Data objects on the \member{domain}.
190 Data sources are responsible to map or interpolate their data onto the domain
191 which has been constructed to fit the data.
192 The domain \member{origin}, number of elements \member{NE} and element
193 \member{spacing} are provided as tuples or lists to aid with interpolation.
194 \end{methoddesc}
195
196 \begin{methoddesc}[DataSource]{getUtmZone}{}
197 Must be implemented to return the UTM zone that corresponds to the location of
198 this data set as returned by \member{getDataExtents}.
199 \end{methoddesc}
200
201 \begin{methoddesc}[DataSource]{setSubsamplingFactor}{factor}
202 Notifies the data source that data should be subsampled by \member{factor}.
203 This method does not need to be overwritten.
204 See \member{getSubsamplingFactor()} for an explanation.
205 \end{methoddesc}
206
207 \begin{methoddesc}[DataSource]{getSubsamplingFactor}{}
208 Returns the subsampling factor which was set by \member{setSubsamplingFactor()}
209 or $1$ which indicates that no subsampling is requested.
210 Data sources that support subsampling (or interleaving) of their data should use
211 this method to query the subsampling factor before returning surveys via
212 \member{getSurveyData}. If supported, the factor should be applied in all
213 dimensions. For example, a 2-dimensional dataset with 300 x 150 data points
214 should be reduced to 150 x 75 data points when the subsampling factor equals $2$.
215 Subsampling becomes important when the survey data resolution is too fine or
216 when using data with varying resolution in one inversion.
217 Note that data sources may choose to ignore the subsampling factor if they
218 don't support it.
219 \end{methoddesc}
220
221 \vspace{1em}\noindent The \module{esys.downunder.datasources} module contains the following helper
222 functions:
223
224 \begin{funcdesc}{LatLonToUTM}{longitude, latitude%
225 \optional{, wkt_string=\None}}
226 converts one or more (longitude,latitude) pairs to the corresponding (x,y)
227 coordinates in the \emph{Universal Transverse Mercator} (UTM) projection.
228 This function requires the \module{pyproj} module for conversion and the
229 \module{gdal} module to parse the \member{wkt_string} parameter if supplied.
230 The \member{wkt_string} parameter may describe the coordinate system used
231 for the input values as a \emph{Well-known Text} (WKT) string.
232 \end{funcdesc}
233
234 \subsection{ER Mapper Raster Data}\label{sec:ref:DataSource:ERM}
235 \emph{ER Mapper} files that contain 2-dimensional raster data may be used for
236 inversions through the \class{ErMapperData} class which is derived from
237 \class{DataSource}.
238 Generally, these datasets contain two files~\cite{ERMAPPER}, a header file and a data file.
239 The former usually has the \texttt{.ers} file extension and is a text file that
240 describes the data format, size, coordinate system used etc.
241 The data file usually has the same file name but no extension.
242 Note, that the current implementation may not work with all \emph{ER Mapper}
243 datasets. For example, the only cell type understood is \emph{IEEE4ByteReal}
244 at the moment.
245 To run inversions on a \emph{ER Mapper} dataset use the following constructor:
246 \begin{classdesc}{ErMapperData}{data_type, headerfile%
247 \optional{, datafile=\None}%
248 \optional{, altitude=0.}%
249 \optional{, error=\None}%
250 \optional{, scale_factor=\None}%
251 \optional{, null_value=\None}}
252 Creates a new data source from \emph{ER Mapper} data.
253 The parameter \member{data_type} must be one of
254 \class{DataSource}.\member{GRAVITY} or \class{DataSource}.\member{MAGNETIC}
255 depending on the type of data, \member{headerfile} is the name of the header
256 file while \member{datafile} specifies the name of the data file.
257 The parameter \member{datafile} can be left blank if the name is identical to
258 the header file except for the file extension.
259 The \member{altitude} parameter can be used to shift a 2-dimensional slice of
260 data vertically within a 3-dimensional domain.
261 Use \member{error} to set the (constant) measurement error with the same units
262 used by the measurements. By default a value of $2$ units is assumed which
263 equals $0.2 \; mgal$ or $2 \; nT$ depending on the data type.
264 Since ER Mapper files do not store any information about data units or scale
265 the \member{scale_factor} may be used to provide this information.
266 If not set, gravity data is assumed to be given in $\frac{\mu m}{sec^2}$ while
267 magnetic data is assumed to be given in $nT$.
268 Finally, the \member{null_value} parameter can be used to override the value
269 for the areas to be ignored (see Section~\ref{SEC:P1:GRAV:REMARK:DATAHOLES})
270 which is usually provided in the ER Mapper header file.
271 \end{classdesc}
272
273 \subsection{NetCDF Data}\label{sec:ref:DataSource:NETCDF}
274 The \class{NetCdfData} class from the \module{esys.downunder.datasources} module
275 provides the means to use data from \netcdf files~\cite{NETCDF} for inversion.
276 Currently, files that follow the \emph{Climate and Forecast (CF)}\footnote{%
277 \url{http://cf-pcmdi.llnl.gov/documents/cf-conventions/latest-cf-conventions-document-1}}
278 and/or the \emph{Cooperative Ocean/Atmosphere Research Data Service (COARDS)}\footnote{%
279 \url{http://ferret.wrc.noaa.gov/noaa_coop/coop_cdf_profile.html}} metadata
280 conventions are supported.
281 The example script \examplefile{create_netcdf.py} demonstrates how a compatible
282 file can be generated from within \Python (provided the \module{scipy} module
283 is available).
284 To plot such an input file including coordinates and legend using
285 \emph{matplotlib}~\cite{matplotlib} see the script \examplefile{show_netcdf.py}.
286 The interface to \class{NetCdfData} looks as follows:
287
288 \begin{classdesc}{NetCdfData}{datatype, filename%
289 \optional{, altitude=0.}%
290 \optional{, data_variable=\None}%
291 \optional{, error=\None}%
292 \optional{, scale_factor=\None}%
293 \optional{, null_value=\None}}
294 Creates a new data source from compatible \netcdf data.
295 The two required parameters are \member{datatype}, which must be one of
296 \class{DataSource}.\member{GRAVITY} or \class{DataSource}.\member{MAGNETIC}
297 depending on the type of data, and \member{filename} which is the name of the
298 file containing the data.
299 The \member{altitude} parameter can be used to shift a 2-dimensional slice of
300 data vertically within a 3-dimensional domain.
301 Set the \member{data_variable} parameter to the name of the \netcdf variable
302 that contains the measurements unless there is only one data variable in the
303 file in which case the parameter can be left empty.
304 Use \member{error} to set the (constant) measurement error with the same units
305 used by the measurements or the name of the \netcdf variable that contains this
306 information. By default a value of $2$ units is assumed which equals
307 $0.2 \; mgal$ or $2 \; nT$ depending on the data type.
308 The current implementation does not use the units attribute (if available)
309 within the \netcdf file. Use the \member{scale_factor} argument to provide this
310 information instead.
311 If not set, gravity data is assumed to be given in $\frac{\mu m}{sec^2}$ while
312 magnetic data is assumed to be given in $nT$.
313 Finally, the \member{null_value} parameter can be used to override the value
314 for the areas to be ignored (see Section~\ref{SEC:P1:GRAV:REMARK:DATAHOLES})
315 which is usually provided in the \netcdf file.
316 \end{classdesc}
317
318 \subsection{Synthetic Data}
319 As a special case the \module{esys.downunder.datasources} module contains
320 classes to generate input data for inversions by solving a forward model with
321 user-defined reference data.
322 The main purpose of using synthetic data is to test the capabilities of the
323 inversion module or for tracking down problems.
324
325 The base class for synthetic data which is derived from \class{DataSource}
326 has the following interface:
327
328 \begin{classdesc}{SyntheticDataBase}{datatype%
329 \optional{, DIM=2}
330 \optional{, number_of_elements=10}
331 \optional{, length=1*U.km}
332 \optional{, B_b=\None}
333 \optional{, data_offset=0}
334 \optional{, full_knowledge=\False}
335 \optional{, spherical=\False}}
336 Base class to define reference data based on a given property distribution
337 (density or susceptibility). Data are collected from a square region of
338 vertical extent \member{length} on a grid with \member{number_of_elements}
339 cells in each direction.
340 The synthetic data are constructed by solving the appropriate forward problem.
341 Data can be sampled with an offset from the surface at $z=0$ or using the
342 entire subsurface region.
343 \end{classdesc}
344
345 \vspace{1em}\noindent The only additional method which needs to be implemented
346 in subclasses is
347 \begin{methoddesc}[SyntheticDataBase]{getReferenceProperty}{
348 \optional{domain=\None}}
349 Returns the reference \Data object that was used to generate the gravity or
350 susceptibility anomaly data. The \member{domain} argument must be present
351 when this method is called for the first time but not necessarily in
352 subsequent calls.
353 \end{methoddesc}
354
355 \vspace{1em}\noindent Two synthetic data providers are currently available.
356 The class \class{SyntheticData} defines synthetic gravity or magnetic anomaly
357 data based on a harmonic
358 \begin{equation}\label{eq:synthdata}
359 k=A\cdot sin\left(\pi \frac{n_D}{D} (z+\Delta z)\right) \cdot%
360 sin\left(\pi \frac{n_L}{L} (x - x_0)\right)
361 \end{equation}
362 where $A$ is the amplitude, $n_D$, $n_L$ denote the number of oscillations in
363 the vertical and lateral direction, respectively, while $D$ and $L$ is the
364 depth and side length for the data, respectively.
365 The second data provider class is \class{SyntheticFeatureData} which takes a
366 list of \class{SourceFeature} objects that define the anomaly and is thus more
367 generic.
368 The constructors are defined as follows:
369
370 \begin{classdesc}{SyntheticData}{datatype%
371 \optional{, n_length=1}
372 \optional{, n_depth=1}
373 \optional{, depth_offset=0.}
374 \optional{, depth=\None}
375 \optional{, amplitude=\None}
376 \optional{, DIM=2}
377 \optional{, number_of_elements=10}
378 \optional{, length=1*U.km}
379 \optional{, B_b=\None}
380 \optional{, data_offset=0}
381 \optional{, full_knowledge=\False}
382 \optional{, spherical=\False}}
383 The arguments \member{n_length}, \member{n_depth}, \member{depth_offset},
384 \member{depth}, and \member{amplitude} correspond to the respective symbols in
385 Equation~\ref{eq:synthdata}. The remaining arguments are passed to the parent
386 class (\class{SyntheticDataBase}) and described there.
387 If \member{depth} is not set the depth of the domain is used.
388 The argument \member{amplitude} may be left unset as well in which case a
389 default value is used ($200 \frac{kg}{m^3}$ for gravity and $0.1$ for magnetic
390 data).
391 \end{classdesc}
392
393 \begin{classdesc}{SyntheticFeatureData}{datatype, features%
394 \optional{, DIM=2}
395 \optional{, number_of_elements=10}
396 \optional{, length=1*U.km}
397 \optional{, B_b=\None}
398 \optional{, data_offset=0}
399 \optional{, full_knowledge=\False}
400 \optional{, spherical=\False}}
401 The only new argument is \member{features} which is a list of
402 \class{SourceFeature} objects to be included in the data preparation.
403 All other arguments are passed on to the parent class (see there).
404 \end{classdesc}
405
406 \begin{classdesc}{SourceFeature}{}
407 A feature adds a density/susceptibility distribution to (parts of) a domain of
408 a synthetic data source, for example a layer of a specific rock type or a
409 simulated ore body. This base class is empty and only provides the skeleton
410 for subclasses which need to implement the following two methods:
411 \end{classdesc}
412
413 \begin{methoddesc}[SourceFeature]{getValue}{}
414 Returns the value for the area covered by mask. It can be constant or a \Data
415 object with spatial dependency.
416 \end{methoddesc}
417
418 \begin{methoddesc}[SourceFeature]{getMask}{x}
419 Returns the mask of the region of interest for this feature. That is, mask is
420 non-zero where the value returned by \member{getValue()} should be applied,
421 zero elsewhere.
422 \end{methoddesc}
423

  ViewVC Help
Powered by ViewVC 1.1.26