Inference 2D
+2D Posterior analysis of the Bayesian inference
+diff --git a/docs/2D_Inference/index.html b/docs/2D_Inference/index.html new file mode 100644 index 00000000..35ce85c1 --- /dev/null +++ b/docs/2D_Inference/index.html @@ -0,0 +1,120 @@ + + +
+ + + +2D Posterior analysis of the Bayesian inference
+Note
+Go to the end +to download the full example code
All plotting in GeoBIPy can be carried out using the 2D inference class
import argparse
import matplotlib.pyplot as plt
import numpy as np
from geobipy import Inference2D
from create_model import create_model


def create_plots(folder, data_type, model_type):
    """Plot a 3x4 summary figure of a 2D Bayesian inference and save it to PNG.

    Reads the HDF5 results file ``<folder>/<data_type>/<model_type>/0.0.h5``
    produced by GeoBIPy and writes the figure to
    ``<folder>_<data_type>_<model_type>.png``.

    Parameters
    ----------
    folder : str
        Root directory containing the inference results.
    data_type : str
        Data set name, e.g. 'resolve', 'skytem_512' or 'tempest'.
    model_type : str
        Name of the true model used to generate the synthetic data.
    """
    # The Inference2D class is low memory: it only reads information from the
    # HDF5 file as and when it is needed.  The first time these classes are
    # used to create plots, expect longer initial processing times, because
    # expensive properties are precomputed and stored in the HDF5 file for
    # later use.
    from numpy.random import Generator
    from numpy.random import PCG64DXSM
    generator = PCG64DXSM(seed=0)
    prng = Generator(generator)

    results_2d = Inference2D.fromHdf('{}/{}/{}/0.0.h5'.format(folder, data_type, model_type), prng=prng)

    kwargs = {
        "log": 10,
        "cmap": 'jet',
    }

    fig = plt.figure(figsize=(16, 4))
    plt.suptitle("{} {}".format(data_type, model_type))
    gs0 = fig.add_gridspec(3, 4)

    # Column 0, row 0: the true model the synthetic data were generated from.
    ax1 = fig.add_subplot(gs0[0, 0])
    true_model = create_model(model_type)

    if data_type == 'resolve':
        # NOTE(review): presumably rescales depth for the shallower resolve
        # system -- confirm the 4.1 factor against the data-generation code.
        true_model.mesh.y_edges = true_model.mesh.y_edges / 4.1

    # Share colour limits across the model panels so they are comparable.
    kwargs['vmin'] = np.log10(np.min(true_model.values))
    kwargs['vmax'] = np.log10(np.max(true_model.values))

    true_model.pcolor(**kwargs)
    results_2d.plot_data_elevation(linewidth=0.3)
    results_2d.plot_elevation(linewidth=0.3)

    if data_type == 'resolve':
        plt.ylim([-240, 60])
    else:
        plt.ylim([-550, 60])

    # Column 0, row 1: posterior mean model.
    ax1 = fig.add_subplot(gs0[1, 0])
    results_2d.plot_mean_model(**kwargs)
    results_2d.plot_data_elevation(linewidth=0.3)
    results_2d.plot_elevation(linewidth=0.3)

    # Column 0, row 2: posterior mode model.  Passing use_variance=True would
    # make regions of lower confidence more transparent.
    ax1 = fig.add_subplot(gs0[2, 0])
    results_2d.plot_mode_model(use_variance=False, **kwargs)
    results_2d.plot_data_elevation(linewidth=0.3)
    results_2d.plot_elevation(linewidth=0.3)

    # Parameters above the depth of investigation (DOI) can also be kept
    # opaque by calling results_2d.compute_doi() and then
    # plot_mean_model(use_variance=True, mask_below_doi=True, **kwargs).

    # Column 1: posterior on the number of layers, and the model that
    # produced the highest posterior.
    ax = fig.add_subplot(gs0[0, 1])
    results_2d.plot_k_layers()

    ax1 = fig.add_subplot(gs0[1, 1], sharex=ax)
    results_2d.plot_best_model(**kwargs)
    results_2d.plot_data_elevation(linewidth=0.3)
    results_2d.plot_elevation(linewidth=0.3)

    # The percentile panels choose their own colour limits.
    del kwargs['vmin']
    del kwargs['vmax']

    # Column 2: 5th, 50th and 95th percentile models.
    ax1 = fig.add_subplot(gs0[0, 2])
    plt.title('5%')
    results_2d.plot_percentile(percent=0.05, **kwargs)
    ax1 = fig.add_subplot(gs0[1, 2])
    plt.title('50%')
    results_2d.plot_percentile(percent=0.5, **kwargs)
    ax1 = fig.add_subplot(gs0[2, 2])
    plt.title('95%')
    results_2d.plot_percentile(percent=0.95, **kwargs)

    # Column 3, row 0: confidence in the posterior.
    ax1 = fig.add_subplot(gs0[0, 3])
    results_2d.plot_confidence()
    results_2d.plot_data_elevation(linewidth=0.3)
    results_2d.plot_elevation(linewidth=0.3)

    # Column 3, row 1: interface probability cross-section built from the
    # interface depth posterior of each data point.  This posterior can be
    # washed out, so clim_scaling saturates the top and bottom 0.5% of the
    # colour range.
    ax1 = fig.add_subplot(gs0[1, 3])
    plt.title('P(Interface)')
    results_2d.plot_interfaces(cmap='Greys', clim_scaling=0.5)
    results_2d.plot_data_elevation(linewidth=0.3)
    results_2d.plot_elevation(linewidth=0.3)

    # Column 3, row 2: entropy cross-section.
    ax1 = fig.add_subplot(gs0[2, 3])
    results_2d.plot_entropy(cmap='Greys', clim_scaling=0.5)
    results_2d.plot_data_elevation(linewidth=0.3)
    results_2d.plot_elevation(linewidth=0.3)

    # plt.show(block=True)
    plt.savefig('{}_{}_{}.png'.format(folder, data_type, model_type), dpi=300)
+
+
if __name__ == '__main__':

    # Command-line entry point: plot every (data_type, model_type)
    # combination by default, or a single one when specified.
    parser = argparse.ArgumentParser(description="Plotting 2D inferences",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--data_type', dest='data_type', default=None, help='Plot only this data type. Defaults to all data types.')
    parser.add_argument('--model_type', dest='model_type', default=None, help='Plot only this model type. Defaults to all model types.')

    args = parser.parse_args()

    # A missing argument means "plot everything".
    data_types = ['skytem_512', 'resolve', 'tempest'] if args.data_type is None else args.data_type
    model_types = ['glacial', 'saline_clay', 'resistive_dolomites', 'resistive_basement', 'coastal_salt_water', 'ice_over_salt_water'] if args.model_type is None else args.model_type

    # argparse yields a single string; normalize both to lists.
    if not isinstance(data_types, list): data_types = [data_types]
    if not isinstance(model_types, list): model_types = [model_types]

    for data in data_types:
        print(data)
        for model in model_types:
            print(' ', model)
            create_plots("no_reverse_jump", data, model)
+
00:00.000 total execution time for 1 file from examples/2D_Inference:
+Example |
+Time |
+Mem (MB) |
+
---|---|---|
2D Posterior analysis of the Bayesian inference ( |
+00:00.000 |
+0.0 |
+
The best way to run geobipy with MPI is through the command line entry point. +Upon install, pip will create the “geobipy” entry point into the code base. +This entry point can be used for both serial and parallel modes.
+srun geobipy options.py <output folder> --mpi
+
Please refer to the installation instructions for getting your Python environment set up with mpi4py and MPI-enabled HDF5. Install those two packages first before installing GeoBIPy, otherwise pip might inadvertently install the non-parallel-enabled HDF5 library.
GeoBIPy is currently parallelized using only MPI. We do not use single-machine parallel libraries like multiprocessing or joblib because we wanted scalability from the start. We currently have no dependence between data points in a data set, so we can treat each data point independently from its neighbours. This lends itself well to distributed parallelization using MPI. One of the biggest bottlenecks of any parallel-enabled program is file IO; we therefore alleviate this bottleneck by writing results to HDF5 files (with future scope to have these be properly georeferenced netCDF files). Each unique line number in a data file will have its own separate HDF5 file.
Here is a sample Slurm script to submit an MPI-enabled job to the queue. Since we only care about the total cores available, we don't need to worry too much about cores per node, or increasing RAM per core. GeoBIPy operates with relatively small memory requirements, and we have tested with only 256 MB per core available. The code currently shows linear scalability up to 9000 cores (which was our maximum available at the time).
+#!/bin/bash
+#SBATCH --job-name=geobipy
+#SBATCH -n 5000
+#SBATCH -p <partition>
+#SBATCH --account=<account name>
+#SBATCH --time=dd-hh:mm:ss
+#SBATCH -o %j.out
+
+# Your module load section to enable python
+module load cray-hdf5-parallel cray-python
+# FFTW is required when compiling the time domain forward modeller from Geoscience Australia
+module load cray-fftw
+
+# We use Numba to compile the Python frequency domain forward modeller into C
+export OMP_NUM_THREADS=1
+export NUMBA_CPU_NAME='skylake' # Change your CPU name
+
+# Source your python environment how you need, either conda or venv
+source <Path to env /bin/activate>
+conda activate geobipy
+
+mkdir <output_folder>
+rm <output_folder>/*.h5 # We recommend this in case you have to restart a run. HDF5 files can corrupt on unsuccessful exit.
+srun geobipy options.py <output_folder> --mpi
+
The best way to run geobipy with MPI is through the command line entry point. +Upon install, pip will create the “geobipy” entry point into the code base. +This entry point can be used for both serial and parallel modes.
+srun geobipy options.py <output folder> --mpi
+
Please refer to the installation instructions for getting your Python environment set up with mpi4py and MPI-enabled HDF5. Install those two packages first before installing GeoBIPy, otherwise pip might inadvertently install the non-parallel-enabled HDF5 library.
GeoBIPy is currently parallelized using only MPI. We do not use single-machine parallel libraries like multiprocessing or joblib because we wanted scalability from the start. We currently have no dependence between data points in a data set, so we can treat each data point independently from its neighbours. This lends itself well to distributed parallelization using MPI. One of the biggest bottlenecks of any parallel-enabled program is file IO; we therefore alleviate this bottleneck by writing results to HDF5 files (with future scope to have these be properly georeferenced netCDF files). Each unique line number in a data file will have its own separate HDF5 file.
Here is a sample Slurm script to submit an MPI-enabled job to the queue. Since we only care about the total cores available, we don't need to worry too much about cores per node, or increasing RAM per core. GeoBIPy operates with relatively small memory requirements, and we have tested with only 256 MB per core available. The code currently shows linear scalability up to 9000 cores (which was our maximum available at the time).
+#!/bin/bash
+#SBATCH --job-name=geobipy
+#SBATCH -n 5000
+#SBATCH -p <partition>
+#SBATCH --account=<account name>
+#SBATCH --time=dd-hh:mm:ss
+#SBATCH -o %j.out
+
+# Your module load section to enable python
+module load cray-hdf5-parallel cray-python
+# FFTW is required when compiling the time domain forward modeller from Geoscience Australia
+module load cray-fftw
+
+# We use Numba to compile the Python frequency domain forward modeller into C
+export OMP_NUM_THREADS=1
+export NUMBA_CPU_NAME='skylake' # Change your CPU name
+
+# Source your python environment how you need, either conda or venv
+source <Path to env /bin/activate>
+conda activate geobipy
+
+mkdir <output_folder>
+rm <output_folder>/*.h5 # We recommend this in case you have to restart a run. HDF5 files can corrupt on unsuccessful exit.
+srun geobipy options.py <output_folder> --mpi
+
00:00.000 total execution time for 0 files from examples/Parallel_Inference:
+Example |
+Time |
+Mem (MB) |
+
---|---|---|
N/A |
+N/A |
+N/A |
+