Fix bugs + implement new functionalities for training and plotting (A…

…liceO2Group#4) * Fix major bug: training was done only with the first event of each bunch in the data generator code * Add the possibility of using both SC mean and fluctuations as input for the training * Add plotting utilities and study for multiple events with profiles * Fix pylon
dsekihat · Feb 22, 2020 · 383d2cf · 383d2cf
1 parent a69c8a4
commit 383d2cf
Show file tree

Hide file tree

Showing 7 changed files with 262 additions and 175 deletions.
diff --git a/.gitignore b/.gitignore
@@ -5,6 +5,7 @@ dist/*
 *.egg-info
 **/__pycache__
 *.png
+*.pdf
 
 # Python compiled
 *.pyc

diff --git a/tpcwithdnn/database_parameters_DNN_fluctuations.yml b/tpcwithdnn/database_parameters_DNN_fluctuations.yml
@@ -5,17 +5,20 @@ DNN_fluctuations:
   grid_phi: 90
   grid_z: 17
   grid_r: 17
+  #prepare the datasets
+  selopt_input: 0 # 0 = use only input data with z >= 0, 1 = only z < 0, 2 = all data (no z-selection)
+  selopt_output: 0 # 0 = predict distortions only for z >= 0, 1 = only z < 0, 2 = all data (no z-selection)
+  opt_train: [0, 1] # first position = mean SC, second = SC fluctuations
+  opt_predout: [1, 0, 0] # R, Rphi, z output distortion predictions
+  rangeevent_train: [0,800]
+  rangeevent_test: [801,970]
+  use_scaler: 0
+  #DNN configuration
   filters: 4
   pooling: 0
   batch_size: 8
-  n_channels: 1
   shuffle: false
   depth: 4
   batch_normalization: 0
-  side: 0
   dropout: 0.0
-  use_scaler: 0
-  distortion_type: 0 #0 is for checking R direction
   ephocs: 20
-  rangeevent_train: [0,800]
-  rangeevent_test: [801,970]
diff --git a/tpcwithdnn/dataloader.py b/tpcwithdnn/dataloader.py
@@ -0,0 +1,131 @@
+# pylint: disable=fixme, pointless-string-statement
+import numpy as np
+
def _load_event_array(inputdata, index, name):
    """Load ``<inputdata><index>-<name>.npy`` and return the stored array."""
    return np.load(inputdata + str(index) + '-' + name + '.npy')


def _z_selector(vec_z_pos, selopt):
    """Return the index that applies z-selection option ``selopt``.

    ``selopt`` must already be validated to be 0, 1 or 2.
    """
    if selopt == 0:
        return vec_z_pos >= 0
    if selopt == 1:
        return vec_z_pos < 0
    # Option 2: keep every entry.
    return Ellipsis


def loaddata(inputdata, indexev, selopt_input, selopt_output):
    """Load one event from disk and build the training inputs and targets.

    Files are looked up as ``inputdata + <index> + '-<name>.npy'``, so
    ``inputdata`` is a plain string prefix (directory including its trailing
    separator). The files read are:

        - 0-vecZPos.npy: z position associated with each entry of the other
          arrays. It is always read with index 0 (one file per folder, i.e.
          per bunch of events) and is used here only to select entries by
          the sign of z.
        Input features for training:
        - <indexev>-vecMeanSC.npy: average space charge in each bin of
          r, rphi and z.
        - <indexev>-vecRandomSC.npy: space charge including fluctuations.
        Output from the numerical calculations:
        - <indexev>-vecMeanDistR.npy: average distortion along the R axis on
          the same grid, i.e. the expected distortion that an electron
          passing by that region would have as a consequence of the IBF.
        - <indexev>-vecRandomDistR.npy: the corresponding fluctuating
          distortion.
        - The RPhi and Z distortions follow the same naming scheme
          (vecMeanDistRPhi, vecRandomDistRPhi, vecMeanDistZ, vecRandomDistZ).

    Every "fluctuation" returned is computed as ``mean - random``.

    Args:
        inputdata: path prefix of the input files.
        indexev: index of the event to load.
        selopt_input: z-selection applied to the space-charge inputs;
            0 keeps entries with z >= 0, 1 keeps entries with z < 0,
            2 keeps everything.
        selopt_output: same convention, applied to the distortion targets.

    Returns:
        A list ``[vecMeanSC, vecFluctuationSC, vecFluctuationDistR,
        vecFluctuationDistRPhi, vecFluctuationDistZ]`` after selection.

    Raises:
        ValueError: if ``selopt_input`` or ``selopt_output`` is not 0, 1 or 2.
            The check happens before any file is read.
    """
    # Reject unknown options up front: previously an unknown value left the
    # result variables unassigned and the function crashed at the return.
    for optname, optvalue in (("selopt_input", selopt_input),
                              ("selopt_output", selopt_output)):
        if optvalue not in (0, 1, 2):
            raise ValueError("%s must be 0, 1 or 2, got %r"
                             % (optname, optvalue))

    vec_z_pos = _load_event_array(inputdata, 0, 'vecZPos')
    mean_sc = _load_event_array(inputdata, indexev, 'vecMeanSC')
    random_sc = _load_event_array(inputdata, indexev, 'vecRandomSC')

    # Mean and random distortions for each direction, in return order.
    dist_pairs = [
        (_load_event_array(inputdata, indexev, 'vecMeanDist' + axis),
         _load_event_array(inputdata, indexev, 'vecRandomDist' + axis))
        for axis in ('R', 'RPhi', 'Z')
    ]

    # Preselection of the input features.
    sel_in = _z_selector(vec_z_pos, selopt_input)
    vec_mean_sc = mean_sc if selopt_input == 2 else mean_sc[sel_in]
    vec_fluctuation_sc = (mean_sc - random_sc)[sel_in]

    # Preselection of the targets (same convention, independent option).
    sel_out = _z_selector(vec_z_pos, selopt_output)
    fluctuation_dists = [(mean - random)[sel_out]
                         for mean, random in dist_pairs]

    return [vec_mean_sc, vec_fluctuation_sc] + fluctuation_dists
+
+
def _stack_selected(flagged_arrays, grid_shape):
    """Stack the arrays whose flag equals 1 along a new last axis.

    Each selected array is reshaped to ``grid_shape``; the result has shape
    ``grid_shape + (number_of_selected_arrays,)``.
    """
    selected = [values for flag, values in flagged_arrays if flag == 1]
    # Size the channel axis from the arrays actually written, so no channel
    # of the np.empty buffer is ever left uninitialised.
    stacked = np.empty(grid_shape + (len(selected),))
    for channel, values in enumerate(selected):
        stacked[..., channel] = values.reshape(grid_shape)
    return stacked


def loadtrain_test(inputdata, indexev, selopt_input, selopt_output,
                   grid_r, grid_rphi, grid_z, opt_train, opt_pred):
    """Build the (input, target) tensors of one event for the network.

    The flat arrays returned by ``loaddata`` are reshaped to
    ``(grid_rphi, grid_r, grid_z)`` and stacked along a trailing channel axis.

    Args:
        inputdata: path prefix of the input files, forwarded to ``loaddata``.
        indexev: index of the event to load.
        selopt_input: z-selection option for the inputs (see ``loaddata``).
        selopt_output: z-selection option for the targets (see ``loaddata``).
        grid_r: number of grid points along r.
        grid_rphi: number of grid points along rphi.
        grid_z: number of grid points along z.
        opt_train: two flags ``[meanSC, SCfluctuation]``; a flag equal to 1
            adds the corresponding array as an input channel.
        opt_pred: three flags ``[R, RPhi, Z]``; a flag equal to 1 selects the
            corresponding distortion fluctuation as the target channel.

    Returns:
        ``(x_, y_)`` with shapes ``(grid_rphi, grid_r, grid_z, n_inputs)`` and
        ``(grid_rphi, grid_r, grid_z, n_outputs)``; or the integer ``0`` when
        more than one output is requested (not implemented yet).
        NOTE(review): callers that unpack two values will fail on the ``0``
        sentinel; it is kept only for backward compatibility.
    """
    # Bail out before touching the disk: the multi-output case is rejected
    # regardless of what the files contain.
    if sum(opt_pred) > 1:
        print("MULTI-OUTPUT NOT IMPLEMENTED YET")
        return 0

    [vecMeanSC, vecFluctuationSC, vecFluctuationDistR,
     vecFluctuationDistRPhi, vecFluctuationDistZ] = \
        loaddata(inputdata, indexev, selopt_input, selopt_output)

    grid_shape = (grid_rphi, grid_r, grid_z)

    x_ = _stack_selected([(opt_train[0], vecMeanSC),
                          (opt_train[1], vecFluctuationSC)],
                         grid_shape)
    y_ = _stack_selected([(opt_pred[0], vecFluctuationDistR),
                          (opt_pred[1], vecFluctuationDistRPhi),
                          (opt_pred[2], vecFluctuationDistZ)],
                         grid_shape)

    return x_, y_
diff --git a/tpcwithdnn/default.yaml b/tpcwithdnn/default.yaml
@@ -1,4 +1,5 @@
 case: DNN_fluctuations
 dotrain: true
 doapply: true
+doplot: true
 dogrid: false