Plot the histograms for general investigation and visual inspection
Functionality for selecting a single run and plotting all lumisections belonging to that run.
Functionality for plotting the moments of the distributions as a function of LS number.
### imports
# external modules
import sys
import os
import numpy as np
import importlib
# local modules
import dataframe_utils as dfu
import plot_utils as pu
import hist_utils as hu
import DataLoader
<module 'DataLoader' from '/eos/home-l/llambrec/SWAN_projects/ML4DQM-DC/tutorials/../src/'>
### read the data
# Prerequisite: a csv file must exist at the location assembled below and
# contain only histograms of the type named by `histname`.
# The tutorial read_and_write_data shows how to create such files.
histname = 'chargeInner_PXLayer_2'
datadir = '../data'
filename = 'DF2017_'+histname+'.csv'
# build the full path once and hand it to the loader
filepath = os.path.join(datadir, filename)
dloader = DataLoader.DataLoader()
df = dloader.get_dataframe_from_file(filepath)
# element 0 of the tuple returned by get_hist_values is the histogram array
raw_shape = dfu.get_hist_values(df)[0].shape
print('raw input data shape: {}'.format(raw_shape))
# select a single run
# run number(s) to keep; list several runs here to plot them all together
runnbs = [305351]
# runs available in the loaded dataframe
runs = dfu.get_runs(df)
n_runs = len(runs)
print('number of runs: '+str(n_runs))
#print(runs) # uncomment this to see a printed list of available runs
# restrict the dataframe to the requested run(s)
df = dfu.select_runs(df, runnbs)
# select DCS-bit on data
#df = dfu.select_dcson(df)
INFO in DataLoader.get_dataframe_from_file: loading dataframe from file ../data/DF2017_chargeInner_PXLayer_2.csv...
INFO in DataLoader.get_dataframe_from_file: sorting the dataframe...
INFO in DataLoader.get_dataframe_from_file: loaded a dataframe with 225954 rows and 16 columns.
raw input data shape: (225954, 102)
number of runs: 599
# make a plot of the histograms
# NOTE(review): the three names below are bound to plain two-element lists
# and are not used anywhere in the visible part of this file; they look like
# leftovers of a dataframe lookup of the 'Xmin'/'Xmax'/'Xbins' columns
# -- TODO confirm against the original notebook.
xmin = [0, 'Xmin']
xmax = [0, 'Xmax']
nbins = [0, 'Xbins']
# get_hist_values hands back the raw histograms exactly as stored in the
# dataframe; see utils/ for more advanced data loading, e.g. normalizing.
# The third element is used further down as the lumisection axis
# (presumably the lumisection numbers -- TODO confirm).
values, _, ls = dfu.get_hist_values(df)
hist_shape = values.shape
print('shape of histogram array: '+str(hist_shape))
# NOTE(review): the plotting calls belonging to the three steps listed below
# are not present in this file.
# just plot all the histograms:
# plot the histograms with a color according to their lumisection number:
# same as before but normalizing each histogram:
shape of histogram array: (874, 102)
(<Figure size 432x288 with 2 Axes>, <AxesSubplot:>)
# select a single lumisection and plot it on top of all the other lumisections
# NOTE(review): only the lumisection number is set here; the code that uses it
# for plotting is not part of the visible file.
lsnumber = 869
(<Figure size 432x288 with 2 Axes>, <AxesSubplot:>)
# make a plot of the moments of the (normalized) histograms
# drop the first and the last bin of every histogram
# (presumably the under- and overflow bins -- TODO confirm)
hists = values[:, 1:-1]
# use xmin = 0 and xmax = 1 as a kind of normalization:
# one equally spaced position in [0, 1] per remaining bin
n_positions = hists.shape[1]
bins = np.linspace(0, 1, num=n_positions, endpoint=True)
# moments of order 1, 2 and 3, shown against the lumisection axis `ls`
moments = hu.histmoments(bins, hists, orders=[1, 2, 3])
_ = pu.plot_moments(moments, ls, (0, 1))