'''This module provides statistical summaries and plots for the data in a given file.'''
from copy import deepcopy
from matplotlib import pyplot as plt
import pandas
from .lib.base import GeneralSubparser, RPData, file_rename
[docs]
class Statistics(object):
    '''Auxiliary class for statistics calculus.

    A fresh instance has ``self.data`` and ``self.columns`` set to ``None``;
    they must be filled (the subclasses in this module do so in their own
    ``get_histograms``) before :meth:`get_histograms` of this class runs.
    '''

    def __init__(self, rpdata: RPData, name: str, image_format='svg') -> None:
        '''Keep a private copy of ``rpdata`` and prepare the output file name.

        :param rpdata: source data. It is deep-copied into ``self.rpdata``;
            ``rpdata.data`` is loaded into ``self.dataframe``.
        :param name: tag handed to ``file_rename`` when building output names.
        :param image_format: image format handed to ``file_rename``.
        '''
        self.rpdata = deepcopy(rpdata)
        self.image_format = image_format
        self.name = name
        self.outname = file_rename(self.rpdata.path, self.image_format, self.name)
        self.subplots = None
        self.dataframe = pandas.DataFrame(rpdata.data)
        self.data = None
        self.columns = None
        self.is_index = False

    def _axis_label(self, column: str) -> str:
        '''Return the axis label for ``column``, suffixed with " index" when ``is_index`` is set.'''
        if self.is_index:
            return '{} index'.format(column)
        return column

    def get_histograms(self, no_plot: bool, split_labels: bool) -> None:
        '''Make histogram and print statistical summary.

        Prints ``describe()`` of ``self.data`` and, unless ``no_plot`` is set,
        saves one histogram figure to ``self.outname``. When ``split_labels``
        is set and ``self.rpdata.labels`` is truthy, the summary is repeated
        per label and one boxplot per column is saved.

        :param no_plot: when true, only print the summaries; save no figure.
        :param split_labels: when true, also report the data grouped by label.
        '''
        # Add the suffix only once, so calling this method again does not
        # produce '...-histogram-histogram' output names.
        suffix = '-histogram'
        if not self.name.endswith(suffix):
            self.name = '{}{}'.format(self.name, suffix)
        self.outname = file_rename(self.rpdata.path, self.image_format, self.name)

        print('Statistical summary: full')
        print(self.data.describe().round(2))

        if not no_plot:
            axes = self.data.hist()
            # hist() hands back a 2-D grid of axes; walk the whole grid so
            # columns placed beyond the first row are labelled too.
            for column, ax in zip(self.columns, axes.ravel()):
                ax.set_xlabel(self._axis_label(column))
                ax.set_ylabel('Number of events')
            print('Saving file {}...'.format(self.outname))
            plt.savefig(self.outname)

        if split_labels and self.rpdata.labels:
            # Build a labelled copy rather than writing a 'Label' column into
            # self.data, which would alter the instance state (and may be a
            # selection taken from another frame).
            labelled = self.data.assign(Label=self.rpdata.labels)
            grouped = labelled[list(self.columns) + ['Label']].groupby('Label')
            for label, group in grouped:
                print('\nLabel: {}'.format(label))
                print(group.describe().round(2))

            if not no_plot:
                for column in self.columns:
                    plt.clf()
                    labelled[[column, 'Label']].plot.box(column=column, by='Label', grid=True)
                    plt.ylabel(self._axis_label(column))
                    plt.title('')
                    plt.xlabel('Labels')
                    plt.tight_layout()
                    # Honour the configured image format instead of a
                    # hard-coded 'svg'. The placeholder is filled after
                    # file_rename, so the file names keep their previous form.
                    boxplot_name = file_rename(
                        self.outname, self.image_format, 'label-{}-boxplot'
                    ).format(column.lower())
                    plt.savefig(boxplot_name)
[docs]
class AgglomerationDispersionStatistics(Statistics):
    '''Statistics of the agglomeration and dispersion indexes.'''

    def __init__(self, rpdata: RPData, image_format='svg') -> None:
        '''Initialise with the 'agglomeration-dispersion' output name.

        :param rpdata: source data, forwarded to the base initialiser.
        :param image_format: image format, forwarded to the base initialiser.
        '''
        name = 'agglomeration-dispersion'
        super().__init__(rpdata, name, image_format)
        # Set the flag after the base initialiser: the base class assigns
        # is_index = False, which would otherwise overwrite this value.
        self.is_index = True

    def get_histograms(self, no_plot: bool, split_labels: bool) -> None:
        '''Make histogram and print statistical summary of agglomeration and dispersion indexes.

        :param no_plot: forwarded to the base implementation.
        :param split_labels: forwarded to the base implementation.
        '''
        self.columns = ['Agglomeration', 'Dispersion']
        # Take an explicit copy so later work on self.data never touches
        # (or warns about touching) self.dataframe.
        self.data = self.dataframe[self.columns].copy()
        super().get_histograms(no_plot, split_labels)
[docs]
class PartsDensityNumberStatistics(Statistics):
    '''Statistics of the number of parts and the density number.'''

    def __init__(self, rpdata: RPData, image_format='svg') -> None:
        '''Initialise with the 'parts-statistics' output name.

        :param rpdata: source data, forwarded to the base initialiser.
        :param image_format: image format, forwarded to the base initialiser.
        '''
        super().__init__(rpdata, 'parts-statistics', image_format)

    def get_histograms(self, no_plot: bool, split_labels: bool) -> None:
        '''Make histogram and print statistical summary of number of parts and density number.

        The table is built from
        ``self.rpdata.get_number_of_parts_and_density_numbers()``.

        :param no_plot: forwarded to the base implementation.
        :param split_labels: forwarded to the base implementation.
        '''
        column_names = ['Number of parts', 'Density number']
        records = self.rpdata.get_number_of_parts_and_density_numbers()
        self.columns = column_names
        self.data = pandas.DataFrame(records, columns=column_names)
        super().get_histograms(no_plot, split_labels)
[docs]
class Subparser(GeneralSubparser):
    '''Implements argparser.'''

    def setup(self) -> None:
        '''Set the program name and help text of this subparser.'''
        self.program_help = 'Statistical tools'
        self.program_name = 'stats'

    def add_arguments(self) -> None:
        '''Register the ``--no_plot`` and ``--labels`` boolean switches.'''
        switches = (
            ("-np", "--no_plot", "No plot charts"),
            ("-l", "--labels", "Split labels"),
        )
        for short_flag, long_flag, description in switches:
            self.parser.add_argument(
                short_flag, long_flag, help=description, action='store_true'
            )

    def handle(self, args):
        '''Load ``args.filename`` and run both statistics reports on it.

        The agglomeration/dispersion report runs first, then the
        parts/density-number report; each receives ``args.no_plot`` and
        ``args.labels``.
        '''
        rpdata = RPData(args.filename)
        report_classes = (
            AgglomerationDispersionStatistics,
            PartsDensityNumberStatistics,
        )
        for report_class in report_classes:
            report = report_class(rpdata, 'svg')
            report.get_histograms(args.no_plot, args.labels)