Source code for rpscripts.stats

'''This module provides statistical summaries and histograms of the data in a given file.'''

from copy import deepcopy
from matplotlib import pyplot as plt
import pandas
from .lib.base import GeneralSubparser, RPData, file_rename


class Statistics(object):
    '''Auxiliary class for statistics calculus.

    Holds a copy of an ``RPData`` object together with a ``pandas.DataFrame``
    built from its ``data`` attribute, and implements the shared routine that
    prints statistical summaries and saves the charts.

    ``get_histograms`` reads ``self.columns`` and ``self.data``; both start as
    ``None`` and must be filled in (e.g. by a subclass) before it is called.
    '''

    # Whether the analysed columns are indexes; when true, axis labels get an
    # ' index' suffix. Kept as a class-level default (instead of being reset
    # in ``__init__``) so that a value assigned by a subclass is never
    # clobbered, regardless of whether it is set before or after
    # ``super().__init__()``.
    is_index = False

    def __init__(self, rpdata: RPData, name: str, image_format='svg') -> None:
        '''Store the data and compute the default output file name.

        :param rpdata: source data; a deep copy is kept in ``self.rpdata``.
        :param name: base name used to build the output file name.
        :param image_format: image format passed to ``file_rename`` when
            building output file names.
        '''

        self.rpdata = deepcopy(rpdata)
        self.image_format = image_format
        self.name = name
        self.outname = file_rename(self.rpdata.path, self.image_format, self.name)
        self.subplots = None
        self.dataframe = pandas.DataFrame(rpdata.data)
        self.data = None
        self.columns = None

    def get_histograms(self, no_plot: bool, split_labels: bool) -> None:
        '''Make histogram and print statistical summary.

        Appends ``'-histogram'`` to ``self.name`` and recomputes
        ``self.outname`` on every call.

        :param no_plot: when true, only the summaries are printed and no
            chart is saved.
        :param split_labels: when true (and ``self.rpdata.labels`` is not
            empty), also print one summary per label and, unless ``no_plot``
            is set, save one boxplot per column grouped by label.
        '''

        self.name = '{}-{}'.format(self.name, 'histogram')
        self.outname = file_rename(self.rpdata.path, self.image_format, self.name)

        print('Statistical summary: full')
        print(self.data.describe().round(2))

        if not no_plot:
            axes = self.data.hist()
            # Only the first row of the axes grid is labelled.
            for column, ax in zip(self.columns, axes[0]):
                if self.is_index:
                    ax.set_xlabel('{} index'.format(column))
                else:
                    ax.set_xlabel(column)
                ax.set_ylabel('Number of events')

            print('Saving file {}...'.format(self.outname))
            plt.savefig(self.outname)

        if split_labels and self.rpdata.labels:
            # ``assign`` returns a new frame, so the 'Label' column is never
            # written into a frame that was obtained by selecting columns
            # from another one (which triggers SettingWithCopyWarning).
            self.data = self.data.assign(Label=self.rpdata.labels)
            labelled_columns = list(self.columns) + ['Label']

            for label, group in self.data[labelled_columns].groupby('Label'):
                print('\nLabel: {}'.format(label))
                print(group.describe().round(2))

            if not no_plot:
                for column in self.columns:
                    plt.clf()
                    self.data[[column, 'Label']].plot.box(column=column, by='Label', grid=True)
                    if self.is_index:
                        plt.ylabel('{} index'.format(column))
                    else:
                        plt.ylabel(column)
                    plt.title('')
                    plt.xlabel('Labels')
                    plt.tight_layout()
                    # Use the configured image format (it was hard-coded to
                    # 'svg'), matching how the histogram file name is built.
                    # The '{}' placeholder is filled in after ``file_rename``;
                    # this presumes the helper keeps the placeholder intact.
                    boxplot_name = file_rename(self.outname, self.image_format, 'label-{}-boxplot')
                    plt.savefig(boxplot_name.format(column.lower()))
class AgglomerationDispersionStatistics(Statistics):
    '''Statistics of the agglomeration and dispersion indexes.'''

    def __init__(self, rpdata: RPData, image_format='svg') -> None:
        '''Initialize with the fixed name ``'agglomeration-dispersion'``.

        :param rpdata: source data, forwarded to the base initializer.
        :param image_format: image format, forwarded to the base initializer.
        '''

        name = 'agglomeration-dispersion'
        super().__init__(rpdata, name, image_format)
        # Set the flag only after the base initializer has run, so that
        # nothing in it can overwrite the value.
        self.is_index = True

    def get_histograms(self, no_plot: bool, split_labels: bool) -> None:
        '''Make histogram and print statistical summary of agglomeration and dispersion indexes.

        :param no_plot: forwarded to the base implementation.
        :param split_labels: forwarded to the base implementation.
        '''

        self.columns = ['Agglomeration', 'Dispersion']
        # Explicit copy: columns added to ``self.data`` later on must not be
        # treated as writes into a selection of ``self.dataframe``.
        self.data = self.dataframe[self.columns].copy()
        super().get_histograms(no_plot, split_labels)
class PartsDensityNumberStatistics(Statistics):
    '''Statistics of the number of parts and the density number.'''

    def __init__(self, rpdata: RPData, image_format='svg') -> None:
        '''Initialize with the fixed name ``'parts-statistics'``.

        :param rpdata: source data, forwarded to the base initializer.
        :param image_format: image format, forwarded to the base initializer.
        '''

        super().__init__(rpdata, 'parts-statistics', image_format)

    def get_histograms(self, no_plot: bool, split_labels: bool) -> None:
        '''Make histogram and print statistical summary of number of parts and density number.

        The data frame is built from the values returned by
        ``self.rpdata.get_number_of_parts_and_density_numbers()``.

        :param no_plot: forwarded to the base implementation.
        :param split_labels: forwarded to the base implementation.
        '''

        self.columns = ['Number of parts', 'Density number']
        values = self.rpdata.get_number_of_parts_and_density_numbers()
        self.data = pandas.DataFrame(values, columns=self.columns)
        super().get_histograms(no_plot, split_labels)
class Subparser(GeneralSubparser):
    '''Implements argparser.'''

    def setup(self) -> None:
        '''Set the program name and help text of this subcommand.'''

        self.program_name, self.program_help = 'stats', 'Statistical tools'

    def add_arguments(self) -> None:
        '''Register the ``--no_plot`` and ``--labels`` boolean flags.'''

        flags = (
            ("-np", "--no_plot", "No plot charts"),
            ("-l", "--labels", "Split labels"),
        )
        for short_flag, long_flag, description in flags:
            self.parser.add_argument(short_flag, long_flag, help=description, action='store_true')

    def handle(self, args):
        '''Load the file given in ``args.filename`` and run both statistics.

        The agglomeration/dispersion statistics run first, followed by the
        parts/density-number statistics.
        '''

        rpdata = RPData(args.filename)
        statistics_classes = (
            AgglomerationDispersionStatistics,
            PartsDensityNumberStatistics,
        )
        for statistics_class in statistics_classes:
            statistics = statistics_class(rpdata, 'svg')
            statistics.get_histograms(args.no_plot, args.labels)