Source code for AberdeenProject.utilities.statistics

from ..utilities.loadPickledData import loadPickledData

class Statistics:
    """
    Generate descriptive statistics of a Pandas dataframe.

    The dataframe is loaded once, in the constructor, from a pickled file and
    is then queried by the individual methods.
    """

    def __init__(self, filepath: str):
        """
        Load the pickled dataframe.

        :param filepath: Path to the pickled dataframe
        :type filepath: String
        """
        self.filepath = filepath
        # NOTE(review): loadPickledData presumably unpickles the file; pickled
        # data must only ever be loaded from a trusted source.
        self.dataframe = loadPickledData(self.filepath)

    def getShape(self) -> tuple:
        """
        Return the shape of the loaded dataframe.

        :return: Shape of the dataframe as (rows, columns)
        :rtype: Tuple
        """
        return self.dataframe.shape

    def getKeptColumns(self) -> list:
        """
        Return the column labels of the loaded dataframe.

        :return: Column labels of the dataframe
        :rtype: List
        """
        return list(self.dataframe.columns)

    def getNumKeptColumns(self) -> int:
        """
        Return the number of columns of the loaded dataframe.

        :return: Number of columns
        :rtype: Integer
        """
        # The column index supports len() directly; no intermediate list needed.
        return len(self.dataframe.columns)

    def getUniqueValues(self) -> dict:
        """
        Return all unique values per column.

        A column whose unique values cannot be computed (for example because
        it holds unhashable objects) is reported on stdout and keeps the
        value ``None`` in the result.

        :return: Dictionary mapping each column label to its unique values
        :rtype: Dictionary
        """
        # Pre-fill with None so that failing columns are still present.
        uniqueValue_dict = dict.fromkeys(self.dataframe.columns, None)
        for column in self.dataframe.columns:
            try:
                unique_values = self.dataframe[column].unique()
            except Exception:
                # Best effort: report the column and continue with the rest.
                # Exception (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                print(f" Problem by the column: {column}")
            else:
                uniqueValue_dict[column] = unique_values
        return uniqueValue_dict

    def getColumnsStatistics(self) -> dict:
        """
        Return the relative frequency, in percent, of every value per column.

        Missing values are counted as a value of their own. A column whose
        frequencies cannot be computed is reported on stdout and keeps the
        value ``None`` in the result.

        :return: Dictionary mapping each column label to a Series of
            percentages indexed by the column's values
        :rtype: Dictionary
        """
        # Pre-fill with None so that failing columns are still present.
        statistics_dict = dict.fromkeys(self.dataframe.columns, None)
        for column in self.dataframe.columns:
            try:
                # normalize=True yields fractions; scale them to percentages.
                percentages = self.dataframe[column].value_counts(
                    normalize=True, dropna=False
                ) * 100
            except Exception:
                # Best effort: report the column and continue with the rest.
                print(f" Problem by the column: {column}")
            else:
                statistics_dict[column] = percentages
        return statistics_dict