Source code for AberdeenProject.main

import os
os.chdir(os.path.dirname(os.path.abspath(__file__)))

import pickle
import pprint
import pandas as pd
import numpy as np
from .core.dataframeCreator import DataframeCreator
from .core.modelFitting import Model
from .core.dataPipelines import FullPipeline, MissingValuesPipeline, PreprocessingPipeline
from .utilities.statistics import Statistics
from .utilities.loadConfigFile import loadConfigFile


[docs]def main():
    """
    This function defines the complete workflow, from dataframe generation to features extraction up to
    rules extraction
    """

    # Creating the dataframe from csv files
    data = DataframeCreator()
    #print("Converting the pkl files into csv files ...")
    #data.convertPklToCsv()
    print("Creating the dataframe from csv files ...")
    data.createDataframe()
    print("Filtering the dataframe based on the threshold ...")
    data.threshholdFiltering()

    # Loading the dataframe from the previously saved pickle file
    pickleDir = loadConfigFile().get("dirConfig").get("pklDir")
    pickleFile = loadConfigFile().get("fileConfig").get("pickledData_afterThFiltering")
    filePath = os.path.join(pickleDir, pickleFile)
    print('Loading the dataframe from the pickle file ...')
    data = pickle.load(open(filePath, 'rb'))

    # Initializing the full pipeline
    print('Initializing the pipeline ...')
    fullPipeline = FullPipeline()
    fullPipeline.initialize(data)

    # Building the missing values and the preprocessing pipelines
    missingValuesPipeline = MissingValuesPipeline()
    preprocessingPipeline = PreprocessingPipeline()
    print('Building the missing values pipeline ...')
    missingValuesPipeline.buildPipeline()
    print('Building the preprocessing pipeline ...')
    preprocessingPipeline.buildPipeline()

    # Adding the missing values and the preprocessing pipelines to the full pipeline
    print('Adding the missing values pipeline to the full pipelines ...')
    fullPipeline.addPipeline(missingValuesPipeline)
    print('Adding the preprocessing pipeline to the full pipelines ...')
    fullPipeline.addPipeline(preprocessingPipeline)

    # Fitting the dataframe to the full pipeline
    print('Fitting the dataframe to the full pipeline ...')
    data = fullPipeline.fit_transform(data)

    # Fitting the model
    model = Model(data, "decisionTree")
    print('Extracting the most important features ...')
    model.keepBestFeatures()
    print('Fitting the model ...')
    model.fit()
    print('Building the rules ...')
    model.buildRules("decisionTree.dot")


if __name__ == "__main__":
    main()