Add preprocessing module

This commit is contained in:
2020-10-28 19:29:50 +01:00
parent f919985414
commit 127e13d370
4 changed files with 102 additions and 1 deletions

26
src/preprocessing.py Normal file
View File

@@ -0,0 +1,26 @@
from pandas import read_csv
from sklearn.preprocessing import LabelEncoder
def replace_values(df):
    """Fill missing values in the numeric feature columns with the column mean.

    The columns "BI-RADS", "Margin", "Density" and "Age" are processed; every
    other column is left untouched.

    Args:
        df: DataFrame containing the four columns listed above.

    Returns:
        The same DataFrame object, modified in place, with the missing
        entries of those columns replaced by the respective column mean.

    Raises:
        KeyError: If one of the expected columns is absent from ``df``.
    """
    columns = ["BI-RADS", "Margin", "Density", "Age"]
    for column in columns:
        # Assign the filled column back instead of calling
        # ``df[column].fillna(..., inplace=True)``: the chained in-place form
        # acts on a temporary object under pandas copy-on-write and would
        # leave ``df`` unchanged.
        df[column] = df[column].fillna(df[column].mean())
    return df
def process_na(df, action):
    """Handle missing values in ``df`` according to ``action``.

    Args:
        df: DataFrame that may contain missing values.
        action: ``"drop"`` removes every row that has a missing value; any
            other value delegates to ``replace_values``.

    Returns:
        The DataFrame produced by ``df.dropna()`` for ``"drop"``, otherwise
        whatever ``replace_values(df)`` returns.
    """
    return df.dropna() if action == "drop" else replace_values(df)
def encode_columns(df):
    """Fit a ``LabelEncoder`` on the "Shape" column of ``df``.

    NOTE(review): the fitted encoder is neither applied to ``df`` nor
    returned, so the function currently returns ``None`` and the caller
    sees no result. Confirm the intended contract before relying on it.

    Args:
        df: DataFrame containing a "Shape" column.
    """
    shape_values = df["Shape"]
    LabelEncoder().fit(shape_values)
def parse_data(source, action):
    """Load a CSV source and resolve its missing values.

    Cells containing ``"?"`` are read as missing values; the loaded frame is
    then passed to ``process_na`` together with ``action``.

    Args:
        source: Path or buffer handed to ``pandas.read_csv``.
        action: Missing-value strategy forwarded to ``process_na``.

    Returns:
        The DataFrame returned by ``process_na``.
    """
    raw_frame = read_csv(source, na_values="?")
    return process_na(raw_frame, action)

1
src/processing.py Normal file
View File

@@ -0,0 +1 @@
from preprocessing import parse_data