Add preprocessing module
This commit is contained in:
26
src/preprocessing.py
Normal file
26
src/preprocessing.py
Normal file
@@ -0,0 +1,26 @@
|
||||
from pandas import read_csv
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
|
||||
|
||||
def replace_values(df):
    """Fill missing values in selected columns with the column mean.

    The columns "BI-RADS", "Margin", "Density" and "Age" are each filled
    with their own mean. All other columns are left untouched.

    Args:
        df: DataFrame containing the four columns listed above. It is
            modified in place.

    Returns:
        The same DataFrame object, with the listed columns filled.

    Raises:
        KeyError: If any of the listed columns is absent from ``df``.
    """
    columns = ["BI-RADS", "Margin", "Density", "Age"]
    for column in columns:
        # Assign the filled column back rather than calling
        # fillna(inplace=True) on df[column]: that form works on an
        # intermediate Series and, under pandas Copy-on-Write, the
        # result never reaches df.
        df[column] = df[column].fillna(df[column].mean())
    return df
|
||||
|
||||
|
||||
def process_na(df, action):
    """Resolve missing values in ``df`` according to ``action``.

    Args:
        df: DataFrame to process.
        action: When equal to "drop", rows containing any missing value
            are removed with ``dropna``. Every other value delegates to
            ``replace_values``.

    Returns:
        The frame produced by ``dropna`` for "drop", otherwise whatever
        ``replace_values`` returns.
    """
    if action != "drop":
        return replace_values(df)
    return df.dropna()
|
||||
|
||||
|
||||
def encode_columns(df):
    """Return a copy of ``df`` with the "Shape" column label-encoded.

    The previous version fitted a ``LabelEncoder`` on "Shape" and then
    discarded it, so the call had no observable effect. This version
    applies the fitted encoding and returns the result.

    Args:
        df: DataFrame containing a "Shape" column. It is not modified.

    Returns:
        A new DataFrame whose "Shape" column holds the integer codes
        produced by ``LabelEncoder.fit_transform``.

    Raises:
        KeyError: If ``df`` has no "Shape" column.
    """
    # NOTE(review): assumes encoding "Shape" was the intent of this
    # function; confirm before wiring it into parse_data.
    # Work on a copy so existing callers never see their frame change.
    encoded = df.copy()
    encoded["Shape"] = LabelEncoder().fit_transform(df["Shape"])
    return encoded
|
||||
|
||||
|
||||
def parse_data(source, action):
    """Load a CSV and resolve its missing values.

    Args:
        source: Passed to ``read_csv`` as ``filepath_or_buffer``.
        action: Forwarded unchanged to ``process_na``.

    Returns:
        The DataFrame returned by ``process_na``.
    """
    # "?" cells are read as missing values so process_na can handle them.
    raw = read_csv(filepath_or_buffer=source, na_values="?")
    return process_na(raw, action)
|
||||
1
src/processing.py
Normal file
1
src/processing.py
Normal file
@@ -0,0 +1 @@
|
||||
from preprocessing import parse_data
|
||||
Reference in New Issue
Block a user