Subset the dataframe columns after case filtering
This commit is contained in:
		
							parent
							
								
									e15685d575
								
							
						
					
					
						commit
						8bcc7fa7bc
					
				@ -22,30 +22,17 @@ def process_na(df, action):
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
def filter_dataframe(df):
 | 
					def filter_dataframe(df):
 | 
				
			||||||
    relevant_columns = [
 | 
					    relevant_columns = [
 | 
				
			||||||
        "HORA",
 | 
					 | 
				
			||||||
        "DIASEMANA",
 | 
					 | 
				
			||||||
        "COMUNIDAD_AUTONOMA",
 | 
					 | 
				
			||||||
        "ISLA",
 | 
					 | 
				
			||||||
        "TOT_HERIDOS_LEVES",
 | 
					        "TOT_HERIDOS_LEVES",
 | 
				
			||||||
        "TOT_HERIDOS_GRAVES",
 | 
					        "TOT_HERIDOS_GRAVES",
 | 
				
			||||||
        "TOT_VEHICULOS_IMPLICADOS",
 | 
					        "TOT_VEHICULOS_IMPLICADOS",
 | 
				
			||||||
        "TOT_MUERTOS",
 | 
					        "TOT_MUERTOS",
 | 
				
			||||||
        "TIPO_VIA",
 | 
					 | 
				
			||||||
        "LUMINOSIDAD",
 | 
					 | 
				
			||||||
        "FACTORES_ATMOSFERICOS",
 | 
					 | 
				
			||||||
    ]
 | 
					    ]
 | 
				
			||||||
    filtered_df = df.filter(items=relevant_columns)
 | 
					    filtered_df = df.filter(items=relevant_columns)
 | 
				
			||||||
    return filtered_df
 | 
					    return filtered_df
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def normalize_numerical_values(df):
 | 
					def normalize_numerical_values(df):
 | 
				
			||||||
    cols = [
 | 
					    filtered_df = filter_dataframe(df=df)
 | 
				
			||||||
        "TOT_HERIDOS_LEVES",
 | 
					 | 
				
			||||||
        "TOT_HERIDOS_GRAVES",
 | 
					 | 
				
			||||||
        "TOT_VEHICULOS_IMPLICADOS",
 | 
					 | 
				
			||||||
        "TOT_MUERTOS",
 | 
					 | 
				
			||||||
    ]
 | 
					 | 
				
			||||||
    filtered_df = df.filter(items=cols)
 | 
					 | 
				
			||||||
    normalized_data = normalize(X=filtered_df)
 | 
					    normalized_data = normalize(X=filtered_df)
 | 
				
			||||||
    normalized_df = DataFrame(data=normalized_data, columns=filtered_df.columns)
 | 
					    normalized_df = DataFrame(data=normalized_data, columns=filtered_df.columns)
 | 
				
			||||||
    df.update(normalized_df)
 | 
					    df.update(normalized_df)
 | 
				
			||||||
@ -55,6 +42,5 @@ def normalize_numerical_values(df):
 | 
				
			|||||||
def parse_data(source, action):
 | 
					def parse_data(source, action):
 | 
				
			||||||
    df = read_csv(filepath_or_buffer=source, na_values="?")
 | 
					    df = read_csv(filepath_or_buffer=source, na_values="?")
 | 
				
			||||||
    processed_df = process_na(df=df, action=action)
 | 
					    processed_df = process_na(df=df, action=action)
 | 
				
			||||||
    filtered_df = filter_dataframe(df=processed_df)
 | 
					    normalized_df = normalize_numerical_values(df=processed_df)
 | 
				
			||||||
    normalized_df = normalize_numerical_values(df=filtered_df)
 | 
					 | 
				
			||||||
    return normalized_df
 | 
					    return normalized_df
 | 
				
			||||||
 | 
				
			|||||||
@ -7,7 +7,7 @@ from seaborn import heatmap, set_style, set_theme, pairplot
 | 
				
			|||||||
from sklearn.metrics import silhouette_score, calinski_harabasz_score
 | 
					from sklearn.metrics import silhouette_score, calinski_harabasz_score
 | 
				
			||||||
from sklearn.cluster import KMeans, Birch, AffinityPropagation, MeanShift, DBSCAN
 | 
					from sklearn.cluster import KMeans, Birch, AffinityPropagation, MeanShift, DBSCAN
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from preprocessing import parse_data
 | 
					from preprocessing import parse_data, filter_dataframe
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def choose_model(model):
 | 
					def choose_model(model):
 | 
				
			||||||
@ -160,9 +160,10 @@ def main():
 | 
				
			|||||||
    data = parse_data(source="data/accidentes_2013.csv", action=str(argv[1]))
 | 
					    data = parse_data(source="data/accidentes_2013.csv", action=str(argv[1]))
 | 
				
			||||||
    individual_result, complete_results = create_result_dataframes()
 | 
					    individual_result, complete_results = create_result_dataframes()
 | 
				
			||||||
    case_data = construct_case(df=data, choice=case)
 | 
					    case_data = construct_case(df=data, choice=case)
 | 
				
			||||||
 | 
					    filtered_data = filter_dataframe(df=case_data)
 | 
				
			||||||
    for model in models:
 | 
					    for model in models:
 | 
				
			||||||
        model_results = predict_data(
 | 
					        model_results = predict_data(
 | 
				
			||||||
            data=case_data,
 | 
					            data=filtered_data,
 | 
				
			||||||
            model=model,
 | 
					            model=model,
 | 
				
			||||||
            results=individual_result,
 | 
					            results=individual_result,
 | 
				
			||||||
            sample=sample,
 | 
					            sample=sample,
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user