From 65c88d13a1f04c27d16afbba4f25cb1605549ccf Mon Sep 17 00:00:00 2001
From: coolneng <akasroua@gmail.com>
Date: Wed, 27 May 2020 20:13:45 +0200
Subject: [PATCH] Add dataframe creation

---
 .gitignore           |  1 +
 app/constants.py     |  5 +++++
 app/preprocessing.py | 16 ++++++++++++++++
 3 files changed, 22 insertions(+)
 create mode 100644 app/preprocessing.py

diff --git a/.gitignore b/.gitignore
index 1e0f511..ee2c7ca 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 */__pycache__
 Design.org
+data/*.json
diff --git a/app/constants.py b/app/constants.py
index 1e1f1d2..01cf231 100644
--- a/app/constants.py
+++ b/app/constants.py
@@ -9,3 +9,8 @@ filenames = {
     "deconfinement-pistes-cyclables-temporaires": "cycling-paths",
     "deconfinement-parking-relais-doublement-des-places": "relay-parking",
 }
+files = {  # dataset name -> relative path of that dataset's JSON data file
+    "cycling-paths": "../data/cycling-paths.json",
+    "relay-parking": "../data/relay-parking.json",
+    "home-delivery": "../data/home-delivery.json",
+}
diff --git a/app/preprocessing.py b/app/preprocessing.py
new file mode 100644
index 0000000..8f2ac3a
--- /dev/null
+++ b/app/preprocessing.py
@@ -0,0 +1,16 @@
+from json import load
+from pandas import read_json, json_normalize, DataFrame
+from constants import files
+
+
+def open_json(dataset):
+    """Parse and return the JSON file registered for ``dataset`` in ``files``."""
+    with open(files[dataset], encoding="utf-8") as f:  # not the locale codec
+        return load(f)
+
+
+def create_dataframe(dataset):
+    """Return the ``records`` of ``dataset``'s JSON, flattened into a DataFrame."""
+    records = open_json(dataset)["records"]
+    flattened = json_normalize(data=records)
+    return DataFrame.from_dict(data=flattened)