Compare commits

..

No commits in common. "53a34701da18b3e37830e1dda1b7f20902911d02" and "7eba74b3efd6491b7118e9471f2751107dbd06f8" have entirely different histories.

4 changed files with 40 additions and 48 deletions

View File

@@ -4,38 +4,42 @@ DATASETS = [
"deconfinement-parking-relais-doublement-des-places", "deconfinement-parking-relais-doublement-des-places",
] ]
URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1" URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
TEST_URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset=deconfinement-pistes-cyclables-temporaires&rows=-1" FILENAMES = {
"coronavirus-commercants-parisiens-livraison-a-domicile": "home-delivery",
"deconfinement-pistes-cyclables-temporaires": "cycling-paths",
"deconfinement-parking-relais-doublement-des-places": "relay-parking",
}
FILES = { FILES = {
"deconfinement-pistes-cyclables-temporaires": "data/cycling-paths.json", "cycling-paths": "data/cycling-paths.json",
"deconfinement-parking-relais-doublement-des-places": "data/relay-parking.json", "relay-parking": "data/relay-parking.json",
"coronavirus-commercants-parisiens-livraison-a-domicile": "data/home-delivery.json", "home-delivery": "data/home-delivery.json",
} }
COLUMNS = { COLUMNS = {
"deconfinement-pistes-cyclables-temporaires": [ "cycling-paths": [
"fields.geo_shape.coordinates", ["fields", "geo_shape", "coordinates"],
"fields.statut", "statut",
"record_timestamp", "record_timestamp",
"fields.complement", "complement",
], ],
"deconfinement-parking-relais-doublement-des-places": [ "relay-parking": [
"fields.societe", "societe",
"fields.nb_places_dispositif_environ", "nb_places_dispositif_environ",
"fields.parcs", "parcs",
"fields.geo_shape.coordinates", "geo_shape",
"fields.cp", "cp",
"fields.ville", "ville",
"fields.adresse", "adresse",
], ],
"coronavirus-commercants-parisiens-livraison-a-domicile": [ "home-delivery": [
"fields.geo_shape.coordinates", "geo_shape",
"fields.adresse", "adresse",
"fields.code_postal", "code_postal",
"fields.nom_du_commerce", "nom_du_commerce",
"fields.type_de_commerce", "type_du_commerce",
"fields.site_internet", "site_internet",
"record_timestamp", "record_timestamp",
"fields.precisions", "precisions",
"fields.telephone", "telephone",
"fields.mail", "mail",
], ],
} }

View File

@@ -1,6 +1,6 @@
from json import load from json import load
from pandas import json_normalize, DataFrame from pandas import json_normalize, DataFrame, set_option
from .constants import FILES, COLUMNS from constants import FILES, COLUMNS
def open_json(dataset) -> dict: def open_json(dataset) -> dict:
@@ -18,5 +18,4 @@ def create_dataframe(dataset) -> DataFrame:
""" """
json = open_json(dataset) json = open_json(dataset)
df = json_normalize(data=json, record_path=["records"], errors="ignore",) df = json_normalize(data=json, record_path=["records"], errors="ignore",)
filtered_df = df.filter(items=COLUMNS[dataset]) return df
return filtered_df

View File

@@ -1,6 +1,6 @@
from json import dump from json import dump
from requests import get from requests import get
from .constants import FILES, URL from constants import FILENAMES, URL
def format_url(dataset) -> str: def format_url(dataset) -> str:
@@ -15,7 +15,8 @@ def save_json(data, dataset):
""" """
Dumps the data into a JSON file Dumps the data into a JSON file
""" """
with open(FILES[dataset], "w") as f: data_dir = "data/"
with open(data_dir + FILENAMES[dataset] + ".json", "w") as f:
dump(data, f, ensure_ascii=False) dump(data, f, ensure_ascii=False)

View File

@@ -1,21 +1,9 @@
from pandas import DataFrame from app.constants import files
from requests import get
from app.constants import URL, COLUMNS, FILES, DATASETS
from app.request_datasets import request_dataset
from app.preprocessing import create_dataframe from app.preprocessing import create_dataframe
from os import remove from pandas import DataFrame
def test_dataset_request():
for dataset in DATASETS:
response = get(URL.format(dataset))
assert response.status_code == 200
def test_dataframe_creation(): def test_dataframe_creation():
for dataset in DATASETS: for file in files.keys():
request_dataset(dataset) df = create_dataframe(file)
df = create_dataframe(dataset)
remove(FILES[dataset])
assert isinstance(df, DataFrame) assert isinstance(df, DataFrame)
assert list(df) == COLUMNS[dataset]