Compare commits


3 Commits

SHA1        Message                                     Date
53a34701da  Add DataFrame creation and filtering test  2020-06-10 21:50:08 +02:00
7044dda659  Remove FILENAMES constant                   2020-06-10 21:49:58 +02:00
3e811338c4  Filter relevant columns on DataFrames       2020-06-10 21:48:44 +02:00
4 changed files with 48 additions and 40 deletions

app/constants.py  View File

@@ -4,42 +4,38 @@ DATASETS = [
     "deconfinement-parking-relais-doublement-des-places",
 ]
 URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
-FILENAMES = {
-    "coronavirus-commercants-parisiens-livraison-a-domicile": "home-delivery",
-    "deconfinement-pistes-cyclables-temporaires": "cycling-paths",
-    "deconfinement-parking-relais-doublement-des-places": "relay-parking",
-}
+TEST_URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset=deconfinement-pistes-cyclables-temporaires&rows=-1"
 FILES = {
-    "cycling-paths": "data/cycling-paths.json",
-    "relay-parking": "data/relay-parking.json",
-    "home-delivery": "data/home-delivery.json",
+    "deconfinement-pistes-cyclables-temporaires": "data/cycling-paths.json",
+    "deconfinement-parking-relais-doublement-des-places": "data/relay-parking.json",
+    "coronavirus-commercants-parisiens-livraison-a-domicile": "data/home-delivery.json",
 }
 COLUMNS = {
-    "cycling-paths": [
-        ["fields", "geo_shape", "coordinates"],
-        "statut",
+    "deconfinement-pistes-cyclables-temporaires": [
+        "fields.geo_shape.coordinates",
+        "fields.statut",
         "record_timestamp",
-        "complement",
+        "fields.complement",
     ],
-    "relay-parking": [
-        "societe",
-        "nb_places_dispositif_environ",
-        "parcs",
-        "geo_shape",
-        "cp",
-        "ville",
-        "adresse",
+    "deconfinement-parking-relais-doublement-des-places": [
+        "fields.societe",
+        "fields.nb_places_dispositif_environ",
+        "fields.parcs",
+        "fields.geo_shape.coordinates",
+        "fields.cp",
+        "fields.ville",
+        "fields.adresse",
     ],
-    "home-delivery": [
-        "geo_shape",
-        "adresse",
-        "code_postal",
-        "nom_du_commerce",
-        "type_du_commerce",
-        "site_internet",
+    "coronavirus-commercants-parisiens-livraison-a-domicile": [
+        "fields.geo_shape.coordinates",
+        "fields.adresse",
+        "fields.code_postal",
+        "fields.nom_du_commerce",
+        "fields.type_de_commerce",
+        "fields.site_internet",
         "record_timestamp",
-        "precisions",
-        "telephone",
-        "mail",
+        "fields.precisions",
+        "fields.telephone",
+        "fields.mail",
     ],
 }
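The rewritten COLUMNS values are the dotted labels that pandas' json_normalize produces when it flattens each record's nested "fields" dict (its default separator is "."). A minimal sketch of that flattening, on a hypothetical trimmed-down OpenData Paris record:

from pandas import json_normalize

# Hypothetical, trimmed-down record in the shape the OpenData Paris API returns.
payload = {
    "records": [
        {
            "record_timestamp": "2020-06-10T00:00:00+02:00",
            "fields": {
                "statut": "Réalisé",
                "geo_shape": {"coordinates": [[2.35, 48.85], [2.36, 48.86]]},
            },
        }
    ]
}

# Nested dicts become "parent.child" column labels, matching the new COLUMNS.
df = json_normalize(data=payload, record_path=["records"])
print(list(df))  # ['record_timestamp', 'fields.statut', 'fields.geo_shape.coordinates']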

app/preprocessing.py  View File

@@ -1,6 +1,6 @@
 from json import load
-from pandas import json_normalize, DataFrame, set_option
-from constants import FILES, COLUMNS
+from pandas import json_normalize, DataFrame
+from .constants import FILES, COLUMNS
 
 
 def open_json(dataset) -> dict:
@@ -18,4 +18,5 @@ def create_dataframe(dataset) -> DataFrame:
     """
     json = open_json(dataset)
     df = json_normalize(data=json, record_path=["records"], errors="ignore",)
-    return df
+    filtered_df = df.filter(items=COLUMNS[dataset])
+    return filtered_df
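create_dataframe now ends with DataFrame.filter(items=...), which keeps only the listed columns, returns them in the order listed, and silently skips any label missing from the frame. A quick illustration of that behaviour:

from pandas import DataFrame

df = DataFrame({"b": [1], "a": [2], "dropped": [3]})

# filter(items=...) reorders columns to match the list and ignores unknown labels.
filtered = df.filter(items=["a", "b", "not_there"])
print(list(filtered))  # ['a', 'b']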

app/request_datasets.py  View File

@@ -1,6 +1,6 @@
 from json import dump
 from requests import get
-from constants import FILENAMES, URL
+from .constants import FILES, URL
 
 
 def format_url(dataset) -> str:
@@ -15,8 +15,7 @@ def save_json(data, dataset):
     """
     Dumps the data into a JSON file
     """
-    data_dir = "data/"
-    with open(data_dir + FILENAMES[dataset] + ".json", "w") as f:
+    with open(FILES[dataset], "w") as f:
         dump(data, f, ensure_ascii=False)
 
 
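save_json now writes straight to the path stored in FILES, dropping the FILENAMES lookup and the manual path assembly. The request_dataset helper imported by the new test is not part of this diff; the sketch below is only an assumption about how these pieces compose, inferred from the test's usage:

# Assumed composition; only format_url and save_json appear in this diff,
# request_dataset itself does not.
from json import dump
from requests import get

URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
FILES = {"deconfinement-pistes-cyclables-temporaires": "data/cycling-paths.json"}

def request_dataset(dataset):
    response = get(URL.format(dataset))   # fetch every row of the dataset
    with open(FILES[dataset], "w") as f:  # assumes the data/ directory exists
        dump(response.json(), f, ensure_ascii=False)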

test_*.py  View File

@@ -1,9 +1,21 @@
-from app.constants import files
-from app.preprocessing import create_dataframe
 from pandas import DataFrame
+from requests import get
+from app.constants import URL, COLUMNS, FILES, DATASETS
+from app.request_datasets import request_dataset
+from app.preprocessing import create_dataframe
+from os import remove
+
+
+def test_dataset_request():
+    for dataset in DATASETS:
+        response = get(URL.format(dataset))
+        assert response.status_code == 200
 
 
 def test_dataframe_creation():
-    for file in files.keys():
-        df = create_dataframe(file)
+    for dataset in DATASETS:
+        request_dataset(dataset)
+        df = create_dataframe(dataset)
+        remove(FILES[dataset])
         assert isinstance(df, DataFrame)
+        assert list(df) == COLUMNS[dataset]
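The new closing assertion holds because iterating a DataFrame yields its column labels, so list(df) is the ordered column list that filter(items=COLUMNS[dataset]) produced:

from pandas import DataFrame

df = DataFrame({"x": [1], "y": [2]})
print(list(df))          # ['x', 'y']
print(list(df.columns))  # equivalent spelling

Note that both tests hit the live opendata.paris.fr API, so they need network access, and test_dataframe_creation leaves no files behind: it downloads each dataset, builds the frame, then removes the JSON file before asserting.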