Compare commits

..

1 Commits

Author SHA1 Message Date
53a34701da Add DataFrame creation and filtering test 2020-06-10 21:50:08 +02:00
23 changed files with 71 additions and 431 deletions

3
.gitignore vendored
View File

@@ -1,4 +1,3 @@
**/__pycache__ */__pycache__
Design.org Design.org
data/*.json data/*.json
app/templates/map.html

View File

@@ -1,80 +0,0 @@
graphPaname
===========
graphPaname is a system that collects real-time data, relevant to the
COVID-19 pandemic de-escalation, from the city of Paris.
It works with 4 datasets about the de-escalation:
- Retailers with home delivery
- Additional parking places in relay parkings (parkings connected to
public transportation)
- Temporary cycling paths
- Temporary pedestrian streets
For each dataset, we offer a table with the data, and a map of Paris
with markers. Additionally, there's a section with photos related to
the COVID-19 pandemic.
Technologies
------------
- Flask
- Pandas
- Folium
Data sources
------------
- [Open Data](https://opendata.paris.fr/pages/home/)
- [OpenStreetMap](https://www.openstreetmap.org/)
- [Flickr](https://flickr.com)
Requirements
------------
- Nix
Installation
------------
1. Install Nix (compatible with MacOS and Linux):
``` {.shell}
curl -L https://nixos.org/nix/install | sh
```
There are alternative installation methods if you don't want to pipe
curl to sh.
2. Clone the repository:
``` {.shell}
git clone https://coolneng.duckdns.org/gitea/coolneng/graphPaname
```
3. Change the working directory to the project:
``` {.shell}
cd graphPaname
```
4. Enter the nix-shell:
``` {.shell}
nix-shell
```
5. Run the tests:
``` {.shell}
pytest
```
6. Execute the Flask application:
``` {.shell}
flask run
```
The website can be accessed via **localhost:5000**

4
README.org Normal file
View File

@@ -0,0 +1,4 @@
* graphPaname
This project aims to gather information about the smart city of Paris and
organize it in different plots and tables.

View File

@@ -1,11 +0,0 @@
from flask import Flask
from constants import SECRET_KEY
from flask_bootstrap import Bootstrap
# Application object shared by the rest of the package
app = Flask(__name__)
# Key taken from the constants module
app.secret_key = SECRET_KEY
# NOTE(review): presumably enabled so that the map.html template, which is
# rewritten at runtime by create_map, is picked up again - confirm
app.config['TEMPLATES_AUTO_RELOAD'] = True
bootstrap = Bootstrap(app)
# Imported last on purpose: errors and routes both import `app` from this
# package, so the object has to exist before they are loaded
from app import errors, routes

View File

@@ -2,13 +2,16 @@ DATASETS = [
"coronavirus-commercants-parisiens-livraison-a-domicile", "coronavirus-commercants-parisiens-livraison-a-domicile",
"deconfinement-pistes-cyclables-temporaires", "deconfinement-pistes-cyclables-temporaires",
"deconfinement-parking-relais-doublement-des-places", "deconfinement-parking-relais-doublement-des-places",
"deconfinement-rues-amenagees-pour-pietons",
] ]
DATASET_URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1" URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
FLICKR_URL = "https://www.flickr.com/search/?text={}" TEST_URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset=deconfinement-pistes-cyclables-temporaires&rows=-1"
FILES = {
"deconfinement-pistes-cyclables-temporaires": "data/cycling-paths.json",
"deconfinement-parking-relais-doublement-des-places": "data/relay-parking.json",
"coronavirus-commercants-parisiens-livraison-a-domicile": "data/home-delivery.json",
}
COLUMNS = { COLUMNS = {
"deconfinement-pistes-cyclables-temporaires": [ "deconfinement-pistes-cyclables-temporaires": [
"fields.geo_shape.type",
"fields.geo_shape.coordinates", "fields.geo_shape.coordinates",
"fields.statut", "fields.statut",
"record_timestamp", "record_timestamp",
@@ -18,14 +21,12 @@ COLUMNS = {
"fields.societe", "fields.societe",
"fields.nb_places_dispositif_environ", "fields.nb_places_dispositif_environ",
"fields.parcs", "fields.parcs",
"fields.geo_shape.type",
"fields.geo_shape.coordinates", "fields.geo_shape.coordinates",
"fields.cp", "fields.cp",
"fields.ville", "fields.ville",
"fields.adresse", "fields.adresse",
], ],
"coronavirus-commercants-parisiens-livraison-a-domicile": [ "coronavirus-commercants-parisiens-livraison-a-domicile": [
"fields.geo_shape.type",
"fields.geo_shape.coordinates", "fields.geo_shape.coordinates",
"fields.adresse", "fields.adresse",
"fields.code_postal", "fields.code_postal",
@@ -37,20 +38,4 @@ COLUMNS = {
"fields.telephone", "fields.telephone",
"fields.mail", "fields.mail",
], ],
"deconfinement-rues-amenagees-pour-pietons": [
"fields.geo_shape.type",
"fields.geo_shape.coordinates",
"fields.nom_voie",
"fields.categorie",
"fields.statut",
"record_timestamp",
],
} }
# Options offered by the dataset selection form (DatasetForm passes this list
# as the choices of its SelectField): dataset identifier paired with a short
# readable name
CHOICES = [
("coronavirus-commercants-parisiens-livraison-a-domicile", "home-delivery"),
("deconfinement-pistes-cyclables-temporaires", "cycling-paths"),
("deconfinement-parking-relais-doublement-des-places", "relay-parking"),
("deconfinement-rues-amenagees-pour-pietons", "pedestrian-streets"),
]
# Assigned to the Flask application's secret_key
# NOTE(review): the secret is hard-coded and versioned; consider loading it
# from the environment instead
SECRET_KEY = "trolaso"
# Initial location of the folium Map built in create_map
# NOTE(review): presumably [latitude, longitude] of central Paris - confirm
COORDINATES = [48.864716, 2.349014]

View File

@@ -1,59 +0,0 @@
from re import findall
from typing import List
from bs4 import BeautifulSoup
from requests import get
from constants import FLICKR_URL, DATASET_URL
def format_url(dataset) -> str:
    """
    Builds the API URL that serves the requested dataset
    """
    return DATASET_URL.format(dataset)
def request_dataset(dataset, timeout=30):
    """
    Fetches the requested dataset from opendata's API

    dataset: identifier of the dataset, interpolated into the API URL
    timeout: seconds to wait for the server before giving up, so that an
    unresponsive API cannot block the caller forever

    Returns the decoded JSON body of the response
    Raises an exception if there's an HTTP error or the request times out
    """
    url = format_url(dataset)
    # Without a timeout requests waits indefinitely for a stalled server
    response = get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()
def request_flickr(keywords, timeout=30) -> str:
    """
    Returns the HTML of a Flickr search

    keywords: free text to search for
    timeout: seconds to wait for the server before giving up

    The HTTP status is not checked: the body of the response is returned
    whatever the status code is
    """
    from urllib.parse import quote

    # Percent-encode the keywords so that characters such as "&" or "#" stay
    # inside the text parameter instead of altering the query string
    search_url = FLICKR_URL.format(quote(keywords, safe=""))
    # Without a timeout requests waits indefinitely for a stalled server
    result = get(search_url, timeout=timeout)
    return result.text
def extract_urls(images):
    """
    Creates proper URLs from the regex matches

    images: any object whose string representation contains the scraped
    markup

    Returns a list with the links to the .jpg files hosted on
    live.staticflickr.com, prefixed with "https://", in order of appearance
    """
    # Raw string, because "\S" is not a valid string escape. The dots are
    # escaped so they only match a literal dot, and the quantifier is lazy so
    # that two links with no whitespace in between are not merged into one
    links = findall(r"(live\.staticflickr\.com/\S+?\.jpg)", str(images))
    return ["https://" + link for link in links]
def scrape_flickr(keywords) -> List[str]:
    """
    Searches Flickr for the keywords and returns the links of the images found
    """
    soup = BeautifulSoup(request_flickr(keywords), features="html.parser")
    photo_divs = soup.find_all(
        "div", class_="view photo-list-photo-view requiredToShowOnServer awake"
    )
    return extract_urls(photo_divs)

View File

@@ -1,12 +0,0 @@
from flask import render_template
from app import app
@app.errorhandler(404)
def not_found_error(error):
    """
    Renders the 404 page for the requests that end in a 404 error
    """
    page = render_template("404.html")
    return page, 404
@app.errorhandler(500)
def internal_error(error):
    """
    Renders the 500 page for the requests that end in a 500 error
    """
    page = render_template("500.html")
    return page, 500

View File

@@ -1,12 +0,0 @@
from constants import CHOICES
from flask_wtf import FlaskForm
from wtforms import SelectField, SubmitField
class DatasetForm(FlaskForm):
    """
    Form with a selector to pick one of the datasets and a submit button
    """

    # The fields keep their declaration order, dataset first
    dataset = SelectField(choices=CHOICES)
    submit = SubmitField("Submit")

View File

@@ -1,38 +1,22 @@
from folium import Map, Marker, PolyLine from json import load
from pandas import DataFrame, json_normalize from pandas import json_normalize, DataFrame
from .constants import FILES, COLUMNS
from app.data_request import request_dataset
from constants import COLUMNS, COORDINATES def open_json(dataset) -> dict:
"""
Loads a dictionary with data from a JSON file
"""
with open(FILES[dataset]) as f:
json = load(f)
return json
def create_dataframe(dataset) -> DataFrame: def create_dataframe(dataset) -> DataFrame:
""" """
Creates a DataFrame from a JSON response Creates a DataFrame from a JSON file
""" """
json = request_dataset(dataset) json = open_json(dataset)
df = json_normalize(data=json, record_path=["records"], errors="ignore",) df = json_normalize(data=json, record_path=["records"], errors="ignore",)
filtered_df = df.filter(items=COLUMNS[dataset]) filtered_df = df.filter(items=COLUMNS[dataset])
return filtered_df return filtered_df
def reverse_coordinates(row):
    """
    Flips the order of the values inside each coordinate tuple of the row, so
    that folium can parse them correctly
    """
    flipped = []
    for pair in row:
        flipped.append(tuple(reversed(pair)))
    return flipped
def create_map(df):
    """
    Draws every row of the DataFrame on a map and saves it as a template

    The rows whose geometry is a LineString become blue lines, any other row
    becomes a marker
    """
    canvas = Map(location=COORDINATES, zoom_start=12, tiles="Stamen Terrain")
    for _, row in df.iterrows():
        if row["fields.geo_shape.type"] != "LineString":
            # The dataset stores longitude first, the marker needs latitude first
            lng, lat = row["fields.geo_shape.coordinates"]
            Marker(location=[lat, lng]).add_to(canvas)
            continue
        path = reverse_coordinates(row["fields.geo_shape.coordinates"])
        PolyLine(locations=path, color="blue", opacity=0.5).add_to(canvas)
    canvas.save("app/templates/map.html")

View File

@@ -1,20 +0,0 @@
from app.preprocessing import create_dataframe, create_map
def create_table(df) -> str:
    """
    Renders an HTML table from a DataFrame

    df: DataFrame to render, it is left untouched

    Returns the HTML of the table, with the missing values shown as 0
    """
    # fillna returns a new DataFrame here; filling in place would silently
    # modify the DataFrame of the caller
    filled = df.fillna(value=0)
    return filled.to_html(classes=["table-striped", "table-sm", "table-responsive"])
def process_data(dataset):
    """
    Builds the DataFrame of the dataset, then renders its table, saves its map
    and hands the table back
    """
    frame = create_dataframe(dataset)
    # The table is rendered before the map is drawn, as the order is kept
    html_table = create_table(frame)
    create_map(frame)
    return html_table

30
app/request_datasets.py Normal file
View File

@@ -0,0 +1,30 @@
from json import dump
from requests import get
from .constants import FILES, URL
def format_url(dataset) -> str:
    """
    Builds the API URL that serves the requested dataset
    """
    return URL.format(dataset)
def save_json(data, dataset):
    """
    Dumps the data into a JSON file

    data: JSON-serializable object to store
    dataset: identifier of the dataset, used to look up the path in FILES

    Raises KeyError if the dataset has no entry in FILES
    """
    # ensure_ascii=False writes the accented characters as they are, so the
    # encoding is fixed to UTF-8 instead of relying on the platform default
    with open(FILES[dataset], "w", encoding="utf-8") as f:
        dump(data, f, ensure_ascii=False)
def request_dataset(dataset, timeout=30):
    """
    Fetches the requested dataset from opendata's API and saves it as JSON

    dataset: identifier of the dataset, interpolated into the API URL and
    used to pick the output file
    timeout: seconds to wait for the server before giving up, so that an
    unresponsive API cannot block the caller forever

    Raises an exception if there's an HTTP error or the request times out
    """
    url = format_url(dataset)
    # Without a timeout requests waits indefinitely for a stalled server
    response = get(url, timeout=timeout)
    response.raise_for_status()
    save_json(data=response.json(), dataset=dataset)

View File

@@ -1,37 +0,0 @@
from flask import render_template
from app import app
from app.forms import DatasetForm
from app.processing import process_data
from app.data_request import scrape_flickr
@app.route("/")
@app.route("/index")
def index():
    """
    Home page of the website
    """
    page = render_template("index.html", title="Home Page")
    return page
@app.route("/data", methods=["GET", "POST"])
def data():
    """
    Shows the dataset selection form and, once a valid form is submitted, the
    visualization of the chosen dataset
    """
    form = DatasetForm()
    if not form.validate_on_submit():
        return render_template("data.html", title="Data", form=form)
    html_table = process_data(form.dataset.data)
    return render_template(
        "visualization.html", title="Visualization", table=html_table
    )
@app.route("/visualization")
def visualization():
    """
    Sends the visitor to the dataset selection form

    The visualization needs the table of a submitted dataset, which only the
    data view produces, so there is nothing to render on a direct visit. The
    previous body passed an undefined name `table` to the template and raised
    a NameError on every request.
    """
    from flask import redirect, url_for

    return redirect(url_for("data"))
@app.route("/map")
def map():
    """
    Serves the map template, which the visualization page embeds in an iframe
    """
    page = render_template("map.html")
    return page
@app.route("/photos")
def photos():
    """
    Shows the images found on Flickr for the "paris coronavirus" search
    """
    return render_template(
        "photos.html", title="Photos", images=scrape_flickr("paris coronavirus")
    )

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +0,0 @@
{% extends "base.html" %}
{% block app_content %}
<h1>Sorry, we couldn't find that</h1>
<p><a href="{{ url_for('index') }}">Back</a></p>
{% endblock %}

View File

@@ -1,8 +0,0 @@
{% extends "base.html" %}
{% block app_content %}
<h1>An unexpected error has occurred</h1>
<p>The administrator has been notified!</p>
<p>If he gets too many notifications, we might replace him with an AI</p>
<p><a href="{{ url_for('index') }}">Back</a></p>
{% endblock %}

View File

@@ -1,36 +0,0 @@
{% extends 'bootstrap/base.html' %}
{% block title %}
{% if title %}{{ title }} - graphPaname{% else %}graphPaname{% endif %}
{% endblock %}
{% block styles %}
<link rel="stylesheet"
href="{{url_for('.static', filename='bootstrap.min.css')}}">
{% endblock %}
{% block navbar %}
<nav class="navbar navbar-expand-lg navbar-dark bg-primary">
<a class="navbar-brand" href="{{ url_for('index') }}">graphPaname</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarColor01" aria-controls="navbarColor01" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarColor01">
<ul class="navbar-nav mr-auto">
<li class="nav-item active">
<a class="nav-link" href="{{ url_for('index') }}">Home <span class="sr-only">(current)</span></a>
</li>
<li class="nav-link"><a href="{{ url_for('data') }}">Data</a></li>
<li class="nav-link"><a href="{{ url_for('photos') }}">Photos</a></li>
</ul>
</div>
</nav>
{% endblock %}
{% block content %}
<div class="container">
{# application content needs to be provided in the app_content block #}
{% block app_content %}{% endblock %}
</div>
{% endblock %}

View File

@@ -1,12 +0,0 @@
{% extends "base.html" %}
{% import 'bootstrap/wtf.html' as wtf %}
{% block app_content %}
<h1>Select a dataset</h1>
<div class="row">
<div class="col-md-4">
{{ wtf.quick_form(form) }}
</div>
</div>
<br>
{% endblock %}

View File

@@ -1,26 +0,0 @@
{% extends "base.html" %}
{% block content %}
<div class="jumbotron">
<h1 id="graphPaname">graphPaname</h1>
<p>
graphPaname is a system that collects real-time data, relevant to the COVID-19 pandemic de-escalation, from the city of Paris.
</p>
<p>
It works with 4 datasets about the de-escalation:
</p>
<ul class="org-ul">
<li>Retailers with home delivery</li>
<li>Additional parking places in relay parkings (parkings connected to public transportation)</li>
<li>Temporary cycling paths</li>
<li>Temporary pedestrian streets</li>
</ul>
<p>
For each dataset, we offer a table with the data, and a map of Paris with markers. Additionally, there&rsquo;s a section with photos related to the COVID-19 pandemic.
</p>
</div>
{% endblock %}

View File

@@ -1,9 +0,0 @@
{% extends "base.html" %}
{% import 'bootstrap/wtf.html' as wtf %}
{% block app_content %}
<h1>Photos</h1>
{% for img_path in images %}
<img src="{{img_path|safe}}" alt="Image placeholder" id="photo" style="width: 200px"/>
{% endfor %}
{% endblock %}

View File

@@ -1,15 +0,0 @@
{% extends "base.html" %}
{% import 'bootstrap/wtf.html' as wtf %}
{% block app_content %}
<h1>Dataset visualization</h1>
<div class="row">
<div class="col-md-9">
{{ table|safe }}
</div>
<div class="col-md-1">
<iframe id="map" src="/map" width="350" height="350"></iframe>
</div>
</div>
<p><a href="{{ url_for('data') }}">Back</a></p>
{% endblock %}

0
data/.keep Normal file
View File

View File

@@ -8,10 +8,15 @@ pkgs.mkShell {
pandas pandas
requests requests
flask flask
flask-bootstrap altair
flask_wtf
folium
pytest pytest
beautifulsoup4 # Development tools
black
isort
pyflakes
python-language-server
pyls-black
pyls-isort
pyls-mypy
]; ];
} }

View File

@@ -1,33 +1,21 @@
from pandas import DataFrame from pandas import DataFrame
from requests import get from requests import get
from app.constants import URL, COLUMNS, FILES, DATASETS
from app.request_datasets import request_dataset
from app.preprocessing import create_dataframe from app.preprocessing import create_dataframe
from app.data_request import request_dataset from os import remove
from constants import COLUMNS, DATASETS, DATASET_URL, FLICKR_URL
def test_dataset_request(): def test_dataset_request():
"""
Checks that the datasets URLs are reachable
"""
for dataset in DATASETS: for dataset in DATASETS:
response = get(DATASET_URL.format(dataset)) response = get(URL.format(dataset))
assert response.status_code == 200 assert response.status_code == 200
def test_dataframe_creation(): def test_dataframe_creation():
"""
Verifies that the DataFrames are created and filtered properly
"""
for dataset in DATASETS: for dataset in DATASETS:
request_dataset(dataset)
df = create_dataframe(dataset) df = create_dataframe(dataset)
remove(FILES[dataset])
assert isinstance(df, DataFrame) assert isinstance(df, DataFrame)
assert all(df.columns == COLUMNS[dataset]) assert list(df) == COLUMNS[dataset]
def test_flickr_request():
    """
    Checks that the Flickr search is available
    """
    search_url = FLICKR_URL.format("paris coronavirus")
    status = get(search_url).status_code
    assert status == 200