Compare commits

..

31 Commits

Author SHA1 Message Date
a53d79fe5f Fix typo on readme 2020-07-16 04:09:35 +02:00
b442532768 Reload map template on form submit 2020-06-15 20:49:10 +02:00
f15ff39090 Fix reverse_coordinates functionality 2020-06-15 20:02:20 +02:00
af079f7907 Replace README.org with README.md 2020-06-15 19:51:09 +02:00
57c536e909 Update the home page with more info 2020-06-15 19:46:22 +02:00
34b9f76cf1 Add installation steps and usage to the README 2020-06-15 19:42:20 +02:00
5c32ad611b Clean up dev tools and unused dependencies 2020-06-15 02:15:11 +02:00
44785255b0 Add Flickr avalaibility test 2020-06-15 02:14:04 +02:00
bb610f3935 Create lines instead of markers whenever necessary 2020-06-15 02:05:45 +02:00
3af0605449 Document processing module 2020-06-15 02:05:13 +02:00
8307a54fe0 Scrape for "paris coronavirus" images 2020-06-15 01:19:13 +02:00
da4d9b1dad Correct marker position on map 2020-06-15 01:18:44 +02:00
b33c6991e9 Change visualization layout to two columns 2020-06-15 01:18:27 +02:00
ec1729e92c Add photo visualization page 2020-06-14 21:47:31 +02:00
dd7f1bab8d Move web scraping logic into data_request 2020-06-14 21:29:42 +02:00
b74ceb05c8 Merge branch 'basset' into coolneng 2020-06-14 21:25:09 +02:00
23dea062e5 Scrape Flickr images 2020-06-14 21:24:27 +02:00
bace83dc3a Overhaul visualization page and dependency cleanup 2020-06-14 19:07:30 +02:00
58c0f4897d Testing del scraping de la fuente de imágenes. 2020-06-14 18:33:05 +02:00
dd2538f1ea Render table and folium map as iframe 2020-06-14 00:58:52 +02:00
a36cc719ef Shorten form choices labels 2020-06-14 00:14:09 +02:00
7a459da204 Fetch JSON into variable instead of file 2020-06-13 21:58:17 +02:00
3ca5f21774 Remove required data validator from form 2020-06-13 21:23:20 +02:00
7011a8f405 Add processing module 2020-06-13 20:57:21 +02:00
ffe9009d1b Add pedestrian streets dataset 2020-06-13 18:23:34 +02:00
4f5013460f Add custom error pages 2020-06-12 22:50:08 +02:00
bec6b19d1c Add dataset selection with validation 2020-06-12 22:49:49 +02:00
e83f7a0271 Create web app blueprint with Flask 2020-06-12 20:03:25 +02:00
a20dab0053 Add dataframe column assertion 2020-06-12 19:21:50 +02:00
6849078d88 Change location of constants and dependencies 2020-06-10 23:29:56 +02:00
87fa53f6f8 Add DataFrame creation and filtering test 2020-06-10 22:13:32 +02:00
23 changed files with 431 additions and 71 deletions

3
.gitignore vendored
View File

@@ -1,3 +1,4 @@
*/__pycache__
**/__pycache__
Design.org
data/*.json
app/templates/map.html

80
README.md Normal file
View File

@@ -0,0 +1,80 @@
graphPaname
===========
graphPaname is a system that collects real-time data, relevant to the
COVID-19 pandemic de-escalation, from the city of Paris.
It works with 4 datasets about the de-escalation:
- Retailers with home delivery
- Additional parking places in relay parkings (parkings connected to
public transportation)
- Temporary cycling paths
- Temporary pedestrian streets
For each dataset, we offer a table with the data, and a map of Paris
with markers. Additionally, there's a section with photos related to
the COVID-19 pandemic.
Technologies
------------
- Flask
- Pandas
- Folium
Data sources
------------
- [Open Data](https://opendata.paris.fr/pages/home/)
- [OpenStreetMap](https://www.openstreetmap.org/)
- [Flickr](https://flickr.com)
Requirements
------------
- Nix
Installation
------------
1. Install Nix (compatible with MacOS and Linux):
``` {.shell}
curl -L https://nixos.org/nix/install | sh
```
There are alternative installation methods, if you don't want to pipe
curl to sh
2. Clone the repository:
``` {.shell}
git clone https://coolneng.duckdns.org/gitea/coolneng/graphPaname
```
3. Change the working directory to the project:
``` {.shell}
cd graphPaname
```
4. Enter the nix-shell:
``` {.shell}
nix-shell
```
5. Run the tests:
``` {.shell}
pytest
```
6. Execute the Flask application:
``` {.shell}
flask run
```
The website can be accessed via **localhost:5000**

View File

@@ -1,4 +0,0 @@
* graphPaname
This project aims to gather information about the smart city of Paris and
organize it in different plots and tables.

View File

@@ -0,0 +1,11 @@
# Application wiring for the `app` package: create the Flask object,
# configure it, and attach Bootstrap plus the route/error-handler modules.
from flask import Flask
from constants import SECRET_KEY
from flask_bootstrap import Bootstrap

# Module-level application instance (imported elsewhere as `from app import app`).
app = Flask(__name__)
app.secret_key = SECRET_KEY
# Re-read templates on every request so the regenerated map.html is served fresh.
app.config['TEMPLATES_AUTO_RELOAD'] = True
bootstrap = Bootstrap(app)

# Imported at the bottom to avoid a circular import: both modules import `app`.
from app import errors, routes

59
app/data_request.py Normal file
View File

@@ -0,0 +1,59 @@
from re import findall
from typing import List
from bs4 import BeautifulSoup
from requests import get
from constants import FLICKR_URL, DATASET_URL
def format_url(dataset) -> str:
    """Return the opendata API URL that serves *dataset*."""
    return DATASET_URL.format(dataset)
def request_dataset(dataset):
    """
    Download *dataset* from the opendata API and return the decoded JSON.

    Raises requests.HTTPError when the server answers with an error status.
    """
    response = get(format_url(dataset))
    response.raise_for_status()
    return response.json()
def request_flickr(keywords) -> str:
    """Return the raw HTML of a Flickr search for *keywords*."""
    search_page = get(FLICKR_URL.format(keywords))
    return search_page.text
def extract_urls(images):
    """
    Create absolute image URLs from Flickr photo markup.

    :param images: any object whose str() representation contains
        live.staticflickr.com image paths (e.g. the tag list returned by
        BeautifulSoup.find_all)
    :return: list of https:// URLs, one per ``.jpg`` path found
    """
    # BUG FIX: use a raw string and escape the literal dots.  The previous
    # pattern "(live.staticflickr.com/\S+.jpg)" treated each unescaped "."
    # as a wildcard, so it could match look-alike hosts (e.g.
    # "liveXstaticflickr.com") and endings that are not ".jpg".
    links = findall(r"(live\.staticflickr\.com/\S+\.jpg)", str(images))
    return ["https://" + link for link in links]
def scrape_flickr(keywords) -> List[str]:
    """
    Search Flickr for *keywords* and return the image URLs found on the
    first page of results.
    """
    page = BeautifulSoup(request_flickr(keywords), features="html.parser")
    # Flickr renders each thumbnail inside a div carrying this class combination.
    photo_divs = page.find_all(
        "div", class_="view photo-list-photo-view requiredToShowOnServer awake",
    )
    return extract_urls(photo_divs)

12
app/errors.py Normal file
View File

@@ -0,0 +1,12 @@
from flask import render_template
from app import app
@app.errorhandler(404)
def not_found_error(error):
    """Render the custom 404 page for unknown URLs."""
    return render_template("404.html"), 404
@app.errorhandler(500)
def internal_error(error):
    """Render the custom 500 page for unhandled server errors."""
    return render_template("500.html"), 500

12
app/forms.py Normal file
View File

@@ -0,0 +1,12 @@
from constants import CHOICES
from flask_wtf import FlaskForm
from wtforms import SelectField, SubmitField
class DatasetForm(FlaskForm):
    """
    Web form to select a dataset
    """

    # Drop-down of (dataset-id, label) pairs defined in constants.CHOICES.
    dataset = SelectField(choices=CHOICES)
    submit = SubmitField("Submit")

View File

@@ -1,22 +1,38 @@
from json import load
from pandas import json_normalize, DataFrame
from .constants import FILES, COLUMNS
from folium import Map, Marker, PolyLine
from pandas import DataFrame, json_normalize
def open_json(dataset) -> dict:
"""
Loads a dictionary with data from a JSON file
"""
with open(FILES[dataset]) as f:
json = load(f)
return json
from app.data_request import request_dataset
from constants import COLUMNS, COORDINATES
def create_dataframe(dataset) -> DataFrame:
"""
Creates a DataFrame from a JSON file
Creates a DataFrame from a JSON response
"""
json = open_json(dataset)
json = request_dataset(dataset)
df = json_normalize(data=json, record_path=["records"], errors="ignore",)
filtered_df = df.filter(items=COLUMNS[dataset])
return filtered_df
def reverse_coordinates(row):
    """
    Flip every coordinate pair in *row* and return them as tuples.

    The API delivers positions as (longitude, latitude) while folium
    expects (latitude, longitude).
    """
    return [tuple(point[::-1]) for point in row]
def create_map(df):
    """
    Draw every row of *df* on a folium map of Paris and save the result as
    the map.html template.

    LineString geometries become blue polylines; any other geometry is
    treated as a single point and becomes a marker.
    """
    paris_map = Map(location=COORDINATES, zoom_start=12, tiles="Stamen Terrain")
    for _, row in df.iterrows():
        shape = row["fields.geo_shape.coordinates"]
        if row["fields.geo_shape.type"] == "LineString":
            locations = reverse_coordinates(shape)
            PolyLine(locations=locations, color="blue", opacity=0.5).add_to(paris_map)
        else:
            lng, lat = shape
            Marker(location=[lat, lng]).add_to(paris_map)
    paris_map.save("app/templates/map.html")

20
app/processing.py Normal file
View File

@@ -0,0 +1,20 @@
from app.preprocessing import create_dataframe, create_map
def create_table(df) -> str:
    """
    Render *df* as a Bootstrap-styled HTML table.

    Missing values are replaced with 0 in place, so the caller's DataFrame
    is modified as a side effect.
    """
    df.fillna(value=0, inplace=True)
    bootstrap_classes = ["table-striped", "table-sm", "table-responsive"]
    return df.to_html(classes=bootstrap_classes)
def process_data(dataset):
    """
    Build the DataFrame for *dataset*, write the folium map template, and
    return the HTML table.
    """
    df = create_dataframe(dataset)
    # Table first: create_table() fills missing values in place, and the
    # map generation should see the same filled data.
    html_table = create_table(df)
    create_map(df)
    return html_table

View File

@@ -1,30 +0,0 @@
from json import dump
from requests import get
from .constants import FILES, URL
def format_url(dataset) -> str:
"""
Constructs the API's URL for the requested dataset
"""
link = URL.format(dataset)
return link
def save_json(data, dataset):
"""
Dumps the data into a JSON file
"""
with open(FILES[dataset], "w") as f:
dump(data, f, ensure_ascii=False)
def request_dataset(dataset):
"""
Fetches the requested dataset from opendata's API
"""
url = format_url(dataset)
response = get(url)
response.raise_for_status()
data = response.json()
save_json(data=data, dataset=dataset)

37
app/routes.py Normal file
View File

@@ -0,0 +1,37 @@
from flask import render_template
from app import app
from app.forms import DatasetForm
from app.processing import process_data
from app.data_request import scrape_flickr
@app.route("/")
@app.route("/index")
def index():
    """Serve the home page."""
    return render_template("index.html", title="Home Page")
@app.route("/data", methods=["GET", "POST"])
def data():
    """
    Show the dataset-selection form; on a valid submit, build the table
    for the chosen dataset and render the visualization page.
    """
    form = DatasetForm()
    if form.validate_on_submit():
        # process_data() also regenerates app/templates/map.html as a side effect.
        table = process_data(form.dataset.data)
        return render_template("visualization.html", title="Visualization", table=table)
    return render_template("data.html", title="Data", form=form)
@app.route("/visualization")
def visualization():
    """
    Render the visualization page when it is visited directly.

    BUG FIX: the previous body passed an undefined name ``table`` to the
    template, so any direct GET of /visualization raised a NameError
    (HTTP 500).  Without a submitted dataset there is no table to show,
    so render the page with an empty table; the map iframe still shows
    the most recently generated map.
    """
    return render_template("visualization.html", title="Visualization", table="")
@app.route("/map")
def map():
    """Serve the folium-generated map page (embedded as an iframe elsewhere)."""
    # NOTE: this view name shadows the builtin map() within this module;
    # kept as-is because the route/endpoint name is part of the interface.
    return render_template("map.html")
@app.route("/photos")
def photos():
    """Show images scraped from a Flickr search for "paris coronavirus"."""
    images = scrape_flickr("paris coronavirus")
    return render_template("photos.html", title="Photos", images=images)

12
app/static/bootstrap.min.css vendored Normal file

File diff suppressed because one or more lines are too long

6
app/templates/404.html Normal file
View File

@@ -0,0 +1,6 @@
{% extends "base.html" %}
{% block app_content %}
<h1>Sorry, we couldn't find that</h1>
<p><a href="{{ url_for('index') }}">Back</a></p>
{% endblock %}

8
app/templates/500.html Normal file
View File

@@ -0,0 +1,8 @@
{% extends "base.html" %}
{% block app_content %}
<h1>An unexpected error has occurred</h1>
<p>The administrator has been notified!</p>
<p>If he gets too many notifications, we might replace him with an AI</p>
<p><a href="{{ url_for('index') }}">Back</a></p>
{% endblock %}

36
app/templates/base.html Normal file
View File

@@ -0,0 +1,36 @@
{% extends 'bootstrap/base.html' %}
{% block title %}
{% if title %}{{ title }} - graphPaname{% else %}graphPaname{% endif %}
{% endblock %}
{% block styles %}
<link rel="stylesheet"
href="{{url_for('.static', filename='bootstrap.min.css')}}">
{% endblock %}
{% block navbar %}
<nav class="navbar navbar-expand-lg navbar-dark bg-primary">
<a class="navbar-brand" href="{{ url_for('index') }}">graphPaname</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarColor01" aria-controls="navbarColor01" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div class="collapse navbar-collapse" id="navbarColor01">
<ul class="navbar-nav mr-auto">
<li class="nav-item active">
<a class="nav-link" href="{{ url_for('index') }}">Home <span class="sr-only">(current)</span></a>
</li>
<li class="nav-link"><a href="{{ url_for('data') }}">Data</a></li>
<li class="nav-link"><a href="{{ url_for('photos') }}">Photos</a></li>
</ul>
</div>
</nav>
{% endblock %}
{% block content %}
<div class="container">
{# application content needs to be provided in the app_content block #}
{% block app_content %}{% endblock %}
</div>
{% endblock %}

12
app/templates/data.html Normal file
View File

@@ -0,0 +1,12 @@
{% extends "base.html" %}
{% import 'bootstrap/wtf.html' as wtf %}
{% block app_content %}
<h1>Select a dataset</h1>
<div class="row">
<div class="col-md-4">
{{ wtf.quick_form(form) }}
</div>
</div>
<br>
{% endblock %}

26
app/templates/index.html Normal file
View File

@@ -0,0 +1,26 @@
{% extends "base.html" %}
{% block content %}
<div class="jumbotron">
<h1 id="graphPaname">graphPaname</h1>
<p>
graphPaname is a system that collects real-time data, relevant to the COVID-19 pandemic de-escalation, from the city of Paris.
</p>
<p>
It works with 4 datasets about the de-escalation:
</p>
<ul class="org-ul">
<li>Retailers with home delivery</li>
<li>Additional parking places in relay parkings (parkings connected to public transportation)</li>
<li>Temporary cycling paths</li>
<li>Temporary pedestrian streets</li>
</ul>
<p>
For each dataset, we offer a table with the data, and a map of Paris with markers. Additionally, there&rsquo;s a section with photos related to the COVID-19 pandemic.
</p>
</div>
{% endblock %}

View File

@@ -0,0 +1,9 @@
{% extends "base.html" %}
{% import 'bootstrap/wtf.html' as wtf %}
{% block app_content %}
<h1>Photos</h1>
{% for img_path in images %}
<img src="{{img_path|safe}}" alt="Image placeholder" id="photo" style="width: 200px"/>
{% endfor %}
{% endblock %}

View File

@@ -0,0 +1,15 @@
{% extends "base.html" %}
{% import 'bootstrap/wtf.html' as wtf %}
{% block app_content %}
<h1>Dataset visualization</h1>
<div class="row">
<div class="col-md-9">
{{ table|safe }}
</div>
<div class="col-md-1">
<iframe id="map" src="/map" width="350" height="350"></iframe>
</div>
</div>
<p><a href="{{ url_for('data') }}">Back</a></p>
{% endblock %}

View File

@@ -2,16 +2,13 @@ DATASETS = [
"coronavirus-commercants-parisiens-livraison-a-domicile",
"deconfinement-pistes-cyclables-temporaires",
"deconfinement-parking-relais-doublement-des-places",
"deconfinement-rues-amenagees-pour-pietons",
]
URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
TEST_URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset=deconfinement-pistes-cyclables-temporaires&rows=-1"
FILES = {
"deconfinement-pistes-cyclables-temporaires": "data/cycling-paths.json",
"deconfinement-parking-relais-doublement-des-places": "data/relay-parking.json",
"coronavirus-commercants-parisiens-livraison-a-domicile": "data/home-delivery.json",
}
DATASET_URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
FLICKR_URL = "https://www.flickr.com/search/?text={}"
COLUMNS = {
"deconfinement-pistes-cyclables-temporaires": [
"fields.geo_shape.type",
"fields.geo_shape.coordinates",
"fields.statut",
"record_timestamp",
@@ -21,12 +18,14 @@ COLUMNS = {
"fields.societe",
"fields.nb_places_dispositif_environ",
"fields.parcs",
"fields.geo_shape.type",
"fields.geo_shape.coordinates",
"fields.cp",
"fields.ville",
"fields.adresse",
],
"coronavirus-commercants-parisiens-livraison-a-domicile": [
"fields.geo_shape.type",
"fields.geo_shape.coordinates",
"fields.adresse",
"fields.code_postal",
@@ -38,4 +37,20 @@ COLUMNS = {
"fields.telephone",
"fields.mail",
],
"deconfinement-rues-amenagees-pour-pietons": [
"fields.geo_shape.type",
"fields.geo_shape.coordinates",
"fields.nom_voie",
"fields.categorie",
"fields.statut",
"record_timestamp",
],
}
CHOICES = [
("coronavirus-commercants-parisiens-livraison-a-domicile", "home-delivery"),
("deconfinement-pistes-cyclables-temporaires", "cycling-paths"),
("deconfinement-parking-relais-doublement-des-places", "relay-parking"),
("deconfinement-rues-amenagees-pour-pietons", "pedestrian-streets"),
]
SECRET_KEY = "trolaso"
COORDINATES = [48.864716, 2.349014]

View File

View File

@@ -8,15 +8,10 @@ pkgs.mkShell {
pandas
requests
flask
altair
flask-bootstrap
flask_wtf
folium
pytest
# Development tools
black
isort
pyflakes
python-language-server
pyls-black
pyls-isort
pyls-mypy
beautifulsoup4
];
}

View File

@@ -1,21 +1,33 @@
from pandas import DataFrame
from requests import get
from app.constants import URL, COLUMNS, FILES, DATASETS
from app.request_datasets import request_dataset
from app.preprocessing import create_dataframe
from os import remove
from app.data_request import request_dataset
from constants import COLUMNS, DATASETS, DATASET_URL, FLICKR_URL
def test_dataset_request():
"""
Checks that the datasets URLs are reachable
"""
for dataset in DATASETS:
response = get(URL.format(dataset))
response = get(DATASET_URL.format(dataset))
assert response.status_code == 200
def test_dataframe_creation():
"""
Verifies that the DataFrames are created and filtered properly
"""
for dataset in DATASETS:
request_dataset(dataset)
df = create_dataframe(dataset)
remove(FILES[dataset])
assert isinstance(df, DataFrame)
assert list(df) == COLUMNS[dataset]
assert all(df.columns == COLUMNS[dataset])
def test_flickr_request():
"""
Checks that Flickr search is available
"""
response = get(FLICKR_URL.format("paris coronavirus"))
assert response.status_code == 200