Compare commits

...

25 Commits

Author SHA1 Message Date
a53d79fe5f Fix typo on readme 2020-07-16 04:09:35 +02:00
b442532768 Reload map template on form submit 2020-06-15 20:49:10 +02:00
f15ff39090 Fix reverse_coordinates functionality 2020-06-15 20:02:20 +02:00
af079f7907 Replace README.org with README.md 2020-06-15 19:51:09 +02:00
57c536e909 Update the home page with more info 2020-06-15 19:46:22 +02:00
34b9f76cf1 Add installation steps and usage to the README 2020-06-15 19:42:20 +02:00
5c32ad611b Clean up dev tools and unused dependencies 2020-06-15 02:15:11 +02:00
44785255b0 Add Flickr avalaibility test 2020-06-15 02:14:04 +02:00
bb610f3935 Create lines instead of markers whenever necessary 2020-06-15 02:05:45 +02:00
3af0605449 Document processing module 2020-06-15 02:05:13 +02:00
8307a54fe0 Scrape for "paris coronavirus" images 2020-06-15 01:19:13 +02:00
da4d9b1dad Correct marker position on map 2020-06-15 01:18:44 +02:00
b33c6991e9 Change visualization layout to two columns 2020-06-15 01:18:27 +02:00
ec1729e92c Add photo visualization page 2020-06-14 21:47:31 +02:00
dd7f1bab8d Move web scraping logic into data_request 2020-06-14 21:29:42 +02:00
b74ceb05c8 Merge branch 'basset' into coolneng 2020-06-14 21:25:09 +02:00
23dea062e5 Scrape Flickr images 2020-06-14 21:24:27 +02:00
bace83dc3a Overhaul visualization page and dependency cleanup 2020-06-14 19:07:30 +02:00
58c0f4897d Testing del scraping de la fuente de imágenes. 2020-06-14 18:33:05 +02:00
dd2538f1ea Render table and folium map as iframe 2020-06-14 00:58:52 +02:00
a36cc719ef Shorten form choices labels 2020-06-14 00:14:09 +02:00
7a459da204 Fetch JSON into variable instead of file 2020-06-13 21:58:17 +02:00
3ca5f21774 Remove required data validator from form 2020-06-13 21:23:20 +02:00
7011a8f405 Add processing module 2020-06-13 20:57:21 +02:00
ffe9009d1b Add pedestrian streets dataset 2020-06-13 18:23:34 +02:00
17 changed files with 260 additions and 60 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
**/__pycache__
Design.org
data/*.json
app/templates/map.html

80
README.md Normal file
View File

@@ -0,0 +1,80 @@
graphPaname
===========
graphPaname is a system that collects real-time data, relevant to the
COVID-19 pandemic de-escalation, from the city of Paris.
It works with 4 datasets about the de-escalation:
- Retailers with home delivery
- Additional parking places in relay parkings (parkings connected to
public transportation)
- Temporary cycling paths
- Temporary pedestrian streets
For each dataset, we offer a table with the data, and a map of Paris
with markers. Additionally, there's a section with photos related to
the COVID-19 pandemic.
Technologies
------------
- Flask
- Pandas
- Folium
Data sources
------------
- [Open Data](https://opendata.paris.fr/pages/home/)
- [OpenStreetMap](https://www.openstreetmap.org/)
- [Flickr](https://flickr.com)
Requirements
------------
- Nix
Installation
------------
1. Install Nix (compatible with macOS and Linux):
``` {.shell}
curl -L https://nixos.org/nix/install | sh
```
There are alternative installation methods if you don't want to pipe
curl to sh.
2. Clone the repository:
``` {.shell}
git clone https://coolneng.duckdns.org/gitea/coolneng/graphPaname
```
3. Change the working directory to the project:
``` {.shell}
cd graphPaname
```
4. Enter the nix-shell:
``` {.shell}
nix-shell
```
5. Run the tests:
``` {.shell}
pytest
```
6. Execute the Flask application:
``` {.shell}
flask run
```
The website can be accessed via **localhost:5000**

View File

@@ -1,4 +0,0 @@
* graphPaname
This project aims to gather information about the smart city of Paris and
organize it in different plots and tables.

View File

@@ -5,6 +5,7 @@ from flask_bootstrap import Bootstrap
app = Flask(__name__)
app.secret_key = SECRET_KEY
app.config['TEMPLATES_AUTO_RELOAD'] = True
bootstrap = Bootstrap(app)
from app import errors, routes

View File

@@ -1,26 +1,20 @@
from json import dump
from re import findall
from typing import List
from bs4 import BeautifulSoup
from requests import get
from constants import FILES, URL
from constants import FLICKR_URL, DATASET_URL
def format_url(dataset) -> str:
"""
Constructs the API's URL for the requested dataset
"""
link = URL.format(dataset)
link = DATASET_URL.format(dataset)
return link
def save_json(data, dataset):
"""
Dumps the data into a JSON file
"""
with open(FILES[dataset], "w") as f:
dump(data, f, ensure_ascii=False)
def request_dataset(dataset):
"""
Fetches the requested dataset from opendata's API
@@ -30,4 +24,36 @@ def request_dataset(dataset):
response = get(url)
response.raise_for_status()
data = response.json()
save_json(data=data, dataset=dataset)
return data
def request_flickr(keywords) -> str:
    """
    Returns the HTML of a Flickr search

    keywords is the free-text search query; it is percent-encoded before
    being substituted into FLICKR_URL.
    """
    # Imported locally so the function carries its own dependency.
    from urllib.parse import quote_plus

    # Encode the query so characters such as "&", "#" or "+" cannot change
    # the structure of the search URL.
    search_url = FLICKR_URL.format(quote_plus(str(keywords)))
    result = get(search_url)
    return result.text
def extract_urls(images):
    """
    Creates proper URLs from the regex matches

    The argument is converted to text with str() and scanned for
    live.staticflickr.com paths ending in .jpg. Every match is returned
    with the https scheme prepended, in order of appearance.
    """
    # Raw string: "\S" inside a plain literal is an invalid escape sequence.
    # The dots are escaped so they only match a literal ".", and the lazy
    # quantifier stops at the first ".jpg" so that URLs which are not
    # separated by whitespace are not merged into a single match.
    links = findall(r"(live\.staticflickr\.com/\S+?\.jpg)", str(images))
    return ["https://" + link for link in links]
def scrape_flickr(keywords) -> List[str]:
    """
    Builds the list of image links found in a Flickr search for the keywords
    """
    page = BeautifulSoup(request_flickr(keywords), features="html.parser")
    # Only the div elements carrying exactly this class string are kept.
    photo_divs = page.find_all(
        "div", class_="view photo-list-photo-view requiredToShowOnServer awake"
    )
    return extract_urls(photo_divs)

View File

@@ -1,9 +1,12 @@
from constants import DATASETS
from constants import CHOICES
from flask_wtf import FlaskForm
from wtforms import SelectField, SubmitField
from wtforms.validators import DataRequired
class DatasetForm(FlaskForm):
dataset = SelectField(validators=[DataRequired()], choices=DATASETS)
"""
Web form to select a dataset
"""
dataset = SelectField(choices=CHOICES)
submit = SubmitField("Submit")

View File

@@ -1,24 +1,38 @@
from json import load
from folium import Map, Marker, PolyLine
from pandas import DataFrame, json_normalize
from constants import COLUMNS, FILES
def open_json(dataset) -> dict:
"""
Loads a dictionary with data from a JSON file
"""
with open(FILES[dataset]) as f:
json = load(f)
return json
from app.data_request import request_dataset
from constants import COLUMNS, COORDINATES
def create_dataframe(dataset) -> DataFrame:
"""
Creates a DataFrame from a JSON file
Creates a DataFrame from a JSON response
"""
json = open_json(dataset)
json = request_dataset(dataset)
df = json_normalize(data=json, record_path=["records"], errors="ignore",)
filtered_df = df.filter(items=COLUMNS[dataset])
return filtered_df
def reverse_coordinates(row):
    """
    Flips the element order of every coordinate sequence in row and returns
    them as a list of tuples, so folium can parse them correctly
    """
    flipped = []
    for point in row:
        flipped.append(tuple(reversed(point)))
    return flipped
def create_map(df):
    """
    Draws every row of the DataFrame on a map, as a line for LineString
    shapes and as a marker otherwise, then saves it as an HTML template
    """
    paris_map = Map(location=COORDINATES, zoom_start=12, tiles="Stamen Terrain")
    for _, record in df.iterrows():
        if record["fields.geo_shape.type"] != "LineString":
            # The pair is stored as (lng, lat); Marker is given [lat, lng].
            lng, lat = record["fields.geo_shape.coordinates"]
            Marker(location=[lat, lng]).add_to(paris_map)
            continue
        path = reverse_coordinates(record["fields.geo_shape.coordinates"])
        PolyLine(locations=path, color="blue", opacity=0.5).add_to(paris_map)
    paris_map.save("app/templates/map.html")

20
app/processing.py Normal file
View File

@@ -0,0 +1,20 @@
from app.preprocessing import create_dataframe, create_map
def create_table(df) -> str:
    """
    Renders an HTML table from a DataFrame

    Missing values are displayed as 0. The fill is applied to a new
    DataFrame, so the one passed in by the caller is left unmodified.
    """
    # fillna without inplace returns a new object instead of mutating df.
    filled = df.fillna(value=0)
    return filled.to_html(classes=["table-striped", "table-sm", "table-responsive"])
def process_data(dataset):
    """
    Builds the DataFrame for the dataset, renders its HTML table, passes the
    DataFrame to create_map and returns the table
    """
    frame = create_dataframe(dataset)
    html_table = create_table(frame)
    create_map(frame)
    return html_table

View File

@@ -2,6 +2,8 @@ from flask import render_template
from app import app
from app.forms import DatasetForm
from app.processing import process_data
from app.data_request import scrape_flickr
@app.route("/")
@@ -10,14 +12,26 @@ def index():
return render_template("index.html", title="Home Page")
@app.route("/data")
@app.route("/data", methods=["GET", "POST"])
def data():
form = DatasetForm()
if form.validate_on_submit():
return render_template("visualization.html", form=form, title="Visualization")
table = process_data(form.dataset.data)
return render_template("visualization.html", title="Visualization", table=table)
return render_template("data.html", title="Data", form=form)
@app.route("/visualization")
def visualization():
return render_template("visualization.html", title="Visualization", form=form)
return render_template("visualization.html", title="Visualization", table=table)
@app.route("/map")
def map():
return render_template("map.html")
@app.route("/photos")
def photos():
images = scrape_flickr("paris coronavirus")
return render_template("photos.html", title="Photos", images=images)

View File

@@ -22,6 +22,7 @@
<a class="nav-link" href="{{ url_for('index') }}">Home <span class="sr-only">(current)</span></a>
</li>
<li class="nav-link"><a href="{{ url_for('data') }}">Data</a></li>
<li class="nav-link"><a href="{{ url_for('photos') }}">Photos</a></li>
</ul>
</div>
</nav>

View File

@@ -3,6 +3,24 @@
{% block content %}
<div class="jumbotron">
<h1 id="graphPaname">graphPaname</h1>
<p>graphPaname is an information system that aims to show real-time data, related to the COVID-19 outbreak, in the city of Paris</p>
<p>
graphPaname is a system that collects real-time data, relevant to the COVID-19 pandemic de-escalation, from the city of Paris.
</p>
<p>
It works with 4 datasets about the de-escalation:
</p>
<ul class="org-ul">
<li>Retailers with home delivery</li>
<li>Additional parking places in relay parkings (parkings connected to public transportation)</li>
<li>Temporary cycling paths</li>
<li>Temporary pedestrian streets</li>
</ul>
<p>
For each dataset, we offer a table with the data, and a map of Paris with markers. Additionally, there&rsquo;s a section with photos related to the COVID-19 pandemic.
</p>
</div>
{% endblock %}

View File

@@ -0,0 +1,9 @@
{% extends "base.html" %}
{% import 'bootstrap/wtf.html' as wtf %}
{% block app_content %}
<h1>Photos</h1>
{# The image URLs are scraped from a third-party page, so they are left to autoescaping (no |safe). #}
{# A class is used instead of an id because the element is repeated for every image. #}
{% for img_path in images %}
<img src="{{ img_path }}" alt="Image placeholder" class="photo" style="width: 200px"/>
{% endfor %}
{% endblock %}

View File

@@ -3,7 +3,13 @@
{% block app_content %}
<h1>Dataset visualization</h1>
<img src="data:image/png;base64,{{ plot }}" alt="Image Placeholder">
<img src="data:image/png;base64,{{ map }}" alt="Image Placeholder">
<div class="row">
<div class="col-md-9">
{{ table|safe }}
</div>
<div class="col-md-1">
<iframe id="map" src="/map" width="350" height="350"></iframe>
</div>
</div>
<p><a href="{{ url_for('data') }}">Back</a></p>
{% endblock %}

View File

@@ -2,15 +2,13 @@ DATASETS = [
"coronavirus-commercants-parisiens-livraison-a-domicile",
"deconfinement-pistes-cyclables-temporaires",
"deconfinement-parking-relais-doublement-des-places",
"deconfinement-rues-amenagees-pour-pietons",
]
URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
FILES = {
"deconfinement-pistes-cyclables-temporaires": "data/cycling-paths.json",
"deconfinement-parking-relais-doublement-des-places": "data/relay-parking.json",
"coronavirus-commercants-parisiens-livraison-a-domicile": "data/home-delivery.json",
}
DATASET_URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
FLICKR_URL = "https://www.flickr.com/search/?text={}"
COLUMNS = {
"deconfinement-pistes-cyclables-temporaires": [
"fields.geo_shape.type",
"fields.geo_shape.coordinates",
"fields.statut",
"record_timestamp",
@@ -20,12 +18,14 @@ COLUMNS = {
"fields.societe",
"fields.nb_places_dispositif_environ",
"fields.parcs",
"fields.geo_shape.type",
"fields.geo_shape.coordinates",
"fields.cp",
"fields.ville",
"fields.adresse",
],
"coronavirus-commercants-parisiens-livraison-a-domicile": [
"fields.geo_shape.type",
"fields.geo_shape.coordinates",
"fields.adresse",
"fields.code_postal",
@@ -37,5 +37,20 @@ COLUMNS = {
"fields.telephone",
"fields.mail",
],
"deconfinement-rues-amenagees-pour-pietons": [
"fields.geo_shape.type",
"fields.geo_shape.coordinates",
"fields.nom_voie",
"fields.categorie",
"fields.statut",
"record_timestamp",
],
}
from os import environ

# (value, label) pairs offered by the dataset selection form.
CHOICES = [
    ("coronavirus-commercants-parisiens-livraison-a-domicile", "home-delivery"),
    ("deconfinement-pistes-cyclables-temporaires", "cycling-paths"),
    ("deconfinement-parking-relais-doublement-des-places", "relay-parking"),
    ("deconfinement-rues-amenagees-pour-pietons", "pedestrian-streets"),
]
# Key assigned to app.secret_key. Taken from the SECRET_KEY environment
# variable when it is set; the previous hard-coded value is only kept as a
# fallback so existing setups keep working.
# NOTE(review): the fallback is committed to the repository and should not be
# relied on outside local development.
SECRET_KEY = environ.get("SECRET_KEY", "trolaso")
# Location the folium map is created at.
COORDINATES = [48.864716, 2.349014]

View File

View File

@@ -10,16 +10,8 @@ pkgs.mkShell {
flask
flask-bootstrap
flask_wtf
matplotlib
folium
pytest
# Development tools
black
isort
pyflakes
python-language-server
pyls-black
pyls-isort
pyls-mypy
beautifulsoup4
];
}

View File

@@ -1,11 +1,9 @@
from os import remove
from pandas import DataFrame
from requests import get
from app.preprocessing import create_dataframe
from app.data_request import request_dataset
from constants import COLUMNS, DATASETS, FILES, URL
from constants import COLUMNS, DATASETS, DATASET_URL, FLICKR_URL
def test_dataset_request():
@@ -13,7 +11,7 @@ def test_dataset_request():
Checks that the datasets URLs are reachable
"""
for dataset in DATASETS:
response = get(URL.format(dataset))
response = get(DATASET_URL.format(dataset))
assert response.status_code == 200
@@ -22,8 +20,14 @@ def test_dataframe_creation():
Verifies that the DataFrames are created and filtered properly
"""
for dataset in DATASETS:
request_dataset(dataset)
df = create_dataframe(dataset)
remove(FILES[dataset])
assert isinstance(df, DataFrame)
assert all(df.columns == COLUMNS[dataset])
def test_flickr_request():
    """
    Checks that the Flickr search page is available
    """
    search_url = FLICKR_URL.format("paris coronavirus")
    assert get(search_url).status_code == 200