Compare commits

...

25 Commits

Author SHA1 Message Date
a53d79fe5f Fix typo on readme 2020-07-16 04:09:35 +02:00
b442532768 Reload map template on form submit 2020-06-15 20:49:10 +02:00
f15ff39090 Fix reverse_coordinates functionality 2020-06-15 20:02:20 +02:00
af079f7907 Replace README.org with README.md 2020-06-15 19:51:09 +02:00
57c536e909 Update the home page with more info 2020-06-15 19:46:22 +02:00
34b9f76cf1 Add installation steps and usage to the README 2020-06-15 19:42:20 +02:00
5c32ad611b Clean up dev tools and unused dependencies 2020-06-15 02:15:11 +02:00
44785255b0 Add Flickr availability test 2020-06-15 02:14:04 +02:00
bb610f3935 Create lines instead of markers whenever necessary 2020-06-15 02:05:45 +02:00
3af0605449 Document processing module 2020-06-15 02:05:13 +02:00
8307a54fe0 Scrape for "paris coronavirus" images 2020-06-15 01:19:13 +02:00
da4d9b1dad Correct marker position on map 2020-06-15 01:18:44 +02:00
b33c6991e9 Change visualization layout to two columns 2020-06-15 01:18:27 +02:00
ec1729e92c Add photo visualization page 2020-06-14 21:47:31 +02:00
dd7f1bab8d Move web scraping logic into data_request 2020-06-14 21:29:42 +02:00
b74ceb05c8 Merge branch 'basset' into coolneng 2020-06-14 21:25:09 +02:00
23dea062e5 Scrape Flickr images 2020-06-14 21:24:27 +02:00
bace83dc3a Overhaul visualization page and dependency cleanup 2020-06-14 19:07:30 +02:00
58c0f4897d Test the scraping of the image source 2020-06-14 18:33:05 +02:00
dd2538f1ea Render table and folium map as iframe 2020-06-14 00:58:52 +02:00
a36cc719ef Shorten form choices labels 2020-06-14 00:14:09 +02:00
7a459da204 Fetch JSON into variable instead of file 2020-06-13 21:58:17 +02:00
3ca5f21774 Remove required data validator from form 2020-06-13 21:23:20 +02:00
7011a8f405 Add processing module 2020-06-13 20:57:21 +02:00
ffe9009d1b Add pedestrian streets dataset 2020-06-13 18:23:34 +02:00
17 changed files with 260 additions and 60 deletions

.gitignore vendored

@@ -1,3 +1,4 @@
 **/__pycache__
 Design.org
 data/*.json
+app/templates/map.html

README.md Normal file

@@ -0,0 +1,80 @@
graphPaname
===========
graphPaname is a system that collects real-time data, relevant to the
COVID-19 pandemic de-escalation, from the city of Paris.
It works with 4 datasets about the de-escalation:
- Retailers with home delivery
- Additional parking places in relay parkings (parkings connected to
public transportation)
- Temporary cycling paths
- Temporary pedestrian streets
For each dataset, we offer a table with the data, and a map of Paris
with markers. Additionally, there's a section with photos related to
the COVID-19 pandemic.
Technologies
------------
- Flask
- Pandas
- Folium
Data sources
------------
- [Open Data](https://opendata.paris.fr/pages/home/)
- [OpenStreetMap](https://www.openstreetmap.org/)
- [Flickr](https://flickr.com)
Requirements
------------
- Nix
Installation
------------
1. Install Nix (compatible with macOS and Linux):
``` {.shell}
curl -L https://nixos.org/nix/install | sh
```
There are alternative installation methods if you don't want to pipe
curl to sh.
2. Clone the repository:
``` {.shell}
git clone https://coolneng.duckdns.org/gitea/coolneng/graphPaname
```
3. Change the working directory to the project:
``` {.shell}
cd graphPaname
```
4. Enter the nix-shell:
``` {.shell}
nix-shell
```
5. Run the tests:
``` {.shell}
pytest
```
6. Execute the Flask application:
``` {.shell}
flask run
```
The website can be accessed via **localhost:5000**

README.org

@@ -1,4 +0,0 @@
-* graphPaname
-This project aims to gather information about the smart city of Paris and
-organize it in different plots and tables.

app/__init__.py

@@ -5,6 +5,7 @@ from flask_bootstrap import Bootstrap
 app = Flask(__name__)
 app.secret_key = SECRET_KEY
+app.config['TEMPLATES_AUTO_RELOAD'] = True
 bootstrap = Bootstrap(app)
 from app import errors, routes

app/data_request.py

@@ -1,26 +1,20 @@
-from json import dump
+from re import findall
+from typing import List
+from bs4 import BeautifulSoup
 from requests import get
-from constants import FILES, URL
+from constants import FLICKR_URL, DATASET_URL
 def format_url(dataset) -> str:
     """
     Constructs the API's URL for the requested dataset
     """
-    link = URL.format(dataset)
+    link = DATASET_URL.format(dataset)
     return link
-def save_json(data, dataset):
-    """
-    Dumps the data into a JSON file
-    """
-    with open(FILES[dataset], "w") as f:
-        dump(data, f, ensure_ascii=False)
 def request_dataset(dataset):
     """
     Fetches the requested dataset from opendata's API
@@ -30,4 +24,36 @@ def request_dataset(dataset):
     response = get(url)
     response.raise_for_status()
     data = response.json()
-    save_json(data=data, dataset=dataset)
+    return data
+def request_flickr(keywords) -> str:
+    """
+    Returns the HTML of a Flickr search
+    """
+    search_url = FLICKR_URL.format(keywords)
+    result = get(search_url)
+    html = result.text
+    return html
+def extract_urls(images):
+    """
+    Creates proper URLs from the regex matches
+    """
+    links = findall("(live.staticflickr.com/\S+.jpg)", str(images))
+    formatted_urls = ["https://" + link for link in links]
+    return formatted_urls
+def scrape_flickr(keywords) -> List[str]:
+    """
+    Creates a list of image links from a Flickr search
+    """
+    html = request_flickr(keywords)
+    soup = BeautifulSoup(html, features="html.parser")
+    images = soup.find_all(
+        "div", class_="view photo-list-photo-view requiredToShowOnServer awake",
+    )
+    links = extract_urls(images)
+    return links
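For orientation, here is a minimal usage sketch of the two entry points this module now exposes, `request_dataset` and `scrape_flickr`. It is not part of the changeset; it assumes the `app` package from this repository is importable (e.g. run from the repository root inside the nix-shell), and the `"records"` key is the one `preprocessing.py` flattens via `record_path=["records"]`.

```python
# Usage sketch (assumes the repository root is on the import path, e.g. inside nix-shell)
from app.data_request import request_dataset, scrape_flickr

# Fetch one Opendata Paris dataset as parsed JSON; preprocessing.py later flattens data["records"]
data = request_dataset("deconfinement-pistes-cyclables-temporaires")
print(len(data["records"]), "records fetched")

# Collect image URLs from a Flickr search, as the /photos route does
urls = scrape_flickr("paris coronavirus")
print(urls[:3])
```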

app/forms.py

@@ -1,9 +1,12 @@
-from constants import DATASETS
+from constants import CHOICES
 from flask_wtf import FlaskForm
 from wtforms import SelectField, SubmitField
-from wtforms.validators import DataRequired
 class DatasetForm(FlaskForm):
-    dataset = SelectField(validators=[DataRequired()], choices=DATASETS)
+    """
+    Web form to select a dataset
+    """
+    dataset = SelectField(choices=CHOICES)
     submit = SubmitField("Submit")

app/preprocessing.py

@@ -1,24 +1,38 @@
-from json import load
+from folium import Map, Marker, PolyLine
 from pandas import DataFrame, json_normalize
-from constants import COLUMNS, FILES
+from app.data_request import request_dataset
+from constants import COLUMNS, COORDINATES
-def open_json(dataset) -> dict:
-    """
-    Loads a dictionary with data from a JSON file
-    """
-    with open(FILES[dataset]) as f:
-        json = load(f)
-    return json
 def create_dataframe(dataset) -> DataFrame:
     """
-    Creates a DataFrame from a JSON file
+    Creates a DataFrame from a JSON response
     """
-    json = open_json(dataset)
+    json = request_dataset(dataset)
     df = json_normalize(data=json, record_path=["records"], errors="ignore",)
     filtered_df = df.filter(items=COLUMNS[dataset])
     return filtered_df
+def reverse_coordinates(row):
+    """
+    Reverses each tuple's coordinates to ensure folium can parse them correctly
+    """
+    coord = [tuple(reversed(t)) for t in row]
+    return coord
+def create_map(df):
+    """
+    Creates a Map with markers or lines from the DataFrame
+    """
+    m = Map(location=COORDINATES, zoom_start=12, tiles="Stamen Terrain")
+    for index, row in df.iterrows():
+        if row["fields.geo_shape.type"] == "LineString":
+            coord = reverse_coordinates(row["fields.geo_shape.coordinates"])
+            PolyLine(locations=coord, color="blue", opacity=0.5).add_to(m)
+        else:
+            lng, lat = row["fields.geo_shape.coordinates"]
+            Marker(location=[lat, lng]).add_to(m)
+    m.save("app/templates/map.html")
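The coordinate flip in `reverse_coordinates` exists because GeoJSON shapes list points as (longitude, latitude), while folium expects (latitude, longitude). A small illustrative check (the sample points are made up, not taken from the datasets):

```python
# Why reverse_coordinates is needed: GeoJSON uses (lng, lat), folium wants (lat, lng)
line = [(2.3522, 48.8566), (2.3499, 48.8530)]  # sample (lng, lat) pairs near Paris
flipped = [tuple(reversed(t)) for t in line]   # same expression as in the diff above
assert flipped == [(48.8566, 2.3522), (48.8530, 2.3499)]
```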

app/processing.py Normal file

@@ -0,0 +1,20 @@
from app.preprocessing import create_dataframe, create_map


def create_table(df) -> str:
    """
    Renders an HTML table from a DataFrame
    """
    df.fillna(value=0, inplace=True)
    table = df.to_html(classes=["table-striped", "table-sm", "table-responsive"])
    return table


def process_data(dataset):
    """
    Creates the DataFrame, produces a map and returns a table
    """
    df = create_dataframe(dataset)
    table = create_table(df)
    create_map(df)
    return table
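`process_data` ties the pieces together: it builds the DataFrame, renders the Bootstrap-styled HTML table, and saves the folium map to `app/templates/map.html` as a side effect. A sketch of calling it directly, outside Flask (again assuming the repository root is on the import path):

```python
# Sketch: calling process_data outside the Flask routes
# (writes app/templates/map.html as a side effect and performs a live API request)
from app.processing import process_data

table_html = process_data("deconfinement-parking-relais-doublement-des-places")
print(table_html[:200])  # start of the <table> markup later injected into visualization.html
```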

app/routes.py

@@ -2,6 +2,8 @@ from flask import render_template
 from app import app
 from app.forms import DatasetForm
+from app.processing import process_data
+from app.data_request import scrape_flickr
 @app.route("/")
@@ -10,14 +12,26 @@ def index():
     return render_template("index.html", title="Home Page")
-@app.route("/data")
+@app.route("/data", methods=["GET", "POST"])
 def data():
     form = DatasetForm()
     if form.validate_on_submit():
-        return render_template("visualization.html", form=form, title="Visualization")
+        table = process_data(form.dataset.data)
+        return render_template("visualization.html", title="Visualization", table=table)
     return render_template("data.html", title="Data", form=form)
 @app.route("/visualization")
 def visualization():
-    return render_template("visualization.html", title="Visualization", form=form)
+    return render_template("visualization.html", title="Visualization", table=table)
+@app.route("/map")
+def map():
+    return render_template("map.html")
+@app.route("/photos")
+def photos():
+    images = scrape_flickr("paris coronavirus")
+    return render_template("photos.html", title="Photos", images=images)

app/templates/base.html

@@ -22,6 +22,7 @@
 <a class="nav-link" href="{{ url_for('index') }}">Home <span class="sr-only">(current)</span></a>
 </li>
 <li class="nav-link"><a href="{{ url_for('data') }}">Data</a></li>
+<li class="nav-link"><a href="{{ url_for('photos') }}">Photos</a></li>
 </ul>
 </div>
 </nav>

app/templates/index.html

@@ -3,6 +3,24 @@
 {% block content %}
 <div class="jumbotron">
 <h1 id="graphPaname">graphPaname</h1>
-<p>graphPaname is an information system that aims to show real-time data, related to the COVID-19 outbreak, in the city of Paris</p>
+<p>
+graphPaname is a system that collects real-time data, relevant to the COVID-19 pandemic de-escalation, from the city of Paris.
+</p>
+<p>
+It works with 4 datasets about the de-escalation:
+</p>
+<ul class="org-ul">
+<li>Retailers with home delivery</li>
+<li>Additional parking places in relay parkings (parkings connected to public transportation)</li>
+<li>Temporary cycling paths</li>
+<li>Temporary pedestrian streets</li>
+</ul>
+<p>
+For each dataset, we offer a table with the data, and a map of Paris with markers. Additionally, there&rsquo;s a section with photos related to the COVID-19 pandemic.
+</p>
 </div>
 {% endblock %}

app/templates/photos.html

@@ -0,0 +1,9 @@
{% extends "base.html" %}
{% import 'bootstrap/wtf.html' as wtf %}

{% block app_content %}
<h1>Photos</h1>
{% for img_path in images %}
<img src="{{img_path|safe}}" alt="Image placeholder" id="photo" style="width: 200px"/>
{% endfor %}
{% endblock %}

app/templates/visualization.html

@@ -3,7 +3,13 @@
 {% block app_content %}
 <h1>Dataset visualization</h1>
-<img src="data:image/png;base64,{{ plot }}" alt="Image Placeholder">
-<img src="data:image/png;base64,{{ map }}" alt="Image Placeholder">
+<div class="row">
+<div class="col-md-9">
+{{ table|safe }}
+</div>
+<div class="col-md-1">
+<iframe id="map", src="/map" width="350" height="350"></iframe>
+</div>
+</div>
 <p><a href="{{ url_for('data') }}">Back</a></p>
 {% endblock %}

constants.py

@@ -2,15 +2,13 @@ DATASETS = [
     "coronavirus-commercants-parisiens-livraison-a-domicile",
     "deconfinement-pistes-cyclables-temporaires",
     "deconfinement-parking-relais-doublement-des-places",
+    "deconfinement-rues-amenagees-pour-pietons",
 ]
-URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
+DATASET_URL = "https://opendata.paris.fr/api/records/1.0/search/?dataset={}&q=&rows=-1"
-FILES = {
-    "deconfinement-pistes-cyclables-temporaires": "data/cycling-paths.json",
-    "deconfinement-parking-relais-doublement-des-places": "data/relay-parking.json",
-    "coronavirus-commercants-parisiens-livraison-a-domicile": "data/home-delivery.json",
-}
+FLICKR_URL = "https://www.flickr.com/search/?text={}"
 COLUMNS = {
     "deconfinement-pistes-cyclables-temporaires": [
+        "fields.geo_shape.type",
         "fields.geo_shape.coordinates",
         "fields.statut",
         "record_timestamp",
@@ -20,12 +18,14 @@ COLUMNS = {
         "fields.societe",
         "fields.nb_places_dispositif_environ",
         "fields.parcs",
+        "fields.geo_shape.type",
         "fields.geo_shape.coordinates",
         "fields.cp",
         "fields.ville",
         "fields.adresse",
     ],
     "coronavirus-commercants-parisiens-livraison-a-domicile": [
+        "fields.geo_shape.type",
         "fields.geo_shape.coordinates",
         "fields.adresse",
         "fields.code_postal",
@@ -37,5 +37,20 @@ COLUMNS = {
         "fields.telephone",
         "fields.mail",
     ],
+    "deconfinement-rues-amenagees-pour-pietons": [
+        "fields.geo_shape.type",
+        "fields.geo_shape.coordinates",
+        "fields.nom_voie",
+        "fields.categorie",
+        "fields.statut",
+        "record_timestamp",
+    ],
 }
+CHOICES = [
+    ("coronavirus-commercants-parisiens-livraison-a-domicile", "home-delivery"),
+    ("deconfinement-pistes-cyclables-temporaires", "cycling-paths"),
+    ("deconfinement-parking-relais-doublement-des-places", "relay-parking"),
+    ("deconfinement-rues-amenagees-pour-pietons", "pedestrian-streets"),
+]
 SECRET_KEY = "trolaso"
+COORDINATES = [48.864716, 2.349014]
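To see how the renamed constants fit together: `CHOICES` pairs each Opendata Paris dataset id with the short label shown in the form, and `DATASET_URL` turns that id into the API query. A small sketch based only on this diff:

```python
# Sketch: how CHOICES and DATASET_URL are meant to be combined
from constants import CHOICES, DATASET_URL

dataset_id, label = CHOICES[1]  # ("deconfinement-pistes-cyclables-temporaires", "cycling-paths")
print(label, "->", DATASET_URL.format(dataset_id))
# cycling-paths -> https://opendata.paris.fr/api/records/1.0/search/?dataset=deconfinement-pistes-cyclables-temporaires&q=&rows=-1
```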

View File

View File

@@ -10,16 +10,8 @@ pkgs.mkShell {
     flask
     flask-bootstrap
     flask_wtf
-    matplotlib
     folium
     pytest
-    # Development tools
-    black
-    isort
-    pyflakes
-    python-language-server
-    pyls-black
-    pyls-isort
-    pyls-mypy
+    beautifulsoup4
   ];
 }

View File

@@ -1,11 +1,9 @@
-from os import remove
 from pandas import DataFrame
 from requests import get
 from app.preprocessing import create_dataframe
 from app.data_request import request_dataset
-from constants import COLUMNS, DATASETS, FILES, URL
+from constants import COLUMNS, DATASETS, DATASET_URL, FLICKR_URL
 def test_dataset_request():
@@ -13,7 +11,7 @@ def test_dataset_request():
     Checks that the datasets URLs are reachable
     """
     for dataset in DATASETS:
-        response = get(URL.format(dataset))
+        response = get(DATASET_URL.format(dataset))
         assert response.status_code == 200
@@ -22,8 +20,14 @@ def test_dataframe_creation():
     Verifies that the DataFrames are created and filtered properly
     """
     for dataset in DATASETS:
-        request_dataset(dataset)
         df = create_dataframe(dataset)
-        remove(FILES[dataset])
         assert isinstance(df, DataFrame)
         assert all(df.columns == COLUMNS[dataset])
+def test_flickr_request():
+    """
+    Checks that Flickr search is available
+    """
+    response = get(FLICKR_URL.format("paris coronavirus"))
+    assert response.status_code == 200