Compare commits

...

50 Commits

SHA1 Message Date
daf3af574c Update Summary using new algorithm results 2021-05-22 10:46:23 +02:00
1f6624b770 Fix execution script for greedy 2021-05-21 16:17:17 +02:00
acb9b35c7a Adapt local search algorithm to data structure 2021-05-21 14:27:35 +02:00
d82fe81f78 Change data representation in local search 2021-05-20 21:16:38 +02:00
b8b1fe9368 Execute greedy algorithm only once 2021-05-20 19:28:06 +02:00
5cff3199c6 Rename counter to accumulator 2021-05-20 17:23:05 +02:00
ceea2d8824 Replace f-strings with @ in dataframe query 2021-05-20 00:26:19 +02:00
550f0bb043 Refactor fitness calculation using combinations 2021-05-19 20:43:14 +02:00
9a4831e31e Fix fitness computation 2021-05-19 19:38:56 +02:00
dfdd142fdb Add correct index to the solutions list 2021-05-19 19:20:27 +02:00
38585aa16b Remove additional iteration from greedy algorithm 2021-05-19 18:26:56 +02:00
3aaf328a2c Add results analysis to summary 2021-04-20 22:19:44 +02:00
40848fdd31 Add algorithm results Excel file 2021-04-20 21:41:21 +02:00
0e995eda27 Add batch execution explanation to summary 2021-04-20 21:21:51 +02:00
6b67e6db34 Craft duplicate check before insert 2021-04-20 18:14:41 +02:00
5b63b993df Add execution time mean and standard deviation 2021-04-20 13:53:24 +02:00
b8b812d011 Write results to an Excel file 2021-04-20 13:22:01 +02:00
3e0dbb9168 Populate results with mean and std deviation 2021-04-20 11:58:24 +02:00
9ab8ec3d8a Add execution script 2021-04-20 01:21:30 +02:00
ed76d07345 Add user manual to summary 2021-04-20 00:02:38 +02:00
94570601e1 Add algorithms pseudocode in the summary 2021-04-19 22:15:57 +02:00
b937b41853 Revert "Revert "Remove iteration print statement""
This reverts commit bd4a88bb4e.
2021-04-19 20:27:56 +02:00
097ed9b52a Remove redundant previous solution checks 2021-04-19 18:29:11 +02:00
bd4a88bb4e Revert "Remove iteration print statement"
This reverts commit 9ef41abe46.
2021-04-19 16:17:52 +02:00
9ef41abe46 Remove iteration print statement 2021-04-19 15:53:13 +02:00
8b5029645f Return the best solution when you can't explore 2021-04-19 15:51:39 +02:00
2eb84a6883 Only insert elements that are not duplicated 2021-04-19 15:51:12 +02:00
5e6a6d00e9 Remove duplicates in local search 2021-04-16 20:07:31 +02:00
028db4ba91 Return best solution or the same if there's none 2021-04-16 20:06:13 +02:00
3e08b67371 Simplify worst element replacement 2021-04-16 20:05:45 +02:00
5812d470a9 Add numpy dependency 2021-04-15 23:28:57 +02:00
1f2fde1abf Move each algorithm into a different module 2021-04-15 22:07:44 +02:00
b584239d6e Rename variable in remove_duplicates function 2021-04-15 22:01:40 +02:00
aa4a3fdec9 Set initial distance to 0 in the greedy algorithm 2021-04-15 20:51:21 +02:00
193e9046eb Refactor neighbourhood exploration 2021-04-15 20:13:19 +02:00
e3c55ca89f Refactor random solution generation 2021-04-15 20:05:15 +02:00
98a86a97c0 Minor refactor in results report 2021-04-15 18:35:31 +02:00
1cf8a2696a Implement local search algorithm 2021-04-14 19:26:13 +02:00
33a9cf323a Check if the random candidate is a duplicate 2021-04-14 19:25:25 +02:00
da234aae96 Implement best first local search algorithm 2021-04-14 18:34:52 +02:00
b3211ff682 Select m random elements as the first solution 2021-04-13 23:01:37 +02:00
bf7ca7f520 Remove duplicates in an efficient way 2021-04-13 22:44:31 +02:00
75c3a94fbe Change metric in Greedy algorithm 2021-04-13 22:44:17 +02:00
b63b5b08b6 Fix pseudorandom solution generation 2021-04-12 15:22:54 +02:00
d04d0becfe Show algorithm execution time 2021-04-12 13:23:32 +02:00
a81756e93b Remove selected solution from dataset properly 2021-04-12 12:58:23 +02:00
04c92add44 Cast distance to float to get the maximum value 2021-04-12 12:22:26 +02:00
f73e28fb8a Choose pseudorandom first solution in local search 2021-04-12 12:03:11 +02:00
27df20f7d1 Specify algorithm choice via CLI arguments 2021-04-12 12:01:37 +02:00
6a3bdc44e3 Add function to show results 2021-04-12 11:12:46 +02:00
11 changed files with 455 additions and 67 deletions

155
docs/Summary.org Normal file

@@ -0,0 +1,155 @@
#+TITLE: Práctica 1
#+SUBTITLE: Metaheurísticas
#+AUTHOR: Amin Kasrou Aouam
#+DATE: 2021-04-19
#+PANDOC_OPTIONS: template:~/.pandoc/templates/eisvogel.latex
#+PANDOC_OPTIONS: listings:t
#+PANDOC_OPTIONS: toc:t
#+PANDOC_METADATA: lang=es
#+PANDOC_METADATA: titlepage:t
#+PANDOC_METADATA: listings-no-page-break:t
#+PANDOC_METADATA: toc-own-page:t
#+PANDOC_METADATA: table-use-row-colors:t
#+PANDOC_METADATA: colorlinks:t
#+PANDOC_METADATA: logo:/home/coolneng/Photos/Logos/UGR.png
#+LaTeX_HEADER: \usepackage[ruled, lined, linesnumbered, commentsnumbered, longend]{algorithm2e}
* Práctica 1
** Introduction
In this assignment, we use different search algorithms to solve the maximum diversity problem (MDP). We implement:
- A /greedy/ algorithm
- A local search algorithm
** Algorithms
*** Greedy
The /greedy/ algorithm iteratively adds one point at a time, until it builds a solution of size m.
First, we select the element that is furthest from all the others (the centroid) and add it to our set of selected elements. At each subsequent step, we add the element chosen according to the /MaxMin/ measure. The algorithm is illustrated below:
\begin{algorithm}
\KwIn{A list $[a_i]$, $i=1, 2, \cdots, n$, that contains the points and their pairwise distances}
\KwOut{The list $Sel$ of $m$ selected elements}
$Sel = [\ ]$
$centroid \leftarrow getFurthestElement()$
$Sel.append(centroid)$
\For{$i \leftarrow 1$ \KwTo $m - 1$}{
$closestElements = [\ ]$
\For{$element$ in $Sel$}{
$closestPoint \leftarrow getClosestPoint(element)$
$closestElements.append(closestPoint)$
}
$maximum \leftarrow max(closestElements)$
$Sel.append(maximum)$
}
\KwRet{$Sel$}
\end{algorithm}
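For illustration only, here is a minimal sketch of the same /MaxMin/ construction in Python over a precomputed, symmetric distance matrix with a zero diagonal; the repository's actual implementation (=src/greedy.py= below) works on a pandas edge list instead, and the name =greedy_maxmin= is ours:
#+begin_src python
import numpy as np

def greedy_maxmin(distances: np.ndarray, m: int) -> list:
    """Sketch of MaxMin greedy selection; assumes a symmetric matrix."""
    n = distances.shape[0]
    # Start from the centroid: the point with the largest summed distance
    selected = [int(distances.sum(axis=0).argmax())]
    while len(selected) < m:
        remaining = [i for i in range(n) if i not in selected]
        # Distance from each candidate to its closest selected point
        min_dist = [distances[i, selected].min() for i in remaining]
        # MaxMin: choose the candidate whose closest selected point is furthest
        selected.append(remaining[int(np.argmax(min_dist))])
    return selected
#+end_src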
*** Local search
The local search algorithm selects a random solution of size /m/ and explores neighbouring solutions for a maximum number of iterations.
To improve the algorithm's efficiency, we use the first-best heuristic (we select the first neighbouring solution that improves the current one). The algorithm is illustrated below:
\begin{algorithm}
\KwIn{A list $[a_i]$, $i=1, 2, \cdots, m$, the initial solution}
\KwOut{The best solution found}
$Solutions = [\ ]$
$firstSolution \leftarrow getRandomSolution()$
$Solutions.append(firstSolution)$
$lastSolution \leftarrow getLastElement(Solutions)$
$maxIterations \leftarrow 1000$
\For{$i \leftarrow 0$ \KwTo $maxIterations$}{
$neighbour \leftarrow getNeighbouringSolution(lastSolution)$
\While{$neighbour \leq lastSolution$}{
$neighbour \leftarrow getNeighbouringSolution(lastSolution)$
}
$Solutions.append(neighbour)$
$lastSolution \leftarrow getLastElement(Solutions)$
}
$finalSolution \leftarrow getLastElement(Solutions)$
\KwRet{$finalSolution$}
\end{algorithm}
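Again as a sketch only (the real implementation in =src/local_search.py= below uses pandas dataframes and a different neighbour-generation loop), a first-best local search over the same kind of distance matrix could look like this; =fitness=, the swap rule and the fixed seed are illustrative assumptions:
#+begin_src python
import numpy as np
from numpy.random import default_rng

def fitness(solution, distances):
    """Sum of pairwise distances inside the solution (zero diagonal assumed)."""
    idx = np.array(solution)
    return distances[np.ix_(idx, idx)].sum() / 2

def local_search_sketch(distances, m, max_iterations=100, seed=42):
    rng = default_rng(seed)
    n = distances.shape[0]
    # Random initial solution of size m
    current = list(rng.choice(n, size=m, replace=False))
    for _ in range(max_iterations):
        # Build a neighbour by swapping out the least-contributing element
        worst = min(current, key=lambda i: distances[i, current].sum())
        candidate = int(rng.integers(n))
        while candidate in current:
            candidate = int(rng.integers(n))
        neighbour = [candidate if i == worst else i for i in current]
        # First-best: accept the first neighbour that improves the fitness
        if fitness(neighbour, distances) > fitness(current, distances):
            current = neighbour
    return current
#+end_src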
** Implementation
The assignment was implemented in /Python/, using the following libraries:
- NumPy
- Pandas
*** Installation
To run the program, Python must be installed, along with the *Pandas* and *NumPy* libraries.
A shell.nix file is provided to ease the installation of the dependencies, via the [[https://nixos.org/][Nix]] package manager. After installing Nix, you only need to run the following command at the root of the project:
#+begin_src shell
nix-shell
#+end_src
*** Execution
The program is run with the following command:
#+begin_src shell
python src/main.py <dataset> <algorithm>
#+end_src
The possible parameters are:
| dataset                      | algorithm |
|------------------------------+-----------|
| Any file in the data folder  | greedy    |
|                              | local     |
A script is also provided that runs 1 iteration of the greedy algorithm and 3 iterations of the local search on each of the /datasets/, and saves the results to a spreadsheet. It can be run with the following command:
#+begin_src shell
python src/execution.py
#+end_src
*Note*: the [[https://xlsxwriter.readthedocs.io/][XlsxWriter]] library must be installed to export the results to an Excel file.
** Analysis of the results
The results obtained are in the file /algorithm-results.xlsx/; we analyse each algorithm separately.
*** Greedy algorithm
#+CAPTION: Greedy algorithm
[[./assets/greedy.png]]
The greedy algorithm is deterministic, so its standard deviation is zero, given that it is executed a single time. The execution time varies considerably with the dataset:
- Dataset with n=500: 7-10 seconds
- Dataset with n=2000: 5-12 minutes
The total distance obtained is generally lower than that of the local search algorithm, although the difference is not significant.
*** Local search algorithm
#+CAPTION: Local search algorithm
[[./assets/local.png]]
The local search algorithm is stochastic, since a pseudorandom number generator is used to obtain each of the solutions. The execution time varies considerably with the dataset:
- Dataset with n=500: 1-2 minutes
- Dataset with n=2000: 20-25 minutes
The total distance obtained is generally higher than that of the greedy algorithm, which indicates that local search obtains better results at the expense of execution time.
Due to our computational limitations, the runs of this algorithm were capped at a maximum of 100 iterations.

BIN
docs/Summary.pdf Normal file

Binary file not shown.

BIN
docs/algorithm-results.xlsx Normal file

Binary file not shown.

BIN
docs/assets/greedy.png Normal file

Binary file not shown.

Size: 141 KiB

BIN
docs/assets/local.png Normal file

Binary file not shown.

Size: 155 KiB

shell.nix

@@ -2,4 +2,11 @@
with pkgs;
mkShell { buildInputs = [ python39 python39Packages.pandas ]; }
mkShell {
  buildInputs = [
    python39
    python39Packages.numpy
    python39Packages.pandas
    python39Packages.XlsxWriter
  ];
}

98
src/execution.py Normal file

@@ -0,0 +1,98 @@
from glob import glob
from subprocess import run
from sys import executable
from numpy import mean, std
from pandas import DataFrame, ExcelWriter


def file_list(path):
    file_list = []
    for fname in glob(path):
        file_list.append(fname)
    return file_list


def create_dataframes():
    greedy = DataFrame()
    local = DataFrame()
    return greedy, local


def process_output(results):
    # Parse the distances and execution times out of the captured stdout lines
    distances = []
    time = []
    for element in results:
        for line in element:
            if line.startswith(bytes("Total distance:", encoding="utf-8")):
                line_elements = line.split(sep=bytes(":", encoding="utf-8"))
                distances.append(float(line_elements[1]))
            if line.startswith(bytes("Execution time:", encoding="utf-8")):
                line_elements = line.split(sep=bytes(":", encoding="utf-8"))
                time.append(float(line_elements[1]))
    return distances, time


def populate_dataframes(greedy, local, greedy_list, local_list, dataset):
    greedy_distances, greedy_time = process_output(greedy_list)
    local_distances, local_time = process_output(local_list)
    greedy_dict = {
        "dataset": dataset.removeprefix("data/"),
        "media distancia": mean(greedy_distances),
        "desviacion distancia": std(greedy_distances),
        "media tiempo": mean(greedy_time),
        "desviacion tiempo": std(greedy_time),
    }
    local_dict = {
        "dataset": dataset.removeprefix("data/"),
        "media distancia": mean(local_distances),
        "desviacion distancia": std(local_distances),
        "media tiempo": mean(local_time),
        "desviacion tiempo": std(local_time),
    }
    greedy = greedy.append(greedy_dict, ignore_index=True)
    local = local.append(local_dict, ignore_index=True)
    return greedy, local


def script_execution(filenames, greedy, local, iterations=3):
    # Run greedy once and local search `iterations` times on each dataset
    script = "src/main.py"
    for dataset in filenames:
        print(f"Running on dataset {dataset}")
        greedy_cmd = run(
            [executable, script, dataset, "greedy"], capture_output=True
        ).stdout.splitlines()
        local_list = []
        for _ in range(iterations):
            local_cmd = run(
                [executable, script, dataset, "local"], capture_output=True
            ).stdout.splitlines()
            local_list.append(local_cmd)
        greedy, local = populate_dataframes(
            greedy, local, [greedy_cmd], local_list, dataset
        )
    return greedy, local


def export_results(greedy, local):
    # Write each dataframe to its own sheet and widen columns to fit content
    dataframes = {"Greedy": greedy, "Local search": local}
    writer = ExcelWriter(path="docs/algorithm-results.xlsx", engine="xlsxwriter")
    for name, df in dataframes.items():
        df.to_excel(writer, sheet_name=name, index=False)
        worksheet = writer.sheets[name]
        for index, column in enumerate(df):
            series = df[column]
            max_length = max(series.astype(str).str.len().max(), len(str(series.name)))
            worksheet.set_column(index, index, width=max_length + 5)
    writer.save()


def main():
    datasets = file_list(path="data/*.txt")
    greedy, local = create_dataframes()
    populated_greedy, populated_local = script_execution(datasets, greedy, local)
    export_results(populated_greedy, populated_local)


if __name__ == "__main__":
    main()

58
src/greedy.py Normal file

@@ -0,0 +1,58 @@
from pandas import DataFrame, Series


def get_first_solution(n, data):
    # The first element is the one with the largest summed distance (centroid)
    distance_sum = DataFrame(columns=["point", "distance"])
    for element in range(n):
        element_df = data.query(f"source == {element} or destination == {element}")
        distance = element_df["distance"].sum()
        distance_sum = distance_sum.append(
            {"point": element, "distance": distance}, ignore_index=True
        )
    furthest_index = distance_sum["distance"].astype(float).idxmax()
    furthest_row = distance_sum.iloc[furthest_index]
    furthest_row["distance"] = 0
    return furthest_row


def get_different_element(original, row):
    if row.source == original:
        return row.destination
    return row.source


def get_closest_element(element, data):
    element_df = data.query(f"source == {element} or destination == {element}")
    closest_index = element_df["distance"].astype(float).idxmin()
    closest_row = data.loc[closest_index]
    closest_point = get_different_element(original=element, row=closest_row)
    return Series(data={"point": closest_point, "distance": closest_row["distance"]})


def explore_solutions(solutions, data, index):
    # MaxMin: among each selected point's closest neighbour, keep the furthest
    closest_elements = solutions["point"].apply(func=get_closest_element, data=data)
    furthest_index = closest_elements["distance"].astype(float).idxmax()
    solution = closest_elements.iloc[furthest_index]
    solution.name = index
    return solution


def remove_duplicates(current, previous, data):
    duplicate_free_df = data.query(
        "(source != @current or destination not in @previous) and \
        (source not in @previous or destination != @current)"
    )
    return duplicate_free_df


def greedy_algorithm(n, m, data):
    solutions = DataFrame(columns=["point", "distance"])
    first_solution = get_first_solution(n, data)
    solutions = solutions.append(first_solution, ignore_index=True)
    for iteration in range(m - 1):
        element = explore_solutions(solutions, data, index=iteration + 1)
        solutions = solutions.append(element)
        data = remove_duplicates(
            current=element["point"], previous=solutions["point"], data=data
        )
    return solutions

75
src/local_search.py Normal file

@@ -0,0 +1,75 @@
from numpy.random import choice, seed, randint
from pandas import DataFrame


def get_row_distance(source, destination, data):
    row = data.query(
        """(source == @source and destination == @destination) or \
        (source == @destination and destination == @source)"""
    )
    return row["distance"].values[0]


def compute_distance(element, solution, data):
    # Sum the distances from `element` to every other point in the solution
    accumulator = 0
    distinct_elements = solution.query(f"point != {element}")
    for _, item in distinct_elements.iterrows():
        accumulator += get_row_distance(
            source=element,
            destination=item.point,
            data=data,
        )
    return accumulator


def get_first_random_solution(n, m, data):
    solution = DataFrame(columns=["point", "distance"])
    seed(42)
    solution["point"] = choice(n, size=m, replace=False)
    solution["distance"] = solution["point"].apply(
        func=compute_distance, solution=solution, data=data
    )
    return solution


def element_in_dataframe(solution, element):
    duplicates = solution.query(f"point == {element}")
    return not duplicates.empty


def replace_worst_element(previous, n, data):
    # Swap the element that contributes the least distance for a random one
    solution = previous.copy()
    worst_index = solution["distance"].astype(float).idxmin()
    random_element = randint(n)
    while element_in_dataframe(solution=solution, element=random_element):
        random_element = randint(n)
    solution["point"].loc[worst_index] = random_element
    solution["distance"].loc[worst_index] = compute_distance(
        element=solution["point"].loc[worst_index], solution=solution, data=data
    )
    return solution


def get_random_solution(previous, n, data):
    # First-best: keep generating neighbours until one improves the previous
    solution = replace_worst_element(previous, n, data)
    while solution["distance"].sum() <= previous["distance"].sum():
        solution = replace_worst_element(previous=solution, n=n, data=data)
    return solution


def explore_neighbourhood(element, n, data, max_iterations=100000):
    neighbourhood = []
    neighbourhood.append(element)
    for _ in range(max_iterations):
        previous_solution = neighbourhood[-1]
        neighbour = get_random_solution(previous=previous_solution, n=n, data=data)
        neighbourhood.append(neighbour)
    return neighbour


def local_search(n, m, data):
    first_solution = get_first_random_solution(n, m, data)
    best_solution = explore_neighbourhood(
        element=first_solution, n=n, data=data, max_iterations=100
    )
    return best_solution

61
src/main.py Executable file

@@ -0,0 +1,61 @@
from preprocessing import parse_file
from greedy import greedy_algorithm
from local_search import local_search, get_row_distance
from sys import argv
from time import time
from itertools import combinations


def execute_algorithm(choice, n, m, data):
    if choice == "greedy":
        return greedy_algorithm(n, m, data)
    elif choice == "local":
        return local_search(n, m, data)
    else:
        print("The valid algorithm choices are 'greedy' and 'local'")
        exit(1)


def get_fitness(solutions, data):
    # The fitness is the summed distance over every pair of chosen points
    accumulator = 0
    comb = combinations(solutions.index, r=2)
    for index in list(comb):
        elements = solutions.loc[index, :]
        accumulator += get_row_distance(
            source=elements["point"].head(n=1).values[0],
            destination=elements["point"].tail(n=1).values[0],
            data=data,
        )
    return accumulator


def show_results(solutions, fitness, time_delta):
    duplicates = solutions.duplicated().any()
    print(solutions)
    print(f"Total distance: {fitness}")
    if not duplicates:
        print("No duplicates found")
    print(f"Execution time: {time_delta}")


def usage(argv):
    print(f"Usage: python {argv[0]} <file> <algorithm choice>")
    print("algorithm choices:")
    print("greedy: greedy algorithm")
    print("local: local search algorithm")
    exit(1)


def main():
    if len(argv) != 3:
        usage(argv)
    n, m, data = parse_file(argv[1])
    start_time = time()
    solutions = execute_algorithm(choice=argv[2], n=n, m=m, data=data)
    end_time = time()
    fitness = get_fitness(solutions, data)
    show_results(solutions, fitness, time_delta=end_time - start_time)


if __name__ == "__main__":
    main()


@@ -1,66 +0,0 @@
from preprocessing import parse_file
from pandas import DataFrame
from sys import argv


def get_first_solution(n, data):
    distance_sum = DataFrame(columns=["point", "distance"])
    for element in range(n):
        element_df = data.query(f"source == {element} or destination == {element}")
        distance = element_df["distance"].sum()
        distance_sum = distance_sum.append(
            {"point": element, "distance": distance}, ignore_index=True
        )
    furthest_index = distance_sum["distance"].idxmax()
    furthest_row = distance_sum.iloc[furthest_index]
    furthest_row["distance"] = 0
    return furthest_row


def get_different_element(original, row):
    if row.source == original:
        return row.destination
    return row.source


def get_furthest_element(element, data):
    element_df = data.query(f"source == {element} or destination == {element}")
    furthest_index = element_df["distance"].idxmax()
    furthest_row = data.iloc[furthest_index]
    furthest_point = get_different_element(original=element, row=furthest_row)
    furthest_element = {"point": furthest_point, "distance": furthest_row["distance"]}
    return furthest_element, furthest_index


def greedy_algorithm(n, m, data):
    solutions = DataFrame(columns=["point", "distance"])
    first_solution = get_first_solution(n, data)
    solutions = solutions.append(first_solution, ignore_index=True)
    for _ in range(m):
        last_solution = solutions["point"].tail(n=1)
        centroid, index = get_furthest_element(element=int(last_solution), data=data)
        solutions = solutions.append(dict(centroid), ignore_index=True)
        data = data.drop(index)
    return solutions


# NOTE In each step, switch to the element that gives the least amount
def local_search():
    pass


def usage(argv):
    print(f"Usage: python {argv[0]} <file>")
    exit(1)


def main():
    if len(argv) != 2:
        usage(argv)
    n, m, data = parse_file(argv[1])
    solutions = greedy_algorithm(n, m, data)
    print(solutions)


if __name__ == "__main__":
    main()