import numpy as np
import cvxpy as cp
import numpy as np
import os
[docs]def add_liste(Level, Liste, Node, UP=True):
if Level==-1:
Liste = [Node] + Liste
elif Level >= len(Liste):
Liste.append([Node])
if UP:
Level -= 1
else:
Level += 1
else:
Liste.append([Node])
if UP:
Level -= 1
else:
Level += 1
return Level, Liste
[docs]def recu(node, level, graph, liste, node2ind, dejavu, dico_closest, nodes):
L = []
for ind in range(len(nodes)):
if graph[ind,node2ind[node]] and dejavu[ind]==0:
L.append(ind)
for ind in L:
if graph[ind,node2ind[node]] and dejavu[ind]==0:
level, liste = add_liste(level, liste, nodes[ind], UP=True)
dejavu[ind] = 1
graph, liste, dejavu = recu(nodes[ind], level, graph, liste, node2ind, dejavu, dico_closest, nodes)
return graph, liste, dejavu
[docs]class Clustering:
"""
Class that performs the final rounding step on the rows of the matrix $\hat{B}$ which is the
optimal solution of the SDP relaxation of the K-means problem.
"""
def __init__(self, n, K):
"""
:param n: Nomber of nodes in the graph
:param K: Number of clusters
"""
self.n = n
self.K = K
[docs] def solve_relaxed_LP(self, M):
"""
Prelimanary to run the K-medoid algorithm.
:param M: M is the output of the relaxed SDP problem.
"""
barx = cp.Variable((self.n,self.n))
bary = cp.Variable(self.n)
C = np.zeros((self.n,self.n))
for i in range(1, self.n):
for j in range(i):
C[i,j] = np.linalg.norm(M[:,i]-M[:,j], ord=1)
C[j,i] = C[i,j]
constraints = [ cp.sum(barx[:,j]) == 1 for j in range(self.n)]
constraints += [
- barx[i%self.n,i//self.n] + bary[i%self.n] >= 0 for i in range(self.n * self.n)
]
constraints += [
self.K - cp.sum(bary) >= 0
]
constraints += [
barx[i%self.n,i//self.n] >=0 for i in range(self.n * self.n)
]
constraints += [
bary[i] >=0 for i in range(self.n)
]
prob = cp.Problem(cp.Minimize(cp.trace(C@barx)),
constraints)
prob.solve()
barx = barx.value
bary = bary.value
return barx, bary, C
[docs] def Kmedoids(self, barx, bary, C):
"""
Kmedoid algorithm that performs a rounding step on the rows of $\hat{B}$.
:param barx: Output of the method 'solve_relaxed_LP'
:param bary: Output of the method 'solve_relaxed_LP'
:param C: Output of the method 'solve_relaxed_LP'
"""
# Step 1 : consolidating locations
## It consists in moving revelantly demand.
## All locations with positive demand will be far from eachother, garanteeing:
## - not increase the cost of the fractional solution
## - each feasible integer solution for the modified instance can be converted to a feasible integer
## solution for the original instance with a small added cost
d = np.ones(self.n)
barx = barx * (1* (barx>0))
barC = np.diag(C @ barx)
ind = np.argsort(barC)
for j in range(self.n):
for i in range(j):
if d[ind[i]]>0 and C[ind[i],ind[j]]<=4*barC[ind[j]]:
d[ind[i]] += d[ind[j]]
d[ind[j]] = 0
# Step 2 : Consolidating centers
## Construction of a 1/2-resticted solution
## y_i=0 if d[i]=0 and y_i >=1/2 otherwise (without paying this too much).
px = np.copy(barx)
py = np.copy(bary)
# Closest[i] will be the closest node to i (except i), let's say j, satisfying d[j]>0
closest = []
dico_weights = {}
sorted_lines_C = np.zeros((self.n,self.n))
for i in range(self.n):
ls = np.argsort(C[i,:])
sorted_lines_C[i,:] = ls
j = 0
while (d[int(ls[j])]==0 or i==int(ls[j])):
j += 1
closest.append(ls[j])
# We only move the nodes with a null demand and a partial open center (i.e y>0) and thus that can't be a center
if d[i]==0 and py[i]>0:
# We assign i to the closest point j such that d[j] > 0
py[ls[j]] = min(1 , py[i] + py[ls[j]])
py[i] = 0
for pj in range(self.n):
px[ls[j],pj] += px[i,pj]
px[i,pj] = 0
if d[i]>0:
dico_weights[i] = d[i]*C[closest[i],i]
## Construction of a (1/2-1)-integral solution
## y_i=0 if d[i]=0 and y_i =1/2 or 1 otherwise (without paying this too much).
pn = len(dico_weights)
haty = np.zeros(self.n)
hatx = np.zeros((self.n,self.n))
# We sort the locations j \in N' (ie such that d[j]>0) in decreasing order of their weight
sorted_dico_weights = {k: v for k, v in sorted(dico_weights.items(), key=lambda item: item[1], reverse=True)}
list_keys = sorted_dico_weights.keys() #list of nodes in N' (i.e. with d[i]>0)
for ind, j in enumerate(list_keys):
if ind < 2*self.K-pn:
haty[j] = 1
else:
haty[j] = 1/2
hatx[closest[j],j] = 1/2
hatx[j,j] = haty[j]
# Step 3 : rounding a {1/2, 1}-integral solution to an integral one
centers = [k for k in list_keys if haty[k] == 1]
odd_level = []
even_level = []
dico_closest = {i:closest[i] for i in list_keys if haty[i]==1/2}
import copy
dic = dico_closest.copy()
for key,value in dic.items():
try:
if dico_closest[value]==key:
del dico_closest[key]
except:
pass
keys = list(dico_closest.keys())
values = list(dico_closest.values())
nodes = list(set(keys+values))
graph = np.zeros((len(nodes),len(nodes)))
node2ind = {node:ind for ind,node in enumerate(nodes)}
for ind,key in enumerate(keys):
graph[node2ind[key],node2ind[values[ind]]] = 1
dejavu = np.zeros(len(nodes))
while len(dejavu)!=np.sum(dejavu):
notfound = True
indj = 0
level = 0
liste = []
while notfound and indj<len(nodes):
if np.sum(graph[:,indj])==0 and np.sum(graph[indj,:])>0 and dejavu[indj]==0:
notfound = False
j = nodes[indj]
liste.append([j])
s_j = dico_closest[j]
liste.append([s_j])
inds_j = node2ind[s_j]
dejavu[indj] = 1
dejavu[inds_j] = 1
graph, liste, dejavu = recu(s_j, 1, graph, liste, node2ind, dejavu, dico_closest, nodes)
level = 2
j = s_j
indj = inds_j
leaf = False
while not(leaf):
if np.sum(graph[indj,:])>0:
s_j = dico_closest[j]
inds_j = node2ind[s_j]
if dejavu[inds_j]==0:
dejavu[inds_j] = 1
graph, liste, dejavu = recu(s_j, level-1, graph, liste, node2ind, dejavu, dico_closest, nodes)
level, liste = add_liste(level, liste, s_j, UP=False)
j = s_j
indj = inds_j
else:
leaf = True
else:
leaf = True
indj += 1
for ind in range(len(liste)):
if ind % 2:
even_level += liste[ind]
else:
odd_level += liste[ind]
if len(odd_level)<len(even_level):
centers = centers + list(set(odd_level))
else:
centers = centers + list(set(even_level))
centers = list(set(centers))
# -1 is assigned to the nodes that are not centers. Otherwise, we numerote them.
num_center = -np.ones(self.n)
for ind, j in enumerate(centers):
num_center[j] = int(ind)
A = np.zeros((self.n,self.K))
for i in range(self.n):
if num_center[i] == -1:
ls = sorted_lines_C[i,:]
count = 0
while num_center[int(ls[count])] == -1:
count += 1
A[i,int(num_center[int(ls[count])])] = 1
else:
A[i,int(num_center[i])] = 1
self.clusters_approx = []
for i in range(self.n):
for j in range(self.K):
if A[i,j]==1:
self.clusters_approx.append(j)
self.true_partition = self.build_partition(self.clusters)
approx_partition = self.build_partition(self.clusters_approx)
self.find_permutation(self.true_partition, approx_partition)
inverse = [0] * len(self.permutation)
for i, p in enumerate(self.permutation):
inverse[p] = i
clusts_approx = np.copy(self.clusters_approx)
for i,group in enumerate(clusts_approx):
self.clusters_approx[i] = inverse[group]
[docs] def build_partition(self, clust):
"""
Given a list clust that associates to each node its community, this methods builds the associated
partition of the noes of the graph.
:param clust: List of estimated clusters for the n nodes of the graph
"""
d = {i:set() for i in range(self.K)}
for i in range(len(clust)):
d[clust[i]].add(i)
return (d)
[docs] def find_permutation(self, true_partition, approx_partition):
"""
Find the permutation between the names of the true communities and the ones estimated by our
algorithm.
:param true_partition: True partition of the nodes in the graph according to their clusters
:param approx_partition: Estimated partition of the nodes
"""
import itertools
permus = list(itertools.permutations([i for i in range(self.K)]))
best_error = np.float('inf')
for permu in permus:
error = 0
for k in range(self.K):
try:
error += len(true_partition[k]-approx_partition[permu[k]])
except:
error += len(true_partition[k])
if error < best_error:
best_error = error
self.permutation = permu