import numpy as np
import cvxpy as cp
import os
import matplotlib.pyplot as plt
from numpy.core.multiarray import _vec_string
[docs]class BaumWelch():
"""Class which performs the Baum-Welch algorithm and that uses it to solve the link prediction and the collaborative filtering problems."""
def __init__(self):
pass
[docs] def forward(self, ini, P, O, m, n, delta, Y, K=None):
"""
Forward step of the BaumWelch algorithm.
We observe the connections between nodes 0,...,m,n,...,n+delta. However, we consider that the edges involving
nodes between m+1 and n-1 are not reliable and we do not take into account the estimated clusters for the nodes
between m+1 and n-1.
:param ini: initial distribution of the Markov chain
:param P: Transition matrix of the Markov chain
:param O: matrix of emission probabilities
:param m: We observe fully the graph until time m
:param n: We do not observe reliably the connection involving nodes between time m and n
:param delta: We observe the connections between nodes n and n+delta
:param Y: vector of length n+delta+1 of the estimated communities
:param K: Number of communities
.. note:
Using this function with a specified K can be typically interesting when we try to
estimate the number of clusters with the procedure describes in the paper.
"""
if K is None:
K=self.K
alpha = np.ones((K, n+delta+1))
alpha[:,0] = ini * O[:,Y[0]]
# scaling
alpha[:,0] /= np.sum(alpha[:,0])
puissP = P
for t in range(1,n+delta+1):
for i in range(K):
if t <= m or t >= n:
alpha[i,t] = np.sum(alpha[:,t-1] * P[:,i]) * O[i,Y[t]]
else:
alpha[i,t] = np.sum(alpha[:,m] * puissP[:,i])
puissP = puissP @ P
# scaling
# alpha[:,t] /= np.sum(alpha[:,t])
return alpha
[docs] def backward(self, P, O, m, n, delta, Y, K=None):
"""
Backward step of the BaumWelch algorithm.
We observe the connections between nodes 0,...,m,n,...,n+delta. However, we consider that the edges involving
nodes between m+1 and n-1 are not reliable and we do not take into account the estimated clusters for the nodes
between m+1 and n-1.
:param P: Transition matrix of the Markov chain
:param O: matrix of emission probabilities
:param m: We observe fully the graph until time m
:param n: We do not observe reliably the connection involving nodes between time m and n
:param delta: We observe the connections between nodes n and n+delta
:param Y: vector of length n+delta+1 of the estimated communities
:param K: Number of communities
.. note:
Using this function with a specified K can be typically interesting when we try to
estimate the number of clusters with the procedure describes in the paper.
"""
if K is None:
K=self.K
beta = np.ones((K, n+delta+1))
beta[:,n+delta] = np.ones(K)
# scaling
# beta[:,n+delta] /= np.sum(beta[:,n+delta])
puissP = P
for t in range(n+delta-1,-1,-1):
for i in range(K):
if t <= m-1 or t >= n-1:
beta[i,t] = np.sum(beta[:,t+1] * P[i,:] * O[:,Y[t+1]])
else:
beta[i,t] = np.sum(beta[:,n-1] * puissP[i,:])
puissP = puissP @ P
# scaling
# beta[:,t] /= np.sum(beta[:,t])
return beta
[docs] def update(self, alpha, beta, P, O, m, n, delta, Y, K=None):
"""
Update step of the BaumWelch algorithm: the parameters of the HMM are updated and returned.
:param alpha: Matrix of size $K \times n+delta$ giving the output of the "forward" method
:param beta: Matrix of size $K \times n+delta$ giving the output of the "backward" method
:param P: Transition matrix of the Markov chain
:param O: matrix of emission probabilities
:param m: We observe fully the graph until time m
:param n: We do not observe reliably the connection involving nodes between time m and n
:param delta: We observe the connections between nodes n and n+delta
:param Y: vector of length n+delta+1 of the estimated communities
:param K: Number of communities
.. note:
Using this function with a specified K can be typically interesting when we try to
estimate the number of clusters with the procedure describes in the paper.
"""
if K is None:
K=self.K
xi = np.zeros((K,K, n+delta+1))
for t in range(n+delta):
for i in range(K):
for j in range(K):
if t >= m-1 and t <= n-1:
xi[i,j,t] = alpha[i,t] * P[i,j] * beta[j,t+1]
else:
xi[i,j,t] = alpha[i,t] * P[i,j] * O[j,Y[t+1]] * beta[j,t+1]
xi[:,:,t] /= np.sum(xi[:,:,t])
P = np.zeros((K,K))
O = np.zeros((K,K))
gamma = alpha * beta
gamma /= np.tile(np.sum(gamma, axis=0).reshape(1,n+delta+1), (K,1))
ini = gamma[:,0]
for i in range(K):
for j in range(K):
P[i,j] = np.sum(xi[i,j,:]) / np.sum(gamma[i,:])
for i in range(K):
for t in range(gamma.shape[1]):
O[i,Y[t]] += gamma[i,t]
O[i,:] /= np.sum(gamma[i,:])
return (ini, gamma, P, O)
[docs] def adjacency_BaumWelch(self, m, n, delta):
"""
Builds an adjacency matrix from the attribute self.X removing the nodes between m+1 and n-1 (that we consider to be not reliable).
:param m: We observe fully the graph until time m
:param n: We do not observe reliably how nodes between m+1 and n-1 are connected
:param delta: We observe reliably how nodes between n and n+delta are connected
"""
X = np.zeros((m+1+delta+1,m+1+delta+1))
for i in range(m+1):
X[i,:m+1] = self.X[i,:m+1]
X[:m+1,i] = self.X[i,:m+1]
for i in range(delta+1):
X[m+i+1,m+1:] = self.X[n+i,n:n+delta+1]
X[m+1:,m+i+1] = self.X[n:n+delta+1,n+i]
X[m+i+1,:m+1] = self.X[n+i,:m+1]
X[:m+1,m+i+1] = self.X[:m+1,n+i]
return X
[docs] def BaumWelch(self, m, n, delta, nbite, eps = 1e-2, K=None):
"""
BaumWelch algorithm that iteratively perform the forward, backward and update steps.
:param m: We observe fully the graph until time m
:param n: We do not observe reliably the connection involving nodes between time m and n
:param delta: We observe the connections between nodes n and n+delta
:param nbite: Number of iterations for the Baum Welch algorithm
:param eps: Parameter use to initialize the matrix of emission probabilities O
:param K: Number of communities
.. note:
Using this function with a specified K can be typically interesting when we try to
estimate the number of clusters with the procedure describes in the paper.
"""
if K is None:
K=self.K
self.estimate_partition()
Y = np.zeros(n+delta+1)
for i in range(m+1):
Y[i] = self.clusters_approx[i]
count = m+1
for i in range(n,n+delta+1):
Y[i] = self.clusters_approx[count]
count += 1
Y = np.array(Y, dtype=int)
ini = np.ones(K) / K
P = np.ones((K,K)) / K
O = (1-eps)*np.eye(K) + (eps / (K-1))* (np.ones((K,K))-np.eye(K))
for ite in range(nbite):
alpha = self.forward(ini, P, O, m, n, delta, Y)
beta = self.backward(P, O, m, n, delta, Y)
ini, gamma, P, O = self.update(alpha, beta, P, O, m, n, delta, Y)
return ini, gamma, P, O
[docs] def collaborative_filtering_robustMAP(self, ini, alpha, beta, O, observed_links, observed_nodes, m, n, Pbaum=None):
"""
Solve robustly the collaborative filtering problem when we observe fully the graph at time m and we want
to predict the community of node n when we observe only a subset of the edges that connects (or not) n
with the nodes 1,...,m.
:param ini: initial distribution of the Markov chain given by the Baum-Welch algorithm
:param alpha: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm
:param beta: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm
:param observed_links: A vector with binary variables of length less than m. For any i, observed_links[i] is 1 if and only if we observe an edge between nodes n and observed_nodes[i] (and 0 otherwise).
:param observed_nodes: A vector with the same length as the vector "observed_links". It contains the nodes for which we observe the connection (or not) with node n
:param m: We observe fully the graph until time m
:param n: Node that we want to learn the community
.. note:
Our implementation do not respect striclty the formula of the paper. We get rid of quantities that do not depend on
the cluster k of node n. This allows to avoid underflow issues.
"""
if Pbaum is None:
Pbaum = self.approx_P
best_pred = 0
best_k = -1
indices = np.argsort(observed_nodes)[::-1]
observed_nodes = observed_nodes[indices]
observed_links = observed_links[indices]
ROB = []
for k in range(self.K): # c_{n}
res = beta[:,observed_nodes[0]] / np.sum(beta[:,observed_nodes[0]])
for c in range(self.K): # c_i
if observed_links[0]:
res[c] *= self.approx_Q[c,k]
else:
res[c] *= 1-self.approx_Q[c,k]
for i,node in enumerate(observed_nodes[:int(len(observed_links)-1)]):
chi = np.ones((self.K, self.K))
nodei = node
nodeim = observed_nodes[i+1]
for c in range(self.K):
for cb in range(self.K):
chi[c,cb] = Pbaum[c,cb] * O[cb,self.clusters_approx[nodeim+1]]
if (nodeim+1) != nodei:
for step in range(nodeim+2,nodei+1):
chitemp = np.zeros((self.K, self.K))
for c in range(self.K):
for cb in range(self.K):
for cd in range(self.K):
chitemp[c,cb] += chi[c,cd] * Pbaum[cd,cb] * O[cb,self.clusters_approx[step]]
chi = np.copy(chitemp)
restemp = np.zeros(self.K)
for c in range(self.K):
restemp[c] = np.sum(chi[c,:] * res)
if observed_links[i+1]:
res = self.approx_Q[:,k] * restemp
else:
res = (1-self.approx_Q[:,k]) * restemp
final_pred = np.sum(alpha[:,observed_nodes[-1]] * res)
vec = [np.sum(ini * (np.linalg.matrix_power(Pbaum,n)[:,r])) for r in range(self.K)]
vec /= np.sum(vec)
final_pred *= vec[k]
ROB.append(final_pred)
ROB /= np.sum(ROB)
return np.argmax(ROB)
[docs] def collaborative_filtering_pluginMAP(self, alpha, beta, observed_links, observed_nodes, m, n):
"""
Solve the collaborative filtering problem using the plugin approach when we observe fully the graph at
time m and we want to predict the community of node n when we observe only a subset of the edges that
connects (or not) n with the nodes 1,...,m.
:param alpha: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm
:param beta: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm
:param observed_links: A vector with binary variables of length less than m. For any i, observed_links[i] is 1 if and only if we observe an edge between nodes n and observed_nodes[i] (and 0 otherwise).
:param observed_nodes: A vector with the same length as the vector "observed_links". It contains the nodes for which we observe the connection (or not) with node n
:param m: We observe fully the graph until time m
:param n: Node that we want to learn the community
"""
best_pred = 0
best_k = -1
for k in range(self.K):
temp = np.linalg.matrix_power(self.approx_P,n-m)[self.clusters_approx[m],k]
for i,node in enumerate(observed_nodes):
if observed_links[i]:
temp *= self.approx_Q[self.clusters_approx[node],k]
else:
temp *= 1-self.approx_Q[self.clusters_approx[node],k]
if temp > best_pred:
best_pred = temp
best_k = k
return best_k
[docs] def collaborative_filtering_optimalMAP(self, alpha, beta, observed_links, observed_nodes, m, n):
"""Solve robustly the collaborative filtering problem using the optimal approach when we observe fully
the graph at time m and we want to predict the community of node n when we observe only a subset of the
edges that connects (or not) n with the nodes 1,...,m.
:param alpha: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm
:param beta: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm
:param observed_links: A vector with binary variables of length less than m. For any i, observed_links[i] is 1 if and only if we observe an edge between nodes n and observed_nodes[i] (and 0 otherwise).
:param observed_nodes: A vector with the same length as the vector "observed_links". It contains the nodes for which we observe the connection (or not) with node n
:param m: We observe fully the graph until time m
:param n: Node that we want to learn the community
"""
best_pred = 0
best_k = -1
for k in range(self.K):
temp = np.linalg.matrix_power(self.P,n-m)[self.clusters[m],k]
for i,node in enumerate(observed_nodes):
if observed_links[i]:
temp *= self.Q[self.clusters[node],k]
else:
temp *= 1-self.Q[self.clusters[node],k]
if temp > best_pred:
best_pred = temp
best_k = k
return best_k
[docs] def RES(self, ini, alpha, beta, O ,observed_links, observed_nodes, m, n):
"""
Solve robustly the collaborative filtering problem when we observe fully the graph at time m and we want
to predict the community of node n when we observe only a subset of the edges that connects (or not) n
with the nodes 1,...,m.
:param ini: initial distribution of the Markov chain given by the Baum-Welch algorithm
:param alpha: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm
:param beta: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm
:param observed_links: A vector with binary variables of length less than m. For any i, observed_links[i] is 1 if and only if we observe an edge between nodes n and observed_nodes[i] (and 0 otherwise).
:param observed_nodes: A vector with the same length as the vector "observed_links". It contains the nodes for which we observe the connection (or not) with node n
:param m: We observe fully the graph until time m
:param n: Node that we want to learn the community
.. note:
Our implementation do not respect striclty the formula of the paper. We get rid of quantities that do not depend on
the cluster k of node n. This allows to avoid underflow issues.
"""
best_pred = 0
best_k = -1
indices = np.argsort(observed_nodes)[::-1]
observed_nodes = observed_nodes[indices]
observed_links = observed_links[indices]
ROB = []
for k in range(self.K):
res = np.ones(self.K) # beta[:,observed_nodes[0]]
for c in range(self.K):
if observed_links[0]:
res[c] *= self.approx_Q[c,k]
else:
res[c] *= 1-self.approx_Q[c,k]
for i,node in enumerate(observed_nodes[:int(len(observed_links)-1)]):
chi = np.zeros((self.K, self.K))
nodei = node
nodeim = observed_nodes[i+1]
for c in range(self.K):
for cb in range(self.K):
chi[c,cb] = self.approx_P[c,cb] * O[cb,self.clusters_approx[nodei]]
for step in range(nodei-1,nodeim,-1):
chitemp = np.zeros((self.K, self.K))
for c in range(self.K):
for cb in range(self.K):
for cd in range(self.K):
chitemp[c,cb] += self.approx_P[c,cd] * ( O[cd,self.clusters_approx[step]] + chi[cd,cb])
chi = np.copy(chitemp)
restemp = np.zeros(self.K)
for c in range(self.K):
restemp[c] = np.sum(chi[c,:] * res)
if observed_links[i+1]:
res = self.approx_Q[:,k] * restemp
else:
res = (1-self.approx_Q[:,k]) * restemp
final_pred = np.sum(alpha[:,observed_nodes[-1]] * res)
final_pred *= np.sum(ini * (np.linalg.matrix_power(self.approx_P,n))[:,k])
ROB.append(final_pred)
MAP = []
for k in range(self.K):
temp = np.linalg.matrix_power(self.approx_P,n-m)[self.clusters_approx[m],k]
for i,node in enumerate(observed_nodes):
if observed_links[i]:
temp *= self.approx_Q[self.clusters_approx[node],k]
else:
temp *= 1-self.approx_Q[self.clusters_approx[node],k]
MAP.append(temp)
OPT = []
for k in range(self.K):
temp = np.linalg.matrix_power(self.P,n-m)[self.clusters[m],k]
for i,node in enumerate(observed_nodes):
if observed_links[i]:
temp *= self.Q[self.clusters[node],k]
else:
temp *= 1-self.Q[self.clusters[node],k]
OPT.append(temp)
return ROB/np.sum(ROB), OPT/np.sum(OPT), MAP/np.sum(MAP)