Source code for markovianSBM.BaumWelch


import numpy as np
import cvxpy as cp
import os
import matplotlib.pyplot as plt
from numpy.core.multiarray import _vec_string

[docs]class BaumWelch(): """Class which performs the Baum-Welch algorithm and that uses it to solve the link prediction and the collaborative filtering problems.""" def __init__(self): pass
[docs] def forward(self, ini, P, O, m, n, delta, Y, K=None): """ Forward step of the BaumWelch algorithm. We observe the connections between nodes 0,...,m,n,...,n+delta. However, we consider that the edges involving nodes between m+1 and n-1 are not reliable and we do not take into account the estimated clusters for the nodes between m+1 and n-1. :param ini: initial distribution of the Markov chain :param P: Transition matrix of the Markov chain :param O: matrix of emission probabilities :param m: We observe fully the graph until time m :param n: We do not observe reliably the connection involving nodes between time m and n :param delta: We observe the connections between nodes n and n+delta :param Y: vector of length n+delta+1 of the estimated communities :param K: Number of communities .. note: Using this function with a specified K can be typically interesting when we try to estimate the number of clusters with the procedure describes in the paper. """ if K is None: K=self.K alpha = np.ones((K, n+delta+1)) alpha[:,0] = ini * O[:,Y[0]] # scaling alpha[:,0] /= np.sum(alpha[:,0]) puissP = P for t in range(1,n+delta+1): for i in range(K): if t <= m or t >= n: alpha[i,t] = np.sum(alpha[:,t-1] * P[:,i]) * O[i,Y[t]] else: alpha[i,t] = np.sum(alpha[:,m] * puissP[:,i]) puissP = puissP @ P # scaling # alpha[:,t] /= np.sum(alpha[:,t]) return alpha
[docs] def backward(self, P, O, m, n, delta, Y, K=None): """ Backward step of the BaumWelch algorithm. We observe the connections between nodes 0,...,m,n,...,n+delta. However, we consider that the edges involving nodes between m+1 and n-1 are not reliable and we do not take into account the estimated clusters for the nodes between m+1 and n-1. :param P: Transition matrix of the Markov chain :param O: matrix of emission probabilities :param m: We observe fully the graph until time m :param n: We do not observe reliably the connection involving nodes between time m and n :param delta: We observe the connections between nodes n and n+delta :param Y: vector of length n+delta+1 of the estimated communities :param K: Number of communities .. note: Using this function with a specified K can be typically interesting when we try to estimate the number of clusters with the procedure describes in the paper. """ if K is None: K=self.K beta = np.ones((K, n+delta+1)) beta[:,n+delta] = np.ones(K) # scaling # beta[:,n+delta] /= np.sum(beta[:,n+delta]) puissP = P for t in range(n+delta-1,-1,-1): for i in range(K): if t <= m-1 or t >= n-1: beta[i,t] = np.sum(beta[:,t+1] * P[i,:] * O[:,Y[t+1]]) else: beta[i,t] = np.sum(beta[:,n-1] * puissP[i,:]) puissP = puissP @ P # scaling # beta[:,t] /= np.sum(beta[:,t]) return beta
[docs] def update(self, alpha, beta, P, O, m, n, delta, Y, K=None): """ Update step of the BaumWelch algorithm: the parameters of the HMM are updated and returned. :param alpha: Matrix of size $K \times n+delta$ giving the output of the "forward" method :param beta: Matrix of size $K \times n+delta$ giving the output of the "backward" method :param P: Transition matrix of the Markov chain :param O: matrix of emission probabilities :param m: We observe fully the graph until time m :param n: We do not observe reliably the connection involving nodes between time m and n :param delta: We observe the connections between nodes n and n+delta :param Y: vector of length n+delta+1 of the estimated communities :param K: Number of communities .. note: Using this function with a specified K can be typically interesting when we try to estimate the number of clusters with the procedure describes in the paper. """ if K is None: K=self.K xi = np.zeros((K,K, n+delta+1)) for t in range(n+delta): for i in range(K): for j in range(K): if t >= m-1 and t <= n-1: xi[i,j,t] = alpha[i,t] * P[i,j] * beta[j,t+1] else: xi[i,j,t] = alpha[i,t] * P[i,j] * O[j,Y[t+1]] * beta[j,t+1] xi[:,:,t] /= np.sum(xi[:,:,t]) P = np.zeros((K,K)) O = np.zeros((K,K)) gamma = alpha * beta gamma /= np.tile(np.sum(gamma, axis=0).reshape(1,n+delta+1), (K,1)) ini = gamma[:,0] for i in range(K): for j in range(K): P[i,j] = np.sum(xi[i,j,:]) / np.sum(gamma[i,:]) for i in range(K): for t in range(gamma.shape[1]): O[i,Y[t]] += gamma[i,t] O[i,:] /= np.sum(gamma[i,:]) return (ini, gamma, P, O)
[docs] def adjacency_BaumWelch(self, m, n, delta): """ Builds an adjacency matrix from the attribute self.X removing the nodes between m+1 and n-1 (that we consider to be not reliable). :param m: We observe fully the graph until time m :param n: We do not observe reliably how nodes between m+1 and n-1 are connected :param delta: We observe reliably how nodes between n and n+delta are connected """ X = np.zeros((m+1+delta+1,m+1+delta+1)) for i in range(m+1): X[i,:m+1] = self.X[i,:m+1] X[:m+1,i] = self.X[i,:m+1] for i in range(delta+1): X[m+i+1,m+1:] = self.X[n+i,n:n+delta+1] X[m+1:,m+i+1] = self.X[n:n+delta+1,n+i] X[m+i+1,:m+1] = self.X[n+i,:m+1] X[:m+1,m+i+1] = self.X[:m+1,n+i] return X
[docs] def BaumWelch(self, m, n, delta, nbite, eps = 1e-2, K=None): """ BaumWelch algorithm that iteratively perform the forward, backward and update steps. :param m: We observe fully the graph until time m :param n: We do not observe reliably the connection involving nodes between time m and n :param delta: We observe the connections between nodes n and n+delta :param nbite: Number of iterations for the Baum Welch algorithm :param eps: Parameter use to initialize the matrix of emission probabilities O :param K: Number of communities .. note: Using this function with a specified K can be typically interesting when we try to estimate the number of clusters with the procedure describes in the paper. """ if K is None: K=self.K self.estimate_partition() Y = np.zeros(n+delta+1) for i in range(m+1): Y[i] = self.clusters_approx[i] count = m+1 for i in range(n,n+delta+1): Y[i] = self.clusters_approx[count] count += 1 Y = np.array(Y, dtype=int) ini = np.ones(K) / K P = np.ones((K,K)) / K O = (1-eps)*np.eye(K) + (eps / (K-1))* (np.ones((K,K))-np.eye(K)) for ite in range(nbite): alpha = self.forward(ini, P, O, m, n, delta, Y) beta = self.backward(P, O, m, n, delta, Y) ini, gamma, P, O = self.update(alpha, beta, P, O, m, n, delta, Y) return ini, gamma, P, O
[docs] def collaborative_filtering_robustMAP(self, ini, alpha, beta, O, observed_links, observed_nodes, m, n, Pbaum=None): """ Solve robustly the collaborative filtering problem when we observe fully the graph at time m and we want to predict the community of node n when we observe only a subset of the edges that connects (or not) n with the nodes 1,...,m. :param ini: initial distribution of the Markov chain given by the Baum-Welch algorithm :param alpha: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm :param beta: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm :param observed_links: A vector with binary variables of length less than m. For any i, observed_links[i] is 1 if and only if we observe an edge between nodes n and observed_nodes[i] (and 0 otherwise). :param observed_nodes: A vector with the same length as the vector "observed_links". It contains the nodes for which we observe the connection (or not) with node n :param m: We observe fully the graph until time m :param n: Node that we want to learn the community .. note: Our implementation do not respect striclty the formula of the paper. We get rid of quantities that do not depend on the cluster k of node n. This allows to avoid underflow issues. """ if Pbaum is None: Pbaum = self.approx_P best_pred = 0 best_k = -1 indices = np.argsort(observed_nodes)[::-1] observed_nodes = observed_nodes[indices] observed_links = observed_links[indices] ROB = [] for k in range(self.K): # c_{n} res = beta[:,observed_nodes[0]] / np.sum(beta[:,observed_nodes[0]]) for c in range(self.K): # c_i if observed_links[0]: res[c] *= self.approx_Q[c,k] else: res[c] *= 1-self.approx_Q[c,k] for i,node in enumerate(observed_nodes[:int(len(observed_links)-1)]): chi = np.ones((self.K, self.K)) nodei = node nodeim = observed_nodes[i+1] for c in range(self.K): for cb in range(self.K): chi[c,cb] = Pbaum[c,cb] * O[cb,self.clusters_approx[nodeim+1]] if (nodeim+1) != nodei: for step in range(nodeim+2,nodei+1): chitemp = np.zeros((self.K, self.K)) for c in range(self.K): for cb in range(self.K): for cd in range(self.K): chitemp[c,cb] += chi[c,cd] * Pbaum[cd,cb] * O[cb,self.clusters_approx[step]] chi = np.copy(chitemp) restemp = np.zeros(self.K) for c in range(self.K): restemp[c] = np.sum(chi[c,:] * res) if observed_links[i+1]: res = self.approx_Q[:,k] * restemp else: res = (1-self.approx_Q[:,k]) * restemp final_pred = np.sum(alpha[:,observed_nodes[-1]] * res) vec = [np.sum(ini * (np.linalg.matrix_power(Pbaum,n)[:,r])) for r in range(self.K)] vec /= np.sum(vec) final_pred *= vec[k] ROB.append(final_pred) ROB /= np.sum(ROB) return np.argmax(ROB)
[docs] def collaborative_filtering_pluginMAP(self, alpha, beta, observed_links, observed_nodes, m, n): """ Solve the collaborative filtering problem using the plugin approach when we observe fully the graph at time m and we want to predict the community of node n when we observe only a subset of the edges that connects (or not) n with the nodes 1,...,m. :param alpha: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm :param beta: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm :param observed_links: A vector with binary variables of length less than m. For any i, observed_links[i] is 1 if and only if we observe an edge between nodes n and observed_nodes[i] (and 0 otherwise). :param observed_nodes: A vector with the same length as the vector "observed_links". It contains the nodes for which we observe the connection (or not) with node n :param m: We observe fully the graph until time m :param n: Node that we want to learn the community """ best_pred = 0 best_k = -1 for k in range(self.K): temp = np.linalg.matrix_power(self.approx_P,n-m)[self.clusters_approx[m],k] for i,node in enumerate(observed_nodes): if observed_links[i]: temp *= self.approx_Q[self.clusters_approx[node],k] else: temp *= 1-self.approx_Q[self.clusters_approx[node],k] if temp > best_pred: best_pred = temp best_k = k return best_k
[docs] def collaborative_filtering_optimalMAP(self, alpha, beta, observed_links, observed_nodes, m, n): """Solve robustly the collaborative filtering problem using the optimal approach when we observe fully the graph at time m and we want to predict the community of node n when we observe only a subset of the edges that connects (or not) n with the nodes 1,...,m. :param alpha: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm :param beta: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm :param observed_links: A vector with binary variables of length less than m. For any i, observed_links[i] is 1 if and only if we observe an edge between nodes n and observed_nodes[i] (and 0 otherwise). :param observed_nodes: A vector with the same length as the vector "observed_links". It contains the nodes for which we observe the connection (or not) with node n :param m: We observe fully the graph until time m :param n: Node that we want to learn the community """ best_pred = 0 best_k = -1 for k in range(self.K): temp = np.linalg.matrix_power(self.P,n-m)[self.clusters[m],k] for i,node in enumerate(observed_nodes): if observed_links[i]: temp *= self.Q[self.clusters[node],k] else: temp *= 1-self.Q[self.clusters[node],k] if temp > best_pred: best_pred = temp best_k = k return best_k
[docs] def RES(self, ini, alpha, beta, O ,observed_links, observed_nodes, m, n): """ Solve robustly the collaborative filtering problem when we observe fully the graph at time m and we want to predict the community of node n when we observe only a subset of the edges that connects (or not) n with the nodes 1,...,m. :param ini: initial distribution of the Markov chain given by the Baum-Welch algorithm :param alpha: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm :param beta: Matrix of size $K \times n+delta$ given by the Baum-Welch algorithm :param observed_links: A vector with binary variables of length less than m. For any i, observed_links[i] is 1 if and only if we observe an edge between nodes n and observed_nodes[i] (and 0 otherwise). :param observed_nodes: A vector with the same length as the vector "observed_links". It contains the nodes for which we observe the connection (or not) with node n :param m: We observe fully the graph until time m :param n: Node that we want to learn the community .. note: Our implementation do not respect striclty the formula of the paper. We get rid of quantities that do not depend on the cluster k of node n. This allows to avoid underflow issues. """ best_pred = 0 best_k = -1 indices = np.argsort(observed_nodes)[::-1] observed_nodes = observed_nodes[indices] observed_links = observed_links[indices] ROB = [] for k in range(self.K): res = np.ones(self.K) # beta[:,observed_nodes[0]] for c in range(self.K): if observed_links[0]: res[c] *= self.approx_Q[c,k] else: res[c] *= 1-self.approx_Q[c,k] for i,node in enumerate(observed_nodes[:int(len(observed_links)-1)]): chi = np.zeros((self.K, self.K)) nodei = node nodeim = observed_nodes[i+1] for c in range(self.K): for cb in range(self.K): chi[c,cb] = self.approx_P[c,cb] * O[cb,self.clusters_approx[nodei]] for step in range(nodei-1,nodeim,-1): chitemp = np.zeros((self.K, self.K)) for c in range(self.K): for cb in range(self.K): for cd in range(self.K): chitemp[c,cb] += self.approx_P[c,cd] * ( O[cd,self.clusters_approx[step]] + chi[cd,cb]) chi = np.copy(chitemp) restemp = np.zeros(self.K) for c in range(self.K): restemp[c] = np.sum(chi[c,:] * res) if observed_links[i+1]: res = self.approx_Q[:,k] * restemp else: res = (1-self.approx_Q[:,k]) * restemp final_pred = np.sum(alpha[:,observed_nodes[-1]] * res) final_pred *= np.sum(ini * (np.linalg.matrix_power(self.approx_P,n))[:,k]) ROB.append(final_pred) MAP = [] for k in range(self.K): temp = np.linalg.matrix_power(self.approx_P,n-m)[self.clusters_approx[m],k] for i,node in enumerate(observed_nodes): if observed_links[i]: temp *= self.approx_Q[self.clusters_approx[node],k] else: temp *= 1-self.approx_Q[self.clusters_approx[node],k] MAP.append(temp) OPT = [] for k in range(self.K): temp = np.linalg.matrix_power(self.P,n-m)[self.clusters[m],k] for i,node in enumerate(observed_nodes): if observed_links[i]: temp *= self.Q[self.clusters[node],k] else: temp *= 1-self.Q[self.clusters[node],k] OPT.append(temp) return ROB/np.sum(ROB), OPT/np.sum(OPT), MAP/np.sum(MAP)