Accelerated EM Algorithm
4.8.2.1
regmix_em1step <- function(par, dat){
y <- dat[,1]
xmat <- dat[,-1]
n <- nrow(xmat) # number of samples
p <- ncol(xmat) # dimension of samples
m <- (length(par)-1)/(p+1) # number of clusters
pi.now = pi.pre = par[1:m]
beta.now = beta.pre = matrix(par[(m+1):((m+1)+m*p-1)],p,m)
sigma.now = sigma.pre = par[length(par)]
parnew <- rep(NA,(m+1)+m*p)
ols <- function(w){
return(solve(t(as.matrix(xmat)) %*% diag(w) %*% as.matrix(xmat))
%*% t(as.matrix(xmat)) %*% diag(w) %*% y)
}
phi <- matrix(apply((y – as.matrix(xmat) %*% beta.pre)/sigma.pre,1,dnorm),n,m)
prob <- (phi * pi.pre) / apply(phi*pi.pre, 1, sum)
pi.now <- apply(prob, 2, mean)
beta.now <- apply(prob, 2, ols)
sigma.now <- sum((y – as.matrix(xmat) %*% beta.now) *
(y – as.matrix(xmat) %*% beta.now) * prob) / n
parnew <- c(pi.now,as.vector(beta.now),sigma.now)
return(parnew)
}
4.8.2.2
regmix_em <- function(par, dat, control){
iter <- 0
while (TRUE) {
parnew <- regmix_em1step(par, dat)
iter <- iter + 1
if(sum((parnew-par)^2)<=control[2]){
break
}else if(iter>=control[1]){
break
}
par <- parnew
}
return(list(parnew,iter))
}
regmix_sim <- function(n, pi, beta, sigma) {
K <- ncol(beta)
p <- NROW(beta)
xmat <- matrix(rnorm(n * p), n, p) # normal covaraites
error <- matrix(rnorm(n * K, sd = sigma), n, K)
ymat <- xmat %*% beta + error # n by K matrix
ind <- t(rmultinom(n, size = 1, prob = pi))
y <- rowSums(ymat * ind)
data.frame(y, xmat)
}
n <- 400
pi <- c(.3, .4, .3)
bet <- matrix(c( 1, 1, 1,
-1, -1, -1), 2, 3)
sig <- 1
set.seed(1205)
dat <- regmix_sim(n, pi, bet, sig)
par <- c(pi / pi / length(pi),as.vector(bet*0),sig/sig)
out1 <- regmix_em(par, dat, control = list(maxit = 5000, tol = 1e-7))
print(out1)
## [[1]]
## [1] 0.3385967 0.3276644 0.3337389 0.3348181 -0.4607229 0.3416062
## [7] -0.4715782 0.3241958 -0.4947318 3.0012569
##
## [[2]]
## [1] 4
4.8.2.3
library(SQUAREM)
regmix_obj <- function(par, dat){
y <- dat[,1]
xmat <- dat[,-1]
n <- nrow(xmat) # number of samples
p <- ncol(xmat) # dimension of samples
m <- (length(par)-1)/(p+1) # number of clusters
pi = par[1:m]
beta = matrix(par[(m+1):((m+1)+m*p-1)],p,m)
sigma = par[length(par)]
phi <- matrix(apply((y – as.matrix(xmat) %*% beta)/sigma,1,dnorm),n,m)
likelihood <- phi %*% pi
return(-sum(log(likelihood)))
}
out2 <- squarem(par, dat=dat, regmix_em1step, regmix_obj,
control = list(tol=1e-7,maxiter=5000))
print(out2)
## \$par
## [1] 0.3385967 0.3276644 0.3337389 0.3348189 -0.4607235 0.3416078
## [7] -0.4715760 0.3241935 -0.4947333 3.0012568
##
## \$value.objfn
## [1] 427.265
##
## \$iter
## [1] 4
##
## \$fpevals
## [1] 7
##
## \$objfevals
## [1] 4
##
## \$convergence
## [1] TRUE
4.8.2.4
library(microbenchmark)
microbenchmark( EM_Algorithm <- regmix_em(par, dat,
control = list(maxit = 5000, tol = 1e-7)),
Accelerated_EM <- squarem(par, dat=dat, regmix_em1step,
regmix_obj, control = list(tol=1e-7,maxiter=5000)),
times = 100L)
## Unit: milliseconds
## expr
## EM_Algorithm <- regmix_em(par, dat, control = list(maxit = 5000, tol = 1e-07))
## Accelerated_EM <- squarem(par, dat = dat, regmix_em1step, regmix_obj, control = list(tol = 1e-07, maxiter = 5000))
## min lq mean median uq max neval cld
## 24.8377 29.83425 32.35545 32.20780 34.2157 54.1597 100 a
## 50.6950 55.06280 60.57016 58.65985 62.3096 101.3462 100 b
We can see that the speed of Accelerated EM Algorithm is slower that EM Algorithm in this case.