#######################################################################
### Fit a 2-component univariate
normal mixture to a given ####
### density g(y) using the misspecified population-based EM ####
### algorithm. ####
###
####
### Input:
####
### l = lower bound on the support of
g(y) ####
### u = upper bound on the support of
g(y) ####
### g(y) = user
defined density function.
####
### err = convergence criterion (default =
1/10^15) ####
### err must be a positive number ####
###
####
### ####
### Output:
####
### * Parameters of a two component normal
mixture: ####
### pi1, pi2 (prior probabilities with
pi2=1-pi1) ####
### mu1, mu2, mixture component
means ####
### sigma1, sigma2, mixture component
standard deviations ####
### * loglike = Misspecified log-likelihood ####
### = int(g(y)log(f(y))dy
####
### where
f(y) = mixture density. ####
### ####
###
####
### NOTES:
####
### 1. If the density g(y) is similar in shape
to a normal ####
### density, then
the misspecified log-likelihood may ####
### converge very slowly. ####
### 2. The default density used is the gamma
density. ####
### 3. R's integrate function may give an
error for infinite ####
### limits of
integration (when l and/or u are infinite.
####
### In these cases, the user can specify
finite but large ####
### values for l
and/or u to approximate the integral over
####
### the infinite
range.
####
### 4. If the algorithm has not converged
after maxiter, an
####
### message is
printed saying the algorithm has not
####
### converged. ####
### Default value is maxiter
= 20,0000 ####
### 5.
This program can be used to approximate any function ####
### with a
bounded range. One need only vertically shift
####
### the function so
that it takes only positive values ####
### and
normalize so that it integrates to one.
####
###
####
### ####
#######################################################################
#######################################################################
err <- 1/10^15 # convergence criterion = squared
difference between log-likelihood between successive iterations of the
algorithm.
l <- 0 # lower limit on support of the
density
u <- 40 # upper limit on support of the
density
## Specify the
density g(y)
## The default
density is a gamma with shape parameter kappa and
## scale parameter theta.
kappa <- 10
# shape parameter for gamma distribution.
theta <- 1
# scale parameter for the gamma density.
g <- function(y){1/(theta^kappa*gamma(kappa))*y^(kappa-1)*exp(-y/theta)}
# Specify initial
values for the mixture component parameters
pi1 <- 0.5
pi2 <- 1-pi1
mu
<- integrate(function(y){y*g(y)},l,u)$value
sigma <- sqrt(integrate(function(y){(y-mu)^2*g(y)},l,u)$value)
mu1 <- mu - sigma
mu2 <- mu + sigma
sigma1 <- sigma/2
sigma2 <- sigma/2
diff <- 100
llold
<- 0 # Store previous log-likelihood
value
iter
<- 0 # iteration number
maxiter
<- 20000 # maximum number of iterations allowed.
while (diff > err & iter
< maxiter)
{
iter=iter+1
fpi1 <-
function(y){g(y)*
pi1/sigma1*exp(-(y-mu1)^2/(2*sigma1^2))/
(pi1/sigma1*exp(-(y-mu1)^2/(2*sigma1^2))+
pi2/sigma2*exp(-(y-mu2)^2/(2*sigma2^2)))
}
pi1 <- integrate(fpi1, l,u)$value
pi2=1-pi1
fmu1 <-
function(y) {y*g(y)*
pi1/sigma1*exp(-(y-mu1)^2/(2*sigma1^2))/
(pi1/sigma1*exp(-(y-mu1)^2/(2*sigma1^2))+
pi2/sigma2*exp(-(y-mu2)^2/(2*sigma2^2)))
}
fmu2 <-
function(y) {y*g(y)*
pi2/sigma2*exp(-(y-mu2)^2/(2*sigma2^2))/
(pi1/sigma1*exp(-(y-mu1)^2/(2*sigma1^2))+
pi2/sigma2*exp(-(y-mu2)^2/(2*sigma2^2)))
}
fsig1 <-
function(y) {(y-mu1)^2*g(y)*
pi1/sigma1*exp(-(y-mu1)^2/(2*sigma1^2))/
(pi1/sigma1*exp(-(y-mu1)^2/(2*sigma1^2))+
pi2/sigma2*exp(-(y-mu2)^2/(2*sigma2^2)))
}
fsig2 <-
function(y) {(y-mu2)^2*g(y)*
pi2/sigma2*exp(-(y-mu2)^2/(2*sigma2^2))/
(pi1/sigma1*exp(-(y-mu1)^2/(2*sigma1^2))+
pi2/sigma2*exp(-(y-mu2)^2/(2*sigma2^2)))
}
normmix
<- function(y){pi1*dnorm(y, mean=mu1, sd=sigma1)+
pi2*dnorm(y,
mean=mu2, sd=sigma2)}
mu1 <- integrate(fmu1, l,
u)$value/pi1
mu2 <- integrate(fmu2, l,
u)$value/pi2
sigma1 <- sqrt(integrate(fsig1,
l,u)$value/pi1)
sigma2 <- sqrt(integrate(fsig2,
l,u)$value/pi2)
# Compute the
log-likelihood
loglikeint
<- function(y){g(y)*(log(pi1*dnorm(y,mean=mu1, sd=sigma1)
+(1-pi1)*dnorm(y,mean=mu2, sd=sigma2)))}
mixloglikeint
<- function(y){
normmix(y)*(log(pi1*dnorm(y,mean=mu1, sd=sigma1)
+(1-pi1)*dnorm(y,mean=mu2, sd=sigma2)))}
loglike=integrate(loglikeint,l,u)$value
diff = (loglike-llold)^2
llold
= loglike #
Store old log-likelihood value
trueloglikeint <- function(y){g(y)*log(g(y))}
trueloglike
<- integrate(trueloglikeint, l,u)$value
cat(iter, loglike, trueloglike, pi1, mu1, mu2, sigma1, sigma2, diff,
"\n")
# Plot g(y) and the
mixture approximation
lbound
<- l
ubound
<- u
if (l == -Inf){lbound = mu - 6*sigma}
if (u ==
Inf){ubound = mu + 6*sigma}
y=seq(lbound,ubound, length=200)
mmax
<- as.matrix(sort(normmix(y)))
mmax
<- min(mmax[dim(mmax)[1],1],
Inf)
pts <- matrix(c(mu1, mu2, 0, 0), 2,2)
plot(y, g(y), type="l", ylim=c(-mmax/4,mmax), xlim=c(lbound,ubound),
lwd=3, main="2-Component Mixture Approximation to
g(y)",
ylab="Density",
xlab="")
lines(y, normmix(y), lty=2, col=2, lwd=3)
abline(h=0)
points(pts, cex=3, col=2, pch=19)
legend((lbound+ubound)*.25,
-mmax/16, c("g(y)",
"2-Component
Mixture"),
col
= c(1,2),
lty
= c(1,2),
lwd=c(3,3), merge = TRUE)#, bg='gray90')
}
if (iter > maxiter){
cat("Misspecified EM Algorithm did not converge in ", maxiter, " iterations", "\n")
}
cat(" pi1 = ", pi1, "\n",
"pi2 = ", pi2, "\n",
"mu1 = ", mu1, "\n",
"mu2 = ", mu2, "\n",
"sigma1 = ", sigma1,
"\n",
"sigma2 = ", sigma2,
"\n",
"Misspecified
log-likelihood = ", loglike, "\n")