# R function for estimating the k-principal points (Flury 1990)
#  of a multivariate normal distribution using the Parametric k-means
#  algorithm (Tarpey 2006, Computational Statistics).
#
#  Input:
#  x = n by p data matrix, where n is the sample size and p
#        is the dimension of the data.
#  k = Number of principal points to estimate (1 < k < n-1)
# ns = Choose a large sample size for the simulated data (e.g. ns=500000)
#
#  Output:
#  results = k by p matrix of estimated principal points
#
#  Usage: results=parametrickmeans(x,k,ns)
#
#   References:
#      "Principal Points" (1990) by B. Flury, Biometrika, 77, 33-41.
#      "A Parametric k-Means Algorithm" (2006), by T. Tarpey, to appear in Computational Statistics.
#           
parametrickmeans <- function(x, k, ns)
{
n <- dim(x)[1]    # sample size
p <- dim(x)[2]    # dimension of data
xbar <-apply(x,2,mean) # sample mean
S <- cov(x)      # sample covariance matrix
e <- eigen(S)    # eigen decomposition of the covariance matrix
# Simulate N(xbar, S) data with sample size ns and put the data in xsim:
xsim <- sweep(matrix(rnorm(ns*p), ns, p)%*%diag(sqrt(e$value))%*%t(e$vector), 2, -xbar)
kmeans(xsim,k)$centers    # Estimated principal points using the parametric k-means

results <- list("kprincipalpoints" = kmeans(xsim,k)$centers)
results
}