# Preliminaries ---------------------------------------------------------------- rm(list=ls()) library(tidyverse) library(rstan) library(extraDistr) library(bayesplot) # Data generation -------------------------------------------------------------- # define true model parameters alpha <- c(0.35, 0.2, 0.25, 0.2) mu <- c(3000, 4000, 5000, 6000) sigma <- 250 K <- length(alpha) # sample size n <- 1e3 # sample the observations set.seed(1) which_x <- sample(K, n, replace=T, prob=alpha) y <- rnorm(n, mean=mu[which_x], sd=sigma) # plot data data.frame(y) %>% ggplot(aes(x=y)) + geom_histogram(colour="black", bins=50) # Run stan --------------------------------------------------------------------- # set up parallel cores options(mc.cores=5) # set up data stan_data <- list(n=n, K=K, y=y) # initial values init_func <- function(){ list(alpha = as.numeric(rdirichlet(1, rep(1, K))), mu = sort(rnorm(K, 4000, 1500)), sigma = 1 / sqrt(rgamma(1, 3, 600^2))) } fit <- stan(file = "gaussian_mixture.stan", data = stan_data, chains = 5, iter = 1e4, init = init_func, seed = 2) # Analysis of results ---------------------------------------------------------- # bayesplot plots mcmc_areas(fit, regex_pars="alpha", prob=0.95) mcmc_areas(fit, regex_pars="mu", prob=0.95) # summary summ <- summary(fit)$summary round(summ[rownames(summ)!="lp__", c("2.5%", "mean", "97.5%")],2)