# Preliminaries ----------------------------------------------------------------

rm(list=ls())
library(tidyverse)
library(rstan)
library(extraDistr)
library(bayesplot)


# Data generation --------------------------------------------------------------

# define true model parameters
alpha <- c(0.35, 0.2, 0.25, 0.2)
mu <- c(3000, 4000, 5000, 6000)
sigma <- 250
K <- length(alpha)

# sample size
n <- 1e3

# sample the observations
set.seed(1)
which_x <- sample(K, n, replace=T, prob=alpha)
y <- rnorm(n, mean=mu[which_x], sd=sigma)

# plot data
data.frame(y) %>%
  ggplot(aes(x=y)) + geom_histogram(colour="black", bins=50)


# Run stan ---------------------------------------------------------------------

# set up parallel cores
options(mc.cores=5)

# set up data
stan_data <- list(n=n, K=K, y=y)

# initial values
init_func <- function(){
  list(alpha = as.numeric(rdirichlet(1, rep(1, K))),
       mu = sort(rnorm(K, 4000, 1500)),
       sigma = 1 / sqrt(rgamma(1, 3, 600^2)))
}


fit <- stan(file = "gaussian_mixture.stan",
            data = stan_data,
            chains = 5,
            iter = 1e4,
            init = init_func,
            seed = 2)


# Analysis of results ----------------------------------------------------------

# bayesplot plots
mcmc_areas(fit, regex_pars="alpha", prob=0.95)
mcmc_areas(fit, regex_pars="mu", prob=0.95)

# summary
summ <- summary(fit)$summary

round(summ[rownames(summ)!="lp__", c("2.5%", "mean", "97.5%")],2)