MCMC Sampler sampler for the Hidden Markov with Gamma-Poisson emission densities

Usage

hmm_mcmc_gamma_poisson(
  data,
  prior_T,
  prior_betas,
  prior_alpha = 1,
  iter = 5000,
  warmup = floor(iter/1.5),
  thin = 1,
  seed = sample.int(.Machine$integer.max, 1),
  init_T = NULL,
  init_betas = NULL,
  init_alpha = NULL,
  print_params = TRUE,
  verbose = TRUE
)

Arguments

data: (numeric) data
prior_T: (matrix) prior transition matrix
prior_betas: (numeric) prior beta parameters
prior_alpha: (numeric) a single prior alpha parameter. By default, prior_alpha=1
iter: (integer) number of MCMC iterations
warmup: (integer) number of warmup iterations
thin: (integer) thinning parameter. By default, 1
seed: (integer) seed parameter
init_T: (matrix) optional parameter; initial transition matrix
init_betas: (numeric) optional parameter; initial beta parameters
init_alpha: (numeric) optional parameter; initial alpha parameter
print_params: (logical) optional parameter; print estimated parameters every iteration. By default, TRUE
verbose: (logical) optional parameter; print additional messages. By default, TRUE

Value

List with following elements:

data: data used for simulation
samples: list with samples
estimates: list with various estimates
idx: indices with iterations after the warmup period
priors: prior parameters
inits: initial parameters
last_iter: list with samples from the last MCMC iteration
info: list with various meta information about the object

Details

Please see supplementary information at doi:10.1186/s12859-024-05751-4 for more details on the algorithm.

For usage recommendations please see https://github.com/LynetteCaitlin/oHMMed/blob/main/UsageRecommendations.pdf.

References

Claus Vogl, Mariia Karapetiants, Burçin Yıldırım, Hrönn Kjartansdóttir, Carolin Kosiol, Juraj Bergman, Michal Majka, Lynette Caitlin Mikula. Inference of genomic landscapes using ordered Hidden Markov Models with emission densities (oHMMed). BMC Bioinformatics 25, 151 (2024). doi:10.1186/s12859-024-05751-4

Examples

# Simulate Poisson-Gamma data
N <- 2^10
true_T <- rbind(c(0.95, 0.05, 0),
                c(0.025, 0.95, 0.025),
                c(0.0, 0.05, 0.95))

true_betas <- c(2, 1, 0.1)
true_alpha <- 1

simdata_full <- hmm_simulate_gamma_poisson_data(L = N,
                                                mat_T = true_T,
                                                betas = true_betas,
                                                alpha = true_alpha)
simdata <- simdata_full$data
hist(simdata, breaks = 40, probability = TRUE,  
     main = "Distribution of the simulated Poisson-Gamma data")
lines(density(simdata), col = "red")


# Set numbers of states to be inferred
n_states_inferred <- 3

# Set priors
prior_T <- generate_random_T(n_states_inferred)
prior_betas <- c(1, 0.5, 0.1)
prior_alpha <- 3

# Simmulation settings
iter <- 50
warmup <- floor(iter / 5) # 20 percent
thin <- 1
seed <- sample.int(10000, 1)
print_params <- FALSE # if TRUE then parameters are printed in each iteration
verbose <- FALSE # if TRUE then the state of the simulation is printed

# Run MCMC sampler
res <- hmm_mcmc_gamma_poisson(data = simdata,
                              prior_T = prior_T,
                              prior_betas = prior_betas,
                              prior_alpha = prior_alpha,
                              iter = iter,
                              warmup = warmup,  
                              thin = thin,
                              seed = seed,
                              print_params = print_params,
                              verbose = verbose)
res
#> Model: HMM Gamma-Poisson 
#> Type: MCMC 
#> Iter: 50 
#> Warmup: 10 
#> Thin: 1 
#> States: 3 

summary(res)# summary output can be also assigned to a variable
#> Estimated betas:
#>   beta[1]   beta[2]   beta[3] 
#> 3.5834236 1.2507648 0.1541274 
#> 
#> Estimated alpha:
#> 2.839187
#> 
#> Estimated means:
#> 0.7994824 2.316726 18.60581
#> 
#> Estimated transition rates:
#>          1          2         3
#> 1 0.979440 0.02055999 0.0000000
#> 2 0.101204 0.48096676 0.4178293
#> 3 0.000000 0.60273643 0.3972636
#> 
#> Number of windows assigned to hidden states:
#>   1   2   3 
#> 783 143  98 
#> 
#> Kullback-Leibler divergence between observed and estimated distributions:
#> 0.3132493
#> 
#> Log Likelihood:
#>         mean           sd       median 
#> -1846.262363     2.081207 -1846.352902 
#> 
#> P-value of poisson test for difference between rates of states (stepwise):
#>          1-2          2-3 
#> 7.810267e-46 0.000000e+00 
#> 

coef(res) # extract model estimates
#> $betas
#>   beta[1]   beta[2]   beta[3] 
#> 3.5834236 1.2507648 0.1541274 
#> 
#> $alpha
#> [1] 2.839187
#> 
#> $means
#>   means[1]   means[2]   means[3] 
#>  0.7994824  2.3167258 18.6058077 
#> 
#> $mat_T
#>          [,1]       [,2]      [,3]
#> [1,] 0.979440 0.02055999 0.0000000
#> [2,] 0.101204 0.48096676 0.4178293
#> [3,] 0.000000 0.60273643 0.3972636
#> 

# plot(res) # MCMC diagnostics