#----------------------------------------------------------------------------
#' Simulate noisy observations from a dynamic regression model
#'
#' Simulates data from a time series regression with dynamic regression coefficients.
#' The dynamic regression coefficients are simulated as a Gaussian random walk,
#' where jumps occur with a pre-specified probability \code{sparsity}.
#' The coefficients are initialized by a N(0,1) simulation.
#'
#' @param nT number of time points
#' @param p number of predictors (total)
#' @param p_0 number of true zero regression terms
#' @param sparsity the probability of a jump
#' (i.e., a change in the dynamic regression coefficient)
#' @param RSNR root-signal-to-noise ratio
#' @param ar1 the AR(1) coefficient for the predictors X; default is zero for iid N(0,1) predictors
#' @param include_plot logical; if TRUE, include a plot of the simulated data and the true curve
#'
#' @return a list containing
#' \itemize{
#' \item the simulated function \code{y}
#' \item the simulated predictors \code{X}
#' \item the simulated dynamic regression coefficients \code{beta_true}
#' \item the true function \code{mu_true}
#' \item the true observation standard deviation \code{sigma_true}
#' }
#'
#'
#' @note The root-signal-to-noise ratio is defined as RSNR = (sd of true function)/(sd of noise).
#' @importFrom stats arima.sim
#' @export

simRegression = function(nT = 200, p = 20, p_0 = 15,
                         sparsity = 0.05, RSNR = 5, ar1 = 0,
                         include_plot = FALSE){

  if(p < p_0) stop('Must have more predictors (p) than true zeros (p_0)')

  # Simulate the (p-1) non-intercept predictors: autocorrelated or independent?
  if(ar1 == 0){
    # iid N(0,1) predictors
    X_pred = matrix(rnorm(n = nT*(p-1)), nrow = nT, ncol = p-1)
  } else {
    # AR(1) predictors; the innovation SD sqrt(1 - ar1^2) gives unit marginal variance.
    # Fill a preallocated matrix column-by-column (rather than apply()) so that
    # p = 1 yields an (nT x 0) matrix and X below is still (nT x 1).
    X_pred = matrix(0, nrow = nT, ncol = p-1)
    for(j in seq_len(p-1)){
      X_pred[,j] = stats::arima.sim(model = list(ar = ar1), n = nT, sd = sqrt(1-ar1^2))
    }
  }

  # First column is the intercept
  X = cbind(1, X_pred)

  # Simulate the true regression signals (columns left at zero are the true zeros)
  beta_true = matrix(0, nrow = nT, ncol = p)

  # Value of intercept:
  beta_true[,1] = 1

  # Now, for the remaining nonzero predictors, simulate as jumps:
  if((p - p_0) > 1){
    for(j in 2:(p - p_0)){
      # N(0,1) starting value plus a random walk whose N(0,1) increments
      # are switched on with probability 'sparsity' at each time point
      beta_true[,j] = rnorm(n = 1) +
        cumsum(rnorm(n = nT)*rbinom(n = nT, size = 1, prob = sparsity))
    }
  }

  # Conditional mean: mu_true[t] = sum_j X[t,j]*beta_true[t,j]
  mu_true = rowSums(X*beta_true)

  # Noise SD, based on RSNR (also put in a check for constant/zero functions)
  sigma_true = sd(mu_true)/RSNR
  if(sigma_true==0) sigma_true = sqrt(sum(mu_true^2)/nT)/RSNR + 10^-3

  # Observed data:
  y = mu_true + sigma_true*rnorm(nT)

  # Plot?
  if(include_plot){
    t = seq(0, 1, length.out=nT)
    plot(t, y, main = 'Simulated Data and True Curve')
    lines(t, mu_true, lwd=8, col='black')
  }

  # Return the raw data and the true values:
  list(y = y, X = X, beta_true = beta_true, mu_true = mu_true, sigma_true = sigma_true)
}
#----------------------------------------------------------------------------
#' Simulate noisy observations from a dynamic regression model
#'
#' Simulates data from a time series regression with dynamic regression coefficients.
#' The dynamic regression coefficients are selected using the options from the
#' \code{simUnivariate()} function in the \code{wmtsa} package.
#'
#' @param signalNames vector of strings matching the "name" argument in the \code{simUnivariate()} function,
#' e.g. "bumps" or "doppler"
#' @param nT number of points
#' @param RSNR root-signal-to-noise ratio
#' @param p_0 number of true zero regression terms to include
#' @param include_intercept logical; if TRUE, the first column of X is 1's
#' @param scale_all logical; if TRUE, scale all regression coefficients to \[0,1\]
#' @param include_plot logical; if TRUE, include a plot of the simulated data and the true curve
#' @param ar1 the AR(1) coefficient for the predictors X; default is zero for iid N(0,1) predictors
#'
#' @return a list containing
#' \itemize{
#' \item the simulated function \code{y}
#' \item the simulated predictors \code{X}
#' \item the simulated dynamic regression coefficients \code{beta_true}
#' \item the true function \code{mu_true}
#' \item the true observation standard deviation \code{sigma_true}
#' }
#'
#' @note The number of predictors is \code{p = length(signalNames) + p_0}.
#'
#' @note The root-signal-to-noise ratio is defined as RSNR = (sd of true function)/(sd of noise).
#'

simRegression0 = function(signalNames = c("bumps", "blocks"), nT = 200, RSNR = 10, p_0 = 5, include_intercept = TRUE, scale_all = TRUE, include_plot = TRUE, ar1 = 0){

  # Number of nonzero signals, and total number of columns in X
  n_signal = length(signalNames)
  n_cov = n_signal + p_0
  signal_cols = 1:n_signal

  # True coefficients: one simulated signal per name; the trailing p_0 columns stay at zero
  beta_true = matrix(0, nrow = nT, ncol = n_cov)
  for(j in signal_cols) beta_true[,j] = simUnivariate(signalNames[j], n=nT)

  # Optionally map each signal onto [0,1] via (b - min)/(max - min)
  if(scale_all){
    beta_true[,signal_cols] = apply(as.matrix(beta_true[,signal_cols]), 2, function(b){
      (b - min(b))/(max(b) - min(b))
    })
  }

  # Predictors: iid N(0,1) when ar1 is zero, otherwise AR(1) series
  if(ar1 == 0){
    X = matrix(rnorm(nT*n_cov), nrow=nT, ncol = n_cov)
  } else {
    X = apply(matrix(0, nrow = nT, ncol = n_cov), 2, function(x){
      stats::arima.sim(n = nT, list(ar = ar1), sd = sqrt(1-ar1^2))
    })
  }

  # An intercept overwrites the first predictor column with ones
  if(include_intercept) X[,1] = 1

  # True response function: sum over predictors of X[t,j]*beta_true[t,j]
  mu_true = rowSums(X*beta_true)

  # Noise SD implied by RSNR, with a fallback when the true function has zero SD
  sigma_true = sd(mu_true)/RSNR
  if(sigma_true==0){
    sigma_true = sqrt(sum(mu_true^2)/nT)/RSNR + 10^-3
  }

  # Noisy observations
  y = mu_true + sigma_true*rnorm(nT)

  # Optional plot of the data with the true curve overlaid
  if(include_plot){
    t = seq(0, 1, length.out=nT)
    plot(t, y, main = 'Simulated Data and True Curve')
    lines(t, mu_true, lwd=8, col='black')
  }

  # Simulated data together with the true values
  list(y = y, X = X, beta_true = beta_true, mu_true = mu_true, sigma_true = sigma_true)
}
#----------------------------------------------------------------------------
#' Initialize the evolution error variance parameters
#'
#' Compute initial values for evolution error variance parameters under the various options:
#' dynamic horseshoe prior ('DHS'), horseshoe prior ('HS'),
#' Bayesian lasso ('BL'), normal stochastic volatility ('SV'),
#' or normal-inverse-gamma prior ('NIG').
#'
#' @param omega \code{T x p} matrix of evolution errors
#' @param evol_error the evolution error distribution; must be one of
#' 'DHS' (dynamic horseshoe prior), 'HS' (horseshoe prior), 'BL' (Bayesian lasso),
#' 'SV' (normal stochastic volatility), or 'NIG' (normal-inverse-gamma prior)
#' @return List of relevant components: \code{sigma_wt}, the \code{T x p} matrix of evolution standard deviations,
#' and additional parameters associated with the DHS and HS priors.

initEvolParams = function(omega, evol_error = "DHS"){

  # Validate the requested prior before touching omega
  known_type = (evol_error == "DHS") || (evol_error == "HS") || (evol_error == "BL") || (evol_error == "SV") || (evol_error == "NIG")
  if(!known_type) stop('Error type must be one of DHS, HS, BL, SV, or NIG')

  # Coerce omega to an (n x p) matrix
  omega = as.matrix(omega)
  n = nrow(omega)

  # Dynamic horseshoe and stochastic volatility have their own initializers
  if(evol_error == "DHS") return(initDHS(omega))
  if(evol_error == "SV") return(initSV(omega))

  if(evol_error == "HS"){
    # Local precisions from the squared errors, then the derived global terms
    tauLambdaj = 1/omega^2
    xiLambdaj = 1/(2*tauLambdaj)
    tauLambda = 1/(2*colMeans(xiLambdaj))
    xiLambda = 1/(tauLambda + 1)

    return(list(sigma_wt = 1/sqrt(tauLambdaj),
                tauLambdaj = tauLambdaj,
                xiLambdaj = xiLambdaj,
                tauLambda = tauLambda,
                xiLambda = xiLambda))
  }

  if(evol_error == "BL"){
    # Scales are the absolute errors; lambda2 is their overall mean
    tau_j = abs(omega)
    return(list(sigma_wt = tau_j, tau_j = tau_j, lambda2 = mean(tau_j)))
  }

  if(evol_error == "NIG"){
    # One SD per column, repeated down all n rows
    col_sd = apply(omega, 2, function(x) sd(x, na.rm=TRUE))
    return(list(sigma_wt = tcrossprod(rep(1,n), col_sd)))
  }
}
#----------------------------------------------------------------------------
#' Initialize the evolution error variance parameters
#'
#' Compute initial values for evolution error variance parameters under the dynamic horseshoe prior
#'
#' @param omega \code{T x p} matrix of evolution errors
#' @return List of relevant components: the \code{T x p} evolution error SD \code{sigma_wt},
#' the \code{T x p} log-volatility \code{ht}, the \code{p x 1} log-vol unconditional mean(s) \code{dhs_mean},
#' the \code{p x 1} log-vol AR(1) coefficient(s) \code{dhs_phi},
#' the \code{T x p} log-vol innovation SD \code{sigma_eta_t} from the PG priors,
#' the \code{p x 1} initial log-vol SD \code{sigma_eta_0},
#' and the mean of log-vol means \code{dhs_mean0} (relevant when \code{p > 1})
#' @importFrom methods is

initDHS = function(omega){

  # "Local" number of time points
  omega = as.matrix(omega)
  n = nrow(omega); p = ncol(omega)

  # Initialize the log-volatilities (small offset avoids log(0)):
  ht = log(omega^2 + 0.0001)

  # Fit an AR(1) model to each column of ht to obtain the AR(1) coefficient
  # (row 1) and the unconditional mean (row 2). The fitted coefficients are used
  # when arima() succeeds; the fixed defaults are used only when the fit fails.
  arCoefs = apply(ht, 2, function(x){
    tryCatch(stats::arima(x, order = c(1,0,0))$coef,
             error = function(e) c(0.8, mean(x)/(1 - 0.8)))
  })
  dhs_mean = arCoefs[2,]; dhs_phi = arCoefs[1,]; dhs_mean0 = mean(dhs_mean)

  # Initialize the SD of log-vol innovations simply using the expectation:
  sigma_eta_t = matrix(pi, nrow = n-1, ncol = p)
  sigma_eta_0 = rep(pi, p) # Initial value

  # Evolution error SD:
  sigma_wt = exp(ht/2)

  list(sigma_wt = sigma_wt, ht = ht, dhs_mean = dhs_mean, dhs_phi = dhs_phi, sigma_eta_t = sigma_eta_t, sigma_eta_0 = sigma_eta_0, dhs_mean0 = dhs_mean0)
}
#----------------------------------------------------------------------------
#' Initialize the stochastic volatility parameters
#'
#' Compute initial values for normal stochastic volatility parameters.
#' The model assumes an AR(1) for the log-volatility.
#'
#' @param omega \code{T x p} matrix of errors
#' @return List of relevant components: \code{sigma_wt}, the \code{T x p} matrix of standard deviations,
#' and additional parameters (unconditional mean, AR(1) coefficient, and standard deviation).

initSV = function(omega){

  # Make sure omega is (n x p) matrix
  omega = as.matrix(omega); n = nrow(omega); p = ncol(omega)

  # log-volatility (small offset avoids log(0)):
  ht = log(omega^2 + 0.0001)

  # AR(1) parameters for each column of ht, ordered (intercept, ar1, sig).
  # The fitted values are used when arima() succeeds; the fixed defaults are
  # used only when the fit fails.
  svParams = apply(ht, 2, function(x){
    tryCatch({
      ar_fit = stats::arima(x, order = c(1,0,0))
      c(ar_fit$coef[2], ar_fit$coef[1], sqrt(ar_fit$sigma2))
    }, error = function(e) c(mean(x)/(1 - 0.8), 0.8, 1))
  })
  rownames(svParams) = c("intercept", "ar1", "sig")

  # SDs, log-vols, and other parameters:
  list(sigma_wt = exp(ht/2), ht = ht, svParams = svParams)
}
#----------------------------------------------------------------------------
#' Initialize the parameters for the initial state variance
#'
#' The initial state SDs are assumed to follow half-Cauchy priors, C+(0,A),
#' where the SDs may be common or distinct among the states.
#'
#' This function initializes the parameters for a PX-Gibbs sampler.
#'
#' @param mu0 \code{p x 1} vector of initial values (undifferenced)
#' @param commonSD logical; if TRUE, use common SDs (otherwise distinct)
#' @return List of relevant components:
#' the \code{p x 1} evolution error SD \code{sigma_w0},
#' the \code{p x 1} parameter-expanded RV's \code{px_sigma_w0},
#' and the corresponding global scale parameters
#' \code{sigma_00} and \code{px_sigma_00} (ignore if commonSD)

initEvol0 = function(mu0, commonSD = TRUE){

  n_states = length(mu0)
  abs_mu0 = abs(mu0)

  # Common SD: the mean absolute initial value, repeated for every state;
  # distinct SDs: the absolute initial value of each state
  sigma_w0 = if(commonSD) rep(mean(abs_mu0), n_states) else abs_mu0

  # Parameter-expanded terms and global scales all start at 1 for simplicity
  list(sigma_w0 = sigma_w0,
       px_sigma_w0 = rep(1, n_states),
       sigma_00 = 1,
       px_sigma_00 = 1)
}
#----------------------------------------------------------------------------
#' Compute X'X
#'
#' Build the \code{Tp x Tp} matrix XtX using the Matrix package
#' @param X \code{T x p} matrix of predictors
#' @return Block diagonal \code{Tp x Tp} Matrix (object) where each \code{p x p} block is \code{tcrossprod(matrix(X[t,]))}
#'
#' @note X'X is a one-time computing cost. Special cases may have more efficient computing options,
#' but the Matrix representation is important for efficient computations within the sampler.
#'

build_XtX = function(X){

  # Dimensions: number of time points and number of predictors
  n_time = nrow(X); n_pred = ncol(X)
  n_total = n_time*n_pred

  # Start from a sparse symmetric matrix with ones on the diagonal
  XtX = Matrix::bandSparse(n_total, k = 0, diagonals= list(rep(1,n_total)), symmetric = TRUE)

  # First and last row/column index of each (p x p) diagonal block
  block_start = seq(1, n_time*(n_pred+1), by = n_pred)
  block_end = block_start[-1] - 1

  # Overwrite block i with the outer product of the i-th row of X
  for(i in 1:n_time){
    block = block_start[i]:block_end[i]
    x_i = matrix(X[i,])
    XtX[block, block] = tcrossprod(x_i)
  }

  XtX
}

#----------------------------------------------------------------------------
#' Compute initial Cholesky decomposition for Bayesian Trend Filtering
#'
#' Computes the Cholesky decomposition for the quadratic term in the (Gaussian) posterior
#' of the Bayesian Trend Filtering coefficients. The sparsity pattern will not change during the
#' MCMC, so we can save computation time by computing this up front.
#'
#' @param nT number of time points
#' @param D degree of differencing (D = 1 or D = 2)

initChol_spam = function(nT, D = 1){

  # Build the quadratic term from random positive variances
  Q_init = build_Q(obs_sigma_t2 = abs(rnorm(nT)),
                   evol_sigma_t2 = abs(rnorm(nT)),
                   D = D)

  # Convert Matrix -> dgCMatrix -> spam
  # TODO: the dgCMatrix coercion is reported to be the source of a warning
  Q_general = as(Q_init, "dgCMatrix")
  Q_spam = spam::as.spam.dgCMatrix(Q_general)

  # Return the Cholesky factor of the spam matrix
  spam::chol(Q_spam)
}
#----------------------------------------------------------------------------
#' Compute initial Cholesky decomposition for TVP Regression
#'
#' Computes the Cholesky decomposition for the quadratic term in the (Gaussian) posterior
#' of the TVP regression coefficients. The sparsity pattern will not change during the
#' MCMC, so we can save computation time by computing this up front.
#' @param obs_sigma_t2 the \code{T x 1} vector of observation error variances
#' @param evol_sigma_t2 the \code{T x p} matrix of evolution error variances
#' @param XtX the \code{Tp x Tp} matrix of X'X (one-time cost; see ?build_XtX)
#' @param D the degree of differencing (one or two)
#'
#'
initCholReg_spam = function(obs_sigma_t2, evol_sigma_t2, XtX, D = 1){
  # Overview:
  #   1. Build the banded evolution term Qevol (Tp x Tp) from 1/evol_sigma_t2,
  #      for first (D = 1) or second (D = 2) differences.
  #   2. Build the observation term Qobs from XtX and 1/obs_sigma_t2.
  #   3. Convert Qobs + Qevol to a spam matrix and return spam::chol() of it.

  # Some quick checks:
  # NOTE(review): D = 0 passes this check (only D < 0 or non-integer D is rejected here),
  # but it is then rejected by the 'Requires D=1 or D=2' branch below.
  if((D < 0) || (D != round(D)))  stop('D must be a positive integer')

  # Dimensions of X:
  nT = nrow(evol_sigma_t2); p = ncol(evol_sigma_t2)

  if(D == 1){
    # Lagged version of transposed precision matrix, with zeros as appropriate (needed below)
    # Column t holds the precisions for time t+1; the last column stays at zero
    t_evol_prec_lag_mat = matrix(0, nrow = p, ncol = nT);
    t_evol_prec_lag_mat[,1:(nT-1)] = t(1/evol_sigma_t2[-1,])

    # Diagonal of quadratic term:
    # matrix() of a (p x nT) matrix stacks its columns, so the p entries for each time are contiguous
    Q_diag = matrix(t(1/evol_sigma_t2) + t_evol_prec_lag_mat)

    # Off-diagonal of quadratic term:
    # (negated lagged precisions, with the final element dropped)
    Q_off = matrix(-t_evol_prec_lag_mat)[-(nT*p)]

    # Quadratic term:
    # bands at offsets 0 and p
    Qevol = Matrix::bandSparse(nT*p, k = c(0,p), diagonals= list(Q_diag, Q_off), symmetric = TRUE)

    # For checking via direct computation:
    # H1 = bandSparse(nT, k = c(0,-1), diag = list(rep(1, nT), rep(-1, nT)), symmetric = FALSE)
    # IH = kronecker(as.matrix(H1), diag(p));
    # Q0 = t(IH)%*%diag(as.numeric(1/matrix(t(evol_sigma_t2))))%*%(IH)
    # print(sum((Qevol - Q0)^2))

  } else {
    if(D == 2){

      # Lagged x2 version of transposed precision matrix (recurring term)
      # (p x (nT-2)): precisions for times 3,...,nT
      t_evol_prec_lag2 = t(1/evol_sigma_t2[-(1:2),])

      # Diagonal of quadratic term:
      # own precision, plus 4x the lag-2 precisions in columns 2..(nT-1),
      # plus 1x the lag-2 precisions in columns 1..(nT-2)
      Q_diag = t(1/evol_sigma_t2)
      Q_diag[,2:(nT-1)] = Q_diag[,2:(nT-1)] + 4*t_evol_prec_lag2
      Q_diag[,1:(nT-2)] = Q_diag[,1:(nT-2)] + t_evol_prec_lag2
      Q_diag = matrix(Q_diag)

      # Off-diagonal (1) of quadratic term:
      # first column uses only the time-3 precision; later columns accumulate
      # -2x the lag-2 precisions and -2x the same precisions shifted by one column
      Q_off_1 = matrix(0, nrow = p, ncol = nT);
      Q_off_1[,1] = -2/evol_sigma_t2[3,]
      Q_off_1[,2:(nT-1)] = Q_off_1[,2:(nT-1)] + -2*t_evol_prec_lag2
      Q_off_1[,2:(nT-2)] = Q_off_1[,2:(nT-2)] + -2*t_evol_prec_lag2[,-1]
      Q_off_1 = matrix(Q_off_1)

      # Off-diagonal (2) of quadratic term:
      # the lag-2 precisions in columns 1..(nT-2), zeros elsewhere
      Q_off_2 =  matrix(0, nrow = p, ncol = nT); Q_off_2[,1:(nT-2)] = t_evol_prec_lag2
      Q_off_2 = matrix(Q_off_2)

      # Quadratic term:
      # bands at offsets 0, p, and 2p
      Qevol = Matrix::bandSparse(nT*p, k = c(0, p, 2*p), diagonals= list(Q_diag, Q_off_1, Q_off_2), symmetric = TRUE)

      # For checking via direct computation:
      # H2 = bandSparse(nT, k = c(0,-1, -2), diag = list(rep(1, nT), c(0, rep(-2, nT-1)), rep(1, nT)), symmetric = FALSE)
      # IH = kronecker(as.matrix(H2), diag(p));
      # Q0 = t(IH)%*%diag(as.numeric(1/matrix(t(evol_sigma_t2))))%*%(IH)
      # print(sum((Qevol - Q0)^2))

    } else stop('Requires D=1 or D=2')
  }

  # Observation term: XtX scaled by the observation precisions, each repeated p times
  Qobs = 1/rep(obs_sigma_t2, each = p)*XtX
  # Full quadratic term: observation part plus evolution part
  Qpost = Qobs + Qevol

  # New version (NOTE: reorder; opposite of log-vol!)
  # Coerce to dgCMatrix first, since as.spam.dgCMatrix() is the converter used here
  QHt_Matrix = spam::as.spam.dgCMatrix(as(Qpost, "dgCMatrix"))

  # And return the Cholesky piece:
  chQht_Matrix0 = spam::chol(QHt_Matrix)

  chQht_Matrix0
}
#----------------------------------------------------------------------------
#' Compute the quadratic term in Bayesian trend filtering
#'
#' Compute the quadratic term arising in the full conditional distribution
#' of a Bayesian trend filtering model with \code{D = 1} or \code{D = 2}.
#' This function exploits the known \code{D}-banded structure of \code{Q}
#' to compute the matrix directly, using objects in the Matrix package.
#'
#' @param obs_sigma_t2 the \code{T x 1} vector of observation error variances
#' @param evol_sigma_t2 the \code{T x 1} vector of evolution error variances
#' @param D the degree of differencing (one or two)
#' @return Banded \code{T x T} Matrix (object) \code{Q}
#'
#' @importFrom Matrix bandSparse

build_Q = function(obs_sigma_t2, evol_sigma_t2, D = 1){

  if(!(D == 1 || D == 2)) stop('build_Q requires D = 1 or D = 2')

  nT = length(evol_sigma_t2)

  # Q = diag(1/obs_sigma_t2, nT) + t(HD) %*% diag(1/evol_sigma_t2, nT) %*% HD,
  # where HD is the D-th order difference matrix. The bands of Q are written out
  # directly below, so HD is never formed.

  # Part of the main diagonal shared by both cases
  diag_base = 1/obs_sigma_t2 + 1/evol_sigma_t2

  if(D == 1){
    # D = 1 case: main diagonal and first off-diagonal
    evol_from2 = evol_sigma_t2[-1]

    band_offsets = c(0,1)
    band_values = list(diag_base + c(1/evol_from2, 0),
                       -1/evol_from2)
  } else {
    # D = 2 case: main diagonal and first two off-diagonals
    evol_from3 = evol_sigma_t2[-(1:2)]
    evol_from4 = evol_sigma_t2[-(1:3)]

    band_offsets = c(0,1,2)
    band_values = list(diag_base + c(0, 4/evol_from3, 0) + c(1/evol_from3, 0, 0),
                       c(-2/evol_sigma_t2[3], -2*(1/evol_from3 + c(1/evol_from4,0))),
                       1/evol_from3)
  }

  # Only the upper bands are supplied; symmetric = TRUE fills in the rest
  Matrix::bandSparse(nT, k = band_offsets, diagonals= band_values, symmetric = TRUE)
}
#----------------------------------------------------------------------------
#' Compute Non-Zeros (Signals)
#'
#' Estimate the location of non-zeros (signals) implied by
#' horseshoe-type thresholding.
#'
#' @details Thresholding is based on \code{kappa[t] > 1/2}, where
#' \code{kappa = 1/(1 + evol_sigma_t2/obs_sigma_t2)}, \code{evol_sigma_t2} is the
#' evolution error variance, and \code{obs_sigma_t2} is the observation error variance.
#' In particular, the decision rule is based on the posterior mean of \code{kappa}.
#'
#' @note The thresholding rule depends on whether the prior variance for the state
#' variable \code{mu} (i.e., \code{evol_sigma_t2}) is scaled by the observation standard
#' deviation, \code{obs_sigma_t2}. Explicitly, if \code{mu[t]} ~ N(0, \code{evol_sigma_t2[t]})
#' then the correct thresholding rule is based on \code{kappa = 1/(1 + evol_sigma_t2/obs_sigma_t2)}.
#' However, if \code{mu[t]} ~ N(0, \code{evol_sigma_t2[t]*obs_sigma_t2[t]})
#' then the correct thresholding rule is based on \code{kappa = 1/(1 + evol_sigma_t2)}.
#' The latter case may be implemented by omitting the input for \code{post_obs_sigma_t2}
#' (or setting it to NULL).
#'
#' @param post_evol_sigma_t2 the \code{Nsims x T} or \code{Nsims x T x p}
#' matrix/array of posterior draws of the evolution error variances.
#'
#' @param post_obs_sigma_t2 the \code{Nsims x 1} or \code{Nsims x T} matrix of
#' posterior draws of the observation error variances.
#'
#' @return A vector (or matrix) of indices identifying the signals according to the
#' horseshoe-type thresholding rule.

getNonZeros = function(post_evol_sigma_t2, post_obs_sigma_t2 = NULL){

  evol_dims = dim(post_evol_sigma_t2)

  # Variance ratio entering kappa = 1/(1 + ratio)
  if(is.null(post_obs_sigma_t2)){
    # No observation variance supplied: use the evolution variance as-is
    variance_ratio = post_evol_sigma_t2
  } else {
    # With a third dimension, repeat the observation variances to fill an
    # array with the same dimensions as post_evol_sigma_t2
    if(length(evol_dims) > 2){
      post_obs_sigma_t2 = array(rep(post_obs_sigma_t2, times = evol_dims[3]), evol_dims)
    }
    variance_ratio = post_evol_sigma_t2/post_obs_sigma_t2
  }

  # Average the shrinkage parameters over the first dimension (the draws)
  kappa_mean = colMeans(1/(1 + variance_ratio))

  # Signals are the entries whose average kappa falls below 1/2
  which(kappa_mean < 1/2, arr.ind = TRUE)
}
#' Sample components from a discrete mixture of normals
#'
#' Sample Z from 1,2,...,k, with P(Z=i) proportional to q_iN(mu_i,sig2_i).
#'
#' @param y vector of data
#' @param mu vector of component means
#' @param sig vector of component standard deviations
#' @param q vector of component weights
#' @return Sample from \{1,...,k\}
#----------------------------------------------------------------------------
ncind = function(y,mu,sig,q){
  # Unnormalized component probabilities: weight times normal density at y
  comp_weight = q*dnorm(y,mu,sig)

  # Draw a single component label from 1,...,length(q)
  sample.int(length(q), size = 1, prob = comp_weight)
}


#####################################################################################################
#' Compute Simultaneous Credible Bands
#'
#' Compute (1-alpha)\% credible BANDS for a function based on MCMC samples using Crainiceanu et al. (2007)
#'
#' @param sampFuns \code{Nsims x m} matrix of \code{Nsims} MCMC samples and \code{m} points along the curve
#' @param alpha confidence level
#'
#' @return \code{m x 2} matrix of credible bands; the first column is the lower band, the second is the upper band
#'
#' @note The input needs not be curves: the simultaneous credible "bands" may be computed
#' for vectors. The resulting credible intervals will provide joint coverage at the (1-alpha)\%
#' level across all components of the vector.
#'

credBands = function(sampFuns, alpha = .05){

  n_draws = nrow(sampFuns)
  ones = rep(1, n_draws)

  # Pointwise mean and SD across the draws
  post_mean = colMeans(sampFuns)
  post_sd = apply(sampFuns, 2, sd)

  # Absolute standardized deviation of every draw at every point
  abs_dev = abs(sampFuns - tcrossprod(ones, post_mean))/tcrossprod(ones, post_sd)

  # Largest deviation within each draw, and its (1-alpha) sample quantile
  max_dev = apply(abs_dev, 1, max)
  crit = quantile(max_dev, 1-alpha)

  # (m x 2) matrix of (lower, upper): mean -/+ critical value times SD
  cbind(post_mean - crit*post_sd, post_mean + crit*post_sd)
}
#####################################################################################################
#' Compute Simultaneous Band Scores (SimBaS)
#'
#' Compute simultaneous band scores (SimBaS) from Meyer et al. (2015, Biometrics).
#' SimBaS uses MC(MC) simulations of a function of interest to compute the minimum
#' alpha such that the joint credible bands at the alpha level do not include zero.
#' This quantity is computed for each grid point (or observation point) in the domain
#' of the function.
#'
#' @param sampFuns \code{Nsims x m} matrix of \code{Nsims} MCMC samples and \code{m} points along the curve
#'
#' @return \code{m x 1} vector of simBaS
#'
#' @note The input needs not be curves: the simBaS may be computed
#' for vectors to achieve a multiplicity adjustment.
#'
#' @note The minimum of the returned value, \code{PsimBaS_t},
#' over the domain \code{t} is the Global Bayesian P-Value (GBPV) for testing
#' whether the function is zero everywhere.
#'

simBaS = function(sampFuns){

  N = nrow(sampFuns)

  # Compute pointwise mean and SD of f(x):
  Efx = colMeans(sampFuns); SDfx = apply(sampFuns, 2, sd)

  # Compute standardized absolute deviation:
  Standfx = abs(sampFuns - tcrossprod(rep(1, N), Efx))/tcrossprod(rep(1, N), SDfx)

  # And the maximum (over the m points) for each of the N draws:
  Maxfx = apply(Standfx, 1, max)

  # Standardized absolute posterior mean at each point:
  absZ = abs(Efx)/SDfx

  # SimBaS scores: for each point, the proportion of draws whose maximum
  # standardized deviation is at least as large as absZ at that point.
  # vapply() returns one number per point, so this also works when m = 1
  # (sapply() collapses to a vector there, which rowMeans() rejects).
  PsimBaS_t = vapply(absZ, function(z) mean(z <= Maxfx), numeric(1))

  PsimBaS_t
}

#----------------------------------------------------------------------------
#' Summary of effective sample size
#'
#' Compute the summary statistics for the effective sample size (ESS) across
#' posterior samples for possibly many variables
#'
#' @param postX An array of arbitrary dimension \code{(nsims x ... x ...)}, where \code{nsims} is the number of posterior samples
#' @return Table of summary statistics using the function \code{summary()}.
#' @importFrom coda effectiveSize as.mcmc

getEffSize = function(postX) {
  dims = dim(postX)

  # No dimensions: return the effective size of the vector itself
  if(is.null(dims)) return(effectiveSize(postX))

  # Otherwise flatten to (nsims x number of variables) and summarize the per-variable ESS
  flat_draws = array(postX, c(dims[1], prod(dims[-1])))
  ess = coda::effectiveSize(coda::as.mcmc(flat_draws))
  summary(ess)
}
#----------------------------------------------------------------------------
#' Compute the ergodic (running) mean.
#' @param x vector for which to compute the running mean
#' @return A vector \code{y} with each element defined by \code{y[i] = mean(x[1:i])}

ergMean = function(x) {
  # Running total divided by the running count
  cumsum(x)/seq_along(x)
}

#----------------------------------------------------------------------------
#' Compute the log-odds
#' @param x scalar or vector in (0,1) for which to compute the (componentwise) log-odds
#' @return A scalar or vector of log-odds

logit = function(x) {
  # Reject anything outside [0,1]. Testing abs(x) > 1 would let negative values
  # in [-1,0) through and produce NaN. The endpoints 0 and 1 are still accepted
  # and map to -Inf and Inf; NA entries are ignored by the check and stay NA.
  if(any(x < 0 | x > 1, na.rm = TRUE)) stop('x must be in (0,1)')
  log(x/(1-x))
}

#----------------------------------------------------------------------------
#' Compute the inverse log-odds
#' @param x scalar or vector for which to compute the (componentwise) inverse log-odds
#' @return A scalar or vector of values in (0,1)

# Use the logistic distribution function, exp(x)/(1+exp(x)), from stats.
# The direct form exp(x - log(1+exp(x))) overflows for large x
# (exp(x) becomes Inf) and then returns 0 where the answer should be 1.
invlogit = function(x) stats::plogis(x)

#----------------------------------------------------------------------------
#' Univariate Slice Sampler from Neal (2008)
#'
#' Compute a draw from a univariate distribution using the code provided by
#' Radford M. Neal. The documentation below is also reproduced from Neal (2008).
#'
#' @param x0    Initial point
#' @param g     Function returning the log of the probability density (plus constant)
#' @param w     Size of the steps for creating interval (default 1)
#' @param m     Limit on steps (default infinite)
#' @param lower Lower bound on support of the distribution (default -Inf)
#' @param upper Upper bound on support of the distribution (default +Inf)
#' @param gx0   Value of g(x0), if known (default is not known)
#'
#' @return  The point sampled, with its log density attached as an attribute.
#'
#' @note The log density function may return -Inf for points outside the support
#' of the distribution.  If a lower and/or upper bound is specified for the
#' support, the log density function will not be called outside such limits.
#'

uni.slice <- function (x0, g, w=1, m=Inf, lower=-Inf, upper=+Inf, gx0=NULL)
{
  # Single slice-sampling update from x0. Steps:
  #   1. draw the log slice level below g(x0);
  #   2. place an interval of width w at random around x0;
  #   3. expand it in steps of w (at most m intervals in total when m is finite);
  #   4. clip it to [lower, upper];
  #   5. draw uniformly from it, shrinking toward x0 after each rejected draw.

  # Check the validity of the arguments.
  # (x0, w, lower, upper must be numeric scalars with lower <= x0 <= upper and lower < upper;
  #  w must be positive; a finite m must be a positive whole number no larger than 1e9;
  #  gx0, when given, must be a numeric scalar)

  if (!is.numeric(x0) || length(x0)!=1
      || !is.function(g)
      || !is.numeric(w) || length(w)!=1 || w<=0
      || !is.numeric(m) || !is.infinite(m) && (m<=0 || m>1e9 || floor(m)!=m)
      || !is.numeric(lower) || length(lower)!=1 || x0<lower
      || !is.numeric(upper) || length(upper)!=1 || x0>upper
      || upper<=lower
      || !is.null(gx0) && (!is.numeric(gx0) || length(gx0)!=1))
  {
    stop ("Invalid slice sampling argument")
  }

  # Keep track of the number of calls made to this function.
  #uni.slice.calls <<- uni.slice.calls + 1

  # Find the log density at the initial point, if not already known.

  if (is.null(gx0))
  { #uni.slice.evals <<- uni.slice.evals + 1
  gx0 <- g(x0)
  }

  # Determine the slice level, in log terms.
  # (subtracting an Exp(1) draw is the log-scale equivalent of multiplying
  #  the density at x0 by a Uniform(0,1) draw)

  logy <- gx0 - rexp(1)

  # Find the initial interval to sample from.
  # (width w, positioned uniformly at random so that it contains x0)

  u <- runif(1,0,w)
  L <- x0 - u
  R <- x0 + (w-u)  # should guarantee that x0 is in [L,R], even with roundoff

  # Expand the interval until its ends are outside the slice, or until
  # the limit on steps is reached.

  if (is.infinite(m))  # no limit on number of steps
  {
    # Step the left end outward while it is inside the support and inside the slice
    repeat
    { if (L<=lower) break
      #uni.slice.evals <<- uni.slice.evals + 1
      if (g(L)<=logy) break
      L <- L - w
    }

    # Same for the right end
    repeat
    { if (R>=upper) break
      #uni.slice.evals <<- uni.slice.evals + 1
      if (g(R)<=logy) break
      R <- R + w
    }
  }

  else if (m>1)  # limit on steps, bigger than one
  {
    # Split the m-1 allowed expansion steps at random between the left (J) and right (K) ends
    J <- floor(runif(1,0,m))
    K <- (m-1) - J

    while (J>0)
    { if (L<=lower) break
      #uni.slice.evals <<- uni.slice.evals + 1
      if (g(L)<=logy) break
      L <- L - w
      J <- J - 1
    }

    while (K>0)
    { if (R>=upper) break
      #uni.slice.evals <<- uni.slice.evals + 1
      if (g(R)<=logy) break
      R <- R + w
      K <- K - 1
    }
  }
  # (when m equals 1 neither branch runs, so the initial interval is used unexpanded)

  # Shrink interval to lower and upper bounds.

  if (L<lower)
  { L <- lower
  }
  if (R>upper)
  { R <- upper
  }

  # Sample from the interval, shrinking it on each rejection.

  repeat
  {
    x1 <- runif(1,L,R)

    #uni.slice.evals <<- uni.slice.evals + 1
    gx1 <- g(x1)

    # Accept the draw once it lies inside the slice
    if (gx1>=logy) break

    # Otherwise move the end on the same side of x0 as the rejected point in to
    # that point, so that x0 always stays inside [L,R]
    if (x1>x0)
    { R <- x1
    }
    else
    { L <- x1
    }
  }

  # Return the point sampled, with its log density attached as an attribute.

  attr(x1,"log.density") <- gx1
  return (x1)

}

#----------------------------------------------------------------------------
#' Compute the spectrum of an AR(p) model
#'
#' @param ar_coefs (p x 1) vector of AR(p) coefficients
#' @param sigma_e observation standard deviation
#' @param n.freq number of frequencies at which to evaluate the spectrum
#'
#' @return A (n.freq x 2) matrix where the first column is the frequencies
#' and the second column is the spectrum evaluated at that frequency

spec_dsp = function(ar_coefs, sigma_e, n.freq = 500){

  ar_order = length(ar_coefs)

  # Evaluation grid of n.freq frequencies on [0, 0.5]
  freq <- seq.int(0, 0.5, length.out = n.freq)

  if(ar_order == 0){
    # White noise: the same value at every frequency
    sf = rep.int(sigma_e^2, n.freq)
  } else {
    # AR(p) setting: angle[i, k] = 2*pi*freq[i]*k for lags k = 1,...,p
    angle = outer(freq, 1L:ar_order, function(f, k) 2 * pi * f * k)

    # Cosine and sine sums weighted by the AR coefficients
    cos_sum = cos(angle) %*% ar_coefs
    sin_sum = sin(angle) %*% ar_coefs

    sf = sigma_e^2/(((1 - cos_sum)^2 + sin_sum^2))
  }

  cbind(freq, sf)
}

#----------------------------------------------------------------------------
#' Compute the design matrix X for AR(p) model
#'
#' @param y (T x 1) vector of responses
#' @param p order of AR(p) model
#' @param include_intercept logical; if TRUE, first column of X is ones
getARpXmat = function(y, p = 1, include_intercept = FALSE){
  # An AR(0) model has no lagged predictors
  if(p==0) return(NULL)

  n_obs = length(y)

  # Column 'lag' holds y shifted back by 'lag' steps, aligned with the responses y[(p+1):n_obs]
  lag_cols = matrix(1, nrow = n_obs - p, ncol = p)
  for(lag in 1:p){
    lag_cols[,lag] = y[(p-lag+1):(n_obs-lag)]
  }

  # Optionally prepend a column of ones
  if(include_intercept) return(cbind(1,lag_cols))

  lag_cols
}

#----------------------------------------------------------------------------
#' Wrapper function for C++ call for sample mat, check pre-conditions to prevent crash
#'
#' @param row_ind list of the row indices to fill in the bandsparse matrix
#' @param col_ind list of the columns indices to fill in the bandsparse matrix
#' @param mat_val list of the values to fill in the bandsparse matrix
#' @param mat_l dimension of the band-sparse matrix
#' @param num_inp number of non-zero elements in the bandsparse matrix
#' @param linht \code{T-D} vector of linear term in the sampler
#' @param rd \code{T-D} vector of standard normal noise samples
#' @param D the degree of differencing for changepoint
#'

sample_mat_c = function(row_ind, col_ind, mat_val, mat_l, num_inp, linht, rd, D){
  # The three sparse-matrix inputs must each have num_inp entries
  triplet_mismatch = (length(row_ind) != num_inp) || (length(col_ind) != num_inp) || (length(mat_val) != num_inp)
  if (triplet_mismatch) stop('Length of inputs do not match')

  # The linear term and the noise vector must each have mat_l entries
  vector_mismatch = (length(linht) != mat_l) || (length(rd) != mat_l)
  if (vector_mismatch) stop('length of vectors do not match')

  # Inputs are consistent: hand off to sample_mat()
  sample_mat(row_ind, col_ind, mat_val, mat_l, num_inp, linht, rd, D)
}
