```
## Simulate Bayesian single-arm adaptive trial
## Allow early termination due to futility or efficacy
## Binary outcome
## Beta-binomial:
## p ~ beta(a, b)
## x_i ~ binomial(p) i = 1..n
## p|x ~ beta(a + sum(x), b + n - sum(x))
## Efficacy at interim t if Pr(p > p_0 | x_{(t)}) > \gamma_e
## Futility at interim t if Pr(p > p_0 | x_{(t_max)}) < \gamma_f
## https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4247348/
library('rmutil') ## for betabinom
## Simulate one Bayesian single-arm adaptive trial with a binary
## outcome under the beta-binomial model described above. The trial
## stops early for efficacy at an interim when Pr(p > pref | data)
## exceeds 'efft', and for futility when the predictive probability
## of concluding efficacy at the maximum sample size falls below
## 'futt'.
## ptru - true probability of outcome (p)
## pref - reference probability of outcome (p_0)
## nint - sample sizes at which to conduct interim analyses
## efft - efficacy threshold (\gamma_e)
## futt - futility threshold (\gamma_f)
## apri - prior beta parameter \alpha
## bpri - prior beta parameter \beta
## Returns list(action=, reason=, n=) for the analysis at which the
## trial stopped (or the final analysis if no stopping rule fired).
simtrial <- function(
  ptru = 0.15,
  pref = 0.15,
  nint = c(10, 13, 16, 19),
  efft = 0.95,
  futt = 0.05,
  apri = 1,
  bpri = 1) {
  ## determine minimum number of 'successes' necessary to
  ## conclude efficacy if study continues to maximum sample size
  nmax <- max(nint)
  post <- sapply(0:nmax, function(nevt)
    1 - pbeta(pref, apri + nevt, bpri + nmax - nevt))
  ## sentinel 'nmax + 1' when no event count meets the efficacy
  ## criterion; avoids min(integer(0)) -> Inf (with a warning) and
  ## correctly makes efficacy unattainable below
  nsuc <- min(c(which(post > efft) - 1, nmax + 1))
  ## simulate individual binary outcomes up to the maximum size
  samp <- rbinom(n = nmax, size = 1, prob = ptru)
  ## simulate interim analyses
  intr <- lapply(nint, function(ncur) {
    ## number of events observed so far
    ecur <- sum(samp[1:ncur])
    ## posterior beta parameters, plus the (m, s) parameterization
    ## used by rmutil::pbetabinom
    abb <- apri + ecur
    bbb <- bpri + ncur - ecur
    sbb <- abb + bbb
    mbb <- abb/(abb + bbb)
    ## efficacy: Pr(p > p_0 | x_{(t)})
    effp <- 1 - pbeta(pref, abb, bbb)
    ## stop for efficacy
    if(effp > efft)
      return(list(action = 'stop',
                  reason = 'efficacy',
                  n = ncur))
    ## number of events necessary in the remainder of the
    ## study to conclude efficacy
    erem <- nsuc - ecur
    ## predictive success probability Pr(p > p_0 | x_{(t_max)})
    if(erem > nmax - ncur) { ## not enough possible events remain
      sucp <- 0
    } else { ## not yet met efficacy threshold
      sucp <- 1 - pbetabinom(q = erem - 1,
        size = nmax - ncur, m = mbb, s = sbb)
    }
    ## stop for futility
    if(sucp < futt)
      return(list(action = 'stop',
                  reason = 'futility',
                  n = ncur))
    return(list(action = 'continue',
                reason = '',
                n = ncur))
  })
  ## report the first interim that stopped the trial; if none did
  ## (possible, e.g., with futt = 0 when efficacy is never met),
  ## report the final analysis rather than failing on an NA subscript
  stpi <- match('stop', sapply(intr, `[[`, 'action'))
  if(is.na(stpi)) stpi <- length(intr)
  return(intr[[stpi]])
}
## Simulate a study with a maximum sample size of 200 in which the
## true probability equals the reference (i.e., the null hypothesis
## is true). This type of simulation helps us determine the overall
## type-I error rate.
nint <- c(40, 80, 120, 160, 200)
nmax <- max(nint)
trials <- lapply(seq_len(10000), function(i)
  as.data.frame(simtrial(ptru = 0.72,
                         pref = 0.72,
                         nint = nint,
                         efft = 0.975,
                         futt = 0.20)))
res <- do.call(rbind, trials)
## probability of early termination (PET) due to futility
mean(res$reason == 'futility' & res$n < nmax)
## PET due to efficacy
mean(res$reason == 'efficacy' & res$n < nmax)
## Pr(conclude efficacy): the 'type-I error rate'
mean(res$reason == 'efficacy')
## average and standard deviation of realized sample size
mean(res$n)
sd(res$n)
## distribution of realized sample sizes
barplot(prop.table(table(res$n)),
        xlab = 'Study Size (N)',
        main = "No Difference")
## Simulate a study in which the true probability is greater than
## the reference (i.e., an alternative hypothesis holds). This type
## of simulation helps us determine the study power.
trials <- lapply(seq_len(10000), function(i)
  as.data.frame(simtrial(ptru = 0.82,
                         pref = 0.72,
                         nint = nint,
                         efft = 0.975,
                         futt = 0.20)))
res <- do.call(rbind, trials)
## probability of early termination (PET) due to futility
mean(res$reason == 'futility' & res$n < nmax)
## PET due to efficacy
mean(res$reason == 'efficacy' & res$n < nmax)
## Pr(conclude efficacy): the study 'power'
mean(res$reason == 'efficacy')
## average and standard deviation of realized sample size
mean(res$n)
sd(res$n)
## distribution of realized sample sizes
barplot(prop.table(table(res$n)),
        xlab = 'Study Size (N)',
        main = "35% Reduction")
```

]]>```
## Metropolis sampling
## x - current value of Markov chain (numeric vector)
## targ - target log density function
## prop - function with prototype function(x, ...) that generates
## a proposal value from a symmetric proposal distribution
library('mvtnorm')
## Draw a multivariate normal random-walk proposal centered at the
## current state 'x'; '...' is forwarded to rmvnorm (e.g., sigma=).
prop_mvnorm <- function(x, ...) {
  cand <- rmvnorm(1, mean = x, ...)
  drop(cand)
}
## One Metropolis update of the chain state.
## x    - current value of the Markov chain (numeric vector)
## targ - target log density function
## prop - generator of a symmetric proposal, prototype function(x, ...)
## ...  - extra arguments forwarded to 'targ' only
##        (NOTE(review): '...' is NOT forwarded to 'prop'; proposal
##        tuning such as a covariance must be baked into the 'prop'
##        closure -- confirm this matches the caller's intent)
## Returns the proposal if accepted, otherwise the current state.
metropolis <- function(x, targ, prop=prop_mvnorm, ...) {
  cand <- prop(x)
  ## log acceptance ratio; symmetric proposal, so no Hastings correction
  lacc <- targ(cand, ...) - targ(x, ...)
  accepted <- log(runif(1)) < lacc
  if(accepted) cand else x
}
## Metropolis-in-Gibbs sampling: one full sweep updating each
## component of 'x' in turn from its full conditional, each via a
## single Metropolis step.
## x - current value of Markov chain (numeric vector)
## targ - target log density function
## ... - arguments passed to 'targ'
## Returns the updated state vector.
gibbs <- function(x, targ, ...) {
  ## seq_along() is safe for zero-length 'x' (1:length(x) is not)
  for(i in seq_along(x)) {
    ## full conditional of component i given the others; assigning
    ## x[i] inside targ1 copies the enclosing 'x' into targ1's frame,
    ## so each call sees the components updated so far this sweep
    targ1 <- function(x1, ...) {
      x[i] <- x1
      targ(x, ...)
    }
    ## sample component i using the Metropolis algorithm
    x[i] <- metropolis(x[i], targ1, ...)
  }
  return(x)
}
```

The following code produces the figure below to illustrate the two methods using a 'dumbell' distribution (cf. R package 'ks' vignette).

```
### The code below illustrates the use of the functions above
## target 'dumbell' density (c.f., R package 'ks' vignette):
## a three-component normal mixture whose middle component is
## negatively correlated
library('ks')
mus <- rbind(c(-2, 2), c(0, 0), c(2, -2))
sigmas <- rbind(diag(2),
                matrix(c(0.8, -0.72, -0.72, 0.8), nrow = 2),
                diag(2))
cwt <- 3/11
props <- c((1 - cwt)/2, cwt, (1 - cwt)/2)
## log of the mixture density at 'x'
targ_test <- function(x, ...)
  log(dmvnorm.mixt(x, mus = mus, Sigmas = sigmas, props = props))
## plot contours of target 'dumbell' density
set.seed(42)
par(mfrow=c(1,2))
plotmixt(mus=mus, Sigmas=sigmas, props=props,
  xlim=c(-4,4), ylim=c(-4,4),
  xlab=expression(x[1]),
  ylab=expression(x[2]),
  main="Metropolis-in-Gibbs")
## initialize and sample using Metropolis-in-Gibbs
## BUG FIX: the original passed 'sigma=vcov_test', but 'vcov_test'
## was never defined. The call still "worked" only because the
## argument was never evaluated: 'targ_test' ignores '...' and
## 'metropolis' does not forward '...' to the proposal, so lazy
## evaluation never forced the promise. The dead argument is removed.
xcur <- gibbs(c(0,0), targ_test)
for(j in 1:2000) {
  xcur <- gibbs(xcur, targ_test)
  points(xcur[1], xcur[2], pch=20, col='#00000055')
}
## plot contours of target 'dumbell' density
plotmixt(mus=mus, Sigmas=sigmas, props=props,
  xlim=c(-4,4), ylim=c(-4,4),
  xlab=expression(x[1]),
  ylab=expression(x[2]),
  main="Metropolis")
## initialize and sample using Metropolis
xcur <- metropolis(c(0,0), targ_test)
for(j in 1:2000) {
  xcur <- metropolis(xcur, targ_test)
  points(xcur[1], xcur[2], pch=20, col='#00000055')
}
```

The figure illustrates two contrasting properties of the two methods:

- Metropolis-in-Gibbs samples can get 'stuck' in certain regions of the support, especially when there are multiple modes and/or significant correlation among the random variables. This is not as much a problem for Metropolis sampling.
- Metropolis sampling can produce fewer unique samples due to the poor approximation of the proposal density to the target density. This occurs more often for high-dimensional target densities.

Bank | Link | Best rate* |
---|---|---|

Civic Bank & Trust | http://www.civicbanktn.com/ | 1.250% (amounts below $30k) 0.400% (amounts above $30k) |

Reliant Bank | https://www.reliantbank.com/ | 1.000% (amounts below $20k) 0.050% (amounts above $20k) |

Renasant Bank | https://www.renasantbank.com/ | 0.753% (amounts below $25k) 0.095% (amounts above $25k) |

Capital Bank | https://www.capitalbank-us.com/ | 0.740% ($10k min., not sure customer service understood my request) |

Simmons Bank (purchased First State Bank) | https://simmonsbank.com/ | 0.250% |

The rates in the following table are typical but pathetic. They are in no particular order.

Bank | Link | Best rate* |
---|---|---|

The Bank of Nashville - Synovus | https://bankofnashville.synovus.com/ | 0.050% |

Regions | https://www.regions.com/ | Could not reach customer service (waited 5 minutes on hold starting at 9:30am). |

Pinnacle (purchased Avenue Bank) | https://www.pnfp.com/ | 0.025% |

Fifth Third Bank | https://www.53.com/ | 0.010% ($20k min.) 0.100% ($100k min.) |

The Tennessee Credit Union | https://www.ttcu.org/ | 2.500% (amounts below $5k) 0.250% (amounts above $5k) |

First Tennessee | https://www.firsttennessee.com/ | 0.010% |

Bank of America | https://www.bankofamerica.com/ | 0.010% ($0 min.) 0.020% ($50k min.) |

Wells Fargo | https://www.wellsfargo.com/ | 0.010% ($0 min.) 0.05% ($5k min.) |

Cornerstone Financial Credit Union | https://www.bankcfcu.org/ | 0.050% ($1000 min.) |

US Bank | https://usbank.com | 0.010% ($0 min.) 0.020% ($10k min.) |

Ally (Online) | https://www.ally.com | 0.100% ($0 min.) 0.600% ($15k min.) |

Suntrust | https://www.suntrust.com | 0.010% ($0 min.) 0.03% ($10k min.) 0.05 ($25k min.) |

Southeast Financial Credit Union | https://www.southeastfinancial.org/ | 0.050% |

Vanderbilt University Credit Union | https://www.vanderbiltcu.org/ | 0.050% |

Nashville Post Office Credit Union | http://www.npocu.org/ | 0.050% |

BB&T | https://www.bbt.com/ | 0.010% |

First Citizens Bank | https://www.firstcitizens.com/ | 0.010% |

Republic Bank | http://www.republicbank.com/ | 0.050% ($0 min.) 0.080% ($15k min.) |

United Community Bank | https://www.ucbi.com/ | 0.020% |

* The best rate may be listed as APR or APY. However, these are usually very similar for checking accounts.

]]>The figure below shows the sensitivity versus false-positive rate for 52 controlled laboratory studies of naive examinees, untrained in polygraph countermeasures. Each study examinee was assigned to be truthful or deceptive, and a polygraph was used to test for deception. Thus, in this context, sensitivity is the likelihood of detecting deception in examinees who were, in fact, attempting to deceive the examiner.

Caption from the report: FIGURE 5-1 Sensitivity and false positive rates in 52 laboratory datasets on polygraph validity. NOTES: Points connected by lines come from the same dataset. The two curves are symmetrical receiver operating characteristic (ROC) curves with accuracy index (A) values of 0.81 and 0.91.

On the basis of this figure, the report concludes that "features of polygraph charts and the judgments made from them are correlated with deception in a variety of controlled situations involving naïve examinees untrained in countermeasures: for such examinees and test contexts, the polygraph has an accuracy greater than chance," and that "errors are not infrequent in polygraph testing." Indeed, the false positive rate is quite large for most studies.

The next figure is similar to that above, but for "field studies", which "involved examination of polygraph charts from law enforcement agencies’ or polygraph examiners’ case files in relation to the truth as determined by relatively reliable but nevertheless imperfect criteria, including confession by the subject or another party or apparently definitive evidence."

Caption from the report: FIGURE 5-3 Sensitivity and false positive rate in seven field datasets on polygraph validity. NOTE: Points connected by lines come from the same dataset.

Interestingly, these studies appear to confirm the laboratory experiments. However, note that the false-positive rate was greater than 10% in all studies, and as large as 70%.

Although these data should disqualify the polygraph for any high-stakes purpose (at least 1 in 10 truthful examinees would be incorrectly identified as deceptive), I'm surprised that there is so much empirical evidence of greater-than-chance lie detection. Nevertheless, the report goes on to criticize the quality and generalizability of polygraph research, and leaves the reader with a decidedly skeptical sense of the utility of polygraph in lie detection.

]]>In general, the sum of rounded numbers (e.g., using the `base::round` function) is not the same as their rounded sum. For example:

> sum(c(0.333, 0.333, 0.334)) [1] 1 > sum(round(c(0.333, 0.333, 0.334), 2)) [1] 0.99

The stackoverflow solution applies the following algorithm

- Round down to the specified number of decimal places
- Order numbers by their remainder values
- Increment the specified decimal place of values with 'k' largest remainders, where 'k' is the number of values that must be incremented to preserve their rounded sum

Here's the corresponding R function:

round_preserve_sum <- function(x, digits = 0) { up <- 10 ^ digits x <- x * up y <- floor(x) indices <- tail(order(x-y), round(sum(x)) - sum(y)) y[indices] <- y[indices] + 1 y / up }

Continuing with the example:

> sum(c(0.333, 0.333, 0.334)) [1] 1 > sum(round(c(0.333, 0.333, 0.334), 2)) [1] 0.99 > sum(round_preserve_sum(c(0.333, 0.333, 0.334), 2)) [1] 1]]>

```
## generate some bivariate data: a noisy two-harmonic Fourier curve
set.seed(42)
x1 <- seq(1, 10, 0.3)
## Fourier coefficients (fundamental frequency 'w', two harmonics);
## IDIOM FIX: replaced '=' assignment and stray semicolons with '<-'
w  <- 0.6067
a0 <- 1.6345
a1 <- -0.6235
b1 <- -1.3501
a2 <- -1.1622
b2 <- -0.9443
x2 <- a0 + a1*cos(x1*w) + b1*sin(x1*w) + a2*cos(2*x1*w) +
  b2*sin(2*x1*w) + rnorm(length(x1), 0, 3/4)
## center and scale both coordinates
x <- scale(cbind(x1, x2))
alim <- extendrange(x, f=0.1) ## padded axis limits for plotting
alim_ <- range(x)             ## clip region for the PC line
## plot centered data
plot(x[,1], x[,2], bty='n',
  xlab=expression(x[1]),
  ylab=expression(x[2]),
  xlim=alim, ylim=alim)
legend("topleft", legend=c("Initialize"), bty="n")
## plot first principal component line
svdx <- svd(x)
clip(alim_[1], alim_[2], alim_[1], alim_[2])
with(svdx, abline(a=0, b=v[2,1]/v[1,1]))
## plot projections of each point onto line
z1 <- with(svdx, x%*%v[,1]%*%t(v[,1]))
segments(x0=x[,1], y0=x[,2],
  x1=z1[,1], y1=z1[,2])
## compute initial lambda (arc-lengths associated with
## orthogonal projections of data onto curve)
lam <- with(svdx, as.numeric(u[,1]*d[1]))
for(itr in 1:3) {
  #### step (a) of iterative algorithm ####
  ## compute scatterplot smoother in either dimension;
  ## increase 'df' to make the curve more flexible
  fit1 <- smooth.spline(x=lam, y=x[,1], df=4)
  fit2 <- smooth.spline(x=lam, y=x[,2], df=4)
  ## plot data and the principal curve for a sequence of lambdas
  plot(x[,1], x[,2], bty='n',
    xlab=expression(x[1]),
    ylab=expression(x[2]),
    xlim=alim, ylim=alim)
  legend("topleft", legend=c("Step (a)"), bty="n")
  seq_lam <- seq(min(lam), max(lam), length.out=100)
  lines(predict(fit1, seq_lam)$y, predict(fit2, seq_lam)$y)
  ## show points along curve corresponding to original lambdas
  z1 <- cbind(predict(fit1, lam)$y, predict(fit2, lam)$y)
  segments(x0=x[,1], y0=x[,2],
    x1=z1[,1], y1=z1[,2])
  #### step (b) of iterative algorithm ####
  ## recompute lambdas by minimizing squared Euclidean distance
  ## from each point to the fitted curve
  euc_dist <- function(l, x, f1, f2)
    sum((c(predict(f1, l)$y, predict(f2, l)$y) - x)^2)
  lam <- apply(x, 1, function(x0) optimize(euc_dist,
    interval=extendrange(lam, f=0.50),
    x=x0, f1=fit1, f2=fit2)$minimum)
  ## show projections associated with recomputed lambdas
  plot(x[,1], x[,2], bty='n',
    xlab=expression(x[1]),
    ylab=expression(x[2]),
    xlim=alim, ylim=alim)
  legend("topleft", legend=c("Step (b)"), bty="n")
  seq_lam <- seq(min(lam), max(lam), length.out=100)
  lines(predict(fit1, seq_lam)$y, predict(fit2, seq_lam)$y)
  z1 <- cbind(predict(fit1, lam)$y, predict(fit2, lam)$y)
  segments(x0=x[,1], y0=x[,2],
    x1=z1[,1], y1=z1[,2])
}
```

]]>I'm recently returned from the 2015 Rocky Mountain Bioinformatics Conference, where I presented the above poster. This is work with a colleague, Rick Gray, at the FDA. He and I collaborate on our NIH award "Optimal Design of Challenge-Response Experiments in Cardiac Electrophysiology" (HL118392) The (original) poster abstract is below, but the poster content is slightly less ambitious. Here are PNG and PDF versions: PNG (2.2M), PDF (1.1M). I will post some R code demonstrating the model shortly.

Abstract:

---

The Hodgkin-Huxley cardiac cell model is used to model the behavior of ion-channels during the cardiac action potential. On a larger scale, the model is used, for example, to model cardiac arrhythmias and to assess the effects of defibrillation protocols. Historically, the model parameters have been estimated in a piecewise fashion using summaries of raw data from voltage-clamp experiments, and by fitting the summarized data to model sub-components. This process is repeated for each of the model sub-components and corresponding summaries of voltage-clamp data until all of the model parameters are estimated. However, we demonstrate that by summarizing the raw data, some information about the model parameters is ignored. We show that the piecewise estimation procedure can be biased, and can yield estimates that are not unique. Finally, we show that the model parameters can be estimated simultaneously by integrating data sources across multiple types of voltage-clamp experiments, and that this technique is more efficient than the piecewise approach.

---

I was also able to catch up with a good friend (and even next-door neighbor, for a time) from graduate school, Adam Richards, who is currently working with the Department of Medicine at the University of Colorado Denver - Anschutz Medical Campus. He presented a poster as well. Here's a (blurry) picture of us at the poster session:

]]>P.S. Jeff Leek from simplystatistics.org visited our department yesterday and mentioned to me that he often blogs about things on the internet that anger him. Thus, this post was motivated by that sentiment. Thanks Jeff!

]]>I am often asked about the number of subjects needed to study a binary outcome, which usually leads to a discussion of confidence intervals for binary proportions, and the associated precision. Sometimes the precision is quantified as the width or half-width of a 95% confidence interval. For proportions, I like the Wilson score interval because it’s simple to calculate and does not violate the constraints of probability estimates (i.e., estimates must be between 0 and 1). Below is a function that computes the Wilson interval, given the number of trials (`n`

) and the fraction of events (`p`

).

```
## Level (1-a) Wilson score confidence interval for a binomial
## proportion.
## WILSON, E. B. 1927. Probable inference, the law of succession,
## and statistical inference. Journal of the American Statistical
## Association 22: 209-212.
## n - number of trials
## p - observed fraction of events
## a - significance level (default 0.05 gives a 95% interval)
## Returns list(lower=, upper=).
WilsonBinCI <- function(n, p, a=0.05) {
  ## CLARITY FIX: the original computed a *negative* z via
  ## qnorm(1-a/2, lower.tail=FALSE) and compensated with swapped
  ## '+'/'-' signs in the two bounds (two errors that cancel).
  ## Use the conventional positive quantile and signs instead;
  ## the result is numerically identical.
  z <- qnorm(1 - a/2)
  shrink <- 1/(1 + 1/n*z^2)          ## shrinkage toward 1/2
  center <- p + 1/2/n*z^2            ## adjusted center
  halfw <- z*sqrt(1/n*p*(1-p) + 1/4/n^2*z^2)
  list(lower = shrink*(center - halfw),
       upper = shrink*(center + halfw))
}
```

The code below generates a figure that illustrates the 95% confidence bounds as a function of the probability estimate, and for a sequence of trial sizes. I’m posting this here for my future easy access, but I hope some readers will find it useful as well, or might suggest improvements.

```
## Plot Wilson 95% CI bounds, centered at the probability estimate,
## as shaded bands for a sequence of trial sizes.
pseq <- seq(0, 1, length.out=200)
nseq <- c(10, 20, 50, 200, 1000)
gcol <- gray.colors(length(nseq), start = 0.3, end = 0.7)
par(mar=c(5, 5, 4, 2) + 0.1)
plot(pseq, pseq, type="n",
     ylim=c(-0.3, 0.3),
     main="Wilson Binomial 95% CI",
     xlab="Probability Estimate (P)",
     ylab="",
     yaxt="n")
## y axis labeled as offsets from the estimate: P-0.3 ... P ... P+0.3
pbnd <- -3:3/10
axis(2, at=pbnd, labels=
       ifelse(pbnd < 0, paste0("P", pbnd),
              ifelse(pbnd == 0, rep("P", length(pbnd)),
                     paste0("P+", pbnd))), las=2)
mtext("95% CI", 2, line = 4)
legend("topright", paste("N =", nseq),
       fill=gcol, border=NA, bty="n", cex=0.8)
for(idx in seq_along(nseq)) {
  ## CI bounds for this trial size, expressed as offsets from P
  bnds <- t(sapply(pseq, function(p)
    unlist(WilsonBinCI(nseq[idx], p))))
  bnds <- bnds - pseq
  polygon(x=c(pseq, rev(pseq)),
          y=c(bnds[,2], rev(bnds[,1])),
          border=NA, col=gcol[idx])
  ## dotted guides marking the widest lower offset
  pmin <- pseq[which.min(bnds[,1])]
  bmin <- min(bnds[,1])
  lines(x=c(-1, rep(pmin, 2)),
        y=c(rep(bmin, 2), -1), lty=3)
}
abline(h=0, lty=3)

]]>