Outline

Interpreting p-values

# Keep only the rows of `bechdel` with no missing value in any column.
movies <- bechdel[complete.cases(bechdel), ]
# Return on investment: international gross divided by budget, both taken
# from the `*_2013` columns of the data.
movies[["return"]] <- with(movies, intgross_2013 / budget_2013)
# Rank-based two-sample comparison of `return` across the levels of `binary`.
wilcox.test(return ~ binary, data = movies)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  return by binary
## W = 299610, p-value = 0.0412
## alternative hypothesis: true location shift is not equal to 0

P-hacking

Multiple testing

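\[ P(\text{at least one false positive out of } m \text{ tests}) = 1 - P(\text{no false positives out of } m \text{ tests}) \] If all \(m\) null hypotheses are true and the tests are independent, each carried out at level \(\alpha = 0.05\), then each test avoids a false positive with probability \(0.95\), and the probabilities multiply: \[ P(\text{no false positives out of } m \text{ tests}) = (1 - 0.05)^m = (0.95)^m \]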

# Number of tests considered: 1, 2, ..., 90.
m <- seq_len(90)
# Family-wise error rate for m tests, computed as 1 - 0.95^m.
FWER <- data.frame(m = m)
FWER$FWER <- 1 - 0.95^FWER$m
# Scatter plot of the family-wise error rate against the number of tests.
ggplot(data = FWER, mapping = aes(x = m, y = FWER)) +
  geom_point() +
  theme_tufte()

Confidence intervals are better

Statistical significance and practical significance

# Two samples of one million standard-deviation-1 normal draws whose true
# means differ by only 0.01.
group1 <- rnorm(n = 1e6, mean = 0.01)
group2 <- rnorm(n = 1e6, mean = 0)
# Two-sample t-test comparing the means of the two samples.
t.test(x = group1, y = group2)
## 
##  Welch Two Sample t-test
## 
## data:  group1 and group2
## t = 5.646, df = 2e+06, p-value = 1.642e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.005209812 0.010750209
## sample estimates:
##    mean of x    mean of y 
## 0.0082484169 0.0002684068
# Plotting window for the x axis: the two end points -2 and 2.
range <- data.frame(x = c(-2, 2))
# Draw the two normal densities (means 0.01 and 0, both sd 1) on the same
# axes.
ggplot(data = range, mapping = aes(x = x)) +
  stat_function(fun = dnorm, args = list(sd = 1, mean = 0.01)) +
  stat_function(fun = dnorm, args = list(sd = 1, mean = 0)) +
  ggtitle("Two significantly different distributions?") +
  theme_tufte()

# One-sided 5% critical value and the effect size used in this example.
# Both globals are kept in case code outside this view reads them, but
# `powern()` no longer depends on them: `c_null` is reassigned further down
# the file, which would otherwise silently change what `powern()` returns.
c_null <- qnorm(.95)
mu <- 0.001

#' Power of a one-sided z-test as a function of sample size
#'
#' Evaluates P(Z > crit - mu * sqrt(n)) for a standard normal Z, where
#' `crit` is the upper `alpha` quantile of the standard normal.
#'
#' @param n Numeric vector of sample sizes.
#' @param mu True mean under the alternative (default 0.001).
#' @param alpha Level of the one-sided test (default 0.05).
#' @return Numeric vector of power values, the same length as `n`.
powern <- function(n, mu = 0.001, alpha = 0.05) {
  crit <- qnorm(1 - alpha)
  # Upper tail requested directly rather than as 1 - pnorm(...), which
  # loses precision when the tail probability is very small.
  pnorm(crit - mu * sqrt(n), lower.tail = FALSE)
}
# Sample sizes spanning 10 to 10^7; only the end points matter for the
# plotting window.
range <- data.frame(n = 10^(1:7))
# Trace `powern` across that window.
ggplot(data = range, mapping = aes(x = n)) +
  stat_function(fun = powern) +
  theme_tufte() +
  labs(y = "Power") +
  ggtitle("Power as a function of sample size, mu = 0.001")

# Two-sided 5% critical value. Kept as a global in case code outside this
# view reads it; `powermu()` computes its own critical value from `alpha`.
c_null <- qnorm(.975)

#' Power of a two-sided z-test as a function of the true mean
#'
#' Sums the two rejection-region probabilities,
#' P(Z < -crit - mu * sqrt(n)) + P(Z > crit - mu * sqrt(n)),
#' for a standard normal Z.
#'
#' @param mu Numeric vector of true means.
#' @param n Sample size (default 100, so sqrt(n) = 10).
#' @param alpha Level of the two-sided test (default 0.05).
#' @return Numeric vector of power values, the same length as `mu`.
powermu <- function(mu, n = 100, alpha = 0.05) {
  crit <- qnorm(1 - alpha / 2)
  shift <- mu * sqrt(n)
  # `FALSE` spelled out: `F` is an ordinary variable that can be reassigned.
  pnorm(-crit - shift) + pnorm(crit - shift, lower.tail = FALSE)
}
# Grid of 100 candidate true means from -0.5 to 0.5; it sets the plotting
# window for the x axis.
range <- data.frame(mu = seq(-0.5, 0.5, length.out = 100))
# Trace `powermu` across that window.
ggplot(data = range, mapping = aes(x = mu)) +
  stat_function(fun = powermu) +
  theme_tufte() +
  labs(y = "Power") +
  ggtitle("Power as a function of true mean (two-sided), n = 100")