Announcements

Means of i.i.d. samples

# Mean of the normal population used throughout this demo.
true_mu <- 42

# Zbar(n): draw n values from Normal(mean = true_mu, sd = 2) and return
# their average. `true_mu` is looked up when the function is called.
Zbar <- function(n) {
  draws <- rnorm(n, mean = true_mu, sd = 2)
  mean(draws)
}

# Long-format simulation results: 5000 single draws, then 5000 means of
# 10 draws, then 5000 means of 30 draws. `Samples` records how many draws
# went into each value of `Z`.
df <- data.frame(
  Z = c(
    rnorm(5000, mean = true_mu, sd = 2),
    vapply(seq_len(5000), function(i) Zbar(10), numeric(1)),
    vapply(seq_len(5000), function(i) Zbar(30), numeric(1))
  ),
  Samples = factor(rep(c(1, 10, 30), each = 5000))
)
# Overlaid density curves of Z, one per level of `Samples`, told apart by
# fill colour and line type.
ggplot(
  data = df,
  mapping = aes(x = Z, fill = Samples, linetype = Samples)
) +
  geom_density(alpha = 0.2) +
  theme_minimal()

Central limit theorem

How to think about the central limit theorem

Example: Exponential distribution

The distribution of the underlying random variable is not normal:

# True parameters, try changing them
rate <- 1.8
# The exponential distribution is only defined for a positive rate; fail
# early instead of producing meaningless moments below.
stopifnot(is.numeric(rate), length(rate) == 1L, is.finite(rate), rate > 0)
# Don't change this: mean and variance of Exp(rate), derived from `rate`.
true_mu <- 1 / rate
true_var <- 1 / rate^2
# Evaluation grid for the Exp(rate) density. The upper limit is the 99.9%
# quantile of the distribution, so the plotted range shrinks or grows with
# the distribution itself when `rate` is changed (a limit proportional to
# `rate` would move in the opposite direction of the distribution's scale).
X <- seq(from = 0, to = qexp(0.999, rate = rate), length.out = 100)
# Line plot of the theoretical density evaluated on the grid.
ggplot(
  data.frame(x = X, Probability = dexp(X, rate = rate)),
  aes(x, Probability)
) +
  geom_line() +
  theme_minimal() +
  ggtitle("Exp: model world")

Plot the sampling distribution of the centered and scaled sample mean, $\sqrt{n}\,(\bar{X}_n - \mu)$, as the sample size increases

# Xbar(n): draw n values from Exp(rate), then return the sample mean
# centered at `true_mu` and multiplied by sqrt(n).
Xbar <- function(n) {
  sample_mean <- mean(rexp(n, rate))
  sqrt(n) * (sample_mean - true_mu)
}

# 5000 simulated values of Xbar(n) for each of n = 20, 50, 100, stacked in
# that order, with `Sample_size` labelling the n behind each value.
df <- data.frame(
  X = unlist(lapply(
    c(20, 50, 100),
    function(size) replicate(5000, Xbar(size))
  )),
  Sample_size = factor(rep(c(20, 50, 100), each = 5000))
)
# Density-scaled bars of the simulated values in df$X, one facet row per
# sample size, with a zero-mean normal density (sd = sqrt(true_var)) drawn
# on top of each facet as the reference curve.
ggplot(df, aes(X, fill = Sample_size)) +
  geom_bar(alpha = 0.8, stat = "density", position = "identity") +
  stat_function(fun = dnorm, args = list(sd = sqrt(true_var))) +
  facet_grid(Sample_size ~ .) +
  ylab("Sample dist. of the mean") +
  ggtitle("Exp: CLT") +
  theme_minimal()