```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
```
### Background
- See the lecture notes on laws of large numbers for background
- Summary: considering samples of i.i.d. random variables
- As the sample size increases, the sample mean converges to its true expected value
### Experiments
Try out the law of large numbers for different kinds of random variables.
#### Binomial
Shape of the underlying random variable distribution
```{r}
# True parameters, try changing them
size = 50
true_prob = 2/3
# Don't change this
true_mu <- size*true_prob
ggplot(data.frame(Successes=0:50, Probability=dbinom(0:size, size, true_prob)), aes(Successes, Probability)) +
geom_bar(stat = "identity") + theme_minimal() + ggtitle("Binomial: model world")
```
Generate a random sample
```{r}
# Largest sample size
max_n <- 5000
# Generate a large sample
X <- rbinom(max_n, size, true_prob)
# Look at a few data points
head(X)
```
Plot the sample mean, as the sample size increases
```{r}
# Make a list of sample sizes up to the maximum one
n <- seq(from = 5, to = max_n, by = 20)
# Compute sample means for each sample size
Xbar_n <- sapply(n, function(first_n) mean(X[1:first_n]))
# Create a data frame for plotting
lln <- data.frame(n = n, Xbar_n = Xbar_n)
# Plot the sample sizes
ggplot(lln, aes(n, Xbar_n)) + geom_point() + geom_hline(yintercept = true_mu) +
ylab("Sample mean") + xlab("Sample size") + theme_minimal() + ggtitle("Binomial: data world")
```
#### Normal
Shape of the underlying random variable distribution
```{r}
# True parameters, try changing them
sd = 2
true_mu = sqrt(5)
# Don't change this
X <- seq(from = true_mu - 3*sd, to = true_mu + 3*sd, length.out = 100)
ggplot(data.frame(x=X, Probability=dnorm(X, true_mu, sd)), aes(x, Probability)) +
geom_line() + theme_minimal() + ggtitle("Normal: model world")
```
Generate a random sample
```{r}
# Largest sample size
max_n <- 5000
# Generate a large sample
X <- rnorm(max_n, true_mu, sd)
# Look at a few data points
head(X)
```
Plot the sample mean, as the sample size increases
```{r}
# Make a list of sample sizes up to the maximum one
n <- seq(from = 5, to = max_n, by = 20)
# Compute sample means for each sample size
Xbar_n <- sapply(n, function(first_n) mean(X[1:first_n]))
# Create a data frame for plotting
lln <- data.frame(n = n, Xbar_n = Xbar_n)
# Plot the sample sizes
ggplot(lln, aes(n, Xbar_n)) + geom_point() + geom_hline(yintercept = true_mu) +
ylab("Sample mean") + xlab("Sample size") + theme_minimal() + ggtitle("Normal: data world")
```
#### Exponential
Shape of the underlying random variable distribution
```{r}
# True parameters, try changing them
rate = 1.2
# Don't change this
true_mu <- 1/rate
X <- seq(from = 0, to = true_mu + 2*sqrt(true_mu), length.out = 100)
ggplot(data.frame(x=X, Probability=dexp(X, rate)), aes(x, Probability)) +
geom_line() + theme_minimal() + ggtitle("Exponential: model world")
```
Generate a random sample
```{r}
# Largest sample size
max_n <- 5000
# Generate a large sample
X <- rexp(max_n, rate)
# Look at a few data points
head(X)
```
Plot the sample mean, as the sample size increases
```{r}
# Make a list of sample sizes up to the maximum one
n <- seq(from = 10, to = max_n, by = 20)
# Compute sample means for each sample size
Xbar_n <- sapply(n, function(first_n) mean(X[1:first_n]))
# Create a data frame for plotting
lln <- data.frame(n = n, Xbar_n = Xbar_n)
# Plot the sample sizes
ggplot(lln, aes(n, Xbar_n)) + geom_point() + geom_hline(yintercept = true_mu) +
ylab("Sample mean") + xlab("Sample size") + theme_minimal() + ggtitle("Exponential: data world")
```
#### Poisson
Shape of the underlying random variable distribution
```{r}
# True parameters, try changing them
lambda = 12
# Don't change this
true_mu <- lambda
X <- seq(from = 0, to = ceiling(lambda + 4*sqrt(lambda)))
ggplot(data.frame(x=X, Probability=dpois(X, lambda)), aes(x, Probability)) +
geom_bar(stat = "identity") + theme_minimal() + ggtitle("Poisson: model world")
```
Generate a random sample
```{r}
# Largest sample size
max_n <- 5000
# Generate a large sample
X <- rpois(max_n, lambda)
# Look at a few data points
head(X)
```
Plot the sample mean, as the sample size increases
```{r}
# Make a list of sample sizes up to the maximum one
n <- seq(from = 5, to = max_n, by = 20)
# Compute sample means for each sample size
Xbar_n <- sapply(n, function(first_n) mean(X[1:first_n]))
# Create a data frame for plotting
lln <- data.frame(n = n, Xbar_n = Xbar_n)
# Plot the sample sizes
ggplot(lln, aes(n, Xbar_n)) + geom_point() + geom_hline(yintercept = true_mu) +
ylab("Sample mean") + xlab("Sample size") + theme_minimal() + ggtitle("Poisson: data world")
```
#### Beta
Shape of the underlying random variable distribution
```{r}
# True parameters, try changing them
shape1 = 2
shape2 = 10
# Don't change this
true_mu <- shape1/(shape1+shape2)
X <- seq(from = 0, to = 1, length.out = 100)
ggplot(data.frame(x = X, Probability = dbeta(X, shape1, shape2)), aes(x, Probability)) +
geom_line() + theme_minimal() + ggtitle("Beta: model world")
```
Generate a random sample
```{r}
# Largest sample size
max_n <- 5000
# Generate a large sample
X <- rbeta(max_n, shape1, shape2)
# Look at a few data points
head(X)
```
Plot the sample mean, as the sample size increases
```{r}
# Make a list of sample sizes up to the maximum one
n <- seq(from = 5, to = max_n, by = 20)
# Compute sample means for each sample size
Xbar_n <- sapply(n, function(first_n) mean(X[1:first_n]))
# Create a data frame for plotting
lln <- data.frame(n = n, Xbar_n = Xbar_n)
# Plot the sample sizes
ggplot(lln, aes(n, Xbar_n)) + geom_point() + geom_hline(yintercept = true_mu) +
ylab("Sample mean") + xlab("Sample size") + theme_minimal() + ggtitle("Beta: data world")
```
### Uniform
Exercise: copy and paste the Beta section here, then make the following changes.
In the first section of code, where it says `# True parameters...`
- Change `shape1 = ...` to `min_u = 0`
- Change `shape2 = ...` to `max_u = ` some positive number, you choose it (in class we called this $\theta$)
- Change `true_mu = ...` to `true_mu = max_u/2` (the middle point is the average)
In the second section of code, which begins `# Largest sample size`
- Change `dbeta` to `dunif`
- Change the "arguments" of `dunif` from `(max_n, shape1, shape2)` to `(max_n, min, max)`
In the last section of code, which creates the plot
- Change `rbeta` to `runif`
- Also change the arguments to this function the same way as in the previous section for `dunif`
In the first and last sections of code
- Change `ggtitle("Beta: ...")` to `ggtitle("Uniform: ...")`
Once you finish this, delete all the previous text starting right above your new Uniform section up to the first chunk of code, the one that says `library(tidyverse)`. Also delete these instructions. Save your edited .Rmd file, knit it (click the knit button), and email me the result (from your Stern or NYU email).