rm(list=ls())
# ==========================================================================
# Monte Carlo Example of CLT
# ==========================================================================
set.seed(123)
dist_x <- c()
for (i in 1:100000){
dist_x[i] <- mean(runif(30))
}
head(dist_x)
## [1] 0.5724002 0.4386409 0.5126119 0.5192552 0.4779382 0.4803582
hist(sqrt(30)*(dist_x-0.5),breaks=100,prob=TRUE)
curve(dnorm(x,mean=0,sd=sqrt(1/12)),add=TRUE)
# ==========================================================================
# ECDF
# ==========================================================================
n <- 10
x <- 1:n
plot(ecdf(x))
x1 <- rnorm(n)
plot(ecdf(x1),cex=0,col='red')
curve(pnorm(x,mean=0,sd=1), -6,6, add=TRUE, col='blue')
# ==========================================================================
# ECDF
# ==========================================================================
n <- 1000
x <- 1:n
plot(ecdf(x))
x1 <- rnorm(n)
plot(ecdf(x1),cex=0,col='red')
curve(pnorm(x,mean=0,sd=1), -6,6, add=TRUE, col='blue')
# ==========================================================================
# Bootstrap
# ==========================================================================
n <- 10
x <- rnorm(n) # Norm(0,1) distribution, 10 points
print(x)
## [1] -1.0914135 -0.1123605 -1.9947612 -0.6493271 -0.1016537 -1.1586340
## [7] -0.2428534 -0.5253778 0.3837271 -1.1371902
#
# We want to check the bootstrap sample std of the sample mean
#
### n-sample mean and n-sample std, the analytical solution
x_mean <- mean(x) # n-sample mean
x_std <- sd(x) # n-sample standard deviation
x_mean_std <- x_std/sqrt(n)
x_mean_std
## [1] 0.2195257
### The bootstrapped sample
B <- 10000
boot <- list()
for (i in 1:B){
boot[[i]] <- sample(x, n, replace=TRUE, prob=rep(1/n,n))
}
print(boot[[sample(1:10000, 1)]])
## [1] -0.6493271 -1.1371902 -0.1016537 -1.0914135 -0.2428534 -1.1371902
## [7] -0.1016537 -1.1586340 -0.1016537 -1.1371902
boot_mean <- lapply(boot,mean)# Calculate the n-sample mean for each of the 10000 b-samples
boot_mean <- unlist(boot_mean)
length(boot_mean)
## [1] 10000
x_mean_std_boot <- sqrt(sum((boot_mean - mean(boot_mean))^2)/B) # This is the b-sample std of the bootstrapped mean. It should be similar to x_mean_std, which is obtained analytically.
x_mean_std_boot
## [1] 0.2089893
#
# We also see that when n is small, x is not good enough to represent N(0,1) distribution.
#
x_mean # should be close to 0 if n is large
## [1] -0.6629844
x_mean_std # should be close to 1/sqrt(10)
## [1] 0.2195257
x_mean_std_true <- 1/sqrt(n)
print(c(x_mean_std_true, x_mean_std, x_mean_std_boot))
## [1] 0.3162278 0.2195257 0.2089893
### The difference of the first and the second is determined by n; the difference of the second and the third is determined by B