# Math 242, Sept 16, Confidence Intervals

require(mosaic)

# 5.01p
# Large-sample 95% confidence interval for the mean height in Galton:
#   xbar +/- 1.96 * s / sqrt(n)
# The column is passed with the formula interface (~height), which is the
# documented way to combine a variable name with data= in mosaic's
# summary functions.
xbar = mean(~height, data = Galton)
s = sd(~height, data = Galton)
n = nrow(Galton)
z = 1.96                 # critical value for a two-sided 95% interval
se = s / sqrt(n)         # standard error of the sample mean
lower = xbar - z * se
upper = xbar + z * se
c(lower = lower, upper = upper)   # display the interval
# CI is (66.53, 67), so extremely close to the bootstrap CI

# Now we must check the conditions. n = 898 > 30, but are the data skewed?
# Look at the shape of the height distribution two ways:
# first as a density plot, then as a histogram of the same variable.
densityplot(~ height, data = Galton)
histogram(~ height, data = Galton)
# Looks pretty normal, and definitely not skewed.

# 5.02p
# Split Galton by sex. `women` is reused further down in the script.
men = subset(Galton, Galton$sex == 'M')
women = subset(Galton, Galton$sex == 'F')

# Sample mean, standard deviation and size of height within each group.
xbarmen = mean(men$height)
xbarwomen = mean(women$height)
smen = sd(men$height)
swomen = sd(women$height)
nmen = nrow(men)
nwomen = nrow(women)

# 95% CI for the difference in mean height (men - women):
#   (xbarmen - xbarwomen) +/- 1.96 * sqrt(smen^2/nmen + swomen^2/nwomen)
sediff = sqrt(smen^2 / nmen + swomen^2 / nwomen)   # SE of the difference
lower = xbarmen - xbarwomen - 1.96 * sediff
upper = xbarmen - xbarwomen + 1.96 * sediff
c(lower = lower, upper = upper)   # display the interval

# 5.41 - proportion of people with height > 70...
phat = nrow(subset(Galton,Galton$height > 70))/nrow(Galton)
phat
qhat = 1 - phat
n = nrow(Galton)
# 95% CI for a single proportion: phat +/- 1.96 * sqrt(phat * qhat / n)
sephat = sqrt(phat * qhat / n)
lower = phat - 1.96 * sephat
upper = phat + 1.96 * sephat
c(lower = lower, upper = upper)   # display the interval

# 5.42 - difference in proportion between men of height > 70 and women of height > 70
# `women` was created in 5.02p; build the matching subset of men here.
men = subset(Galton, Galton$sex == 'M')

# Number of women with height > 70.
# NOTE(review): the normal approximation below needs a reasonable number of
# people above and below 70 in each group -- check this count before
# trusting the interval.
nrow(subset(women,women$height > 70))

# Sample proportion with height > 70 in each group.
pmen = nrow(subset(men, men$height > 70)) / nrow(men)
pwomen = nrow(subset(women, women$height > 70)) / nrow(women)

# 95% CI for the difference in proportions (men - women):
#   (pmen - pwomen) +/- 1.96 * sqrt(pmen*(1-pmen)/nmen + pwomen*(1-pwomen)/nwomen)
sediff = sqrt(pmen * (1 - pmen) / nrow(men) + pwomen * (1 - pwomen) / nrow(women))
lower = pmen - pwomen - 1.96 * sediff
upper = pmen - pwomen + 1.96 * sediff
c(lower = lower, upper = upper)   # display the interval


# Effect size
# Sample size, standard deviation and mean of the recorded swim times.
nrow(SwimRecords)
sd(SwimRecords$time)
mean(SwimRecords$time)
# 95% confidence interval for the mean time.
# confint() takes `level` as a probability (0.95, not 95), and its second
# argument selects parameters rather than naming a statistic, so it cannot be
# called as confint(x, mean, level=95). A one-sample t procedure gives the
# interval for the mean directly.
stats::t.test(SwimRecords$time, conf.level = 0.95)$conf.int
help(confint)
SwimRecords

# Load the 'inference' function:
load(url('http://assets.datacamp.com/course/dasi/inference.Rdata'))
# `inference` is an object restored by load() above, not an installed
# package, so it cannot be attached with require(). Instead, confirm that
# the function actually arrived before calling it.
stopifnot(exists("inference", mode = "function"))

# Run the inference function:
inference(SwimRecords$time, type="ci", method="theoretical", conflevel=0.95, est="mean", boot_method="perc")