# Chapter 13.4 - Using Confidence Intervals

## Repeated information from Chapter 13.3, Confidence Intervals:

In [None]:
from datascience import *
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plots

In [None]:
# ski_resorts.csv must sit in the same directory as this notebook
ski_resorts = Table.read_table("ski_resorts.csv")
ski_resorts.show(5)

In [None]:
# Look at the distribution of the observed "Total Snowfall" values before resampling
ski_resorts.hist("Total Snowfall")

In [None]:
# Treat the 875 rows in our csv file as a random sample from a much larger population
def one_bootstrap_mean():
    """Return the mean 'Total Snowfall' of one bootstrap resample of ski_resorts."""
    # sample() is called with its defaults; per the datascience docs this draws
    # as many rows as the table has, with replacement
    resampled_snowfall = ski_resorts.sample().column('Total Snowfall')
    return np.average(resampled_snowfall)

In [None]:
# Generate many means from bootstrap samples
def many_bootstrap_means(how_many):
    """Simulate `how_many` bootstrap means and return them as a NumPy array.

    Parameters
    ----------
    how_many : int
        Number of bootstrap resamples to draw; one mean is recorded per resample.

    Returns
    -------
    numpy.ndarray
        The simulated means, in the order they were drawn (empty when
        how_many is 0).
    """
    # Collect the means in a list and convert once at the end: np.append
    # copies the whole array on every call, so appending inside the loop does
    # work that grows quadratically with how_many.
    means = [one_bootstrap_mean() for _ in np.arange(how_many)]
    # dtype=float keeps the empty case a float array, like make_array() did
    return np.array(means, dtype=float)

In [None]:
# Simulate 1000 bootstrap means; their 2.5th and 97.5th percentiles are the
# endpoints of the 95% confidence interval (the middle 95% of the means)
bootstrap_means = many_bootstrap_means(1000)
left, right = (percentile(p, bootstrap_means) for p in (2.5, 97.5))
make_array(left, right)

The two values in the array are the endpoints of an approximate 95% confidence interval for the population mean Total Snowfall.
Here is a histogram of the bootstrap means to help visualize the interval:

In [None]:
# Histogram of the bootstrap means; the yellow bar on the x-axis marks [left, right]
resampled_means = Table().with_column(
    'Bootstrap Sample Mean', bootstrap_means
)
resampled_means.hist(bins=20, unit="Inches")
plots.plot([left, right], [0, 0], color='yellow', linewidth=8);

## An Incorrect Use of a Confidence Interval

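Avoid the common mistake of misinterpreting the confidence interval.
For example, it is incorrect to conclude that 95% of the ski resorts have a total snowfall 
in the interval [left, right] found above. Why is this? The interval estimates the *average* total snowfall in the population, not the spread of individual resorts; the cell below checks what percentage of resorts actually fall inside it.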

In [None]:
# Keep only the resorts whose Total Snowfall lies inside [left, right] (inclusive)
# and report what share of all resorts that is
low_bound, high_bound = left, right
reduced_ski_resorts = (
    ski_resorts
    .where("Total Snowfall", are.above_or_equal_to(low_bound))
    .where("Total Snowfall", are.below_or_equal_to(high_bound))
)
print(
    "The percentage of ski resorts in this interval = {:.2f}%.".format(
        reduced_ski_resorts.num_rows / ski_resorts.num_rows * 100
    )
)

## A Correct Use of a Confidence Interval

But we can use a confidence interval to test a hypothesis!
- **Null Hypothesis** - The average total snowfall in the population is 100
- **Alternative Hypothesis** - The average total snowfall in the population is not 100

The null hypothesis can be rejected at the 5% level of significance, since the hypothesized value of 100 is not in the 95% confidence interval.

## Another Correct Use of a Confidence Interval

Here is another example. Let the **Null Hypothesis** be that, on average, the Average Summit Depth
is no more than 10 inches greater than the Average Base Depth. (Note: these two numbers are *paired*, since each row of the table supplies both.) 
To test this hypothesis, we can use the bootstrap method to build a 99% confidence interval for the average difference.

In [None]:
# Pair each row's two depths and add their difference (summit minus base) as a column
depth_table = ski_resorts.select("Average Base Depth", "Average Summit Depth").with_column(
    "Difference",
    ski_resorts.column("Average Summit Depth") - ski_resorts.column("Average Base Depth"),
)

In [None]:
# Mean of the "Difference" column in depth_table, i.e. the observed average difference
print("The average difference is {:.2f} inches.".format(np.average(depth_table.column("Difference"))))

In [None]:
# NOTE: this redefines one_bootstrap_mean from the Total Snowfall section above;
# once this cell has run, the name refers to the 'Difference' version below.
def one_bootstrap_mean():
    """Return the mean 'Difference' of one bootstrap resample of depth_table."""
    # Whole rows are resampled, so each difference stays tied to its own row
    resampled_differences = depth_table.sample().column('Difference')
    return np.average(resampled_differences)

In [None]:
# Generate many bootstrap means
# NOTE: this redefines many_bootstrap_means from the Total Snowfall section above.
def many_bootstrap_means(num_repetitions):
    """Simulate `num_repetitions` bootstrap means and return them as a NumPy array.

    Parameters
    ----------
    num_repetitions : int
        Number of bootstrap resamples to draw; one mean is recorded per resample.

    Returns
    -------
    numpy.ndarray
        The simulated means, in the order they were drawn (empty when
        num_repetitions is 0).
    """
    # Collect the means in a list and convert once at the end: np.append
    # copies the whole array on every call, so appending inside the loop does
    # work that grows quadratically with num_repetitions.
    bstrap_means = [one_bootstrap_mean() for _ in np.arange(num_repetitions)]
    # dtype=float keeps the empty case a float array, like make_array() did
    return np.array(bstrap_means, dtype=float)

In [None]:
# Simulate 1000 bootstrap means; their 0.5th and 99.5th percentiles are the
# endpoints of the 99% confidence interval (the middle 99% of the means)
bstrap_means = many_bootstrap_means(1000)
left, right = (percentile(p, bstrap_means) for p in (0.5, 99.5))
make_array(left, right)

In [None]:
# Histogram of the bootstrap means; the yellow bar on the x-axis marks [left, right]
resampled_means = Table().with_columns('Bootstrap Sample Mean', bstrap_means)
resampled_means.hist()
plots.plot([left, right], [0, 0], color='yellow', linewidth=8);

Notes:
- The higher we want our confidence to be, the wider the interval becomes.
- We have done better than simply concluding that we can reject the null hypothesis. We have estimated how big the average difference is. That’s a more useful result than just saying, “It’s not 10 inches or less.”