# Chapter 8: Functions and Tables

In [None]:
from datascience import *
import numpy as np
%matplotlib inline

## Function Example

In [None]:
def percents(counts, decimal_places=2):
    """Convert each value in ``counts`` to a percentage of the total of ``counts``.

    Args:
        counts: Array-like of numbers (NumPy array, list, or tuple).
        decimal_places: Number of decimal places to round to (default 2).

    Returns:
        The percentages, rounded to ``decimal_places``. If the values sum to
        zero the division is undefined and the result holds nan/inf values.
    """
    # np.sum / np.divide accept plain sequences as well as arrays, so a list
    # such as [2, 1, 4] works, not only objects that provide a .sum() method.
    total = np.sum(counts)
    return np.round(np.divide(counts, total) * 100, decimal_places)

parts = make_array(2, 1, 4)

# Compute both results first, then report them.
one_place = percents(parts, 1)
default_places = percents(parts)
print("Rounded to 1 decimal place:", one_place)
print("Rounded to the default number of decimal places:", default_places)

In [None]:
# IPython help syntax: a trailing `?` displays the docstring of `percents`.
percents?

## Apply a function to a table column

In [None]:
def characterize(age):
    """Return the life-stage label for ``age``.

    The label is the first of these whose inclusive upper bound is not
    exceeded: "baby" (1), "toddler" (3), "child" (10), "tween" (12),
    "teenager" (19). Any other age is labelled "adult".
    """
    stage_limits = (
        (1, "baby"),
        (3, "toddler"),
        (10, "child"),
        (12, "tween"),
        (19, "teenager"),
    )
    # Bounds are checked in ascending order, so the first match wins.
    for upper_bound, stage in stage_limits:
        if age <= upper_bound:
            return stage
    return "adult"

In [None]:
# Build the family table one labelled column at a time.
family = (
    Table()
    .with_column("Name", make_array("Josie", "James", "Claire"))
    .with_column("Age", make_array(2, 32, 31))
)

family

In [None]:
# Label each family member by applying `characterize` to the "Age" column.
age_descriptions = family.apply(characterize, "Age")
family.with_column("Description", age_descriptions)

## Making Predictions

In [None]:
# `read_table` is a classmethod, so call it on the Table class directly
# instead of constructing a throwaway empty Table() first.
grades = Table.read_table("grades_and_piazza.csv")
# Drop the columns at positions 3-6 (0-based); presumably these are not needed
# for the GPA analysis -- confirm against the CSV header.
grades = grades.drop(3, 4, 5, 6)
grades

In [None]:
# Largest observed value in each of the two predictor columns.
days_max, views_max = (
    max(grades.column(label)) for label in ("days online", "views")
)
print(days_max, views_max)

In [None]:
def predict_gpa(days_online, views, days_window=10, views_window=25,
                days_weight=3, views_weight=1):
    """Predict a GPA from the GPAs of students with similar activity.

    Two groups of rows are taken from the global ``grades`` table: students
    whose "days online" is near ``days_online`` and students whose "views" is
    near ``views``. The prediction is the weighted average of the two groups'
    mean GPAs.

    Args:
        days_online: Days online of the student to predict for.
        views: Number of views of the student to predict for.
        days_window: Half-width of the "days online" neighbourhood (default 10).
        views_window: Half-width of the "views" neighbourhood (default 25).
        days_weight: Weight given to the days-based mean GPA (default 3).
        views_weight: Weight given to the views-based mean GPA (default 1).

    Returns:
        The weighted average of the two neighbourhood mean GPAs. If either
        neighbourhood is empty, its mean (and therefore the result) is nan.
    """
    # NOTE: are.between(lo, hi) keeps lo <= value < hi, so each window includes
    # its lower edge but excludes its upper edge.
    near_days = are.between(days_online - days_window, days_online + days_window)
    near_views = are.between(views - views_window, views + views_window)

    close_days = grades.where("days online", near_days).column("GPA")
    close_views = grades.where("views", near_views).column("GPA")

    weighted_sum = (np.average(close_days) * days_weight
                    + np.average(close_views) * views_weight)
    return weighted_sum / (days_weight + views_weight)

In [None]:
# Run the predictor on every row, then show actual and predicted GPA together.
predicted_gpas = grades.apply(predict_gpa, "days online", "views")
grades = grades.with_column("Predicted GPA", predicted_gpas)
grades.select("GPA", "Predicted GPA")

In [None]:
# Relative error of each prediction: |1 - predicted / actual|.
gpa_ratio = grades.column("Predicted GPA") / grades.column("GPA")
relative_error = abs(1 - gpa_ratio)
grades = grades.with_column("Error", relative_error)
grades.set_format("Error", PercentFormatter)  # show the errors as percentages
grades

In [None]:
# Scatter actual and predicted GPA against days online.
gpa_labels = make_array("GPA", "Predicted GPA")
grades.scatter("days online", gpa_labels)

Suppose we want to predict a new student's GPA but all that we know is that
the student spent 55 days online. One way to predict the GPA is by averaging the GPAs of
the students who spent between 45 and 65 days online.

In [None]:
# Keep the students whose days online fall in the inclusive range 45-65.
in_window = are.between_or_equal_to(45, 65)
candidate_GPAs = grades.where("days online", in_window)
candidate_GPAs

In [None]:
# Predict by averaging the actual GPAs of the candidate students. Averaging
# the "Predicted GPA" column would average earlier predictions, not the data.
candidate_GPA_prediction = np.average(candidate_GPAs.column("GPA"))
candidate_GPA_prediction

In [None]:
import matplotlib.pyplot as plots

grades.scatter("days online", make_array("GPA", "Predicted GPA"))
# Red vertical lines mark the edges of the 45-65 day window that was used to
# select the candidate students.
for window_edge in (45, 65):
    plots.plot([window_edge, window_edge], [2.0, 4.0], color='red', lw=2)
# The red dot is the prediction for a student who spent 55 days online.
plots.scatter(55, candidate_GPA_prediction, color='red', s=40);