{ "cells": [ { "cell_type": "markdown", "id": "86519cb5-59b3-446d-8679-d0722b79bcb8", "metadata": {}, "source": [ "# Chapter 8: Functions and Tables" ] }, { "cell_type": "code", "execution_count": null, "id": "95967072-6417-44d1-b677-444e83bbc29b", "metadata": {}, "outputs": [], "source": [ "from datascience import *\n", "import numpy as np\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "id": "8bb7cd32-1526-4f57-91c9-e08d4639b7ca", "metadata": {}, "source": [ "## Function Example" ] }, { "cell_type": "code", "execution_count": null, "id": "68fde631-1787-4df8-be09-585c8b1e6e0e", "metadata": {}, "outputs": [], "source": [ "def percents(counts, decimal_places=2):\n", " \"\"\"Convert the values in array_x to percents out of the total of array_x.\"\"\"\n", " total = counts.sum()\n", " return np.round((counts/total)*100, decimal_places)\n", "\n", "parts = make_array(2, 1, 4)\n", "print(\"Rounded to 1 decimal place:\", percents(parts, 1))\n", "print(\"Rounded to the default number of decimal places:\", percents(parts))" ] }, { "cell_type": "code", "execution_count": null, "id": "7046c8ff-3126-4047-b60d-fce313909f16", "metadata": {}, "outputs": [], "source": [ "percents?" ] }, { "cell_type": "markdown", "id": "5fbad763-7305-42a1-b9ec-b1f3b2404ca0", "metadata": {}, "source": [ "## Apply a function to a table column" ] }, { "cell_type": "code", "execution_count": null, "id": "5467f926-ea91-49d6-95e8-e75175616479", "metadata": {}, "outputs": [], "source": [ "def characterize(age):\n", " if age <= 1:\n", " result = \"baby\"\n", " elif age <= 3:\n", " result = \"toddler\"\n", " elif age <= 10:\n", " result = \"child\"\n", " elif age <= 12:\n", " result = \"tween\"\n", " elif age <= 19:\n", " result = \"teenager\"\n", " else:\n", " result = \"adult\"\n", " return result" ] }, { "cell_type": "code", "execution_count": null, "id": "ed763e9d-aa35-446c-8e93-91c6f014c433", "metadata": {}, "outputs": [], "source": [ "family = Table().with_columns(\n", " \"Name\", make_array(\"Josie\", \"James\", \"Claire\"),\n", " \"Age\", make_array(2, 32, 31))\n", "\n", "family" ] }, { "cell_type": "code", "execution_count": null, "id": "82e883f7-bde4-498d-b40a-0524e586ae79", "metadata": {}, "outputs": [], "source": [ "family.with_column(\"Description\", family.apply(characterize, \"Age\"))" ] }, { "cell_type": "markdown", "id": "0086b6e3-08c5-48cb-ae4b-316f2135b763", "metadata": {}, "source": [ "## Making Predictions" ] }, { "cell_type": "code", "execution_count": null, "id": "a125381e-880f-4ad1-b4dd-83eb632adeaa", "metadata": {}, "outputs": [], "source": [ "grades = Table().read_table(\"grades_and_piazza.csv\")\n", "grades = grades.drop(3, 4, 5, 6)\n", "grades" ] }, { "cell_type": "code", "execution_count": null, "id": "0ecf23d4-7d48-417d-94bd-193fb076ad63", "metadata": {}, "outputs": [], "source": [ "days_max = max(grades.column(\"days online\"))\n", "views_max = max(grades.column(\"views\"))\n", "print(days_max, views_max)" ] }, { "cell_type": "code", "execution_count": null, "id": "6ac4691d-a693-4f7b-a43c-5192e0404a0a", "metadata": {}, "outputs": [], "source": [ "def predict_gpa(days_online, views):\n", " close_days = grades.where(\"days online\", are.between(days_online-10, days_online+10)).column(\"GPA\")\n", " close_views = grades.where(\"views\", are.between(views - 25, views + 25)).column(\"GPA\")\n", " return (np.average(close_days)*3 + np.average(close_views)) / 4" ] }, { "cell_type": "code", "execution_count": null, "id": "0be67ed7-83d4-4893-90a6-51369ac120de", "metadata": {}, "outputs": [], "source": [ "grades = grades.with_column(\"Predicted GPA\", grades.apply(predict_gpa, \"days online\", \"views\"))\n", "grades.select(\"GPA\", \"Predicted GPA\")" ] }, { "cell_type": "code", "execution_count": null, "id": "565040fe-6118-497a-9643-297e552b7a01", "metadata": {}, "outputs": [], "source": [ "grades = grades.with_column(\"Error\", abs(1 - grades.column(\"Predicted GPA\") / grades.column(\"GPA\")))\n", "grades.set_format(\"Error\", PercentFormatter)\n", "grades" ] }, { "cell_type": "code", "execution_count": null, "id": "459472e7-6de0-4489-9521-32d0ec081b50", "metadata": {}, "outputs": [], "source": [ "grades.scatter(\"days online\", make_array(\"GPA\", \"Predicted GPA\"))" ] }, { "cell_type": "markdown", "id": "098f67bd-abee-40a1-9643-f9e73fc7732f", "metadata": {}, "source": [ "Suppose we want to predict a new student's GPA but all that we know is that\n", "the student spent 55 days online. One way to predict the GPA is by averaging the GPAs of\n", "the students who spent between 45 and 65 days online." ] }, { "cell_type": "code", "execution_count": null, "id": "d42aed7b-ae72-4d83-9c50-4a70d9f6c4c7", "metadata": {}, "outputs": [], "source": [ "candidate_GPAs = grades.where(\"days online\", are.between_or_equal_to(45, 65))\n", "candidate_GPAs" ] }, { "cell_type": "code", "execution_count": null, "id": "8853f9ec-706b-4703-b51f-6bd8fc60c120", "metadata": {}, "outputs": [], "source": [ "candidate_GPA_prediction = np.average(candidate_GPAs.column(\"Predicted GPA\"))\n", "candidate_GPA_prediction" ] }, { "cell_type": "code", "execution_count": null, "id": "f87a4d96-3d54-4a20-9c8a-657b47e9d374", "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plots\n", "grades.scatter(\"days online\", make_array(\"GPA\", \"Predicted GPA\"))\n", "plots.plot([50,50], [2.0, 4.0], color='red', lw=2)\n", "plots.plot([60, 60], [2.0, 4.0], color='red', lw=2)\n", "plots.scatter(55, candidate_GPA_prediction, color='red', s=40);" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.1" } }, "nbformat": 4, "nbformat_minor": 5 }