{ "cells": [ { "cell_type": "markdown", "id": "1f673859-da9c-41a5-97ff-4f1b21c66f47", "metadata": {}, "source": [ "# Chapter 6" ] }, { "cell_type": "code", "execution_count": 1, "id": "ceece285-8718-4bc2-ba0f-9ac3645c7a8e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "C:\\Users\\n57g588\\AppData\\Local\\Programs\\Python\\Python313\\Lib\\site-packages\\datascience\\maps.py:13: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n", " import pkg_resources\n" ] } ], "source": [ "from datascience import *" ] }, { "cell_type": "markdown", "id": "8770f576-cee3-46c6-8bc5-6fe0447bdfa3", "metadata": {}, "source": [ "## Students Table" ] }, { "cell_type": "code", "execution_count": 2, "id": "52a17041-0f47-45a2-8166-1ad16dd5c932", "metadata": {}, "outputs": [], "source": [ "students = Table()" ] }, { "cell_type": "code", "execution_count": null, "id": "9a3a6bc8-d6b2-46fe-b9c2-72f5d02f6c1d", "metadata": {}, "outputs": [], "source": [ "students = students.with_columns(\"First Name\", make_array(\"Cedric\", \"Ashley\", \"Alexis\", \"Lucy\", \"Devon\"))\n", "students" ] }, { "cell_type": "code", "execution_count": null, "id": "3e6ede5c-46c9-4c35-ba72-33c6bf8c93b4", "metadata": {}, "outputs": [], "source": [ "students = students.with_columns(\n", " \"Last Name\", make_array(\"Jefferson\", \"Juric\", \"Lameres\", \"Lewis\", \"Maurer\"),\n", " \"Major Code\", make_array(\"CSPR\", \"NDGR\", \"CSBA\", \"CSPR\", \"CSPR\"),\n", " \"Age\", make_array(21, 25, 20, 22, 20))\n", "students" ] }, { "cell_type": "code", "execution_count": null, "id": "f25b3fdb-3dfd-4599-8006-e2344ce3bec4", "metadata": {}, "outputs": [], "source": [ "students.column(2)" ] }, { "cell_type": "code", "execution_count": null, "id": "c01a697c-d3f7-434d-9ba2-cf4a1c139171", "metadata": {}, 
"outputs": [], "source": [ "students.column(\"Major Code\")" ] }, { "cell_type": "code", "execution_count": null, "id": "a8b275c7-38b6-470f-a65f-3f8de8d106b6", "metadata": {}, "outputs": [], "source": [ "students.column(\"Last Name\").item(1)" ] }, { "cell_type": "code", "execution_count": null, "id": "cb20ff11-e882-458e-95c9-508546012d9f", "metadata": {}, "outputs": [], "source": [ "students.select(\"Last Name\", \"Major Code\")" ] }, { "cell_type": "code", "execution_count": null, "id": "d5a93926-a162-426a-bc85-ef31c03be6b5", "metadata": {}, "outputs": [], "source": [ "students.drop(\"First Name\", \"Age\")" ] }, { "cell_type": "code", "execution_count": null, "id": "5fb5c7e6-ebf8-416f-8e84-3f471cc0295a", "metadata": {}, "outputs": [], "source": [ "students.sort(\"First Name\").show()" ] }, { "cell_type": "code", "execution_count": null, "id": "eb6a997f-5fe7-4738-aff9-aed45c030e7b", "metadata": {}, "outputs": [], "source": [ "students" ] }, { "cell_type": "code", "execution_count": null, "id": "a29ca197-a603-4c66-a0fe-ffcb1199aee8", "metadata": {}, "outputs": [], "source": [ "alphabetically_last = students.sort(\"First Name\", descending=True).column(\"First Name\").item(0)\n", "alphabetically_last" ] }, { "cell_type": "code", "execution_count": null, "id": "765c8a2d-0ac3-408e-b28e-b80528e19a8b", "metadata": {}, "outputs": [], "source": [ "students = students.with_column(\"Age\", students.column(\"Age\") + 1)\n", "students" ] }, { "cell_type": "code", "execution_count": null, "id": "99939509-cd7c-415c-8115-48ecc9b45d51", "metadata": {}, "outputs": [], "source": [ "tutor_age = 23\n", "students = students.with_column(\"Age Percentage\", students.column(\"Age\") / tutor_age).set_format(\"Age Percentage\", PercentFormatter)\n", "students" ] }, { "cell_type": "code", "execution_count": null, "id": "ce5cf28f-a4da-4d7c-bd3c-8f2223feae4f", "metadata": {}, "outputs": [], "source": [ "help(students.sort)" ] }, { "cell_type": "code", "execution_count": null, "id": 
"49bbd135-a3d5-43d9-8526-ee487d42c065", "metadata": {}, "outputs": [], "source": [ "students.take(2)" ] }, { "cell_type": "code", "execution_count": null, "id": "839080ee-6534-43a2-8cb8-eeb86e59bd8a", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "students.take(np.arange(0, students.num_rows, 2))" ] }, { "cell_type": "code", "execution_count": null, "id": "68700cec-9bf2-40fc-b7ad-e72bed524f72", "metadata": {}, "outputs": [], "source": [ "students.where(\"Major Code\", are.equal_to(\"CSPR\"))" ] }, { "cell_type": "code", "execution_count": null, "id": "ebf8f1ef-7976-4977-aeb1-904c3b1430c8", "metadata": {}, "outputs": [], "source": [ "students.where(\"Major Code\", \"CSPR\").sort(\"Age\")" ] }, { "cell_type": "code", "execution_count": null, "id": "ce795821-e27b-48eb-a1ba-d5ba46a00bdc", "metadata": {}, "outputs": [], "source": [ "students.where(\"Major Code\", \"CSPR\").where(\"Age\", are.above(21))" ] }, { "cell_type": "markdown", "id": "4f44adae-fc69-454a-9731-cfd8fcad1a5b", "metadata": {}, "source": [ "## Predicates: \n", "- are.equal_to (also are.not_equal_to)\n", "- are.above\n", "- are.above_or_equal_to\n", "- are.below\n", "- are.below_or_equal_to\n", "- are.between\n", "- are.strictly_between\n", "- are.between_or_equal_to\n", "- are.containing" ] }, { "cell_type": "code", "execution_count": null, "id": "b3d474a5-6684-4d8d-85d4-d92d6e6a484b", "metadata": {}, "outputs": [], "source": [ "students.where(\"Major Code\", are.containing(\"CS\"))" ] }, { "cell_type": "markdown", "id": "7b959290-d276-4065-a1da-71e33f27aec0", "metadata": {}, "source": [ "## Actors Table" ] }, { "cell_type": "code", "execution_count": null, "id": "a944bad6-02c4-4e8f-9e01-ad58b3a8a500", "metadata": {}, "outputs": [], "source": [ "actors = Table.read_table(\"actors.csv\")\n", "actors" ] }, { "cell_type": "code", "execution_count": null, "id": "bfbff5ff-5450-4c18-b8d7-27d48ff9c330", "metadata": {}, "outputs": [], "source": [ "actors.show(5)" ] }, { "cell_type": 
"code", "execution_count": null, "id": "ce587285-8cf2-49df-83a7-014e241e2179", "metadata": {}, "outputs": [], "source": [ "actors.num_columns" ] }, { "cell_type": "code", "execution_count": null, "id": "db2d1c1d-26f6-46eb-8fac-214f9fa69163", "metadata": {}, "outputs": [], "source": [ "actors.num_rows" ] }, { "cell_type": "code", "execution_count": null, "id": "baf6aecd-f0eb-40bd-9d2f-585a2f784b39", "metadata": {}, "outputs": [], "source": [ "actors.labels" ] }, { "cell_type": "code", "execution_count": null, "id": "e79c939c-e8aa-4cbd-9ccb-3b4b11683fdb", "metadata": {}, "outputs": [], "source": [ "actors = actors.relabeled(\"#1 Movie\", \"Highest Grossing\")\n", "actors" ] }, { "cell_type": "markdown", "id": "1b27cb11-1159-48b1-9a44-64bbaecf597e", "metadata": {}, "source": [ "## Active Learning - Use the Actors Table for these questions" ] }, { "cell_type": "markdown", "id": "d38bbe05-093d-48db-b222-5bfe8164fd97", "metadata": {}, "source": [ "1. Print the name of the actor who has been in the most movies, as well as the number of movies.\n", "2. Add a column named \"Movie Percentage\" that shows the number of movies each actor has been in as a percentage of the number for the actor from question 1. Format this column as a percentage and sort the table by it from highest to lowest." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.7" } }, "nbformat": 4, "nbformat_minor": 5 }