"""Chart FDIC bank failures per year and per state from ``banklist.csv``."""
import pandas as pd


def make_states(df1):
    """Count how many banks failed in each state for every year in the data.

    :param df1: DataFrame with an ``ST`` column (state code) and an integer
        ``Closing Year`` column. It is only read, never modified.
    :return: DataFrame with one column per state (sorted by state code), one
        row per year from the earliest to the latest ``Closing Year``
        inclusive, and the number of failed banks as values. Years without
        any failure are present and filled with 0. An empty input yields an
        empty DataFrame.
    """
    if df1.empty:
        return pd.DataFrame()

    first_year = int(df1['Closing Year'].min())
    last_year = int(df1['Closing Year'].max())

    # One pass over the data instead of one filtered lookup per state/year.
    counts = pd.crosstab(df1['Closing Year'], df1['ST'])

    # Span every year from first to last *inclusive*, so the most recent year
    # is kept and the range does not depend on the data starting in 2000.
    counts = counts.reindex(index=range(first_year, last_year + 1), fill_value=0)

    # Drop the axis names crosstab adds so the frame is plain years x states
    # (otherwise they would show up as axis label / legend title when plotted).
    return counts.sort_index(axis=1).rename_axis(index=None, columns=None)


def _plot_failures(failed_banks, by_state):
    """Show a bar chart of failures per year and an area chart per state."""
    # Imported here so the data helpers above can be used (and tested)
    # without a plotting backend being installed.
    import matplotlib.pyplot as plt

    figure = plt.figure("Bank Information")
    bar_axes = figure.add_subplot(111)
    # sort_index() puts the bars in chronological order.
    per_year = failed_banks["Closing Year"].value_counts().sort_index()
    # `kind` must be passed by keyword; the positional form is rejected by
    # current pandas releases.
    per_year.plot(kind="bar", color="violet", ax=bar_axes)
    bar_axes.set_xlabel("Year")
    bar_axes.set_ylabel("Failures")
    bar_axes.set_title("FDIC Failed Banks")

    # DataFrame.plot opens its own figure, so label the axes it returns
    # rather than relying on pyplot's "current axes" (which would still be
    # the bar chart at this point).
    area_axes = by_state.plot.area(cmap=plt.get_cmap('cool'))
    area_axes.set_xlabel("Year")
    area_axes.set_title("Failures by State")

    plt.show()


def main():
    """Load the FDIC list, print the per-state table and show both charts."""
    failed_banks = pd.read_csv('banklist.csv')

    # "Closing Date" ends in a two-digit year; every year is taken to be
    # 20xx, as in the original script.
    failed_banks["Closing Year"] = (
        failed_banks["Closing Date"].str[-2:].astype(int) + 2000
    )

    by_state = make_states(failed_banks)
    print(by_state)
    _plot_failures(failed_banks, by_state)


if __name__ == "__main__":
    main()