Apply Geopandas and Seaborn to FDIC Bank Data Visualizations¶

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import os
import seaborn as sns

# Load a filtered FDIC dataset and USA map shapefile
banks = pd.read_csv('/content/drive/MyDrive/filtered_institutions.csv', encoding='utf-8')

usmap = gpd.read_file("/content/drive/MyDrive/tl_2022_us_state/tl_2022_us_state.shp")
usmap = usmap.to_crs("EPSG:5070")

Let's exclude the following territories from the analysis:¶

  • HI: Hawaii
  • VI: U.S. Virgin Islands
  • MP: Northern Mariana Islands
  • GU: Guam
  • AK: Alaska
  • AS: American Samoa
  • PR: Puerto Rico

Now we can display U.S. map of 49 states with boundaries.¶

In [7]:
non_continental = ['HI','VI','MP','GU','AK','AS','PR']
usa = usmap
for n in non_continental:
    usa = usa[usa.STUSPS != n]

usa.boundary.plot()
plt.show()

Examine the number of banks ever registered in each state.¶

In [8]:
# Select relevant columns from banks dataframe
bank_counts = banks['STNAME'].value_counts()
bank_counts = bank_counts.reset_index()
bank_counts.rename(columns={'index': 'STNAME', 'STNAME': 'COUNT'}, inplace=True)

# Merge the US map with the DataFrame containing the counts of banks
merged_geo_data = usa.merge(bank_counts, left_on='NAME', right_on='STNAME', how='left')

# Plot the US map with the counts of banks as a choropleth map
fig, ax = plt.subplots(1, 1, figsize=(16, 8))
merged_geo_data.plot(column='COUNT', cmap='Oranges', linewidth=0.8, ax=ax, edgecolor='0.8', legend=True)

area_threshold = merged_geo_data['geometry'].area.quantile(0.10)

for idx, row in merged_geo_data.iterrows():
    # Only add text for states larger than the area threshold
    if row.geometry.area > area_threshold:
      centroid = row.geometry.centroid
      plt.text(centroid.x, centroid.y, row['STUSPS'], ha='center', va='center', fontsize=6)

ax.set_title('Number of ever registered Banks by State')

plt.show()

Only Texas has more than 2500 banks ever registered there.

I'd like to plot a simple pie chart to see how many banks are still active in the country.¶

In [9]:
# Calculate the count of active and inactive institutions
active_count = banks['ACTIVE'].sum()
inactive_count = len(banks) - active_count

# Create a donut-like chart using matplotlib
plt.figure(figsize=(5, 5))
labels = ['Active', 'Inactive']
sizes = [active_count, inactive_count]
colors = ['#467AB9', '#B98546']

# Function to format label with absolute numbers and percentages
def func(pct, sizes):
    absolute = int(pct/100.*sum(sizes))
    return f"{absolute:d}\n({pct:.1f}%)"

# Plotting the pie chart
plt.pie(sizes, labels=labels, colors=colors, startangle=90, autopct=lambda pct: func(pct, sizes))

# Draw a circle at the center to make it a donut chart
centre_circle = plt.Circle((0,0),0.70,fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)

# Equal aspect ratio ensures that pie is drawn as a circle
plt.title('Comparison of active vs. inactive Banks')
plt.axis('equal')
plt.show()

Less than 1/5 of the total amount.

Compare number of active and inactive banks on the paired plots.¶

In [10]:
# Filter out only 'ACTIVE' and 'STNAME' columns (1 means that bank is active)
banks_filtered = banks[['STNAME', 'ACTIVE']]

# Aggregate the number of active and inactive banks by state
bank_status_counts = banks_filtered.groupby(['STNAME', 'ACTIVE']).size().unstack(fill_value=0).reset_index()

# Merge the data
merged_geo_data = usa.merge(bank_status_counts, left_on='NAME', right_on='STNAME', how='left')

# Determine an area threshold
area_threshold = merged_geo_data['geometry'].area.quantile(0.25)  # Adjust the quantile as needed

# Plotting, corrected for integer column access with state abbreviations for larger states
fig, ax = plt.subplots(1, 2, figsize=(20, 10), sharex=True, sharey=True)

# Plot active banks
merged_geo_data.plot(column=1, cmap='Greens', linewidth=0.8, ax=ax[0], edgecolor='0.8', legend=True)
ax[0].set_title('Active Banks by State')
for idx, row in merged_geo_data.iterrows():
    if row.geometry.area > area_threshold:
        centroid = row.geometry.centroid
        ax[0].text(centroid.x, centroid.y, row['STUSPS'], ha='center', va='center', fontsize=8)
ax[0].axis('off')

# Plot inactive banks
merged_geo_data.plot(column=0, cmap='Reds', linewidth=0.8, ax=ax[1], edgecolor='0.8', legend=True)
ax[1].set_title('Inactive Banks by State')
for idx, row in merged_geo_data.iterrows():
    if row.geometry.area > area_threshold:
        centroid = row.geometry.centroid
        ax[1].text(centroid.x, centroid.y, row['STUSPS'], ha='center', va='center', fontsize=8)
ax[1].axis('off')

plt.suptitle('Comparison of active vs. inactive Banks by State', fontsize=16)
plt.show()

Not very representative. Seems like it's better to calculate a ratio between these categories.¶

In [14]:
# Calculate the ratio of active to inactive banks for each state
bank_status_counts['Inactive/Active Ratio'] = bank_status_counts[0] / bank_status_counts[1]

# Replace any potential infinite values with NaN in case of 0 active banks
bank_status_counts.replace([np.inf, -np.inf], np.nan, inplace=True)

# Merge the geographic data with the bank status counts, including the new ratio
merged_geo_data_ratio = usa.merge(bank_status_counts, left_on='NAME', right_on='STNAME', how='left')

# Plotting the ratio of active to inactive banks by state with state abbreviations for larger states only
fig, ax = plt.subplots(1, 1, figsize=(16, 8))
merged_geo_data_ratio.plot(column='Inactive/Active Ratio', cmap='coolwarm', linewidth=0.8, ax=ax, edgecolor='0.8', legend=True)

# Determine an area threshold below which the state abbreviation will not be displayed
area_threshold = merged_geo_data_ratio['geometry'].area.quantile(0.10)

for idx, row in merged_geo_data_ratio.iterrows():
    # Only add text for states larger than the area threshold
    if row.geometry.area > area_threshold:
        centroid = row.geometry.centroid
        plt.text(centroid.x, centroid.y, row['STUSPS'], ha='center', va='center', fontsize=8)

ax.set_title('Ratio of inactive to active Banks by State')
ax.axis('off')
plt.show()

Blue states suggest stable banking sectors, while red, notably Florida, indicates higher inactivity, possibly detect banking sector challenges. The map can serve as an indicator of the relative health of the banking industry across states.

Let's look at a couple of financial metrics in the form of a non-interactive dashboard¶

In [13]:
banks_filtered = banks.dropna(subset=['ROA', 'ROE', 'ASSET', 'STNAME', 'BKCLASS']).copy()

bank_class_dict = {
    'N': 'National Bank',
    'NM': 'State Bank, Non-Fed Member',
    'OI': 'Foreign Bank, Insured',
    'SB': 'Fed Savings Bank',
    'SI': 'State Savings Bank',
    'SL': 'State Savings & Loan',
    'SM': 'State Bank, Fed Member',
    'NC': 'Noninsured Commercial',
    'NS': 'Noninsured Savings Bank',
    'CU': 'Credit Union'
}

# Update BKCLASS with short descriptions
banks_filtered.loc[:, 'BKCLASS'] = banks_filtered['BKCLASS'].map(bank_class_dict)

# Calculate the mean ROA and ROE for each state and sort them for consistent order
mean_values = banks_filtered.groupby('STNAME')[['ROA', 'ROE']].mean().reset_index()
mean_values_sorted = mean_values.sort_values(by='ROA', ascending=False).head(40)

# Create a figure to hold the dashboard layout
dashboard_fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 15))

# Average ROA by State Bar Plot
sns.barplot(y='STNAME', x='ROA', data=mean_values_sorted, ax=ax1)
ax1.set_title('Average ROA by State - Top 40')

# Average ROE by State Bar Plot
sns.barplot(y='STNAME', x='ROE', data=mean_values_sorted, ax=ax2)
ax2.set_title('Average ROE by State - Top 40')

# Create the barplot for Total Assets by State for the top 40 states
total_assets_by_state = banks_filtered.groupby('STNAME')['ASSET'].sum().reset_index()
top_states_by_assets = total_assets_by_state.sort_values(by='ASSET', ascending=False).head(40)
sns.barplot(data=top_states_by_assets, x='ASSET', y='STNAME', ax=ax3)
ax3.set_title('Total Assets by State - Top 40')
ax3.set_xscale('log')

# Bank Classification Types
sns.countplot(data=banks_filtered, y='BKCLASS', ax=ax4, order=banks_filtered['BKCLASS'].value_counts().index)
ax4.set_title('Bank Classification Types')

# Adjust layout spacing and show the dashboard
dashboard_fig.tight_layout(pad=3.0)
plt.show()

The dashboard provides a financial profile of the banking sector across different states and territories, focusing on profitability (ROA and ROE), size (Total Assets), and organizational structure (Bank Classification Types). The negative values in ROE across all states and territories suggest are uncommon and indicate losses or poor performance. The values range from around 0 to -500, with the Federated States of Micronesia having the least negative value and Florida the most negative.

The most frequent type is a state bank which is not a Fed member. Time to plot all of them on the map.¶

In [16]:
# Aggregate the number of banks by state and class
bank_class_counts = banks.groupby(['STNAME', 'BKCLASS']).size().reset_index(name='Counts')

# Pivot the data to have bank classes as columns and states as rows
pivot_bank_class_counts = bank_class_counts.pivot(index='STNAME', columns='BKCLASS', values='Counts').fillna(0)

# Merge the geographic data with the pivoted bank class counts
merged_geo_data_classes = usa.merge(pivot_bank_class_counts, left_on='NAME', right_index=True, how='left').fillna(0)

# Choose a bank class to visualize
bank_class_to_plot = 'NM'

# Build a plot
fig, ax = plt.subplots(1, 1, figsize=(16, 8))
merged_geo_data_classes.plot(column=bank_class_to_plot, cmap='Oranges', linewidth=0.8, ax=ax, edgecolor='0.8', legend=True)
area_threshold = merged_geo_data_classes['geometry'].area.quantile(0.10)

# Add state abbreviations without the area threshold condition for simplicity
for idx, row in merged_geo_data_classes.iterrows():
    # Only add text for states larger than the area threshold
    if row.geometry.area > area_threshold:
      centroid = row.geometry.centroid
      plt.text(centroid.x, centroid.y, row['STUSPS'], ha='center', va='center', fontsize=6)

ax.set_title('Concentration of commercial Banks (Fed Non-Members) by State')
ax.axis('off')
plt.show()

Provided illustration shows the distribution of commercial banks that are not members of the Federal Reserve System across the United States. Texas stands out with the highest concentration of this type of banks.