import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import os
import seaborn as sns
# Load a filtered FDIC dataset and USA map shapefile
banks = pd.read_csv('/content/drive/MyDrive/filtered_institutions.csv', encoding='utf-8')
usmap = gpd.read_file("/content/drive/MyDrive/tl_2022_us_state/tl_2022_us_state.shp")
usmap = usmap.to_crs("EPSG:5070")
non_continental = ['HI','VI','MP','GU','AK','AS','PR']
usa = usmap
for n in non_continental:
usa = usa[usa.STUSPS != n]
usa.boundary.plot()
plt.show()
# Select relevant columns from banks dataframe
bank_counts = banks['STNAME'].value_counts()
bank_counts = bank_counts.reset_index()
bank_counts.rename(columns={'index': 'STNAME', 'STNAME': 'COUNT'}, inplace=True)
# Merge the US map with the DataFrame containing the counts of banks
merged_geo_data = usa.merge(bank_counts, left_on='NAME', right_on='STNAME', how='left')
# Plot the US map with the counts of banks as a choropleth map
fig, ax = plt.subplots(1, 1, figsize=(16, 8))
merged_geo_data.plot(column='COUNT', cmap='Oranges', linewidth=0.8, ax=ax, edgecolor='0.8', legend=True)
area_threshold = merged_geo_data['geometry'].area.quantile(0.10)
for idx, row in merged_geo_data.iterrows():
# Only add text for states larger than the area threshold
if row.geometry.area > area_threshold:
centroid = row.geometry.centroid
plt.text(centroid.x, centroid.y, row['STUSPS'], ha='center', va='center', fontsize=6)
ax.set_title('Number of ever registered Banks by State')
plt.show()
Only Texas has more than 2500 banks ever registered there.
# Calculate the count of active and inactive institutions
active_count = banks['ACTIVE'].sum()
inactive_count = len(banks) - active_count
# Create a donut-like chart using matplotlib
plt.figure(figsize=(5, 5))
labels = ['Active', 'Inactive']
sizes = [active_count, inactive_count]
colors = ['#467AB9', '#B98546']
# Function to format label with absolute numbers and percentages
def func(pct, sizes):
absolute = int(pct/100.*sum(sizes))
return f"{absolute:d}\n({pct:.1f}%)"
# Plotting the pie chart
plt.pie(sizes, labels=labels, colors=colors, startangle=90, autopct=lambda pct: func(pct, sizes))
# Draw a circle at the center to make it a donut chart
centre_circle = plt.Circle((0,0),0.70,fc='white')
fig = plt.gcf()
fig.gca().add_artist(centre_circle)
# Equal aspect ratio ensures that pie is drawn as a circle
plt.title('Comparison of active vs. inactive Banks')
plt.axis('equal')
plt.show()
Less than 1/5 of the total amount.
# Filter out only 'ACTIVE' and 'STNAME' columns (1 means that bank is active)
banks_filtered = banks[['STNAME', 'ACTIVE']]
# Aggregate the number of active and inactive banks by state
bank_status_counts = banks_filtered.groupby(['STNAME', 'ACTIVE']).size().unstack(fill_value=0).reset_index()
# Merge the data
merged_geo_data = usa.merge(bank_status_counts, left_on='NAME', right_on='STNAME', how='left')
# Determine an area threshold
area_threshold = merged_geo_data['geometry'].area.quantile(0.25) # Adjust the quantile as needed
# Plotting, corrected for integer column access with state abbreviations for larger states
fig, ax = plt.subplots(1, 2, figsize=(20, 10), sharex=True, sharey=True)
# Plot active banks
merged_geo_data.plot(column=1, cmap='Greens', linewidth=0.8, ax=ax[0], edgecolor='0.8', legend=True)
ax[0].set_title('Active Banks by State')
for idx, row in merged_geo_data.iterrows():
if row.geometry.area > area_threshold:
centroid = row.geometry.centroid
ax[0].text(centroid.x, centroid.y, row['STUSPS'], ha='center', va='center', fontsize=8)
ax[0].axis('off')
# Plot inactive banks
merged_geo_data.plot(column=0, cmap='Reds', linewidth=0.8, ax=ax[1], edgecolor='0.8', legend=True)
ax[1].set_title('Inactive Banks by State')
for idx, row in merged_geo_data.iterrows():
if row.geometry.area > area_threshold:
centroid = row.geometry.centroid
ax[1].text(centroid.x, centroid.y, row['STUSPS'], ha='center', va='center', fontsize=8)
ax[1].axis('off')
plt.suptitle('Comparison of active vs. inactive Banks by State', fontsize=16)
plt.show()
# Calculate the ratio of active to inactive banks for each state
bank_status_counts['Inactive/Active Ratio'] = bank_status_counts[0] / bank_status_counts[1]
# Replace any potential infinite values with NaN in case of 0 active banks
bank_status_counts.replace([np.inf, -np.inf], np.nan, inplace=True)
# Merge the geographic data with the bank status counts, including the new ratio
merged_geo_data_ratio = usa.merge(bank_status_counts, left_on='NAME', right_on='STNAME', how='left')
# Plotting the ratio of active to inactive banks by state with state abbreviations for larger states only
fig, ax = plt.subplots(1, 1, figsize=(16, 8))
merged_geo_data_ratio.plot(column='Inactive/Active Ratio', cmap='coolwarm', linewidth=0.8, ax=ax, edgecolor='0.8', legend=True)
# Determine an area threshold below which the state abbreviation will not be displayed
area_threshold = merged_geo_data_ratio['geometry'].area.quantile(0.10)
for idx, row in merged_geo_data_ratio.iterrows():
# Only add text for states larger than the area threshold
if row.geometry.area > area_threshold:
centroid = row.geometry.centroid
plt.text(centroid.x, centroid.y, row['STUSPS'], ha='center', va='center', fontsize=8)
ax.set_title('Ratio of inactive to active Banks by State')
ax.axis('off')
plt.show()
Blue states suggest stable banking sectors, while red, notably Florida, indicates higher inactivity, possibly detect banking sector challenges. The map can serve as an indicator of the relative health of the banking industry across states.
banks_filtered = banks.dropna(subset=['ROA', 'ROE', 'ASSET', 'STNAME', 'BKCLASS']).copy()
bank_class_dict = {
'N': 'National Bank',
'NM': 'State Bank, Non-Fed Member',
'OI': 'Foreign Bank, Insured',
'SB': 'Fed Savings Bank',
'SI': 'State Savings Bank',
'SL': 'State Savings & Loan',
'SM': 'State Bank, Fed Member',
'NC': 'Noninsured Commercial',
'NS': 'Noninsured Savings Bank',
'CU': 'Credit Union'
}
# Update BKCLASS with short descriptions
banks_filtered.loc[:, 'BKCLASS'] = banks_filtered['BKCLASS'].map(bank_class_dict)
# Calculate the mean ROA and ROE for each state and sort them for consistent order
mean_values = banks_filtered.groupby('STNAME')[['ROA', 'ROE']].mean().reset_index()
mean_values_sorted = mean_values.sort_values(by='ROA', ascending=False).head(40)
# Create a figure to hold the dashboard layout
dashboard_fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(20, 15))
# Average ROA by State Bar Plot
sns.barplot(y='STNAME', x='ROA', data=mean_values_sorted, ax=ax1)
ax1.set_title('Average ROA by State - Top 40')
# Average ROE by State Bar Plot
sns.barplot(y='STNAME', x='ROE', data=mean_values_sorted, ax=ax2)
ax2.set_title('Average ROE by State - Top 40')
# Create the barplot for Total Assets by State for the top 40 states
total_assets_by_state = banks_filtered.groupby('STNAME')['ASSET'].sum().reset_index()
top_states_by_assets = total_assets_by_state.sort_values(by='ASSET', ascending=False).head(40)
sns.barplot(data=top_states_by_assets, x='ASSET', y='STNAME', ax=ax3)
ax3.set_title('Total Assets by State - Top 40')
ax3.set_xscale('log')
# Bank Classification Types
sns.countplot(data=banks_filtered, y='BKCLASS', ax=ax4, order=banks_filtered['BKCLASS'].value_counts().index)
ax4.set_title('Bank Classification Types')
# Adjust layout spacing and show the dashboard
dashboard_fig.tight_layout(pad=3.0)
plt.show()
The dashboard provides a financial profile of the banking sector across different states and territories, focusing on profitability (ROA and ROE), size (Total Assets), and organizational structure (Bank Classification Types). The negative values in ROE across all states and territories suggest are uncommon and indicate losses or poor performance. The values range from around 0 to -500, with the Federated States of Micronesia having the least negative value and Florida the most negative.
# Aggregate the number of banks by state and class
bank_class_counts = banks.groupby(['STNAME', 'BKCLASS']).size().reset_index(name='Counts')
# Pivot the data to have bank classes as columns and states as rows
pivot_bank_class_counts = bank_class_counts.pivot(index='STNAME', columns='BKCLASS', values='Counts').fillna(0)
# Merge the geographic data with the pivoted bank class counts
merged_geo_data_classes = usa.merge(pivot_bank_class_counts, left_on='NAME', right_index=True, how='left').fillna(0)
# Choose a bank class to visualize
bank_class_to_plot = 'NM'
# Build a plot
fig, ax = plt.subplots(1, 1, figsize=(16, 8))
merged_geo_data_classes.plot(column=bank_class_to_plot, cmap='Oranges', linewidth=0.8, ax=ax, edgecolor='0.8', legend=True)
area_threshold = merged_geo_data_classes['geometry'].area.quantile(0.10)
# Add state abbreviations without the area threshold condition for simplicity
for idx, row in merged_geo_data_classes.iterrows():
# Only add text for states larger than the area threshold
if row.geometry.area > area_threshold:
centroid = row.geometry.centroid
plt.text(centroid.x, centroid.y, row['STUSPS'], ha='center', va='center', fontsize=6)
ax.set_title('Concentration of commercial Banks (Fed Non-Members) by State')
ax.axis('off')
plt.show()
Provided illustration shows the distribution of commercial banks that are not members of the Federal Reserve System across the United States. Texas stands out with the highest concentration of this type of banks.