import geopandas as gpd
import matplotlib.pyplot as plt
import seaborn as sns
# Path to the GIS file to analyse (here a .shp shapefile).
file_path = 'calenviroscreen40shpf2021shp/CES4 Final Shapefile.shp'

# Load the shapefile into a GeoDataFrame.
gdf = gpd.read_file(file_path)

# Outside a notebook the value of a bare expression is discarded, so every
# inspection step below is printed explicitly.

# First few rows, for an overview of the data.
print(gdf.head())

# Quick-look map of the geometries, shown immediately like every other
# figure in this script.
gdf.plot()
plt.show()

# Column names available in the dataset.
print(gdf.columns)

# Count of missing values per column.
# NOTE(review): isnull() only counts real NaN/None. The CalEnviroScreen data
# dictionary is reported to encode missing values as -999 — confirm, and if
# so replace that sentinel with NaN before trusting these counts or the
# statistics below.
print(gdf.isnull().sum())

# Summary statistics for the numerical columns.
print(gdf.describe())
# Side-by-side choropleths: pollution burden (left) against a population
# vulnerability indicator (right).
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(15, 7))
burden_ax, poverty_ax = ax

# Left panel: the 'PolBurdSc' column on an orange-red scale.
gdf.plot(column='PolBurdSc', cmap='OrRd', legend=True, ax=burden_ax)
burden_ax.set_title('Pollution Burden Score')

# Right panel: the 'Poverty' column, used here as the example vulnerability
# measure, on a purple-blue scale.
gdf.plot(column='Poverty', cmap='PuBu', legend=True, ax=poverty_ax)
poverty_ax.set_title('Population Vulnerability (Poverty)')

plt.show()
# Next: rank rows by Cumulative Impact Score and keep the ten highest.
top_10 = gdf.nlargest(n=10, columns='CIscore')
# One red bar per selected row, labelled by its 'Tract' value. The axes
# returned by the plot call is the current axes, so the tick rotation set
# through pyplot below applies to the same chart.
impact_ax = top_10.plot.bar(x='Tract', y='CIscore', color='red', legend=False)
impact_ax.set_title('Top 10 Most Impacted Communities')
impact_ax.set_ylabel('Cumulative Impact Score')
plt.xticks(rotation=45, ha='right')
plt.show()
# Next: scatter plot of Pollution Burden Score against Poverty.
# Semi-transparent blue points so overlapping observations remain visible.
scatter_ax = gdf.plot(
    kind='scatter',
    x='PolBurdSc',
    y='Poverty',
    color='blue',
    alpha=0.5,
)
scatter_ax.set_title('Pollution Burden vs Poverty')
scatter_ax.set_xlabel('Pollution Burden Score')
scatter_ax.set_ylabel('Poverty Score')
plt.show()
# Next: restrict the data to one region, e.g. Los Angeles County.
# Boolean mask selecting the rows whose 'County' value is exactly 'Los Angeles'.
in_los_angeles = gdf['County'] == 'Los Angeles'
la_county = gdf[in_los_angeles]

# Choropleth of the 'PM2_5' column for the selected rows only.
la_county.plot(
    column='PM2_5',
    cmap='coolwarm',
    legend=True,
)
plt.title('PM2.5 Air Quality in Los Angeles County')
plt.show()
# Next: pick a few pollutant columns to compare.
# Keep only the three pollutant columns to compare.
hazards = gdf[['Ozone', 'PM2_5', 'DieselPM']]

# Multi-line chart: one line per selected column, drawn against the
# DataFrame's row index (pandas' default x-axis for a line plot).
hazards.plot(figsize=(10, 6))
plt.title('Comparison of Ozone, PM2.5, and Diesel Particulate Matter Across Regions')
plt.xlabel('Region (Tracts or ZIP)')
plt.ylabel('Concentration')
# Replace the raw column names in the legend with readable labels, listed in
# the same order as the selected columns.
plt.legend(['Ozone', 'PM2.5', 'Diesel PM'])
plt.show()
# Columns whose pairwise correlations are of interest.
corr_columns = ['Poverty', 'Unempl', 'HousBurd', 'PolBurdSc', 'Asthma', 'Cardiovas']

# Pairwise correlation matrix of the selected columns.
corr_matrix = gdf[corr_columns].corr()

# Annotated heatmap with the colour scale pinned to [-1, 1], so the midpoint
# of the diverging colormap corresponds to zero correlation.
plt.figure(figsize=(8, 6))
heatmap_ax = sns.heatmap(
    corr_matrix,
    vmin=-1,
    vmax=1,
    cmap='coolwarm',
    annot=True,
)
heatmap_ax.set_title('Correlation Between Population Characteristics and Pollution Burden')
plt.show()
# Next: stacked bar plot of racial composition labelled by ZIP code.
# Population-group columns, in the order they are stacked.
race_columns = ['Hispanic', 'White', 'AfricanAm', 'AAPI', 'NativeAm']

# NOTE(review): every row of gdf becomes its own bar. If ZIP values repeat
# across rows (presumably each row is a tract — confirm), the x-axis carries
# duplicate labels rather than one aggregated bar per ZIP.
composition = gdf[['ZIP'] + race_columns].set_index('ZIP')
composition.plot.bar(stacked=True, figsize=(10, 6), colormap='viridis')
plt.title('Racial Composition by ZIP Code')
plt.ylabel('Proportion of Population')
plt.legend(title='Racial Groups')
plt.xticks(rotation=90)
plt.show()
# Next: statewide map of cumulative impact scores.
# Choropleth of the 'CIscore' column over every geometry in the dataset,
# with a legend for the colour scale.
gdf.plot('CIscore', legend=True)
plt.title('Cumulative Impact Scores Across California')
plt.show()