import pandas as pd
# Read the 'CES4.0FINAL_results' sheet of the Excel workbook into a DataFrame.
CES4 = pd.read_excel(
    "calenviroscreen40resultsdatadictionary_F_2021.xlsx",
    sheet_name='CES4.0FINAL_results',
)

# Echo the DataFrame (notebook cell display).
CES4
# Output (as captured in the notebook export):
# Loading...
# List the column labels of the loaded table.
CES4.columns
# Output:
# Index(['Census Tract', 'Total Population', 'California County', 'ZIP',
#        'Approximate Location', 'Longitude', 'Latitude', 'CES 4.0 Score',
#        'CES 4.0 Percentile', 'CES 4.0 Percentile Range', 'Ozone', 'Ozone Pctl',
#        'PM2.5', 'PM2.5 Pctl', 'Diesel PM', 'Diesel PM Pctl', 'Drinking Water',
#        'Drinking Water Pctl', 'Lead', 'Lead Pctl', 'Pesticides',
#        'Pesticides Pctl', 'Tox. Release', 'Tox. Release Pctl', 'Traffic',
#        'Traffic Pctl', 'Cleanup Sites', 'Cleanup Sites Pctl',
#        'Groundwater Threats', 'Groundwater Threats Pctl', 'Haz. Waste',
#        'Haz. Waste Pctl', 'Imp. Water Bodies', 'Imp. Water Bodies Pctl',
#        'Solid Waste', 'Solid Waste Pctl', 'Pollution Burden',
#        'Pollution Burden Score', 'Pollution Burden Pctl', 'Asthma',
#        'Asthma Pctl', 'Low Birth Weight', 'Low Birth Weight Pctl',
#        'Cardiovascular Disease', 'Cardiovascular Disease Pctl', 'Education',
#        'Education Pctl', 'Linguistic Isolation', 'Linguistic Isolation Pctl',
#        'Poverty', 'Poverty Pctl', 'Unemployment', 'Unemployment Pctl',
#        'Housing Burden', 'Housing Burden Pctl', 'Pop. Char. ',
#        'Pop. Char. Score', 'Pop. Char. Pctl'],
#       dtype='object')
# NOTE: the 'Pop. Char. ' label carries a trailing space in the source data.

# Report the table dimensions as (rows, columns).
CES4.shape
# Output:
# (8035, 58)

# How many missing values are there in each column?
# isnull() flags each missing cell; sum() totals those flags per column.
CES4.isnull().sum()
# Output:
# Census Tract 0
# Total Population 0
# California County 0
# ZIP 0
# Approximate Location 0
# Longitude 0
# Latitude 0
# CES 4.0 Score 103
# CES 4.0 Percentile 103
# CES 4.0 Percentile Range 103
# Ozone 0
# Ozone Pctl 0
# PM2.5 0
# PM2.5 Pctl 0
# Diesel PM 0
# Diesel PM Pctl 0
# Drinking Water 28
# Drinking Water Pctl 28
# Lead 96
# Lead Pctl 96
# Pesticides 0
# Pesticides Pctl 0
# Tox. Release 0
# Tox. Release Pctl 0
# Traffic 35
# Traffic Pctl 35
# Cleanup Sites 0
# Cleanup Sites Pctl 0
# Groundwater Threats 0
# Groundwater Threats Pctl 0
# Haz. Waste 0
# Haz. Waste Pctl 0
# Imp. Water Bodies 0
# Imp. Water Bodies Pctl 0
# Solid Waste 0
# Solid Waste Pctl 0
# Pollution Burden 0
# Pollution Burden Score 0
# Pollution Burden Pctl 0
# Asthma 11
# Asthma Pctl 11
# Low Birth Weight 227
# Low Birth Weight Pctl 227
# Cardiovascular Disease 11
# Cardiovascular Disease Pctl 11
# Education 103
# Education Pctl 103
# Linguistic Isolation 320
# Linguistic Isolation Pctl 320
# Poverty 75
# Poverty Pctl 75
# Unemployment 335
# Unemployment Pctl 335
# Housing Burden 145
# Housing Burden Pctl 145
# Pop. Char. 103
# Pop. Char. Score 103
# Pop. Char. Pctl 103
# dtype: int64

# count the number of counties in the data
CES4['California County'].nunique()
# Output: 58

# count the number of unique census tracts in the data
CES4['Census Tract'].nunique()
# Output: 8035

# count the number of ZIP codes in the data
CES4['ZIP'].nunique()
# Output: 1355

# count the number of cities in the data
# (distinct values of the 'Approximate Location' column)
CES4['Approximate Location'].nunique()
# Output: 785

# how many census tracts are in each city
# Tally rows per 'Approximate Location' value.
CES4['Approximate Location'].value_counts()
# Output (truncated by the notebook display):
# Los Angeles 996
# San Diego 280
# San Francisco 195
# San Jose 188
# Oakland 113
# ...
# Marina del Rey 1
# Yountville 1
# Lakeport 1
# Highgrove 1
# Soledad 1
# Name: Approximate Location, Length: 785, dtype: int64