任务描述
本关为练习关卡,请按照编程要求完成任务,获取美国各州2010年的人口密度排名。
import pandas as pd
import numpy as np
def task3():
    """Print the 2010 population-density ranking of the US states.

    Reads ``state-population.csv``, ``state-areas.csv`` and
    ``state-abbrevs.csv`` from ``./step3/``, joins them on the full state
    name, drops every row that still contains a missing value and, for the
    year 2010, sums the per-row density (population / area) of the
    ``under18`` and ``total`` age groups for each state.

    The five highest and the five lowest values are printed to stdout,
    each list preceded by its heading. Nothing is returned.
    """
    #********** Begin **********#
    # Read the three CSV files (read_csv already returns a DataFrame).
    pop = pd.read_csv("./step3/state-population.csv")
    areas = pd.read_csv("./step3/state-areas.csv")
    abbrevs = pd.read_csv("./step3/state-abbrevs.csv")

    # Attach the full state name to every population row, then drop the
    # duplicated abbreviation column.
    merged = pd.merge(pop, abbrevs, how='outer',
                      left_on='state/region', right_on='abbreviation')
    merged = merged.drop('abbreviation', axis=1)

    # These two regions have no entry in the abbreviation table, so their
    # full names are filled in by hand.
    merged.loc[merged['state/region'] == 'PR', 'state'] = 'Puerto Rico'
    merged.loc[merged['state/region'] == 'USA', 'state'] = 'United States'

    # Attach the area of each state and discard rows with any missing value.
    merged = pd.merge(merged, areas, on='state', how='left')
    merged = merged.dropna()

    # Keep only the 2010 rows of the two age groups that are summed below.
    data2010 = merged.loc[merged['year'] == 2010]
    data2010 = data2010.loc[data2010['ages'].isin(['under18', 'total'])]

    # Density of every remaining row, computed into a new Series instead of
    # overwriting a column of a filtered slice.
    row_density = data2010['population'] / data2010['area (sq. mi)']

    # Sum the densities per state by label, so a state that lacks one of the
    # two rows cannot shift values onto a different state. The result is an
    # unnamed Series indexed by 'state'.
    # NOTE(review): 'total' presumably already includes the under-18
    # population, so this sum may double-count minors -- confirm against the
    # expected output before changing it.
    density = row_density.groupby(data2010['state']).sum()

    # Rank from densest to sparsest.
    density = density.sort_values(ascending=False)

    # Print the top five and the bottom five; an unnamed Series prints the
    # 'state' header and ends with its own 'dtype: float64' footer.
    print("前5名:")
    print(density.iloc[:5])
    print("后5名:")
    print(density.iloc[-5:])
    # ********** End **********#