下面是我用来获取特定城市和年份天气数据的函数。我想知道如何修改它,以便一次获取多个城市、多个年份的数据,并将它们合并到同一个数据集中?
import pandas as pd
def calgary_weather(city, year):
    """Download one year of weather data for a single city.

    Args:
        city: Station key, either ``"calgary"`` or ``"edmonton"``.
        year: Year to request; a string or an integer is accepted.

    Returns:
        The ``pandas.DataFrame`` parsed from the CSV returned by the
        climate.weather.gc.ca bulk-data endpoint.

    Raises:
        KeyError: If ``city`` is not one of the known station keys.
    """
    station_ids = {"calgary": 27211, "edmonton": 27793}
    # Resolve the station before building the URL so an unknown city fails
    # with KeyError without any network access.
    station_id = station_ids[city]
    # An f-string formats both str and int years, whereas "+" concatenation
    # raised TypeError for integers.
    url = (
        "https://climate.weather.gc.ca/climate_data/bulk_data_e.html"
        f"?format=csv&stationID={station_id}&Year={year}"
        "&Month=5&Day=1&timeframe=2&submit=Download+Data"
    )
    return pd.read_csv(url)
# Example: fetch the 2017 data for the Edmonton station.
calgary_weather(city='edmonton', year='2017')
import pandas as pd
def get_weather_data(cities: (list, str), years: (list, int)) -> pd.DataFrame:
    """Download weather data for every combination of city and year.

    Args:
        cities: One station key or an iterable of station keys
            (``'calgary'``, ``'edmonton'``).
        years: One year or an iterable of years.

    Returns:
        A single ``DataFrame`` with the per-request frames concatenated in
        city-major order (each frame keeps its own row index). An empty
        ``DataFrame`` is returned when there is nothing to download.

    Raises:
        KeyError: If any city is not a known station key.
    """
    stations = {'calgary': 27211, 'edmonton': 27793}

    # A bare string would be iterated character by character and a bare int
    # is not iterable at all, so promote scalars to one-element lists.
    if isinstance(cities, str):
        cities = [cities]
    if isinstance(years, (int, str)):
        years = [years]
    # Materialise once: years is re-iterated for every city.
    years = list(years)

    # Resolve all stations up front so an unknown city raises KeyError
    # before any download starts.
    station_ids = [stations[city] for city in cities]

    frames = []
    for station_id in station_ids:
        for year in years:
            url = f'https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID={station_id}&Year={year}&Month=5&Day=1&timeframe=2&submit=Download+Data'
            frames.append(pd.read_csv(url))

    # pd.concat raises ValueError on an empty sequence.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
# Example: download 2017-2019 for both stations, then preview the first
# three rows and first five columns as a markdown table.
years = list(range(2017, 2020))
cities = ['calgary', 'edmonton']
df = get_weather_data(cities=cities, years=years)
print(df.iloc[:3].iloc[:, :5].to_markdown())
| | Longitude (x) | Latitude (y) | Station Name | Climate ID | Date/Time |
|---:|----------------:|---------------:|:-----------------|-------------:|:------------|
| 0 | -114 | 51.11 | CALGARY INT'L CS | 3031094 | 2017-01-01 |
| 1 | -114 | 51.11 | CALGARY INT'L CS | 3031094 | 2017-01-02 |
| 2 | -114 | 51.11 | CALGARY INT'L CS | 3031094 | 2017-01-03 |
# Preview the last three rows and first five columns as a markdown table.
print(df.iloc[-3:].iloc[:, :5].to_markdown())
| | Longitude (x) | Latitude (y) | Station Name | Climate ID | Date/Time |
|----:|----------------:|---------------:|:--------------------------|-------------:|:------------|
| 362 | -113.61 | 53.31 | EDMONTON INTERNATIONAL CS | 3012206 | 2019-12-29 |
| 363 | -113.61 | 53.31 | EDMONTON INTERNATIONAL CS | 3012206 | 2019-12-30 |
| 364 | -113.61 | 53.31 | EDMONTON INTERNATIONAL CS | 3012206 | 2019-12-31 |
def weather(*args):
    """Sort positional arguments into city names and years.

    Integers are treated as years and strings as city names, so callers can
    pass them in any order, e.g. ``weather('calgary', 2017, 2018)``.
    Arguments of any other type are ignored.

    Args:
        *args: Any mix of ``str`` city names and ``int`` years.

    Returns:
        A ``(cities, years)`` tuple of lists, each preserving the order in
        which its items were passed.
    """
    cities = []
    years = []
    for arg in args:
        # bool is a subclass of int; without this guard True/False would be
        # collected as the years 1 and 0.
        if isinstance(arg, bool):
            continue
        if isinstance(arg, int):
            years.append(arg)
        elif isinstance(arg, str):
            cities.append(arg)
    # The original sketch built both lists and then discarded them.
    return cities, years
我更喜欢直接传递多个参数而不是传入列表,因为如果要做一个对用户友好的程序,无论如何都得把用户的每个输入整理成列表。我认为这是一种不错的处理方式,不过您也可以沿用原始代码,加上 for 循环,并用输入列表代替 city 和 year。
您可以使用下面的代码。简而言之,所有城市将“共享”同一组年份。我不确定 df.append 的效率如何,但它可以工作。请注意我是如何修改字典中的字符串的(改用 {year} 占位符)。
import pandas as pd
def calgary_weather1(cities: list, years: list) -> pd.DataFrame:
    """Download weather data for every city, using the same years for all.

    Args:
        cities: Station keys (``'calgary'``, ``'edmonton'``).
        years: Years to request for each city; strings or integers.

    Returns:
        One ``DataFrame`` with all downloaded frames concatenated in
        city-major order, or an empty ``DataFrame`` if nothing was requested.

    Raises:
        KeyError: If a requested city is not a known station key.
    """
    url_templates = dict(
        calgary="https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=27211&Year={year}&Month=5&Day=1&timeframe=2&submit=Download+Data",
        edmonton="https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=27793&Year={year}&Month=5&Day=1&timeframe=2&submit=Download+Data",
    )
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call; collect the pieces and concatenate once instead.
    frames = []
    for city in cities:
        for year in years:
            url = url_templates[city].format(year=year)
            frames.append(pd.read_csv(url))
    # pd.concat raises ValueError on an empty list, so keep the original
    # behaviour of returning an empty frame.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
# Example: three years for both stations. This must call calgary_weather1;
# the single-city calgary_weather cannot take lists.
calgary_weather1(['edmonton','calgary'], ['2016','2017','2018'])
但是,如果您希望能够更灵活地选择年份:
def calgary_weather2(cities: list, years: list) -> pd.DataFrame:
    """Download weather data with a separate list of years per city.

    Args:
        cities: Station keys (``'calgary'``, ``'edmonton'``).
        years: One list of years per city, in the same order as ``cities``.
            Cities and year lists are paired with ``zip``, so surplus
            entries on the longer side are ignored.

    Returns:
        One ``DataFrame`` with all downloaded frames concatenated in the
        order requested, or an empty ``DataFrame`` if nothing was requested.

    Raises:
        KeyError: If a requested city is not a known station key.
    """
    url_templates = dict(
        calgary="https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=27211&Year={year}&Month=5&Day=1&timeframe=2&submit=Download+Data",
        edmonton="https://climate.weather.gc.ca/climate_data/bulk_data_e.html?format=csv&stationID=27793&Year={year}&Month=5&Day=1&timeframe=2&submit=Download+Data",
    )
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call; collect the pieces and concatenate once instead.
    frames = []
    for city, city_years in zip(cities, years):
        for year in city_years:
            url = url_templates[city].format(year=year)
            frames.append(pd.read_csv(url))
    # pd.concat raises ValueError on an empty list, so keep the original
    # behaviour of returning an empty frame.
    if not frames:
        return pd.DataFrame()
    return pd.concat(frames)
# Example: Edmonton for 2016-2018 and Calgary for 2012-2013.
calgary_weather2(
    cities=['edmonton', 'calgary'],
    years=[['2016', '2017', '2018'], ['2012', '2013']],
)