# I'll be using the folium package to render the data into a map in Jupyter.
import folium
import pandas as pd
# Load the station table and keep only the stations that belong to this dataset's hash.
df = pd.read_csv('data/BinSize_d400.csv')
station_locations_by_hash = df[df['hash'] == 'fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89']
if station_locations_by_hash.empty:
    # Without this check the map construction below fails with an opaque IndexError.
    raise ValueError('No stations found for the requested hash in data/BinSize_d400.csv')

# Get longitude and latitude to plot.
lons = station_locations_by_hash['LONGITUDE'].tolist()
lats = station_locations_by_hash['LATITUDE'].tolist()

# Plot on a folium map centred on the first station, with one marker per station.
my_map = folium.Map(location=[lats[0], lons[0]], height=500, zoom_start=9)
for lat, lon in zip(lats, lons):
    folium.Marker([lat, lon]).add_to(my_map)

# Render the map in Jupyter (display is supplied by the notebook session).
display(my_map)
Visualize Michigan’s temperature records
Goal
Plot line graphs of the record low and record high temperatures for each day of the year over 2005 through 2014, and highlight the days in 2015 on which the temperature broke one of those records.
Dataset
The data comes from a subset of the National Centers for Environmental Information (NCEI) Global Historical Climatology Network daily (GHCN-Daily) dataset, which includes daily climate records from thousands of land surface stations across the globe.
Here I use data from stations near Ann Arbor, Michigan.
Each row in this datafile corresponds to a single observation from a weather station, and has the following variables:
- ID : station identification code
- Date : date in YYYY-MM-DD format (e.g. 2012-01-24 = January 24, 2012)
- Element : indicator of element type
- TMAX : Maximum temperature (tenths of degrees C)
- TMIN : Minimum temperature (tenths of degrees C)
- Data_Value : data value for element (tenths of degrees C)
Step 1
Load the dataset, convert the values from tenths of a degree Celsius to degrees Celsius (refer to the dataset documentation), then extract all of the rows which have minimum or maximum temperatures.
import pandas as pd
# Load the raw daily temperature observations and preview the first rows.
df = pd.read_csv('data/temperatures.csv')
df.head()
| | ID | Date | Element | Data_Value |
|---|---|---|---|---|
| 0 | USW00094889 | 2014-11-12 | TMAX | 22 |
| 1 | USC00208972 | 2009-04-29 | TMIN | 56 |
| 2 | USC00200032 | 2008-05-26 | TMAX | 278 |
| 3 | USC00205563 | 2005-11-11 | TMAX | 139 |
| 4 | USC00200230 | 2014-02-27 | TMAX | -106 |
# In this code cell, transform the Data_Value column.
# Data_Value is recorded in tenths of a degree Celsius; dividing by 10 gives
# degrees Celsius. NOTE: despite its name, Data_Value_Tenths therefore holds
# degrees Celsius. The name is kept because the following cells refer to it.
df['Data_Value_Tenths'] = df['Data_Value'] / 10

## drop leap day
df['Date'] = pd.to_datetime(df['Date'])
df = df[~((df['Date'].dt.month == 2) & (df['Date'].dt.day == 29))]

# Split the observations into daily-maximum and daily-minimum readings.
df_max = df[df['Element'] == 'TMAX']
df_min = df[df['Element'] == 'TMIN']

print(df.shape, df_min.shape, df_max.shape)
(165002, 5) (81982, 5) (83020, 5)
Step 2
In order to visualize the data we would plot the min and max data for each day of the year between the years 2005 and 2014 across all weather stations. But we also need to find out when the min or max temperature in 2015 falls below the min or rises above the max for the previous decade.
# Create a DataFrame of the maximum temperature per date (across all stations).
max_temp_date = df_max.groupby('Date')['Data_Value_Tenths'].max().reset_index()

# Create a DataFrame of the minimum temperature per date (across all stations).
min_temp_date = df_min.groupby('Date')['Data_Value_Tenths'].min().reset_index()

print(max_temp_date.shape, min_temp_date.shape)
(4015, 2) (4015, 2)
Step 3
Now that we have grouped the daily max and min temperatures for each day of the years 2005 through 2015, we can separate out the data for 2015.
import numpy as np
def _extreme_by_day(daily, in_period, how, column_name):
    """Aggregate daily temperatures to one value per calendar day ('MM-DD').

    Args:
        daily: DataFrame with a datetime 'Date' column and a
            'Data_Value_Tenths' column.
        in_period: Boolean mask selecting the rows of ``daily`` to include.
        how: Aggregation passed to ``agg`` ('max' or 'min').
        column_name: Name given to the aggregated column.

    Returns:
        DataFrame indexed by the 'MM-DD' key (index name 'Date') with a
        single column called ``column_name``.
    """
    period = daily[in_period]
    day_key = period['Date'].dt.strftime('%m-%d')
    by_day = period.groupby(day_key).agg({'Data_Value_Tenths': how})
    return by_day.rename(columns={'Data_Value_Tenths': column_name})


# calculate the minimum and maximum values for the day of the year for 2005 through 2014
max_temp_0514 = _extreme_by_day(max_temp_date, max_temp_date['Date'].dt.year < 2015, 'max', 'Max_Temp_0514')
min_temp_0514 = _extreme_by_day(min_temp_date, min_temp_date['Date'].dt.year < 2015, 'min', 'Min_Temp_0514')

# calculate the minimum and maximum values for the year 2015
max_temp_2015 = _extreme_by_day(max_temp_date, max_temp_date['Date'].dt.year == 2015, 'max', 'Max_Temp_2015')
min_temp_2015 = _extreme_by_day(min_temp_date, min_temp_date['Date'].dt.year == 2015, 'min', 'Min_Temp_2015')

## join df and find broken records
# All four frames share the 'Date' ('MM-DD') index level, so they are merged on it.
df_temp = pd.merge(max_temp_0514, min_temp_0514, on='Date').merge(max_temp_2015, on='Date').merge(min_temp_2015, on='Date')
df_temp['Broke_Record_Max'] = df_temp['Max_Temp_2015'] > df_temp['Max_Temp_0514']
df_temp['Broke_Record_Min'] = df_temp['Min_Temp_2015'] < df_temp['Min_Temp_0514']

print(max_temp_0514.shape, min_temp_0514.shape, max_temp_2015.shape, min_temp_2015.shape, df_temp.shape)
(365, 1) (365, 1) (365, 1) (365, 1) (365, 6)
Step 4
Now it’s time to plot!
import matplotlib.pyplot as plt
from calendar import month_abbr
# Plot the 2005-2014 record band and mark the 2015 days that broke a record.
fig, ax = plt.subplots(figsize=(12, 6))

# The x axis is the 0-based row position in df_temp, i.e. the day of the year.
plt.plot(df_temp['Max_Temp_0514'].values, label='Max Temp (2005-2014)', linewidth=1, alpha=0.7, c='firebrick')
plt.plot(df_temp['Min_Temp_0514'].values, label='Min Temp (2005-2014)', linewidth=1, alpha=0.7, c='royalblue')

plt.fill_between(range(df_temp.shape[0]), df_temp['Max_Temp_0514'], df_temp['Min_Temp_0514'], facecolor='gray', alpha=0.3)

# Boolean masks of the days on which 2015 broke the 2005-2014 record.
broke_max = df_temp['Broke_Record_Max'].to_numpy()
broke_min = df_temp['Broke_Record_Min'].to_numpy()

plt.scatter(np.flatnonzero(broke_max),
            df_temp['Max_Temp_2015'].to_numpy()[broke_max],
            s=15, color='red', label='High Temp Broken (2015)', marker='^')
plt.scatter(np.flatnonzero(broke_min),
            df_temp['Min_Temp_2015'].to_numpy()[broke_min],
            s=15, color='indigo', label='Low Temp Broken (2015)', marker='v')

plt.legend(loc='upper right', fontsize=8)

# Put each month label on the first day of that month. Evenly spaced ticks
# (np.linspace(0, 365, 12)) drift away from the month boundaries and place
# 'Dec' at the very end of the year.
# Assumes df_temp holds one row per day of a non-leap year in calendar order
# (the shape printed above is 365 rows).
days_per_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
month_starts = np.cumsum([0] + days_per_month[:-1])
plt.xticks(month_starts, list(month_abbr)[1:], size=10)
plt.yticks(size=10)
plt.xlabel('Months', size=12)
plt.ylabel('Temperature (Celsius)', size=12)
plt.title('Temperature Patterns', size=14)

ax.spines[['right', 'top']].set_visible(False)
# Optional styling tweaks, left disabled:
# ax.spines[['bottom', 'left']].set_alpha(0.3)
# ax.tick_params(axis='x', color='gray')
# ax.tick_params(axis='y', color='gray')
# plt.savefig('temperature_plot.png')
plt.show()
All in one
import pandas as pd
import numpy as np
from calendar import month_abbr
## load data
df = pd.read_csv('data/temperatures.csv')

## isolate year,month,day
df['Date'] = pd.to_datetime(df['Date'])
df['Year'] = df['Date'].dt.year
df['Month'] = df['Date'].dt.strftime('%b')  ## month short form
df['Day'] = df['Date'].dt.day

## make months sort in calendar order (Jan..Dec) rather than alphabetically
month_order = list(month_abbr)[1:]
df['Month'] = pd.Categorical(df['Month'], categories=month_order, ordered=True)

## drop leap day
# .copy() so the column assignment below modifies an independent frame
# instead of a slice of the original (avoids SettingWithCopyWarning).
df = df[~((df['Month'] == 'Feb') & (df['Day'] == 29))].copy()

## transform temp: tenths of a degree Celsius -> degrees Celsius
df['Data_Value'] = df['Data_Value'] / 10

## group temps: record high / low per (Month, Day) for 2005-2014 and for 2015
max_temp_0514 = df[(df['Element'] == 'TMAX') & (df['Year'] < 2015)].groupby(['Month', 'Day'], observed=True).agg({'Data_Value': 'max'})
min_temp_0514 = df[(df['Element'] == 'TMIN') & (df['Year'] < 2015)].groupby(['Month', 'Day'], observed=True).agg({'Data_Value': 'min'})
max_temp_2015 = df[(df['Element'] == 'TMAX') & (df['Year'] == 2015)].groupby(['Month', 'Day'], observed=True).agg({'Data_Value': 'max'})
min_temp_2015 = df[(df['Element'] == 'TMIN') & (df['Year'] == 2015)].groupby(['Month', 'Day'], observed=True).agg({'Data_Value': 'min'})

## rename
max_temp_0514.rename(columns={'Data_Value': 'Max_Temp_0514'}, inplace=True)
min_temp_0514.rename(columns={'Data_Value': 'Min_Temp_0514'}, inplace=True)
max_temp_2015.rename(columns={'Data_Value': 'Max_Temp_2015'}, inplace=True)
min_temp_2015.rename(columns={'Data_Value': 'Min_Temp_2015'}, inplace=True)

## join df and find broken records
df_temp = pd.merge(max_temp_0514, min_temp_0514, on=['Month', 'Day'])\
    .merge(max_temp_2015, on=['Month', 'Day'])\
    .merge(min_temp_2015, on=['Month', 'Day'])
df_temp['Broke_Record_Max'] = df_temp['Max_Temp_2015'] > df_temp['Max_Temp_0514']
df_temp['Broke_Record_Min'] = df_temp['Min_Temp_2015'] < df_temp['Min_Temp_0514']

## safety net: drop any (Month, Day) rows left with missing values,
## e.g. non-existent dates such as Feb 30 if the grouping ever produces them
df_temp.dropna(inplace=True)
# plotting
# This section imports pandas, numpy and month_abbr but not matplotlib, so
# bring plt into scope here to keep the all-in-one script runnable.
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(12, 6))

# The x axis is the 0-based row position in df_temp, i.e. the day of the year.
plt.plot(df_temp['Max_Temp_0514'].values, label='Max Temp (2005-2014)', linewidth=1, alpha=0.7, c='firebrick')
plt.plot(df_temp['Min_Temp_0514'].values, label='Min Temp (2005-2014)', linewidth=1, alpha=0.7, c='royalblue')

plt.fill_between(range(df_temp.shape[0]), df_temp['Max_Temp_0514'], df_temp['Min_Temp_0514'], facecolor='gray', alpha=0.3)

# Boolean masks of the days on which 2015 broke the 2005-2014 record.
broke_max = df_temp['Broke_Record_Max'].to_numpy()
broke_min = df_temp['Broke_Record_Min'].to_numpy()

plt.scatter(np.flatnonzero(broke_max),
            df_temp['Max_Temp_2015'].to_numpy()[broke_max],
            s=15, color='red', label='High Temp Broken (2015)', marker='^')
plt.scatter(np.flatnonzero(broke_min),
            df_temp['Min_Temp_2015'].to_numpy()[broke_min],
            s=15, color='indigo', label='Low Temp Broken (2015)', marker='v')

plt.legend(loc='best', fontsize=10)

# Put each month label on the first day of that month. Evenly spaced ticks
# (np.linspace(0, 365, 12)) drift away from the month boundaries and place
# 'Dec' at the very end of the year.
# Assumes df_temp holds one row per day of a non-leap year in calendar order
# — TODO confirm df_temp.shape[0] == 365 for your data.
days_per_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
month_starts = np.cumsum([0] + days_per_month[:-1])
plt.xticks(month_starts, list(month_abbr)[1:], size=10)
plt.yticks(size=10)
plt.xlabel('Months', size=12)
plt.ylabel('Temperature (Celsius)', size=12)
plt.title('Temperature Patterns', size=14)

ax.spines[['right', 'top']].set_visible(False)
# Optional styling tweaks, left disabled:
# ax.spines[['bottom', 'left']].set_alpha(0.3)
# ax.tick_params(axis='x', color='gray')
# ax.tick_params(axis='y', color='gray')
plt.show()