%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
# Set matplotlib's default figure resolution to 900 dots per inch.
mpl.rcParams['figure.dpi']= 900
%config InlineBackend.figure_format = 'retina'
Import Data and Clean
The data used for this visual is taken from The National Centers for Environmental Information (NCEI) Daily Global Historical Climatology Network (GHCN-Daily). The GHCN-Daily consists of daily climate records from thousands of weather stations from around the world.
- id : station identification code
- date : date in YYYY-MM-DD format (e.g. 2012-01-24 = January 24, 2012)
- element : indicator of element type
- TMAX : Maximum temperature (tenths of degrees C)
- TMIN : Minimum temperature (tenths of degrees C)
- value : data value for element (tenths of degrees C)
# Read the raw extract and give the measurement column a shorter name.
filename = 'fb441e62df2d58994928907a91895ec62c2c42e6cd075c2700843b89.csv'
w = pd.read_csv('data/' + filename)
w = w.rename(columns={'Data_Value': 'Temp'})

# Sanity checks: column names, table dimensions, and the first/last date.
print(w.columns, w.shape, w['Date'].min(), w['Date'].max(), sep='\n')
Index(['ID', 'Date', 'Element', 'Temp'], dtype='object')
(165085, 4)
2005-01-01
2015-12-31
Drop Leap Days and convert temperature to degrees Celsius, then to degrees Fahrenheit.
# Report the row count before and after removing the February 29 records.
print(w.shape)
w = w.loc[~w['Date'].str.contains('02-29')].copy()
print(w.shape)

# Values arrive in tenths of degrees C: scale to degrees C, then convert
# to degrees F.
w['Temp'] = w['Temp'] * 0.1 * (9. / 5.) + 32.
w.head()
(165085, 4)
(165002, 4)
|
ID |
Date |
Element |
Temp |
0 |
USW00094889 |
2014-11-12 |
TMAX |
35.96 |
1 |
USC00208972 |
2009-04-29 |
TMIN |
42.08 |
2 |
USC00200032 |
2008-05-26 |
TMAX |
82.04 |
3 |
USC00205563 |
2005-11-11 |
TMAX |
57.02 |
4 |
USC00200230 |
2014-02-27 |
TMAX |
12.92 |
# Rows whose date string contains '2015-', i.e. the year being compared.
w_15 = w.loc[w['Date'].str.contains('2015-')].copy()
print(w_15['Date'].min(), w_15['Date'].max(), sep='\n')
2015-01-01
2015-12-31
# Everything that is not a 2015 row: the 2005-2014 baseline period.
w_05_14 = w.loc[~w['Date'].str.contains('2015-')].copy()
print(w_05_14['Date'].min(), w_05_14['Date'].max(), sep='\n')
2005-01-01
2014-12-31
# Drop the leading 'YYYY-' so each date is just 'MM-DD' and the same
# calendar day can be matched across years.
w_15['Date'] = w_15['Date'].str.slice(start=5)
w_05_14['Date'] = w_05_14['Date'].str.slice(start=5)
w_05_14.head()
|
ID |
Date |
Element |
Temp |
0 |
USW00094889 |
11-12 |
TMAX |
35.96 |
1 |
USC00208972 |
04-29 |
TMIN |
42.08 |
2 |
USC00200032 |
05-26 |
TMAX |
82.04 |
3 |
USC00205563 |
11-11 |
TMAX |
57.02 |
4 |
USC00200230 |
02-27 |
TMAX |
12.92 |
# Lowest TMIN reading per calendar day over the 2005-2014 rows.
mi = (
    w_05_14.loc[w_05_14['Element'] == 'TMIN', ['Date', 'Temp']]
    .groupby('Date')
    .min()
    .reset_index()
    .sort_values('Date')
    .rename(columns={'Temp': "05-14 Min"})
)
print(mi.shape)

# Highest TMAX reading per calendar day over the 2005-2014 rows.
ma = (
    w_05_14.loc[w_05_14['Element'] == 'TMAX', ['Date', 'Temp']]
    .groupby('Date')
    .max()
    .reset_index()
    .sort_values('Date')
    .rename(columns={'Temp': "05-14 Max"})
)
print(ma.shape)

# One row per calendar day carrying both extremes.
t_05_14 = mi.merge(ma, on='Date')
t_05_14.head()
(365, 2)
(365, 2)
|
Date |
05-14 Min |
05-14 Max |
0 |
01-01 |
3.20 |
60.08 |
1 |
01-02 |
-16.06 |
57.02 |
2 |
01-03 |
-16.06 |
55.94 |
3 |
01-04 |
-14.98 |
51.08 |
4 |
01-05 |
5.00 |
55.04 |
# Lowest TMIN reading per calendar day in the 2015 rows.
mi_15 = (
    w_15.loc[w_15['Element'] == 'TMIN', ['Date', 'Temp']]
    .groupby('Date')
    .min()
    .reset_index()
    .sort_values('Date')
    .rename(columns={'Temp': "15 Min"})
)
print(mi_15.shape)

# Highest TMAX reading per calendar day in the 2015 rows.
ma_15 = (
    w_15.loc[w_15['Element'] == 'TMAX', ['Date', 'Temp']]
    .groupby('Date')
    .max()
    .reset_index()
    .sort_values('Date')
    .rename(columns={'Temp': "15 Max"})
)
print(ma_15.shape)

# One row per calendar day carrying both 2015 extremes.
t_15 = mi_15.merge(ma_15, on='Date')
t_15.head()
(365, 2)
(365, 2)
|
Date |
15 Min |
15 Max |
0 |
01-01 |
8.06 |
33.98 |
1 |
01-02 |
10.04 |
39.02 |
2 |
01-03 |
19.94 |
39.02 |
3 |
01-04 |
16.16 |
39.92 |
4 |
01-05 |
4.10 |
37.04 |
# Put the 2005-2014 extremes and the 2015 extremes side by side, keyed on
# the 'MM-DD' date.
t = t_05_14.merge(t_15, on='Date')
print(t.shape)
t.head()
(365, 5)
|
Date |
05-14 Min |
05-14 Max |
15 Min |
15 Max |
0 |
01-01 |
3.20 |
60.08 |
8.06 |
33.98 |
1 |
01-02 |
-16.06 |
57.02 |
10.04 |
39.02 |
2 |
01-03 |
-16.06 |
55.94 |
19.94 |
39.02 |
3 |
01-04 |
-14.98 |
51.08 |
16.16 |
39.92 |
4 |
01-05 |
5.00 |
55.04 |
4.10 |
37.04 |
# Keep a 2015 value only where it is strictly beyond the 2005-2014 extreme;
# ties and non-records are replaced with NaN.
t['15 Min'] = t['15 Min'].mask(t['15 Min'] >= t['05-14 Min'])
t['15 Max'] = t['15 Max'].mask(t['15 Max'] <= t['05-14 Max'])

# Preview the days with at least one surviving 2015 value, showing blanks
# in place of NaN.
t[t['15 Min'].notna() | t['15 Max'].notna()].fillna('').head()
|
Date |
05-14 Min |
05-14 Max |
15 Min |
15 Max |
4 |
01-05 |
5.00 |
55.04 |
4.1 |
|
10 |
01-11 |
-0.94 |
60.08 |
-4 |
|
33 |
02-03 |
-9.76 |
48.02 |
-10.84 |
|
39 |
02-09 |
-5.80 |
46.04 |
|
46.94 |
44 |
02-14 |
-7.06 |
51.08 |
-11.02 |
|
# Calendar days ('MM-DD') to mark on the x axis, and the text shown for each.
x_label_dates = ['01-01', '04-01', '07-01', '10-01', '12-31']
x_label_labels = ['1/1', '4/1', '7/1', '10/1', '12/31']

# Row positions in `t` of those days; used as tick locations when plotting
# against t.index.
x_label_indexes = t.index[t['Date'].isin(x_label_dates)].tolist()
t[t['Date'].isin(x_label_dates)]
|
Date |
05-14 Min |
05-14 Max |
15 Min |
15 Max |
0 |
01-01 |
3.20 |
60.08 |
NaN |
NaN |
90 |
04-01 |
17.06 |
80.96 |
NaN |
NaN |
181 |
07-01 |
42.98 |
98.06 |
NaN |
NaN |
273 |
10-01 |
26.24 |
80.96 |
NaN |
NaN |
364 |
12-31 |
5.00 |
57.02 |
NaN |
NaN |
def create_default_plot():
    """Draw the record-temperature chart with matplotlib's default styling.

    Reads the module-level DataFrame ``t`` and plots, against its 'Date'
    column, the 2005-2014 low and high lines, the band between them, and
    the 2015 values left in the '15 Min' / '15 Max' columns as scatter
    points. No styling arguments are passed, so colors, ticks and legend
    placement are whatever matplotlib picks.
    """
    plt.figure()

    # Baseline lines for the 2005-2014 period.
    plt.plot(t['Date'], t['05-14 Min'], label="Low '05-'14")
    plt.plot(t['Date'], t['05-14 Max'], label="High '05-'14")

    # Band between the two baseline lines.
    plt.gca().fill_between(t['Date'], t['05-14 Min'], t['05-14 Max'])

    # 2015 points; NaN entries are simply not drawn.
    plt.scatter(t['Date'], t['15 Min'], label="Low '15")
    plt.scatter(t['Date'], t['15 Max'], label="High '15")

    plt.legend()
    plt.title('Days in 2015 that Set a Record High or Low for 2005-2015')
    plt.xlabel('Date')
    # Raw string: '\c' is not a valid Python escape sequence, so a plain
    # literal emits an invalid-escape warning. The text is unchanged.
    plt.ylabel(r'Temperature ($^\circ$F)')
def create_best_practices_plot():
    """Draw the styled record-temperature chart and save it to 'weather.png'.

    Reads the module-level DataFrame ``t`` plus ``x_label_indexes`` and
    ``x_label_labels``. Data is plotted against ``t.index`` so that only
    the chosen tick positions are labelled. The 2005-2014 lows/highs are
    thin blue/red lines over a light grey band, and the 2015 values left
    in '15 Min' / '15 Max' are small blue/red points drawn on top.

    Side effect: writes the figure to 'weather.png' in the current
    working directory.
    """
    plt.figure()
    ax = plt.gca()

    # Baseline lines and the band between them sit at the lower z-order.
    plt.plot(t.index,
             t['05-14 Min'],
             color='blue',
             linewidth=0.1,
             zorder=1,
             label="Low '05-'14")
    plt.plot(t.index,
             t['05-14 Max'],
             color='red',
             linewidth=0.1,
             zorder=1,
             label="High '05-'14")
    ax.fill_between(t.index,
                    t['05-14 Min'],
                    t['05-14 Max'],
                    color='#DFDFDF',
                    zorder=1)

    # 2015 points are drawn above the band (higher z-order).
    plt.scatter(t.index,
                t['15 Min'],
                color='blue',
                s=3,
                zorder=2,
                label="Low '15")
    plt.scatter(t.index,
                t['15 Max'],
                color='red',
                s=3,
                zorder=2,
                label="High '15")

    plt.title('Days in 2015 that Set a Record High or Low for 2005-2015',
              fontsize=8,
              alpha=0.8)
    plt.xlabel('Date',
               fontsize=8,
               alpha=0.8)
    # Raw string: '\c' is not a valid Python escape sequence, so a plain
    # literal emits an invalid-escape warning. The text is unchanged.
    plt.ylabel(r'Temperature ($^\circ$F)',
               fontsize=8,
               alpha=0.8)

    # Label only the selected dates on the x axis.
    plt.xticks(x_label_indexes,
               x_label_labels,
               fontsize=6,
               alpha=0.8)
    plt.yticks(fontsize=6,
               alpha=0.8)
    ax.tick_params(direction='out',
                   length=3,
                   width=0.25)

    # Hide the box around the plot area.
    for spine in ax.spines.values():
        spine.set_visible(False)

    # Reorder legend entries by index into the returned handle list.
    # NOTE(review): assumes the handles come back as low line, high line,
    # low points, high points, so the legend reads highs first, lows last.
    # Confirm against the rendered legend.
    handles, labels = ax.get_legend_handles_labels()
    order = [3, 1, 0, 2]
    plt.legend([handles[idx] for idx in order],
               [labels[idx] for idx in order],
               fontsize=6,
               frameon=False)

    plt.savefig('weather.png',
                bbox_inches='tight')
Matplotlib Default Plot
Plot Made per Best Practices
# Render the styled figure; the function also saves it to 'weather.png'.
create_best_practices_plot()
Best Practices List
The second graph demonstrates several important improvements that bring it in line with visualization best practices.