Read, Process, and Visualize Ambient Weather Station Data#
Motivation#
In this notebook, we build a workflow to read data from the Ambient weather stations. We clean up the raw data, rename some fields, attach metadata, write the result to a netCDF file, and plot a few of the variables.
Imports#
import os
import time
import warnings
from datetime import datetime
from pathlib import Path

import holoviews as hv
import hvplot.pandas
import hvplot.xarray
import numpy as np
import pandas as pd
import panel as pn
import xarray as xr
from ambient_api.ambientapi import AmbientAPI
from bokeh.models.formatters import DatetimeTickFormatter
# Load the Bokeh plotting extension for HoloViews; the plots at the end of the
# notebook are rendered through it.
hv.extension('bokeh')
Setup Your Environment#
# `! export VAR=...` runs in a short-lived subshell, so the variable is gone
# again before the next cell executes. Setting it on os.environ makes it
# visible to this kernel process (and therefore to the API client below).
os.environ.setdefault('AMBIENT_ENDPOINT', 'https://api.ambientweather.net/v1')

# Credentials are intentionally NOT hard-coded in the notebook: export
# AMBIENT_API_KEY and AMBIENT_APPLICATION_KEY in the shell that launches
# Jupyter (or load them from an untracked file) before running this cell.
# NOTE(review): the key values that used to be committed in this cell should
# be treated as compromised and rotated.
missing_credentials = [
    name
    for name in ('AMBIENT_API_KEY', 'AMBIENT_APPLICATION_KEY')
    if not os.environ.get(name)
]
if missing_credentials:
    warnings.warn(
        'Missing environment variable(s): '
        + ', '.join(missing_credentials)
        + '. Requests to the Ambient Weather API are expected to fail '
        'until they are set.'
    )
Call the API#
Now that the environment variables are set, we can call the API.
# Create the API client. Per the setup step above it is expected to read the
# AMBIENT_* environment variables (NOTE(review): confirm against the
# ambient_api documentation).
api = AmbientAPI()
# Fetch the device objects; each one is passed to process_station() below.
devices = api.get_devices()
Setup Helper Functions and Metadata Fixes#
# Attributes attached to each raw station field (keyed by the raw field name)
# before the fields are renamed. All four fields are reported in degF.
# NOTE(review): these `standard_name` values look like free-text labels rather
# than CF standard-name table entries; confirm whether `long_name` was meant.
attrs_dict = {
    field: {'standard_name': label, 'units': 'degF'}
    for field, label in [
        ('tempf', 'Temperature'),
        ('tempinf', 'Temperature'),
        ('dewPoint', 'Dewpoint Temperature'),
        ('dewPointin', 'Dewpoint Temperature'),
    ]
}

# Raw field name -> descriptive name used in the output dataset. The `date`
# entry also turns the index into the `time` coordinate.
variable_mapping = dict(
    tempf='outdoor_temperature',
    tempinf='indoor_temperature',
    dewPoint='outdoor_dewpoint',
    dewPointin='indoor_dewpoint',
    date='time',
)
def process_station(device, attrs=attrs_dict, variable_mapping=variable_mapping):
    """Fetch one station's recent observations and return today's (UTC) data.

    Parameters
    ----------
    device
        Station object providing ``get_data(end_date=...)`` and an ``info``
        mapping with ``name`` and ``coords.coords.lat`` / ``.lon`` entries.
    attrs : dict
        Raw field name -> attribute dict to attach to that field. Fields the
        station does not report are skipped.
    variable_mapping : dict
        Raw field name -> output name. Entries for fields the station does not
        report are skipped. It must rename ``date`` to ``time`` (the default
        does), because the final selection is made on ``time``.

    Returns
    -------
    xarray.Dataset
        Dataset with a length-one ``station`` dimension, scalar ``latitude``
        and ``longitude`` variables, restricted to the current UTC date.

    Raises
    ------
    ValueError
        If the station returned no records at all.
    """
    # Naive timestamp expressed in UTC; it is also used for the day selection.
    current_date = datetime.utcnow()

    # Read in the station data and its metadata
    data = device.get_data(end_date=current_date)
    meta = device.info

    df = pd.DataFrame(data)
    if df.empty:
        # Without this guard the failure shows up as an opaque AttributeError
        # on the missing `date` column.
        raise ValueError('station returned no observations')

    # Normalise to timezone-naive UTC. Calling .astype('datetime64[ns]')
    # directly on timezone-aware values raises TypeError on pandas >= 2, so
    # the timezone is dropped explicitly first.
    # NOTE(review): assumes naive input strings are already UTC - confirm.
    utc_times = pd.to_datetime(df['date'], utc=True)
    df['date'] = utc_times.dt.tz_localize(None).astype('datetime64[ns]')

    # Chronological order, indexed by timestamp
    df = df.sort_values('date').set_index('date')
    ds = df.to_xarray()

    # Attach metadata only to the fields this station actually reports, so a
    # station without (for example) an indoor sensor is not rejected outright.
    for variable, variable_attrs in attrs.items():
        if variable in ds.variables:
            ds[variable].attrs = dict(variable_attrs)

    # Rename only what is present; renaming an absent name is an error.
    present_names = {
        old: new for old, new in variable_mapping.items() if old in ds.variables
    }
    ds = ds.rename(present_names)

    # Add a length-one station dimension labelled with the station name
    ds = ds.expand_dims('station')
    ds['station'] = [meta['name']]
    ds['latitude'] = meta['coords']['coords']['lat']
    ds['longitude'] = meta['coords']['coords']['lon']

    # Keep the current UTC day only; zero-padded ISO form for the day string.
    return ds.sel(time=current_date.strftime('%Y-%m-%d'))
Run the Functions + Read our Data#
Process each station in turn, then combine the per-station datasets into a single dataset along the station dimension.
# Collect one dataset per station. A failing station is skipped (best effort),
# but the reason is reported instead of being silently discarded.
dsets = []
for device in devices:
    try:
        dsets.append(process_station(device))
    except Exception as err:
        # `Exception` rather than a bare `except`, so Ctrl-C / kernel
        # interrupts still stop the loop.
        warnings.warn(f'Skipping station {device!r}: {type(err).__name__}: {err}')
    # Pause between stations - presumably to stay under the API rate limit
    # (TODO confirm).
    time.sleep(5)

if not dsets:
    # xr.concat would raise a ValueError here anyway; raise the same type with
    # a message that points at the likely causes.
    raise ValueError(
        'No station data could be read: check the API credentials, the '
        'device list, and the warnings emitted above.'
    )

# Stack the per-station datasets along the station dimension
ds = xr.concat(dsets, dim='station')
---------------------------------------------------------------------------
StopIteration Traceback (most recent call last)
File ~/miniconda3/envs/instrument-cookbooks-dev/lib/python3.10/site-packages/xarray/core/concat.py:228, in concat(objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)
227 try:
--> 228 first_obj, objs = utils.peek_at(objs)
229 except StopIteration:
File ~/miniconda3/envs/instrument-cookbooks-dev/lib/python3.10/site-packages/xarray/core/utils.py:205, in peek_at(iterable)
204 gen = iter(iterable)
--> 205 peek = next(gen)
206 return peek, itertools.chain([peek], gen)
StopIteration:
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
Cell In[6], line 8
6 pass
7 time.sleep(5)
----> 8 ds = xr.concat(dsets, dim='station')
File ~/miniconda3/envs/instrument-cookbooks-dev/lib/python3.10/site-packages/xarray/core/concat.py:230, in concat(objs, dim, data_vars, coords, compat, positions, fill_value, join, combine_attrs)
228 first_obj, objs = utils.peek_at(objs)
229 except StopIteration:
--> 230 raise ValueError("must supply at least one object to concatenate")
232 if compat not in _VALID_COMPAT:
233 raise ValueError(
234 f"compat={compat!r} invalid: must be 'broadcast_equals', 'equals', 'identical', 'no_conflicts' or 'override'"
235 )
ValueError: must supply at least one object to concatenate
Write out the Data File#
# Name the output file after the last timestamp in the dataset; the strftime
# pattern yields a YYYY/MM/DD directory tree plus the file name.
end_time = ds.isel(time=-1)
time_label = pd.to_datetime(end_time.time.values).strftime('%Y/%m/%d/ambient.a1.%Y%m%d.nc')
full_file = f'../../data/surface-meteorology/{time_label}'
full_path = Path(full_file)

# Create the directory tree in one step; exist_ok avoids the race between
# checking for the directory and creating it.
full_path.parent.mkdir(parents=True, exist_ok=True)
ds.to_netcdf(full_path)
Visualize the Variables#
# Tick label format for the time axis: date on the first line, UTC time below.
formatter = DatetimeTickFormatter(hours="%d %b %Y \n %H:%M UTC")

# Variables to plot, one panel each
variables = [
    'outdoor_temperature',
    'outdoor_dewpoint',
    'hourlyrainin',
    'solarradiation',
]

# One line plot per variable, with a separate line for every station
panels = [
    ds[name].hvplot.line(x='time', by='station', xformatter=formatter)
    for name in variables
]

# Stack the panels vertically in a single column
hv.Layout(panels).cols(1)