Skip to content

Instantly share code, notes, and snippets.

@darothen
Created July 25, 2022 03:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save darothen/5380e223ae5bc894006a5b6ed5a27cbb to your computer and use it in GitHub Desktop.
Save darothen/5380e223ae5bc894006a5b6ed5a27cbb to your computer and use it in GitHub Desktop.
Strawman for GRIB2 pangeo-forge recipe
from pangeo_forge_recipes.patterns import pattern_from_file_sequence, FilePattern, ConcatDim
from pangeo_forge_recipes.recipes import HDFReferenceRecipe
from pathlib import Path
from dask.diagnostics import ProgressBar
# NOTE(darothen): use PR fsspec/kerchunk#204
# from kerchunk.grib2 import scan_grib
import intake
## Local
# NOTE(darothen): Either download some recent HRRR model output into a local
# directory, or switch to the (commented-out) remote block below to read it from S3.
# Remote access may require changes to `storage_options`.
# The curl command below can help automate the HRRR download; update YYYYMMDD to a
# more recent date, since NOMADS only archives two days:
# $ curl -O "https://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/prod/hrrr.20220721/conus/hrrr.t00z.wrfsfcf[00-18].grib2"
data_path = Path("./hrrr_data/")
# Sort the matches so the file order is deterministic, then keep only the first five.
_grib_files = sorted(data_path.glob("*.grib2"))[:5]
fn_pths = [f"file://{_grib_file}" for _grib_file in _grib_files]
# The selected files are concatenated along a dimension named 'step'.
pattern = pattern_from_file_sequence(fn_pths, 'step', file_type='grib')
## Remote
# _tpl = "s3://noaa-hrrr-bdp-pds/hrrr.20220722/conus/hrrr.t22z.wrfsfcf{step:02d}.grib2"
# _tpl = "https://noaa-hrrr-bdp-pds.s3.amazonaws.com/hrrr.20220722/conus/hrrr.t22z.wrfsfcf{step:02d}.grib2"
# def format_function(step: int) -> str:
#     return _tpl.format(step=step)
# pattern = FilePattern(
#     format_function, ConcatDim("step", [0, 1, 2, 3]),
#     file_type='grib',
# )
# Coordinate names stored under the 'common_coords' option.
_common = ['time', 'step', 'latitude', 'longitude', 'valid_time']
# Key/value selection stored under the 'filter_by_keys' option.
_filter = {
    ## Option 1) Grab all the variables defined on the 850 mb surface
    # 'typeOfLevel': 'isobaricInhPa',
    # 'level': 850,
    # 'shortName': 't'
    ## Option 2) Down-select solely to surface (skin) temperature
    'typeOfLevel': 'surface',
    'shortName': 't',
}
# Options handed to the recipe as `src_storage_options`.
# NOTE(review): 'anon' / 'default_cache_type' look like fsspec/s3fs settings -- confirm
# they are appropriate for the (local or remote) source actually in use.
storage_options = {
    'anon': True,
    'default_cache_type': 'readahead',
    'common_coords': _common,
    'filter_by_keys': _filter,
}
# Build the recipe from the file pattern, forwarding the options assembled above
# as `src_storage_options`.
recipe = HDFReferenceRecipe(pattern, src_storage_options=storage_options)
# Alternative (disabled): prune the recipe and run it as a plain function.
# recipe = recipe.copy_pruned()
# recipe_func = recipe.to_function()
# recipe_func()
# Convert the recipe to a dask object and execute it with a progress bar.
delayed = recipe.to_dask()
with ProgressBar():
    # The compute call must sit inside the `with` body: ProgressBar only reports on
    # work run while the context is active, and an unindented body is a syntax error.
    delayed.compute()
# Open the catalog file located under the recipe's target.
cat_url = f"{recipe.target}/reference.yaml"
cat = intake.open_catalog(cat_url)
# Bare expression, presumably left over from a notebook cell where it displayed the
# catalog; it has no effect when this file is run as a script.
cat
# Load the catalog's `data` entry via its `to_dask()` method.
ds = cat.data.to_dask()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment