We can connect to the Lynker AWS s3 holdings to extract the refactored and aggregated catchments upstream of USGS gage ID 01022500:
# Read the catchments layer directly from s3 via GDAL's vsis3 driver
cats <- sf::read_sf("/vsis3/formulations-dev/CAMELS20/camels_01022500_2677104/spatial/hydrofabric.gpkg", "catchments")
Using the VRT produced for hydrofabric work and hosted via GitHub Pages, we can extract the needed DEM (USGS 3DEP 1 arc-second, roughly 30 m):
# Point terra at the remote VRT; only the cropped window is actually read
elev <- rast("/vsicurl/https://mikejohnson51.github.io/opendap.catalog/ned_USGS_1.vrt")
# Crop to the catchments, projected into the DEM's CRS
dem <- crop(elev, project(vect(cats), crs(elev)))
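Before deriving terrain products, it is worth a quick sanity check that the crop returned what we expect (purely illustrative):
# Quick sanity check on the cropped DEM
res(dem)  # cell size in source units (~1 arc-second, in degrees)
ext(dem)  # extent of the area of interest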
Using the in-memory elevation data, we can write a quick function to compute and save a Topographic Wetness Index (TWI) grid:
build_twi <- function(dem, outfile) {
  # Persist the in-memory DEM so the GDAL/WhiteboxTools utilities can read it
  writeRaster(dem, "dem.tif", overwrite = TRUE)

  # Reproject to a projected CRS (EPSG:5070) so slope/flow units are meters
  gdal_utils("warp",
    source      = "dem.tif",
    destination = "dem_proj.tif",
    options     = c("-of", "GTiff",
                    "-t_srs", "EPSG:5070",
                    "-r", "bilinear")
  )

  # Hydrologically condition the DEM, then derive flow accumulation and slope
  wbt_breach_depressions("dem_proj.tif", "dem_corr.tif")
  wbt_md_inf_flow_accumulation("dem_corr.tif", "sca.tif")
  wbt_slope("dem_proj.tif", "slope.tif")

  # TWI = ln(specific catchment area / tan(slope))
  wbt_wetness_index("sca.tif", "slope.tif", "fin.tif")

  # Write a tiled, compressed GeoTIFF to the requested location
  gdal_utils("translate",
    source      = "fin.tif",
    destination = outfile,
    options     = c("-co", "TILED=YES",
                    "-co", "COPY_SRC_OVERVIEWS=YES",
                    "-co", "COMPRESS=DEFLATE")
  )

  # Clean up intermediates
  unlink(c("dem.tif", "dem_proj.tif", "dem_corr.tif",
           "sca.tif", "slope.tif", "fin.tif"))

  return(outfile)
}
file <- build_twi(dem, outfile = "../private/twi.tif")
We next compute a catchment-level mean TWI using zonal:
# Compute the mean TWI per catchment, keyed on the ID field
o <- zonal::execute_zonal(file, cats, "ID", join = FALSE)
names(o)[2] <- "TWI"
head(o)
## ID TWI
## 1: cat-10 8.496850
## 2: cat-100 8.167785
## 3: cat-104 8.292524
## 4: cat-106 8.476784
## 5: cat-109 7.944167
## 6: cat-113 7.518617
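If a quick map is wanted, the table can be joined back to the catchment geometries. A minimal sketch, assuming cats retains its ID column:
# Join the catchment-mean TWI back to the geometries and map it
cats_twi <- merge(cats, o, by = "ID")
plot(cats_twi["TWI"])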
The remaining workflow is representative of a process that might update frequently in a lumped formulation (e.g., soil moisture), creating the need to map values back to a common model grid at every update. Because the values change each step, a one-off resample/regrid is not enough; we instead want a reusable weight grid.
# Materialize the 1 km National Water Model (NWM) grid from its extent, dimension, and CRS
nwm_1km <- materilize_grid(
ext = ext(-2303999.62876143, 2304000.37123857, -1920000.70008381, 1919999.29991619),
diminsion = c(3841, 4608),
projection = 'PROJCS["Sphere_Lambert_Conformal_Conic",GEOGCS["GCS_Sphere",DATUM["D_Sphere",SPHEROID["Sphere",6370000.0,0.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Lambert_Conformal_Conic"],PARAMETER["false_easting",0.0],PARAMETER["false_northing",0.0],PARAMETER["central_meridian",-97.0],PARAMETER["standard_parallel_1",30.0],PARAMETER["standard_parallel_2",60.0],PARAMETER["latitude_of_origin",40.000008],UNIT["Meter",1.0]];-35691800 -29075200 126180232.640845;-100000 10000;-100000 10000;0.001;0.001;0.001;IsHighPrecision'
)
# Build (and print) the catchment/cell weight table for the NWM grid
(w <- weighting_grid(nwm_1km, cats, "ID"))
## ID cell coverage_fraction
## 1: cat-3 4716637 0.07492989
## 2: cat-3 4716638 0.57502741
## 3: cat-3 4716639 0.04904574
## 4: cat-3 4720478 0.11056144
## 5: cat-3 4720479 0.98024553
## ---
## 1316: cat-222 4651331 0.79211766
## 1317: cat-222 4651332 0.55743623
## 1318: cat-222 4655171 0.07673281
## 1319: cat-222 4655172 0.06575897
## 1320: cat-222 4655173 0.16672505
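For context, a comparable ID/cell/coverage_fraction table can be built directly from exactextractr coverage fractions. This is a minimal sketch, not the zonal internals; build_weights is a hypothetical helper and assumes a recent exactextractr that accepts SpatRaster inputs:
# Hypothetical helper: one row per catchment/cell pair with its coverage fraction
library(exactextractr)
library(data.table)

build_weights <- function(r, polys, id) {
  # exact_extract returns one data.frame per polygon; include_cell keeps cell indices
  l <- exact_extract(r, polys, include_cell = TRUE, progress = FALSE)
  names(l) <- polys[[id]]
  rbindlist(l, idcol = "ID")[, .(ID, cell, coverage_fraction)]
}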
For this example we are computing a grouped (areal) mean: each cell value is multiplied by its coverage fraction, summed by group, and divided by the sum of the coverage fractions.
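The helper applied below (areal.mean) is assumed to implement exactly that; if it is not already in scope, a one-line equivalent is:
# Coverage-weighted mean (assumed equivalent of areal.mean)
areal.mean <- function(x, w) sum(x * w, na.rm = TRUE) / sum(w, na.rm = TRUE)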
Say a new value was computed at the grid level and needs to be mapped back to the catchments. The "pseudo" value is TWI plus a random number between 1 and 100. Using the weight grid, this is a straightforward, table-based process!
# Imagine this is a model time step updating the grid values...
nwm_1km[] = values(nwm_1km) + sample(1:100, ncell(nwm_1km), replace = TRUE)

system.time({
  # Extract the needed cells from layer (1) using the weight grid
  w$updated_value = nwm_1km[w$cell][, 1]
  # Apply areal.mean over "updated_value", grouping by ID
  exe <- w[, lapply(.SD, FUN = areal.mean, w = coverage_fraction),
           by = "ID",
           .SDcols = "updated_value"]
})
## user system elapsed
## 0.002 0.001 0.003
head(exe)
## ID updated_value
## 1: cat-3 61.27171
## 2: cat-8 61.28775
## 3: cat-10 75.12232
## 4: cat-15 59.54757
## 5: cat-16 57.18305
## 6: cat-18 58.01534
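As a spot check, the same grouped means could be recomputed from scratch on the updated grid. Assuming execute_zonal accepts an in-memory SpatRaster as it does a file path, this should match exe, just slower because the weights are rebuilt on every call:
# Recompute weights + means in one shot (slower; for verification only)
chk <- zonal::execute_zonal(nwm_1km, cats, "ID", join = FALSE)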
So the question remains: can this scale? I hope to show this with a quick, naive, but more realistic example of how the hydrofabric framework being built can support model engine goals:
# AWS bucket and hydrofabric release prefix
bucket = 'formulations-dev'
subdir = 'hydrofabric/CONUS-hydrofabric/ngen-release/01a/2021-11-01/spatial'
# Local GridMET precipitation file (2020)
local_pr = '/Users/mjohnson/github/zonal/to_build/pr_2020.nc'
Since this grid/hydrofabric pairing is "new," we build the weight grid and upload it to s3. This only needs to be done once per pair.
# Build the weight grid for the GridMET grid and save it as parquet
write_parquet(weighting_grid(local_pr, cats, "ID"),
              "../private/gridmet_weights.parquet")

# Push the weights to s3 so any downstream user can reuse them
put_object(
  file = "../private/gridmet_weights.parquet",
  object = file.path(subdir, 'gridmet_weights.parquet'),
  bucket = bucket,
  multipart = TRUE
)
unlink("../private/gridmet_weights.parquet")
The produced weights parquet file is 814 KB… that’s it!
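To verify the upload without pulling the whole table down, we can read it lazily as an Arrow Table and inspect the metadata (a sketch, assuming arrow was built with S3 support):
# Open the remote parquet as an Arrow Table rather than a data.frame
w_check <- read_parquet(file.path('s3:/', bucket, subdir, 'gridmet_weights.parquet'),
                        as_data_frame = FALSE)
w_check$schema    # column names and types
w_check$num_rows  # one row per catchment/cell pair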
Say we want aggregated catchment averages of the 4th day of GridMET rainfall:
- Remote read the weights file
- Partially read the forcing file
- Apply areal.mean grouped by ID
system.time({
  # Remote read of the weight table
  w = read_parquet(file.path('s3:/', bucket, subdir, 'gridmet_weights.parquet'))
  # Pull only the needed cells from day 4 of the precipitation stack
  w$rain = rast(local_pr)[[4]][w$cell]
  # Coverage-weighted mean rainfall per catchment
  catchment_rainfall <- w[, lapply(.SD, FUN = areal.mean, w = coverage_fraction),
                          by = "ID",
                          .SDcols = "rain"]
})
## user system elapsed
## 0.198 0.033 1.385
head(catchment_rainfall)
## ID rain
## 1: cat-13581 1.659140
## 2: cat-711 0.000000
## 3: cat-13569 1.455351
## 4: cat-3 0.000000
## 5: cat-4 0.000000
## 6: cat-7 0.000000
This is to generally show how hydrofabric attributes can be used in computation. We make the naive assumption that runoff equals rainfall * %sand, reading the sand percentage from the basin_attributes file:
system.time({
  # Remote read just the ID and sand columns from the basin attributes file
  sand = read_parquet(file.path('s3:/', bucket, subdir, 'basin_attributes.parquet'),
                      col_select = c("ID", "sand-1m-percent"))
  # Join to the catchment rainfall and apply the naive runoff model
  model = merge(catchment_rainfall, sand, by = "ID")
  model$runoff = model$rain * model$`sand-1m-percent`
})
## user system elapsed
## 0.072 0.013 1.298
If (for some reason) you want to take the catchment-level "runoff" values and map them back to the grid, you can do so…
system.time({
  dt <- merge(w, model, by = "ID")
  # Apply areal.mean over "runoff", grouped by cell
  grid_runoff <- dt[, lapply(.SD, FUN = areal.mean, w = coverage_fraction),
                    by = "cell",
                    .SDcols = "runoff"]
})
## user system elapsed
## 0.039 0.001 0.040
# Materialize an empty grid matching the GridMET file and insert the runoff values
tmp = materilize_grid(local_pr, fillValue = 0)
tmp[grid_runoff$cell] <- grid_runoff$runoff
plot(tmp)
#plot(crop(tmp, project(vect(cats), crs(tmp))))