Spatial objects (sf) can be built from a vector of X and Y values in addition to a coordinate reference system (CRS). For example:
df = data.frame(name = state.name,
X = state.center$x,
Y = state.center$y)
head(df)
name X Y
1 Alabama -86.7509 32.5901
2 Alaska -127.2500 49.2500
3 Arizona -111.6250 34.2192
4 Arkansas -92.2992 34.7336
5 California -119.7730 36.5341
6 Colorado -105.5130 38.6777
# Geographic Coordinate System (GCS)
(df_sf_gcs = st_as_sf(df,
coords = c("X", "Y"),
crs = 4269))
Simple feature collection with 50 features and 1 field
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -127.25 ymin: 27.8744 xmax: -68.9801 ymax: 49.25
Geodetic CRS: NAD83
First 10 features:
name geometry
1 Alabama POINT (-86.7509 32.5901)
2 Alaska POINT (-127.25 49.25)
3 Arizona POINT (-111.625 34.2192)
4 Arkansas POINT (-92.2992 34.7336)
5 California POINT (-119.773 36.5341)
6 Colorado POINT (-105.513 38.6777)
7 Connecticut POINT (-72.3573 41.5928)
8 Delaware POINT (-74.9841 38.6777)
9 Florida POINT (-81.685 27.8744)
10 Georgia POINT (-83.3736 32.3329)
ggplot() +
geom_sf(data = df_sf_gcs) +
coord_sf(datum = st_crs(df_sf_gcs)) +
theme_linedraw()
# Projected Coordinate System (PCS)
# st_transforms converts from one reference system to another
(df_sf_pcs = st_transform(df_sf_gcs, 5070))
Simple feature collection with 50 features and 1 field
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -2805703 ymin: 640477 xmax: 2079664 ymax: 3291437
Projected CRS: NAD83 / Conus Albers
First 10 features:
name geometry
1 Alabama POINT (862043.5 1099545)
2 Alaska POINT (-2264853 3291437)
3 Arizona POINT (-1422260 1356663)
4 Arkansas POINT (336061.5 1303543)
5 California POINT (-2086972 1760961)
6 Colorado POINT (-818480.9 1779785)
7 Connecticut POINT (1936213 2307450)
8 Delaware POINT (1796466 1938236)
9 Florida POINT (1409814 640477)
10 Georgia POINT (1179012 1107322)
ggplot() +
geom_sf(data = df_sf_pcs) +
coord_sf(datum = st_crs(df_sf_pcs)) +
theme_linedraw()
st_distance
review# Three most populous cities in the USA
(big3 = cities %>%
select(city, population) %>%
slice_max(population, n = 3))
Simple feature collection with 3 features and 2 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -2032609 ymin: 1468446 xmax: 1833394 ymax: 2178657
Projected CRS: NAD83 / Conus Albers
# A tibble: 3 x 3
city population geometry
<chr> <dbl> <POINT [m]>
1 New York 19354922 (1833394 2178657)
2 Los Angeles 12815475 (-2032609 1468446)
3 Chicago 8675982 (684663.4 2122678)
# Santa Barbara
(sb = filter(cities, city == "Santa Barbara") %>%
select(city, population))
Simple feature collection with 1 feature and 2 fields
Geometry type: POINT
Dimension: XY
Bounding box: xmin: -2140651 ymin: 1531425 xmax: -2140651 ymax: 1531425
Projected CRS: NAD83 / Conus Albers
# A tibble: 1 x 3
city population geometry
<chr> <dbl> <POINT [m]>
1 Santa Barbara 204034 (-2140651 1531425)
# Distance from SB to population centers
st_distance(big3, sb)
Units: [m]
[,1]
[1,] 4026405.5
[2,] 125058.1
[3,] 2886517.0
There are two notable things about this result:
units
This second point highlights a useful feature of st_distance
, namley, its ability to return distance matrices between all combinations of features in x
and y
.
units
reviewWhile units are useful, they are not always the preferred units. By default, the units measurement is defined by the projection. For example:
st_crs(big3)$units
[1] "m"
Units can be converted using units::set_units
. For example, ‘m’ can be converted to ‘km’:
big3 = mutate(big3,
dist_to_sb = st_distance(big3, sb),
dist_to_sb = set_units(dist_to_sb, "km"))
(big3$dist_to_sb)
Units: [km]
[,1]
[1,] 4026.4055
[2,] 125.0581
[3,] 2886.5170
You might have noticed the data type
of the st_distance
objects are an S3 class of units
. Sometimes, this class can cause problems when trying to using it with other classes or methods:
big3$dist_to_sb + 4
Error in Ops.units(big3$dist_to_sb, 4): both operands of the expression should be "units" objects
ggplot(data = big3) +
geom_col(aes(x = city, y = dist_to_sb))
Don't know how to automatically pick scale for object of type units. Defaulting to continuous.
Error in Ops.units(x, range[1]): both operands of the expression should be "units" objects
In these cases, the units class can be dropped with units::drop_units
big3 = mutate(big3,
dist_to_sb = st_distance(big3, sb),
dist_to_sb = set_units(dist_to_sb, "km"),
dist_to_sb = drop_units(dist_to_sb))
big3$dist_to_sb + 4
[,1]
[1,] 4030.4055
[2,] 129.0581
[3,] 2890.5170
ggplot(data = big3) +
geom_col(aes(x = reorder(city, -dist_to_sb), y = dist_to_sb)) +
labs(title = "Distance to Santa Barbara (km)") +
ggthemes::theme_fivethirtyeight() +
theme( axis.text.x = element_text(face = "bold", size = 14))
As with all functions, these steps can be nested:
big3 = mutate(big3,
dist_to_sb = drop_units(set_units(st_distance(big3, sb), "km")))
Geometry
reviewThere are a few ways to manipulate existing geometries, here we discuss st_union
, st_combine
and st_cast
st_combine
returns a single, combined geometry, with no resolved boundaries.
st_union()
returns a single geometry with resolved boundaries
st_cast()
casts one geometry type to another
(west_coast = USAboundaries::us_states() %>%
filter(name %in% c('Oregon', 'California', 'Washington')) %>%
select(name, geometry))
Simple feature collection with 3 features and 1 field
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -124.7258 ymin: 32.53416 xmax: -114.1391 ymax: 49.00249
Geodetic CRS: WGS 84
name geometry
1 California MULTIPOLYGON (((-118.594 33...
2 Oregon MULTIPOLYGON (((-124.5524 4...
3 Washington MULTIPOLYGON (((-123.2371 4...
plot(west_coast['name'], key.pos = 1)
# Combine Geometries
(combined_wc = st_combine(west_coast))
Geometry set for 1 feature
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -124.7258 ymin: 32.53416 xmax: -114.1391 ymax: 49.00249
Geodetic CRS: WGS 84
MULTIPOLYGON (((-118.594 33.4672, -118.4848 33....
plot(combined_wc, col = "red")
# Unioned Geometries
(unioned_wc = st_union(west_coast))
although coordinates are longitude/latitude, st_union assumes that they are planar
Geometry set for 1 feature
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -124.7258 ymin: 32.53416 xmax: -114.1391 ymax: 49.00249
Geodetic CRS: WGS 84
MULTIPOLYGON (((-123.2371 48.68347, -123.0704 4...
plot(unioned_wc, col = "red")
# Combine Geometries
line_wc = st_cast(unioned_wc, "MULTILINESTRING")
plot(line_wc, col = "red")
In this section you will extend your growing ggplot
skills to handle spatial data using ggrepl
to label significant features; gghighlight
to emphasize important criteria; and scaled color/fill to create chloropleth represnetations of variables. Below is some example code to provide an example of these tools in action:
# Define a state/region classifier and select the southern states
state.of.interest = data.frame(state = state.name, region = state.region) %>%
filter(region == "South") %>%
pull(state)
# Get USA states in the southern region and transform to EPSG:5070
state = USAboundaries::us_states() %>%
filter(name %in% state.of.interest) %>%
st_transform(5070)
# Get USA congressional districts in the southern region and transform to EPSG:5070
districts = USAboundaries::us_congressional() %>%
filter(state_name %in% state.of.interest) %>%
st_transform(5070)
# Get the 10 most populous cities in the southern region and transform to EPSG:5070
sub_cities = cities %>%
filter(state_name %in% state.of.interest) %>%
slice_max(population, n = 10) %>%
st_transform(5070)
ggplot() +
# Add districts with a dashed line (lty = 3),
# a color gradient from blue to red based on aland,
# and a fill aplha of 0.5
geom_sf(data = districts, aes(fill = aland), lty = 3, alpha = .5) +
scale_fill_gradient(low = 'blue', high = "red") +
# Highlight (keep blue) only those districts witn a land area > 5e10
gghighlight(aland > 5e10) +
# Add the state borders with a thicker line and no fill
geom_sf(data = state, size = 1, fill = "NA") +
# Add the cities
geom_sf(data = sub_cities, size= 2, color = "red") +
# Add labels to the cities
ggrepel::geom_label_repel(
data = sub_cities,
aes(label = city, geometry = geometry),
stat = "sf_coordinates",
size = 3) +
labs(fill = "Area Land") +
ggthemes::theme_map()