Question 1:

Making Spatial Objects & Coordinate Transformation

Spatial objects (sf) can be built from a vector of X and Y values in addition to a coordinate reference system (CRS). For example:

df = data.frame(name = state.name, 
                X = state.center$x, 
                Y = state.center$y)
head(df)
        name         X       Y
1    Alabama  -86.7509 32.5901
2     Alaska -127.2500 49.2500
3    Arizona -111.6250 34.2192
4   Arkansas  -92.2992 34.7336
5 California -119.7730 36.5341
6   Colorado -105.5130 38.6777
# Geographic Coordinate System (GCS)
(df_sf_gcs = st_as_sf(df, 
                      coords = c("X", "Y"), 
                      crs = 4269))
Simple feature collection with 50 features and 1 field
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -127.25 ymin: 27.8744 xmax: -68.9801 ymax: 49.25
Geodetic CRS:  NAD83
First 10 features:
          name                 geometry
1      Alabama POINT (-86.7509 32.5901)
2       Alaska    POINT (-127.25 49.25)
3      Arizona POINT (-111.625 34.2192)
4     Arkansas POINT (-92.2992 34.7336)
5   California POINT (-119.773 36.5341)
6     Colorado POINT (-105.513 38.6777)
7  Connecticut POINT (-72.3573 41.5928)
8     Delaware POINT (-74.9841 38.6777)
9      Florida  POINT (-81.685 27.8744)
10     Georgia POINT (-83.3736 32.3329)
ggplot() + 
  geom_sf(data = df_sf_gcs) + 
  coord_sf(datum = st_crs(df_sf_gcs)) +
  theme_linedraw()

# Projected Coordinate System (PCS)
# st_transforms converts from one reference system to another
(df_sf_pcs = st_transform(df_sf_gcs, 5070))
Simple feature collection with 50 features and 1 field
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -2805703 ymin: 640477 xmax: 2079664 ymax: 3291437
Projected CRS: NAD83 / Conus Albers
First 10 features:
          name                  geometry
1      Alabama  POINT (862043.5 1099545)
2       Alaska  POINT (-2264853 3291437)
3      Arizona  POINT (-1422260 1356663)
4     Arkansas  POINT (336061.5 1303543)
5   California  POINT (-2086972 1760961)
6     Colorado POINT (-818480.9 1779785)
7  Connecticut   POINT (1936213 2307450)
8     Delaware   POINT (1796466 1938236)
9      Florida    POINT (1409814 640477)
10     Georgia   POINT (1179012 1107322)
ggplot() + 
  geom_sf(data = df_sf_pcs) + 
  coord_sf(datum = st_crs(df_sf_pcs)) +
  theme_linedraw()

Question 2:

st_distance review

# Three most populous cities in the USA
(big3 = cities %>% 
   select(city, population) %>% 
   slice_max(population, n = 3))
Simple feature collection with 3 features and 2 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -2032609 ymin: 1468446 xmax: 1833394 ymax: 2178657
Projected CRS: NAD83 / Conus Albers
# A tibble: 3 x 3
  city        population           geometry
  <chr>            <dbl>        <POINT [m]>
1 New York      19354922  (1833394 2178657)
2 Los Angeles   12815475 (-2032609 1468446)
3 Chicago        8675982 (684663.4 2122678)
# Santa Barbara
(sb = filter(cities, city == "Santa Barbara") %>% 
    select(city, population))
Simple feature collection with 1 feature and 2 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -2140651 ymin: 1531425 xmax: -2140651 ymax: 1531425
Projected CRS: NAD83 / Conus Albers
# A tibble: 1 x 3
  city          population           geometry
  <chr>              <dbl>        <POINT [m]>
1 Santa Barbara     204034 (-2140651 1531425)
# Distance from SB to population centers
st_distance(big3, sb)
Units: [m]
          [,1]
[1,] 4026405.5
[2,]  125058.1
[3,] 2886517.0

There are two notable things about this result:

  1. It has units
  2. It is returned as a matrix, even though SB only had one row

This second point highlights a useful feature of st_distance, namley, its ability to return distance matrices between all combinations of features in x and y.

units review

While units are useful, they are not always the preferred units. By default, the units measurement is defined by the projection. For example:

st_crs(big3)$units
[1] "m"

Units can be converted using units::set_units. For example, ‘m’ can be converted to ‘km’:

big3 = mutate(big3, 
              dist_to_sb = st_distance(big3, sb),
              dist_to_sb = set_units(dist_to_sb, "km")) 

(big3$dist_to_sb)
Units: [km]
          [,1]
[1,] 4026.4055
[2,]  125.0581
[3,] 2886.5170

You might have noticed the data type of the st_distance objects are an S3 class of units. Sometimes, this class can cause problems when trying to using it with other classes or methods:

big3$dist_to_sb + 4
Error in Ops.units(big3$dist_to_sb, 4): both operands of the expression should be "units" objects
ggplot(data = big3) + 
  geom_col(aes(x = city, y = dist_to_sb))
Don't know how to automatically pick scale for object of type units. Defaulting to continuous.
Error in Ops.units(x, range[1]): both operands of the expression should be "units" objects

In these cases, the units class can be dropped with units::drop_units

big3 = mutate(big3, 
              dist_to_sb = st_distance(big3, sb),
              dist_to_sb = set_units(dist_to_sb, "km"),
              dist_to_sb = drop_units(dist_to_sb))

big3$dist_to_sb + 4
          [,1]
[1,] 4030.4055
[2,]  129.0581
[3,] 2890.5170
ggplot(data = big3) + 
  geom_col(aes(x = reorder(city, -dist_to_sb), y = dist_to_sb)) + 
    labs(title = "Distance to Santa Barbara (km)") + 
  ggthemes::theme_fivethirtyeight() + 
  theme( axis.text.x = element_text(face = "bold", size = 14))

As with all functions, these steps can be nested:

big3 = mutate(big3, 
              dist_to_sb = drop_units(set_units(st_distance(big3, sb), "km")))

Geometry review

There are a few ways to manipulate existing geometries, here we discuss st_union, st_combine and st_cast

  • st_combine returns a single, combined geometry, with no resolved boundaries.

  • st_union() returns a single geometry with resolved boundaries

  • st_cast() casts one geometry type to another

(west_coast = USAboundaries::us_states() %>% 
  filter(name %in% c('Oregon', 'California', 'Washington')) %>% 
  select(name, geometry))
Simple feature collection with 3 features and 1 field
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -124.7258 ymin: 32.53416 xmax: -114.1391 ymax: 49.00249
Geodetic CRS:  WGS 84
        name                       geometry
1 California MULTIPOLYGON (((-118.594 33...
2     Oregon MULTIPOLYGON (((-124.5524 4...
3 Washington MULTIPOLYGON (((-123.2371 4...
plot(west_coast['name'], key.pos = 1)

# Combine Geometries
(combined_wc = st_combine(west_coast))
Geometry set for 1 feature 
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -124.7258 ymin: 32.53416 xmax: -114.1391 ymax: 49.00249
Geodetic CRS:  WGS 84
MULTIPOLYGON (((-118.594 33.4672, -118.4848 33....
plot(combined_wc, col = "red")

# Unioned Geometries
(unioned_wc = st_union(west_coast))
although coordinates are longitude/latitude, st_union assumes that they are planar
Geometry set for 1 feature 
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -124.7258 ymin: 32.53416 xmax: -114.1391 ymax: 49.00249
Geodetic CRS:  WGS 84
MULTIPOLYGON (((-123.2371 48.68347, -123.0704 4...
plot(unioned_wc, col = "red")

# Combine Geometries
line_wc = st_cast(unioned_wc, "MULTILINESTRING")
plot(line_wc, col = "red")

Question 3:

In this section you will extend your growing ggplot skills to handle spatial data using ggrepl to label significant features; gghighlight to emphasize important criteria; and scaled color/fill to create chloropleth represnetations of variables. Below is some example code to provide an example of these tools in action:

Get some data (review)

# Define a state/region classifier and select the southern states
state.of.interest = data.frame(state = state.name, region = state.region) %>% 
  filter(region == "South") %>% 
  pull(state)

# Get USA states in the southern region and transform to EPSG:5070
state = USAboundaries::us_states() %>% 
  filter(name %in% state.of.interest) %>% 
  st_transform(5070)

# Get USA congressional districts in the southern region and transform to EPSG:5070
districts =  USAboundaries::us_congressional() %>% 
  filter(state_name %in% state.of.interest) %>% 
  st_transform(5070)

# Get the 10 most populous cities in the southern region and transform to EPSG:5070
sub_cities = cities %>% 
  filter(state_name %in% state.of.interest) %>% 
  slice_max(population, n = 10) %>% 
  st_transform(5070)

Map

ggplot() + 
  # Add districts with a dashed line (lty = 3), 
  # a color gradient from blue to red based on aland, 
  # and a fill aplha of 0.5
  geom_sf(data = districts, aes(fill = aland), lty = 3, alpha = .5) + 
  scale_fill_gradient(low = 'blue', high = "red") +
  # Highlight (keep blue) only those districts witn a land area > 5e10
  gghighlight(aland > 5e10) +
  # Add the state borders with a thicker line and no fill
  geom_sf(data = state, size = 1, fill = "NA") +
  # Add the cities
  geom_sf(data = sub_cities, size= 2, color = "red") +
  # Add labels to the cities
  ggrepel::geom_label_repel(
    data = sub_cities,
    aes(label = city, geometry = geometry),
    stat = "sf_coordinates",
    size = 3) +
  labs(fill = "Area Land") + 
  ggthemes::theme_map()