Skip to main content
added 5 characters in body
Source Link
dimfalk
  • 1.4k
  • 1
  • 8
  • 18
library(sf)
library(nngeo)

# Find, for every nest, the three nearest nests of the same year that belong
# to a different territory, and store their names and distances on `sf`.

# create a sf object from your data frame
# NOTE(review): `coords` is read as c(x, y); the printed geometries below show
# LAT in the x slot, so this presumably should be c("LONG", "LAT") -- confirm
# and re-run, because the reported distances depend on it.
sf <- st_as_sf(df, coords = c("LAT", "LONG"), crs = 4326)

# iterate over individual nests, filter by year and territory before `st_nn`
# (`seq_len()` yields an empty loop for zero rows, unlike `1:n`)
for (i in seq_len(nrow(sf))) {

  nest <- sf[i, ]

  # which neighbours to consider? exclude self first
  n_nests <- sf[-i, ] |>
    dplyr::filter(
      HD_YEAR == nest[["HD_YEAR"]] & TERRITORY != nest[["TERRITORY"]]
    )

  # skip iteration if there are no neighbours; the row keeps NA entries
  if (nrow(n_nests) == 0) {
    next
  }

  # perform nearest neighbour search for simple features
  result <- st_nn(nest, n_nests, k = 3, returnDist = TRUE)

  nn_idx <- result[["nn"]][[1]]
  nn_dist <- result[["dist"]][[1]]

  # the returned positions refer to rows of `n_nests`, so names are looked up
  # there rather than in the full `sf` object
  sf[i, "n1_name"] <- n_nests$NEST_NUMBER[nn_idx[1]]
  sf[i, "n1_dist"] <- nn_dist[1]

  sf[i, "n2_name"] <- n_nests$NEST_NUMBER[nn_idx[2]]
  sf[i, "n2_dist"] <- nn_dist[2]

  sf[i, "n3_name"] <- n_nests$NEST_NUMBER[nn_idx[3]]
  sf[i, "n3_dist"] <- nn_dist[3]
}

# inspect result
sf
#> Simple feature collection with 50 features and 10 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 33.41594 ymin: -79.0571 xmax: 33.83423 ymax: -78.54372
#> Geodetic CRS:  WGS 84
#> First 10 features:
#>     NEST_NUMBER TERRITORY HATCH_DATE HD_YEAR                   geometry
#> 1  LOSH-2019-01    RIVERF 2019-03-26    2019   POINT (33.8313 -79.0466)
#> 2  LOSH-2019-02    DISCGO 2019-04-03    2019   POINT (33.7975 -79.0038)
#> 3  LOSH-2019-03    SCHOLA 2019-04-06    2019   POINT (33.7963 -79.0057)
#> 4  LOSH-2019-04    COXFER 2019-04-17    2019   POINT (33.8071 -79.0085)
#> 5  LOSH-2019-05    LOWESP 2019-04-01    2019   POINT (33.7924 -78.9978)
#> 6  LOSH-2019-06    SEACOA 2019-04-04    2019   POINT (33.8075 -79.0132)
#> 7  LOSH-2019-07    HGTCCC 2019-04-12    2019   POINT (33.7936 -79.0015)
#> 8  LOSH-2019-08    WPDEDI 2019-04-17    2019  POINT (33.80068 -79.0006)
#> 9  LOSH-2019-09    CMWSSS 2019-04-24    2019 POINT (33.79995 -78.99844)
#> 10 LOSH-2019-10    IMSTOP 2019-04-29    2019   POINT (33.7998 -78.9945)
#>         n1_name   n1_dist      n2_name   n2_dist      n3_name   n3_dist
#> 1  LOSH-2019-26  60.95754 LOSH-2019-17  68.19119 LOSH-2019-18 1173.5578
#> 2  LOSH-2019-41 199.82044 LOSH-2019-03 213.67488 LOSH-2019-30  223.3503
#> 3  LOSH-2019-27 195.30050 LOSH-2019-02 213.67488 LOSH-2019-04  388.1173
#> 4  LOSH-2019-30 369.08852 LOSH-2019-03 388.11729 LOSH-2019-41  390.0631
#> 5  LOSH-2019-39 149.19667 LOSH-2019-09 176.01593 LOSH-2019-35  201.1317
#> 6  LOSH-2019-23 491.31455 LOSH-2019-44 502.51049 LOSH-2019-04  524.8380
#> 7  LOSH-2019-25 161.56390 LOSH-2019-08 181.28822 LOSH-2019-40  189.7436
#> 8  LOSH-2019-07 181.28822 LOSH-2019-31 189.42955 LOSH-2019-09  241.7774
#> 9  LOSH-2019-33 169.80511 LOSH-2019-05 176.01593 LOSH-2019-40  230.9364
#> 10 LOSH-2019-35 216.08536 LOSH-2019-32 263.90729 LOSH-2019-29  362.9705

Order has changed at first glance in comparison to the first approach, because indices are determined now based on another subset (internally). Moreover, names were added directly to sf instead of listing decoupled indices and distances.

library(sf)
library(nngeo)

# Find, for every nest, the three nearest nests of the same year that belong
# to a different territory, and store their names and distances on `sf`.

# create a sf object from your data frame
# NOTE(review): `coords` is read as c(x, y); the printed geometries below show
# LAT in the x slot, so this presumably should be c("LONG", "LAT") -- confirm
# and re-run, because the reported distances depend on it.
sf <- st_as_sf(df, coords = c("LAT", "LONG"), crs = 4326)

# iterate over individual nests, filter by year and territory before `st_nn`
# (`seq_len()` yields an empty loop for zero rows, unlike `1:n`)
for (i in seq_len(nrow(sf))) {

  nest <- sf[i, ]

  # which neighbours to consider? exclude self first
  n_nests <- sf[-i, ] |>
    dplyr::filter(
      HD_YEAR == nest[["HD_YEAR"]] & TERRITORY != nest[["TERRITORY"]]
    )

  # skip iteration if there are no neighbours; the row keeps NA entries
  if (nrow(n_nests) == 0) {
    next
  }

  # perform nearest neighbour search for simple features
  result <- st_nn(nest, n_nests, k = 3, returnDist = TRUE)

  nn_idx <- result[["nn"]][[1]]
  nn_dist <- result[["dist"]][[1]]

  # the returned positions refer to rows of `n_nests`, not of `sf`; looking
  # them up in `sf` would report the wrong nest names
  sf[i, "n1_name"] <- n_nests$NEST_NUMBER[nn_idx[1]]
  sf[i, "n1_dist"] <- nn_dist[1]

  sf[i, "n2_name"] <- n_nests$NEST_NUMBER[nn_idx[2]]
  sf[i, "n2_dist"] <- nn_dist[2]

  sf[i, "n3_name"] <- n_nests$NEST_NUMBER[nn_idx[3]]
  sf[i, "n3_dist"] <- nn_dist[3]
}

# inspect result
sf
#> Simple feature collection with 50 features and 10 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 33.41594 ymin: -79.0571 xmax: 33.83423 ymax: -78.54372
#> Geodetic CRS:  WGS 84
#> First 10 features:
#>     NEST_NUMBER TERRITORY HATCH_DATE HD_YEAR                   geometry
#> 1  LOSH-2019-01    RIVERF 2019-03-26    2019   POINT (33.8313 -79.0466)
#> 2  LOSH-2019-02    DISCGO 2019-04-03    2019   POINT (33.7975 -79.0038)
#> 3  LOSH-2019-03    SCHOLA 2019-04-06    2019   POINT (33.7963 -79.0057)
#> 4  LOSH-2019-04    COXFER 2019-04-17    2019   POINT (33.8071 -79.0085)
#> 5  LOSH-2019-05    LOWESP 2019-04-01    2019   POINT (33.7924 -78.9978)
#> 6  LOSH-2019-06    SEACOA 2019-04-04    2019   POINT (33.8075 -79.0132)
#> 7  LOSH-2019-07    HGTCCC 2019-04-12    2019   POINT (33.7936 -79.0015)
#> 8  LOSH-2019-08    WPDEDI 2019-04-17    2019  POINT (33.80068 -79.0006)
#> 9  LOSH-2019-09    CMWSSS 2019-04-24    2019 POINT (33.79995 -78.99844)
#> 10 LOSH-2019-10    IMSTOP 2019-04-29    2019   POINT (33.7998 -78.9945)
#>         n1_name   n1_dist      n2_name   n2_dist      n3_name   n3_dist
#> 1  LOSH-2019-26  60.95754 LOSH-2019-17  68.19119 LOSH-2019-18 1173.5578
#> 2  LOSH-2019-41 199.82044 LOSH-2019-03 213.67488 LOSH-2019-30  223.3503
#> 3  LOSH-2019-27 195.30050 LOSH-2019-02 213.67488 LOSH-2019-04  388.1173
#> 4  LOSH-2019-30 369.08852 LOSH-2019-03 388.11729 LOSH-2019-41  390.0631
#> 5  LOSH-2019-39 149.19667 LOSH-2019-09 176.01593 LOSH-2019-35  201.1317
#> 6  LOSH-2019-23 491.31455 LOSH-2019-44 502.51049 LOSH-2019-04  524.8380
#> 7  LOSH-2019-25 161.56390 LOSH-2019-08 181.28822 LOSH-2019-40  189.7436
#> 8  LOSH-2019-07 181.28822 LOSH-2019-31 189.42955 LOSH-2019-09  241.7774
#> 9  LOSH-2019-33 169.80511 LOSH-2019-05 176.01593 LOSH-2019-40  230.9364
#> 10 LOSH-2019-35 216.08536 LOSH-2019-32 263.90729 LOSH-2019-29  362.9705

Order has changed at first glance in comparison to the first approach, because indices are determined now based on the overall sf object, not any subset. Moreover, names were added directly to sf instead of listing decoupled indices and distances.

library(sf)
library(nngeo)

# Find, for every nest, the three nearest nests of the same year that belong
# to a different territory, and store their names and distances on `sf`.

# create a sf object from your data frame
# NOTE(review): `coords` is read as c(x, y); the printed geometries below show
# LAT in the x slot, so this presumably should be c("LONG", "LAT") -- confirm
# and re-run, because the reported distances depend on it.
sf <- st_as_sf(df, coords = c("LAT", "LONG"), crs = 4326)

# iterate over individual nests, filter by year and territory before `st_nn`
# (`seq_len()` yields an empty loop for zero rows, unlike `1:n`)
for (i in seq_len(nrow(sf))) {

  nest <- sf[i, ]

  # which neighbours to consider? exclude self first
  n_nests <- sf[-i, ] |>
    dplyr::filter(
      HD_YEAR == nest[["HD_YEAR"]] & TERRITORY != nest[["TERRITORY"]]
    )

  # skip iteration if there are no neighbours; the row keeps NA entries
  if (nrow(n_nests) == 0) {
    next
  }

  # perform nearest neighbour search for simple features
  result <- st_nn(nest, n_nests, k = 3, returnDist = TRUE)

  # a single query point is passed, so only the first list element is used
  nn_idx <- result[["nn"]][[1]]
  nn_dist <- result[["dist"]][[1]]

  # the returned positions refer to rows of `n_nests`, so names are looked up
  # there rather than in the full `sf` object
  sf[i, "n1_name"] <- n_nests$NEST_NUMBER[nn_idx[1]]
  sf[i, "n1_dist"] <- nn_dist[1]

  sf[i, "n2_name"] <- n_nests$NEST_NUMBER[nn_idx[2]]
  sf[i, "n2_dist"] <- nn_dist[2]

  sf[i, "n3_name"] <- n_nests$NEST_NUMBER[nn_idx[3]]
  sf[i, "n3_dist"] <- nn_dist[3]
}

# inspect result
sf
#> Simple feature collection with 50 features and 10 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 33.41594 ymin: -79.0571 xmax: 33.83423 ymax: -78.54372
#> Geodetic CRS:  WGS 84
#> First 10 features:
#>     NEST_NUMBER TERRITORY HATCH_DATE HD_YEAR                   geometry
#> 1  LOSH-2019-01    RIVERF 2019-03-26    2019   POINT (33.8313 -79.0466)
#> 2  LOSH-2019-02    DISCGO 2019-04-03    2019   POINT (33.7975 -79.0038)
#> 3  LOSH-2019-03    SCHOLA 2019-04-06    2019   POINT (33.7963 -79.0057)
#> 4  LOSH-2019-04    COXFER 2019-04-17    2019   POINT (33.8071 -79.0085)
#> 5  LOSH-2019-05    LOWESP 2019-04-01    2019   POINT (33.7924 -78.9978)
#> 6  LOSH-2019-06    SEACOA 2019-04-04    2019   POINT (33.8075 -79.0132)
#> 7  LOSH-2019-07    HGTCCC 2019-04-12    2019   POINT (33.7936 -79.0015)
#> 8  LOSH-2019-08    WPDEDI 2019-04-17    2019  POINT (33.80068 -79.0006)
#> 9  LOSH-2019-09    CMWSSS 2019-04-24    2019 POINT (33.79995 -78.99844)
#> 10 LOSH-2019-10    IMSTOP 2019-04-29    2019   POINT (33.7998 -78.9945)
#>         n1_name   n1_dist      n2_name   n2_dist      n3_name   n3_dist
#> 1  LOSH-2019-26  60.95754 LOSH-2019-17  68.19119 LOSH-2019-18 1173.5578
#> 2  LOSH-2019-41 199.82044 LOSH-2019-03 213.67488 LOSH-2019-30  223.3503
#> 3  LOSH-2019-27 195.30050 LOSH-2019-02 213.67488 LOSH-2019-04  388.1173
#> 4  LOSH-2019-30 369.08852 LOSH-2019-03 388.11729 LOSH-2019-41  390.0631
#> 5  LOSH-2019-39 149.19667 LOSH-2019-09 176.01593 LOSH-2019-35  201.1317
#> 6  LOSH-2019-23 491.31455 LOSH-2019-44 502.51049 LOSH-2019-04  524.8380
#> 7  LOSH-2019-25 161.56390 LOSH-2019-08 181.28822 LOSH-2019-40  189.7436
#> 8  LOSH-2019-07 181.28822 LOSH-2019-31 189.42955 LOSH-2019-09  241.7774
#> 9  LOSH-2019-33 169.80511 LOSH-2019-05 176.01593 LOSH-2019-40  230.9364
#> 10 LOSH-2019-35 216.08536 LOSH-2019-32 263.90729 LOSH-2019-29  362.9705

Order has changed at first glance in comparison to the first approach, because indices are determined now based on another subset (internally). Moreover, names were added directly to sf instead of listing decoupled indices and distances.

added 663 characters in body
Source Link
dimfalk
  • 1.4k
  • 1
  • 8
  • 18

I noticed the nngeo package some days ago and think the functionality might come in handy here:

library(sf)
library(nngeo)

# Find, for every nest, the three nearest nests of the same year that belong
# to a different territory, and store their names and distances on `sf`.

# create a sf object from your data frame
# NOTE(review): `coords` is read as c(x, y); the printed geometries below show
# LAT in the x slot, so this presumably should be c("LONG", "LAT") -- confirm
# and re-run, because the reported distances depend on it.
sf <- st_as_sf(df, coords = c("LAT", "LONG"), crs = 4326)

# iterate over individual nests, filter by year and territory before `st_nn`
# (`seq_len()` yields an empty loop for zero rows, unlike `1:n`)
for (i in seq_len(nrow(sf))) {

  nest <- sf[i, ]

  # which neighbours to consider? exclude self first
  n_nests <- sf[-i, ] |>
    dplyr::filter(
      HD_YEAR == nest[["HD_YEAR"]] & TERRITORY != nest[["TERRITORY"]]
    )

  # skip iteration if there are no neighbours; the row keeps NA entries
  if (nrow(n_nests) == 0) {
    next
  }

  # perform nearest neighbour search for simple features
  result <- st_nn(nest, n_nests, k = 3, returnDist = TRUE)

  nn_idx <- result[["nn"]][[1]]
  nn_dist <- result[["dist"]][[1]]

  # the returned positions refer to rows of `n_nests`, not of `sf`; looking
  # them up in `sf` would report the wrong nest names
  sf[i, "n1_name"] <- n_nests$NEST_NUMBER[nn_idx[1]]
  sf[i, "n1_dist"] <- nn_dist[1]

  sf[i, "n2_name"] <- n_nests$NEST_NUMBER[nn_idx[2]]
  sf[i, "n2_dist"] <- nn_dist[2]

  sf[i, "n3_name"] <- n_nests$NEST_NUMBER[nn_idx[3]]
  sf[i, "n3_dist"] <- nn_dist[3]
}

# inspect result
sf
#> Simple feature collection with 50 features and 10 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 33.41594 ymin: -79.0571 xmax: 33.83423 ymax: -78.54372
#> Geodetic CRS:  WGS 84
#> First 10 features:
#>     NEST_NUMBER TERRITORY HATCH_DATE HD_YEAR                   geometry
#> 1  LOSH-2019-01    RIVERF 2019-03-26    2019   POINT (33.8313 -79.0466)
#> 2  LOSH-2019-02    DISCGO 2019-04-03    2019   POINT (33.7975 -79.0038)
#> 3  LOSH-2019-03    SCHOLA 2019-04-06    2019   POINT (33.7963 -79.0057)
#> 4  LOSH-2019-04    COXFER 2019-04-17    2019   POINT (33.8071 -79.0085)
#> 5  LOSH-2019-05    LOWESP 2019-04-01    2019   POINT (33.7924 -78.9978)
#> 6  LOSH-2019-06    SEACOA 2019-04-04    2019   POINT (33.8075 -79.0132)
#> 7  LOSH-2019-07    HGTCCC 2019-04-12    2019   POINT (33.7936 -79.0015)
#> 8  LOSH-2019-08    WPDEDI 2019-04-17    2019  POINT (33.80068 -79.0006)
#> 9  LOSH-2019-09    CMWSSS 2019-04-24    2019 POINT (33.79995 -78.99844)
#> 10 LOSH-2019-10    IMSTOP 2019-04-29    2019   POINT (33.7998 -78.9945)
#>         n1_name   n1_dist      n2_name   n2_dist      n3_name   n3_dist
#> 1  LOSH-2019-26  60.95754 LOSH-2019-17  68.19119 LOSH-2019-18 1173.5578
#> 2  LOSH-2019-41 199.82044 LOSH-2019-03 213.67488 LOSH-2019-30  223.3503
#> 3  LOSH-2019-27 195.30050 LOSH-2019-02 213.67488 LOSH-2019-04  388.1173
#> 4  LOSH-2019-30 369.08852 LOSH-2019-03 388.11729 LOSH-2019-41  390.0631
#> 5  LOSH-2019-39 149.19667 LOSH-2019-09 176.01593 LOSH-2019-35  201.1317
#> 6  LOSH-2019-23 491.31455 LOSH-2019-44 502.51049 LOSH-2019-04  524.8380
#> 7  LOSH-2019-25 161.56390 LOSH-2019-08 181.28822 LOSH-2019-40  189.7436
#> 8  LOSH-2019-07 181.28822 LOSH-2019-31 189.42955 LOSH-2019-09  241.7774
#> 9  LOSH-2019-33 169.80511 LOSH-2019-05 176.01593 LOSH-2019-40  230.9364
#> 10 LOSH-2019-35 216.08536 LOSH-2019-32 263.90729 LOSH-2019-29  362.9705

Order has changed at first glance in comparison to the first approach, because indices are determined now based on the overall sf object, not any subset. Moreover, names were added directly to sf instead of listing decoupled indices and distances.

Distances are given in meters and calculated similarly to sf::st_distance(), using spherical distances for lat/lon points (see ?st_nn).

I would apply this snippet to your complete dataset to make sure it works properly.

I noticed the nngeo package some days ago and think the functionality might be handy here:

library(sf)
library(nngeo)

# Find, for every nest, the three nearest nests of the same year that belong
# to a different territory, and store their names and distances on `sf`.

# create a sf object from your data frame
# NOTE(review): `coords` is read as c(x, y); the printed geometries below show
# LAT in the x slot, so this presumably should be c("LONG", "LAT") -- confirm
# and re-run, because the reported distances depend on it.
sf <- st_as_sf(df, coords = c("LAT", "LONG"), crs = 4326)

# iterate over individual nests, filter by year and territory before `st_nn`
# (`seq_len()` yields an empty loop for zero rows, unlike `1:n`)
for (i in seq_len(nrow(sf))) {

  nest <- sf[i, ]

  # which neighbours to consider? exclude self first
  n_nests <- sf[-i, ] |>
    dplyr::filter(
      HD_YEAR == nest[["HD_YEAR"]] & TERRITORY != nest[["TERRITORY"]]
    )

  # skip iteration if there are no neighbours; the row keeps NA entries
  # (see nest 4 below, the only nest with HD_YEAR 2020)
  if (nrow(n_nests) == 0) {
    next
  }

  # perform nearest neighbour search for simple features
  result <- st_nn(nest, n_nests, k = 3, returnDist = TRUE)

  nn_idx <- result[["nn"]][[1]]
  nn_dist <- result[["dist"]][[1]]

  # the returned positions refer to rows of `n_nests`, not of `sf`; looking
  # them up in `sf` would report the wrong nest names
  sf[i, "n1_name"] <- n_nests$NEST_NUMBER[nn_idx[1]]
  sf[i, "n1_dist"] <- nn_dist[1]

  sf[i, "n2_name"] <- n_nests$NEST_NUMBER[nn_idx[2]]
  sf[i, "n2_dist"] <- nn_dist[2]

  sf[i, "n3_name"] <- n_nests$NEST_NUMBER[nn_idx[3]]
  sf[i, "n3_dist"] <- nn_dist[3]
}

# inspect result
sf
#> Simple feature collection with 6 features and 10 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 33.7924 ymin: -79.0466 xmax: 33.8313 ymax: -78.9978
#> Geodetic CRS:  WGS 84
#>    NEST_NUMBER TERRITORY HATCH_DATE HD_YEAR                 geometry
#> 1 LOSH-2019-01    RIVERF 2019-03-26    2019 POINT (33.8313 -79.0466)
#> 2 LOSH-2019-02    DISCGO 2019-04-03    2019 POINT (33.7975 -79.0038)
#> 3 LOSH-2019-03    SCHOLA 2019-04-06    2019 POINT (33.7963 -79.0057)
#> 4 LOSH-2019-04    COXFER 2019-04-17    2020 POINT (33.8071 -79.0085)
#> 5 LOSH-2019-05    LOWESP 2019-04-01    2019 POINT (33.7924 -78.9978)
#> 6 LOSH-2020-06    SEACOA 2019-04-04    2019 POINT (33.8075 -79.0132)
#>        n1_name   n1_dist      n2_name   n2_dist      n3_name   n3_dist
#> 1 LOSH-2020-06 3763.3595 LOSH-2019-03 4626.8342 LOSH-2019-02 4832.4837
#> 2 LOSH-2019-03  213.6749 LOSH-2019-05  678.6742 LOSH-2020-06 1070.9202
#> 3 LOSH-2019-02  213.6749 LOSH-2020-06  870.6891 LOSH-2019-05  885.9632
#> 4         <NA>        NA         <NA>        NA         <NA>        NA
#> 5 LOSH-2019-02  678.6742 LOSH-2019-03  885.9632 LOSH-2020-06 1749.2748
#> 6 LOSH-2019-03  870.6891 LOSH-2019-02 1070.9202 LOSH-2019-05 1749.2748

Order has changed at first glance in comparison to the first approach, because indices are determined now based on the overall sf object, not any subset. Moreover, names were added directly to sf instead of listing decoupled indices and distances.

Distances are given in meters and calculated similarly to sf::st_distance(), using spherical distances for lat/lon points (see ?st_nn).

I would apply this snippet to your complete dataset to make sure it works properly.

I noticed the nngeo package some days ago and think the functionality might come in handy here:

library(sf)
library(nngeo)

# Find, for every nest, the three nearest nests of the same year that belong
# to a different territory, and store their names and distances on `sf`.

# create a sf object from your data frame
# NOTE(review): `coords` is read as c(x, y); the printed geometries below show
# LAT in the x slot, so this presumably should be c("LONG", "LAT") -- confirm
# and re-run, because the reported distances depend on it.
sf <- st_as_sf(df, coords = c("LAT", "LONG"), crs = 4326)

# iterate over individual nests, filter by year and territory before `st_nn`
# (`seq_len()` yields an empty loop for zero rows, unlike `1:n`)
for (i in seq_len(nrow(sf))) {

  nest <- sf[i, ]

  # which neighbours to consider? exclude self first
  n_nests <- sf[-i, ] |>
    dplyr::filter(
      HD_YEAR == nest[["HD_YEAR"]] & TERRITORY != nest[["TERRITORY"]]
    )

  # skip iteration if there are no neighbours; the row keeps NA entries
  if (nrow(n_nests) == 0) {
    next
  }

  # perform nearest neighbour search for simple features
  result <- st_nn(nest, n_nests, k = 3, returnDist = TRUE)

  nn_idx <- result[["nn"]][[1]]
  nn_dist <- result[["dist"]][[1]]

  # the returned positions refer to rows of `n_nests`, not of `sf`; looking
  # them up in `sf` would report the wrong nest names
  sf[i, "n1_name"] <- n_nests$NEST_NUMBER[nn_idx[1]]
  sf[i, "n1_dist"] <- nn_dist[1]

  sf[i, "n2_name"] <- n_nests$NEST_NUMBER[nn_idx[2]]
  sf[i, "n2_dist"] <- nn_dist[2]

  sf[i, "n3_name"] <- n_nests$NEST_NUMBER[nn_idx[3]]
  sf[i, "n3_dist"] <- nn_dist[3]
}

# inspect result
sf
#> Simple feature collection with 50 features and 10 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 33.41594 ymin: -79.0571 xmax: 33.83423 ymax: -78.54372
#> Geodetic CRS:  WGS 84
#> First 10 features:
#>     NEST_NUMBER TERRITORY HATCH_DATE HD_YEAR                   geometry
#> 1  LOSH-2019-01    RIVERF 2019-03-26    2019   POINT (33.8313 -79.0466)
#> 2  LOSH-2019-02    DISCGO 2019-04-03    2019   POINT (33.7975 -79.0038)
#> 3  LOSH-2019-03    SCHOLA 2019-04-06    2019   POINT (33.7963 -79.0057)
#> 4  LOSH-2019-04    COXFER 2019-04-17    2019   POINT (33.8071 -79.0085)
#> 5  LOSH-2019-05    LOWESP 2019-04-01    2019   POINT (33.7924 -78.9978)
#> 6  LOSH-2019-06    SEACOA 2019-04-04    2019   POINT (33.8075 -79.0132)
#> 7  LOSH-2019-07    HGTCCC 2019-04-12    2019   POINT (33.7936 -79.0015)
#> 8  LOSH-2019-08    WPDEDI 2019-04-17    2019  POINT (33.80068 -79.0006)
#> 9  LOSH-2019-09    CMWSSS 2019-04-24    2019 POINT (33.79995 -78.99844)
#> 10 LOSH-2019-10    IMSTOP 2019-04-29    2019   POINT (33.7998 -78.9945)
#>         n1_name   n1_dist      n2_name   n2_dist      n3_name   n3_dist
#> 1  LOSH-2019-26  60.95754 LOSH-2019-17  68.19119 LOSH-2019-18 1173.5578
#> 2  LOSH-2019-41 199.82044 LOSH-2019-03 213.67488 LOSH-2019-30  223.3503
#> 3  LOSH-2019-27 195.30050 LOSH-2019-02 213.67488 LOSH-2019-04  388.1173
#> 4  LOSH-2019-30 369.08852 LOSH-2019-03 388.11729 LOSH-2019-41  390.0631
#> 5  LOSH-2019-39 149.19667 LOSH-2019-09 176.01593 LOSH-2019-35  201.1317
#> 6  LOSH-2019-23 491.31455 LOSH-2019-44 502.51049 LOSH-2019-04  524.8380
#> 7  LOSH-2019-25 161.56390 LOSH-2019-08 181.28822 LOSH-2019-40  189.7436
#> 8  LOSH-2019-07 181.28822 LOSH-2019-31 189.42955 LOSH-2019-09  241.7774
#> 9  LOSH-2019-33 169.80511 LOSH-2019-05 176.01593 LOSH-2019-40  230.9364
#> 10 LOSH-2019-35 216.08536 LOSH-2019-32 263.90729 LOSH-2019-29  362.9705

Order has changed at first glance in comparison to the first approach, because indices are determined now based on the overall sf object, not any subset. Moreover, names were added directly to sf instead of listing decoupled indices and distances.

Distances are given in meters and calculated similarly to sf::st_distance(), using spherical distances for lat/lon points (see ?st_nn).

added 1374 characters in body
Source Link
dimfalk
  • 1.4k
  • 1
  • 8
  • 18
library(sf)
library(nngeo)

# Find, for every nest, the three nearest nests of the same year that belong
# to a different territory, and store their names and distances on `sf`.

# create a sf object from your data frame
# NOTE(review): `coords` is read as c(x, y); the printed geometries below show
# LAT in the x slot, so this presumably should be c("LONG", "LAT") -- confirm
# and re-run, because the reported distances depend on it.
sf <- st_as_sf(df, coords = c("LAT", "LONG"), crs = 4326)

# iterate over individual nests, filter by year and territory before `st_nn`
# (`seq_len()` yields an empty loop for zero rows, unlike `1:n`)
for (i in seq_len(nrow(sf))) {

  nest <- sf[i, ]

  # which neighbours to consider? exclude self first
  n_nests <- sf[-i, ] |>
    dplyr::filter(
      HD_YEAR == nest[["HD_YEAR"]] & TERRITORY != nest[["TERRITORY"]]
    )

  # skip iteration if there are no neighbours; the row keeps NA entries
  # (see nest 4 below, the only nest with HD_YEAR 2020)
  if (nrow(n_nests) == 0) {
    next
  }

  # perform nearest neighbour search for simple features
  result <- st_nn(nest, n_nests, k = 3, returnDist = TRUE)

  nn_idx <- result[["nn"]][[1]]
  nn_dist <- result[["dist"]][[1]]

  # the returned positions refer to rows of `n_nests`, not of `sf`; looking
  # them up in `sf` would report the wrong nest names
  sf[i, "n1_name"] <- n_nests$NEST_NUMBER[nn_idx[1]]
  sf[i, "n1_dist"] <- nn_dist[1]

  sf[i, "n2_name"] <- n_nests$NEST_NUMBER[nn_idx[2]]
  sf[i, "n2_dist"] <- nn_dist[2]

  sf[i, "n3_name"] <- n_nests$NEST_NUMBER[nn_idx[3]]
  sf[i, "n3_dist"] <- nn_dist[3]
}

# inspect result
sf
#> Simple feature collection with 6 features and 10 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 33.7924 ymin: -79.0466 xmax: 33.8313 ymax: -78.9978
#> Geodetic CRS:  WGS 84
#>    NEST_NUMBER TERRITORY HATCH_DATE HD_YEAR                 geometry
#> 1 LOSH-2019-01    RIVERF 2019-03-26    2019 POINT (33.8313 -79.0466)
#> 2 LOSH-2019-02    DISCGO 2019-04-03    2019 POINT (33.7975 -79.0038)
#> 3 LOSH-2019-03    SCHOLA 2019-04-06    2019 POINT (33.7963 -79.0057)
#> 4 LOSH-2019-04    COXFER 2019-04-17    2020 POINT (33.8071 -79.0085)
#> 5 LOSH-2019-05    LOWESP 2019-04-01    2019 POINT (33.7924 -78.9978)
#> 6 LOSH-2020-06    SEACOA 2019-04-04    2019 POINT (33.8075 -79.0132)
#>        n1_name   n1_dist      n2_name   n2_dist      n3_name   n3_dist
#> 1 LOSH-2020-06 3763.3595 LOSH-2019-03 4626.8342 LOSH-2019-02 4832.4837
#> 2 LOSH-2019-03  213.6749 LOSH-2019-05  678.6742 LOSH-2020-06 1070.9202
#> 3 LOSH-2019-02  213.6749 LOSH-2020-06  870.6891 LOSH-2019-05  885.9632
#> 4         <NA>        NA         <NA>        NA         <NA>        NA
#> 5 LOSH-2019-02  678.6742 LOSH-2019-03  885.9632 LOSH-2020-06 1749.2748
#> 6 LOSH-2019-03  870.6891 LOSH-2019-02 1070.9202 LOSH-2019-05 1749.2748

Interpretation:

Order has changed at first glance in comparison to the first approach, because indices are determined now based on the overall sf object, not any subset. Moreover, names were added directly to sf instead of listing decoupled indices and distances.

Distances are given in meters and calculated similarly to sf::st_distance(), using spherical distances for lat/lon points (see ?st_nn).

library(sf)
library(nngeo)

# build the sf point layer from the plain data frame
# NOTE(review): `coords` is read as c(x, y), so LAT ends up in the x slot
# here -- confirm whether c("LONG", "LAT") was intended.
sf <- st_as_sf(df, coords = c("LAT", "LONG"), crs = 4326)

# keep a single year only
sf_sub <- sf |>
  dplyr::filter(HD_YEAR == "2019")

# search the layer against itself; one extra neighbour is requested because
# the first hit is dropped again below
result <- st_nn(sf_sub, sf_sub, k = 3 + 1, returnDist = TRUE)

# drop the leading entry of every hit list (presumably the query point
# itself, at distance zero)
nn <- lapply(result[["nn"]], \(idx) tail(idx, -1))
dist <- lapply(result[["dist"]], \(d) tail(d, -1))

# inspect lists
nn
#> [[1]]
#> [1] 5 3 2
#>
#> [[2]]
#> [1] 3 4 5
#>
#> [[3]]
#> [1] 2 5 4
#>
#> [[4]]
#> [1] 2 3 5
#>
#> [[5]]
#> [1] 3 2 4

dist
#> [[1]]
#> [1] 3763.359 4626.834 4832.484
#>
#> [[2]]
#> [1]  213.6749  678.6742 1070.9202
#>
#> [[3]]
#> [1] 213.6749 870.6891 885.9632
#>
#> [[4]]
#> [1]  678.6742  885.9632 1749.2748
#>
#> [[5]]
#> [1]  870.6891 1070.9202 1749.2748

# use indices to query your nest ids
sf_sub$NEST_NUMBER[nn[[1]]]
#> [1] "LOSH-2020-06" "LOSH-2019-03" "LOSH-2019-02"

Interpretation:

According to nn, the nearest nests to nest 1 are nests 5, 3, 2; the nearest ones to nest 2 are nests 3, 4, 5, and so on. Distances in dist are given in meters and calculated similarly to sf::st_distance(), using spherical distances for lat/lon points (see ?st_nn).

library(sf)
library(nngeo)

# Find, for every nest, the three nearest nests of the same year that belong
# to a different territory, and store their names and distances on `sf`.

# create a sf object from your data frame
# NOTE(review): `coords` is read as c(x, y); the printed geometries below show
# LAT in the x slot, so this presumably should be c("LONG", "LAT") -- confirm
# and re-run, because the reported distances depend on it.
sf <- st_as_sf(df, coords = c("LAT", "LONG"), crs = 4326)

# iterate over individual nests, filter by year and territory before `st_nn`
# (`seq_len()` yields an empty loop for zero rows, unlike `1:n`)
for (i in seq_len(nrow(sf))) {

  nest <- sf[i, ]

  # which neighbours to consider? exclude self first
  n_nests <- sf[-i, ] |>
    dplyr::filter(
      HD_YEAR == nest[["HD_YEAR"]] & TERRITORY != nest[["TERRITORY"]]
    )

  # skip iteration if there are no neighbours; the row keeps NA entries
  # (see nest 4 below, the only nest with HD_YEAR 2020)
  if (nrow(n_nests) == 0) {
    next
  }

  # perform nearest neighbour search for simple features
  result <- st_nn(nest, n_nests, k = 3, returnDist = TRUE)

  nn_idx <- result[["nn"]][[1]]
  nn_dist <- result[["dist"]][[1]]

  # the returned positions refer to rows of `n_nests`, not of `sf`; looking
  # them up in `sf` would report the wrong nest names
  sf[i, "n1_name"] <- n_nests$NEST_NUMBER[nn_idx[1]]
  sf[i, "n1_dist"] <- nn_dist[1]

  sf[i, "n2_name"] <- n_nests$NEST_NUMBER[nn_idx[2]]
  sf[i, "n2_dist"] <- nn_dist[2]

  sf[i, "n3_name"] <- n_nests$NEST_NUMBER[nn_idx[3]]
  sf[i, "n3_dist"] <- nn_dist[3]
}

# inspect result
sf
#> Simple feature collection with 6 features and 10 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 33.7924 ymin: -79.0466 xmax: 33.8313 ymax: -78.9978
#> Geodetic CRS:  WGS 84
#>    NEST_NUMBER TERRITORY HATCH_DATE HD_YEAR                 geometry
#> 1 LOSH-2019-01    RIVERF 2019-03-26    2019 POINT (33.8313 -79.0466)
#> 2 LOSH-2019-02    DISCGO 2019-04-03    2019 POINT (33.7975 -79.0038)
#> 3 LOSH-2019-03    SCHOLA 2019-04-06    2019 POINT (33.7963 -79.0057)
#> 4 LOSH-2019-04    COXFER 2019-04-17    2020 POINT (33.8071 -79.0085)
#> 5 LOSH-2019-05    LOWESP 2019-04-01    2019 POINT (33.7924 -78.9978)
#> 6 LOSH-2020-06    SEACOA 2019-04-04    2019 POINT (33.8075 -79.0132)
#>        n1_name   n1_dist      n2_name   n2_dist      n3_name   n3_dist
#> 1 LOSH-2020-06 3763.3595 LOSH-2019-03 4626.8342 LOSH-2019-02 4832.4837
#> 2 LOSH-2019-03  213.6749 LOSH-2019-05  678.6742 LOSH-2020-06 1070.9202
#> 3 LOSH-2019-02  213.6749 LOSH-2020-06  870.6891 LOSH-2019-05  885.9632
#> 4         <NA>        NA         <NA>        NA         <NA>        NA
#> 5 LOSH-2019-02  678.6742 LOSH-2019-03  885.9632 LOSH-2020-06 1749.2748
#> 6 LOSH-2019-03  870.6891 LOSH-2019-02 1070.9202 LOSH-2019-05 1749.2748

Order has changed at first glance in comparison to the first approach, because indices are determined now based on the overall sf object, not any subset. Moreover, names were added directly to sf instead of listing decoupled indices and distances.

Distances are given in meters and calculated similarly to sf::st_distance(), using spherical distances for lat/lon points (see ?st_nn).

edited body
Source Link
dimfalk
  • 1.4k
  • 1
  • 8
  • 18
Loading
added 178 characters in body
Source Link
dimfalk
  • 1.4k
  • 1
  • 8
  • 18
Loading
added 461 characters in body
Source Link
dimfalk
  • 1.4k
  • 1
  • 8
  • 18
Loading
Source Link
dimfalk
  • 1.4k
  • 1
  • 8
  • 18
Loading