2 Data preprocessing

2.1 Data import: GPS and raster data

I start by importing GPS data of adults only (without individuals from the Jacksonville zoo), in the form of trajectories (ltraj) that is converted to point data (SpatialPointsDataFrame) and projected to Lat/Long:

library("hab")
load("Data/wost-ad-ltraj.RData")
library("rgdal")
wost <- hab::ltraj2spdf(trajad, strict = FALSE, proj4string = CRS("+init=epsg:32617"))
wost$id <- factor(wost$id)
wost <- subset(wost, !(id %in% c(407680, 429200, 476550, 721310, 721320)))
wost <- spTransform(wost, CRS("+proj=longlat +datum=WGS84"))
summary(wost)
## Object of class SpatialPointsDataFrame
## Coordinates:
##         min       max
## x -91.45783 -78.01650
## y  25.08267  34.60033
## Is projected: FALSE 
## proj4string :
## [+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0]
## Number of points: 445638
## Data attributes:
##       date                           dx                  dy            
##  Min.   :2005-07-29 07:00:00   Min.   :-90334.77   Min.   :-127323.69  
##  1st Qu.:2007-09-28 19:00:00   1st Qu.:   -30.13   1st Qu.:    -19.90  
##  Median :2009-07-09 17:00:00   Median :     0.00   Median :      0.00  
##  Mean   :2009-04-11 15:12:47   Mean   :     5.56   Mean   :    -12.55  
##  3rd Qu.:2010-10-29 20:00:00   3rd Qu.:    31.28   3rd Qu.:     19.58  
##  Max.   :2011-12-31 18:00:00   Max.   :133219.05   Max.   : 148420.54  
##                                NA's   :36011       NA's   :36011       
##       dist                 dt             R2n               
##  Min.   :     0.00   Min.   :3600    Min.   :            0  
##  1st Qu.:    16.76   1st Qu.:3600    1st Qu.:       108970  
##  Median :    49.76   Median :3600    Median :      9802756  
##  Mean   :  1544.56   Mean   :4777    Mean   :  24050154883  
##  3rd Qu.:   639.01   3rd Qu.:7200    3rd Qu.:   1192219334  
##  Max.   :150312.14   Max.   :7200    Max.   :1005475966130  
##  NA's   :36011       NA's   :22863                          
##    abs.angle        rel.angle            id             burst       
##  Min.   :-3.14    Min.   :-3.14    572931 : 29866   414590 : 21736  
##  1st Qu.:-1.57    1st Qu.:-1.57    414590 : 21736   414240 : 18336  
##  Median : 0.00    Median : 0.15    414240 : 18336   414580 : 14595  
##  Mean   : 0.09    Mean   : 0.18    721290 : 16319   416110 : 14110  
##  3rd Qu.: 1.57    3rd Qu.: 2.04    723560 : 15763   407700 : 13064  
##  Max.   : 3.14    Max.   : 3.14    414580 : 14595   414260 : 12473  
##  NA's   :110418   NA's   :143287   (Other):329023   (Other):351324  
##       gid           elevation           day      
##  Min.   :148996   Min.   :-999.0   Min.   : 429  
##  1st Qu.:293034   1st Qu.:   3.0   1st Qu.:1220  
##  Median :456866   Median :  10.0   Median :1870  
##  Mean   :442556   Mean   :-173.2   Mean   :1781  
##  3rd Qu.:584878   3rd Qu.:  28.0   3rd Qu.:2347  
##  Max.   :701216   Max.   :2032.0   Max.   :2775  
##                   NA's   :22873
wost2 <- droplevels(wost@data)
length(unique(wost2$id))
## [1] 61
table(wost2$id)
## 
## 407700 407710 414240 414250 414260 414270 414580 414590 414600 414610 
##  13064   1463  18336   5676  12473   2523  14595  21736   8671    628 
## 416100 416110 425120 429190 475171 475180 475192 475220 475241 475270 
##    651  14110   5093  11316   2683  11720    650   2137     19   4867 
## 475282 475284 475302 475322 519770 519790 519800 519810 572661 572662 
##   1790   1245   2729    505   8973   7306   8909   7876   3998   3839 
## 572671 572710 572741 572742 572743 572781 572800 572811 572931 572951 
##   1083  10281    346   2268   3202   4386   1098   5881  29866   7911 
## 572981 572991 721290 721300 723560 723570 723580 723600 723610 809320 
##   4614   8626  16319   9859  15763   2824   2134   2910   7459   5638 
## 809330 809340 910200 910210 910220 910230 910240 992250 992270 992280 
##    805   9165  13990  14474  14269  14105   3372   9188   7607   8761 
## 992290 
##   7853
summary(as.numeric(table(wost2$id)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      19    2523    5881    7306   10280   29870
sd(as.numeric(table(wost2$id)))
## [1] 6036.537
summary(unlist(by(wost2, wost2$id, function(k) difftime(max(k$date), min(k$date), 
    units = "days"))))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.25  247.50  527.50  649.10  972.30 2326.00
sd(unlist(by(wost2, wost2$id, function(k) difftime(max(k$date), min(k$date), 
    units = "days"))))
## [1] 562.5885

I also import environmental data (from files). All rasters are combined in a RasterBrick, which is then masked using a Minimum convex polygon (from adehabitatHR) of every relocations enlarged by a one-cell buffer:

library("raster")
janPrec <- raster("Data/Consensus_rasters/jan_prec.img")
janTemp <- raster("Data/Consensus_rasters/jan_temp.img")
febPrec <- raster("Data/Consensus_rasters/feb_prec.img")
febTemp <- raster("Data/Consensus_rasters/feb_temp.img")
marPrec <- raster("Data/Consensus_rasters/mar_prec.img")
marTemp <- raster("Data/Consensus_rasters/mar_temp.img")
aprPrec <- raster("Data/Consensus_rasters/apr_prec.img")
aprTemp <- raster("Data/Consensus_rasters/apr_temp.img")
mayPrec <- raster("Data/Consensus_rasters/may_prec.img")
mayTemp <- raster("Data/Consensus_rasters/may_temp.img")
junPrec <- raster("Data/Consensus_rasters/jun_prec.img")
junTemp <- raster("Data/Consensus_rasters/jun_temp.img")
julPrec <- raster("Data/Consensus_rasters/jul_prec.img")
julTemp <- raster("Data/Consensus_rasters/jul_temp.img")
augPrec <- raster("Data/Consensus_rasters/aug_prec.img")
augTemp <- raster("Data/Consensus_rasters/aug_temp.img")
sepPrec <- raster("Data/Consensus_rasters/sep_prec.img")
sepTemp <- raster("Data/Consensus_rasters/sep_temp.img")
octPrec <- raster("Data/Consensus_rasters/oct_prec.img")
octTemp <- raster("Data/Consensus_rasters/oct_temp.img")
decPrec <- raster("Data/Consensus_rasters/dec_prec.img")
decTemp <- raster("Data/Consensus_rasters/dec_temp.img")
novPrec <- raster("Data/Consensus_rasters/nov_prec.img")
novTemp <- raster("Data/Consensus_rasters/nov_temp.img")
(rast <- brick(janPrec, janTemp, febPrec, febTemp, marPrec, marTemp, aprPrec, 
    aprTemp, mayPrec, mayTemp, junPrec, junTemp, julPrec, julTemp, augPrec, 
    augTemp, sepPrec, sepTemp, octPrec, octTemp, novPrec, novTemp, decPrec, 
    decTemp))
## class      : RasterBrick 
## dimensions : 855, 2156, 1843380, 24  (nrow, ncol, ncell, nlayers)
## resolution : 0.167, 0.167  (x, y)
## extent     : -180, 180.052, -59.1665, 83.6185  (xmin, xmax, ymin, ymax)
## crs        : NA 
## source     : /tmp/Rtmp4OILip/raster/r_tmp_2019-10-30_173244_13787_41238.grd 
## names      : jan_prec, jan_temp, feb_prec, feb_temp, mar_prec, mar_temp, apr_prec, apr_temp, may_prec, may_temp, jun_prec, jun_temp, jul_prec, jul_temp, aug_prec, ... 
## min values :     0.00,   -51.45,     0.00,   -47.45,     0.00,   -44.55,     0.00,   -35.75,     0.00,   -19.80,     0.00,   -13.55,     0.00,   -10.85,     0.00, ... 
## max values :   808.90,    33.10,   714.15,    32.45,   689.05,    32.25,   632.15,    33.90,   965.00,    35.80,  2136.30,    38.35,  2123.25,    38.20,  1538.70, ...
library("rgeos")
sa <- mcp(wost, percent = 100)
rast <- crop(rast, gBuffer(sa, width = 0.167))
rast <- mask(rast, gBuffer(sa, width = 0.167))

The result is something like this (using temperatures in January as an example)

library("sf")
library("maps")
states <- st_as_sf(map("state", plot = FALSE, fill = TRUE))
rastDF <- data.frame(rasterToPoints(rast))
library("cowplot")
library("viridis")
ggplot() + geom_raster(data = rastDF, aes_string(x = "x", y = "y", fill = names(rastDF)[4])) + 
    geom_sf(data = states, fill = NA, size = 0.2, colour = gray(0.2)) + geom_point(data = data.frame(wost), 
    aes(x = x, y = y), shape = 20, size = 0.8, stroke = 0, colour = "gray") + 
    coord_sf(xlim = c(-92, -77.5), ylim = c(24.5, 35), expand = FALSE) + scale_fill_viridis(end = 0.8, 
    alpha = 1, option = "plasma") + labs(x = "Latitude", y = "Longitude", fill = expression(`Temperature `(degree ~ 
    C))) + theme(legend.position = "top", legend.justification = "center") + 
    guides(fill = guide_colourbar(barwidth = 15, barheight = 1, title.theme = element_text(size = 26)))
Temperature in January.

Figure 2.1: Temperature in January.

2.2 Environmental intersection

We first look at the frequency of data for each month:

library("lubridate")
wost$month <- month(wost$date)
table(wost$month)
## 
##     1     2     3     4     5     6     7     8     9    10    11    12 
## 28931 26156 30298 33741 40787 38467 42136 44483 43625 42663 38409 35942
plot(table(wost$month), ylab = "Frequency", xlab = "Month")
Frequency of data per month.

Figure 2.2: Frequency of data per month.

We then intersect the GPS data to the precipitation and temperature layers, using the month number to identify the correct layer:

wost$prec <- ifelse(wost$month == 1, extract(janPrec, wost),
    ifelse(wost$month == 2, extract(febPrec, wost),
        ifelse(wost$month == 3, extract(marPrec, wost),
            ifelse(wost$month == 4, extract(aprPrec, wost),
                ifelse(wost$month == 5, extract(mayPrec, wost),
                    ifelse(wost$month == 6, extract(junPrec, wost),
                        ifelse(wost$month == 7, extract(julPrec, wost),
                            ifelse(wost$month == 8, extract(augPrec, wost),
                                ifelse(wost$month == 9, extract(sepPrec, wost),
                                    ifelse(wost$month == 10, extract(octPrec, wost),
                                        ifelse(wost$month == 11, extract(novPrec, wost),
                                            extract(decPrec, wost))))))))))))
wost$temp <- ifelse(wost$month == 1, extract(janTemp, wost),
    ifelse(wost$month == 2, extract(febTemp, wost),
        ifelse(wost$month == 3, extract(marTemp, wost),
            ifelse(wost$month == 4, extract(aprTemp, wost),
                ifelse(wost$month == 5, extract(mayTemp, wost),
                    ifelse(wost$month == 6, extract(junTemp, wost),
                        ifelse(wost$month == 7, extract(julTemp, wost),
                            ifelse(wost$month == 8, extract(augTemp, wost),
                                ifelse(wost$month == 9, extract(sepTemp, wost),
                                    ifelse(wost$month == 10, extract(octTemp, wost),
                                        ifelse(wost$month == 11, extract(novTemp, wost),
                                            extract(decTemp, wost))))))))))))

Note that the correlation between temperature and precipitation in wood stork use data is pretty high.

cor(wost$prec, wost$temp, use = "complete.obs")
## [1] 0.7965986
plot(wost$prec, wost$temp, pch = 20, xlab = "Precipitation", ylab = "Temperature")
Correlation between precipitation and temperature.

Figure 2.3: Correlation between precipitation and temperature.

2.3 Sampling the point data

In this section, I will sample GPS data to ensure that each individual is represented equally in each month. I first break down the data into a list with one element per month. Over this list, i.e. for each month, individuals with less than 100 locations are removed, and one random location of a random individual (with replacement) is chosen 5000 times, to give each individual the same weight in the sampled data:

table(wost$id, wost$month)
##         
##             1    2    3    4    5    6    7    8    9   10   11   12
##   407680    0    0    0    0    0    0    0    0    0    0    0    0
##   407700  956  856  897  911 1040  950 1325 1371 1211 1257 1295  995
##   407710    0    0    0    0    0    0  323  308  329  342  161    0
##   414240 1488 1328 1486 1490 1579 1595 1633 1638 1551 1555 1453 1540
##   414250  266  311  313  539  637  619  629  693  617  467  290  295
##   414260  947  739  781 1057 1232 1062 1207 1189 1087 1090 1040 1042
##   414270    0    0    0  175  317  320  337  327  308  348  319   72
##   414580  903  855  962 1390 1376 1416 1632 1504 1404 1233  945  975
##   414590 1530 1373 1637 1902 1980 1937 2058 1963 1927 1990 1840 1599
##   414600  620  556  554  872  887  809  801  872  778  728  615  579
##   414610    0    0    0  148  244  236    0    0    0    0    0    0
##   416100    0    0   36  285  330    0    0    0    0    0    0    0
##   416110 1005  883 1061 1269 1546 1250 1336 1348 1113 1163 1075 1061
##   425120  144  132  223  398  466  444  791  692  710  535  374  184
##   429190  890  718  851  847  952  887 1086 1122 1065  983 1013  902
##   429200    0    0    0    0    0    0    0    0    0    0    0    0
##   475171   41   10  279  302  424  491  285  302  174  175  114   86
##   475180  508  680 1148 1112 1206 1020 1210 1087 1090  982  968  709
##   475192    0    0    0  331  319    0    0    0    0    0    0    0
##   475220   94   68  172  242  229  221  217  232  194  208  195   65
##   475241    0    0    0    0    0    0   19    0    0    0    0    0
##   475270  277  238  623  659  670  652  530  476  262  142  217  121
##   475282    0    0    0    0    0    0  356  426  422  434  152    0
##   475284    0    0    0   20  155  150  156  155  150  154  150  155
##   475302  317  336  359  396  313    0   24  241   53  133  275  282
##   475322    0    0    0    0    0    0    0   58  437   10    0    0
##   476550    0    0    0    0    0    0    0    0    0    0    0    0
##   519770  457  411  487  452  703  887  926  945  912  952  895  946
##   519790  467  428  470  439  472  446  617  898  907  917  763  482
##   519800  462  412  475  432  655  907  959  947  914  936  877  933
##   519810  470  440  477  446  438  423  625  949  920  901  889  898
##   572661  213    0    0  365  469  319  465  468  456  455  336  452
##   572662  156    0    0    0  440  444  434  472  459  480  474  480
##   572671    0    0    0  385  454  244    0    0    0    0    0    0
##   572710  712  724 1151 1145 1186 1025  704  641  725  777  723  768
##   572741    0    0    0    0    0    0    0  131  215    0    0    0
##   572742    0    0    0    0    0    0  255  444  449  464  404  252
##   572743  429  384   45    0  427  201  117  214  371  422  250  342
##   572781  444   20   32  439  467  346  456  442  459  439  384  458
##   572800    0    0  115  136  121  194  201  245   86    0    0    0
##   572811  460  407  454  450  455  391  430  151  554  892  780  457
##   572931 2456 2360 2416 2385 2420 2141 2434 2611 2786 2727 2541 2589
##   572951  892  786  468  452  423  268  446  643  899  905  858  871
##   572981  464  396  466  403  408  208   42  481  453  433  442  418
##   572991  797  737  856  745  451  286  463  879  841  875  849  847
##   721290  959 1038 1366 1259 1352 1548 1745 1805 1782 1672  940  853
##   721300  804  864  909  826  684  664  909  900  847  815  781  856
##   721310    0    0    0    0    0    0    0    0    0    0    0    0
##   721320    0    0    0    0    0    0    0    0    0    0    0    0
##   723560 1101 1215 1345 1307 1405 1336 1351 1395 1358 1381 1301 1268
##   723570    0    0    0  120  360  323  355  351  354  368  321  272
##   723580    0    0    0   45  356  298  363  359  339  353   21    0
##   723600    0    0    0  120  361  326  370  363  356  369  360  285
##   723610  471  470  826  768  633  738  696  757  758  559  406  377
##   809320  439  410  461  457  647  465  473  476  452  456  454  448
##   809330    0    0    0    0  135  477  193    0    0    0    0    0
##   809340  891  588  453  434  576  830  909  926  852  900  901  905
##   910200  875  794  955  863 1340 1248 1342 1284 1296 1361 1334 1298
##   910210  913  844  924  887 1353 1309 1364 1377 1357 1406 1367 1373
##   910220  906  811  950  917 1371 1271 1367 1390 1331 1364 1338 1253
##   910230  930  819  935  906 1282 1252 1304 1319 1322 1364 1342 1330
##   910240    0    0    0    0  444  418  458  470  455  483  448  196
##   992250  445  434  460  472  813  923  948  964  942  954  900  933
##   992270  466  439  487  450  676  927  928  956  729  484  441  624
##   992280  431  407  457  441  657  911  922  918  904  916  890  907
##   992290  435  435  476  450  451  414  610  908  903  954  908  909
summary(as.numeric(table(wost$id)))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0    1876    5366    6752    9691   29870
months <- split(wost, wost$month)
for (i in 1:length(months)) months[[i]]$id <- factor(months[[i]]$id)
monthLocs <- lapply(months, function(k) {
    k <- nsubset(k, id, 100)
    k$id <- factor(k$id)
    lt <- split(k, k$id)
    set.seed(34138)
    sam <- sample(1:length(lt), 5000, replace = TRUE)
    return(do.call("rbind", lapply(sam, function(x) {
        lt[[x]][sample(1:nrow(lt[[x]]), 1), ]
    })))
})
monthLocsAll <- do.call(rbind, monthLocs)
table(monthLocsAll$month)
## 
##    1    2    3    4    5    6    7    8    9   10   11   12 
## 5000 5000 5000 5000 5000 5000 5000 5000 5000 5000 5000 5000

Here is the result on the map of temperatures in January:

ggplot() + geom_raster(data = data.frame(rasterToPoints(rast)), aes_string(x = "x", 
    y = "y", fill = names(rast)[4])) + geom_sf(data = states, fill = NA, size = 0.2, 
    colour = gray(0.2)) + geom_point(data = data.frame(wost), aes(x = x, y = y), 
    shape = 20, size = 0.8, stroke = 0, colour = "gray") + geom_point(data = subset(data.frame(monthLocsAll), 
    month == 1), aes(x = x, y = y), shape = 15) + coord_sf(xlim = c(-92, -77.5), 
    ylim = c(24.5, 35), expand = FALSE) + scale_fill_viridis(end = 0.8, alpha = 1, 
    option = "plasma") + labs(x = "Latitude", y = "Longitude", fill = expression(`Temperature `(degree ~ 
    C))) + theme(legend.position = "top", legend.justification = "center") + 
    guides(fill = guide_colourbar(barwidth = 15, barheight = 1, title.theme = element_text(size = 26)))
Wood stork location data (blue dots), with January temperatures in the background and January locations in black.

Figure 2.4: Wood stork location data (blue dots), with January temperatures in the background and January locations in black.