Chapter 15 On your own

Congratulations! You made it to the end of the planned content in one peice.

You now have the opportunity to apply some of the skills you have learnt to a completely new database. Cut and paste your code from previous chapters to check the data, write the analysis date frames and explore the dataset.

I will help you read in the data - and correct a mistake I made. Then you are on your own!

15.1 Working with a new dataset

15.1.1 Read in the data and packages

# Load your data 
pro <- read.csv("data/raw_data/your_data/proj.csv", header=T)
# I missed of the project ID from all the dtaframes so we need to update those
pro$project_id <- pro$project_name

img <- read.csv("data/raw_data/your_data/img.csv", header=T)
img$project_id <- pro$project_name

dep <- read.csv("data/raw_data/your_data/dep.csv", header=T)
dep$project_id <- pro$project_name

cam <- read.csv("data/raw_data/your_data/cam.csv", header=T)
cam$project_id <- pro$project_name


# A list of the required packages
list.of.packages <- c("activity", "corrplot", "cowplot", "dplyr",  "elevatr", "gfcanalysis",   "ggplot2", "gridExtra", "iNEXT", "kableExtra", "Hmsc", "leaflet", "lme4", "lubridate", "magrittr", "MCMCvis", "MODISTools", "osmdata", "pals", "plotly", "remotes", "rmarkdown", "sf", "spOccupancy", "stars", "stringr", "terra", "tibble", "tidyr",  "unmarked", "viridis", "jtools", "vegan", "MuMIn")

# A check to see which ones you have and which are missing
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]

# Code which tells R to install the missing packages
if(length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, require, character.only = TRUE)

15.1.2 Format the dates

# Deployment dates
# start dates
dep$start_date <- ymd(dep$start_date)

# end dates
dep$end_date   <- ymd(dep$end_date)

# camera days
dep$days <- interval(dep$start_date, dep$end_date)/ddays(1)

# Image dates
# Image timestamp
img$timestamp <- ymd_hms(img$timestamp)

15.1.3 Plot and correct spatial co-ordinates

# Pick the category you want to color
category <- "feature_type"

# We first convert this category to a factor with discrete levels
dep[,category] <- factor(dep[,category])
# then use the turbo() function to assign each level a color
col.cat <- turbo(length(levels(dep[,category])))
# then we apply it to the dataframe
dep$colours <- col.cat[dep[,category]]

m <- leaflet() %>%
  addProviderTiles(providers$Esri.WorldImagery, group="Satellite") %>%  
  addTiles(group="Base") %>%     # Include a basemap option too
  addCircleMarkers(lng=dep$longitude, lat=dep$latitude,
                   # Co lour the markers depending on the 'feature type'
                   color=dep$colours,
                   # Add a popup of the placename and feature_type together 
                   popup=paste(dep$placename, dep[,category])) %>%
  
  # Add a legend explaining what is going on
  addLegend("bottomleft", colors = col.cat,  labels = levels(dep[,category]),
                   title = category,
                   labFormat = labelFormat(prefix = "$"),
                   opacity = 1) %>%
  
  # add a layer control box to toggle between the layers
  addLayersControl(
                    baseGroups = c("Satellite", "Base"))

m

QUESTION What is the survey design? Hint: zoom in.

15.1.4 Check camera spacing

# Distance between traps
camera_locs <- dep %>% 
  dplyr::select(placename, latitude, longitude) %>% 
  unique() %>% # remove duplicated rows (rows where the placename and coordinates match)
  st_as_sf(coords = c("longitude", "latitude"), crs = "+proj=longlat") # Convert to `sf` format

# distance matrix for all cameras
camera_dist <- st_distance(camera_locs) %>% 
                  as.dist() %>% 
                  usedist::dist_setNames(as.character(camera_locs$placename)) %>% 
                  as.matrix()

camera_dist_list <- list()
i <- 1
for(i in 1:nrow(camera_dist))
{
  camera_dist_list[[i]]<- data.frame(placename=row.names(camera_dist)[i], dist= min(camera_dist[i,][camera_dist[i,]!=0]))
}
camera_dist_list <- bind_rows(camera_dist_list)
summary(camera_dist_list$dist)

Do you have anything to be worried about here?

15.1.5 Check all deployments have image data

# Do all deployments have images
table(unique(dep$placename)  %in% unique(img$placename))

15.1.6 5b. Check all images have deployment data

# Do all deployments have images
table(unique(img$placename) %in% unique(dep$placename))

15.1.7 5c. Check deployments occur where you expect

# Call the plot
p <- plot_ly()

# We want a separate row for each 'placename' - so lets turn it into a factor
dep$placename <- as.factor(dep$placename)

# loop through each place name
for(i in seq_along(levels(dep$placename)))
  {
      #Subset the data to just that placename
      tmp <- dep[dep$placename==levels(dep$placename)[i],]
      # Order by date
      tmp <- tmp[order(tmp$start_date),]
      # Loop through each deployment at that placename
      for(j in 1:nrow(tmp))
      {
        # Add a line to 'p'
        p <- add_trace(p, 
                       #Use the start and end date as x coordinates
                       x = c(tmp$start_date[j], tmp$end_date[j]), 
                       #Use the counter for the y coordinates
                       y = c(i,i), 
                       # State the type of chart
                       type="scatter",
                       # make a line that also has points
                       mode = "lines+markers", 
                       # Add the deployment ID as hover text
                       hovertext=tmp$deployment_id[j], 
                       # Color it all black
                       color=I("black"), 
                       # Suppress the legend
                       showlegend = FALSE)
      }
      
  }
# Add a categorical y axis
 p <- p %>%   layout(yaxis = list(

      ticktext = as.list(levels(dep$placename)), 

      tickvals = as.list(1:length(levels(dep$placename))),

      tickmode = "array"))


p

QUESTION What do you think about the camera deployments?

15.1.8 Check images occur within deployments

#Camera activity plot
# Make a separate plot for each 20 stations For each 20 stations
# Order by placename to make the graphs easier to follow
dep <- dep[order(dep$placename),]


# To do this make a plot dataframe
tmp <- data.frame("deployment_id"=unique(dep$deployment_id), "plot_group"=ceiling(1:length(unique(dep$deployment_id))/20))

dep_tmp <- left_join(dep,tmp, by="deployment_id")

for(i in 1:max(dep_tmp$plot_group))
{  
  # Call the plot
  
  #Subset the data to just that placename
  tmp <- dep_tmp[dep_tmp$plot_group==i,]
  # Order by placename 
  tmp <- tmp[order(tmp$placename),]
  p <- plot_ly() 
 # Loop through each deployment at that placename
  for(j in 1:nrow(tmp))
    {
        #Subset the image data
        tmp_img <- img[img$deployment_id==tmp$deployment_id[j],]
        
        if(nrow(tmp_img)>0)
        {
         
          p <- add_trace(p, 
                       #Use the start and end date as x coordinates
                       x = c(ymd_hms(tmp_img$timestamp)), 
                       #Use the counter for the y coordinates
                       y = rep(j, nrow(tmp_img)), 
                       # State the type of chart
                       type="scatter",
                       # make a line that also has points
                       mode = "markers", 
                       # Add the deployment ID as hover text
                       hovertext=paste(tmp_img$genus,tmp_img$species), 
                       # Color it all black
                       marker = list(color = "red"), 
                       # Suppress the legend
                       showlegend = FALSE)
        }
        
       # Add a line to 'p'
        p <- add_trace(p, 
                       #Use the start and end date as x coordinates
                       x = c(tmp$start_date[j], tmp$end_date[j]+days(1)), 
                       #Use the counter for the y coordinates
                       y = c(j,j), 
                       # State the type of chart
                       type="scatter",
                       # make a line that also has points
                       mode = "lines", 
                       # Add the deployment ID as hover text
                       hovertext=tmp$deployment_id[j], 
                       # Color it all black
                       color=I("black"), 
                       # Suppress the legend
                       showlegend = FALSE)
      }
  # Add custom y axis labels  
  p <- p %>%   layout(yaxis = list(

      ticktext = as.list(tmp$deployment_id), 

      tickvals = as.list(1:nrow(tmp)),

      tickmode = "array"))
  
  print(p)
      
  
} 

Note - there are three panels to check!

QUESTION Do you need to make any edits? Note - there are three plot windows to check.

The camera failed for LAC061 - try to seem in and find out the last day it worked. Then update the deployment so the camera stops the day after the last correct date.

# The camera failed for LAC061 
dep$end_date[dep$deployment_id=="LAC061_C186_010921"] <- ymd("2021-09-09")

LAC064 also looks somewhat strange, that would be worth checking what happened. Lets leave it in for now.

We could maybe remove OFF067 and OFF072? Personal choice!

15.1.9 Check species taxonomy

# Create a species list
# add an sp column to the img dataframe - remember the genus and species columns are not pasted together yet
img$sp <- paste(img$genus, img$species, sep=".")
### Species lists

# First define vector of the headings you want to see (we will use this trick a lot later on)
taxonomy_headings <- c("class", "order", "family", "genus", "species", "common_name")

# Subset the image data to just those columns
tmp<- img[,colnames(img)%in% taxonomy_headings]
# Remove duplicates
tmp <- tmp[duplicated(tmp)==F,]

# Create an ordered species list
sp_list  <- tmp[order(tmp$class, tmp$order, tmp$family, tmp$genus, tmp$species),]
sp_list$sp <- paste(sp_list$genus, sp_list$species, sep=".")

write.csv(sp_list, paste0("data/raw_data/",pro$project_id[1],"_raw_species_list.csv"))

QUESTION Are there any classifications you will remove when we create our analysis dataframes?

15.1.10 Check species activity

# Diel activity check

# First lets convert our timestamp to decimal hours
img$hours <- hour(img$timestamp) + minute(img$timestamp)/60 + second(img$timestamp)/(60*60)

# Count all of the captures
tmp <- img %>% group_by(common_name) %>% summarize(count=n())

yform <- list(categoryorder = "array",
              categoryarray = tmp$common_name)

fig <- plot_ly(x = img$hours, y = img$common_name,type="scatter",
               height=1000, text=img$deployment_id, hoverinfo='text',
               mode   = 'markers',
               marker = list(size = 5,
                             color = 'rgba(50, 100, 255, .2)',
                             line = list(color = 'rgba(0, 0, 0, 0)',
                                         width = 0))) %>% 
              layout(yaxis = yform)
fig

# Remove the column
img$hours <- NULL

QUESTION Are there any images you would like to check?

Note - Paca’s are nocturnal but they have some diurnal detections here. Check which station they come from. It might be that they are the same station as the one we excluded previously!

15.1.11 Filter to target species

# Create your processed dataframe folder
dir.create("data/processed_data")

# Filter your species
# Remove observations without animals detected, where we don't know the species, and non-mammals
img_sub <- img %>% filter(is_blank==0,                # Remove the blanks
                          is.na(img$species)==FALSE, # Remove classifications which don't have species 
                          class=="Mammalia",          # Subset to mammals
                          species!="sapiens",
                          species!="")               # remove instances without species labels 
                          
table(img_sub$common_name)

QUESTION Are there any other classifications you would like to remove?

15.1.12 Make effort look_up

##########################################
# Create your daily lookup

# Remove any deployments without end dates
tmp <- dep[is.na(dep$end_date)==F,]

# Create an empty list to store our days
daily_lookup <- list()

# Loop through the deployment dataframe and create a row for every day the camera is active
for(i in 1:nrow(tmp))
{
  if(ymd(tmp$start_date[i])!=ymd(tmp$end_date[i]))
  {
    daily_lookup[[i]] <- data.frame("date"=seq(ymd(tmp$start_date[i]), ymd(tmp$end_date[i]), by="days"), "placename"=tmp$placename[i])
  }
}

# Merge the lists into a dataframe
row_lookup <- bind_rows(daily_lookup)

# Remove duplicates - when start and end days are the same for successive deployments
row_lookup <- row_lookup[duplicated(row_lookup)==F,]

15.1.13 Create independent data

###################################
# Create your independent detections
independent <- 30

# Check for a `group_size` variable? 
table(img_sub$group_size)

# Check for a 'number_of_objects' variable
table(img_sub$number_of_objects)

QUESTION WHich variable will you use for your animal_count?

img_sub$animal_count <- ###INSERT YOUR SELECTION HERE###
# Create your independent data

img_tmp <- img_sub %>%
              arrange(deployment_id) %>%        # Order by deployment_id
              group_by(deployment_id, sp) %>%   # Group species together
              mutate(duration = int_length(timestamp %--% lag(timestamp))) # Calculate the gap bet



library(stringr)
# Give a random value to all cells
img_tmp$event_id <- 9999

# Create a counter
counter <- 1

# Make a unique code that has one more zero than rows in your dataframe  
num_code <- as.numeric(paste0(nrow(img_sub),0))

# Loop through img_tmp - if gap is greater than the threshold -> give it a new event ID
for (i in 2:nrow(img_tmp)) {
  img_tmp$event_id[i-1]  <- paste0("E", str_pad(counter, nchar(num_code), pad = "0"))
  
  if(is.na(img_tmp$duration[i]) | abs(img_tmp$duration[i]) > (independent * 60))
    {
      counter <- counter + 1
    }
}

# Update the information for the last row - the loop above always updates the previous row... leaving the last row unchanged
   
 # group ID  for the last row
 if(img_tmp$duration[nrow(img_tmp)] < (independent * 60)|
    is.na(img_tmp$duration[nrow(img_tmp)])){
   img_tmp$event_id[nrow(img_tmp)] <- img_tmp$event_id[nrow(img_tmp)-1]
 } else{
   counter <- counter + 1
   img_tmp$event_id[nrow(img_tmp)] <- paste0("E", str_pad(counter, nchar(num_code), pad = "0"))
 }

# remove the duration column
img_tmp$duration <- NULL
 


 # find out the last and the first of the time in the group
  top <- img_tmp %>% group_by(event_id) %>% top_n(1,timestamp) %>% dplyr::select(event_id, timestamp)
  bot <- img_tmp %>% group_by(event_id) %>% top_n(-1,timestamp) %>% dplyr::select(event_id, timestamp)
  names(bot)[2] <- c("timestamp_end")
  
  img_num <- img_tmp %>% group_by(event_id) %>% summarise(event_observations=n()) # number of images in the event
  event_grp <- img_tmp %>% group_by(event_id) %>% summarise(event_groupsize=max(animal_count))

  # calculate the duration and add the other elements
  diff <-  top %>% left_join(bot, by="event_id") %>%
      mutate(event_duration=abs(int_length(timestamp %--% timestamp_end))) %>%
      left_join(event_grp, by="event_id")%>%
      left_join(img_num, by="event_id")

  # Remove columns you don't need
  diff$timestamp   <-NULL
  diff$timestamp_end <-NULL
  diff <- diff[duplicated(diff)==F,]
    # Merge the img_tmp with the event data
  img_tmp <- 
   left_join(img_tmp,diff,by="event_id")
  
  
  # Remove duplicates
ind_dat <- img_tmp[duplicated(img_tmp$event_id)==F,]

# Make a  unique code for ever day and deployment where cameras were functioning
tmp <- paste(row_lookup$date, row_lookup$placename)

#Subset ind_dat to data that matches the unique codes
ind_dat <- ind_dat[paste(substr(ind_dat$timestamp,1,10), ind_dat$placename) %in% tmp, ]

# Convert your species names to factors
ind_dat$sp <- as.factor(ind_dat$sp)

# Export your data frames

write.csv(ind_dat, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_detections.csv"), row.names = F)

# also write the cleaned all detections file (some activity analyses require it)
write.csv(img_tmp, paste0("data/processed_data/",ind_dat$project_id[1], "_raw_detections.csv"), row.names = F)

write.csv(row_lookup, paste0("data/processed_data/",ind_dat$project_id[1], "_daily_lookup.csv"), row.names = F)

#Subset the columns
tmp <- dep[, c("project_id", "placename", "longitude", "latitude", "feature_type")]

# Remove duplicated rows
tmp<- tmp[duplicated(tmp)==F,]

# write the file
write.csv(tmp, paste0("data/processed_data/",ind_dat$project_id[1], "_camera_locations.csv"), row.names = F)


tmp <- sp_list[sp_list$sp %in% ind_dat$sp,]

write.csv(tmp, paste0("data/processed_data/",ind_dat$project_id[1], "_species_list.csv"), row.names = F)

15.1.14 Create analysis dataframes

In the next step, we won’t create the daily dataframe as it takes too long!

# Total counts
  # Station / Month / deport / Species      
  tmp <- row_lookup
  
  # Calculate the number of days at each site  
  total_obs <- tmp %>% 
      group_by(placename) %>%
      summarise(days = n())
  
  # Convert to a data frame
  total_obs <- as.data.frame(total_obs)
  
  # Add columns for each species  
  total_obs[, levels(ind_dat$sp)] <- NA
  # Duplicate for counts
  total_count <- total_obs
  # Test counter
  i <-1
  # For each station, count the number of individuals/observations
  for(i in 1:nrow(total_obs))
    {
      tmp <- ind_dat[ind_dat$placename==total_obs$placename[i],]
      
      tmp_stats <- tmp %>%  group_by(sp, .drop=F) %>% summarise(obs=n(), count=sum(animal_count))
      
      total_obs[i,as.character(tmp_stats$sp)] <- tmp_stats$obs
      total_count[i,as.character(tmp_stats$sp)] <- tmp_stats$count
    }

  
# Save them
    
write.csv(total_obs, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_total_observations.csv"), row.names = F) 

write.csv(total_count, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_total_counts.csv"), row.names = F) 


# Monthly counts
  # Station / Month / days / Covariates / Species      
  tmp <- row_lookup
  # Simplify the date to monthly
  tmp$date <- substr(tmp$date,1,7)
  
  # Calculate the number of days in each month  
  mon_obs <- tmp %>% 
      group_by(placename,date ) %>%
      summarise(days = n())
  # Convert to a data frame
  mon_obs <- as.data.frame(mon_obs)
    
  mon_obs[, levels(ind_dat$sp)] <- NA
  mon_count <- mon_obs
  # For each month, count the number of individuals/observations
  for(i in 1:nrow(mon_obs))
    {
      tmp <- ind_dat[ind_dat$placename==mon_obs$placename[i] & substr(ind_dat$timestamp,1,7)== mon_obs$date[i],]
      
      tmp_stats <- tmp %>%  group_by(sp, .drop=F) %>% summarise(obs=n(), count=sum(animal_count))
      
      mon_obs[i,as.character(tmp_stats$sp)] <- tmp_stats$obs
      mon_count[i,as.character(tmp_stats$sp)] <- tmp_stats$count
      
    }

  
write.csv(mon_obs, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_monthly_observations.csv"), row.names = F) 

write.csv(mon_count, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_monthly_counts.csv"), row.names = F) 

### Weekly 

# Weekly format
  # Station / Month / days / Covariates / Species      
  tmp <- row_lookup
  # Simplify the date to year-week
  tmp$date <- strftime(tmp$date, format = "%Y-W%U")
  # The way this is coded is the counter W01 starts at the first Sunday of the year, everything before that is W00. Weeks do not roll across years.
  
  # Calculate the number of days in each week  
  week_obs <- tmp %>% 
      group_by(placename,date ) %>%
      summarise(days = n())
  
  # Convert to a data frame
  week_obs <- as.data.frame(week_obs)
  
  # Add species columns  
  week_obs[, levels(ind_dat$sp)] <- NA
  
  # Duplicate for counts
  week_count <- week_obs
  
  # For each week, count the number of individuals/observations
  for(i in 1:nrow(week_obs))
    {
      tmp <- ind_dat[ind_dat$placename==week_obs$placename[i] & strftime(ind_dat$timestamp, format = "%Y-W%U")== week_obs$date[i],]
      
      tmp_stats <- tmp %>%  group_by(sp, .drop=F) %>% summarise(obs=n(), count=sum(animal_count))
      
      week_obs[i,as.character(tmp_stats$sp)] <- tmp_stats$obs
      week_count[i,as.character(tmp_stats$sp)] <- tmp_stats$count
      
    }

write.csv(week_obs, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_weekly_observations.csv"), row.names = F) 

write.csv(week_count, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_weekly_counts.csv"), row.names = F) 

15.1.15 Check analysis dataframe counts

# Do an final check of the counts


tmp <- cbind(data.frame("Time"=c("Total", "Monthly", "Weekly")),
rbind(colSums(total_obs[,2:ncol(total_obs)]),
colSums(mon_obs[,3:ncol(mon_obs)]),
colSums(week_obs[,3:ncol(week_obs)])))

tmp %>%
  kbl() %>%
  kable_styling(full_width = T) %>%
  column_spec(1, bold = T, border_right = T)%>% 
  kableExtra::scroll_box(width = "100%")

15.2 Add your covariates

# LOad the packages
library(kableExtra);library(dplyr); library(sf); library(MODISTools); library(lubridate); library(corrplot); library(traitdata); library(terra); library(osmdata); library(elevatr)

15.2.1 Species Traits

# Start by reading in your species list
sp_summary <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_species_list.csv"), header=T)
locs <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_camera_locations.csv"), header=T)

# Species traits
library(traitdata)
data("elton_mammals")
elton_mammals$sp <- paste0(elton_mammals$Genus,"." ,elton_mammals$Species)

tmp <- elton_mammals[, c("sp","BodyMass.Value", "Activity.Nocturnal", "Activity.Crepuscular",   "Activity.Diurnal")]

# Lets rename the columns to make them more usable
tmp <- tmp %>% rename(
              mass_g = BodyMass.Value,
              act_noct = Activity.Nocturnal,
              act_crep = Activity.Crepuscular,
              act_diur = Activity.Diurnal)

sp_summary <- left_join(sp_summary, tmp)

QUESTION Are there any missing values you need to fill in?

Dont worry about adding it right now - carry on!

write.csv(sp_summary, paste0("data/processed_data/", locs$project_id[1],"_species_list.csv"), row.names = F)

15.3 Spatial data

15.3.1 Convert to simple features

# Spatial covariates
locs_sf <- st_as_sf(locs,                              # We specify the dataframe 
                    coords=c("longitude", "latitude"), # The XY coordinates
                    crs=4326)                          # And the projection code

15.3.2 Get elevation data

###################
# Elevation 

library(elevatr)
locs_sf <- get_elev_point(locs_sf, 
                          src="aws", #Amazon Web Service Terrain Tiles - available globally
                          z = 12)  # z specifies the zoom level, the lower the value the faster the code runs, but the coarser the elevation values are

boxplot(locs_sf$elevation)

15.3.3 Open street map data

#######
# Distance to water (there are no roads here)

library(osmdata)
aoi <- st_bbox(st_buffer(locs_sf, 10000)) # Units are in meters 

water <- opq(aoi) %>%
           add_osm_feature(key="water") %>%
           osmdata_sf()

# THERE IS NO OPEN STREET MAP DATA FOR THIS AREA - ARGH

15.3.4 NDVI

#######
# NDVI

library(MODISTools)
modis_locs <- locs %>% 
  select("placename", "longitude", "latitude") %>% 
  rename(site_name=placename, lat=latitude, lon=longitude)


# list available dates for a product at a location
dates <- mt_dates(product = "MOD13Q1", lat = modis_locs$lat[1], lon = modis_locs$lon[1]) #MOD15A2H

# Get the first and last date!
first(dates$calendar_date); last(dates$calendar_date)


site_ndvi <- mt_batch_subset(product = "MOD13Q1",
                              df=modis_locs,
                              band = "250m_16_days_NDVI",
                              start = "2022-01-01",
                              end = "2022-02-28",
                              km_lr = 0,         # Use these options if you want to buffer the value (km left)
                              km_ab = 0,         # Use these options if you want to buffer the value (km above)
                              internal = TRUE)

ndvi_simple <- site_ndvi %>% 
  select(   site, band, calendar_date, value) %>% 
  rename(placename=site)

tmp <- ndvi_simple %>%             #Take the NDVI layer
  group_by(placename) %>%          # Group observations by the placename
  summarize(mean_ndvi=mean(value)) # Take the mean of the values and call the new column `mean_ndvi`

# Add the new data to our locations dataframe
locs_sf <- left_join(locs_sf, tmp)

# Convert it back to a dataframe
locs_sf$geometry <- NULL

locs <- left_join(locs, locs_sf)

# Write the dataset

write.csv(locs, paste0("data/processed_data/", locs$project_id[1],"_camera_locations_and_covariates.csv"), row.names=F)

15.4 Data exploration

15.4.1 Final map

list.of.packages <- c("kableExtra", "tidyr", "leaflet", "dplyr", "viridis", "corrplot", "lubridate", "plotly")

new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, require, character.only = TRUE)

# Final locations plot

locs <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_camera_locations_and_covariates.csv"))

# If you want to color by a category do it here:
category <- "feature_type"
# First lets choose a category to color
locs[,category] <- factor(locs[,category])
col.cat <- turbo(length(levels(locs[,category])))
# Add it to the dataframe
locs$colours <- col.cat[locs[,category]]

m <- leaflet() %>%
  # Add a satellite image layer
  addProviderTiles(providers$Esri.WorldImagery, group="Satellite") %>%  
  addProviderTiles(providers$Esri.WorldTopoMap, group="Base") %>%     
  addCircleMarkers(lng=locs$longitude, lat=locs$latitude,
                   # Color the markers depending on the 'feature type'
                   color=locs$colours,
                   # Add a popup of the deployment code 
                   popup=paste(locs$placename, locs[,category])) %>%
  # Add a legend explaining what is going on
  addLegend("bottomleft", colors = col.cat,  labels = levels(locs[,category]),
    title = category,
    labFormat = labelFormat(prefix = "$"),
    opacity = 1
  ) %>%
  # add a layer control box to toggle between the layers
  addLayersControl(
    baseGroups = c("Satellite", "Base"),
    options = layersControlOptions(collapsed = FALSE)
  )
m

15.4.2 Detection summary

sp_summary <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_species_list.csv"), header=T)
total_obs <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_30min_independent_total_observations.csv"), header=T)

# Convert to long
long_obs <- total_obs %>% 
  pivot_longer(cols=sp_summary$sp,  # The columns we want to create into rows - species
               names_to="sp",       # What we what the number column to be called
               values_to = "count") # Takes the values in the species columns and calls them `count`


# We can them summaries those using dplyr
tmp <- long_obs %>%                   # Take the long observation data frame `long_obs` 
          group_by(sp) %>%            # Group by species
          summarise(count=sum(count)) # Sum all the independent observations

# Add it to the sp_summary dataframe
sp_summary <- left_join(sp_summary, tmp)

## Occupancy
# We use the mutate function to mutate the column
total_binary <-  total_obs %>%    # The total obs dataframe              
                    mutate(across(sp_summary$sp, ~+as.logical(.x)))  # across all of the species columns, make it binary

# Flip the dataframe to longer - as before
long_bin <- total_binary %>% 
  pivot_longer(cols=sp_summary$sp, names_to="sp", values_to = "count") # Takes the species names columns, and makes them unique rows with "sp" as the key 

# We can now sum the presence/absences and divide by the number of survey locations
tmp <- long_bin %>% 
  group_by(sp) %>% 
  summarise(occupancy=sum(count)/nrow(locs)) # divided the sum by the number of sites

# add the results to the sp_summary
sp_summary <- left_join(sp_summary, tmp)


###########################
# Comparison plot
# Lets put the dataframes in a sensible order
sp_summary <- sp_summary[order(sp_summary$count),]

yform <- list(categoryorder = "array",
              categoryarray = sp_summary$sp)

xform <- list(title="Captures")

# Capture rate
fig1 <- plot_ly(x = sp_summary$count, y = sp_summary$common_name, type = 'bar', orientation = 'h') %>% 
 layout(yaxis = yform, xaxis=xform)

yform <- list(categoryorder = "array",
              categoryarray = sp_summary$sp,
              showticklabels=F)
xform <- list(title="Occupancy")


# Occupancy
fig2 <- plot_ly(x = sp_summary$occupancy, y = sp_summary$common_name, type = 'bar', orientation = 'h') %>% 
 layout(yaxis = yform, xaxis=xform)

subplot(nrows=1,fig1, fig2, titleX = T) # We could stack them on top of one another using nrows=2

QUESTION What does this tell you about the behavior of the target species?

15.4.3 Temporal patterns

par(mar=c(5,4,1,1))
# Temporal patterns
mon_obs <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_30min_independent_monthly_observations.csv"), header=T)

# Count up the number of stations and the number of camera nights
mon_summary <- mon_obs %>%                  # Use the monthly observations dataframe
            group_by(date) %>%              # Group by the date
            summarise(locs_active=n(),      # Count the number of active cameras
                      cam_days=sum(days))   # And sum the active days 


# Add in the species specific counts - and join it with the mon_summary dataframe
mon_summary <- mon_obs %>% 
                group_by(date) %>%  
                summarise(across(sp_summary$sp, sum, na.rm=TRUE)) %>% # summarise across all of 
                                                                      # the species columns 
                left_join(x=mon_summary)   # Join with the mon_summary dataframe

# We first need to convert the date column to a date object
mon_summary$date <- ym(mon_summary$date)

# Set up a two panel plot (side by side)
par(mfrow=c(1,2))

plot(mon_summary$date, mon_summary$locs_active,
     type="o", 
     pch=19,
     ylim=c(0, max(mon_summary$locs_active)),
     las=1, 
     ylab="Number of cameras active", xlab="Date")


# Sum all the captures rates for the species columns
mon_summary$all.sp <- rowSums(mon_summary[, sp_summary$sp])

# Plot them
plot(mon_summary$date, mon_summary$all.sp/(mon_summary$cam_days/100),
     type="o",
     pch=19,
     las=1, ylab="Detections per 100 cam days", xlab="Date")

Question What is going on in these plots?

# Species specific plots

par(mfrow=c(2,2))
i <- 1
for(i in 1:length(sp_summary$sp))
{
  plot(mon_summary$date, pull(mon_summary, sp_summary$sp[i])/(mon_summary$cam_days/100),  # The pull command allows you to grab a specific column in a dataframe and turn it into a vector!
     type="o",
     pch=19,
     las=1, ylab="Detections per 100 cam days", xlab="Date",
     main=sp_summary$sp[i])
}

Question Any interesting patterns?

15.4.4 Spatial patterns

# Spatial plots

total_obs <- left_join(total_obs, locs)

# Jaguar
focal_species <- "Panthera.onca"

focal_cr <- pull(total_obs, focal_species)/(total_obs$days/100)

m <- leaflet() %>%
  addProviderTiles(providers$Esri.WorldTopoMap, group="Base") %>%     
  addCircleMarkers(lng=locs$longitude, lat=locs$latitude,
                   # Add a popup of the deployment code 
                   popup=paste(locs$placename),
                   radius=(focal_cr/max(focal_cr)*10)+1, stroke=F,
                   fillOpacity=0.6) 
m

Try other species!

Question Any interesting patterns?

15.4.5 Species co-occurance

# Co-occurances
par(mfrow=c(1,1))

# Pull the data for each of the species from 
tmp <- total_obs[, sp_summary$sp]
M <- cor(tmp)

corrplot(M, method="color", 
         type="upper", 
         order="hclust",
         # addCoef.col = "black", # We suppress the coefs to make a cleaner plot
         tl.col="black", tl.srt=45, #Text label color and rotation
         diag=FALSE
         )

Question Any interesting patterns?

You can now do some data exploration plots.

# Prepare the data
locs <- locs %>% 
            mutate_if(is.character,as.factor) # If a column is a character string, make it a factor

total_obs <- left_join(total_obs, locs)

We have the following species:

sp_summary$sp

And the following categories to explore:

 c("feature_type", "elevation", "mean_ndvi")