Chapter 15 On your own
Congratulations! You made it to the end of the planned content in one peice.
You now have the opportunity to apply some of the skills you have learnt to a completely new database. Cut and paste your code from previous chapters to check the data, write the analysis date frames and explore the dataset.
I will help you read in the data - and correct a mistake I made. Then you are on your own!
15.1 Working with a new dataset
15.1.1 Read in the data and packages
# Load your data
pro <- read.csv("data/raw_data/your_data/proj.csv", header=T)
# I missed of the project ID from all the dtaframes so we need to update those
pro$project_id <- pro$project_name
img <- read.csv("data/raw_data/your_data/img.csv", header=T)
img$project_id <- pro$project_name
dep <- read.csv("data/raw_data/your_data/dep.csv", header=T)
dep$project_id <- pro$project_name
cam <- read.csv("data/raw_data/your_data/cam.csv", header=T)
cam$project_id <- pro$project_name
# A list of the required packages
list.of.packages <- c("activity", "corrplot", "cowplot", "dplyr", "elevatr", "gfcanalysis", "ggplot2", "gridExtra", "iNEXT", "kableExtra", "Hmsc", "leaflet", "lme4", "lubridate", "magrittr", "MCMCvis", "MODISTools", "osmdata", "pals", "plotly", "remotes", "rmarkdown", "sf", "spOccupancy", "stars", "stringr", "terra", "tibble", "tidyr", "unmarked", "viridis", "jtools", "vegan", "MuMIn")
# A check to see which ones you have and which are missing
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
# Code which tells R to install the missing packages
if(length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, require, character.only = TRUE)
15.1.3 Plot and correct spatial co-ordinates
# Pick the category you want to color
category <- "feature_type"
# We first convert this category to a factor with discrete levels
dep[,category] <- factor(dep[,category])
# then use the turbo() function to assign each level a color
col.cat <- turbo(length(levels(dep[,category])))
# then we apply it to the dataframe
dep$colours <- col.cat[dep[,category]]
m <- leaflet() %>%
addProviderTiles(providers$Esri.WorldImagery, group="Satellite") %>%
addTiles(group="Base") %>% # Include a basemap option too
addCircleMarkers(lng=dep$longitude, lat=dep$latitude,
# Co lour the markers depending on the 'feature type'
color=dep$colours,
# Add a popup of the placename and feature_type together
popup=paste(dep$placename, dep[,category])) %>%
# Add a legend explaining what is going on
addLegend("bottomleft", colors = col.cat, labels = levels(dep[,category]),
title = category,
labFormat = labelFormat(prefix = "$"),
opacity = 1) %>%
# add a layer control box to toggle between the layers
addLayersControl(
baseGroups = c("Satellite", "Base"))
m
QUESTION What is the survey design? Hint: zoom in.
15.1.4 Check camera spacing
# Distance between traps
camera_locs <- dep %>%
dplyr::select(placename, latitude, longitude) %>%
unique() %>% # remove duplicated rows (rows where the placename and coordinates match)
st_as_sf(coords = c("longitude", "latitude"), crs = "+proj=longlat") # Convert to `sf` format
# distance matrix for all cameras
camera_dist <- st_distance(camera_locs) %>%
as.dist() %>%
usedist::dist_setNames(as.character(camera_locs$placename)) %>%
as.matrix()
camera_dist_list <- list()
i <- 1
for(i in 1:nrow(camera_dist))
{
camera_dist_list[[i]]<- data.frame(placename=row.names(camera_dist)[i], dist= min(camera_dist[i,][camera_dist[i,]!=0]))
}
camera_dist_list <- bind_rows(camera_dist_list)
summary(camera_dist_list$dist)
Do you have anything to be worried about here?
15.1.7 5c. Check deployments occur where you expect
# Call the plot
p <- plot_ly()
# We want a separate row for each 'placename' - so lets turn it into a factor
dep$placename <- as.factor(dep$placename)
# loop through each place name
for(i in seq_along(levels(dep$placename)))
{
#Subset the data to just that placename
tmp <- dep[dep$placename==levels(dep$placename)[i],]
# Order by date
tmp <- tmp[order(tmp$start_date),]
# Loop through each deployment at that placename
for(j in 1:nrow(tmp))
{
# Add a line to 'p'
p <- add_trace(p,
#Use the start and end date as x coordinates
x = c(tmp$start_date[j], tmp$end_date[j]),
#Use the counter for the y coordinates
y = c(i,i),
# State the type of chart
type="scatter",
# make a line that also has points
mode = "lines+markers",
# Add the deployment ID as hover text
hovertext=tmp$deployment_id[j],
# Color it all black
color=I("black"),
# Suppress the legend
showlegend = FALSE)
}
}
# Add a categorical y axis
p <- p %>% layout(yaxis = list(
ticktext = as.list(levels(dep$placename)),
tickvals = as.list(1:length(levels(dep$placename))),
tickmode = "array"))
p
QUESTION What do you think about the camera deployments?
15.1.8 Check images occur within deployments
#Camera activity plot
# Make a separate plot for each 20 stations For each 20 stations
# Order by placename to make the graphs easier to follow
dep <- dep[order(dep$placename),]
# To do this make a plot dataframe
tmp <- data.frame("deployment_id"=unique(dep$deployment_id), "plot_group"=ceiling(1:length(unique(dep$deployment_id))/20))
dep_tmp <- left_join(dep,tmp, by="deployment_id")
for(i in 1:max(dep_tmp$plot_group))
{
# Call the plot
#Subset the data to just that placename
tmp <- dep_tmp[dep_tmp$plot_group==i,]
# Order by placename
tmp <- tmp[order(tmp$placename),]
p <- plot_ly()
# Loop through each deployment at that placename
for(j in 1:nrow(tmp))
{
#Subset the image data
tmp_img <- img[img$deployment_id==tmp$deployment_id[j],]
if(nrow(tmp_img)>0)
{
p <- add_trace(p,
#Use the start and end date as x coordinates
x = c(ymd_hms(tmp_img$timestamp)),
#Use the counter for the y coordinates
y = rep(j, nrow(tmp_img)),
# State the type of chart
type="scatter",
# make a line that also has points
mode = "markers",
# Add the deployment ID as hover text
hovertext=paste(tmp_img$genus,tmp_img$species),
# Color it all black
marker = list(color = "red"),
# Suppress the legend
showlegend = FALSE)
}
# Add a line to 'p'
p <- add_trace(p,
#Use the start and end date as x coordinates
x = c(tmp$start_date[j], tmp$end_date[j]+days(1)),
#Use the counter for the y coordinates
y = c(j,j),
# State the type of chart
type="scatter",
# make a line that also has points
mode = "lines",
# Add the deployment ID as hover text
hovertext=tmp$deployment_id[j],
# Color it all black
color=I("black"),
# Suppress the legend
showlegend = FALSE)
}
# Add custom y axis labels
p <- p %>% layout(yaxis = list(
ticktext = as.list(tmp$deployment_id),
tickvals = as.list(1:nrow(tmp)),
tickmode = "array"))
print(p)
}
Note - there are three panels to check!
QUESTION Do you need to make any edits? Note - there are three plot windows to check.
The camera failed for LAC061 - try to seem in and find out the last day it worked. Then update the deployment so the camera stops the day after the last correct date.
# The camera failed for LAC061
dep$end_date[dep$deployment_id=="LAC061_C186_010921"] <- ymd("2021-09-09")
LAC064 also looks somewhat strange, that would be worth checking what happened. Lets leave it in for now.
We could maybe remove OFF067 and OFF072? Personal choice!
15.1.9 Check species taxonomy
# Create a species list
# add an sp column to the img dataframe - remember the genus and species columns are not pasted together yet
img$sp <- paste(img$genus, img$species, sep=".")
### Species lists
# First define vector of the headings you want to see (we will use this trick a lot later on)
taxonomy_headings <- c("class", "order", "family", "genus", "species", "common_name")
# Subset the image data to just those columns
tmp<- img[,colnames(img)%in% taxonomy_headings]
# Remove duplicates
tmp <- tmp[duplicated(tmp)==F,]
# Create an ordered species list
sp_list <- tmp[order(tmp$class, tmp$order, tmp$family, tmp$genus, tmp$species),]
sp_list$sp <- paste(sp_list$genus, sp_list$species, sep=".")
write.csv(sp_list, paste0("data/raw_data/",pro$project_id[1],"_raw_species_list.csv"))
QUESTION Are there any classifications you will remove when we create our analysis dataframes?
15.1.10 Check species activity
# Diel activity check
# First lets convert our timestamp to decimal hours
img$hours <- hour(img$timestamp) + minute(img$timestamp)/60 + second(img$timestamp)/(60*60)
# Count all of the captures
tmp <- img %>% group_by(common_name) %>% summarize(count=n())
yform <- list(categoryorder = "array",
categoryarray = tmp$common_name)
fig <- plot_ly(x = img$hours, y = img$common_name,type="scatter",
height=1000, text=img$deployment_id, hoverinfo='text',
mode = 'markers',
marker = list(size = 5,
color = 'rgba(50, 100, 255, .2)',
line = list(color = 'rgba(0, 0, 0, 0)',
width = 0))) %>%
layout(yaxis = yform)
fig
# Remove the column
img$hours <- NULL
QUESTION Are there any images you would like to check?
Note - Paca’s are nocturnal but they have some diurnal detections here. Check which station they come from. It might be that they are the same station as the one we excluded previously!
15.1.11 Filter to target species
# Create your processed dataframe folder
dir.create("data/processed_data")
# Filter your species
# Remove observations without animals detected, where we don't know the species, and non-mammals
img_sub <- img %>% filter(is_blank==0, # Remove the blanks
is.na(img$species)==FALSE, # Remove classifications which don't have species
class=="Mammalia", # Subset to mammals
species!="sapiens",
species!="") # remove instances without species labels
table(img_sub$common_name)
QUESTION Are there any other classifications you would like to remove?
15.1.12 Make effort look_up
##########################################
# Create your daily lookup
# Remove any deployments without end dates
tmp <- dep[is.na(dep$end_date)==F,]
# Create an empty list to store our days
daily_lookup <- list()
# Loop through the deployment dataframe and create a row for every day the camera is active
for(i in 1:nrow(tmp))
{
if(ymd(tmp$start_date[i])!=ymd(tmp$end_date[i]))
{
daily_lookup[[i]] <- data.frame("date"=seq(ymd(tmp$start_date[i]), ymd(tmp$end_date[i]), by="days"), "placename"=tmp$placename[i])
}
}
# Merge the lists into a dataframe
row_lookup <- bind_rows(daily_lookup)
# Remove duplicates - when start and end days are the same for successive deployments
row_lookup <- row_lookup[duplicated(row_lookup)==F,]
15.1.13 Create independent data
###################################
# Create your independent detections
independent <- 30
# Check for a `group_size` variable?
table(img_sub$group_size)
# Check for a 'number_of_objects' variable
table(img_sub$number_of_objects)
QUESTION WHich variable will you use for your animal_count
?
# Create your independent data
img_tmp <- img_sub %>%
arrange(deployment_id) %>% # Order by deployment_id
group_by(deployment_id, sp) %>% # Group species together
mutate(duration = int_length(timestamp %--% lag(timestamp))) # Calculate the gap bet
library(stringr)
# Give a random value to all cells
img_tmp$event_id <- 9999
# Create a counter
counter <- 1
# Make a unique code that has one more zero than rows in your dataframe
num_code <- as.numeric(paste0(nrow(img_sub),0))
# Loop through img_tmp - if gap is greater than the threshold -> give it a new event ID
for (i in 2:nrow(img_tmp)) {
img_tmp$event_id[i-1] <- paste0("E", str_pad(counter, nchar(num_code), pad = "0"))
if(is.na(img_tmp$duration[i]) | abs(img_tmp$duration[i]) > (independent * 60))
{
counter <- counter + 1
}
}
# Update the information for the last row - the loop above always updates the previous row... leaving the last row unchanged
# group ID for the last row
if(img_tmp$duration[nrow(img_tmp)] < (independent * 60)|
is.na(img_tmp$duration[nrow(img_tmp)])){
img_tmp$event_id[nrow(img_tmp)] <- img_tmp$event_id[nrow(img_tmp)-1]
} else{
counter <- counter + 1
img_tmp$event_id[nrow(img_tmp)] <- paste0("E", str_pad(counter, nchar(num_code), pad = "0"))
}
# remove the duration column
img_tmp$duration <- NULL
# find out the last and the first of the time in the group
top <- img_tmp %>% group_by(event_id) %>% top_n(1,timestamp) %>% dplyr::select(event_id, timestamp)
bot <- img_tmp %>% group_by(event_id) %>% top_n(-1,timestamp) %>% dplyr::select(event_id, timestamp)
names(bot)[2] <- c("timestamp_end")
img_num <- img_tmp %>% group_by(event_id) %>% summarise(event_observations=n()) # number of images in the event
event_grp <- img_tmp %>% group_by(event_id) %>% summarise(event_groupsize=max(animal_count))
# calculate the duration and add the other elements
diff <- top %>% left_join(bot, by="event_id") %>%
mutate(event_duration=abs(int_length(timestamp %--% timestamp_end))) %>%
left_join(event_grp, by="event_id")%>%
left_join(img_num, by="event_id")
# Remove columns you don't need
diff$timestamp <-NULL
diff$timestamp_end <-NULL
diff <- diff[duplicated(diff)==F,]
# Merge the img_tmp with the event data
img_tmp <-
left_join(img_tmp,diff,by="event_id")
# Remove duplicates
ind_dat <- img_tmp[duplicated(img_tmp$event_id)==F,]
# Make a unique code for ever day and deployment where cameras were functioning
tmp <- paste(row_lookup$date, row_lookup$placename)
#Subset ind_dat to data that matches the unique codes
ind_dat <- ind_dat[paste(substr(ind_dat$timestamp,1,10), ind_dat$placename) %in% tmp, ]
# Convert your species names to factors
ind_dat$sp <- as.factor(ind_dat$sp)
# Export your data frames
write.csv(ind_dat, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_detections.csv"), row.names = F)
# also write the cleaned all detections file (some activity analyses require it)
write.csv(img_tmp, paste0("data/processed_data/",ind_dat$project_id[1], "_raw_detections.csv"), row.names = F)
write.csv(row_lookup, paste0("data/processed_data/",ind_dat$project_id[1], "_daily_lookup.csv"), row.names = F)
#Subset the columns
tmp <- dep[, c("project_id", "placename", "longitude", "latitude", "feature_type")]
# Remove duplicated rows
tmp<- tmp[duplicated(tmp)==F,]
# write the file
write.csv(tmp, paste0("data/processed_data/",ind_dat$project_id[1], "_camera_locations.csv"), row.names = F)
tmp <- sp_list[sp_list$sp %in% ind_dat$sp,]
write.csv(tmp, paste0("data/processed_data/",ind_dat$project_id[1], "_species_list.csv"), row.names = F)
15.1.14 Create analysis dataframes
In the next step, we won’t create the daily dataframe as it takes too long!
# Total counts
# Station / Month / deport / Species
tmp <- row_lookup
# Calculate the number of days at each site
total_obs <- tmp %>%
group_by(placename) %>%
summarise(days = n())
# Convert to a data frame
total_obs <- as.data.frame(total_obs)
# Add columns for each species
total_obs[, levels(ind_dat$sp)] <- NA
# Duplicate for counts
total_count <- total_obs
# Test counter
i <-1
# For each station, count the number of individuals/observations
for(i in 1:nrow(total_obs))
{
tmp <- ind_dat[ind_dat$placename==total_obs$placename[i],]
tmp_stats <- tmp %>% group_by(sp, .drop=F) %>% summarise(obs=n(), count=sum(animal_count))
total_obs[i,as.character(tmp_stats$sp)] <- tmp_stats$obs
total_count[i,as.character(tmp_stats$sp)] <- tmp_stats$count
}
# Save them
write.csv(total_obs, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_total_observations.csv"), row.names = F)
write.csv(total_count, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_total_counts.csv"), row.names = F)
# Monthly counts
# Station / Month / days / Covariates / Species
tmp <- row_lookup
# Simplify the date to monthly
tmp$date <- substr(tmp$date,1,7)
# Calculate the number of days in each month
mon_obs <- tmp %>%
group_by(placename,date ) %>%
summarise(days = n())
# Convert to a data frame
mon_obs <- as.data.frame(mon_obs)
mon_obs[, levels(ind_dat$sp)] <- NA
mon_count <- mon_obs
# For each month, count the number of individuals/observations
for(i in 1:nrow(mon_obs))
{
tmp <- ind_dat[ind_dat$placename==mon_obs$placename[i] & substr(ind_dat$timestamp,1,7)== mon_obs$date[i],]
tmp_stats <- tmp %>% group_by(sp, .drop=F) %>% summarise(obs=n(), count=sum(animal_count))
mon_obs[i,as.character(tmp_stats$sp)] <- tmp_stats$obs
mon_count[i,as.character(tmp_stats$sp)] <- tmp_stats$count
}
write.csv(mon_obs, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_monthly_observations.csv"), row.names = F)
write.csv(mon_count, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_monthly_counts.csv"), row.names = F)
### Weekly
# Weekly format
# Station / Month / days / Covariates / Species
tmp <- row_lookup
# Simplify the date to year-week
tmp$date <- strftime(tmp$date, format = "%Y-W%U")
# The way this is coded is the counter W01 starts at the first Sunday of the year, everything before that is W00. Weeks do not roll across years.
# Calculate the number of days in each week
week_obs <- tmp %>%
group_by(placename,date ) %>%
summarise(days = n())
# Convert to a data frame
week_obs <- as.data.frame(week_obs)
# Add species columns
week_obs[, levels(ind_dat$sp)] <- NA
# Duplicate for counts
week_count <- week_obs
# For each week, count the number of individuals/observations
for(i in 1:nrow(week_obs))
{
tmp <- ind_dat[ind_dat$placename==week_obs$placename[i] & strftime(ind_dat$timestamp, format = "%Y-W%U")== week_obs$date[i],]
tmp_stats <- tmp %>% group_by(sp, .drop=F) %>% summarise(obs=n(), count=sum(animal_count))
week_obs[i,as.character(tmp_stats$sp)] <- tmp_stats$obs
week_count[i,as.character(tmp_stats$sp)] <- tmp_stats$count
}
write.csv(week_obs, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_weekly_observations.csv"), row.names = F)
write.csv(week_count, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_weekly_counts.csv"), row.names = F)
15.1.15 Check analysis dataframe counts
# Do an final check of the counts
tmp <- cbind(data.frame("Time"=c("Total", "Monthly", "Weekly")),
rbind(colSums(total_obs[,2:ncol(total_obs)]),
colSums(mon_obs[,3:ncol(mon_obs)]),
colSums(week_obs[,3:ncol(week_obs)])))
tmp %>%
kbl() %>%
kable_styling(full_width = T) %>%
column_spec(1, bold = T, border_right = T)%>%
kableExtra::scroll_box(width = "100%")
15.2 Add your covariates
# LOad the packages
library(kableExtra);library(dplyr); library(sf); library(MODISTools); library(lubridate); library(corrplot); library(traitdata); library(terra); library(osmdata); library(elevatr)
15.2.1 Species Traits
# Start by reading in your species list
sp_summary <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_species_list.csv"), header=T)
locs <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_camera_locations.csv"), header=T)
# Species traits
library(traitdata)
data("elton_mammals")
elton_mammals$sp <- paste0(elton_mammals$Genus,"." ,elton_mammals$Species)
tmp <- elton_mammals[, c("sp","BodyMass.Value", "Activity.Nocturnal", "Activity.Crepuscular", "Activity.Diurnal")]
# Lets rename the columns to make them more usable
tmp <- tmp %>% rename(
mass_g = BodyMass.Value,
act_noct = Activity.Nocturnal,
act_crep = Activity.Crepuscular,
act_diur = Activity.Diurnal)
sp_summary <- left_join(sp_summary, tmp)
QUESTION Are there any missing values you need to fill in?
Dont worry about adding it right now - carry on!
15.3 Spatial data
15.3.2 Get elevation data
###################
# Elevation
library(elevatr)
locs_sf <- get_elev_point(locs_sf,
src="aws", #Amazon Web Service Terrain Tiles - available globally
z = 12) # z specifies the zoom level, the lower the value the faster the code runs, but the coarser the elevation values are
boxplot(locs_sf$elevation)
15.3.4 NDVI
#######
# NDVI
library(MODISTools)
modis_locs <- locs %>%
select("placename", "longitude", "latitude") %>%
rename(site_name=placename, lat=latitude, lon=longitude)
# list available dates for a product at a location
dates <- mt_dates(product = "MOD13Q1", lat = modis_locs$lat[1], lon = modis_locs$lon[1]) #MOD15A2H
# Get the first and last date!
first(dates$calendar_date); last(dates$calendar_date)
site_ndvi <- mt_batch_subset(product = "MOD13Q1",
df=modis_locs,
band = "250m_16_days_NDVI",
start = "2022-01-01",
end = "2022-02-28",
km_lr = 0, # Use these options if you want to buffer the value (km left)
km_ab = 0, # Use these options if you want to buffer the value (km above)
internal = TRUE)
ndvi_simple <- site_ndvi %>%
select( site, band, calendar_date, value) %>%
rename(placename=site)
tmp <- ndvi_simple %>% #Take the NDVI layer
group_by(placename) %>% # Group observations by the placename
summarize(mean_ndvi=mean(value)) # Take the mean of the values and call the new column `mean_ndvi`
# Add the new data to our locations dataframe
locs_sf <- left_join(locs_sf, tmp)
# Convert it back to a dataframe
locs_sf$geometry <- NULL
locs <- left_join(locs, locs_sf)
# Write the dataset
write.csv(locs, paste0("data/processed_data/", locs$project_id[1],"_camera_locations_and_covariates.csv"), row.names=F)
15.4 Data exploration
15.4.1 Final map
list.of.packages <- c("kableExtra", "tidyr", "leaflet", "dplyr", "viridis", "corrplot", "lubridate", "plotly")
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, require, character.only = TRUE)
# Final locations plot
locs <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_camera_locations_and_covariates.csv"))
# If you want to color by a category do it here:
category <- "feature_type"
# First lets choose a category to color
locs[,category] <- factor(locs[,category])
col.cat <- turbo(length(levels(locs[,category])))
# Add it to the dataframe
locs$colours <- col.cat[locs[,category]]
m <- leaflet() %>%
# Add a satellite image layer
addProviderTiles(providers$Esri.WorldImagery, group="Satellite") %>%
addProviderTiles(providers$Esri.WorldTopoMap, group="Base") %>%
addCircleMarkers(lng=locs$longitude, lat=locs$latitude,
# Color the markers depending on the 'feature type'
color=locs$colours,
# Add a popup of the deployment code
popup=paste(locs$placename, locs[,category])) %>%
# Add a legend explaining what is going on
addLegend("bottomleft", colors = col.cat, labels = levels(locs[,category]),
title = category,
labFormat = labelFormat(prefix = "$"),
opacity = 1
) %>%
# add a layer control box to toggle between the layers
addLayersControl(
baseGroups = c("Satellite", "Base"),
options = layersControlOptions(collapsed = FALSE)
)
m
15.4.2 Detection summary
sp_summary <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_species_list.csv"), header=T)
total_obs <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_30min_independent_total_observations.csv"), header=T)
# Convert to long
long_obs <- total_obs %>%
pivot_longer(cols=sp_summary$sp, # The columns we want to create into rows - species
names_to="sp", # What we what the number column to be called
values_to = "count") # Takes the values in the species columns and calls them `count`
# We can them summaries those using dplyr
tmp <- long_obs %>% # Take the long observation data frame `long_obs`
group_by(sp) %>% # Group by species
summarise(count=sum(count)) # Sum all the independent observations
# Add it to the sp_summary dataframe
sp_summary <- left_join(sp_summary, tmp)
## Occupancy
# We use the mutate function to mutate the column
total_binary <- total_obs %>% # The total obs dataframe
mutate(across(sp_summary$sp, ~+as.logical(.x))) # across all of the species columns, make it binary
# Flip the dataframe to longer - as before
long_bin <- total_binary %>%
pivot_longer(cols=sp_summary$sp, names_to="sp", values_to = "count") # Takes the species names columns, and makes them unique rows with "sp" as the key
# We can now sum the presence/absences and divide by the number of survey locations
tmp <- long_bin %>%
group_by(sp) %>%
summarise(occupancy=sum(count)/nrow(locs)) # divided the sum by the number of sites
# add the results to the sp_summary
sp_summary <- left_join(sp_summary, tmp)
###########################
# Comparison plot
# Lets put the dataframes in a sensible order
sp_summary <- sp_summary[order(sp_summary$count),]
yform <- list(categoryorder = "array",
categoryarray = sp_summary$sp)
xform <- list(title="Captures")
# Capture rate
fig1 <- plot_ly(x = sp_summary$count, y = sp_summary$common_name, type = 'bar', orientation = 'h') %>%
layout(yaxis = yform, xaxis=xform)
yform <- list(categoryorder = "array",
categoryarray = sp_summary$sp,
showticklabels=F)
xform <- list(title="Occupancy")
# Occupancy
fig2 <- plot_ly(x = sp_summary$occupancy, y = sp_summary$common_name, type = 'bar', orientation = 'h') %>%
layout(yaxis = yform, xaxis=xform)
subplot(nrows=1,fig1, fig2, titleX = T) # We could stack them on top of one another using nrows=2
QUESTION What does this tell you about the behavior of the target species?
15.4.3 Temporal patterns
par(mar=c(5,4,1,1))
# Temporal patterns
mon_obs <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_30min_independent_monthly_observations.csv"), header=T)
# Count up the number of stations and the number of camera nights
mon_summary <- mon_obs %>% # Use the monthly observations dataframe
group_by(date) %>% # Group by the date
summarise(locs_active=n(), # Count the number of active cameras
cam_days=sum(days)) # And sum the active days
# Add in the species specific counts - and join it with the mon_summary dataframe
mon_summary <- mon_obs %>%
group_by(date) %>%
summarise(across(sp_summary$sp, sum, na.rm=TRUE)) %>% # summarise across all of
# the species columns
left_join(x=mon_summary) # Join with the mon_summary dataframe
# We first need to convert the date column to a date object
mon_summary$date <- ym(mon_summary$date)
# Set up a two panel plot (side by side)
par(mfrow=c(1,2))
plot(mon_summary$date, mon_summary$locs_active,
type="o",
pch=19,
ylim=c(0, max(mon_summary$locs_active)),
las=1,
ylab="Number of cameras active", xlab="Date")
# Sum all the captures rates for the species columns
mon_summary$all.sp <- rowSums(mon_summary[, sp_summary$sp])
# Plot them
plot(mon_summary$date, mon_summary$all.sp/(mon_summary$cam_days/100),
type="o",
pch=19,
las=1, ylab="Detections per 100 cam days", xlab="Date")
Question What is going on in these plots?
# Species specific plots
par(mfrow=c(2,2))
i <- 1
for(i in 1:length(sp_summary$sp))
{
plot(mon_summary$date, pull(mon_summary, sp_summary$sp[i])/(mon_summary$cam_days/100), # The pull command allows you to grab a specific column in a dataframe and turn it into a vector!
type="o",
pch=19,
las=1, ylab="Detections per 100 cam days", xlab="Date",
main=sp_summary$sp[i])
}
Question Any interesting patterns?
15.4.4 Spatial patterns
# Spatial plots
total_obs <- left_join(total_obs, locs)
# Jaguar
focal_species <- "Panthera.onca"
focal_cr <- pull(total_obs, focal_species)/(total_obs$days/100)
m <- leaflet() %>%
addProviderTiles(providers$Esri.WorldTopoMap, group="Base") %>%
addCircleMarkers(lng=locs$longitude, lat=locs$latitude,
# Add a popup of the deployment code
popup=paste(locs$placename),
radius=(focal_cr/max(focal_cr)*10)+1, stroke=F,
fillOpacity=0.6)
m
Try other species!
Question Any interesting patterns?
15.4.5 Species co-occurance
# Co-occurances
par(mfrow=c(1,1))
# Pull the data for each of the species from
tmp <- total_obs[, sp_summary$sp]
M <- cor(tmp)
corrplot(M, method="color",
type="upper",
order="hclust",
# addCoef.col = "black", # We suppress the coefs to make a cleaner plot
tl.col="black", tl.srt=45, #Text label color and rotation
diag=FALSE
)
Question Any interesting patterns?
You can now do some data exploration plots.
# Prepare the data
locs <- locs %>%
mutate_if(is.character,as.factor) # If a column is a character string, make it a factor
total_obs <- left_join(total_obs, locs)
We have the following species:
And the following categories to explore: