Github repo

If you want to try out any of this code, you should clone the repo. You can knit the R markdown script (india_snake_callouts.Rmd) in R studio to generate the html

git clone https://github.com/popomics/Kuttalam_et_al_2024_snake_human_conflict.git

Load the Data

Masterset <- read.csv("./Cleaned Data_October 2022.csv",
                      header = TRUE, stringsAsFactors = FALSE)

Transforming Variables

Masterset <- mutate(Masterset, Date= as.Date(Date, format= "%d/%m/%Y"))
Masterset$Time <- chron(times = Masterset$Time)
Masterset$Species_Name <- as.factor(Masterset$Species_Name)
Masterset$Site_Type <- as.factor(Masterset$Site_Type)
Masterset$Age <- as.factor(Masterset$Age)

# Simplify the species_name
Masterset$Species_Name <- gsub(".*\\((.*?)\\).*", "\\1", Masterset$Species_Name)

Comparing Years

Here, we test whether there are significant decreases in the number of rescues year-on-year. We also test the changes in proportion of venomous and non-venomous rescues year-on-year.

First, load the Date and Species_Name from the Masterset. Label the rescues as ‘venomous’ or ‘non-venomous’.

month_encounts = data.frame(Masterset$Date, Masterset$Species_Name)
names(month_encounts) <- c('dates', 'species')
# label species type
month_encounts <- mutate(month_encounts, species_type = case_when(
  species == "Bungarus fasciatus" ~ "venomous",
  species == "Bungarus caeruleus" ~ "venomous",
  species == "Naja kaouthia" ~ "venomous",
  species == "Naja naja" ~ "venomous",
  species == "Daboia russelii" ~ "venomous",
  species == " Argyrogena fasciolatus" ~ "non-venomous",
  species == "Indotyphlops braminus" ~ "non-venomous",
  species == "Fowlea piscator" ~ "non-venomous",
  species == "Dendrelaphis tristis" ~ "non-venomous",
  species == "Boiga trigonata" ~ "non-venomous",
  species == "Oligodon arnensis" ~ "non-venomous",
  species == "Ptyas mucosa" ~ "non-venomous",
  species == "Eryx conicus" ~ "non-venomous",
  species == "Ahaetulla nasuta" ~ "non-venomous",
  species == "Lycodon aulicus" ~ "non-venomous",
  species == "Typhlops diardii" ~ "non-venomous",
  species == "Denrelaphis proarchos" ~ "non-venomous",
  species == "Eryx Johnii" ~ "non-venomous",
  species == "Enhydris enhydris" ~ "non-venomous",
  species == "Amphiesma stolatum" ~ "non-venomous",
  species == "Lycodon jara" ~ "non-venomous"))

Create a month and year column.

month_encounts$month =  format(month_encounts$dates, format = "%B")
month_encounts$year = format(month_encounts$dates, format = "%Y")

Get the summary of the rescue counts.

summary_month_encounts <- month_encounts %>% 
  count(species_type, month, year, sort = TRUE)

# There are rows with missing combinations (e.g. non-venomous-- August-- 2022 = n = 0)
summary_month_encounts <- summary_month_encounts %>% 
  add_row(species_type = "non-venomous", month = "August", year = "2022",
          n = 0)
summary_month_encounts <- summary_month_encounts %>% 
  add_row(species_type = "non-venomous", month = "January", year = "2021", 
          n = 0)
summary_month_encounts <- summary_month_encounts %>% 
  add_row(species_type = "non-venomous", month = "July", year = "2022", 
          n = 0)

## Add total number of rescue in each month in each year (venom + non-venom)
summary_month_encounts <- summary_month_encounts %>%
  group_by(month, year) %>%
  summarise(n = sum(n)) %>%
  mutate(species_type = "All Snakes") %>%
  bind_rows(summary_month_encounts) %>%
  arrange(month, year)

Compare 2020 vs 2021 (July to December)

summary_month_encounts %>% 
  group_by(species_type, 
           year = case_when(year == "2020" ~ "2020", year == "2021" ~ "2021"), 
           month = case_when(month == "July" ~ "July", 
                             month == "August" ~ "August", 
                             month == "September" ~ "September", 
                             month == "October" ~ "October", 
                             month == "November" ~ "November",
                             month == "December" ~ "December")) %>%
  summarise(n = sum(n)) %>% na.omit() %>% group_by(species_type, year) %>% 
  summarise(n=sum(n))

## # A tibble: 6 × 3
## # Groups:   species_type [3]
##   species_type year      n
##   <chr>        <chr> <dbl>
## 1 All Snakes   2020    201
## 2 All Snakes   2021    142
## 3 non-venomous 2020     61
## 4 non-venomous 2021     38
## 5 venomous     2020    140
## 6 venomous     2021    104

# Chi-Squared test for venom vs non-venom
chisq.test(matrix(c(61,38,140,104), ncol = 2, byrow = TRUE), correct = F)

## 
##  Pearson's Chi-squared test
## 
## data:  matrix(c(61, 38, 140, 104), ncol = 2, byrow = TRUE)
## X-squared = 0.52166, df = 1, p-value = 0.4701

# Chi-Squared test for given probabilities for all snakes
obs <- c(201,142)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 10.149, df = 1, p-value = 0.001444

Compare 2021 vs 2022 (January to October)

summary_month_encounts %>% 
  group_by(species_type, 
           year = case_when(year == "2021" ~ "2021", year == "2022" ~ "2022"), 
           month = case_when(month == "January" ~ "January", 
                             month == "February" ~ "February", 
                             month == "March" ~ "March", 
                             month == "April" ~ "April", 
                             month == "May" ~ "May", 
                             month == "June" ~ "June", 
                             month == "July" ~ "July", 
                             month == "August" ~ "August", 
                             month == "September" ~ "September", 
                             month == "October" ~ "October")) %>%
  summarise(n = sum(n)) %>% na.omit() %>% group_by(species_type, year) %>% 
  summarise(n=sum(n))

## # A tibble: 6 × 3
## # Groups:   species_type [3]
##   species_type year      n
##   <chr>        <chr> <dbl>
## 1 All Snakes   2021    166
## 2 All Snakes   2022    114
## 3 non-venomous 2021     52
## 4 non-venomous 2022     30
## 5 venomous     2021    114
## 6 venomous     2022     84

# Chi-Squared test for venom vs non-venom
chisq.test(matrix(c(52,114,30,84), ncol = 2, byrow = TRUE), correct = F)

## 
##  Pearson's Chi-squared test
## 
## data:  matrix(c(52, 114, 30, 84), ncol = 2, byrow = TRUE)
## X-squared = 0.819, df = 1, p-value = 0.3655

# Chi-Squared test for given probabilities for all snakes
obs <- c(166,114)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 9.6571, df = 1, p-value = 0.001886

Plotting the average number of rescues each month:

Figure 3 in Paper

fig3 <- summary_month_encounts %>% group_by(month, species_type) %>% 
  summarise(average  = mean(n, na.rm = TRUE), sd = sd(n, na.rm = TRUE))
fig3 <- fig3 %>% mutate(across(c('average'), round, 2)) %>%
  mutate(across(c('sd'), round, 2))

# create plot
ggplot(fig3, aes(x = factor(month, levels = c('January', 'February', 'March', 
                                          'April', 'May', 'June', 'July', 'August', 
                                          'September', 'October', 'November', 
                                          'December')), 
                            y = average, fill = species_type, width =.75)) + 
  geom_col(position = position_dodge(0.8), width = 0.5) + 
  scale_y_continuous(name="Average Number of Encounters", 
                     limits=c(0, 30), breaks = seq(0, 30, 2)) + 
  theme(panel.border = element_blank(), 
        panel.background = element_blank(), 
        axis.line = element_line(colour = "black"),
        panel.grid.major.y = element_line(colour = "grey", linetype = 1), 
        axis.text.x = element_text(face = "bold", angle = 90), 
        legend.position = c(.10, .85), 
        legend.box.background = element_rect(color="grey", size=0.75), 
        legend.key.size = unit(2, 'mm')
        ) +
  scale_fill_manual(values = c("#6A6F70", "#33CCCC", "#EA5858"), 
                    labels= c('All Snakes','Non-Venomous', 'Venomous'), name = "Species Type") +
  geom_text(aes(label = average), colour = "black", size = 2.5, vjust = -0.4, 
            position = position_dodge(0.8), fontface = "bold") +
  xlab('Month')

Site Use Comparison

Individual Sites

We first compare the association between the species type (venomous and non-venomous) and the each of the individual sites.

VenomvsNonVenom = data.frame(Masterset$Species_Name, Masterset$Site_Type)
#Change column names
names(VenomvsNonVenom) <- c('Species', 'Site')
# Check for rows where there may be blank cells
which(VenomvsNonVenom$Site == "")

## [1] 133 205 215

which(VenomvsNonVenom$Species == "")

## integer(0)

# Remove the rows with blank cells
VenomvsNonVenom <- VenomvsNonVenom[!VenomvsNonVenom$Site=="",]

Add column for snake type ( 1 = venomous, 0 = non-venomous)

VenomvsNonVenom <- mutate(VenomvsNonVenom, SnakeType = case_when(
  Species == "Bungarus fasciatus" ~ "1",
  Species == "Bungarus caeruleus" ~ "1",
  Species == "Naja kaouthia" ~ "1",
  Species == "Naja naja" ~ "1",
  Species == "Daboia russelii" ~ "1",
  Species == " Argyrogena fasciolatus" ~ "0",
  Species == "Indotyphlops braminus" ~ "0",
  Species == "Fowlea piscator" ~ "0",
  Species == "Dendrelaphis tristis" ~ "0",
  Species == "Boiga trigonata" ~ "0",
  Species == "Oligodon arnensis" ~ "0",
  Species == "Ptyas mucosa" ~ "0",
  Species == "Eryx conicus" ~ "0",
  Species == "Ahaetulla nasuta" ~ "0",
  Species == "Lycodon aulicus" ~ "0",
  Species == "Typhlops diardii" ~ "0",
  Species == "Denrelaphis proarchos" ~ "0",
  Species == "Eryx Johnii" ~ "0",
  Species == "Enhydris enhydris" ~ "0",
  Species == "Amphiesma stolatum" ~ "0",
  Species == "Lycodon jara" ~ "0"))

Check whether there are any sites with low rescue counts

VenomvsNonVenom %>% count(Site, SnakeType, sort = TRUE)

##                      Site SnakeType   n
## 1  Backyard/ Front Garden         1 133
## 2              Store Room         1  64
## 3  Backyard/ Front Garden         0  41
## 4                 Bedroom         1  36
## 5              Store Room         0  36
## 6                 Kitchen         1  35
## 7                 Bedroom         0  30
## 8                Bathroom         1  21
## 9             Fishing Net         1  13
## 10              Courtyard         1  12
## 11                Kitchen         0  11
## 12                   Shop         1  10
## 13            Fishing Net         0   9
## 14          Fishing Traps         1   9
## 15               Cow Shed         1   8
## 16             Rice Field         1   7
## 17               Bathroom         0   6
## 18                   Shop         0   5
## 19              Courtyard         0   4
## 20              Bird Cage         1   3
## 21                  Canal         1   3
## 22               Cow Shed         0   3
## 23                   Pond         1   3
## 24                   Well         1   3
## 25              Bird Cage         0   2
## 26      Chicken/ Duck Pen         1   2
## 27           Fodder Field         0   2
## 28             Rice Field         0   2
## 29         Bamboo Orchard         1   1
## 30      Chicken/ Duck Pen         0   1
## 31        Groundnut Field         1   1
## 32           Poultry Farm         0   1

Combine the following sites which have low counts and are very similar niches:

Well + Pond + Fishing Net + Fishing Traps + Canal = Water Body
Rice Field + Groundnut Field + Fodder Field + Bamboo Orchard = Agricultural Field
Bird Cage + Chicken/ Duck Pen + Poultry Farm = Bird Enclosure

VenomvsNonVenom <- VenomvsNonVenom %>%
  mutate(Site = case_when(
    Site %in% c('Well', 'Pond', 'Fishing Net', 'Fishing Traps', 'Canal') ~ 'Water Body',
    Site %in% c('Rice Field', 'Groundnut Field', 'Fodder Field', 'Bamboo Orchard') ~ 'Agricultural Field',
    Site %in% c('Bird Cage', 'Chicken/ Duck Pen', 'Poultry Farm') ~ 'Bird Enclosure',
    TRUE ~ Site
  ))

Run Chi-Squared Tests at each Site

site_counts <- VenomvsNonVenom %>% count(Site, SnakeType, sort = FALSE)

#Agricultural Field: 
obs <- c(4,9)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 1.9231, df = 1, p-value = 0.1655

#Backyard: 
obs <- c(41,133)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 48.644, df = 1, p-value = 3.07e-12

#Bathroom
obs <- c(6,21)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 8.3333, df = 1, p-value = 0.003892

#Bedroom
obs <- c(30,36)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 0.54545, df = 1, p-value = 0.4602

#Bird Enclosure
obs <- c(4,5)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 0.11111, df = 1, p-value = 0.7389

#Courtyard
obs <- c(4,12)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 4, df = 1, p-value = 0.0455

#Cow shed
obs <- c(3,8)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 2.2727, df = 1, p-value = 0.1317

#Kitchen
obs <- c(11,35)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 12.522, df = 1, p-value = 0.0004022

#Shop
obs <- c(5,10)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 1.6667, df = 1, p-value = 0.1967

#Store Room
obs <- c(36,64)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 7.84, df = 1, p-value = 0.00511

#Water Body
obs <- c(9,31)
exp <- c(0.5,0.5)
chisq.test(obs, p=exp)

## 
##  Chi-squared test for given probabilities
## 
## data:  obs
## X-squared = 12.1, df = 1, p-value = 0.0005042

Visualizing the data:

Figure 2 in Paper

# group the counts and percetange them for fig2
fig2 <- VenomvsNonVenom %>% count(Site, SnakeType, sort = TRUE)
fig2 <- fig2 %>% group_by(SnakeType) %>% mutate(perc = ( n / sum(n)) * 100)
fig2 <- fig2 %>% mutate(across(c('perc'), round, 2))
# change the order sites so it looks as Inside and Outside Grouped
fig2$Site <- factor(fig2$Site, levels=c('Agricultural Field','Backyard/ Front Garden', 'Water Body', 
                                        'Bathroom', 'Bedroom', 'Kitchen', 'Courtyard', 'Shop', 
                                        'Store Room', 'Bird Enclosure', 'Cow Shed'))
# change 0 ,1  to non-venomous and venomous
fig2$SnakeType <- as.factor(fig2$SnakeType)
levels(fig2$SnakeType) <- list(fig2$SnakeType, "Non-Venomous" = "0", 
                                "Venomous" = "1")
# Plot fig2
ggplot(fig2, aes(x = SnakeType,y = perc, fill = Site, colour = SnakeType,
                 order = Site)) + 
  geom_col(size=1) +
  scale_color_manual(values = c("blue", "red")) +
  scale_x_discrete(limits = c(" ", "Non-Venomous","Venomous")) +
  scale_fill_brewer(palette = "Set3") +
  coord_polar("y") +
  theme_void()

Figure 4 in Paper

fig4 <- VenomvsNonVenom %>% count(Site, SnakeType, sort = TRUE)

#Create the plot
ggplot(fig4, aes(x = Site, y = n, fill = SnakeType)) + 
  geom_col(position = "dodge") + 
  scale_y_continuous(name="Number of Encounters", 
                     limits=c(0, 140), breaks = seq(0, 140, 10)) + 
  theme(panel.border = element_blank(), 
        panel.background = element_blank(), 
        axis.line = element_line(colour = "black"),
        panel.grid.major.y = element_line(colour = "grey", linetype = 1), 
        axis.text.x = element_text(face = "bold", angle = 90, 
                                   colour = c("black", "red", "red", "black", 
                                              "black", "red", "black", "red", 
                                              "black", "red", "red")), 
        legend.position = c(.90, .80), 
        legend.box.background = element_rect(color="grey", size=0.75)) +
  scale_fill_manual(labels= c('Non-Venomous', 'Venomous'), 
                    values = c("#33CCCC", "#EA5858"), name = "Species Type") +
  geom_text(aes(label = n), colour = "black", size = 3, vjust = -0.3, 
            position = position_dodge(0.9))

Outside vs Inside Sites

Here, we group the sites as outside or inside and then run a chi-squared test.

OutvsIn = data.frame(Masterset$Species_Name, Masterset$Site_Type)
names(OutvsIn) <- c('Species', 'Site')

# Eliminating Blank Data
OutvsIn <- OutvsIn[!OutvsIn$Site=="",]


# Labeling Venom or Non-Venom
OutvsIn <- mutate(OutvsIn, SnakeType = case_when(
  Species == "Bungarus fasciatus" ~ "1",
  Species == "Bungarus caeruleus" ~ "1",
  Species == "Naja kaouthia" ~ "1",
  Species == "Naja naja" ~ "1",
  Species == "Daboia russelii" ~ "1",
  Species == " Argyrogena fasciolatus" ~ "0",
  Species == "Indotyphlops braminus" ~ "0",
  Species == "Fowlea piscator" ~ "0",
  Species == "Dendrelaphis tristis" ~ "0",
  Species == "Boiga trigonata" ~ "0",
  Species == "Oligodon arnensis" ~ "0",
  Species == "Ptyas mucosa" ~ "0",
  Species == "Eryx conicus" ~ "0",
  Species == "Ahaetulla nasuta" ~ "0",
  Species == "Lycodon aulicus" ~ "0",
  Species == "Typhlops diardii" ~ "0",
  Species == "Denrelaphis proarchos" ~ "0",
  Species == "Eryx Johnii" ~ "0",
  Species == "Enhydris enhydris" ~ "0",
  Species == "Amphiesma stolatum" ~ "0",
  Species == "Lycodon jara" ~ "0"))

# Inside or Outside
OutvsIn <- mutate(OutvsIn, Location = case_when(
  Site == "Backyard/ Front Garden" ~ "Outside",
  Site == "Bamboo Orchard" ~ "Outside",
  Site == "Bathroom" ~ "Inside",
  Site == "Bedroom" ~ "Inside",
  Site == "Bird Cage" ~ "Inside",
  Site == "Canal" ~ "Outside",
  Site == "Chicken/ Duck Pen" ~ "Inside",
  Site == "Courtyard" ~ "Inside",
  Site == "Cow Shed" ~ "Outside",
  Site == "Fishing Net" ~ "Outside",
  Site == "Fishing Traps" ~ "Outside",
  Site == "Fodder Field" ~ "Outside",
  Site == "Groundnut Field" ~ "Outside",
  Site == "Kitchen" ~ "Inside",
  Site == "Pond" ~ "Outside",
  Site == "Poultry Farm" ~ "Inside",
  Site == "Rice Field" ~ "Outside",
  Site == "Shop" ~ "Inside",
  Site == "Store Room" ~ "Inside",
  Site == "Well" ~ "Outside"))

Run the Chi-Squared Test

# Create the table on which the chi-squared will be run
table(OutvsIn$SnakeType, OutvsIn$Location)

##    
##     Inside Outside
##   0     96      57
##   1    183     181

# chi squared test
chisq.test(table(OutvsIn$SnakeType, OutvsIn$Location), correct = FALSE)

## 
##  Pearson's Chi-squared test
## 
## data:  table(OutvsIn$SnakeType, OutvsIn$Location)
## X-squared = 6.7431, df = 1, p-value = 0.009411

Visualizing this:

plot1 <- OutvsIn %>% count(Location, SnakeType, sort = TRUE)
# create percentages for each category


plot1 <- plot1 %>% group_by(SnakeType) %>% 
  mutate(percent = round(n /sum(n)*100, 2))
plot1 <- plot1 %>% group_by(SnakeType) %>% 
  mutate(percent_labels = scales::percent(n / sum(n), accuracy = .1, trim = FALSE))

# create the plot
ggplot(plot1, aes(fill=Location, x= SnakeType, y=percent, label = percent_labels)) + 
  geom_bar(position = "stack", stat = "identity")  +
  scale_y_continuous(name="Percentage of Encounters") + 
  theme(panel.border = element_blank(), 
        panel.background = element_blank(), 
        axis.line = element_line(colour = "black"),
        panel.grid.major.y = element_line(colour = "grey", linetype = 1), 
        axis.text.x = element_text(face = "bold", angle = 0, colour = 
                                     c("#33CCCC", "#EA5858"))) +
  scale_fill_manual(labels= c('Inside', 'Outside'), 
                    values = c("#FFC000", "#70AD47"), name = "Location") +
  scale_x_discrete(name = "Species Type", labels = c("Non-Venomous", "Venomous")) +
  geom_text(size = 5, position = position_stack(vjust = 0.5), fontface = "bold")

Loading Climate Data

Load the data from the csv file named “IMD_data.csv”.

climate_data <- read.csv("./IMD_data.csv", header = TRUE, 
                         stringsAsFactors = FALSE)
# change the date format in the climate_data
climate_data <- mutate(climate_data, Date = 
                         as.Date(Date, format = "%d/%m/%Y"))
climate_data <- mutate(climate_data, Date = 
                         as.Date(Date, format = "%Y-%m-%d"))
#change the name to match the encounter column name
names(climate_data) <- c('dates', 'max_temp', 'min_temp', 'rain')
#add missing dates 
climate_data <- pad(climate_data)

Create single dataframe for each variable.

# maxtemp
maxtemp = data.frame(climate_data$dates, climate_data$max_temp)
names(maxtemp) <- c('dates', 'maxtemp')
# min temp
mintemp = data.frame(climate_data$dates, climate_data$min_temp)
names(mintemp) <- c('dates', 'mintemp')
# rain
rain = data.frame(climate_data$dates, climate_data$rain)
names(rain) <- c('dates', 'rain')

Effect Of Rain

Effect on Location and Species Type

rain_sites = data.frame(Masterset$Date, Masterset$Species_Name, Masterset$Site_Type)
names(rain_sites) <- c('dates', 'species', 'site')

rain_sites <- mutate(rain_sites, species_type = case_when(
  species == "Bungarus fasciatus" ~ "venomous",
  species == "Bungarus caeruleus" ~ "venomous",
  species == "Naja kaouthia" ~ "venomous",
  species == "Naja naja" ~ "venomous",
  species == "Daboia russelii" ~ "venomous",
  species == " Argyrogena fasciolatus" ~ "non-venomous",
  species == "Indotyphlops braminus" ~ "non-venomous",
  species == "Fowlea piscator" ~ "non-venomous",
  species == "Dendrelaphis tristis" ~ "non-venomous",
  species == "Boiga trigonata" ~ "non-venomous",
  species == "Oligodon arnensis" ~ "non-venomous",
  species == "Ptyas mucosa" ~ "non-venomous",
  species == "Eryx conicus" ~ "non-venomous",
  species == "Ahaetulla nasuta" ~ "non-venomous",
  species == "Lycodon aulicus" ~ "non-venomous",
  species == "Typhlops diardii" ~ "non-venomous",
  species == "Denrelaphis proarchos" ~ "non-venomous",
  species == "Eryx Johnii" ~ "non-venomous",
  species == "Enhydris enhydris" ~ "non-venomous",
  species == "Amphiesma stolatum" ~ "non-venomous",
  species == "Lycodon jara" ~ "non-venomous"))

rain_sites <- mutate(rain_sites, site_type = case_when(
  site == "Backyard/ Front Garden" ~ "Outside",
  site == "Bamboo Orchard" ~ "Outside",
  site == "Bathroom" ~ "Inside",
  site == "Bedroom" ~ "Inside",
  site == "Bird Cage" ~ "Inside",
  site == "Canal" ~ "Outside",
  site == "Chicken/ Duck Pen" ~ "Inside",
  site == "Courtyard" ~ "Inside",
  site == "Cow Shed" ~ "Outside",
  site == "Fishing Net" ~ "Outside",
  site == "Fishing Traps" ~ "Outside",
  site == "Fodder Field" ~ "Outside",
  site == "Groundnut Field" ~ "Outside",
  site == "Kitchen" ~ "Inside",
  site == "Pond" ~ "Outside",
  site == "Poultry Farm" ~ "Inside",
  site == "Rice Field" ~ "Outside",
  site == "Shop" ~ "Inside",
  site == "Store Room" ~ "Inside",
  site == "Well" ~ "Outside"))

Add the rain data.

rain_sites <- merge(rain_sites, rain, by = "dates")
#remove rows with NA's 
rain_sites <- na.omit(rain_sites)

rain_sites <- rain_sites[, !names(rain_sites) %in% c("species", "site")]

# label dry days and rainy days
rain_sites <- rain_sites %>% mutate(day_type = case_when(rain == 0 ~ "Dry", 
                                             rain > 0 ~ "Rain"))
rain_sites$day_type <- as.factor(rain_sites$day_type)


rain_sites %>% count(species_type, site_type, day_type)

##   species_type site_type day_type   n
## 1 non-venomous    Inside      Dry  70
## 2 non-venomous    Inside     Rain  19
## 3 non-venomous   Outside      Dry  41
## 4 non-venomous   Outside     Rain  14
## 5     venomous    Inside      Dry 130
## 6     venomous    Inside     Rain  43
## 7     venomous   Outside      Dry 123
## 8     venomous   Outside     Rain  44

Run the Chi-Squared Tests.

# venomous
chisq.test(as.table(rbind(c(130,123), c(43,44))), correct = FALSE)

## 
##  Pearson's Chi-squared test
## 
## data:  as.table(rbind(c(130, 123), c(43, 44)))
## X-squared = 0.099319, df = 1, p-value = 0.7526

#non-venomous 
chisq.test(as.table(rbind(c(70,41), c(19,14))), correct = FALSE)

## 
##  Pearson's Chi-squared test
## 
## data:  as.table(rbind(c(70, 41), c(19, 14)))
## X-squared = 0.32446, df = 1, p-value = 0.5689

Visualizing this in a plot:

plot2 <- rain_sites %>% count(species_type, site_type, day_type, sort = TRUE)

# create percentages for each category
plot2 <- plot2 %>% group_by(day_type, species_type) %>%
  mutate(percent = round(n /sum(n)*100,2))
plot2 <- plot2 %>% group_by(day_type, species_type) %>%
  mutate(percent_labels = scales::percent(n / sum(n), accuracy = .1, trim = FALSE))

# create plot1 plot
ggplot(plot2, aes(fill = site_type, x = species_type, y=percent, label=percent_labels)) + 
  geom_bar(position = "stack", stat = "identity") + 
  scale_y_continuous(name="Percentage of Encounters") +
  theme(panel.border = element_blank(), 
        panel.background = element_blank(), 
        axis.line = element_line(colour = "black"),
        panel.grid.major.y = element_line(colour = "grey", linetype = 1), 
        axis.text.x = element_text(face = "bold", angle = 0, 
                                   colour = c("#33CCCC", "#EA5858")), 
        axis.text = element_text(size = 10)) +
  geom_text(size = 5, fontface = "bold", position = position_stack(vjust = 0.5)) +
  scale_fill_manual(labels= c('Inside', 'Outside'), 
                    values = c("#FFC000", "#70AD47"), name = "Location") +
  scale_x_discrete(name = "Species Type", labels = c("Non-Venomous", "Venomous")) +
  facet_grid(. ~day_type)

Effect of Day Type Combinations

daytype = data.frame(Masterset$Date, Masterset$Species_Name)
names(daytype) <- c('dates' , 'species')
#add dates with no rescues
daytype <- pad(daytype)
# change species to character to allow NA replacement
daytype$species <- as.character(daytype$species)
# change NA to 0 first
daytype <- daytype %>% mutate(species = ifelse(is.na(species), 0, species))
# change back to factor
daytype$species <- as.factor(daytype$species)
#label days with enocunters as "yes or no"
daytype <- mutate(daytype, snake_encounter = ifelse(species == "0", "no", "yes"))
# insert rain and previous rain data to each day
daytype <- merge(daytype, rain, by = "dates")
daytype$rain_prev[2:nrow(daytype)] <- daytype$rain[1:nrow(daytype) - 1]

daytype <- daytype %>% mutate(prevday_type = case_when(rain_prev == 0 ~ "Dry", 
                                                                   rain_prev > 0 ~ "Rain"))
daytype$prevday_type <- as.factor(daytype$prevday_type)

daytype <- daytype %>% mutate(sameday_type = case_when(rain == 0 ~ "Dry", 
                                                                   rain > 0 ~ "Rain"))
daytype$sameday_type <- as.factor(daytype$sameday_type)

#create prev:same day column 
daytype$day_diff = daytype$prevday_type:daytype$sameday_type
daytype$day_diff <- as.factor(daytype$day_diff)
# remove multiple day rows
daytype <- daytype[!duplicated(daytype[c('dates')]),]

table( daytype$day_diff, daytype$snake_encounter)

##            
##              no yes
##   Dry:Dry   293 183
##   Dry:Rain   56  23
##   Rain:Dry   44  36
##   Rain:Rain  57  48

Run the chi-squared tests for each of combination of day_diff.

# Dry:Dry vs Dry:Rain
prev_day_compare <- matrix(c(293,183,56,23), ncol=2, byrow = TRUE)
chisq.test(prev_day_compare, correct = F)

## 
##  Pearson's Chi-squared test
## 
## data:  prev_day_compare
## X-squared = 2.5277, df = 1, p-value = 0.1119

# Dry:Dry vs Rain:Dry
prev_day_compare <- matrix(c(293,183,44,36), ncol=2, byrow = TRUE)
chisq.test(prev_day_compare, correct = F)

## 
##  Pearson's Chi-squared test
## 
## data:  prev_day_compare
## X-squared = 1.2325, df = 1, p-value = 0.2669

# Dry:Dry vs Rain:Rain
prev_day_compare <- matrix(c(293, 183,57,48), ncol=2, byrow = TRUE)
chisq.test(prev_day_compare, correct = F)

## 
##  Pearson's Chi-squared test
## 
## data:  prev_day_compare
## X-squared = 1.8977, df = 1, p-value = 0.1683

# Dry:Rain vs Rain:Dry
prev_day_compare <- matrix(c(56,23,44,36), ncol=2, byrow = TRUE)
chisq.test(prev_day_compare, correct = F)

## 
##  Pearson's Chi-squared test
## 
## data:  prev_day_compare
## X-squared = 4.2983, df = 1, p-value = 0.03815

# Dry:Rain vs Rain:Rain
prev_day_compare <- matrix(c(56,23,57,48), ncol=2, byrow = TRUE)
chisq.test(prev_day_compare, correct = F)

## 
##  Pearson's Chi-squared test
## 
## data:  prev_day_compare
## X-squared = 5.2424, df = 1, p-value = 0.02204

#Rain:Dry vs Rain:Rain
prev_day_compare <- matrix(c(44,36,57,48), ncol=2, byrow = TRUE)
chisq.test(prev_day_compare, correct = F)

## 
##  Pearson's Chi-squared test
## 
## data:  prev_day_compare
## X-squared = 0.0093453, df = 1, p-value = 0.923

Visualizing this in a plot:

Figure 5 in Paper

fig5 <- daytype %>% count(snake_encounter, day_diff, sort = TRUE)
# remove NA's 
fig5 <- na.omit(fig5)

# create percentages for each category
fig5 <- fig5 %>% group_by(day_diff) %>%
  mutate(percent = round(n / sum(n)*100, 2))
fig5 <- fig5 %>% group_by(day_diff) %>% 
  mutate(percent_labels = scales::percent(n / sum(n), accuracy = .1, trim = FALSE))

# data table
table_day <-  data.frame(
  day_diff = c("Dry:Dry", "Dry:Rain", "Rain:Dry", "Rain:Rain"),
  No = c(293, 56, 44, 57),
  Yes = c(183, 23, 36, 48))
# transpose the table
table_day <- t(table_day[, -1])
colnames(table_day) <- c("Dry:Dry", "Dry:Rain", "Rain:Dry", "Rain:Rain")
# Create a stacked chart using ggplot
plot <- ggplot(fig5, aes(fill=snake_encounter, x= day_diff, y=percent, label = percent_labels)) + 
  geom_bar(position = "stack", stat = "identity") +
  scale_y_continuous(name="Percentage of Encounters") + 
  theme(panel.border = element_blank(), 
        panel.background = element_blank(), 
        axis.line = element_line(colour = "black"),
        panel.grid.major.y = element_line(colour = "grey", linetype = 1), 
        axis.text.x = element_text(face = "bold", angle = 0), 
        axis.text = element_text(size = 10), 
        legend.position = "none") + 
  scale_fill_manual(labels= c('No', 'Yes'), values = c("#33CCCC", "#EA5858"), 
                    name = "Rescue Encounter") +
  scale_x_discrete(name = "Day Type") +
  geom_text(size = 5, position = position_stack(vjust = 0.5), fontface = "bold") 
# create text table with the data table
table <- ggtexttable(table_day, theme = ttheme("light", 
                                               tbody.style = tbody_style(fill = c("#33CCCC", "#EA5858"))))

# merge the table and plot
ggarrange(plot, table, ncol = 1, heights = c(3,1))

We will also test to see whether there is an association between the daytype combination and the locations of the rescue.

daytype_location = data.frame(Masterset$Date, Masterset$Site_Type)
names(daytype_location) <- c('dates', 'site')
daytype_location <- mutate(daytype_location, site_type = case_when(
  site == "Backyard/ Front Garden" ~ "Outside",
  site == "Bamboo Orchard" ~ "Outside",
  site == "Bathroom" ~ "Inside",
  site == "Bedroom" ~ "Inside",
  site == "Bird Cage" ~ "Inside",
  site == "Canal" ~ "Outside",
  site == "Chicken/ Duck Pen" ~ "Inside",
  site == "Courtyard" ~ "Inside",
  site == "Cow Shed" ~ "Outside",
  site == "Fishing Net" ~ "Outside",
  site == "Fishing Traps" ~ "Outside",
  site == "Fodder Field" ~ "Outside",
  site == "Groundnut Field" ~ "Outside",
  site == "Kitchen" ~ "Inside",
  site == "Pond" ~ "Outside",
  site == "Poultry Farm" ~ "Inside",
  site == "Rice Field" ~ "Outside",
  site == "Shop" ~ "Inside",
  site == "Store Room" ~ "Inside",
  site == "Well" ~ "Outside"))
daytype_location <- pad(daytype_location)

daytype_location <- merge(daytype_location, rain, by = "dates")
daytype_location$rain_prev[2:nrow(daytype_location)] <- daytype_location$rain[1:nrow(daytype_location) - 1]

daytype_location <- daytype_location %>% mutate(prevday_type = case_when(rain_prev == 0 ~ "Dry", 
                                                                   rain_prev > 0 ~ "Rain"))
daytype_location$prevday_type <- as.factor(daytype_location$prevday_type)

daytype_location <- daytype_location %>% mutate(sameday_type = case_when(rain == 0 ~ "Dry", 
                                                                   rain > 0 ~ "Rain"))
daytype_location$sameday_type <- as.factor(daytype_location$sameday_type)

#create prev:same day column 
daytype_location$day_diff = daytype_location$prevday_type:daytype_location$sameday_type
daytype_location$day_diff <- as.factor(daytype_location$day_diff)

daytype_location <- na.omit(daytype_location)

table(daytype_location$site_type, daytype_location$day_diff)

##          
##           Dry:Dry Dry:Rain Rain:Dry Rain:Rain
##   Inside      180       15       18        46
##   Outside     145        7       18        50

# Running the chi-squared test
chisq.test(table(daytype_location$site_type, daytype_location$day_diff))

## 
##  Pearson's Chi-squared test
## 
## data:  table(daytype_location$site_type, daytype_location$day_diff)
## X-squared = 3.6941, df = 3, p-value = 0.2964

Most Rescued Species

Comparing Climate Effects

Rain

rain_select= data.frame(Masterset$Date, Masterset$Species_Name)
names(rain_select) <-  c('dates', 'species')
rain_select <- rain_select[rain_select$species %in% c( "Fowlea piscator" , "Dendrelaphis tristis" ,
                                                             "Ptyas mucosa", "Lycodon aulicus", 
                                                             "Bungarus fasciatus", "Bungarus caeruleus",
                                                             "Naja kaouthia", "Naja naja","Daboia russelii" ), ]
rain_select <- merge(rain_select, rain, by = "dates")
#remove rows with NA's 
rain_select <- na.omit(rain_select)
#remove the date column
rain_select <- rain_select[, !names(rain_select) %in% c("dates")]

Check the assumptions for a one-way ANOVA.

### create the model
model_rain <- aov(rain~species, data = rain_select)
### normality
shapiro.test(residuals(model_rain))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_rain)
## W = 0.57637, p-value < 2.2e-16

### homogeneity of variance
leveneTest(model_rain)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   8  0.6748 0.7139
##       455

### independence
durbinWatsonTest(model_rain)

##  lag Autocorrelation D-W Statistic p-value
##    1       0.5130554     0.9731223       0
##  Alternative hypothesis: rho != 0

Assumptions have been violated.

# Run a Kruskal Wallis Test
kruskal_test(rain~species, data = rain_select)

## # A tibble: 1 × 6
##   .y.       n statistic    df     p method        
## * <chr> <int>     <dbl> <int> <dbl> <chr>         
## 1 rain    464      8.68     8  0.37 Kruskal-Wallis

# rain (A)
rain_plot <- ggplot(rain_select, aes(x=factor(species, level=c('Bungarus fasciatus', 
                                                            'Bungarus caeruleus',
                                                            'Daboia russelii',
                                                            'Naja kaouthia',
                                                            'Naja naja', 
                                                            'Fowlea piscator',
                                                            'Dendrelaphis tristis',
                                                            'Ptyas mucosa',
                                                            'Lycodon aulicus')),
                                     y=rain, fill=species)) + geom_violin() + 
  labs(x = "Species", y= "Rain (mm)") +
  geom_point(position = position_jitter(seed = 1, width = 0.2), size = 1) + 
  theme(panel.border = element_blank(), 
        panel.background = element_blank(), 
        axis.line = element_line(colour = "black"),
        panel.grid.major.y = element_line(colour = "grey", linetype = 1), 
        axis.text.x = element_text(face = "italic", angle = 90), legend.position = "none") +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.x = element_blank() ) 
rain_plot

Minimum Temperature

mintemp_select= data.frame(Masterset$Date, Masterset$Species_Name)
names(mintemp_select) <-  c('dates', 'species')
mintemp_select <- mintemp_select[mintemp_select$species %in% c( "Fowlea piscator" , "Dendrelaphis tristis" ,
                                                             "Ptyas mucosa", "Lycodon aulicus", 
                                                             "Bungarus fasciatus", "Bungarus caeruleus",
                                                             "Naja kaouthia", "Naja naja","Daboia russelii" ), ]
mintemp_select <- merge(mintemp_select, mintemp, by = "dates")
#remove rows with NA's 
mintemp_select <- na.omit(mintemp_select)
#remove the date column
mintemp_select <- mintemp_select[, !names(mintemp_select) %in% c("dates")]

Check the assumptions for a one-way ANOVA.

### create the model
model_mintemp <- aov(mintemp~species, data = mintemp_select)
### normality
shapiro.test(residuals(model_mintemp))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_mintemp)
## W = 0.8835, p-value < 2.2e-16

### homogeneity of variance
leveneTest(model_mintemp)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value   Pr(>F)   
## group   8  3.1266 0.001883 **
##       454                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

### independence
durbinWatsonTest(model_mintemp)

##  lag Autocorrelation D-W Statistic p-value
##    1       0.8611095     0.2774887       0
##  Alternative hypothesis: rho != 0

Assumptions have been violated.

# Run a Kruskal Wallis Test
kruskal_test(mintemp~species, data = mintemp_select)

## # A tibble: 1 × 6
##   .y.         n statistic    df        p method        
## * <chr>   <int>     <dbl> <int>    <dbl> <chr>         
## 1 mintemp   463      27.8     8 0.000507 Kruskal-Wallis

# post-hoc test
pwc_mintemp <- dunn_test(mintemp~species, data = mintemp_select)
print(pwc_mintemp, n=36)

## # A tibble: 36 × 9
##    .y.     group1      group2    n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>   <chr>       <chr>  <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 mintemp Bungarus c… Bunga…    34    41  -0.678   4.98e-1 1       ns          
##  2 mintemp Bungarus c… Daboi…    34    72  -4.03    5.68e-5 0.00204 **          
##  3 mintemp Bungarus c… Dendr…    34    17  -2.81    4.93e-3 0.163   ns          
##  4 mintemp Bungarus c… Fowle…    34    26  -2.48    1.31e-2 0.407   ns          
##  5 mintemp Bungarus c… Lycod…    34    33  -1.79    7.29e-2 1       ns          
##  6 mintemp Bungarus c… Naja …    34    15  -0.731   4.65e-1 1       ns          
##  7 mintemp Bungarus c… Naja …    34   178  -1.75    7.98e-2 1       ns          
##  8 mintemp Bungarus c… Ptyas…    34    47  -1.64    1.02e-1 1       ns          
##  9 mintemp Bungarus f… Daboi…    41    72  -3.48    5.06e-4 0.0172  *           
## 10 mintemp Bungarus f… Dendr…    41    17  -2.35    1.88e-2 0.564   ns          
## 11 mintemp Bungarus f… Fowle…    41    26  -1.95    5.12e-2 1       ns          
## 12 mintemp Bungarus f… Lycod…    41    33  -1.20    2.30e-1 1       ns          
## 13 mintemp Bungarus f… Naja …    41    15  -0.230   8.18e-1 1       ns          
## 14 mintemp Bungarus f… Naja …    41   178  -0.985   3.25e-1 1       ns          
## 15 mintemp Bungarus f… Ptyas…    41    47  -0.989   3.23e-1 1       ns          
## 16 mintemp Daboia rus… Dendr…    72    17   0.00953 9.92e-1 1       ns          
## 17 mintemp Daboia rus… Fowle…    72    26   0.837   4.02e-1 1       ns          
## 18 mintemp Daboia rus… Lycod…    72    33   1.90    5.74e-2 1       ns          
## 19 mintemp Daboia rus… Naja …    72    15   2.15    3.13e-2 0.909   ns          
## 20 mintemp Daboia rus… Naja …    72   178   3.65    2.62e-4 0.00917 **          
## 21 mintemp Daboia rus… Ptyas…    72    47   2.50    1.24e-2 0.396   ns          
## 22 mintemp Dendrelaph… Fowle…    17    26   0.606   5.45e-1 1       ns          
## 23 mintemp Dendrelaph… Lycod…    17    33   1.33    1.84e-1 1       ns          
## 24 mintemp Dendrelaph… Naja …    17    15   1.72    8.59e-2 1       ns          
## 25 mintemp Dendrelaph… Naja …    17   178   2.00    4.57e-2 1       ns          
## 26 mintemp Dendrelaph… Ptyas…    17    47   1.65    9.93e-2 1       ns          
## 27 mintemp Fowlea pis… Lycod…    26    33   0.793   4.28e-1 1       ns          
## 28 mintemp Fowlea pis… Naja …    26    15   1.29    1.96e-1 1       ns          
## 29 mintemp Fowlea pis… Naja …    26   178   1.52    1.30e-1 1       ns          
## 30 mintemp Fowlea pis… Ptyas…    26    47   1.14    2.56e-1 1       ns          
## 31 mintemp Lycodon au… Naja …    33    15   0.680   4.97e-1 1       ns          
## 32 mintemp Lycodon au… Naja …    33   178   0.583   5.60e-1 1       ns          
## 33 mintemp Lycodon au… Ptyas…    33    47   0.307   7.59e-1 1       ns          
## 34 mintemp Naja kaout… Naja …    15   178  -0.376   7.07e-1 1       ns          
## 35 mintemp Naja kaout… Ptyas…    15    47  -0.479   6.32e-1 1       ns          
## 36 mintemp Naja naja   Ptyas…   178    47  -0.249   8.04e-1 1       ns

# mintemp (B)
pwc_mintemp <- pwc_mintemp %>% add_xy_position(x = "species")
mintemp_plot <- ggplot(mintemp_select, aes(x= factor(mintemp_select$species, level=c('Bungarus fasciatus', 
                                                                   'Bungarus caeruleus',
                                                                   'Daboia russelii',
                                                                   'Naja kaouthia',
                                                                   'Naja naja', 
                                                                   'Fowlea piscator',
                                                                   'Dendrelaphis tristis',
                                                                   'Ptyas mucosa',
                                                                   'Lycodon aulicus')),
                                           y= mintemp)) + geom_violin(aes(fill= species)) +
  labs(x="Species", y="Temperature") +
  geom_point(position = position_jitter(seed = 1, width = 0.2), size = 1) +
  theme(panel.border = element_blank(), 
        panel.background = element_blank(), 
        axis.line = element_line(colour = "black"),
        panel.grid.major.y = element_line(colour = "grey", linetype = 1), 
        axis.text.x = element_text(face = "italic", angle = 90), legend.position = "none") +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.x = element_blank() )  + 
  stat_pvalue_manual(pwc_mintemp, hide.ns = TRUE)
mintemp_plot

Maximum Temperature

maxtemp_select= data.frame(Masterset$Date, Masterset$Species_Name)
names(maxtemp_select) <-  c('dates', 'species')
maxtemp_select <- maxtemp_select[maxtemp_select$species %in% c( "Fowlea piscator" , "Dendrelaphis tristis" ,
                                                             "Ptyas mucosa", "Lycodon aulicus", 
                                                             "Bungarus fasciatus", "Bungarus caeruleus",
                                                             "Naja kaouthia", "Naja naja","Daboia russelii" ), ]
maxtemp_select <- merge(maxtemp_select, maxtemp, by = "dates")
#remove rows with NA's 
maxtemp_select <- na.omit(maxtemp_select)
#remove the date column
maxtemp_select <- maxtemp_select[, !names(maxtemp_select) %in% c("dates")]

Check the assumptions for a one-way ANOVA.

### create the model
model_maxtemp <- aov(maxtemp~species, data = maxtemp_select)
### normality
shapiro.test(residuals(model_maxtemp))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_maxtemp)
## W = 0.9552, p-value = 3.554e-10

### homogeneity of variance
leveneTest(model_maxtemp)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   8  1.5928 0.1248
##       423

### independence
durbinWatsonTest(model_maxtemp)

##  lag Autocorrelation D-W Statistic p-value
##    1       0.7916678     0.4160015       0
##  Alternative hypothesis: rho != 0

Assumptions have been violated.

# Run a Kruskal Wallis Test
kruskal_test(maxtemp~species, data = maxtemp_select)

## # A tibble: 1 × 6
##   .y.         n statistic    df      p method        
## * <chr>   <int>     <dbl> <int>  <dbl> <chr>         
## 1 maxtemp   432      13.5     8 0.0949 Kruskal-Wallis

# maxtemp (C)

maxtemp_plot <- ggplot(maxtemp_select, aes(x= factor(maxtemp_select$species, level=c('Bungarus fasciatus', 
                                                                                  'Bungarus caeruleus',
                                                                                  'Daboia russelii',
                                                                                  'Naja kaouthia',
                                                                                  'Naja naja', 
                                                                                  'Fowlea piscator',
                                                                                  'Dendrelaphis tristis',
                                                                                  'Ptyas mucosa',
                                                                                  'Lycodon aulicus')),
                                           y= maxtemp)) + geom_violin(aes(fill= species)) +
  labs(x="Species", y="Temperature") +
  geom_point(position = position_jitter(seed = 1, width = 0.2), size = 1) +
  theme(panel.border = element_blank(), 
        panel.background = element_blank(), 
        axis.line = element_line(colour = "black"),
        panel.grid.major.y = element_line(colour = "grey", linetype = 1), 
        axis.text.x = element_text(face = "italic", angle = 90), legend.position = "none") +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.x = element_blank() )
maxtemp_plot

Temperature Range

temprange_select= data.frame(Masterset$Date, Masterset$Species_Name)
names(temprange_select) <-  c('dates', 'species')
temprange_select <- temprange_select[temprange_select$species %in% c( "Fowlea piscator" , "Dendrelaphis tristis" ,
                                                             "Ptyas mucosa", "Lycodon aulicus", 
                                                             "Bungarus fasciatus", "Bungarus caeruleus",
                                                             "Naja kaouthia", "Naja naja","Daboia russelii" ), ]

First, create the temperature range dataframe.

# Creating temprange
temprange <- merge(mintemp, maxtemp, by = "dates")
temprange$temprange <- temprange$maxtemp - temprange$mintemp
temprange <- temprange[, !names(temprange) %in% c("mintemp", "maxtemp")]
temprange_select <- merge(temprange_select, temprange, by = "dates")
#remove rows with NA's 
temprange_select <- na.omit(temprange_select)
#remove the date column
temprange_select <- temprange_select[, !names(temprange_select) %in% c("dates")]

Check the assumptions for a one-way ANOVA.

### create the model
model_temprange <- aov(temprange~species, data = temprange_select)
### normality
shapiro.test(residuals(model_temprange))

## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(model_temprange)
## W = 0.99406, p-value = 0.091

### homogeneity of variance
leveneTest(model_temprange)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   8  0.8588 0.5514
##       422

### independence
durbinWatsonTest(model_temprange)

##  lag Autocorrelation D-W Statistic p-value
##    1        0.656815     0.6827829       0
##  Alternative hypothesis: rho != 0

Assumptions have not been violated.

# Run a Kruskal Wallis Test
anova_test(temprange~species, data = temprange_select)

## ANOVA Table (type II tests)
## 
##    Effect DFn DFd     F     p p<.05   ges
## 1 species   8 422 2.702 0.007     * 0.049

pwc_temprange <- temprange_select %>% tukey_hsd(temprange ~ species)
print(pwc_temprange, n=36, width = Inf)

## # A tibble: 36 × 9
##    term    group1               group2               null.value estimate
##  * <chr>   <chr>                <chr>                     <dbl>    <dbl>
##  1 species Bungarus caeruleus   Bungarus fasciatus            0  0.422  
##  2 species Bungarus caeruleus   Daboia russelii               0  2.14   
##  3 species Bungarus caeruleus   Dendrelaphis tristis          0  2.14   
##  4 species Bungarus caeruleus   Fowlea piscator               0  2.33   
##  5 species Bungarus caeruleus   Lycodon aulicus               0  2.13   
##  6 species Bungarus caeruleus   Naja kaouthia                 0  1.25   
##  7 species Bungarus caeruleus   Naja naja                     0  1.01   
##  8 species Bungarus caeruleus   Ptyas mucosa                  0  1.70   
##  9 species Bungarus fasciatus   Daboia russelii               0  1.72   
## 10 species Bungarus fasciatus   Dendrelaphis tristis          0  1.71   
## 11 species Bungarus fasciatus   Fowlea piscator               0  1.90   
## 12 species Bungarus fasciatus   Lycodon aulicus               0  1.71   
## 13 species Bungarus fasciatus   Naja kaouthia                 0  0.825  
## 14 species Bungarus fasciatus   Naja naja                     0  0.587  
## 15 species Bungarus fasciatus   Ptyas mucosa                  0  1.28   
## 16 species Daboia russelii      Dendrelaphis tristis          0 -0.00441
## 17 species Daboia russelii      Fowlea piscator               0  0.184  
## 18 species Daboia russelii      Lycodon aulicus               0 -0.00754
## 19 species Daboia russelii      Naja kaouthia                 0 -0.894  
## 20 species Daboia russelii      Naja naja                     0 -1.13   
## 21 species Daboia russelii      Ptyas mucosa                  0 -0.444  
## 22 species Dendrelaphis tristis Fowlea piscator               0  0.189  
## 23 species Dendrelaphis tristis Lycodon aulicus               0 -0.00312
## 24 species Dendrelaphis tristis Naja kaouthia                 0 -0.889  
## 25 species Dendrelaphis tristis Naja naja                     0 -1.13   
## 26 species Dendrelaphis tristis Ptyas mucosa                  0 -0.439  
## 27 species Fowlea piscator      Lycodon aulicus               0 -0.192  
## 28 species Fowlea piscator      Naja kaouthia                 0 -1.08   
## 29 species Fowlea piscator      Naja naja                     0 -1.32   
## 30 species Fowlea piscator      Ptyas mucosa                  0 -0.628  
## 31 species Lycodon aulicus      Naja kaouthia                 0 -0.886  
## 32 species Lycodon aulicus      Naja naja                     0 -1.12   
## 33 species Lycodon aulicus      Ptyas mucosa                  0 -0.436  
## 34 species Naja kaouthia        Naja naja                     0 -0.239  
## 35 species Naja kaouthia        Ptyas mucosa                  0  0.450  
## 36 species Naja naja            Ptyas mucosa                  0  0.689  
##    conf.low conf.high  p.adj p.adj.signif
##  *    <dbl>     <dbl>  <dbl> <chr>       
##  1  -1.88       2.73  1      ns          
##  2   0.0751     4.21  0.0356 *           
##  3  -0.830      5.10  0.378  ns          
##  4  -0.355      5.01  0.149  ns          
##  5  -0.283      4.55  0.133  ns          
##  6  -1.86       4.35  0.944  ns          
##  7  -0.848      2.87  0.75   ns          
##  8  -0.568      3.96  0.323  ns          
##  9  -0.237      3.68  0.137  ns          
## 10  -1.18       4.61  0.649  ns          
## 11  -0.694      4.50  0.354  ns          
## 12  -0.612      4.03  0.346  ns          
## 13  -2.21       3.86  0.995  ns          
## 14  -1.15       2.32  0.98   ns          
## 15  -0.891      3.44  0.658  ns          
## 16  -2.71       2.70  1      ns          
## 17  -2.20       2.57  1      ns          
## 18  -2.10       2.08  1      ns          
## 19  -3.75       1.96  0.988  ns          
## 20  -2.54       0.271 0.227  ns          
## 21  -2.36       1.47  0.998  ns          
## 22  -3.01       3.39  1      ns          
## 23  -2.99       2.98  1      ns          
## 24  -4.45       2.68  0.997  ns          
## 25  -3.68       1.42  0.905  ns          
## 26  -3.30       2.42  1      ns          
## 27  -2.89       2.51  1      ns          
## 28  -4.41       2.25  0.985  ns          
## 29  -3.53       0.894 0.644  ns          
## 30  -3.19       1.94  0.998  ns          
## 31  -4.01       2.24  0.994  ns          
## 32  -3.01       0.757 0.639  ns          
## 33  -2.72       1.85  1      ns          
## 34  -2.95       2.47  1      ns          
## 35  -2.56       3.46  1      ns          
## 36  -0.995      2.37  0.938  ns

#temprange (D)
pwc_temprange <- pwc_temprange %>% add_xy_position(x = "species")

temprange_plot <- ggplot(temprange_select, aes(x= factor(temprange_select$species, level=c('Bungarus fasciatus', 
                                                                                  'Bungarus caeruleus',
                                                                                  'Daboia russelii',
                                                                                  'Naja kaouthia',
                                                                                  'Naja naja', 
                                                                                  'Fowlea piscator',
                                                                                  'Dendrelaphis tristis',
                                                                                  'Ptyas mucosa',
                                                                                  'Lycodon aulicus')),
                                           y= temprange)) + geom_violin(aes(fill= species)) +
  labs(x="Species", y="Temperature") +
  geom_point(position = position_jitter(seed = 1, width = 0.2), size = 1) +
  theme(panel.border = element_blank(), 
        panel.background = element_blank(), 
        axis.line = element_line(colour = "black"),
        panel.grid.major.y = element_line(colour = "grey", linetype = 1), 
        axis.text.x = element_text(face = "bold.italic", 
                                   angle = 90, size = 15), 
        legend.position = "none") +
  scale_x_discrete(labels = c("Bungarus fasciatus" = "B. fasciatus", 
                              "Bungarus caeruleus" = "B. caeruleus",
                              "Daboia russelii" = "D. russelii",
                              "Naja kaouthia" = "N. kaouthia",
                              "Naja naja" = "N. naja",
                              "Fowlea piscator" = "F. piscator",
                              "Dendrelaphis tristis" = "D. tristis",
                              "Ptyas mucosa" = "P. mucosa",
                              "Lycodon aulicus" = "L. aulicus")) +
  stat_pvalue_manual(pwc_temprange, hide.ns = TRUE)
temprange_plot

Visualizing these Results:

Figure 6 in Paper

# merge fig5

ggarrange(rain_plot, mintemp_plot, maxtemp_plot, 
          temprange_plot + rremove("xlab"), ncol = 1, 
          heights = c(15,15,15,25), labels = "AUTO")

Binary Logistic Regression Models

# make climate data
climate_data <- rain
climate_data$rain_prev <- c(NA, climate_data$rain[-nrow(climate_data)])  
climate_data <- merge(climate_data, mintemp, by = "dates")
climate_data <- merge(climate_data, maxtemp, by = "dates")
climate_data <- pad(climate_data)

Test whether the climate variables are collinear.

# Correlation Matrix
climate_correlation <- climate_data[, c("rain", "rain_prev", 
                                        "mintemp", "maxtemp")]

climate_correlation <- na.omit(climate_correlation)
vif(climate_correlation)

##   Variables      VIF
## 1      rain 1.128907
## 2 rain_prev 1.181178
## 3   mintemp 3.117688
## 4   maxtemp 2.959750

All the VIF scores are less than 5.

We will now run models on each of the species.

climate_species <- data.frame(Masterset$Date, Masterset$Species_Name)
names(climate_species) <- c('dates', 'species')

# for Bungarus fasciatus
climate_fasciatus <- subset(climate_species, species %in% c("Bungarus fasciatus"))
climate_fasciatus <- pad(climate_fasciatus)
climate_fasciatus$presence <- ifelse(is.na(climate_fasciatus$species), 0, 
                                     ifelse(climate_fasciatus$species == "Bungarus fasciatus", 1, 0))
climate_fasciatus <- subset(climate_fasciatus, select = -species)

climate_fasciatus <- merge(climate_fasciatus, climate_data, by = "dates")
climate_fasciatus <- subset(climate_fasciatus, select = -dates )
climate_fasciatus <- na.omit(climate_fasciatus)
# making the model
model_fasciatus <- glm(presence ~ rain + rain_prev + maxtemp + mintemp, 
                       data = climate_fasciatus, family = binomial(link = "logit"))
summary(model_fasciatus)

## 
## Call:
## glm(formula = presence ~ rain + rain_prev + maxtemp + mintemp, 
##     family = binomial(link = "logit"), data = climate_fasciatus)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -5.487846   1.815368  -3.023   0.0025 **
## rain        -0.002439   0.013725  -0.178   0.8590   
## rain_prev    0.018177   0.009267   1.961   0.0498 * 
## maxtemp     -0.016984   0.074096  -0.229   0.8187   
## mintemp      0.130568   0.063978   2.041   0.0413 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 298.68  on 679  degrees of freedom
## Residual deviance: 284.06  on 675  degrees of freedom
## AIC: 294.06
## 
## Number of Fisher Scoring iterations: 6

res_fasciatus <- roc(presence ~ fitted(model_fasciatus), 
                     data = climate_fasciatus)
res_fasciatus$auc

## Area under the curve: 0.6772

ggroc(res_fasciatus, legacy.axes = TRUE)

# for Bungarus caeruleus
climate_caeruleus <- subset(climate_species, species %in% c("Bungarus caeruleus"))
climate_caeruleus <- pad(climate_caeruleus)
climate_caeruleus$presence <- ifelse(is.na(climate_caeruleus$species), 0, 
                                     ifelse(climate_caeruleus$species == "Bungarus caeruleus", 1, 0))
climate_caeruleus <- subset(climate_caeruleus, select = -species)

climate_caeruleus <- merge(climate_caeruleus, climate_data, by = "dates")
climate_caeruleus <- subset(climate_caeruleus, select = -dates )
climate_caeruleus <- na.omit(climate_caeruleus)
# making the model
model_caeruleus <- glm(presence ~ rain + rain_prev + maxtemp + mintemp, 
                       data = climate_caeruleus, family = binomial(link = "logit"))
summary(model_caeruleus)

## 
## Call:
## glm(formula = presence ~ rain + rain_prev + maxtemp + mintemp, 
##     family = binomial(link = "logit"), data = climate_caeruleus)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -4.878492   1.990713  -2.451  0.01426 * 
## rain         0.006316   0.011623   0.543  0.58682   
## rain_prev    0.004843   0.011544   0.420  0.67484   
## maxtemp     -0.097348   0.080885  -1.204  0.22877   
## mintemp      0.208084   0.072325   2.877  0.00401 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 263.96  on 678  degrees of freedom
## Residual deviance: 249.39  on 674  degrees of freedom
## AIC: 259.39
## 
## Number of Fisher Scoring iterations: 6

res_caeruleus <- roc(presence ~ fitted(model_caeruleus), 
                     data = climate_caeruleus)
res_caeruleus$auc

## Area under the curve: 0.6843

ggroc(res_caeruleus, legacy.axes = TRUE)

# for Daboia russelii
climate_russelii <- subset(climate_species, species %in% c("Daboia russelii"))
climate_russelii <- pad(climate_russelii)
climate_russelii$presence <- ifelse(is.na(climate_russelii$species), 0, 
                                    ifelse(climate_russelii$species == "Daboia russelii", 1, 0))
climate_russelii <- subset(climate_russelii, select = -species)

climate_russelii <- merge(climate_russelii, climate_data, by = "dates")
climate_russelii <- subset(climate_russelii, select = -dates )
climate_russelii <- na.omit(climate_russelii)
# making the model
model_russelii <- glm(presence ~ rain + rain_prev + maxtemp + mintemp, 
                      data = climate_russelii, family = binomial(link = "logit"))
summary(model_russelii)

## 
## Call:
## glm(formula = presence ~ rain + rain_prev + maxtemp + mintemp, 
##     family = binomial(link = "logit"), data = climate_russelii)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -2.597946   1.087569  -2.389   0.0169 *
## rain         0.009837   0.011077   0.888   0.3745  
## rain_prev    0.012091   0.011060   1.093   0.2743  
## maxtemp      0.046903   0.053989   0.869   0.3850  
## mintemp     -0.049719   0.045757  -1.087   0.2772  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 423.26  on 613  degrees of freedom
## Residual deviance: 420.37  on 609  degrees of freedom
## AIC: 430.37
## 
## Number of Fisher Scoring iterations: 4

res_russelii <- roc(presence ~ fitted(model_russelii), 
                    data = climate_russelii)
res_russelii$auc

## Area under the curve: 0.572

ggroc(res_russelii, legacy.axes = TRUE)

# for Naja kaouthia
climate_kaouthia <- subset(climate_species, species %in% c("Naja kaouthia"))
climate_kaouthia <- pad(climate_kaouthia)
climate_kaouthia$presence <- ifelse(is.na(climate_kaouthia$species), 0, 
                                    ifelse(climate_kaouthia$species == "Naja kaouthia", 1, 0))
climate_kaouthia <- subset(climate_kaouthia, select = -species)

climate_kaouthia <- merge(climate_kaouthia, climate_data, by = "dates")
climate_kaouthia <- subset(climate_kaouthia, select = -dates )
climate_kaouthia <- na.omit(climate_kaouthia)
# making the model
model_kaouthia <- glm(presence ~ rain + rain_prev + maxtemp + mintemp, 
                      data = climate_kaouthia, family = binomial(link = "logit"))
summary(model_kaouthia)

## 
## Call:
## glm(formula = presence ~ rain + rain_prev + maxtemp + mintemp, 
##     family = binomial(link = "logit"), data = climate_kaouthia)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -4.16606    2.41034  -1.728   0.0839 .
## rain        -0.01728    0.04255  -0.406   0.6846  
## rain_prev   -0.05321    0.05831  -0.913   0.3615  
## maxtemp     -0.06457    0.11467  -0.563   0.5734  
## mintemp      0.11495    0.09631   1.194   0.2327  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 134.54  on 635  degrees of freedom
## Residual deviance: 131.37  on 631  degrees of freedom
## AIC: 141.37
## 
## Number of Fisher Scoring iterations: 8

res_kaouthia <- roc(presence ~ fitted(model_kaouthia), 
                    data = climate_kaouthia)
res_kaouthia$auc

## Area under the curve: 0.6205

ggroc(res_kaouthia, legacy.axes = TRUE)

# for Naja naja

climate_naja <- subset(climate_species, species %in% c("Naja naja"))
climate_naja <- pad(climate_naja)
climate_naja$presence <- ifelse(is.na(climate_naja$species), 0, 
                                ifelse(climate_naja$species == "Naja naja", 1, 0))
climate_naja <- subset(climate_naja, select = -species)

climate_naja <- merge(climate_naja, climate_data, by = "dates")
climate_naja <- subset(climate_naja, select = -dates )
climate_naja <- na.omit(climate_naja)
# making the model
model_naja <- glm(presence ~ rain + rain_prev + maxtemp + mintemp, 
                  data = climate_naja, family = binomial(link = "logit"))
summary(model_naja)

## 
## Call:
## glm(formula = presence ~ rain + rain_prev + maxtemp + mintemp, 
##     family = binomial(link = "logit"), data = climate_naja)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -2.459512   0.850496  -2.892  0.00383 **
## rain        -0.011139   0.009033  -1.233  0.21755   
## rain_prev    0.010488   0.006958   1.507  0.13174   
## maxtemp     -0.019641   0.038995  -0.504  0.61449   
## mintemp      0.078748   0.032044   2.458  0.01399 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 780.84  on 735  degrees of freedom
## Residual deviance: 764.18  on 731  degrees of freedom
## AIC: 774.18
## 
## Number of Fisher Scoring iterations: 4

res_naja <- roc(presence ~ fitted(model_naja), 
                data = climate_naja)
res_naja$auc

## Area under the curve: 0.5975

ggroc(res_naja, legacy.axes = TRUE)

# for Fowlea piscator
climate_piscator <- subset(climate_species, species %in% c("Fowlea piscator"))
climate_piscator <- pad(climate_piscator)
climate_piscator$presence <- ifelse(is.na(climate_piscator$species), 0, 
                                    ifelse(climate_piscator$species == "Fowlea piscator", 1, 0))
climate_piscator <- subset(climate_piscator, select = -species)

climate_piscator <- merge(climate_piscator, climate_data, by = "dates")
climate_piscator <- subset(climate_piscator, select = -dates )
climate_piscator <- na.omit(climate_piscator)
# making the model
model_piscator <- glm(presence ~ rain + rain_prev + maxtemp + mintemp, 
                      data = climate_piscator, family = binomial(link = "logit"))
summary(model_piscator)

## 
## Call:
## glm(formula = presence ~ rain + rain_prev + maxtemp + mintemp, 
##     family = binomial(link = "logit"), data = climate_piscator)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -6.60913    2.03858  -3.242  0.00119 **
## rain         0.01231    0.01471   0.837  0.40285   
## rain_prev    0.01730    0.01318   1.313  0.18929   
## maxtemp      0.17631    0.09246   1.907  0.05653 . 
## mintemp     -0.11871    0.07590  -1.564  0.11778   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 194.90  on 689  degrees of freedom
## Residual deviance: 190.26  on 685  degrees of freedom
## AIC: 200.26
## 
## Number of Fisher Scoring iterations: 6

res_piscator <- roc(presence ~ fitted(model_piscator), 
                    data = climate_piscator)
res_piscator$auc

## Area under the curve: 0.6291

ggroc(res_piscator, legacy.axes = TRUE)

# for Dendrelaphis tristis
climate_tristis <- subset(climate_species, species %in% c("Dendrelaphis tristis"))
climate_tristis <- pad(climate_tristis)
climate_tristis$presence <- ifelse(is.na(climate_tristis$species), 0, 
                                   ifelse(climate_tristis$species == "Dendrelaphis tristis", 1, 0))
climate_tristis <- subset(climate_tristis, select = -species)

climate_tristis <- merge(climate_tristis, climate_data, by = "dates")
climate_tristis <- subset(climate_tristis, select = -dates )
climate_tristis <- na.omit(climate_tristis)
# making the model
model_tristis <- glm(presence ~ rain + rain_prev + maxtemp + mintemp, 
                     data = climate_tristis, family = binomial(link = "logit"))
summary(model_tristis)

## 
## Call:
## glm(formula = presence ~ rain + rain_prev + maxtemp + mintemp, 
##     family = binomial(link = "logit"), data = climate_tristis)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.092116   2.084857  -1.483    0.138
## rain        -0.001482   0.024460  -0.061    0.952
## rain_prev    0.020530   0.018602   1.104    0.270
## maxtemp      0.023112   0.107959   0.214    0.830
## mintemp     -0.057133   0.090170  -0.634    0.526
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 144.43  on 544  degrees of freedom
## Residual deviance: 143.01  on 540  degrees of freedom
## AIC: 153.01
## 
## Number of Fisher Scoring iterations: 6

res_tristis <- roc(presence ~ fitted(model_tristis), 
                   data = climate_tristis)
res_tristis$auc

## Area under the curve: 0.6111

ggroc(res_tristis, legacy.axes = TRUE)

# for Ptyas mucosa
climate_mucosa <- subset(climate_species, species %in% c("Ptyas mucosa"))
climate_mucosa <- pad(climate_mucosa)
climate_mucosa$presence <- ifelse(is.na(climate_mucosa$species), 0, 
                                  ifelse(climate_mucosa$species == "Ptyas mucosa", 1, 0))
climate_mucosa <- subset(climate_mucosa, select = -species)

climate_mucosa <- merge(climate_mucosa, climate_data, by = "dates")
climate_mucosa <- subset(climate_mucosa, select = -dates )
climate_mucosa <- na.omit(climate_mucosa)
# making the model
model_mucosa <- glm(presence ~ rain + rain_prev + maxtemp + mintemp, 
                    data = climate_mucosa, family = binomial(link = "logit"))
summary(model_mucosa)

## 
## Call:
## glm(formula = presence ~ rain + rain_prev + maxtemp + mintemp, 
##     family = binomial(link = "logit"), data = climate_mucosa)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -5.906593   1.568450  -3.766 0.000166 ***
## rain        -0.019996   0.021863  -0.915 0.360390    
## rain_prev    0.007473   0.014401   0.519 0.603817    
## maxtemp      0.068586   0.068703   0.998 0.318136    
## mintemp      0.046114   0.058856   0.783 0.433337    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 300.15  on 606  degrees of freedom
## Residual deviance: 291.59  on 602  degrees of freedom
## AIC: 301.59
## 
## Number of Fisher Scoring iterations: 6

res_mucosa <- roc(presence ~ fitted(model_mucosa), 
                  data = climate_mucosa)
res_mucosa$auc

## Area under the curve: 0.6225

ggroc(res_mucosa, legacy.axes = TRUE)

# for Lycodon aulicus
climate_aulicus <- subset(climate_species, species %in% c("Lycodon aulicus"))
climate_aulicus <- pad(climate_aulicus)
climate_aulicus$presence <- ifelse(is.na(climate_aulicus$species), 0, 
                                   ifelse(climate_aulicus$species == "Lycodon aulicus", 1, 0))
climate_aulicus <- subset(climate_aulicus, select = -species)

climate_aulicus <- merge(climate_aulicus, climate_data, by = "dates")
climate_aulicus <- subset(climate_aulicus, select = -dates )
climate_aulicus <- na.omit(climate_aulicus)
# making the model
model_aulicus <- glm(presence ~ rain + rain_prev + maxtemp + mintemp, 
                     data = climate_aulicus, family = binomial(link = "logit"))
summary(model_aulicus)

## 
## Call:
## glm(formula = presence ~ rain + rain_prev + maxtemp + mintemp, 
##     family = binomial(link = "logit"), data = climate_aulicus)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -4.96519    1.65687  -2.997  0.00273 **
## rain        -0.02047    0.02637  -0.776  0.43766   
## rain_prev    0.01245    0.01454   0.856  0.39199   
## maxtemp      0.10652    0.07824   1.362  0.17335   
## mintemp     -0.06706    0.06380  -1.051  0.29321   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 252.21  on 681  degrees of freedom
## Residual deviance: 249.16  on 677  degrees of freedom
## AIC: 259.16
## 
## Number of Fisher Scoring iterations: 6

res_aulicus <- roc(presence ~ fitted(model_aulicus), 
                   data = climate_aulicus)
res_aulicus$auc

## Area under the curve: 0.5833

ggroc(res_aulicus, legacy.axes = TRUE)

Comparing Time Periods

time_effects <- data.frame(Masterset$Date, Masterset$Time, Masterset$Species_Name)
names(time_effects) <- c('dates', 'time', 'species')

# create time_periods
time_effects$datetime <- as.POSIXct(paste(time_effects$dates, time_effects$time))
time_effects$time <- as_hms(time_effects$datetime)
#Define the 6 hour breaks and the time period labels
breaks <- hour(hms("00:00:00", "05:59:59", "11:59:59", "17:59:59", "23:59:59"))
labels  <- c("0_6", "6_12", "12_18", "18_0")

# Define time periods
time_effects$time_period <- cut(x = hour(time_effects$time), 
                                breaks = breaks, labels = labels, 
                                include.lowest = TRUE)
                                
# Select only the species and time_period columns
time_periods_species <- time_effects %>% select(species, time_period)

selected_species <- c( "Fowlea piscator" , "Dendrelaphis tristis" ,
                       "Ptyas mucosa", "Lycodon aulicus", 
                       "Bungarus fasciatus", "Bungarus caeruleus",
                       "Naja kaouthia", "Naja naja","Daboia russelii" )
time_periods_species <- subset(time_periods_species, species %in% selected_species)

table_time_periods <- table(time_periods_species$species, time_periods_species$time_period)
table_time_periods

##                       
##                        0_6 6_12 12_18 18_0
##   Bungarus caeruleus     0    5     3   30
##   Bungarus fasciatus     2   12     3   25
##   Daboia russelii        0   18    29   31
##   Dendrelaphis tristis   1    6     8    2
##   Fowlea piscator        1    5     4   18
##   Lycodon aulicus        3    3     8   23
##   Naja kaouthia          0    7     5    5
##   Naja naja              5   55    65   65
##   Ptyas mucosa           0   13    23   12

# remove the first 6 hour time period (0_6) due to the lack of calls
table_time_periods <- table_time_periods[, -1]
chisq.test(table_time_periods)

## 
##  Pearson's Chi-squared test
## 
## data:  table_time_periods
## X-squared = 69.277, df = 16, p-value = 1.335e-08

Snake Rescue Encounter Analysis

Sourish Kuttalam

2024-05-23

Github repo

Load the Data

Transforming Variables

Comparing Years

Compare 2020 vs 2021 (July to December)

Compare 2021 vs 2022 (January to October)

Site Use Comparison

Individual Sites

Outside vs Inside Sites

Loading Climate Data

Effect Of Rain

Effect on Location and Species Type

Effect of Day Type Combinations

Most Rescued Species

Comparing Climate Effects

Rain

Minimum Temperature

Maximum Temperature

Temperature Range

Binary Logistic Regression Models

Comparing Time Periods