# I. # simmat function is doing the same calculations as Stéphanie's plus # the starting date (from when the cumulative favourable days are counted) # is adjustable (perhaps that variable matters) # please see the comments of Stéphanie simmat <- function(tmin=6,tmax=22,rmin=4,nd=7,meanFD=65,sdFD=24, k=0.046,startingJDate=100,weather) { temperatures <- weather[,which(names(weather)=="Temperature")] rainfall <- weather[,which(names(weather)=="Rainfall")] DFM <- numeric(length(temperatures)) CDFM <- numeric(length(temperatures)) Crain <- numeric(length(temperatures)) PMP <- numeric(length(temperatures)) PEP <- numeric(length(temperatures)) p.empty.pseudo <- numeric(length(temperatures)) p.full.pseudo <- numeric(length(temperatures)) for (i in startingJDate:length(temperatures)) { Crain[i] <- sum(rainfall[(i-nd):(i-1)],na.rm = T) if (!is.na(temperatures[i]>tmin & temperatures[i]rmin) & (temperatures[i]>tmin & temperatures[i]rmin)) {DFM[i] <- 1} CDFM[i] <- sum(DFM[1:i]) PMP[i] <- pnorm(CDFM[i], mean = meanFD, sd = sdFD) PEP[i] <- ifelse(!is.na(1-exp(-k*rainfall[i])),1-exp(-k*rainfall[i]),0) p.empty.pseudo[i] <- p.empty.pseudo[i-1] + (PMP[i]-p.empty.pseudo[i-1])*PEP[i] p.full.pseudo[i] <- (PMP[i]-p.empty.pseudo[i])/(1-p.empty.pseudo[i]) } data.frame(temperatures,rainfall,Crain,DFM,CDFM,PMP,PEP, p.empty.pseudo,p.full.pseudo) } #just to test whether this fuction works #fakeweather <- data.frame(Temperature=sample(5:25,365,replace = T), # Rainfall=rpois(365,2)) #x <- simmat(weather = fakeweather) #plot(x$p.full.pseudo) #--------------------------------------------------------------------- #II. # Reading the real data #setwd("C:/Users/Admin/Desktop/SummerSchool") df <- read.csv2("database_pseudothecia_ascospores dull.csv") # see the results in the 2nd site-location #df<-df[c(367:731),] #x <- simmat(weather = df, startingJDate=90) #plot(x$p.full.pseudo) #--------------------------------------------------------------------- #III. # Run the simmat function for each site in each year (each Site.year) # 1. Do a list with elements of data frames, where each data frame is # for single Site.year df.list <- split(df,df$Site.year) # note: Site.year is an integer not a factor, but this function works with this as well # 2. Do the simulation for all elements of the list (all Site.year -s) simulated.pseudo <- lapply(df.list, function(x) { x$sim.pseudo <- simmat(weather=x, startingJDate=180)$p.full.pseudo x }) # 3. Do a large data frame from the list df.sim <- do.call("rbind",simulated.pseudo) #Note: much less observation here compared to the original 'df', because the rows of the seconf half are empty there sum(is.na(df.sim$sim.pseudo)) #check whether all simulated values are non-NAs # 4. plotting the simulated vs. observed (jittered) plot(jitter(Percentage.of.mature.pseudothecia)~sim.pseudo, data=df.sim[df.sim$Month>6,],ylim=c(0,1)) #note only 2nd half of the year is considered and values between 0 and 1 // not nice code:( #------------------------------------------------------------------- #IV. # only the interesting (or good) data for later use df2 <- df[df$Month>6 & !is.na(df$Percentage.of.mature.pseudothecia) & df$Percentage.of.mature.pseudothecia<=1,] #------------------------------------------------------------------- #V. #Parameter estimation ### An optimisation function to feed 'nls', DOES NOT WORK # simmat.optim <- function(tmin,tmax,rmin,nd,meanFD,sdFD, # k,startingJDate=100,weather) # { # simulated.pseudo <- lapply(df.list, # function(x) # { # simulation<-simmat(tmin,tmax,rmin,nd,meanFD, # sdFD,k,startingJDate, # weather=x) # x$sim.pseudo <- simulation$p.full.pseudo # x # }) # # df.sim <- do.call("rbind",simulated.pseudo) # df.sim2 <- df.sim[df.sim$Month>6 & # !is.na(df.sim$Percentage.of.mature.pseudothecia) & # df.sim$Percentage.of.mature.pseudothecia<1.1,] # df.sim2$sim.pseudo # #sum((df.sim2$sim.pseudo-df.sim2$Percentage.of.mature.pseudothecia)^2) # } # # Y <- df2$Percentage.of.mature.pseudothecia # nls(Y~simmat.optim(tmin,tmax,rmin,nd,meanFD,sdFD,k, # startingJDate=100,weather), # data=list(weather=df[,8:9]), # start=list(tmin=6,tmax=22,rmin=4,nd=7,meanFD=65,sdFD=24,k=0.046), # lower = list(tmin=0,tmax=10,rmin=0,nd=1,meanFD=10,sdFD=1,k=0), # upper = list(tmin=20,tmax=40,rmin=9,nd=20,meanFD=100,sdFD=100,k=0.9)) # # nls(Y~simmat.optim(tmin,tmax,rmin,nd,meanFD,sdFD,k, # startingJDate=100,weather), # data=list(weather=df[,8:9]), # start=list(tmin=6,tmax=22,rmin=4,nd=7,meanFD=65,sdFD=24,k=0.046), # lower = list(tmin=5,tmax=21,rmin=3,nd=6,meanFD=60,sdFD=20,k=0.04), # upper = list(tmin=7,tmax=23,rmin=5,nd=8,meanFD=70,sdFD=30,k=0.5)) # # xx<-simmat.optim(tmin=6,tmax=22,rmin=4,nd=7,meanFD=65,sdFD=24,k=0.046, # startingJDate=100,weather==df[,8:9]) # an optimisation function for 'optim' function /!/could not tested -- TOO LONG simmat.optim2 <- function(parameters) { tmin <- parameters[1] tmax <- parameters[2] rmin <- parameters[3] nd <- parameters[4] meanFD <- parameters[5] sdFD <- parameters[6] k <- parameters[7] simulated.pseudo <- lapply(df.list, function(x) { simulation<-simmat(tmin,tmax,rmin,nd,meanFD, sdFD,k,startingJDate=100, weather=x) x$sim.pseudo <- simulation$p.full.pseudo x }) df.sim <- do.call("rbind",simulated.pseudo) df.sim2 <- df.sim[df.sim$Month>6 & !is.na(df.sim$Percentage.of.mature.pseudothecia) & df.sim$Percentage.of.mature.pseudothecia<1.1,] # the output which should be minimized: sum((df.sim2$sim.pseudo-df.sim2$Percentage.of.mature.pseudothecia)^2) } #c(tmin=6,tmax=22,rmin=4,nd=7,meanFD=65,sdFD=24,k=0.046) optim2<-optim(par=c(tmin=6,tmax=22,rmin=4,nd=7,meanFD=65,sdFD=24,k=0.046), simmat.optim2,method = "L-BFGS-B", lower = list(tmin=0,tmax=10,rmin=0,nd=1,meanFD=10,sdFD=1,k=0), upper = list(tmin=20,tmax=40,rmin=9,nd=20,meanFD=100,sdFD=100,k=0.9)) #----------------------- # an optimisation function for 'optim' function, BUT for only 2 variables # (see the results of SA by Stéphanie) simmat.optim3 <- function(parameters) { tmin <- 6 tmax <- parameters[1] rmin <- 4 nd <- 7 meanFD <- parameters[2] sdFD <- 24 k <- 0.046 simulated.pseudo <- lapply(df.list, function(x) { simulation<-simmat(tmin,tmax,rmin,nd,meanFD, sdFD,k,startingJDate=100, weather=x) x$sim.pseudo <- simulation$p.full.pseudo x }) df.sim <- do.call("rbind",simulated.pseudo) df.sim2 <- df.sim[df.sim$Month>6 & !is.na(df.sim$Percentage.of.mature.pseudothecia) & df.sim$Percentage.of.mature.pseudothecia<1.1,] # the output which should be minimized: sum((df.sim2$sim.pseudo-df.sim2$Percentage.of.mature.pseudothecia)^2) } optim3<-optim(par=c(tmax=22,meanFD=65),simmat.optim3) #WORKS #tmax meanFD #55.27402 26.50226 ########################################### # END - it should be continued from here ########################################### # ,method = "L-BFGS-B", # lower = list(tmin=0,tmax=10,rmin=0,nd=1,meanFD=10,sdFD=1,k=0), # upper = list(tmin=20,tmax=40,rmin=9,nd=20,meanFD=100,sdFD=100,k=0.9))