# David Makowski, 2015, Formation RMT modelisation 2 décembre 2015 library(lme4) library(AICcmodavg) TAB <- read.table("BDD_septoriose_2.txt",header=TRUE) summary(TAB) #time_b correspond au temps journalier avec le time_b=1 pour une date d'observation = au 31 mars # model 1 = model effet temps uniquement model1<- glm(cbind(feuille.m,feuille.s)~time_b, family=binomial(link="logit"), data=TAB) theta1 <- coef(model1) model1_fitted<-model1$fitted.values par(mfrow=c(1,1)) plot(TAB$time_b,model1_fitted, xlab="Temps (jours)", ylab="Prob. infection") # model 2 = model effet temps + effet fixe groupe de risque sur intercept + effet fixe region sur l'intercept et la pente model2<- glm(cbind(feuille.m,feuille.s)~time_b*region+risque , family=binomial, data=TAB) theta2 <- coef(model2) model2_fitted<-model2$fitted.values par(mfrow=c(1,2)) plot(TAB$time_b[TAB$region=="B" & TAB$risque=="fort"],model2_fitted[TAB$region=="B" & TAB$risque=="fort"], xlab="Temps (jours)", ylab="Prob. infection",col="red") points(TAB$time_b[TAB$region=="B" & TAB$risque=="faible"],model2_fitted[TAB$region=="B" & TAB$risque=="faible"], col="green") points(TAB$time_b[TAB$region=="B" & TAB$risque=="moyen"],model2_fitted[TAB$region=="B" & TAB$risque=="moyen"], col="orange") title("B") plot(TAB$time_b[TAB$region=="A" & TAB$risque=="fort"],model2_fitted[TAB$region=="A" & TAB$risque=="fort"], xlab="Temps (jours)", ylab="Prob. infection",col="red") points(TAB$time_b[TAB$region=="A" & TAB$risque=="faible"],model2_fitted[TAB$region=="A" & TAB$risque=="faible"], col="green") points(TAB$time_b[TAB$region=="A" & TAB$risque=="moyen"],model2_fitted[TAB$region=="A" & TAB$risque=="moyen"], col="orange") title("A") # model 3 = model effet temps + effet fixe groupe de risque sur intercept + effet fixe region sur l'intercept et la pente + effet site-annee aléatoire sur l'intercept model3<- glmer(cbind(feuille.m,feuille.s)~time_b*region+risque + (1|id_unique), family=binomial, data=TAB) theta3 <- fixef(model3) model3_fitted<-fitted(model3) model3_fitted_med<-predictSE.mer(model3, newdata=TAB, se.fit = FALSE, type = "response",level = 0, print.matrix = FALSE) model3_fitted_chf<-predictSE.mer(model3, newdata=data.frame(time_b=1:100, region=rep("B",100), risque=rep("fort",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit model3_fitted_chm<-predictSE.mer(model3, newdata=data.frame(time_b=1:100, region=rep("B",100), risque=rep("moyen",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit model3_fitted_chl<-predictSE.mer(model3, newdata=data.frame(time_b=1:100, region=rep("B",100), risque=rep("faible",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit par(mfrow=c(1,2)) plot(1:100,model3_fitted_chf, xlab="Temps (jours)", ylab="Prob. infection",col="red",type="l") lines(1:100, model3_fitted_chm,col="orange") lines(1:100, model3_fitted_chl,col="green") model3_fitted_cf<-predictSE.mer(model3, newdata=data.frame(time_b=1:100, region=rep("A",100), risque=rep("fort",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit model3_fitted_cm<-predictSE.mer(model3, newdata=data.frame(time_b=1:100, region=rep("A",100), risque=rep("moyen",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit model3_fitted_cl<-predictSE.mer(model3, newdata=data.frame(time_b=1:100, region=rep("A",100), risque=rep("faible",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit plot(1:100,model3_fitted_cf, xlab="Temps (jours)", ylab="Prob. infection",col="red",type="l") lines(1:100, model3_fitted_cm,col="orange") lines(1:100, model3_fitted_cl,col="green") par(mfrow=c(1,1)) plot(TAB$time_b[TAB$id_unique == "id_145"],model3_fitted[TAB$id_unique == "id_145"], xlab="Temps (jours)", ylab="Prob. infection",col="blue", ylim=c(0,1), pch=19) points(TAB$time_b[TAB$id_unique == "id_77"],model3_fitted[TAB$id_unique == "id_77"], col="yellow",pch=19) points(TAB$time_b[TAB$id_unique == "id_124"],model3_fitted[TAB$id_unique == "id_124"], col="purple",pch=19) points(TAB$time_b[TAB$id_unique == "id_145"],TAB$obs_val_num[TAB$id_unique == "id_145"]/10,pch=4) points(TAB$time_b[TAB$id_unique == "id_77"],TAB$obs_val_num[TAB$id_unique == "id_77"]/10,pch=4) points(TAB$time_b[TAB$id_unique == "id_124"],TAB$obs_val_num[TAB$id_unique == "id_124"]/10,pch=4) # model 4 = model effet temps + effet fixe groupe de risque sur intercept + effet fixe region sur l'intercept et la pente + effet site-annee aléatoire sur l'intercept et sur la pente model4<- glmer(cbind(feuille.m,feuille.s)~time_b*region+risque + (1+time_b|id_unique), family=binomial, data=TAB) theta4 <- fixef(model4) theta4 <- fixef(model4) model4_fitted<-fitted(model4) model4_fitted_med<-predictSE.mer(model4, newdata=TAB, se.fit = FALSE, type = "response",level = 0, print.matrix = FALSE) model4_fitted_chf<-predictSE.mer(model4, newdata=data.frame(time_b=1:100, region=rep("B",100), risque=rep("fort",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit model4_fitted_chm<-predictSE.mer(model4, newdata=data.frame(time_b=1:100, region=rep("B",100), risque=rep("moyen",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit model4_fitted_chl<-predictSE.mer(model4, newdata=data.frame(time_b=1:100, region=rep("B",100), risque=rep("faible",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit par(mfrow=c(1,2)) plot(1:100,model4_fitted_chf, xlab="Temps (jours)", ylab="Prob. infection",col="red",type="l") lines(1:100, model4_fitted_chm,col="orange") lines(1:100, model4_fitted_chl,col="green") title("B") model4_fitted_cf<-predictSE.mer(model4, newdata=data.frame(time_b=1:100, region=rep("A",100), risque=rep("fort",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit model4_fitted_cm<-predictSE.mer(model4, newdata=data.frame(time_b=1:100, region=rep("A",100), risque=rep("moyen",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit model4_fitted_cl<-predictSE.mer(model4, newdata=data.frame(time_b=1:100, region=rep("A",100), risque=rep("faible",100)), se.fit = TRUE, type = "response",level = 0, print.matrix = FALSE)$fit plot(1:100,model4_fitted_cf, xlab="Temps (jours)", ylab="Prob. infection",col="red",type="l") lines(1:100, model4_fitted_cm,col="orange") lines(1:100, model4_fitted_cl,col="green") title("A") par(mfrow=c(1,1)) plot(TAB$time_b[TAB$id_unique == "id_145"],model4_fitted[TAB$id_unique == "id_145"], xlab="Temps (jours)", ylab="Prob. infection",col="blue", ylim=c(0,1), pch=19) points(TAB$time_b[TAB$id_unique == "id_77"],model4_fitted[TAB$id_unique == "id_77"], col="yellow",pch=19) points(TAB$time_b[TAB$id_unique == "id_124"],model4_fitted[TAB$id_unique == "id_124"], col="purple",pch=19) points(TAB$time_b[TAB$id_unique == "id_145"],TAB$obs_val_num[TAB$id_unique == "id_145"]/10,pch=4) points(TAB$time_b[TAB$id_unique == "id_77"],TAB$obs_val_num[TAB$id_unique == "id_77"]/10,pch=4) points(TAB$time_b[TAB$id_unique == "id_124"],TAB$obs_val_num[TAB$id_unique == "id_124"]/10,pch=4) # AIC des 4 modèles AIC(model1) AIC(model2) AIC(model3) AIC(model4) # Calcul du RMSE pour les 4 modèles (pour les modèle3 et 4, il y a 2 RMSE, un RMSE calculé avec les estimations utilisant les paramètres médians et # un RMSE calculé avec les estimations utilisant les paramètres site-specifique) RMSE1 <- sqrt(mean((TAB$obs_val_num/10-model1_fitted)^2)) RMSE1 RMSE2 <- sqrt(mean((TAB$obs_val_num/10-model2_fitted)^2)) RMSE2 RMSE3_median <- sqrt(mean((TAB$obs_val_num/10-model3_fitted_med)^2)) RMSE3_median RMSE3_site_specific <- sqrt(mean((TAB$obs_val_num/10-model3_fitted)^2)) RMSE3_site_specific RMSE4_median <- sqrt(mean((TAB$obs_val_num/10-model4_fitted_med)^2)) RMSE4_median RMSE4_site_specific <- sqrt(mean((TAB$obs_val_num/10-model4_fitted)^2)) RMSE4_site_specific ################### Predictions en 2015 ################### annee_popu <-c(2009,2010,2011,2012,2013,2014) # on prend l'ensemble des observations sauf celles de l'annee 2015 TAB11505<- TAB[TAB$annee %in% annee_popu,] # on prend l'ensemble des observations de l'annee 2015 avant la date de prediction ici 15 mai TAB21505 <- TAB[TAB$annee == 2015 & TAB$mois < 5,] TAB31505 <- TAB[TAB$annee == 2015 & TAB$mois == 5 & TAB$jour <15,] # on combine les BDDs TAB_estim1505 <- rbind(TAB11505, TAB21505, TAB31505) TAB_estim1505 <- TAB_estim1505[order(TAB_estim1505$dateobs),] TABbis1505 <-TAB[TAB$annee == 2015 & TAB$mois == 5 & TAB$jour>=15,] ######### model1 model1_1505 <- glm(cbind(feuille.m,feuille.s)~time_b, family=binomial(link="logit"), data=TAB_estim1505) theta1505_model1<- coef(model1_1505) Pred1<-predict(model1_1505, newdata=TABbis1505, type="response") ######### model2 model2_1505<-glm(cbind(feuille.m,feuille.s)~time_b*region+risque , family=binomial, data=TAB_estim1505) Pred2<-predict(model2_1505, newdata=TABbis1505, type="response") ######### model4 model4_1505 <- glmer(cbind(feuille.m,feuille.s)~time_b*region+risque + (1+time_b|id_unique), family=binomial, data=TAB_estim1505) theta1505_model4<- fixef(model4_1505) Pred4<-predictSE.mer(model4_1505, newdata=TABbis1505, se.fit = FALSE, type = "response",level = 0, print.matrix = FALSE) #######RMSEP sqrt(mean((TABbis1505$obs_val_num/10-Pred1)^2)) sqrt(mean((TABbis1505$obs_val_num/10-Pred2)^2)) sqrt(mean((TABbis1505$obs_val_num/10-Pred4)^2))