# Estimation of a causal effect of an agricultural practice based on observational data
# Korgan Aldebert (Acta), François Brun (Acta), David Makowski (Inrae), 2023-12-18
# Fertigation data : part 2. Inverse propensity weighting (IPW)

library(boot)

# Loading the fertigation data. The data present a bias in the amount of irrigation 
# depending on the implementation of fertigation (see part 1)
# the data are simulated, thus we know the "true" effects 
# intercept: 72.94 coeff_Drip : 8.49 coeff_irrigation Irrigation : 0.02904 coeff_nitrogen : 0.1
dfbiais = read.table("Drip_Fertigation_biais.csv", sep=";",header=T)
dfbiais$treat = ifelse(dfbiais$Drip=="Yes",1,0)

################################################################################
# Step 1. Propensity score estimation using logistic regression
# Assuming we have an expert to identify the relevant variables 
# that are sources of confusion : Irrigation and Nitrogen
formule = c("treat ~ Irrigation + Nitrogen") 

# we want to predict the probability (binomial with a logit link) to have a treatement according to the variables
reg = glm(formule, family = binomial(link = "logit"), data=dfbiais)
dfbiais$pscore = predict(reg, type='response')
summary(reg)


################################################################################
# Step 2. Inverse propensity weighting (IPW) 
# computation of the weigth

# here, we use the computation of weight for an ATE estimation
# The average treatment effect (ATE) is used when we are interested in the average treatment of the entire population, whereas the average treatment effect of the treated (ATT) is used when we are only interested in the average treatment effect of those treated
dfbiais$weight = with(dfbiais, treat/pscore + (1-treat)/(1-pscore))

# You need to analyze the distribution of weight, because for extreme values of p-score (very near 0 ou 1) you will obtain very big weights which will give too much importance to a few individual.
hist(dfbiais$weight)
summary(dfbiais$weight)
quantile(dfbiais$weight, probs=c(0.99))
# Here, no extrem values. A possibility is to set a threshold to eliminate extrem weight value or to trim
#dfbiais[dfbiais$weight<=quantile(dfbiais$weight, probs=c(0.99)),"weight"]=quantile(dfbiais$weight, probs=c(0.99))
#dfbiais[dfbiais$weight<=quantile(dfbiais$weight, probs=c(0.99)),"weight"]=NA


################################################################################
# Step 3. Comparison of the dataset before and after weighting (and treshold)
# Il est aussi possible d'utiliser le package cobalt pour voir l'effet de la pondération
# des individus sur la population.  
library(cobalt)
balIPTW = bal.tab(treat ~ Irrigation + Nitrogen + pscore, 
                  data = na.omit(dfbiais),
                  weights = "weight",
                  s.d.denom = "treated",
                  disp.v.ratio = TRUE,
                  un = TRUE)
balIPTW

################################################################################
# Step 4. The result : comparison of the means of our variable of interest between the two populations of individuals
# Estimation of the effect by weighted regression
weight_effet_effect = glm(Yield ~ treat, weights = weight,data = dfbiais)
summary(weight_effet_effect)
# The estimate of our effect and its Std error
summary(weight_effet_effect)$coefficients[1:2,1:2] 

# or directly
#Value_T1 = weighted.mean(x=dfbiais[dfbiais$treat==1,"Yield"], w=dfbiais[dfbiais$treat==1,"weight"])
#Value_T0 = weighted.mean(x=dfbiais[dfbiais$treat==0,"Yield"], w=dfbiais[dfbiais$treat==0,"weight"])
#Value_T1-Value_T0

################################################################################
# step 5. Obtaining a confidence interval by Bootstrap
# function which allows us to obtain an estimate by inverse weighting

iptw.boot=function(k_dfbiais,indices) {
  # resampling of the original/full data set
  k_dfbiais=dfbiais[indices,]
  # Propensity score estimation (step 1)
  ps.out = glm(treat ~ Irrigation + Nitrogen,
               family = binomial(link=logit),
               data = k_dfbiais)   
  k_dfbiais$pscore <- ps.out$fitted
  # Inverse propensity weighting (IPW) (step 2)
  k_dfbiais$weight <- with(k_dfbiais, treat + (1-treat)/(1-pscore))

  # Here, no extrem values. A possibility is to set a threshold to eliminate extrem weight value or to trim
  #dfbiais[dfbiais$weight<=quantile(dfbiais$weight, probs=c(0.99)),"weight"]=quantile(dfbiais$weight, probs=c(0.99))
  #dfbiais[dfbiais$weight<=quantile(dfbiais$weight, probs=c(0.99)),"weight"]=NA
  
  # Estimation of the effect by weighted regression (step 3)
  glm.out=glm(Yield ~ treat + pscore, weight=weight,data=k_dfbiais)
  # Sortie de l'effet moyen estimé et de l'écart-type associé
  res = summary(glm.out)$coefficients[2,1:2]
  return(res)
}

# Bootstrap repeated R times, storing the estimates
boot.out = boot(data = dfbiais, # Les données sur lesquelles on effectue le bootstrap
                statistic = iptw.boot, # La fonction pour obtenir nos estimations
                R = 1000) # Le nombre de répétitions

# Confidence interval estimation
boot.ci(boot.out)

# end of file