# Estimation of a causal effect of an agricultural practice based on observational data
# Korgan Aldebert (Acta), François Brun (Acta), David Makowski (Inrae), 2023-12-18
# Fertigation data : part 4. Doubly Robust Estimation of Causal Effects


# Loading the fertigation data. The data present a bias in the amount of irrigation 
# depending on the implementation of fertigation (see part 1)
# The data are simulated, so the "true" effects are known:
# intercept: 72.94, coeff_Drip: 8.49, coeff_Irrigation: 0.02904, coeff_Nitrogen: 0.1
# Read the fertigation data set (semicolon-separated file with a header row).
# TRUE is spelled out: the shorthand T is an ordinary variable that can be reassigned.
dfbiais = read.table("Drip_Fertigation_biais.csv", sep = ";", header = TRUE)

# Fail early, with a readable message, if a column used later in this script
# is absent (typically a wrong separator or a wrong file).
required_cols = c("Drip", "Irrigation", "Nitrogen", "Yield")
missing_cols = setdiff(required_cols, names(dfbiais))
if (length(missing_cols) > 0) {
  stop("Missing column(s) in Drip_Fertigation_biais.csv: ",
       paste(missing_cols, collapse = ", "), call. = FALSE)
}

# Binary treatment indicator: 1 when Drip is "Yes", 0 otherwise.
dfbiais$treat = ifelse(dfbiais$Drip == "Yes", 1, 0)

################################################################################
# Step 1. Propensity score estimation using logistic regression - f()
# Assuming an expert has identified the relevant confounding variables:
# Irrigation and Nitrogen
formule = treat ~ Irrigation + Nitrogen

# Model the probability of receiving the treatment given these variables
# (binomial family with a logit link)
propensity_f = glm(formule, family = binomial(link = "logit"), data = dfbiais)

# Propensity score = fitted probability of treatment for each observation
dfbiais$pscore = predict(propensity_f, type = "response")

# Positivity check: Step 3 divides by pscore and by (1 - pscore), so a score
# numerically equal to 0 or 1 would give infinite or undefined weights
if (any(dfbiais$pscore <= 0 | dfbiais$pscore >= 1, na.rm = TRUE)) {
  warning("Some propensity scores are equal to 0 or 1: ",
          "the inverse-probability weights used in Step 3 are not finite.",
          call. = FALSE)
}

# print() is explicit so that the summary is also displayed when the script
# is run through source(), where top-level values are not auto-printed
print(summary(propensity_f))


################################################################################
# Step 2. Outcome (standardisation) model g(): yield as a function of the
# treatment and of the two covariates
standardisation_g = lm(
  formula = Yield ~ Drip + Irrigation + Nitrogen,
  data = dfbiais
)

# Two counterfactual copies of the data: same covariates for every observation,
# but the treatment is forced to "Yes" in one copy and to "No" in the other
DripYes = DripNo = dfbiais
DripYes[["Drip"]] = "Yes"
DripNo[["Drip"]] = "No"

# Yield predicted by g() under each forced treatment
DripYes[["yieldpredict"]] = predict(standardisation_g, newdata = DripYes)
DripNo[["yieldpredict"]] = predict(standardisation_g, newdata = DripNo)

# Observed yield minus the yield predicted by g() under each forced treatment
DripYes[["residuals"]] = with(DripYes, Yield - yieldpredict)
DripNo[["residuals"]] = with(DripNo, Yield - yieldpredict)

################################################################################
# Step 3. Doubly robust estimation (outcome model g() + propensity score f())

# Step 3a. Per-observation contributions.
# Mean yield expected if every observation had been treated (treat = 1):
# prediction of g() with Drip forced to "Yes", corrected by the residuals of the
# observations actually treated, weighted by the inverse of their propensity score
DripYes$estimation = with(DripYes, yieldpredict + (treat / pscore) * residuals)
# Mean yield expected if no observation had been treated (treat = 0):
# prediction of g() with Drip forced to "No", corrected by the residuals of the
# untreated observations, weighted by the inverse of (1 - propensity score)
DripNo$estimation = with(DripNo, yieldpredict + ((1 - treat) / (1 - pscore)) * residuals)

# Step 3b. The result: difference between the two estimated mean yields,
# i.e. the estimated average effect of the treatment
effect_dr = mean(DripYes$estimation) - mean(DripNo$estimation)

# The estimate is stored and printed explicitly so that it is not lost when the
# script is run through source(), where top-level values are not auto-printed
print(effect_dr)

################################################################################
# Step 4 (not shown). As with the other methods, a bootstrap can be used to obtain a confidence interval for the estimate.


# end of file