# Author : Andrew R Thomson et al.
# Email : a.r.thomson@ucl.ac.uk
# Title : Random Forest Barometer validation
# Date : 3rd July, 2020.

# Loading required packages (they must be installed first).
library(randomForest)
library(caTools)
library(tidyverse)
library(caret)
library(Metrics)
library(e1071)

# Reading data from file (change the path to point at your local copy).
data_path <- "/Users/andrewthomson/Documents/Archive work/Bristol/Bristol Projects/new_maj_manucript/29June/data_filteredforvalidation.txt"
lit_maj_nat <- read.delim(data_path)

# Fix the random seed so that the train/test split and the forest grown below
# are reproducible from run to run. Change the value to draw a different split.
set.seed(2020)

# Create a single training/test partition with a 70:30 split.
# `spl` is TRUE for rows assigned to the training set and FALSE otherwise.
spl <- sample.split(lit_maj_nat$ExptP, SplitRatio = 0.7)
Barom_Train <- subset(lit_maj_nat, spl)
Barom_Test <- subset(lit_maj_nat, !spl)

# Train a single random forest regression model with 75 trees, predicting
# ExptP from every other column of the training data.
Barom_rf <- randomForest(
  ExptP ~ .,
  data = Barom_Train,
  nodesize = 25,
  ntree = 75
)

# Out-of-bag RMSE of the complete forest.
# `Barom_rf$mse` holds one value per cumulative number of trees (1..ntree), so
# the last element describes the full 75-tree model. Averaging the whole vector
# would mix in the error of forests made of only a few trees.
sqrt(tail(Barom_rf$mse, 1))

# Out-of-bag pseudo R-squared of the complete forest (same reasoning: `rsq` is
# also stored per cumulative number of trees).
tail(Barom_rf$rsq, 1)

# RMSE on the held-out 30% test partition, which the forest never saw.
Barom_Test_pred <- predict(Barom_rf, newdata = Barom_Test)
sqrt(mean((Barom_Test$ExptP - Barom_Test_pred)^2))

#______________________________________________
# Running various validation strategies.
# Create the grid of candidate values for mtry, i.e. the number of input
# parameters (compositional elements) offered to the forest at each split.
tunegrid <- expand.grid(.mtry = c(5, 7, 9))

#______________________________________________
# Random Forest with 10-fold Cross Validation
# Note: trainControl's `p` argument only applies to method = "LGOCV", so it is
# not set here. `savePredictions` is spelled out in full rather than relying
# on partial argument matching, and TRUE is used instead of the reassignable T.
ctrl <- trainControl(method = "cv", number = 10, savePredictions = TRUE)

# Training the model (seed fixed so the fold assignment is reproducible).
set.seed(2020)
mod_rf_cv <- train(
  ExptP ~ .,
  data = lit_maj_nat,
  method = "rf",
  tuneGrid = tunegrid,
  trControl = ctrl,
  ntree = 75
)

# Results of K-fold CV RFR
mod_rf_cv$results

#______________________________________________
# Random Forest with bootstrap Validation (1000 bootstrap resamples)
ctrl <- trainControl(method = "boot", number = 1000, savePredictions = TRUE)

# Training the model (seed fixed so the bootstrap resamples are reproducible).
set.seed(2020)
modBOOT <- train(
  ExptP ~ .,
  data = lit_maj_nat,
  method = "rf",
  tuneGrid = tunegrid,
  trControl = ctrl,
  ntree = 75
)

# Results of bootstrap validation
modBOOT$results

#______________________________________________
# Random Forest with Leave One Out Cross Validation
ctrl <- trainControl(method = "LOOCV", savePredictions = TRUE)

# Training the model (seed fixed so each forest is grown reproducibly).
set.seed(2020)
mod <- train(
  ExptP ~ .,
  data = lit_maj_nat,
  method = "rf",
  trControl = ctrl,
  tuneGrid = tunegrid,
  ntree = 75
)

# Results of LOOCV RFR
mod$results