estimation_data <- data_frame(iteration=numeric(1000), beta_0=numeric(1000),beta_1=numeric(1000),beta_2=numeric(1000))
#Initialise the beta_1 estimate to a value
beta_1 <- 20
for(i in 1:1000){
#Regress the partial residual of Y (with beta_1*x_1 removed) on x_2
a <- Y - beta_1*x_1
beta_2 <- lm(a~x_2)$coef[2]
#Now keep beta_2 fixed and update beta_1
a <- Y - beta_2 * x_2
beta_1 <- lm(a~x_1)$coef[2]
#Store Iteration Estimates
estimation_data$iteration[i] <- i
estimation_data$beta_0[i] <- lm(a~x_1)$coef[1]
estimation_data$beta_1[i] <- beta_1
estimation_data$beta_2[i] <- beta_2
}
estimation_data
estimation_long <- gather(estimation_data, value="value",key="coefficient",-iteration)
estimation_long
ggplot(estimation_long,aes(x=factor(iteration),y=value , col = coefficient)) + geom_point()
estimation_data <- data_frame(iteration=numeric(1000), beta_0=numeric(1000),beta_1=numeric(1000),beta_2=numeric(1000))
#Initialise the beta_1 estimate to a value
beta_1 <- 20
for(i in 1:1000){
#load previous estimates
if(i>1){
beta_1 <- estimation_data$beta_1[i-1]
beta_2 <- estimation_data$beta_2[i-1]
}
#Regress the partial residual of Y (with beta_1*x_1 removed) on x_2
a <- Y - beta_1*x_1
beta_2 <- lm(a~x_2)$coef[2]
#Now keep beta_2 fixed and update beta_1
a <- Y - beta_2 * x_2
beta_1 <- lm(a~x_1)$coef[2]
#Store Iteration Estimates
estimation_data$iteration[i] <- i
estimation_data$beta_0[i] <- lm(a~x_1)$coef[1]
estimation_data$beta_1[i] <- beta_1
estimation_data$beta_2[i] <- beta_2
}
estimation_long <- gather(estimation_data, value="value",key="coefficient",-iteration)
ggplot(estimation_long,aes(x=factor(iteration),y=value , col = coefficient)) + geom_point()
#Initialise the beta_1 estimate to a value
beta_1 <- 2523
for(i in 1:1000){
#load previous estimates
if(i>1){
beta_1 <- estimation_data$beta_1[i-1]
beta_2 <- estimation_data$beta_2[i-1]
}
#Regress the partial residual of Y (with beta_1*x_1 removed) on x_2
a <- Y - beta_1*x_1
beta_2 <- lm(a~x_2)$coef[2]
#Now keep beta_2 fixed and update beta_1
a <- Y - beta_2 * x_2
beta_1 <- lm(a~x_1)$coef[2]
#Store Iteration Estimates
estimation_data$iteration[i] <- i
estimation_data$beta_0[i] <- lm(a~x_1)$coef[1]
estimation_data$beta_1[i] <- beta_1
estimation_data$beta_2[i] <- beta_2
}
estimation_long <- gather(estimation_data, value="value",key="coefficient",-iteration)
ggplot(estimation_long,aes(x=factor(iteration),y=value , col = coefficient)) + geom_point()
#Initialise the beta_1 estimate to a value
beta_1 <- 202
for(i in 1:1000){
#load previous estimates
if(i>1){
beta_1 <- estimation_data$beta_1[i-1]
beta_2 <- estimation_data$beta_2[i-1]
}
#Regress the partial residual of Y (with beta_1*x_1 removed) on x_2
a <- Y - beta_1*x_1
beta_2 <- lm(a~x_2)$coef[2]
#Now keep beta_2 fixed and update beta_1
a <- Y - beta_2 * x_2
beta_1 <- lm(a~x_1)$coef[2]
#Store Iteration Estimates
estimation_data$iteration[i] <- i
estimation_data$beta_0[i] <- lm(a~x_1)$coef[1]
estimation_data$beta_1[i] <- beta_1
estimation_data$beta_2[i] <- beta_2
}
estimation_long <- gather(estimation_data, value="value",key="coefficient",-iteration)
ggplot(estimation_long,aes(x=factor(iteration),y=value , col = coefficient)) + geom_point()
View(estimation_data)
#Initialise the beta_1 estimate to a value
beta_1 <- 9
estimation_data <- data_frame(iteration=numeric(1000), beta_0=numeric(1000),beta_1=numeric(1000),beta_2=numeric(1000))
for(i in 1:1000){
#load previous estimates
if(i>1){
beta_1 <- estimation_data$beta_1[i-1]
beta_2 <- estimation_data$beta_2[i-1]
}
#Regress the partial residual of Y (with beta_1*x_1 removed) on x_2
a <- Y - beta_1*x_1
beta_2 <- lm(a~x_2)$coef[2]
#Now keep beta_2 fixed and update beta_1
a <- Y - beta_2 * x_2
beta_1 <- lm(a~x_1)$coef[2]
#Store Iteration Estimates
estimation_data$iteration[i] <- i
estimation_data$beta_0[i] <- lm(a~x_1)$coef[1]
estimation_data$beta_1[i] <- beta_1
estimation_data$beta_2[i] <- beta_2
}
estimation_long <- gather(estimation_data, value="value",key="coefficient",-iteration)
ggplot(estimation_long,aes(x=factor(iteration),y=value , col = coefficient)) + geom_point()
ggplot(estimation_long,aes(x=factor(iteration),y=value , col = coefficient)) + geom_smooth()
ggplot(estimation_long,aes(x=factor(iteration),y=value , col = coefficient)) + geom_point(alpha=.2)
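#A quick sanity check (a sketch, assuming Y, x_1 and x_2 from the simulation above still exist):
#the converged backfitting estimates should agree with the full multiple-regression fit
coef(lm(Y ~ x_1 + x_2))
tail(estimation_data, 1) #final backfitting iterate, for comparison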
install.packages("tree")
library(tree)
library(ISLR)
library(tidyverse)
#Classification Trees --------
Carseats$High <- ifelse(Carseats$Sales<=8,"No","Yes")
tree_carseats <- tree(High ~ . - Sales , data = Carseats)
View(Carseats)
summary(tree_carseats)
tree_carseats
library(ISLR)
library(tidyverse)
library(MASS)
library(class)
names(Smarket)
summary(Smarket)
cor(Smarket[,-9])
#You can see strong correlation between year and volume of shares traded
#We fit a logistic GLM to predict Direction categorical variable
glm_model <- glm(data = Smarket,
formula = Direction ~ Lag1 + Lag2 +Lag3 +Lag4 +Lag5 +Volume,
family = "binomial")
summary(glm_model)
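#A minimal follow-up sketch: convert fitted probabilities into class predictions
#("Up"/"Down" are the levels of Smarket$Direction in ISLR)
glm_probs <- predict(glm_model, type = "response")
glm_preds <- ifelse(glm_probs > 0.5, "Up", "Down")
table(glm_preds, Smarket$Direction) #training confusion matrix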
library(tidyverse)
library(ISLR)
library(boot)
library(splines)
library(gam)
library(akima) #Visualising 2D local regression in GAM
#Polynomial Regression and Step Functions
polynomial_fit <- lm(wage ~ poly(age,4) , data = Wage)
#extract coefficients
coefficients(summary(polynomial_fit))
#We can also use raw polynomials as the basis by setting raw = TRUE in poly()
#Remember, if you want to type out the polynomials manually, wrap them within I(age^2) etc., as ^ has a special meaning in R formula notation
#We will recreate figure 7.1
agelims <- range(Wage$age)
age_grid <- seq(agelims[1],agelims[2])
predictions <- predict(polynomial_fit , newdata = list(age=age_grid), se = TRUE)
se_bands <- cbind(predictions$fit - 2*predictions$se.fit ,predictions$fit + 2*predictions$se.fit )
#Now lets plot this
par(mfrow=(c(1,2)), mar =c(4.5,4.5,1,1) , oma = c(0,0,4,0))
plot(Wage$age, Wage$wage, xlim= agelims , cex =0.5 , col = "darkgrey")
title("Degree -4 Polynomial", outer = T)
lines(age_grid, predictions$fit, lwd=2 , col="blue") #remember, the x coordinates are specificed first
matlines(age_grid,se_bands,lwd=1,col='blue',lty=3)
#What degree polynomial will be the best?
fit_1 <- glm(data = Wage , wage ~ age)
fit_2 <- glm(data = Wage , wage ~ poly(age,2))
fit_3 <- glm(data = Wage , wage ~ poly(age,3))
fit_4 <- glm(data = Wage , wage ~ poly(age,4))
fit_5 <- glm(data = Wage , wage ~ poly(age,5))
anova(fit_1,fit_2,fit_3,fit_4,fit_5 ,test = "F")
#degree 3 or 4 seems to be the best
#Anova method is robust and will work if you have other, non-age variables in the model
#Notice this
coefficients(summary(fit_5))
#P-values of the t-statistics are the same!!
#in fact, if you square the t values, they will equal the f-values
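#Sketch of that check: with orthogonal polynomials, the squared t-statistics from fit_5
#line up with the sequential ANOVA F-statistics (t for degree d, squared = F for adding degree d)
coefficients(summary(fit_5))[-1, "t value"]^2
anova(fit_1, fit_2, fit_3, fit_4, fit_5, test = "F")$F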
#Can confirm anova results using cross-validation
cv_results <- map(list(fit_1,fit_2,fit_3,fit_4,fit_5), cv.glm, K = 10, data = Wage) %>% map_dbl(~.x$delta[1])
#Remember that glm logistic is a type of linear model, so lets try a polynomial glm
glm_fit <- glm(I(wage>250)~poly(age,4),data=Wage, family = "binomial")
#note we use the I function as an indicator function which sets values to 1 when wage>250
glm_predictions <- predict(glm_fit, newdata = list(age=age_grid),se = T,type="response")
#standard error bands
se_bands_glm <- cbind(glm_predictions$fit - 2*glm_predictions$se.fit ,glm_predictions$fit + 2*glm_predictions$se.fit )
plot(Wage$age,I(Wage$wage>250),xlim=agelims,type="n",ylim=c(0,0.2))
points(jitter(Wage$age),I((Wage$wage>250)/5), cex=.5,pch="|",col="darkgrey") #plot actual data, need to transform it to keep it in the range
lines(age_grid,glm_predictions$fit,lwd=2,col="blue")
matlines(age_grid,se_bands_glm,lwd=1,col="blue",lty=3)
#Let's create a step function
table(cut(Wage$age,4))
step_lm <- lm(wage~cut(age,4),data = Wage)
coefficients(summary(step_lm))
#Notice how cut creates factors or categorical variables
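#A short sketch: overlay the fitted step function on the age grid (reuses age_grid from above;
#cut()'s breaks depend only on the range of age, which age_grid shares, so predict() is safe here)
step_predictions <- predict(step_lm, newdata = list(age = age_grid), se = TRUE)
plot(Wage$age, Wage$wage, cex = 0.5, col = "darkgrey")
lines(age_grid, step_predictions$fit, lwd = 2, col = "darkgreen")
matlines(age_grid, cbind(step_predictions$fit - 2*step_predictions$se.fit,
step_predictions$fit + 2*step_predictions$se.fit), lwd = 1, col = "darkgreen", lty = 3)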
#Regression Splines
#bs() stands for basis spline: you specify the basis, and it forms a cubic spline by default (note: not a NATURAL cubic spline)
regression_spline <- lm(wage ~bs(age,knots=c(25,40,60)),data = Wage)
summary(regression_spline)
spline_prediction <- predict(regression_spline, newdata= list(age=age_grid) , se = T)
plot(Wage$age,Wage$wage, col ="gray")
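#Add the fitted basis-spline curve and rough 2*SE bands (a sketch, using spline_prediction from above)
lines(age_grid, spline_prediction$fit, lwd = 2, col = "blue")
lines(age_grid, spline_prediction$fit + 2*spline_prediction$se.fit, lty = "dashed", col = "blue")
lines(age_grid, spline_prediction$fit - 2*spline_prediction$se.fit, lty = "dashed", col = "blue")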
#natural spline
natural_spline <- lm(wage~ns(age,df=4), data = Wage) #can specify either the knots or the degrees of FREEDOM; a cubic spline with K knots uses K + 4 degrees of freedom
natural_predictions <- predict(natural_spline , newdata = list(age=age_grid),se=T)
title("natural spline")
lines(age_grid,natural_predictions$fit,col="red")
#Smoothing Spline
plot(Wage$age,Wage$wage, col ="gray")
title("Smoothing Spline")
smoothing_specified <- smooth.spline(Wage$age,Wage$wage,df=16)
smoothing_cv_determined <- smooth.spline(Wage$age, Wage$wage, cv = TRUE)
lines(smoothing_specified,col="green")
lines(smoothing_cv_determined,col="black")
#not much difference in this case
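#Quick check (sketch): see how many effective degrees of freedom cross-validation actually chose
smoothing_cv_determined$df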
#Local Regression
plot(Wage$age,Wage$wage, col ="gray")
title("LOESS ")
loess_fit <- loess(wage~age,span=0.2,data=Wage)
loess_fit_2 <- loess(wage~age,span=0.5,data=Wage)
lines(age_grid,predict(loess_fit,newdata=data_frame(age=age_grid)),col="red")
lines(age_grid,predict(loess_fit_2,newdata=data_frame(age=age_grid)),col="blue")
#The higher the span, the smoother the fit; a span of 0.2 means each local regression uses the nearest 20% of observations around each x_i
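#Small addition (sketch): a legend so the two spans can be told apart on the plot above
legend("topright", legend = c("span = 0.2", "span = 0.5"),
col = c("red", "blue"), lty = 1, lwd = 2, cex = 0.8)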
#GAMs
#Additive model framework: build a separate basis expansion f(x_j) for each predictor, then use OLS to estimate the B_j in the combined model sum of B_j * f(x_j)
#This works because OLS is performed on the basis functions' outputs. Smoothing splines are NOT built from basis functions (you are seeking a general function g(x)), so using them inside a GAM needs special handling
gam_1 <- lm( wage ~ ns(year,4) + ns(age,5) + education , data = Wage)
#Because of the issue stated above, we use the gam library and the gam function instead of the base lm function
gam_2 <- gam(wage~s(year,4) + s(age,5) +education , data = Wage) #s() indicates that we want a smoothing spline with the desired degrees of freedom
par(mfrow=c(1,3))
plot(gam_2,se=T,col="Blue") #very cool!
#plot() invokes plot.gam() automatically for gam objects; we can also call plot.gam() on the "lm" version of the gam (gam_1)
plot.gam(gam_1,se=T,col="red")
#The smoothing spline for year looks linear; let's run an ANOVA on a few models to see whether a linear function of year would suffice
gam_noyear <- gam(wage~ s(age,5) +education , data = Wage)
gam_linear_year <- gam(wage~ year + s(age,5) +education , data = Wage)
anova(gam_noyear,gam_linear_year , gam_2 , test = "F")
#Having linear year is better than not having year. The smoothing spline is not significantly better than a linear function of year!!
#let's look at a summary of one of the models
summary(gam_2)
#TODO: read up on how to understand the output of this
#You can also add in local regression as a building block to a GAM
gam_local_regression_single <- gam(wage ~s(year,df=4) + lo(age,span=.7)+education , data =Wage)
plot(gam_local_regression_single, se =T , col ="blue")
#try interactions with local regression
gam_local_regression_interaction <- gam(wage~lo(year,age,span=.5)+ education , data = Wage)
#Use akima package to visualise this
par(mfrow=c(1,2))
plot(gam_local_regression_interaction , se =T)
#We can also use gams for LOGISTIC REGRESSION
gam_logistic <- gam(I(wage>250) ~year + s(age,df=5) + education , family ="binomial" , data = Wage)
par(mfrow=c(1,3))
plot(gam_logistic , se = T , col = "red")
#Hold on, look at error bars for <HS grad, why are they so big?
table(Wage$education, I(Wage$wage>250))
#there are no people earning above that wage in <HS grad, it is screwing up the model and should be excluded
gam_logistic <- gam(I(wage>250) ~year + s(age,df=5) + education , family ="binomial" , data = Wage, subset =(education!="1. < HS Grad"))
plot(gam_logistic , se = T , col = "purple")
?glmnet
library(tidyverse)
library(ISLR)
library(leaps)# for subset selection
library(glmnet) # for ridge and lasso regression
library(pls) #PCR and PLS regression
#With all of these approaches, perform model-size and variable selection on TRAINING data only (e.g. CV on the training set to decide which model class/size is better)
#Then use the test set only for estimating test MSE
#Finally, refit the chosen model form on ALL of the data
#Best subset --------
Hitters <- na.omit(Hitters)
all_subsets <- regsubsets(Salary ~ . , data = Hitters)
summary(all_subsets)
# What it will do is report the details of the best training model for each model SIZE
#default is max size = 8 so we just pass a new max
all_subsets <- regsubsets(Salary ~ . , data = Hitters , nvmax = 19)
summary_of_subsets <- summary(all_subsets)
summary_of_subsets$cp
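#Rough cross-check (a sketch of what leaps reports): Mallows' Cp for a size-d model is
#RSS/sigma2_hat - n + 2*(d+1), with sigma2_hat taken from the full 19-variable model
n <- nrow(Hitters)
sigma2_hat <- summary_of_subsets$rss[19] / (n - 19 - 1)
summary_of_subsets$rss / sigma2_hat - n + 2 * (2:20) #compare with summary_of_subsets$cp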
#Let's plot this information to see which model size we should try
#(remember how Cp is an unbiased estimator of TEST MSE when the noise-variance estimate is unbiased!)
par(mfrow =c(2,2))
plot(summary_of_subsets$rss , xlab ="No of Variables", ylab="RSS" , type ="l")
plot(summary_of_subsets$adjr2 , xlab ="No of Variables", ylab="Adjusted R-squared" , type ="l")
#The points command works like plot except it plots point on an already existing plot
#instead of creating a new plot
#identify point with highest adjusted r squared
which.max(summary_of_subsets$adjr2 )
points(x = 11 , summary_of_subsets$adjr2[11] , col="red" , cex = 2 , pch = 20)
plot(summary_of_subsets$cp , xlab ="No of Variables", ylab="Cp" , type ="l")
points(which.min(summary_of_subsets$cp) ,summary_of_subsets$cp[which.min(summary_of_subsets$cp)] , col="red", cex = 2 , pch=20 )
plot(summary_of_subsets$bic , xlab ="No of Variables", ylab="BIC" , type ="l")
points(which.min(summary_of_subsets$bic) ,summary_of_subsets$bic[which.min(summary_of_subsets$bic)] , col="red", cex = 2 , pch=20 )
par(mfrow=c(1,1))
#also the package has a plotting function
#Change scale argument to change what is on Y-axis. Look at ?plot.regsubsets
plot(all_subsets , scale ="r2")
#Look at linear coefficients of Model with lowest bic
coef(all_subsets , 6)
#Forward/Backward set selection -----
regfit_forward <- regsubsets(Salary~. , data = Hitters , nvmax=19 ,method = "forward")
summary(regfit_forward)
regfit_backward <- regsubsets(Salary~. , data = Hitters , nvmax=19 ,method = "backward")
summary(regfit_backward)
#How to select best set?
#Validation Set Approach------
#remember, we do subset selection on TRAINING data; we need held-out test data
#to assess the models, otherwise we wouldn't get a good estimate of test MSE
set.seed(1)
train <- sample(c(TRUE,FALSE), nrow(Hitters) , replace = TRUE)
test <- !train
regfit_best <- regsubsets(Salary ~ . , data = Hitters[train,] , nvmax = 19)
#We can look at "best" models for each size by RSS
#Now we create a MODEL MATRIX to be used as a data matrix for the test data
test_mat <- model.matrix(Salary ~ . , data = Hitters[test,])
test_mse <- c(numeric(19))
for( i in 1:19){
coeffi <- coef(regfit_best,id = i)
prediction <- test_mat[,names(coeffi)]%*%coeffi #multiply data matrix by extracted coefficients
test_mse[i] <- mean((Hitters$Salary[test] - prediction)^2)
}
#View errors
test_mse
#find min
which.min(test_mse)
#Model 10 minimises test mse
#let's look at the model specification
coef(regfit_best, id = 10)
#A little tedious because there is no predict method in the leaps package
#Let's write our own
predict.regsubsets <- function(object,newdata,id,...){
form <- as.formula(object$call[[2]])
mat <- model.matrix(form,newdata)
coefi <- coef(object , id =id)
xvars <- names(coefi)
#return this now
mat[ , xvars]%*%coefi
}
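#Example use of the helper (sketch): test-set predictions for the size-10 model,
#which should reproduce test_mse[10] from the loop above
pred_10 <- predict(regfit_best, Hitters[test, ], id = 10)
mean((Hitters$Salary[test] - pred_10)^2)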
#Only use test and training split when you are trying to figure out which model size /class to use
# Once you have decided that you will use model of size 10, use ALL data to create those models
#This is because you have already validated it and determined that model size 10 is the most accurate
best_model <- regsubsets(Salary ~ . , data = Hitters , nvmax =19) %>% coef(. ,id=10)
summary(regfit_forward)
regfit_best
x <- c(5,8,10)
for (i in seq_along(x)){print(i)}
for (i in x ){print(i)}
# != is R's inequality operator (there is no <>)
2 == 2/1
library(tree)
library(ISLR)
library(tidyverse)
Carseats$High <- ifelse(Carseats$Sales<=8,"No","Yes")
tree_carseats <- tree(High ~ . - Sales , data = Carseats)
tree_carseats
Carseats
summary(tree_carseats)
tree_carseats <- tree(High ~ . , data = Carseats)
tree_carseats <- tree(High ~ . - Sales , data = Carseats)
tree_carseats
#Classification Trees --------
Carseats$High <- ifelse(Carseats$Sales<=8,"No","Yes") %>% factor()
tree_carseats <- tree(High ~ . - Sales , data = Carseats)
summary(tree_carseats)
#we can see a 9% training error
plot(tree_carseats)
text(tree_carseats, pretty = 0)
#Can see that shelve location is the most important variable, as the tree splits on it first
tree_carseats
tree_carseats$frame
tree_carseats$frame %>% summary()
#Divide data into test and train sets
set.seed(2)
train <- sample(nrow(Carseats),200)
carseats_test <- Carseats[-train,]
tree_carseats <- tree(High ~ . - Sales , Carseats , subset = train)
tree_predictions <- predict(tree_carseats,carseats_test, type = "class")
#construct confusion matrix
table(tree_predictions, carseats_test$High)
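#Sketch: overall hit rate from the confusion matrix
conf_mat <- table(tree_predictions, carseats_test$High)
sum(diag(conf_mat)) / sum(conf_mat)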
#Let's try pruning these trees
set.seed(3)
cv_carseats <- cv.tree(tree_carseats, FUN = prune.misclass)
cv_carseats
#The dev object refers to the number of misclassifications
par(mfrow = c(1,2))
plot(cv_carseats$size, cv_carseats$dev, type = "b", ylab = "No of misclassifications", xlab = "No of terminal nodes")
plot(cv_carseats$k, cv_carseats$dev, type = "b", ylab = "No of misclassifications", xlab = "value of reg. param")
prune_carseats <- prune.misclass(tree_carseats , best = 9)
par(mfrow=c(1,1))
plot(prune_carseats)
text(prune_carseats, pretty = 0)
tree_predictions <- predict(prune_carseats,carseats_test, type = "class")
#construct confusion matrix
table(tree_predictions,carseats_test$High)
# overall hit rate improved to 77%
#Regression Trees ------------------
library(MASS)
set.seed(1)
train <- sample(nrow(Boston), nrow(Boston)/2)
tree_boston <- tree(medv ~ . , Boston , subset = train)
summary(tree_boston)
plot(tree_boston)
text(tree_boston, pretty = 0)
#see if tree should be pruned
cv_boston <- cv.tree(tree_boston)
cv_boston
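#Sketch of the usual next steps: prune (the "best" size here is illustrative)
#and estimate test MSE for the regression tree
prune_boston <- prune.tree(tree_boston, best = 5)
yhat_tree <- predict(tree_boston, newdata = Boston[-train,])
mean((yhat_tree - Boston[-train,]$medv)^2)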
install.packages("ranger")
?ranger::ranger
#bagging is a special case of a random forest with m = p; i.e. consider ALL p predictors at each split
install.packages("randomForest")
library(randomForest)
set.seed(1)
Boston <- MASS::Boston
bag_boston <- randomForest( medv ~ . , data = Boston , subset = train , mtry = 13 , importance = T)
# mtry is the number of predictors considered at each split... here it is set to ALL 13 predictors, i.e. bagging
bag_boston
#calculate test mse
yhat_bag <- predict(bag_boston , newdata = Boston[-train,])
plot(yhat_bag, Boston[-train,]$medv)
abline(0, 1, col = "red")
mean((yhat_bag - Boston[-train,]$medv)^2)
#try a random forest
set.seed(1)
rf_boston <- randomForest(medv ~ . , data = Boston , subset = train , mtry = 13 , ntree = 25)
rf_boston <- randomForest(medv ~ . , data = Boston , subset = train , mtry = 6 , ntree = 25)
rf_boston <- randomForest(medv ~ . , data = Boston , subset = train , mtry = 6, importance = T)
yhat_rf <- predict(rf_boston , newdata = Boston[-train,])
mean((yhat_rf - Boston[-train,]$medv)^2)
importance(rf_boston)
#two importance measures: mean decrease in accuracy on out-of-bag samples (%IncMSE) and total decrease in node impurity (IncNodePurity)
varImpPlot(rf_boston)