Import packages
library(olsrr)
library(MASS) # stepAIC function
Stepwise Regression using olsrr package
Forward Stepwise Regression
# Fit mpg on every other column of mtcars, then run olsrr's forward selection
# on that fit; terms are entered one at a time based on their p-value.
# NOTE(review): newer olsrr releases appear to expose this routine as
# ols_step_forward_p() -- confirm against the installed olsrr version.
mod_forward <- lm(mpg ~ ., data = mtcars)
step_forward <- ols_step_forward(mod_forward)
## We are selecting variables based on p value...
## 1 variable(s) added....
## 1 variable(s) added...
## 1 variable(s) added...
## No more variables satisfy the condition of penter: 0.3
## Forward Selection Method
##
## Candidate Terms:
##
## 1 . cyl
## 2 . disp
## 3 . hp
## 4 . drat
## 5 . wt
## 6 . qsec
## 7 . vs
## 8 . am
## 9 . gear
## 10 . carb
##
## -------------------------------------------------------------------------
## Selection Summary
## -------------------------------------------------------------------------
## Variable Adj.
## Step Entered R-Square R-Square C(p) AIC RMSE
## -------------------------------------------------------------------------
## 1 wt 0.7528 0.7446 12.6618 166.0294 3.0459
## 2 cyl 0.8302 0.8185 1.9295 156.0101 2.5675
## 3 hp 0.8431 0.8263 1.8036 155.4766 2.5115
## -------------------------------------------------------------------------
Backward Stepwise Regression
# Fit mpg on every other column of mtcars, then run olsrr's backward
# elimination on that fit; terms are dropped one at a time based on p-value.
# NOTE(review): newer olsrr releases appear to expose this routine as
# ols_step_backward_p() -- confirm against the installed olsrr version.
mod_backward <- lm(mpg ~ ., data = mtcars)
step_backward <- ols_step_backward(mod_backward)
## We are eliminating variables based on p value...
## No more variables satisfy the condition of prem: 0.3
## Backward Elimination Method
##
## Candidate Terms:
##
## 1 . cyl
## 2 . disp
## 3 . hp
## 4 . drat
## 5 . wt
## 6 . qsec
## 7 . vs
## 8 . am
## 9 . gear
## 10 . carb
##
## ------------------------------------------------------------------------
## Elimination Summary
## ------------------------------------------------------------------------
## Variable Adj.
## Step Removed R-Square R-Square C(p) AIC RMSE
## ------------------------------------------------------------------------
## 1 cyl 0.8689 0.8153 9.5600 161.7271 2.5900
## 2 vs 0.8687 0.823 7.5993 159.7853 2.5353
## 3 carb 0.8681 0.8296 5.6994 157.9333 2.4877
## 4 gear 0.8667 0.8347 3.9281 156.2687 2.4503
## 5 drat 0.8637 0.8375 2.4167 154.9740 2.4293
## ------------------------------------------------------------------------
Stepwise Regression (both directions)
# Fit mpg on every other column of mtcars, then run olsrr's stepwise
# selection, which may both add and remove terms based on p-value.
# NOTE(review): newer olsrr releases appear to expose this routine as
# ols_step_both_p() -- confirm against the installed olsrr version.
mod_both <- lm(mpg ~ ., data = mtcars)
step_both <- ols_stepwise(mod_both)
## We are selecting variables based on p value...
## 1 variable(s) added....
## 1 variable(s) added...
## No more variables to be added or removed.
## Stepwise Selection Method
##
## Candidate Terms:
##
## 1 . cyl
## 2 . disp
## 3 . hp
## 4 . drat
## 5 . wt
## 6 . qsec
## 7 . vs
## 8 . am
## 9 . gear
## 10 . carb
##
## -------------------------------------------------------------------------------------
## Stepwise Selection Summary
## -------------------------------------------------------------------------------------
## Added/ Adj.
## Step Variable Removed R-Square R-Square C(p) AIC RMSE
## -------------------------------------------------------------------------------------
## 1 wt addition 0.753 0.745 12.6620 166.0294 3.0459
## 2 cyl addition 0.830 0.819 1.9300 156.0101 2.5675
## -------------------------------------------------------------------------------------
Alternatively, Stepwise Regression by AIC using MASS
Defining empty and full models
# Bounds of the AIC search: mod_empty is the intercept-only fit and mod_full
# regresses mpg on every other column of mtcars.
# NOTE(review): the stepAIC traces list cyl and gear with 2 Df and report a
# coefficient named ammanual, so mtcars appears to have been recoded with
# factor columns before this point -- confirm where that happens.
mod_empty <- lm(mpg ~ 1, data = mtcars)
mod_full <- lm(mpg ~ ., data = mtcars)
Forward Stepwise Regression
# Forward search by AIC: start from the intercept-only fit and only ever add
# terms, bounded above by the terms of mod_full.
stepAIC(
  mod_empty,
  scope = list(lower = mod_empty, upper = mod_full),
  direction = "forward"
)
## Start: AIC=115.94
## mpg ~ 1
##
## Df Sum of Sq RSS AIC
## + wt 1 847.73 278.32 73.217
## + disp 1 808.89 317.16 77.397
## + cyl 2 824.78 301.26 77.752
## + hp 1 678.37 447.67 88.427
## + drat 1 522.48 603.57 97.988
## + vs 1 496.53 629.52 99.335
## + gear 2 483.24 642.80 102.003
## + am 1 405.15 720.90 103.672
## + carb 1 341.78 784.27 106.369
## + qsec 1 197.39 928.66 111.776
## <none> 1126.05 115.943
##
## Step: AIC=73.22
## mpg ~ wt
##
## Df Sum of Sq RSS AIC
## + cyl 2 95.263 183.06 63.810
## + hp 1 83.274 195.05 63.840
## + qsec 1 82.858 195.46 63.908
## + vs 1 54.228 224.09 68.283
## + carb 1 44.602 233.72 69.628
## + disp 1 31.639 246.68 71.356
## + gear 2 40.372 237.95 72.202
## <none> 278.32 73.217
## + drat 1 9.081 269.24 74.156
## + am 1 0.002 278.32 75.217
##
## Step: AIC=63.81
## mpg ~ wt + cyl
##
## Df Sum of Sq RSS AIC
## + hp 1 22.2810 160.78 61.657
## <none> 183.06 63.810
## + qsec 1 10.9487 172.11 63.837
## + carb 1 9.2436 173.81 64.152
## + vs 1 1.8416 181.22 65.487
## + disp 1 0.1096 182.95 65.791
## + am 1 0.0903 182.97 65.794
## + drat 1 0.0727 182.99 65.798
## + gear 2 6.6815 176.38 66.620
##
## Step: AIC=61.66
## mpg ~ wt + cyl + hp
##
## Df Sum of Sq RSS AIC
## + am 1 9.7520 151.03 61.655
## <none> 160.78 61.657
## + drat 1 2.4377 158.34 63.168
## + carb 1 1.2623 159.51 63.405
## + vs 1 0.6545 160.12 63.527
## + disp 1 0.6508 160.13 63.527
## + qsec 1 0.2294 160.55 63.611
## + gear 2 7.3662 153.41 64.156
##
## Step: AIC=61.65
## mpg ~ wt + cyl + hp + am
##
## Df Sum of Sq RSS AIC
## <none> 151.03 61.655
## + vs 1 7.3459 143.68 62.059
## + qsec 1 7.0439 143.98 62.126
## + disp 1 0.6168 150.41 63.524
## + drat 1 0.2202 150.81 63.608
## + carb 1 0.0000 151.03 63.655
## + gear 2 1.3605 149.66 65.365
##
## Call:
## lm(formula = mpg ~ wt + cyl + hp + am, data = mtcars)
##
## Coefficients:
## (Intercept) wt cyl6 cyl8 hp
## 33.70832 -2.49683 -3.03134 -2.16368 -0.03211
## ammanual
## 1.80921
Backward Stepwise Regression
# Backward search by AIC: start from the full fit and drop one term per step
# until no removal lowers the AIC any further.
stepAIC(mod_full, direction = "backward")
## Start: AIC=70.87
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb
##
## Df Sum of Sq RSS AIC
## - gear 2 5.1061 135.16 68.103
## - drat 1 0.9408 130.99 69.101
## - disp 1 3.4354 133.49 69.705
## - carb 1 3.9503 134.00 69.828
## - vs 1 6.5693 136.62 70.447
## - qsec 1 7.1353 137.19 70.579
## - cyl 2 16.4500 146.50 70.682
## <none> 130.05 70.870
## - am 1 14.6316 144.68 72.282
## - hp 1 22.1573 152.21 73.905
## - wt 1 23.6065 153.66 74.208
##
## Step: AIC=68.1
## mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + carb
##
## Df Sum of Sq RSS AIC
## - drat 1 0.025 135.18 66.108
## - carb 1 3.866 139.02 67.005
## - vs 1 4.035 139.19 67.044
## - disp 1 4.732 139.89 67.204
## - qsec 1 4.941 140.10 67.251
## - cyl 2 14.238 149.40 67.308
## <none> 135.16 68.103
## - am 1 15.929 151.09 69.668
## - hp 1 18.284 153.44 70.163
## - wt 1 31.992 167.15 72.901
##
## Step: AIC=66.11
## mpg ~ cyl + disp + hp + wt + qsec + vs + am + carb
##
## Df Sum of Sq RSS AIC
## - vs 1 4.250 139.43 65.099
## - carb 1 4.808 139.99 65.227
## - disp 1 4.895 140.08 65.247
## - qsec 1 4.918 140.10 65.252
## - cyl 2 17.095 152.28 65.919
## <none> 135.18 66.108
## - am 1 16.829 152.01 67.863
## - hp 1 19.891 155.07 68.501
## - wt 1 33.543 168.73 71.201
##
## Step: AIC=65.1
## mpg ~ cyl + disp + hp + wt + qsec + am + carb
##
## Df Sum of Sq RSS AIC
## - carb 1 2.898 142.33 63.757
## - disp 1 4.214 143.65 64.052
## - cyl 2 13.993 153.43 64.160
## <none> 139.43 65.099
## - qsec 1 10.717 150.15 65.469
## - am 1 14.361 153.79 66.236
## - hp 1 15.649 155.08 66.503
## - wt 1 36.334 175.77 70.510
##
## Step: AIC=63.76
## mpg ~ cyl + disp + hp + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - disp 1 1.651 143.98 62.126
## - cyl 2 11.107 153.44 62.162
## - qsec 1 8.078 150.41 63.524
## <none> 142.33 63.757
## - hp 1 15.403 157.73 65.046
## - am 1 17.424 159.75 65.453
## - wt 1 40.707 183.04 69.807
##
## Step: AIC=62.13
## mpg ~ cyl + hp + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - cyl 2 16.085 160.07 61.515
## - qsec 1 7.044 151.03 61.655
## <none> 143.98 62.126
## - hp 1 15.443 159.42 63.387
## - am 1 16.566 160.55 63.611
## - wt 1 52.932 196.91 70.145
##
## Step: AIC=61.52
## mpg ~ hp + wt + qsec + am
##
## Df Sum of Sq RSS AIC
## - hp 1 9.219 169.29 61.307
## <none> 160.07 61.515
## - qsec 1 20.225 180.29 63.323
## - am 1 25.993 186.06 64.331
## - wt 1 78.494 238.56 72.284
##
## Step: AIC=61.31
## mpg ~ wt + qsec + am
##
## Df Sum of Sq RSS AIC
## <none> 169.29 61.307
## - am 1 26.178 195.46 63.908
## - qsec 1 109.034 278.32 75.217
## - wt 1 183.347 352.63 82.790
##
## Call:
## lm(formula = mpg ~ wt + qsec + am, data = mtcars)
##
## Coefficients:
## (Intercept) wt qsec ammanual
## 9.618 -3.917 1.226 2.936
Stepwise Regression (both)
# Bidirectional search by AIC: start from the intercept-only fit; at each step
# a term may be added or removed, bounded above by the terms of mod_full.
stepAIC(
  mod_empty,
  scope = list(lower = mod_empty, upper = mod_full),
  direction = "both"
)
## Start: AIC=115.94
## mpg ~ 1
##
## Df Sum of Sq RSS AIC
## + wt 1 847.73 278.32 73.217
## + disp 1 808.89 317.16 77.397
## + cyl 2 824.78 301.26 77.752
## + hp 1 678.37 447.67 88.427
## + drat 1 522.48 603.57 97.988
## + vs 1 496.53 629.52 99.335
## + gear 2 483.24 642.80 102.003
## + am 1 405.15 720.90 103.672
## + carb 1 341.78 784.27 106.369
## + qsec 1 197.39 928.66 111.776
## <none> 1126.05 115.943
##
## Step: AIC=73.22
## mpg ~ wt
##
## Df Sum of Sq RSS AIC
## + cyl 2 95.26 183.06 63.810
## + hp 1 83.27 195.05 63.840
## + qsec 1 82.86 195.46 63.908
## + vs 1 54.23 224.09 68.283
## + carb 1 44.60 233.72 69.628
## + disp 1 31.64 246.68 71.356
## + gear 2 40.37 237.95 72.202
## <none> 278.32 73.217
## + drat 1 9.08 269.24 74.156
## + am 1 0.00 278.32 75.217
## - wt 1 847.73 1126.05 115.943
##
## Step: AIC=63.81
## mpg ~ wt + cyl
##
## Df Sum of Sq RSS AIC
## + hp 1 22.281 160.78 61.657
## <none> 183.06 63.810
## + qsec 1 10.949 172.11 63.837
## + carb 1 9.244 173.81 64.152
## + vs 1 1.842 181.22 65.487
## + disp 1 0.110 182.95 65.791
## + am 1 0.090 182.97 65.794
## + drat 1 0.073 182.99 65.798
## + gear 2 6.682 176.38 66.620
## - cyl 2 95.263 278.32 73.217
## - wt 1 118.204 301.26 77.752
##
## Step: AIC=61.66
## mpg ~ wt + cyl + hp
##
## Df Sum of Sq RSS AIC
## + am 1 9.752 151.03 61.655
## <none> 160.78 61.657
## + drat 1 2.438 158.34 63.168
## + carb 1 1.262 159.51 63.405
## + vs 1 0.655 160.12 63.527
## + disp 1 0.651 160.13 63.527
## + qsec 1 0.229 160.55 63.611
## - hp 1 22.281 183.06 63.810
## - cyl 2 34.270 195.05 63.840
## + gear 2 7.366 153.41 64.156
## - wt 1 116.390 277.17 77.084
##
## Step: AIC=61.65
## mpg ~ wt + cyl + hp + am
##
## Df Sum of Sq RSS AIC
## <none> 151.03 61.655
## - am 1 9.752 160.78 61.657
## + vs 1 7.346 143.68 62.059
## + qsec 1 7.044 143.98 62.126
## - cyl 2 29.265 180.29 63.323
## + disp 1 0.617 150.41 63.524
## + drat 1 0.220 150.81 63.608
## + carb 1 0.000 151.03 63.655
## + gear 2 1.361 149.66 65.365
## - hp 1 31.943 182.97 65.794
## - wt 1 46.173 197.20 68.191
##
## Call:
## lm(formula = mpg ~ wt + cyl + hp + am, data = mtcars)
##
## Coefficients:
## (Intercept) wt cyl6 cyl8 hp
## 33.70832 -2.49683 -3.03134 -2.16368 -0.03211
## ammanual
## 1.80921