Import packages

library(caret)


Randomly Split Data into Training (75%) and Test (25%) Sets

# Randomly assign 75% of the rows of iris to a training set and the
# remaining rows to a test set.
# Fix the RNG seed so the same split is drawn on every run.
set.seed(99)
n <- nrow(iris)
# sample.int() draws row numbers from 1..n without building 1:n, which would
# wrongly yield c(1, 0) if n were 0.
index <- sample.int(n, size = round(0.75 * n), replace = FALSE)
train <- iris[index, ]
test <- iris[-index, ]
paste0("Observations in training data: ", nrow(train))
## [1] "Observations in training data: 112"
paste0("Observations in testing data: ", nrow(test))
## [1] "Observations in testing data: 38"


Stratified Sampling (Preserving Class Distributions)

# Seed the RNG so the stratified partition below is reproducible.
set.seed(99)
# Class balance of the full data: 50 observations in each Species class.
table(iris[["Species"]])
## 
## versicolor     setosa  virginica 
##         50         50         50
# Draw a single partition that places 75% of each Species class in the
# training rows; list = FALSE asks for the row indices as a matrix rather
# than a list.
index <- createDataPartition(
  y = iris[["Species"]],
  p = 0.75,
  list = FALSE,
  times = 1
)
train <- iris[index, ]
test <- iris[-index, ]
# Training data: 38 rows per class, so the equal class ratio is preserved.
table(train[["Species"]])
## 
## versicolor     setosa  virginica 
##         38         38         38
# Test data: 12 rows per class, so the equal class ratio is preserved.
table(test[["Species"]])
## 
## versicolor     setosa  virginica 
##         12         12         12