Import packages

library(dplyr)


Create a data frame with missing values in a categorical feature

head(iris)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# Work on a copy so the built-in iris data set itself is not modified
iris_test <- iris
# Convert Species from factor to plain character strings
iris_test$Species <- as.character(iris_test$Species)
# Set Species to NA in 10 randomly chosen rows. The row count is taken from
# the data and the column is addressed by name rather than by position.
# No seed is set, so the affected rows differ from run to run.
iris_test$Species[sample(nrow(iris_test), 10)] <- NA


Impute missing Species using mode

# Defining mode function
#
# Returns the most frequent non-missing value of `variable` as a character
# string. Ties are resolved in favour of the first level in table order.
# Returns NA_character_ when there is no observed value to count (empty
# input, all-NA input, or a factor whose levels all have zero occurrences).
#
# NOTE: this definition masks base::mode() for the rest of the session.
mode <- function(variable) {
  # table() leaves NA out of the counts by default
  counts <- table(as.factor(variable))
  # Nothing was observed: report "no mode" instead of failing on an empty
  # table or returning a level that never occurs
  if (length(counts) == 0L || max(counts) == 0L) {
    return(NA_character_)
  }
  # which.max() picks the first maximum, i.e. the first of any tied levels
  names(counts)[which.max(counts)]
}

# Show the most frequent Species among the non-missing rows
mode(iris_test$Species)
## [1] "virginica"
# Fill every missing Species entry with that most frequent value
species_missing <- is.na(iris_test$Species)
iris_test$Species <- replace(
  iris_test$Species,
  species_missing,
  mode(iris_test$Species)
)