For this bigram analysis, The Time Machine, a science fiction novella by H.G. Wells, was retrieved from Project Gutenberg, which offers over 56,000 free e-books. The gutenbergr package downloads and processes public domain works from Project Gutenberg. The metadata for all other works available from Project Gutenberg can be browsed with View(gutenberg_works()).
The Time Machine charts the Time Traveller's fantastical path: building a time machine, travelling to distant lands of the future, encountering the humanoid Eloi, the ape-like Morlocks and huge reddish crabs, returning to Victorian decadence and, finally, vanishing in the time machine.


Import packages

library(gutenbergr)
library(tidyr)
library(tidytext)
library(dplyr)
library(ggplot2)
library(knitr)
# Look up the catalogue entry for The Time Machine to find its gutenberg_id
gutenberg_works(title == "The Time Machine")
## # A tibble: 1 x 8
##   gutenberg_id title  author   gutenberg_author… language gutenberg_books…
##          <int> <chr>  <chr>                <int> <chr>    <chr>           
## 1           35 The T… Wells, …                30 en       Science Fiction…
## # ... with 2 more variables: rights <chr>, has_text <lgl>
# Download the text by gutenberg_id, keeping the title as an extra column
time_machine <- gutenberg_download(35, meta_fields = "title")


Find all lines that mention the Morlocks in Wells' text

# Keep only the lines of text that contain "Morlocks"
machine <- grep("Morlocks", time_machine$text, value = TRUE)
length(machine)
## [1] 46
head(machine, 8)
## [1] "I had seen of the Morlocks--that, by the by, was the name by which"  
## [2] "'Then came troublesome doubts. Why had the Morlocks taken my Time"   
## [3] "abruptly to trouble about the Morlocks, and was only concerned in"   
## [4] "influence of the Eloi, whose disgust of the Morlocks I now began"    
## [5] "spectral Morlocks sheltered from the glare. The place, by the by,"   
## [6] "Morlocks at any rate were carnivorous! Even at the time, I remember"
## [7] "blackness I could hear the Morlocks rustling like wind among leaves,"
## [8] "Morlocks and was speedily clambering up the shaft, while they stayed"


Tokenize text in bigrams

# Split the text into overlapping two-word tokens (bigrams), one per row.
# Depending on the tidytext version, lines with fewer than two words can
# yield an NA bigram. Drop those here: NA is not a stop word, so it would
# otherwise survive the stop-word filter and be re-united into the literal
# string "NA NA", which would then be counted as if it were a real bigram.
bigrams <- time_machine %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  filter(!is.na(bigram))


Remove stop words from bigrams

# Split each bigram at the space into its first and second word so that
# each word can be checked against the stop-word list on its own
bigrams_separate <- separate(
  bigrams,
  bigram,
  into = c("word_1", "word_2"),
  sep = " "
)

head(bigrams_separate)
## # A tibble: 6 x 4
##   gutenberg_id title            word_1  word_2
##          <int> <chr>            <chr>   <chr>  
## 1           35 The Time Machine the     time   
## 2           35 The Time Machine time    machine
## 3           35 The Time Machine machine by     
## 4           35 The Time Machine by      h      
## 5           35 The Time Machine h       g      
## 6           35 The Time Machine g       wells
# Keep a bigram only when neither of its words is a stop word, then glue
# the two words back together into a single space-separated column
bigrams_filter <- bigrams_separate %>%
  filter(
    !word_1 %in% stop_words$word,
    !word_2 %in% stop_words$word
  ) %>%
  unite(bigram, word_1, word_2, sep = " ")

head(bigrams_filter)
## # A tibble: 6 x 3
##   gutenberg_id title            bigram          
##          <int> <chr>            <chr>           
## 1           35 The Time Machine time machine    
## 2           35 The Time Machine time traveller  
## 3           35 The Time Machine recondite matter
## 4           35 The Time Machine grey eyes       
## 5           35 The Time Machine eyes shone      
## 6           35 The Time Machine fire burned


Find frequent bigrams

# Count how often each bigram occurs, most frequent first
bigrams_frequent <- bigrams_filter %>%
  dplyr::count(bigram, sort = TRUE)

# Re-attach gutenberg_id and title to the counts. The occurrences are
# de-duplicated before the join (rather than joining every occurrence and
# calling unique() afterwards), so the join never builds one row per
# occurrence. The explicit key also avoids the "Joining, by" message.
bigrams_frequent2 <- bigrams_frequent %>%
  inner_join(distinct(bigrams_filter), by = "bigram") %>%
  as.data.frame()
head(bigrams_frequent2, 10)
##              bigram  n gutenberg_id            title
## 1    time traveller 54           35 The Time Machine
## 2      time machine 39           35 The Time Machine
## 3      white sphinx 10           35 The Time Machine
## 4   green porcelain  8           35 The Time Machine
## 5      looked round  7           35 The Time Machine
## 6   time travelling  7           35 The Time Machine
## 7  time traveller's  6           35 The Time Machine
## 8       upper world  6           35 The Time Machine
## 9      world people  6           35 The Time Machine
## 10     bronze doors  4           35 The Time Machine


Tabulate bigrams containing "time"

# Select every bigram whose text contains "time" anywhere; this is a
# substring match, so e.g. "times" and "lifetimes" are included too
has_time <- grepl("time", bigrams_frequent2$bigram)
nature_bigrams <- bigrams_frequent2[has_time, ]
# Show the 15 most frequent of those bigrams as a table
nature_bigrams2 <- head(arrange(nature_bigrams, desc(n)), 15)
kable(nature_bigrams2)
bigram n gutenberg_id title
time traveller 54 35 The Time Machine
time machine 39 35 The Time Machine
time travelling 7 35 The Time Machine
time traveller's 6 35 The Time Machine
time dimension 3 35 The Time Machine
amazing time 1 35 The Time Machine
coming times 1 35 The Time Machine
culminating time 1 35 The Time Machine
explore time 1 35 The Time Machine
express time 1 35 The Time Machine
fifty times 1 35 The Time Machine
forty times 1 35 The Time Machine
fourth time 1 35 The Time Machine
future time 1 35 The Time Machine
human lifetimes 1 35 The Time Machine