1. Loading file and data

# Location of the input text file (relative path)
filePath <- "../example.txt"
# Read the file into a character vector, one element per line
text <- readLines(con = filePath)

# Load the data as a tibble
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
# Pair every line of text with its line number. seq_along() follows the
# actual number of lines read, so the input file need not have exactly 15.
text_df <- tibble(line = seq_along(text), text = text)

2. Tokenizing

library(tidytext)

# Break the `text` column into tokens, written to a new `word` column
token_df <- unnest_tokens(text_df, output = word, input = text)

# Show the tokenized table
token_df

3. Removing generic stopwords

# Load the `stop_words` dataset into the workspace
data(stop_words)

# Keep only the tokens whose `word` has no match in the stop-word table
clean_df <- anti_join(token_df, stop_words, by = "word")

4. Removing stopwords defined by me

# Load the custom stop-word list; the join below matches on its `text` column.
# stringsAsFactors = FALSE keeps that column character on R < 4.0 as well,
# so the join compares character with character instead of with a factor.
mystop <- read.csv("../stop.csv", header = TRUE, stringsAsFactors = FALSE)

# Drop every token whose `word` equals a custom stop word in `mystop$text`
cleaner_df <- clean_df %>%
  anti_join(mystop, by = c("word" = "text"))

# Show the remaining tokens
cleaner_df

5. Getting word count

# Tally the occurrences of each remaining word, most frequent first
frequency <- count(cleaner_df, word, sort = TRUE)

6. Plotting

Version A

library(wordcloud)

# Draw the word cloud from the `word` and `n` columns of `frequency`
with(
  frequency,
  wordcloud(
    word,
    n,
    min.freq = 4,
    max.words = 100,
    random.order = FALSE,
    rot.per = 0.35,
    colors = brewer.pal(8, "Dark2")
  )
)

Version B

library(wordcloud2)
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
  # Render the word cloud from the `frequency` table with wordcloud2
  wordcloud2(
    data = frequency,
    size = 1.6,
    color = "random-dark"
  )
LS0tCnRpdGxlOiAiV29yZGNsb3VkIHdpdGggUiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyMgMS4gTG9hZGluZyBmaWxlIGFuZCBkYXRhCgpgYGB7cn0KIyBSZWFkIHRoZSB0ZXh0IGZpbGUgCmZpbGVQYXRoIDwtICIuLi9leGFtcGxlLnR4dCIKdGV4dCA8LSByZWFkTGluZXMoZmlsZVBhdGgpCgojIExvYWQgdGhlIGRhdGEgYXMgYSB0aWJibGUKbGlicmFyeShkcGx5cikKdGV4dF9kZiA8LSB0aWJibGUobGluZSA9IDE6MTUsIHRleHQgPSB0ZXh0KQpgYGAKIyMgMi4gVG9rZW5pemluZwoKYGBge3J9CmxpYnJhcnkodGlkeXRleHQpCgp0b2tlbl9kZiA8LSB0ZXh0X2RmICU+JQogICAgICAgICAgICB1bm5lc3RfdG9rZW5zKHdvcmQsIHRleHQpCgp0b2tlbl9kZgpgYGAKIyMgMy4gUmVtb3ZpbmcgZ2VuZXJpYyBzdG9wd29yZHMKCmBgYHtyfQpkYXRhKHN0b3Bfd29yZHMpCgpjbGVhbl9kZiA8LSB0b2tlbl9kZiAlPiUKICBhbnRpX2pvaW4oc3RvcF93b3JkcywgYnkgPSBjKCJ3b3JkIiA9ICJ3b3JkIikpCgpgYGAKCiMjIDQuIFJlbW92aW5nIHN0b3B3b3JkcyBkZWZpbmVkIGJ5IG1lCgpgYGB7cn0KbXlzdG9wIDwtIHJlYWQuY3N2KCIuLi9zdG9wLmNzdiIsIGhlYWRlciA9IFRSVUUpCgpjbGVhbmVyX2RmIDwtIGNsZWFuX2RmICU+JQogIGFudGlfam9pbihteXN0b3AsIGJ5ID0gYygid29yZCIgPSAidGV4dCIpKQoKY2xlYW5lcl9kZgpgYGAKIyMgNS4gR2V0dGluZyB3b3JkIGNvdW50CmBgYHtyfQpmcmVxdWVuY3kgPC0gY2xlYW5lcl9kZiAlPiUKICAgICAgICAgICAgICBjb3VudCh3b3JkLCBzb3J0ID0gVFJVRSkgCmBgYAoKIyMgNi4gUGxvdHRpbmcKCiMjIyBWZXJzaW9uIEEKYGBge3J9CmxpYnJhcnkod29yZGNsb3VkKQoKZnJlcXVlbmN5ICU+JQogd2l0aCh3b3JkY2xvdWQod29yZCwgbiwgbWluLmZyZXEgPSA0LCBtYXgud29yZHMgPSAxMDAsIHJhbmRvbS5vcmRlcj1GQUxTRSwgcm90LnBlcj0wLjM1LCBjb2xvcnM9YnJld2VyLnBhbCg4LCAiRGFyazIiKSkpCmBgYAoKIyMjIFZlcnNpb24gQgpgYGB7cn0KbGlicmFyeSh3b3JkY2xvdWQyKQpgYGAKCmBgYHtyfQogIHdvcmRjbG91ZDIoZGF0YT1mcmVxdWVuY3ksIHNpemU9MS42LCBjb2xvcj0ncmFuZG9tLWRhcmsnKQpgYGAKCgoKCgoKCgo=