Weighted LDA Models • keyATM

Weighted LDA

We use the text data we prepared for the base keyATM (see Preparation).

library(keyATM)
library(quanteda)
library(magrittr)
# Load the inaugural-address corpus that ships with quanteda, then keep
# only its first 58 documents.
data("data_corpus_inaugural", package = "quanteda")
data_corpus_inaugural <- head(data_corpus_inaugural, 58)

# Step 1: tokenize, discarding numbers, punctuation, symbols, separators
# and URLs.
data_tokens <- tokens(data_corpus_inaugural,
                      remove_numbers    = TRUE,
                      remove_punct      = TRUE,
                      remove_symbols    = TRUE,
                      remove_separators = TRUE,
                      remove_url        = TRUE)

# Step 2: lower-case every token.
data_tokens <- tokens_tolower(data_tokens)

# Step 3: remove the English stopword list plus a few extra words.
data_tokens <- tokens_remove(
  data_tokens,
  c(stopwords("english"),
    "may", "shall", "can", "must", "upon", "with", "without")
)

# Step 4: apply the minimum token length (min_nchar = 3).
data_tokens <- tokens_select(data_tokens, min_nchar = 3)

# Build the document-feature matrix from the cleaned tokens and trim it
# with min_termfreq = 5 and min_docfreq = 2.
data_dfm <- dfm_trim(dfm(data_tokens),
                     min_termfreq = 5,
                     min_docfreq  = 2)

# Convert the trimmed matrix into the document object the models consume.
keyATM_docs <- keyATM_read(texts = data_dfm)

# Fit the base weighted LDA with five topics; the seed is fixed at 250.
out <- weightedLDA(
  docs = keyATM_docs,            # documents produced by keyATM_read()
  number_of_topics = 5,          # topics to estimate (none have keywords)
  model = "base",                # model variant
  options = list(seed = 250)
)

# Show the top words of each fitted topic.
top_words(out)

##     Topic_1      Topic_2    Topic_3    Topic_4  Topic_5
## 1     world   government      every     people  country
## 2    nation       states   citizens        one    great
## 3       new       people   national       time      war
## 4   freedom       public     rights       free congress
## 5   america constitution      never        let     laws
## 6     peace        union       well      years      law
## 7  american       united       just government     best
## 8       men        power     common    liberty      now
## 9      life         duty confidence       work     many
## 10  nations    interests       less      great     made

Weighted LDA Covariates

We use the covariate data we prepared for the covariate keyATM (see keyATM_cov).

# Document-level variables attached to the inaugural corpus.
vars <- docvars(data_corpus_inaugural)

library(dplyr)

# Build the covariate table in a single left-assigned pipeline:
#   * Period: "18_19c" when Year <= 1899, otherwise "20_21c"
#   * Party:  "Democratic" and "Republican" are kept, anything else
#             becomes "Other"
# Both columns are stored as factors with an explicit level order, and only
# these two columns are retained (Party first, then Period).
vars_selected <- vars %>%
  as_tibble() %>%
  mutate(
    Period = factor(
      case_when(Year <= 1899 ~ "18_19c",
                TRUE ~ "20_21c"),
      levels = c("18_19c", "20_21c")
    ),
    Party = factor(
      case_when(Party == "Democratic" ~ "Democratic",
                Party == "Republican" ~ "Republican",
                TRUE ~ "Other"),
      levels = c("Other", "Republican", "Democratic")
    )
  ) %>%
  select(Party, Period)

# Fit the covariate variant of weighted LDA: five topics, with Party and
# Period from vars_selected entering through the formula. Seed fixed at 250.
out <- weightedLDA(
  docs = keyATM_docs,
  number_of_topics = 5,
  model = "covariates",
  model_settings = list(
    covariates_data = vars_selected,
    covariates_formula = ~ Party + Period
  ),
  options = list(seed = 250)
)

# Show the top words of each fitted topic.
top_words(out)

##       Topic_1    Topic_2 Topic_3   Topic_4      Topic_5
## 1      people government   world   country       states
## 2         now      great  nation      best       people
## 3       great     united     new     right       public
## 4    american   congress   peace   nations        power
## 5       years       laws freedom political constitution
## 6         men        law america      part        union
## 7  government   national    time     party        every
## 8        just        war     let      ever       rights
## 9        know      among   every    office     citizens
## 10       much     policy    life   justice         duty

Weighted LDA Dynamic

We use the time index we prepared for the dynamic keyATM (see keyATM_dynamic).

# Derive a decade-based time index from Year:
# (Year - 1780) %/% 10 + 1, so 1780-1789 -> 1, 1790-1799 -> 2, and so on.
# Year is taken from the piped data itself rather than from the global
# `vars`, so the column always lines up with the rows being mutated.
vars_period <- vars %>%
  as_tibble() %>%
  mutate(Period = (Year - 1780) %/% 10 + 1)

# Fit the dynamic variant of weighted LDA: three topics, the per-document
# time index taken from vars_period$Period, and num_states = 5.
# Seed fixed at 250.
out <- weightedLDA(
  docs = keyATM_docs,
  number_of_topics = 3,
  model = "dynamic",
  model_settings = list(
    time_index = vars_period$Period,
    num_states = 5
  ),
  options = list(seed = 250)
)

# Show the top words of each fitted topic.
top_words(out)

##     Topic_1      Topic_2 Topic_3
## 1     world   government  people
## 2    nation       states   every
## 3     peace       public   power
## 4       new      country     war
## 5      time        great    made
## 6   nations constitution    good
## 7   freedom       united  spirit
## 8   america        union   never
## 9  american     national    well
## 10      let       rights   right