Weighted LDA

We use the text data we prepared for the base keyATM (see Preparation).

library(keyATM)
library(quanteda)
library(magrittr)
# Load the inaugural-address corpus that ships with quanteda and keep only
# its first 58 documents.
data("data_corpus_inaugural", package = "quanteda")
data_corpus_inaugural <- data_corpus_inaugural %>%
  head(n = 58)

# Tokenize the corpus and clean the tokens. The steps run inside local() so
# the intermediate objects do not leak into the workspace.
data_tokens <- local({
  # Additional words removed alongside the standard English stopwords.
  extra_stopwords <- c(
    "may", "shall", "can",
    "must", "upon", "with", "without"
  )

  # Split the documents into tokens, discarding numbers, punctuation,
  # symbols, separators, and URLs.
  toks <- tokens(
    data_corpus_inaugural,
    remove_numbers = TRUE,
    remove_punct = TRUE,
    remove_symbols = TRUE,
    remove_separators = TRUE,
    remove_url = TRUE
  )

  # Lowercase first so the stopword match below is applied to lowercase tokens.
  toks <- tokens_tolower(toks)
  toks <- tokens_remove(toks, c(stopwords("english"), extra_stopwords))

  # Keep only tokens that are at least three characters long.
  tokens_select(toks, min_nchar = 3)
})

# Build the document-feature matrix from the cleaned tokens, then trim it
# with a minimum term frequency of 5 and a minimum document frequency of 2.
data_dfm <- dfm_trim(
  dfm(data_tokens),
  min_termfreq = 5,
  min_docfreq = 2
)

# Convert the document-feature matrix into the document format that the
# keyATM model functions take as input.
keyATM_docs <- keyATM_read(texts = data_dfm)

# Fit the base weighted LDA model with five topics.
out <- weightedLDA(
  docs              = keyATM_docs, # text input
  number_of_topics  = 5, # number of topics without keywords
  model             = "base", # select the model
  options           = list(seed = 250) # fixed seed so the run is reproducible
)

# Inspect the fitted model: list the top words of each topic. This is the
# call that produces the table shown below.
top_words(out)
##     Topic_1      Topic_2    Topic_3    Topic_4  Topic_5
## 1     world   government      every     people  country
## 2    nation       states   citizens        one    great
## 3       new       people   national       time      war
## 4   freedom       public     rights       free congress
## 5   america constitution      never        let     laws
## 6     peace        union       well      years      law
## 7  american       united       just government     best
## 8       men        power     common    liberty      now
## 9      life         duty confidence       work     many
## 10  nations    interests       less      great     made

Weighted LDA Covariates

We use the covariate data we prepared for the covariate keyATM (see keyATM_cov).

library(dplyr)

# Document-level variables attached to the corpus.
vars <- docvars(data_corpus_inaugural)

# Build the covariate table in a single pipeline:
#   1. Period: "18_19c" for years up to 1899, "20_21c" otherwise.
#   2. Party: keep "Democratic" and "Republican", collapse the rest to "Other".
#   3. Keep only the two covariates, in the order Party, Period.
#   4. Convert both to factors with explicit level order, so the first level
#      ("Other" / "18_19c") comes first.
vars_selected <- vars %>%
  as_tibble() %>%
  mutate(
    Period = case_when(
      Year <= 1899 ~ "18_19c",
      TRUE ~ "20_21c"
    ),
    Party = case_when(
      Party == "Democratic" ~ "Democratic",
      Party == "Republican" ~ "Republican",
      TRUE ~ "Other"
    )
  ) %>%
  select(Party, Period) %>%
  mutate(
    Party = factor(Party, levels = c("Other", "Republican", "Democratic")),
    Period = factor(Period, levels = c("18_19c", "20_21c"))
  )
# Fit the weighted LDA model with document-level covariates: five topics,
# with Party and Period entering through the covariate formula.
out <- weightedLDA(
  docs = keyATM_docs,
  number_of_topics = 5,
  model = "covariates",
  model_settings = list(
    covariates_data = vars_selected, # one row of covariates per document
    covariates_formula = ~ Party + Period
  ),
  options = list(seed = 250) # fixed seed so the run is reproducible
)

# Inspect the fitted model: list the top words of each topic. This is the
# call that produces the table shown below.
top_words(out)
##       Topic_1    Topic_2 Topic_3   Topic_4      Topic_5
## 1      people government   world   country       states
## 2         now      great  nation      best       people
## 3       great     united     new     right       public
## 4    american   congress   peace   nations        power
## 5       years       laws freedom political constitution
## 6         men        law america      part        union
## 7  government   national    time     party        every
## 8        just        war     let      ever       rights
## 9        know      among   every    office     citizens
## 10       much     policy    life   justice         duty

Weighted LDA Dynamic

We use the time index we prepared for the dynamic keyATM (see keyATM_dynamic).

# Add a decade-based time index: years 1780-1789 map to period 1, 1790-1799
# to period 2, and so on. `Year` is taken from the piped data (not from
# `vars$Year`) so the index always stays aligned with the rows being mutated.
vars_period <- vars %>%
  as_tibble() %>%
  mutate(Period = (Year - 1780) %/% 10 + 1)
# Fit the dynamic weighted LDA model with three topics, using the decade
# index of each document as its time index.
out <- weightedLDA(
  docs = keyATM_docs,
  number_of_topics = 3,
  model = "dynamic",
  model_settings = list(
    time_index = vars_period$Period, # one time index value per document
    num_states = 5 # number of states for the dynamic model
  ),
  options = list(seed = 250) # fixed seed so the run is reproducible
)

# Inspect the fitted model: list the top words of each topic. This is the
# call that produces the table shown below.
top_words(out)
##     Topic_1      Topic_2 Topic_3
## 1     world   government  people
## 2    nation       states   every
## 3     peace       public   power
## 4       new      country     war
## 5      time        great    made
## 6   nations constitution    good
## 7   freedom       united  spirit
## 8   america        union   never
## 9  american     national    well
## 10      let       rights   right