Weighted LDA

We use the text data we prepared for the base keyATM (see Preparation).

library(keyATM)
library(quanteda)
library(magrittr)
# Load the inaugural-address corpus bundled with quanteda and keep the
# first 58 documents.
data(data_corpus_inaugural, package = "quanteda")
data_corpus_inaugural <- head(data_corpus_inaugural, n = 58)

# Tokenize, dropping numbers, punctuation, symbols, separators and URLs.
data_tokens <- tokens(
  data_corpus_inaugural,
  remove_numbers = TRUE,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_separators = TRUE,
  remove_url = TRUE
)

# Lower-case the tokens, then remove English stopwords plus a few extra
# function words, and keep only tokens with at least three characters.
data_tokens <- tokens_tolower(data_tokens)
data_tokens <- tokens_remove(
  data_tokens,
  c(stopwords("english"),
    "may", "shall", "can",
    "must", "upon", "with", "without")
)
data_tokens <- tokens_select(data_tokens, min_nchar = 3)

# Build the document-feature matrix and trim infrequent terms
# (term frequency below 5 or document frequency below 2).
data_dfm <- dfm_trim(dfm(data_tokens), min_termfreq = 5, min_docfreq = 2)

# Convert the dfm into the input object used by the model calls.
keyATM_docs <- keyATM_read(texts = data_dfm)
# Fit the base Weighted LDA model with five topics; the seed is fixed for
# reproducibility.
out <- weightedLDA(docs              = keyATM_docs,    # text input
                   number_of_topics  = 5,              # number of topics without keywords
                   model             = "base",         # select the model
                   options           = list(seed = 250))
# Print the top words per topic (this produces the table shown below).
top_words(out)
##    Topic_1      Topic_2  Topic_3    Topic_4    Topic_5
## 1    world       states      war      great     people
## 2      new      country      men      every government
## 3   nation constitution   spirit government     public
## 4   people       united    never       made       laws
## 5  freedom        union     long   congress        law
## 6  america     citizens progress       just       duty
## 7    peace       rights   common       best  interests
## 8      let          one  purpose      power  political
## 9     time        power american       much    nations
## 10    life          now      yet       part     policy

Weighted LDA Covariates

We use the covariate data we prepared for the covariate keyATM (see keyATM_cov).

# Document-level variables attached to the corpus.
vars <- docvars(data_corpus_inaugural)

library(dplyr)
# Derive the two covariates:
#   * Period: "18_19c" for years up to 1899, "20_21c" otherwise.
#   * Party:  "Democratic", "Republican", or "Other" for everything else.
# Both are then stored as factors with an explicit level order.
vars_selected <- vars %>%
  as_tibble() %>%
  mutate(
    Period = case_when(Year <= 1899 ~ "18_19c",
                       TRUE ~ "20_21c"),
    Party = case_when(Party == "Democratic" ~ "Democratic",
                      Party == "Republican" ~ "Republican",
                      TRUE ~ "Other")
  ) %>%
  select(Party, Period) %>%
  mutate(
    Party = factor(Party, levels = c("Other", "Republican", "Democratic")),
    Period = factor(Period, levels = c("18_19c", "20_21c"))
  )
# Fit the covariate Weighted LDA model with five topics, passing the
# prepared Party and Period covariates and the formula that uses them.
out <- weightedLDA(docs              = keyATM_docs,
                   number_of_topics  = 5,
                   model             = "covariates",
                   model_settings    = list(covariates_data    = vars_selected, 
                                            covariates_formula = ~ Party + Period),
                   options           = list(seed = 250))
# Print the top words per topic (this produces the table shown below).
top_words(out)
##     Topic_1 Topic_2 Topic_3   Topic_4      Topic_5
## 1    people  united   world   country   government
## 2     peace     now     new     great       states
## 3   nations     war  nation      made       people
## 4     every  policy freedom  congress       public
## 5      free    part america      laws constitution
## 6     years    good     let       law        union
## 7       men  nation    time      best        power
## 8   justice   every    life      well     national
## 9      just  spirit     one     among       rights
## 10 american     far    work political     citizens

Weighted LDA Dynamic

We use the time index we prepared for the dynamic keyATM (see keyATM_dynamic).

# Build the time index: one period per decade, with 1780-1789 mapped to 1,
# 1790-1799 to 2, and so on. `Year` is referenced as a column of the piped
# data rather than through the external `vars` object.
vars_period <- vars %>%
  as_tibble() %>%
  mutate(Period = (Year - 1780) %/% 10 + 1)
# Fit the dynamic Weighted LDA model with three topics, using the decade
# index as the time index and five states.
out <- weightedLDA(docs             = keyATM_docs,  
                   number_of_topics = 3,            
                   model            = "dynamic",    
                   model_settings   = list(time_index = vars_period$Period, 
                                           num_states = 5),
                   options          = list(seed = 250))
# Print the top words per topic (this produces the table shown below).
top_words(out)
##       Topic_1 Topic_2      Topic_3
## 1  government   world       states
## 2       great  nation       people
## 3       every  people      country
## 4         war   peace        power
## 5        made     new       public
## 6        laws    time constitution
## 7         law nations       united
## 8        well freedom        union
## 9   interests america     national
## 10       many    free       rights