We use the text data we prepared for the base keyATM (see Preparation).
library(keyATM)
library(quanteda)
library(magrittr)
# Load the inaugural-address corpus from quanteda and keep its first
# 58 documents.
data(data_corpus_inaugural, package = "quanteda")
data_corpus_inaugural <- head(data_corpus_inaugural, n = 58)

# Tokenize, dropping numbers, punctuation, symbols, separators and URLs.
data_tokens <- tokens(
  data_corpus_inaugural,
  remove_numbers = TRUE,
  remove_punct = TRUE,
  remove_symbols = TRUE,
  remove_separators = TRUE,
  remove_url = TRUE
)

# Lower-case, then remove English stopwords plus a few extra function
# words, and finally keep only tokens of at least three characters.
data_tokens <- tokens_tolower(data_tokens)
data_tokens <- tokens_remove(
  data_tokens,
  c(
    stopwords("english"),
    "may", "shall", "can",
    "must", "upon", "with", "without"
  )
)
data_tokens <- tokens_select(data_tokens, min_nchar = 3)

# Build the document-feature matrix and trim features that occur fewer
# than 5 times overall or in fewer than 2 documents.
data_dfm <- dfm_trim(dfm(data_tokens), min_termfreq = 5, min_docfreq = 2)

# Convert the dfm into the document object passed to the models below.
keyATM_docs <- keyATM_read(texts = data_dfm)
# Fit the base weighted LDA: five topics, none of them with keywords.
# The seed is fixed so the fit can be reproduced.
out <- weightedLDA(
  model = "base",
  docs = keyATM_docs,
  number_of_topics = 5,
  options = list(seed = 250)
)

# Print the top words of each fitted topic.
top_words(out)
## Topic_1 Topic_2 Topic_3 Topic_4 Topic_5
## 1 world government every people country
## 2 nation states citizens one great
## 3 new people national time war
## 4 freedom public rights free congress
## 5 america constitution never let laws
## 6 peace union well years law
## 7 american united just government best
## 8 men power common liberty now
## 9 life duty confidence work many
## 10 nations interests less great made
We use the covariate data we prepared for the covariate keyATM (see keyATM_cov).
# Document-level variables attached to the (subsetted) corpus.
vars <- docvars(data_corpus_inaugural)
library(dplyr)

# Build the two covariates in one pass:
#   * Period: "18_19c" for speeches up to and including 1899,
#     "20_21c" for everything else.
#   * Party: "Democratic" and "Republican" are kept, every other value
#     is collapsed into "Other".
# Both are stored as factors with an explicit level order; the first
# level listed is the one R's default contrasts treat as the baseline.
vars_selected <- vars %>%
  as_tibble() %>%
  mutate(
    Period = factor(
      case_when(
        Year <= 1899 ~ "18_19c",
        TRUE ~ "20_21c"
      ),
      levels = c("18_19c", "20_21c")
    ),
    Party = factor(
      case_when(
        Party == "Democratic" ~ "Democratic",
        Party == "Republican" ~ "Republican",
        TRUE ~ "Other"
      ),
      levels = c("Other", "Republican", "Democratic")
    )
  ) %>%
  select(Party, Period)
# Fit the covariate weighted LDA: five topics, with Party and Period
# from `vars_selected` entering through the covariate formula.
# The seed is fixed so the fit can be reproduced.
out <- weightedLDA(
  model = "covariates",
  docs = keyATM_docs,
  number_of_topics = 5,
  model_settings = list(
    covariates_data = vars_selected,
    covariates_formula = ~ Party + Period
  ),
  options = list(seed = 250)
)

# Print the top words of each fitted topic.
top_words(out)
## Topic_1 Topic_2 Topic_3 Topic_4 Topic_5
## 1 people government world country states
## 2 now great nation best people
## 3 great united new right public
## 4 american congress peace nations power
## 5 years laws freedom political constitution
## 6 men law america part union
## 7 government national time party every
## 8 just war let ever rights
## 9 know among every office citizens
## 10 much policy life justice duty
We use the time index we prepared for the dynamic keyATM (see keyATM_dynamic).
# Build the time index used by the dynamic model. `vars_period` is not
# defined anywhere earlier in this script, so it is constructed here
# from the corpus document variables: each speech gets a decade index,
# with 1789 falling in period 1 and every later decade adding one.
# NOTE(review): this mirrors the construction in the keyATM dynamic
# vignette; keyATM expects the index to start at 1 and increase without
# gaps -- confirm against the keyATM documentation if the corpus changes.
vars_period <- docvars(data_corpus_inaugural)
vars_period$Period <- (vars_period$Year - 1780) %/% 10 + 1

# Fit the dynamic weighted LDA: three topics, five latent states, with
# the decade index as the time index. The seed is fixed so the fit can
# be reproduced.
out <- weightedLDA(
  docs = keyATM_docs,
  number_of_topics = 3,
  model = "dynamic",
  model_settings = list(
    time_index = vars_period$Period,
    num_states = 5
  ),
  options = list(seed = 250)
)

# Print the top words of each fitted topic.
top_words(out)
## Topic_1 Topic_2 Topic_3
## 1 world government people
## 2 nation states every
## 3 peace public power
## 4 new country war
## 5 time great made
## 6 nations constitution good
## 7 freedom united spirit
## 8 america union never
## 9 american national well
## 10 let rights right