# Load the Project 2025 text, give every row a sequential `id`, keep only the
# columns used by the analysis below, and add a per-row word count.
project2025 <- read_csv("https://jadenchant.github.io/data/project2025.csv") |>
  mutate(id = row_number()) |>
  select(
    id, section_name, chapter, chapter_name, subsection_name, authors, text
  ) |>
  # "\\S+" matches one run of non-whitespace characters, so the number of
  # matches is the number of whitespace-delimited words in `text`.
  mutate(word_count = str_count(text, "\\S+"))
Link to Data Collection and Cleaning
Get Data from Project 2025 Website
The data was collected from the Project 2025 website. Here is the Python file used to collect and clean the data.
Bing Sentiment Analysis
# Bing lexicon lookup table; joined on `word` below and split by its
# `sentiment` column into positive / negative counts.
bing_sentiment <- get_sentiments("bing")
# Split each row's `text` into tokens, producing one row per (id, word).
tokenized <- project2025 |>
  # Coerce to character first so the tokenizer always receives strings.
  mutate(text = as.character(text)) |>
  unnest_tokens(word, text)
# Count, per row `id`, how many tokens the Bing lexicon labels positive and
# how many it labels negative. The result has one row per `id` that contains
# at least one lexicon word, with columns `bing_positive` and `bing_negative`.
bing_sentiment_count <- tokenized |>
  # Keep only tokens that appear in the lexicon.
  inner_join(bing_sentiment, by = "word") |>
  group_by(id, sentiment) |>
  # Drop the grouping explicitly so the result is a plain (ungrouped) tibble
  # and summarize() does not emit its regrouping message.
  summarize(count = n(), .groups = "drop") |>
  # An id that has only one polarity gets 0 for the other.
  pivot_wider(names_from = sentiment, values_from = count, values_fill = 0) |>
  rename(bing_positive = positive, bing_negative = negative)
# Attach the Bing counts to every row of `project2025` and derive:
#   bing_pred_sentiment - "positive" when positive words strictly outnumber
#                         negative ones, otherwise "negative" (ties included)
#   bing_pred_val       - share of matched sentiment words that are positive
# Rows with no lexicon match have NA counts after the left join, so both
# derived columns are NA for those rows.
bing_result <- left_join(project2025, bing_sentiment_count, by = "id") |>
  mutate(
    bing_pred_sentiment = as.factor(
      ifelse(bing_positive > bing_negative, "positive", "negative")
    ),
    bing_pred_val = bing_positive / (bing_positive + bing_negative)
  )
AFINN Sentiment Analysis
# afinn_sentiment <- get_sentiments("afinn")
#
# afinn_sentiment_count <- tokenized |>
# inner_join(afinn_sentiment, by = "word") |>
# group_by(id) |>
# summarize(
# afinn_positive = sum(value[value > 0], na.rm = TRUE),
# afinn_negative = sum(abs(value[value < 0]), na.rm = TRUE),
# afinn_total = sum(value, na.rm = TRUE))
#
# result <- left_join(project2025, afinn_sentiment_count, by = "id") |>
# mutate(afinn_pred_sentiment = as.factor(ifelse(afinn_positive > afinn_negative, "positive", "negative"))) |>
# mutate(afinn_pred_val = afinn_positive / (afinn_positive + afinn_negative))
# Reshape to long form for plotting: two rows per `id`, one for the positive
# count and one for the negative count, labelled in `Sentiment`.
result_long <- bing_result |>
  pivot_longer(
    cols = c(bing_positive, bing_negative),
    names_to = "Sentiment",
    values_to = "Count"
  ) |>
  # Turn the source column names into legend-friendly labels.
  mutate(
    Sentiment = ifelse(Sentiment == "bing_positive", "Positive", "Negative")
  )
# Line chart of positive vs. negative Bing word counts, one line per
# sentiment, drawn across the playbook in row-id order.
result_long |>
  ggplot(mapping = aes(x = id, y = Count, color = Sentiment)) +
  geom_line() +
  labs(
    title = "Sentiment Analysis Throughout the Playbook",
    subtitle = "(Bing Lexicon)",
    x = "Project 2025 Playbook",
    y = "Sentiment Count",
    color = "Sentiment Type"
  ) +
  scale_color_manual(
    values = c("Positive" = "#0a8754", "Negative" = "#AA0E3D")
  ) +
  dark_theme_gray(base_size = 12) +
  # The x axis is just the row id, so its tick labels and marks are hidden.
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )
# Line chart of the share of sentiment words that are positive
# (`bing_pred_val`) across the playbook in row-id order.
# Plot from `bing_result` (one row per id) rather than `result_long`, which
# holds every id twice after pivot_longer() and would feed duplicate points
# to geom_line().
ggplot(bing_result, aes(x = id, y = bing_pred_val, color = bing_pred_val)) +
  geom_line() +
  # midpoint = 0.5 centres the diverging palette where positive and negative
  # counts are equal.
  scale_color_gradient2(
    low = "#AA0E3D",
    high = "#0a8754",
    midpoint = 0.5,
    name = "Sentiment Percentage"
  ) +
  labs(
    title = "Sentiment Analysis Throughout the Playbook",
    subtitle = "(Bing Lexicon)",
    x = "Project 2025 Playbook",
    y = "Sentiment Percentage"
  ) +
  dark_theme_gray(base_size = 12) +
  # The x axis is just the row id, so its tick labels and marks are hidden.
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )