Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save cboulanger/a09b1908c3c5160b8e01870184e5fbe6 to your computer and use it in GitHub Desktop.
Save cboulanger/a09b1908c3c5160b8e01870184e5fbe6 to your computer and use it in GitHub Desktop.
library(tidyverse)
library(tidytext)
library(xts)
library(tstools)
# Root folder of the project; each directory below it is read as one corpus.
base_dir <- "C:\\Users\\Boulanger\\ownCloud\\Abteilung 3\\Projekte\\Legal Theory Graph"

# list.dirs() is recursive by default and also returns base_dir itself, which
# is not a corpus and is therefore removed from the result.
corpus_dirs <- setdiff(list.dirs(base_dir), base_dir)
# Build one word-frequency table per corpus directory.
#
# `corpora` is a named list keyed by the directory's basename. Each element is
# an ungrouped tibble with one row per (year, word) and the columns
# year, word, n, tf, idf, tf_idf, year_total and date (1 January of `year`).
corpora <- list()
for (dir in corpus_dirs) {
  print(dir)
  name <- basename(dir)

  # Directories that share a basename are only read once (the first one wins).
  if (name %in% names(corpora)) {
    next
  }

  # Full paths make reading independent of the working directory, so the
  # script no longer has to setwd() into every corpus. The dot is escaped so
  # that only a real ".txt" suffix matches.
  files <- list.files(dir, pattern = "\\.txt$", full.names = TRUE)
  if (length(files) == 0) {
    # Nothing to tokenize, e.g. a folder that only contains sub-folders.
    next
  }

  # Create a word list of all files in the directory: one row per token.
  tokens <- map_df(files, ~ tibble(txt = read_file(.x)) %>%
    mutate(filename = basename(.x)) %>%
    unnest_tokens(word, txt))

  # The year of a text is the first 19xx/20xx number found in its file name.
  tokens <- tokens %>%
    mutate(year = str_extract(filename, "(19|20)\\d\\d"))
  print(paste("Years:", paste(unique(tokens$year), collapse = ", ")))

  # Files without a recognisable year cannot be placed on the time axis
  # (as.Date("NA-01-01") raises an error), so report and drop them.
  undated <- unique(tokens$filename[is.na(tokens$year)])
  if (length(undated) > 0) {
    warning(
      "Ignoring files without a year in their name: ",
      paste(undated, collapse = ", "),
      call. = FALSE
    )
    tokens <- dplyr::filter(tokens, !is.na(year))
  }
  if (nrow(tokens) == 0) {
    next
  }

  # Count words per year, score them with tf-idf (each year is one
  # "document"), and attach the total number of words of that year.
  # No complete() step is needed: every year that occurs is already present,
  # and years missing from a corpus are dealt with when a time series is
  # built from this table.
  corpora[[name]] <- tokens %>%
    dplyr::count(year, word) %>%
    bind_tf_idf(word, year, n) %>%
    group_by(year) %>%
    mutate(year_total = sum(n)) %>%
    # Ungroup before touching `year`, so the grouping column is never modified
    # inside a grouped mutate and the stored table carries no hidden grouping.
    ungroup() %>%
    mutate(
      year = as.numeric(year),
      date = as.Date(paste0(year, "-01-01"))
    )
}
# Search for key terms
# For absolute values, leave out the "n / year_total" normalisation
# Convert an xts series into a base `ts` object.
#
# XD:   xts series; its first and last rows define the time span.
# freq: passed to ts() as `frequency` (observations per unit of time).
#
# Returns a ts holding the numeric values of XD, with start and end given as
# c(year, month) of the first and last index entries.
xts2ts <- function(XD, freq) {
  # .indexyear() is offset by 1900 and .indexmon() is zero-based, hence the
  # corrections to get a calendar year and a 1-12 month.
  year_month <- function(obs) {
    c(.indexyear(obs) + 1900, .indexmon(obs) + 1L)
  }

  first_obs <- XD[1]
  last_obs <- XD[nrow(XD)]

  ts(
    as.numeric(XD),
    start = year_month(first_obs),
    end = year_month(last_obs),
    frequency = freq
  )
}
# Build one yearly time series per corpus with the count (`n`) of the search
# term. Years in which the term does not occur stay NA.
ts_list <- list()

# Fixed observation window: one date per year, 1970 through 2019. It is built
# once outside the loop and deliberately not named `ts`, so stats::ts() is not
# shadowed by a Date vector.
year_grid <- data.frame(
  date = seq.Date(as.Date("1970-01-01"), as.Date("2019-12-31"), by = "year")
)

for (name in names(corpora)) {
  hits <- corpora[[name]] %>%
    dplyr::filter(word %in% c("ehrlich"))

  # left_join keeps exactly the years of the window. A full join would append
  # years outside 1970-2019, leaving gaps in the index; xts2ts() only passes
  # the first and last date to ts(), which would then recycle the values and
  # shift them onto the wrong years.
  yearly <- left_join(year_grid, hits, by = "date")

  xts_data <- xts(yearly$n, order.by = yearly$date, frequency = 1)
  ts_list[[name]] <- xts2ts(xts_data, 1)
}
# Plot the series of all corpora in one chart, drawn as bars.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
figure3 <- tsplot(
  ts_list,
  plot_title = "'ehrlich' im Rechtssoziologiekorpus",
  left_as_bar = TRUE,
  theme = init_tsplot_theme(bar_gap = 10)
)
figure3
@cboulanger
Copy link
Author

grafik

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment