Model Training Log Plot: Code

Here is the code:

# identifies files in working directory based on a distinguishing substring;
#    those files are text files with a series of numbers (1 number per line)
# then graphs those number series as y-variable versus index (1, 2, 3...) as
#    x-variable; saves graphs as '.png' files in working directory


get_strings_with_substring <- function(strings, a_substring){
     # returns the elements of 'strings' that contain 'a_substring'
     #
     # strings:      character vector to filter
     # a_substring:  single string to look for; it is matched literally
     # returns:      character vector of the matching elements, in their
     #               original order (length 0 if nothing matches)
     #
     # 'fixed=TRUE' makes grepl() treat 'a_substring' as plain text rather than
     #    as a regular expression, so characters such as '.' match only
     #    themselves
     contains_substring <- grepl(a_substring, strings, fixed=TRUE)
     return(strings[contains_substring])
}


get_strings_without_substring <- function(strings, a_substring){
     # returns the elements of 'strings' that do not contain 'a_substring'
     #
     # strings:      character vector to filter
     # a_substring:  single string to look for; it is matched literally
     # returns:      character vector of the elements lacking the substring, in
     #               their original order (length 0 if every element has it)
     #
     # 'fixed=TRUE' makes grepl() treat 'a_substring' as plain text rather than
     #    as a regular expression; e.g., '.png' excludes only strings with a
     #    literal '.png', not every string with some character before 'png'
     contains_substring <- grepl(a_substring, strings, fixed=TRUE)
     return(strings[!contains_substring])
}


read_files_into_table_list <- function(filenames){
     # reads the number series stored in each file into a list
     #
     # filenames:  character vector of paths readable by read.table()
     # returns:    unnamed list with one element per file, in the same order;
     #             each element is the first column of that file's table
     #             (an empty list if 'filenames' is empty)
     #
     # lapply() returns an empty list when no filenames are given, so an empty
     #    input is not an error
     # '[[1]]' extracts the first column explicitly, so each list element is a
     #    plain vector and any additional columns are ignored without a warning
     table_list <- lapply(unname(filenames), function(filename){
          read.table(filename)[[1]]
     })
     return(table_list)
}


graph_series_by_index <- function(numeric_vector, title, ylim=c(0, 1)){
     # plots a number series 'numeric_vector' as y-variable against its indices
     #    (1, 2, 3...) as the x-variable and saves the plot as a '.png' file
     #
     # numeric_vector:  the number series to plot
     # title:           single string, assumed to be the filename that contained
     #                  the number series; it is used as the plot's title and,
     #                  with its extension replaced by '.png', as the plot's
     #                  filename
     # ylim:            y-axis limits passed to plot(); defaults to c(0, 1)
     # returns:         (invisibly) the filename the plot was written to
     #
     # only the final extension is stripped, so exactly one output filename
     #    results even when 'title' contains several dots
     title_stem <- tools::file_path_sans_ext(title)
     graph_filename <- paste0(title_stem, '.png')
     png(graph_filename, width=640, height=512)
     # close the device even if plot() fails, so no graphics device is left open
     on.exit(dev.off(), add=TRUE)
     plot(numeric_vector, pch=16, type='b', lwd=2, main=title, ylim=ylim)
     invisible(graph_filename)
}


graph_multiple_series <- function(list_of_numeric_vectors, titles){
     # plots multiple number series, one graph per series
     #
     # list_of_numeric_vectors:  list of number series to plot
     # titles:                   character vector with one title per series, in
     #                           the same order; each is passed to
     #                           graph_series_by_index() along with its series
     # returns:                  NULL, invisibly
     #
     # seq_along() yields no iterations for an empty list, so calling this with
     #    nothing to plot is a no-op rather than an error
     for (i in seq_along(list_of_numeric_vectors)){
          graph_series_by_index(list_of_numeric_vectors[[i]], titles[i])
     }
     invisible(NULL)
}


graph_series_from_files <- function(filenames, file_substring) {
     # graphs the number series stored in the files whose names contain
     #    'file_substring'
     #
     # filenames:       character vector of candidate filenames
     # file_substring:  distinguishing substring that selects which files to
     #                  graph; those files are text files with a series of
     #                  numbers (1 number per line)
     # returns:         NULL, invisibly
     #
     # each selected series is graphed as y-variable versus index (1, 2, 3...)
     #    as x-variable and saved as a '.png' file in the working directory
     matching_filenames <- get_strings_with_substring(filenames, file_substring)
     # filenames containing '.png' are excluded so that saved graphs, which
     #    share the substring with their source files, are not read as data
     matching_filenames <- get_strings_without_substring(matching_filenames, '.png')
     # nothing to graph: stop here instead of handing empty input downstream
     if (length(matching_filenames) == 0){
          return(invisible(NULL))
     }
     series_list <- read_files_into_table_list(matching_filenames)
     graph_multiple_series(series_list, matching_filenames)
}


# script body: graphs every non-empty log file in the 'logs' subdirectory of the
#    starting working directory, then returns to the starting directory
current_directory <- getwd()
logs_directory <- file.path(current_directory, 'logs', fsep=.Platform$file.sep)
setwd(logs_directory)

# 'finally' restores the starting working directory even if reading or plotting
#    one of the files raises an error
tryCatch({
     filenames <- list.files()
     file_sizes <- file.info(filenames)$size
     # skip empty files, and entries whose size cannot be determined (NA)
     non_empty_filenames <- filenames[!is.na(file_sizes) & file_sizes != 0]

     # 'converged_props':  proportions of documents that converged
     # 'topic_diffs':      topic differences
     for (file_substring in c('converged_props', 'topic_diffs')){
          graph_series_from_files(non_empty_filenames, file_substring)
     }
}, finally = setwd(current_directory))