diff --git a/.gitignore b/.gitignore index fcb4ec2..190e318 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,10 @@ .DS_Store .dropbox +*.Rhistory code_check* nohup.out -+*.pdf +*.pdf +*.csv slides/01-Slides/cache slides/01-Slides/data slides/05-Slides/cache - - diff --git a/03-Classification/email_classify.R b/03-Classification/email_classify.R index 2378cae..0e3c9e6 100644 --- a/03-Classification/email_classify.R +++ b/03-Classification/email_classify.R @@ -59,7 +59,7 @@ ggsave(plot = ex1, # words as features get.msg <- function(path) { - con <- file(path, open = "rt", encoding = "latin1") + con <- file(path, open = "rb", encoding = "latin1") text <- readLines(con) # The message always begins after the first full line break msg <- text[seq(which(text == "")[1] + 1, length(text), 1)] diff --git a/04-Ranking/priority_inbox.R b/04-Ranking/priority_inbox.R index 94355c5..f59377f 100644 --- a/04-Ranking/priority_inbox.R +++ b/04-Ranking/priority_inbox.R @@ -129,6 +129,7 @@ pattern1 <- "%a, %d %b %Y %H:%M:%S" pattern2 <- "%d %b %Y %H:%M:%S" allparse.df$Date <- date.converter(allparse.df$Date, pattern1, pattern2) +allparse.df$Date <- as.POSIXct(allparse.df$Date) # Convert emails and subjects to lower-case allparse.df$Subject <- tolower(allparse.df$Subject) @@ -142,7 +143,7 @@ priority.df <- allparse.df[with(allparse.df, order(Date)), ] priority.train <- priority.df[1:(round(nrow(priority.df) / 2)), ] # The first step is to create rank weightings for all of the features. -# We begin with the simpliest: who the email is from. +# We begin with the simplest: who the email is from. # Calculate the frequency of correspondence with all emailers in the training set from.weight <- ddply(priority.train, .(From.EMail), diff --git a/05-Regression/chapter05.R b/05-Regression/chapter05.R index cb35879..d60bacb 100644 --- a/05-Regression/chapter05.R +++ b/05-Regression/chapter05.R @@ -18,6 +18,7 @@ # Otherwise you will see errors when loading data or saving figures! library('ggplot2') +dir.create("images") # First snippet ages <- read.csv(file.path('data', 'longevity.csv')) diff --git a/06-Regularization/chapter06.R b/06-Regularization/chapter06.R index 741b9d3..bf63e6f 100644 --- a/06-Regularization/chapter06.R +++ b/06-Regularization/chapter06.R @@ -18,6 +18,7 @@ # Otherwise you will see errors when loading data or saving figures! library('ggplot2') +library('Hmisc') # First snippet set.seed(1) diff --git a/07-Optimization/chapter07.R b/07-Optimization/chapter07.R index 4cf79ab..6a18de9 100644 --- a/07-Optimization/chapter07.R +++ b/07-Optimization/chapter07.R @@ -237,7 +237,7 @@ propose.modified.cipher <- function(cipher) # Seventeenth code snippet load(file.path('data', 'lexical_database.Rdata')) -# Eighteength code snippet +# Eighteenth code snippet lexical.database[['a']] lexical.database[['the']] lexical.database[['he']] diff --git a/10-Recommendations/chapter10.R b/10-Recommendations/chapter10.R index 8c6a87a..93ed680 100644 --- a/10-Recommendations/chapter10.R +++ b/10-Recommendations/chapter10.R @@ -143,7 +143,7 @@ row.names(user.package.matrix) <- user.package.matrix[, 1] user.package.matrix <- user.package.matrix[, -1] # Tenth code snippet -similarities <- cor(user.package.matrix) +suppressWarnings(similarities <- cor(user.package.matrix)) nrow(similarities) #[1] 2487 diff --git a/11-SNA/02_twitter_net.R b/11-SNA/02_twitter_net.R index 00df079..b371ca5 100644 --- a/11-SNA/02_twitter_net.R +++ b/11-SNA/02_twitter_net.R @@ -65,10 +65,10 @@ user.net <- set.vertex.attribute(user.net, "Label", value = get.vertex.attribut # Next, extract the 2-core, and remove pendants generated as a result user.cores <- graph.coreness(user.net, mode = "in") -user.clean <- subgraph(user.net, which(user.cores > 1) - 1) +user.clean <- induced.subgraph(user.net, which(user.cores > 1) - 1) # Finally, extract ego.net -user.ego <- subgraph(user.net, c(0, neighbors(user.net, user, mode = "out"))) +user.ego <- induced.subgraph(user.net, c(0, neighbors(user.net, user, mode = "out"))) # Add hierarchical clustering data to network user.sp <- shortest.paths(user.ego) diff --git a/fast_check.R b/fast_check.R index 3eceb6a..4ca4cc3 100644 --- a/fast_check.R +++ b/fast_check.R @@ -39,7 +39,9 @@ source('chapter10.R') setwd('..') #setwd('11-SNA') -#source('chapter09.R') +#source('01_google_sg.R') +#source('02_twitter_net.R') +#source('03_twitter_rec.R') #setwd('..') setwd('12-Model_Comparison') diff --git a/package_installer.R b/package_installer.R index 2f44f15..991e8ff 100644 --- a/package_installer.R +++ b/package_installer.R @@ -33,7 +33,7 @@ cran.packages <- c("e1071", "tm", "XML") -cat("This script will now attempt to install all of the R packages used in 'Machine Learning for Hackers'") +cat("This script will now attempt to install all of the R packages used in 'Machine Learning for Hackers'\n") for(p in cran.packages) { if(!suppressWarnings(require(p, character.only = TRUE, quietly = TRUE))) {