From a8ffcf4d8fda03a3471179e2551e2fd40c015407 Mon Sep 17 00:00:00 2001 From: Zach Stednick Date: Thu, 21 Nov 2013 16:16:18 -0800 Subject: [PATCH 1/9] readability improvement, update gitignore --- .gitignore | 5 ++--- package_installer.R | 2 +- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index fcb4ec2..fc39518 100644 --- a/.gitignore +++ b/.gitignore @@ -1,10 +1,9 @@ .DS_Store .dropbox +*.Rhistory code_check* nohup.out -+*.pdf +*.pdf slides/01-Slides/cache slides/01-Slides/data slides/05-Slides/cache - - diff --git a/package_installer.R b/package_installer.R index 2f44f15..991e8ff 100644 --- a/package_installer.R +++ b/package_installer.R @@ -33,7 +33,7 @@ cran.packages <- c("e1071", "tm", "XML") -cat("This script will now attempt to install all of the R packages used in 'Machine Learning for Hackers'") +cat("This script will now attempt to install all of the R packages used in 'Machine Learning for Hackers'\n") for(p in cran.packages) { if(!suppressWarnings(require(p, character.only = TRUE, quietly = TRUE))) { From 20fc25254cb7f6baa524eafb3c36e1b94de550d3 Mon Sep 17 00:00:00 2001 From: stedy Date: Sun, 24 Nov 2013 20:42:33 -0800 Subject: [PATCH 2/9] summarise from ddply wants dates in POSIXct --- .gitignore | 1 + 04-Ranking/priority_inbox.R | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index fc39518..190e318 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,7 @@ code_check* nohup.out *.pdf +*.csv slides/01-Slides/cache slides/01-Slides/data slides/05-Slides/cache diff --git a/04-Ranking/priority_inbox.R b/04-Ranking/priority_inbox.R index 94355c5..f59377f 100644 --- a/04-Ranking/priority_inbox.R +++ b/04-Ranking/priority_inbox.R @@ -129,6 +129,7 @@ pattern1 <- "%a, %d %b %Y %H:%M:%S" pattern2 <- "%d %b %Y %H:%M:%S" allparse.df$Date <- date.converter(allparse.df$Date, pattern1, pattern2) +allparse.df$Date <- as.POSIXct(allparse.df$Date) # Convert emails and subjects to lower-case allparse.df$Subject <- tolower(allparse.df$Subject) @@ -142,7 +143,7 @@ priority.df <- allparse.df[with(allparse.df, order(Date)), ] priority.train <- priority.df[1:(round(nrow(priority.df) / 2)), ] # The first step is to create rank weightings for all of the features. -# We begin with the simpliest: who the email is from. +# We begin with the simplest: who the email is from. # Calculate the frequency of correspondence with all emailers in the training set from.weight <- ddply(priority.train, .(From.EMail), From a2247131c5f8e98b497912b48d3da4a2e46fc62b Mon Sep 17 00:00:00 2001 From: stedy Date: Sun, 24 Nov 2013 20:48:06 -0800 Subject: [PATCH 3/9] manually add images folder to write pdfs to --- 05-Regression/chapter05.R | 1 + 1 file changed, 1 insertion(+) diff --git a/05-Regression/chapter05.R b/05-Regression/chapter05.R index cb35879..d60bacb 100644 --- a/05-Regression/chapter05.R +++ b/05-Regression/chapter05.R @@ -18,6 +18,7 @@ # Otherwise you will see errors when loading data or saving figures! library('ggplot2') +dir.create("images") # First snippet ages <- read.csv(file.path('data', 'longevity.csv')) From 34d82e589ddf392ce3195e4debfba2d81d5dc772 Mon Sep 17 00:00:00 2001 From: stedy Date: Sun, 24 Nov 2013 22:08:28 -0800 Subject: [PATCH 4/9] added Hmisc to avoid error in snippet 30 --- 06-Regularization/chapter06.R | 1 + 1 file changed, 1 insertion(+) diff --git a/06-Regularization/chapter06.R b/06-Regularization/chapter06.R index 741b9d3..bf63e6f 100644 --- a/06-Regularization/chapter06.R +++ b/06-Regularization/chapter06.R @@ -18,6 +18,7 @@ # Otherwise you will see errors when loading data or saving figures! library('ggplot2') +library('Hmisc') # First snippet set.seed(1) From 592df11694d73ba00478d65f6c98cae4089f5c81 Mon Sep 17 00:00:00 2001 From: Zach Stednick Date: Mon, 25 Nov 2013 11:33:45 -0800 Subject: [PATCH 5/9] corrected spelling --- 07-Optimization/chapter07.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/07-Optimization/chapter07.R b/07-Optimization/chapter07.R index 4cf79ab..6a18de9 100644 --- a/07-Optimization/chapter07.R +++ b/07-Optimization/chapter07.R @@ -237,7 +237,7 @@ propose.modified.cipher <- function(cipher) # Seventeenth code snippet load(file.path('data', 'lexical_database.Rdata')) -# Eighteength code snippet +# Eighteenth code snippet lexical.database[['a']] lexical.database[['the']] lexical.database[['he']] From 670165c2e4d9dd036d7f22634ba2f041c906c072 Mon Sep 17 00:00:00 2001 From: Zach Stednick Date: Mon, 25 Nov 2013 11:35:51 -0800 Subject: [PATCH 6/9] added suppressWarnings since SD for cor is 0 --- 10-Recommendations/chapter10.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/10-Recommendations/chapter10.R b/10-Recommendations/chapter10.R index 8c6a87a..93ed680 100644 --- a/10-Recommendations/chapter10.R +++ b/10-Recommendations/chapter10.R @@ -143,7 +143,7 @@ row.names(user.package.matrix) <- user.package.matrix[, 1] user.package.matrix <- user.package.matrix[, -1] # Tenth code snippet -similarities <- cor(user.package.matrix) +suppressWarnings(similarities <- cor(user.package.matrix)) nrow(similarities) #[1] 2487 From 0aa7f0bbc4c9dbc0f76f663005b5604886e75194 Mon Sep 17 00:00:00 2001 From: Zach Stednick Date: Mon, 25 Nov 2013 13:53:05 -0800 Subject: [PATCH 7/9] igraph now uses induced.subgraph --- 11-SNA/02_twitter_net.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/11-SNA/02_twitter_net.R b/11-SNA/02_twitter_net.R index 00df079..b371ca5 100644 --- a/11-SNA/02_twitter_net.R +++ b/11-SNA/02_twitter_net.R @@ -65,10 +65,10 @@ user.net <- set.vertex.attribute(user.net, "Label", value = get.vertex.attribut # Next, extract the 2-core, and remove pendants generated as a result user.cores <- graph.coreness(user.net, mode = "in") -user.clean <- subgraph(user.net, which(user.cores > 1) - 1) +user.clean <- induced.subgraph(user.net, which(user.cores > 1) - 1) # Finally, extract ego.net -user.ego <- subgraph(user.net, c(0, neighbors(user.net, user, mode = "out"))) +user.ego <- induced.subgraph(user.net, c(0, neighbors(user.net, user, mode = "out"))) # Add hierarchical clustering data to network user.sp <- shortest.paths(user.ego) From 096787b4c2fd7e4ba0882ad11ce9607069dd0494 Mon Sep 17 00:00:00 2001 From: Zach Stednick Date: Mon, 25 Nov 2013 13:53:39 -0800 Subject: [PATCH 8/9] updated to show actual scripts for chap 11 --- fast_check.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fast_check.R b/fast_check.R index 3eceb6a..4ca4cc3 100644 --- a/fast_check.R +++ b/fast_check.R @@ -39,7 +39,9 @@ source('chapter10.R') setwd('..') #setwd('11-SNA') -#source('chapter09.R') +#source('01_google_sg.R') +#source('02_twitter_net.R') +#source('03_twitter_rec.R') #setwd('..') setwd('12-Model_Comparison') From d41f91b60b76b3186ddc4b72abed4dbb9193e544 Mon Sep 17 00:00:00 2001 From: Zach Stednick Date: Mon, 25 Nov 2013 16:49:57 -0800 Subject: [PATCH 9/9] file opened in 'rb' mode resolves Issue #16 --- 03-Classification/email_classify.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/03-Classification/email_classify.R b/03-Classification/email_classify.R index 2378cae..0e3c9e6 100644 --- a/03-Classification/email_classify.R +++ b/03-Classification/email_classify.R @@ -59,7 +59,7 @@ ggsave(plot = ex1, # words as features get.msg <- function(path) { - con <- file(path, open = "rt", encoding = "latin1") + con <- file(path, open = "rb", encoding = "latin1") text <- readLines(con) # The message always begins after the first full line break msg <- text[seq(which(text == "")[1] + 1, length(text), 1)]