From a8ffcf4d8fda03a3471179e2551e2fd40c015407 Mon Sep 17 00:00:00 2001
From: Zach Stednick <zstednic@fhcrc.org>
Date: Thu, 21 Nov 2013 16:16:18 -0800
Subject: [PATCH 1/9] readability improvement, update gitignore

---
 .gitignore          | 5 ++---
 package_installer.R | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/.gitignore b/.gitignore
index fcb4ec2..fc39518 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,9 @@
 .DS_Store
 .dropbox
+*.Rhistory
 code_check*
 nohup.out
-+*.pdf
+*.pdf
 slides/01-Slides/cache
 slides/01-Slides/data
 slides/05-Slides/cache
-
-
diff --git a/package_installer.R b/package_installer.R
index 2f44f15..991e8ff 100644
--- a/package_installer.R
+++ b/package_installer.R
@@ -33,7 +33,7 @@ cran.packages <- c("e1071",
                    "tm",
                    "XML")
 
-cat("This script will now attempt to install all of the R packages used in 'Machine Learning for Hackers'")
+cat("This script will now attempt to install all of the R packages used in 'Machine Learning for Hackers'\n")
 
 for(p in cran.packages) {
     if(!suppressWarnings(require(p, character.only = TRUE, quietly = TRUE))) {

From 20fc25254cb7f6baa524eafb3c36e1b94de550d3 Mon Sep 17 00:00:00 2001
From: stedy <zstednic@fhcrc.org>
Date: Sun, 24 Nov 2013 20:42:33 -0800
Subject: [PATCH 2/9] convert Date to POSIXct, as summarise in ddply requires

---
 .gitignore                  | 1 +
 04-Ranking/priority_inbox.R | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index fc39518..190e318 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 code_check*
 nohup.out
 *.pdf
+*.csv
 slides/01-Slides/cache
 slides/01-Slides/data
 slides/05-Slides/cache
diff --git a/04-Ranking/priority_inbox.R b/04-Ranking/priority_inbox.R
index 94355c5..f59377f 100644
--- a/04-Ranking/priority_inbox.R
+++ b/04-Ranking/priority_inbox.R
@@ -129,6 +129,7 @@ pattern1 <- "%a, %d %b %Y %H:%M:%S"
 pattern2 <- "%d %b %Y %H:%M:%S"
 
 allparse.df$Date <- date.converter(allparse.df$Date, pattern1, pattern2)
+allparse.df$Date <- as.POSIXct(allparse.df$Date)
 
 # Convert emails and subjects to lower-case
 allparse.df$Subject <- tolower(allparse.df$Subject)
@@ -142,7 +143,7 @@ priority.df <- allparse.df[with(allparse.df, order(Date)), ]
 priority.train <- priority.df[1:(round(nrow(priority.df) / 2)), ]
 
 # The first step is to create rank weightings for all of the features.
-# We begin with the simpliest: who the email is from.
+# We begin with the simplest: who the email is from.
 
 # Calculate the frequency of correspondence with all emailers in the training set
 from.weight <- ddply(priority.train, .(From.EMail),

From a2247131c5f8e98b497912b48d3da4a2e46fc62b Mon Sep 17 00:00:00 2001
From: stedy <zstednic@fhcrc.org>
Date: Sun, 24 Nov 2013 20:48:06 -0800
Subject: [PATCH 3/9] manually add images folder to write pdfs to

---
 05-Regression/chapter05.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/05-Regression/chapter05.R b/05-Regression/chapter05.R
index cb35879..d60bacb 100644
--- a/05-Regression/chapter05.R
+++ b/05-Regression/chapter05.R
@@ -18,6 +18,7 @@
 # Otherwise you will see errors when loading data or saving figures!
 
 library('ggplot2')
+dir.create("images")
 
 # First snippet
 ages <- read.csv(file.path('data', 'longevity.csv'))

From 34d82e589ddf392ce3195e4debfba2d81d5dc772 Mon Sep 17 00:00:00 2001
From: stedy <zstednic@fhcrc.org>
Date: Sun, 24 Nov 2013 22:08:28 -0800
Subject: [PATCH 4/9] added Hmisc to avoid error in snippet 30

---
 06-Regularization/chapter06.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/06-Regularization/chapter06.R b/06-Regularization/chapter06.R
index 741b9d3..bf63e6f 100644
--- a/06-Regularization/chapter06.R
+++ b/06-Regularization/chapter06.R
@@ -18,6 +18,7 @@
 # Otherwise you will see errors when loading data or saving figures!
 
 library('ggplot2')
+library('Hmisc')
 
 # First snippet
 set.seed(1)

From 592df11694d73ba00478d65f6c98cae4089f5c81 Mon Sep 17 00:00:00 2001
From: Zach Stednick <zstednic@fhcrc.org>
Date: Mon, 25 Nov 2013 11:33:45 -0800
Subject: [PATCH 5/9] corrected spelling

---
 07-Optimization/chapter07.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/07-Optimization/chapter07.R b/07-Optimization/chapter07.R
index 4cf79ab..6a18de9 100644
--- a/07-Optimization/chapter07.R
+++ b/07-Optimization/chapter07.R
@@ -237,7 +237,7 @@ propose.modified.cipher <- function(cipher)
 # Seventeenth code snippet
 load(file.path('data', 'lexical_database.Rdata'))
 
-# Eighteength code snippet
+# Eighteenth code snippet
 lexical.database[['a']]
 lexical.database[['the']]
 lexical.database[['he']]

From 670165c2e4d9dd036d7f22634ba2f041c906c072 Mon Sep 17 00:00:00 2001
From: Zach Stednick <zstednic@fhcrc.org>
Date: Mon, 25 Nov 2013 11:35:51 -0800
Subject: [PATCH 6/9] wrap cor in suppressWarnings since it warns when the SD is 0

---
 10-Recommendations/chapter10.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/10-Recommendations/chapter10.R b/10-Recommendations/chapter10.R
index 8c6a87a..93ed680 100644
--- a/10-Recommendations/chapter10.R
+++ b/10-Recommendations/chapter10.R
@@ -143,7 +143,7 @@ row.names(user.package.matrix) <- user.package.matrix[, 1]
 user.package.matrix <- user.package.matrix[, -1]
 
 # Tenth code snippet
-similarities <- cor(user.package.matrix)
+suppressWarnings(similarities <- cor(user.package.matrix))
 
 nrow(similarities)
 #[1] 2487

From 0aa7f0bbc4c9dbc0f76f663005b5604886e75194 Mon Sep 17 00:00:00 2001
From: Zach Stednick <zstednic@fhcrc.org>
Date: Mon, 25 Nov 2013 13:53:05 -0800
Subject: [PATCH 7/9] replace subgraph with induced.subgraph, which igraph now uses

---
 11-SNA/02_twitter_net.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/11-SNA/02_twitter_net.R b/11-SNA/02_twitter_net.R
index 00df079..b371ca5 100644
--- a/11-SNA/02_twitter_net.R
+++ b/11-SNA/02_twitter_net.R
@@ -65,10 +65,10 @@ user.net  <- set.vertex.attribute(user.net, "Label", value = get.vertex.attribut
 
 # Next, extract the 2-core, and remove pendants generated as a result
 user.cores <- graph.coreness(user.net, mode = "in")
-user.clean <- subgraph(user.net, which(user.cores > 1) - 1)
+user.clean <- induced.subgraph(user.net, which(user.cores > 1) - 1)
 
 # Finally, extract ego.net
-user.ego <- subgraph(user.net, c(0, neighbors(user.net, user, mode = "out")))
+user.ego <- induced.subgraph(user.net, c(0, neighbors(user.net, user, mode = "out")))
 
 # Add hierarchical clustering data to network
 user.sp <- shortest.paths(user.ego)

From 096787b4c2fd7e4ba0882ad11ce9607069dd0494 Mon Sep 17 00:00:00 2001
From: Zach Stednick <zstednic@fhcrc.org>
Date: Mon, 25 Nov 2013 13:53:39 -0800
Subject: [PATCH 8/9] updated to show actual scripts for chap 11

---
 fast_check.R | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fast_check.R b/fast_check.R
index 3eceb6a..4ca4cc3 100644
--- a/fast_check.R
+++ b/fast_check.R
@@ -39,7 +39,9 @@ source('chapter10.R')
 setwd('..')
 
 #setwd('11-SNA')
-#source('chapter09.R')
+#source('01_google_sg.R')
+#source('02_twitter_net.R')
+#source('03_twitter_rec.R')
 #setwd('..')
 
 setwd('12-Model_Comparison')

From d41f91b60b76b3186ddc4b72abed4dbb9193e544 Mon Sep 17 00:00:00 2001
From: Zach Stednick <zstednic@fhcrc.org>
Date: Mon, 25 Nov 2013 16:49:57 -0800
Subject: [PATCH 9/9] file opened in 'rb' mode resolves Issue #16

---
 03-Classification/email_classify.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/03-Classification/email_classify.R b/03-Classification/email_classify.R
index 2378cae..0e3c9e6 100644
--- a/03-Classification/email_classify.R
+++ b/03-Classification/email_classify.R
@@ -59,7 +59,7 @@ ggsave(plot = ex1,
 # words as features
 get.msg <- function(path)
 {
-  con <- file(path, open = "rt", encoding = "latin1")
+  con <- file(path, open = "rb", encoding = "latin1")
   text <- readLines(con)
   # The message always begins after the first full line break
   msg <- text[seq(which(text == "")[1] + 1, length(text), 1)]