Skip to content

Feature/fix line ending #7

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 15, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 40 additions & 5 deletions Sources/Code/CSVImporter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,24 @@ import Foundation
import FileKit
import HandySwift

/// An enum to represent the possible line endings of CSV files.
///
/// The raw value of each case is the literal character sequence that terminates a line.
public enum LineEnding : String {
/// Line feed (`\n`).
case NL = "\n"
/// Carriage return (`\r`).
case CR = "\r"
/// Carriage return followed by line feed (`\r\n`).
case CRLF = "\r\n"
/// The line ending has not been specified; `CSVImporter` determines it from the file before importing.
case Unknown = ""
}

/// Number of bytes read from the file when detecting its line ending.
private let chunkSize = 4096

/// Importer for CSV files that maps your lines to a specified data structure.
public class CSVImporter<T> {

// MARK: - Stored Instance Properties

let csvFile: TextFile
let delimiter: String
var lineEnding: LineEnding

var lastProgressReport: NSDate?

Expand All @@ -25,7 +36,7 @@ public class CSVImporter<T> {
var failClosure: (() -> Void)?


// MARK: - Computes Instance Properties
// MARK: - Computed Instance Properties

var shouldReportProgress: Bool {
get {
Expand All @@ -42,9 +53,11 @@ public class CSVImporter<T> {
/// - Parameters:
/// - path: The path to the CSV file to import.
/// - delimiter: The delimiter used within the CSV file for separating fields. Defaults to ",".
public init(path: String, delimiter: String = ",") {
/// - lineEnding: The lineEnding of the file. If not specified will be determined automatically.
public init(path: String, delimiter: String = ",", lineEnding: LineEnding = .Unknown) {
self.csvFile = TextFile(path: Path(path))
self.delimiter = delimiter
self.lineEnding = lineEnding
}


Expand Down Expand Up @@ -120,7 +133,10 @@ public class CSVImporter<T> {
/// - valuesInLine: The values found within a line.
/// - Returns: `true` on finish or `false` if can't read file.
func importLines(closure: (valuesInLine: [String]) -> Void) -> Bool {
if let csvStreamReader = self.csvFile.streamReader() {
if lineEnding == .Unknown {
lineEnding = lineEndingForFile()
}
if let csvStreamReader = self.csvFile.streamReader(lineEnding.rawValue) {
for line in csvStreamReader {
let valuesInLine = readValuesInLine(line)
closure(valuesInLine: valuesInLine)
Expand All @@ -132,19 +148,38 @@ public class CSVImporter<T> {
}
}

/// Determines the line ending for the CSV file by inspecting its first `chunkSize` bytes.
///
/// The raw bytes are searched directly instead of being decoded into a string first: a fixed-size
/// chunk may end in the middle of a multi-byte UTF-8 character, which would make decoding fail and
/// hide line endings that are present. CR and LF are single bytes in UTF-8 and never occur inside a
/// multi-byte sequence, so the byte search finds exactly the same line endings.
///
/// - Returns: the lineEnding for the CSV file or default of NL.
private func lineEndingForFile() -> LineEnding {
    guard let fileHandle = self.csvFile.handleForReading else {
        return .NL
    }

    let data = fileHandle.readDataOfLength(chunkSize)
    let searchRange = NSRange(location: 0, length: data.length)

    // Reports whether the byte sequence of the given line ending occurs anywhere in the chunk.
    let chunkContains = { (candidate: LineEnding) -> Bool in
        let pattern = Array(candidate.rawValue.utf8)
        let needle = NSData(bytes: pattern, length: pattern.count)
        return data.rangeOfData(needle, options: [], range: searchRange).location != NSNotFound
    }

    // CRLF must be checked first, because a CRLF file also contains both CR and NL.
    if chunkContains(.CRLF) {
        return .CRLF
    }
    if chunkContains(.NL) {
        return .NL
    }
    if chunkContains(.CR) {
        return .CR
    }

    // No line ending found in the inspected chunk (e.g. empty or single-line file).
    return .NL
}

/// Reads the line and returns the fields found. Handles double quotes according to RFC 4180.
///
/// - Parameters:
/// - line: The line to read values from.
/// - Returns: An array of values found in line.
func readValuesInLine(line: String) -> [String] {
var correctedLine = line.stringByReplacingOccurrencesOfString("\(delimiter)\"\"\(delimiter)", withString: delimiter+delimiter)
correctedLine = correctedLine.stringByReplacingOccurrencesOfString("\r\n", withString: "\n")

if correctedLine.hasPrefix("\"\"\(delimiter)") {
correctedLine = correctedLine.substringFromIndex(correctedLine.startIndex.advancedBy(2))
}
if correctedLine.hasSuffix("\(delimiter)\"\"") || correctedLine.hasSuffix("\(delimiter)\"\"\n") {
if correctedLine.hasSuffix("\(delimiter)\"\"") {
correctedLine = correctedLine.substringToIndex(correctedLine.startIndex.advancedBy(correctedLine.utf16.count - 2))
}

Expand Down
138 changes: 138 additions & 0 deletions Tests/Code/CSVImporterSpec.swift
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,146 @@ class CSVImporterSpec: QuickSpec {
}

expect(recordValues).toEventuallyNot(beNil(), timeout: 10)
expect(recordValues!.first!).toEventually(equal(self.validTeamsFirstRecord()))
}

it("imports data from CSV file with headers Specifying lineEnding") {
let path = self.pathForResourceFile("Teams.csv")
var recordValues: [[String: String]]?

if let path = path {
let importer = CSVImporter<[String: String]>(path: path, lineEnding: .CRLF)

importer.startImportingRecords(structure: { (headerValues) -> Void in
print(headerValues)
}, recordMapper: { (recordValues) -> [String : String] in
return recordValues
}).onFail {
print("Did fail")
}.onFinish { importedRecords in
print("Did finish import, first array: \(importedRecords.first)")
recordValues = importedRecords
}
}

expect(recordValues).toEventuallyNot(beNil(), timeout: 10)
expect(recordValues!.first!).toEventually(equal(self.validTeamsFirstRecord()))
}

it("imports data from CSV file with headers Specifying lineEnding NL") {
let path = self.convertTeamsLineEndingTo(.NL)
var recordValues: [[String: String]]?

if let path = path {
let importer = CSVImporter<[String: String]>(path: path, lineEnding: .NL)

importer.startImportingRecords(structure: { (headerValues) -> Void in
print(headerValues)
}, recordMapper: { (recordValues) -> [String : String] in
return recordValues
}).onFail {
print("Did fail")
}.onFinish { importedRecords in
print("Did finish import, first array: \(importedRecords.first)")
recordValues = importedRecords
}
}

expect(recordValues).toEventuallyNot(beNil(), timeout: 10)
expect(recordValues!.first!).toEventually(equal(self.validTeamsFirstRecord()))

self.deleteFileSilently(path)
}

it("imports data from CSV file with headers with lineEnding CR Sniffs lineEnding") {
let path = self.convertTeamsLineEndingTo(.CR)
var recordValues: [[String: String]]?

if let path = path {
let importer = CSVImporter<[String: String]>(path: path) // don't specify lineEnding

importer.startImportingRecords(structure: { (headerValues) -> Void in
print(headerValues)
}, recordMapper: { (recordValues) -> [String : String] in
return recordValues
}).onFail {
print("Did fail")
}.onFinish { importedRecords in
print("Did finish import, first array: \(importedRecords.first)")
recordValues = importedRecords
}
}

expect(recordValues).toEventuallyNot(beNil(), timeout: 10)
expect(recordValues!.first!).toEventually(equal(self.validTeamsFirstRecord()))

self.deleteFileSilently(path)
}

// Importing the CRLF fixture while claiming it uses NL must not produce the valid first record.
it("imports data from CSV file with headers Specifying Wrong lineEnding Fails") {
    let path = self.pathForResourceFile("Teams.csv")
    var recordValues: [[String: String]]?

    if let path = path {
        // Sanity check: the fixture really uses CRLF, so `.NL` below is the wrong line ending.
        do {
            let string = try String(contentsOfFile: path)
            expect(string.containsString(LineEnding.CRLF.rawValue)).to(beTrue())
        } catch { }

        let importer = CSVImporter<[String: String]>(path: path, lineEnding: .NL) // wrong

        importer.startImportingRecords(structure: { (headerValues) -> Void in
            print(headerValues)
        }, recordMapper: { (recordValues) -> [String : String] in
            return recordValues
        }).onFail {
            print("Did fail")
        }.onFinish { importedRecords in
            print("Did finish import, first array: \(importedRecords.first)")
            recordValues = importedRecords
        }
    }

    expect(recordValues).toEventuallyNot(beNil(), timeout: 10)
    expect(recordValues!.first!).toEventuallyNot(equal(self.validTeamsFirstRecord()))

    // `path` is the bundled "Teams.csv" fixture itself, not a temporary copy, so it must not be
    // deleted here: the other specs read the same resource.
}


it("zz") { }
}

/// Returns the values the specs expect for the first imported record of the `Teams.csv` fixture.
///
/// - Returns: A dictionary of field values; the keys are presumably the CSV header names (confirm against the fixture).
func validTeamsFirstRecord() -> [String:String] {
return ["H": "426", "SOA": "23", "SO": "19", "WCWin": "", "AB": "1372", "BPF": "103", "IPouts": "828", "PPF": "98", "3B": "37", "BB": "60", "HBP": "", "lgID": "NA", "ER": "109", "CG": "22", "name": "Boston Red Stockings", "yearID": "1871", "divID": "", "teamIDretro": "BS1", "FP": "0.83", "R": "401", "G": "31", "BBA": "42", "HA": "367", "RA": "303", "park": "South End Grounds I", "DivWin": "", "WSWin": "", "HR": "3", "E": "225", "ERA": "3.55", "franchID": "BNA", "DP": "", "L": "10", "LgWin": "N", "W": "20", "SV": "3", "SHO": "1", "Rank": "3", "Ghome": "", "teamID": "BS1", "teamIDlahman45": "BS1", "HRA": "2", "SF": "", "attendance": "", "CS": "", "teamIDBR": "BOS", "SB": "73", "2B": "70"]
}

/// Writes a copy of the bundled `Teams.csv` fixture whose CRLF line endings are replaced by `lineEnding`.
///
/// - Parameters:
///   - lineEnding: The line ending the converted copy should use.
/// - Returns: The path of the converted copy in the temporary directory, or `nil` if the fixture
///   could not be found, read or written (the reason is printed so the failing spec can be diagnosed).
func convertTeamsLineEndingTo(lineEnding:LineEnding) -> String? {
    guard let path = pathForResourceFile("Teams.csv") else {
        print("Could not find Teams.csv in the test bundle")
        return nil
    }

    do {
        let original = try String(contentsOfFile: path)
        // The conversion below only makes sense if the fixture really uses CRLF.
        expect(original.containsString(LineEnding.CRLF.rawValue)).to(beTrue())

        let converted = original.stringByReplacingOccurrencesOfString(LineEnding.CRLF.rawValue, withString: lineEnding.rawValue)
        let tempPath = (NSTemporaryDirectory() as NSString).stringByAppendingPathComponent("TeamsNewLineEnding.csv")
        try converted.writeToFile(tempPath, atomically: false, encoding: NSUTF8StringEncoding)
        return tempPath
    } catch {
        // Surface the reason instead of silently returning nil.
        print("Could not convert line endings of Teams.csv: \(error)")
        return nil
    }
}

/// Looks up a resource file in the bundle that contains `CSVImporterSpec`.
///
/// - Parameters:
///   - name: The full file name of the resource, including its extension.
/// - Returns: The path of the resource, or `nil` if the bundle has no such file.
func pathForResourceFile(name:String) -> String? {
    let specBundle = NSBundle(forClass: CSVImporterSpec.classForCoder())
    let resourcePath = specBundle.pathForResource(name, ofType: nil)
    return resourcePath
}

/// Removes the file at `path`, doing nothing for a `nil` path and ignoring any removal error.
///
/// - Parameters:
///   - path: The path of the file to delete, or `nil`.
func deleteFileSilently(path:String?) {
    if let filePath = path {
        // Best effort: a failed removal is deliberately ignored.
        _ = try? NSFileManager.defaultManager().removeItemAtPath(filePath)
    }
}

}