Skip to content

Feature/performance improvements #9

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 18, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 22 additions & 11 deletions Sources/Code/CSVImporter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ public class CSVImporter<T> {
self.csvFile = TextFile(path: Path(path))
self.delimiter = delimiter
self.lineEnding = lineEnding

delimiterQuoteDelimiter = "\(delimiter)\"\"\(delimiter)"
delimiterDelimiter = delimiter+delimiter
quoteDelimiter = "\"\"\(delimiter)"
delimiterQuote = "\(delimiter)\"\""
}

/// Creates a `CSVImporter` object with required configuration options.
Expand Down Expand Up @@ -148,8 +153,10 @@ public class CSVImporter<T> {
}
if let csvStreamReader = self.csvFile.streamReader(lineEnding.rawValue) {
for line in csvStreamReader {
let valuesInLine = readValuesInLine(line)
closure(valuesInLine: valuesInLine)
autoreleasepool {
let valuesInLine = readValuesInLine(line)
closure(valuesInLine: valuesInLine)
}
}

return true
Expand Down Expand Up @@ -178,37 +185,41 @@ public class CSVImporter<T> {
return lineEnding
}

// Private constants used by `readValuesInLine(_:)` when splitting a line into fields.
// They are stored as instance properties so each one is created once per importer
// rather than on every call.
//
// The regex patterns are constant literals, so a failing `try!` would be a programming
// error; the force-try lint rule is therefore suppressed on those lines.
// NOTE(review): none of the patterns contains a letter, so `.CaseInsensitive` looks like
// a no-op here — confirm before removing.

/// Matches a component that starts with a double quote and contains no further double quote.
/// `readValuesInLine(_:)` treats such a component as the first piece of a field to be merged.
private let startPartRegex = try! NSRegularExpression(pattern: "\\A\"[^\"]*\\z", options: .CaseInsensitive) // swiftlint:disable:this force_try
/// Matches a component that contains no double quote at all.
/// While the following component matches, it is appended to the pieces being merged.
private let middlePartRegex = try! NSRegularExpression(pattern: "\\A[^\"]*\\z", options: .CaseInsensitive) // swiftlint:disable:this force_try
/// Matches a component that ends with a double quote and contains no double quote before it.
/// Presumably marks the last piece of a field being merged — its use is outside this excerpt; verify.
private let endPartRegex = try! NSRegularExpression(pattern: "\\A[^\"]*\"\\z", options: .CaseInsensitive) // swiftlint:disable:this force_try
/// Placeholder (U+001A) that `readValuesInLine(_:)` puts in place of every `""` pair
/// before the line is split on the delimiter.
private let substitute = "\u{001a}"
/// The delimiter, two double quotes, then the delimiter again. Assigned in `init` from `delimiter`;
/// occurrences in a line are replaced by `delimiterDelimiter`.
private let delimiterQuoteDelimiter:String
/// The delimiter repeated twice. Assigned in `init`; replacement text for `delimiterQuoteDelimiter`.
private let delimiterDelimiter:String
/// Two double quotes followed by the delimiter. Assigned in `init`; when a line starts with it,
/// the two leading characters are dropped.
private let quoteDelimiter:String
/// The delimiter followed by two double quotes. Assigned in `init`; when a line ends with it,
/// the line is shortened by two.
private let delimiterQuote:String

/// Reads the line and returns the fields found. Handles double quotes according to RFC 4180.
///
/// - Parameters:
/// - line: The line to read values from.
/// - Returns: An array of values found in line.
func readValuesInLine(line: String) -> [String] {
var correctedLine = line.stringByReplacingOccurrencesOfString("\(delimiter)\"\"\(delimiter)", withString: delimiter+delimiter)
var correctedLine = line.stringByReplacingOccurrencesOfString(delimiterQuoteDelimiter, withString: delimiterDelimiter)

if correctedLine.hasPrefix("\"\"\(delimiter)") {
if correctedLine.hasPrefix(quoteDelimiter) {
correctedLine = correctedLine.substringFromIndex(correctedLine.startIndex.advancedBy(2))
}
if correctedLine.hasSuffix("\(delimiter)\"\"") {
if correctedLine.hasSuffix(delimiterQuote) {
correctedLine = correctedLine.substringToIndex(correctedLine.startIndex.advancedBy(correctedLine.utf16.count - 2))
}

let substitute = "\u{001a}"
correctedLine = correctedLine.stringByReplacingOccurrencesOfString("\"\"", withString: substitute)
var components = correctedLine.componentsSeparatedByString(delimiter)

var index = 0
while index < components.count {
let element = components[index]

let startPartRegex = try! NSRegularExpression(pattern: "\\A\"[^\"]*\\z", options: .CaseInsensitive) // swiftlint:disable:this force_try

if index < components.count-1 && startPartRegex.firstMatchInString(element, options: .Anchored, range: element.fullRange) != nil {
var elementsToMerge = [element]

let middlePartRegex = try! NSRegularExpression(pattern: "\\A[^\"]*\\z", options: .CaseInsensitive) // swiftlint:disable:this force_try
let endPartRegex = try! NSRegularExpression(pattern: "\\A[^\"]*\"\\z", options: .CaseInsensitive) // swiftlint:disable:this force_try

while middlePartRegex.firstMatchInString(components[index+1], options: .Anchored, range: components[index+1].fullRange) != nil {
elementsToMerge.append(components[index+1])
components.removeAtIndex(index+1)
Expand Down