Skip to content

Commit b4d9427

Browse files
authored
feature: add support for a custom CSVFormat (#353)
* feat(read): add support for custom CSVFormat * test: add unit tests for CSVFormat parameter * fix: resolve ReadWorkbook conflict * refactor: simplify csvFormat initialization in CsvReadWorkbookHolder * feat(csv): add CSV constant definitions * feat(read): add CSV support and refactor ExcelReaderBuilder * feat(write): add CSV support and refactor ExcelWriterBuilder * test(csv): refactor CSV read/write tests * test(csv): refactor testHolder() delimiter with CsvConstant * refactor: add quote method to set quote character and default QuoteMode
1 parent be360e7 commit b4d9427

File tree

16 files changed

+700
-47
lines changed

16 files changed

+700
-47
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
package cn.idev.excel.metadata.csv;
2+
3+
/**
 * Constant definitions for CSV file processing.
 * <p>
 * This is a pure constant holder: it is {@code final} and has a private
 * constructor, so it can neither be instantiated nor subclassed.
 */
public final class CsvConstant {

    /*
     * Commonly used single characters.
     */

    /** A single space character. */
    public static final char SPACE = ' ';

    /** The backslash character. */
    public static final char BACKSLASH = '\\';

    /** The backspace control character. */
    public static final char BACKSPACE = '\b';

    /** The vertical bar (pipe) character. */
    public static final char PIPE = '|';

    /** The double quote character. */
    public static final char DOUBLE_QUOTE = '"';

    /*
     * Line break sequences.
     */

    /** Carriage return. */
    public static final String CR = "\r";

    /** Form feed. */
    public static final String FF = "\f";

    /** Line feed. */
    public static final String LF = "\n";

    /** Carriage return followed by line feed. */
    public static final String CRLF = "\r\n";

    /*
     * Field related strings.
     */

    /** Horizontal tab. */
    public static final String TAB = "\t";

    /** Comma. */
    public static final String COMMA = ",";

    /** The empty string. */
    public static final String EMPTY = "";

    /** The at sign. */
    public static final String AT = "@";

    /*
     * Unicode code points, expressed as one-character strings.
     */

    /** The NUL character (U+0000). */
    public static final String UNICODE_EMPTY = "\u0000";

    /** The NEXT LINE control character (U+0085). */
    public static final String UNICODE_NEXT_LINE = "\u0085";

    /**
     * Misspelled alias of {@link #UNICODE_NEXT_LINE}, kept so existing callers keep compiling.
     *
     * @deprecated use {@link #UNICODE_NEXT_LINE} instead
     */
    @Deprecated
    public static final String UNICODE_NEX_LINE = UNICODE_NEXT_LINE;

    /** The LINE SEPARATOR character (U+2028). */
    public static final String UNICODE_LINE_SEPARATOR = "\u2028";

    /*
     * Database NULL value.
     */

    /** Textual marker for a database NULL: a backslash followed by an upper-case N. */
    public static final String SQL_NULL_STRING = "\\N";

    private CsvConstant() {
        // constant holder, not meant to be instantiated
    }
}

fastexcel-core/src/main/java/cn/idev/excel/metadata/csv/CsvSheet.java

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,13 @@
11
package cn.idev.excel.metadata.csv;
22

3-
import java.io.Closeable;
4-
import java.io.IOException;
5-
import java.math.BigDecimal;
6-
import java.util.Collection;
7-
import java.util.Iterator;
8-
import java.util.List;
9-
import java.util.Map;
10-
3+
import cn.idev.excel.constant.BuiltinFormats;
114
import cn.idev.excel.enums.ByteOrderMarkEnum;
125
import cn.idev.excel.enums.NumericCellTypeEnum;
136
import cn.idev.excel.exception.ExcelGenerateException;
147
import cn.idev.excel.util.DateUtils;
158
import cn.idev.excel.util.ListUtils;
169
import cn.idev.excel.util.NumberDataFormatterUtils;
1710
import cn.idev.excel.util.StringUtils;
18-
import cn.idev.excel.constant.BuiltinFormats;
19-
2011
import lombok.EqualsAndHashCode;
2112
import lombok.Getter;
2213
import lombok.Setter;
@@ -45,6 +36,14 @@
4536
import org.apache.poi.ss.util.CellRangeAddress;
4637
import org.apache.poi.ss.util.PaneInformation;
4738

39+
import java.io.Closeable;
40+
import java.io.IOException;
41+
import java.math.BigDecimal;
42+
import java.util.Collection;
43+
import java.util.Iterator;
44+
import java.util.List;
45+
import java.util.Map;
46+
4847
/**
4948
* csv sheet
5049
*
@@ -90,7 +89,7 @@ public CsvSheet(CsvWorkbook csvWorkbook, Appendable out) {
9089
this.csvWorkbook = csvWorkbook;
9190
this.out = out;
9291
this.rowCacheCount = 100;
93-
this.csvFormat = CSVFormat.DEFAULT;
92+
this.csvFormat = csvWorkbook.getCsvFormat() == null ? CSVFormat.DEFAULT : csvWorkbook.getCsvFormat();
9493
this.lastRowIndex = -1;
9594
}
9695

@@ -290,7 +289,7 @@ public List<CellRangeAddress> getMergedRegions() {
290289

291290
@Override
292291
public Iterator<Row> rowIterator() {
293-
return (Iterator<Row>)(Iterator<? extends Row>)rowCache.iterator();
292+
return (Iterator<Row>) (Iterator<? extends Row>) rowCache.iterator();
294293
}
295294

296295
@Override
@@ -761,7 +760,7 @@ public void flushData() {
761760
Iterator<Cell> cellIterator = row.cellIterator();
762761
int columnIndex = 0;
763762
while (cellIterator.hasNext()) {
764-
CsvCell csvCell = (CsvCell)cellIterator.next();
763+
CsvCell csvCell = (CsvCell) cellIterator.next();
765764
while (csvCell.getColumnIndex() > columnIndex++) {
766765
csvPrinter.print(null);
767766
}
@@ -807,7 +806,7 @@ private String buildCellValue(CsvCell csvCell) {
807806
if (csvCell.getNumberValue() == null) {
808807
return null;
809808
}
810-
//number
809+
// number
811810
if (dataFormat == null) {
812811
dataFormat = BuiltinFormats.GENERAL;
813812
dataFormatString = csvWorkbook.createDataFormat().getFormat(dataFormat);

fastexcel-core/src/main/java/cn/idev/excel/metadata/csv/CsvWorkbook.java

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,10 @@
11
package cn.idev.excel.metadata.csv;
22

3-
import java.io.IOException;
4-
import java.io.OutputStream;
5-
import java.nio.charset.Charset;
6-
import java.util.Iterator;
7-
import java.util.List;
8-
import java.util.Locale;
9-
103
import lombok.EqualsAndHashCode;
114
import lombok.Getter;
125
import lombok.Setter;
136
import org.apache.commons.compress.utils.Lists;
7+
import org.apache.commons.csv.CSVFormat;
148
import org.apache.poi.ss.SpreadsheetVersion;
159
import org.apache.poi.ss.formula.EvaluationWorkbook;
1610
import org.apache.poi.ss.formula.udf.UDFFinder;
@@ -26,6 +20,13 @@
2620
import org.apache.poi.ss.usermodel.SheetVisibility;
2721
import org.apache.poi.ss.usermodel.Workbook;
2822

23+
import java.io.IOException;
24+
import java.io.OutputStream;
25+
import java.nio.charset.Charset;
26+
import java.util.Iterator;
27+
import java.util.List;
28+
import java.util.Locale;
29+
2930
/**
3031
* csv workbook
3132
*
@@ -44,7 +45,6 @@ public class CsvWorkbook implements Workbook {
4445
* true if date uses 1904 windowing, or false if using 1900 date windowing.
4546
* <p>
4647
* default is false
47-
*
4848
*/
4949
private Boolean use1904windowing;
5050

@@ -83,9 +83,14 @@ public class CsvWorkbook implements Workbook {
8383
* Default true.
8484
*/
8585
private Boolean withBom;
86+
87+
/**
88+
* Specifies the CSVFormat used when writing CSV output.
89+
*/
90+
private CSVFormat csvFormat;
8691

8792
public CsvWorkbook(Appendable out, Locale locale, Boolean use1904windowing, Boolean useScientificFormat,
88-
Charset charset, Boolean withBom) {
93+
Charset charset, Boolean withBom) {
8994
this.out = out;
9095
this.locale = locale;
9196
this.use1904windowing = use1904windowing;
@@ -196,7 +201,7 @@ public Font createFont() {
196201

197202
@Override
198203
public Font findFont(boolean bold, short color, short fontHeight, String name, boolean italic, boolean strikeout,
199-
short typeOffset, byte underline) {
204+
short typeOffset, byte underline) {
200205
return null;
201206
}
202207

@@ -220,7 +225,7 @@ public CellStyle createCellStyle() {
220225
if (csvCellStyleList == null) {
221226
csvCellStyleList = Lists.newArrayList();
222227
}
223-
CsvCellStyle csvCellStyle = new CsvCellStyle((short)csvCellStyleList.size());
228+
CsvCellStyle csvCellStyle = new CsvCellStyle((short) csvCellStyleList.size());
224229
csvCellStyleList.add(csvCellStyle);
225230
return csvCellStyle;
226231
}
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
package cn.idev.excel.read.builder;
2+
3+
import cn.idev.excel.ExcelReader;
4+
import cn.idev.excel.event.SyncReadListener;
5+
import cn.idev.excel.exception.ExcelGenerateException;
6+
import cn.idev.excel.read.metadata.ReadSheet;
7+
import cn.idev.excel.read.metadata.ReadWorkbook;
8+
import cn.idev.excel.support.ExcelTypeEnum;
9+
import org.apache.commons.csv.CSVFormat;
10+
import org.apache.commons.csv.QuoteMode;
11+
12+
import java.util.List;
13+
14+
/**
15+
* Builder for CSV file reading
16+
*/
17+
public class CsvReaderBuilder extends AbstractExcelReaderParameterBuilder<CsvReaderBuilder, ReadSheet> {
18+
private ReadWorkbook readWorkbook;
19+
private ReadSheet readSheet;
20+
private CSVFormat.Builder csvFormatBuilder;
21+
22+
private CsvReaderBuilder() {
23+
}
24+
25+
public CsvReaderBuilder(ReadWorkbook readWorkbook) {
26+
readWorkbook.setExcelType(ExcelTypeEnum.CSV);
27+
this.readWorkbook = readWorkbook;
28+
this.readSheet = new ReadSheet();
29+
this.csvFormatBuilder = CSVFormat.DEFAULT.builder();
30+
}
31+
32+
/**
33+
* Sets the delimiter character
34+
*
35+
* @param delimiter the delimiter character
36+
* @return Returns a CsvReaderBuilder object, enabling method chaining
37+
*/
38+
public CsvReaderBuilder delimiter(String delimiter) {
39+
if (delimiter != null) {
40+
this.csvFormatBuilder.setDelimiter(delimiter);
41+
}
42+
return this;
43+
}
44+
45+
/**
46+
* Sets the quote character
47+
*
48+
* @param quote the quote character
49+
* @return Returns a CsvReaderBuilder object, enabling method chaining
50+
*/
51+
public CsvReaderBuilder quote(Character quote) {
52+
return quote(quote, QuoteMode.MINIMAL);
53+
}
54+
55+
/**
56+
* Sets the quote character and the quoting behavior
57+
*
58+
* @param quote the quote character
59+
* @param quoteMode defines the quoting behavior
60+
* @return Returns a CsvReaderBuilder object, enabling method chaining
61+
*/
62+
public CsvReaderBuilder quote(Character quote, QuoteMode quoteMode) {
63+
if (quote != null) {
64+
this.csvFormatBuilder.setQuote(quote);
65+
}
66+
if (quoteMode != null) {
67+
this.csvFormatBuilder.setQuoteMode(quoteMode);
68+
}
69+
return this;
70+
}
71+
72+
/**
73+
* Sets the line separator
74+
*
75+
* @param recordSeparator the line separator
76+
* @return Returns a CsvReaderBuilder object, enabling method chaining
77+
*/
78+
public CsvReaderBuilder recordSeparator(String recordSeparator) {
79+
if (recordSeparator != null) {
80+
this.csvFormatBuilder.setRecordSeparator(recordSeparator);
81+
}
82+
return this;
83+
}
84+
85+
/**
86+
* Sets the null string
87+
*
88+
* @param nullString the String to convert to and from {@code null}
89+
* @return Returns a CsvReaderBuilder object, enabling method chaining
90+
*/
91+
public CsvReaderBuilder nullString(String nullString) {
92+
if (nullString != null) {
93+
this.csvFormatBuilder.setNullString(nullString);
94+
}
95+
return this;
96+
}
97+
98+
/**
99+
* Sets the escape character.
100+
*
101+
* @param escape the Character used to escape special characters in values
102+
* @return Returns a CsvReaderBuilder object, enabling method chaining
103+
*/
104+
public CsvReaderBuilder escape(Character escape) {
105+
if (escape != null) {
106+
this.csvFormatBuilder.setEscape(escape);
107+
}
108+
return this;
109+
}
110+
111+
private ExcelReader buildExcelReader() {
112+
if (this.readWorkbook.getAutoTrim() != null) {
113+
this.csvFormatBuilder.setTrim(this.readWorkbook.getAutoTrim());
114+
}
115+
if (this.readWorkbook.getIgnoreEmptyRow() != null) {
116+
this.csvFormatBuilder.setIgnoreEmptyLines(this.readWorkbook.getIgnoreEmptyRow());
117+
}
118+
this.readWorkbook.setCsvFormat(this.csvFormatBuilder.build());
119+
return new ExcelReader(this.readWorkbook);
120+
}
121+
122+
public void doRead() {
123+
if (this.readWorkbook == null) {
124+
throw new ExcelGenerateException("Must use 'FastExcelFactory.read().csv()' to call this method");
125+
}
126+
ExcelReader excelReader = buildExcelReader();
127+
excelReader.read(this.readSheet);
128+
excelReader.finish();
129+
}
130+
131+
/**
132+
* synchronous read and returns the results
133+
*
134+
* @return Returns a list containing the read data
135+
*/
136+
public <T> List<T> doReadSync() {
137+
if (this.readWorkbook == null) {
138+
throw new ExcelGenerateException("Must use 'FastExcelFactory.read().csv()' to call this method");
139+
}
140+
ExcelReader excelReader = buildExcelReader();
141+
// Register a synchronous read listener
142+
SyncReadListener syncReadListener = new SyncReadListener();
143+
registerReadListener(syncReadListener);
144+
excelReader.read(this.readSheet);
145+
excelReader.finish();
146+
return (List<T>) syncReadListener.getList();
147+
}
148+
149+
@Override
150+
protected ReadSheet parameter() {
151+
return this.readSheet;
152+
}
153+
}

fastexcel-core/src/main/java/cn/idev/excel/read/builder/ExcelReaderBuilder.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,11 @@ public ExcelReaderSheetBuilder sheet(Integer sheetNo, String sheetName) {
263263
return excelReaderSheetBuilder;
264264
}
265265

266+
public CsvReaderBuilder csv() {
267+
excelType(ExcelTypeEnum.CSV);
268+
return new CsvReaderBuilder(parameter());
269+
}
270+
266271
@Override
267272
protected ReadWorkbook parameter() {
268273
return readWorkbook;

fastexcel-core/src/main/java/cn/idev/excel/read/metadata/ReadWorkbook.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import lombok.EqualsAndHashCode;
1313
import lombok.Getter;
1414
import lombok.Setter;
15+
import org.apache.commons.csv.CSVFormat;
1516

1617
import javax.xml.parsers.SAXParserFactory;
1718
import java.io.File;
@@ -123,4 +124,10 @@ public class ReadWorkbook extends ReadBasicParameter {
123124
* Ignore hidden sheet.
124125
*/
125126
private Boolean ignoreHiddenSheet;
127+
128+
/**
129+
* Specifies CSVFormat for parsing.
130+
* Only takes effect when reading CSV files.
131+
*/
132+
private CSVFormat csvFormat;
126133
}

fastexcel-core/src/main/java/cn/idev/excel/read/metadata/holder/csv/CsvReadWorkbookHolder.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,6 @@ public class CsvReadWorkbookHolder extends ReadWorkbookHolder {
2525
public CsvReadWorkbookHolder(ReadWorkbook readWorkbook) {
2626
super(readWorkbook);
2727
setExcelType(ExcelTypeEnum.CSV);
28-
this.csvFormat = CSVFormat.DEFAULT;
28+
this.csvFormat = readWorkbook.getCsvFormat() == null ? CSVFormat.DEFAULT : readWorkbook.getCsvFormat();
2929
}
3030
}

0 commit comments

Comments
 (0)