|
| 1 | +package io.deephaven.csv.reading.cells; |
| 2 | + |
| 3 | +import io.deephaven.csv.containers.ByteSlice; |
| 4 | +import io.deephaven.csv.reading.ReaderUtil; |
| 5 | +import io.deephaven.csv.util.CsvReaderException; |
| 6 | +import io.deephaven.csv.util.MutableBoolean; |
| 7 | +import io.deephaven.csv.util.MutableInt; |
| 8 | + |
| 9 | +import java.io.InputStream; |
| 10 | + |
| 11 | +/** |
| 12 | + * This class uses an underlying DelimitedCellGrabber to grab whole lines at a time from the input stream, and then it |
| 13 | + * breaks them into fixed-sized cells to return to the caller. |
| 14 | + */ |
| 15 | +public class FixedCellGrabber implements CellGrabber { |
| 16 | + /** |
| 17 | + * Makes a degenerate CellGrabber that has no delimiters or quotes and therefore returns whole lines. This is a |
| 18 | + * somewhat quick-and-dirty way to reuse the buffering and newline logic in DelimitedCellGrabber without rewriting |
| 19 | + * it. |
| 20 | + * |
| 21 | + * @param stream The underlying stream. |
| 22 | + * @return The "line grabber" |
| 23 | + */ |
| 24 | + public static CellGrabber makeLineGrabber(InputStream stream) { |
| 25 | + final byte IllegalUtf8 = (byte) 0xff; |
| 26 | + return new DelimitedCellGrabber(stream, IllegalUtf8, IllegalUtf8, true, false); |
| 27 | + } |
| 28 | + |
| 29 | + private final CellGrabber lineGrabber; |
| 30 | + private final int[] columnWidths; |
| 31 | + private final boolean ignoreSurroundingSpaces; |
| 32 | + private final boolean utf32CountingMode; |
| 33 | + private final ByteSlice rowText; |
| 34 | + private boolean needsUnderlyingRefresh; |
| 35 | + private int colIndex; |
| 36 | + private final MutableBoolean dummy1; |
| 37 | + private final MutableInt dummy2; |
| 38 | + |
| 39 | + /** Constructor. */ |
| 40 | + public FixedCellGrabber(final CellGrabber lineGrabber, final int[] columnWidths, boolean ignoreSurroundingSpaces, |
| 41 | + boolean utf32CountingMode) { |
| 42 | + this.lineGrabber = lineGrabber; |
| 43 | + this.columnWidths = columnWidths; |
| 44 | + this.ignoreSurroundingSpaces = ignoreSurroundingSpaces; |
| 45 | + this.utf32CountingMode = utf32CountingMode; |
| 46 | + this.rowText = new ByteSlice(); |
| 47 | + this.needsUnderlyingRefresh = true; |
| 48 | + this.colIndex = 0; |
| 49 | + this.dummy1 = new MutableBoolean(); |
| 50 | + this.dummy2 = new MutableInt(); |
| 51 | + } |
| 52 | + |
| 53 | + @Override |
| 54 | + public void grabNext(ByteSlice dest, MutableBoolean lastInRow, MutableBoolean endOfInput) |
| 55 | + throws CsvReaderException { |
| 56 | + if (needsUnderlyingRefresh) { |
| 57 | + // Underlying row used up, and all columns provided. Ask underlying CellGrabber for the next line. |
| 58 | + lineGrabber.grabNext(rowText, dummy1, endOfInput); |
| 59 | + |
| 60 | + if (endOfInput.booleanValue()) { |
| 61 | + // Set dest to the empty string, and leave 'endOfInput' set to true. |
| 62 | + dest.reset(rowText.data(), rowText.end(), rowText.end()); |
| 63 | + return; |
| 64 | + } |
| 65 | + |
| 66 | + needsUnderlyingRefresh = false; |
| 67 | + colIndex = 0; |
| 68 | + } |
| 69 | + |
| 70 | + // There is data to return. Count off N characters. The final column gets all remaining characters. |
| 71 | + final boolean lastCol = colIndex == columnWidths.length - 1; |
| 72 | + final int numCharsToTake = lastCol ? Integer.MAX_VALUE : columnWidths[colIndex]; |
| 73 | + takeNCharactersInCharset(rowText, dest, numCharsToTake, utf32CountingMode, dummy2); |
| 74 | + ++colIndex; |
| 75 | + needsUnderlyingRefresh = lastCol || dest.size() == 0; |
| 76 | + lastInRow.setValue(needsUnderlyingRefresh); |
| 77 | + endOfInput.setValue(false); |
| 78 | + |
| 79 | + if (ignoreSurroundingSpaces) { |
| 80 | + ReaderUtil.trimSpacesAndTabs(dest); |
| 81 | + } |
| 82 | + } |
| 83 | + |
| 84 | + private static void takeNCharactersInCharset(ByteSlice src, ByteSlice dest, int numCharsToTake, |
| 85 | + boolean utf32CountingMode, MutableInt tempInt) { |
| 86 | + final byte[] data = src.data(); |
| 87 | + final int cellBegin = src.begin(); |
| 88 | + int current = cellBegin; |
| 89 | + while (numCharsToTake > 0) { |
| 90 | + if (current == src.end()) { |
| 91 | + break; |
| 92 | + } |
| 93 | + final int utf8Length = ReaderUtil.getUtf8LengthAndCharLength(data[current], src.end() - current, |
| 94 | + utf32CountingMode, tempInt); |
| 95 | + if (numCharsToTake < tempInt.intValue()) { |
| 96 | + // There is not enough space left in the field to store this character. |
| 97 | + // This can happen if CsvSpecs is set for the UTF16 counting convention, |
| 98 | + // there is one unit left in the field, and we encounter a character outside |
| 99 | + // the Basic Multilingual Plane, which would require two units. |
| 100 | + break; |
| 101 | + } |
| 102 | + numCharsToTake -= tempInt.intValue(); |
| 103 | + current += utf8Length; |
| 104 | + } |
| 105 | + dest.reset(src.data(), cellBegin, current); |
| 106 | + src.reset(src.data(), current, src.end()); |
| 107 | + } |
| 108 | + |
| 109 | + @Override |
| 110 | + public int physicalRowNum() { |
| 111 | + return lineGrabber.physicalRowNum(); |
| 112 | + } |
| 113 | +} |
0 commit comments