Skip to content

Commit 59d0040

Browse files
authored
Adding support for the date_nanos field type (elastic#1803) (elastic#1830)
Adding support for the date_nanos field type. This field type is now treated in the same way as the date field type, except that we keep track of nanoseconds. Closes elastic#1653
1 parent b8bd65e commit 59d0040

File tree

19 files changed

+334
-39
lines changed

19 files changed

+334
-39
lines changed

mr/src/main/java/org/elasticsearch/hadoop/mr/WritableValueReader.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ public Object createArray(FieldType type) {
7171
arrayType = BooleanWritable.class;
7272
break;
7373
case DATE:
74+
case DATE_NANOS:
7475
arrayType = dateType();
7576
break;
7677
case BINARY:

mr/src/main/java/org/elasticsearch/hadoop/serialization/FieldType.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ public enum FieldType {
3939
DOUBLE,
4040
STRING,
4141
DATE,
42+
DATE_NANOS,
4243
BINARY,
4344
TOKEN_COUNT,
4445
// ES 5.x
@@ -75,6 +76,7 @@ public enum FieldType {
7576
CAST_HIERARCHY.put(FLOAT, new LinkedHashSet<FieldType>(Arrays.asList(DOUBLE, KEYWORD)));
7677
CAST_HIERARCHY.put(STRING, new LinkedHashSet<FieldType>(Collections.singletonList(KEYWORD)));
7778
CAST_HIERARCHY.put(DATE, new LinkedHashSet<FieldType>(Collections.singletonList(KEYWORD)));
79+
CAST_HIERARCHY.put(DATE_NANOS, new LinkedHashSet<FieldType>(Collections.singletonList(KEYWORD)));
7880
CAST_HIERARCHY.put(BINARY, new LinkedHashSet<FieldType>(Collections.singletonList(KEYWORD)));
7981
CAST_HIERARCHY.put(TOKEN_COUNT, new LinkedHashSet<FieldType>(Arrays.asList(LONG, KEYWORD)));
8082
CAST_HIERARCHY.put(TEXT, new LinkedHashSet<FieldType>(Collections.singletonList(KEYWORD)));

mr/src/main/java/org/elasticsearch/hadoop/serialization/builder/JdkValueReader.java

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,6 @@
1818
*/
1919
package org.elasticsearch.hadoop.serialization.builder;
2020

21-
import java.util.Arrays;
22-
import java.util.Collection;
23-
import java.util.Collections;
24-
import java.util.Date;
25-
import java.util.LinkedHashMap;
26-
import java.util.List;
27-
import java.util.Map;
28-
2921
import org.elasticsearch.hadoop.cfg.Settings;
3022
import org.elasticsearch.hadoop.serialization.FieldType;
3123
import org.elasticsearch.hadoop.serialization.Parser;
@@ -39,6 +31,14 @@
3931
import org.elasticsearch.hadoop.util.StringUtils;
4032
import org.elasticsearch.hadoop.util.unit.Booleans;
4133

34+
import java.util.Arrays;
35+
import java.util.Collection;
36+
import java.util.Collections;
37+
import java.util.Date;
38+
import java.util.LinkedHashMap;
39+
import java.util.List;
40+
import java.util.Map;
41+
4242

4343
/**
4444
* Basic value reader handling using the implied JSON type.
@@ -86,6 +86,8 @@ public Object readValue(Parser parser, String value, FieldType esType) {
8686
return binaryValue(binValue);
8787
case DATE:
8888
return date(value, parser);
89+
case DATE_NANOS:
90+
return dateNanos(value, parser);
8991
case JOIN:
9092
// In the case of a join field reaching this point it is because it is the short-hand form for a parent.
9193
// construct a container and place the short form name into the name subfield.
@@ -416,6 +418,27 @@ protected Object date(String value, Parser parser) {
416418
return processDate(val);
417419
}
418420

421+
protected Object dateNanos(String value, Parser parser) {
422+
Object val = null;
423+
424+
if (value == null || isEmpty(value)) {
425+
return nullValue();
426+
}
427+
else {
428+
Token tk = parser.currentToken();
429+
430+
// UNIX time format
431+
if (tk == Token.VALUE_NUMBER) {
432+
val = parseDate(parser.longValue(), richDate);
433+
}
434+
else {
435+
val = parseDateNanos(value, richDate);
436+
}
437+
}
438+
439+
return processDate(val);
440+
}
441+
419442
protected Object parseDate(Long value, boolean richDate) {
420443
return (richDate ? createDate(value) : value);
421444
}
@@ -424,6 +447,10 @@ protected Object parseDate(String value, boolean richDate) {
424447
return (richDate ? createDate(DateUtils.parseDate(value).getTimeInMillis()) : parseString(value));
425448
}
426449

450+
protected Object parseDateNanos(String value, boolean richDate) {
451+
return (richDate ? DateUtils.parseDateNanos(value) : parseString(value));
452+
}
453+
427454
protected Object createDate(long timestamp) {
428455
return new Date(timestamp);
429456
}

mr/src/main/java/org/elasticsearch/hadoop/serialization/builder/JdkValueWriter.java

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,19 @@
1818
*/
1919
package org.elasticsearch.hadoop.serialization.builder;
2020

21+
import org.elasticsearch.hadoop.serialization.Generator;
22+
import org.elasticsearch.hadoop.util.ObjectUtils;
23+
24+
import javax.xml.bind.DatatypeConverter;
25+
import java.sql.Timestamp;
26+
import java.time.LocalDateTime;
27+
import java.time.OffsetDateTime;
28+
import java.time.format.DateTimeFormatter;
2129
import java.util.Calendar;
2230
import java.util.Date;
2331
import java.util.Map;
2432
import java.util.Map.Entry;
2533

26-
import javax.xml.bind.DatatypeConverter;
27-
28-
import org.elasticsearch.hadoop.serialization.Generator;
29-
import org.elasticsearch.hadoop.util.ObjectUtils;
30-
3134
/**
3235
* Value writer for JDK types.
3336
*/
@@ -126,7 +129,12 @@ else if (value instanceof Iterable) {
126129
}
127130
generator.writeEndArray();
128131
}
129-
// handles Timestamp also
132+
else if (value instanceof Timestamp) {
133+
Timestamp timestamp = (Timestamp) value;
134+
LocalDateTime localDateTime = timestamp.toLocalDateTime();
135+
OffsetDateTime offsetDateTime = OffsetDateTime.of(localDateTime, OffsetDateTime.now().getOffset());
136+
generator.writeString(DateTimeFormatter.ISO_OFFSET_DATE_TIME.format(offsetDateTime));
137+
}
130138
else if (value instanceof Date) {
131139
Calendar cal = Calendar.getInstance();
132140
cal.setTime((Date) value);

mr/src/main/java/org/elasticsearch/hadoop/util/DateUtils.java

Lines changed: 42 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,22 @@
1818
*/
1919
package org.elasticsearch.hadoop.util;
2020

21-
import java.lang.reflect.Method;
22-
import java.util.Calendar;
23-
24-
import javax.xml.bind.DatatypeConverter;
25-
2621
import org.apache.commons.logging.Log;
2722
import org.apache.commons.logging.LogFactory;
2823

import javax.xml.bind.DatatypeConverter;

import java.lang.reflect.Method;
import java.sql.Timestamp;
import java.time.Instant;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeFormatterBuilder;
import java.time.temporal.ChronoField;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalField;
import java.time.temporal.TemporalQueries;
import java.util.Calendar;
2937
/**
3038
* Utility used for parsing date ISO8601.
3139
* Morphed into a runtime bridge over possible ISO8601 (simply because the spec is too large, especially when considering the various optional formats).
@@ -36,6 +44,9 @@ public abstract class DateUtils {
3644

3745
private final static boolean jodaTimeAvailable = ObjectUtils.isClassPresent("org.joda.time.format.ISODateTimeFormat", DateUtils.class.getClassLoader());
3846

47+
static final DateTimeFormatter DATE_OPTIONAL_TIME_OFFSET =
48+
DateTimeFormatter.ofPattern("uuuu-MM-dd['T'HH:mm:ss][.SSSSSSSSS][.SSSSSS][.SSS][XXX]");
49+
3950
private static abstract class Jdk6 {
4051
// Parses ISO date through the JDK XML bind class. However the spec doesn't support all ISO8601 formats which this class tries to address
4152
// in particular Time offsets from UTC are available in 3 forms:
@@ -121,4 +132,30 @@ public static Calendar parseDate(String value) {
121132

122133
return (jodaTimeAvailable && JodaTime.INITIALIZED) ? JodaTime.parseDate(value) : Jdk6.parseDate(value);
123134
}
135+
136+
public static Timestamp parseDateNanos(String value) {
137+
return DATE_OPTIONAL_TIME_OFFSET.parse(value, temporal -> {
138+
int year = temporal.get(ChronoField.YEAR);
139+
int month = temporal.get(ChronoField.MONTH_OF_YEAR);
140+
int dayOfMonth = temporal.get(ChronoField.DAY_OF_MONTH);
141+
int hour = getOrDefault(temporal, ChronoField.HOUR_OF_DAY, 0);
142+
int minute = getOrDefault(temporal, ChronoField.MINUTE_OF_HOUR, 0);
143+
int second = getOrDefault(temporal, ChronoField.SECOND_OF_MINUTE, 0);
144+
int nanoOfSecond = getOrDefault(temporal, ChronoField.NANO_OF_SECOND, 0);
145+
ZoneId zone = temporal.query(TemporalQueries.zone());
146+
if (zone == null) {
147+
zone = ZoneId.of("UTC");
148+
}
149+
ZonedDateTime zonedDateTime = ZonedDateTime.of(year, month, dayOfMonth, hour, minute, second, nanoOfSecond, zone);
150+
return Timestamp.from(Instant.from(zonedDateTime));
151+
});
152+
}
153+
154+
private static int getOrDefault(TemporalAccessor temporal, TemporalField field, int defaultValue) {
155+
if(temporal.isSupported(field)) {
156+
return temporal.get(field);
157+
} else {
158+
return defaultValue;
159+
}
160+
}
124161
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
package org.elasticsearch.hadoop.serialization.builder;

import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.serialization.FieldType;
import org.elasticsearch.hadoop.serialization.Parser;
import org.junit.Test;
import org.mockito.Mockito;

import java.sql.Timestamp;
import java.util.Date;

import static org.junit.Assert.assertEquals;

/**
 * Covers reading of {@code date_nanos} values through {@link JdkValueReader},
 * with and without rich date mapping.
 */
public class JdkValueReaderTest {
    @Test
    public void testReadValue() {
        JdkValueReader valueReader = new JdkValueReader();
        Parser parser = Mockito.mock(Parser.class);

        // rich dates (default): an ISO string keeps its nanosecond precision
        Mockito.when(parser.currentToken()).thenReturn(Parser.Token.VALUE_STRING);
        Timestamp nanoStamp = (Timestamp) valueReader.readValue(parser, "2015-01-01T12:10:30.123456789Z", FieldType.DATE_NANOS);
        assertEquals(1420114230123L, nanoStamp.getTime());
        assertEquals(123456789, nanoStamp.getNanos());

        // rich dates: a numeric token is interpreted as epoch milliseconds
        Mockito.when(parser.currentToken()).thenReturn(Parser.Token.VALUE_NUMBER);
        Mockito.when(parser.longValue()).thenReturn(1420114230123L);
        Date epochDate = (Date) valueReader.readValue(parser, "1420114230123", FieldType.DATE_NANOS);
        assertEquals(1420114230123L, epochDate.getTime());

        // non-rich dates: values come back as the raw String / Long
        Settings settings = Mockito.mock(Settings.class);
        Mockito.when(settings.getMappingDateRich()).thenReturn(false);
        valueReader.setSettings(settings);

        Mockito.when(parser.currentToken()).thenReturn(Parser.Token.VALUE_STRING);
        String rawString = (String) valueReader.readValue(parser, "2015-01-01T12:10:30.123456789Z", FieldType.DATE_NANOS);
        assertEquals("2015-01-01T12:10:30.123456789Z", rawString);

        Mockito.when(parser.currentToken()).thenReturn(Parser.Token.VALUE_NUMBER);
        Mockito.when(parser.longValue()).thenReturn(1420114230123L);
        Long rawLong = (Long) valueReader.readValue(parser, "1420114230123", FieldType.DATE_NANOS);
        assertEquals(1420114230123L, rawLong.longValue());
    }
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package org.elasticsearch.hadoop.serialization.builder;

import org.elasticsearch.hadoop.serialization.Generator;
import org.junit.Test;
import org.mockito.ArgumentCaptor;
import org.mockito.Mockito;

import java.sql.Timestamp;
import java.time.OffsetDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Date;

import static org.junit.Assert.assertEquals;

/**
 * Verifies how {@link JdkValueWriter} serializes {@code java.util.Date}
 * (millisecond precision) and {@code java.sql.Timestamp} (nanosecond precision).
 */
public class JdkValueWriterTest {

    // NOTE(review): the expected strings below are built from the system zone's
    // offset at the value's own instant, while the writer may format using the
    // offset of "now" — these could diverge across a DST transition; confirm.

    @Test
    public void testWriteDate() {
        JdkValueWriter writer = new JdkValueWriter();
        Generator generator = Mockito.mock(Generator.class);
        ArgumentCaptor<String> written = ArgumentCaptor.forClass(String.class);

        Date date = new Date(1420114230123L);
        writer.doWrite(date, generator, "");

        Mockito.verify(generator).writeString(written.capture());
        String actual = written.getValue();
        String expected = date.toInstant().atZone(ZoneId.systemDefault()).toOffsetDateTime().toString();
        assertEquals(expected, actual);

        OffsetDateTime roundTripped = DateTimeFormatter.ISO_OFFSET_DATE_TIME.parse(actual, OffsetDateTime::from);
        assertEquals(123000000, roundTripped.getNano()); //Nothing beyond milliseconds
    }

    @Test
    public void testWriteDateWithNanos() {
        JdkValueWriter writer = new JdkValueWriter();
        Generator generator = Mockito.mock(Generator.class);
        ArgumentCaptor<String> written = ArgumentCaptor.forClass(String.class);

        Timestamp timestamp = new Timestamp(1420114230123L);
        timestamp.setNanos(123456789);
        writer.doWrite(timestamp, generator, "");

        Mockito.verify(generator).writeString(written.capture());
        String actual = written.getValue();
        String expected = timestamp.toInstant().atZone(ZoneId.systemDefault()).toOffsetDateTime().toString();
        assertEquals(expected, actual);

        OffsetDateTime roundTripped = DateTimeFormatter.ISO_OFFSET_DATE_TIME.parse(actual, OffsetDateTime::from);
        assertEquals(123456789, roundTripped.getNano());
    }
}

mr/src/test/java/org/elasticsearch/hadoop/serialization/dto/mapping/MappingTest.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
import static org.elasticsearch.hadoop.serialization.FieldType.BOOLEAN;
4040
import static org.elasticsearch.hadoop.serialization.FieldType.BYTE;
4141
import static org.elasticsearch.hadoop.serialization.FieldType.DATE;
42+
import static org.elasticsearch.hadoop.serialization.FieldType.DATE_NANOS;
4243
import static org.elasticsearch.hadoop.serialization.FieldType.DOUBLE;
4344
import static org.elasticsearch.hadoop.serialization.FieldType.FLOAT;
4445
import static org.elasticsearch.hadoop.serialization.FieldType.GEO_POINT;
@@ -53,6 +54,7 @@
5354
import static org.elasticsearch.hadoop.serialization.FieldType.SHORT;
5455
import static org.elasticsearch.hadoop.serialization.FieldType.STRING;
5556
import static org.elasticsearch.hadoop.serialization.FieldType.TEXT;
57+
5658
import static org.elasticsearch.hadoop.serialization.dto.mapping.MappingUtils.findTypos;
5759
import static org.junit.Assert.assertEquals;
5860
import static org.junit.Assert.assertNotNull;
@@ -134,7 +136,7 @@ public void testPrimitivesParsing() throws Exception {
134136
MappingSet mappings = getMappingsForResource("primitives.json");
135137
Mapping mapping = ensureAndGet("index", "primitives", mappings);
136138
Field[] props = mapping.getFields();
137-
assertEquals(14, props.length);
139+
assertEquals(15, props.length);
138140
assertEquals("field01", props[0].name());
139141
assertEquals(BOOLEAN, props[0].type());
140142
assertEquals("field02", props[1].name());
@@ -163,6 +165,8 @@ public void testPrimitivesParsing() throws Exception {
163165
assertEquals(HALF_FLOAT, props[12].type());
164166
assertEquals("field14", props[13].name());
165167
assertEquals(SCALED_FLOAT, props[13].type());
168+
assertEquals("field15", props[14].name());
169+
assertEquals(DATE_NANOS, props[14].type());
166170
}
167171

168172
@Test
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
package org.elasticsearch.hadoop.util;

import org.junit.Test;

import java.sql.Timestamp;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

/**
 * Exercises {@link DateUtils#parseDateNanos(String)} across the supported
 * precisions of the date-optional-time format.
 */
public class DateUtilsTest {
    @Test
    public void parseDateNanos() {
        // date only -> midnight UTC, no fractional component
        checkParse("2015-01-01", 1420070400000L, 0);
        // full nanosecond precision
        checkParse("2015-01-01T12:10:30.123456789Z", 1420114230123L, 123456789);
        // explicit zero milliseconds
        checkParse("2015-01-01T00:00:00.000Z", 1420070400000L, 0);
        // microsecond precision
        checkParse("2015-01-01T12:10:30.123456Z", 1420114230123L, 123456000);
        // millisecond precision
        checkParse("2015-01-01T12:10:30.123Z", 1420114230123L, 123000000);
    }

    // Parses the value and asserts both the epoch-millis and the nano component.
    private static void checkParse(String value, long expectedMillis, int expectedNanos) {
        Timestamp timestamp = DateUtils.parseDateNanos(value);
        assertNotNull(timestamp);
        assertEquals(expectedMillis, timestamp.getTime());
        assertEquals(expectedNanos, timestamp.getNanos());
    }
}

mr/src/test/resources/org/elasticsearch/hadoop/serialization/dto/mapping/typed/primitives.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@
4545
"field14" : {
4646
"type" : "scaled_float",
4747
"scaling_factor" : 100.0
48+
},
49+
"field15" : {
50+
"type" : "date_nanos"
4851
}
4952
}
5053
}

mr/src/test/resources/org/elasticsearch/hadoop/serialization/dto/mapping/typeless/primitives.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@
4444
"field14" : {
4545
"type" : "scaled_float",
4646
"scaling_factor" : 100.0
47+
},
48+
"field15" : {
49+
"type" : "date_nanos"
4750
}
4851
}
4952
}

0 commit comments

Comments
 (0)