diff --git a/src/enc_csv.c b/src/enc_csv.c
index 4f797381..772f4116 100644
--- a/src/enc_csv.c
+++ b/src/enc_csv.c
@@ -53,6 +53,13 @@ static char hexdigit[16] =
 	{'0', '1', '2', '3', '4', '5', '6', '7', '8',
 	 '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 
+/* Set from the lognormalizer command line (-ooutputCSVNoQuotes).
+ * NOTE(review): the definition lives in lognormalizer.c; presumably any
+ * other program linking this encoder must define the symbol as well -
+ * confirm, and consider passing the setting via the context instead.
+ */
+extern int outputCSVNoQuotes;
+
 /* TODO: CSV encoding for Unicode characters is as of RFC4627 not fully
  * supported. The algorithm is that we must build the wide character from
  * UTF-8 (if char > 127) and build the full 4-octet Unicode character out
@@ -63,7 +70,7 @@ static char hexdigit[16] =
 static int
 ln_addValue_CSV(const char *buf, es_str_t **str)
 {
-	int r;
+	int r = 0;
 	unsigned char c;
 	es_size_t i;
 	char numbuf[4];
@@ -119,16 +126,172 @@ ln_addValue_CSV(const char *buf, es_str_t **str)
 			}
 		}
 	}
-	r = 0;
 
 	return r;
 }
 
+/* Counterpart of ln_addValue_CSV() for the outputCSVNoQuotes mode: the
+ * escaped value is first built in a temporary string and then appended
+ * to *str, wrapped in double quotes only if it contains a comma or a
+ * double quote.
+ * Returns 0 on success, -1 if the temporary string cannot be allocated,
+ * or the result of the first failing libestr call (assuming CHKR stores
+ * it in r before jumping to done).
+ */
+static int
+ln_addValue_CSV_NQ(const char *buf, es_str_t **str)
+{
+	int r = -1;
+	unsigned char c;
+	es_size_t i;
+	size_t len;
+	char numbuf[4];
+	int j;
+	int requires_quotes = 0;
+	es_str_t *tmp_str = NULL;
+
+	assert(str != NULL);
+	assert(*str != NULL);
+	assert(buf != NULL);
+
+	if((tmp_str = es_newStr(256)) == NULL)
+		goto done;
+
+	len = strlen(buf); /* hoisted: do not rescan buf on every iteration */
+	for(i = 0; i < len; i++) {
+		c = buf[i];
+		if((c >= 0x23 && c <= 0x5b)
+		   || (c >= 0x5d /* && c <= 0x10FFFF*/)
+		   || c == 0x20 || c == 0x21) {
+			/* no need to escape */
+			if(c == 0x2c) { /* comma */
+				requires_quotes = 1;
+			}
+			CHKR(es_addChar(&tmp_str, c));
+		} else {
+			/* we must escape, try RFC4627-defined special sequences first */
+			switch(c) {
+			case '\0':
+				CHKR(es_addBuf(&tmp_str, "\\u0000", 6));
+				break;
+			case '\"':
+				requires_quotes = 1;
+				CHKR(es_addBuf(&tmp_str, "\\\"", 2));
+				break;
+			case '\\':
+				CHKR(es_addBuf(&tmp_str, "\\\\", 2));
+				break;
+			case '\010':
+				CHKR(es_addBuf(&tmp_str, "\\b", 2));
+				break;
+			case '\014':
+				CHKR(es_addBuf(&tmp_str, "\\f", 2));
+				break;
+			case '\n':
+				CHKR(es_addBuf(&tmp_str, "\\n", 2));
+				break;
+			case '\r':
+				CHKR(es_addBuf(&tmp_str, "\\r", 2));
+				break;
+			case '\t':
+				CHKR(es_addBuf(&tmp_str, "\\t", 2));
+				break;
+			default:
+				/* TODO : proper Unicode encoding (see header comment) */
+				for(j = 0 ; j < 4 ; ++j) {
+					numbuf[3-j] = hexdigit[c % 16];
+					c = c / 16;
+				}
+				CHKR(es_addBuf(&tmp_str, "\\u", 2));
+				CHKR(es_addBuf(&tmp_str, numbuf, 4));
+				break;
+			}
+		}
+	}
+
+	/* append straight from the temporary string; no C-string copy needed */
+	if(requires_quotes) {
+		CHKR(es_addChar(str, '"'));
+	}
+	CHKR(es_addStr(str, tmp_str));
+	if(requires_quotes) {
+		CHKR(es_addChar(str, '"'));
+	}
+
+	r = 0;
+
+done:
+	if(tmp_str != NULL)
+		es_deleteStr(tmp_str);
+	return r;
+}
+
+
+/* Append the rendering of one JSON field to *str for the
+ * outputCSVNoQuotes mode. Arrays are written as '[' elem ',' elem ... ']',
+ * walking the elements from the last index to the first; strings and
+ * integers go through ln_addValue_CSV_NQ(); every other type is replaced
+ * by a fixed placeholder text.
+ * Returns 0 on success; on failure r is whatever CHKR/CHKN left in it.
+ */
+static int
+ln_addField_CSV_NQ(struct json_object *field, es_str_t **str)
+{
+	int r = 0;
+	int i;
+	struct json_object *obj;
+	int needComma = 0;
+	const char *value;
+
+	assert(field != NULL);
+	assert(str != NULL);
+	assert(*str != NULL);
+
+	switch(json_object_get_type(field)) {
+	case json_type_array:
+		/* NOTE(review): the element separators are bare commas and the
+		 * array as a whole is never quoted; presumably a CSV reader will
+		 * see them as field separators - confirm the intended format.
+		 */
+		CHKR(es_addChar(str, '['));
+		for (i = json_object_array_length(field) - 1; i >= 0; i--) {
+			/* braces matter: CHKR may expand to a bare if statement */
+			if(needComma) {
+				CHKR(es_addChar(str, ','));
+			} else {
+				needComma = 1;
+			}
+			CHKN(obj = json_object_array_get_idx(field, i));
+			CHKN(value = json_object_get_string(obj));
+			CHKR(ln_addValue_CSV_NQ(value, str));
+		}
+		CHKR(es_addChar(str, ']'));
+		break;
+	case json_type_string:
+	case json_type_int:
+		CHKN(value = json_object_get_string(field));
+		CHKR(ln_addValue_CSV_NQ(value, str));
+		break;
+	case json_type_null:
+	case json_type_boolean:
+	case json_type_double:
+	case json_type_object:
+		CHKR(es_addBuf(str, "***unsupported type***", sizeof("***unsupported type***")-1));
+		break;
+	default:
+		CHKR(es_addBuf(str, "***OBJECT***", sizeof("***OBJECT***")-1));
+	}
+
+done:
+	return r;
+}
+
 
 static int
 ln_addField_CSV(struct json_object *field, es_str_t **str)
 {
-	int r, i;
+	int r = 0;
+	int i;
 	struct json_object *obj;
 	int needComma = 0;
 	const char *value;
@@ -166,8 +329,6 @@ ln_addField_CSV(struct json_object *field, es_str_t **str)
 		CHKR(es_addBuf(str, "***OBJECT***", sizeof("***OBJECT***")-1));
 	}
 
-	r = 0;
-
 done:
 	return r;
 }
@@ -207,9 +368,13 @@ ln_fmtEventToCSV(struct json_object *json, es_str_t **str, es_str_t *extraData)
 			needComma = 1;
 		}
 		if (field != NULL) {
-			CHKR(es_addChar(str, '"'));
-			ln_addField_CSV(field, str);
-			CHKR(es_addChar(str, '"'));
+			if (outputCSVNoQuotes) {
+				ln_addField_CSV_NQ(field, str);
+			} else {
+				CHKR(es_addChar(str, '"'));
+				ln_addField_CSV(field, str);
+				CHKR(es_addChar(str, '"'));
+			}
 		}
 	}
 	r = 0;
diff --git a/src/lognormalizer.c b/src/lognormalizer.c
index f7322329..56acaa6e 100644
--- a/src/lognormalizer.c
+++ b/src/lognormalizer.c
@@ -60,6 +60,7 @@ static int recOutput = OUTPUT_PARSED_RECS | OUTPUT_UNPARSED_RECS;
 	/**< controls which records to output */
 static int outputSummaryLine = 0;
 static int outputNbrUnparsed = 0;
+int outputCSVNoQuotes = 0;
 static int addErrLineNbr = 0; /**< add line number info to unparsed events */
 static int flatTags = 0; /**< print event.tags in JSON? */
 static FILE *fpDOT;
@@ -301,6 +302,8 @@ handle_generic_option(const char* opt) {
 		ln_setCtxOpts(ctx, LN_CTXOPT_ADD_RULE);
 	} else if (strcmp("addRuleLocation", opt) == 0) {
 		ln_setCtxOpts(ctx, LN_CTXOPT_ADD_RULE_LOCATION);
+	} else if (strcmp("outputCSVNoQuotes", opt) == 0) {
+		outputCSVNoQuotes = 1;
 	} else {
 		fprintf(stderr, "invalid -o option '%s'\n", opt);
 		exit(1);
@@ -325,6 +328,7 @@ fprintf(stderr,
 	" -oaddRuleLocation Add location of matching rule to metadata\n"
 	" -oaddExecPath Add exec_path attribute to output\n"
 	" -oaddOriginalMsg Always add original message to output, not just in error case\n"
+	" -ooutputCSVNoQuotes CSV output will not wrap fields in double quotation marks\n"
 	" -p Print back only if the message has been parsed succesfully\n"
 	" -P Print back only if the message has NOT been parsed succesfully\n"
 	" -L Add source file line number information to unparsed line output\n"