Skip to content

New command line option for lognormalizer. Resolves #336 #337

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 158 additions & 8 deletions src/enc_csv.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@ static char hexdigit[16] =
{'0', '1', '2', '3', '4', '5', '6', '7', '8',
'9', 'A', 'B', 'C', 'D', 'E', 'F' };

extern int outputCSVNoQuotes; /* command line flag */

/* TODO: CSV encoding for Unicode characters is as of RFC4627 not fully
* supported. The algorithm is that we must build the wide character from
* UTF-8 (if char > 127) and build the full 4-octet Unicode character out
Expand All @@ -63,7 +65,7 @@ static char hexdigit[16] =
static int
ln_addValue_CSV(const char *buf, es_str_t **str)
{
int r;
int r = 0;
unsigned char c;
es_size_t i;
char numbuf[4];
Expand Down Expand Up @@ -119,16 +121,162 @@ ln_addValue_CSV(const char *buf, es_str_t **str)
}
}
}
r = 0;

return r;
}


/* Append the escaped form of buf to *str, adding surrounding double
 * quotes only when the value needs them.
 *
 * The value is first escaped into a temporary es_str_t. While doing so
 * we note whether it contains a comma or a double quote; only then is
 * the escaped value wrapped in double quotes when appended to *str.
 *
 * Returns 0 on success. Returns -1 if a temporary buffer could not be
 * obtained, otherwise the non-zero code of the es_* call that failed.
 * All temporaries are released on every path.
 * */
static int
ln_addValue_CSV_NQ(const char *buf, es_str_t **str)
{
	int r = -1;
	unsigned char c;
	size_t i;
	size_t len;
	char numbuf[4];
	int j;
	int requires_quotes = 0;
	char *cstr = NULL;
	es_str_t *tmp_str = NULL;

	/* validate the contract before acquiring any resource */
	assert(str != NULL);
	assert(*str != NULL);
	assert(buf != NULL);

	if((tmp_str = es_newStr(256)) == NULL)
		goto done;

	/* length is loop-invariant, so compute it once */
	len = strlen(buf);
	for(i = 0; i < len; i++) {
		c = buf[i];
		if((c >= 0x23 && c <= 0x5b)
		   || (c >= 0x5d /* && c <= 0x10FFFF*/)
		   || c == 0x20 || c == 0x21) {
			/* no need to escape */
			if(c == 0x2c) { /* comma */
				requires_quotes = 1;
			}
			CHKR(es_addChar(&tmp_str, c));
		} else {
			/* we must escape, try RFC4627-defined special sequences first */
			switch(c) {
			case '\0':
				/* not reachable while the loop is bounded by strlen() */
				CHKR(es_addBuf(&tmp_str, "\\u0000", 6));
				break;
			case '\"':
				requires_quotes = 1;
				CHKR(es_addBuf(&tmp_str, "\\\"", 2));
				break;
			case '\\':
				CHKR(es_addBuf(&tmp_str, "\\\\", 2));
				break;
			case '\010':
				CHKR(es_addBuf(&tmp_str, "\\b", 2));
				break;
			case '\014':
				CHKR(es_addBuf(&tmp_str, "\\f", 2));
				break;
			case '\n':
				CHKR(es_addBuf(&tmp_str, "\\n", 2));
				break;
			case '\r':
				CHKR(es_addBuf(&tmp_str, "\\r", 2));
				break;
			case '\t':
				CHKR(es_addBuf(&tmp_str, "\\t", 2));
				break;
			default:
				/* TODO : proper Unicode encoding (see header comment) */
				for(j = 0 ; j < 4 ; ++j) {
					numbuf[3-j] = hexdigit[c % 16];
					c = c / 16;
				}
				CHKR(es_addBuf(&tmp_str, "\\u", 2));
				CHKR(es_addBuf(&tmp_str, numbuf, 4));
				break;
			}
		}
	}

	cstr = es_str2cstr(tmp_str, NULL);
	if(cstr == NULL) {
		/* r may have been set to 0 by a CHKR above, so reset it */
		r = -1;
		goto done;
	}

	if(requires_quotes) {
		CHKR(es_addChar(str, '"'));
	}
	CHKR(es_addBuf(str, cstr, strlen(cstr)));
	if(requires_quotes) {
		CHKR(es_addChar(str, '"'));
	}

	r = 0;

done:
	free(cstr); /* free(NULL) is a no-op */
	if(tmp_str != NULL)
		es_deleteStr(tmp_str);
	return r;
}


/* Append one JSON field to *str, using ln_addValue_CSV_NQ() so that
 * values are quoted only when they need to be.
 *
 * - arrays are written as '[' v,v,... ']', walking the elements from the
 *   last index down to the first
 * - strings and ints are written via their json_object_get_string() form
 * - null, boolean, double and object fields are replaced by the
 *   placeholder "***unsupported type***"; any other type by "***OBJECT***"
 *
 * Returns 0 on success, otherwise the error code set by the CHKR/CHKN
 * macro that detected the failure.
 * */
static int
ln_addField_CSV_NQ(struct json_object *field, es_str_t **str)
{
	int r = 0;
	int i;
	struct json_object *obj;
	int needComma = 0;
	const char *value;

	assert(field != NULL);
	assert(str != NULL);
	assert(*str != NULL);

	switch(json_object_get_type(field)) {
	case json_type_array:
		CHKR(es_addChar(str, '['));
		for (i = json_object_array_length(field) - 1; i >= 0; i--) {
			if(needComma) {
				/* separator failures must be reported like any other append */
				CHKR(es_addChar(str, ','));
			} else {
				needComma = 1;
			}
			CHKN(obj = json_object_array_get_idx(field, i));
			CHKN(value = json_object_get_string(obj));
			CHKR(ln_addValue_CSV_NQ(value, str));
		}
		CHKR(es_addChar(str, ']'));
		break;
	case json_type_string:
	case json_type_int:
		CHKN(value = json_object_get_string(field));
		CHKR(ln_addValue_CSV_NQ(value, str));
		break;
	case json_type_null:
	case json_type_boolean:
	case json_type_double:
	case json_type_object:
		CHKR(es_addBuf(str, "***unsupported type***", sizeof("***unsupported type***")-1));
		break;
	default:
		CHKR(es_addBuf(str, "***OBJECT***", sizeof("***OBJECT***")-1));
	}

done:
	return r;
}


static int
ln_addField_CSV(struct json_object *field, es_str_t **str)
{
int r, i;
int r = 0;
int i;
struct json_object *obj;
int needComma = 0;
const char *value;
Expand Down Expand Up @@ -166,8 +314,6 @@ ln_addField_CSV(struct json_object *field, es_str_t **str)
CHKR(es_addBuf(str, "***OBJECT***", sizeof("***OBJECT***")-1));
}

r = 0;

done:
return r;
}
Expand Down Expand Up @@ -207,9 +353,13 @@ ln_fmtEventToCSV(struct json_object *json, es_str_t **str, es_str_t *extraData)
needComma = 1;
}
if (field != NULL) {
CHKR(es_addChar(str, '"'));
ln_addField_CSV(field, str);
CHKR(es_addChar(str, '"'));
if (outputCSVNoQuotes) {
ln_addField_CSV_NQ(field, str);
} else {
CHKR(es_addChar(str, '"'));
ln_addField_CSV(field, str);
CHKR(es_addChar(str, '"'));
}
}
}
r = 0;
Expand Down
4 changes: 4 additions & 0 deletions src/lognormalizer.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ static int recOutput = OUTPUT_PARSED_RECS | OUTPUT_UNPARSED_RECS;
/**< controls which records to output */
static int outputSummaryLine = 0;
static int outputNbrUnparsed = 0;
int outputCSVNoQuotes = 0;
static int addErrLineNbr = 0; /**< add line number info to unparsed events */
static int flatTags = 0; /**< print event.tags in JSON? */
static FILE *fpDOT;
Expand Down Expand Up @@ -301,6 +302,8 @@ handle_generic_option(const char* opt) {
ln_setCtxOpts(ctx, LN_CTXOPT_ADD_RULE);
} else if (strcmp("addRuleLocation", opt) == 0) {
ln_setCtxOpts(ctx, LN_CTXOPT_ADD_RULE_LOCATION);
} else if (strcmp("outputCSVNoQuotes", opt) == 0) {
outputCSVNoQuotes = 1;
} else {
fprintf(stderr, "invalid -o option '%s'\n", opt);
exit(1);
Expand All @@ -325,6 +328,7 @@ fprintf(stderr,
" -oaddRuleLocation Add location of matching rule to metadata\n"
" -oaddExecPath Add exec_path attribute to output\n"
" -oaddOriginalMsg Always add original message to output, not just in error case\n"
" -ooutputCSVNoQuotes CSV output will not wrap fields in double quotation marks\n"
" -p Print back only if the message has been parsed succesfully\n"
" -P Print back only if the message has NOT been parsed succesfully\n"
" -L Add source file line number information to unparsed line output\n"
Expand Down