rsyslog · rjcortese · Mar 29, 2020 · Mar 30, 2020 · Dec 1, 2020 · Dec 6, 2020
diff --git a/src/enc_csv.c b/src/enc_csv.c
@@ -53,6 +53,8 @@ static char hexdigit[16] =
 	{'0', '1', '2', '3', '4', '5', '6', '7', '8',
 	 '9', 'A', 'B', 'C', 'D', 'E', 'F' };
 
+extern int outputCSVNoQuotes; /* command line flag, defined in lognormalizer.c; NOTE(review): unresolved if this file is linked without it - confirm */
+
 /* TODO: CSV encoding for Unicode characters is as of RFC4627 not fully
  * supported. The algorithm is that we must build the wide character from
  * UTF-8 (if char > 127) and build the full 4-octet Unicode character out
@@ -63,7 +65,7 @@ static char hexdigit[16] =
 static int
 ln_addValue_CSV(const char *buf, es_str_t **str)
 {
-	int r;
+	int r = 0;
 	unsigned char c;
 	es_size_t i;
 	char numbuf[4];
@@ -119,16 +121,162 @@ ln_addValue_CSV(const char *buf, es_str_t **str)
 			}
 		}
 	}
-	r = 0;
 
 	return r;
 }
 
 
+/* Append buf to *str as a single CSV value, quoting it only when needed.
+ * Double quote, backslash and control characters are written as backslash
+ * escapes; every other byte is copied unchanged. The escaped text is
+ * staged in a temporary string because whether the value needs
+ * surrounding quotes (it contains a comma or a double quote) is only
+ * known once the whole value has been scanned.
+ * Returns 0 on success and non-zero on failure (-1 if the temporary
+ * string cannot be allocated).
+ */
+static int
+ln_addValue_CSV_NQ(const char *buf, es_str_t **str)
+{
+	int r = -1;
+	unsigned char c;
+	const char *p;
+	char numbuf[4];
+	int j;
+	int requires_quotes = 0;
+	es_str_t *tmp_str = NULL;
+
+	assert(str != NULL);
+	assert(*str != NULL);
+	assert(buf != NULL);
+
+	if((tmp_str = es_newStr(256)) == NULL)
+		goto done;
+
+	/* Walk to the terminating NUL instead of calling strlen() on every
+	 * iteration; a NUL byte therefore never reaches the switch below.
+	 */
+	for(p = buf; *p != '\0'; p++) {
+		c = (unsigned char)*p;
+		if((c >= 0x23 && c <= 0x5b)
+		   || (c >= 0x5d /* && c <= 0x10FFFF*/)
+		   || c == 0x20 || c == 0x21) {
+			/* no need to escape */
+			if(c == 0x2c) { /* comma: would split the field */
+				requires_quotes = 1;
+			}
+			CHKR(es_addChar(&tmp_str, c));
+		} else {
+			/* we must escape, try RFC4627-defined special sequences first */
+			switch(c) {
+			case '\"':
+				requires_quotes = 1;
+				CHKR(es_addBuf(&tmp_str, "\\\"", 2));
+				break;
+			case '\\':
+				CHKR(es_addBuf(&tmp_str, "\\\\", 2));
+				break;
+			case '\010':
+				CHKR(es_addBuf(&tmp_str, "\\b", 2));
+				break;
+			case '\014':
+				CHKR(es_addBuf(&tmp_str, "\\f", 2));
+				break;
+			case '\n':
+				CHKR(es_addBuf(&tmp_str, "\\n", 2));
+				break;
+			case '\r':
+				CHKR(es_addBuf(&tmp_str, "\\r", 2));
+				break;
+			case '\t':
+				CHKR(es_addBuf(&tmp_str, "\\t", 2));
+				break;
+			default:
+				/* TODO : proper Unicode encoding (see header comment) */
+				for(j = 0 ; j < 4 ; ++j) {
+					numbuf[3-j] = hexdigit[c % 16];
+					c = c / 16;
+				}
+				CHKR(es_addBuf(&tmp_str, "\\u", 2));
+				CHKR(es_addBuf(&tmp_str, numbuf, 4));
+				break;
+			}
+		}
+	}
+
+	/* Append the staged text directly from tmp_str; no intermediate C
+	 * string copy is needed, so nothing can leak on the error paths.
+	 */
+	if(requires_quotes) {
+		CHKR(es_addChar(str, '"'));
+	}
+	CHKR(es_addStr(str, tmp_str));
+	if(requires_quotes) {
+		CHKR(es_addChar(str, '"'));
+	}
+
+	r = 0;
+
+done:
+	if(tmp_str != NULL) {
+		es_deleteStr(tmp_str);
+	}
+	return r;
+}
+
+/* Append one JSON field to *str as CSV; values get quotes only when needed
+ * (see ln_addValue_CSV_NQ). Returns 0 on success; errors exit via CHKR/CHKN. */
+static int
+ln_addField_CSV_NQ(struct json_object *field, es_str_t **str)
+{
+	int r = 0;
+	int i;
+	struct json_object *obj;
+	int needComma = 0;
+	const char *value;
+
+	assert(field != NULL);
+	assert(str != NULL);
+	assert(*str != NULL);
+
+	switch(json_object_get_type(field)) {
+	case json_type_array: /* NOTE(review): '[', ']' and ',' are never quoted - confirm */
+		CHKR(es_addChar(str, '['));
+		for (i = json_object_array_length(field) - 1; i >= 0; i--) { /* last to first */
+			if(needComma) /* separator before all but the first emitted element */
+				CHKR(es_addChar(str, ','));
+			needComma = 1;
+			CHKN(obj = json_object_array_get_idx(field, i));
+			CHKN(value = json_object_get_string(obj));
+			CHKR(ln_addValue_CSV_NQ(value, str));
+		}
+		CHKR(es_addChar(str, ']'));
+		break;
+	case json_type_string:
+	case json_type_int:
+		CHKN(value = json_object_get_string(field));
+		CHKR(ln_addValue_CSV_NQ(value, str));
+		break;
+	case json_type_null:
+	case json_type_boolean:
+	case json_type_double:
+	case json_type_object:
+		CHKR(es_addBuf(str, "***unsupported type***", sizeof("***unsupported type***")-1));
+		break;
+	default:
+		CHKR(es_addBuf(str, "***OBJECT***", sizeof("***OBJECT***")-1));
+	}
+
+done:
+	return r;
+}
+
+
 static int
 ln_addField_CSV(struct json_object *field, es_str_t **str)
 {
-	int r, i;
+	int r = 0;
+    int i;
 	struct json_object *obj;
 	int needComma = 0;
 	const char *value;
@@ -166,8 +314,6 @@ ln_addField_CSV(struct json_object *field, es_str_t **str)
 		CHKR(es_addBuf(str, "***OBJECT***", sizeof("***OBJECT***")-1));
 	}
 
-	r = 0;
-
 done:
 	return r;
 }
@@ -207,9 +353,13 @@ ln_fmtEventToCSV(struct json_object *json, es_str_t **str, es_str_t *extraData)
 			needComma = 1;
 		}
 		if (field != NULL) {
-			CHKR(es_addChar(str, '"'));
-			ln_addField_CSV(field, str);
-			CHKR(es_addChar(str, '"'));
+            if (outputCSVNoQuotes) {
+                ln_addField_CSV_NQ(field, str);
+            } else {
+                CHKR(es_addChar(str, '"'));
+                ln_addField_CSV(field, str);
+                CHKR(es_addChar(str, '"'));
+            }
 		}
 	}
 	r = 0;

diff --git a/src/lognormalizer.c b/src/lognormalizer.c
@@ -60,6 +60,7 @@ static int recOutput = OUTPUT_PARSED_RECS | OUTPUT_UNPARSED_RECS;
 				/**< controls which records to output */
 static int outputSummaryLine = 0;
 static int outputNbrUnparsed = 0;
+int outputCSVNoQuotes = 0;	/**< CSV output: quote values only when needed; read by enc_csv.c, hence not static */
 static int addErrLineNbr = 0;	/**< add line number info to unparsed events */
 static int flatTags = 0;	/**< print event.tags in JSON? */
 static FILE *fpDOT;
@@ -301,6 +302,8 @@ handle_generic_option(const char* opt) {
 		ln_setCtxOpts(ctx, LN_CTXOPT_ADD_RULE);
 	} else if (strcmp("addRuleLocation", opt) == 0) {
 		ln_setCtxOpts(ctx, LN_CTXOPT_ADD_RULE_LOCATION);
+    } else if (strcmp("outputCSVNoQuotes", opt) == 0) {
+        outputCSVNoQuotes = 1;
 	} else {
 		fprintf(stderr, "invalid -o option '%s'\n", opt);
 		exit(1);
@@ -325,6 +328,7 @@ fprintf(stderr,
 	"    -oaddRuleLocation Add location of matching rule to metadata\n"
 	"    -oaddExecPath Add exec_path attribute to output\n"
 	"    -oaddOriginalMsg Always add original message to output, not just in error case\n"
+	"    -ooutputCSVNoQuotes CSV output will not wrap fields in double quotation marks\n"
 	"    -p           Print back only if the message has been parsed succesfully\n"
 	"    -P           Print back only if the message has NOT been parsed succesfully\n"
 	"    -L           Add source file line number information to unparsed line output\n"