Skip to content

Add builtin support for reading apache, nginx and syslog logs #22

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 16, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions desktop/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,26 @@ export const evalFileHandler = {
resource: 'evalFile',
handler: async function (
_: string,
{ type, name, server }: Proxy<{ name: string; type: string }>
{
contentTypeInfo,
name,
server,
}: Proxy<{
name: string;
contentTypeInfo: { type: string; customLineRegexp: string };
}>
) {
const typeInfo = { ...contentTypeInfo, additionalParsers };
if (!server) {
const body = await fs.readFile(resolvePath(name));
return parseArrayBuffer('', name, body, additionalParsers);
return parseArrayBuffer(typeInfo, name, body);
}

const config = await getSSHConfig(server);

const sftp = new Client();
await sftp.connect(config);
let body = (await sftp.get(name)) as ArrayBuffer;
return await parseArrayBuffer(type, name, body, additionalParsers);
return await parseArrayBuffer(typeInfo, name, body);
},
};
5 changes: 2 additions & 3 deletions desktop/http.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,9 @@ export const evalHTTPHandler = {
fetch,
hci.http.method,
tunnelledUrl.toString(),
hci.http.type,
{ ...hci.http.contentTypeInfo, additionalParsers },
hci.http.headers,
body,
additionalParsers
body
);
}
);
Expand Down
5 changes: 4 additions & 1 deletion desktop/program.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,10 @@ export const getProgramHandlers = (settings: Settings) => [
}

const body = await fs.readFile(outputTmp.path);
return [await parseArrayBuffer('application/json', '', body), out];
return [
await parseArrayBuffer({ type: 'application/json' }, '', body),
out,
];
} catch (e) {
if (ppi.program.type === 'python') {
const matcher = /, line ([1-9]*), in <module>/g;
Expand Down
12 changes: 6 additions & 6 deletions shared/http.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { HTTPConnectorInfo } from './state';
import { parseArrayBuffer, Parsers } from './text';
import { parseArrayBuffer, ContentTypeInfoPlusParsers } from './text';

export type FetchFunction = (
url: string,
Expand All @@ -14,10 +14,9 @@ export async function request(
fetchFunction: FetchFunction,
method: string,
url: string,
type: string,
contentTypeInfo: ContentTypeInfoPlusParsers,
headers: Array<{ name: string; value: string }> = [],
content = '',
additionalParsers: Parsers = undefined,
require200 = false
) {
if (!(url.startsWith('https://') || url.startsWith('http://'))) {
Expand All @@ -36,14 +35,15 @@ export async function request(
});

const body = await res.arrayBuffer();
if (!type) {
type = res.headers.get('content-type');
if (!contentTypeInfo.type) {
let type = res.headers.get('content-type');
if (type.startsWith('text/plain')) {
type = '';
}
contentTypeInfo.type = type;
}

const data = await parseArrayBuffer(type, url, body, additionalParsers);
const data = await parseArrayBuffer(contentTypeInfo, url, body);
if (require200 && res.status !== 200) {
throw data;
}
Expand Down
38 changes: 25 additions & 13 deletions shared/state.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,27 +82,37 @@ export type HTTPConnectorInfoMethod =
| 'POST'
| 'DELETE';

/**
 * Describes how a piece of content should be interpreted: a content-type
 * string plus an optional per-line regular expression source.
 *
 * Both fields are always strings; a missing (or otherwise falsy)
 * constructor argument leaves the field as the empty string.
 */
export class ContentTypeInfo {
  type = '';
  customLineRegexp = '';

  /**
   * @param type Content-type identifier; falls back to `''` when falsy.
   * @param customLineRegexp Regular expression source; falls back to `''`
   *   when falsy.
   */
  constructor(type?: string, customLineRegexp?: string) {
    if (type) {
      this.type = type;
    }
    if (customLineRegexp) {
      this.customLineRegexp = customLineRegexp;
    }
  }
}

export class HTTPConnectorInfo extends ConnectorInfo {
http: {
headers: Array<{ value: string; name: string }>;
url: string;
method: HTTPConnectorInfoMethod;
type: string;
contentTypeInfo: ContentTypeInfo;
};

constructor(
name?: string,
url?: string,
headers: Array<{ value: string; name: string }> = [],
method?: HTTPConnectorInfoMethod,
type?: string
contentTypeInfo?: ContentTypeInfo
) {
super('http', name);
this.http = {
headers,
url: url || '',
method: method || 'GET',
type: type || '',
contentTypeInfo: contentTypeInfo || new ContentTypeInfo(),
};
}
}
Expand Down Expand Up @@ -272,7 +282,7 @@ export class TablePanelInfo extends PanelInfo {

export class FilePanelInfo extends PanelInfo {
file: {
type: string;
contentTypeInfo: ContentTypeInfo;
name: string;
content: ArrayBuffer;
};
Expand All @@ -281,28 +291,30 @@ export class FilePanelInfo extends PanelInfo {
name?: string,
fileName?: string,
fileContent?: ArrayBuffer,
type?: string
contentTypeInfo?: ContentTypeInfo
) {
super('file', name, '');
this.file = {
name: fileName || '',
content: fileContent || new ArrayBuffer(0),
type: type || '',
contentTypeInfo: contentTypeInfo || new ContentTypeInfo(),
};
}
}

export type LiteralPanelInfoType = 'csv' | 'json';

export class LiteralPanelInfo extends PanelInfo {
literal: {
type: LiteralPanelInfoType;
contentTypeInfo: ContentTypeInfo;
};

constructor(name?: string, type?: LiteralPanelInfoType, content?: string) {
constructor(
name?: string,
content?: string,
contentTypeInfo?: ContentTypeInfo
) {
super('literal', name, content);
this.literal = {
type: type || 'csv',
contentTypeInfo: contentTypeInfo || new ContentTypeInfo(),
};
}
}
Expand Down Expand Up @@ -352,8 +364,8 @@ export const DEFAULT_PROJECT: ProjectState = new ProjectState(
new ProjectPage('CSV Discovery Example', [
new LiteralPanelInfo(
'Raw CSV Text',
'csv',
'name,age\nMorgan,12\nJames,17'
'name,age\nMorgan,12\nJames,17',
new ContentTypeInfo('text/csv')
),
new SQLPanelInfo(
'Transform with SQL',
Expand Down
45 changes: 40 additions & 5 deletions shared/text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,32 @@ import * as CSV from 'papaparse';

import log from './log';

/**
 * Lookup table of extra parsers, keyed by a type string. Each parser
 * receives the raw bytes and resolves to the parsed value.
 */
export type Parsers = Record<string, (a: ArrayBuffer) => Promise<any>>;

/**
 * Argument bundle accepted by `parseArrayBuffer`: the content type to parse
 * as, plus two optional extras.
 */
export interface ContentTypeInfoPlusParsers {
  /** Content-type identifier selecting the parser. */
  type: string;
  /** Regular expression source applied per line for `text/regexplines`. */
  customLineRegexp?: string;
  /** Extra parsers supplied by the caller. */
  additionalParsers?: Parsers;
}

// Built-in line formats. Each pattern is applied to one line at a time and
// its named capture groups become the fields of the parsed row.

// Apache2 access log line; the trailing quoted referer/agent pair is optional.
const APACHE2_ACCESS_RE =
  /^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>(?:[^\"]|\\.)*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>(?:[^\"]|\\.)*)" "(?<agent>(?:[^\"]|\\.)*)")?$/;

// Apache2 error log line; the "[pid ...]" section is optional.
const APACHE2_ERROR_RE =
  /^\[[^ ]* (?<time>[^\]]*)\] \[(?<level>[^\]]*)\](?: \[pid (?<pid>[^\]]*)\])? \[client (?<client>[^\]]*)\] (?<message>.*)$/;

// Nginx access log line; referer/agent and the forwarded-for value are optional.
const NGINX_ACCESS_RE =
  /^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)"(?:\s+(?<http_x_forwarded_for>[^ ]+))?)?$/;

// Syslog line in the RFC 3164 layout: "<pri>time host ident[pid]: message".
const SYSLOG_RFC3164_RE =
  /^\<(?<pri>[0-9]+)\>(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[^ :\[]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$/;

// Syslog line in the RFC 5424 layout:
// "<pri>version time host ident pid msgid extradata [message]".
// Anchored with ^ and $: JavaScript has no \A / \z anchors (without the `u`
// flag they match the literal letters "A" and "z"), so the pattern must not
// use them or it can never match a real line.
const SYSLOG_RFC5424_RE =
  /^\<(?<pri>[0-9]{1,3})\>[1-9]\d{0,2} (?<time>[^ ]+) (?<host>[!-~]{1,255}) (?<ident>[!-~]{1,48}) (?<pid>[!-~]{1,128}) (?<msgid>[!-~]{1,32}) (?<extradata>(?:\-|(?:\[.*?(?<!\\)\])+))(?: (?<message>.+))?$/;

/**
 * Parses line-oriented text by applying `re` to every non-empty line.
 *
 * @param body Text to parse; lines may end in `\n` or `\r\n`.
 * @param re Pattern evaluated once per line. Only its named capture groups
 *   are returned.
 * @returns One object of named capture groups per matching line, in input
 *   order. Lines that do not match `re` are skipped, so a single malformed
 *   line does not abort the whole parse. A match without any named groups
 *   yields an empty object.
 */
export function parseWithRegex(
  body: string,
  re: RegExp
): Array<{ [key: string]: string }> {
  const rows: Array<{ [key: string]: string }> = [];

  for (const line of body.split(/\r?\n/)) {
    if (!line) {
      continue;
    }

    // A global or sticky regex remembers where its last match ended; start
    // every line from the beginning so earlier lines cannot affect later ones.
    re.lastIndex = 0;

    const match = re.exec(line);
    if (match === null) {
      continue;
    }

    rows.push(match.groups || {});
  }

  return rows;
}

export function parseCSV(csvString: string) {
const csvWhole = CSV.parse(csvString);
const csv = csvWhole.data;
Expand Down Expand Up @@ -52,16 +78,13 @@ function trimExcelHeaders(ws: XLSX.WorkSheet) {
}
}

export type Parsers = { [type: string]: (a: ArrayBuffer) => Promise<any> };

// Content-type string for Excel workbooks; the content type picker uses it
// as the value of its "Excel" option.
export const XLSX_MIME_TYPE =
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet';

export async function parseArrayBuffer(
type: string,
{ type, additionalParsers, customLineRegexp }: ContentTypeInfoPlusParsers,
fileName: string,
body: ArrayBuffer,
additionalParsers?: Parsers
body: ArrayBuffer
) {
// I'm not sure body is actually always an arraybuffer.
if (!body || (body as any).length === 0 || body.byteLength === 0) {
Expand Down Expand Up @@ -97,6 +120,18 @@ export async function parseArrayBuffer(
}

switch (realType) {
case 'text/regexplines':
return parseWithRegex(bodyAsString(), new RegExp(customLineRegexp));
case 'text/syslogrfc3164':
return parseWithRegex(bodyAsString(), SYSLOG_RFC3164_RE);
case 'text/syslogrfc5424':
return parseWithRegex(bodyAsString(), SYSLOG_RFC5424_RE);
case 'text/apache2error':
return parseWithRegex(bodyAsString(), APACHE2_ERROR_RE);
case 'text/apache2access':
return parseWithRegex(bodyAsString(), APACHE2_ACCESS_RE);
case 'text/nginxaccess':
return parseWithRegex(bodyAsString(), NGINX_ACCESS_RE);
case 'text/csv':
return parseCSV(bodyAsString());
case 'application/json':
Expand Down
88 changes: 63 additions & 25 deletions ui/ContentTypePicker.tsx
Original file line number Diff line number Diff line change
@@ -1,40 +1,78 @@
import * as React from 'react';

import { MODE } from '../shared/constants';
import { XLSX_MIME_TYPE } from '../shared/text';
import { ContentTypeInfo } from '../shared/state';

import { Select } from './component-library/Select';
import { Textarea } from './component-library/Textarea';

export function ContentTypePicker({
value,
onChange,
disableAutoDetect,
inMemoryEval,
}: {
value: string;
onChange: (v: string) => void;
value: ContentTypeInfo;
onChange: (v: ContentTypeInfo) => void;
disableAutoDetect?: boolean;
inMemoryEval: boolean;
}) {
return (
<div className="form-row">
<Select
label="Content Type"
value={value}
onChange={(type: string) => {
if (type === 'null') {
type = '';
}
<React.Fragment>
<div className="form-row">
<Select
label="Content Type"
value={value.type}
onChange={(type: string) => {
if (type === 'null') {
type = '';
}

return onChange(type);
}}
>
<option value="null">Auto-detect</option>
<option value="text/csv">CSV</option>
<option value={XLSX_MIME_TYPE}>Excel</option>
{MODE !==
'browser' /* This is getting ridiculous. Really need to find a plugin architecture */ && (
<option value="parquet">Parquet</option>
)}
<option value="application/json">JSON</option>
<option value="application/jsonlines">Newline-delimited JSON</option>
</Select>
</div>
return onChange({ ...value, type });
}}
>
{!disableAutoDetect && <option value="null">Auto-detect</option>}
<optgroup label="Data">
<option value="text/csv">CSV</option>
<option value={XLSX_MIME_TYPE}>Excel</option>
{inMemoryEval /* This is getting ridiculous. Really need to find a plugin architecture */ && (
<option value="parquet">Parquet</option>
)}
<option value="application/json">JSON</option>
</optgroup>
<optgroup label="Logs">
<option value="text/apache2access">Apache2 Access Logs</option>
<option value="text/apache2error">Apache2 Error Logs</option>
<option value="text/nginxaccess">Nginx Access Logs</option>
<option value="text/syslogrfc3164">Syslog RFC-3164</option>
<option value="text/syslogrfc5424">Syslog RFC-5424</option>
<option value="application/jsonlines">
Newline-delimited JSON
</option>
<option value="text/regexplines">Newline-delimited Regex</option>
</optgroup>
</Select>
</div>
{value.type === 'text/regexplines' && (
<div className="form-row">
<Textarea
value={value.customLineRegexp}
onChange={(customLineRegexp: string) =>
onChange({ ...value, customLineRegexp })
}
/>
<p>
Enter a custom ECMAScript-flavor regular expression to be evaluated
for each line. Only named capture groups will be returned. For
example:{' '}
<code>
{
'^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) [(?<time>[^]]*)] "(?<method>S+)(?: +(?<path>[^"]*?)(?: +S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^"]*)" "(?<agent>[^"]*)"(?:s+(?<http_x_forwarded_for>[^ ]+))?)?$'
}
</code>
</p>
</div>
)}
</React.Fragment>
);
}
Loading