Skip to content

Commit 4622fc6

Browse files
author
Joseph Sirianni
authored
Implement operator for parsing absolute uri, relative uri, and query string (#12)
1 parent bbb4f97 commit 4622fc6

File tree

3 files changed

+956
-0
lines changed

3 files changed

+956
-0
lines changed

docs/operators/uri_parser.md

+180
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
## `uri_parser` operator
2+
3+
The `uri_parser` operator parses the string-type field selected by `parse_from` as [URI](https://tools.ietf.org/html/rfc3986).
4+
5+
`uri_parser` can handle:
6+
- Absolute URI
7+
- `https://google.com/v1/app?user_id=2&uuid=57b4dad2-063c-4965-941c-adfd4098face`
8+
- Relative URI
9+
- `/app?user=admin`
10+
- Query string
11+
- `?request=681e6fc4-3314-4ccc-933e-4f9c9f0efd24&env=stage&env=dev`
12+
- Query string must start with a question mark
13+
14+
### Configuration Fields
15+
16+
| Field | Default | Description |
17+
| --- | --- | --- |
18+
| `id` | `uri_parser` | A unique identifier for the operator |
19+
| `output` | Next in pipeline | The connected operator(s) that will receive all outbound entries |
20+
| `parse_from` | $ | A [field](/docs/types/field.md) that indicates the field to be parsed as a URI |
21+
| `parse_to` | $ | A [field](/docs/types/field.md) that indicates where the parsed URI fields will be written |
22+
| `preserve_to` | | Preserves the unparsed value at the specified [field](/docs/types/field.md) |
23+
| `on_error` | `send` | The behavior of the operator if it encounters an error. See [on_error](/docs/types/on_error.md) |
24+
| `if` | | An [expression](/docs/types/expression.md) that, when set, will be evaluated to determine whether this operator should be used for the given entry. This allows you to do easy conditional parsing without branching logic with routers. |
25+
26+
27+
### Output Fields
28+
29+
The following fields are returned. Empty fields are not returned.
30+
31+
| Field | Type | Example | Description |
32+
| --- | --- | --- | --- |
33+
| scheme | `string` | `"http"` | [URI Scheme](https://www.iana.org/assignments/uri-schemes/uri-schemes.xhtml). HTTP, HTTPS, FTP, etc. |
34+
| user | `string` | `"dev"` | [Userinfo](https://tools.ietf.org/html/rfc3986#section-3.2) username. Password is always ignored. |
35+
| host | `string` | `"golang.org"` | The [hostname](https://tools.ietf.org/html/rfc3986#section-3.2.2) such as `www.example.com`, `example.com`, `example`. A scheme is required in order to parse the `host` field. |
36+
| port | `string` | `"8443"` | The [port](https://tools.ietf.org/html/rfc3986#section-3.2.3) the request is sent to. A scheme is required in order to parse the `port` field. |
37+
| path | `string` | `"/v1/app"` | URI request [path](https://tools.ietf.org/html/rfc3986#section-3.3). |
38+
| query | `map[string][]string` | `"query":{"user":["admin"]}` | Parsed URI [query string](https://tools.ietf.org/html/rfc3986#section-3.4). |
39+
40+
41+
### Example Configurations
42+
43+
44+
#### Parse the field `message` as absolute URI
45+
46+
Configuration:
47+
```yaml
48+
- type: uri_parser
49+
parse_from: message
50+
```
51+
52+
<table>
53+
<tr><td> Input record </td> <td> Output record </td></tr>
54+
<tr>
55+
<td>
56+
57+
```json
58+
{
59+
"timestamp": "",
60+
"record": {
61+
"message": "https://dev:pass@google.com/app?user_id=2&token=001"
62+
}
63+
}
64+
```
65+
66+
</td>
67+
<td>
68+
69+
```json
70+
{
71+
"timestamp": "",
72+
"record": {
73+
"host": "google.com",
74+
"path": "/app",
75+
"query": {
76+
"user_id": [
77+
"2"
78+
],
79+
"token": [
80+
"001"
81+
]
82+
},
83+
"scheme": "https",
84+
"user": "dev"
85+
}
86+
}
87+
```
88+
89+
</td>
90+
</tr>
91+
</table>
92+
93+
#### Parse the field `message` as relative URI
94+
95+
Configuration:
96+
```yaml
97+
- type: uri_parser
98+
parse_from: message
99+
```
100+
101+
<table>
102+
<tr><td> Input record </td> <td> Output record </td></tr>
103+
<tr>
104+
<td>
105+
106+
```json
107+
{
108+
"timestamp": "",
109+
"record": {
110+
"message": "/app?user=admin"
111+
}
112+
}
113+
```
114+
115+
</td>
116+
<td>
117+
118+
```json
119+
{
120+
"timestamp": "",
121+
"record": {
122+
"path": "/app",
123+
"query": {
124+
"user": [
125+
"admin"
126+
]
127+
}
128+
}
129+
}
130+
```
131+
132+
</td>
133+
</tr>
134+
</table>
135+
136+
#### Parse the field `query` as URI query string
137+
138+
Configuration:
139+
```yaml
140+
- type: uri_parser
141+
parse_from: query
142+
```
143+
144+
<table>
145+
<tr><td> Input record </td> <td> Output record </td></tr>
146+
<tr>
147+
<td>
148+
149+
```json
150+
{
151+
"timestamp": "",
152+
"record": {
153+
"query": "?request=681e6fc4-3314-4ccc-933e-4f9c9f0efd24&env=stage&env=dev"
154+
}
155+
}
156+
```
157+
158+
</td>
159+
<td>
160+
161+
```json
162+
{
163+
"timestamp": "",
164+
"record": {
165+
"query": {
166+
"env": [
167+
"stage",
168+
"dev"
169+
],
170+
"request": [
171+
"681e6fc4-3314-4ccc-933e-4f9c9f0efd24"
172+
]
173+
}
174+
}
175+
}
176+
```
177+
178+
</td>
179+
</tr>
180+
</table>

operator/builtin/parser/uri/uri.go

+166
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
// Copyright The OpenTelemetry Authors
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package uri
16+
17+
import (
18+
"context"
19+
"fmt"
20+
"net/url"
21+
"strings"
22+
23+
"github.com/open-telemetry/opentelemetry-log-collection/entry"
24+
"github.com/open-telemetry/opentelemetry-log-collection/operator"
25+
"github.com/open-telemetry/opentelemetry-log-collection/operator/helper"
26+
)
27+
28+
func init() {
29+
operator.Register("uri_parser", func() operator.Builder { return NewURIParserConfig("") })
30+
}
31+
32+
// NewURIParserConfig creates a new uri parser config with default values.
33+
func NewURIParserConfig(operatorID string) *URIParserConfig {
34+
return &URIParserConfig{
35+
ParserConfig: helper.NewParserConfig(operatorID, "uri_parser"),
36+
}
37+
}
38+
39+
// URIParserConfig is the configuration of a uri parser operator.
40+
type URIParserConfig struct {
41+
helper.ParserConfig `yaml:",inline"`
42+
}
43+
44+
// Build will build a uri parser operator.
45+
func (c URIParserConfig) Build(context operator.BuildContext) ([]operator.Operator, error) {
46+
parserOperator, err := c.ParserConfig.Build(context)
47+
if err != nil {
48+
return nil, err
49+
}
50+
51+
uriParser := &URIParser{
52+
ParserOperator: parserOperator,
53+
}
54+
55+
return []operator.Operator{uriParser}, nil
56+
}
57+
58+
// URIParser is an operator that parses a uri.
59+
type URIParser struct {
60+
helper.ParserOperator
61+
}
62+
63+
// Process will parse an entry.
64+
func (u *URIParser) Process(ctx context.Context, entry *entry.Entry) error {
65+
return u.ParserOperator.ProcessWith(ctx, entry, u.parse)
66+
}
67+
68+
// parse will parse a uri from a field and attach it to an entry.
69+
func (u *URIParser) parse(value interface{}) (interface{}, error) {
70+
switch m := value.(type) {
71+
case string:
72+
return parseURI(m)
73+
case []byte:
74+
return parseURI(string(m))
75+
default:
76+
return nil, fmt.Errorf("type '%T' cannot be parsed as URI", value)
77+
}
78+
}
79+
80+
// parseURI takes an absolute or relative uri and returns the parsed values.
81+
func parseURI(value string) (map[string]interface{}, error) {
82+
m := make(map[string]interface{})
83+
84+
if strings.HasPrefix(value, "?") {
85+
// remove the query string '?' prefix before parsing
86+
v, err := url.ParseQuery(value[1:])
87+
if err != nil {
88+
return nil, err
89+
}
90+
return queryToMap(v, m), nil
91+
}
92+
93+
x, err := url.ParseRequestURI(value)
94+
if err != nil {
95+
return nil, err
96+
}
97+
return urlToMap(x, m), nil
98+
}
99+
100+
// urlToMap converts a url.URL to a map, excludes any values that are not set.
101+
func urlToMap(p *url.URL, m map[string]interface{}) map[string]interface{} {
102+
scheme := p.Scheme
103+
if scheme != "" {
104+
m["scheme"] = scheme
105+
}
106+
107+
user := p.User.Username()
108+
if user != "" {
109+
m["user"] = user
110+
}
111+
112+
host := p.Hostname()
113+
if host != "" {
114+
m["host"] = host
115+
}
116+
117+
port := p.Port()
118+
if port != "" {
119+
m["port"] = port
120+
}
121+
122+
path := p.EscapedPath()
123+
if path != "" {
124+
m["path"] = path
125+
}
126+
127+
return queryToMap(p.Query(), m)
128+
}
129+
130+
// queryToMap converts a query string url.Values to a map.
131+
func queryToMap(query url.Values, m map[string]interface{}) map[string]interface{} {
132+
// no-op if query is empty, do not create the key m["query"]
133+
if len(query) <= 0 {
134+
return m
135+
}
136+
137+
/* 'parameter' will represent url.Values
138+
map[string]interface{}{
139+
"parameter-a": []interface{}{
140+
"a",
141+
"b",
142+
},
143+
"parameter-b": []interface{}{
144+
"x",
145+
"y",
146+
},
147+
}
148+
*/
149+
parameters := map[string]interface{}{}
150+
for param, values := range query {
151+
parameters[param] = queryParamValuesToMap(values)
152+
}
153+
m["query"] = parameters
154+
return m
155+
}
156+
157+
158+
// queryParamValuesToMap converts the values of one query string parameter
// into an []interface{} holding the same strings in the same order.
// Despite its name, the result is a slice, not a map. The returned slice is
// always non-nil, even when values is nil or empty.
func queryParamValuesToMap(values []string) []interface{} {
	out := make([]interface{}, 0, len(values))
	for _, s := range values {
		out = append(out, s)
	}
	return out
}

0 commit comments

Comments
 (0)