Skip to content

Feat/tunnel modeling merge wip #119

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 0 additions & 34 deletions .github/ISSUE_TEMPLATE/data-quality-issue.md

This file was deleted.

40 changes: 40 additions & 0 deletions oonidata/src/oonidata/models/dataformats.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,7 @@ class DNSQuery(BaseModel):
dial_id: Optional[int] = None



@add_slots
@dataclass
class TCPConnectStatus(BaseModel):
Expand Down Expand Up @@ -368,3 +369,42 @@ class NetworkEvent(BaseModel):
# Deprecated fields
dial_id: Optional[int] = None
conn_id: Optional[int] = None


@add_slots
@dataclass
class OpenVPNHandshake(BaseModel):
handshake_time: float
endpoint: str
ip: str # we might want to make this optional, and scrub in favor of ASN/prefix
port: int
transport: str
provider: str
t0: float
t: float
openvpn_options: Optional[Dict[str, str]] = None
tags: Optional[List[str]] = None
transaction_id: Optional[str] = None
failure: Failure = None

@add_slots
@dataclass
class OpenVPNPacket(BaseModel):
operation: str
opcode: str
id: int
payload_size: int
acks: Optional[List[int]] = None
send_attempts: Optional[int] = None


@add_slots
@dataclass
class OpenVPNNetworkEvent(BaseModel):
operation: str
stage: str
t: float
tags: Optional[List[str]] = None
packet: Optional[OpenVPNPacket] = None
transaction_id: Optional[int] = None

3 changes: 3 additions & 0 deletions oonidata/src/oonidata/models/nettests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .http_invalid_request_line import HTTPInvalidRequestLine
from .http_header_field_manipulation import HTTPHeaderFieldManipulation
from .echcheck import ECHCheck
from .openvpn import OpenVPN

SUPPORTED_CLASSES = [
HTTPHeaderFieldManipulation,
Expand All @@ -28,6 +29,7 @@
Signal,
FacebookMessenger,
Whatsapp,
OpenVPN,
BaseMeasurement,
ECHCheck,
]
Expand All @@ -44,6 +46,7 @@
Signal,
FacebookMessenger,
Whatsapp,
OpenVPN,
BaseMeasurement,
]

Expand Down
36 changes: 36 additions & 0 deletions oonidata/src/oonidata/models/nettests/openvpn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from dataclasses import dataclass
from typing import List, Optional

from ..base import BaseModel

from oonidata.compat import add_slots
from oonidata.models.dataformats import (
BaseTestKeys,
Failure,
TCPConnect,
OpenVPNHandshake,
OpenVPNNetworkEvent,
)
from oonidata.models.nettests.base_measurement import BaseMeasurement


@add_slots
@dataclass
class OpenVPNTestKeys(BaseTestKeys):
success: Optional[bool] = False
failure: Failure = None

network_events: Optional[List[OpenVPNNetworkEvent]] = None
tcp_connect: Optional[List[TCPConnect]] = None
openvpn_handshake: Optional[List[OpenVPNHandshake]] = None

bootstrap_time: Optional[float] = None
tunnel: str = None


@add_slots
@dataclass
class OpenVPN(BaseMeasurement):
__test_name__ = "openvpn"

test_keys: OpenVPNTestKeys
41 changes: 41 additions & 0 deletions oonidata/src/oonidata/models/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import dataclass, field
from datetime import datetime
from typing import (
Dict,
Optional,
List,
Tuple,
Expand Down Expand Up @@ -388,3 +389,43 @@ class HTTPMiddleboxObservation:
hfm_diff: Optional[str] = None
hfm_failure: Optional[str] = None
hfm_success: Optional[bool] = None


@table_model(
table_name="obs_tunnel",
table_index=("measurement_uid", "observation_idx", "measurement_start_time"),
)
@dataclass
class TunnelObservation:
measurement_meta: MeasurementMeta
probe_meta: ProbeMeta

observation_idx: int

ip: str
port: int
transport: str

# label can be a fqdn or a human readable lable used to group the endpoint
label: str

# definition of success will need to change when/if we're able to gather metrics
# through the tunnel.
success: bool
failure: Failure

protocol: str

# indicates obfuscation or modifications from the main protocol family.
variant: str = ""

# any metadata about the providers behind the endpoints.
provider: str = ""

# time it took to perform the bootstrap of the protocol
bootstrap_time: float = -1

# timing list
t0: float = -1
timing_map: Dict[str, float] = field(default_factory=dict)
failure_map: Dict[str, str] = field(default_factory=dict)
6 changes: 3 additions & 3 deletions oonipipeline/Design.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ needed.

### Expose a queriable low level view on measurements

Currently it's only possible to query measurement at a granuliaty which is as
fine a measurement.
Currently it's only possible to query measurement at a granularity which is as
fine as a measurement.

This means that it's only possible to answer questions which the original
designer of the experiment had already throught of.
designer of the experiment had already thought of.

On the other hand the new pipeline breaks down measurements into distinct
observations (think 1 DNS query and answer or 1 TLS handshake towards a
Expand Down
2 changes: 1 addition & 1 deletion oonipipeline/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ For historical context, these are the major revisions:
- `v1` - OONI Pipeline based on custom CLI scripts using mongodb as a backend. Used until ~2015.
- `v2` - OONI Pipeline based on [luigi](https://luigi.readthedocs.io/en/stable/). Used until ~2017.
- `v3` - OONI Pipeline based on [airflow](https://airflow.apache.org/). Used until ~2020.
- `v4` - OONI Pipeline basedon custom script and systemd units (aka fastpath). Currently in use in production.
- `v4` - OONI Pipeline based on custom script and systemd units (aka fastpath). Currently in use in production.
- `v5` - Next generation OONI Pipeline. What this readme is relevant to. Expected to become in production by Q4 2024.

## Setup
Expand Down
5 changes: 5 additions & 0 deletions oonipipeline/src/oonipipeline/db/create_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
WebControlObservation,
WebObservation,
HTTPMiddleboxObservation,
TunnelObservation,
)

from .connections import ClickhouseConnection
Expand Down Expand Up @@ -89,6 +90,9 @@ def typing_to_clickhouse(t: Any) -> str:
if t in (Mapping[str, str], Dict[str, str]):
return "Map(String, String)"

if t in (Mapping[str, float], Dict[str, float]):
return "Map(String, Float64)"

# TODO(art): eventually all the above types should be mapped using a similar pattern
child_type, parent_type = typing.get_args(t)
is_nullable = False
Expand Down Expand Up @@ -165,6 +169,7 @@ def format_create_query(
table_models = [
WebObservation,
WebControlObservation,
TunnelObservation,
HTTPMiddleboxObservation,
]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
NetworkEvent,
TCPConnect,
TLSHandshake,
OpenVPNHandshake,
OpenVPNNetworkEvent,
maybe_binary_data_to_bytes,
)
from oonidata.models.nettests.base_measurement import BaseMeasurement
Expand All @@ -35,6 +37,7 @@
TCPObservation,
TLSObservation,
WebObservation,
TunnelObservation,
)
from oonidata.datautils import (
InvalidCertificateChain,
Expand Down Expand Up @@ -731,7 +734,6 @@ def make_measurement_meta(msmt: BaseMeasurement, bucket_date: str) -> Measuremen
measurement_start_time=measurement_start_time,
)


class MeasurementTransformer:
"""
MeasurementTransformer is responsible for taking a measurement and
Expand Down Expand Up @@ -883,7 +885,7 @@ def consume_web_observations(

It will attempt to map them via the transaction_id or ip:port tuple.

Any observation that cannot be mapped will be returned inside of it's
Any observation that cannot be mapped will be returned inside of its
own WebObservation with all other columns set to None.
"""
web_obs_list: List[WebObservation] = []
Expand Down
107 changes: 107 additions & 0 deletions oonipipeline/src/oonipipeline/transforms/nettests/openvpn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
from datetime import datetime
from typing import Dict, List, Optional, Tuple
from oonidata.models.dataformats import OpenVPNHandshake, OpenVPNNetworkEvent
from oonidata.models.nettests import OpenVPN
from oonidata.models.observations import (
TunnelObservation,
WebObservation,
)

from ..measurement_transformer import MeasurementTransformer, normalize_failure


def count_key_exchange_packets(network_events: List[OpenVPNNetworkEvent]) -> int:
"""
return number of packets exchanged in the SENT_KEY state
"""
n = 0
for evt in network_events:
if evt.stage == "SENT_KEY" and evt.operation.startswith("packet_"):
n += 1
return n


def make_openvpn_timing_map(
network_events: List[OpenVPNNetworkEvent],
) -> Dict[str, float]:

timings = {}
# TODO(ain): condition to test version >= xyz
if len(network_events) != 0:
for evt in network_events:
if evt.packet is not None:
if evt.packet.opcode == "P_CONTROL_HARD_RESET_CLIENT_V2":
timings["openvpn_handshake_hr_client"] = evt.t
elif evt.packet.opcode == "P_CONTROL_HARD_RESET_SERVER_V2":
timings["openvpn_handshake_hr_server"] = evt.t
elif evt.tags and "client_hello" in evt.tags:
timings["openvpn_handshake_clt_hello"] = evt.t
elif evt.tags and "server_hello" in evt.tags:
timings["openvpn_handshake_srv_hello"] = evt.t
if evt.operation == "state" and evt.stage == "GOT_KEY":
timings["openvpn_handshake_got_keys"] = evt.t
if evt.operation == "state" and evt.stage == "GENERATED_KEYS":
timings["openvpn_handshake_gen_keys"] = evt.t

timings["openvpn_handshake_key_exchg_n"] = count_key_exchange_packets(
network_events
)

return timings


class OpenVPNTransformer(MeasurementTransformer):

def make_observations(
self, msmt: OpenVPN
) -> Tuple[List[TunnelObservation], List[WebObservation]]:
if not msmt.test_keys:
return ([], [])

Check warning on line 59 in oonipipeline/src/oonipipeline/transforms/nettests/openvpn.py

View check run for this annotation

Codecov / codecov/patch

oonipipeline/src/oonipipeline/transforms/nettests/openvpn.py#L59

Added line #L59 was not covered by tests

# def make_openvpn_observations(
# self,
# tcp_observations: Optional[List[TCPConnect]],
# openvpn_handshakes: List[OpenVPNHandshake],
# network_events: Optional[List[OpenVPNNetworkEvent]],
# bootstrap_time: float,
# ) -> List[TunnelObservation]:
# """
# Returns a list of OpenVPNObservations by mapping all related
# TCPObservations, OpenVPNNetworkevents and OpenVPNHandshakes.
# """

tunnel_observations: List[TunnelObservation] = []

assert msmt.test_keys is not None
assert msmt.test_keys.openvpn_handshake is not None
idx = 1
for hs in msmt.test_keys.openvpn_handshake:
to = TunnelObservation(
measurement_meta=self.measurement_meta,
probe_meta=self.probe_meta,
failure=normalize_failure(hs.failure),
success=hs.failure == None,
label="",
protocol="openvpn",
transport=hs.transport,
ip=hs.ip,
observation_idx=idx,
port=hs.port,
bootstrap_time=msmt.test_keys.bootstrap_time or -1,
)

to.timing_map = make_openvpn_timing_map(msmt.test_keys.network_events or [])
to.timing_map["handshake_t"] = hs.t
to.timing_map["handshake_t0"] = hs.t0
to.failure_map["handshake"] = hs.failure or ""
idx += 1

tunnel_observations.append(to)

web_observations = self.consume_web_observations(
dns_observations=[],
tcp_observations=self.make_tcp_observations(msmt.test_keys.tcp_connect),
tls_observations=[],
http_observations=[],
)
return (tunnel_observations, web_observations)
Loading