Skip to content

Commit 9d20db0

Browse files
author
Igor Polishchuk
committed
Merge pull request #2 from DataDog/master
Merge from upstream
2 parents c927479 + 570b0f5 commit 9d20db0

10 files changed

+337
-111
lines changed

checks.d/http_check.py

+131-40
Original file line numberDiff line numberDiff line change
@@ -1,41 +1,73 @@
11
# stdlib
2+
from datetime import datetime
3+
import os.path
24
import socket
5+
import ssl
36
import time
47
from urlparse import urlparse
58

9+
# 3rd party
10+
from httplib2 import Http, HttpLib2Error
11+
import tornado
12+
613
# project
714
from checks.network_checks import NetworkCheck, Status, EventType
15+
from config import _is_affirmative
816
from util import headers as agent_headers
917

10-
# 3rd party
11-
from httplib2 import Http, HttpLib2Error
1218

13-
class HTTPCheck(NetworkCheck):
19+
def get_ca_certs_path():
    """
    Locate a CA certificate bundle on this machine.

    The candidate locations are probed in a fixed order: the bundle
    embedded in the agent install, a `ca-certificates.crt` file next to
    the tornado package, then the `/etc/ssl/certs` bundle.

    Returns the first candidate path that exists, or None when none of
    them does.
    """
    tornado_dir = os.path.dirname(tornado.__file__)
    candidates = (
        '/opt/datadog-agent/embedded/ssl/certs/cacert.pem',
        os.path.join(tornado_dir, 'ca-certificates.crt'),
        '/etc/ssl/certs/ca-certificates.crt',
    )

    # next() with a default yields None when no candidate exists
    return next((path for path in candidates if os.path.exists(path)), None)
33+
1434

35+
class HTTPCheck(NetworkCheck):
1536
SOURCE_TYPE_NAME = 'system'
16-
SERVICE_CHECK_PREFIX = 'http_check'
37+
SC_STATUS = 'http_check'
38+
SC_SSL_CERT = 'http_check.ssl_cert'
39+
40+
def __init__(self, name, init_config, agentConfig, instances):
    """
    Resolve the CA bundle used for certificate checks, then initialise
    the parent NetworkCheck with the unmodified arguments.

    `self.ca_certs` is the 'ca_certs' value from `init_config` when that
    key is present, otherwise the result of get_ca_certs_path().
    """
    # Guard against a missing init_config: calling .get() on None would
    # raise AttributeError before the check is even constructed.
    # NOTE(review): presumably an empty `init_config:` section can reach
    # here as None - confirm against the config loader.
    conf = init_config or {}
    if 'ca_certs' in conf:
        self.ca_certs = conf['ca_certs']
    else:
        # Only probe the filesystem when no bundle was configured
        self.ca_certs = get_ca_certs_path()
    NetworkCheck.__init__(self, name, init_config, agentConfig, instances)
1743

1844
def _load_conf(self, instance):
    """
    Read the settings of one check instance.

    Returns a 10-tuple, in this order:
    (url, username, password, timeout, include_content, headers,
     response_time, tags, disable_ssl_validation, ssl_expire)

    Defaults applied when a key is absent: tags [], username/password
    None, timeout 10, headers {}, collect_response_time True,
    include_content False, disable_ssl_validation True,
    check_certificate_expiration True.

    Raises Exception when the instance has no 'url'.
    """
    # Fail fast: nothing else is meaningful without a url
    url = instance.get('url', None)
    if url is None:
        raise Exception("Bad configuration. You must specify a url")

    tags = instance.get('tags', [])
    username = instance.get('username', None)
    password = instance.get('password', None)
    timeout = int(instance.get('timeout', 10))

    # Instance headers are layered on top of the agent's own headers
    headers = agent_headers(self.agentConfig)
    headers.update(instance.get('headers', {}))

    # Flags go through _is_affirmative before being returned
    response_time = _is_affirmative(instance.get('collect_response_time', True))
    include_content = _is_affirmative(instance.get('include_content', False))
    # Named after the option itself so the local does not shadow the
    # `ssl` module imported at the top of the file
    disable_ssl_validation = _is_affirmative(instance.get('disable_ssl_validation', True))
    ssl_expire = _is_affirmative(instance.get('check_certificate_expiration', True))

    return (url, username, password, timeout, include_content, headers,
            response_time, tags, disable_ssl_validation, ssl_expire)
3462

3563
def _check(self, instance):
36-
addr, username, password, timeout, include_content, headers, response_time, tags, disable_ssl_validation = self._load_conf(instance)
64+
addr, username, password, timeout, include_content, headers, response_time, tags, disable_ssl_validation, ssl_expire = self._load_conf(instance)
3765
content = ''
3866
start = time.time()
67+
68+
service_checks = []
69+
resp = None
70+
3971
try:
4072
self.log.debug("Connecting to %s" % addr)
4173
if disable_ssl_validation and urlparse(addr)[0] == "https":
@@ -48,47 +80,76 @@ def _check(self, instance):
4880
except socket.timeout, e:
4981
length = int((time.time() - start) * 1000)
5082
self.log.info("%s is DOWN, error: %s. Connection failed after %s ms" % (addr, str(e), length))
51-
return Status.DOWN, "%s. Connection failed after %s ms" % (str(e), length)
83+
service_checks.append((
84+
self.SC_STATUS,
85+
Status.DOWN,
86+
"%s. Connection failed after %s ms" % (str(e), length)
87+
))
5288

5389
except HttpLib2Error, e:
5490
length = int((time.time() - start) * 1000)
5591
self.log.info("%s is DOWN, error: %s. Connection failed after %s ms" % (addr, str(e), length))
56-
return Status.DOWN, "%s. Connection failed after %s ms" % (str(e), length)
92+
service_checks.append((
93+
self.SC_STATUS,
94+
Status.DOWN,
95+
"%s. Connection failed after %s ms" % (str(e), length)
96+
))
5797

5898
except socket.error, e:
5999
length = int((time.time() - start) * 1000)
60100
self.log.info("%s is DOWN, error: %s. Connection failed after %s ms" % (addr, repr(e), length))
61-
return Status.DOWN, "Socket error: %s. Connection failed after %s ms" % (repr(e), length)
101+
service_checks.append((
102+
self.SC_STATUS,
103+
Status.DOWN,
104+
"Socket error: %s. Connection failed after %s ms" % (repr(e), length)
105+
))
62106

63107
except Exception, e:
64108
length = int((time.time() - start) * 1000)
65109
self.log.error("Unhandled exception %s. Connection failed after %s ms" % (str(e), length))
66110
raise
67111

68-
if response_time:
69-
# Stop the timer as early as possible
70-
running_time = time.time() - start
71-
# Store tags in a temporary list so that we don't modify the global tags data structure
72-
tags_list = []
73-
tags_list.extend(tags)
74-
tags_list.append('url:%s' % addr)
75-
self.gauge('network.http.response_time', running_time, tags=tags_list)
76-
77-
if int(resp.status) >= 400:
78-
self.log.info("%s is DOWN, error code: %s" % (addr, str(resp.status)))
79-
if not include_content:
80-
content = ''
81-
return Status.DOWN, (resp.status, resp.reason, content or '')
82-
83-
self.log.debug("%s is UP" % addr)
84-
return Status.UP, "UP"
85-
86-
def _create_status_event(self, status, msg, instance):
112+
# Only report this metric if the site is not down
113+
if response_time and not service_checks:
114+
# Stop the timer as early as possible
115+
running_time = time.time() - start
116+
# Store tags in a temporary list so that we don't modify the global tags data structure
117+
tags_list = list(tags)
118+
tags_list.append('url:%s' % addr)
119+
self.gauge('network.http.response_time', running_time, tags=tags_list)
120+
121+
if not service_checks:
122+
if resp is not None and int(resp.status) >= 400:
123+
self.log.info("%s is DOWN, error code: %s" % (addr, str(resp.status)))
124+
if not include_content:
125+
content = ''
126+
service_checks.append((
127+
self.SC_STATUS, Status.DOWN, (resp.status, resp.reason, content or '')
128+
))
129+
else:
130+
self.log.debug("%s is UP" % addr)
131+
service_checks.append((
132+
self.SC_STATUS, Status.UP, "UP"
133+
))
134+
135+
if ssl_expire and urlparse(addr)[0] == "https":
136+
status, msg = self.check_cert_expiration(instance)
137+
service_checks.append((
138+
self.SC_SSL_CERT, status, msg
139+
))
140+
141+
return service_checks
142+
143+
# FIXME: 5.3 drop this function
144+
def _create_status_event(self, sc_name, status, msg, instance):
145+
# Create only this deprecated event for old check
146+
if sc_name != self.SC_STATUS:
147+
return
87148
# Get the instance settings
88149
url = instance.get('url', None)
89150
name = instance.get('name', None)
90-
nb_failures = self.statuses[name].count(Status.DOWN)
91-
nb_tries = len(self.statuses[name])
151+
nb_failures = self.statuses[name][sc_name].count(Status.DOWN)
152+
nb_tries = len(self.statuses[name][sc_name])
92153
tags = instance.get('tags', [])
93154
tags_list = []
94155
tags_list.extend(tags)
@@ -153,11 +214,13 @@ def _create_status_event(self, status, msg, instance):
153214
"tags": tags_list
154215
}
155216

156-
def report_as_service_check(self, name, status, instance, msg=None):
157-
service_check_name = self.normalize(name, self.SERVICE_CHECK_PREFIX)
217+
def report_as_service_check(self, sc_name, status, instance, msg=None):
218+
instance_name = instance['name']
219+
service_check_name = self.normalize(instance_name, sc_name)
158220
url = instance.get('url', None)
221+
sc_tags = ['url:%s' % url]
159222

160-
if status == Status.DOWN:
223+
if sc_name == self.SC_STATUS:
161224
# format the HTTP response body into the event
162225
if isinstance(msg, tuple):
163226
code, reason, content = msg
@@ -168,12 +231,40 @@ def report_as_service_check(self, name, status, instance, msg=None):
168231

169232
msg = "%d %s\n\n%s" % (code, reason, content)
170233
msg = msg.rstrip()
171-
else:
172-
msg=None
173234

174235
self.service_check(service_check_name,
175236
NetworkCheck.STATUS_TO_SERVICE_CHECK[status],
176-
tags= ['url:%s' % url],
237+
tags=sc_tags,
177238
message=msg
178239
)
179240

241+
def check_cert_expiration(self, instance):
    """
    Check how close the certificate served at instance['url'] is to expiring.

    Instance keys read:
      - 'url': the address whose certificate is inspected
      - 'days_warning': warning threshold in days (default 14)
      - 'timeout': connection timeout in seconds (default 10)

    Returns a (status, message) tuple:
      - Status.DOWN if the connection or TLS handshake fails, or the
        certificate has already expired
      - Status.WARNING if fewer than 'days_warning' days remain
      - Status.UP otherwise
    """
    warning_days = int(instance.get('days_warning', 14))
    timeout = int(instance.get('timeout', 10))
    url = instance.get('url')

    o = urlparse(url)
    # hostname, unlike netloc, carries neither "user:pass@" nor ":port",
    # so URLs with an explicit port can be connected to
    host = o.hostname
    port = o.port or 443

    sock = None
    ssl_sock = None
    try:
        # Bounded connect so an unresponsive host cannot hang the check
        sock = socket.create_connection((host, port), timeout)
        ssl_sock = ssl.wrap_socket(sock, cert_reqs=ssl.CERT_REQUIRED,
                                   ca_certs=self.ca_certs)
        cert = ssl_sock.getpeercert()

    except Exception as e:
        # Any failure to obtain a validated certificate is reported as DOWN
        return Status.DOWN, "%s" % (str(e))

    finally:
        # wrap_socket can fail after connect succeeded, so release
        # whichever sockets were actually created
        if ssl_sock is not None:
            ssl_sock.close()
        if sock is not None:
            sock.close()

    exp_date = datetime.strptime(cert['notAfter'], "%b %d %H:%M:%S %Y %Z")
    days_left = exp_date - datetime.utcnow()

    if days_left.days < 0:
        # days is negative here; negate it so the message reads naturally
        return Status.DOWN, "Expired by {0} days".format(-days_left.days)

    elif days_left.days < warning_days:
        return Status.WARNING, "This cert is almost expired, only {0} days left".format(days_left.days)

    else:
        return Status.UP, "Days left: {0}".format(days_left.days)

checks.d/tcp_check.py

+7-5
Original file line numberDiff line numberDiff line change
@@ -93,13 +93,14 @@ def _check(self, instance):
9393
return Status.UP, "UP"
9494

9595

96-
def _create_status_event(self, status, msg, instance):
96+
# FIXME: 5.3 remove that
97+
def _create_status_event(self, sc_name, status, msg, instance):
9798
# Get the instance settings
9899
host = instance.get('host', None)
99100
port = instance.get('port', None)
100101
name = instance.get('name', None)
101-
nb_failures = self.statuses[name].count(Status.DOWN)
102-
nb_tries = len(self.statuses[name])
102+
nb_failures = self.statuses[name][sc_name].count(Status.DOWN)
103+
nb_tries = len(self.statuses[name][sc_name])
103104

104105
# Get a custom message that will be displayed in the event
105106
custom_message = instance.get('message', "")
@@ -151,8 +152,9 @@ def _create_status_event(self, status, msg, instance):
151152
"event_object": name,
152153
}
153154

154-
def report_as_service_check(self, name, status, instance, msg=None):
155-
service_check_name = self.normalize(name, self.SERVICE_CHECK_PREFIX)
155+
def report_as_service_check(self, sc_name, status, instance, msg=None):
156+
instance_name = instance['name']
157+
service_check_name = self.normalize(instance_name, self.SERVICE_CHECK_PREFIX)
156158
host = instance.get('host', None)
157159
port = instance.get('port', None)
158160

checks.d/zk.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def _send_command(self, command, host, port, timeout):
100100
try:
101101
# Connect to the zk client port and send the stat command
102102
sock.connect((host, port))
103-
sock.sendall('stat')
103+
sock.sendall(command)
104104

105105
# Read the response into a StringIO buffer
106106
chunk = sock.recv(chunk_size)

0 commit comments

Comments
 (0)