diff --git a/changelog.d/8723.feature b/changelog.d/8723.feature new file mode 100644 index 000000000000..067f16949ac7 --- /dev/null +++ b/changelog.d/8723.feature @@ -0,0 +1 @@ +Add an admin API for local server media statistics. Contributed by @dklimpel. \ No newline at end of file diff --git a/docs/admin_api/statistics.md b/docs/admin_api/statistics.md index d398a120fb7e..119003dc8060 100644 --- a/docs/admin_api/statistics.md +++ b/docs/admin_api/statistics.md @@ -1,3 +1,50 @@ +# Server media usage statistics + +Returns information about all local media usage of this server. +The results can be filtered by time. + +The API is: + +``` +GET /_synapse/admin/v1/statistics/server/media +``` + +To use it, you will need to authenticate by providing an `access_token` +for a server admin: see [README.rst](README.rst). + +A response body like the following is returned: + +```json +{ + "media_count": 3, + "media_length": 210 +} +``` + +Unlike the users' media usage statistics API, this endpoint is not paginated: +it always returns a single aggregated result and never a `next_token`. + +Use the `from_ts` and `until_ts` parameters described below to restrict +the time range that is taken into account. + +**Parameters** + +The following parameters should be set in the URL: + +* `from_ts` - string representing a positive integer - Considers only + files created at this timestamp or later. Unix timestamp in ms. +* `until_ts` - string representing a positive integer - Considers only + files created at this timestamp or earlier. Unix timestamp in ms. Must be greater than `from_ts`. + + +**Response** + +The following fields are returned in the JSON response body: + +* `media_count` - integer - Number of uploaded media. +* `media_length` - integer - Size of uploaded media in bytes. + + # Users' media usage statistics Returns information about all local media usage of users. 
Gives the diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index 2a4f7a1740b5..d03c81fd396a 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -47,7 +47,10 @@ ShutdownRoomRestServlet, ) from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet -from synapse.rest.admin.statistics import UserMediaStatisticsRestServlet +from synapse.rest.admin.statistics import ( + ServerMediaStatisticsRestServlet, + UserMediaStatisticsRestServlet, +) from synapse.rest.admin.users import ( AccountValidityRenewServlet, DeactivateAccountRestServlet, @@ -229,6 +232,7 @@ def register_servlets(hs, http_server): DevicesRestServlet(hs).register(http_server) DeleteDevicesRestServlet(hs).register(http_server) UserMediaStatisticsRestServlet(hs).register(http_server) + ServerMediaStatisticsRestServlet(hs).register(http_server) EventReportDetailRestServlet(hs).register(http_server) EventReportsRestServlet(hs).register(http_server) PushersRestServlet(hs).register(http_server) diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py index f2490e382dcf..c78eaba69445 100644 --- a/synapse/rest/admin/statistics.py +++ b/synapse/rest/admin/statistics.py @@ -120,3 +120,45 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: ret["next_token"] = start + len(users_media) return 200, ret + + +class ServerMediaStatisticsRestServlet(RestServlet): + """ + Get statistics about uploaded media on this server. 
async def get_server_media_usage(
    self, from_ts: Optional[int] = None, until_ts: Optional[int] = None,
) -> Tuple[int, int]:
    """Retrieve the number and the total size of uploaded local media.

    Args:
        from_ts: only count media created at or after this timestamp (ms).
            `None` or `0` means that no lower bound is applied.
        until_ts: only count media created at or before this timestamp (ms).
            `None` means that no upper bound is applied.
    Returns:
        A tuple `(media_count, media_length)`: the number of matching rows
        in `local_media_repository` and the sum of their `media_length`
        (0 if no row matches).
    """

    def get_server_media_usage_txn(txn):
        filters = []
        args = []

        # A falsy lower bound (None or 0) adds no filter, as before.
        if from_ts:
            filters.append("created_ts >= ?")
            args.append(from_ts)
        # Compare against None instead of relying on truthiness: an upper
        # bound of 0 is a real filter and must not be silently dropped
        # (which would count the whole table).
        if until_ts is not None:
            filters.append("created_ts <= ?")
            args.append(until_ts)

        # Only fixed SQL fragments are interpolated; the values themselves
        # are always passed as bound parameters.
        where_clause = "WHERE " + " AND ".join(filters) if filters else ""

        sql = """
            SELECT
                COUNT(*) as media_count,
                COALESCE(SUM(media_length), 0) as media_length
            FROM local_media_repository
            {where_clause}
        """.format(
            where_clause=where_clause,
        )

        txn.execute(sql, args)
        row = txn.fetchone()

        return row[0], row[1]

    return await self.db_pool.runInteraction(
        "get_server_media_usage_txn", get_server_media_usage_txn
    )
user + Args: + user_token: Access token of the user + number_media: Number of media to be created for the user + """ + upload_resource = self.media_repo.children[b"upload"] + for i in range(number_media): + # file size is 67 Byte + image_data = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + + # Upload some media into the room + self.helper.upload_media( + upload_resource, image_data, tok=user_token, expect_code=200 + ) + + +class UserMediaStatisticsTestCase(StatisticsBase): def prepare(self, reactor, clock, hs): self.store = hs.get_datastore() self.media_repo = hs.get_media_repository_resource() @@ -414,39 +449,6 @@ def test_search_term(self): self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(channel.json_body["total"], 0) - def _create_users_with_media(self, number_users: int, media_per_user: int): - """ - Create a number of users with a number of media - Args: - number_users: Number of users to be created - media_per_user: Number of media to be created for each user - """ - for i in range(number_users): - self.register_user("foo_user_%s" % i, "pass", displayname="bar_user_%s" % i) - user_tok = self.login("foo_user_%s" % i, "pass") - self._create_media(user_tok, media_per_user) - - def _create_media(self, user_token: str, number_media: int): - """ - Create a number of media for a specific user - Args: - user_token: Access token of the user - number_media: Number of media to be created for the user - """ - upload_resource = self.media_repo.children[b"upload"] - for i in range(number_media): - # file size is 67 Byte - image_data = unhexlify( - b"89504e470d0a1a0a0000000d4948445200000001000000010806" - b"0000001f15c4890000000a49444154789c63000100000500010d" - b"0a2db40000000049454e44ae426082" - ) - - # Upload some media into the room - self.helper.upload_media( - upload_resource, image_data, 
class ServerMediaStatisticsTestCase(StatisticsBase):
    """Tests for `GET /_synapse/admin/v1/statistics/server/media`."""

    def prepare(self, reactor, clock, hs):
        self.store = hs.get_datastore()
        self.media_repo = hs.get_media_repository_resource()

        self.admin_user = self.register_user("admin", "pass", admin=True)
        self.admin_user_tok = self.login("admin", "pass")

        self.other_user = self.register_user("user", "pass")
        self.other_user_tok = self.login("user", "pass")

        self.url = "/_synapse/admin/v1/statistics/server/media"

    def _get(self, *request_args, **request_kwargs):
        """Issue a GET request, render it and return the resulting channel.

        All arguments are forwarded unchanged to `make_request` after the
        HTTP method.
        """
        request, channel = self.make_request("GET", *request_args, **request_kwargs)
        self.render(request)
        return channel

    def _assert_status(self, expected_code, channel):
        """Check the HTTP status, showing the response body on failure."""
        self.assertEqual(
            expected_code, int(channel.result["code"]), msg=channel.result["body"]
        )

    def test_no_auth(self):
        """
        A request without an access token is rejected with 401.
        """
        channel = self._get(self.url, b"{}")

        self._assert_status(401, channel)
        self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"])

    def test_requester_is_no_admin(self):
        """
        A request from a user who is not a server admin is rejected with 403.
        """
        channel = self._get(
            self.url, json.dumps({}), access_token=self.other_user_tok,
        )

        self._assert_status(403, channel)
        self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"])

    def test_invalid_parameter(self):
        """
        Invalid query parameters are rejected with 400 / INVALID_PARAM.
        """
        invalid_queries = (
            "?from_ts=-1234",  # negative from_ts
            "?until_ts=-1234",  # negative until_ts
            "?from_ts=10&until_ts=5",  # until_ts smaller than from_ts
        )
        for query in invalid_queries:
            channel = self._get(self.url + query, access_token=self.admin_user_tok)

            self._assert_status(400, channel)
            self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"])

    def test_has_media(self):
        """
        The statistics cover all uploaded media when the server has media.
        """
        self._create_users_with_media(5, 3)

        channel = self._get(self.url, access_token=self.admin_user_tok)

        self.assertEqual(200, channel.code, msg=channel.json_body)
        body = channel.json_body
        self.assertEqual(15, body["media_count"])
        self.assertEqual(15 * 67, body["media_length"])

    def test_no_media(self):
        """
        The statistics are all zero when the server has no media.
        """
        channel = self._get(self.url, access_token=self.admin_user_tok)

        self.assertEqual(200, channel.code, msg=channel.json_body)
        body = channel.json_body
        self.assertEqual(0, body["media_count"])
        self.assertEqual(0, body["media_length"])

    def test_from_until_ts(self):
        """
        Filtering by time with the parameters `from_ts` and `until_ts`.
        """
        # first batch: uploaded before `ts1`, used to verify `from_ts`
        self._create_media(self.other_user_tok, 3)
        self.pump(1)
        ts1 = self.clock.time_msec()

        # without any filter the whole first batch is counted
        channel = self._get(self.url, access_token=self.admin_user_tok)
        self._assert_status(200, channel)
        self.assertEqual(channel.json_body["media_count"], 3)
        self.assertGreater(channel.json_body["media_length"], 0)

        # nothing has been uploaded at or after `ts1` yet
        channel = self._get(
            self.url + "?from_ts=%s" % (ts1,), access_token=self.admin_user_tok,
        )
        self._assert_status(200, channel)
        self.assertEqual(channel.json_body["media_count"], 0)
        self.assertEqual(channel.json_body["media_length"], 0)

        # second batch: uploaded between `ts1` and `ts2`
        self._create_media(self.other_user_tok, 3)
        self.pump(1)
        ts2 = self.clock.time_msec()
        # third batch: uploaded after `ts2`, used to verify `until_ts`
        self._create_media(self.other_user_tok, 3)

        # only the second batch lies between `ts1` and `ts2`
        channel = self._get(
            self.url + "?from_ts=%s&until_ts=%s" % (ts1, ts2),
            access_token=self.admin_user_tok,
        )
        self._assert_status(200, channel)
        self.assertEqual(channel.json_body["media_count"], 3)
        self.assertGreater(channel.json_body["media_length"], 0)

        # the first and second batch were uploaded at or before `ts2`
        channel = self._get(
            self.url + "?until_ts=%s" % (ts2,), access_token=self.admin_user_tok,
        )
        self._assert_status(200, channel)
        self.assertEqual(channel.json_body["media_count"], 6)
        self.assertGreater(channel.json_body["media_length"], 0)