Skip to content

Commit 9bf12d8

Browse files
joerunde authored and dtrifiro committed
🔥 delete legacy tgis metrics
Signed-off-by: Joe Runde <[email protected]>
1 parent 396595f commit 9bf12d8

File tree

2 files changed

+2
-205
lines changed

2 files changed

+2
-205
lines changed

src/vllm_tgis_adapter/grpc/grpc_server.py

+2-47
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
from grpc._cython.cygrpc import AbortError
1515
from grpc_health.v1 import health, health_pb2, health_pb2_grpc
1616
from grpc_reflection.v1alpha import reflection
17-
from vllm.engine.async_llm_engine import AsyncLLMEngine
1817
from vllm.engine.multiprocessing import MQEngineDeadError
1918
from vllm.entrypoints.openai.serving_completion import merge_async_iterators
2019
from vllm.inputs import TokensPrompt, token_inputs
@@ -34,11 +33,6 @@
3433
ExpDecayLengthPenaltyWarper,
3534
TypicalLogitsWarperWrapper,
3635
)
37-
from vllm_tgis_adapter.tgis_utils.metrics import (
38-
FailureReasonLabel,
39-
ServiceMetrics,
40-
TGISStatLogger,
41-
)
4236
from vllm_tgis_adapter.utils import to_list
4337

4438
from .adapters import AdapterStore, validate_adapters
@@ -84,7 +78,6 @@
8478
_F = TypeVar("_F", Callable, Coroutine)
8579

8680
logger = init_logger(__name__)
87-
service_metrics = ServiceMetrics()
8881

8982
ADD_SPECIAL_TOKENS: bool = os.getenv("ADD_SPECIAL_TOKENS", "true").lower() not in (
9083
"0",
@@ -110,8 +103,6 @@ async def _handle_exception(
110103
**kwargs: dict[str, Any],
111104
) -> None:
112105
context: ServicerContext = kwargs.get("context") or args[-1]
113-
is_generate_fn = "generate" in func.__name__.lower()
114-
115106
# self.engine on the servicer
116107
engine = args[0].engine
117108
# If the engine has died, then the server cannot process any further
@@ -132,13 +123,8 @@ async def _handle_exception(
132123

133124
if isinstance(e, OutOfMemoryError):
134125
logger.exception("%s caused GPU OOM error", func.__name__)
135-
service_metrics.count_request_failure(FailureReasonLabel.OOM)
136126
await context.abort(StatusCode.RESOURCE_EXHAUSTED, str(e))
137-
elif is_generate_fn:
138-
service_metrics.count_request_failure(FailureReasonLabel.GENERATE)
139-
else:
140-
service_metrics.count_request_failure(FailureReasonLabel.UNKNOWN)
141-
if isinstance(e, MQEngineDeadError):
127+
elif isinstance(e, MQEngineDeadError):
142128
logger.error(e)
143129
return
144130
logger.exception("%s failed", func.__name__)
@@ -201,20 +187,6 @@ def __init__(
201187

202188
async def post_init(self) -> None:
203189
self.config = await self.engine.get_model_config()
204-
205-
if not isinstance(self.engine, AsyncLLMEngine):
206-
logger.warning(
207-
"TGIS Metrics currently disabled in decoupled front-end mode, "
208-
"set DISABLE_FRONTEND_MULTIPROCESSING=True to enable"
209-
)
210-
else:
211-
# Swap in the special TGIS stats logger
212-
tgis_stats_logger = TGISStatLogger(
213-
vllm_stat_logger=self.engine.engine.stat_loggers["prometheus"],
214-
max_sequence_len=self.config.max_model_len,
215-
)
216-
self.engine.engine.stat_loggers["prometheus"] = tgis_stats_logger
217-
218190
self.health_servicer.set(
219191
self.SERVICE_NAME,
220192
health_pb2.HealthCheckResponse.SERVING,
@@ -248,8 +220,6 @@ async def Generate(
248220
request: BatchedGenerationRequest,
249221
context: ServicerContext,
250222
) -> BatchedGenerationResponse:
251-
start_time = time.time()
252-
service_metrics.count_generate_request(len(request.requests))
253223
request_id = self.request_id(context)
254224
kwargs = await self._validate_adapters(
255225
request,
@@ -302,7 +272,6 @@ async def Generate(
302272
if res.prompt is None:
303273
res.prompt = request.requests[i].text
304274
responses[i] = res
305-
service_metrics.observe_queue_time(res)
306275

307276
if (
308277
deadline is not None
@@ -328,19 +297,16 @@ async def Generate(
328297
response = self._convert_input_details(
329298
res, resp_options, sampling_params, response, tokenizer
330299
)
331-
service_metrics.observe_generation_success(start_time=start_time)
332300
responses[i] = response
333301

334302
return BatchedGenerationResponse(responses=responses)
335303

336304
@log_rpc_handler_errors
337-
async def GenerateStream( # noqa: PLR0915, C901
305+
async def GenerateStream( # noqa: C901
338306
self,
339307
request: SingleGenerationRequest,
340308
context: ServicerContext,
341309
) -> AsyncIterator[GenerationResponse]:
342-
start_time = time.time()
343-
service_metrics.count_generate_request()
344310
request_id = self.request_id(context)
345311
adapter_kwargs = await self._validate_adapters(
346312
request,
@@ -395,9 +361,6 @@ async def GenerateStream( # noqa: PLR0915, C901
395361
if first_response is None or (
396362
result.prompt_token_ids and not generated_token_count
397363
):
398-
if first_response is None:
399-
service_metrics.observe_queue_time(result)
400-
401364
if result.prompt is None:
402365
result.prompt = request.request.text
403366

@@ -453,7 +416,6 @@ async def GenerateStream( # noqa: PLR0915, C901
453416
first_response.stop_reason = last_response.stop_reason
454417
first_response.stop_sequence = last_response.stop_sequence
455418
first_response.generated_token_count = last_response.generated_token_count
456-
service_metrics.observe_generation_success(start_time=start_time)
457419

458420
def _convert_input_details(
459421
self,
@@ -544,7 +506,6 @@ async def _validate_and_convert_params(
544506
try:
545507
validate_params(params, self.max_max_new_tokens)
546508
except ValueError as tgis_validation_error:
547-
service_metrics.count_request_failure(FailureReasonLabel.VALIDATION)
548509
await context.abort(StatusCode.INVALID_ARGUMENT, str(tgis_validation_error))
549510

550511
resp_options = params.response
@@ -650,7 +611,6 @@ async def _validate_and_convert_params(
650611
except ValueError as vllm_validation_error:
651612
# There may be validation cases caught by vLLM that are not covered
652613
# by the TGIS api validation
653-
service_metrics.count_request_failure(FailureReasonLabel.VALIDATION)
654614
await context.abort(StatusCode.INVALID_ARGUMENT, str(vllm_validation_error))
655615

656616
return sampling_params, deadline
@@ -670,7 +630,6 @@ async def _validate_adapters(
670630
vllm_model_handler=vllm_model_handler,
671631
)
672632
except ValueError as e:
673-
service_metrics.count_request_failure(FailureReasonLabel.VALIDATION)
674633
await context.abort(StatusCode.INVALID_ARGUMENT, str(e))
675634
return adapters
676635

@@ -844,9 +803,6 @@ async def Tokenize(
844803
tokenized results.
845804
846805
"""
847-
# Log the incoming tokenization request for metrics
848-
service_metrics.count_tokenization_request(request)
849-
850806
# TODO simplify to only check for lora adapter
851807
adapter_kwargs = await self._validate_adapters(
852808
request,
@@ -903,7 +859,6 @@ async def Tokenize(
903859
)
904860

905861
response = BatchedTokenizeResponse(responses=responses)
906-
service_metrics.observe_tokenization_response(response)
907862
return response
908863

909864
@log_rpc_handler_errors

src/vllm_tgis_adapter/tgis_utils/metrics.py

-158
This file was deleted.

0 commit comments

Comments
 (0)