[Feat] Add proxy level prometheus metrics (#5789)
* add Proxy Level Tracking Metrics doc

* update service logger

* prometheus - track litellm_proxy_failed_requests_metric

* use REQUESTED_MODEL

* fix prom request_data
ishaan-jaff committed Sep 20, 2024
1 parent ae41c0d commit 91e58d9
Showing 10 changed files with 166 additions and 18 deletions.
5 changes: 4 additions & 1 deletion docs/my-website/docs/proxy/call_hooks.md
@@ -41,7 +41,10 @@ class MyCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/observabilit
         return data

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth
     ):
         pass
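For anyone updating a custom handler to the new signature, a minimal sketch (the class name and log message are illustrative, not from this commit):

```python
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth


class MyFailureHandler(CustomLogger):
    async def async_post_call_failure_hook(
        self,
        request_data: dict,
        original_exception: Exception,
        user_api_key_dict: UserAPIKeyAuth,
    ):
        # request_data carries the parsed request body, so the requested
        # model is now available when handling client-side failures
        print(
            f"model={request_data.get('model')} "
            f"user={user_api_key_dict.user_id} failed: {original_exception}"
        )
```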
10 changes: 10 additions & 0 deletions docs/my-website/docs/proxy/prometheus.md
@@ -70,6 +70,16 @@ Use this for tracking per [user, key, team, etc.](virtual_keys)
| `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |


### Proxy Level Tracking Metrics

Use these to track overall LiteLLM Proxy usage:
- Actual traffic rate to the proxy
- Number of **client-side** requests and failures for requests made to the proxy

| Metric Name | Description |
|----------------------|--------------------------------------|
| `litellm_proxy_failed_requests_metric` | Total number of failed responses from the proxy - the client did not get a success response from the litellm proxy. Tracked per `"user", "key", "model", "team", "end-user"` |
| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - tracks client-side requests per `"user", "key", "model", "team", "end-user"` |
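
Once the `prometheus` callback is enabled, both counters are served on the proxy's `/metrics` endpoint. A quick way to spot-check them (a sketch assuming a proxy on `http://localhost:4000`; adjust for your deployment):

```python
import requests

# Scrape the Prometheus exposition endpoint and keep only the proxy-level counters
metrics = requests.get("http://localhost:4000/metrics").text

for line in metrics.splitlines():
    if line.startswith(
        ("litellm_proxy_failed_requests_metric", "litellm_proxy_total_requests_metric")
    ):
        print(line)
```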

### LLM API / Provider Metrics

9 changes: 7 additions & 2 deletions litellm/_service_logger.py
@@ -212,13 +212,18 @@ async def async_service_failure_hook(
         )

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         """
         Hook to track failed litellm-service calls
         """
         return await super().async_post_call_failure_hook(
-            original_exception, user_api_key_dict
+            request_data,
+            original_exception,
+            user_api_key_dict,
         )

async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
5 changes: 4 additions & 1 deletion litellm/integrations/custom_logger.py
@@ -125,7 +125,10 @@ async def async_pre_call_hook(
         pass

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         pass

5 changes: 4 additions & 1 deletion litellm/integrations/opentelemetry.py
@@ -221,7 +221,10 @@ async def async_service_failure_hook(
         service_logging_span.end(end_time=_end_time_ns)

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
123 changes: 112 additions & 11 deletions litellm/integrations/prometheus.py
@@ -15,6 +15,7 @@
 import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.proxy._types import UserAPIKeyAuth


 class PrometheusLogger(CustomLogger):
@@ -38,28 +39,30 @@ def __init__(
                 )
                 return

-            self.litellm_llm_api_failed_requests_metric = Counter(
-                name="litellm_llm_api_failed_requests_metric",
-                documentation="Total number of failed LLM API calls via litellm - track fails per API Key, team, user",
+            REQUESTED_MODEL = "requested_model"
+
+            self.litellm_proxy_failed_requests_metric = Counter(
+                name="litellm_proxy_failed_requests_metric",
+                documentation="Total number of failed responses from proxy - the client did not get a success response from litellm proxy",
                 labelnames=[
                     "end_user",
                     "hashed_api_key",
                     "api_key_alias",
-                    "model",
+                    REQUESTED_MODEL,
                     "team",
                     "team_alias",
                     "user",
                 ],
             )

-            self.litellm_requests_metric = Counter(
-                name="litellm_requests_metric",
-                documentation="Total number of LLM calls to litellm - track total per API Key, team, user",
+            self.litellm_proxy_total_requests_metric = Counter(
+                name="litellm_proxy_total_requests_metric",
+                documentation="Total number of requests made to the proxy server - track number of client side requests",
                 labelnames=[
                     "end_user",
                     "hashed_api_key",
                     "api_key_alias",
-                    "model",
+                    REQUESTED_MODEL,
                     "team",
                     "team_alias",
                     "user",
@@ -201,17 +204,17 @@ def __init__(
             self.litellm_deployment_success_responses = Counter(
                 name="litellm_deployment_success_responses",
                 documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
-                labelnames=["requested_model"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL] + _logged_llm_labels,
             )
             self.litellm_deployment_failure_responses = Counter(
                 name="litellm_deployment_failure_responses",
                 documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deployment. exception_status is the status of the exception from the llm api",
-                labelnames=["requested_model", "exception_status"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL, "exception_status"] + _logged_llm_labels,
             )
             self.litellm_deployment_total_requests = Counter(
                 name="litellm_deployment_total_requests",
                 documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
-                labelnames=["requested_model"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL] + _logged_llm_labels,
             )

             # Deployment Latency tracking
@@ -232,6 +235,34 @@ def __init__(
                 ["primary_model", "fallback_model"],
             )

+            self.litellm_llm_api_failed_requests_metric = Counter(
+                name="litellm_llm_api_failed_requests_metric",
+                documentation="deprecated - use litellm_proxy_failed_requests_metric",
+                labelnames=[
+                    "end_user",
+                    "hashed_api_key",
+                    "api_key_alias",
+                    "model",
+                    "team",
+                    "team_alias",
+                    "user",
+                ],
+            )
+
+            self.litellm_requests_metric = Counter(
+                name="litellm_requests_metric",
+                documentation="deprecated - use litellm_proxy_total_requests_metric. Total number of LLM calls to litellm - track total per API Key, team, user",
+                labelnames=[
+                    "end_user",
+                    "hashed_api_key",
+                    "api_key_alias",
+                    "model",
+                    "team",
+                    "team_alias",
+                    "user",
+                ],
+            )
+
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
             raise e
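
For context on how these counters behave: in `prometheus_client`, `Counter.labels(...)` only selects a labeled child; nothing is recorded until `.inc()` is called on it. A self-contained sketch with an illustrative metric name:

```python
from prometheus_client import Counter, generate_latest

# Same label scheme as the proxy-level counters above, illustrative name
demo_counter = Counter(
    name="demo_proxy_requests_metric",
    documentation="demo counter",
    labelnames=["end_user", "hashed_api_key", "model"],
)

# .labels() returns the child counter; .inc() performs the increment
demo_counter.labels("user-1", "hash-abc", "gpt-4").inc()

# Render the exposition format that a /metrics endpoint would serve
print(generate_latest().decode())
```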
@@ -440,6 +471,76 @@ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_ti
             pass
         pass

+    async def async_post_call_failure_hook(
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
+    ):
+        """
+        Track client side failures
+
+        Proxy level tracking - failed client side requests
+
+        labelnames=[
+            "end_user",
+            "hashed_api_key",
+            "api_key_alias",
+            "requested_model",
+            "team",
+            "team_alias",
+            "user",
+        ],
+        """
+        try:
+            self.litellm_proxy_failed_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                request_data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+
+            self.litellm_proxy_total_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                request_data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+        except Exception as e:
+            verbose_logger.exception(
+                "prometheus Layer Error(): Exception occurred - {}".format(str(e))
+            )
+
+    async def async_post_call_success_hook(
+        self, data: dict, user_api_key_dict: UserAPIKeyAuth, response
+    ):
+        """
+        Proxy level tracking - triggered when the proxy responds with a success response to the client
+        """
+        try:
+            self.litellm_proxy_total_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+        except Exception as e:
+            verbose_logger.exception(
+                "prometheus Layer Error(): Exception occurred - {}".format(str(e))
+            )
+
     def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
         try:
             verbose_logger.debug("setting remaining tokens requests metric")
1 change: 1 addition & 0 deletions litellm/proxy/auth/user_api_key_auth.py
@@ -1121,6 +1121,7 @@ async def user_api_key_auth(
         if open_telemetry_logger is not None:
             await open_telemetry_logger.async_post_call_failure_hook(  # type: ignore
                 original_exception=e,
+                request_data={},
                 user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
             )

5 changes: 4 additions & 1 deletion litellm/proxy/custom_callbacks1.py
@@ -35,7 +35,10 @@ async def async_pre_call_hook(
         return data

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         pass

18 changes: 18 additions & 0 deletions litellm/proxy/proxy_config.yaml
@@ -1,8 +1,26 @@
 model_list:
   - model_name: gemini-vision
     litellm_params:
       model: vertex_ai/gemini-1.5-pro
       api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
       vertex_project: "adroit-crow-413218"
       vertex_location: "us-central1"
       vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+  - model_name: gemini-vision
+    litellm_params:
+      model: vertex_ai/gemini-1.0-pro-vision-001
+      api_base: https://exampleopenaiendpoint-production-c715.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
+      vertex_project: "adroit-crow-413218"
+      vertex_location: "us-central1"
+      vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+
+  - model_name: fake-azure-endpoint
+    litellm_params:
+      model: openai/429
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+
+
+litellm_settings:
+  success_callback: ["prometheus"]
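
The `fake-azure-endpoint` entry always returns a 429, which makes it easy to exercise `litellm_proxy_failed_requests_metric` end to end. A sketch (assumes the proxy runs locally on port 4000 with key `sk-1234`; both are assumptions, not part of this config):

```python
import openai

# Point the OpenAI SDK at the local LiteLLM proxy
client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

try:
    client.chat.completions.create(
        model="fake-azure-endpoint",
        messages=[{"role": "user", "content": "hi"}],
    )
except openai.APIError as err:
    # The failure should now show up in litellm_proxy_failed_requests_metric
    print(f"expected failure: {err}")
```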

3 changes: 2 additions & 1 deletion litellm/proxy/utils.py
@@ -632,9 +632,9 @@ async def failure_handler(

     async def post_call_failure_hook(
         self,
+        request_data: dict,
         original_exception: Exception,
         user_api_key_dict: UserAPIKeyAuth,
-        request_data: dict,
     ):
         """
         Allows users to raise custom exceptions/log when a call fails, without having to deal with parsing Request body.
@@ -750,6 +750,7 @@ async def post_call_failure_hook(
                 _callback = callback  # type: ignore
             if _callback is not None and isinstance(_callback, CustomLogger):
                 await _callback.async_post_call_failure_hook(
+                    request_data=request_data,
                     user_api_key_dict=user_api_key_dict,
                     original_exception=original_exception,
                 )
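
Since the dispatch loop awaits each callback, a `CustomLogger` can use this hook to surface its own error to the client, per the docstring above. An illustrative sketch (the handler and its condition are not part of this commit):

```python
from fastapi import HTTPException

from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth


class RateLimitMessageHandler(CustomLogger):
    async def async_post_call_failure_hook(
        self,
        request_data: dict,
        original_exception: Exception,
        user_api_key_dict: UserAPIKeyAuth,
    ):
        # Replace provider rate-limit errors with a clearer client-facing message
        if "429" in str(original_exception):
            raise HTTPException(
                status_code=429,
                detail=f"Rate limited on model={request_data.get('model')}, retry later",
            )
```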
