[Feat] Add proxy level prometheus metrics (#5789)
* add Proxy Level Tracking Metrics doc

* update service logger

* prometheus - track litellm_proxy_failed_requests_metric

* use REQUESTED_MODEL

* fix prom request_data
ishaan-jaff committed Sep 20, 2024
1 parent ae41c0d commit 91e58d9
Showing 10 changed files with 166 additions and 18 deletions.
5 changes: 4 additions & 1 deletion docs/my-website/docs/proxy/call_hooks.md
@@ -41,7 +41,10 @@ class MyCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/observabilit
         return data

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth
     ):
         pass
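For anyone updating a custom handler to the new signature, a minimal sketch (the class name and log message are illustrative, not from this commit):

```python
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth


class MyFailureHandler(CustomLogger):
    async def async_post_call_failure_hook(
        self,
        request_data: dict,
        original_exception: Exception,
        user_api_key_dict: UserAPIKeyAuth,
    ):
        # request_data carries the parsed request body, so the requested
        # model is now available when handling client-side failures
        print(
            f"model={request_data.get('model')} "
            f"user={user_api_key_dict.user_id} failed: {original_exception}"
        )
```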
10 changes: 10 additions & 0 deletions docs/my-website/docs/proxy/prometheus.md
@@ -70,6 +70,16 @@ Use this for tracking per [user, key, team, etc.](virtual_keys)
| `litellm_total_tokens` | input + output tokens per `"user", "key", "model", "team", "end-user"` |


### Proxy Level Tracking Metrics

Use these to track overall LiteLLM Proxy usage:
- Actual traffic rate to the proxy
- Number of **client-side** requests and failures for requests made to the proxy

| Metric Name | Description |
|----------------------|--------------------------------------|
| `litellm_proxy_failed_requests_metric` | Total number of failed responses from the proxy - the client did not get a success response from the litellm proxy. Tracked per `"user", "key", "model", "team", "end-user"` |
| `litellm_proxy_total_requests_metric` | Total number of requests made to the proxy server - tracks client-side requests per `"user", "key", "model", "team", "end-user"` |
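
Once the `prometheus` callback is enabled, both counters are served on the proxy's `/metrics` endpoint. A quick way to spot-check them (a sketch assuming a proxy on `http://localhost:4000`; adjust for your deployment):

```python
import requests

# Scrape the Prometheus exposition endpoint and keep only the proxy-level counters
metrics = requests.get("http://localhost:4000/metrics").text

for line in metrics.splitlines():
    if line.startswith(
        ("litellm_proxy_failed_requests_metric", "litellm_proxy_total_requests_metric")
    ):
        print(line)
```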

### LLM API / Provider Metrics

9 changes: 7 additions & 2 deletions litellm/_service_logger.py
@@ -212,13 +212,18 @@ async def async_service_failure_hook(
         )

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         """
         Hook to track failed litellm-service calls
         """
         return await super().async_post_call_failure_hook(
-            original_exception, user_api_key_dict
+            request_data,
+            original_exception,
+            user_api_key_dict,
         )

async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
5 changes: 4 additions & 1 deletion litellm/integrations/custom_logger.py
@@ -125,7 +125,10 @@ async def async_pre_call_hook(
         pass

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         pass

5 changes: 4 additions & 1 deletion litellm/integrations/opentelemetry.py
@@ -221,7 +221,10 @@ async def async_service_failure_hook(
         service_logging_span.end(end_time=_end_time_ns)

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         from opentelemetry import trace
         from opentelemetry.trace import Status, StatusCode
123 changes: 112 additions & 11 deletions litellm/integrations/prometheus.py
@@ -15,6 +15,7 @@
 import litellm
 from litellm._logging import print_verbose, verbose_logger
 from litellm.integrations.custom_logger import CustomLogger
+from litellm.proxy._types import UserAPIKeyAuth


 class PrometheusLogger(CustomLogger):
@@ -38,28 +39,30 @@ def __init__(
                 )
                 return

-            self.litellm_llm_api_failed_requests_metric = Counter(
-                name="litellm_llm_api_failed_requests_metric",
-                documentation="Total number of failed LLM API calls via litellm - track fails per API Key, team, user",
+            REQUESTED_MODEL = "requested_model"
+
+            self.litellm_proxy_failed_requests_metric = Counter(
+                name="litellm_proxy_failed_requests_metric",
+                documentation="Total number of failed responses from proxy - the client did not get a success response from litellm proxy",
                 labelnames=[
                     "end_user",
                     "hashed_api_key",
                     "api_key_alias",
-                    "model",
+                    REQUESTED_MODEL,
                     "team",
                     "team_alias",
                     "user",
                 ],
             )

-            self.litellm_requests_metric = Counter(
-                name="litellm_requests_metric",
-                documentation="Total number of LLM calls to litellm - track total per API Key, team, user",
+            self.litellm_proxy_total_requests_metric = Counter(
+                name="litellm_proxy_total_requests_metric",
+                documentation="Total number of requests made to the proxy server - track number of client side requests",
                 labelnames=[
                     "end_user",
                     "hashed_api_key",
                     "api_key_alias",
-                    "model",
+                    REQUESTED_MODEL,
                     "team",
                     "team_alias",
                     "user",
@@ -201,17 +204,17 @@ def __init__(
             self.litellm_deployment_success_responses = Counter(
                 name="litellm_deployment_success_responses",
                 documentation="LLM Deployment Analytics - Total number of successful LLM API calls via litellm",
-                labelnames=["requested_model"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL] + _logged_llm_labels,
             )
             self.litellm_deployment_failure_responses = Counter(
                 name="litellm_deployment_failure_responses",
                 documentation="LLM Deployment Analytics - Total number of failed LLM API calls for a specific LLM deployment. exception_status is the status of the exception from the llm api",
-                labelnames=["requested_model", "exception_status"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL, "exception_status"] + _logged_llm_labels,
             )
             self.litellm_deployment_total_requests = Counter(
                 name="litellm_deployment_total_requests",
                 documentation="LLM Deployment Analytics - Total number of LLM API calls via litellm - success + failure",
-                labelnames=["requested_model"] + _logged_llm_labels,
+                labelnames=[REQUESTED_MODEL] + _logged_llm_labels,
             )

             # Deployment Latency tracking
@@ -232,6 +235,34 @@ def __init__(
                 ["primary_model", "fallback_model"],
             )

+            self.litellm_llm_api_failed_requests_metric = Counter(
+                name="litellm_llm_api_failed_requests_metric",
+                documentation="deprecated - use litellm_proxy_failed_requests_metric",
+                labelnames=[
+                    "end_user",
+                    "hashed_api_key",
+                    "api_key_alias",
+                    "model",
+                    "team",
+                    "team_alias",
+                    "user",
+                ],
+            )
+
+            self.litellm_requests_metric = Counter(
+                name="litellm_requests_metric",
+                documentation="deprecated - use litellm_proxy_total_requests_metric. Total number of LLM calls to litellm - track total per API Key, team, user",
+                labelnames=[
+                    "end_user",
+                    "hashed_api_key",
+                    "api_key_alias",
+                    "model",
+                    "team",
+                    "team_alias",
+                    "user",
+                ],
+            )
+
         except Exception as e:
             print_verbose(f"Got exception on init prometheus client {str(e)}")
             raise e
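
For context on how these counters behave: in `prometheus_client`, `Counter.labels(...)` only selects a labeled child; nothing is recorded until `.inc()` is called on it. A self-contained sketch with an illustrative metric name:

```python
from prometheus_client import Counter, generate_latest

# Same label scheme as the proxy-level counters above, illustrative name
demo_counter = Counter(
    name="demo_proxy_requests_metric",
    documentation="demo counter",
    labelnames=["end_user", "hashed_api_key", "model"],
)

# .labels() returns the child counter; .inc() performs the increment
demo_counter.labels("user-1", "hash-abc", "gpt-4").inc()

# Render the exposition format that a /metrics endpoint would serve
print(generate_latest().decode())
```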
@@ -440,6 +471,76 @@ async def async_log_failure_event(self, kwargs, response_obj, start_time, end_ti
             pass
         pass

+    async def async_post_call_failure_hook(
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
+    ):
+        """
+        Track client side failures
+
+        Proxy level tracking - failed client side requests
+
+        labelnames=[
+            "end_user",
+            "hashed_api_key",
+            "api_key_alias",
+            "requested_model",
+            "team",
+            "team_alias",
+            "user",
+        ],
+        """
+        try:
+            self.litellm_proxy_failed_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                request_data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+
+            self.litellm_proxy_total_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                request_data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+        except Exception as e:
+            verbose_logger.exception(
+                "prometheus Layer Error(): Exception occurred - {}".format(str(e))
+            )
+
+    async def async_post_call_success_hook(
+        self, data: dict, user_api_key_dict: UserAPIKeyAuth, response
+    ):
+        """
+        Proxy level tracking - triggered when the proxy responds with a success response to the client
+        """
+        try:
+            self.litellm_proxy_total_requests_metric.labels(
+                user_api_key_dict.end_user_id,
+                user_api_key_dict.api_key,
+                user_api_key_dict.key_alias,
+                data.get("model", ""),
+                user_api_key_dict.team_id,
+                user_api_key_dict.team_alias,
+                user_api_key_dict.user_id,
+            ).inc()
+        except Exception as e:
+            verbose_logger.exception(
+                "prometheus Layer Error(): Exception occurred - {}".format(str(e))
+            )
+
     def set_llm_deployment_failure_metrics(self, request_kwargs: dict):
         try:
             verbose_logger.debug("setting remaining tokens requests metric")
1 change: 1 addition & 0 deletions litellm/proxy/auth/user_api_key_auth.py
@@ -1121,6 +1121,7 @@ async def user_api_key_auth(
         if open_telemetry_logger is not None:
             await open_telemetry_logger.async_post_call_failure_hook(  # type: ignore
                 original_exception=e,
+                request_data={},
                 user_api_key_dict=UserAPIKeyAuth(parent_otel_span=parent_otel_span),
             )

5 changes: 4 additions & 1 deletion litellm/proxy/custom_callbacks1.py
@@ -35,7 +35,10 @@ async def async_pre_call_hook(
         return data

     async def async_post_call_failure_hook(
-        self, original_exception: Exception, user_api_key_dict: UserAPIKeyAuth
+        self,
+        request_data: dict,
+        original_exception: Exception,
+        user_api_key_dict: UserAPIKeyAuth,
     ):
         pass

18 changes: 18 additions & 0 deletions litellm/proxy/proxy_config.yaml
@@ -1,8 +1,26 @@
 model_list:
   - model_name: gemini-vision
     litellm_params:
       model: vertex_ai/gemini-1.5-pro
       api_base: https://exampleopenaiendpoint-production.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
       vertex_project: "adroit-crow-413218"
       vertex_location: "us-central1"
       vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+  - model_name: gemini-vision
+    litellm_params:
+      model: vertex_ai/gemini-1.0-pro-vision-001
+      api_base: https://exampleopenaiendpoint-production-c715.up.railway.app/v1/projects/adroit-crow-413218/locations/us-central1/publishers/google/models/gemini-1.0-pro-vision-001
+      vertex_project: "adroit-crow-413218"
+      vertex_location: "us-central1"
+      vertex_credentials: "/Users/ishaanjaffer/Downloads/adroit-crow-413218-a956eef1a2a8.json"
+
+  - model_name: fake-azure-endpoint
+    litellm_params:
+      model: openai/429
+      api_key: fake-key
+      api_base: https://exampleopenaiendpoint-production.up.railway.app
+
+
+litellm_settings:
+  success_callback: ["prometheus"]
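
The `fake-azure-endpoint` entry always returns a 429, which makes it easy to exercise `litellm_proxy_failed_requests_metric` end to end. A sketch (assumes the proxy runs locally on port 4000 with key `sk-1234`; both are assumptions, not part of this config):

```python
import openai

# Point the OpenAI SDK at the local LiteLLM proxy
client = openai.OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

try:
    client.chat.completions.create(
        model="fake-azure-endpoint",
        messages=[{"role": "user", "content": "hi"}],
    )
except openai.APIError as err:
    # The failure should now show up in litellm_proxy_failed_requests_metric
    print(f"expected failure: {err}")
```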

3 changes: 2 additions & 1 deletion litellm/proxy/utils.py
@@ -632,9 +632,9 @@ async def failure_handler(

     async def post_call_failure_hook(
         self,
+        request_data: dict,
         original_exception: Exception,
         user_api_key_dict: UserAPIKeyAuth,
-        request_data: dict,
     ):
         """
         Allows users to raise custom exceptions/log when a call fails, without having to deal with parsing Request body.
@@ -750,6 +750,7 @@ async def post_call_failure_hook(
                 _callback = callback  # type: ignore
             if _callback is not None and isinstance(_callback, CustomLogger):
                 await _callback.async_post_call_failure_hook(
+                    request_data=request_data,
                     user_api_key_dict=user_api_key_dict,
                     original_exception=original_exception,
                 )
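
Since the dispatch loop awaits each callback, a `CustomLogger` can use this hook to surface its own error to the client, per the docstring above. An illustrative sketch (the handler and its condition are not part of this commit):

```python
from fastapi import HTTPException

from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy._types import UserAPIKeyAuth


class RateLimitMessageHandler(CustomLogger):
    async def async_post_call_failure_hook(
        self,
        request_data: dict,
        original_exception: Exception,
        user_api_key_dict: UserAPIKeyAuth,
    ):
        # Replace provider rate-limit errors with a clearer client-facing message
        if "429" in str(original_exception):
            raise HTTPException(
                status_code=429,
                detail=f"Rate limited on model={request_data.get('model')}, retry later",
            )
```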
