From 5060d8d64f760d2d432f26ce4bffaaa95c88f02d Mon Sep 17 00:00:00 2001
From: alisalim17
Date: Thu, 18 Apr 2024 11:10:31 +0400
Subject: [PATCH 01/43] fix: newline issue in cohere_message_pt

---
 litellm/llms/prompt_templates/factory.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 8afda252ac28..2beb885d08b8 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -1033,7 +1033,8 @@ def cohere_message_pt(messages: list):
             tool_result = convert_openai_message_to_cohere_tool_result(message)
             tool_results.append(tool_result)
         else:
-            prompt += message["content"]
+            prompt += message["content"] + "\n\n"
+    prompt = prompt.rstrip()
     return prompt, tool_results

From f1f2204c2aee4f9c2bf9945c8f0f325f6f7edc92 Mon Sep 17 00:00:00 2001
From: Nilanjan De
Date: Thu, 18 Apr 2024 15:04:54 +0400
Subject: [PATCH 02/43] fix tool call errors using anthropic

---
 litellm/llms/prompt_templates/factory.py | 31 +++++++++++++++---------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 8afda252ac28..dbe27278e156 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -466,10 +467,11 @@ def construct_tool_use_system_prompt(
 ):  # from https://github.com/anthropics/anthropic-cookbook/blob/main/function_calling/function_calling.ipynb
     tool_str_list = []
     for tool in tools:
+        tool_function = get_attribute_or_key(tool, "function")
         tool_str = construct_format_tool_for_claude_prompt(
-            tool["function"]["name"],
-            tool["function"].get("description", ""),
-            tool["function"].get("parameters", {}),
+            get_attribute_or_key(tool_function, "name"),
+            get_attribute_or_key(tool_function, "description", ""),
+            get_attribute_or_key(tool_function, "parameters", {}),
         )
         tool_str_list.append(tool_str)
     tool_use_system_prompt = (
@@ -614,13 +615,14 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
 def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
     invokes = ""
     for tool in tool_calls:
-        if tool["type"] != "function":
+        if get_attribute_or_key(tool, "type") != "function":
             continue
-
-        tool_name = tool["function"]["name"]
+
+        tool_function = get_attribute_or_key(tool,"function")
+        tool_name = tool_function["name"]
         parameters = "".join(
             f"<{param}>{val}</{param}>\n"
-            for param, val in json.loads(tool["function"]["arguments"]).items()
+            for param, val in json.loads(tool_function["arguments"]).items()
         )
         invokes += (
             "<invoke>\n"
@@ -705,7 +707,7 @@ def anthropic_messages_pt_xml(messages: list):
         if assistant_content:
             new_messages.append({"role": "assistant", "content": assistant_content})
 
-    if not new_messages or new_messages[0]["role"] != "user":
+    if new_messages[0]["role"] != "user":
         if litellm.modify_params:
             new_messages.insert(
                 0, {"role": "user", "content": [{"type": "text", "text": "."}]}
@@ -807,12 +809,12 @@ def convert_to_anthropic_tool_invoke(tool_calls: list) -> list:
     anthropic_tool_invoke = [
         {
             "type": "tool_use",
-            "id": tool["id"],
-            "name": tool["function"]["name"],
-            "input": json.loads(tool["function"]["arguments"]),
+            "id": get_attribute_or_key(tool, "id"),
+            "name": get_attribute_or_key(get_attribute_or_key(tool, "function"), "name"),
+            "input": json.loads(get_attribute_or_key(get_attribute_or_key(tool, "function"), "arguments")),
         }
         for tool in tool_calls
-        if tool["type"] == "function"
+        if get_attribute_or_key(tool, "type") == "function"
     ]
 
     return anthropic_tool_invoke
@@ -1355,3 +1357,8 @@ def prompt_factory(
         return default_pt(
             messages=messages
         )  # default that covers Bloom, T-5, any non-chat tuned model (e.g. base Llama2)
+
+def get_attribute_or_key(tool_or_function, attribute, default=None):
+    if hasattr(tool_or_function, attribute):
+        return getattr(tool_or_function, attribute)
+    return tool_or_function.get(attribute, default)

From 7ca213e92f4257dc192e1ffe9641dfc1c0cefc14 Mon Sep 17 00:00:00 2001
From: Nilanjan De
Date: Thu, 18 Apr 2024 15:12:31 +0400
Subject: [PATCH 03/43] update factory.py

---
 litellm/llms/prompt_templates/factory.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index dbe27278e156..bbf87ca49f7e 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -619,10 +619,11 @@ def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
             continue
 
         tool_function = get_attribute_or_key(tool,"function")
-        tool_name = tool_function["name"]
+        tool_name = get_attribute_or_key(tool_function, "name")
+        tool_arguments = get_attribute_or_key(tool_function, "arguments")
         parameters = "".join(
             f"<{param}>{val}</{param}>\n"
-            for param, val in json.loads(tool_function["arguments"]).items()
+            for param, val in json.loads(tool_arguments).items()
         )
         invokes += (
             "<invoke>\n"
@@ -707,7 +708,7 @@ def anthropic_messages_pt_xml(messages: list):
         if assistant_content:
             new_messages.append({"role": "assistant", "content": assistant_content})
 
-    if new_messages[0]["role"] != "user":
+    if not new_messages or new_messages[0]["role"] != "user":
         if litellm.modify_params:
             new_messages.insert(
                 0, {"role": "user", "content": [{"type": "text", "text": "."}]}

From 4c7d94b2b4a18413f54c77a8507064d57f6c677f Mon Sep 17 00:00:00 2001
From: Nilanjan De
Date: Thu, 18 Apr 2024 17:35:52 +0400
Subject: [PATCH 04/43] update factory.py

---
 litellm/llms/prompt_templates/factory.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index bbf87ca49f7e..6658671f8e69 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -698,8 +698,10 @@ def anthropic_messages_pt_xml(messages: list):
             if messages[msg_i].get(
                 "tool_calls", []
             ):  # support assistant tool invoke convertion
-                assistant_text += convert_to_anthropic_tool_invoke(  # type: ignore
-                    messages[msg_i]["tool_calls"]
+                assistant_content.extend(
+                    convert_to_anthropic_tool_invoke(  # type: ignore
+                        messages[msg_i]["tool_calls"]
+                    )
                 )
 
             assistant_content.append({"type": "text", "text": assistant_text})

From c85018c780529579206ce2428db7a2bfa053923c Mon Sep 17 00:00:00 2001
From: Nilanjan De
Date: Thu, 18 Apr 2024 18:13:20 +0400
Subject: [PATCH 05/43] update factory.py

---
 litellm/llms/prompt_templates/factory.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 6658671f8e69..5b8fc9a6f451 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -695,6 +695,8 @@ def anthropic_messages_pt_xml(messages: list):
             assistant_text = (
                 messages[msg_i].get("content") or ""
             )  # either string or none
+            if assistant_text:
+                assistant_content.append({"type": "text", "text": assistant_text})
             if messages[msg_i].get(
                 "tool_calls", []
             ):  # support assistant tool invoke convertion
                 assistant_content.extend(
                     convert_to_anthropic_tool_invoke(  # type: ignore
                         messages[msg_i]["tool_calls"]
                     )
                 )
-
-            assistant_content.append({"type": "text", "text": assistant_text})
             msg_i += 1
 
         if assistant_content:

From e1fd463f8cf414691d850a5bcbf999debc69d8af Mon Sep 17 00:00:00 2001
From: Nilanjan De
Date: Thu, 18 Apr 2024 22:27:11 +0400
Subject: [PATCH 06/43] update factory.py

---
 litellm/llms/prompt_templates/factory.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 5b8fc9a6f451..b71857a8b2c8 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -617,7 +617,7 @@ def convert_to_anthropic_tool_invoke_xml(tool_calls: list) -> str:
     for tool in tool_calls:
         if get_attribute_or_key(tool, "type") != "function":
             continue
-
+
         tool_function = get_attribute_or_key(tool,"function")
         tool_name = get_attribute_or_key(tool_function, "name")
         tool_arguments = get_attribute_or_key(tool_function, "arguments")
@@ -695,16 +695,14 @@ def anthropic_messages_pt_xml(messages: list):
             assistant_text = (
                 messages[msg_i].get("content") or ""
             )  # either string or none
-            if assistant_text:
-                assistant_content.append({"type": "text", "text": assistant_text})
             if messages[msg_i].get(
                 "tool_calls", []
             ):  # support assistant tool invoke convertion
-                assistant_content.extend(
-                    convert_to_anthropic_tool_invoke(  # type: ignore
-                        messages[msg_i]["tool_calls"]
-                    )
+                assistant_text += convert_to_anthropic_tool_invoke_xml(  # type: ignore
+                    messages[msg_i]["tool_calls"]
                 )
+
+            assistant_content.append({"type": "text", "text": assistant_text})
             msg_i += 1
 
         if assistant_content:

From ecfae6d465e78f8960ff2dc3fd75197de5c2dd0b Mon Sep 17 00:00:00 2001
From: Nilanjan De
Date: Thu, 18 Apr 2024 22:31:08 +0400
Subject: [PATCH 07/43] update factory.py

---
 litellm/llms/prompt_templates/factory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index b71857a8b2c8..7ba0ee0070f5 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -677,7 +677,7 @@ def anthropic_messages_pt_xml(messages: list):
                     {
                         "type": "text",
                         "text": (
-                            convert_to_anthropic_tool_result(messages[msg_i])
+                            convert_to_anthropic_tool_result_xml(messages[msg_i])
                             if messages[msg_i]["role"] == "tool"
                             else messages[msg_i]["content"]
                         ),

From ca3d2fea5694d0eb7cf4c3db354e12274df43ef3 Mon Sep 17 00:00:00 2001
From: Nilanjan De
Date: Thu, 18 Apr 2024 22:42:32 +0400
Subject: [PATCH 08/43] fix for #2904, remove XML characters in content

---
 litellm/llms/prompt_templates/factory.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/litellm/llms/prompt_templates/factory.py b/litellm/llms/prompt_templates/factory.py
index 7ba0ee0070f5..218aa77fea40 100644
--- a/litellm/llms/prompt_templates/factory.py
+++ b/litellm/llms/prompt_templates/factory.py
@@ -594,7 +594,8 @@ def convert_to_anthropic_tool_result_xml(message: dict) -> str:
     """
     name = message.get("name")
-    content = message.get("content")
+    content = message.get("content", "")
+    content = content.replace("<", "&lt;").replace(">", "&gt;").replace("&", "&amp;")
 
     # We can't determine from openai message format whether it's a successful or
     # error call result so default to the successful result template

From 5113d470232f47cc213e131be18c2e7be7b47f95 Mon Sep 17 00:00:00 2001
From: Nilanjan
De Date: Fri, 19 Apr 2024 00:42:48 +0400 Subject: [PATCH 09/43] add test --- litellm/tests/test_bedrock_completion.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/litellm/tests/test_bedrock_completion.py b/litellm/tests/test_bedrock_completion.py index 4b1781cd9353..ca2ffea5f527 100644 --- a/litellm/tests/test_bedrock_completion.py +++ b/litellm/tests/test_bedrock_completion.py @@ -269,6 +269,30 @@ def test_bedrock_claude_3_tool_calling(): assert isinstance( response.choices[0].message.tool_calls[0].function.arguments, str ) + messages.append( + response.choices[0].message.model_dump() + ) # Add assistant tool invokes + tool_result = ( + '{"location": "Boston", "temperature": "72", "unit": "fahrenheit"}' + ) + # Add user submitted tool results in the OpenAI format + messages.append( + { + "tool_call_id": response.choices[0].message.tool_calls[0].id, + "role": "tool", + "name": response.choices[0].message.tool_calls[0].function.name, + "content": tool_result, + } + ) + # In the second response, Claude should deduce answer from tool results + second_response = completion( + model="bedrock/anthropic.claude-3-sonnet-20240229-v1:0", + messages=messages, + tools=tools, + tool_choice="auto", + ) + print(f"second response: {second_response}") + assert isinstance(second_response.choices[0].message.content, str) except RateLimitError: pass except Exception as e: From 6724bf6419b2d4f8c5081fd344e7367fec372fe0 Mon Sep 17 00:00:00 2001 From: John HU Date: Thu, 18 Apr 2024 19:48:57 -0700 Subject: [PATCH 10/43] Load google ADC before init AnthropicVertex --- litellm/llms/vertex_ai_anthropic.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/litellm/llms/vertex_ai_anthropic.py b/litellm/llms/vertex_ai_anthropic.py index 9bce746dd65d..34709e0c5633 100644 --- a/litellm/llms/vertex_ai_anthropic.py +++ b/litellm/llms/vertex_ai_anthropic.py @@ -123,7 +123,7 @@ def map_openai_params(self, non_default_params: dict, optional_params: dict): """ -- Run client init +- Run client init - Support async completion, streaming """ @@ -236,17 +236,19 @@ def completion( if client is None: if vertex_credentials is not None and isinstance(vertex_credentials, str): import google.oauth2.service_account - - json_obj = json.loads(vertex_credentials) - creds = ( google.oauth2.service_account.Credentials.from_service_account_info( - json_obj, + json.loads(vertex_credentials), scopes=["https://www.googleapis.com/auth/cloud-platform"], ) ) ### CHECK IF ACCESS access_token = refresh_auth(credentials=creds) + else: + import google.auth + creds, _ = google.auth.default() + ### CHECK IF ACCESS + access_token = refresh_auth(credentials=creds) vertex_ai_client = AnthropicVertex( project_id=vertex_project, From 4dcecde97af6049676359523d844096b1fa58713 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 19 Apr 2024 16:45:13 -0700 Subject: [PATCH 11/43] ui - non admin flow --- .../src/components/leftnav.tsx | 63 +++++++++++++------ 1 file changed, 43 insertions(+), 20 deletions(-) diff --git a/ui/litellm-dashboard/src/components/leftnav.tsx b/ui/litellm-dashboard/src/components/leftnav.tsx index 04a99c41f170..dd6ef09707f2 100644 --- a/ui/litellm-dashboard/src/components/leftnav.tsx +++ b/ui/litellm-dashboard/src/components/leftnav.tsx @@ -63,11 +63,16 @@ const Sidebar: React.FC = ({ Test Key - setPage("models")}> - - Models - - + { + userRole == "Admin" ? ( + setPage("models")}> + + Models + + + ) : null + } + {userRole == "Admin" ? 
( setPage("teams")}> @@ -75,11 +80,18 @@ const Sidebar: React.FC = ({ ) : null} - setPage("usage")}> - - Usage - - + + { + userRole == "Admin" ? ( + setPage("usage")}> + + Usage + + + + ) : null + } + {userRole == "Admin" ? ( setPage("users")}> @@ -87,16 +99,27 @@ const Sidebar: React.FC = ({ ) : null} - setPage("settings")}> - - Integrations - - - setPage("general-settings")}> - - Settings - - + + { + userRole == "Admin" ? ( + setPage("settings")}> + + Integrations + + + ) : null + } + + { + userRole == "Admin" ? ( + setPage("general-settings")}> + + Settings + + + ) : null + } + {userRole == "Admin" ? ( setPage("admin-panel")}> From b81d66639b3dfe70593d505fcf3b40c2f4c00821 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 19 Apr 2024 17:31:36 -0700 Subject: [PATCH 12/43] ui - allow App user to see their own info --- ui/litellm-dashboard/src/components/networking.tsx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ui/litellm-dashboard/src/components/networking.tsx b/ui/litellm-dashboard/src/components/networking.tsx index 4b961ca34777..96b6246f55fa 100644 --- a/ui/litellm-dashboard/src/components/networking.tsx +++ b/ui/litellm-dashboard/src/components/networking.tsx @@ -296,6 +296,9 @@ export const userInfoCall = async ( if (userRole == "App Owner" && userID) { url = `${url}?user_id=${userID}`; } + if (userRole == "App User" && userID) { + url = `${url}?user_id=${userID}`; + } console.log("in userInfoCall viewAll=", viewAll); if (viewAll && page_size && (page != null) && (page != undefined)) { url = `${url}?view_all=true&page=${page}&page_size=${page_size}`; From 00a07a99cda2650ed9e65a47dd7050fea1913f92 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 19 Apr 2024 17:36:29 -0700 Subject: [PATCH 13/43] fix - backend logic for non admin flow --- litellm/proxy/proxy_server.py | 69 ++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index db85b7ba105f..14ff78200b61 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -5713,6 +5713,20 @@ async def new_user(data: NewUserRequest): "user" # only create a user, don't create key if 'auto_create_key' set to False ) response = await generate_key_helper_fn(**data_json) + + # Admin UI Logic + # if team_id passed add this user to the team + if data_json.get("team_id", None) is not None: + await team_member_add( + data=TeamMemberAddRequest( + team_id=data_json.get("team_id", None), + member=Member( + user_id=data_json.get("user_id", None), + role="user", + user_email=data_json.get("user_email", None), + ), + ) + ) return NewUserResponse( key=response.get("token", ""), expires=response.get("expires", None), @@ -8112,36 +8126,33 @@ def response_convertor(response, client): } user_role = getattr(user_info, "user_role", None) - else: - ## check if user-email in db ## - user_info = await prisma_client.db.litellm_usertable.find_first( - where={"user_email": user_email} - ) - if user_info is not None: - user_defined_values = { - "models": getattr(user_info, "models", user_id_models), - "user_id": getattr(user_info, "user_id", user_id), - "user_email": getattr(user_info, "user_id", user_email), - "user_role": getattr(user_info, "user_role", None), - } - user_role = getattr(user_info, "user_role", None) + ## check if user-email in db ## + user_info = await prisma_client.db.litellm_usertable.find_first( + where={"user_email": user_email} + ) + if user_info is not None: + user_defined_values = { + "models": 
getattr(user_info, "models", user_id_models), + "user_id": getattr(user_info, "user_id", user_id), + "user_email": getattr(user_info, "user_id", user_email), + "user_role": getattr(user_info, "user_role", None), + } + user_role = getattr(user_info, "user_role", None) - # update id - await prisma_client.db.litellm_usertable.update_many( - where={"user_email": user_email}, data={"user_id": user_id} # type: ignore - ) - elif litellm.default_user_params is not None and isinstance( - litellm.default_user_params, dict - ): - user_defined_values = { - "models": litellm.default_user_params.get( - "models", user_id_models - ), - "user_id": litellm.default_user_params.get("user_id", user_id), - "user_email": litellm.default_user_params.get( - "user_email", user_email - ), - } + # update id + await prisma_client.db.litellm_usertable.update_many( + where={"user_email": user_email}, data={"user_id": user_id} # type: ignore + ) + elif litellm.default_user_params is not None and isinstance( + litellm.default_user_params, dict + ): + user_defined_values = { + "models": litellm.default_user_params.get("models", user_id_models), + "user_id": litellm.default_user_params.get("user_id", user_id), + "user_email": litellm.default_user_params.get( + "user_email", user_email + ), + } except Exception as e: pass From abeadadf3ee5de9b668ce0ed8a4a227d73c6c5ab Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Fri, 19 Apr 2024 21:15:57 -0700 Subject: [PATCH 14/43] fix - create key user flow --- ui/litellm-dashboard/src/components/create_key_button.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/litellm-dashboard/src/components/create_key_button.tsx b/ui/litellm-dashboard/src/components/create_key_button.tsx index 8dde3fb001c2..d7fb9c5eb1b4 100644 --- a/ui/litellm-dashboard/src/components/create_key_button.tsx +++ b/ui/litellm-dashboard/src/components/create_key_button.tsx @@ -116,7 +116,7 @@ const CreateKey: React.FC = ({ wrapperCol={{ span: 16 }} labelAlign="left" > - {userRole === "App Owner" || userRole === "Admin" ? ( + {userRole === "App Owner" || userRole === "Admin" || userRole === "App User" ? 
( <> Date: Sat, 20 Apr 2024 10:43:18 -0700 Subject: [PATCH 15/43] fix(router.py): calculate max_parallel_requests from given tpm limits use the azure formula to calculate rpm -> max_parallel_requests based on a deployment's tpm limits --- litellm/router.py | 49 ++++++++++++++++++++++++++++++++++------------- litellm/utils.py | 40 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 13 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index 8145ef619e6f..d7988aaba62c 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -26,7 +26,12 @@ CustomHTTPTransport, AsyncCustomHTTPTransport, ) -from litellm.utils import ModelResponse, CustomStreamWrapper, get_utc_datetime +from litellm.utils import ( + ModelResponse, + CustomStreamWrapper, + get_utc_datetime, + calculate_max_parallel_requests, +) import copy from litellm._logging import verbose_router_logger import logging @@ -61,6 +66,7 @@ def __init__( num_retries: int = 0, timeout: Optional[float] = None, default_litellm_params={}, # default params for Router.chat.completion.create + default_max_parallel_requests: Optional[int] = None, set_verbose: bool = False, debug_level: Literal["DEBUG", "INFO"] = "INFO", fallbacks: List = [], @@ -213,6 +219,7 @@ def __init__( ) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown self.num_retries = num_retries or litellm.num_retries or 0 self.timeout = timeout or litellm.request_timeout + self.default_max_parallel_requests = default_max_parallel_requests self.retry_after = retry_after self.routing_strategy = routing_strategy self.fallbacks = fallbacks or litellm.fallbacks @@ -496,7 +503,9 @@ async def _acompletion(self, model: str, messages: List[Dict[str, str]], **kwarg ) rpm_semaphore = self._get_client( - deployment=deployment, kwargs=kwargs, client_type="rpm_client" + deployment=deployment, + kwargs=kwargs, + client_type="max_parallel_requests", ) if rpm_semaphore is not None and isinstance( @@ -681,7 +690,9 @@ async def _aimage_generation(self, prompt: str, model: str, **kwargs): ### CONCURRENCY-SAFE RPM CHECKS ### rpm_semaphore = self._get_client( - deployment=deployment, kwargs=kwargs, client_type="rpm_client" + deployment=deployment, + kwargs=kwargs, + client_type="max_parallel_requests", ) if rpm_semaphore is not None and isinstance( @@ -803,7 +814,9 @@ async def _atranscription(self, file: BinaryIO, model: str, **kwargs): ### CONCURRENCY-SAFE RPM CHECKS ### rpm_semaphore = self._get_client( - deployment=deployment, kwargs=kwargs, client_type="rpm_client" + deployment=deployment, + kwargs=kwargs, + client_type="max_parallel_requests", ) if rpm_semaphore is not None and isinstance( @@ -1049,7 +1062,9 @@ async def _atext_completion(self, model: str, prompt: str, **kwargs): ) rpm_semaphore = self._get_client( - deployment=deployment, kwargs=kwargs, client_type="rpm_client" + deployment=deployment, + kwargs=kwargs, + client_type="max_parallel_requests", ) if rpm_semaphore is not None and isinstance( @@ -1243,7 +1258,9 @@ async def _aembedding(self, input: Union[str, List], model: str, **kwargs): ### CONCURRENCY-SAFE RPM CHECKS ### rpm_semaphore = self._get_client( - deployment=deployment, kwargs=kwargs, client_type="rpm_client" + deployment=deployment, + kwargs=kwargs, + client_type="max_parallel_requests", ) if rpm_semaphore is not None and isinstance( @@ -1862,17 +1879,23 @@ def set_client(self, model: dict): model_id = model["model_info"]["id"] # ### IF RPM SET - initialize a semaphore ### rpm = 
litellm_params.get("rpm", None) - if rpm: - semaphore = asyncio.Semaphore(rpm) - cache_key = f"{model_id}_rpm_client" + tpm = litellm_params.get("tpm", None) + max_parallel_requests = litellm_params.get("max_parallel_requests", None) + calculated_max_parallel_requests = calculate_max_parallel_requests( + rpm=rpm, + max_parallel_requests=max_parallel_requests, + tpm=tpm, + default_max_parallel_requests=self.default_max_parallel_requests, + ) + if calculated_max_parallel_requests: + semaphore = asyncio.Semaphore(calculated_max_parallel_requests) + cache_key = f"{model_id}_max_parallel_requests_client" self.cache.set_cache( key=cache_key, value=semaphore, local_only=True, ) - # print("STORES SEMAPHORE IN CACHE") - #### for OpenAI / Azure we need to initalize the Client for High Traffic ######## custom_llm_provider = litellm_params.get("custom_llm_provider") custom_llm_provider = custom_llm_provider or model_name.split("/", 1)[0] or "" @@ -2537,8 +2560,8 @@ def _get_client(self, deployment, kwargs, client_type=None): The appropriate client based on the given client_type and kwargs. """ model_id = deployment["model_info"]["id"] - if client_type == "rpm_client": - cache_key = "{}_rpm_client".format(model_id) + if client_type == "max_parallel_requests": + cache_key = "{}_max_parallel_requests".format(model_id) client = self.cache.get_cache(key=cache_key, local_only=True) return client elif client_type == "async": diff --git a/litellm/utils.py b/litellm/utils.py index e230675e68a6..566ef2099626 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -5395,6 +5395,46 @@ def _map_and_modify_arg(supported_params: dict, provider: str, model: str): return optional_params +def calculate_max_parallel_requests( + max_parallel_requests: Optional[int], + rpm: Optional[int], + tpm: Optional[int], + default_max_parallel_requests: Optional[int], +) -> Optional[int]: + """ + Returns the max parallel requests to send to a deployment. + + Used in semaphore for async requests on router. + + Parameters: + - max_parallel_requests - Optional[int] - max_parallel_requests allowed for that deployment + - rpm - Optional[int] - requests per minute allowed for that deployment + - tpm - Optional[int] - tokens per minute allowed for that deployment + - default_max_parallel_requests - Optional[int] - default_max_parallel_requests allowed for any deployment + + Returns: + - int or None (if all params are None) + + Order: + max_parallel_requests > rpm > tpm / 6 (azure formula) > default max_parallel_requests + + Azure RPM formula: + 6 rpm per 1000 TPM + https://learn.microsoft.com/en-us/azure/ai-services/openai/quotas-limits + + + """ + if max_parallel_requests is not None: + return max_parallel_requests + elif rpm is not None: + return rpm + elif tpm is not None: + return int(tpm / 1000 / 6) + elif default_max_parallel_requests is not None: + return default_max_parallel_requests + return None + + def get_api_base(model: str, optional_params: dict) -> Optional[str]: """ Returns the api base used for calling the model. 
From 7ebf2ca4d9723b1825c23105d408b7305a73eb2d Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 20 Apr 2024 11:09:34 -0700 Subject: [PATCH 16/43] (ci/cd) testing with team_id and /user/new --- litellm/proxy/proxy_server.py | 7 ++++ litellm/tests/test_key_generate_prisma.py | 48 ++++++++++++++++++++--- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 14ff78200b61..2aab7e453210 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -6532,6 +6532,13 @@ async def team_member_add( existing_team_row = await prisma_client.get_data( # type: ignore team_id=data.team_id, table_name="team", query_type="find_unique" ) + if existing_team_row is None: + raise HTTPException( + status_code=404, + detail={ + "error": f"Team not found for team_id={getattr(data, 'team_id', None)}" + }, + ) new_member = data.member diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py index fdb7649d520a..a90c13803fdc 100644 --- a/litellm/tests/test_key_generate_prisma.py +++ b/litellm/tests/test_key_generate_prisma.py @@ -120,6 +120,15 @@ async def test_new_user_response(prisma_client): await litellm.proxy.proxy_server.prisma_client.connect() from litellm.proxy.proxy_server import user_api_key_cache + await new_team( + NewTeamRequest( + team_id="ishaan-special-team", + ), + user_api_key_dict=UserAPIKeyAuth( + user_role="proxy_admin", api_key="sk-1234", user_id="1234" + ), + ) + _response = await new_user( data=NewUserRequest( models=["azure-gpt-3.5"], @@ -999,10 +1008,32 @@ def test_generate_and_update_key(prisma_client): async def test(): await litellm.proxy.proxy_server.prisma_client.connect() + + # create team "litellm-core-infra@gmail.com"" + print("creating team litellm-core-infra@gmail.com") + await new_team( + NewTeamRequest( + team_id="litellm-core-infra@gmail.com", + ), + user_api_key_dict=UserAPIKeyAuth( + user_role="proxy_admin", api_key="sk-1234", user_id="1234" + ), + ) + + await new_team( + NewTeamRequest( + team_id="ishaan-special-team", + ), + user_api_key_dict=UserAPIKeyAuth( + user_role="proxy_admin", api_key="sk-1234", user_id="1234" + ), + ) + request = NewUserRequest( - metadata={"team": "litellm-team3", "project": "litellm-project3"}, + metadata={"project": "litellm-project3"}, team_id="litellm-core-infra@gmail.com", ) + key = await new_user(request) print(key) @@ -1015,7 +1046,6 @@ async def test(): print("\n info for key=", result["info"]) assert result["info"]["max_parallel_requests"] == None assert result["info"]["metadata"] == { - "team": "litellm-team3", "project": "litellm-project3", } assert result["info"]["team_id"] == "litellm-core-infra@gmail.com" @@ -1037,7 +1067,7 @@ async def test(): # update the team id response2 = await update_key_fn( request=Request, - data=UpdateKeyRequest(key=generated_key, team_id="ishaan"), + data=UpdateKeyRequest(key=generated_key, team_id="ishaan-special-team"), ) print("response2=", response2) @@ -1048,11 +1078,10 @@ async def test(): print("\n info for key=", result["info"]) assert result["info"]["max_parallel_requests"] == None assert result["info"]["metadata"] == { - "team": "litellm-team3", "project": "litellm-project3", } assert result["info"]["models"] == ["ada", "babbage", "curie", "davinci"] - assert result["info"]["team_id"] == "ishaan" + assert result["info"]["team_id"] == "ishaan-special-team" # cleanup - delete key delete_key_request = KeyRequest(keys=[generated_key]) @@ -1941,6 +1970,15 @@ async 
def test_master_key_hashing(prisma_client): await litellm.proxy.proxy_server.prisma_client.connect() from litellm.proxy.proxy_server import user_api_key_cache + await new_team( + NewTeamRequest( + team_id="ishaan-special-team", + ), + user_api_key_dict=UserAPIKeyAuth( + user_role="proxy_admin", api_key="sk-1234", user_id="1234" + ), + ) + _response = await new_user( data=NewUserRequest( models=["azure-gpt-3.5"], From a34f725db7bc6f176dd826ba60ff8b553e0b0b37 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 20 Apr 2024 11:43:43 -0700 Subject: [PATCH 17/43] fix - test keys --- tests/test_keys.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_keys.py b/tests/test_keys.py index 39787eb97f25..7a038bf356ee 100644 --- a/tests/test_keys.py +++ b/tests/test_keys.py @@ -14,6 +14,24 @@ import litellm +async def generate_team(session): + url = "http://0.0.0.0:4000/team/new" + headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"} + data = { + "team_id": "litellm-dashboard", + } + + async with session.post(url, headers=headers, json=data) as response: + status = response.status + response_text = await response.text() + + print(f"Response (Status code: {status}):") + print(response_text) + print() + _json_response = await response.json() + return _json_response + + async def generate_user( session, user_role="app_owner", @@ -680,6 +698,7 @@ async def test_key_delete(): key = key_gen["key"] # generate a admin UI key + generate_team(session=session) admin_ui_key = await generate_user(session=session, user_role="proxy_admin") print( "trying to delete key=", From fd282ea9325da9a1a4ef9e4b932b075a07d48e7c Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 20 Apr 2024 11:48:41 -0700 Subject: [PATCH 18/43] fix testing fixes --- litellm/proxy/_types.py | 8 ++++++++ litellm/proxy/proxy_server.py | 14 +++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/_types.py b/litellm/proxy/_types.py index b697b6e976a5..ca9926cef0de 100644 --- a/litellm/proxy/_types.py +++ b/litellm/proxy/_types.py @@ -87,6 +87,14 @@ class LiteLLMRoutes(enum.Enum): "/v2/key/info", ] + sso_only_routes: List = [ + "/key/generate", + "/key/update", + "/key/delete", + "/global/spend/logs", + "/global/predict/spend/logs", + ] + management_routes: List = [ # key "/key/generate", "/key/update", diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 2aab7e453210..ebeea120d868 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1053,6 +1053,11 @@ async def user_api_key_auth( status_code=status.HTTP_403_FORBIDDEN, detail="key not allowed to access this team's info", ) + elif ( + _has_user_setup_sso() + and route in LiteLLMRoutes.sso_only_routes.value + ): + pass else: raise Exception( f"Only master key can be used to generate, delete, update info for new keys/users/teams. Route={route}" @@ -1102,6 +1107,13 @@ async def user_api_key_auth( return UserAPIKeyAuth( api_key=api_key, user_role="proxy_admin", **valid_token_dict ) + elif ( + _has_user_setup_sso() + and route in LiteLLMRoutes.sso_only_routes.value + ): + return UserAPIKeyAuth( + api_key=api_key, user_role="app_owner", **valid_token_dict + ) else: raise Exception( f"This key is made for LiteLLM UI, Tried to access route: {route}. 
Not allowed" @@ -6545,7 +6557,7 @@ async def team_member_add( existing_team_row.members_with_roles.append(new_member) complete_team_data = LiteLLM_TeamTable( - **existing_team_row.model_dump(), + **_get_pydantic_json_dict(existing_team_row), ) team_row = await prisma_client.update_data( From 47e9d5f2ecf42d96140bb59a088898cc4c8e7f47 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 20 Apr 2024 12:08:21 -0700 Subject: [PATCH 19/43] fix(router.py): fix init line for self.default_max_parallel_requests --- litellm/router.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/router.py b/litellm/router.py index d7988aaba62c..7c557e020a60 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -204,6 +204,7 @@ def __init__( ) # use a dual cache (Redis+In-Memory) for tracking cooldowns, usage, etc. self.default_deployment = None # use this to track the users default deployment, when they want to use model = * + self.default_max_parallel_requests = default_max_parallel_requests if model_list: model_list = copy.deepcopy(model_list) @@ -219,7 +220,7 @@ def __init__( ) # cache to track failed call per deployment, if num failed calls within 1 minute > allowed fails, then add it to cooldown self.num_retries = num_retries or litellm.num_retries or 0 self.timeout = timeout or litellm.request_timeout - self.default_max_parallel_requests = default_max_parallel_requests + self.retry_after = retry_after self.routing_strategy = routing_strategy self.fallbacks = fallbacks or litellm.fallbacks From 1507b23e30a2175de7ffcd907b72da5c27e08dc4 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 20 Apr 2024 12:11:54 -0700 Subject: [PATCH 20/43] test(test_openai_endpoints.py): make test stricter --- proxy_server_config.yaml | 6 +++--- tests/test_openai_endpoints.py | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/proxy_server_config.yaml b/proxy_server_config.yaml index dcd5c68557d4..7c2d742672c6 100644 --- a/proxy_server_config.yaml +++ b/proxy_server_config.yaml @@ -96,9 +96,9 @@ litellm_settings: router_settings: routing_strategy: usage-based-routing-v2 - redis_host: os.environ/REDIS_HOST - redis_password: os.environ/REDIS_PASSWORD - redis_port: os.environ/REDIS_PORT + # redis_host: os.environ/REDIS_HOST + # redis_password: os.environ/REDIS_PASSWORD + # redis_port: os.environ/REDIS_PORT enable_pre_call_checks: true general_settings: diff --git a/tests/test_openai_endpoints.py b/tests/test_openai_endpoints.py index 465817d832d2..c77eeba5b0cd 100644 --- a/tests/test_openai_endpoints.py +++ b/tests/test_openai_endpoints.py @@ -260,7 +260,10 @@ async def test_chat_completion_ratelimit(): await asyncio.gather(*tasks) pytest.fail("Expected at least 1 call to fail") except Exception as e: - pass + if "Request did not return a 200 status code: 429" in str(e): + pass + else: + pytest.fail(f"Wrong error received - {str(e)}") @pytest.mark.asyncio From 26579303e072482c1cf041dd8565c4df8cbb5974 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 20 Apr 2024 12:15:04 -0700 Subject: [PATCH 21/43] fix(main.py): ignore max_parallel_requests as a litellm param --- litellm/main.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/litellm/main.py b/litellm/main.py index 65696b3c0ce9..87942f704070 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -609,6 +609,7 @@ def completion( "client", "rpm", "tpm", + "max_parallel_requests", "input_cost_per_token", "output_cost_per_token", "input_cost_per_second", @@ -2560,6 +2561,7 @@ def embedding( client = 
kwargs.pop("client", None) rpm = kwargs.pop("rpm", None) tpm = kwargs.pop("tpm", None) + max_parallel_requests = kwargs.pop("max_parallel_requests", None) model_info = kwargs.get("model_info", None) metadata = kwargs.get("metadata", None) encoding_format = kwargs.get("encoding_format", None) @@ -2617,6 +2619,7 @@ def embedding( "client", "rpm", "tpm", + "max_parallel_requests", "input_cost_per_token", "output_cost_per_token", "input_cost_per_second", @@ -3476,6 +3479,7 @@ def image_generation( "client", "rpm", "tpm", + "max_parallel_requests", "input_cost_per_token", "output_cost_per_token", "hf_model_name", From e56dc2817b84ca955dc88560770d0112e04a8729 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 20 Apr 2024 12:29:38 -0700 Subject: [PATCH 22/43] test(test_router_max_parallel_requests.py): add unit tests for different scenarios --- .../test_router_max_parallel_requests.py | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 litellm/tests/test_router_max_parallel_requests.py diff --git a/litellm/tests/test_router_max_parallel_requests.py b/litellm/tests/test_router_max_parallel_requests.py new file mode 100644 index 000000000000..43c3694ff725 --- /dev/null +++ b/litellm/tests/test_router_max_parallel_requests.py @@ -0,0 +1,53 @@ +# What is this? +## Unit tests for the max_parallel_requests feature on Router +import sys, os, time, inspect, asyncio, traceback +from datetime import datetime +import pytest + +sys.path.insert(0, os.path.abspath("../..")) +import litellm +from litellm.utils import calculate_max_parallel_requests + +""" +- only rpm +- only tpm +- only max_parallel_requests +- max_parallel_requests + rpm +- max_parallel_requests + tpm +- max_parallel_requests + tpm + rpm +""" + + +max_parallel_requests_values = [None, 10] +tpm_values = [None, 20] +rpm_values = [None, 30] +default_max_parallel_requests = [None, 40] + + +@pytest.mark.parametrize( + "max_parallel_requests, tpm, rpm, default_max_parallel_requests", + [ + (mp, tp, rp, dmp) + for mp in max_parallel_requests_values + for tp in tpm_values + for rp in rpm_values + for dmp in default_max_parallel_requests + ], +) +def test_scenario(max_parallel_requests, tpm, rpm, default_max_parallel_requests): + calculated_max_parallel_requests = calculate_max_parallel_requests( + max_parallel_requests=max_parallel_requests, + rpm=rpm, + tpm=tpm, + default_max_parallel_requests=default_max_parallel_requests, + ) + if max_parallel_requests is not None: + assert max_parallel_requests == calculated_max_parallel_requests + elif rpm is not None: + assert rpm == calculated_max_parallel_requests + elif tpm is not None: + assert int(tpm / 1000 / 6) == calculated_max_parallel_requests + elif default_max_parallel_requests is not None: + assert calculated_max_parallel_requests == default_max_parallel_requests + else: + assert calculated_max_parallel_requests is None From 7b24a74e772f20297c0762b4cfa4020929dd19bf Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 20 Apr 2024 12:30:58 -0700 Subject: [PATCH 23/43] test fix - test_key_delete_ui --- tests/test_keys.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_keys.py b/tests/test_keys.py index 7a038bf356ee..f21c50c0dd8f 100644 --- a/tests/test_keys.py +++ b/tests/test_keys.py @@ -686,7 +686,7 @@ async def test_key_rate_limit(): @pytest.mark.asyncio -async def test_key_delete(): +async def test_key_delete_ui(): """ Admin UI flow - DO NOT DELETE -> Create a key with user_id = "ishaan" @@ -698,7 +698,8 @@ async def 
test_key_delete(): key = key_gen["key"] # generate a admin UI key - generate_team(session=session) + team = await generate_team(session=session) + print("generated team: ", team) admin_ui_key = await generate_user(session=session, user_role="proxy_admin") print( "trying to delete key=", From 7aa737cf107505a1b347529484641bb6328f4a4b Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 20 Apr 2024 12:34:09 -0700 Subject: [PATCH 24/43] fix(router.py): add if router caching setup on info logs --- litellm/router.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/router.py b/litellm/router.py index 7c557e020a60..a80dcf5ad45e 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -306,7 +306,7 @@ def __init__( else: litellm.failure_callback = [self.deployment_callback_on_failure] verbose_router_logger.info( - f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}" + f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter caching: {self.cache}" ) self.routing_strategy_args = routing_strategy_args From 0f69f0b44e0a9e117e3defe4b52de991ffd3d372 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 20 Apr 2024 12:56:54 -0700 Subject: [PATCH 25/43] test(test_router_max_parallel_requests.py): more extensive testing for setting max parallel requests --- litellm/router.py | 2 +- .../test_router_max_parallel_requests.py | 66 ++++++++++++++++++- litellm/utils.py | 5 +- 3 files changed, 69 insertions(+), 4 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index a80dcf5ad45e..d60767f3fa3d 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2562,7 +2562,7 @@ def _get_client(self, deployment, kwargs, client_type=None): """ model_id = deployment["model_info"]["id"] if client_type == "max_parallel_requests": - cache_key = "{}_max_parallel_requests".format(model_id) + cache_key = "{}_max_parallel_requests_client".format(model_id) client = self.cache.get_cache(key=cache_key, local_only=True) return client elif client_type == "async": diff --git a/litellm/tests/test_router_max_parallel_requests.py b/litellm/tests/test_router_max_parallel_requests.py index 43c3694ff725..f9cac6aafbf0 100644 --- a/litellm/tests/test_router_max_parallel_requests.py +++ b/litellm/tests/test_router_max_parallel_requests.py @@ -7,6 +7,7 @@ sys.path.insert(0, os.path.abspath("../..")) import litellm from litellm.utils import calculate_max_parallel_requests +from typing import Optional """ - only rpm @@ -19,7 +20,7 @@ max_parallel_requests_values = [None, 10] -tpm_values = [None, 20] +tpm_values = [None, 20, 300000] rpm_values = [None, 30] default_max_parallel_requests = [None, 40] @@ -46,8 +47,69 @@ def test_scenario(max_parallel_requests, tpm, rpm, default_max_parallel_requests elif rpm is not None: assert rpm == calculated_max_parallel_requests elif tpm is not None: - assert int(tpm / 1000 / 6) == calculated_max_parallel_requests + calculated_rpm = int(tpm / 1000 / 6) + if calculated_rpm == 0: + calculated_rpm = 1 + print( + f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={calculated_max_parallel_requests}" + ) + assert calculated_rpm == calculated_max_parallel_requests elif default_max_parallel_requests is not None: assert calculated_max_parallel_requests == default_max_parallel_requests else: assert 
calculated_max_parallel_requests is None + + +@pytest.mark.parametrize( + "max_parallel_requests, tpm, rpm, default_max_parallel_requests", + [ + (mp, tp, rp, dmp) + for mp in max_parallel_requests_values + for tp in tpm_values + for rp in rpm_values + for dmp in default_max_parallel_requests + ], +) +def test_setting_mpr_limits_per_model( + max_parallel_requests, tpm, rpm, default_max_parallel_requests +): + deployment = { + "model_name": "gpt-3.5-turbo", + "litellm_params": { + "model": "gpt-3.5-turbo", + "max_parallel_requests": max_parallel_requests, + "tpm": tpm, + "rpm": rpm, + }, + "model_info": {"id": "my-unique-id"}, + } + + router = litellm.Router( + model_list=[deployment], + default_max_parallel_requests=default_max_parallel_requests, + ) + + mpr_client: Optional[asyncio.Semaphore] = router._get_client( + deployment=deployment, + kwargs={}, + client_type="max_parallel_requests", + ) + + if max_parallel_requests is not None: + assert max_parallel_requests == mpr_client._value + elif rpm is not None: + assert rpm == mpr_client._value + elif tpm is not None: + calculated_rpm = int(tpm / 1000 / 6) + if calculated_rpm == 0: + calculated_rpm = 1 + print( + f"test calculated_rpm: {calculated_rpm}, calculated_max_parallel_requests={mpr_client._value}" + ) + assert calculated_rpm == mpr_client._value + elif default_max_parallel_requests is not None: + assert mpr_client._value == default_max_parallel_requests + else: + assert mpr_client is None + + # raise Exception("it worked!") diff --git a/litellm/utils.py b/litellm/utils.py index 566ef2099626..0b4fb466074b 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -5429,7 +5429,10 @@ def calculate_max_parallel_requests( elif rpm is not None: return rpm elif tpm is not None: - return int(tpm / 1000 / 6) + calculated_rpm = int(tpm / 1000 / 6) + if calculated_rpm == 0: + calculated_rpm = 1 + return calculated_rpm elif default_max_parallel_requests is not None: return default_max_parallel_requests return None From c96ca1f85e79fe13be520a30c2bcd73f01f5e998 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 20 Apr 2024 13:12:12 -0700 Subject: [PATCH 26/43] fix(router.py): improve debug logsd --- litellm/router.py | 2 +- litellm/tests/test_router_debug_logs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index d60767f3fa3d..9dcff6f3d6a8 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -306,7 +306,7 @@ def __init__( else: litellm.failure_callback = [self.deployment_callback_on_failure] verbose_router_logger.info( - f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter caching: {self.cache}" + f"Intialized router with Routing strategy: {self.routing_strategy}\n\nRouting fallbacks: {self.fallbacks}\n\nRouting context window fallbacks: {self.context_window_fallbacks}\n\nRouter Redis Caching={self.cache.redis_cache}" ) self.routing_strategy_args = routing_strategy_args diff --git a/litellm/tests/test_router_debug_logs.py b/litellm/tests/test_router_debug_logs.py index a768864aebd9..0bc711b1577a 100644 --- a/litellm/tests/test_router_debug_logs.py +++ b/litellm/tests/test_router_debug_logs.py @@ -81,7 +81,7 @@ async def _make_request(): # Define the expected log messages # - error request, falling back notice, success notice expected_logs = [ - "Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': 
['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None", + "Intialized router with Routing strategy: simple-shuffle\n\nRouting fallbacks: [{'gpt-3.5-turbo': ['azure/gpt-3.5-turbo']}]\n\nRouting context window fallbacks: None\n\nRouter Redis Caching=None", "litellm.acompletion(model=gpt-3.5-turbo)\x1b[31m Exception OpenAIException - Error code: 401 - {'error': {'message': 'Incorrect API key provided: bad-key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}\x1b[0m", "Falling back to model_group = azure/gpt-3.5-turbo", "litellm.acompletion(model=azure/chatgpt-v-2)\x1b[32m 200 OK\x1b[0m", From ddc71d766a58207c401b3e4611d180bc39e6184f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 20 Apr 2024 13:16:47 -0700 Subject: [PATCH 27/43] fix - slack alerting show input in the api_base --- litellm/proxy/utils.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 02e8a4166869..18f1b837f469 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -238,7 +238,10 @@ def _response_taking_too_long_callback( litellm_params = kwargs.get("litellm_params", {}) model = kwargs.get("model", "") api_base = litellm.get_api_base(model=model, optional_params=litellm_params) - messages = kwargs.get("messages", "") + messages = kwargs.get("messages", None) + # if messages does not exist fallback to "input" + if messages is None: + messages = kwargs.get("input", None) # only use first 100 chars for alerting _messages = str(messages)[:100] @@ -282,7 +285,10 @@ async def response_taking_too_long( ): if request_data is not None: model = request_data.get("model", "") - messages = request_data.get("messages", "") + messages = request_data.get("messages", None) + if messages is None: + # if messages does not exist fallback to "input" + messages = request_data.get("input", None) trace_id = request_data.get("metadata", {}).get( "trace_id", None ) # get langfuse trace id From a909af3fc0697f12421adbfc266a60327652907f Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 20 Apr 2024 14:50:34 -0700 Subject: [PATCH 28/43] (ci/cd) fix test_master_key_hashing --- litellm/tests/test_key_generate_prisma.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/litellm/tests/test_key_generate_prisma.py b/litellm/tests/test_key_generate_prisma.py index a90c13803fdc..08618c98891a 100644 --- a/litellm/tests/test_key_generate_prisma.py +++ b/litellm/tests/test_key_generate_prisma.py @@ -1972,7 +1972,7 @@ async def test_master_key_hashing(prisma_client): await new_team( NewTeamRequest( - team_id="ishaan-special-team", + team_id="ishaans-special-team", ), user_api_key_dict=UserAPIKeyAuth( user_role="proxy_admin", api_key="sk-1234", user_id="1234" From b96741e4f49834833616d4d1ee20e799c8bc8ab4 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 20 Apr 2024 15:01:12 -0700 Subject: [PATCH 29/43] fix(router.py): async simple-shuffle support --- litellm/proxy/_new_secret_config.yaml | 2 +- litellm/router.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index a8101181ccf3..53c59ff8a7e8 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -38,7 +38,7 @@ model_list: # max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET router_settings: - routing_strategy: 
usage-based-routing-v2 + # routing_strategy: usage-based-routing-v2 # redis_url: "os.environ/REDIS_URL" redis_host: os.environ/REDIS_HOST redis_port: os.environ/REDIS_PORT diff --git a/litellm/router.py b/litellm/router.py index 9dcff6f3d6a8..fda53eb4fa0a 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -2802,6 +2802,7 @@ async def async_get_available_deployment( """ if ( self.routing_strategy != "usage-based-routing-v2" + and self.routing_strategy != "simple-shuffle" ): # prevent regressions for other routing strategies, that don't have async get available deployments implemented. return self.get_available_deployment( model=model, @@ -2852,6 +2853,25 @@ async def async_get_available_deployment( messages=messages, input=input, ) + elif self.routing_strategy == "simple-shuffle": + # if users pass rpm or tpm, we do a random weighted pick - based on rpm/tpm + ############## Check if we can do a RPM/TPM based weighted pick ################# + rpm = healthy_deployments[0].get("litellm_params").get("rpm", None) + if rpm is not None: + # use weight-random pick if rpms provided + rpms = [m["litellm_params"].get("rpm", 0) for m in healthy_deployments] + verbose_router_logger.debug(f"\nrpms {rpms}") + total_rpm = sum(rpms) + weights = [rpm / total_rpm for rpm in rpms] + verbose_router_logger.debug(f"\n weights {weights}") + # Perform weighted random pick + selected_index = random.choices(range(len(rpms)), weights=weights)[0] + verbose_router_logger.debug(f"\n selected index, {selected_index}") + deployment = healthy_deployments[selected_index] + verbose_router_logger.info( + f"get_available_deployment for model: {model}, Selected deployment: {self.print_deployment(deployment) or deployment[0]} for model: {model}" + ) + return deployment or deployment[0] if deployment is None: verbose_router_logger.info( From c5d880b6fd0d289779bd6e6072e23ab0fd7ac3c6 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 20 Apr 2024 15:02:22 -0700 Subject: [PATCH 30/43] docs(routing.md): add simple shuffle async support to docs --- docs/my-website/docs/routing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/my-website/docs/routing.md b/docs/my-website/docs/routing.md index c10d8049909c..5d9b38cc1f01 100644 --- a/docs/my-website/docs/routing.md +++ b/docs/my-website/docs/routing.md @@ -279,7 +279,7 @@ router_settings: ``` - + **Default** Picks a deployment based on the provided **Requests per minute (rpm) or Tokens per minute (tpm)** From 31e600c4fe18a5aac4dae0ddae4bb35c4e0f9941 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 20 Apr 2024 15:50:01 -0700 Subject: [PATCH 31/43] (ui) - simplify user flow --- .../src/components/create_key_button.tsx | 62 +++++++++++++++++-- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/ui/litellm-dashboard/src/components/create_key_button.tsx b/ui/litellm-dashboard/src/components/create_key_button.tsx index d7fb9c5eb1b4..6edc90dfc5c2 100644 --- a/ui/litellm-dashboard/src/components/create_key_button.tsx +++ b/ui/litellm-dashboard/src/components/create_key_button.tsx @@ -116,7 +116,7 @@ const CreateKey: React.FC = ({ wrapperCol={{ span: 16 }} labelAlign="left" > - {userRole === "App Owner" || userRole === "Admin" || userRole === "App User" ? ( + {userRole === "App Owner" || userRole === "Admin" ? ( <> = ({ ) : ( <> - + - - + - - + + + )}
From a71d4fc17244b92ef0ffd3e51df5ddae1b57ea84 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 20 Apr 2024 15:55:18 -0700
Subject: [PATCH 32/43] (ui) hide default team for non admins

---
 .../src/components/dashboard_default_team.tsx | 11 +++++++++--
 .../src/components/user_dashboard.tsx         |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/ui/litellm-dashboard/src/components/dashboard_default_team.tsx b/ui/litellm-dashboard/src/components/dashboard_default_team.tsx
index b3976912b2df..0abfd1a562fb 100644
--- a/ui/litellm-dashboard/src/components/dashboard_default_team.tsx
+++ b/ui/litellm-dashboard/src/components/dashboard_default_team.tsx
@@ -4,6 +4,7 @@ import { Select, SelectItem, Text, Title } from "@tremor/react";
 interface DashboardTeamProps {
   teams: Object[] | null;
   setSelectedTeam: React.Dispatch>;
+  userRole: string | null;
 }
 
 type TeamInterface = {
@@ -15,6 +16,7 @@ type TeamInterface = {
 const DashboardTeam: React.FC = ({
   teams,
   setSelectedTeam,
+  userRole,
 }) => {
   const defaultTeam: TeamInterface = {
     models: [],
@@ -25,8 +27,13 @@ const DashboardTeam: React.FC = ({
 
   const [value, setValue] = useState(defaultTeam);
 
-  const updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
-
+  let updatedTeams;
+  if (userRole === "App User") {
+    // Non-Admin SSO users should only see their own team - they should not see "Default Team"
+    updatedTeams = teams;
+  } else {
+    updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam];
+  }
   return (
diff --git a/ui/litellm-dashboard/src/components/user_dashboard.tsx b/ui/litellm-dashboard/src/components/user_dashboard.tsx
index 3f6a988b3fbc..c06b72883d3e 100644
--- a/ui/litellm-dashboard/src/components/user_dashboard.tsx
+++ b/ui/litellm-dashboard/src/components/user_dashboard.tsx
@@ -257,7 +257,7 @@ const UserDashboard: React.FC = ({
             data={keys}
             setData={setKeys}
           />
-          <DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} />
+          <DashboardTeam teams={teams} setSelectedTeam={setSelectedTeam} userRole={userRole} />
From 9379e3d0472860045f00057c9137185d1147527c Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Sat, 20 Apr 2024 16:13:11 -0700 Subject: [PATCH 33/43] fix(lowest_tpm_rpm_v2.py): use a combined tpm+rpm query in async get cache, to reduce redis client calls in high traffic --- litellm/integrations/prometheus.py | 2 +- litellm/integrations/prometheus_services.py | 53 +++++++++++++++----- litellm/proxy/_new_secret_config.yaml | 15 ++---- litellm/router_strategy/lowest_tpm_rpm_v2.py | 12 +++-- 4 files changed, 54 insertions(+), 28 deletions(-) diff --git a/litellm/integrations/prometheus.py b/litellm/integrations/prometheus.py index 74632d49a06d..30a1188fe9ce 100644 --- a/litellm/integrations/prometheus.py +++ b/litellm/integrations/prometheus.py @@ -19,7 +19,7 @@ def __init__( **kwargs, ): try: - verbose_logger.debug(f"in init prometheus metrics") + print(f"in init prometheus metrics") from prometheus_client import Counter self.litellm_llm_api_failed_requests_metric = Counter( diff --git a/litellm/integrations/prometheus_services.py b/litellm/integrations/prometheus_services.py index 548d0a2a3af6..45f70a8c1ad3 100644 --- a/litellm/integrations/prometheus_services.py +++ b/litellm/integrations/prometheus_services.py @@ -44,9 +44,18 @@ def __init__( ) # store the prometheus histogram/counter we need to call for each field in payload for service in self.services: - histogram = self.create_histogram(service) - counter = self.create_counter(service) - self.payload_to_prometheus_map[service] = [histogram, counter] + histogram = self.create_histogram(service, type_of_request="latency") + counter_failed_request = self.create_counter( + service, type_of_request="failed_requests" + ) + counter_total_requests = self.create_counter( + service, type_of_request="total_requests" + ) + self.payload_to_prometheus_map[service] = [ + histogram, + counter_failed_request, + counter_total_requests, + ] self.prometheus_to_amount_map: dict = ( {} @@ -74,26 +83,26 @@ def get_metric(self, metric_name): return metric return None - def create_histogram(self, label: str): - metric_name = "litellm_{}_latency".format(label) + def create_histogram(self, service: str, type_of_request: str): + metric_name = "litellm_{}_{}".format(service, type_of_request) is_registered = self.is_metric_registered(metric_name) if is_registered: return self.get_metric(metric_name) return self.Histogram( metric_name, - "Latency for {} service".format(label), - labelnames=[label], + "Latency for {} service".format(service), + labelnames=[service], ) - def create_counter(self, label: str): - metric_name = "litellm_{}_failed_requests".format(label) + def create_counter(self, service: str, type_of_request: str): + metric_name = "litellm_{}_{}".format(service, type_of_request) is_registered = self.is_metric_registered(metric_name) if is_registered: return self.get_metric(metric_name) return self.Counter( metric_name, - "Total failed requests for {} service".format(label), - labelnames=[label], + "Total {} for {} service".format(type_of_request, service), + labelnames=[service], ) def observe_histogram( @@ -120,6 +129,8 @@ def service_success_hook(self, payload: ServiceLoggerPayload): if self.mock_testing: self.mock_testing_success_calls += 1 + print(f"payload call type: {payload.call_type}") + if payload.service.value in self.payload_to_prometheus_map: prom_objects = self.payload_to_prometheus_map[payload.service.value] for obj in prom_objects: @@ -129,11 +140,19 @@ def service_success_hook(self, payload: ServiceLoggerPayload): 
labels=payload.service.value, amount=payload.duration, ) + elif isinstance(obj, self.Counter) and "total_requests" in obj._name: + self.increment_counter( + counter=obj, + labels=payload.service.value, + amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS + ) def service_failure_hook(self, payload: ServiceLoggerPayload): if self.mock_testing: self.mock_testing_failure_calls += 1 + print(f"payload call type: {payload.call_type}") + if payload.service.value in self.payload_to_prometheus_map: prom_objects = self.payload_to_prometheus_map[payload.service.value] for obj in prom_objects: @@ -141,7 +160,7 @@ def service_failure_hook(self, payload: ServiceLoggerPayload): self.increment_counter( counter=obj, labels=payload.service.value, - amount=1, # LOG ERROR COUNT TO PROMETHEUS + amount=1, # LOG ERROR COUNT / TOTAL REQUESTS TO PROMETHEUS ) async def async_service_success_hook(self, payload: ServiceLoggerPayload): @@ -151,6 +170,8 @@ async def async_service_success_hook(self, payload: ServiceLoggerPayload): if self.mock_testing: self.mock_testing_success_calls += 1 + print(f"payload call type: {payload.call_type}") + if payload.service.value in self.payload_to_prometheus_map: prom_objects = self.payload_to_prometheus_map[payload.service.value] for obj in prom_objects: @@ -160,12 +181,20 @@ async def async_service_success_hook(self, payload: ServiceLoggerPayload): labels=payload.service.value, amount=payload.duration, ) + elif isinstance(obj, self.Counter) and "total_requests" in obj._name: + self.increment_counter( + counter=obj, + labels=payload.service.value, + amount=1, # LOG TOTAL REQUESTS TO PROMETHEUS + ) async def async_service_failure_hook(self, payload: ServiceLoggerPayload): print(f"received error payload: {payload.error}") if self.mock_testing: self.mock_testing_failure_calls += 1 + print(f"payload call type: {payload.call_type}") + if payload.service.value in self.payload_to_prometheus_map: prom_objects = self.payload_to_prometheus_map[payload.service.value] for obj in prom_objects: diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index 53c59ff8a7e8..d717dc15958a 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -4,14 +4,12 @@ model_list: model: openai/my-fake-model api_key: my-fake-key api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/ - # api_base: http://0.0.0.0:8080 stream_timeout: 0.001 - model_name: fake-openai-endpoint litellm_params: model: openai/my-fake-model-2 api_key: my-fake-key api_base: https://openai-function-calling-workers.tasslexyz.workers.dev/ - # api_base: http://0.0.0.0:8080 stream_timeout: 0.001 - litellm_params: model: azure/chatgpt-v-2 @@ -30,15 +28,8 @@ model_list: # api_key: my-fake-key # api_base: https://exampleopenaiendpoint-production.up.railway.app/ -# litellm_settings: -# success_callback: ["prometheus"] -# failure_callback: ["prometheus"] -# service_callback: ["prometheus_system"] -# upperbound_key_generate_params: -# max_budget: os.environ/LITELLM_UPPERBOUND_KEYS_MAX_BUDGET - router_settings: - # routing_strategy: usage-based-routing-v2 + routing_strategy: usage-based-routing-v2 # redis_url: "os.environ/REDIS_URL" redis_host: os.environ/REDIS_HOST redis_port: os.environ/REDIS_PORT @@ -48,6 +39,10 @@ router_settings: litellm_settings: num_retries: 3 # retry call 3 times on each model_name allowed_fails: 3 # cooldown model if it fails > 1 call in a minute. 
+ success_callback: ["prometheus"] + failure_callback: ["prometheus"] + service_callback: ["prometheus_system"] + general_settings: alerting: ["slack"] diff --git a/litellm/router_strategy/lowest_tpm_rpm_v2.py b/litellm/router_strategy/lowest_tpm_rpm_v2.py index b2b6df42bfea..39dbcd9d0596 100644 --- a/litellm/router_strategy/lowest_tpm_rpm_v2.py +++ b/litellm/router_strategy/lowest_tpm_rpm_v2.py @@ -407,13 +407,15 @@ async def async_get_available_deployments( tpm_keys.append(tpm_key) rpm_keys.append(rpm_key) - tpm_values = await self.router_cache.async_batch_get_cache( - keys=tpm_keys - ) # [1, 2, None, ..] - rpm_values = await self.router_cache.async_batch_get_cache( - keys=rpm_keys + combined_tpm_rpm_keys = tpm_keys + rpm_keys + + combined_tpm_rpm_values = await self.router_cache.async_batch_get_cache( + keys=combined_tpm_rpm_keys ) # [1, 2, None, ..] + tpm_values = combined_tpm_rpm_values[: len(tpm_keys)] + rpm_values = combined_tpm_rpm_values[len(tpm_keys) :] + return self._common_checks_available_deployment( model_group=model_group, healthy_deployments=healthy_deployments, From aad827e40f292332ec7a615756cc8112d55c5a8e Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 20 Apr 2024 16:25:44 -0700 Subject: [PATCH 34/43] ui - see extra optional params in accordion --- .../src/components/create_key_button.tsx | 79 ++++++++++++++++++- 1 file changed, 78 insertions(+), 1 deletion(-) diff --git a/ui/litellm-dashboard/src/components/create_key_button.tsx b/ui/litellm-dashboard/src/components/create_key_button.tsx index 6edc90dfc5c2..d8716d304e95 100644 --- a/ui/litellm-dashboard/src/components/create_key_button.tsx +++ b/ui/litellm-dashboard/src/components/create_key_button.tsx @@ -2,7 +2,7 @@ import React, { useState, useEffect, useRef } from "react"; import { Button, TextInput, Grid, Col } from "@tremor/react"; -import { Card, Metric, Text, Title, Subtitle } from "@tremor/react"; +import { Card, Metric, Text, Title, Subtitle, Accordion, AccordionHeader, AccordionBody, } from "@tremor/react"; import { CopyToClipboard } from 'react-copy-to-clipboard'; import { Button as Button2, @@ -308,6 +308,83 @@ const CreateKey: React.FC = ({ + + + Optional Settings + + + { + if (value && team && team.max_budget !== null && value > team.max_budget) { + throw new Error(`Budget cannot exceed team max budget: $${team.max_budget}`); + } + }, + }, + ]} + > + + + + + + { + if (value && team && team.tpm_limit !== null && value > team.tpm_limit) { + throw new Error(`TPM limit cannot exceed team TPM limit: ${team.tpm_limit}`); + } + }, + }, + ]} + > + + + { + if (value && team && team.rpm_limit !== null && value > team.rpm_limit) { + throw new Error(`RPM limit cannot exceed team RPM limit: ${team.rpm_limit}`); + } + }, + }, + ]} + > + + + + + + + + + + + + )}
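Note on the prometheus_services.py change in patch 33 above: each tracked service now gets three metrics instead of two, all following a single litellm_{service}_{type_of_request} naming scheme (a latency histogram plus failed_requests and total_requests counters). Below is a minimal, self-contained sketch of that scheme using prometheus_client directly; the service name "redis" and the duration value are placeholders for illustration, and the real integration wires this through the PrometheusServicesLogger class and its is_metric_registered guard rather than bare module-level calls.

    from prometheus_client import Counter, Histogram

    def build_service_metrics(service: str):
        # One latency histogram plus two counters per service,
        # all named litellm_{service}_{type_of_request}.
        latency = Histogram(
            f"litellm_{service}_latency",
            f"Latency for {service} service",
            labelnames=[service],
        )
        failed = Counter(
            f"litellm_{service}_failed_requests",
            f"Total failed_requests for {service} service",
            labelnames=[service],
        )
        total = Counter(
            f"litellm_{service}_total_requests",
            f"Total total_requests for {service} service",
            labelnames=[service],
        )
        return latency, failed, total

    latency, failed, total = build_service_metrics("redis")

    # Success path: record the call duration and bump total_requests.
    latency.labels("redis").observe(0.0042)  # duration is a made-up value
    total.labels("redis").inc(1)

    # Failure path: bump both failed_requests and total_requests,
    # matching the "LOG ERROR COUNT / TOTAL REQUESTS" comment in the patch.
    failed.labels("redis").inc(1)
    total.labels("redis").inc(1)

Registering the same metric name twice in the default registry raises a duplicated-timeseries error, which is why the patched class checks is_metric_registered before creating each histogram or counter.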
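Note on the lowest_tpm_rpm_v2.py change in patch 33 above: it collapses two Redis round trips into one by concatenating the TPM and RPM key lists, issuing a single batch get, and slicing the flat result back apart at len(tpm_keys). A rough illustration of that slicing follows; the dict-backed cache class and the key names are invented stand-ins for LiteLLM's router cache, not the real implementation.

    import asyncio

    class FakeCache:
        """Stand-in for the router cache; the real code reads from Redis."""
        def __init__(self, data):
            self._data = data

        async def async_batch_get_cache(self, keys):
            # One round trip for the whole key list, None for misses.
            return [self._data.get(k) for k in keys]

    async def get_tpm_rpm_usage(cache, tpm_keys, rpm_keys):
        combined_keys = tpm_keys + rpm_keys  # single batched read
        combined_values = await cache.async_batch_get_cache(keys=combined_keys)
        tpm_values = combined_values[: len(tpm_keys)]  # first slice lines up with tpm_keys
        rpm_values = combined_values[len(tpm_keys):]   # remainder lines up with rpm_keys
        return tpm_values, rpm_values

    cache = FakeCache({"gpt-4:deployment-a:tpm": 1200, "gpt-4:deployment-a:rpm": 8})
    tpm, rpm = asyncio.run(
        get_tpm_rpm_usage(
            cache,
            tpm_keys=["gpt-4:deployment-a:tpm", "gpt-4:deployment-b:tpm"],
            rpm_keys=["gpt-4:deployment-a:rpm", "gpt-4:deployment-b:rpm"],
        )
    )
    print(tpm, rpm)  # [1200, None] [8, None]

Because the batch get preserves key order, slicing at len(tpm_keys) keeps each value aligned with the deployment it came from, which appears to be why _common_checks_available_deployment can be called unchanged downstream.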
From 41d3a17f1d6e435fb571f7463289d9f509c6b30a Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 20 Apr 2024 16:30:17 -0700 Subject: [PATCH 35/43] ui - non admin flow --- .../src/components/dashboard_default_team.tsx | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/ui/litellm-dashboard/src/components/dashboard_default_team.tsx b/ui/litellm-dashboard/src/components/dashboard_default_team.tsx index 0abfd1a562fb..c845ef1508a6 100644 --- a/ui/litellm-dashboard/src/components/dashboard_default_team.tsx +++ b/ui/litellm-dashboard/src/components/dashboard_default_team.tsx @@ -38,13 +38,16 @@ const DashboardTeam: React.FC = ({ return (
Select Team - - If you belong to multiple teams, this setting controls which team is - used by default when creating new API Keys. - - - Default Team: If no team_id is set for a key, it will be grouped under here. - + {userRole !== "App User" && ( + <> + + If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys. + + + Default Team: If no team_id is set for a key, it will be grouped under here. + + + )} {updatedTeams && updatedTeams.length > 0 ? ( - + Optional Settings From 2165b2447984683672fa4e99f4381e5c48614b73 Mon Sep 17 00:00:00 2001 From: Ishaan Jaff Date: Sat, 20 Apr 2024 18:19:08 -0700 Subject: [PATCH 37/43] fix - show team models / team info in admin ui --- .../src/components/dashboard_default_team.tsx | 19 ++-- .../src/components/user_dashboard.tsx | 7 ++ .../src/components/view_user_team.tsx | 95 +++++++++++++++++++ 3 files changed, 111 insertions(+), 10 deletions(-) create mode 100644 ui/litellm-dashboard/src/components/view_user_team.tsx diff --git a/ui/litellm-dashboard/src/components/dashboard_default_team.tsx b/ui/litellm-dashboard/src/components/dashboard_default_team.tsx index c845ef1508a6..98f287ed4c97 100644 --- a/ui/litellm-dashboard/src/components/dashboard_default_team.tsx +++ b/ui/litellm-dashboard/src/components/dashboard_default_team.tsx @@ -34,20 +34,19 @@ const DashboardTeam: React.FC = ({ } else { updatedTeams = teams ? [...teams, defaultTeam] : [defaultTeam]; } + if (userRole === 'App User') return null; return (
Select Team - {userRole !== "App User" && ( - <> - - If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys. - - - Default Team: If no team_id is set for a key, it will be grouped under here. - - - )} + + + If you belong to multiple teams, this setting controls which team is used by default when creating new API Keys. + + + Default Team: If no team_id is set for a key, it will be grouped under here. + + {updatedTeams && updatedTeams.length > 0 ? (