Provider-specific Params
Providers might offer params not supported by OpenAI (e.g. top_k). LiteLLM treats any non-OpenAI param as a provider-specific param and passes it straight through to the provider in the request body, as a kwarg. See the reserved params.
You can pass these params in two ways:
- via completion(): we pass the non-OpenAI param straight through to the provider as part of the request body, e.g. completion(model="claude-instant-1", top_k=3).
- via provider-specific config variables (e.g. litellm.OpenAIConfig()); a sketch of both approaches follows this list.
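For instance, a minimal sketch of both approaches against Anthropic, assuming an Anthropic API key is set and that litellm.AnthropicConfig accepts top_k (Anthropic's top-k sampling param):

import litellm, os

os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-key"

## Option 1: pass the non-OpenAI param directly to completion();
## LiteLLM forwards top_k to Anthropic in the request body
response = litellm.completion(
    model="claude-instant-1",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    top_k=3,
)

## Option 2: set it once on the provider config;
## it then applies to subsequent calls routed to that provider
litellm.AnthropicConfig(top_k=3)
response = litellm.completion(
    model="claude-instant-1",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)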
SDK Usage
- OpenAI
- OpenAI Text Completion
- Azure OpenAI
- Anthropic
- Huggingface
- TogetherAI
- Ollama
- Replicate
- Petals
- Palm
- AI21
- Cohere
import litellm, os

# set env variables
os.environ["OPENAI_API_KEY"] = "your-openai-key"

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.OpenAIConfig(max_tokens=200)

response_2 = litellm.completion(
    model="gpt-3.5-turbo",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
import litellm, os

# set env variables
os.environ["OPENAI_API_KEY"] = "your-openai-key"

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="text-davinci-003",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.OpenAITextCompletionConfig(max_tokens=200)

response_2 = litellm.completion(
    model="text-davinci-003",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
import litellm, os

# set env variables
os.environ["AZURE_API_KEY"] = "your-azure-api-key"
os.environ["AZURE_API_BASE"] = "your-azure-api-base"
os.environ["AZURE_API_TYPE"] = "azure" # [OPTIONAL]
os.environ["AZURE_API_VERSION"] = "2023-07-01-preview" # [OPTIONAL]

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="azure/chatgpt-v-2",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.AzureOpenAIConfig(max_tokens=200)

response_2 = litellm.completion(
    model="azure/chatgpt-v-2",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
import litellm, os

# set env variables
os.environ["ANTHROPIC_API_KEY"] = "your-anthropic-key"

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="claude-instant-1",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.AnthropicConfig(max_tokens_to_sample=200)

response_2 = litellm.completion(
    model="claude-instant-1",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
import litellm, os

# set env variables
os.environ["HUGGINGFACE_API_KEY"] = "your-huggingface-key" # [OPTIONAL]

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="huggingface/mistralai/Mistral-7B-Instruct-v0.1",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    api_base="https://your-huggingface-api-endpoint",
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.HuggingfaceConfig(max_new_tokens=200)

response_2 = litellm.completion(
    model="huggingface/mistralai/Mistral-7B-Instruct-v0.1",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    api_base="https://your-huggingface-api-endpoint"
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
import litellm, os

# set env variables
os.environ["TOGETHERAI_API_KEY"] = "your-togetherai-key"

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="together_ai/togethercomputer/llama-2-70b-chat",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.TogetherAIConfig(max_tokens=200)

response_2 = litellm.completion(
    model="together_ai/togethercomputer/llama-2-70b-chat",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
import litellm

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="ollama/llama2",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.OllamaConfig(num_predict=200)

response_2 = litellm.completion(
    model="ollama/llama2",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
import litellm, os

# set env variables
os.environ["REPLICATE_API_KEY"] = "your-replicate-key"

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.ReplicateConfig(max_new_tokens=200)

response_2 = litellm.completion(
    model="replicate/meta/llama-2-70b-chat:02e509c789964a7ea8736978a43525956ef40397be9033abf9fd2badfe68c9e3",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
import litellm

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="petals/petals-team/StableBeluga2",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    api_base="https://chat.petals.dev/api/v1/generate",
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.PetalsConfig(max_new_tokens=200)

response_2 = litellm.completion(
    model="petals/petals-team/StableBeluga2",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    api_base="https://chat.petals.dev/api/v1/generate",
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
import litellm, os

# set env variables
os.environ["PALM_API_KEY"] = "your-palm-key"

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="palm/chat-bison",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.PalmConfig(maxOutputTokens=200)

response_2 = litellm.completion(
    model="palm/chat-bison",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
import litellm, os

# set env variables
os.environ["AI21_API_KEY"] = "your-ai21-key"

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="j2-mid",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.AI21Config(maxTokens=200)

response_2 = litellm.completion(
    model="j2-mid",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
import litellm, os

# set env variables
os.environ["COHERE_API_KEY"] = "your-cohere-key"

## SET MAX TOKENS - via completion()
response_1 = litellm.completion(
    model="command-nightly",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
    max_tokens=10
)

response_1_text = response_1.choices[0].message.content

## SET MAX TOKENS - via config
litellm.CohereConfig(max_tokens=200)

response_2 = litellm.completion(
    model="command-nightly",
    messages=[{"content": "Hello, how are you?", "role": "user"}],
)

response_2_text = response_2.choices[0].message.content

## TEST OUTPUT
assert len(response_2_text) > len(response_1_text)
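To see which params LiteLLM translates into a provider's native equivalents (anything outside that list is forwarded as a provider-specific kwarg), a quick check, assuming your litellm version exposes get_supported_openai_params:

from litellm import get_supported_openai_params

# prints the OpenAI-style params LiteLLM maps for this model's provider;
# any other kwarg to completion() is passed through as provider-specific
print(get_supported_openai_params(model="claude-instant-1"))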
Proxy Usage
Via Config
model_list:
  - model_name: llama-3-8b-instruct
    litellm_params:
      model: predibase/llama-3-8b-instruct
      api_key: os.environ/PREDIBASE_API_KEY
      tenant_id: os.environ/PREDIBASE_TENANT_ID
      max_tokens: 256
      adapter_base: <my-special_base> # 👈 PROVIDER-SPECIFIC PARAM
Via Request
curl -X POST 'http://0.0.0.0:4000/chat/completions' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
  "model": "llama-3-8b-instruct",
  "messages": [
    {
      "role": "user",
      "content": "What'\''s the weather like in Boston today?"
    }
  ],
  "adapter_id": "my-special-adapter-id" # 👈 PROVIDER-SPECIFIC PARAM
}'
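The same request through the OpenAI Python SDK, a minimal sketch assuming the proxy configured above is running locally on port 4000; extra_body is the OpenAI SDK's mechanism for sending extra, non-standard fields:

import openai

client = openai.OpenAI(
    api_key="sk-1234",               # your LiteLLM proxy key
    base_url="http://0.0.0.0:4000",  # your LiteLLM proxy
)

response = client.chat.completions.create(
    model="llama-3-8b-instruct",
    messages=[{"role": "user", "content": "What's the weather like in Boston today?"}],
    extra_body={"adapter_id": "my-special-adapter-id"},  # 👈 provider-specific param
)
print(response.choices[0].message.content)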