100+ models across 9 providers. Route to OpenAI, Anthropic, Google, xAI, DeepSeek, Bedrock, Vertex AI, Azure, and Ollama with intelligent routing and cost control.
Chat with models, explore API capabilities, and get code samples
Estimate the cost of a request before sending it. Get input/output token estimates and cost breakdown by model.
See how the gateway routes requests based on Cedar policies. Different prompts trigger different routing rules.
Test the semantic similarity cache. Similar prompts return cached responses instantly, saving cost and latency.
View available MCP servers and their tools. Agents can access databases, file systems, and external APIs through a unified interface.
Query databases, list tables, execute SQL
Read files, list directories, search
Create issues, PRs, manage repos
Fetch URLs, scrape content, API calls
# Stream a chat completion through the local gateway using the OpenAI SDK
# (the gateway exposes an OpenAI-compatible /v1 API).
from openai import OpenAI

client = OpenAI(
    api_key="sk-litellm-master-key-dev",
    base_url="http://localhost:4000/v1",
)

response = client.chat.completions.create(
    model="gpt-5-mini",
    messages=[
        {"role": "user", "content": "Hello, world!"}
    ],
    stream=True,
)

for chunk in response:
    # Some streamed chunks carry an empty `choices` list (e.g. a trailing
    # usage chunk), so check it before indexing to avoid an IndexError;
    # the final delta's content is None, which the truthiness check skips.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="")
# Call the gateway's OpenAI-compatible REST endpoint directly with requests.
import requests

response = requests.post(
    "http://localhost:4000/v1/chat/completions",
    headers={
        "Authorization": "Bearer sk-litellm-master-key-dev",
        "Content-Type": "application/json",
    },
    json={
        "model": "gpt-5-mini",
        "messages": [{"role": "user", "content": "Hello!"}],
        "max_tokens": 100,
    },
    # requests has no default timeout; without one, a hung gateway
    # blocks this call forever.
    timeout=30,
)
# Fail loudly on HTTP errors (bad key, budget exceeded, routing failure)
# instead of raising an opaque KeyError on the line below.
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
// Stream a chat completion through the local gateway with the OpenAI SDK
// and print each token as it arrives.
import OpenAI from 'openai';

const gateway = new OpenAI({
  apiKey: 'sk-litellm-master-key-dev',
  baseURL: 'http://localhost:4000/v1'
});

const completion = await gateway.chat.completions.create({
  model: 'gpt-5-mini',
  messages: [{ role: 'user', content: 'Hello, world!' }],
  stream: true
});

for await (const chunk of completion) {
  // Each chunk carries an incremental delta; fall back to '' when absent.
  const token = chunk.choices[0]?.delta?.content;
  process.stdout.write(token || '');
}
// Stream a chat completion with plain fetch and print the raw SSE payload.
const response = await fetch('http://localhost:4000/v1/chat/completions', {
  method: 'POST',
  headers: {
    'Authorization': 'Bearer sk-litellm-master-key-dev',
    'Content-Type': 'application/json'
  },
  body: JSON.stringify({
    model: 'gpt-5-mini',
    messages: [{ role: 'user', content: 'Hello!' }],
    stream: true
  })
});

// fetch only rejects on network failure; HTTP-level errors must be checked.
if (!response.ok) {
  throw new Error(`Gateway error: ${response.status} ${response.statusText}`);
}

const reader = response.body.getReader();
const decoder = new TextDecoder();
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  // { stream: true } keeps multi-byte UTF-8 sequences that straddle chunk
  // boundaries from being decoded into replacement characters.
  console.log(decoder.decode(value, { stream: true }));
}
// Minimal Go client for the gateway's OpenAI-compatible chat endpoint.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	payload := map[string]interface{}{
		"model": "gpt-5-mini",
		"messages": []map[string]string{
			{"role": "user", "content": "Hello!"},
		},
		"max_tokens": 100,
	}

	// Marshalling and request construction can fail; don't discard the errors.
	body, err := json.Marshal(payload)
	if err != nil {
		panic(err)
	}
	req, err := http.NewRequest("POST",
		"http://localhost:4000/v1/chat/completions",
		bytes.NewBuffer(body))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Authorization", "Bearer sk-litellm-master-key-dev")
	req.Header.Set("Content-Type", "application/json")

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Surface gateway-side failures (bad key, budget exceeded, routing error)
	// instead of printing a confusing error payload as if it were a result.
	if resp.StatusCode != http.StatusOK {
		panic(fmt.Sprintf("gateway returned %s", resp.Status))
	}

	var result map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		panic(err)
	}
	fmt.Println(result)
}
// Send a chat completion request to the local gateway with the JDK 11+
// HttpClient and print the raw JSON response body.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class GatewayExample {
    public static void main(String[] args) throws Exception {
        // Request payload; the gateway speaks the OpenAI chat-completions schema.
        String json = """
            {
            "model": "gpt-5-mini",
            "messages": [{"role": "user", "content": "Hello!"}],
            "max_tokens": 100
            }
            """;

        HttpClient httpClient = HttpClient.newHttpClient();

        HttpRequest chatRequest = HttpRequest.newBuilder()
                .uri(URI.create("http://localhost:4000/v1/chat/completions"))
                .header("Authorization", "Bearer sk-litellm-master-key-dev")
                .header("Content-Type", "application/json")
                .POST(HttpRequest.BodyPublishers.ofString(json))
                .build();

        HttpResponse<String> chatResponse =
                httpClient.send(chatRequest, HttpResponse.BodyHandlers.ofString());
        System.out.println(chatResponse.body());
    }
}
# Non-streaming chat completion against the gateway's OpenAI-compatible API.
curl http://localhost:4000/v1/chat/completions \
-H "Authorization: Bearer sk-litellm-master-key-dev" \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-5-mini",
"messages": [{"role": "user", "content": "Hello!"}],
"max_tokens": 100
}'
# Streaming chat completion: "stream": true makes the gateway respond with
# server-sent events; -N / --no-buffer make curl print each event as it
# arrives instead of buffering the whole response.
curl http://localhost:4000/v1/chat/completions \
-H "Authorization: Bearer sk-litellm-master-key-dev" \
-H "Content-Type: application/json" \
-N --no-buffer \
-d '{
"model": "gpt-5-mini",
"messages": [{"role": "user", "content": "Tell me a story"}],
"stream": true
}'
# List every model the gateway can route to.
curl http://localhost:4000/v1/models \
-H "Authorization: Bearer sk-litellm-master-key-dev"
Manage policies, budgets, teams, and workflows
Metrics, cost trends, and performance dashboards
Monitor agent orchestration and execution history
Everything you need to manage AI at scale
Intelligent model selection based on cost, latency SLAs, team quotas, and error rates. Hot-reload policies without restarts.
Cache responses by semantic similarity (92% threshold). Save 40%+ on repeated prompts. TTL-based expiration.
Pre-built templates: Research, Coding, Data Analysis. PostgreSQL checkpointing for resumable execution.
Temporal-powered agent coordination: Sequential, Parallel, Supervisor patterns. Human-in-the-loop approvals.
OpenAI, Anthropic, Google, xAI, DeepSeek, AWS Bedrock, Vertex AI, Azure OpenAI, Ollama. Unified interface for all.
Policy editor, workflow designer, budget dashboards, team management. Configure everything through a modern React UI.
Per-request cost prediction, team budgets, soft/hard limits, alerts. Real-time spend tracking with full audit trail.
Prometheus metrics, Grafana dashboards, Jaeger tracing. OpenTelemetry instrumentation across all services.
Connect agents to databases, file systems, APIs via MCP. Circuit breakers, rate limiting, audit logging built-in.