
Error Handling

The SolidRusT AI API (served via Artemis Gateway and LiteLLM proxy) uses standard HTTP status codes and returns detailed error information in JSON format compatible with the OpenAI API specification.

All errors follow this structure:

{
  "error": {
    "message": "Human-readable error description",
    "type": "error_type",
    "code": "error_code",
    "param": "parameter_name"
  }
}
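When you use the OpenAI SDKs, these fields are surfaced on the raised exception rather than returned as a response body. A minimal sketch with the Python SDK (assuming openai>=1.x; the invalid model name is only there to trigger an error):

from openai import OpenAI, APIStatusError

client = OpenAI(api_key="YOUR_API_KEY", base_url="https://api.solidrust.ai/v1")

try:
    client.chat.completions.create(
        model="does-not-exist",  # hypothetical invalid model, used only to force an error
        messages=[{"role": "user", "content": "Hello"}],
    )
except APIStatusError as e:
    # e.body holds the parsed error payload; handle both {"error": {...}} and flat shapes
    err = e.body.get("error", e.body) if isinstance(e.body, dict) else {}
    print(e.status_code)                                    # HTTP status code
    print(err.get("message"))                               # human-readable description
    print(err.get("type"), err.get("code"), err.get("param"))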
The API uses the following HTTP status codes:

| Code | Description | Retry? |
| --- | --- | --- |
| 200 | Success | N/A |
| 400 | Bad Request - Invalid parameters | No |
| 401 | Unauthorized - Invalid or missing API key | No |
| 403 | Forbidden - Key doesn’t have required permissions | No |
| 404 | Not Found - Invalid endpoint | No |
| 422 | Unprocessable Entity - Valid JSON but invalid content | No |
| 429 | Too Many Requests - Rate limit exceeded | Yes |
| 500 | Internal Server Error | Yes |
| 502 | Bad Gateway - Upstream service unavailable | Yes |
| 503 | Service Unavailable - Temporary outage | Yes |
| 504 | Gateway Timeout - Request took too long | Yes |
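As a rule of thumb from the table: only 429 and 5xx responses are worth retrying; 4xx client errors should be fixed, not retried. A small illustrative helper (the names are our own, not part of the API):

# Illustrative helper: retry only on rate limits and transient server errors.
RETRYABLE_STATUS_CODES = {429, 500, 502, 503, 504}

def should_retry(status_code: int) -> bool:
    return status_code in RETRYABLE_STATUS_CODES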
Invalid API Key

{
  "error": {
    "message": "Invalid API key provided",
    "type": "authentication_error",
    "code": "invalid_api_key"
  }
}

Cause: The API key is malformed, revoked, or doesn’t exist.

Solution:

  1. Verify your API key at console.solidrust.ai
  2. Check the key hasn’t been revoked
  3. Ensure no extra whitespace or characters
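A common source of invalid_api_key errors is stray whitespace picked up while copy-pasting. Loading the key from an environment variable and stripping it avoids this; a minimal sketch (the variable name SOLIDRUST_API_KEY is just an example):

import os
from openai import OpenAI

# Hypothetical env var name; strip() removes accidental whitespace or trailing newlines
api_key = os.environ["SOLIDRUST_API_KEY"].strip()

client = OpenAI(api_key=api_key, base_url="https://api.solidrust.ai/v1")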
Missing Authorization Header

{
  "error": {
    "message": "Missing Authorization header",
    "type": "authentication_error",
    "code": "missing_auth_header"
  }
}

Solution: Include the Authorization: Bearer YOUR_API_KEY header in your request.
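If you call the API without an SDK, set the header explicitly. A sketch using the requests library (the /chat/completions path follows the standard OpenAI convention):

import requests

resp = requests.post(
    "https://api.solidrust.ai/v1/chat/completions",
    headers={
        "Authorization": "Bearer YOUR_API_KEY",  # required on every request
        "Content-Type": "application/json",
    },
    json={
        "model": "vllm-primary",
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])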

Model Not Found

{
  "error": {
    "message": "Model 'invalid-model' not found. Available models: qwen3-4b, vllm-primary",
    "type": "invalid_request_error",
    "code": "model_not_found",
    "param": "model"
  }
}

Solution: Use a valid model ID. Check Models for available options. Recommended: vllm-primary (alias that routes to the current best model).
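The SDK can also list the models the gateway currently exposes, which is a quick way to confirm a valid ID:

# List model IDs available through the gateway (standard OpenAI models endpoint)
# 'client' is the configured OpenAI client from the examples above
for model in client.models.list():
    print(model.id)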

Missing Required Field

{
  "error": {
    "message": "Missing required field: 'messages'",
    "type": "invalid_request_error",
    "code": "missing_field",
    "param": "messages"
  }
}
Invalid Parameter Type

{
  "error": {
    "message": "Invalid type for 'temperature': expected float, got string",
    "type": "invalid_request_error",
    "code": "invalid_type",
    "param": "temperature"
  }
}
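Both errors above are fixed on the client side before sending the request: supply the messages field and pass numeric parameters as numbers, not strings. For example:

# Corrected request: 'messages' is supplied and temperature is a float, not a string
# 'client' is the configured OpenAI client from the examples above
client.chat.completions.create(
    model="vllm-primary",
    messages=[{"role": "user", "content": "Hello"}],
    temperature=0.7,
)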
Rate Limit Exceeded

{
  "error": {
    "message": "Rate limit exceeded. Please retry after 60 seconds.",
    "type": "rate_limit_error",
    "code": "rate_limit_exceeded"
  }
}

Headers included:

Retry-After: 60
X-RateLimit-Limit: 60
X-RateLimit-Remaining: 0
X-RateLimit-Reset: 1704067260

Solution: Implement exponential backoff. See Rate Limits for details.
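You can also watch the rate limit headers proactively and slow down before hitting 429. A sketch using the Python SDK's raw-response access (this assumes the gateway returns the same headers on successful responses):

# 'client' is the configured OpenAI client from the examples above
raw = client.chat.completions.with_raw_response.create(
    model="vllm-primary",
    messages=[{"role": "user", "content": "Hello"}],
)
remaining = raw.headers.get("x-ratelimit-remaining")
reset_at = raw.headers.get("x-ratelimit-reset")
print(f"Requests remaining: {remaining}, window resets at: {reset_at}")

response = raw.parse()  # the usual ChatCompletion object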

Failover Active

{
  "error": {
    "message": "Primary model temporarily unavailable, request routed to fallback",
    "type": "server_error",
    "code": "failover_active"
  }
}
Internal Server Error

{
  "error": {
    "message": "An internal error occurred. Please try again.",
    "type": "server_error",
    "code": "internal_error"
  }
}
Request Timeout

{
  "error": {
    "message": "Request timed out. Consider reducing max_tokens or prompt length.",
    "type": "server_error",
    "code": "timeout"
  }
}
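Timeouts can often be avoided by capping the completion size and setting an explicit client timeout. A minimal sketch (the specific limits are illustrative, not recommendations):

from openai import OpenAI

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.solidrust.ai/v1",
    timeout=60.0,      # give up instead of waiting indefinitely
    max_retries=0,     # handle retries explicitly, as shown below
)

response = client.chat.completions.create(
    model="vllm-primary",
    messages=[{"role": "user", "content": "Hello!"}],
    max_tokens=256,    # smaller completions are less likely to time out
)

The examples below put these pieces together: retry logic with exponential backoff that distinguishes retryable errors (429 and 5xx) from errors that must be fixed in the request, first in Python and then in JavaScript.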
from openai import OpenAI, APIError, RateLimitError, AuthenticationError, BadRequestError
import time
import random

client = OpenAI(
    api_key="YOUR_API_KEY",
    base_url="https://api.solidrust.ai/v1"
)

def make_request_with_retry(messages, max_retries=5):
    """Make a request with exponential backoff retry logic."""
    for attempt in range(max_retries):
        try:
            return client.chat.completions.create(
                model="vllm-primary",
                messages=messages
            )
        except AuthenticationError as e:
            # Don't retry auth errors - fix the API key
            print(f"Authentication failed: {e.message}")
            raise
        except BadRequestError as e:
            # Don't retry validation errors - fix the request
            print(f"Invalid request: {e.message}")
            raise
        except RateLimitError as e:
            if attempt == max_retries - 1:
                raise
            # Use the Retry-After header if available, otherwise back off exponentially
            retry_after = e.response.headers.get("retry-after")
            wait_time = float(retry_after) if retry_after else (2 ** attempt) + random.uniform(0, 1)
            print(f"Rate limited, waiting {wait_time:.1f}s...")
            time.sleep(wait_time)
        except APIError as e:
            # Retry server errors (500/502/503/504) with exponential backoff
            status = getattr(e, "status_code", None)
            if status in (500, 502, 503, 504):
                if attempt == max_retries - 1:
                    raise
                wait_time = (2 ** attempt) + random.uniform(0, 1)
                print(f"Server error ({status}), retrying in {wait_time:.1f}s...")
                time.sleep(wait_time)
            else:
                raise

# Usage
try:
    response = make_request_with_retry([
        {"role": "user", "content": "Hello!"}
    ])
    print(response.choices[0].message.content)
except Exception as e:
    print(f"Request failed: {e}")
The same retry logic in JavaScript:

import OpenAI from 'openai';

const client = new OpenAI({
  apiKey: 'YOUR_API_KEY',
  baseURL: 'https://api.solidrust.ai/v1',
});

async function makeRequestWithRetry(messages, maxRetries = 5) {
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    try {
      return await client.chat.completions.create({
        model: 'vllm-primary',
        messages,
      });
    } catch (error) {
      // Don't retry authentication or validation errors
      if (error instanceof OpenAI.AuthenticationError) {
        console.error('Authentication failed:', error.message);
        throw error;
      }
      if (error instanceof OpenAI.BadRequestError) {
        console.error('Invalid request:', error.message);
        throw error;
      }
      // Retry rate limits with backoff
      if (error instanceof OpenAI.RateLimitError) {
        if (attempt === maxRetries - 1) throw error;
        const retryAfter = error.headers?.['retry-after'];
        const waitTime = retryAfter ? parseInt(retryAfter, 10) : Math.pow(2, attempt) + Math.random();
        console.log(`Rate limited, waiting ${waitTime.toFixed(1)}s...`);
        await new Promise(r => setTimeout(r, waitTime * 1000));
        continue;
      }
      // Retry server errors
      if (error instanceof OpenAI.APIError && [500, 502, 503, 504].includes(error.status)) {
        if (attempt === maxRetries - 1) throw error;
        const waitTime = Math.pow(2, attempt) + Math.random();
        console.log(`Server error (${error.status}), retrying in ${waitTime.toFixed(1)}s...`);
        await new Promise(r => setTimeout(r, waitTime * 1000));
        continue;
      }
      throw error;
    }
  }
}

// Usage
try {
  const response = await makeRequestWithRetry([
    { role: 'user', content: 'Hello!' }
  ]);
  console.log(response.choices[0].message.content);
} catch (error) {
  console.error('Request failed:', error.message);
}

The SolidRusT AI platform includes automatic failover for high availability:

Request → Artemis Gateway → LiteLLM Proxy → vLLM (primary)
                                                ↓ (if unavailable)
                                            Claude Haiku (fallback)

Failover can occur when:

  • vLLM pod is scaling or restarting
  • GPU maintenance window
  • Model loading in progress
  • Unexpected vLLM crash

To detect whether a request was served by the fallback, check the model field in the response:

response = client.chat.completions.create(...)
if 'claude' in response.model.lower():
    print("Note: Response served by fallback model")
The primary and fallback models differ in a few ways:

| Aspect | vLLM (Primary) | Claude Haiku (Fallback) |
| --- | --- | --- |
| Latency | Lower (~50ms TTFB) | Higher (~200ms TTFB) |
| Cost | Free tier included | Metered usage |
| Context | 4K tokens | 200K tokens |
| Capabilities | Qwen3-4B | Claude Haiku |