API Reference
Complete REST API documentation for LangTrain. Integrate model training, fine-tuning, and inference into your applications with our comprehensive API.
Key Features
- 🔗 **RESTful Design:** Clean, predictable REST API following industry standards, with JSON responses.
- 🔐 **Authentication:** Secure API key authentication with rate limiting and usage tracking.
- ⚡ **Real-time Updates:** WebSocket connections for real-time training progress and status updates.
- 📦 **SDKs Available:** Official Python and JavaScript SDKs, plus cURL examples throughout the documentation.
Authentication
All API requests require authentication using your **API key**. Include your key in the `Authorization` header as a Bearer token.
**Getting Your API Key:**
1. Sign in to your LangTrain dashboard
2. Navigate to Settings → API Keys
3. Generate a new API key
4. Store it securely (keys are only shown once)
**Security Best Practices:**
- Never commit API keys to version control
- Use environment variables for key storage (see the sketch after this list)
- Rotate keys regularly
- Monitor usage for suspicious activity
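For local development, a common pattern is to keep the key in a `.env` file that is excluded from version control and load it into the environment at startup. Below is a minimal sketch assuming the third-party `python-dotenv` package (not part of LangTrain); the `LANGTRAIN_API_KEY` variable name matches the authentication example that follows.

```python
# Minimal sketch: load the API key from a local .env file
# Assumes python-dotenv is installed: pip install python-dotenv
import os
from dotenv import load_dotenv

load_dotenv()  # reads key=value pairs from .env into os.environ

API_KEY = os.getenv('LANGTRAIN_API_KEY')
if not API_KEY:
    raise RuntimeError("LANGTRAIN_API_KEY is not set - add it to your .env file or environment")
```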
Code Example
```python
# Authentication examples
import requests
import os

# Set your API key as environment variable
API_KEY = os.getenv('LANGTRAIN_API_KEY')
BASE_URL = 'https://api.langtrain.ai/v1'

# Headers for all requests
headers = {
    'Authorization': f'Bearer {API_KEY}',
    'Content-Type': 'application/json',
    'User-Agent': 'LangTrain-Python/1.0.0'
}

# Test authentication
response = requests.get(f'{BASE_URL}/user/profile', headers=headers)
if response.status_code == 200:
    print("✅ Authentication successful")
    user_data = response.json()
    print(f"Welcome, {user_data['name']}!")
else:
    print(f"❌ Authentication failed: {response.status_code}")
    print(response.json())
```

Models API
The Models API allows you to **list available models**, get model details, and manage custom models. All base models are pre-loaded and ready for fine-tuning.
**Available Endpoints:**
- `GET /models` - List all available models
- `GET /models/{model_id}` - Get model details
- `POST /models` - Upload custom model
- `DELETE /models/{model_id}` - Delete custom model (sketched below)
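The delete endpoint is not covered in the code example below, so here is a minimal sketch that reuses the `requests`, `headers`, and `BASE_URL` setup from the authentication example. The function name is my own, and the 204 success code is an assumption (mirroring job cancellation later in this reference), not something this page confirms.

```python
# Hypothetical sketch: delete a custom model you previously uploaded
def delete_custom_model(model_id):
    response = requests.delete(f'{BASE_URL}/models/{model_id}', headers=headers)
    # Assumption: a successful delete returns 200 or 204 No Content
    return response.status_code in (200, 204)
```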
**Model Categories:**
- **Chat Models:** Optimized for conversational AI
- **Code Models:** Specialized for code generation
- **Instruct Models:** Fine-tuned for instruction following
- **Base Models:** Raw foundation models for custom fine-tuning
Code Example
```python
# Models API examples

# 1. List all available models
def list_models():
    response = requests.get(f'{BASE_URL}/models', headers=headers)
    models = response.json()
    print(f"Found {len(models['data'])} models:")
    for model in models['data']:
        print(f"  - {model['id']}: {model['name']} ({model['parameters']} params)")
    return models

# 2. Get specific model details
def get_model_details(model_id):
    response = requests.get(f'{BASE_URL}/models/{model_id}', headers=headers)
    if response.status_code == 200:
        model = response.json()
        return {
            'id': model['id'],
            'name': model['name'],
            'description': model['description'],
            'parameters': model['parameters'],
            'context_length': model['context_length'],
            'supported_tasks': model['supported_tasks'],
            'pricing': model['pricing']
        }
    return None

# 3. Upload custom model
def upload_custom_model(model_path, name, description):
    data = {
        'name': name,
        'description': description,
        'model_type': 'custom'
    }
    with open(model_path, 'rb') as f:
        response = requests.post(
            f'{BASE_URL}/models',
            # Omit Content-Type so requests can set the multipart boundary itself
            headers={'Authorization': f'Bearer {API_KEY}'},
            files={'model': f},
            data=data
        )
    return response.json()

# Usage examples
models = list_models()
llama_details = get_model_details('meta-llama/Llama-2-7b-hf')
print(f"Llama-2-7b context length: {llama_details['context_length']}")

# Custom model upload
# custom_model = upload_custom_model('./my_model.bin', 'My Custom Model', 'Fine-tuned for specific domain')
```

Fine-tuning API
Start and manage **fine-tuning jobs** with the Fine-tuning API. Monitor progress, adjust parameters, and deploy your custom models.
**Job Lifecycle:**
1. **Create** - Submit fine-tuning job with dataset and config
2. **Queue** - Job enters processing queue
3. **Running** - Active training with progress updates
4. **Completed** - Model ready for deployment
5. **Failed** - Error occurred, check logs
**Supported Fine-tuning Methods:**
- **LoRA** - Parameter-efficient adaptation (see the parameter estimate after this list)
- **QLoRA** - Quantized LoRA for larger models
- **Full Fine-tuning** - Traditional full parameter training
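To see why LoRA is called parameter-efficient, here is a rough back-of-envelope estimate for the default configuration used in the example below (rank 32 on the four attention projections). It assumes Llama-2-7B's publicly documented architecture of 32 layers with a hidden size of 4096; the numbers are illustrative only and are not returned by the API.

```python
# Rough estimate of LoRA trainable parameters vs. full fine-tuning (illustrative only)
hidden_size = 4096      # Llama-2-7B hidden dimension (assumption from the public model card)
num_layers = 32         # Llama-2-7B transformer layers (assumption from the public model card)
r = 32                  # LoRA rank from the default config below
target_modules = 4      # q_proj, v_proj, k_proj, o_proj

# Each adapted projection adds two low-rank matrices: (hidden x r) and (r x hidden)
lora_params = num_layers * target_modules * (hidden_size * r + r * hidden_size)
print(f"LoRA trainable parameters: ~{lora_params / 1e6:.1f}M "
      f"(~{lora_params / 7e9 * 100:.2f}% of a 7B-parameter model)")
```

The result (roughly 34M trainable parameters, about half a percent of the full model) is why LoRA and QLoRA jobs fit on far smaller hardware than full fine-tuning.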
Code Example
```python
# Fine-tuning API examples
import json
import time
from datetime import datetime

import websocket  # pip install websocket-client

# 1. Create fine-tuning job
def create_finetune_job(model_id, dataset_url, config=None):
    default_config = {
        'method': 'lora',
        'lora_config': {
            'r': 32,
            'alpha': 64,
            'dropout': 0.05,
            'target_modules': ['q_proj', 'v_proj', 'k_proj', 'o_proj']
        },
        'training_config': {
            'epochs': 3,
            'batch_size': 4,
            'learning_rate': 2e-4,
            'warmup_ratio': 0.1
        }
    }
    payload = {
        'model_id': model_id,
        'dataset': {
            'type': 'jsonl',
            'url': dataset_url
        },
        'config': config or default_config,
        'name': f'Custom {model_id} - {datetime.now().strftime("%Y%m%d_%H%M")}'
    }
    response = requests.post(
        f'{BASE_URL}/fine-tunes',
        headers=headers,
        json=payload
    )
    return response.json()

# 2. Monitor fine-tuning progress
def get_finetune_status(job_id):
    response = requests.get(f'{BASE_URL}/fine-tunes/{job_id}', headers=headers)
    if response.status_code == 200:
        job = response.json()
        return {
            'status': job['status'],
            'progress': job.get('progress', 0),
            'current_epoch': job.get('current_epoch', 0),
            'loss': job.get('metrics', {}).get('train_loss'),
            'eta_minutes': job.get('eta_minutes'),
            'logs_url': job.get('logs_url')
        }
    return None

# 3. Stream training logs (WebSocket)
def stream_training_logs(job_id, api_key):
    def on_message(ws, message):
        data = json.loads(message)
        if data['type'] == 'log':
            print(f"[{data['timestamp']}] {data['message']}")
        elif data['type'] == 'metrics':
            metrics = data['data']
            print(f"Epoch {metrics['epoch']}, Step {metrics['step']}: "
                  f"Loss={metrics['loss']:.4f}, LR={metrics['learning_rate']:.2e}")
        elif data['type'] == 'status':
            print(f"Status changed to: {data['status']}")

    def on_error(ws, error):
        print(f"WebSocket error: {error}")

    ws_url = f"wss://api.langtrain.ai/v1/fine-tunes/{job_id}/stream?token={api_key}"
    ws = websocket.WebSocketApp(ws_url, on_message=on_message, on_error=on_error)
    ws.run_forever()

# 4. Cancel fine-tuning job
def cancel_finetune_job(job_id):
    response = requests.delete(f'{BASE_URL}/fine-tunes/{job_id}', headers=headers)
    return response.status_code == 204

# Usage examples

# Start fine-tuning job
job = create_finetune_job(
    model_id='meta-llama/Llama-2-7b-hf',
    dataset_url='https://example.com/training_data.jsonl'
)
print(f"Started job {job['id']}, status: {job['status']}")

# Monitor progress
while True:
    status = get_finetune_status(job['id'])
    print(f"Progress: {status['progress']}%, Status: {status['status']}")
    if status['status'] in ['completed', 'failed']:
        break
    time.sleep(30)  # Check every 30 seconds
```

Inference API
Use the Inference API to **generate text** with base models or your fine-tuned models. Supports both synchronous and streaming responses.
**Generation Parameters:**
- **temperature:** Controls randomness (0.1-2.0; see the clamping sketch after this list)
- **top_p:** Nucleus sampling threshold
- **max_tokens:** Maximum output length
- **frequency_penalty:** Reduces repetition
- **presence_penalty:** Encourages topic diversity
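Out-of-range parameters come back as a 400 Bad Request (see Error Handling below), so it can help to clamp values client-side before sending a request. The sketch below is based only on the temperature range documented above; the other bounds are common conventions for these parameters, not confirmed API limits, and the helper name is my own.

```python
# Clamp generation parameters to safe ranges before sending a request
def clamp_params(temperature=0.7, top_p=0.9, max_tokens=256):
    return {
        'temperature': min(max(temperature, 0.1), 2.0),  # documented range: 0.1-2.0
        'top_p': min(max(top_p, 0.0), 1.0),              # nucleus sampling is a probability mass
        'max_tokens': max(1, int(max_tokens)),           # request at least one token
    }

print(clamp_params(temperature=3.5, top_p=1.2, max_tokens=0))
# {'temperature': 2.0, 'top_p': 1.0, 'max_tokens': 1}
```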
**Response Formats:**
- **Synchronous:** Get complete response at once
- **Streaming:** Receive tokens as they're generated (SSE)
- **Batch:** Process multiple prompts simultaneously
Code Example
```python
# Inference API examples

# 1. Simple text generation
def generate_text(model_id, prompt, **kwargs):
    payload = {
        'model': model_id,
        'prompt': prompt,
        'max_tokens': kwargs.get('max_tokens', 256),
        'temperature': kwargs.get('temperature', 0.7),
        'top_p': kwargs.get('top_p', 0.9),
        'frequency_penalty': kwargs.get('frequency_penalty', 0),
        'presence_penalty': kwargs.get('presence_penalty', 0),
        'stop': kwargs.get('stop', [])
    }
    response = requests.post(
        f'{BASE_URL}/completions',
        headers=headers,
        json=payload
    )
    if response.status_code == 200:
        result = response.json()
        return {
            'text': result['choices'][0]['text'],
            'finish_reason': result['choices'][0]['finish_reason'],
            'usage': result['usage']
        }
    return None

# 2. Streaming generation
def generate_stream(model_id, prompt, **kwargs):
    payload = {
        'model': model_id,
        'prompt': prompt,
        'stream': True,
        **kwargs
    }
    response = requests.post(
        f'{BASE_URL}/completions',
        headers=headers,
        json=payload,
        stream=True
    )
    for line in response.iter_lines():
        if line:
            line = line.decode('utf-8')
            if line.startswith('data: '):
                data = json.loads(line[6:])
                if data.get('choices'):
                    yield data['choices'][0]['delta'].get('content', '')

# 3. Chat completions (for chat models)
def chat_completion(model_id, messages, **kwargs):
    payload = {
        'model': model_id,
        'messages': messages,
        'max_tokens': kwargs.get('max_tokens', 512),
        'temperature': kwargs.get('temperature', 0.7),
        'stream': kwargs.get('stream', False)
    }
    response = requests.post(
        f'{BASE_URL}/chat/completions',
        headers=headers,
        json=payload
    )
    return response.json()

# 4. Batch inference (multiple prompts)
def batch_generate(model_id, prompts, **kwargs):
    payload = {
        'model': model_id,
        'prompts': prompts,
        'batch_size': len(prompts),
        **kwargs
    }
    response = requests.post(
        f'{BASE_URL}/batch/completions',
        headers=headers,
        json=payload
    )
    return response.json()

# Usage examples

# Simple generation
result = generate_text(
    'meta-llama/Llama-2-7b-hf',
    'The future of AI is',
    max_tokens=100,
    temperature=0.8
)
print(f"Generated: {result['text']}")
print(f"Tokens used: {result['usage']['total_tokens']}")

# Streaming generation
print("Streaming response:")
for token in generate_stream('meta-llama/Llama-2-7b-hf', 'Write a short story about'):
    print(token, end='', flush=True)
print()

# Chat completion
messages = [
    {'role': 'system', 'content': 'You are a helpful AI assistant.'},
    {'role': 'user', 'content': 'Explain quantum computing in simple terms.'}
]
chat_result = chat_completion('meta-llama/Llama-2-7b-chat-hf', messages)
print(f"Assistant: {chat_result['choices'][0]['message']['content']}")

# Batch processing
prompts = [
    'Translate "Hello" to French:',
    'Translate "Hello" to Spanish:',
    'Translate "Hello" to German:'
]
batch_results = batch_generate('meta-llama/Llama-2-7b-hf', prompts)
for i, result in enumerate(batch_results['choices']):
    print(f"Prompt {i+1}: {result['text'].strip()}")
```

Error Handling & Rate Limits
Implement robust **error handling** and respect rate limits to ensure reliable API integration. The API returns standard HTTP status codes and detailed error messages.
**HTTP Status Codes:**
- **200** - Success
- **400** - Bad Request (invalid parameters)
- **401** - Unauthorized (invalid API key)
- **429** - Rate Limited
- **500** - Server Error
**Rate Limits** (see the quick calculation after this list):
- **Free Tier:** 60 requests/minute, 1000 requests/day
- **Pro Tier:** 600 requests/minute, 50000 requests/day
- **Enterprise:** Custom limits based on agreement
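For long-running workloads the daily cap, not the per-minute cap, is usually the binding constraint. A quick calculation from the figures above shows the sustained rate each tier actually allows over a full day:

```python
# Sustained request rate implied by the published daily caps (figures from the list above)
tiers = {
    'free': {'per_minute': 60, 'per_day': 1_000},
    'pro': {'per_minute': 600, 'per_day': 50_000},
}

for name, limits in tiers.items():
    sustained_per_minute = limits['per_day'] / (24 * 60)  # spread evenly over 24 hours
    print(f"{name}: burst up to {limits['per_minute']}/min, "
          f"but only ~{sustained_per_minute:.1f}/min sustained before hitting the daily cap")
```

This is why the `RateLimiter` in the example below is configured well under the per-minute limit.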
Code Example
```python
# Comprehensive error handling and retry logic
import time
import logging
from functools import wraps

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class LangTrainAPIError(Exception):
    def __init__(self, status_code, message, error_code=None):
        self.status_code = status_code
        self.message = message
        self.error_code = error_code
        super().__init__(f"API Error {status_code}: {message}")

def retry_with_backoff(max_retries=3, backoff_factor=2):
    """Decorator for automatic retry with exponential backoff."""
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except LangTrainAPIError as e:
                    if e.status_code == 429:  # Rate limited
                        if attempt < max_retries:
                            wait_time = backoff_factor ** attempt
                            logger.warning(f"Rate limited, retrying in {wait_time}s (attempt {attempt + 1})")
                            time.sleep(wait_time)
                            continue
                    elif e.status_code >= 500:  # Server error
                        if attempt < max_retries:
                            wait_time = backoff_factor ** attempt
                            logger.warning(f"Server error, retrying in {wait_time}s (attempt {attempt + 1})")
                            time.sleep(wait_time)
                            continue
                    raise e
                except Exception as e:
                    if attempt < max_retries:
                        wait_time = backoff_factor ** attempt
                        logger.warning(f"Unexpected error, retrying in {wait_time}s: {e}")
                        time.sleep(wait_time)
                        continue
                    raise e
            return None
        return wrapper
    return decorator

def handle_api_response(response):
    """Centralized response handling with detailed error information."""
    if response.status_code == 200:
        return response.json()

    # Parse error response
    try:
        error_data = response.json()
        message = error_data.get('error', {}).get('message', 'Unknown error')
        error_code = error_data.get('error', {}).get('code')
    except Exception:
        message = f"HTTP {response.status_code} error"
        error_code = None

    # Rate-limiting-specific handling
    if response.status_code == 429:
        retry_after = response.headers.get('Retry-After', 60)
        message += f" - Retry after {retry_after} seconds"

    raise LangTrainAPIError(response.status_code, message, error_code)

@retry_with_backoff(max_retries=3)
def robust_api_call(endpoint, method='GET', **kwargs):
    """Make an API call with comprehensive error handling."""
    try:
        if method == 'GET':
            response = requests.get(f'{BASE_URL}{endpoint}', headers=headers, **kwargs)
        elif method == 'POST':
            response = requests.post(f'{BASE_URL}{endpoint}', headers=headers, **kwargs)
        elif method == 'DELETE':
            response = requests.delete(f'{BASE_URL}{endpoint}', headers=headers, **kwargs)
        else:
            raise ValueError(f"Unsupported method: {method}")
        return handle_api_response(response)
    except requests.exceptions.Timeout:
        logger.error("Request timed out")
        raise LangTrainAPIError(408, "Request timeout")
    except requests.exceptions.ConnectionError:
        logger.error("Connection error")
        raise LangTrainAPIError(503, "Service unavailable")

# Usage with error handling
def safe_generate_text(model_id, prompt, **kwargs):
    try:
        payload = {
            'model': model_id,
            'prompt': prompt,
            **kwargs
        }
        result = robust_api_call('/completions', method='POST', json=payload)
        return result['choices'][0]['text']
    except LangTrainAPIError as e:
        logger.error(f"API error: {e}")
        if e.status_code == 400:
            logger.error("Check your request parameters")
        elif e.status_code == 401:
            logger.error("Invalid API key - check your credentials")
        elif e.status_code == 429:
            logger.error("Rate limit exceeded - slow down requests")
        elif e.status_code >= 500:
            logger.error("Server error - try again later")
        return None
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        return None

# Rate limit monitoring
class RateLimiter:
    def __init__(self, requests_per_minute=60):
        self.requests_per_minute = requests_per_minute
        self.requests = []

    def wait_if_needed(self):
        now = time.time()
        # Drop requests older than one minute from the sliding window
        self.requests = [req_time for req_time in self.requests if now - req_time < 60]
        if len(self.requests) >= self.requests_per_minute:
            sleep_time = 60 - (now - self.requests[0])
            if sleep_time > 0:
                logger.info(f"Rate limit reached, sleeping for {sleep_time:.1f}s")
                time.sleep(sleep_time)
        self.requests.append(now)

# Usage example with rate limiting
rate_limiter = RateLimiter(requests_per_minute=30)  # Conservative limit

# Reuses the prompts list from the batch inference example above
for prompt in prompts:
    rate_limiter.wait_if_needed()
    result = safe_generate_text('meta-llama/Llama-2-7b-hf', prompt)
    if result:
        print(f"✅ Generated: {result[:100]}...")
    else:
        print("❌ Failed to generate text")
```