L
Initializing Studio...
Deploy your fine-tuned models to production with auto-scaling, monitoring, and CI/CD integration.
# Deploy via CLI
# Creates a deployment called "production-api" from the fine-tuned model,
# passing the region and the minimum/maximum instance counts as flags.
langtrain deploy create \
  --model my-fine-tuned-model \
  --name production-api \
  --region us-east-1 \
  --min-instances 1 \
  --max-instances 10
# Deploy via Python SDK
# Creates a deployment named "production-api" for the given model id; region,
# instance type, and scaling bounds are passed through the `config` mapping.
# NOTE(review): `client` must already be constructed before this runs.
deployment = client.deployments.create(
    model_id="your-model-id",
    name="production-api",
    config=dict(
        region="us-east-1",
        instance_type="gpu-medium",
        min_instances=1,
        max_instances=10,
        auto_scaling=True,
    ),
)
# Generate Dockerfile
langtrain deploy generate-dockerfile --model my-model
# Build and deploy
# The image is tagged with the registry-qualified name at build time so that
# the tag given to `docker push` actually exists locally; pushing a tag that
# was never created fails.
docker build -t your-registry/my-model:latest .
docker push your-registry/my-model:latest
# Kubernetes deployment
# Deployment manifest running three replicas of the model container.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: langtrain-model
spec:
  replicas: 3
  # The selector must match the labels set on the pod template below.
  selector:
    matchLabels:
      app: langtrain-model
  template:
    metadata:
      labels:
        app: langtrain-model
    spec:
      containers:
        - name: model
          image: your-registry/my-model:latest
          ports:
            - containerPort: 8000
# Configure load balancer
# Load-balancer and auto-scaling settings for a deployment.
# NOTE(review): units for interval/timeout/cooldown values are not stated
# here — presumably seconds; confirm against the SDK documentation.
deployment_config = dict(
    load_balancer=dict(
        algorithm="round_robin",
        health_check=dict(
            path="/health",
            interval=30,
            timeout=5,
            healthy_threshold=2,
            unhealthy_threshold=3,
        ),
        sticky_sessions=False,
    ),
    auto_scaling=dict(
        metric="requests_per_second",
        target=100,
        scale_up_cooldown=300,
        scale_down_cooldown=600,
    ),
)
# Set up monitoring
# Register an alert on the deployment's "response_time_p95" metric that fires
# when the value is greater than the threshold, notifying email and Slack.
client.monitoring.create_alert(
    deployment_id="your-deployment-id",
    metric="response_time_p95",
    threshold=2000,  # 2 seconds
    comparison="greater_than",
    notification_channels=["email", "slack"],
)

# Custom metrics
# Record a single tagged data point for a user-defined metric.
client.monitoring.track_metric(
    deployment_id="your-deployment-id",
    metric_name="business_metric",
    value=42,
    tags=dict(version="v1.2", region="us-east-1"),
)
# GitHub Actions workflow
# Workflow that deploys the model whenever commits are pushed to main.
name: Deploy Model
on:
  push:
    branches: [main]
jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      # Check out the repository before running the deploy action.
      - uses: actions/checkout@v3
      - name: Deploy to LangTrain
        uses: langtrain/deploy-action@v1
        with:
          # The API key comes from repository secrets; the model id from variables.
          api-key: ${{ secrets.LANGTRAIN_API_KEY }}
          model-id: ${{ vars.MODEL_ID }}
          deployment-name: production-api