Real-time monitoring and observability for your models and training processes.
from langtrain import Monitor

# Initialize monitoring
monitor = Monitor(
    project_name='my_project',
    experiment_name='bert_fine_tuning',
    tracking_uri='http://localhost:5000'
)

# Start monitoring training
monitor.start_training(
    model=model,
    train_data=train_dataset,
    val_data=val_dataset,
    metrics=['loss', 'accuracy', 'f1_score'],
    log_frequency=100  # Log every 100 steps
)

# Log custom metrics
monitor.log_metric('learning_rate', 0.001, step=epoch)
monitor.log_metric('batch_size', 32)
monitor.log_artifact('model_config.json', config)
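For context, here is a minimal sketch of how the same log_metric call might sit inside a training loop; train_loader, optimizer, and compute_loss are placeholders for your own training code, not part of the langtrain API.

# Hypothetical training loop that logs the running loss every 100 steps
for step, batch in enumerate(train_loader):
    loss = compute_loss(model, batch)  # placeholder forward/backward pass
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    if step % 100 == 0:
        # Same Monitor API as above, keyed by global step
        monitor.log_metric('train_loss', loss.item(), step=step)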
# Performance monitoring setup
from langtrain.monitoring import PerformanceMonitor

perf_monitor = PerformanceMonitor(
    model=model,
    baseline_metrics={
        'accuracy': 0.92,
        'latency_p95': 100,  # milliseconds
        'throughput': 1000   # requests/second
    }
)

# Monitor inference performance
@perf_monitor.track_inference
def predict(inputs):
    return model.predict(inputs)

# Set up alerts for performance degradation
perf_monitor.set_alert(
    metric='accuracy',
    threshold=0.85,
    comparison='less_than',
    action='email_alert'
)

# Generate performance reports
report = perf_monitor.generate_report(
    time_range='last_7_days',
    include_trends=True
)
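To make the decorator pattern concrete, the following standalone sketch shows what an inference-latency tracker can look like when written with only the Python standard library; it is illustrative and is not the langtrain implementation.

import functools
import time

def track_latency(samples):
    """Append each call's latency in milliseconds to `samples` (illustrative only)."""
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            start = time.perf_counter()
            result = fn(*args, **kwargs)
            samples.append((time.perf_counter() - start) * 1000)
            return result
        return wrapper
    return decorator

latencies = []

@track_latency(latencies)
def predict(inputs):
    return model.predict(inputs)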
# System resource monitoring
from langtrain.monitoring import SystemMonitor

sys_monitor = SystemMonitor(
    track_gpu=True,
    track_memory=True,
    track_disk=True,
    track_network=True
)

# Start system monitoring
sys_monitor.start()

# Get current resource usage
resources = sys_monitor.get_current_usage()
print(f"GPU Utilization: {resources['gpu_utilization']}%")
print(f"Memory Usage: {resources['memory_usage']}%")
print(f"Disk I/O: {resources['disk_io']} MB/s")

# Set resource alerts
sys_monitor.set_alert(
    metric='gpu_memory',
    threshold=90,  # Alert at 90% GPU memory usage
    action='scale_resources'
)

# Log resource metrics
sys_monitor.log_to_dashboard(dashboard_url='http://grafana:3000')
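If you want to sanity-check these readings outside of langtrain, the same CPU, memory, and disk signals can be polled directly with psutil (pip install psutil); GPU utilization would need an additional library such as pynvml. A rough sketch:

import psutil

cpu_percent = psutil.cpu_percent(interval=1.0)     # CPU utilization over a 1 s window
memory_percent = psutil.virtual_memory().percent   # system RAM usage in %
disk_io = psutil.disk_io_counters()                # cumulative read/write counters

print(f"CPU: {cpu_percent}%  RAM: {memory_percent}%  "
      f"Disk read: {disk_io.read_bytes / 1e6:.1f} MB total")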
# Data quality monitoring
from langtrain.monitoring import DataMonitor

data_monitor = DataMonitor(
    reference_data=training_data,
    feature_columns=['text_length', 'sentiment_score'],
    categorical_columns=['category', 'language']
)

# Monitor incoming data
@data_monitor.track_data_quality
def process_batch(batch_data):
    # Your data processing logic
    predictions = model.predict(batch_data)
    return predictions

# Detect data drift
drift_report = data_monitor.detect_drift(
    new_data=production_data,
    drift_methods=['ks_test', 'chi_square', 'jensen_shannon']
)

if drift_report.has_drift:
    print(f"Data drift detected in features: {drift_report.drifted_features}")

# Set up data quality alerts
data_monitor.configure_alerts(
    drift_threshold=0.1,
    quality_threshold=0.95,
    notification_channels=['email', 'slack']
)
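As a point of reference, the 'ks_test' drift method presumably refers to a two-sample Kolmogorov-Smirnov test, which you can also run directly with scipy on any single numeric feature. The sketch below assumes training_data and production_data are pandas DataFrames that contain a text_length column.

from scipy.stats import ks_2samp

# Compare the reference and production distributions of one feature
statistic, p_value = ks_2samp(
    training_data['text_length'],
    production_data['text_length']
)

if p_value < 0.05:
    print(f"Possible drift in text_length (KS statistic={statistic:.3f}, p={p_value:.4f})")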
# Custom dashboard creation
from langtrain.monitoring import Dashboard

dashboard = Dashboard(name='Model Performance Dashboard')

# Add metric widgets
dashboard.add_widget(
    type='line_chart',
    title='Training Loss',
    metrics=['train_loss', 'val_loss'],
    time_range='last_24_hours'
)

dashboard.add_widget(
    type='gauge',
    title='Current Accuracy',
    metric='accuracy',
    min_value=0.0,
    max_value=1.0,
    threshold_ranges=[
        {'min': 0.0, 'max': 0.7, 'color': 'red'},
        {'min': 0.7, 'max': 0.85, 'color': 'yellow'},
        {'min': 0.85, 'max': 1.0, 'color': 'green'}
    ]
)

dashboard.add_widget(
    type='table',
    title='Model Comparison',
    data_source='model_comparison_results',
    columns=['model_name', 'accuracy', 'f1_score', 'latency']
)

# Deploy dashboard
dashboard.deploy(url='http://monitoring:8080/dashboard')
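The table widget reads from the model_comparison_results data source, which is assumed to exist on the tracking server. Purely for illustration of the column layout it expects, a comparison table could be assembled with pandas using placeholder values:

import pandas as pd

# Placeholder rows, only to show the expected columns
model_comparison_results = pd.DataFrame([
    {'model_name': 'bert-base', 'accuracy': 0.92, 'f1_score': 0.90, 'latency': 85},
    {'model_name': 'distilbert-base', 'accuracy': 0.89, 'f1_score': 0.87, 'latency': 42},
])
print(model_comparison_results.to_string(index=False))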