Distributed tracing and latency metrics for AI agents
Track agent execution, measure performance, and debug complex workflows across hundreds of modules
Enterprise Observability
Part of 237 enterprise modules with 16 observability features including APM integration, real-time dashboards, and alerting. See Enterprise Documentation.
Part of 16 observability modules including distributed tracing, metrics collection, log aggregation, alerting, and APM.
Tracing Architecture
The Tracing module provides OpenTelemetry-compatible distributed tracing for AI agent applications. Track every step of agent execution with detailed spans, measure latency percentiles, and export traces to your observability platform.
from agenticaiframework.tracing import AgentStepTracer, tracer

# Use global tracer instance (or construct a dedicated one, as here).
tracer = AgentStepTracer()

# Start a trace for an agent task; the context manager closes the trace
# automatically when the block exits.
with tracer.start_trace("agent_task", agent_id="agent-001") as trace:
    # Track individual steps as child spans of the trace.
    with tracer.start_span("llm_call", trace_id=trace.trace_id) as span:
        response = llm.generate("What is AI?")
        span.set_attribute("model", "gpt-4")
        span.set_attribute("tokens", 150)

    with tracer.start_span("tool_execution", trace_id=trace.trace_id) as span:
        result = tool.execute()
        span.set_attribute("tool_name", "web_search")
        span.set_status("success")
with tracer.start_span("llm_inference") as span:
    # Standard attributes
    span.set_attribute("model.name", "gpt-4")
    span.set_attribute("model.provider", "openai")
    span.set_attribute("input.tokens", 500)
    span.set_attribute("output.tokens", 150)
    span.set_attribute("temperature", 0.7)

    # Custom attributes
    span.set_attribute("custom.request_id", "req-123")
    span.set_attribute("custom.user_id", "user-456")

    # Set status: mark success on completion, record the error (and
    # re-raise) on failure so the span reflects what actually happened.
    try:
        result = llm.generate(prompt)
        span.set_status("success")
        span.set_attribute("output.length", len(result))
    except Exception as e:
        span.set_status("error", str(e))
        span.record_exception(e)
        raise
from agenticaiframework.tracing import LatencyMetrics, latency_metrics

# Use global instance (or construct a dedicated one, as here).
metrics = LatencyMetrics()

# Record individual latencies (values are in seconds).
metrics.record("llm_inference", 1.23)
metrics.record("llm_inference", 0.98)
metrics.record("llm_inference", 1.45)

# Record with context: the optional third argument attaches tags
# that can be used to slice the metrics later.
metrics.record("tool_execution", 0.5, {"tool": "web_search", "agent": "researcher"})
import logging

logger = logging.getLogger(__name__)

# Get percentiles for an operation.
percentiles = metrics.get_percentiles("llm_inference")
logger.info(f"P50: {percentiles['p50']:.3f}s")
logger.info(f"P95: {percentiles['p95']:.3f}s")
logger.info(f"P99: {percentiles['p99']:.3f}s")

# Get all statistics for the same operation.
stats = metrics.get_stats("llm_inference")
logger.info(f"Count: {stats['count']}")
logger.info(f"Mean: {stats['mean']:.3f}s")
logger.info(f"Min: {stats['min']:.3f}s")
logger.info(f"Max: {stats['max']:.3f}s")
logger.info(f"Std Dev: {stats['std']:.3f}s")
import logging

logger = logging.getLogger(__name__)

# Get metrics for the last hour.
hourly_stats = metrics.get_stats("llm_inference", time_window="1h")

# Get metrics for the last 24 hours.
daily_stats = metrics.get_stats("llm_inference", time_window="24h")

# Compare periods — e.g. the last hour against the last 24 hours.
comparison = metrics.compare_periods("llm_inference", period1="1h", period2="24h")
logger.info(f"Latency change: {comparison['p50_change']:.2%}")
# Service A: Start trace and propagate it downstream via HTTP headers.
with tracer.start_trace("api_request") as trace:
    # Create context for the downstream call.
    context = trace.get_context()

    # Make HTTP call carrying the trace context in the request headers.
    response = httpx.post(
        "https://service-b/process",
        headers=context.to_headers(),
        json=data,
    )


# Service B: Continue the trace started by Service A.
def handle_request(request):
    # Extract context from the incoming request headers.
    context = SpanContext.from_headers(request.headers)

    # Continue the trace so downstream spans share the same trace id.
    with tracer.continue_trace(context, "process_data"):
        result = process(request.json)
    return result
# Export to a custom backend by supplying an exporter callback.
def custom_exporter(spans):
    """Forward each finished span to the backend as a plain dict."""
    for span in spans:
        # Send to your backend; only serializable fields are included.
        send_to_backend({
            "trace_id": span.trace_id,
            "span_id": span.span_id,
            "name": span.name,
            "duration": span.duration,
            "attributes": span.attributes,
        })


tracer.configure_export(exporter="custom", export_function=custom_exporter)