This guide provides comprehensive strategies for optimizing the performance of your AgenticAI Framework applications. Learn how to maximize throughput, minimize latency, and efficiently scale your agent-based systems.
from agenticaiframework.agents import AgentManager, Agent
from concurrent.futures import ThreadPoolExecutor

# Configure optimal pool size.
agent_manager = AgentManager(
    max_agents=50,        # Based on CPU cores x 2-4 (fixed mojibake multiplication sign)
    min_idle_agents=5,
    pool_timeout=30,
    enable_warmup=True,
)


def warmup_agents():
    """Pre-warm the agent pool before traffic arrives.

    Registers ``min_idle_agents`` general-purpose agents so the first
    requests do not pay agent start-up cost.
    """
    for i in range(agent_manager.min_idle_agents):
        agent = Agent(
            name=f"warmup_agent_{i}",
            role="general",
            capabilities=["task_execution"],
        )
        agent_manager.register_agent(agent)


warmup_agents()
from agenticaiframework.tasks import Task, TaskQueue
import asyncio


async def execute_tasks_concurrently(tasks: list[Task]):
    """Run every task concurrently and collect all outcomes.

    NOTE(review): relies on a module-level ``agent`` being in scope —
    confirm it is defined before this coroutine is awaited.
    """
    pending = [agent.execute_task_async(item) for item in tasks]
    # return_exceptions=True keeps one failure from cancelling the rest.
    return await asyncio.gather(*pending, return_exceptions=True)


def batch_execute(tasks: list[Task], batch_size: int = 10):
    """Yield results for ``tasks`` processed in fixed-size batches.

    Each batch runs concurrently; batches run one after another, which
    bounds peak concurrency at ``batch_size``.
    """
    for start in range(0, len(tasks), batch_size):
        chunk = tasks[start:start + batch_size]
        yield asyncio.run(execute_tasks_concurrently(chunk))
from agenticaiframework.llms import LLMManager
from functools import lru_cache
import hashlib


class CachedLLMManager(LLMManager):
    """LLM manager with intelligent caching.

    Memoizes responses keyed by (model, prompt) and tracks hit/miss
    counters so cache effectiveness can be reported.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # In-memory response cache plus hit/miss counters.
        self.cache = {}
        self.cache_hits = 0
        self.cache_misses = 0

    def generate_cache_key(self, prompt: str, model: str) -> str:
        """Derive a deterministic SHA-256 key from model + prompt."""
        return hashlib.sha256(f"{model}:{prompt}".encode()).hexdigest()

    def generate_with_cache(self, prompt: str, model: str = "gpt-4"):
        """Return a cached response when available, else call the LLM."""
        key = self.generate_cache_key(prompt, model)
        if key in self.cache:
            self.cache_hits += 1
            return self.cache[key]
        # Miss: delegate to the underlying manager and remember the answer.
        self.cache_misses += 1
        response = self.generate(prompt, model=model)
        self.cache[key] = response
        return response

    def get_cache_stats(self):
        """Return hit/miss counts, hit rate, and current cache size."""
        total = self.cache_hits + self.cache_misses
        hit_rate = self.cache_hits / total if total > 0 else 0
        return {
            "hits": self.cache_hits,
            "misses": self.cache_misses,
            "hit_rate": f"{hit_rate:.2%}",
            "cache_size": len(self.cache),
        }
# Inefficient - verbose prompt
# (extraction had collapsed the newlines inside these literals —
# "thoroughand", "make.Data:" — reconstructed here)
inefficient_prompt = """Please analyze the following data and provide a comprehensive report including all details, explanations, and recommendations. Be thorough
and include examples for every point you make.
Data: {data}"""

# Efficient - concise prompt
efficient_prompt = """Analyze data and list: 1) Key findings 2) Recommendations
Data: {data}"""

# Token reduction: ~60% fewer tokens
# Latency improvement: ~50% faster
# Cost reduction: ~60% cheaper
from agenticaiframework.llms import LLMManager

llm_manager = LLMManager()


def select_optimal_model(task_complexity: str) -> str:
    """Map a task-complexity label to the most cost-effective model.

    Unknown labels fall back to the cheapest option.
    """
    strategy_by_complexity = {
        "simple": "gpt-3.5-turbo",   # Fast, cheap
        "moderate": "gpt-4-turbo",   # Balanced
        "complex": "gpt-4",          # Powerful
        "reasoning": "o1-preview",   # Specialized
    }
    return strategy_by_complexity.get(task_complexity, "gpt-3.5-turbo")


# Example usage
response = llm_manager.generate(
    prompt="Simple classification task",
    model=select_optimal_model("simple"),
)
# Use indexed lookups.
memory_manager.create_index("agent_id")
memory_manager.create_index("timestamp")

# Efficient retrieval: bounded, indexed, and projected.
results = memory_manager.retrieve(
    filter={"agent_id": "agent_001"},
    limit=10,                    # Limit results
    use_index=True,              # Use index
    projection=["id", "data"],   # Return only needed fields
)

# Avoid full table scans.
# Slow - full scan:
all_memories = memory_manager.retrieve_all()

# Fast - filtered query:
recent_memories = memory_manager.retrieve(
    filter={"timestamp": {"$gte": timestamp_24h_ago}},
    limit=100,
)
import logging
import time

import schedule

logger = logging.getLogger(__name__)


def cleanup_stale_memories():
    """Periodically delete memories older than 7 days.

    NOTE(review): relies on a module-level ``memory_manager`` being in
    scope — confirm it is defined before the scheduled job fires.
    """
    cutoff_time = time.time() - (7 * 24 * 3600)
    deleted = memory_manager.delete_where(
        filter={"timestamp": {"$lt": cutoff_time}}
    )
    # Lazy %-args keep formatting cost out of the path when INFO is off.
    logger.info("Cleaned up %s stale memories", deleted)


# Schedule cleanup for a low-traffic window.
schedule.every().day.at("02:00").do(cleanup_stale_memories)
from agenticaiframework.knowledge import KnowledgeBase

# Optimize vector dimensions.
knowledge_base = KnowledgeBase(
    vector_dimension=384,    # Use smaller models (MiniLM vs BERT)
    index_type="hnsw",       # Hierarchical NSW for speed
    ef_construction=200,     # Balance speed/recall
    m=16,                    # Connections per node
)


def batch_embed_documents(documents: list[str], batch_size: int = 32):
    """Embed ``documents`` in fixed-size batches and return all vectors.

    Batching amortizes per-call overhead in ``embed_batch``.
    """
    all_vectors = []
    for offset in range(0, len(documents), batch_size):
        window = documents[offset:offset + batch_size]
        all_vectors.extend(knowledge_base.embed_batch(window))
    return all_vectors
# Use approximate search for speed.
results = knowledge_base.search(
    query="What is machine learning?",
    top_k=10,        # Limit results
    ef_search=50,    # Lower for speed, higher for accuracy
    threshold=0.7,   # Minimum similarity
)

# Pre-filter before search to shrink the candidate set.
results = knowledge_base.search(
    query="ML concepts",
    filter={"category": "machine_learning"},   # Reduce search space
    top_k=5,
)
from multiprocessing import Pool
import numpy as np


def cpu_intensive_task(data):
    """CPU-bound processing: reduce ``data`` to its arithmetic mean.

    Stand-in for real work such as embedding generation or data
    transformation.
    """
    return np.array(data).mean()


# Use a process pool for CPU-bound tasks.
# The __main__ guard is required: on spawn-based platforms (Windows,
# macOS default) an unguarded Pool at import time re-imports this module
# in every worker, recursively spawning processes.
if __name__ == "__main__":
    # NOTE(review): large_dataset must be defined by the calling script.
    with Pool(processes=8) as pool:
        results = pool.map(cpu_intensive_task, large_dataset)
from agenticaiframework.monitoring import MonitoringManager
import functools
import time

monitor = MonitoringManager()


def track_performance(func):
    """Decorator that records latency on success and errors on failure.

    Fix: ``functools.wraps`` preserves ``__name__``/docstring so the
    monitoring tags report the wrapped function, not ``wrapper``.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.time()
        try:
            result = func(*args, **kwargs)
            duration = time.time() - start
            monitor.record_metric(
                metric="function_latency",
                value=duration,
                tags={"function": func.__name__},
            )
            return result
        except Exception as e:
            # Record the failure, then let it propagate to the caller.
            monitor.record_error(
                error=str(e),
                context={"function": func.__name__},
            )
            raise

    return wrapper


@track_performance
def process_request(data):
    # Your code here
    pass
import cProfile
import logging
import pstats
from io import StringIO

# Fix: the original snippet used ``logger`` without ever defining it.
logger = logging.getLogger(__name__)


def profile_function(func, *args, **kwargs):
    """Profile one call of ``func`` and log its top 20 hot spots.

    Returns whatever ``func`` returns, so it can wrap a call in place.
    """
    profiler = cProfile.Profile()
    profiler.enable()
    result = func(*args, **kwargs)
    profiler.disable()

    # Render stats sorted by cumulative time into a string for logging.
    s = StringIO()
    ps = pstats.Stats(profiler, stream=s).sort_stats('cumulative')
    ps.print_stats(20)  # Top 20 functions
    logger.info(s.getvalue())
    return result


# Usage (fix: the original called this at import time with undefined
# names, which raised NameError):
#     profile_function(expensive_operation, data)
from memory_profiler import profile


@profile
def memory_intensive_function():
    """Allocate large structures so memory_profiler has something to show."""
    big_sequence = [n for n in range(1000000)]
    big_mapping = {n: n * 2 for n in range(1000000)}
    return big_sequence, big_mapping


# Run to see line-by-line memory usage.
memory_intensive_function()