Master TestSession and TestAgent for orchestrating tests, assertions, and metrics collection.
The Session API is the heart of mcp-eval testing. It manages your agent's lifecycle, collects metrics, runs assertions, and produces comprehensive test results.
# Using context manager (recommended)async with test_session("test-name") as agent: # Your test code here pass# Manual lifecycle (advanced)session = TestSession(test_name="test-name")agent = await session.__aenter__()try: # Your test code ...finally: await session.__aexit__(None, None, None) session.cleanup()
from mcp_eval.session import test_sessionfrom mcp_agent.agents.agent_spec import AgentSpecspec = AgentSpec( name="custom", instruction="You are a helpful test assistant", server_names=["my_server"],)async with test_session("custom-test", agent=spec) as agent: # Your test code pass
# Simple string generationresponse = await agent.generate_str("What is 2+2?")print(response) # "The answer is 4"# Full response object may be available depending on provider; prefer generate_str for portability
# Sessions maintain contextresponse1 = await agent.generate_str("My name is Alice")response2 = await agent.generate_str("What's my name?")# response2 will correctly identify "Alice"
# Always name your assertions for clarityawait session.assert_that( Expect.content.regex(r"\d+ items? found"), response=response, name="item_count_format" # Appears in reports)
# Check if all assertions passed
if session.all_passed():
    print("✅ All tests passed!")
else:
    print("❌ Some tests failed")

# Get detailed results
results = session.get_results()
for result in results:
    print(f"Assertion: {result.name}")
    print(f"Passed: {result.passed}")
    if not result.passed:
        print(f"Reason: {result.details}")

# Get pass/fail summary
summary = session.get_summary()
print(f"Passed: {summary['passed']}/{summary['total']}")
print(f"Pass rate: {summary['pass_rate']:.1%}")
class CustomSession(TestSession):
    """Session subclass demonstrating lifecycle hooks."""

    async def on_tool_call(self, tool_name: str, args: dict):
        """Hook called before each tool execution."""
        print(f"About to call {tool_name} with {args}")
        # Validate tool usage
        if tool_name == "dangerous_tool":
            raise ValueError("Dangerous tool not allowed in tests")

    async def on_assertion_complete(self, result):
        """Hook called after each assertion."""
        if not result.passed:
            # Log failures to external system
            await self.log_to_monitoring(result)
# Store custom state in session
session.state["test_user_id"] = "user_123"
session.state["test_context"] = {"environment": "staging"}

# Access state in assertions or hooks
user_id = session.state.get("test_user_id")