Test agent

FixedResponseModel

Bases: Model

A Mock LLM Implementation for deterministic testing.

Instead of calling OpenAI, this class returns pre-defined response_parts. This allows us to simulate specific LLM behaviors (e.g., calling a tool, generating text, or even hallucinating a malicious command) to test how the Agent loop responds.

Attributes:

response_parts (list[Any]): The content the "LLM" should return in the first turn.

Source code in api/tests/integration/test_agent.py
class FixedResponseModel(Model):
    """
    A Mock LLM Implementation for deterministic testing.

    Instead of calling OpenAI, this class returns pre-defined `response_parts`.
    This allows us to simulate specific LLM behaviors (e.g., calling a tool,
    generating text, or even hallucinating a malicious command) to test how
    the Agent loop responds.

    Attributes:
        response_parts (list[Any]): The content the "LLM" should return in the first turn.
    """

    def __init__(self, response_parts: list[Any]):
        self.response_parts = response_parts
        self._call_count = 0

    async def request(
        self,
        messages: list[Any],
        model_settings: ModelSettings | None,
        model_request_parameters: Any | None,
    ) -> ModelResponse:
        if self._call_count == 0:
            self._call_count += 1
            return ModelResponse(parts=self.response_parts)
        else:
            return ModelResponse(parts=[TextPart("Testing complete.")])

    @property
    def model_name(self) -> str:
        return "fixed-response-mock"

    @property
    def system(self) -> str | None:
        return "mock-system-prompt"

mock_deps()

Creates mocked Agent Dependencies (Database).

Simulates a DuckDB connection to avoid needing a real database file on disk.

Source code in api/tests/integration/test_agent.py
@pytest.fixture
def mock_deps():
    """
    Creates mocked Agent Dependencies (Database).

    Simulates a DuckDB connection to avoid needing a real database file on disk.
    """
    deps = MagicMock(spec=AgentDeps)
    deps.db_path = ":memory:"

    mock_conn = MagicMock()
    mock_conn.execute.return_value.df.return_value.empty = False

    deps.get_db_connection.return_value = mock_conn
    return deps
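
The chained return_value assignment is what makes the mock behave like a populated database: any execute(...).df() call yields an object whose empty attribute is False, so tools believe data exists. A small sketch of that wiring outside pytest (illustrative only, not part of the test file):

from unittest.mock import MagicMock

conn = MagicMock()
conn.execute.return_value.df.return_value.empty = False

# Whatever SQL the tool sends, the mocked "DataFrame" reports it is non-empty.
df = conn.execute("SELECT count(*) FROM srag_analytics").df()
assert df.empty is False

# The query text is still captured, so tests can assert on it afterwards.
assert "count(*)" in conn.execute.call_args[0][0]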

orchestrator()

Initializes the Orchestrator with Test settings.

Source code in api/tests/integration/test_agent.py
@pytest.fixture
def orchestrator():
    """Initializes the Orchestrator with Test settings."""
    settings = get_settings()
    return SRAGAgentOrchestrator(settings)

test_agent_sql_guardrail_interception(orchestrator, mock_deps) async

Integration Test: Ensures the Guardrail actively stops a compromised Agent.

This test forces the Mock LLM to emit a malicious DROP TABLE tool call. We assert that pydantic-ai-guardrails intercepts this before it reaches the tool execution layer.

Flow:

  1. Set up FixedResponseModel to return a DROP SQL query.
  2. Run Agent.
  3. Assert: An OutputGuardrailViolation exception is raised.
Source code in api/tests/integration/test_agent.py
@pytest.mark.asyncio
async def test_agent_sql_guardrail_interception(orchestrator, mock_deps):
    """
    Integration Test: Ensures the Guardrail actively stops a compromised Agent.

    This test forces the Mock LLM to emit a malicious `DROP TABLE` tool call.
    We assert that `pydantic-ai-guardrails` intercepts this *before* it reaches
    the tool execution layer.

    **Flow:**

    1.  Set up `FixedResponseModel` to return a `DROP` SQL query.
    2.  Run Agent.
    3.  **Assert:** An `OutputGuardrailViolation` exception is raised.

    """
    malicious_response = [
        ToolCallPart(
            tool_name="stats_tool",
            args={"sql_query": "DROP TABLE srag_analytics; --"},
            tool_call_id="call_rogue_1",
        )
    ]

    orchestrator.base_agent.model = FixedResponseModel(malicious_response)

    with pytest.raises(OutputGuardrailViolation) as excinfo:
        await orchestrator.agent.run("Please count the records", deps=mock_deps)

    error_msg = str(excinfo.value)
    assert "validate_tool_parameters" in error_msg
    assert "Security Violation" in error_msg
    assert "DROP" in error_msg

test_agent_tool_routing_success(orchestrator, mock_deps) async

Integration Test: Verifies successful tool routing for valid queries.

Ensures that when the LLM emits a valid SELECT call, the arguments are passed correctly to the underlying database connection.

Source code in api/tests/integration/test_agent.py
@pytest.mark.asyncio
async def test_agent_tool_routing_success(orchestrator, mock_deps):
    """
    Integration Test: Verifies successful tool routing for valid queries.

    Ensures that when the LLM emits a valid `SELECT` call, the arguments are
    passed correctly to the underlying database connection.
    """
    valid_response = [
        ToolCallPart(
            tool_name="stats_tool",
            args={"sql_query": "SELECT count(*) FROM srag"},
            tool_call_id="call_valid_1",
        )
    ]

    orchestrator.base_agent.model = FixedResponseModel(valid_response)

    await orchestrator.agent.run("Count cases", deps=mock_deps)

    mock_conn = mock_deps.get_db_connection.return_value
    mock_conn.execute.assert_called()

    called_query = mock_conn.execute.call_args[0][0]
    assert "SELECT count(*)" in called_query

test_full_report_flow_integration(client)

End-to-End API Test: Checks the /report endpoint wiring.

This test mocks the internal orchestrator.run() method to isolate the FastAPI layer from the AI logic. It verifies that inputs (focus_area) are passed down and outputs (markdown) are returned up correctly.

Checks:

  1. Status Code 200.
  2. JSON response structure matches AgentResponse.
  3. Prompt injection logic (Focus Area appending) works as expected.
Source code in api/tests/integration/test_agent.py
def test_full_report_flow_integration(client):
    """
    End-to-End API Test: Checks the `/report` endpoint wiring.

    This test mocks the internal `orchestrator.run()` method to isolate the
    FastAPI layer from the AI logic. It verifies that inputs (focus_area)
    are passed down and outputs (markdown) are returned up correctly.

    **Checks:**

    1.  Status Code 200.
    2.  JSON response structure matches `AgentResponse`.
    3.  Prompt injection logic (Focus Area appending) works as expected.
    """
    mock_orchestrator = MagicMock()

    mock_result = MagicMock()
    mock_result.output = "Report generated with **bold text**."
    mock_result.all_messages.return_value = []

    mock_orchestrator.run = AsyncMock(return_value=mock_result)

    app.dependency_overrides[get_orchestrator] = lambda: mock_orchestrator

    try:
        payload = {"focus_area": "Children under 5"}
        response = client.post("/api/v1/agent/report", json=payload)

        assert response.status_code == 200, f"API failed: {response.text}"

        data = response.json()
        assert data["response"] == "Report generated with **bold text**."
        assert "execution_time" in data

        called_prompt = mock_orchestrator.run.call_args[0][0]
        assert "Children under 5" in called_prompt
        assert "STRICTLY FOLLOW" in called_prompt

    finally:
        app.dependency_overrides = {}
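
For reference, the wire contract this test exercises, written out as plain Python data (illustrative: the execution_time value is a placeholder, and in production the response field carries the full generated markdown report rather than this mocked string):

# POST /api/v1/agent/report
request_payload = {"focus_area": "Children under 5"}

# Shape asserted on above; additional fields may be present in the real payload.
expected_response_shape = {
    "response": "Report generated with **bold text**.",  # markdown from the (mocked) orchestrator
    "execution_time": 0.0,                                # present in the response; value varies per run
}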