Adding traces to your Agent#
Once you have created an agent, for example by following the Creating and using a Code Agent tutorial or the Creating a custom agent tutorial, tracing its behavior is essential to better understand what happened and to ease debugging.
Prerequisites#
Dataiku >= 13.4
An OpenAI connection
Python >= 3.10
Additionally, if you want to start from one of the tutorials mentioned, you will need the corresponding prerequisites, as described in each tutorial.
Introduction#
Adding traces to your Agent’s code helps you understand the path your Agent has taken and diagnose issues when they occur. It is also a way to analyze each step’s performance, helping you identify the steps worth focusing your optimization efforts on.
Adding traces to your Code Agent#
At the end of the Creating and using a Code Agent tutorial, you end up with the following code. Let’s see how the traces can be tailored to suit your needs.
code-custom.py
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, ToolMessage

import dataiku
from dataiku.langchain.dku_llm import DKUChatLLM
from dataiku.llm.python import BaseLLM

project = dataiku.api_client().get_default_project()
OPENAI_CONNECTION_NAME = "REPLACE_WITH_YOUR_CONNECTION_NAME"  # example: "openAI"


def find_tool(name: str) -> object:
    tools = project.list_agent_tools()
    tool = [tool for tool in tools if tool.get("name") == name]
    if tool:
        print(tool[0])
        return project.get_agent_tool(tool[0].get('id'))

# If you know the tool IDs, you can use them directly.
get_customer = find_tool("Get Customer Info").as_langchain_structured_tool()
get_company = find_tool("Get Company Info").as_langchain_structured_tool()
tools = [get_customer, get_company]


class MyLLM(BaseLLM):
    def __init__(self):
        pass

    def process(self, query, settings, trace):
        prompt = query["messages"][0]["content"]
        llm = DKUChatLLM(llm_id=f"openai:{OPENAI_CONNECTION_NAME}:gpt-4o-mini")
        llm_with_tools = llm.bind_tools(tools)
        messages = [HumanMessage(prompt)]

        with trace.subspan("Invoke LLM with tools") as subspan:
            ai_msg = llm_with_tools.invoke(messages)

        tool_messages = []
        with trace.subspan("Call the tools") as tools_subspan:
            for tool_call in ai_msg.tool_calls:
                with trace.subspan("Call a tool") as tool_subspan:
                    tool_subspan.attributes["tool_name"] = tool_call["name"]
                    tool_subspan.attributes["tool_args"] = tool_call["args"]
                    if tool_call["name"] == get_customer.name:
                        tool_output = get_customer(tool_call["args"])
                    else:
                        tool_output = get_company(tool_call["args"])
                    tool_messages.append(ToolMessage(tool_call_id=tool_call["id"], content=tool_output))

        messages = [
            HumanMessage(prompt),
            ai_msg,
            *tool_messages
        ]
        with trace.subspan("Compute final answer") as subspan:
            final_resp = llm_with_tools.invoke(messages)

        return {"text": final_resp.content}
The tracing workflow starts within the process function of the MyLLM class.
class MyLLM(BaseLLM):
    def __init__(self):
        pass

    def process(self, query, settings, trace):
The trace object is then used to add subspans to the generated trace.
with trace.subspan("Invoke LLM with tools") as subspan:
ai_msg = llm_with_tools.invoke(messages)
tool_messages = []
with trace.subspan("Call the tools") as tools_subspan:
You can organize the traces by naming each step you want to identify in your workflow. You can also enrich a span with specific data.
with trace.subspan("Call a tool") as tool_subspan:
tool_subspan.attributes["tool_name"] = tool_call["name"]
tool_subspan.attributes["tool_args"] = tool_call["args"]
The overall structure of a span is described below:
{
    "type": "span",
    "begin": "",
    "end": "",
    "duration": 0,
    "name": "",
    "children": [],
    "attributes": {},
    "inputs": {},
    "outputs": {}
}
You can fill out this dict with all the metadata required for your specific needs.
The begin and end fields are timestamps, complemented by the duration field.
The name field is the one specified when calling trace.subspan.
The children field is the array of subspans of this span.
The attributes field is the one we modified above to trace the name and arguments of a tool call.
The last fields are inputs and outputs, used in the case of LLM calls, as sketched below.
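For instance, a retrieval step could enrich its span with attributes, inputs, and outputs. The following is a minimal sketch, assuming inputs and outputs behave like the attributes dict; the span name, the datasource attribute, and the retrieve_documents helper are illustrative, not part of the tutorial’s code:
with trace.subspan("Retrieve context") as subspan:
    subspan.attributes["datasource"] = "crm_dataset"  # hypothetical metadata
    subspan.inputs["question"] = prompt               # what the step received
    documents = retrieve_documents(prompt)            # hypothetical helper
    subspan.outputs["nb_documents"] = len(documents)  # what the step produced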
Finally, every LLM Mesh call has a trace, which you can append to a span. You can build your own trace hierarchy, as shown below:
with trace.subspan("Calling a LLM") as subspan:
llm = dataiku.api_client().get_default_project().get_llm(<your LLM id>)
resp = llm.new_completion().with_message("this is a prompt").execute()
subspan.append_trace(resp.trace)
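Subspans can also be nested to build a deeper hierarchy. Here is a minimal sketch, assuming a subspan exposes the same subspan and append_trace methods as the top-level trace object; the span names, prompts, and the llm handle (reused from the previous snippet) are illustrative:
with trace.subspan("Answer the question") as answer_span:
    with answer_span.subspan("Rephrase the question") as rephrase_span:
        # Each LLM Mesh call carries its own trace, appended to the enclosing span
        rephrased = llm.new_completion().with_message("Rephrase this question: ...").execute()
        rephrase_span.append_trace(rephrased.trace)
    with answer_span.subspan("Compute the answer") as compute_span:
        resp = llm.new_completion().with_message(rephrased.text).execute()
        compute_span.append_trace(resp.trace)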
LangChain-compatible traces#
If your Agent is built with LangChain, you can create a LangchainToDKUTracer object and use it in each LangChain runnable you wish to track.
agent.py
from langchain import hub
from langchain.agents import AgentExecutor
from langchain.agents import create_openai_tools_agent
from langchain.tools import tool

from dataiku.llm.python import BaseLLM
from dataiku.langchain.dku_llm import DKUChatLLM
from dataiku.langchain import LangchainToDKUTracer

import dataiku
from dataiku import SQLExecutor2
from duckduckgo_search import DDGS

OPENAI_CONNECTION_NAME = "REPLACE_WITH_YOUR_CONNECTION_NAME"  # example: "openAI"
model = DKUChatLLM(llm_id=f"openai:{OPENAI_CONNECTION_NAME}:gpt-4o-mini")


def generate_get_customer(dataset_name: str):
    @tool
    def get_customer(customer_id: str) -> str:
        """Get customer name, position and company information from database.

        The input is a customer id (stored as a string).
        The output is a string of the form:
        "The customer's name is \"{name}\", holding the position \"{job}\" at the company named {company}"
        """
        dataset = dataiku.Dataset(dataset_name)
        table_name = dataset.get_location_info().get('info', {}).get('table')
        executor = SQLExecutor2(dataset=dataset)
        customer_id = customer_id.replace("'", "\\'")
        query_reader = executor.query_to_iter(
            f"""SELECT name, job, company FROM "{table_name}" WHERE id = '{customer_id}'""")
        for (name, job, company) in query_reader.iter_tuples():
            return f"The customer's name is \"{name}\", holding the position \"{job}\" at the company named {company}"
        return f"No information can be found about the customer {customer_id}"

    return get_customer


@tool
def search_company_info(company_name: str) -> str:
    """
    Use this tool when you need to retrieve information on a company.

    The input of this tool is the company name.
    The output is either a small recap of the company or "No information …",
    meaning that we couldn't find information about this company.
    """
    with DDGS() as ddgs:
        results = list(ddgs.text(f"{company_name} (company)", max_results=1))
    if results:
        return f"Information found about {company_name}: {results[0]['body']}"
    return f"No information found about {company_name}"


tools = [search_company_info]


class MyLLM(BaseLLM):
    def __init__(self):
        pass

    def set_config(self, config, plugin_config):
        self.config = config
        self.dataset = config.get("dataset_name")
        tools.append(generate_get_customer(dataset_name=self.dataset))
        self.agent = create_openai_tools_agent(model.with_config({"tags": ["agent_llm"]}), tools,
                                               hub.pull("hwchase17/openai-tools-agent"))

    async def aprocess_stream(self, query, settings, trace):
        prompt = query["messages"][0]["content"]
        tracer = LangchainToDKUTracer(dku_trace=trace)
        agent_executor = AgentExecutor(agent=self.agent, tools=tools)

        async for event in agent_executor.astream_events({"input": prompt}, version="v2",
                                                         config={"callbacks": [tracer]}):
            kind = event["event"]
            if kind == "on_chat_model_stream":
                content = event["data"]["chunk"].content
                if content:
                    yield {"chunk": {"text": content}}
            elif kind == "on_tool_start":
                # Event chunks are not part of the answer itself, but can provide progress information
                yield {"chunk": {"type": "event", "eventKind": "tool_call", "eventData": {"name": event["name"]}}}
The tracing workflow starts with the aprocess_stream function in the MyLLM class.
class MyLLM(BaseLLM):
    def __init__(self):
        pass

    async def aprocess_stream(self, query, settings, trace):
You use the trace object to create a LangchainToDKUTracer object. You can then register this tracer in the callbacks configuration of the AgentExecutor asynchronous event stream.
async def aprocess_stream(self, query, settings, trace):
    prompt = query["messages"][0]["content"]
    tracer = LangchainToDKUTracer(dku_trace=trace)
    agent_executor = AgentExecutor(agent=self.agent, tools=tools)

    async for event in agent_executor.astream_events({"input": prompt}, version="v2",
                                                     config={"callbacks": [tracer]}):
This will collect all the traces generated by the different calls made by your LangChain-based Agent.
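To verify the result, you can query the Agent through the LLM Mesh and inspect the trace attached to the response. Below is a minimal sketch, assuming the Agent is exposed under an LLM id of the form agent:<agent id>; replace the id and the prompt with your own:
import dataiku

client = dataiku.api_client()
project = client.get_default_project()
agent = project.get_llm("agent:REPLACE_WITH_YOUR_AGENT_ID")

resp = agent.new_completion().with_message("Give me information about the customer with id 42").execute()
print(resp.text)   # the Agent's answer
print(resp.trace)  # the trace built by LangchainToDKUTracer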
Wrapping up#
With this tutorial, you now have the tools at hand to architect the traces you need:

Create and organize your own traces with trace.subspan.
Build your trace hierarchy with span.append_trace.
For LangChain agents, easily log the execution with a LangChain-compatible tracer, using LangchainToDKUTracer.