Custom Tool Integration Guide
This guide walks through integrating a custom tool into ProbeLLM’s MCP-based tool system using a real-world chemistry example.
Example: Chemistry Tool Integration
We’ll integrate a molecule-to-SMILES converter from an external chemistry library.
Note
Source code reference anonymized for double-blind review.
Original Function:
from rdkit import Chem
def mol_to_smiles(mol: Chem.Mol, isomeric: bool = True, kekule: bool = False) -> str:
    """
    Converts an RDKit molecule to a SMILES string.

    Parameters:
        mol (rdkit.Chem.rdchem.Mol): The RDKit molecule object. It is left unmodified.
        isomeric (bool): Whether to include stereochemistry information.
        kekule (bool): Whether to output the Kekule form.

    Returns:
        str: The SMILES representation of the molecule.
    """
    if kekule:
        # Chem.Kekulize works in place, so kekulize a copy and leave the
        # caller's molecule untouched.
        kekulized = Chem.Mol(mol)
        Chem.Kekulize(kekulized)
        # Forward `isomeric` as well; otherwise the flag is silently ignored
        # whenever the Kekule form is requested.
        return Chem.MolToSmiles(kekulized, isomericSmiles=isomeric, kekuleSmiles=True)
    return Chem.MolToSmiles(mol, isomericSmiles=isomeric)
Step 1: Define the Tool Specification
Create a ToolSpec that describes the tool’s interface:
from probellm.tools import ToolSpec
# Interface description for the mol_to_smiles tool. The tool is called with a
# SMILES string (not an RDKit object), so the description states that; it is
# the text the LLM reads when deciding whether to use the tool.
mol_to_smiles_spec = ToolSpec(
    name="mol_to_smiles",
    description="Parses a SMILES string with RDKit and returns the regenerated SMILES string, with optional stereochemistry and Kekule form.",
    input_schema={
        "type": "object",
        "properties": {
            "smiles_input": {
                "type": "string",
                "description": "Input SMILES string to convert the molecule from"
            },
            "isomeric": {
                "type": "boolean",
                "description": "Whether to include stereochemistry information",
                "default": True
            },
            "kekule": {
                "type": "boolean",
                "description": "Whether to output the Kekule form",
                "default": False
            }
        },
        # Only the SMILES string is mandatory; the flags fall back to defaults.
        "required": ["smiles_input"]
    }
)
Key Components:
name: Unique identifier for the tool (used in call_tool())
description: Explains the tool’s purpose (helps the LLM select the appropriate tool)
input_schema: JSON Schema defining the expected parameters
Step 2: Implement the Handler Function
Create a handler that adapts your function to the MCP interface:
from typing import Dict, Any
from rdkit import Chem
def mol_to_smiles_handler(arguments: Dict[str, Any]) -> Dict[str, Any]:
    """
    MCP tool handler for mol_to_smiles.

    Reads the tool parameters from ``arguments``, parses the SMILES string
    into an RDKit molecule, delegates to ``mol_to_smiles`` and wraps the
    outcome in a dict carrying a ``success`` flag. Problems are reported in
    the returned dict instead of being raised.
    """
    def _failure(message: str) -> Dict[str, Any]:
        # Shape shared by the validation failures below.
        return {"error": message, "success": False}

    try:
        # Pull every parameter up front; optional flags fall back to defaults.
        smiles_input = arguments.get("smiles_input")
        options = {
            "isomeric": arguments.get("isomeric", True),
            "kekule": arguments.get("kekule", False),
        }

        # A missing or empty SMILES string is rejected before touching RDKit.
        if not smiles_input:
            return _failure("smiles_input is required")

        # MolFromSmiles signals an unparsable string by returning None.
        mol = Chem.MolFromSmiles(smiles_input)
        if mol is None:
            return _failure(f"Invalid SMILES string: {smiles_input}")

        # Echo the inputs back next to the converted SMILES.
        return {
            "success": True,
            "smiles": mol_to_smiles(mol, **options),
            "input": smiles_input,
            **options,
        }
    except Exception as e:
        # Anything unexpected is turned into an error payload, never raised.
        return {
            "success": False,
            "error": str(e)
        }
Handler Best Practices:
Parameter Extraction: Use .get() with defaults for optional parameters
Input Validation: Check for required fields and valid values
Error Handling: Catch exceptions and return error information (don’t raise)
Type Conversion: Convert MCP arguments to your function’s expected types
Structured Output: Return a dict with clear success/error indicators
Step 3: Register the Tool
Add the tool to a ToolRegistry:
from probellm.tools import ToolRegistry, LocalMCPTool
def register_chemistry_tools(registry: ToolRegistry) -> None:
    """Add the mol_to_smiles tool to the given registry."""
    # Pair the spec with its handler, then hand the tool to the registry.
    chemistry_tool = LocalMCPTool(mol_to_smiles_spec, mol_to_smiles_handler)
    registry.register(chemistry_tool)
Extending the Default Registry:
from probellm.tools import build_default_tool_registry
def build_extended_tool_registry(model: str, client) -> ToolRegistry:
    """Return the default tool registry with the chemistry tools added."""
    # Default tools first (perturbation, python_exec, web_search) ...
    extended = build_default_tool_registry(model, client)
    # ... then the custom chemistry tools on top.
    register_chemistry_tools(extended)
    return extended
Step 4: Test the Tool
Test your tool directly before integration:
# Create an empty registry that contains only the chemistry tools
registry = ToolRegistry()
register_chemistry_tools(registry)

# Test 1: Standard SMILES for benzene (optional flags left at their defaults)
response = registry.call_tool(
    "mol_to_smiles",
    {"smiles_input": "c1ccccc1"}
)
print("Test 1 - Standard form:")
print(response)
# Output: {
# 'jsonrpc': '2.0',
# 'id': '<uuid>',
# 'result': {
# 'success': True,
# 'smiles': 'c1ccccc1',
# 'input': 'c1ccccc1',
# 'isomeric': True,
# 'kekule': False
# }
# }

# Test 2: Kekule form (same molecule, both optional flags overridden)
response = registry.call_tool(
    "mol_to_smiles",
    {
        "smiles_input": "c1ccccc1",
        "isomeric": False,
        "kekule": True
    }
)
print("\nTest 2 - Kekule form:")
print(response)

# Test 3: Chlorobenzene (Kekule form of a substituted ring)
response = registry.call_tool(
    "mol_to_smiles",
    {
        "smiles_input": "c1ccc(cc1)Cl",
        "isomeric": False,
        "kekule": True
    }
)
print("\nTest 3 - Chlorobenzene (Kekule):")
print(response)

# Test 4: Error handling - invalid SMILES
# The handler catches the problem itself, so the failure is reported inside
# 'result' (success=False) as shown in the expected output below.
response = registry.call_tool(
    "mol_to_smiles",
    {"smiles_input": "invalid_smiles_xyz"}
)
print("\nTest 4 - Invalid SMILES:")
print(response)
# Output: {
# 'jsonrpc': '2.0',
# 'id': '<uuid>',
# 'result': {
# 'success': False,
# 'error': 'Invalid SMILES string: invalid_smiles_xyz'
# }
# }
Step 5: Use in the Pipeline
Integrate your custom registry into the vulnerability detection pipeline:
from probellm import VulnerabilityPipelineAsync
from probellm.utils.testcase_gen import TestCaseGenerator
# Option 1: Use in TestCaseGenerator
generator = TestCaseGenerator(model="gpt-4")
# Swap in the extended registry (default tools + chemistry tools), reusing
# the client the generator already holds.
generator.tool_registry = build_extended_tool_registry(
    model="gpt-4",
    client=generator.client
)
# Now the LLM can choose to use mol_to_smiles when appropriate
result = generator.generate_nearby(
    "Convert c1ccccc1 to canonical SMILES",
    "c1ccccc1"
)

# Option 2: Use in the full pipeline
# (if the pipeline supports custom tool registries)
# NOTE(review): `your_client` is a placeholder that is not defined in this
# snippet, and `registry` is built here but never handed to the pipeline
# below - confirm which constructor argument (if any) accepts it.
registry = build_extended_tool_registry(model="gpt-4", client=your_client)
pipeline = VulnerabilityPipelineAsync(
    model_name="gpt-5.2",
    test_model="gpt-4o-mini",
    judge_model="gpt-5.2",
    # Pass custom registry if supported
)
Understanding the MCP Response Format
When you call a tool via ToolRegistry.call_tool(), you get a JSON-RPC 2.0 envelope:
Success Response:
{
"jsonrpc": "2.0",
"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"result": {
"success": true,
"smiles": "c1ccccc1",
"input": "c1ccccc1",
"isomeric": true,
"kekule": false
}
}
Error Response (Tool Not Found):
{
"jsonrpc": "2.0",
"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"error": {
"code": "tool_not_found",
"message": "Tool 'unknown_tool' is not registered"
}
}
Error Response (Tool Execution Failed):
{
"jsonrpc": "2.0",
"id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
"error": {
"code": "tool_error",
"message": "Invalid SMILES string: xyz"
}
}
Advanced Patterns
Composing Multiple Tools
Your handler can call other tools in the registry:
def advanced_chemistry_handler(arguments: Dict[str, Any], registry: "ToolRegistry") -> Dict[str, Any]:
    """
    Handler that uses multiple tools.

    Chains three registry tools: ``web_search`` for background information,
    ``mol_to_smiles`` to standardize the SMILES, and ``python_exec`` to
    compute molecular descriptors for the standardized SMILES.

    Parameters:
        arguments: Must contain ``compound_name`` and ``smiles_input``.
        registry: Registry that provides the three tools named above.

    Returns:
        On success, a dict with ``success``, ``smiles``, ``web_info`` and
        ``descriptors``. On any failure, ``{"success": False, "error": ...}``;
        errors are returned rather than raised.
    """
    try:
        compound_name = arguments.get("compound_name")
        smiles_input = arguments.get("smiles_input")
        if not compound_name or not smiles_input:
            return {
                "success": False,
                "error": "compound_name and smiles_input are required"
            }

        # Step 1: Use web_search to get molecular properties
        search_result = registry.call_tool("web_search", {
            "topic": f"chemical properties of {compound_name}"
        })

        # Step 2: Use mol_to_smiles to standardize representation
        smiles_result = registry.call_tool("mol_to_smiles", {
            "smiles_input": smiles_input
        })
        smiles_payload = smiles_result.get("result") or {}
        if "smiles" not in smiles_payload:
            # The conversion can fail (e.g. invalid SMILES); stop here
            # instead of hitting a KeyError further down.
            detail = (
                smiles_payload.get("error")
                or smiles_result.get("error")
                or "no SMILES returned"
            )
            return {"success": False, "error": f"mol_to_smiles failed: {detail}"}
        smiles = smiles_payload["smiles"]

        # Step 3: Use python_exec to compute molecular descriptors.
        # The SMILES is embedded with !r so backslashes (cis/trans bonds) and
        # quotes are escaped and cannot break or alter the generated source.
        code = "\n".join([
            "from rdkit import Chem",
            "from rdkit.Chem import Descriptors",
            f"mol = Chem.MolFromSmiles({smiles!r})",
            "mw = Descriptors.MolWt(mol)",
            "logp = Descriptors.MolLogP(mol)",
            'print(f"MW={mw}, LogP={logp}")',
        ])
        exec_result = registry.call_tool("python_exec", {"code": code})

        return {
            "success": True,
            "smiles": smiles,
            "web_info": search_result["result"],
            "descriptors": exec_result["result"]["stdout"]
        }
    except Exception as e:
        # Report failures in the payload rather than raising.
        return {"success": False, "error": str(e)}
Conditional Tool Selection
Handler can adaptively choose strategies:
def smart_chemistry_handler(arguments: Dict[str, Any]) -> Dict[str, Any]:
    """Pick a conversion strategy from the length of the input SMILES."""
    smiles_input = arguments["smiles_input"]
    # Short strings (< 20 chars) count as simple molecules and are converted
    # directly; everything else goes through the advanced validation path.
    is_simple = len(smiles_input) < 20
    strategy = simple_conversion if is_simple else advanced_validation_pipeline
    return strategy(smiles_input)
Domain-Specific Validation
Add custom validation logic:
def validated_chemistry_handler(arguments: Dict[str, Any]) -> Dict[str, Any]:
    """
    Handler with domain-specific validation.

    Rejects missing or non-string input, SMILES strings longer than 200
    characters, and molecules containing elements outside the allowed set,
    then delegates to ``mol_to_smiles_handler``. Validation failures are
    returned as ``{"success": False, "error": ...}`` rather than raised.
    """
    # Use .get() so a missing field yields an error payload, not a KeyError.
    smiles_input = arguments.get("smiles_input")
    if not smiles_input:
        return {"error": "smiles_input is required", "success": False}
    if not isinstance(smiles_input, str):
        return {"success": False, "error": "smiles_input must be a string"}

    # Validate SMILES complexity
    if len(smiles_input) > 200:
        return {
            "success": False,
            "error": "SMILES string too long (max 200 characters)"
        }

    # Validate allowed elements (e.g., only organic)
    mol = Chem.MolFromSmiles(smiles_input)
    if mol is not None:
        allowed_elements = {'C', 'H', 'N', 'O', 'S', 'P', 'F', 'Cl', 'Br', 'I'}
        disallowed = {atom.GetSymbol() for atom in mol.GetAtoms()} - allowed_elements
        if disallowed:
            # Sort so the message does not depend on set iteration order.
            return {
                "success": False,
                "error": f"Disallowed elements: {', '.join(sorted(disallowed))}"
            }

    # Proceed with conversion. An unparsable SMILES (mol is None) also falls
    # through here and is reported by mol_to_smiles_handler.
    return mol_to_smiles_handler(arguments)
Complete Integration Example
Here’s a complete file you can use as a template:
"""Custom chemistry tools for ProbeLLM."""
from typing import Dict, Any
from rdkit import Chem
from probellm.tools import ToolRegistry, LocalMCPTool, ToolSpec, build_default_tool_registry
# ============================================================================
# Original Function (from external chemistry library)
# Source: (Anonymized for review)
# ============================================================================
def mol_to_smiles(mol: Chem.Mol, isomeric: bool = True, kekule: bool = False) -> str:
    """Converts an RDKit molecule to a SMILES string without modifying it.

    ``isomeric`` controls stereochemistry output and ``kekule`` selects the
    Kekule form; both flags are honored together.
    """
    if kekule:
        # Chem.Kekulize works in place, so kekulize a copy and leave the
        # caller's molecule untouched.
        kekulized = Chem.Mol(mol)
        Chem.Kekulize(kekulized)
        # Forward `isomeric` as well so it is not ignored in Kekule mode.
        return Chem.MolToSmiles(kekulized, isomericSmiles=isomeric, kekuleSmiles=True)
    return Chem.MolToSmiles(mol, isomericSmiles=isomeric)
# ============================================================================
# MCP Tool Integration
# ============================================================================
# Tool interface. The description says what the tool actually accepts (a
# SMILES string), since the input schema has no way to pass an RDKit object.
mol_to_smiles_spec = ToolSpec(
    name="mol_to_smiles",
    description="Parses a SMILES string with RDKit and returns the regenerated SMILES string, with optional stereochemistry and Kekule form.",
    input_schema={
        "type": "object",
        "properties": {
            "smiles_input": {
                "type": "string",
                "description": "Input SMILES string to convert the molecule from"
            },
            "isomeric": {
                "type": "boolean",
                "description": "Whether to include stereochemistry information",
                "default": True
            },
            "kekule": {
                "type": "boolean",
                "description": "Whether to output the Kekule form",
                "default": False
            }
        },
        # Only the SMILES string is mandatory; the flags fall back to defaults.
        "required": ["smiles_input"]
    }
)
def mol_to_smiles_handler(arguments: Dict[str, Any]) -> Dict[str, Any]:
    """MCP handler for the mol_to_smiles tool; errors are returned, not raised."""
    def _failure(message: str) -> Dict[str, Any]:
        # Shape shared by the validation failures below.
        return {"error": message, "success": False}

    try:
        smiles_input = arguments.get("smiles_input")
        options = {
            "isomeric": arguments.get("isomeric", True),
            "kekule": arguments.get("kekule", False),
        }
        if not smiles_input:
            return _failure("smiles_input is required")

        # MolFromSmiles signals an unparsable string by returning None.
        mol = Chem.MolFromSmiles(smiles_input)
        if mol is None:
            return _failure(f"Invalid SMILES string: {smiles_input}")

        return {
            "success": True,
            "smiles": mol_to_smiles(mol, **options),
            "input": smiles_input,
            **options,
        }
    except Exception as e:
        return {"success": False, "error": str(e)}
def register_chemistry_tools(registry: ToolRegistry) -> None:
    """Add every chemistry tool (currently mol_to_smiles) to the registry."""
    chemistry_tool = LocalMCPTool(mol_to_smiles_spec, mol_to_smiles_handler)
    registry.register(chemistry_tool)
def build_chemistry_tool_registry(model: str, client) -> ToolRegistry:
    """Return the default tool registry extended with the chemistry tools."""
    combined = build_default_tool_registry(model, client)
    register_chemistry_tools(combined)
    return combined
# ============================================================================
# Usage Example
# ============================================================================
if __name__ == "__main__":
    # Create a registry that holds only the chemistry tools
    registry = ToolRegistry()
    register_chemistry_tools(registry)
    # Test the tool: request the Kekule form of benzene and print the
    # response returned by call_tool
    response = registry.call_tool("mol_to_smiles", {
        "smiles_input": "c1ccccc1",
        "kekule": True
    })
    print(response)
Troubleshooting
Tool Not Registered Error
# Problem: Tool name mismatch - the tool is registered as "mol_smiles" but
# called as "mol_to_smiles"
registry.register(LocalMCPTool(ToolSpec(name="mol_smiles", ...), handler))
registry.call_tool("mol_to_smiles", {...}) # ❌ Wrong name
# Solution: Use exact name from spec
registry.call_tool("mol_smiles", {...}) # ✅ Correct
Input Validation Errors
# Problem: Missing required field
result = registry.call_tool("mol_to_smiles", {"kekule": True})
# The handler payload is: {"error": "smiles_input is required", ...}
# Solution: Always provide required fields
result = registry.call_tool("mol_to_smiles", {
    "smiles_input": "c1ccccc1", # ✅ Required field present
    "kekule": True
})
Handler Exceptions
# Problem: Unhandled exception in handler
def bad_handler(args):
    """Anti-example: direct indexing lets a missing field escape as KeyError."""
    value = args["required_field"] # ❌ May raise KeyError
    return {"result": process(value)}
# Solution: Use .get() and try/except
def good_handler(args):
    """Recommended pattern: safe lookup, explicit check, errors returned as data."""
    try:
        value = args.get("required_field") # ✅ Safe access
        if value:
            return {"result": process(value), "success": True}
        # Missing or empty field: report it instead of raising.
        return {"error": "required_field missing", "success": False}
    except Exception as exc:
        return {"error": str(exc), "success": False}
Next Steps
Explore Other Examples: Check probellm/tools/builtin.py for more patterns
Add Multiple Tools: Create a collection of related tools (e.g., chemistry toolkit)
Contribute: Submit your tools as examples for other users
Integrate with MCTS: Let the search engine automatically discover when to use your tools
See Also
Tool System (MCP-Based): Full API reference
Architecture Overview: How tools fit into the system