Coverage for src / agent / memory / mem0_utils.py: 41%
110 statements
« prev ^ index » next — coverage.py v7.13.0, created at 2025-12-11 14:30 +0000
1# Copyright 2025-2026 Microsoft Corporation
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15"""Utility functions for mem0 integration.
17This module provides helper functions for extracting LLM configuration
18from AgentConfig and creating mem0 Memory instances.
19"""
21import logging
22import os
23from pathlib import Path
24from typing import Any
26from agent.config.schema import AgentSettings
28logger = logging.getLogger(__name__)
# Providers that mem0 supports for LLM
# Note: local, github, anthropic require OPENAI_API_KEY for embeddings
SUPPORTED_PROVIDERS = ["openai", "anthropic", "azure", "gemini", "github", "local"]


def is_provider_compatible(config: "AgentSettings") -> tuple[bool, str]:
    """Check whether the configured LLM provider works with mem0.

    Args:
        config: Agent configuration with LLM settings

    Returns:
        Tuple of (is_compatible, reason_if_not); the reason string is
        empty when the provider is compatible.

    Example:
        >>> config = AgentConfig(llm_provider="local")
        >>> is_compatible, reason = is_provider_compatible(config)
        >>> # (True, "") - but requires OPENAI_API_KEY for embeddings
    """
    provider = config.llm_provider
    if provider in SUPPORTED_PROVIDERS:
        return True, ""
    # foundry is known but untested; everything else is unrecognized.
    if provider == "foundry":
        return False, "foundry provider not yet tested with mem0"
    return False, f"unknown provider '{provider}'"
def extract_llm_config(config: "AgentSettings") -> dict[str, Any]:
    """Extract LLM configuration from AgentConfig for mem0.

    Converts agent's LLM configuration to mem0-compatible format,
    enabling mem0 to reuse the same LLM provider and model as the agent.

    Args:
        config: Agent configuration with LLM settings

    Returns:
        Dict with mem0 LLM configuration

    Raises:
        ValueError: If the provider is unsupported, or if a required
            OPENAI_API_KEY is missing (azure without it, local without it).

    Example:
        >>> config = AgentConfig(llm_provider="openai", openai_api_key="sk-...")
        >>> llm_config = extract_llm_config(config)
        >>> # {"provider": "openai", "config": {"model": "gpt-4o-mini", "api_key": "sk-..."}}
    """
    # Map agent providers to mem0 providers; complex providers get helpers.
    provider = config.llm_provider

    if provider == "openai":
        return _openai_llm_config(config)

    if provider == "anthropic":
        return {
            "provider": "anthropic",
            "config": {
                "model": config.anthropic_model,
                "api_key": config.anthropic_api_key,
            },
        }

    if provider == "azure":
        return _azure_llm_config(config)

    if provider == "gemini":
        return {
            "provider": "gemini",
            "config": {
                "model": config.gemini_model,
                "api_key": config.gemini_api_key,
            },
        }

    if provider == "github":
        return _github_llm_config(config)

    if provider == "local":
        return _local_llm_config(config)

    # Unsupported provider (foundry, unknown)
    raise ValueError(
        f"mem0 does not support '{config.llm_provider}' provider. "
        f"Supported providers: {', '.join(SUPPORTED_PROVIDERS)}. "
        "Use MEMORY_TYPE=in_memory for provider-independent memory."
    )


def _openai_llm_config(config: "AgentSettings") -> dict[str, Any]:
    """Build the mem0 LLM config for the OpenAI provider."""
    # Allow model override for mem0 (e.g., if project has limited model access)
    model = os.getenv("MEM0_LLM_MODEL", config.openai_model)
    return {
        "provider": "openai",
        "config": {
            "model": model,
            "api_key": config.openai_api_key,
            "openai_base_url": "https://api.openai.com/v1",  # Force direct OpenAI API
        },
    }


def _azure_llm_config(config: "AgentSettings") -> dict[str, Any]:
    """Build the mem0 LLM config for the Azure provider.

    Azure OpenAI in mem0 doesn't accept azure_endpoint/api_version, so when
    OPENAI_API_KEY is available mem0's LLM calls are routed to direct OpenAI
    instead; otherwise this raises so the caller can fall back.
    """
    openai_api_key = os.getenv("OPENAI_API_KEY")
    if not openai_api_key:
        # Azure OpenAI support in mem0 is limited
        logger.warning(
            "Azure OpenAI provider not fully supported by mem0. "
            "Set OPENAI_API_KEY environment variable for reliable mem0 operation. "
            "Falling back to InMemoryStore."
        )
        raise ValueError(
            "mem0 does not fully support Azure OpenAI provider. "
            "Set OPENAI_API_KEY environment variable to use mem0 with OpenAI embeddings/LLM, "
            "or use MEMORY_TYPE=in_memory for provider-independent memory."
        )

    # Use MEM0_LLM_MODEL if set, otherwise try agent's OpenAI model if
    # configured, finally fall back to gpt-4o-mini.
    model = os.getenv("MEM0_LLM_MODEL")
    if not model:
        model = config.openai_model if config.openai_api_key else "gpt-4o-mini"

    logger.info(
        f"Using OpenAI provider for mem0 LLM (model: {model}). "
        "Azure OpenAI will still be used for agent completions."
    )
    return {
        "provider": "openai",
        "config": {
            "model": model,
            "api_key": openai_api_key,
            "openai_base_url": "https://api.openai.com/v1",
        },
    }


def _github_llm_config(config: "AgentSettings") -> dict[str, Any]:
    """Build the mem0 LLM config for GitHub Models (OpenAI-compatible API).

    Embeddings still require direct OpenAI API (OPENAI_API_KEY); the
    embedder config handles that separately.
    """
    from agent.providers.github.auth import get_github_token

    github_token = config.github_token or get_github_token()

    # Construct base URL matching GitHubChatClient behavior
    if config.github_org:
        base_url = f"{config.github_endpoint}/orgs/{config.github_org}/inference"
    else:
        base_url = f"{config.github_endpoint}/inference"

    return {
        "provider": "github",  # Mark as github so embedder knows to use OpenAI
        "config": {
            "model": config.github_model,
            "api_key": github_token,
            "openai_base_url": base_url,
        },
    }


def _local_llm_config(config: "AgentSettings") -> dict[str, Any]:
    """Build the mem0 LLM config for local Docker models (OpenAI-compatible API)."""
    # Embeddings require direct OpenAI API, so fail fast when the key is absent.
    if not os.getenv("OPENAI_API_KEY"):
        raise ValueError(
            "Local provider with mem0 requires OPENAI_API_KEY for embeddings. "
            "Set OPENAI_API_KEY or use MEMORY_TYPE=in_memory."
        )

    return {
        "provider": "local",  # Mark as local so embedder knows to use OpenAI
        "config": {
            "model": config.local_model,
            "api_key": "not-needed",
            "openai_base_url": config.local_base_url,
        },
    }
def _create_embedder_config(llm_config: dict[str, Any]) -> dict[str, Any]:
    """Create embedder config from LLM config.

    Args:
        llm_config: LLM configuration dict from extract_llm_config()

    Returns:
        Dict with embedder configuration including provider-specific embedding models
    """
    provider = str(llm_config["provider"])

    # Embedder configs carry only the parameters that embeddings need,
    # which differ from the full LLM parameter set.
    if provider == "azure_openai":
        # Azure OpenAI embeddings in mem0 need special handling: prefer
        # direct OpenAI embeddings whenever OPENAI_API_KEY is available.
        openai_api_key = os.getenv("OPENAI_API_KEY")
        if openai_api_key:
            logger.info(
                "Using OpenAI for embeddings (OPENAI_API_KEY found). "
                "Azure OpenAI will still be used for LLM completions."
            )
            return {
                "provider": "openai",
                "config": {
                    "model": get_embedding_model(llm_config),
                    "api_key": openai_api_key,
                },
            }
        # Fall back to trying Azure with minimal config; this may not work
        # reliably - recommend setting OPENAI_API_KEY.
        logger.warning(
            "Azure OpenAI provider selected but OPENAI_API_KEY not found. "
            "mem0 embeddings work best with OpenAI API. "
            "Set OPENAI_API_KEY environment variable for reliable mem0 operation. "
            "Attempting to use Azure embeddings (may fail)..."
        )
        return {
            "provider": "openai",  # Use OpenAI provider format
            "config": {
                "model": get_embedding_model(llm_config),
                "api_key": llm_config["config"].get("api_key", ""),
            },
        }

    if provider == "openai":
        # OpenAI embeddings reuse the same API key as the LLM.
        return {
            "provider": "openai",
            "config": {
                "model": get_embedding_model(llm_config),
                "api_key": llm_config["config"].get("api_key"),
            },
        }

    if provider in ("anthropic", "github", "local"):
        # These providers have no embeddings of their own in mem0, so
        # OpenAI embeddings (and therefore OPENAI_API_KEY) are required.
        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            raise ValueError(
                f"{provider.title()} provider with mem0 requires OPENAI_API_KEY for embeddings. "
                "Set OPENAI_API_KEY or use MEMORY_TYPE=in_memory."
            )
        return {
            "provider": "openai",
            "config": {
                "model": "text-embedding-3-small",
                "api_key": openai_api_key,
            },
        }

    if provider == "gemini":
        # Gemini exposes its own embedding endpoint.
        return {
            "provider": "gemini",
            "config": {
                "model": get_embedding_model(llm_config),
                "api_key": llm_config["config"].get("api_key"),
            },
        }

    # Default: copy the LLM config wholesale, overriding only the model.
    fallback = {
        "provider": provider,
        "config": llm_config["config"].copy(),
    }
    fallback["config"]["model"] = get_embedding_model(llm_config)
    return fallback
def get_embedding_model(llm_config: dict[str, Any]) -> str:
    """Get the embedding model name for a given LLM configuration.

    Args:
        llm_config: LLM configuration dict from extract_llm_config()

    Returns:
        The embedding model name (without provider suffixes)
    """
    per_provider = {
        "openai": "text-embedding-3-small",
        "anthropic": "voyage-3.5-lite",
        "azure_openai": "text-embedding-3-small",
        "gemini": "text-embedding-004",
    }
    # Unknown providers fall back to the OpenAI default model.
    return per_provider.get(llm_config["provider"], "text-embedding-3-small")
def get_storage_path(config: "AgentSettings") -> Path:
    """Get the storage path for the local Chroma database.

    Args:
        config: Agent configuration

    Returns:
        Path to Chroma database directory

    Example:
        >>> path = get_storage_path(config)
        >>> # /Users/daniel/.agent/mem0_data/chroma_db
    """
    # Explicit override wins. (Path is already imported at module level;
    # the previous function-local re-import was redundant.)
    if config.mem0_storage_path:
        return Path(config.mem0_storage_path)

    # Default to memory_dir/chroma_db
    if config.memory_dir:
        return config.memory_dir / "chroma_db"

    # Fallback to agent_data_dir (should always be set, but handle None gracefully)
    if config.agent_data_dir:
        return config.agent_data_dir / "mem0_data" / "chroma_db"

    # Final fallback to home directory
    return Path.home() / ".agent" / "mem0_data" / "chroma_db"
def create_memory_instance(config: "AgentSettings") -> Any:
    """Create mem0 Memory instance with proper configuration.

    Uses Memory.from_config for both local (Chroma) and cloud (mem0.ai) modes,
    ensuring consistent API across both deployment options.

    Args:
        config: Agent configuration with mem0 and LLM settings

    Returns:
        Configured mem0.Memory instance

    Raises:
        ImportError: If mem0ai or chromadb packages not installed
        ValueError: If configuration is invalid

    Example:
        >>> config = AgentConfig.from_env()
        >>> memory = create_memory_instance(config)
    """
    try:
        from mem0 import Memory
    except ImportError as e:
        # Chain the original ImportError so the missing-module detail survives.
        raise ImportError(
            "mem0ai package not installed. "
            "Install with: uv pip install -e '.[mem0]' (or pip install -e '.[mem0]')"
        ) from e

    # Cloud mode requires both an API key and an org id.
    is_cloud_mode = bool(config.mem0_api_key and config.mem0_org_id)

    # LLM and embedder configs are identical in both modes (embeddings reuse
    # the LLM provider credentials); only the vector store differs.
    llm_config = extract_llm_config(config)
    embedder_config = _create_embedder_config(llm_config)

    if is_cloud_mode:
        # Cloud mode - use mem0.ai service
        logger.info("Initializing mem0 in cloud mode (mem0.ai)")
        vector_store = {
            "provider": "mem0",
            "config": {
                "api_key": config.mem0_api_key,
                "org_id": config.mem0_org_id,
            },
        }
    else:
        # Local mode - use Chroma file-based storage
        storage_path = get_storage_path(config)
        logger.info(f"Initializing mem0 in local mode: {storage_path}")

        # Ensure storage directory exists before Chroma opens it
        storage_path.mkdir(parents=True, exist_ok=True)
        vector_store = {
            "provider": "chroma",
            "config": {
                "path": str(storage_path),
                "collection_name": "agent_memories",
            },
        }

    mem0_config = {
        "llm": llm_config,
        "embedder": embedder_config,
        "vector_store": vector_store,
    }

    try:
        memory = Memory.from_config(mem0_config)
    except Exception as e:
        # Chain the cause so initialization failures keep the root traceback.
        raise ValueError(f"Failed to initialize mem0 Memory: {e}") from e
    logger.debug(
        f"mem0 Memory instance created successfully ({'cloud' if is_cloud_mode else 'local'} mode)"
    )
    return memory