Coverage for src/agent/memory/mem0_utils.py: 41% (110 statements)
coverage.py v7.13.0, created at 2025-12-11 14:30 +0000

# Copyright 2025-2026 Microsoft Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15"""Utility functions for mem0 integration. 

16 

17This module provides helper functions for extracting LLM configuration 

18from AgentConfig and creating mem0 Memory instances. 

19""" 

20 

21import logging 

22import os 

23from pathlib import Path 

24from typing import Any 

25 

26from agent.config.schema import AgentSettings 

27 

28logger = logging.getLogger(__name__) 

29 

30# Providers that mem0 supports for LLM 

31# Note: local, github, anthropic require OPENAI_API_KEY for embeddings 

32SUPPORTED_PROVIDERS = ["openai", "anthropic", "azure", "gemini", "github", "local"] 

33 

34 

35def is_provider_compatible(config: AgentSettings) -> tuple[bool, str]: 

36 """Check if LLM provider is compatible with mem0. 

37 

38 Args: 

39 config: Agent configuration with LLM settings 

40 

41 Returns: 

42 Tuple of (is_compatible, reason_if_not) 

43 

44 Example: 

45 >>> config = AgentConfig(llm_provider="local") 

46 >>> is_compatible, reason = is_provider_compatible(config) 

47 >>> # (True, "") - but requires OPENAI_API_KEY for embeddings 

48 """ 

49 if config.llm_provider in SUPPORTED_PROVIDERS: 

50 return True, "" 

51 elif config.llm_provider == "foundry": 

52 return False, "foundry provider not yet tested with mem0" 

53 else: 

54 return False, f"unknown provider '{config.llm_provider}'" 
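

# Hypothetical usage sketch: a caller could gate mem0 setup on this check and
# fall back to in-memory storage (MEMORY_TYPE=in_memory) otherwise. The helper
# below is illustrative only, not a public API of this module.
def _should_enable_mem0(config: AgentSettings) -> bool:
    compatible, reason = is_provider_compatible(config)
    if not compatible:
        logger.warning("mem0 disabled: %s", reason)
    return compatible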



def extract_llm_config(config: AgentSettings) -> dict[str, Any]:
    """Extract LLM configuration from AgentSettings for mem0.

    Converts the agent's LLM configuration to a mem0-compatible format,
    enabling mem0 to reuse the same LLM provider and model as the agent.

    Args:
        config: Agent configuration with LLM settings

    Returns:
        Dict with mem0 LLM configuration

    Example:
        >>> config = AgentSettings(llm_provider="openai", openai_api_key="sk-...")
        >>> llm_config = extract_llm_config(config)
        >>> # {"provider": "openai", "config": {"model": "gpt-4o-mini", "api_key": "sk-..."}}
    """
    # Map agent providers to mem0 providers
    if config.llm_provider == "openai":
        # Allow a model override for mem0 (e.g., if the project has limited model access)
        openai_mem0_model = os.getenv("MEM0_LLM_MODEL", config.openai_model)

        return {
            "provider": "openai",
            "config": {
                "model": openai_mem0_model,
                "api_key": config.openai_api_key,
                "openai_base_url": "https://api.openai.com/v1",  # Force direct OpenAI API
            },
        }

    elif config.llm_provider == "anthropic":
        return {
            "provider": "anthropic",
            "config": {
                "model": config.anthropic_model,
                "api_key": config.anthropic_api_key,
            },
        }

    elif config.llm_provider == "azure":
        # Azure OpenAI in mem0 doesn't accept azure_endpoint/api_version.
        # Use a minimal config with just model and api_key.
        # Note: this may not work properly; recommend using the OpenAI provider instead.

        # Check if the user has an OpenAI API key available
        openai_api_key = os.getenv("OPENAI_API_KEY")

        if openai_api_key:
            # Use the OpenAI provider for better compatibility.
            # Use MEM0_LLM_MODEL if set, otherwise the agent's OpenAI model if
            # configured, and finally fall back to gpt-4o-mini.
            azure_mem0_model_override = os.getenv("MEM0_LLM_MODEL")
            if azure_mem0_model_override:
                azure_mem0_model: str = azure_mem0_model_override
            elif config.openai_api_key:
                # Use the agent's OpenAI model if it's configured
                azure_mem0_model = config.openai_model
            else:
                # Final fallback
                azure_mem0_model = "gpt-4o-mini"

            logger.info(
                f"Using OpenAI provider for mem0 LLM (model: {azure_mem0_model}). "
                "Azure OpenAI will still be used for agent completions."
            )
            return {
                "provider": "openai",
                "config": {
                    "model": azure_mem0_model,
                    "api_key": openai_api_key,
                    "openai_base_url": "https://api.openai.com/v1",
                },
            }
        else:
            # Azure OpenAI support in mem0 is limited
            logger.warning(
                "Azure OpenAI provider not fully supported by mem0. "
                "Set OPENAI_API_KEY environment variable for reliable mem0 operation. "
                "Falling back to InMemoryStore."
            )
            raise ValueError(
                "mem0 does not fully support the Azure OpenAI provider. "
                "Set the OPENAI_API_KEY environment variable to use mem0 with OpenAI embeddings/LLM, "
                "or use MEMORY_TYPE=in_memory for provider-independent memory."
            )

    elif config.llm_provider == "gemini":
        return {
            "provider": "gemini",
            "config": {
                "model": config.gemini_model,
                "api_key": config.gemini_api_key,
            },
        }

    elif config.llm_provider == "github":
        # GitHub Models uses an OpenAI-compatible API for the LLM,
        # but embeddings require the direct OpenAI API (OPENAI_API_KEY).
        from agent.providers.github.auth import get_github_token

        github_token = config.github_token or get_github_token()

        # Construct a base URL matching GitHubChatClient behavior
        if config.github_org:
            base_url = f"{config.github_endpoint}/orgs/{config.github_org}/inference"
        else:
            base_url = f"{config.github_endpoint}/inference"

        return {
            "provider": "github",  # Mark as github so the embedder knows to use OpenAI
            "config": {
                "model": config.github_model,
                "api_key": github_token,
                "openai_base_url": base_url,
            },
        }

    elif config.llm_provider == "local":
        # Local Docker models use an OpenAI-compatible API for the LLM,
        # but embeddings require the direct OpenAI API (OPENAI_API_KEY).
        openai_api_key = os.getenv("OPENAI_API_KEY")
        if not openai_api_key:
            raise ValueError(
                "Local provider with mem0 requires OPENAI_API_KEY for embeddings. "
                "Set OPENAI_API_KEY or use MEMORY_TYPE=in_memory."
            )

        return {
            "provider": "local",  # Mark as local so the embedder knows to use OpenAI
            "config": {
                "model": config.local_model,
                "api_key": "not-needed",
                "openai_base_url": config.local_base_url,
            },
        }

    else:
        # Unsupported provider (foundry, unknown)
        raise ValueError(
            f"mem0 does not support '{config.llm_provider}' provider. "
            f"Supported providers: {', '.join(SUPPORTED_PROVIDERS)}. "
            "Use MEMORY_TYPE=in_memory for provider-independent memory."
        )
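

# Illustrative doctest-style sketch: the MEM0_LLM_MODEL override above lets
# mem0 run on a different (e.g., cheaper) model than the agent itself; the
# model name here is a hypothetical example value.
#
#     >>> os.environ["MEM0_LLM_MODEL"] = "gpt-4o-mini"
#     >>> llm_config = extract_llm_config(config)  # config.llm_provider == "openai"
#     >>> llm_config["config"]["model"]
#     'gpt-4o-mini'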



def _create_embedder_config(llm_config: dict[str, Any]) -> dict[str, Any]:
    """Create embedder config from LLM config.

    Args:
        llm_config: LLM configuration dict from extract_llm_config()

    Returns:
        Dict with embedder configuration including provider-specific embedding models
    """
    provider: str = str(llm_config["provider"])
    embedder_config: dict[str, Any]

    # Build embedder config with only the parameters needed for embeddings.
    # Different from LLM config because embeddings don't use all the same parameters.
    if provider == "azure_openai":
        # Azure OpenAI embeddings in mem0 require special handling.
        # Check if an OpenAI API key is available for embeddings.
        openai_api_key = os.getenv("OPENAI_API_KEY")

        if openai_api_key:
            # Use OpenAI for embeddings (mem0 works better with OpenAI embeddings)
            logger.info(
                "Using OpenAI for embeddings (OPENAI_API_KEY found). "
                "Azure OpenAI will still be used for LLM completions."
            )
            embedder_config = {
                "provider": "openai",
                "config": {
                    "model": get_embedding_model(llm_config),
                    "api_key": openai_api_key,
                },
            }
        else:
            # Fall back to trying Azure, but with minimal config.
            # This may not work reliably; recommend setting OPENAI_API_KEY.
            logger.warning(
                "Azure OpenAI provider selected but OPENAI_API_KEY not found. "
                "mem0 embeddings work best with OpenAI API. "
                "Set OPENAI_API_KEY environment variable for reliable mem0 operation. "
                "Attempting to use Azure embeddings (may fail)..."
            )
            embedder_config = {
                "provider": "openai",  # Use OpenAI provider format
                "config": {
                    "model": get_embedding_model(llm_config),
                    "api_key": llm_config["config"].get("api_key", ""),
                },
            }
    elif provider == "openai":
        # OpenAI embeddings - use the same API key
        embedder_config = {
            "provider": "openai",
            "config": {
                "model": get_embedding_model(llm_config),
                "api_key": llm_config["config"].get("api_key"),
            },
        }
    elif provider in ("anthropic", "github", "local"):
        # These providers don't have their own embeddings in mem0.
        # Use OpenAI embeddings (requires OPENAI_API_KEY).
        openai_api_key = os.getenv("OPENAI_API_KEY")

        if openai_api_key:
            embedder_config = {
                "provider": "openai",
                "config": {
                    "model": "text-embedding-3-small",
                    "api_key": openai_api_key,
                },
            }
        else:
            raise ValueError(
                f"{provider.title()} provider with mem0 requires OPENAI_API_KEY for embeddings. "
                "Set OPENAI_API_KEY or use MEMORY_TYPE=in_memory."
            )
    elif provider == "gemini":
        # Gemini embeddings
        embedder_config = {
            "provider": "gemini",
            "config": {
                "model": get_embedding_model(llm_config),
                "api_key": llm_config["config"].get("api_key"),
            },
        }
    else:
        # Default: copy all config but override the model
        embedder_config = {
            "provider": provider,
            "config": llm_config["config"].copy(),
        }
        embedder_config["config"]["model"] = get_embedding_model(llm_config)

    return embedder_config
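

# Illustrative doctest-style sketch of the two-step derivation performed by
# create_memory_instance below, assuming an "openai" provider: the embedder
# reuses the LLM credentials but swaps in an embedding model.
#
#     >>> llm_config = extract_llm_config(config)
#     >>> embedder = _create_embedder_config(llm_config)
#     >>> embedder["config"]["model"]
#     'text-embedding-3-small'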



def get_embedding_model(llm_config: dict[str, Any]) -> str:
    """Get the embedding model name for a given LLM configuration.

    Args:
        llm_config: LLM configuration dict from extract_llm_config()

    Returns:
        The embedding model name (without provider suffixes)
    """
    if llm_config["provider"] == "openai":
        return "text-embedding-3-small"
    elif llm_config["provider"] == "anthropic":
        return "voyage-3.5-lite"
    elif llm_config["provider"] == "azure_openai":
        return "text-embedding-3-small"
    elif llm_config["provider"] == "gemini":
        return "text-embedding-004"
    else:
        # Default for unknown providers
        return "text-embedding-3-small"
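

# The same lookup expressed as a mapping, for reference; unknown providers fall
# back to "text-embedding-3-small" as in get_embedding_model above. This
# constant is illustrative only and unused by the module.
_EMBEDDING_MODEL_BY_PROVIDER: dict[str, str] = {
    "openai": "text-embedding-3-small",
    "anthropic": "voyage-3.5-lite",
    "azure_openai": "text-embedding-3-small",
    "gemini": "text-embedding-004",
}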



def get_storage_path(config: AgentSettings) -> Path:
    """Get the storage path for the local Chroma database.

    Args:
        config: Agent configuration

    Returns:
        Path to the Chroma database directory

    Example:
        >>> path = get_storage_path(config)
        >>> # /Users/daniel/.agent/mem0_data/chroma_db
    """
    if config.mem0_storage_path:
        return Path(config.mem0_storage_path)

    # Default to memory_dir/chroma_db
    if config.memory_dir:
        return config.memory_dir / "chroma_db"

    # Fallback to agent_data_dir (should always be set, but handle None gracefully)
    if config.agent_data_dir:
        return config.agent_data_dir / "mem0_data" / "chroma_db"

    # Final fallback to the home directory
    return Path.home() / ".agent" / "mem0_data" / "chroma_db"



def create_memory_instance(config: AgentSettings) -> Any:
    """Create a mem0 Memory instance with proper configuration.

    Uses Memory.from_config for both local (Chroma) and cloud (mem0.ai) modes,
    ensuring a consistent API across both deployment options.

    Args:
        config: Agent configuration with mem0 and LLM settings

    Returns:
        Configured mem0.Memory instance

    Raises:
        ImportError: If the mem0ai or chromadb packages are not installed
        ValueError: If the configuration is invalid

    Example:
        >>> config = AgentSettings.from_env()
        >>> memory = create_memory_instance(config)
    """
    try:
        from mem0 import Memory
    except ImportError as e:
        raise ImportError(
            "mem0ai package not installed. "
            "Install with: uv pip install -e '.[mem0]' (or pip install -e '.[mem0]')"
        ) from e

    # Determine whether to use cloud or local mode
    is_cloud_mode = bool(config.mem0_api_key and config.mem0_org_id)

    if is_cloud_mode:
        # Cloud mode - use the mem0.ai service
        logger.info("Initializing mem0 in cloud mode (mem0.ai)")

        # Extract LLM config
        llm_config = extract_llm_config(config)

        # Create embedder config (reuse LLM provider credentials)
        embedder_config = _create_embedder_config(llm_config)

        mem0_config = {
            "llm": llm_config,
            "embedder": embedder_config,
            "vector_store": {
                "provider": "mem0",
                "config": {
                    "api_key": config.mem0_api_key,
                    "org_id": config.mem0_org_id,
                },
            },
        }
    else:
        # Local mode - use Chroma file-based storage
        storage_path = get_storage_path(config)
        logger.info(f"Initializing mem0 in local mode: {storage_path}")

        # Ensure the storage directory exists
        storage_path.mkdir(parents=True, exist_ok=True)

        # Extract LLM config
        llm_config = extract_llm_config(config)

        # Create embedder config (reuse LLM provider credentials).
        # For embeddings, we use the same provider and credentials as the LLM.
        embedder_config = _create_embedder_config(llm_config)

        mem0_config = {
            "llm": llm_config,
            "embedder": embedder_config,
            "vector_store": {
                "provider": "chroma",
                "config": {
                    "path": str(storage_path),
                    "collection_name": "agent_memories",
                },
            },
        }

    try:
        memory = Memory.from_config(mem0_config)
        logger.debug(
            f"mem0 Memory instance created successfully ({'cloud' if is_cloud_mode else 'local'} mode)"
        )
        return memory
    except Exception as e:
        raise ValueError(f"Failed to initialize mem0 Memory: {e}") from e
436 raise ValueError(f"Failed to initialize mem0 Memory: {e}")