Coverage for src/agent/services/maven/api.py: 60%

166 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-11 14:30 +0000

1# Copyright 2025-2026 Microsoft Corporation 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Maven Central API client with async HTTP and caching.""" 

16 

17import logging 

18import xml.etree.ElementTree as ET 

19from typing import Any 

20 

21import httpx 

22 

23from agent.services.maven.cache import MavenCacheService 

24from agent.services.maven.types import ( 

25 MavenCoordinate, 

26 MavenErrorCode, 

27 MavenMetadata, 

28) 

29from agent.services.maven.version import VersionService 

30 

31logger = logging.getLogger(__name__) 

32 

33 

class MavenApiError(Exception):
    """Exception raised by the Maven API client, tagged with an error code.

    The message is handed to ``Exception`` unchanged; the ``MavenErrorCode``
    given at construction time is stored on ``error_code``.
    """

    def __init__(self, message: str, error_code: MavenErrorCode) -> None:
        # The base class only keeps the message, so the code is stored here.
        self.error_code = error_code
        Exception.__init__(self, message)

40 

41 

class MavenApiService:
    """Async HTTP client for Maven Central with caching.

    Class attributes:
        MAVEN_REPO_BASE: Root URL of the repository layout, used to build
            ``maven-metadata.xml`` and artifact file URLs.
        MAVEN_SEARCH_BASE: Endpoint of the Solr search API.
    """

    MAVEN_REPO_BASE = "https://repo1.maven.org/maven2"
    MAVEN_SEARCH_BASE = "https://search.maven.org/solrsearch/select"

    def __init__(
        self,
        cache: MavenCacheService | None = None,
        timeout: int = 30,
    ) -> None:
        """Create the service.

        Args:
            cache: Cache consulted before, and populated after, network
                calls. A new ``MavenCacheService`` is created when omitted.
            timeout: Value passed as ``timeout`` to every
                ``httpx.AsyncClient`` opened by this service.
        """
        # Test against None instead of truthiness: a caller-supplied cache
        # that evaluates falsy (e.g. an empty container-like cache) must be
        # used as given, not silently swapped for a fresh one.
        self.cache = cache if cache is not None else MavenCacheService()
        self.timeout = timeout

55 

56 async def fetch_metadata(self, group_id: str, artifact_id: str) -> MavenMetadata: 

57 """Fetch artifact metadata from Maven Central.""" 

58 cached = self.cache.get_metadata(group_id, artifact_id) 

59 if cached: 

60 return MavenMetadata(**cached) 

61 

62 group_path = group_id.replace(".", "/") 

63 url = f"{self.MAVEN_REPO_BASE}/{group_path}/{artifact_id}/maven-metadata.xml" 

64 

65 try: 

66 async with httpx.AsyncClient(timeout=self.timeout) as client: 

67 response = await client.get(url) 

68 

69 if response.status_code == 404: 

70 raise MavenApiError( 

71 f"Dependency {group_id}:{artifact_id} not found", 

72 MavenErrorCode.DEPENDENCY_NOT_FOUND, 

73 ) 

74 

75 response.raise_for_status() 

76 metadata = self._parse_metadata_xml(response.text, group_id, artifact_id) 

77 self.cache.set_metadata(group_id, artifact_id, metadata.model_dump()) 

78 return metadata 

79 

80 except httpx.HTTPError as e: 

81 raise MavenApiError( 

82 f"Error fetching Maven metadata: {e}", 

83 MavenErrorCode.MAVEN_API_ERROR, 

84 ) from e 

85 

86 def _parse_metadata_xml( 

87 self, xml_content: str, group_id: str, artifact_id: str 

88 ) -> MavenMetadata: 

89 """Parse maven-metadata.xml content.""" 

90 try: 

91 root = ET.fromstring(xml_content) 

92 versions = [v.text for v in root.findall(".//version") if v.text] 

93 

94 return MavenMetadata( 

95 group_id=root.findtext("./groupId") or group_id, 

96 artifact_id=root.findtext("./artifactId") or artifact_id, 

97 latest_version=root.findtext("./versioning/latest"), 

98 release_version=root.findtext("./versioning/release"), 

99 versions=versions, 

100 ) 

101 except ET.ParseError as e: 

102 raise MavenApiError( 

103 f"Failed to parse Maven metadata XML: {e}", 

104 MavenErrorCode.MAVEN_API_ERROR, 

105 ) from e 

106 

107 async def check_artifact_exists( 

108 self, 

109 group_id: str, 

110 artifact_id: str, 

111 version: str, 

112 packaging: str = "jar", 

113 classifier: str | None = None, 

114 ) -> bool: 

115 """Check if artifact exists using HEAD request.""" 

116 cached = self.cache.get_exists(group_id, artifact_id, version, packaging, classifier) 

117 if cached is not None: 

118 return cached 

119 

120 group_path = group_id.replace(".", "/") 

121 filename = f"{artifact_id}-{version}" 

122 if classifier: 

123 filename += f"-{classifier}" 

124 filename += f".{packaging}" 

125 

126 url = f"{self.MAVEN_REPO_BASE}/{group_path}/{artifact_id}/{version}/{filename}" 

127 

128 try: 

129 async with httpx.AsyncClient(timeout=self.timeout) as client: 

130 response = await client.head(url) 

131 exists = response.status_code == 200 

132 

133 if not exists and response.status_code == 404: 

134 exists = await self._check_version_in_metadata(group_id, artifact_id, version) 

135 

136 self.cache.set_exists(group_id, artifact_id, version, exists, packaging, classifier) 

137 return exists 

138 

139 except httpx.HTTPError: 

140 return False 

141 

142 async def _check_version_in_metadata( 

143 self, group_id: str, artifact_id: str, version: str 

144 ) -> bool: 

145 """Check if version exists in metadata.""" 

146 try: 

147 metadata = await self.fetch_metadata(group_id, artifact_id) 

148 return version in metadata.versions 

149 except MavenApiError: 

150 return False 

151 

152 async def get_all_versions(self, group_id: str, artifact_id: str) -> list[str]: 

153 """Get all versions for an artifact.""" 

154 cached = self.cache.get_versions(group_id, artifact_id) 

155 if cached: 

156 return cached 

157 

158 try: 

159 metadata = await self.fetch_metadata(group_id, artifact_id) 

160 if not metadata.versions: 

161 return await self._get_versions_from_solr(group_id, artifact_id) 

162 

163 self.cache.set_versions(group_id, artifact_id, metadata.versions) 

164 return metadata.versions 

165 

166 except MavenApiError: 

167 return await self._get_versions_from_solr(group_id, artifact_id) 

168 

169 async def _get_versions_from_solr(self, group_id: str, artifact_id: str) -> list[str]: 

170 """Get versions using Solr search API.""" 

171 query = f"g:{group_id} AND a:{artifact_id}" 

172 params: dict[str, str | int] = { 

173 "q": query, 

174 "rows": 200, 

175 "wt": "json", 

176 "core": "gav", 

177 } 

178 

179 try: 

180 async with httpx.AsyncClient(timeout=self.timeout) as client: 

181 response = await client.get(self.MAVEN_SEARCH_BASE, params=params) 

182 response.raise_for_status() 

183 

184 data = response.json() 

185 docs = data.get("response", {}).get("docs", []) 

186 versions = [doc.get("v") for doc in docs if doc.get("v")] 

187 

188 if not versions: 

189 raise MavenApiError( 

190 f"No versions found for {group_id}:{artifact_id}", 

191 MavenErrorCode.DEPENDENCY_NOT_FOUND, 

192 ) 

193 

194 self.cache.set_versions(group_id, artifact_id, versions) 

195 return versions 

196 

197 except httpx.HTTPError as e: 

198 raise MavenApiError( 

199 f"Error searching Maven Central: {e}", 

200 MavenErrorCode.MAVEN_API_ERROR, 

201 ) from e 

202 

203 async def get_latest_version( 

204 self, 

205 group_id: str, 

206 artifact_id: str, 

207 packaging: str = "jar", 

208 classifier: str | None = None, 

209 ) -> str: 

210 """Get the latest version of an artifact.""" 

211 try: 

212 metadata = await self.fetch_metadata(group_id, artifact_id) 

213 

214 if metadata.release_version: 

215 if not classifier or await self.check_artifact_exists( 

216 group_id, artifact_id, metadata.release_version, packaging, classifier 

217 ): 

218 return metadata.release_version 

219 

220 if metadata.latest_version: 

221 if not classifier or await self.check_artifact_exists( 

222 group_id, artifact_id, metadata.latest_version, packaging, classifier 

223 ): 

224 return metadata.latest_version 

225 

226 if metadata.versions: 

227 if classifier: 

228 filtered = [] 

229 for v in metadata.versions: 

230 if await self.check_artifact_exists( 

231 group_id, artifact_id, v, packaging, classifier 

232 ): 

233 filtered.append(v) 

234 if filtered: 

235 result = VersionService.get_latest_version(filtered) 

236 if result: 

237 return result 

238 else: 

239 result = VersionService.get_latest_version(metadata.versions) 

240 if result: 

241 return result 

242 

243 raise MavenApiError( 

244 f"No suitable version found for {group_id}:{artifact_id}", 

245 MavenErrorCode.VERSION_NOT_FOUND, 

246 ) 

247 

248 except MavenApiError: 

249 return await self._get_latest_version_from_solr( 

250 group_id, artifact_id, packaging, classifier 

251 ) 

252 

253 async def _get_latest_version_from_solr( 

254 self, 

255 group_id: str, 

256 artifact_id: str, 

257 packaging: str = "jar", 

258 classifier: str | None = None, 

259 ) -> str: 

260 """Get latest version using Solr API.""" 

261 versions = await self._get_versions_from_solr(group_id, artifact_id) 

262 

263 if classifier: 

264 filtered = [] 

265 for v in versions: 

266 if await self.check_artifact_exists( 

267 group_id, artifact_id, v, packaging, classifier 

268 ): 

269 filtered.append(v) 

270 if not filtered: 

271 raise MavenApiError( 

272 f"No versions found with classifier {classifier}", 

273 MavenErrorCode.VERSION_NOT_FOUND, 

274 ) 

275 versions = filtered 

276 

277 result = VersionService.get_latest_version(versions) 

278 if not result: 

279 raise MavenApiError( 

280 f"No versions found for {group_id}:{artifact_id}", 

281 MavenErrorCode.VERSION_NOT_FOUND, 

282 ) 

283 

284 return result 

285 

286 async def search( 

287 self, 

288 query: str, 

289 packaging: str | None = None, 

290 classifier: str | None = None, 

291 rows: int = 20, 

292 ) -> dict[str, Any]: 

293 """Search Maven Central for artifacts.""" 

294 full_query = query 

295 if packaging: 

296 full_query += f" AND p:{packaging}" 

297 if classifier: 

298 full_query += f" AND l:{classifier}" 

299 

300 cached = self.cache.get_search(full_query) 

301 if cached: 

302 return dict(cached) 

303 

304 params: dict[str, str | int] = { 

305 "q": full_query, 

306 "rows": rows, 

307 "wt": "json", 

308 "core": "gav", 

309 } 

310 

311 try: 

312 async with httpx.AsyncClient(timeout=self.timeout) as client: 

313 response = await client.get(self.MAVEN_SEARCH_BASE, params=params) 

314 response.raise_for_status() 

315 

316 data: dict[str, Any] = response.json() 

317 self.cache.set_search(full_query, data) 

318 return data 

319 

320 except httpx.HTTPError as e: 

321 raise MavenApiError( 

322 f"Error searching Maven Central: {e}", 

323 MavenErrorCode.MAVEN_API_ERROR, 

324 ) from e 

325 

326 async def check_version( 

327 self, 

328 coordinate: str | MavenCoordinate, 

329 version: str, 

330 packaging: str = "jar", 

331 classifier: str | None = None, 

332 ) -> dict[str, Any]: 

333 """Check version with full information.""" 

334 if isinstance(coordinate, str): 

335 coord = MavenCoordinate.parse(coordinate) 

336 else: 

337 coord = coordinate 

338 

339 exists = await self.check_artifact_exists( 

340 coord.group_id, coord.artifact_id, version, packaging, classifier 

341 ) 

342 

343 try: 

344 all_versions = await self.get_all_versions(coord.group_id, coord.artifact_id) 

345 except MavenApiError: 

346 all_versions = [] 

347 

348 latest_versions = VersionService.find_latest_versions(all_versions, version) 

349 has_major, has_minor, has_patch = VersionService.is_update_available(version, all_versions) 

350 

351 return { 

352 "dependency": str(coord), 

353 "version": version, 

354 "exists": exists, 

355 "latest_versions": latest_versions.model_dump(), 

356 "has_major_update": has_major, 

357 "has_minor_update": has_minor, 

358 "has_patch_update": has_patch, 

359 "all_versions": all_versions, 

360 }