Coverage for src/agent/services/maven/api.py: 60%
166 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 14:30 +0000
1# Copyright 2025-2026 Microsoft Corporation
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15"""Maven Central API client with async HTTP and caching."""
17import logging
18import xml.etree.ElementTree as ET
19from typing import Any
21import httpx
23from agent.services.maven.cache import MavenCacheService
24from agent.services.maven.types import (
25 MavenCoordinate,
26 MavenErrorCode,
27 MavenMetadata,
28)
29from agent.services.maven.version import VersionService
31logger = logging.getLogger(__name__)
class MavenApiError(Exception):
    """Exception for Maven API failures that carries a ``MavenErrorCode``.

    The human-readable message is stored by ``Exception`` as usual; the
    machine-readable code is exposed as the ``error_code`` attribute.
    """

    def __init__(self, message: str, error_code: MavenErrorCode) -> None:
        """Store *error_code* on the instance and pass *message* to ``Exception``."""
        self.error_code = error_code
        super().__init__(message)
class MavenApiService:
    """Async HTTP client for Maven Central with caching.

    Lookups consult the injected cache first and write successful results
    back to it. Each network call opens its own ``httpx.AsyncClient`` using
    the configured timeout. Failures are reported as ``MavenApiError``.
    """

    MAVEN_REPO_BASE = "https://repo1.maven.org/maven2"
    MAVEN_SEARCH_BASE = "https://search.maven.org/solrsearch/select"

    def __init__(
        self,
        cache: MavenCacheService | None = None,
        timeout: int = 30,
    ) -> None:
        """Create the service.

        Args:
            cache: Cache to use. A new ``MavenCacheService`` is created when
                this is ``None``.
            timeout: Timeout handed to every ``httpx.AsyncClient``.
        """
        # Test against None explicitly: a caller-supplied cache object that
        # happens to be falsy (for example one that defines ``__len__`` and is
        # currently empty) must not be silently replaced by a fresh cache.
        self.cache = cache if cache is not None else MavenCacheService()
        self.timeout = timeout

    async def fetch_metadata(self, group_id: str, artifact_id: str) -> MavenMetadata:
        """Fetch artifact metadata (``maven-metadata.xml``) from Maven Central.

        Args:
            group_id: Maven group id; dots are turned into path separators.
            artifact_id: Maven artifact id.

        Returns:
            The cached metadata when present, otherwise the freshly parsed
            metadata (which is then stored in the cache).

        Raises:
            MavenApiError: ``DEPENDENCY_NOT_FOUND`` on HTTP 404,
                ``MAVEN_API_ERROR`` on any other ``httpx.HTTPError`` or when
                the XML cannot be parsed.
        """
        cached = self.cache.get_metadata(group_id, artifact_id)
        if cached:
            return MavenMetadata(**cached)

        group_path = group_id.replace(".", "/")
        url = f"{self.MAVEN_REPO_BASE}/{group_path}/{artifact_id}/maven-metadata.xml"

        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.get(url)

                # 404 gets its own error code so callers can tell "unknown
                # artifact" apart from a transport/server failure.
                if response.status_code == 404:
                    raise MavenApiError(
                        f"Dependency {group_id}:{artifact_id} not found",
                        MavenErrorCode.DEPENDENCY_NOT_FOUND,
                    )

                response.raise_for_status()
                metadata = self._parse_metadata_xml(response.text, group_id, artifact_id)
                self.cache.set_metadata(group_id, artifact_id, metadata.model_dump())
                return metadata

        except httpx.HTTPError as e:
            raise MavenApiError(
                f"Error fetching Maven metadata: {e}",
                MavenErrorCode.MAVEN_API_ERROR,
            ) from e

    def _parse_metadata_xml(
        self, xml_content: str, group_id: str, artifact_id: str
    ) -> MavenMetadata:
        """Parse ``maven-metadata.xml`` content into a ``MavenMetadata``.

        Args:
            xml_content: Raw XML text.
            group_id: Used when the document has no ``<groupId>``.
            artifact_id: Used when the document has no ``<artifactId>``.

        Returns:
            Metadata with the ``<latest>``/``<release>`` values (``None`` when
            absent) and the list of versions, without duplicates.

        Raises:
            MavenApiError: ``MAVEN_API_ERROR`` when the XML is not well-formed.
        """
        # NOTE(review): the XML comes from a remote server and is parsed with
        # the standard ElementTree parser, which the Python docs describe as
        # not secure against maliciously constructed data.
        try:
            root = ET.fromstring(xml_content)
        except ET.ParseError as e:
            raise MavenApiError(
                f"Failed to parse Maven metadata XML: {e}",
                MavenErrorCode.MAVEN_API_ERROR,
            ) from e

        # The published versions live under <versioning>/<versions>. A blanket
        # ".//version" search would also match the optional top-level
        # <version> element of the metadata document and yield a duplicate or
        # out-of-place entry, so it is used only as a fallback.
        version_nodes = root.findall("./versioning/versions/version")
        if not version_nodes:
            version_nodes = root.findall(".//version")
        # dict.fromkeys drops duplicates while keeping document order.
        versions = list(dict.fromkeys(node.text for node in version_nodes if node.text))

        return MavenMetadata(
            group_id=root.findtext("./groupId") or group_id,
            artifact_id=root.findtext("./artifactId") or artifact_id,
            latest_version=root.findtext("./versioning/latest"),
            release_version=root.findtext("./versioning/release"),
            versions=versions,
        )

    async def check_artifact_exists(
        self,
        group_id: str,
        artifact_id: str,
        version: str,
        packaging: str = "jar",
        classifier: str | None = None,
    ) -> bool:
        """Check if an artifact file exists using a HEAD request.

        Args:
            group_id: Maven group id.
            artifact_id: Maven artifact id.
            version: Version to look for.
            packaging: File extension of the artifact.
            classifier: Optional classifier appended to the file name.

        Returns:
            The cached answer when there is one. Otherwise ``True`` when the
            HEAD request returns 200, or when it returns 404, no classifier
            was requested, and the version is listed in the artifact metadata.
            ``False`` in all other cases, including transport errors (which
            are not cached).
        """
        cached = self.cache.get_exists(group_id, artifact_id, version, packaging, classifier)
        if cached is not None:
            return cached

        group_path = group_id.replace(".", "/")
        classifier_suffix = f"-{classifier}" if classifier else ""
        filename = f"{artifact_id}-{version}{classifier_suffix}.{packaging}"
        url = f"{self.MAVEN_REPO_BASE}/{group_path}/{artifact_id}/{version}/{filename}"

        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.head(url)
                exists = response.status_code == 200

                # The metadata only says that the *version* exists; it knows
                # nothing about classifiers. Falling back to it for a
                # classifier request would report every classifier of a known
                # version as present, so the fallback is limited to plain
                # (classifier-less) lookups.
                if not exists and response.status_code == 404 and not classifier:
                    exists = await self._check_version_in_metadata(
                        group_id, artifact_id, version
                    )

                self.cache.set_exists(
                    group_id, artifact_id, version, exists, packaging, classifier
                )
                return exists

        except httpx.HTTPError as e:
            # Best effort: a transport failure is reported as "not found" and
            # deliberately not cached, but it is no longer swallowed silently.
            logger.debug("HEAD request for %s failed: %s", url, e)
            return False

    async def _check_version_in_metadata(
        self, group_id: str, artifact_id: str, version: str
    ) -> bool:
        """Return whether *version* is listed in the artifact metadata.

        Any ``MavenApiError`` raised while fetching the metadata is treated
        as "not listed".
        """
        try:
            metadata = await self.fetch_metadata(group_id, artifact_id)
        except MavenApiError:
            return False
        return version in metadata.versions

    async def get_all_versions(self, group_id: str, artifact_id: str) -> list[str]:
        """Get all versions for an artifact.

        The cache is consulted first, then ``maven-metadata.xml``; when the
        metadata is unavailable or lists no versions the Solr search API is
        queried instead.

        Raises:
            MavenApiError: When the Solr fallback fails or finds nothing.
        """
        cached = self.cache.get_versions(group_id, artifact_id)
        if cached:
            return cached

        try:
            metadata = await self.fetch_metadata(group_id, artifact_id)
        except MavenApiError:
            metadata = None

        if metadata is not None and metadata.versions:
            self.cache.set_versions(group_id, artifact_id, metadata.versions)
            return metadata.versions

        # The Solr fallback sits outside the try block on purpose: if it were
        # inside, its own MavenApiError would be caught and the identical
        # Solr request would be issued a second time.
        return await self._get_versions_from_solr(group_id, artifact_id)

    async def _solr_select(self, params: dict[str, str | int]) -> dict[str, Any]:
        """Send one request to the Solr search endpoint and decode the JSON body.

        Raises:
            MavenApiError: ``MAVEN_API_ERROR`` on any ``httpx.HTTPError`` or
                when the response body is not valid JSON.
        """
        try:
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.get(self.MAVEN_SEARCH_BASE, params=params)
                response.raise_for_status()
                data: dict[str, Any] = response.json()
                return data
        except httpx.HTTPError as e:
            raise MavenApiError(
                f"Error searching Maven Central: {e}",
                MavenErrorCode.MAVEN_API_ERROR,
            ) from e
        except ValueError as e:
            # A malformed body makes response.json() raise a ValueError
            # subclass; surface it as MavenApiError like every other failure
            # so callers that catch MavenApiError keep working.
            raise MavenApiError(
                f"Invalid response from Maven Central search: {e}",
                MavenErrorCode.MAVEN_API_ERROR,
            ) from e

    async def _get_versions_from_solr(self, group_id: str, artifact_id: str) -> list[str]:
        """Get versions using the Solr search API (at most 200 rows requested).

        Raises:
            MavenApiError: ``DEPENDENCY_NOT_FOUND`` when no document carries a
                version, ``MAVEN_API_ERROR`` when the request fails.
        """
        params: dict[str, str | int] = {
            "q": f"g:{group_id} AND a:{artifact_id}",
            "rows": 200,
            "wt": "json",
            "core": "gav",
        }

        data = await self._solr_select(params)
        docs = data.get("response", {}).get("docs", [])
        versions = [v for doc in docs if (v := doc.get("v"))]

        if not versions:
            raise MavenApiError(
                f"No versions found for {group_id}:{artifact_id}",
                MavenErrorCode.DEPENDENCY_NOT_FOUND,
            )

        self.cache.set_versions(group_id, artifact_id, versions)
        return versions

    async def _filter_existing_versions(
        self,
        group_id: str,
        artifact_id: str,
        versions: list[str],
        packaging: str,
        classifier: str | None,
    ) -> list[str]:
        """Return the entries of *versions* whose artifact file exists, in order."""
        # The awaits run one after another, in list order.
        return [
            v
            for v in versions
            if await self.check_artifact_exists(
                group_id, artifact_id, v, packaging, classifier
            )
        ]

    async def get_latest_version(
        self,
        group_id: str,
        artifact_id: str,
        packaging: str = "jar",
        classifier: str | None = None,
    ) -> str:
        """Get the latest version of an artifact.

        Preference order: the metadata's release version, its latest version,
        then the newest entry of its version list as chosen by
        ``VersionService.get_latest_version``. When a classifier is given, a
        candidate is accepted only if ``check_artifact_exists`` confirms it.
        If the metadata route fails or yields nothing, the Solr search API is
        used.

        Raises:
            MavenApiError: When the Solr fallback cannot find a version either.
        """
        try:
            metadata = await self.fetch_metadata(group_id, artifact_id)

            for candidate in (metadata.release_version, metadata.latest_version):
                if candidate and (
                    not classifier
                    or await self.check_artifact_exists(
                        group_id, artifact_id, candidate, packaging, classifier
                    )
                ):
                    return candidate

            if metadata.versions:
                candidates = metadata.versions
                if classifier:
                    candidates = await self._filter_existing_versions(
                        group_id, artifact_id, candidates, packaging, classifier
                    )
                if candidates:
                    result = VersionService.get_latest_version(candidates)
                    if result:
                        return result

            # Raised inside the try block on purpose: the handler below turns
            # "nothing suitable in the metadata" into a Solr lookup.
            raise MavenApiError(
                f"No suitable version found for {group_id}:{artifact_id}",
                MavenErrorCode.VERSION_NOT_FOUND,
            )

        except MavenApiError:
            return await self._get_latest_version_from_solr(
                group_id, artifact_id, packaging, classifier
            )

    async def _get_latest_version_from_solr(
        self,
        group_id: str,
        artifact_id: str,
        packaging: str = "jar",
        classifier: str | None = None,
    ) -> str:
        """Get the latest version using the Solr API.

        Raises:
            MavenApiError: ``VERSION_NOT_FOUND`` when no version survives the
                classifier filter or ``VersionService`` returns nothing; any
                error from the Solr query itself is propagated.
        """
        versions = await self._get_versions_from_solr(group_id, artifact_id)

        if classifier:
            versions = await self._filter_existing_versions(
                group_id, artifact_id, versions, packaging, classifier
            )
            if not versions:
                raise MavenApiError(
                    f"No versions found with classifier {classifier}",
                    MavenErrorCode.VERSION_NOT_FOUND,
                )

        result = VersionService.get_latest_version(versions)
        if not result:
            raise MavenApiError(
                f"No versions found for {group_id}:{artifact_id}",
                MavenErrorCode.VERSION_NOT_FOUND,
            )

        return result

    async def search(
        self,
        query: str,
        packaging: str | None = None,
        classifier: str | None = None,
        rows: int = 20,
    ) -> dict[str, Any]:
        """Search Maven Central for artifacts.

        Args:
            query: Solr query string.
            packaging: When given, ``AND p:<packaging>`` is appended.
            classifier: When given, ``AND l:<classifier>`` is appended.
            rows: Number of rows requested from Solr.

        Returns:
            The decoded JSON response (a copy of the cached one when cached).

        Raises:
            MavenApiError: ``MAVEN_API_ERROR`` when the request fails or the
                body is not valid JSON.
        """
        full_query = query
        if packaging:
            full_query += f" AND p:{packaging}"
        if classifier:
            full_query += f" AND l:{classifier}"

        # NOTE(review): the cache is keyed by the query text only, so a result
        # cached for one ``rows`` value is returned for any other ``rows``
        # value as well - confirm whether the cache should take rows into
        # account.
        cached = self.cache.get_search(full_query)
        if cached:
            return dict(cached)

        params: dict[str, str | int] = {
            "q": full_query,
            "rows": rows,
            "wt": "json",
            "core": "gav",
        }

        data = await self._solr_select(params)
        self.cache.set_search(full_query, data)
        return data

    async def check_version(
        self,
        coordinate: str | MavenCoordinate,
        version: str,
        packaging: str = "jar",
        classifier: str | None = None,
    ) -> dict[str, Any]:
        """Check a version and report existence plus available updates.

        Args:
            coordinate: A ``MavenCoordinate`` or a string accepted by
                ``MavenCoordinate.parse``.
            version: Version to check.
            packaging: Packaging forwarded to ``check_artifact_exists``.
            classifier: Classifier forwarded to ``check_artifact_exists``.

        Returns:
            A dict with the keys ``dependency``, ``version``, ``exists``,
            ``latest_versions``, ``has_major_update``, ``has_minor_update``,
            ``has_patch_update`` and ``all_versions``. ``all_versions`` is
            empty when the version list could not be obtained.
        """
        coord = (
            MavenCoordinate.parse(coordinate) if isinstance(coordinate, str) else coordinate
        )

        exists = await self.check_artifact_exists(
            coord.group_id, coord.artifact_id, version, packaging, classifier
        )

        try:
            all_versions = await self.get_all_versions(coord.group_id, coord.artifact_id)
        except MavenApiError:
            # Best effort: the report is still produced, with no known versions.
            all_versions = []

        latest_versions = VersionService.find_latest_versions(all_versions, version)
        has_major, has_minor, has_patch = VersionService.is_update_available(
            version, all_versions
        )

        return {
            "dependency": str(coord),
            "version": version,
            "exists": exists,
            "latest_versions": latest_versions.model_dump(),
            "has_major_update": has_major,
            "has_minor_update": has_minor,
            "has_patch_update": has_patch,
            "all_versions": all_versions,
        }