Coverage for src / agent / skills / manifest.py: 99%
109 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 14:30 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 14:30 +0000
1# Copyright 2025-2026 Microsoft Corporation
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15"""Skill manifest schema and parsing.
17This module defines Pydantic models for SKILL.md manifests and provides
18utilities for extracting and parsing YAML front matter.
20The SKILL.md format follows this structure:
21```yaml
22---
23name: skill-name
24description: Brief description of the skill
25version: 1.0.0
26---
28# Skill Documentation
29Markdown instructions for using the skill...
30```
31"""
33import re
34from datetime import datetime
35from pathlib import Path
36from typing import Any
38import yaml
39from pydantic import BaseModel, Field, field_validator
41from agent.skills.errors import SkillManifestError
class SkillCLI(BaseModel):
    """Describes the command-line tool that accompanies a skill.

    When a skill declares this block, its CLI can be run through
    `osdu-agent run <skill-name> [args...]`.

    Attributes:
        name: Name of the CLI executable (must be in PATH when installed).
        install: Optional installation command
            (e.g., "uv tool install git+https://...").
        description: Optional short summary of what the CLI does.

    Example:
        >>> cli = SkillCLI(
        ...     name="osdu-quality",
        ...     install="uv tool install git+https://community.opengroup.org/danielscholl/osdu-quality.git",
        ...     description="GitLab CI/CD test reliability analysis CLI"
        ... )
    """

    # No default is supplied, so the field is required.
    name: str = Field(description="CLI executable name (must be in PATH)")
    install: str | None = Field(None, description="Installation command")
    description: str | None = Field(None, description="CLI description")
class SkillTriggers(BaseModel):
    """Structured signals used when matching a skill.

    Attributes:
        keywords: Direct keyword matches.
        verbs: Action verbs.
        patterns: Regex patterns.

    Each list defaults to a fresh empty list.
    """

    keywords: list[str] = Field(default_factory=list)
    verbs: list[str] = Field(default_factory=list)
    patterns: list[str] = Field(default_factory=list)
class SkillManifest(BaseModel):
    """Pydantic model for SKILL.md YAML front matter.

    Required fields:
        name: Skill identifier (alphanumeric + hyphens/underscores, max 64 chars)
        description: Brief description (max 500 chars)

    Optional fields:
        version: Semantic version (e.g., "1.0.0")
        author: Author name
        repository: Git repository URL
        license: License identifier (e.g., "MIT")
        min_agent_base_version: Minimum compatible osdu-agent version
        max_agent_base_version: Maximum compatible osdu-agent version
        default_enabled: Default enabled state for bundled skills (True if omitted)
        toolsets: List of Python toolset classes to load ("module:Class" format)
        scripts: List of script names (auto-discovered if omitted)
        scripts_ignore: Glob patterns to exclude from script discovery
        permissions: Environment variable allowlist for script execution
        instructions: Markdown instructions (not in YAML, extracted separately)
        brief_description: One-line description for the registry; derived from
            ``description`` when omitted
        triggers: Structured triggers for matching; the lowercased skill name
            is always present among the keywords after construction
        cli: CLI executable configuration

    Example:
        >>> manifest = SkillManifest(
        ...     name="kalshi-markets",
        ...     description="Access Kalshi prediction market data"
        ... )
    """

    # Required fields
    name: str = Field(..., min_length=1, max_length=64)
    description: str = Field(..., min_length=1, max_length=500)

    # Optional fields
    version: str | None = None
    author: str | None = None
    repository: str | None = None
    license: str | None = None
    min_agent_base_version: str | None = None
    max_agent_base_version: str | None = None
    default_enabled: bool = True  # Default enabled state for bundled skills
    toolsets: list[str] = Field(default_factory=list)
    scripts: list[str] | None = None  # None = auto-discover
    scripts_ignore: list[str] = Field(default_factory=list)
    permissions: dict[str, list[str]] = Field(default_factory=dict)

    # Markdown instructions (not in YAML, extracted separately)
    instructions: str = ""

    # Progressive disclosure fields
    brief_description: str | None = None  # One-line description for registry
    triggers: SkillTriggers | None = None  # Structured triggers for matching

    # CLI pass-through configuration
    cli: SkillCLI | None = Field(default=None, description="CLI executable configuration")

    def model_post_init(self, __context: Any) -> None:
        """Auto-generate brief description and add skill name as trigger.

        Runs after validation. Fills ``brief_description`` (at most 80
        characters) when it was not supplied, guarantees ``triggers`` is set,
        and ensures the lowercased skill name is one of the trigger keywords.
        """
        if not self.brief_description:
            desc = (self.description or "").strip()
            # First sentence = text before the first period. If that is empty
            # (the description starts with "."), fall back to the whole
            # description so the brief description is not left blank.
            first_sentence = desc.split(".", 1)[0].strip() or desc

            if len(first_sentence) > 80:
                # Truncate at the last space within the first 77 chars so that
                # the appended "..." keeps the result within 80 chars.
                cutoff = first_sentence[:77].rfind(" ")
                if cutoff == -1:
                    self.brief_description = first_sentence[:77] + "..."
                else:
                    self.brief_description = first_sentence[:cutoff] + "..."
            else:
                self.brief_description = first_sentence

        # Ensure triggers exists (creates new instance, not mutating default)
        if self.triggers is None:
            self.triggers = SkillTriggers()

        # Add skill name as implicit trigger (case-insensitive check).
        skill_name_lower = self.name.lower()
        existing_keywords_lower = {kw.lower() for kw in self.triggers.keywords}
        if skill_name_lower not in existing_keywords_lower:
            # Build a new SkillTriggers rather than appending in place, so a
            # keyword list shared with another object is never mutated.
            self.triggers = SkillTriggers(
                keywords=self.triggers.keywords + [skill_name_lower],
                verbs=self.triggers.verbs,
                patterns=self.triggers.patterns,
            )

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Validate skill name format.

        Must be alphanumeric + hyphens/underscores only, 1-64 characters.

        Raises:
            ValueError: If the name contains any other character.
        """
        # fullmatch, not match with "$": "$" also matches just before a
        # trailing newline, which would let "name\n" through.
        if not re.fullmatch(r"[a-zA-Z0-9_-]{1,64}", v):
            raise ValueError("Skill name must be alphanumeric with hyphens/underscores, 1-64 chars")
        return v

    @field_validator("toolsets")
    @classmethod
    def validate_toolsets(cls, v: list[str]) -> list[str]:
        """Validate toolset format (module:Class).

        Each entry must contain a ':' with non-empty text on both sides.

        Raises:
            ValueError: If an entry is not in 'module:Class' format.
        """
        for toolset in v:
            module_part, separator, class_part = toolset.partition(":")
            # A bare ":" (or "module:" / ":Class") is not a usable reference.
            if not (separator and module_part.strip() and class_part.strip()):
                raise ValueError(f"Toolset '{toolset}' must be in 'module:Class' format")
        return v

    @field_validator("scripts")
    @classmethod
    def validate_scripts(cls, v: list[str] | None) -> list[str] | None:
        """Accept script names in either form ('status' or 'status.py').

        The value is returned unchanged (``None`` stays ``None``); no
        normalization is performed in this validator.
        """
        return v
class SkillRegistryEntry(BaseModel):
    """Persisted registry record for a single installed skill.

    Records where a skill came from and where it lives, for reproducibility
    and trust.

    Attributes:
        name: Display name (original case preserved).
        name_canonical: Normalized name used for matching (lowercase, hyphens).
        git_url: Git repository URL (None for bundled/local skills).
        commit_sha: Pinned commit for reproducibility.
        branch: Git branch (e.g., "main").
        tag: Git tag (e.g., "v1.0.0").
        installed_path: Absolute path to the skill directory.
        trusted: User explicitly approved (bundled=True, git requires
            confirmation). Defaults to False.
        installed_at: Installation timestamp; defaults to ``datetime.now()``
            at construction time.

    Example:
        >>> entry = SkillRegistryEntry(
        ...     name="kalshi-markets",
        ...     name_canonical="kalshi-markets",
        ...     git_url=None,
        ...     installed_path=Path("/path/to/skills/core/kalshi-markets"),
        ...     trusted=True
        ... )
    """

    # Identity
    name: str
    name_canonical: str

    # Git provenance (all optional)
    git_url: str | None = None
    commit_sha: str | None = None
    branch: str | None = None
    tag: str | None = None

    # Local installation state
    installed_path: Path
    trusted: bool = False
    installed_at: datetime = Field(default_factory=datetime.now)

    model_config = dict(
        arbitrary_types_allowed=True,
        json_encoders={
            Path: str,
            datetime: lambda moment: moment.isoformat(),
        },
    )
def extract_yaml_frontmatter(content: str) -> tuple[dict[str, Any], str]:
    """Extract YAML front matter from SKILL.md content.

    SKILL.md format:
    ```
    ---
    name: skill-name
    description: Brief description
    ---

    # Markdown instructions...
    ```

    The Markdown body is optional: content that ends directly at the closing
    '---' marker (with or without a trailing newline) is accepted and yields
    empty instructions.

    Args:
        content: Full SKILL.md file content

    Returns:
        Tuple of (yaml_data, markdown_instructions); the instructions are
        stripped of leading/trailing whitespace.

    Raises:
        SkillManifestError: If YAML front matter is missing or malformed, or
            if it does not parse to a dictionary
    """
    # Match YAML front matter between --- markers. The closing marker may be
    # followed by a newline plus the Markdown body, or may simply end the file.
    pattern = r"^---\s*\n(.*?)\n---\s*(?:\n(.*))?\Z"
    match = re.match(pattern, content, re.DOTALL)

    if not match:
        raise SkillManifestError(
            "SKILL.md must start with YAML front matter delimited by '---' markers"
        )

    yaml_content = match.group(1)
    # group(2) is None when nothing follows the closing marker.
    markdown_content = (match.group(2) or "").strip()

    try:
        yaml_data = yaml.safe_load(yaml_content)
    except yaml.YAMLError as e:
        # Chain the parser error so the original traceback stays available.
        raise SkillManifestError(f"Invalid YAML front matter: {e}") from e

    # safe_load returns None for empty input and scalars/lists for non-mapping
    # YAML; only a mapping can populate the manifest.
    if not isinstance(yaml_data, dict):
        raise SkillManifestError("YAML front matter must be a dictionary")

    return yaml_data, markdown_content
def parse_skill_manifest(skill_path: Path) -> SkillManifest:
    """Parse SKILL.md manifest from a skill directory.

    Reads ``<skill_path>/SKILL.md`` as UTF-8, splits it into YAML front matter
    and Markdown instructions, and builds a SkillManifest from the result.

    Args:
        skill_path: Path to skill directory containing SKILL.md

    Returns:
        Parsed SkillManifest with YAML data and instructions

    Raises:
        SkillManifestError: If SKILL.md is missing, unreadable, malformed, or
            invalid. The underlying exception, when there is one, is attached
            as ``__cause__``.
    """
    manifest_path = skill_path / "SKILL.md"

    if not manifest_path.exists():
        raise SkillManifestError(f"SKILL.md not found in {skill_path}")

    try:
        content = manifest_path.read_text(encoding="utf-8")
    except UnicodeDecodeError as e:
        raise SkillManifestError("SKILL.md must be UTF-8 encoded") from e
    except OSError as e:
        # exists() does not guarantee readability: the path may be a
        # directory, lack read permission, or vanish before it is read.
        raise SkillManifestError(f"Unable to read SKILL.md in {skill_path}: {e}") from e

    yaml_data, instructions = extract_yaml_frontmatter(content)

    # Add instructions to the data for model creation; this replaces any
    # 'instructions' key that appeared in the YAML itself.
    yaml_data["instructions"] = instructions

    try:
        return SkillManifest(**yaml_data)
    except Exception as e:
        # Boundary catch: any construction failure is reported as a manifest
        # error, with the original exception chained.
        raise SkillManifestError(f"Invalid SKILL.md manifest: {e}") from e