Coverage for src / agent / skills / manifest.py: 99%

109 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-11 14:30 +0000

1# Copyright 2025-2026 Microsoft Corporation 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15"""Skill manifest schema and parsing. 

16 

17This module defines Pydantic models for SKILL.md manifests and provides 

18utilities for extracting and parsing YAML front matter. 

19 

20The SKILL.md format follows this structure: 

21```yaml 

22--- 

23name: skill-name 

24description: Brief description of the skill 

25version: 1.0.0 

26--- 

27 

28# Skill Documentation 

29Markdown instructions for using the skill... 

30``` 

31""" 

32 

33import re 

34from datetime import datetime 

35from pathlib import Path 

36from typing import Any 

37 

38import yaml 

39from pydantic import BaseModel, Field, field_validator 

40 

41from agent.skills.errors import SkillManifestError 

42 

43 

class SkillCLI(BaseModel):
    """Declaration of the command-line executable associated with a skill.

    A skill can name a companion CLI tool here so that it can be executed
    via `osdu-agent run <skill-name> [args...]`.

    Attributes:
        name: Name of the executable (it must be in PATH when installed).
        install: Optional command that installs the tool
            (e.g., "uv tool install git+https://...").
        description: Optional short summary of what the CLI does.

    Example:
        >>> cli = SkillCLI(
        ...     name="osdu-quality",
        ...     install="uv tool install git+https://community.opengroup.org/danielscholl/osdu-quality.git",
        ...     description="GitLab CI/CD test reliability analysis CLI",
        ... )
    """

    # No default is supplied, so the executable name is mandatory.
    name: str = Field(description="CLI executable name (must be in PATH)")
    install: str | None = Field(None, description="Installation command")
    description: str | None = Field(None, description="CLI description")

66 

67 

class SkillTriggers(BaseModel):
    """Structured trigger definitions used for skill matching.

    Every list defaults to a fresh empty list per instance.

    Attributes:
        keywords: Direct keyword matches.
        verbs: Action verbs.
        patterns: Regex patterns.
    """

    keywords: list[str] = Field(default_factory=list)
    verbs: list[str] = Field(default_factory=list)
    patterns: list[str] = Field(default_factory=list)

74 

75 

class SkillManifest(BaseModel):
    """Pydantic model for SKILL.md YAML front matter.

    Required fields:
        name: Skill identifier (alphanumeric + hyphens/underscores, max 64 chars)
        description: Brief description (max 500 chars)

    Optional fields:
        version: Semantic version (e.g., "1.0.0")
        author: Author name
        repository: Git repository URL
        license: License identifier (e.g., "MIT")
        min_agent_base_version: Minimum compatible osdu-agent version
        max_agent_base_version: Maximum compatible osdu-agent version
        default_enabled: Default enabled state for bundled skills
        toolsets: List of Python toolset classes to load ("module:Class" format)
        scripts: List of script names (auto-discovered if omitted)
        scripts_ignore: Glob patterns to exclude from script discovery
        permissions: Environment variable allowlist for script execution
        instructions: Markdown body of SKILL.md (not part of the YAML)
        brief_description: One-line description for the registry; derived
            from ``description`` when not supplied
        triggers: Structured triggers for matching; the lowercased skill
            name is always added as a keyword
        cli: CLI executable configuration

    Example:
        >>> manifest = SkillManifest(
        ...     name="kalshi-markets",
        ...     description="Access Kalshi prediction market data"
        ... )
    """

    # Required fields
    name: str = Field(..., min_length=1, max_length=64)
    description: str = Field(..., min_length=1, max_length=500)

    # Optional fields
    version: str | None = None
    author: str | None = None
    repository: str | None = None
    license: str | None = None
    min_agent_base_version: str | None = None
    max_agent_base_version: str | None = None
    default_enabled: bool = True  # Default enabled state for bundled skills
    toolsets: list[str] = Field(default_factory=list)
    scripts: list[str] | None = None  # None = auto-discover
    scripts_ignore: list[str] = Field(default_factory=list)
    permissions: dict[str, list[str]] = Field(default_factory=dict)

    # Markdown instructions (not in YAML, extracted separately)
    instructions: str = ""

    # Progressive disclosure fields
    brief_description: str | None = None  # One-line description for registry
    triggers: SkillTriggers | None = None  # Structured triggers for matching

    # CLI pass-through configuration
    cli: SkillCLI | None = Field(default=None, description="CLI executable configuration")

    def model_post_init(self, __context: Any) -> None:
        """Auto-generate brief description and add skill name as trigger.

        Args:
            __context: Validation context passed by pydantic (unused).
        """
        # Auto-generate brief description if not provided
        if not self.brief_description:
            desc = (self.description or "").strip()
            # A period ends the first sentence only when it is followed by
            # whitespace or the end of the text, so tokens such as "v1.0" or
            # "example.com" are not cut in half. Fall back to the whole
            # description if that would leave nothing.
            first_sentence = re.split(r"\.(?:\s|$)", desc, maxsplit=1)[0].strip() or desc

            if len(first_sentence) > 80:
                # Truncate at the last space within the first 77 chars so the
                # result plus "..." never exceeds 80 characters.
                cutoff = first_sentence[:77].rfind(" ")
                if cutoff == -1:
                    self.brief_description = first_sentence[:77] + "..."
                else:
                    self.brief_description = first_sentence[:cutoff] + "..."
            else:
                self.brief_description = first_sentence

        # Ensure triggers exists (creates new instance, not mutating default)
        if self.triggers is None:
            self.triggers = SkillTriggers()

        # Add skill name as implicit trigger (case-insensitive check).
        # A new SkillTriggers is built rather than appending in place so the
        # caller's keyword list is never mutated.
        skill_name_lower = self.name.lower()
        existing_keywords_lower = {kw.lower() for kw in self.triggers.keywords}
        if skill_name_lower not in existing_keywords_lower:
            self.triggers = SkillTriggers(
                keywords=[*self.triggers.keywords, skill_name_lower],
                verbs=self.triggers.verbs,
                patterns=self.triggers.patterns,
            )

    @field_validator("name")
    @classmethod
    def validate_name(cls, v: str) -> str:
        """Validate skill name format.

        Must be alphanumeric + hyphens/underscores only, 1-64 characters.

        Raises:
            ValueError: If the name contains any other character.
        """
        # fullmatch (rather than match with "$") so a trailing newline such
        # as "name\n" is rejected instead of slipping through.
        if not re.fullmatch(r"[a-zA-Z0-9_-]{1,64}", v):
            raise ValueError("Skill name must be alphanumeric with hyphens/underscores, 1-64 chars")
        return v

    @field_validator("toolsets")
    @classmethod
    def validate_toolsets(cls, v: list[str]) -> list[str]:
        """Validate toolset format (module:Class).

        Raises:
            ValueError: If an entry has no ':' separator, or the module or
                class part around the first ':' is empty.
        """
        for toolset in v:
            module, sep, class_name = toolset.partition(":")
            if not (sep and module.strip() and class_name.strip()):
                raise ValueError(f"Toolset '{toolset}' must be in 'module:Class' format")
        return v

    @field_validator("scripts")
    @classmethod
    def validate_scripts(cls, v: list[str] | None) -> list[str] | None:
        """Accept script names as given (both 'status' and 'status.py').

        No normalization happens in this validator; the value, including
        None (auto-discover), is returned unchanged.
        """
        return v

194 

195 

class SkillRegistryEntry(BaseModel):
    """Persisted registry record for one installed skill.

    Captures where a skill came from and where it lives on disk, for
    reproducibility and trust.

    Attributes:
        name: Display name (original case preserved)
        name_canonical: Normalized for matching (lowercase, hyphens)
        git_url: Git repository URL (None for bundled/local skills)
        commit_sha: Pinned commit for reproducibility
        branch: Git branch (e.g., "main")
        tag: Git tag (e.g., "v1.0.0")
        installed_path: Absolute path to skill directory
        trusted: User explicitly approved (bundled=True, git requires confirmation)
        installed_at: Installation timestamp

    Example:
        >>> entry = SkillRegistryEntry(
        ...     name="kalshi-markets",
        ...     name_canonical="kalshi-markets",
        ...     git_url=None,
        ...     installed_path=Path("/path/to/skills/core/kalshi-markets"),
        ...     trusted=True
        ... )
    """

    # JSON output: paths become plain strings, timestamps ISO 8601 text.
    model_config = {
        "arbitrary_types_allowed": True,
        "json_encoders": {
            Path: str,
            datetime: lambda value: value.isoformat(),
        },
    }

    # Identity
    name: str
    name_canonical: str

    # Git provenance (all optional)
    git_url: str | None = None
    commit_sha: str | None = None
    branch: str | None = None
    tag: str | None = None

    # Installation state
    installed_path: Path
    trusted: bool = False
    # NOTE: datetime.now() without a tz argument yields a naive local time.
    installed_at: datetime = Field(default_factory=datetime.now)

239 

240 

# Opening '---' line, YAML body (non-greedy, so the first closing '---' line
# wins), closing '---' line, then an optional Markdown remainder. The
# remainder is optional so a file that ends right at the closing delimiter,
# with or without a final newline, is still accepted.
_FRONTMATTER_RE = re.compile(r"^---\s*\n(.*?)\n---\s*(?:\n(.*))?\Z", re.DOTALL)


def extract_yaml_frontmatter(content: str) -> tuple[dict[str, Any], str]:
    """Extract YAML front matter from SKILL.md content.

    SKILL.md format:
    ```
    ---
    name: skill-name
    description: Brief description
    ---

    # Markdown instructions...
    ```

    A leading UTF-8 byte order mark is ignored, and the Markdown section
    after the closing '---' may be absent.

    Args:
        content: Full SKILL.md file content

    Returns:
        Tuple of (yaml_data, markdown_instructions); the instructions are
        stripped of surrounding whitespace and are "" when absent.

    Raises:
        SkillManifestError: If YAML front matter is missing or malformed,
            or does not parse to a dictionary
    """
    # Text read as plain UTF-8 keeps a BOM as U+FEFF, which would otherwise
    # stop the pattern from matching at position 0.
    match = _FRONTMATTER_RE.match(content.removeprefix("\ufeff"))

    if not match:
        raise SkillManifestError(
            "SKILL.md must start with YAML front matter delimited by '---' markers"
        )

    yaml_content = match.group(1)
    # Group 2 is None when nothing follows the closing delimiter.
    markdown_content = (match.group(2) or "").strip()

    try:
        yaml_data = yaml.safe_load(yaml_content)
    except yaml.YAMLError as e:
        raise SkillManifestError(f"Invalid YAML front matter: {e}") from e

    # safe_load returns None for an empty or comment-only document and a
    # scalar or list for other shapes; only a mapping is a valid manifest.
    if not isinstance(yaml_data, dict):
        raise SkillManifestError("YAML front matter must be a dictionary")

    return yaml_data, markdown_content

283 

284 

def parse_skill_manifest(skill_path: Path) -> SkillManifest:
    """Parse SKILL.md manifest from a skill directory.

    Reads ``<skill_path>/SKILL.md`` as UTF-8, splits it into YAML front
    matter and Markdown instructions, and validates the result as a
    SkillManifest. The Markdown body is stored under ``instructions``,
    replacing any key of that name in the YAML.

    Args:
        skill_path: Path to skill directory containing SKILL.md

    Returns:
        Parsed SkillManifest with YAML data and instructions

    Raises:
        SkillManifestError: If SKILL.md is missing, not UTF-8, malformed,
            or fails validation. The underlying exception is attached as
            ``__cause__``.
        OSError: Other I/O failures while reading the file are not wrapped
            and propagate unchanged.
    """
    manifest_path = skill_path / "SKILL.md"

    if not manifest_path.exists():
        raise SkillManifestError(f"SKILL.md not found in {skill_path}")

    try:
        content = manifest_path.read_text(encoding="utf-8")
    except UnicodeDecodeError as e:
        raise SkillManifestError("SKILL.md must be UTF-8 encoded") from e

    yaml_data, instructions = extract_yaml_frontmatter(content)

    # Add instructions to the data for model creation
    yaml_data["instructions"] = instructions

    try:
        return SkillManifest(**yaml_data)
    except Exception as e:
        # Deliberately broad: any construction failure (validation errors,
        # or a TypeError from non-string YAML keys) is reported to callers
        # as one manifest error, chained to the original for debugging.
        raise SkillManifestError(f"Invalid SKILL.md manifest: {e}") from e