Source code for src.marcus_mcp.tools.attachment

"""
Artifact management tools with prescriptive storage locations.

This module provides tools to help agents store and track design
artifacts in organized locations while allowing flexibility when needed.
"""

import hashlib
import json
import logging
import subprocess
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from src.core.project_history import ArtifactMetadata, ProjectHistoryPersistence

logger = logging.getLogger(__name__)

# Storage directory for each standard artifact type, relative to the project
# root. Insertion order is significant to code that iterates this mapping.
ARTIFACT_PATHS: Dict[str, str] = {
    # Written specifications
    "specification": "docs/specifications",
    # API descriptions
    "api": "docs/api",
    # Design documents
    "design": "docs/design",
    # Architecture documents
    "architecture": "docs/architecture",
    # General documentation lives directly under docs/
    "documentation": "docs",
    # Reference material
    "reference": "docs/references",
    # Scratch output; this directory should be .gitignored
    "temporary": "tmp/artifacts",
}


def _artifact_error(message: str, task_id: str, filename: str) -> Dict[str, Any]:
    """Build the failure payload returned by every ``log_artifact`` rejection."""
    return {
        "success": False,
        "error": message,
        "data": {"task_id": task_id, "filename": filename},
    }


async def log_artifact(
    task_id: str,
    filename: str,
    content: Union[str, Dict[str, Any], List[Any]],
    artifact_type: str,
    project_root: Optional[str] = None,
    description: Optional[str] = None,
    location: Optional[str] = None,  # Optional override
    artifact_role: Optional[str] = None,
    state: Any = None,
    force: bool = False,
) -> Dict[str, Any]:
    """
    Store an artifact with prescriptive location management.

    By default, artifacts are stored in standard locations based on their
    type. Marcus accepts ANY artifact type, making it domain-agnostic.

    Standard types (with predefined locations):

    - specification → docs/specifications/
    - api → docs/api/
    - design → docs/design/
    - architecture → docs/architecture/
    - documentation → docs/
    - reference → docs/references/
    - temporary → tmp/artifacts/

    Custom types:

    - Any other type → docs/artifacts/ (default fallback)
    - You can use domain-specific types like "podcast-script", "research",
      "video-storyboard", "marketing-copy", etc.

    Parameters
    ----------
    task_id : str
        The current task ID
    filename : str
        Name for the artifact file. Must be non-empty when ``location`` is
        not given, because it is the final component of the default path.
    content : str or dict or list
        The artifact content to store. A ``dict`` or ``list`` is serialized
        to a pretty-printed JSON string before being written; a ``str`` is
        stored verbatim. Accepting structured input lets agents log parsed
        JSON (e.g. ``tsconfig.json``) directly without pre-stringifying it
        (issue #595 Fix 1).
    artifact_type : str
        Type of artifact (determines default location)
    project_root : Optional[str], optional
        Absolute path to the project root directory where artifacts will be
        created. All agents should use the same path. Must be an existing
        directory.
    description : Optional[str], optional
        Optional description of the artifact
    location : Optional[str], optional
        Optional override for storage location (relative path to the file)
    artifact_role : Optional[str], optional
        Semantic role of the artifact — ``"interface_contract"``,
        ``"implementation_spec"``, or ``"design_guide"``. Stored on the
        artifact entry so ``_collect_task_artifacts`` can inject role-aware
        ``usage_guidance`` for dependent agents (Option C). When ``None``,
        guidance falls back to label-based detection (Option B).
    state : Any, optional
        MCP state object. When ``None`` the artifact is still written to
        disk; in-memory tracking and the kanban comment are skipped.
    force : bool, default=False
        Bypass the size-downgrade guard. When False, writing content that
        is smaller than 50 % of an existing docs/ file whose size exceeds
        8 KB is rejected to prevent accidental stub overwrites. Pass True
        only when an intentional downsize is desired.

    Returns
    -------
    Dict[str, Any]
        Dict with artifact location and storage details. ``success`` is
        ``False`` and ``error`` carries the reason when the artifact was
        rejected or could not be written.

    Notes
    -----
    Everything that happens after the file is written (state tracking,
    kanban comment, experiment monitor, project history) is bookkeeping.
    Failures there are logged and do not turn a written artifact into a
    reported failure.
    """
    try:
        # Issue #595 Fix 1: agents (and the generated MCP schema) may pass
        # structured JSON — e.g. a parsed tsconfig.json — as ``content``.
        # Serialize any dict/list to a JSON string up front, before the
        # size guard and file write, both of which require text.
        if isinstance(content, (dict, list)):
            content = json.dumps(content, indent=2)

        # Validate project_root is provided
        if not project_root:
            return _artifact_error("project_root is required", task_id, filename)

        # Validate project_root is absolute and is an existing directory
        project_root_path = Path(project_root)
        if not project_root_path.is_absolute():
            return _artifact_error(
                "project_root must be an absolute path", task_id, filename
            )
        # is_dir() rather than exists(): a regular file passed as the root
        # would otherwise get through and fail later with an opaque
        # "Not a directory" error from mkdir.
        if not project_root_path.is_dir():
            return _artifact_error(
                f"project_root directory does not exist: {project_root}",
                task_id,
                filename,
            )

        # Log info for non-standard artifact types (but still accept them!).
        # ARTIFACT_PATHS is the single source of truth for standard types.
        if artifact_type not in ARTIFACT_PATHS:
            logger.info(
                f"Using custom artifact type '{artifact_type}' for {filename}. "
                f"Will store in docs/artifacts/ (use 'location' parameter to override)"
            )

        # An empty-string location takes the default branch below, so it
        # must be reported as the default location as well.
        uses_default_location = not location

        # Determine storage location
        if location:
            # Use provided location (but ensure it's relative)
            artifact_path = Path(location)
            if artifact_path.is_absolute():
                return _artifact_error(
                    "Location must be a relative path", task_id, filename
                )
        else:
            # Without a filename the default path collapses to the type
            # directory itself and a file would be written in its place.
            if not filename:
                return _artifact_error(
                    "filename is required when no location is given",
                    task_id,
                    filename,
                )
            # Use default location based on type
            base_dir = ARTIFACT_PATHS.get(artifact_type, "docs/artifacts")
            artifact_path = Path(base_dir) / filename

        # Create full path using project_root instead of Path.cwd()
        full_path = project_root_path / artifact_path

        # Normalise away any ".." traversal segments so the artifact-root
        # bypass check cannot be subverted via paths like
        # "docs/../src/theme.css". resolve() is non-strict by default, which
        # matters because the target file may not exist yet.
        try:
            resolved_root = project_root_path.resolve()
            resolved_full = full_path.resolve()
            normalized_relative = resolved_full.relative_to(resolved_root)
        except ValueError:
            # Path resolves outside project root — reject unconditionally.
            return _artifact_error(
                f"Location resolves outside project root: {artifact_path}. "
                "Use a path that stays within the project directory.",
                task_id,
                filename,
            )

        # Use the canonicalised paths for all downstream operations so
        # that filesystem writes and the git check target the same file.
        artifact_path = normalized_relative
        full_path = resolved_full

        # Guard: refuse to overwrite git-tracked source files.
        # log_artifact's contract is to persist artifacts (docs, reports,
        # design outputs) — not to overwrite source code managed by git.
        # dashboard-v82 post-mortem: Agent 1 overwrote theme.css and
        # design-tokens.json via log_artifact and had to restore from git
        # (commit d44dd5a). ``git ls-files --error-unmatch`` exits 0 only
        # when the path is tracked; non-zero means untracked or outside
        # the repo — both safe to write.
        #
        # Scope: only enforce for paths outside the known artifact output
        # roots (docs/ and tmp/). Files under those roots may legitimately
        # be tracked — a previous run committed them — and iterative
        # artifact refreshes must be allowed. Paths under src/, lib/, or
        # any other root are not artifact outputs and are guarded.
        # NOTE: artifact_path is already normalised above — parts[0]
        # reflects the true root even when the caller supplied "..".
        artifact_output_roots = {"docs", "tmp"}
        top_level = artifact_path.parts[0] if artifact_path.parts else None
        if top_level not in artifact_output_roots:
            try:
                git_result = subprocess.run(
                    [
                        "git",
                        "-C",
                        str(project_root_path),
                        "ls-files",
                        "--error-unmatch",
                        str(full_path),
                    ],
                    capture_output=True,
                )
                if git_result.returncode == 0:
                    return _artifact_error(
                        f"Refusing to overwrite git-tracked file: {artifact_path}. "
                        "log_artifact is for documentation artifacts, not source "
                        "files. Use a docs/ or tmp/ path for this artifact.",
                        task_id,
                        filename,
                    )
            except FileNotFoundError:
                # git not available in this environment — skip the guard.
                logger.debug(
                    "git not found; skipping tracked-file guard for %s", full_path
                )

        # Ensure directory exists
        full_path.parent.mkdir(parents=True, exist_ok=True)

        # Guard: refuse to silently replace a large docs/ file with
        # substantially smaller content (stub-overwrite prevention).
        # Threshold: existing >= 8 KB AND new < 50 % of existing.
        # Scoped to docs/ only — tmp/ artifacts are legitimately replaced
        # with compact summaries and must not be gated.
        # Bypassed when force=True so intentional downsizes are allowed.
        size_guard_threshold = 8_000  # bytes
        if not force and top_level == "docs" and full_path.exists():
            existing_size = full_path.stat().st_size
            new_size = len(content.encode("utf-8"))
            if (
                existing_size >= size_guard_threshold
                and new_size < existing_size * 0.5
            ):
                return _artifact_error(
                    f"Refusing to overwrite '{artifact_path}': existing file "
                    f"is {existing_size:,} bytes but new content is only "
                    f"{new_size:,} bytes (< 50 %). This looks like an accidental "
                    "stub overwrite. Pass force=True if this is intentional.",
                    task_id,
                    filename,
                )

        # Write content to file
        full_path.write_text(content, encoding="utf-8")

        # Record the artifact on the in-memory state. ``state`` defaults to
        # None, so this must not be attempted unconditionally: the file is
        # already on disk at this point.
        artifact_entry: Dict[str, Any] = {
            "filename": filename,
            "location": str(artifact_path),
            "artifact_type": artifact_type,
            "description": description,
            "is_default_location": uses_default_location,
        }
        if artifact_role is not None:
            artifact_entry["artifact_role"] = artifact_role

        if state is not None:
            # Initialize task_artifacts if not exists
            if not hasattr(state, "task_artifacts"):
                state.task_artifacts = {}
            state.task_artifacts.setdefault(task_id, []).append(artifact_entry)

        # Add a comment to the task if kanban is available
        kanban_client = getattr(state, "kanban_client", None)
        if kanban_client and description:
            try:
                # Find the task
                task = next((t for t in state.project_tasks if t.id == task_id), None)
                if task:
                    card_id = getattr(task, "kanban_card_id", None) or task.id
                    location_type = "default" if uses_default_location else "custom"
                    comment_text = (
                        f"📄 Created {artifact_type} artifact: {filename}\n"
                        f"Location: {artifact_path} ({location_type})\n\n"
                        f"{description}"
                    )
                    await kanban_client.add_comment(
                        task_id=card_id,
                        comment=comment_text,
                    )
            except Exception as e:
                logger.warning(f"Could not add comment: {e}")

        log_msg = (
            f"Stored {artifact_type} artifact {filename} for task "
            f"{task_id} at {artifact_path}"
        )
        logger.info(log_msg)

        # Record in active experiment if one is running. Best-effort, like
        # the kanban comment: the artifact is already stored.
        try:
            from src.experiments.live_experiment_monitor import get_active_monitor

            monitor = get_active_monitor()
            if monitor and monitor.is_running:
                monitor.record_artifact(
                    task_id=task_id,
                    artifact_type=artifact_type,
                    filename=filename,
                    description=description or "",
                )
        except Exception as e:
            logger.warning(f"Could not record artifact in experiment monitor: {e}")

        # Persist to project history for post-project analysis
        await _persist_artifact_to_history(
            task_id=task_id,
            filename=filename,
            artifact_type=artifact_type,
            artifact_path=artifact_path,
            full_path=full_path,
            description=description,
            state=state,
        )

        return {
            "success": True,
            "data": {
                "task_id": task_id,
                "filename": filename,
                "location": str(artifact_path),
                "full_path": str(full_path),
                "artifact_type": artifact_type,
                "is_default_location": uses_default_location,
                "description": description,
            },
        }

    except Exception as e:
        logger.error(f"Error storing artifact: {str(e)}")
        return _artifact_error(
            f"Failed to store artifact: {str(e)}", task_id, filename
        )

async def _discover_artifacts_in_standard_locations(
    working_dir: Path,
) -> List[Dict[str, Any]]:
    """
    Scan standard artifact directories for files in project directory.

    Every directory listed in ``ARTIFACT_PATHS`` is searched recursively.
    Files whose name starts with ``"."`` are ignored. Because some standard
    roots are nested inside others (``docs`` contains ``docs/api``), a file
    is reported once only, under the most specific root that contains it.

    Parameters
    ----------
    working_dir : Path
        The project root directory to scan for artifacts

    Returns
    -------
    List[Dict[str, Any]]
        List of discovered artifact dictionaries with the keys
        ``filename``, ``location`` (relative to ``working_dir``),
        ``artifact_type``, ``description`` and ``is_default_location``.
    """
    discovered: List[Dict[str, Any]] = []
    base_paths = {
        artifact_type: Path(base) for artifact_type, base in ARTIFACT_PATHS.items()
    }

    for artifact_type, base_path in base_paths.items():
        # Other standard roots that live underneath this one. Files inside
        # them belong to the more specific type and are skipped here, which
        # prevents the same file being listed twice with conflicting types.
        nested_roots = [
            other
            for other in base_paths.values()
            if other != base_path and base_path in other.parents
        ]

        # Use working_dir instead of current directory
        scan_root = working_dir / base_path
        if not scan_root.exists():
            continue

        try:
            for file_path in scan_root.rglob("*"):
                if not file_path.is_file() or file_path.name.startswith("."):
                    continue
                relative = file_path.relative_to(working_dir)
                if any(nested in relative.parents for nested in nested_roots):
                    continue
                discovered.append(
                    {
                        "filename": file_path.name,
                        "location": str(relative),
                        "artifact_type": artifact_type,
                        "description": f"Discovered {artifact_type} file",
                        "is_default_location": True,
                    }
                )
        except Exception as e:
            # A failure in one root must not prevent scanning the others.
            logger.warning(f"Error scanning {base_path}: {e}")

    return discovered


async def _persist_artifact_to_history(
    task_id: str,
    filename: str,
    artifact_type: str,
    artifact_path: Path,
    full_path: Path,
    description: Optional[str],
    state: Any,
) -> None:
    """
    Persist artifact metadata to project history for post-project analysis.

    Stores metadata about the artifact (not the content) to enable tracing
    what was produced during project execution. The file size and a SHA-256
    digest of the file are recorded when the file exists.

    Parameters
    ----------
    task_id : str
        Task that produced the artifact
    filename : str
        Name of the artifact file
    artifact_type : str
        Type of artifact
    artifact_path : Path
        Relative path to artifact
    full_path : Path
        Absolute path to artifact
    description : Optional[str]
        Description of the artifact
    state : Any
        Marcus server state. Nothing is persisted when it has no truthy
        ``current_project_id``.

    Notes
    -----
    Fails gracefully - errors are logged but don't interrupt the main flow.
    """
    try:
        # Get project info from state
        project_id = getattr(state, "current_project_id", None)
        if not project_id:
            logger.debug("No active project - skipping project history persistence")
            return

        project_name = getattr(state, "current_project_name", project_id)

        # Get agent ID from state if available
        agent_id = getattr(state, "current_agent_id", "unknown")

        # Initialize project history persistence if not already done
        if not hasattr(state, "project_history_persistence"):
            state.project_history_persistence = ProjectHistoryPersistence()

        # Calculate file size and hash for integrity checking
        file_size_bytes = 0
        sha256_hash = None
        if full_path.exists():
            file_size_bytes = full_path.stat().st_size

            # Hash in fixed-size chunks so large artifacts are never read
            # into memory in one piece.
            digest = hashlib.sha256()
            with open(full_path, "rb") as f:
                for chunk in iter(lambda: f.read(8192), b""):
                    digest.update(chunk)
            sha256_hash = digest.hexdigest()

        # Get kanban comment URL if artifact was posted to kanban
        kanban_comment_url = getattr(state, "last_kanban_comment_url", None)

        # Generate artifact ID from the task and the current UTC timestamp
        now = datetime.now(timezone.utc)
        artifact_id = f"art_{task_id}_{now.timestamp()}"

        # Create artifact metadata
        artifact_metadata = ArtifactMetadata(
            artifact_id=artifact_id,
            task_id=task_id,
            agent_id=agent_id,
            timestamp=now,
            filename=filename,
            artifact_type=artifact_type,
            relative_path=str(artifact_path),
            absolute_path=str(full_path),
            description=description or "",
            file_size_bytes=file_size_bytes,
            sha256_hash=sha256_hash,
            kanban_comment_url=kanban_comment_url,
            project_id=project_id,
        )

        # Persist to project history
        await state.project_history_persistence.append_artifact(
            project_id, project_name, artifact_metadata
        )

        logger.info(
            f"Persisted artifact {filename} to project history for {project_id}"
        )

    except Exception as e:
        # Graceful degradation - log but don't fail
        logger.warning(f"Failed to persist artifact to project history: {e}")