Source code for src.config.hybrid_inference_config

"""Configuration for Hybrid Dependency Inference.

Allows tuning of thresholds to optimize the balance between
accuracy and API call costs.
"""

from dataclasses import dataclass
from typing import Any



[docs]
@dataclass
class HybridInferenceConfig:
    """Configuration for hybrid dependency inference thresholds."""

    # Pattern matching thresholds
    pattern_confidence_threshold: float = 0.8
    """Pattern matches above this confidence don't need AI validation (0.0-1.0)
    Higher = more AI calls but better accuracy
    Lower = fewer AI calls but may miss complex dependencies
    Default: 0.8 (80% confidence)"""

    # AI analysis thresholds
    ai_confidence_threshold: float = 0.7
    """Minimum AI confidence to accept a dependency (0.0-1.0)
    Higher = more conservative, fewer false positives
    Lower = more permissive, may include weak dependencies
    Default: 0.7 (70% confidence)"""

    # Combination settings
    combined_confidence_boost: float = 0.15
    """Confidence boost when pattern and AI agree (0.0-0.3)
    Higher = stronger preference for agreement
    Lower = less weight on agreement
    Default: 0.15 (15% boost)"""

    # Performance settings
    max_ai_pairs_per_batch: int = 20
    """Maximum task pairs to analyze in one AI request
    Higher = fewer API calls but longer prompts
    Lower = more API calls but simpler prompts
    Default: 20 pairs"""

    min_shared_keywords: int = 2
    """Minimum shared keywords to consider tasks related
    Higher = fewer AI analyses, may miss subtle relationships
    Lower = more AI analyses, better coverage
    Default: 2 keywords"""

    # Cost control settings
    enable_ai_inference: bool = True
    """Master switch for AI inference
    True = Use hybrid approach
    False = Pattern-only (no API calls)
    Default: True"""

    cache_ttl_hours: int = 24
    """How long to cache AI inference results
    Higher = fewer repeat API calls
    Lower = more up-to-date analysis
    Default: 24 hours"""

    # Advanced settings
    require_component_match: bool = True
    """Whether tasks must share a component for pattern inference
    True = More accurate but may miss cross-component dependencies
    False = More permissive pattern matching
    Default: True"""

    max_dependency_chain_length: int = 10
    """Maximum length of dependency chains to prevent cycles
    Higher = Allow deeper task hierarchies
    Lower = Simpler, flatter task structures
    Default: 10"""


[docs]
    @classmethod
    def from_dict(cls, config_dict: dict[str, Any]) -> "HybridInferenceConfig":
        """Create config from dictionary."""
        return cls(**{k: v for k, v in config_dict.items() if hasattr(cls, k)})



[docs]
    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization."""
        return {
            "pattern_confidence_threshold": self.pattern_confidence_threshold,
            "ai_confidence_threshold": self.ai_confidence_threshold,
            "combined_confidence_boost": self.combined_confidence_boost,
            "max_ai_pairs_per_batch": self.max_ai_pairs_per_batch,
            "min_shared_keywords": self.min_shared_keywords,
            "enable_ai_inference": self.enable_ai_inference,
            "cache_ttl_hours": self.cache_ttl_hours,
            "require_component_match": self.require_component_match,
            "max_dependency_chain_length": self.max_dependency_chain_length,
        }



[docs]
    def validate(self) -> None:
        """Validate configuration values."""
        if not 0.0 <= self.pattern_confidence_threshold <= 1.0:
            raise ValueError("pattern_confidence_threshold must be between 0.0 and 1.0")

        if not 0.0 <= self.ai_confidence_threshold <= 1.0:
            raise ValueError("ai_confidence_threshold must be between 0.0 and 1.0")

        if not 0.0 <= self.combined_confidence_boost <= 0.3:
            raise ValueError("combined_confidence_boost must be between 0.0 and 0.3")

        if self.max_ai_pairs_per_batch < 1:
            raise ValueError("max_ai_pairs_per_batch must be at least 1")

        if self.min_shared_keywords < 1:
            raise ValueError("min_shared_keywords must be at least 1")

        if self.cache_ttl_hours < 0:
            raise ValueError("cache_ttl_hours must be non-negative")




# Preset configurations for different use cases
PRESETS = {
    "conservative": HybridInferenceConfig(
        pattern_confidence_threshold=0.9,  # Only use AI for low confidence patterns
        ai_confidence_threshold=0.8,  # Require high AI confidence
        combined_confidence_boost=0.2,  # Strong preference for agreement
        max_ai_pairs_per_batch=10,  # Smaller batches for accuracy
        min_shared_keywords=3,  # Stricter relatedness check
    ),
    "balanced": HybridInferenceConfig(
        # Use defaults
    ),
    "aggressive": HybridInferenceConfig(
        pattern_confidence_threshold=0.7,  # More AI validation
        ai_confidence_threshold=0.6,  # Accept more dependencies
        combined_confidence_boost=0.1,  # Less weight on agreement
        max_ai_pairs_per_batch=30,  # Larger batches for efficiency
        min_shared_keywords=1,  # Looser relatedness check
    ),
    "cost_optimized": HybridInferenceConfig(
        pattern_confidence_threshold=0.85,  # Trust patterns more
        ai_confidence_threshold=0.75,  # Moderate AI threshold
        max_ai_pairs_per_batch=50,  # Large batches to minimize calls
        cache_ttl_hours=48,  # Longer cache
        enable_ai_inference=True,  # Still use AI but sparingly
    ),
    "pattern_only": HybridInferenceConfig(
        enable_ai_inference=False  # No API calls at all
    ),
}



[docs]
def get_preset_config(preset_name: str) -> HybridInferenceConfig:
    """Get a preset configuration by name."""
    if preset_name not in PRESETS:
        raise ValueError(
            f"Unknown preset: {preset_name}. Available: {list(PRESETS.keys())}"
        )
    return PRESETS[preset_name]