# Source code for src.config.hybrid_inference_config
"""Configuration for Hybrid Dependency Inference.
Allows tuning of thresholds to optimize the balance between
accuracy and API call costs.
"""
from dataclasses import asdict, dataclass, fields, replace
from typing import Any
@dataclass
class HybridInferenceConfig:
    """Configuration for hybrid dependency inference thresholds.

    Every setting has a default, so ``HybridInferenceConfig()`` is a usable
    configuration. Instances are plain mutable dataclasses; values are not
    checked on construction, so call :meth:`validate` explicitly after
    building or loading a configuration.
    """

    # Pattern matching thresholds
    pattern_confidence_threshold: float = 0.8
    """Pattern matches above this confidence don't need AI validation (0.0-1.0)
    Higher = more AI calls but better accuracy
    Lower = fewer AI calls but may miss complex dependencies
    Default: 0.8 (80% confidence)"""

    # AI analysis thresholds
    ai_confidence_threshold: float = 0.7
    """Minimum AI confidence to accept a dependency (0.0-1.0)
    Higher = more conservative, fewer false positives
    Lower = more permissive, may include weak dependencies
    Default: 0.7 (70% confidence)"""

    # Combination settings
    combined_confidence_boost: float = 0.15
    """Confidence boost when pattern and AI agree (0.0-0.3)
    Higher = stronger preference for agreement
    Lower = less weight on agreement
    Default: 0.15 (15% boost)"""

    # Performance settings
    max_ai_pairs_per_batch: int = 20
    """Maximum task pairs to analyze in one AI request
    Higher = fewer API calls but longer prompts
    Lower = more API calls but simpler prompts
    Default: 20 pairs"""

    min_shared_keywords: int = 2
    """Minimum shared keywords to consider tasks related
    Higher = fewer AI analyses, may miss subtle relationships
    Lower = more AI analyses, better coverage
    Default: 2 keywords"""

    # Cost control settings
    enable_ai_inference: bool = True
    """Master switch for AI inference
    True = Use hybrid approach
    False = Pattern-only (no API calls)
    Default: True"""

    cache_ttl_hours: int = 24
    """How long to cache AI inference results
    Higher = fewer repeat API calls
    Lower = more up-to-date analysis
    Default: 24 hours"""

    # Advanced settings
    require_component_match: bool = True
    """Whether tasks must share a component for pattern inference
    True = More accurate but may miss cross-component dependencies
    False = More permissive pattern matching
    Default: True"""

    max_dependency_chain_length: int = 10
    """Maximum length of dependency chains to prevent cycles
    Higher = Allow deeper task hierarchies
    Lower = Simpler, flatter task structures
    Default: 10"""

    @classmethod
    def from_dict(cls, config_dict: dict[str, Any]) -> "HybridInferenceConfig":
        """Create config from dictionary.

        Args:
            config_dict: Mapping of setting name to value. Keys that are not
                declared fields of this dataclass are ignored; settings that
                are absent keep their defaults.

        Returns:
            A new configuration instance. The values are not validated here;
            call :meth:`validate` on the result if that is required.
        """
        # Filter on the declared fields rather than ``hasattr``: the class also
        # has attributes such as ``validate`` and ``to_dict``, and forwarding
        # those names to the constructor would raise ``TypeError``.
        known = {f.name for f in fields(cls) if f.init}
        return cls(**{k: v for k, v in config_dict.items() if k in known})

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization.

        Returns:
            A new dict mapping each field name to its current value, in field
            declaration order.
        """
        # ``asdict`` walks the declared fields, so the output cannot drift out
        # of sync with the class definition when a setting is added.
        return asdict(self)

    def validate(self) -> None:
        """Validate configuration values.

        Checks are applied in declaration order and the first failure is
        reported.

        Raises:
            ValueError: If a confidence threshold is outside 0.0-1.0, the
                combined boost is outside 0.0-0.3, the batch size or shared
                keyword minimum is below 1, or the cache TTL is negative.
        """
        # NOTE(review): max_dependency_chain_length and the boolean switches
        # are not range-checked here.
        if not 0.0 <= self.pattern_confidence_threshold <= 1.0:
            raise ValueError("pattern_confidence_threshold must be between 0.0 and 1.0")
        if not 0.0 <= self.ai_confidence_threshold <= 1.0:
            raise ValueError("ai_confidence_threshold must be between 0.0 and 1.0")
        if not 0.0 <= self.combined_confidence_boost <= 0.3:
            raise ValueError("combined_confidence_boost must be between 0.0 and 0.3")
        if self.max_ai_pairs_per_batch < 1:
            raise ValueError("max_ai_pairs_per_batch must be at least 1")
        if self.min_shared_keywords < 1:
            raise ValueError("min_shared_keywords must be at least 1")
        if self.cache_ttl_hours < 0:
            raise ValueError("cache_ttl_hours must be non-negative")
# Named, ready-made configurations. Any setting a preset does not mention
# keeps the HybridInferenceConfig default.
PRESETS: dict[str, HybridInferenceConfig] = {
    # Tighter than the defaults on every setting it overrides.
    "conservative": HybridInferenceConfig(
        min_shared_keywords=3,  # tasks must share more keywords to be related
        max_ai_pairs_per_batch=10,  # smaller batches than the default 20
        combined_confidence_boost=0.2,  # bigger boost when pattern and AI agree
        ai_confidence_threshold=0.8,  # accept only high-confidence AI results
        pattern_confidence_threshold=0.9,  # patterns need 0.9 to skip AI validation
    ),
    # Pure defaults.
    "balanced": HybridInferenceConfig(),
    # Looser than the defaults on every setting it overrides.
    "aggressive": HybridInferenceConfig(
        min_shared_keywords=1,  # a single shared keyword marks tasks as related
        max_ai_pairs_per_batch=30,  # larger batches than the default 20
        combined_confidence_boost=0.1,  # smaller boost when pattern and AI agree
        ai_confidence_threshold=0.6,  # accept lower-confidence AI results
        # NOTE(review): this was previously described as "more AI validation",
        # but the field documentation says a lower threshold means fewer AI
        # calls - confirm which is intended.
        pattern_confidence_threshold=0.7,
    ),
    # Aims to reduce the number of API calls.
    "cost_optimized": HybridInferenceConfig(
        enable_ai_inference=True,  # AI stays on (same as the default)
        cache_ttl_hours=48,  # keep cached AI results twice as long as the default
        max_ai_pairs_per_batch=50,  # big batches, fewer requests
        ai_confidence_threshold=0.75,  # slightly stricter than the default 0.7
        # NOTE(review): 0.85 is above the default 0.8, which per the field
        # documentation means more AI calls - confirm this fits the cost goal.
        pattern_confidence_threshold=0.85,
    ),
    # Pattern matching alone; AI inference is switched off.
    "pattern_only": HybridInferenceConfig(enable_ai_inference=False),
}
def get_preset_config(preset_name: str) -> HybridInferenceConfig:
    """Get a preset configuration by name.

    Args:
        preset_name: One of the keys of ``PRESETS``.

    Returns:
        A fresh copy of the preset, so the caller may adjust it without
        affecting the shared entry stored in ``PRESETS``.

    Raises:
        ValueError: If ``preset_name`` is not a known preset; the message
            lists the available names.
    """
    try:
        preset = PRESETS[preset_name]
    except KeyError:
        raise ValueError(
            f"Unknown preset: {preset_name}. Available: {list(PRESETS)}"
        ) from None
    # The config dataclass is mutable; handing out the stored instance would
    # let one caller's tweaks leak into every later lookup of the same preset.
    return replace(preset)