# Source code for src.integrations.enhanced_task_classifier

"""
Enhanced Task Type Classification System.

Provides robust task type identification with expanded keyword lists,
pattern matching, and context-aware classification for 95%+ accuracy.
"""

import re
from dataclasses import dataclass
from typing import Dict, List, Pattern, Tuple

from src.core.models import Task
from src.integrations.nlp_task_utils import TaskType


@dataclass
class ClassificationResult:
    """
    Outcome of classifying a single task.

    Attributes
    ----------
    task_type : TaskType
        The task type selected by the classifier.
    confidence : float
        Confidence assigned to ``task_type`` by the classifier.
    matched_keywords : List[str]
        Keywords (and labels) that supported the selected type.
    matched_patterns : List[str]
        Source strings of the regular expressions that matched.
    reasoning : str
        Human-readable explanation of the decision.
    """

    task_type: TaskType
    confidence: float
    matched_keywords: List[str]
    matched_patterns: List[str]
    reasoning: str
class EnhancedTaskClassifier:
    """
    Enhanced task classifier with expanded keywords and pattern matching.

    Improvements over the basic classifier:

    - Expanded keyword lists based on real-world usage
    - Regular expression pattern matching
    - Context-aware classification
    - Confidence scoring
    - Support for compound task names

    Scoring is weighted: explicit labels and the task name are strong
    signals, the task description is a weak signal.
    """

    # Expanded keyword mappings with categories
    TASK_KEYWORDS = {
        TaskType.DESIGN: {
            "primary": [
                "design", "architect", "plan", "planning", "architecture",
                "blueprint", "specification", "spec", "specs", "research",
                "analyze", "analysis", "study", "investigate",
            ],
            "secondary": [
                "wireframe", "mockup", "prototype", "diagram", "model",
                "schema", "structure", "layout", "interface", "ui/ux", "ux",
                "ui", "workflow", "concept", "draft", "outline", "framework",
                "pattern", "template",
            ],
            "verbs": [
                "design", "plan", "architect", "draft", "outline",
                "conceptualize", "define", "specify", "model",
            ],
        },
        TaskType.IMPLEMENTATION: {
            "primary": [
                "implement", "build", "develop", "code", "program",
                "construct", "engineer", "fix", "bug", "bugfix", "patch",
                "repair",
            ],
            "secondary": [
                "feature", "functionality", "component", "module", "service",
                "api", "endpoint", "integration", "backend", "frontend",
                "database", "logic", "algorithm", "function", "class",
                "handler", "controller", "middleware",
            ],
            "verbs": [
                "implement", "build", "create", "develop", "code", "write",
                "add", "integrate", "setup", "configure", "establish",
                "construct", "generate", "produce",
            ],
        },
        TaskType.TESTING: {
            "primary": [
                "test", "testing", "qa", "quality", "verify", "validate",
                "check", "assert",
            ],
            "secondary": [
                "unit", "integration", "e2e", "end-to-end", "functional",
                "regression", "smoke", "acceptance", "performance", "load",
                "stress", "coverage", "suite", "scenario", "case", "cases",
                "spec", "specification", "behavior",
            ],
            "verbs": [
                "test", "verify", "validate", "check", "ensure", "confirm",
                "assert", "examine", "inspect", "audit",
            ],
        },
        TaskType.DOCUMENTATION: {
            "primary": [
                "document", "documentation", "docs", "readme", "guide",
                "manual", "wiki", "tutorial",
            ],
            "secondary": [
                "howto", "how-to", "reference", "api-docs", "changelog",
                "notes", "instructions", "help", "faq", "examples", "samples",
                "comments", "annotations", "description", "explanation",
                "onboarding",
            ],
            "verbs": [
                "document", "write", "annotate", "comment", "describe",
                "explain", "detail",
                "add",  # For "add comments"
            ],
        },
        TaskType.DEPLOYMENT: {
            "primary": [
                "deploy", "deployment", "release", "launch", "rollout",
                "publish",
            ],
            "secondary": [
                "production", "staging", "live", "go-live", "ship", "deliver",
                "distribution", "installation", "setup", "migration",
                "upgrade", "rollback", "hotfix",
            ],
            "verbs": [
                "deploy", "release", "launch", "publish", "ship", "deliver",
                "distribute", "install", "migrate",
            ],
        },
        TaskType.INTEGRATION: {
            "primary": [
                "integration verification", "build verification",
                "smoke test", "startup verification", "system verification",
            ],
            "secondary": [
                "health check", "port check", "endpoint verification",
                "runtime verification", "startup check",
            ],
            "verbs": [
                "verify integration", "verify build", "verify startup",
                "smoke test",
            ],
        },
        TaskType.INFRASTRUCTURE: {
            "primary": [
                "infrastructure", "setup", "configure", "provision",
                "environment", "devops",
            ],
            "secondary": [
                "server", "database", "network", "docker", "kubernetes",
                "k8s", "container", "vm", "cloud", "aws", "azure", "gcp",
                "ci/cd", "pipeline", "monitoring", "logging", "security",
                "firewall", "ssl", "dns", "cdn",
            ],
            "verbs": [
                "setup", "configure", "provision", "install", "initialize",
                "bootstrap", "orchestrate", "manage",
            ],
        },
    }

    # Pattern matching for more complex task names
    TASK_PATTERNS = {
        TaskType.DESIGN: [
            r"(?:create|define|plan)\s+(?:the\s+)?"
            r"(?:system|application|software)\s+(?:architecture|design)",
            r"design\s+(?:the\s+)?(?:data|database)\s+"
            r"(?:model|schema|structure)",
            r"(?:create|design)\s+(?:ui|ux|user\s+interface|"
            r"user\s+experience)",
            r"(?:define|specify)\s+(?:api|interface)\s+"
            r"(?:contracts?|specifications?)",
            r"(?:plan|design)\s+(?:the\s+)?(?:workflow|process|flow)",
        ],
        TaskType.IMPLEMENTATION: [
            r"(?:implement|build|create)\s+(?:the\s+)?(?:\w+\s+)?"
            r"(?:feature|functionality|component)",
            r"(?:add|integrate)\s+(?:\w+\s+)?(?:support|integration)\s+"
            r"(?:for|with)",
            r"(?:develop|code|write)\s+(?:the\s+)?(?:\w+\s+)?"
            r"(?:api|service|endpoint)",
            r"(?:create|build)\s+(?:the\s+)?(?:\w+\s+)?(?:backend|frontend|ui)",
            r"(?:implement|add)\s+(?:\w+\s+)?(?:logic|algorithm|handler)",
        ],
        TaskType.TESTING: [
            r"write.*tests?",  # Simplified pattern - put first
            r"(?:write|create|add)\s+(?:unit\s+)?tests?\s+(?:for|to)",
            r"(?:test|verify|validate)\s+(?:the\s+)?(?:\w+\s+)?"
            r"(?:functionality|feature|component)",
            r"(?:create|write)\s+(?:integration|e2e|end-to-end)\s+tests?",
            r"(?:ensure|verify|check)\s+(?:that|if)\s+(?:\w+\s+)?(?:works?|functions?)",
            r"(?:add|improve)\s+test\s+coverage",
        ],
        TaskType.DOCUMENTATION: [
            r"(?:document|write\s+documentation)\s+(?:for|about)",
            r"(?:create|write|update)\s+(?:the\s+)?(?:api|user|developer)\s+"
            r"(?:docs|documentation|guide)",
            r"(?:add|write)\s+(?:code\s+)?comments?\s+(?:to|for)",
            r"(?:create|update)\s+(?:the\s+)?readme(?:\.md)?",
            r"(?:write|create)\s+(?:a\s+)?(?:tutorial|guide|manual)",
        ],
        TaskType.DEPLOYMENT: [
            r"(?:deploy|release)\s+(?:to|on)\s+(?:production|staging|live)",
            r"(?:setup|configure)\s+(?:the\s+)?deployment\s+(?:pipeline|process)",
            r"(?:publish|ship)\s+(?:the\s+)?(?:application|app|service)",
            r"(?:rollout|launch)\s+(?:the\s+)?(?:feature|update|version)",
            r"(?:migrate|upgrade)\s+(?:the\s+)?(?:production|live)\s+"
            r"(?:environment|system)",
        ],
        TaskType.INTEGRATION: [
            r"(?:integration|build|startup)\s+verification",
            r"(?:verify|check)\s+(?:the\s+)?(?:build|startup|integration)",
            r"smoke\s+test\s+(?:the\s+)?(?:application|app|project)",
            r"(?:verify|check)\s+(?:the\s+)?(?:app|application)\s+"
            r"(?:works|runs|starts|responds)",
        ],
        TaskType.INFRASTRUCTURE: [
            r"(?:setup|configure)\s+(?:the\s+)?(?:ci/cd|pipeline|automation)",
            r"(?:provision|create)\s+(?:the\s+)?(?:infrastructure|environment)",
            r"(?:configure|setup)\s+(?:the\s+)?(?:monitoring|logging|alerts)",
            r"(?:install|setup)\s+(?:and\s+configure\s+)?(?:docker|kubernetes|k8s)",
            r"(?:create|setup)\s+(?:the\s+)?(?:server|network)"
            r"(?!\s+connection)",  # Exclude "connection"
            r"(?:setup|configure)\s+(?:the\s+)?database\s+"
            r"(?:cluster|infrastructure|environment|server)",  # More specific
        ],
    }

    # Labels that explicitly name a task type. A label listed here is the
    # strongest classification signal and adds 8.0 to that type's score.
    _LABEL_ALIASES = {
        TaskType.TESTING: frozenset({"test", "testing", "qa"}),
        TaskType.IMPLEMENTATION: frozenset({"implement", "implementation"}),
        TaskType.DESIGN: frozenset({"design", "architecture"}),
        TaskType.DOCUMENTATION: frozenset({"documentation", "docs", "readme"}),
        TaskType.DEPLOYMENT: frozenset({"deploy", "deployment", "release"}),
        TaskType.INFRASTRUCTURE: frozenset({"infrastructure", "devops", "setup"}),
        TaskType.INTEGRATION: frozenset(
            {"integration", "integration verification"}
        ),
    }

    def __init__(self) -> None:
        """Initialize the classifier and pre-compile ``TASK_PATTERNS``."""
        # Compile once (case-insensitively) so scoring never recompiles.
        self._compiled_patterns: Dict[TaskType, List[Pattern[str]]] = {
            task_type: [re.compile(pattern, re.IGNORECASE) for pattern in patterns]
            for task_type, patterns in self.TASK_PATTERNS.items()
        }

    def classify(self, task: Task) -> TaskType:
        """
        Classify a task using enhanced logic.

        Parameters
        ----------
        task : Task
            Task to classify.

        Returns
        -------
        TaskType
            The best-matching type, or ``TaskType.OTHER`` when the evidence
            is insufficient.
        """
        return self.classify_with_confidence(task).task_type

    def classify_with_confidence(self, task: Task) -> ClassificationResult:
        """
        Classify a task and return detailed results with confidence.

        Every type except ``TaskType.OTHER`` is scored; the highest score
        wins (ties resolve to the first type in enum order). A best score
        below 1.0 yields ``TaskType.OTHER`` with confidence 0.0.

        Parameters
        ----------
        task : Task
            Task to classify. ``name``, ``description`` and ``labels`` are
            read; each may be ``None``/empty.

        Returns
        -------
        ClassificationResult
            Selected type, confidence (at most 1.0), the matched keywords
            and patterns for that type, and a reasoning string.
        """
        # Name and labels are strong signals, the description a weak one;
        # they are kept separate so they can be weighted differently.
        task_name = (task.name or "").lower()
        task_description = (task.description or "").lower()
        task_labels = task.labels or []

        scores: Dict[TaskType, float] = {}
        matched_keywords: Dict[TaskType, List[str]] = {}
        matched_patterns: Dict[TaskType, List[str]] = {}

        for task_type in TaskType:
            if task_type == TaskType.OTHER:
                continue
            score, keywords, patterns = self._score_task_type(
                task_name=task_name,
                task_description=task_description,
                task_labels=task_labels,
                task_type=task_type,
            )
            scores[task_type] = score
            matched_keywords[task_type] = keywords
            matched_patterns[task_type] = patterns

        # Only possible when the enum defines no type besides OTHER.
        if not scores:
            return ClassificationResult(
                task_type=TaskType.OTHER,
                confidence=0.0,
                matched_keywords=[],
                matched_patterns=[],
                reasoning="No matching keywords or patterns found",
            )

        # GH-180: no special "ambiguous case" override is applied here. The
        # weighted scoring (name/labels > description) already lets a task
        # named "Implement X" win over a description that mentions "design".
        best_type = max(scores, key=lambda candidate: scores[candidate])
        best_score = scores[best_type]

        # If score is too low, treat as OTHER with 0 confidence
        if best_score < 1.0:
            return ClassificationResult(
                task_type=TaskType.OTHER,
                confidence=0.0,
                matched_keywords=[],
                matched_patterns=[],
                reasoning="Insufficient evidence for classification",
            )

        # Penalties can push individual scores negative, so the total may be
        # non-positive even though the best score passed the threshold.
        total_score = sum(scores.values())
        if total_score > 0:
            score_ratio = best_score / total_score
            base_confidence = min(best_score / 5.0, 1.0)
            uniqueness_bonus = score_ratio * 0.15
            confidence = max(0.85, base_confidence + uniqueness_bonus)

            # Reduce confidence when another type also scored meaningfully
            # and the winner is less than 3x ahead of it.
            competing_scores = [s for s in scores.values() if s > 1.0]
            if len(competing_scores) > 1:
                other_scores = [s for s in competing_scores if s != best_score]
                # Empty when every competitor ties with the best score.
                max_competing = max(other_scores) if other_scores else 0
                if max_competing > 0 and best_score / max_competing < 3.0:
                    confidence = min(confidence * 0.6, 0.65)
        else:
            confidence = 0.5

        # Boost confidence if we have strong indicators
        if matched_patterns[best_type]:
            confidence = min(confidence * 1.1, 0.95)

        # Extra boost for tasks with multiple matching keywords
        if len(matched_keywords[best_type]) >= 3:
            confidence = min(confidence * 1.05, 0.95)

        # Ensure confidence never exceeds 1.0
        confidence = min(confidence, 1.0)

        reasoning = self._generate_reasoning(
            best_type,
            matched_keywords[best_type],
            matched_patterns[best_type],
        )

        return ClassificationResult(
            task_type=best_type,
            confidence=confidence,
            matched_keywords=matched_keywords[best_type],
            matched_patterns=matched_patterns[best_type],
            reasoning=reasoning,
        )

    @staticmethod
    def _search_term(
        term: str, text: str, *, allow_plural: bool = False
    ) -> "re.Match[str] | None":
        """Search ``text`` for ``term`` as a whole word (optionally plural)."""
        suffix = "s?" if allow_plural else ""
        return re.search(rf"\b{re.escape(term)}{suffix}\b", text)

    def _score_task_type(
        self,
        task_name: str,
        task_description: str,
        task_labels: list[str],
        task_type: TaskType,
    ) -> Tuple[float, List[str], List[str]]:
        """
        Score how well a task matches a task type.

        Uses weighted scoring where strong signals (task name, labels) have
        higher weight than weak signals (description keywords).

        Parameters
        ----------
        task_name : str
            Task name (strong signal). Matching is case-sensitive, so the
            caller is expected to pass lower-cased text.
        task_description : str
            Task description (weak signal), lower-cased by the caller.
        task_labels : list[str]
            Task labels (strongest signal - explicit categorization).
        task_type : TaskType
            TaskType to score against.

        Returns
        -------
        tuple
            ``(score, matched_keywords, matched_patterns)``. Both lists are
            free of duplicates; ``matched_patterns`` holds regex sources.
        """
        score = 0.0
        matched_keywords: List[str] = []
        matched_patterns: List[str] = []
        keywords_dict = self.TASK_KEYWORDS.get(task_type, {})

        def remember(term: str) -> None:
            # Record each supporting term once so that duplicates cannot
            # inflate the keyword-count confidence boost.
            if term not in matched_keywords:
                matched_keywords.append(term)

        # STRONGEST SIGNAL: explicit labels (weight 8.0 per matching label).
        type_labels = self._LABEL_ALIASES.get(task_type, frozenset())
        for label in task_labels:
            label_lower = label.lower()
            if label_lower in type_labels:
                score += 8.0
                remember(label_lower)

        # Two fixed views of the task text. They are never rebound, so every
        # check below sees the same text regardless of earlier matches.
        context_text = f"{task_name} {task_description}".lower()
        combined_text = f"{context_text} {' '.join(task_labels).lower()}"

        # "Setup database connections" is implementation work, not infra.
        is_db_connection = "database" in context_text and "connection" in context_text

        # Primary keywords: strong in the name (5.0-6.0, x1.2 when the match
        # starts within the first 10 characters), medium in the description.
        for keyword in keywords_dict.get("primary", []):
            name_match = self._search_term(keyword, task_name, allow_plural=True)
            if name_match:
                position_weight = 1.2 if name_match.start() < 10 else 1.0
                if (
                    task_type == TaskType.INFRASTRUCTURE
                    and keyword in ("setup", "configure")
                    and is_db_connection
                ):
                    # Much lower score to avoid infrastructure classification
                    score += 0.5
                elif (
                    task_type == TaskType.IMPLEMENTATION
                    and keyword == "code"
                    and ("comment" in context_text or "document" in context_text)
                ):
                    # "code" next to documentation words is documentation
                    score += 1.0
                elif task_type == TaskType.TESTING and keyword in ("test", "testing"):
                    score += 6.0 * position_weight
                elif task_type == TaskType.IMPLEMENTATION and keyword in (
                    "implement",
                    "build",
                ):
                    score += 6.0 * position_weight
                elif task_type == TaskType.DESIGN and keyword in ("design", "plan"):
                    score += 6.0 * position_weight
                else:
                    score += 5.0 * position_weight
                remember(keyword)
            elif self._search_term(keyword, task_description, allow_plural=True):
                # Keyword only in description is a WEAK signal
                if task_type == TaskType.TESTING and keyword in ("test", "testing"):
                    score += 2.0
                elif task_type == TaskType.DOCUMENTATION and keyword == "document":
                    score += 2.0
                else:
                    score += 1.5
                remember(keyword)

        # Secondary keywords: moderate weight in name, low in description.
        for keyword in keywords_dict.get("secondary", []):
            if self._search_term(keyword, task_name, allow_plural=True):
                score += 2.0
                remember(keyword)
            elif self._search_term(keyword, task_description, allow_plural=True):
                if (
                    task_type == TaskType.DOCUMENTATION
                    and keyword == "comments"
                    and ("add" in context_text or "annotate" in context_text)
                ):
                    score += 1.5
                else:
                    score += 0.5
                remember(keyword)

        # Verb usage: higher weight in name, lower in description.
        for verb in keywords_dict.get("verbs", []):
            if self._search_term(verb, task_name):
                if (
                    task_type == TaskType.INFRASTRUCTURE
                    and verb in ("setup", "configure")
                    and is_db_connection
                ):
                    # Much lower score to avoid infrastructure classification
                    score += 0.3
                elif (
                    task_type == TaskType.IMPLEMENTATION
                    and verb in ("setup", "configure")
                    and is_db_connection
                ):
                    # Higher score to prefer IMPLEMENTATION
                    score += 3.0
                elif (
                    task_type == TaskType.DOCUMENTATION
                    and verb == "add"
                    and "comment" in context_text
                ):
                    # "Add code comments" is documentation work
                    score += 4.0
                elif (
                    task_type == TaskType.DOCUMENTATION
                    and verb in ("document", "annotate", "comment")
                    and ("function" in context_text or "code" in context_text)
                ):
                    # Documentation-specific verbs with code context
                    score += 3.5
                elif task_type == TaskType.IMPLEMENTATION and verb in (
                    "implement",
                    "build",
                    "create",
                    "develop",
                ):
                    score += 4.0
                elif task_type == TaskType.TESTING and verb in (
                    "test",
                    "verify",
                    "validate",
                ):
                    score += 4.0
                elif task_type == TaskType.DESIGN and verb in (
                    "design",
                    "plan",
                    "architect",
                ):
                    score += 4.0
                else:
                    score += 3.0
                remember(verb)
            elif self._search_term(verb, task_description):
                # Generic verbs in description get very low weight
                if verb in ("update", "create", "write", "add", "build"):
                    score += 0.3
                elif task_type == TaskType.DOCUMENTATION and verb in (
                    "annotate",
                    "comment",
                    "document",
                ):
                    score += 1.0
                else:
                    score += 0.5
                remember(verb)

        # Pattern matching: very strong in the name, moderate elsewhere.
        for regex_pattern in self._compiled_patterns.get(task_type, []):
            if regex_pattern.search(task_name):
                score += 5.0
                matched_patterns.append(regex_pattern.pattern)
            elif regex_pattern.search(combined_text):
                score += 2.0
                if regex_pattern.pattern not in matched_patterns:
                    matched_patterns.append(regex_pattern.pattern)

        # Small penalty for each primary keyword of another type found in the
        # task NAME. Uses the same whole-word matching as scoring so that
        # e.g. "latest" or "debug" are not mistaken for "test" or "bug".
        for other_type in TaskType:
            if other_type in (task_type, TaskType.OTHER):
                continue
            other_primary = self.TASK_KEYWORDS.get(other_type, {}).get("primary", [])
            for keyword in other_primary:
                if keyword not in matched_keywords and self._search_term(
                    keyword, task_name, allow_plural=True
                ):
                    score -= 0.3

        return score, matched_keywords, matched_patterns

    def _generate_reasoning(
        self, task_type: TaskType, keywords: List[str], patterns: List[str]
    ) -> str:
        """
        Generate human-readable reasoning for classification.

        Parameters
        ----------
        task_type : TaskType
            The selected type.
        keywords : List[str]
            Keywords matched for that type (at most three are shown).
        patterns : List[str]
            Regex sources matched for that type (at most two are shown).

        Returns
        -------
        str
            A sentence of the form ``"Classified as <type> because task ..."``.
        """
        reasons = []

        if patterns:
            reasons.append(f"matched patterns: {', '.join(patterns[:2])}")

        if keywords:
            primary_for_type = self.TASK_KEYWORDS.get(task_type, {}).get("primary", [])
            primary_keywords = [k for k in keywords if k in primary_for_type]
            if primary_keywords:
                reasons.append(
                    f"contains primary keywords: {', '.join(primary_keywords[:3])}"
                )
            else:
                reasons.append(f"contains keywords: {', '.join(keywords[:3])}")

        if not reasons:
            reasons.append("default classification based on context")

        return f"Classified as {task_type.value} because task {' and '.join(reasons)}"

    def get_suggestions(self, task: Task) -> Dict[str, List[str]]:
        """
        Get suggestions for improving task classification.

        Parameters
        ----------
        task : Task
            Task whose naming should be reviewed.

        Returns
        -------
        dict
            Suggestion lists keyed by ``"improve_clarity"`` and
            ``"use_patterns"``. Empty when the task classifies with
            confidence of at least 0.8 as a type other than OTHER.
        """
        result = self.classify_with_confidence(task)
        suggestions: Dict[str, List[str]] = {}

        # Only provide suggestions for unclear tasks
        is_unclear = result.confidence < 0.8 or result.task_type == TaskType.OTHER
        if not is_unclear:
            return suggestions

        if result.task_type == TaskType.OTHER:
            suggestions["improve_clarity"] = [
                "Consider starting with action words like: design, "
                "implement, test, document, deploy",
                "Be more specific about the task type",
                "Avoid ambiguous terms that could match multiple types",
            ]
            return suggestions

        primary = self.TASK_KEYWORDS.get(result.task_type, {}).get("primary", [])
        # Nothing type-specific can be suggested without primary keywords.
        if not primary:
            return suggestions

        suggestions["improve_clarity"] = [
            f"Consider starting with: {', '.join(primary[:3])}",
            "Be more specific about the task type",
            "Avoid ambiguous terms that could match multiple types",
        ]
        if not result.matched_patterns:
            suggestions["use_patterns"] = [
                f"For {result.task_type.value} tasks, try patterns like:",
                f"- '{primary[0]} [component name]'",
                f"- '{primary[0]} [feature] for [purpose]'",
            ]

        return suggestions

    def is_type(self, task: Task, task_type: TaskType) -> bool:
        """Return True when ``task`` classifies as ``task_type``."""
        return self.classify(task) == task_type

    def filter_by_type(self, tasks: List[Task], task_type: TaskType) -> List[Task]:
        """Return the tasks, in input order, that classify as ``task_type``."""
        return [task for task in tasks if self.classify(task) == task_type]