# Spaces: Sleeping
| import numpy as np | |
| import pandas as pd | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.cluster import KMeans | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import random | |
| import json | |
| from concurrent.futures import ThreadPoolExecutor, as_completed | |
| from typing import List, Dict, Any, Optional | |
| from prompts import CATEGORY_SUGGESTION_PROMPT, TEXT_CLASSIFICATION_PROMPT | |
class BaseClassifier:
    """Base class for text classifiers.

    Defines the ``classify`` entry point that subclasses must override and
    provides a helper that produces placeholder category names.
    """

    def __init__(self) -> None:
        """Create a classifier; the base class holds no state."""

    def classify(
        self,
        texts: List[str],
        categories: Optional[List[str]] = None,
    ) -> List[Dict[str, Any]]:
        """Classify a list of texts into categories.

        Args:
            texts: Text strings to classify.
            categories: Category names. If None, categories will be
                auto-detected.

        Returns:
            Classification results with categories, confidence scores, and
            explanations.

        Raises:
            NotImplementedError: Always, in this base class; subclasses must
                provide the implementation.
        """
        raise NotImplementedError("Subclasses must implement this method")

    def _generate_default_categories(
        self,
        texts: List[str],
        num_clusters: int = 5,
    ) -> List[str]:
        """Generate default category names.

        Args:
            texts: Text strings. Currently unused: the names do not depend on
                the content of the texts.
            num_clusters: Number of category names to generate. A value of
                zero or less yields an empty list.

        Returns:
            Names of the form ``"Category 1"`` .. ``"Category <num_clusters>"``.
        """
        # Simple implementation - in a real system this would be more
        # sophisticated (e.g. derived from clustering the texts).
        return [f"Category {index}" for index in range(1, num_clusters + 1)]