@@ -87,11 +87,12 @@ class IntentClassifier:
     """

     def __init__(self) -> None:
-        self._encoder = None  # SentenceTransformer
+        self._encoder = None  # SentenceTransformer oder ONNX-Encoder
+        self._tokenizer = None  # HuggingFace Tokenizer (fuer ONNX-Encoder)
         self._classifier = None  # ONNX InferenceSession oder PyTorch
         self._use_onnx = False
+        self._use_onnx_encoder = False  # True wenn ONNX-Encoder statt SentenceTransformer
         self._intent_names: list[str] = []
-        self._tag_names: list[str] = []
         self._slot_names: list[str] = []
         self._embedding_dim: int = 0
         self._loaded = False
@@ -147,14 +148,40 @@ class IntentClassifier:
             perror(f"IntentClassifier: Metadaten-Fehler: {e}")
             return False

-        # Encoder laden
-        try:
-            from sentence_transformers import SentenceTransformer
-            self._encoder = SentenceTransformer(base_model)
-            pdebug(f"IntentClassifier: Encoder geladen ({base_model})")
-        except ImportError:
-            perror("sentence-transformers nicht installiert")
-            return False
+        # Encoder laden (ONNX bevorzugt, Fallback SentenceTransformer)
+        encoder_dir = model_dir / "encoder_onnx"
+        has_onnx_encoder = encoder_dir.is_dir() and any(
+            f.suffix == ".onnx" for f in encoder_dir.iterdir() if f.is_file()
+        )
+        if has_onnx_encoder:
+            # ONNX-Encoder (kein PyTorch noetig, ~112MB)
+            try:
+                from optimum.onnxruntime import ORTModelForFeatureExtraction
+                from transformers import AutoTokenizer
+
+                # Quantisiertes Modell bevorzugen
+                onnx_files = [f.name for f in encoder_dir.iterdir() if f.suffix == ".onnx"]
+                file_name = "model_quantized.onnx" if "model_quantized.onnx" in onnx_files else None
+                self._encoder = ORTModelForFeatureExtraction.from_pretrained(
+                    str(encoder_dir), file_name=file_name,
+                )
+                self._tokenizer = AutoTokenizer.from_pretrained(str(encoder_dir))
+                self._use_onnx_encoder = True
+                pinfo(f"IntentClassifier: ONNX-Encoder geladen ({encoder_dir})")
+            except ImportError:
+                pdebug("optimum nicht verfuegbar, versuche SentenceTransformer")
+            except Exception as e:
+                pdebug(f"ONNX-Encoder Fehler: {e}, versuche SentenceTransformer")
+
+        if not self._use_onnx_encoder:
+            # Fallback: SentenceTransformer (braucht PyTorch, ~400MB)
+            try:
+                from sentence_transformers import SentenceTransformer
+                self._encoder = SentenceTransformer(base_model)
+                pdebug(f"IntentClassifier: SentenceTransformer geladen ({base_model})")
+            except ImportError:
+                perror("Weder optimum noch sentence-transformers installiert")
+                return False

         # Classifier laden (ONNX bevorzugt, Fallback PyTorch)
         onnx_path = model_dir / "intent_classifier.onnx"
@@ -197,8 +224,8 @@ class IntentClassifier:

         self._loaded = True
         pinfo(
-            f"IntentClassifier geladen: {len(self._intent_names)} Intents, "
-            f"{len(self._tag_names)} Slot-Tags"
+            f"IntentClassifier geladen: {len(self._intent_names)} Intents"
+            f"{', ONNX-Encoder' if self._use_onnx_encoder else ', SentenceTransformer'}"
         )
         return True
@@ -226,8 +253,25 @@ class IntentClassifier:

        # 1. Embedding berechnen
        import numpy as np
-        embedding = self._encoder.encode([text], show_progress_bar=False)
-        embedding = np.array(embedding, dtype=np.float32)
+
+        if self._use_onnx_encoder:
+            # ONNX-Encoder: Tokenize → Encoder → Mean-Pooling
+            inputs = self._tokenizer(
+                text, return_tensors="np",
+                padding=True, truncation=True, max_length=128,
+            )
+            outputs = self._encoder(**inputs)
+            # Mean-Pooling ueber Token-Embeddings (ohne Padding)
+            token_embeddings = outputs.last_hidden_state[0]  # (seq_len, dim)
+            attention_mask = inputs["attention_mask"][0]  # (seq_len,)
+            mask = attention_mask.astype(np.float32)
+            masked = token_embeddings * mask[:, np.newaxis]
+            embedding = masked.sum(axis=0) / mask.sum()
+            embedding = embedding.reshape(1, -1).astype(np.float32)
+        else:
+            # SentenceTransformer: Direkte Embedding-Berechnung
+            embedding = self._encoder.encode([text], show_progress_bar=False)
+            embedding = np.array(embedding, dtype=np.float32)

        # 2. Classifier ausfuehren
        if self._use_onnx: