Spaces:
Sleeping
Sleeping
Update botsignal.py
Browse files- botsignal.py +136 -94
botsignal.py
CHANGED
|
@@ -3,9 +3,13 @@ import os
|
|
| 3 |
import re
|
| 4 |
import io
|
| 5 |
import hashlib
|
| 6 |
-
from collections import deque
|
|
|
|
| 7 |
from mimetypes import guess_extension
|
| 8 |
-
from typing import List
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
from rapidfuzz import fuzz
|
| 11 |
from telethon import TelegramClient, events
|
|
@@ -17,17 +21,25 @@ API_ID = int(os.environ.get("API_ID", "0"))
|
|
| 17 |
API_HASH = os.environ.get("API_HASH", "")
|
| 18 |
STRING_SESSION = os.environ.get("STRING_SESSION", "")
|
| 19 |
|
| 20 |
-
#
|
| 21 |
-
|
|
|
|
| 22 |
"https://t.me/PEPE_Calls28",
|
| 23 |
"https://t.me/Tanjirocall",
|
|
|
|
|
|
|
| 24 |
"https://t.me/ChinaPumpCommunity",
|
| 25 |
"https://t.me/Milagrosdegencalls",
|
| 26 |
"https://t.me/GM_Degencalls",
|
| 27 |
]
|
| 28 |
-
TARGET_CHAT = os.environ.get("TARGET_CHAT", "https://t.me/MidasTouchsignalll")
|
| 29 |
|
| 30 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
THEME_KEYWORDS = [
|
| 32 |
"call", "signal", "entry", "buy", "sell", "tp", "sl",
|
| 33 |
"pump", "spot", "futures", "setup",
|
|
@@ -37,35 +49,29 @@ KEYWORD_WEIGHT = 1.0
|
|
| 37 |
FUZZ_WEIGHT = 0.6
|
| 38 |
RELEVANCE_THRESHOLD = float(os.environ.get("RELEVANCE_THRESHOLD", "1.0"))
|
| 39 |
|
| 40 |
-
# Filter pengecualian
|
| 41 |
EXCLUDE_PHRASES = [
|
| 42 |
"achievement unlocked",
|
| 43 |
]
|
| 44 |
|
| 45 |
-
# Frasa/tautan yang kalau ada di suatu baris -> baris itu dibuang
|
| 46 |
-
# - "dm" akan cocok sebagai kata utuh (pakai regex \bdm\b), jadi "random" tidak ikut
|
| 47 |
-
BLOCK_PATTERNS = [
|
| 48 |
-
r"\bdm\b", # DM ajakan
|
| 49 |
-
r"\bcontact\b",
|
| 50 |
-
r"\bvip\b",
|
| 51 |
-
r"(?:https?://)?t\.me/\S+", # link t.me/...
|
| 52 |
-
r"(?:https?://)?telegram\.me/\S+",
|
| 53 |
-
r"(?:https?://)?wa\.me/\S+",
|
| 54 |
-
r"@[\w\d_]{2,}" # mention @username
|
| 55 |
-
]
|
| 56 |
-
|
| 57 |
# Media handling
|
| 58 |
INCLUDE_MEDIA = os.environ.get("INCLUDE_MEDIA", "1") == "1"
|
| 59 |
MAX_MEDIA_MB = float(os.environ.get("MAX_MEDIA_MB", "12"))
|
| 60 |
SKIP_STICKERS = os.environ.get("SKIP_STICKERS", "1") == "1"
|
| 61 |
ALLOW_GIFS_VIDEOS = os.environ.get("ALLOW_GIFS_VIDEOS", "0") == "1"
|
| 62 |
|
| 63 |
-
#
|
| 64 |
INITIAL_BACKFILL = int(os.environ.get("INITIAL_BACKFILL", "20"))
|
| 65 |
|
| 66 |
# Dedup buffer
|
| 67 |
DEDUP_BUFFER_SIZE = int(os.environ.get("DEDUP_BUFFER_SIZE", "800"))
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
# ========= Client bootstrap =========
|
| 71 |
def build_client() -> TelegramClient:
|
|
@@ -78,6 +84,9 @@ def build_client() -> TelegramClient:
|
|
| 78 |
client = build_client()
|
| 79 |
recent_hashes: deque[str] = deque(maxlen=DEDUP_BUFFER_SIZE)
|
| 80 |
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
# ========= Utilities =========
|
| 83 |
def debug_log(reason: str, content: str = "") -> None:
|
|
@@ -87,32 +96,26 @@ def debug_log(reason: str, content: str = "") -> None:
|
|
| 87 |
def normalize_for_filter(text: str) -> str:
|
| 88 |
if not text:
|
| 89 |
return ""
|
| 90 |
-
|
| 91 |
-
s = re.sub(r"(?m)^>.*$", "", text)
|
| 92 |
s = re.sub(r"\s+", " ", s).strip()
|
| 93 |
return s
|
| 94 |
|
| 95 |
def score_relevance(text: str, keywords: List[str]) -> float:
|
| 96 |
-
"""Skor sederhana: exact keyword + fuzzy ratio rata-rata top 3."""
|
| 97 |
if not text:
|
| 98 |
return 0.0
|
| 99 |
t = text.lower()
|
| 100 |
|
| 101 |
-
# exact hits
|
| 102 |
exact_hits = 0
|
| 103 |
for kw in keywords:
|
| 104 |
if kw in t or re.search(rf"\b{re.escape(kw)}\b", t):
|
| 105 |
exact_hits += 1
|
| 106 |
exact_score = exact_hits * KEYWORD_WEIGHT
|
| 107 |
|
| 108 |
-
# fuzzy hits (ambil 3 nilai tertinggi)
|
| 109 |
fuzz_scores = sorted((fuzz.partial_ratio(kw, t) / 100.0 for kw in keywords), reverse=True)[:3]
|
| 110 |
fuzzy_score = (sum(fuzz_scores) / max(1, len(fuzz_scores))) * FUZZ_WEIGHT if fuzz_scores else 0.0
|
| 111 |
-
|
| 112 |
return exact_score + fuzzy_score
|
| 113 |
|
| 114 |
def hash_for_dedup(text: str, msg) -> str:
|
| 115 |
-
"""Gabungkan teks dan sid media agar tidak double post."""
|
| 116 |
parts = [text or ""]
|
| 117 |
if getattr(msg, "id", None) is not None:
|
| 118 |
parts.append(str(msg.id))
|
|
@@ -128,7 +131,6 @@ def hash_for_dedup(text: str, msg) -> str:
|
|
| 128 |
return hashlib.sha1(raw).hexdigest()
|
| 129 |
|
| 130 |
def is_image_message(msg) -> bool:
|
| 131 |
-
"""True jika pesan mengandung foto atau dokumen gambar yang boleh."""
|
| 132 |
if getattr(msg, "photo", None) is not None:
|
| 133 |
return True
|
| 134 |
doc = getattr(msg, "document", None)
|
|
@@ -150,43 +152,57 @@ def media_too_big(msg) -> bool:
|
|
| 150 |
return (doc.size or 0) > MAX_MEDIA_MB * 1024 * 1024
|
| 151 |
return False
|
| 152 |
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
"""
|
| 155 |
-
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
| 157 |
"""
|
| 158 |
-
if not
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
lines = text.splitlines()
|
| 162 |
-
keep: list[str] = []
|
| 163 |
-
compiled = [re.compile(pat, flags=re.IGNORECASE) for pat in BLOCK_PATTERNS]
|
| 164 |
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
# Pertahankan baris kosong; nanti kita rapikan
|
| 169 |
-
keep.append(line)
|
| 170 |
-
continue
|
| 171 |
-
if any(rx.search(l) for rx in compiled):
|
| 172 |
-
# buang seluruh baris yang mengandung kata/tautan terlarang
|
| 173 |
-
continue
|
| 174 |
-
keep.append(line)
|
| 175 |
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
|
|
|
|
|
|
| 180 |
|
| 181 |
|
| 182 |
# ========= Core actions =========
|
| 183 |
-
async def send_as_is(msg,
|
| 184 |
-
"""
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
|
| 188 |
try:
|
| 189 |
-
# Jika pesan berupa photo asli
|
| 190 |
if getattr(msg, "photo", None):
|
| 191 |
await client.send_file(
|
| 192 |
TARGET_CHAT,
|
|
@@ -197,13 +213,11 @@ async def send_as_is(msg, override_text: str | None = None) -> None:
|
|
| 197 |
)
|
| 198 |
return
|
| 199 |
|
| 200 |
-
# Jika berupa document image/gif/video
|
| 201 |
doc = getattr(msg, "document", None)
|
| 202 |
if doc:
|
| 203 |
data = await client.download_media(msg, file=bytes)
|
| 204 |
if data:
|
| 205 |
bio = io.BytesIO(data)
|
| 206 |
-
# Tentukan ekstensi file dari mime
|
| 207 |
ext = ".jpg"
|
| 208 |
mt = (getattr(doc, "mime_type", "") or "").lower()
|
| 209 |
if mt:
|
|
@@ -223,7 +237,6 @@ async def send_as_is(msg, override_text: str | None = None) -> None:
|
|
| 223 |
except Exception as e:
|
| 224 |
debug_log("Gagal kirim sebagai media, fallback ke text", str(e))
|
| 225 |
|
| 226 |
-
# Fallback: text saja
|
| 227 |
await client.send_message(
|
| 228 |
TARGET_CHAT,
|
| 229 |
orig_text,
|
|
@@ -231,19 +244,33 @@ async def send_as_is(msg, override_text: str | None = None) -> None:
|
|
| 231 |
link_preview=True,
|
| 232 |
)
|
| 233 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
-
async def process_message(msg,
|
| 236 |
-
"""
|
|
|
|
|
|
|
| 237 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 238 |
text_norm = normalize_for_filter(orig_text).lower()
|
| 239 |
|
| 240 |
-
#
|
| 241 |
for phrase in EXCLUDE_PHRASES:
|
| 242 |
if phrase.lower() in text_norm:
|
| 243 |
debug_log("Dilewati karena EXCLUDE_PHRASES", orig_text)
|
| 244 |
return
|
| 245 |
|
| 246 |
-
# Dedup
|
| 247 |
h = hash_for_dedup(text_norm, msg)
|
| 248 |
if h in recent_hashes:
|
| 249 |
debug_log("Duplikat, dilewati", orig_text)
|
|
@@ -256,24 +283,39 @@ async def process_message(msg, source_name: str) -> None:
|
|
| 256 |
if score < RELEVANCE_THRESHOLD:
|
| 257 |
return
|
| 258 |
|
| 259 |
-
#
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
return
|
| 264 |
|
| 265 |
-
|
| 266 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
|
| 269 |
async def backfill_history(entity, limit: int) -> None:
|
| 270 |
-
"""Tarik pesan lama dari suatu source untuk diproses (opsional)."""
|
| 271 |
if limit <= 0:
|
| 272 |
return
|
| 273 |
print(f"[Backfill] Tarik {limit} pesan terakhir dari {entity} ...")
|
| 274 |
async for m in client.iter_messages(entity, limit=limit):
|
| 275 |
try:
|
| 276 |
-
|
|
|
|
| 277 |
except Exception as e:
|
| 278 |
debug_log("Error saat memproses backfill", str(e))
|
| 279 |
|
|
@@ -282,28 +324,37 @@ async def backfill_history(entity, limit: int) -> None:
|
|
| 282 |
@client.on(events.NewMessage(chats=SOURCE_CHATS))
|
| 283 |
async def on_new_message(event):
|
| 284 |
try:
|
| 285 |
-
await process_message(event.message,
|
| 286 |
except Exception as e:
|
| 287 |
print("Process error:", e)
|
| 288 |
|
| 289 |
|
| 290 |
# ========= Entry points =========
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
async def start_bot_background() -> None:
|
| 292 |
"""
|
| 293 |
-
Dipanggil dari server FastAPI (server.py).
|
| 294 |
-
Menjalankan client + backfill
|
| 295 |
"""
|
| 296 |
await client.start()
|
| 297 |
|
| 298 |
-
#
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
ent = await client.get_entity(src)
|
| 303 |
-
resolved_sources.append(ent)
|
| 304 |
-
except Exception as e:
|
| 305 |
-
print(f"Gagal resolve sumber {src}: {e}")
|
| 306 |
|
|
|
|
| 307 |
for ent in resolved_sources:
|
| 308 |
try:
|
| 309 |
await backfill_history(ent, INITIAL_BACKFILL)
|
|
@@ -311,24 +362,17 @@ async def start_bot_background() -> None:
|
|
| 311 |
print(f"Backfill gagal untuk {ent}: {e}")
|
| 312 |
|
| 313 |
print("Kurator berjalan (background task). Menunggu pesan baru...")
|
| 314 |
-
# Jangan blokir: jalankan client sampai disconnect sebagai task terpisah
|
| 315 |
asyncio.create_task(client.run_until_disconnected())
|
| 316 |
|
| 317 |
-
|
| 318 |
async def app_main() -> None:
|
| 319 |
"""
|
| 320 |
-
|
| 321 |
-
(blok hingga disconnect).
|
| 322 |
"""
|
| 323 |
await client.start()
|
| 324 |
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
ent = await client.get_entity(src)
|
| 329 |
-
resolved_sources.append(ent)
|
| 330 |
-
except Exception as e:
|
| 331 |
-
print(f"Gagal resolve sumber {src}: {e}")
|
| 332 |
|
| 333 |
for ent in resolved_sources:
|
| 334 |
await backfill_history(ent, INITIAL_BACKFILL)
|
|
@@ -338,7 +382,5 @@ async def app_main() -> None:
|
|
| 338 |
|
| 339 |
|
| 340 |
if __name__ == "__main__":
|
| 341 |
-
# Hanya untuk run lokal; jangan di-import saat Uvicorn
|
| 342 |
-
import nest_asyncio
|
| 343 |
nest_asyncio.apply()
|
| 344 |
asyncio.run(app_main())
|
|
|
|
| 3 |
import re
|
| 4 |
import io
|
| 5 |
import hashlib
|
| 6 |
+
from collections import deque, defaultdict
|
| 7 |
+
from datetime import datetime, timedelta, timezone
|
| 8 |
from mimetypes import guess_extension
|
| 9 |
+
from typing import List, Tuple, Optional, Dict
|
| 10 |
+
|
| 11 |
+
import nest_asyncio
|
| 12 |
+
nest_asyncio.apply()
|
| 13 |
|
| 14 |
from rapidfuzz import fuzz
|
| 15 |
from telethon import TelegramClient, events
|
|
|
|
| 21 |
API_HASH = os.environ.get("API_HASH", "")
|
| 22 |
STRING_SESSION = os.environ.get("STRING_SESSION", "")
|
| 23 |
|
| 24 |
+
# --- Definisikan sumber sebagai CORE vs SUPPORT ---
|
| 25 |
+
# Kamu bisa pakai: "@username", id (int), atau "https://t.me/xxxxx"
|
| 26 |
+
CORE_CHATS = [
|
| 27 |
"https://t.me/PEPE_Calls28",
|
| 28 |
"https://t.me/Tanjirocall",
|
| 29 |
+
]
|
| 30 |
+
SUPPORT_CHATS = [
|
| 31 |
"https://t.me/ChinaPumpCommunity",
|
| 32 |
"https://t.me/Milagrosdegencalls",
|
| 33 |
"https://t.me/GM_Degencalls",
|
| 34 |
]
|
|
|
|
| 35 |
|
| 36 |
+
# Gabungan digunakan untuk handler event
|
| 37 |
+
SOURCE_CHATS = [*CORE_CHATS, *SUPPORT_CHATS]
|
| 38 |
+
|
| 39 |
+
# Target (boleh @username / id / link)
|
| 40 |
+
TARGET_CHAT = os.environ.get("TARGET_CHAT", "https://t.me/MidasTouchsignall")
|
| 41 |
+
|
| 42 |
+
# Kata kunci topik untuk relevansi (tetap dari versi sebelumnya)
|
| 43 |
THEME_KEYWORDS = [
|
| 44 |
"call", "signal", "entry", "buy", "sell", "tp", "sl",
|
| 45 |
"pump", "spot", "futures", "setup",
|
|
|
|
| 49 |
FUZZ_WEIGHT = 0.6
|
| 50 |
RELEVANCE_THRESHOLD = float(os.environ.get("RELEVANCE_THRESHOLD", "1.0"))
|
| 51 |
|
| 52 |
+
# Filter pengecualian
|
| 53 |
EXCLUDE_PHRASES = [
|
| 54 |
"achievement unlocked",
|
| 55 |
]
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# Media handling
|
| 58 |
INCLUDE_MEDIA = os.environ.get("INCLUDE_MEDIA", "1") == "1"
|
| 59 |
MAX_MEDIA_MB = float(os.environ.get("MAX_MEDIA_MB", "12"))
|
| 60 |
SKIP_STICKERS = os.environ.get("SKIP_STICKERS", "1") == "1"
|
| 61 |
ALLOW_GIFS_VIDEOS = os.environ.get("ALLOW_GIFS_VIDEOS", "0") == "1"
|
| 62 |
|
| 63 |
+
# Backfill
|
| 64 |
INITIAL_BACKFILL = int(os.environ.get("INITIAL_BACKFILL", "20"))
|
| 65 |
|
| 66 |
# Dedup buffer
|
| 67 |
DEDUP_BUFFER_SIZE = int(os.environ.get("DEDUP_BUFFER_SIZE", "800"))
|
| 68 |
|
| 69 |
+
# Jendela waktu penghitungan kelas (menit)
|
| 70 |
+
CLASS_WINDOW_MINUTES = int(os.environ.get("CLASS_WINDOW_MINUTES", "10"))
|
| 71 |
+
|
| 72 |
+
# Support gating minimal unik grup untuk boleh kirim
|
| 73 |
+
SUPPORT_MIN_UNIQUE = int(os.environ.get("SUPPORT_MIN_UNIQUE", "2"))
|
| 74 |
+
|
| 75 |
|
| 76 |
# ========= Client bootstrap =========
|
| 77 |
def build_client() -> TelegramClient:
|
|
|
|
| 84 |
client = build_client()
|
| 85 |
recent_hashes: deque[str] = deque(maxlen=DEDUP_BUFFER_SIZE)
|
| 86 |
|
| 87 |
+
# Peta id_chat -> "core" / "support"
|
| 88 |
+
chat_roles: Dict[int, str] = {} # diisi saat startup setelah resolve entity
|
| 89 |
+
|
| 90 |
|
| 91 |
# ========= Utilities =========
|
| 92 |
def debug_log(reason: str, content: str = "") -> None:
|
|
|
|
| 96 |
def normalize_for_filter(text: str) -> str:
|
| 97 |
if not text:
|
| 98 |
return ""
|
| 99 |
+
s = re.sub(r"(?m)^>.*$", "", text) # hilangin quote
|
|
|
|
| 100 |
s = re.sub(r"\s+", " ", s).strip()
|
| 101 |
return s
|
| 102 |
|
| 103 |
def score_relevance(text: str, keywords: List[str]) -> float:
|
|
|
|
| 104 |
if not text:
|
| 105 |
return 0.0
|
| 106 |
t = text.lower()
|
| 107 |
|
|
|
|
| 108 |
exact_hits = 0
|
| 109 |
for kw in keywords:
|
| 110 |
if kw in t or re.search(rf"\b{re.escape(kw)}\b", t):
|
| 111 |
exact_hits += 1
|
| 112 |
exact_score = exact_hits * KEYWORD_WEIGHT
|
| 113 |
|
|
|
|
| 114 |
fuzz_scores = sorted((fuzz.partial_ratio(kw, t) / 100.0 for kw in keywords), reverse=True)[:3]
|
| 115 |
fuzzy_score = (sum(fuzz_scores) / max(1, len(fuzz_scores))) * FUZZ_WEIGHT if fuzz_scores else 0.0
|
|
|
|
| 116 |
return exact_score + fuzzy_score
|
| 117 |
|
| 118 |
def hash_for_dedup(text: str, msg) -> str:
|
|
|
|
| 119 |
parts = [text or ""]
|
| 120 |
if getattr(msg, "id", None) is not None:
|
| 121 |
parts.append(str(msg.id))
|
|
|
|
| 131 |
return hashlib.sha1(raw).hexdigest()
|
| 132 |
|
| 133 |
def is_image_message(msg) -> bool:
|
|
|
|
| 134 |
if getattr(msg, "photo", None) is not None:
|
| 135 |
return True
|
| 136 |
doc = getattr(msg, "document", None)
|
|
|
|
| 152 |
return (doc.size or 0) > MAX_MEDIA_MB * 1024 * 1024
|
| 153 |
return False
|
| 154 |
|
| 155 |
+
|
| 156 |
+
# ========= Class aggregator (windowed unique groups) =========
|
| 157 |
+
# keyword -> { group_key(str): last_seen(datetime, UTC) }
|
| 158 |
+
keyword_group_last_seen: defaultdict[str, dict[str, datetime]] = defaultdict(dict)
|
| 159 |
+
|
| 160 |
+
def _prune_expired(now: datetime) -> None:
|
| 161 |
+
window = timedelta(minutes=CLASS_WINDOW_MINUTES)
|
| 162 |
+
cutoff = now - window
|
| 163 |
+
for kw, m in list(keyword_group_last_seen.items()):
|
| 164 |
+
for gk, ts in list(m.items()):
|
| 165 |
+
if ts < cutoff:
|
| 166 |
+
del m[gk]
|
| 167 |
+
if not m:
|
| 168 |
+
del keyword_group_last_seen[kw]
|
| 169 |
+
|
| 170 |
+
def update_and_classify(keyword: str, group_key: str, now: Optional[datetime] = None) -> Tuple[str, int]:
|
| 171 |
"""
|
| 172 |
+
Update hit keyword oleh grup (dalam window waktu) & kembalikan (label, unique_count).
|
| 173 |
+
Label:
|
| 174 |
+
1 -> 'rendah'
|
| 175 |
+
2-3 -> 'sedang'
|
| 176 |
+
>=4 -> 'kuat'
|
| 177 |
"""
|
| 178 |
+
if not now:
|
| 179 |
+
now = datetime.now(timezone.utc)
|
| 180 |
+
_prune_expired(now)
|
|
|
|
|
|
|
|
|
|
| 181 |
|
| 182 |
+
bucket = keyword_group_last_seen[keyword]
|
| 183 |
+
bucket[group_key] = now # insert/update
|
| 184 |
+
unique_groups = len(bucket)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
+
if unique_groups >= 4:
|
| 187 |
+
return "kuat", unique_groups
|
| 188 |
+
elif unique_groups >= 2:
|
| 189 |
+
return "sedang", unique_groups
|
| 190 |
+
else:
|
| 191 |
+
return "rendah", unique_groups
|
| 192 |
|
| 193 |
|
| 194 |
# ========= Core actions =========
|
| 195 |
+
async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
| 196 |
+
"""
|
| 197 |
+
Forward konten 'apa adanya' (termasuk media yang diizinkan).
|
| 198 |
+
Kompatibel dengan versi awal kamu:contentReference[oaicite:2]{index=2}.
|
| 199 |
+
"""
|
| 200 |
+
orig_text = text_override if text_override is not None else (
|
| 201 |
+
msg.message or (getattr(msg, "raw_text", None) or "")
|
| 202 |
+
)
|
| 203 |
|
| 204 |
if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
|
| 205 |
try:
|
|
|
|
| 206 |
if getattr(msg, "photo", None):
|
| 207 |
await client.send_file(
|
| 208 |
TARGET_CHAT,
|
|
|
|
| 213 |
)
|
| 214 |
return
|
| 215 |
|
|
|
|
| 216 |
doc = getattr(msg, "document", None)
|
| 217 |
if doc:
|
| 218 |
data = await client.download_media(msg, file=bytes)
|
| 219 |
if data:
|
| 220 |
bio = io.BytesIO(data)
|
|
|
|
| 221 |
ext = ".jpg"
|
| 222 |
mt = (getattr(doc, "mime_type", "") or "").lower()
|
| 223 |
if mt:
|
|
|
|
| 237 |
except Exception as e:
|
| 238 |
debug_log("Gagal kirim sebagai media, fallback ke text", str(e))
|
| 239 |
|
|
|
|
| 240 |
await client.send_message(
|
| 241 |
TARGET_CHAT,
|
| 242 |
orig_text,
|
|
|
|
| 244 |
link_preview=True,
|
| 245 |
)
|
| 246 |
|
| 247 |
+
def _extract_main_keyword(text_norm: str) -> Optional[str]:
|
| 248 |
+
t = text_norm
|
| 249 |
+
t = re.sub(r"\$([a-z0-9]+)", r"\1", t, flags=re.I) # $BTC -> btc
|
| 250 |
+
for kw in THEME_KEYWORDS:
|
| 251 |
+
if re.search(rf"(^|\W){re.escape(kw)}(\W|$)", t, flags=re.I):
|
| 252 |
+
return kw.lower()
|
| 253 |
+
return None
|
| 254 |
+
|
| 255 |
+
def _role_of(chat_id: int) -> str:
|
| 256 |
+
# default ke 'core' kalau tak dikenal (lebih permisif)
|
| 257 |
+
return chat_roles.get(chat_id, "core")
|
| 258 |
+
|
| 259 |
|
| 260 |
+
async def process_message(msg, source_chat_id: int) -> None:
|
| 261 |
+
"""
|
| 262 |
+
Filter, dedup, relevansi, klasifikasi, 'gating' support vs core, lalu kirim.
|
| 263 |
+
"""
|
| 264 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 265 |
text_norm = normalize_for_filter(orig_text).lower()
|
| 266 |
|
| 267 |
+
# Exclude phrases (case-insensitive)
|
| 268 |
for phrase in EXCLUDE_PHRASES:
|
| 269 |
if phrase.lower() in text_norm:
|
| 270 |
debug_log("Dilewati karena EXCLUDE_PHRASES", orig_text)
|
| 271 |
return
|
| 272 |
|
| 273 |
+
# Dedup
|
| 274 |
h = hash_for_dedup(text_norm, msg)
|
| 275 |
if h in recent_hashes:
|
| 276 |
debug_log("Duplikat, dilewati", orig_text)
|
|
|
|
| 283 |
if score < RELEVANCE_THRESHOLD:
|
| 284 |
return
|
| 285 |
|
| 286 |
+
role = _role_of(source_chat_id) # 'core' atau 'support'
|
| 287 |
+
|
| 288 |
+
# Tentukan keyword & kelas
|
| 289 |
+
main_kw = _extract_main_keyword(text_norm)
|
| 290 |
+
class_label = None
|
| 291 |
+
unique_groups = 0
|
| 292 |
+
if main_kw:
|
| 293 |
+
group_key = str(source_chat_id)
|
| 294 |
+
now = datetime.now(timezone.utc)
|
| 295 |
+
class_label, unique_groups = update_and_classify(main_kw, group_key, now)
|
| 296 |
+
|
| 297 |
+
# Gating: support hanya kirim jika unique_groups >= SUPPORT_MIN_UNIQUE
|
| 298 |
+
if role == "support" and unique_groups < SUPPORT_MIN_UNIQUE:
|
| 299 |
+
debug_log(f"Support ditahan (unique_groups={unique_groups} < {SUPPORT_MIN_UNIQUE})", orig_text)
|
| 300 |
return
|
| 301 |
|
| 302 |
+
# Susun prefix kelas
|
| 303 |
+
text_to_send = orig_text
|
| 304 |
+
if class_label:
|
| 305 |
+
text_to_send = f"[{class_label.upper()}] {orig_text}"
|
| 306 |
+
|
| 307 |
+
await send_as_is(msg, text_override=text_to_send)
|
| 308 |
+
debug_log(f"Dikirim ke target (role={role}, unique_groups={unique_groups})", orig_text)
|
| 309 |
|
| 310 |
|
| 311 |
async def backfill_history(entity, limit: int) -> None:
|
|
|
|
| 312 |
if limit <= 0:
|
| 313 |
return
|
| 314 |
print(f"[Backfill] Tarik {limit} pesan terakhir dari {entity} ...")
|
| 315 |
async for m in client.iter_messages(entity, limit=limit):
|
| 316 |
try:
|
| 317 |
+
# entity.id aman untuk identitas chat
|
| 318 |
+
await process_message(m, source_chat_id=entity.id)
|
| 319 |
except Exception as e:
|
| 320 |
debug_log("Error saat memproses backfill", str(e))
|
| 321 |
|
|
|
|
| 324 |
@client.on(events.NewMessage(chats=SOURCE_CHATS))
|
| 325 |
async def on_new_message(event):
|
| 326 |
try:
|
| 327 |
+
await process_message(event.message, source_chat_id=event.chat_id)
|
| 328 |
except Exception as e:
|
| 329 |
print("Process error:", e)
|
| 330 |
|
| 331 |
|
| 332 |
# ========= Entry points =========
|
| 333 |
+
async def _resolve_and_tag_chats(raw_list, role_label: str) -> list:
|
| 334 |
+
"""Resolve identifier menjadi entity + tandai perannya di chat_roles."""
|
| 335 |
+
resolved = []
|
| 336 |
+
for src in raw_list:
|
| 337 |
+
try:
|
| 338 |
+
ent = await client.get_entity(src)
|
| 339 |
+
resolved.append(ent)
|
| 340 |
+
chat_roles[int(ent.id)] = role_label
|
| 341 |
+
except Exception as e:
|
| 342 |
+
print(f"Gagal resolve sumber {src}: {e}")
|
| 343 |
+
return resolved
|
| 344 |
+
|
| 345 |
async def start_bot_background() -> None:
|
| 346 |
"""
|
| 347 |
+
Dipanggil dari server FastAPI (server.py) saat startup:contentReference[oaicite:3]{index=3}.
|
| 348 |
+
Menjalankan client + backfill tanpa memblokir web server.
|
| 349 |
"""
|
| 350 |
await client.start()
|
| 351 |
|
| 352 |
+
# Resolve CORE & SUPPORT dan isi chat_roles
|
| 353 |
+
resolved_core = await _resolve_and_tag_chats(CORE_CHATS, "core")
|
| 354 |
+
resolved_support = await _resolve_and_tag_chats(SUPPORT_CHATS, "support")
|
| 355 |
+
resolved_sources = [*resolved_core, *resolved_support]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
|
| 357 |
+
# Backfill opsional
|
| 358 |
for ent in resolved_sources:
|
| 359 |
try:
|
| 360 |
await backfill_history(ent, INITIAL_BACKFILL)
|
|
|
|
| 362 |
print(f"Backfill gagal untuk {ent}: {e}")
|
| 363 |
|
| 364 |
print("Kurator berjalan (background task). Menunggu pesan baru...")
|
|
|
|
| 365 |
asyncio.create_task(client.run_until_disconnected())
|
| 366 |
|
|
|
|
| 367 |
async def app_main() -> None:
|
| 368 |
"""
|
| 369 |
+
Mode legacy: `python botsignal.py` (blok hingga disconnect):contentReference[oaicite:4]{index=4}.
|
|
|
|
| 370 |
"""
|
| 371 |
await client.start()
|
| 372 |
|
| 373 |
+
resolved_core = await _resolve_and_tag_chats(CORE_CHATS, "core")
|
| 374 |
+
resolved_support = await _resolve_and_tag_chats(SUPPORT_CHATS, "support")
|
| 375 |
+
resolved_sources = [*resolved_core, *resolved_support]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
|
| 377 |
for ent in resolved_sources:
|
| 378 |
await backfill_history(ent, INITIAL_BACKFILL)
|
|
|
|
| 382 |
|
| 383 |
|
| 384 |
if __name__ == "__main__":
|
|
|
|
|
|
|
| 385 |
nest_asyncio.apply()
|
| 386 |
asyncio.run(app_main())
|