Spaces:
Sleeping
Sleeping
Update botsignal.py
Browse files- botsignal.py +71 -152
botsignal.py
CHANGED
|
@@ -4,6 +4,7 @@ import re
|
|
| 4 |
import io
|
| 5 |
import sqlite3
|
| 6 |
import hashlib
|
|
|
|
| 7 |
from collections import deque, defaultdict
|
| 8 |
from datetime import datetime, timedelta, timezone
|
| 9 |
from mimetypes import guess_extension
|
|
@@ -69,6 +70,10 @@ LB_TRIGGER = os.environ.get("LB_TRIGGER", "/lb")
|
|
| 69 |
LEADERBOARD_BOT = os.environ.get("LEADERBOARD_BOT", "@PhanesGreenBot")
|
| 70 |
LB_REQUIRE_MIN_RANKS = int(os.environ.get("LB_REQUIRE_MIN_RANKS", "0")) # set 3 jika mau minimal 3 peringkat
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
# ====== Anti-spam Leaderboard (cooldown + dedup) ======
|
| 73 |
LEADERBOARD_COOLDOWN_SEC = int(os.environ.get("LEADERBOARD_COOLDOWN_SEC", "600")) # default 10 menit
|
| 74 |
_last_lb_hash: Optional[str] = None
|
|
@@ -101,7 +106,6 @@ def _is_true_leaderboard(text: str) -> bool:
|
|
| 101 |
return False
|
| 102 |
if not re.search(r"📊\s*Group\s*Stats", text):
|
| 103 |
return False
|
| 104 |
-
# opsional: pastikan ada minimal N baris ranking
|
| 105 |
if LB_REQUIRE_MIN_RANKS > 0:
|
| 106 |
ranks = re.findall(r"(?m)^\s*[\W\s]*\d{1,2}\s+.+\[[\d\.]+x\]\s*$", text)
|
| 107 |
if len(ranks) < LB_REQUIRE_MIN_RANKS:
|
|
@@ -130,12 +134,12 @@ BACKFILL_BUFFER_MINUTES = int(os.environ.get("BACKFILL_BUFFER_MINUTES", "3"))
|
|
| 130 |
UPDATE_STRATEGY = os.environ.get("UPDATE_STRATEGY", "reply").lower()
|
| 131 |
UPDATE_COOLDOWN_SEC = int(os.environ.get("UPDATE_COOLDOWN_SEC", "5"))
|
| 132 |
|
| 133 |
-
# Media flags
|
| 134 |
INCLUDE_MEDIA = os.environ.get("INCLUDE_MEDIA", "0") == "1"
|
| 135 |
ALLOW_GIFS_VIDEOS = os.environ.get("ALLOW_GIFS_VIDEOS", "0") == "1"
|
| 136 |
MAX_MEDIA_MB = int(os.environ.get("MAX_MEDIA_MB", "8"))
|
| 137 |
|
| 138 |
-
# Thematic keywords + relevance threshold
|
| 139 |
THEME_KEYWORDS = [kw.strip().lower() for kw in os.environ.get(
|
| 140 |
"THEME_KEYWORDS",
|
| 141 |
"pump,call,entry,entries,sl,tp,launch,airdrop,gem,moon,ath,breakout"
|
|
@@ -161,8 +165,7 @@ def build_client() -> TelegramClient:
|
|
| 161 |
client = build_client()
|
| 162 |
recent_hashes: deque[str] = deque(maxlen=DEDUP_BUFFER_SIZE)
|
| 163 |
recent_content_hashes: deque[str] = deque(maxlen=DEDUP_BUFFER_SIZE) # content-only dedup
|
| 164 |
-
# entity-based dedup
|
| 165 |
-
recent_entity_keys: deque[str] = deque(maxlen=DEDUP_BUFFER_SIZE)
|
| 166 |
|
| 167 |
# Peta id_chat -> "core" / "support"
|
| 168 |
chat_roles: Dict[int, str] = {} # diisi saat startup setelah resolve entity
|
|
@@ -198,7 +201,6 @@ def _init_db():
|
|
| 198 |
conn.close()
|
| 199 |
|
| 200 |
def db_load_state():
|
| 201 |
-
"""Load last_posted & kw_group_seen into memory on startup."""
|
| 202 |
conn = _db()
|
| 203 |
last = {}
|
| 204 |
for kw, mid, tier in conn.execute("SELECT keyword, msg_id, tier FROM last_posted"):
|
|
@@ -255,10 +257,10 @@ def _windows(tokens: List[str], size: int = 20):
|
|
| 255 |
for i in range(0, len(tokens), size):
|
| 256 |
yield " ".join(tokens[i : i + size])
|
| 257 |
|
| 258 |
-
# --- Strip URLs / contract addresses (CA) for relevance scoring ---
|
| 259 |
CA_SOL_RE = re.compile(r"\b[1-9A-HJ-NP-Za-km-z]{32,48}\b") # Solana base58 (perkiraan)
|
| 260 |
CA_EVM_RE = re.compile(r"\b0x[a-fA-F0-9]{40}\b") # EVM address
|
| 261 |
-
CA_LABEL_RE = re.compile(r"\bCA\s*[:=]\s*\S+", re.IGNORECASE)
|
| 262 |
|
| 263 |
def _strip_urls_and_mentions(s: str) -> str:
|
| 264 |
s = re.sub(r"https?://\S+", "", s)
|
|
@@ -267,10 +269,6 @@ def _strip_urls_and_mentions(s: str) -> str:
|
|
| 267 |
return re.sub(r"\s+", " ", s).strip()
|
| 268 |
|
| 269 |
def strip_contracts_for_scoring(s: str) -> str:
|
| 270 |
-
"""
|
| 271 |
-
Hilangkan URL/mention, alamat kontrak, dan token setelah 'CA:'
|
| 272 |
-
agar kata 'pump' pada CA/URL (mis. pump.fun) tidak memengaruhi skor.
|
| 273 |
-
"""
|
| 274 |
s0 = _strip_urls_and_mentions(s)
|
| 275 |
s1 = CA_LABEL_RE.sub(" ", s0)
|
| 276 |
s2 = CA_EVM_RE.sub(" ", s1)
|
|
@@ -278,21 +276,14 @@ def strip_contracts_for_scoring(s: str) -> str:
|
|
| 278 |
return re.sub(r"\s+", " ", s3).strip()
|
| 279 |
|
| 280 |
def score_relevance(text: str, keywords: List[str]) -> float:
|
| 281 |
-
"""Skor: exact keyword + fuzzy windowed (top-3 rata-rata) agar adil untuk teks panjang."""
|
| 282 |
if not text:
|
| 283 |
return 0.0
|
| 284 |
-
|
| 285 |
-
# Gunakan versi yang TIDAK mengandung URL/CA agar 'pump' di CA tidak ikut dihitung
|
| 286 |
t = strip_contracts_for_scoring(text).lower()
|
| 287 |
-
|
| 288 |
-
# exact hits (unik)
|
| 289 |
exact_hits = 0
|
| 290 |
for kw in set(keywords):
|
| 291 |
if kw in t or re.search(rf"\b{re.escape(kw)}\b", t):
|
| 292 |
exact_hits += 1
|
| 293 |
exact_score = exact_hits * KEYWORD_WEIGHT
|
| 294 |
-
|
| 295 |
-
# fuzzy windowed: ambil top-3 skor di antara jendela 20 token
|
| 296 |
tokens = _tokenize_words(t)
|
| 297 |
if not tokens:
|
| 298 |
return exact_score
|
|
@@ -306,11 +297,9 @@ def score_relevance(text: str, keywords: List[str]) -> float:
|
|
| 306 |
scores.append(best)
|
| 307 |
fuzzy_top3 = sorted(scores, reverse=True)[:3]
|
| 308 |
fuzzy_score = (sum(fuzzy_top3) / max(1, len(fuzzy_top3))) * FUZZ_WEIGHT if fuzzy_top3 else 0.0
|
| 309 |
-
|
| 310 |
return exact_score + fuzzy_score
|
| 311 |
|
| 312 |
def hash_for_dedup(text: str, msg) -> str:
|
| 313 |
-
"""Hash campuran (lama) – menahan duplikat per pesan+media."""
|
| 314 |
parts = [text or ""]
|
| 315 |
if getattr(msg, "id", None) is not None:
|
| 316 |
parts.append(str(msg.id))
|
|
@@ -326,7 +315,6 @@ def hash_for_dedup(text: str, msg) -> str:
|
|
| 326 |
return hashlib.sha1(raw).hexdigest()
|
| 327 |
|
| 328 |
def content_only_hash(text: str) -> str:
    """Return a SHA-1 hex digest derived from the message content only.

    The text is run through ``normalize_for_filter`` and then
    ``_strip_urls_and_mentions`` before hashing, so the digest does not
    involve the message id or media (content-based dedup for cross-group
    crossposts).
    """
    cleaned = normalize_for_filter(text)
    cleaned = _strip_urls_and_mentions(cleaned)
    payload = cleaned.encode("utf-8", errors="ignore")
    return hashlib.sha1(payload).hexdigest()
|
| 332 |
|
|
@@ -337,14 +325,12 @@ keyword_group_last_seen: defaultdict[str, dict[str, datetime]] = defaultdict(dic
|
|
| 337 |
def _prune_expired(now: datetime) -> None:
    """Drop (keyword, group) sightings that fell out of the classification window.

    Entries of ``keyword_group_last_seen`` whose timestamp is earlier than
    ``now - CLASS_WINDOW_MINUTES`` are removed, keywords left without any
    group are deleted, and the same cutoff is passed to ``db_prune_expired``.
    """
    cutoff = now - timedelta(minutes=CLASS_WINDOW_MINUTES)

    # Work on snapshots of the keys so entries can be deleted while scanning.
    for keyword in list(keyword_group_last_seen):
        groups = keyword_group_last_seen[keyword]
        stale = [gk for gk, seen_at in groups.items() if seen_at < cutoff]
        for gk in stale:
            del groups[gk]
        if not groups:
            del keyword_group_last_seen[keyword]

    # The persisted rows are pruned with the same cutoff.
    db_prune_expired(cutoff)
|
| 349 |
|
| 350 |
def _classify_by_unique(unique_groups: int) -> Tuple[str, int]:
|
|
@@ -359,27 +345,18 @@ def _classify_by_unique(unique_groups: int) -> Tuple[str, int]:
|
|
| 359 |
|
| 360 |
|
| 361 |
def update_and_classify(keyword: str, group_key: str, now: Optional[datetime] = None) -> Tuple[str, int, bool]:
    """Record that ``keyword`` was seen in ``group_key`` and classify it.

    Args:
        keyword: Key whose per-group "last seen" bucket is updated.
        group_key: Identifier of the group the sighting came from.
        now: Timestamp of the sighting; the current UTC time when omitted.

    Returns:
        ``(class_label, unique_groups, is_new_group)``. ``is_new_group`` is
        True only when ``group_key`` was not yet recorded in the active window.
    """
    current = now if now else datetime.now(timezone.utc)
    _prune_expired(current)

    seen_by_group = keyword_group_last_seen[keyword]
    is_new_group = group_key not in seen_by_group

    # Always refresh the timestamp, even for a group that was already counted.
    seen_by_group[group_key] = current
    db_upsert_kw_seen(keyword, group_key, current)

    label, unique_groups = _classify_by_unique(len(seen_by_group))
    return label, unique_groups, is_new_group
|
| 380 |
|
| 381 |
|
| 382 |
-
# ========= Sentence-level invite filter =========
|
| 383 |
INVITE_PATTERNS = [
|
| 384 |
r"\bjoin\b", r"\bjoin (us|our|channel|group)\b",
|
| 385 |
r"\bdm\b", r"\bdm (me|gw|gue|gua|saya|admin)\b",
|
|
@@ -387,18 +364,14 @@ INVITE_PATTERNS = [
|
|
| 387 |
r"\bvip\b", r"\bpremium\b", r"\bberbayar\b", r"\bpaid\b", r"\bexclusive\b",
|
| 388 |
r"\bwhitelist\b", r"\bprivate( group| channel)?\b", r"\bmembership?\b",
|
| 389 |
r"\bsubscribe\b", r"\blangganan\b",
|
| 390 |
-
# kata kunci promo/iklan
|
| 391 |
r"\bpromo\b", r"\bpromosi\b", r"\biklan\b",
|
| 392 |
-
r"\badvert\b", r"\badvertise\b", r"\badvertisement\b",
|
| 393 |
-
# tautan undangan/shortener
|
| 394 |
r"(t\.me\/joinchat|t\.me\/\+|telegram\.me\/|discord\.gg\/|wa\.me\/|whatsapp\.com\/)",
|
| 395 |
r"(bit\.ly|tinyurl\.com|linktr\.ee)",
|
| 396 |
]
|
| 397 |
INVITE_REGEXES = [re.compile(p, re.IGNORECASE) for p in INVITE_PATTERNS]
|
| 398 |
|
| 399 |
-
# whitelist: kalimat yang nampak "sinyal asli", jangan dihapus
|
| 400 |
WHITELIST_STRONG_SIGNAL = [
|
| 401 |
-
r"\$[a-z0-9]{2,10}",
|
| 402 |
r"\b(entry|entries|buy|sell)\b",
|
| 403 |
r"\bsl\b", r"\btp\b", r"\btp\d\b",
|
| 404 |
]
|
|
@@ -408,10 +381,8 @@ def _is_invite_sentence(s: str) -> bool:
|
|
| 408 |
t = s.strip()
|
| 409 |
if not t:
|
| 410 |
return False
|
| 411 |
-
# Jika kalimat memuat sinyal kuat, jangan dibuang walau ada kata invite
|
| 412 |
if any(r.search(t) for r in WHITELIST_REGEXES):
|
| 413 |
return False
|
| 414 |
-
# Jika ada 1+ pola ajakan, buang
|
| 415 |
return any(r.search(t) for r in INVITE_REGEXES)
|
| 416 |
|
| 417 |
def filter_invite_sentences(text: str) -> str:
|
|
@@ -426,34 +397,21 @@ def filter_invite_sentences(text: str) -> str:
|
|
| 426 |
|
| 427 |
# ========= Media helpers =========
|
| 428 |
def is_image_message(msg) -> bool:
    """Tell whether ``msg`` carries media that counts as a sendable image.

    Returns True for:
    - a message with a truthy ``photo`` attribute;
    - a document whose MIME type is ``image/*``, except ``image/webp``
      (often a sticker), which is always rejected, and ``image/gif``, which
      follows ``ALLOW_GIFS_VIDEOS``;
    - a ``video/*`` document, only when ``ALLOW_GIFS_VIDEOS`` is set.
    """
    if getattr(msg, "photo", None):
        return True

    document = getattr(msg, "document", None)
    if not document:
        return False

    mime = (getattr(document, "mime_type", "") or "").lower()

    # Stickers frequently arrive as image/webp; they are skipped unconditionally.
    skip_stickers = True
    if skip_stickers and mime == "image/webp":
        return False

    # GIFs and videos share the same opt-in flag.
    if mime == "image/gif" or mime.startswith("video/"):
        return ALLOW_GIFS_VIDEOS
    return mime.startswith("image/")
|
| 458 |
|
| 459 |
def media_too_big(msg) -> bool:
|
|
@@ -467,48 +425,33 @@ def media_too_big(msg) -> bool:
|
|
| 467 |
|
| 468 |
|
| 469 |
# ========= Post-on-threshold with EDIT/REPLY/NEW (persisted) =========
|
| 470 |
-
# Urutan tier baru (perlu untuk naik-turun tier aman)
|
| 471 |
TIER_ORDER = {"Low 🌱": 0, "Medium ⚡": 1, "Strong 💪": 2, "FOMO 🔥": 3}
|
| 472 |
|
| 473 |
-
last_posted: Dict[str, Dict[str, object]] = {}
|
| 474 |
-
# simpan body & waktu update terakhir per entitas
|
| 475 |
last_body: Dict[str, str] = {}
|
| 476 |
last_update_ts: Dict[str, float] = {}
|
| 477 |
|
| 478 |
def format_body_with_spacing(body: str, tier_label: str) -> str:
    """Tidy ``body`` and prefix it with a ``[tier_label]`` header.

    - Every line is stripped and empty lines are dropped.
    - One blank line is inserted before any line that mentions a section
      trigger (stats, ca, links, security, trade carefully).
    - The ``[tier_label]`` header goes on top, followed by one blank line.

    Triggers are matched as whole words, case-insensitively, so the short
    trigger "ca" does not fire on ordinary words such as "call" or "because".

    Args:
        body: Raw message text; may be empty.
        tier_label: Tier name rendered inside the bracketed header.

    Returns:
        The formatted text, or just ``[tier_label]`` when ``body`` is empty.
    """
    if not body:
        return f"[{tier_label}]"

    trigger_words = ("stats", "ca", "links", "security", "trade carefully")
    trigger_re = re.compile(
        r"\b(?:" + "|".join(re.escape(tw) for tw in trigger_words) + r")\b",
        re.IGNORECASE,
    )

    formatted: List[str] = []
    for line in (ln.strip() for ln in body.splitlines()):
        if not line:
            continue
        # Separate a section from what precedes it, but never open with a
        # blank line and never stack two blank lines.
        if trigger_re.search(line) and formatted and formatted[-1] != "":
            formatted.append("")
        formatted.append(line)

    pretty = "\n".join(formatted)
    pretty = re.sub(r"\n{3,}", "\n\n", pretty).strip()
    return f"[{tier_label}]\n\n{pretty}"
|
| 506 |
|
| 507 |
async def _send_initial(msg, text: str) -> int:
|
| 508 |
if DRY_RUN:
|
| 509 |
print("[DRY_RUN] send_initial:", text[:140])
|
| 510 |
return -1
|
| 511 |
-
# kirim media bila ada & allowed
|
| 512 |
if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
|
| 513 |
try:
|
| 514 |
if getattr(msg, "photo", None):
|
|
@@ -546,15 +489,8 @@ async def _send_initial(msg, text: str) -> int:
|
|
| 546 |
return await _send_initial(msg, text)
|
| 547 |
|
| 548 |
async def post_or_update(keyword: str, body: str, new_tier: str, src_msg, *, update_like: bool = False, allow_tier_upgrade: bool = True) -> None:
|
| 549 |
-
"""
|
| 550 |
-
allow_tier_upgrade: hanya True jika ada GRUP BARU yang berkontribusi
|
| 551 |
-
pada entitas dalam window; kalau False, meski class_label hasil hitung
|
| 552 |
-
lebih tinggi, kita TIDAK meng-upgrade tier.
|
| 553 |
-
"""
|
| 554 |
prev = last_posted.get(keyword)
|
| 555 |
now_ts = datetime.now().timestamp()
|
| 556 |
-
|
| 557 |
-
# kirim pertama kali
|
| 558 |
if not prev:
|
| 559 |
text = format_body_with_spacing(body, new_tier)
|
| 560 |
msg_id = await _send_initial(src_msg, text)
|
|
@@ -564,11 +500,9 @@ async def post_or_update(keyword: str, body: str, new_tier: str, src_msg, *, upd
|
|
| 564 |
if msg_id != -1:
|
| 565 |
db_save_last_posted(keyword, msg_id, new_tier)
|
| 566 |
return
|
| 567 |
-
|
| 568 |
-
# Jika tier naik, cek dulu apakah diizinkan
|
| 569 |
if TIER_ORDER.get(new_tier, 0) > TIER_ORDER.get(prev["tier"], 0):
|
| 570 |
if not allow_tier_upgrade:
|
| 571 |
-
text = format_body_with_spacing(body, prev["tier"])
|
| 572 |
if not update_like:
|
| 573 |
return
|
| 574 |
try:
|
|
@@ -585,8 +519,6 @@ async def post_or_update(keyword: str, body: str, new_tier: str, src_msg, *, upd
|
|
| 585 |
except Exception as e:
|
| 586 |
debug_log("Update gagal (no-upgrade path)", str(e))
|
| 587 |
return
|
| 588 |
-
|
| 589 |
-
# tier naik dan diizinkan
|
| 590 |
try:
|
| 591 |
text = format_body_with_spacing(body, new_tier)
|
| 592 |
await client.edit_message(TARGET_CHAT, prev["msg_id"], text)
|
|
@@ -608,15 +540,10 @@ async def post_or_update(keyword: str, body: str, new_tier: str, src_msg, *, upd
|
|
| 608 |
if msg_id != -1:
|
| 609 |
db_save_last_posted(keyword, msg_id, new_tier)
|
| 610 |
return
|
| 611 |
-
|
| 612 |
-
# Tier sama: hanya proses jika ini update_like
|
| 613 |
if not update_like:
|
| 614 |
return
|
| 615 |
-
|
| 616 |
-
# Hindari spam: kalau body sama atau masih cooldown, no-op
|
| 617 |
if body.strip() == last_body.get(keyword, "").strip() and (now_ts - last_update_ts.get(keyword, 0) < UPDATE_COOLDOWN_SEC):
|
| 618 |
return
|
| 619 |
-
|
| 620 |
try:
|
| 621 |
text = format_body_with_spacing(body, new_tier)
|
| 622 |
if UPDATE_STRATEGY == "edit":
|
|
@@ -646,7 +573,7 @@ async def post_or_update(keyword: str, body: str, new_tier: str, src_msg, *, upd
|
|
| 646 |
debug_log("Update gagal (strategy)", str(e))
|
| 647 |
|
| 648 |
|
| 649 |
-
# ========= Core actions
|
| 650 |
async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
| 651 |
if DRY_RUN:
|
| 652 |
print("[DRY_RUN] send_as_is:", (text_override or msg.message or "")[:140])
|
|
@@ -659,7 +586,6 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
|
| 659 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 660 |
entities = getattr(msg, "entities", None)
|
| 661 |
|
| 662 |
-
# ✅ perbaikan operator logika: gunakan 'and'
|
| 663 |
if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
|
| 664 |
try:
|
| 665 |
if getattr(msg, "photo", None):
|
|
@@ -696,60 +622,36 @@ async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
|
| 696 |
await client.send_message(TARGET_CHAT, orig_text, formatting_entities=entities, link_preview=True)
|
| 697 |
|
| 698 |
|
| 699 |
-
# ========= Keyword extraction
|
| 700 |
TICKER_CLEAN_RE = re.compile(r"\$[A-Za-z0-9]{2,12}")
|
| 701 |
TICKER_NOISY_RE = re.compile(r"\$[A-Za-z0-9](?:[^A-Za-z0-9]+[A-Za-z0-9]){1,11}")
|
| 702 |
|
| 703 |
def _extract_tickers(text_norm: str) -> List[str]:
|
| 704 |
-
"""
|
| 705 |
-
Ambil $TICKER dengan dua cara:
|
| 706 |
-
- Bersih: $ABC, $JBCOIN
|
| 707 |
-
- Noisy: $J*BCOIN -> dinormalisasi jadi $JBCOIN untuk *keyword* saja.
|
| 708 |
-
(Teks asli tetap dikirim apa adanya.)
|
| 709 |
-
"""
|
| 710 |
found = []
|
| 711 |
-
|
| 712 |
-
# bersih
|
| 713 |
for m in TICKER_CLEAN_RE.finditer(text_norm):
|
| 714 |
found.append(m.group(0).lower())
|
| 715 |
-
|
| 716 |
-
# noisy -> normalisasi internal
|
| 717 |
for m in TICKER_NOISY_RE.finditer(text_norm):
|
| 718 |
raw = m.group(0)
|
| 719 |
norm = "$" + re.sub(r"[^A-Za-z0-9]+", "", raw[1:])
|
| 720 |
-
if 3 <= len(norm) <= 13:
|
| 721 |
found.append(norm.lower())
|
| 722 |
-
|
| 723 |
-
# unik & pertahankan urutan
|
| 724 |
-
seen = set()
|
| 725 |
-
uniq = []
|
| 726 |
for x in found:
|
| 727 |
if x not in seen:
|
| 728 |
-
uniq.append(x)
|
| 729 |
-
seen.add(x)
|
| 730 |
return uniq
|
| 731 |
|
| 732 |
def _extract_all_keywords(text_norm: str) -> List[str]:
|
| 733 |
-
"""
|
| 734 |
-
Deteksi SEMUA keyword dari THEME_KEYWORDS + $ticker.
|
| 735 |
-
Tidak menghapus simbol '$' (sesuai permintaan).
|
| 736 |
-
"""
|
| 737 |
t = re.sub(r"\$([a-z0-9]+)", r"\1", text_norm, flags=re.I)
|
| 738 |
-
|
| 739 |
found = []
|
| 740 |
for kw in THEME_KEYWORDS:
|
| 741 |
if re.search(rf"(^|\W){re.escape(kw)}(\W|$)", t, flags=re.I):
|
| 742 |
found.append(kw.lower())
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
found
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
seen = set()
|
| 749 |
-
for kw in found:
|
| 750 |
-
if kw not in seen:
|
| 751 |
-
uniq.append(kw)
|
| 752 |
-
seen.add(kw)
|
| 753 |
return uniq
|
| 754 |
|
| 755 |
def _choose_dominant_keyword(text_norm: str, kws: List[str]) -> Optional[str]:
|
|
@@ -776,19 +678,15 @@ def _unique_counts_by_role(keyword: str) -> Tuple[int, int]:
|
|
| 776 |
(core_ids if role == "core" else sup_ids).add(gk)
|
| 777 |
return len(core_ids), len(sup_ids)
|
| 778 |
|
| 779 |
-
|
| 780 |
-
# ========= Entity-key extraction (CA > $ticker) =========
|
| 781 |
def extract_entity_key(text: str) -> Optional[str]:
|
| 782 |
t = normalize_for_filter(text)
|
| 783 |
m_evm = CA_EVM_RE.search(t)
|
| 784 |
m_sol = CA_SOL_RE.search(t)
|
| 785 |
if m_evm or m_sol:
|
| 786 |
if m_evm:
|
| 787 |
-
|
| 788 |
-
return f"ca:evm:{addr}"
|
| 789 |
else:
|
| 790 |
-
|
| 791 |
-
return f"ca:sol:{addr}"
|
| 792 |
tickers = _extract_tickers(t.lower())
|
| 793 |
if tickers:
|
| 794 |
return f"ticker:{tickers[0][1:].lower()}"
|
|
@@ -797,17 +695,10 @@ def extract_entity_key(text: str) -> Optional[str]:
|
|
| 797 |
|
| 798 |
# ========= NEW: Filter Phanes di jalur umum =========
|
| 799 |
async def _is_phanes_and_not_leaderboard(msg, text: str) -> bool:
|
| 800 |
-
"""
|
| 801 |
-
True jika pesan berasal dari Phanes (via_bot atau username pengirim === bot Phanes)
|
| 802 |
-
DAN bukan teks leaderboard. Pesan seperti ini akan diabaikan di jalur umum.
|
| 803 |
-
"""
|
| 804 |
try:
|
| 805 |
-
# via inline bot
|
| 806 |
if getattr(msg, "via_bot_id", None) and PHANES_BOT_ID is not None:
|
| 807 |
if int(msg.via_bot_id) == int(PHANES_BOT_ID):
|
| 808 |
return not _is_true_leaderboard(text or "")
|
| 809 |
-
|
| 810 |
-
# username sender
|
| 811 |
sender = await msg.get_sender()
|
| 812 |
uname = (getattr(sender, "username", "") or "").lower()
|
| 813 |
if uname == LEADERBOARD_BOT.lstrip("@").lower():
|
|
@@ -821,7 +712,6 @@ async def process_message(msg, source_chat_id: int) -> None:
|
|
| 821 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 822 |
text_norm = normalize_for_filter(orig_text).lower()
|
| 823 |
|
| 824 |
-
# === HARD FILTER: jika ini pesan Phanes non-leaderboard, skip total ===
|
| 825 |
if await _is_phanes_and_not_leaderboard(msg, orig_text):
|
| 826 |
debug_log("Skip: pesan Phanes non-leaderboard", orig_text)
|
| 827 |
return
|
|
@@ -927,7 +817,7 @@ async def on_new_message(event):
|
|
| 927 |
except Exception as e:
|
| 928 |
print(f"Process error di chat {event.chat_id}: {e}")
|
| 929 |
|
| 930 |
-
# === Leaderboard listener ===
|
| 931 |
@client.on(events.NewMessage(chats=(LEADERBOARD_GROUP,)))
|
| 932 |
async def on_leaderboard_reply(event):
|
| 933 |
try:
|
|
@@ -936,9 +826,7 @@ async def on_leaderboard_reply(event):
|
|
| 936 |
if not text:
|
| 937 |
return
|
| 938 |
|
| 939 |
-
# --- Pastikan sumbernya memang Phanes bot (inline atau username) ---
|
| 940 |
ok_source = False
|
| 941 |
-
|
| 942 |
if getattr(msg, "via_bot_id", None) and PHANES_BOT_ID is not None:
|
| 943 |
if int(msg.via_bot_id) == int(PHANES_BOT_ID):
|
| 944 |
ok_source = True
|
|
@@ -955,11 +843,9 @@ async def on_leaderboard_reply(event):
|
|
| 955 |
if not ok_source:
|
| 956 |
return
|
| 957 |
|
| 958 |
-
# --- HANYA ambil leaderboard utama (header wajib) ---
|
| 959 |
if not _is_true_leaderboard(text):
|
| 960 |
return
|
| 961 |
|
| 962 |
-
# ===== Dedup + cooldown dengan hash yang dinormalisasi =====
|
| 963 |
global _last_lb_hash, _last_lb_ts
|
| 964 |
h = _hash_text_1line(_normalize_lb_for_hash(text))
|
| 965 |
now = asyncio.get_event_loop().time()
|
|
@@ -968,13 +854,43 @@ async def on_leaderboard_reply(event):
|
|
| 968 |
_last_lb_hash = h
|
| 969 |
_last_lb_ts = now
|
| 970 |
|
| 971 |
-
# Forward apa adanya ke TARGET_CHAT
|
| 972 |
await send_as_is(msg)
|
| 973 |
debug_log("Forward Leaderboard", text[:120])
|
| 974 |
except Exception as e:
|
| 975 |
debug_log("LB forward error", str(e))
|
| 976 |
|
| 977 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 978 |
# ========= Entry points =========
|
| 979 |
async def _resolve_and_tag_chats(raw_list, role_label: str) -> list:
|
| 980 |
resolved = []
|
|
@@ -992,22 +908,20 @@ async def start_bot_background() -> None:
|
|
| 992 |
await client.start()
|
| 993 |
_init_db()
|
| 994 |
|
| 995 |
-
# Load persisted state
|
| 996 |
global last_posted, keyword_group_last_seen, PHANES_BOT_ID
|
| 997 |
last_posted, keyword_group_last_seen = db_load_state()
|
| 998 |
|
| 999 |
await _resolve_and_tag_chats(CORE_CHATS, "core")
|
| 1000 |
await _resolve_and_tag_chats(SUPPORT_CHATS, "support")
|
| 1001 |
|
| 1002 |
-
# Resolve bot Phanes (untuk via_bot_id)
|
| 1003 |
try:
|
| 1004 |
ph_ent = await client.get_entity(LEADERBOARD_BOT)
|
| 1005 |
PHANES_BOT_ID = abs(int(ph_ent.id))
|
| 1006 |
print(f"Resolved Phanes bot id: {PHANES_BOT_ID}")
|
| 1007 |
except Exception as e:
|
| 1008 |
-
print(f"Gagal resolve LEADERBOARD_BOT: {e} (
|
| 1009 |
|
| 1010 |
-
#
|
| 1011 |
try:
|
| 1012 |
lb_ent = await client.get_entity(LEADERBOARD_GROUP)
|
| 1013 |
await client.send_message(lb_ent, LB_TRIGGER)
|
|
@@ -1015,7 +929,9 @@ async def start_bot_background() -> None:
|
|
| 1015 |
except Exception as e:
|
| 1016 |
print(f"Gagal resolve/trigger leaderboard group: {e}")
|
| 1017 |
|
| 1018 |
-
#
|
|
|
|
|
|
|
| 1019 |
print("Kurator berjalan (background task). Menunggu pesan baru...")
|
| 1020 |
asyncio.create_task(client.run_until_disconnected())
|
| 1021 |
|
|
@@ -1030,15 +946,14 @@ async def app_main() -> None:
|
|
| 1030 |
await _resolve_and_tag_chats(CORE_CHATS, "core")
|
| 1031 |
await _resolve_and_tag_chats(SUPPORT_CHATS, "support")
|
| 1032 |
|
| 1033 |
-
# Resolve bot Phanes (untuk via_bot_id)
|
| 1034 |
try:
|
| 1035 |
ph_ent = await client.get_entity(LEADERBOARD_BOT)
|
| 1036 |
PHANES_BOT_ID = abs(int(ph_ent.id))
|
| 1037 |
print(f"Resolved Phanes bot id: {PHANES_BOT_ID}")
|
| 1038 |
except Exception as e:
|
| 1039 |
-
print(f"Gagal resolve LEADERBOARD_BOT: {e} (
|
| 1040 |
|
| 1041 |
-
#
|
| 1042 |
try:
|
| 1043 |
lb_ent = await client.get_entity(LEADERBOARD_GROUP)
|
| 1044 |
await client.send_message(lb_ent, LB_TRIGGER)
|
|
@@ -1046,8 +961,12 @@ async def app_main() -> None:
|
|
| 1046 |
except Exception as e:
|
| 1047 |
print(f"Gagal trigger leaderboard group: {e}")
|
| 1048 |
|
|
|
|
|
|
|
|
|
|
| 1049 |
print("Kurator berjalan. Menunggu pesan baru... (Stop dengan interrupt).")
|
| 1050 |
await client.run_until_disconnected()
|
| 1051 |
|
|
|
|
| 1052 |
if __name__ == "__main__":
|
| 1053 |
asyncio.run(app_main())
|
|
|
|
| 4 |
import io
|
| 5 |
import sqlite3
|
| 6 |
import hashlib
|
| 7 |
+
import random
|
| 8 |
from collections import deque, defaultdict
|
| 9 |
from datetime import datetime, timedelta, timezone
|
| 10 |
from mimetypes import guess_extension
|
|
|
|
| 70 |
LEADERBOARD_BOT = os.environ.get("LEADERBOARD_BOT", "@PhanesGreenBot")
|
| 71 |
LB_REQUIRE_MIN_RANKS = int(os.environ.get("LB_REQUIRE_MIN_RANKS", "0")) # set 3 jika mau minimal 3 peringkat
|
| 72 |
|
| 73 |
+
# ====== Scheduler interval (acak per JAM) ======
|
| 74 |
+
LB_INTERVAL_MIN_HOURS = int(os.environ.get("LB_INTERVAL_MIN_HOURS", "3"))
|
| 75 |
+
LB_INTERVAL_MAX_HOURS = int(os.environ.get("LB_INTERVAL_MAX_HOURS", "6"))
|
| 76 |
+
|
| 77 |
# ====== Anti-spam Leaderboard (cooldown + dedup) ======
|
| 78 |
LEADERBOARD_COOLDOWN_SEC = int(os.environ.get("LEADERBOARD_COOLDOWN_SEC", "600")) # default 10 menit
|
| 79 |
_last_lb_hash: Optional[str] = None
|
|
|
|
| 106 |
return False
|
| 107 |
if not re.search(r"📊\s*Group\s*Stats", text):
|
| 108 |
return False
|
|
|
|
| 109 |
if LB_REQUIRE_MIN_RANKS > 0:
|
| 110 |
ranks = re.findall(r"(?m)^\s*[\W\s]*\d{1,2}\s+.+\[[\d\.]+x\]\s*$", text)
|
| 111 |
if len(ranks) < LB_REQUIRE_MIN_RANKS:
|
|
|
|
| 134 |
UPDATE_STRATEGY = os.environ.get("UPDATE_STRATEGY", "reply").lower()
|
| 135 |
UPDATE_COOLDOWN_SEC = int(os.environ.get("UPDATE_COOLDOWN_SEC", "5"))
|
| 136 |
|
| 137 |
+
# Media flags
|
| 138 |
INCLUDE_MEDIA = os.environ.get("INCLUDE_MEDIA", "0") == "1"
|
| 139 |
ALLOW_GIFS_VIDEOS = os.environ.get("ALLOW_GIFS_VIDEOS", "0") == "1"
|
| 140 |
MAX_MEDIA_MB = int(os.environ.get("MAX_MEDIA_MB", "8"))
|
| 141 |
|
| 142 |
+
# Thematic keywords + relevance threshold
|
| 143 |
THEME_KEYWORDS = [kw.strip().lower() for kw in os.environ.get(
|
| 144 |
"THEME_KEYWORDS",
|
| 145 |
"pump,call,entry,entries,sl,tp,launch,airdrop,gem,moon,ath,breakout"
|
|
|
|
| 165 |
client = build_client()
|
| 166 |
recent_hashes: deque[str] = deque(maxlen=DEDUP_BUFFER_SIZE)
|
| 167 |
recent_content_hashes: deque[str] = deque(maxlen=DEDUP_BUFFER_SIZE) # content-only dedup
|
| 168 |
+
recent_entity_keys: deque[str] = deque(maxlen=DEDUP_BUFFER_SIZE) # entity-based dedup
|
|
|
|
| 169 |
|
| 170 |
# Peta id_chat -> "core" / "support"
|
| 171 |
chat_roles: Dict[int, str] = {} # diisi saat startup setelah resolve entity
|
|
|
|
| 201 |
conn.close()
|
| 202 |
|
| 203 |
def db_load_state():
|
|
|
|
| 204 |
conn = _db()
|
| 205 |
last = {}
|
| 206 |
for kw, mid, tier in conn.execute("SELECT keyword, msg_id, tier FROM last_posted"):
|
|
|
|
| 257 |
for i in range(0, len(tokens), size):
|
| 258 |
yield " ".join(tokens[i : i + size])
|
| 259 |
|
| 260 |
+
# --- Bersihkan URL/CA untuk skor relevansi ---
|
| 261 |
CA_SOL_RE = re.compile(r"\b[1-9A-HJ-NP-Za-km-z]{32,48}\b") # Solana base58 (perkiraan)
|
| 262 |
CA_EVM_RE = re.compile(r"\b0x[a-fA-F0-9]{40}\b") # EVM address
|
| 263 |
+
CA_LABEL_RE = re.compile(r"\bCA\s*[:=]\s*\S+", re.IGNORECASE)
|
| 264 |
|
| 265 |
def _strip_urls_and_mentions(s: str) -> str:
|
| 266 |
s = re.sub(r"https?://\S+", "", s)
|
|
|
|
| 269 |
return re.sub(r"\s+", " ", s).strip()
|
| 270 |
|
| 271 |
def strip_contracts_for_scoring(s: str) -> str:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
s0 = _strip_urls_and_mentions(s)
|
| 273 |
s1 = CA_LABEL_RE.sub(" ", s0)
|
| 274 |
s2 = CA_EVM_RE.sub(" ", s1)
|
|
|
|
| 276 |
return re.sub(r"\s+", " ", s3).strip()
|
| 277 |
|
| 278 |
def score_relevance(text: str, keywords: List[str]) -> float:
|
|
|
|
| 279 |
if not text:
|
| 280 |
return 0.0
|
|
|
|
|
|
|
| 281 |
t = strip_contracts_for_scoring(text).lower()
|
|
|
|
|
|
|
| 282 |
exact_hits = 0
|
| 283 |
for kw in set(keywords):
|
| 284 |
if kw in t or re.search(rf"\b{re.escape(kw)}\b", t):
|
| 285 |
exact_hits += 1
|
| 286 |
exact_score = exact_hits * KEYWORD_WEIGHT
|
|
|
|
|
|
|
| 287 |
tokens = _tokenize_words(t)
|
| 288 |
if not tokens:
|
| 289 |
return exact_score
|
|
|
|
| 297 |
scores.append(best)
|
| 298 |
fuzzy_top3 = sorted(scores, reverse=True)[:3]
|
| 299 |
fuzzy_score = (sum(fuzzy_top3) / max(1, len(fuzzy_top3))) * FUZZ_WEIGHT if fuzzy_top3 else 0.0
|
|
|
|
| 300 |
return exact_score + fuzzy_score
|
| 301 |
|
| 302 |
def hash_for_dedup(text: str, msg) -> str:
|
|
|
|
| 303 |
parts = [text or ""]
|
| 304 |
if getattr(msg, "id", None) is not None:
|
| 305 |
parts.append(str(msg.id))
|
|
|
|
| 315 |
return hashlib.sha1(raw).hexdigest()
|
| 316 |
|
| 317 |
def content_only_hash(text: str) -> str:
    """Return a SHA-1 hex digest derived from the message content only.

    The text is run through ``normalize_for_filter`` and then
    ``_strip_urls_and_mentions`` before hashing, so the digest does not
    involve the message id or media.
    """
    cleaned = normalize_for_filter(text)
    cleaned = _strip_urls_and_mentions(cleaned)
    payload = cleaned.encode("utf-8", errors="ignore")
    return hashlib.sha1(payload).hexdigest()
|
| 320 |
|
|
|
|
| 325 |
def _prune_expired(now: datetime) -> None:
    """Drop (keyword, group) sightings that fell out of the classification window.

    Entries of ``keyword_group_last_seen`` whose timestamp is earlier than
    ``now - CLASS_WINDOW_MINUTES`` are removed, keywords left without any
    group are deleted, and the same cutoff is passed to ``db_prune_expired``.
    """
    cutoff = now - timedelta(minutes=CLASS_WINDOW_MINUTES)

    # Work on snapshots of the keys so entries can be deleted while scanning.
    for keyword in list(keyword_group_last_seen):
        groups = keyword_group_last_seen[keyword]
        stale = [gk for gk, seen_at in groups.items() if seen_at < cutoff]
        for gk in stale:
            del groups[gk]
        if not groups:
            del keyword_group_last_seen[keyword]

    # The persisted rows are pruned with the same cutoff.
    db_prune_expired(cutoff)
|
| 335 |
|
| 336 |
def _classify_by_unique(unique_groups: int) -> Tuple[str, int]:
|
|
|
|
| 345 |
|
| 346 |
|
| 347 |
def update_and_classify(keyword: str, group_key: str, now: Optional[datetime] = None) -> Tuple[str, int, bool]:
    """Record that ``keyword`` was seen in ``group_key`` and classify it.

    Args:
        keyword: Key whose per-group "last seen" bucket is updated.
        group_key: Identifier of the group the sighting came from.
        now: Timestamp of the sighting; the current UTC time when omitted.

    Returns:
        ``(class_label, unique_groups, is_new_group)``. ``is_new_group`` is
        True only when ``group_key`` was not yet recorded in the bucket.
    """
    current = now if now else datetime.now(timezone.utc)
    _prune_expired(current)

    seen_by_group = keyword_group_last_seen[keyword]
    is_new_group = group_key not in seen_by_group

    # Always refresh the timestamp, even for a group that was already counted.
    seen_by_group[group_key] = current
    db_upsert_kw_seen(keyword, group_key, current)

    label, unique_groups = _classify_by_unique(len(seen_by_group))
    return label, unique_groups, is_new_group
|
| 357 |
|
| 358 |
|
| 359 |
+
# ========= Sentence-level invite filter =========
|
| 360 |
INVITE_PATTERNS = [
|
| 361 |
r"\bjoin\b", r"\bjoin (us|our|channel|group)\b",
|
| 362 |
r"\bdm\b", r"\bdm (me|gw|gue|gua|saya|admin)\b",
|
|
|
|
| 364 |
r"\bvip\b", r"\bpremium\b", r"\bberbayar\b", r"\bpaid\b", r"\bexclusive\b",
|
| 365 |
r"\bwhitelist\b", r"\bprivate( group| channel)?\b", r"\bmembership?\b",
|
| 366 |
r"\bsubscribe\b", r"\blangganan\b",
|
|
|
|
| 367 |
r"\bpromo\b", r"\bpromosi\b", r"\biklan\b",
|
|
|
|
|
|
|
| 368 |
r"(t\.me\/joinchat|t\.me\/\+|telegram\.me\/|discord\.gg\/|wa\.me\/|whatsapp\.com\/)",
|
| 369 |
r"(bit\.ly|tinyurl\.com|linktr\.ee)",
|
| 370 |
]
|
| 371 |
INVITE_REGEXES = [re.compile(p, re.IGNORECASE) for p in INVITE_PATTERNS]
|
| 372 |
|
|
|
|
| 373 |
WHITELIST_STRONG_SIGNAL = [
|
| 374 |
+
r"\$[a-z0-9]{2,10}",
|
| 375 |
r"\b(entry|entries|buy|sell)\b",
|
| 376 |
r"\bsl\b", r"\btp\b", r"\btp\d\b",
|
| 377 |
]
|
|
|
|
| 381 |
t = s.strip()
|
| 382 |
if not t:
|
| 383 |
return False
|
|
|
|
| 384 |
if any(r.search(t) for r in WHITELIST_REGEXES):
|
| 385 |
return False
|
|
|
|
| 386 |
return any(r.search(t) for r in INVITE_REGEXES)
|
| 387 |
|
| 388 |
def filter_invite_sentences(text: str) -> str:
|
|
|
|
| 397 |
|
| 398 |
# ========= Media helpers =========
|
| 399 |
def is_image_message(msg) -> bool:
    """Tell whether ``msg`` carries media that counts as a sendable image.

    Returns True for:
    - a message with a truthy ``photo`` attribute;
    - a document whose MIME type is ``image/*``, except ``image/webp``,
      which is always rejected, and ``image/gif``, which follows
      ``ALLOW_GIFS_VIDEOS``;
    - a ``video/*`` document, only when ``ALLOW_GIFS_VIDEOS`` is set.
    """
    if getattr(msg, "photo", None):
        return True

    document = getattr(msg, "document", None)
    if not document:
        return False

    mime = (getattr(document, "mime_type", "") or "").lower()

    # image/webp is skipped unconditionally (the flag is hard-wired to True).
    skip_stickers = True
    if skip_stickers and mime == "image/webp":
        return False

    # GIFs and videos share the same opt-in flag.
    if mime == "image/gif" or mime.startswith("video/"):
        return ALLOW_GIFS_VIDEOS
    return mime.startswith("image/")
|
| 416 |
|
| 417 |
def media_too_big(msg) -> bool:
|
|
|
|
| 425 |
|
| 426 |
|
| 427 |
# ========= Post-on-threshold with EDIT/REPLY/NEW (persisted) =========
|
|
|
|
| 428 |
# Rank of each tier label; post_or_update compares these numbers to detect
# a tier upgrade (higher number = higher tier).
TIER_ORDER = {"Low 🌱": 0, "Medium ⚡": 1, "Strong 💪": 2, "FOMO 🔥": 3}

# Per-keyword record of the last post; post_or_update reads its "tier" and
# "msg_id" entries. Reloaded from the database at startup via db_load_state().
last_posted: Dict[str, Dict[str, object]] = {}
# Per-keyword body text, compared against a new body to skip duplicate updates.
last_body: Dict[str, str] = {}
# Per-keyword timestamp (seconds), compared against UPDATE_COOLDOWN_SEC.
last_update_ts: Dict[str, float] = {}
|
| 433 |
|
| 434 |
# Section headers that get a blank line in front of them. Matched as whole
# words so that "ca" no longer fires on "call", "cap", "because", etc.
_SECTION_HEADER_RE = re.compile(
    r"\b(?:stats|ca|links|security|trade carefully)\b", re.IGNORECASE
)


def format_body_with_spacing(body: str, tier_label: str) -> str:
    """Prefix *body* with ``[tier_label]`` and space out its sections.

    Lines are stripped and blank lines dropped; a single blank line is then
    inserted before every line that contains a section-header word (stats,
    CA, links, security, "trade carefully"), except when it is the first line.

    Args:
        body: Raw message text; may be empty, None or whitespace-only.
        tier_label: Tier name rendered inside the leading square brackets.

    Returns:
        ``"[tier_label]"`` alone when the body has no visible content,
        otherwise the header, a blank line, and the re-spaced body.
    """
    lines = [ln.strip() for ln in (body or "").splitlines()]
    lines = [ln for ln in lines if ln]
    if not lines:
        # Covers whitespace-only bodies too, which used to yield "[tier]\n\n".
        return f"[{tier_label}]"

    formatted: List[str] = []
    for line in lines:
        # No separator before the very first line; blank input lines were
        # removed above, so two separators can never end up adjacent.
        if formatted and _SECTION_HEADER_RE.search(line):
            formatted.append("")
        formatted.append(line)

    return f"[{tier_label}]\n\n" + "\n".join(formatted)
|
| 450 |
|
| 451 |
async def _send_initial(msg, text: str) -> int:
|
| 452 |
if DRY_RUN:
|
| 453 |
print("[DRY_RUN] send_initial:", text[:140])
|
| 454 |
return -1
|
|
|
|
| 455 |
if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
|
| 456 |
try:
|
| 457 |
if getattr(msg, "photo", None):
|
|
|
|
| 489 |
return await _send_initial(msg, text)
|
| 490 |
|
| 491 |
async def post_or_update(keyword: str, body: str, new_tier: str, src_msg, *, update_like: bool = False, allow_tier_upgrade: bool = True) -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
prev = last_posted.get(keyword)
|
| 493 |
now_ts = datetime.now().timestamp()
|
|
|
|
|
|
|
| 494 |
if not prev:
|
| 495 |
text = format_body_with_spacing(body, new_tier)
|
| 496 |
msg_id = await _send_initial(src_msg, text)
|
|
|
|
| 500 |
if msg_id != -1:
|
| 501 |
db_save_last_posted(keyword, msg_id, new_tier)
|
| 502 |
return
|
|
|
|
|
|
|
| 503 |
if TIER_ORDER.get(new_tier, 0) > TIER_ORDER.get(prev["tier"], 0):
|
| 504 |
if not allow_tier_upgrade:
|
| 505 |
+
text = format_body_with_spacing(body, prev["tier"])
|
| 506 |
if not update_like:
|
| 507 |
return
|
| 508 |
try:
|
|
|
|
| 519 |
except Exception as e:
|
| 520 |
debug_log("Update gagal (no-upgrade path)", str(e))
|
| 521 |
return
|
|
|
|
|
|
|
| 522 |
try:
|
| 523 |
text = format_body_with_spacing(body, new_tier)
|
| 524 |
await client.edit_message(TARGET_CHAT, prev["msg_id"], text)
|
|
|
|
| 540 |
if msg_id != -1:
|
| 541 |
db_save_last_posted(keyword, msg_id, new_tier)
|
| 542 |
return
|
|
|
|
|
|
|
| 543 |
if not update_like:
|
| 544 |
return
|
|
|
|
|
|
|
| 545 |
if body.strip() == last_body.get(keyword, "").strip() and (now_ts - last_update_ts.get(keyword, 0) < UPDATE_COOLDOWN_SEC):
|
| 546 |
return
|
|
|
|
| 547 |
try:
|
| 548 |
text = format_body_with_spacing(body, new_tier)
|
| 549 |
if UPDATE_STRATEGY == "edit":
|
|
|
|
| 573 |
debug_log("Update gagal (strategy)", str(e))
|
| 574 |
|
| 575 |
|
| 576 |
+
# ========= Core actions =========
|
| 577 |
async def send_as_is(msg, text_override: Optional[str] = None) -> None:
|
| 578 |
if DRY_RUN:
|
| 579 |
print("[DRY_RUN] send_as_is:", (text_override or msg.message or "")[:140])
|
|
|
|
| 586 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 587 |
entities = getattr(msg, "entities", None)
|
| 588 |
|
|
|
|
| 589 |
if INCLUDE_MEDIA and is_image_message(msg) and not media_too_big(msg):
|
| 590 |
try:
|
| 591 |
if getattr(msg, "photo", None):
|
|
|
|
| 622 |
await client.send_message(TARGET_CHAT, orig_text, formatting_entities=entities, link_preview=True)
|
| 623 |
|
| 624 |
|
| 625 |
+
# ========= Keyword & entity extraction =========
|
| 626 |
TICKER_CLEAN_RE = re.compile(r"\$[A-Za-z0-9]{2,12}")
|
| 627 |
TICKER_NOISY_RE = re.compile(r"\$[A-Za-z0-9](?:[^A-Za-z0-9]+[A-Za-z0-9]){1,11}")
|
| 628 |
|
| 629 |
def _extract_tickers(text_norm: str) -> List[str]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 630 |
found = []
|
|
|
|
|
|
|
| 631 |
for m in TICKER_CLEAN_RE.finditer(text_norm):
|
| 632 |
found.append(m.group(0).lower())
|
|
|
|
|
|
|
| 633 |
for m in TICKER_NOISY_RE.finditer(text_norm):
|
| 634 |
raw = m.group(0)
|
| 635 |
norm = "$" + re.sub(r"[^A-Za-z0-9]+", "", raw[1:])
|
| 636 |
+
if 3 <= len(norm) <= 13:
|
| 637 |
found.append(norm.lower())
|
| 638 |
+
seen = set(); uniq = []
|
|
|
|
|
|
|
|
|
|
| 639 |
for x in found:
|
| 640 |
if x not in seen:
|
| 641 |
+
uniq.append(x); seen.add(x)
|
|
|
|
| 642 |
return uniq
|
| 643 |
|
| 644 |
def _extract_all_keywords(text_norm: str) -> List[str]:
    """Collect theme keywords and ``$tickers`` present in *text_norm*.

    Theme keywords (from ``THEME_KEYWORDS``) are matched case-insensitively
    as stand-alone tokens and listed first, followed by whatever
    ``_extract_tickers`` finds. Duplicates are removed, first occurrence wins.
    """
    # Strip the "$" prefix so a keyword written as "$pump" still matches.
    dollarless = re.sub(r"\$([a-z0-9]+)", r"\1", text_norm, flags=re.I)

    hits = [
        kw.lower()
        for kw in THEME_KEYWORDS
        if re.search(rf"(^|\W){re.escape(kw)}(\W|$)", dollarless, flags=re.I)
    ]
    # Tickers are extracted from the original text, which still has the "$".
    hits += _extract_tickers(text_norm)

    return list(dict.fromkeys(hits))
|
| 656 |
|
| 657 |
def _choose_dominant_keyword(text_norm: str, kws: List[str]) -> Optional[str]:
|
|
|
|
| 678 |
(core_ids if role == "core" else sup_ids).add(gk)
|
| 679 |
return len(core_ids), len(sup_ids)
|
| 680 |
|
|
|
|
|
|
|
| 681 |
def extract_entity_key(text: str) -> Optional[str]:
|
| 682 |
t = normalize_for_filter(text)
|
| 683 |
m_evm = CA_EVM_RE.search(t)
|
| 684 |
m_sol = CA_SOL_RE.search(t)
|
| 685 |
if m_evm or m_sol:
|
| 686 |
if m_evm:
|
| 687 |
+
return f"ca:evm:{m_evm.group(0).lower()}"
|
|
|
|
| 688 |
else:
|
| 689 |
+
return f"ca:sol:{m_sol.group(0)}"
|
|
|
|
| 690 |
tickers = _extract_tickers(t.lower())
|
| 691 |
if tickers:
|
| 692 |
return f"ticker:{tickers[0][1:].lower()}"
|
|
|
|
| 695 |
|
| 696 |
# ========= NEW: Filter Phanes messages on the general path =========
|
| 697 |
async def _is_phanes_and_not_leaderboard(msg, text: str) -> bool:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
try:
|
|
|
|
| 699 |
if getattr(msg, "via_bot_id", None) and PHANES_BOT_ID is not None:
|
| 700 |
if int(msg.via_bot_id) == int(PHANES_BOT_ID):
|
| 701 |
return not _is_true_leaderboard(text or "")
|
|
|
|
|
|
|
| 702 |
sender = await msg.get_sender()
|
| 703 |
uname = (getattr(sender, "username", "") or "").lower()
|
| 704 |
if uname == LEADERBOARD_BOT.lstrip("@").lower():
|
|
|
|
| 712 |
orig_text = msg.message or (getattr(msg, "raw_text", None) or "")
|
| 713 |
text_norm = normalize_for_filter(orig_text).lower()
|
| 714 |
|
|
|
|
| 715 |
if await _is_phanes_and_not_leaderboard(msg, orig_text):
|
| 716 |
debug_log("Skip: pesan Phanes non-leaderboard", orig_text)
|
| 717 |
return
|
|
|
|
| 817 |
except Exception as e:
|
| 818 |
print(f"Process error di chat {event.chat_id}: {e}")
|
| 819 |
|
| 820 |
+
# === Leaderboard listener (strict) ===
|
| 821 |
@client.on(events.NewMessage(chats=(LEADERBOARD_GROUP,)))
|
| 822 |
async def on_leaderboard_reply(event):
|
| 823 |
try:
|
|
|
|
| 826 |
if not text:
|
| 827 |
return
|
| 828 |
|
|
|
|
| 829 |
ok_source = False
|
|
|
|
| 830 |
if getattr(msg, "via_bot_id", None) and PHANES_BOT_ID is not None:
|
| 831 |
if int(msg.via_bot_id) == int(PHANES_BOT_ID):
|
| 832 |
ok_source = True
|
|
|
|
| 843 |
if not ok_source:
|
| 844 |
return
|
| 845 |
|
|
|
|
| 846 |
if not _is_true_leaderboard(text):
|
| 847 |
return
|
| 848 |
|
|
|
|
| 849 |
global _last_lb_hash, _last_lb_ts
|
| 850 |
h = _hash_text_1line(_normalize_lb_for_hash(text))
|
| 851 |
now = asyncio.get_event_loop().time()
|
|
|
|
| 854 |
_last_lb_hash = h
|
| 855 |
_last_lb_ts = now
|
| 856 |
|
|
|
|
| 857 |
await send_as_is(msg)
|
| 858 |
debug_log("Forward Leaderboard", text[:120])
|
| 859 |
except Exception as e:
|
| 860 |
debug_log("LB forward error", str(e))
|
| 861 |
|
| 862 |
|
| 863 |
+
# ========= Scheduler: /lb at random hourly intervals =========
|
| 864 |
+
async def periodic_lb_trigger():
    """Send LB_TRIGGER to LEADERBOARD_GROUP at random hourly intervals, forever.

    The group entity is resolved first, retrying every 5 minutes until it
    succeeds. The retry happens inside this coroutine instead of spawning a
    fresh task per failure: a task created without keeping a reference may be
    garbage-collected before it finishes, and each failure used to add one
    more task to the chain.

    Each cycle then sleeps a random whole number of hours between
    LB_INTERVAL_MIN_HOURS (clamped to at least 1) and LB_INTERVAL_MAX_HOURS
    (clamped to at least min + 1), plus 1-10 minutes of jitter, and sends the
    trigger. A FloodWaitError is honored by waiting the requested time; any
    other error is logged and followed by a 5-minute cooldown.

    Never returns normally; it ends only when the task is cancelled.
    """
    while True:
        try:
            lb_ent = await client.get_entity(LEADERBOARD_GROUP)
            break
        except Exception as e:
            print(f"[LB-SCHED] Gagal resolve LEADERBOARD_GROUP: {e} — retry 5m")
            await asyncio.sleep(300)

    while True:
        try:
            wait_min = max(1, LB_INTERVAL_MIN_HOURS)
            wait_max = max(wait_min + 1, LB_INTERVAL_MAX_HOURS)
            delta_hr = random.randint(wait_min, wait_max)
            jitter_min = random.randint(1, 10)
            sleep_s = delta_hr * 3600 + jitter_min * 60

            print(f"[LB-SCHED] Next /lb in ~{delta_hr}h {jitter_min}m")
            await asyncio.sleep(sleep_s)

            await client.send_message(lb_ent, LB_TRIGGER)
            print("[LB-SCHED] /lb sent")
        except FloodWaitError as fw:
            print(f"[LB-SCHED] FloodWait {fw.seconds}s, menunggu...")
            # Small margin on top of the server-requested wait.
            await asyncio.sleep(fw.seconds + 2)
        except Exception as e:
            print(f"[LB-SCHED] Error: {e} — cooldown 5m & lanjut")
            await asyncio.sleep(300)
|
| 892 |
+
|
| 893 |
+
|
| 894 |
# ========= Entry points =========
|
| 895 |
async def _resolve_and_tag_chats(raw_list, role_label: str) -> list:
|
| 896 |
resolved = []
|
|
|
|
| 908 |
await client.start()
|
| 909 |
_init_db()
|
| 910 |
|
|
|
|
| 911 |
global last_posted, keyword_group_last_seen, PHANES_BOT_ID
|
| 912 |
last_posted, keyword_group_last_seen = db_load_state()
|
| 913 |
|
| 914 |
await _resolve_and_tag_chats(CORE_CHATS, "core")
|
| 915 |
await _resolve_and_tag_chats(SUPPORT_CHATS, "support")
|
| 916 |
|
|
|
|
| 917 |
try:
|
| 918 |
ph_ent = await client.get_entity(LEADERBOARD_BOT)
|
| 919 |
PHANES_BOT_ID = abs(int(ph_ent.id))
|
| 920 |
print(f"Resolved Phanes bot id: {PHANES_BOT_ID}")
|
| 921 |
except Exception as e:
|
| 922 |
+
print(f"Gagal resolve LEADERBOARD_BOT: {e} (fallback pola teks)")
|
| 923 |
|
| 924 |
+
# (opsional) trigger awal
|
| 925 |
try:
|
| 926 |
lb_ent = await client.get_entity(LEADERBOARD_GROUP)
|
| 927 |
await client.send_message(lb_ent, LB_TRIGGER)
|
|
|
|
| 929 |
except Exception as e:
|
| 930 |
print(f"Gagal resolve/trigger leaderboard group: {e}")
|
| 931 |
|
| 932 |
+
# === START SCHEDULER /lb acak per JAM ===
|
| 933 |
+
asyncio.create_task(periodic_lb_trigger())
|
| 934 |
+
|
| 935 |
print("Kurator berjalan (background task). Menunggu pesan baru...")
|
| 936 |
asyncio.create_task(client.run_until_disconnected())
|
| 937 |
|
|
|
|
| 946 |
await _resolve_and_tag_chats(CORE_CHATS, "core")
|
| 947 |
await _resolve_and_tag_chats(SUPPORT_CHATS, "support")
|
| 948 |
|
|
|
|
| 949 |
try:
|
| 950 |
ph_ent = await client.get_entity(LEADERBOARD_BOT)
|
| 951 |
PHANES_BOT_ID = abs(int(ph_ent.id))
|
| 952 |
print(f"Resolved Phanes bot id: {PHANES_BOT_ID}")
|
| 953 |
except Exception as e:
|
| 954 |
+
print(f"Gagal resolve LEADERBOARD_BOT: {e} (fallback pola teks)")
|
| 955 |
|
| 956 |
+
# (opsional) trigger awal
|
| 957 |
try:
|
| 958 |
lb_ent = await client.get_entity(LEADERBOARD_GROUP)
|
| 959 |
await client.send_message(lb_ent, LB_TRIGGER)
|
|
|
|
| 961 |
except Exception as e:
|
| 962 |
print(f"Gagal trigger leaderboard group: {e}")
|
| 963 |
|
| 964 |
+
# === START SCHEDULER /lb acak per JAM ===
|
| 965 |
+
asyncio.create_task(periodic_lb_trigger())
|
| 966 |
+
|
| 967 |
print("Kurator berjalan. Menunggu pesan baru... (Stop dengan interrupt).")
|
| 968 |
await client.run_until_disconnected()
|
| 969 |
|
| 970 |
+
|
| 971 |
# Script entry point: run app_main() on a fresh event loop only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(app_main())
|