First commit

This commit is contained in:
unknown
2026-06-09 21:18:13 -03:00
commit 5bff6b938b
66 changed files with 10922 additions and 0 deletions
+193
View File
@@ -0,0 +1,193 @@
"""
api_implementations.py
Contiene métodos utilizados por el alimentador a lo largo de "chats.py" para manipular los endpoints del sistema.
"""
from dotenv import load_dotenv
import os
import requests
from fastapi import APIRouter, HTTPException, status
from typing import Dict, Optional
from auth import get_internal_headers
##TODO EL QUILOMBO NUEVO##
API_BASE_URL = os.getenv('API_URL')
TIMEOUT = 5.0
router = APIRouter()
load_dotenv()
def patch_api_sync(validated_data: Dict, endpoint: str) -> Optional[Dict]:
"""Función síncrona para hacer PUT a la API"""
try:
response = requests.patch(
f"{API_BASE_URL}/{endpoint}/",
json=validated_data,
headers=get_internal_headers(),
timeout=TIMEOUT
)
if response.status_code == status.HTTP_400_BAD_REQUEST:
error_detail = response.json().get("detail", "")
return error_detail
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=f"Connection failed: {str(e)}"
)
def post_api_sync(validated_data: Dict, endpoint: str) -> Optional[Dict]:
"""Función síncrona para hacer POST a la API"""
try:
response = requests.post(
f"{API_BASE_URL}/{endpoint}/",
json=validated_data,
headers=get_internal_headers(),
timeout=TIMEOUT
)
if response.status_code == status.HTTP_400_BAD_REQUEST:
error_detail = response.json().get("detail", "")
if "already exists" in error_detail:
return None
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=f"Connection failed: {str(e)}"
)
def get_api_sync(endpoint: str):
"""Función síncrona para hacer GET a la API"""
try:
response = requests.get(
f"{API_BASE_URL}/{endpoint}/",
headers=get_internal_headers()
)
response.raise_for_status()
return response.json()
except requests.exceptions.RequestException as e:
raise HTTPException(
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
detail=f"Connection failed: {str(e)}"
)
def create_telegram_group(group_data: Dict) -> Optional[Dict]:
"""Crea un grupo en la API remota (versión síncrona)"""
try:
validated_data = {
"id_telegram": group_data["id_telegram"],
"name": group_data["name"],
"description": group_data["description"],
"type": group_data["type"],
"message_position": 0
}
return post_api_sync(validated_data, "groups")
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid group data: {str(e)}"
)
def refresh_telegram_group(channel: int, group_data: Dict) -> Optional[Dict]:
"""Crea un grupo en la API remota (versión síncrona)"""
try:
validated_data = {
"name": str(group_data["name"]),
"description": str(group_data["description"]),
"type": str(group_data["type"])
}
print(f"La data que vamos intentar cargar es {validated_data}")
return patch_api_sync(validated_data, f"groups/{channel}")
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid group data: {str(e)}"
)
def act_message_posittion(group_id: int, message_id: int):
try:
validated_data = {
"message_position": message_id
}
return patch_api_sync(validated_data, f"groups/{group_id}/update-position")
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid group data: {str(e)}"
)
def create_sender(sender: Dict) -> Optional[Dict]:
"""Crea un mensaje en la API remota (versión síncrona)"""
try:
sender_data = {
"id_telegram": sender["id_telegram"],
"type": sender["type"],
"username": sender["username"],
"first_name": str(sender["first_name"]),
"last_name": str(sender["last_name"]),
"phone": str(sender["phone"])
}
print(sender_data)
return post_api_sync(sender_data, "senders")
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid group data: {str(e)}"
)
def create_message(message: Dict) -> Optional[Dict]:
"""Crea un mensaje en la API remota (versión síncrona)"""
try:
message_data = {
"id_mess_g": message["id_mess_g"],
"content": str(message["content"]),
"date": message["date"],
"sender_id": message["sender_id"],
"group_id": message["group_id"]
}
response = post_api_sync(message_data, "messages")
if response is None:
return None
if message['attachments'] is not None:
try:
create_attachment(message['attachments'])
except Exception as e:
print(f"Failed to create attachment for message {response['id_mess_g']}: {str(e)}")
return response
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid group data: {str(e)}"
)
def create_attachment(attachment: Dict) -> Optional[Dict]:
"""Crea un mensaje en la API remota (versión síncrona)"""
try:
attachment_data = {
"message_id": attachment["message_id"],
"type": attachment["type"],
"description": str(attachment["description"]),
"isDownloaded": "false",
"group_id": attachment["group_id"]
}
print(f"Trying to load attachment data: {attachment_data}")
return post_api_sync(attachment_data, "attachments")
except Exception as e:
raise HTTPException(
status_code=status.HTTP_400_BAD_REQUEST,
detail=f"Invalid group data: {str(e)}"
)
+833
View File
@@ -0,0 +1,833 @@
"""
Chats.py
Contiene la lógica del alimentador.
"""
import asyncio
import glob
import os
import time
import threading
from threading import Lock
from typing import Optional, List, Dict
import shutil
import tempfile
from unittest import result
from dotenv import load_dotenv
from telethon import TelegramClient, functions
from telethon.errors import MediaEmptyError
from telethon.tl.types import (
Document, MessageMediaPhoto, MessageMediaDocument, MessageMediaWebPage,
MessageMediaContact, MessageMediaGeo, MessageMediaVenue, MessageMediaGame,
MessageMediaInvoice, DocumentAttributeAudio, DocumentAttributeVideo,
DocumentAttributeSticker, DocumentAttributeAnimated, DocumentAttributeFilename,
Channel, Chat
)
from integrations.api_implementations import (
get_api_sync, post_api_sync, create_sender, create_message,
act_message_posittion, refresh_telegram_group
)
from io import BytesIO
class TelegramClientManager:
"""Responsable únicamente de la conexión, autenticación y ciclo de vida del cliente."""
def __init__(self):
load_dotenv()
self.api_id = os.getenv("TELEGRAM_API_ID")
self.api_hash = os.getenv("TELEGRAM_API_HASH")
self.download_path = os.getenv("DOWNLOAD_PATH", "downloads")
self.client: Optional[TelegramClient] = None
self.is_authorized = False
self.session_path = None
def get_sessions_file(self):
"""Busca el archivo de sesión más reciente en telegram_sessions/."""
session_dir = os.path.join(os.getcwd(), "telegram_sessions")
session_files = glob.glob(os.path.join(session_dir, "session_*.session"))
if not session_files:
raise RuntimeError("No se encontró ningún archivo de sesión en telegram_sessions/")
return max(session_files, key=os.path.getmtime)
def connect(self):
"""
Utiliza un archivo de sesión existente para autenticar una sesión de Telegram mediante un cliente de Telethon.
"""
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
session_path = self.get_sessions_file().replace(".session", "")
print(f"[INFO] Usando sesión: {session_path}")
self.client = TelegramClient(
session_path,
self.api_id,
self.api_hash,
connection_retries=5,
retry_delay=5,
auto_reconnect=True
)
# start() en vez de connect() — inicializa el loop interno correctamente
self.client.start()
self.is_authorized = self.client.is_user_authorized()
if self.is_authorized:
print("[INFO] Cliente Telegram conectado y autorizado")
else:
print("[WARN] Sesión no autorizada")
def is_active(self) -> bool:
"""Verifica si la sesión es válida"""
session_filename = self.get_sessions_file()
session_src = f"{session_filename}"
if not os.path.exists(session_src):
raise RuntimeError(f"Session file no encontrado: {session_src}")
session_dir = os.path.dirname(session_filename)
tmp_session = tempfile.mktemp(prefix="val_session_", dir=session_dir)
shutil.copy2(session_src, f"{tmp_session}.session")
result = {"is_active": False, "error": None}
def _validate_in_thread():
tmp_client = None
tmp_loop = asyncio.new_event_loop()
asyncio.set_event_loop(tmp_loop)
try:
tmp_client = TelegramClient(
tmp_session,
self.api_id,
self.api_hash,
loop=tmp_loop,
)
tmp_loop.run_until_complete(tmp_client.connect())
if not tmp_loop.run_until_complete(tmp_client.is_user_authorized()):
result["error"] = "Sesión no autorizada"
result["is_active"] = False
return
result["is_active"] = True
print("[is_active] OK — Sesión válida")
except Exception as e:
result["error"] = str(e)
result["is_active"] = False
print(f"[is_active] ERROR: {type(e).__name__}: {e}")
finally:
if tmp_client:
try:
tmp_loop.run_until_complete(tmp_client.disconnect())
except Exception:
pass
asyncio.set_event_loop(None)
try:
tmp_loop.close()
except Exception:
pass
tmp_file = f"{tmp_session}.session"
if os.path.exists(tmp_file):
try:
os.remove(tmp_file)
except Exception:
pass
thread = threading.Thread(target=_validate_in_thread, daemon=True)
thread.start()
thread.join(timeout=30)
if thread.is_alive():
result["error"] = "Timeout al validar la sesión en Telegram"
result["is_active"] = False
print(result)
return result["is_active"]
def get_client(self) -> Optional[TelegramClient]:
"""Devuelve el cliente, reconectando si es necesario."""
if self.client is None:
self.connect()
return self.client
if not self.client.is_connected():
print("[INFO] Cliente desconectado, reconectando...")
self.client.connect()
time.sleep(1)
self.is_authorized = self.client.is_user_authorized()
return self.client
def disconnect(self):
"""Desconecta el cliente de forma segura."""
if self.client and self.client.is_connected():
self.client.disconnect()
print("[INFO] Cliente Telegram desconectado")
def __del__(self):
self.disconnect()
class DataTransformer:
"""Métodos estáticos para transformar datos de Telethon a dicts."""
@staticmethod
def get_message_attachment_info(message, message_id: int, group_id: int) -> Optional[Dict]:
"""Extrae y estructura la información del adjunto de un mensaje."""
if not hasattr(message, 'media') or not message.media:
return None
media = message.media
info = {
'message_id': message_id,
'group_id': group_id,
'type': None,
'description': None,
'isDownloaded': False
}
if isinstance(media, MessageMediaPhoto):
info.update({
'type': 'photo',
'description': f'Photo (ID: {media.photo.id})'
})
elif isinstance(media, MessageMediaDocument):
doc = media.document
if isinstance(doc, Document):
attr_map = {
DocumentAttributeAudio: lambda a: ('voice', 'Voice message') if getattr(a, 'voice', False) else ('audio', 'Audio'),
DocumentAttributeVideo: lambda a: ('video', 'Video'),
DocumentAttributeSticker: lambda a: ('sticker', 'Sticker'),
DocumentAttributeAnimated: lambda a: ('gif', 'GIF'),
}
tipo = None
descripcion = None
for attr in doc.attributes:
for attr_class, resolver in attr_map.items():
if isinstance(attr, attr_class):
tipo, descripcion = resolver(attr)
break
if tipo:
break
if not tipo:
filename = next(
(a.file_name for a in doc.attributes if isinstance(a, DocumentAttributeFilename)),
doc.mime_type
)
tipo, descripcion = 'document', f'Document ({filename})'
info.update({'type': tipo, 'description': descripcion})
elif isinstance(media, MessageMediaWebPage):
url = getattr(media.webpage, 'url', str(media.webpage))
info.update({'type': 'webpage', 'description': f'URL: {url}'})
elif isinstance(media, MessageMediaContact):
info.update({
'type': 'contact',
'description': f'Contact: {media.first_name} {media.last_name}'
})
elif isinstance(media, MessageMediaGeo):
info.update({
'type': 'location',
'description': f'Location (lat, long): {media.geo.lat}, {media.geo.long}'
})
elif isinstance(media, MessageMediaVenue):
info.update({
'type': 'venue',
'description': f'Place: {media.title} ({media.address})'
})
elif isinstance(media, MessageMediaGame):
info.update({'type': 'game', 'description': f'Game: {media.game.title}'})
elif isinstance(media, MessageMediaInvoice):
info.update({'type': 'invoice', 'description': f'Invoice: {media.title}'})
else:
info.update({
'type': 'unknown',
'description': f'Unknown media type: {type(media)}'
})
return info
@staticmethod
def get_user_info(client: TelegramClient, user_id: int) -> Optional[Dict]:
"""Obtiene información de un usuario por ID."""
try:
# Sin "with client" para no desconectar el cliente compartido
user = client.loop.run_until_complete(client.get_entity(user_id))
return {
'id_telegram': user_id,
'type': 'bot' if getattr(user, 'bot', False) else 'user',
'username': getattr(user, 'username', None),
'first_name': getattr(user, 'first_name', None),
'last_name': getattr(user, 'last_name', None),
'phone': getattr(user, 'phone', None)
}
except Exception as e:
print(f"Error getting user info for {user_id}: {e}")
return None
class TelegramScraper:
"""Lógica de scraping de canales. No maneja conexión directamente."""
def __init__(self, client_manager: TelegramClientManager):
self.manager = client_manager
self.channel_identifier = None
self.first_id = 0
self.ChannelMessages = []
@property
def client(self) -> TelegramClient:
"""Siempre devuelve un cliente conectado."""
return self.manager.get_client()
# --- Helpers de paginación ---
def _last_message_id(self, channel_messages) -> int:
try:
return channel_messages.messages[0].id
except IndexError:
return 0
def _first_message_id(self, channel_messages) -> int:
try:
return channel_messages.messages[-1].id
except IndexError:
return 0
# --- Configuración de canal ---
def set_chat_id(self, channel: str):
try:
if '-' in channel:
self.channel_identifier = int(channel)
else:
self.channel_identifier = channel
except Exception as e:
raise ValueError(f"Error resolving channel: {e}")
def get_channel_info(self) -> Optional[Dict]:
"""
Obtiene información de la entidad y determina su tipo correctamente.
Tipos de entidad en Telethon:
- Channel, megagroup=False → canal (público o privado)
- Channel, megagroup=True → supergrupo (usa prefijo -100)
- Chat → grupo normal (NO usa prefijo -100)
"""
try:
entity = self.client.loop.run_until_complete(
self.client.get_entity(self.channel_identifier)
)
username = getattr(entity, 'username', None)
is_public = bool(username)
entity_id = getattr(entity, 'id', None)
title = getattr(entity, 'title', '')
description = getattr(entity, 'about', '')
if isinstance(entity, Channel):
if entity.megagroup:
# Supergrupo — usa prefijo -100
chat_type = 'public_supergroup' if is_public else 'private_supergroup'
else:
# Canal puro
chat_type = 'public_channel' if is_public else 'private_channel'
elif isinstance(entity, Chat):
# Grupo normal — NO usa prefijo -100
chat_type = 'group'
else:
# Usuario u otro tipo — tratarlo como privado
chat_type = 'private_channel'
return {
'type': chat_type,
'is_group': isinstance(entity, Chat),
'is_megagroup': isinstance(entity, Channel) and entity.megagroup,
'username': username or 'private',
'id': entity_id,
'title': title,
'description': description,
}
except Exception as e:
print(f"Error getting channel info: {e}")
if "authorization" in str(e).lower():
self.manager.is_authorized = False
return None
# --- Obtención de mensajes ---
def _get_channel_messages(self, limit: int, id_init: int, min_id: int, max_id: int):
"""
Obtiene un lote de mensajes de un canal.
"""
try:
return self.client.loop.run_until_complete(
self.client(functions.messages.GetHistoryRequest(
peer=self.channel_identifier,
limit=limit,
offset_date=0,
add_offset=0,
offset_id=id_init,
min_id=min_id,
max_id=max_id,
hash=0
))
)
except Exception as e:
print(f"Error fetching messages: {e}")
if "authorization" in str(e).lower():
self.manager.is_authorized = False
return None
def set_channel_messages(self):
"""Carga todos los mensajes del canal en self.ChannelMessages."""
channel_messages_array = []
first_messages = self._get_channel_messages(1, 0, 0, 0)
self.first_id = self._first_message_id(first_messages)
last_id = self._last_message_id(first_messages)
while last_id != 0:
messages = self._get_channel_messages(100, last_id, 0, 0)
channel_messages_array.append(messages)
last_id = self._first_message_id(messages)
time.sleep(0.5)
print(f"Finished, we made {len(channel_messages_array)} cycles")
self.ChannelMessages = channel_messages_array
def refresh_chat(self):
"""Carga solo los mensajes nuevos desde first_id."""
channel_messages_array = []
first_messages = self._get_channel_messages(1, 0, 0, 0)
last_id = self._last_message_id(first_messages)
while last_id > self.first_id:
messages = self._get_channel_messages(100, last_id, self.first_id, 0)
channel_messages_array.append(messages)
print(f"last id: {last_id}, new last_id: {self._first_message_id(messages)}")
last_id = self._first_message_id(messages)
time.sleep(0.5)
print(f"Finished, we made {len(channel_messages_array)} cycles")
self.ChannelMessages = channel_messages_array
# --- Procesamiento y publicación ---
def _preload_senders(self) -> Dict:
"""Precarga y publica los senders únicos encontrados en los mensajes."""
unique_sender_ids = {
msg.sender_id
for channel_message in self.ChannelMessages
for msg in channel_message.messages
if hasattr(msg, 'sender_id') and msg.sender_id
}
print(f"🔍 {len(unique_sender_ids)} usuarios únicos encontrados")
users_cache = {}
for sender_id in unique_sender_ids:
user_info = DataTransformer.get_user_info(self.client, sender_id)
if user_info:
users_cache[sender_id] = user_info
try:
exists = get_api_sync(f"senders/{sender_id}")
except Exception:
exists = None
if exists is None:
create_sender(user_info)
return users_cache
def get_and_post_message_info(self) -> List[Dict]:
"""Procesa y publica todos los mensajes cargados en self.ChannelMessages."""
users_cache = self._preload_senders()
msgs = []
for channel_message in self.ChannelMessages:
for msg in channel_message.messages:
sender_id = getattr(msg, 'sender_id', None)
message = {
'id_mess_g': msg.id,
'content': getattr(msg, 'message', ''),
'date': msg.date.isoformat(),
'attachments': DataTransformer.get_message_attachment_info(
msg, msg.id, self.channel_identifier
),
'sender_id': sender_id,
'group_id': self.channel_identifier
}
try:
create_message(message)
msgs.append(message)
except Exception as e:
print(f"Error creating message: {str(e)}")
if msgs:
act_message_posittion(self.channel_identifier, msgs[-1]["id_mess_g"])
else:
print("The channel has not pending messages")
return msgs
def get_message_info(self) -> List[Dict]:
"""Devuelve los mensajes estructurados sin publicarlos en la API para debug."""
users_cache = self._preload_senders()
msgs = []
for channel_message in self.ChannelMessages:
for msg in channel_message.messages:
sender_id = getattr(msg, 'sender_id', None)
msgs.append({
'id_mess_g': msg.id,
'content': getattr(msg, 'message', ''),
'date': msg.date.isoformat(),
'attachments': DataTransformer.get_message_attachment_info(
msg, msg.id, self.channel_identifier
),
'sender': users_cache.get(sender_id, {})
})
return msgs
# --- Gestión de canales ---
def add_chat(self, channel: str) -> Dict:
"""
Agrega o actualiza un canal/grupo en la API.
Construye el ID negativo correcto según el tipo:
- Canal y Supergrupo → -100{id}
- Grupo normal → -{id} (sin el 100)
"""
self.set_chat_id(channel)
info = self.get_channel_info()
if not info:
raise ValueError(f"Channel/group {channel} not found or inaccessible")
entity_id = info['id']
# Construir el ID negativo correcto según el tipo
if info.get('is_group'):
# Grupo normal: ID negativo simple
if not str(entity_id).startswith('-'):
entity_id = f"-{entity_id}"
else:
# Canal o supergrupo: prefijo -100
if not str(entity_id).startswith('-100'):
entity_id = f"-100{entity_id}"
info['id'] = str(entity_id)
new_group = {
"name": info.get('username') if info.get('username') != 'private'
else info.get('title', f"group_{entity_id}"),
"description": info.get('description') or info.get('title', ''),
"type": info['type']
}
print(f"[add_chat] id={entity_id}, type={info['type']}, group_data={new_group}")
created_group = refresh_telegram_group(entity_id, new_group)
if created_group:
return {
"status": "success",
"group": created_group,
"message": "Group updated successfully"
}
return {"status": "error", "message": f"Error updating {channel}"}
def add_chats(self):
"""Actualiza la información de todos los grupos almacenados."""
chats = get_api_sync("groups")
print(chats)
for chat in chats:
self.add_chat(str(chat['id_telegram']))
def feeder_loop(self):
"""Ciclo principal de alimentación de datos."""
print("Init feeder loop")
inicio = time.time()
groups = get_api_sync("groups")
for group in groups:
channel_id = group["id_telegram"]
self.set_chat_id(str(channel_id))
info = self.get_channel_info()
print(f"Channel info: {info}")
self.first_id = group["message_position"]
self.refresh_chat()
self.get_and_post_message_info()
fin = time.time()
print(f"Tiempo empleado: {fin - inicio:.2f} segundos")
# --- Descarga de adjuntos ---
def download_attachment_to_buffer(self, group_id: int, message_id: int) -> BytesIO:
"""
Descarga el media de un mensaje de Telegram a un BytesIO.
Crea una sesión Telethon temporal sobre una copia del session file,
en un thread dedicado para no interferir con el loop del scraper.
"""
session_filename = self.manager.get_sessions_file()
session_src = f"{session_filename}"
if not os.path.exists(session_src):
raise RuntimeError(f"Session file no encontrado: {session_src}")
session_dir = os.path.dirname(session_filename)
tmp_session = tempfile.mktemp(prefix="dl_session_", dir=session_dir)
shutil.copy2(session_src, f"{tmp_session}.session")
print(f"[DL] Session copiada -> {tmp_session}.session")
result = {"buffer": None, "error": None}
def _download_in_thread():
tmp_client = None
tmp_loop = asyncio.new_event_loop()
asyncio.set_event_loop(tmp_loop)
try:
tmp_client = TelegramClient(
tmp_session,
self.manager.api_id,
self.manager.api_hash,
loop=tmp_loop,
)
# connect() en lugar de start() — no dispara prompts interactivos
tmp_loop.run_until_complete(tmp_client.connect())
if not tmp_loop.run_until_complete(tmp_client.is_user_authorized()):
raise RuntimeError("La sesión copiada no está autorizada")
print(f"[DL] Cliente temporal conectado y autorizado")
message = tmp_loop.run_until_complete(
tmp_client.get_messages(group_id, ids=message_id)
)
if message is None:
raise ValueError(f"Mensaje {message_id} no encontrado en grupo {group_id}")
if not message.media:
raise ValueError(f"El mensaje {message_id} no tiene media adjunta")
print(f"[DL] Mensaje encontrado, media={type(message.media).__name__}")
buffer = BytesIO()
tmp_loop.run_until_complete(
tmp_client.download_media(message, file=buffer)
)
buffer.seek(0)
print(f"[DL] Descarga OK, tamaño={buffer.getbuffer().nbytes} bytes")
result["buffer"] = buffer
except Exception as e:
print(f"[DL] ERROR en thread: {type(e).__name__}: {e}")
result["error"] = e
finally:
if tmp_client is not None:
try:
if not tmp_loop.is_closed():
tmp_loop.run_until_complete(tmp_client.disconnect())
except Exception as e:
print(f"[DL] ERROR desconectando cliente temporal: {type(e).__name__}: {e}")
try:
asyncio.set_event_loop(None)
except Exception:
pass
try:
tmp_loop.close()
except Exception as e:
print(f"[DL] ERROR cerrando loop temporal: {type(e).__name__}: {e}")
tmp_session_file = f"{tmp_session}.session"
if os.path.exists(tmp_session_file):
try:
os.remove(tmp_session_file)
print(f"[DL] Session temporal eliminada: {tmp_session_file}")
except Exception as e:
print(f"[DL] No se pudo eliminar session temporal: {e}")
thread = threading.Thread(target=_download_in_thread, daemon=True)
thread.start()
thread.join(timeout=600)
if thread.is_alive():
raise RuntimeError("Timeout: la descarga tardó más de 10 minutos")
if result["error"] is not None:
raise result["error"]
if result["buffer"] is None:
raise RuntimeError("La descarga no produjo contenido")
return result["buffer"]
def validate_chat(self, channel_id: int) -> Dict:
"""
Valida que un canal/grupo exista y sea accesible en Telegram,
y retorna su información sin guardarlo en la DB.
Usa sesión temporal (mismo patrón que download_attachment_to_buffer)
para no interferir con el loop del scraper.
Returns:
dict con 'valid': True/False y, si válido, los campos de info.
Raises:
RuntimeError si no se puede obtener el session file.
"""
session_filename = self.session_path
session_src = f"{session_filename}"
if not os.path.exists(session_src):
raise RuntimeError(f"Session file no encontrado: {session_src}")
session_dir = os.path.dirname(session_filename)
tmp_session = tempfile.mktemp(prefix="val_session_", dir=session_dir)
shutil.copy2(session_src, f"{tmp_session}.session")
result = {"valid": False, "error": None, "info": None}
def _validate_in_thread():
tmp_client = None
tmp_loop = asyncio.new_event_loop()
asyncio.set_event_loop(tmp_loop)
try:
tmp_client = TelegramClient(
tmp_session,
self.manager.api_id,
self.manager.api_hash,
loop=tmp_loop,
)
tmp_loop.run_until_complete(tmp_client.connect())
if not tmp_loop.run_until_complete(tmp_client.is_user_authorized()):
result["error"] = "Sesión no autorizada"
return
entity = tmp_loop.run_until_complete(
tmp_client.get_entity(channel_id)
)
username = getattr(entity, 'username', None)
is_public = bool(username)
entity_id = getattr(entity, 'id', None)
title = getattr(entity, 'title', '')
description = getattr(entity, 'about', '')
if isinstance(entity, Channel):
if entity.megagroup:
chat_type = 'public_supergroup' if is_public else 'private_supergroup'
else:
chat_type = 'public_channel' if is_public else 'private_channel'
# Construir ID con prefijo -100
full_id = int(f"-100{entity_id}") if entity_id else channel_id
elif isinstance(entity, Chat):
chat_type = 'group'
full_id = -abs(entity_id) if entity_id else channel_id
else:
result["error"] = f"Tipo de entidad no soportado: {type(entity).__name__}"
return
result["valid"] = True
result["info"] = {
"id_telegram": full_id,
"type": chat_type,
"username": username or title or str(full_id),
"title": title,
"description": description,
}
print(f"[validate_chat] OK — id={full_id}, type={chat_type}, title={title}")
except Exception as e:
result["error"] = str(e)
print(f"[validate_chat] ERROR: {type(e).__name__}: {e}")
finally:
if tmp_client:
try:
tmp_loop.run_until_complete(tmp_client.disconnect())
except Exception:
pass
asyncio.set_event_loop(None)
try:
tmp_loop.close()
except Exception:
pass
tmp_file = f"{tmp_session}.session"
if os.path.exists(tmp_file):
try:
os.remove(tmp_file)
except Exception:
pass
thread = threading.Thread(target=_validate_in_thread, daemon=True)
thread.start()
thread.join(timeout=30)
if thread.is_alive():
result["error"] = "Timeout al validar el canal en Telegram"
return result
class TelegramChatSingleton:
"""Singleton thread-safe. Expone el scraper y el cliente."""
_manager: Optional[TelegramClientManager] = None
_scraper: Optional[TelegramScraper] = None
_lock = Lock()
def __new__(cls):
raise RuntimeError("Use get_scraper() or get_client() classmethods")
@classmethod
def get_tmp_scraper(cls) -> TelegramScraper:
"""Crea y devuelve una instancia temporal de TelegramScraper con su propio manager."""
tmp_manager = TelegramClientManager()
if tmp_manager.is_active():
return TelegramScraper(tmp_manager)
else:
raise RuntimeError("No se pudo crear el scraper temporal: sesión no válida")
@classmethod
def get_scraper(cls) -> TelegramScraper:
"""Devuelve la instancia única del scraper, creándola si no existe."""
if cls._scraper is None:
with cls._lock:
if cls._scraper is None:
cls._manager = TelegramClientManager()
cls._manager.connect()
cls._scraper = TelegramScraper(cls._manager)
return cls._scraper
@classmethod
def get_client(cls) -> Optional[TelegramClient]:
"""Devuelve el cliente activo si existe. NO intenta conectar."""
if cls._manager is None:
return None
return cls._manager.client # acceso directo, sin llamar get_client() del manager
@classmethod
def cleanup(cls):
"""Limpia la instancia (útil para tests o reinicio manual)."""
with cls._lock:
if cls._manager:
cls._manager.disconnect()
cls._manager = None
cls._scraper = None
View File