""" Chats.py Contiene la lógica del alimentador. """ import asyncio import glob import os import time import threading from threading import Lock from typing import Optional, List, Dict import shutil import tempfile from unittest import result from dotenv import load_dotenv from telethon import TelegramClient, functions from telethon.errors import MediaEmptyError from telethon.tl.types import ( Document, MessageMediaPhoto, MessageMediaDocument, MessageMediaWebPage, MessageMediaContact, MessageMediaGeo, MessageMediaVenue, MessageMediaGame, MessageMediaInvoice, DocumentAttributeAudio, DocumentAttributeVideo, DocumentAttributeSticker, DocumentAttributeAnimated, DocumentAttributeFilename, Channel, Chat ) from integrations.api_implementations import ( get_api_sync, post_api_sync, create_sender, create_message, act_message_posittion, refresh_telegram_group ) from io import BytesIO class TelegramClientManager: """Responsable únicamente de la conexión, autenticación y ciclo de vida del cliente.""" def __init__(self): load_dotenv() self.api_id = os.getenv("TELEGRAM_API_ID") self.api_hash = os.getenv("TELEGRAM_API_HASH") self.download_path = os.getenv("DOWNLOAD_PATH", "downloads") self.client: Optional[TelegramClient] = None self.is_authorized = False self.session_path = None def get_sessions_file(self): """Busca el archivo de sesión más reciente en telegram_sessions/.""" session_dir = os.path.join(os.getcwd(), "telegram_sessions") session_files = glob.glob(os.path.join(session_dir, "session_*.session")) if not session_files: raise RuntimeError("No se encontró ningún archivo de sesión en telegram_sessions/") return max(session_files, key=os.path.getmtime) def connect(self): """ Utiliza un archivo de sesión existente para autenticar una sesión de Telegram mediante un cliente de Telethon. """ loop = asyncio.new_event_loop() asyncio.set_event_loop(loop) session_path = self.get_sessions_file().replace(".session", "") print(f"[INFO] Usando sesión: {session_path}") self.client = TelegramClient( session_path, self.api_id, self.api_hash, connection_retries=5, retry_delay=5, auto_reconnect=True ) # start() en vez de connect() — inicializa el loop interno correctamente self.client.start() self.is_authorized = self.client.is_user_authorized() if self.is_authorized: print("[INFO] Cliente Telegram conectado y autorizado") else: print("[WARN] Sesión no autorizada") def is_active(self) -> bool: """Verifica si la sesión es válida""" session_filename = self.get_sessions_file() session_src = f"{session_filename}" if not os.path.exists(session_src): raise RuntimeError(f"Session file no encontrado: {session_src}") session_dir = os.path.dirname(session_filename) tmp_session = tempfile.mktemp(prefix="val_session_", dir=session_dir) shutil.copy2(session_src, f"{tmp_session}.session") result = {"is_active": False, "error": None} def _validate_in_thread(): tmp_client = None tmp_loop = asyncio.new_event_loop() asyncio.set_event_loop(tmp_loop) try: tmp_client = TelegramClient( tmp_session, self.api_id, self.api_hash, loop=tmp_loop, ) tmp_loop.run_until_complete(tmp_client.connect()) if not tmp_loop.run_until_complete(tmp_client.is_user_authorized()): result["error"] = "Sesión no autorizada" result["is_active"] = False return result["is_active"] = True print("[is_active] OK — Sesión válida") except Exception as e: result["error"] = str(e) result["is_active"] = False print(f"[is_active] ERROR: {type(e).__name__}: {e}") finally: if tmp_client: try: tmp_loop.run_until_complete(tmp_client.disconnect()) except Exception: pass asyncio.set_event_loop(None) try: tmp_loop.close() except Exception: pass tmp_file = f"{tmp_session}.session" if os.path.exists(tmp_file): try: os.remove(tmp_file) except Exception: pass thread = threading.Thread(target=_validate_in_thread, daemon=True) thread.start() thread.join(timeout=30) if thread.is_alive(): result["error"] = "Timeout al validar la sesión en Telegram" result["is_active"] = False print(result) return result["is_active"] def get_client(self) -> Optional[TelegramClient]: """Devuelve el cliente, reconectando si es necesario.""" if self.client is None: self.connect() return self.client if not self.client.is_connected(): print("[INFO] Cliente desconectado, reconectando...") self.client.connect() time.sleep(1) self.is_authorized = self.client.is_user_authorized() return self.client def disconnect(self): """Desconecta el cliente de forma segura.""" if self.client and self.client.is_connected(): self.client.disconnect() print("[INFO] Cliente Telegram desconectado") def __del__(self): self.disconnect() class DataTransformer: """Métodos estáticos para transformar datos de Telethon a dicts.""" @staticmethod def get_message_attachment_info(message, message_id: int, group_id: int) -> Optional[Dict]: """Extrae y estructura la información del adjunto de un mensaje.""" if not hasattr(message, 'media') or not message.media: return None media = message.media info = { 'message_id': message_id, 'group_id': group_id, 'type': None, 'description': None, 'isDownloaded': False } if isinstance(media, MessageMediaPhoto): info.update({ 'type': 'photo', 'description': f'Photo (ID: {media.photo.id})' }) elif isinstance(media, MessageMediaDocument): doc = media.document if isinstance(doc, Document): attr_map = { DocumentAttributeAudio: lambda a: ('voice', 'Voice message') if getattr(a, 'voice', False) else ('audio', 'Audio'), DocumentAttributeVideo: lambda a: ('video', 'Video'), DocumentAttributeSticker: lambda a: ('sticker', 'Sticker'), DocumentAttributeAnimated: lambda a: ('gif', 'GIF'), } tipo = None descripcion = None for attr in doc.attributes: for attr_class, resolver in attr_map.items(): if isinstance(attr, attr_class): tipo, descripcion = resolver(attr) break if tipo: break if not tipo: filename = next( (a.file_name for a in doc.attributes if isinstance(a, DocumentAttributeFilename)), doc.mime_type ) tipo, descripcion = 'document', f'Document ({filename})' info.update({'type': tipo, 'description': descripcion}) elif isinstance(media, MessageMediaWebPage): url = getattr(media.webpage, 'url', str(media.webpage)) info.update({'type': 'webpage', 'description': f'URL: {url}'}) elif isinstance(media, MessageMediaContact): info.update({ 'type': 'contact', 'description': f'Contact: {media.first_name} {media.last_name}' }) elif isinstance(media, MessageMediaGeo): info.update({ 'type': 'location', 'description': f'Location (lat, long): {media.geo.lat}, {media.geo.long}' }) elif isinstance(media, MessageMediaVenue): info.update({ 'type': 'venue', 'description': f'Place: {media.title} ({media.address})' }) elif isinstance(media, MessageMediaGame): info.update({'type': 'game', 'description': f'Game: {media.game.title}'}) elif isinstance(media, MessageMediaInvoice): info.update({'type': 'invoice', 'description': f'Invoice: {media.title}'}) else: info.update({ 'type': 'unknown', 'description': f'Unknown media type: {type(media)}' }) return info @staticmethod def get_user_info(client: TelegramClient, user_id: int) -> Optional[Dict]: """Obtiene información de un usuario por ID.""" try: # Sin "with client" para no desconectar el cliente compartido user = client.loop.run_until_complete(client.get_entity(user_id)) return { 'id_telegram': user_id, 'type': 'bot' if getattr(user, 'bot', False) else 'user', 'username': getattr(user, 'username', None), 'first_name': getattr(user, 'first_name', None), 'last_name': getattr(user, 'last_name', None), 'phone': getattr(user, 'phone', None) } except Exception as e: print(f"Error getting user info for {user_id}: {e}") return None class TelegramScraper: """Lógica de scraping de canales. No maneja conexión directamente.""" def __init__(self, client_manager: TelegramClientManager): self.manager = client_manager self.channel_identifier = None self.first_id = 0 self.ChannelMessages = [] @property def client(self) -> TelegramClient: """Siempre devuelve un cliente conectado.""" return self.manager.get_client() # --- Helpers de paginación --- def _last_message_id(self, channel_messages) -> int: try: return channel_messages.messages[0].id except IndexError: return 0 def _first_message_id(self, channel_messages) -> int: try: return channel_messages.messages[-1].id except IndexError: return 0 # --- Configuración de canal --- def set_chat_id(self, channel: str): try: if '-' in channel: self.channel_identifier = int(channel) else: self.channel_identifier = channel except Exception as e: raise ValueError(f"Error resolving channel: {e}") def get_channel_info(self) -> Optional[Dict]: """ Obtiene información de la entidad y determina su tipo correctamente. Tipos de entidad en Telethon: - Channel, megagroup=False → canal (público o privado) - Channel, megagroup=True → supergrupo (usa prefijo -100) - Chat → grupo normal (NO usa prefijo -100) """ try: entity = self.client.loop.run_until_complete( self.client.get_entity(self.channel_identifier) ) username = getattr(entity, 'username', None) is_public = bool(username) entity_id = getattr(entity, 'id', None) title = getattr(entity, 'title', '') description = getattr(entity, 'about', '') if isinstance(entity, Channel): if entity.megagroup: # Supergrupo — usa prefijo -100 chat_type = 'public_supergroup' if is_public else 'private_supergroup' else: # Canal puro chat_type = 'public_channel' if is_public else 'private_channel' elif isinstance(entity, Chat): # Grupo normal — NO usa prefijo -100 chat_type = 'group' else: # Usuario u otro tipo — tratarlo como privado chat_type = 'private_channel' return { 'type': chat_type, 'is_group': isinstance(entity, Chat), 'is_megagroup': isinstance(entity, Channel) and entity.megagroup, 'username': username or 'private', 'id': entity_id, 'title': title, 'description': description, } except Exception as e: print(f"Error getting channel info: {e}") if "authorization" in str(e).lower(): self.manager.is_authorized = False return None # --- Obtención de mensajes --- def _get_channel_messages(self, limit: int, id_init: int, min_id: int, max_id: int): """ Obtiene un lote de mensajes de un canal. """ try: return self.client.loop.run_until_complete( self.client(functions.messages.GetHistoryRequest( peer=self.channel_identifier, limit=limit, offset_date=0, add_offset=0, offset_id=id_init, min_id=min_id, max_id=max_id, hash=0 )) ) except Exception as e: print(f"Error fetching messages: {e}") if "authorization" in str(e).lower(): self.manager.is_authorized = False return None def set_channel_messages(self): """Carga todos los mensajes del canal en self.ChannelMessages.""" channel_messages_array = [] first_messages = self._get_channel_messages(1, 0, 0, 0) self.first_id = self._first_message_id(first_messages) last_id = self._last_message_id(first_messages) while last_id != 0: messages = self._get_channel_messages(100, last_id, 0, 0) channel_messages_array.append(messages) last_id = self._first_message_id(messages) time.sleep(0.5) print(f"Finished, we made {len(channel_messages_array)} cycles") self.ChannelMessages = channel_messages_array def refresh_chat(self): """Carga solo los mensajes nuevos desde first_id.""" channel_messages_array = [] first_messages = self._get_channel_messages(1, 0, 0, 0) last_id = self._last_message_id(first_messages) while last_id > self.first_id: messages = self._get_channel_messages(100, last_id, self.first_id, 0) channel_messages_array.append(messages) print(f"last id: {last_id}, new last_id: {self._first_message_id(messages)}") last_id = self._first_message_id(messages) time.sleep(0.5) print(f"Finished, we made {len(channel_messages_array)} cycles") self.ChannelMessages = channel_messages_array # --- Procesamiento y publicación --- def _preload_senders(self) -> Dict: """Precarga y publica los senders únicos encontrados en los mensajes.""" unique_sender_ids = { msg.sender_id for channel_message in self.ChannelMessages for msg in channel_message.messages if hasattr(msg, 'sender_id') and msg.sender_id } print(f"🔍 {len(unique_sender_ids)} usuarios únicos encontrados") users_cache = {} for sender_id in unique_sender_ids: user_info = DataTransformer.get_user_info(self.client, sender_id) if user_info: users_cache[sender_id] = user_info try: exists = get_api_sync(f"senders/{sender_id}") except Exception: exists = None if exists is None: create_sender(user_info) return users_cache def get_and_post_message_info(self) -> List[Dict]: """Procesa y publica todos los mensajes cargados en self.ChannelMessages.""" users_cache = self._preload_senders() msgs = [] for channel_message in self.ChannelMessages: for msg in channel_message.messages: sender_id = getattr(msg, 'sender_id', None) message = { 'id_mess_g': msg.id, 'content': getattr(msg, 'message', ''), 'date': msg.date.isoformat(), 'attachments': DataTransformer.get_message_attachment_info( msg, msg.id, self.channel_identifier ), 'sender_id': sender_id, 'group_id': self.channel_identifier } try: create_message(message) msgs.append(message) except Exception as e: print(f"Error creating message: {str(e)}") if msgs: act_message_posittion(self.channel_identifier, msgs[-1]["id_mess_g"]) else: print("The channel has not pending messages") return msgs def get_message_info(self) -> List[Dict]: """Devuelve los mensajes estructurados sin publicarlos en la API para debug.""" users_cache = self._preload_senders() msgs = [] for channel_message in self.ChannelMessages: for msg in channel_message.messages: sender_id = getattr(msg, 'sender_id', None) msgs.append({ 'id_mess_g': msg.id, 'content': getattr(msg, 'message', ''), 'date': msg.date.isoformat(), 'attachments': DataTransformer.get_message_attachment_info( msg, msg.id, self.channel_identifier ), 'sender': users_cache.get(sender_id, {}) }) return msgs # --- Gestión de canales --- def add_chat(self, channel: str) -> Dict: """ Agrega o actualiza un canal/grupo en la API. Construye el ID negativo correcto según el tipo: - Canal y Supergrupo → -100{id} - Grupo normal → -{id} (sin el 100) """ self.set_chat_id(channel) info = self.get_channel_info() if not info: raise ValueError(f"Channel/group {channel} not found or inaccessible") entity_id = info['id'] # Construir el ID negativo correcto según el tipo if info.get('is_group'): # Grupo normal: ID negativo simple if not str(entity_id).startswith('-'): entity_id = f"-{entity_id}" else: # Canal o supergrupo: prefijo -100 if not str(entity_id).startswith('-100'): entity_id = f"-100{entity_id}" info['id'] = str(entity_id) new_group = { "name": info.get('username') if info.get('username') != 'private' else info.get('title', f"group_{entity_id}"), "description": info.get('description') or info.get('title', ''), "type": info['type'] } print(f"[add_chat] id={entity_id}, type={info['type']}, group_data={new_group}") created_group = refresh_telegram_group(entity_id, new_group) if created_group: return { "status": "success", "group": created_group, "message": "Group updated successfully" } return {"status": "error", "message": f"Error updating {channel}"} def add_chats(self): """Actualiza la información de todos los grupos almacenados.""" chats = get_api_sync("groups") print(chats) for chat in chats: self.add_chat(str(chat['id_telegram'])) def feeder_loop(self): """Ciclo principal de alimentación de datos.""" print("Init feeder loop") inicio = time.time() groups = get_api_sync("groups") for group in groups: channel_id = group["id_telegram"] self.set_chat_id(str(channel_id)) info = self.get_channel_info() print(f"Channel info: {info}") self.first_id = group["message_position"] self.refresh_chat() self.get_and_post_message_info() fin = time.time() print(f"Tiempo empleado: {fin - inicio:.2f} segundos") # --- Descarga de adjuntos --- def download_attachment_to_buffer(self, group_id: int, message_id: int) -> BytesIO: """ Descarga el media de un mensaje de Telegram a un BytesIO. Crea una sesión Telethon temporal sobre una copia del session file, en un thread dedicado para no interferir con el loop del scraper. """ session_filename = self.manager.get_sessions_file() session_src = f"{session_filename}" if not os.path.exists(session_src): raise RuntimeError(f"Session file no encontrado: {session_src}") session_dir = os.path.dirname(session_filename) tmp_session = tempfile.mktemp(prefix="dl_session_", dir=session_dir) shutil.copy2(session_src, f"{tmp_session}.session") print(f"[DL] Session copiada -> {tmp_session}.session") result = {"buffer": None, "error": None} def _download_in_thread(): tmp_client = None tmp_loop = asyncio.new_event_loop() asyncio.set_event_loop(tmp_loop) try: tmp_client = TelegramClient( tmp_session, self.manager.api_id, self.manager.api_hash, loop=tmp_loop, ) # connect() en lugar de start() — no dispara prompts interactivos tmp_loop.run_until_complete(tmp_client.connect()) if not tmp_loop.run_until_complete(tmp_client.is_user_authorized()): raise RuntimeError("La sesión copiada no está autorizada") print(f"[DL] Cliente temporal conectado y autorizado") message = tmp_loop.run_until_complete( tmp_client.get_messages(group_id, ids=message_id) ) if message is None: raise ValueError(f"Mensaje {message_id} no encontrado en grupo {group_id}") if not message.media: raise ValueError(f"El mensaje {message_id} no tiene media adjunta") print(f"[DL] Mensaje encontrado, media={type(message.media).__name__}") buffer = BytesIO() tmp_loop.run_until_complete( tmp_client.download_media(message, file=buffer) ) buffer.seek(0) print(f"[DL] Descarga OK, tamaño={buffer.getbuffer().nbytes} bytes") result["buffer"] = buffer except Exception as e: print(f"[DL] ERROR en thread: {type(e).__name__}: {e}") result["error"] = e finally: if tmp_client is not None: try: if not tmp_loop.is_closed(): tmp_loop.run_until_complete(tmp_client.disconnect()) except Exception as e: print(f"[DL] ERROR desconectando cliente temporal: {type(e).__name__}: {e}") try: asyncio.set_event_loop(None) except Exception: pass try: tmp_loop.close() except Exception as e: print(f"[DL] ERROR cerrando loop temporal: {type(e).__name__}: {e}") tmp_session_file = f"{tmp_session}.session" if os.path.exists(tmp_session_file): try: os.remove(tmp_session_file) print(f"[DL] Session temporal eliminada: {tmp_session_file}") except Exception as e: print(f"[DL] No se pudo eliminar session temporal: {e}") thread = threading.Thread(target=_download_in_thread, daemon=True) thread.start() thread.join(timeout=600) if thread.is_alive(): raise RuntimeError("Timeout: la descarga tardó más de 10 minutos") if result["error"] is not None: raise result["error"] if result["buffer"] is None: raise RuntimeError("La descarga no produjo contenido") return result["buffer"] def validate_chat(self, channel_id: int) -> Dict: """ Valida que un canal/grupo exista y sea accesible en Telegram, y retorna su información sin guardarlo en la DB. Usa sesión temporal (mismo patrón que download_attachment_to_buffer) para no interferir con el loop del scraper. Returns: dict con 'valid': True/False y, si válido, los campos de info. Raises: RuntimeError si no se puede obtener el session file. """ session_filename = self.session_path session_src = f"{session_filename}" if not os.path.exists(session_src): raise RuntimeError(f"Session file no encontrado: {session_src}") session_dir = os.path.dirname(session_filename) tmp_session = tempfile.mktemp(prefix="val_session_", dir=session_dir) shutil.copy2(session_src, f"{tmp_session}.session") result = {"valid": False, "error": None, "info": None} def _validate_in_thread(): tmp_client = None tmp_loop = asyncio.new_event_loop() asyncio.set_event_loop(tmp_loop) try: tmp_client = TelegramClient( tmp_session, self.manager.api_id, self.manager.api_hash, loop=tmp_loop, ) tmp_loop.run_until_complete(tmp_client.connect()) if not tmp_loop.run_until_complete(tmp_client.is_user_authorized()): result["error"] = "Sesión no autorizada" return entity = tmp_loop.run_until_complete( tmp_client.get_entity(channel_id) ) username = getattr(entity, 'username', None) is_public = bool(username) entity_id = getattr(entity, 'id', None) title = getattr(entity, 'title', '') description = getattr(entity, 'about', '') if isinstance(entity, Channel): if entity.megagroup: chat_type = 'public_supergroup' if is_public else 'private_supergroup' else: chat_type = 'public_channel' if is_public else 'private_channel' # Construir ID con prefijo -100 full_id = int(f"-100{entity_id}") if entity_id else channel_id elif isinstance(entity, Chat): chat_type = 'group' full_id = -abs(entity_id) if entity_id else channel_id else: result["error"] = f"Tipo de entidad no soportado: {type(entity).__name__}" return result["valid"] = True result["info"] = { "id_telegram": full_id, "type": chat_type, "username": username or title or str(full_id), "title": title, "description": description, } print(f"[validate_chat] OK — id={full_id}, type={chat_type}, title={title}") except Exception as e: result["error"] = str(e) print(f"[validate_chat] ERROR: {type(e).__name__}: {e}") finally: if tmp_client: try: tmp_loop.run_until_complete(tmp_client.disconnect()) except Exception: pass asyncio.set_event_loop(None) try: tmp_loop.close() except Exception: pass tmp_file = f"{tmp_session}.session" if os.path.exists(tmp_file): try: os.remove(tmp_file) except Exception: pass thread = threading.Thread(target=_validate_in_thread, daemon=True) thread.start() thread.join(timeout=30) if thread.is_alive(): result["error"] = "Timeout al validar el canal en Telegram" return result class TelegramChatSingleton: """Singleton thread-safe. Expone el scraper y el cliente.""" _manager: Optional[TelegramClientManager] = None _scraper: Optional[TelegramScraper] = None _lock = Lock() def __new__(cls): raise RuntimeError("Use get_scraper() or get_client() classmethods") @classmethod def get_tmp_scraper(cls) -> TelegramScraper: """Crea y devuelve una instancia temporal de TelegramScraper con su propio manager.""" tmp_manager = TelegramClientManager() if tmp_manager.is_active(): return TelegramScraper(tmp_manager) else: raise RuntimeError("No se pudo crear el scraper temporal: sesión no válida") @classmethod def get_scraper(cls) -> TelegramScraper: """Devuelve la instancia única del scraper, creándola si no existe.""" if cls._scraper is None: with cls._lock: if cls._scraper is None: cls._manager = TelegramClientManager() cls._manager.connect() cls._scraper = TelegramScraper(cls._manager) return cls._scraper @classmethod def get_client(cls) -> Optional[TelegramClient]: """Devuelve el cliente activo si existe. NO intenta conectar.""" if cls._manager is None: return None return cls._manager.client # acceso directo, sin llamar get_client() del manager @classmethod def cleanup(cls): """Limpia la instancia (útil para tests o reinicio manual).""" with cls._lock: if cls._manager: cls._manager.disconnect() cls._manager = None cls._scraper = None