# tg2pdf/main.py

from pyrogram import Client
from pyrogram import filters
import asyncio
import subprocess
from pathlib import Path
import shutil
import time

import config # Import settings from config.py
# Telegram API credentials, read from the local config module.
api_id = config.API_ID
api_hash = config.API_HASH
bot_token = config.BOT_TOKEN

# Bot client; "2pdf" is passed as the client's first positional argument.
# NOTE(review): in_memory=True presumably keeps the session off disk - confirm
# against the Pyrogram version in use.
app = Client(
    "2pdf",
    api_id=api_id,
    api_hash=api_hash,
    bot_token=bot_token,
    in_memory=True,
)

# MIME types of the document formats the bot accepts for conversion.
# Kept at module level so the set is built once rather than on every call.
_VALID_MIME_TYPES = frozenset({
    "application/msword",  # .doc
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",  # .docx
    "application/vnd.ms-excel",  # .xls
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",  # .xlsx
    "application/vnd.oasis.opendocument.text",  # .odt
    "application/rtf",  # .rtf
    "text/plain",  # .txt
    "text/html",  # .html
    "application/xhtml+xml",  # .xhtml
    "application/epub+zip",  # .epub
    "application/vnd.oasis.opendocument.spreadsheet",  # .ods
    "application/vnd.oasis.opendocument.presentation",  # .odp
    "application/vnd.oasis.opendocument.graphics",  # .odg
    "application/vnd.visio",  # .vsd
    "image/svg+xml",  # .svg
    "application/vnd.ms-powerpoint",  # .ppt
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",  # .pptx
    "application/x-abiword",  # .abw
    "application/vnd.ms-works",  # .wps
    "application/x-tex",  # .tex
    "text/csv",  # .csv
    "text/tab-separated-values",  # .tsv
})


def validDocument(mime_type):
    """Return True if *mime_type* is one of the supported document MIME types.

    Args:
        mime_type: MIME type string of the incoming document. ``None`` (no
            MIME type available) is accepted and yields ``False``.

    Returns:
        bool: ``True`` when the type is in the supported set, else ``False``.
    """
    return mime_type in _VALID_MIME_TYPES

async def downloadDocument(file_id, sourceDocumentTempName):
    """Download the media referenced by *file_id* through the bot client.

    *sourceDocumentTempName* is forwarded as ``file_name``. The value returned
    by ``app.download_media`` is handed back to the caller unchanged.
    """
    return await app.download_media(file_id, file_name=sourceDocumentTempName)

def unoCheck():  # Check if unoserver even exists
    """Look up the ``unoserver`` executable on PATH.

    The lookup result is also stored in the module-level ``unoserverPath``.

    Returns:
        The path to the executable, or ``None`` when it cannot be found.
    """
    global unoserverPath
    unoserverPath = shutil.which("unoserver")
    return unoserverPath if unoserverPath else None

def unoStart():
    """Launch ``unoserver`` in daemon mode.

    Raises:
        FileNotFoundError: if no ``unoserver`` executable is found on PATH.
            Without this check ``None`` would be handed to ``Popen`` and fail
            with an obscure ``TypeError``.
    """
    unoserverPath = unoCheck()
    if unoserverPath is None:
        raise FileNotFoundError(
            "unoserver executable not found on PATH; install unoserver to enable conversion"
        )
    subprocess.Popen(args=[unoserverPath, "--daemon"])

async def unoConvertDocument(sourceDocumentPath, sourceDocumentTempName):
    """Convert a downloaded document to PDF with ``unoconvert``.

    The converter runs as an asyncio subprocess so the event loop stays free
    to serve other updates while the conversion is in progress.

    Args:
        sourceDocumentPath: Path of the downloaded source file. It is deleted
            once the conversion attempt finishes, whether or not it succeeded.
        sourceDocumentTempName: File name used for the result inside the
            ``output`` directory.

    Returns:
        pathlib.Path: location of the converted document.

    Raises:
        ValueError: if *sourceDocumentPath* is ``None`` (nothing was downloaded).
        FileNotFoundError: if ``unoconvert`` is not available on PATH.
        subprocess.CalledProcessError: if ``unoconvert`` exits with a
            non-zero status.
    """
    if sourceDocumentPath is None:
        raise ValueError("no source document to convert")

    outputDir = Path("output")
    outputDocumentPath = outputDir / Path(sourceDocumentTempName)
    try:
        unoconvertPath = shutil.which("unoconvert")
        if unoconvertPath is None:
            raise FileNotFoundError("unoconvert executable not found on PATH")
        outputDir.mkdir(parents=True, exist_ok=True)  # Ensure directory exists
        # Arguments are stringified because older asyncio versions reject
        # path-like objects as subprocess arguments.
        command = [
            unoconvertPath,
            "--convert-to",
            "pdf",
            str(sourceDocumentPath),
            str(outputDocumentPath),
        ]
        process = await asyncio.create_subprocess_exec(*command)
        returncode = await process.wait()
        if returncode != 0:
            raise subprocess.CalledProcessError(returncode, command)
    finally:
        # Always drop the downloaded source so failed conversions do not
        # leave files behind.
        try:
            Path(sourceDocumentPath).unlink()
        except FileNotFoundError:
            pass
    return outputDocumentPath

async def getUploadDocumentName(outputDocumentPath, sourceDocumentName):
    """Build the file name under which the converted PDF is sent back.

    Args:
        outputDocumentPath: Unused; kept so existing callers keep working.
        sourceDocumentName: Original file name of the document. May be
            ``None`` or empty when no name is available.

    Returns:
        str: the original name with its last suffix replaced by ``.pdf``, or
        ``"document.pdf"`` when no usable name is available.
    """
    fallbackName = "document.pdf"
    if not sourceDocumentName:
        # Formatting None would otherwise produce the bogus name "None.pdf",
        # and an empty name makes with_suffix() raise ValueError.
        return fallbackName
    try:
        return Path(str(sourceDocumentName)).with_suffix(".pdf").as_posix()
    except ValueError:
        # Names without a usable final component (e.g. ".") cannot take a suffix.
        return fallbackName

async def uploadDocument(outputDocumentPath, chat, message, name):
    """Send the converted document as a reply, then delete the local copy.

    Args:
        outputDocumentPath: Path of the converted file to upload.
        chat: Id of the chat to send the document to.
        message: Id of the message the upload replies to.
        name: File name shown for the uploaded document.

    The local file is removed even when the upload raises, so failed uploads
    do not accumulate files on disk.
    """
    try:
        await app.send_document(
            document=outputDocumentPath,
            chat_id=chat,
            reply_to_message_id=message,
            file_name=name,
        )
    finally:
        try:
            Path(outputDocumentPath).unlink()
        except OSError:
            # Cleanup is best-effort: a file that is already gone or cannot be
            # removed must not mask the outcome of the upload itself.
            pass

@app.on_message(filters.document)  # Get all of the messages that have files in them
async def documentFetcher(client, message):
    """Convert a supported incoming document to PDF and reply with the result.

    Documents whose MIME type is not accepted by ``validDocument`` are ignored.
    """
    document = message.document
    if not validDocument(mime_type=document.mime_type):
        return  # Unsupported type: ignore the message and do no further work

    # The unique file id doubles as the temporary file name for both the
    # download and the converted output.
    tempName = document.file_unique_id
    downloadedPath = await downloadDocument(document.file_id, tempName)
    convertedPath = await unoConvertDocument(downloadedPath, tempName)
    replyFileName = await getUploadDocumentName(convertedPath, document.file_name)
    await uploadDocument(convertedPath, message.chat.id, message.id, replyFileName)

@app.on_message(filters.command("start"))
async def startResponder(client, message):
    """Reply to the /start command with a description of the bot and its supported formats."""
    # Built from adjacent literals so each line of the message is readable;
    # the format list names each extension once.
    startMessage = (
        "This bot converts any document libreoffice supports into a pdf\n"
        "Source code: https://git.darkn.space/neko/tg2pdf\n"
        "You can add it into your group and it should detect the appropriate files automatically\n"
        "List of supported file formats:\n"
        ".doc .docx\n"
        ".xls .xlsx\n"
        ".odt .ods\n"
        ".rtf .txt\n"
        ".html .csv .tsv\n"
        ".epub\n"
        ".odp\n"
        ".odg\n"
        ".vsd .svg\n"
        ".ppt .pptx\n"
        ".abw .wps\n"
        ".tex"
    )
    await message.reply(startMessage, reply_to_message_id=message.id)


# Startup order matters: launch the unoserver daemon first so conversions can
# be served, then start the bot client.
unoStart() # Attempt to start unoserver
app.run()