tg2pdf/main.py

96 lines
4.2 KiB
Python

from pyrogram import Client
from pyrogram import filters
import asyncio
import subprocess
from pathlib import Path
import shutil
import time
import config # Import settings from config.py
# Telegram credentials are read from config.py.
api_id, api_hash, bot_token = config.API_ID, config.API_HASH, config.BOT_TOKEN

# Bot client used by every handler in this file.
# NOTE(review): in_memory=True presumably keeps the session out of an on-disk
# .session file -- confirm against the Pyrogram Client documentation.
app = Client(
    "2pdf",
    api_id=api_id,
    api_hash=api_hash,
    bot_token=bot_token,
    in_memory=True,
)
# MIME types the bot accepts for conversion. Built once at import time as a
# frozenset so each membership test is a hash lookup rather than a list scan.
_VALID_MIME_TYPES = frozenset({
    "application/msword",  # .doc
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",  # .docx
    "application/vnd.ms-excel",  # .xls
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",  # .xlsx
    "application/vnd.oasis.opendocument.text",  # .odt
    "application/rtf",  # .rtf
    "text/plain",  # .txt
    "text/html",  # .html (the MIME type regular HTML files are sent with)
    "application/xhtml+xml",  # .xhtml
    "application/epub+zip",  # .epub
    "application/vnd.oasis.opendocument.spreadsheet",  # .ods
    "application/vnd.oasis.opendocument.presentation",  # .odp
    "application/vnd.oasis.opendocument.graphics",  # .odg
    "application/vnd.visio",  # .vsd
    "image/svg+xml",  # .svg
    "application/vnd.ms-powerpoint",  # .ppt
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",  # .pptx
    "application/x-abiword",  # .abw
    "application/vnd.ms-works",  # .wps
    "application/x-tex",  # .tex
    "text/csv",  # .csv
    "text/tab-separated-values",  # .tsv
})


def validDocument(mime_type):
    """Tell whether a document with the given MIME type should be converted.

    Args:
        mime_type: MIME type string reported for the document (may be None).

    Returns:
        True if ``mime_type`` is one of the supported types, False otherwise.
    """
    return mime_type in _VALID_MIME_TYPES
async def downloadDocument(file_id, sourceDocumentTempName):
    """Download the media identified by ``file_id`` through the bot client.

    Args:
        file_id: Identifier of the media to download.
        sourceDocumentTempName: Value passed to ``download_media`` as ``file_name``.

    Returns:
        Whatever ``app.download_media`` returns for the download.
    """
    return await app.download_media(file_id, file_name=sourceDocumentTempName)
def unoCheck():  # Check if unoserver even exists
    """Look up the ``unoserver`` executable on PATH.

    The lookup result is also stored in the module-level ``unoserverPath``.

    Returns:
        The path found by ``shutil.which``, or None when nothing was found.
    """
    global unoserverPath
    unoserverPath = shutil.which("unoserver")
    # A falsy lookup result is normalised to None, a found path is passed through.
    return unoserverPath or None
def unoStart():
    """Spawn ``unoserver`` with the ``--daemon`` flag.

    Raises:
        FileNotFoundError: if the ``unoserver`` executable is not on PATH.
    """
    serverPath = unoCheck()
    if serverPath is None:
        # Without this guard Popen receives None as the program name and fails
        # with an opaque TypeError that does not say what is missing.
        raise FileNotFoundError(
            "unoserver executable not found on PATH; it is required for document conversion"
        )
    subprocess.Popen(args=[serverPath, "--daemon"])
async def unoConvertDocument(sourceDocumentPath, sourceDocumentTempName):
    """Convert a downloaded document to PDF with ``unoconvert``.

    The source file is removed afterwards, whether or not the conversion
    succeeded, so failed conversions do not leave downloads behind.

    Args:
        sourceDocumentPath: Path of the file to convert.
        sourceDocumentTempName: File name used for the result inside ``output``.

    Returns:
        Path of the converted document (``output/<sourceDocumentTempName>``).

    Raises:
        ValueError: if ``sourceDocumentPath`` is None.
        FileNotFoundError: if the ``unoconvert`` executable is not on PATH.
        subprocess.CalledProcessError: if ``unoconvert`` exits with a non-zero
            status.
    """
    if sourceDocumentPath is None:
        # NOTE(review): download_media presumably returns None when a download
        # does not succeed -- fail with a clear message instead of a TypeError.
        raise ValueError("sourceDocumentPath is None; there is no file to convert")

    outputDir = Path("output")
    outputDocumentPath = outputDir / Path(sourceDocumentTempName)
    try:
        unoconvertPath = shutil.which("unoconvert")
        if unoconvertPath is None:
            raise FileNotFoundError("unoconvert executable not found on PATH")
        outputDir.mkdir(parents=True, exist_ok=True)  # Ensure directory exists
        command = [
            unoconvertPath,
            "--convert-to",
            "pdf",
            str(sourceDocumentPath),
            str(outputDocumentPath),
        ]
        # Run the converter without blocking the event loop, so other updates
        # keep being processed while the conversion is in progress.
        process = await asyncio.create_subprocess_exec(*command)
        returncode = await process.wait()
        if returncode != 0:
            raise subprocess.CalledProcessError(returncode, command)
    finally:
        try:
            Path(sourceDocumentPath).unlink()
        except FileNotFoundError:
            # Already gone; nothing left to clean up.
            pass
    return outputDocumentPath
async def getUploadDocumentName(outputDocumentPath, sourceDocumentName):
    """Build the file name shown to the user for the converted PDF.

    Args:
        outputDocumentPath: Unused; kept so existing callers keep working.
        sourceDocumentName: Original file name of the document (may be None
            or empty).

    Returns:
        ``sourceDocumentName`` with its last suffix replaced by ``.pdf``, or
        ``"document.pdf"`` when no usable name is available.
    """
    fallbackName = "document.pdf"
    if not sourceDocumentName:
        # A missing name would otherwise become "None.pdf", and an empty one
        # would make with_suffix() raise ValueError.
        return fallbackName
    try:
        return Path(str(sourceDocumentName)).with_suffix(".pdf").as_posix()
    except ValueError:
        # with_suffix() rejects paths that have no final name component.
        return fallbackName
async def uploadDocument(outputDocumentPath, chat, message, name):
    """Send the converted file as a reply, then delete it from disk.

    Args:
        outputDocumentPath: Path of the converted file to upload.
        chat: Identifier of the chat to send the document to.
        message: Identifier of the message the document replies to.
        name: File name the uploaded document is sent under.
    """
    try:
        # outputDocumentPath is passed through unchanged (not converted to str)
        # so the upload behaves exactly as before.
        await app.send_document(
            document=outputDocumentPath,
            chat_id=chat,
            reply_to_message_id=message,
            file_name=name,
        )
    finally:
        # Remove the temporary file even when the upload raised, so failed
        # uploads do not accumulate in the output directory.
        try:
            Path(outputDocumentPath).unlink()
        except OSError:
            # Best-effort cleanup: the previous `rm` call ignored failures too.
            pass
@app.on_message(filters.document)  # Receives every message that carries a document
async def documentFetcher(client, message):
    """Convert a supported incoming document to PDF and reply with the result.

    Documents whose MIME type is not accepted by ``validDocument`` are ignored.
    """
    document = message.document
    if not validDocument(mime_type=document.mime_type):
        return  # Unsupported type: ignore the message and do no further work

    # The unique file id doubles as the temporary file name for both the
    # download and the converted output.
    downloadedPath = await downloadDocument(document.file_id, document.file_unique_id)
    convertedPath = await unoConvertDocument(downloadedPath, document.file_unique_id)
    pdfName = await getUploadDocumentName(convertedPath, document.file_name)
    await uploadDocument(convertedPath, message.chat.id, message.id, pdfName)
@app.on_message(filters.command("start"))
async def startResponder(client, message):
    """Reply to the /start command with a description and the supported formats."""
    # One literal per output line; the duplicated ".odp .odp" entry of the
    # previous text is reduced to a single ".odp".
    startMessage = (
        "This bot converts any document libreoffice supports into a pdf\n"
        "Source code: https://git.darkn.space/neko/tg2pdf\n"
        "You can add it into your group and it should detect the appropriate files automatically\n"
        "List of supported file formats:\n"
        ".doc .docx\n"
        ".xls .xlsx\n"
        ".odt .ods\n"
        ".rtf .txt\n"
        ".html .csv .tsv\n"
        ".epub\n"
        ".odp\n"
        ".odg\n"
        ".vsd .svg\n"
        ".ppt .pptx\n"
        ".abw .wps\n"
        ".tex"
    )
    await message.reply(startMessage, reply_to_message_id=message.id)
# Script entry: launch the unoserver daemon first, then start the bot client.
unoStart() # Attempt to start unoserver
# NOTE(review): app.run() presumably blocks until the bot is stopped -- confirm
# against the Pyrogram documentation.
app.run()