tg2pdf/main.py

107 lines
4.5 KiB
Python
Raw Normal View History

2025-01-16 16:43:35 +02:00
from pyrogram import Client
from pyrogram import filters
import asyncio
import subprocess
from pathlib import Path
import shutil
2025-01-16 16:43:35 +02:00
# Credential template, kept for reference. To construct the client with
# explicit credentials, fill in the three values below and use the
# commented-out Client(...) call instead of the short form.
# NOTE(review): presumably the short form relies on an existing "2pdf"
# session or config file providing the credentials — confirm.
# api_id =
# api_hash = ""
# bot_token = ""
# app = Client(
#     "2pdf",
#     api_id=api_id, api_hash=api_hash,
#     bot_token=bot_token
# )

# Client instance shared by every handler and helper in this file.
app = Client("2pdf")
# MIME types the bot is willing to hand to the converter.  Built once at
# import time instead of on every incoming message.
_VALID_MIME_TYPES = (
    "application/msword",  # .doc
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",  # .docx
    "application/vnd.ms-excel",  # .xls
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",  # .xlsx
    "application/vnd.oasis.opendocument.text",  # .odt
    "application/rtf",  # .rtf
    "text/plain",  # .txt
    "text/html",  # .html
    "application/xhtml+xml",  # .xhtml
    "application/epub+zip",  # .epub
    "application/vnd.oasis.opendocument.spreadsheet",  # .ods
    "application/vnd.oasis.opendocument.presentation",  # .odp
    "application/vnd.oasis.opendocument.graphics",  # .odg
    "application/vnd.visio",  # .vsd
    "image/svg+xml",  # .svg
    "application/vnd.ms-powerpoint",  # .ppt
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",  # .pptx
    "application/x-abiword",  # .abw
    "application/vnd.ms-works",  # .wps
    "application/x-tex",  # .tex
    "text/csv",  # .csv
    "text/tab-separated-values",  # .tsv
)


def validDocument(mime_type):
    """Return True when *mime_type* is one of the supported document types.

    Args:
        mime_type: MIME type string reported for the incoming document.
            Any value that is not in the supported list (including None)
            yields False.

    Returns:
        bool: whether the document should be converted.
    """
    # ".html" is advertised in the /start help text, and HTML files are
    # labelled "text/html"; "application/xhtml+xml" alone never matched them.
    return mime_type in _VALID_MIME_TYPES
2025-01-16 16:43:35 +02:00
async def downloadDocument(file_id, file_name):
    """Download a Telegram file through the shared client.

    Args:
        file_id: Identifier of the file to fetch.
        file_name: Name forwarded to ``app.download_media`` as ``file_name``.

    Returns:
        Whatever ``app.download_media`` resolves to (used by the callers as
        the local path of the downloaded file).
    """
    return await app.download_media(file_id, file_name=file_name)
2025-01-21 16:16:10 +02:00
# Full path of the unoserver executable, or None when it is unavailable.
# Defined up front so readers of the global never hit a NameError.
unoserverPath = None


def unoCheck():  # Check if unoserver even exists
    """Locate the ``unoserver`` executable and remember it globally.

    Side effect: stores the result in the module-level ``unoserverPath``.

    Returns:
        The full path of the executable, or None when it is not on PATH.
    """
    global unoserverPath
    # The executable is called "unoserver"; the previous lookup for
    # "unoservers" could never succeed.
    unoserverPath = shutil.which("unoserver")
    return unoserverPath


def unoStart():
    """Launch the unoserver daemon once, if unoserver is installed.

    Does nothing when the executable is not found.  When the launch itself
    fails, ``unoserverPath`` is reset to None so that code checking it does
    not assume a server is available.

    Returns:
        None.
    """
    global unoserverPath
    serverPath = unoCheck()
    if not serverPath:
        return
    try:
        # Spawn exactly one daemon.  The previous loop waited for a status
        # variable that was never updated and would have spawned servers
        # forever.  The launcher's exit status is deliberately not used as a
        # success signal.
        # NOTE(review): a "--daemon" launcher may exit non-zero even on
        # success — confirm against the unoserver docs.
        subprocess.Popen(args=[serverPath, "--daemon"])
    except OSError:
        unoserverPath = None
async def unoConvertDocument(sourceDocumentPath, sourceDocumentName):
    """Convert a downloaded document to PDF with ``unoconvert``.

    The downloaded source file is removed afterwards, whether or not the
    conversion succeeded, matching what ``convertDocumentOneShot`` does.

    Args:
        sourceDocumentPath: Path of the downloaded file to convert.
        sourceDocumentName: File name as supplied by the sender.  Only its
            final path component is used to name the PDF; when it is empty
            the name of ``sourceDocumentPath`` is used instead.

    Returns:
        pathlib.Path: location of the PDF inside the ``output`` directory.

    Raises:
        FileNotFoundError: ``unoconvert`` is not on PATH.
        subprocess.CalledProcessError: ``unoconvert`` exited with an error.
    """
    try:
        unoconvertPath = shutil.which("unoconvert")
        if unoconvertPath is None:
            raise FileNotFoundError("unoconvert executable not found on PATH")

        # The name comes from the sender: strip any directory part so the
        # PDF can only ever be written inside the output directory.
        baseName = Path(str(sourceDocumentName or "")).name
        if not baseName:
            baseName = Path(sourceDocumentPath).name
        outputDir = Path("output")
        outputDocumentPath = outputDir / Path(baseName).with_suffix(".pdf")
        outputDir.mkdir(parents=True, exist_ok=True)  # Ensure directory exists

        # NOTE: subprocess.run blocks the event loop until the conversion is
        # done, so conversions run one at a time.
        subprocess.run(
            args=[
                unoconvertPath,
                "--convert-to",
                "pdf",
                str(sourceDocumentPath),
                str(outputDocumentPath),
            ],
            check=True,  # surface converter failures instead of uploading nothing
        )
        return outputDocumentPath
    finally:
        # Best-effort cleanup of the download; a missing file is not an error.
        if sourceDocumentPath:
            try:
                Path(sourceDocumentPath).unlink()
            except OSError:
                pass
async def convertDocumentOneShot(sourceDocumentPath, sourceDocumentName):
    """Convert a downloaded document to PDF with a one-off LibreOffice run.

    The downloaded source file is removed afterwards, whether or not the
    conversion succeeded.

    Args:
        sourceDocumentPath: Path of the downloaded file to convert.
        sourceDocumentName: File name as supplied by the sender.  Kept for
            interface compatibility; the PDF name is derived from
            ``sourceDocumentPath`` because that is the file LibreOffice
            actually converts.

    Returns:
        pathlib.Path: expected location of the PDF inside ``output``.

    Raises:
        FileNotFoundError: no LibreOffice executable is on PATH.
        subprocess.CalledProcessError: LibreOffice exited with an error.
    """
    try:
        # Find full path to the libreoffice binary ("soffice" is the name
        # used by installations that do not ship a "libreoffice" launcher).
        libreofficePath = shutil.which("libreoffice") or shutil.which("soffice")
        if libreofficePath is None:
            raise FileNotFoundError("libreoffice executable not found on PATH")

        outputDir = Path("output")
        outputDir.mkdir(parents=True, exist_ok=True)  # Ensure directory exists

        # NOTE: subprocess.run blocks the event loop until the conversion is
        # done, so conversions run one at a time.
        subprocess.run(
            args=[
                libreofficePath,
                "--headless",
                "--convert-to",
                "pdf",
                "--outdir",
                str(outputDir),
                str(sourceDocumentPath),
            ],
            check=True,
        )
        # With --outdir the result is named after the input file, so build
        # the path from the real input name rather than the sender's name.
        return outputDir / (Path(sourceDocumentPath).stem + ".pdf")
    finally:
        # Best-effort cleanup of the download; a missing file is not an error.
        if sourceDocumentPath:
            try:
                Path(sourceDocumentPath).unlink()
            except OSError:
                pass
async def uploadDocument(outputDocumentPath, chat, message):
    """Send the converted PDF as a reply and delete the local copy.

    Args:
        outputDocumentPath: Path of the PDF to send.
        chat: Id of the chat to send the document to.
        message: Id of the message the document replies to.

    The local file is removed even when sending fails, so failed uploads do
    not accumulate in the output directory.  Any exception raised by
    ``app.send_document`` still propagates to the caller.
    """
    try:
        await app.send_document(
            document=outputDocumentPath,
            chat_id=chat,
            reply_to_message_id=message,
        )
    finally:
        # Best-effort cleanup, like the previous unchecked "rm": a file that
        # is already gone or cannot be removed must not mask a send error.
        try:
            Path(outputDocumentPath).unlink()
        except OSError:
            pass
2025-01-16 16:43:35 +02:00
@app.on_message(filters.document)  # Get all of the messages that have files in them
async def documentFetcher(client, message):
    """Convert a supported incoming document to PDF and reply with it.

    Args:
        client: Client instance passed in by the handler machinery (unused).
        message: Incoming message; its ``document`` attribute is read.

    Messages are silently ignored when the MIME type is not supported, when
    the document has no usable file name, or when the download yields no
    file.
    """
    document = message.document
    if not validDocument(mime_type=document.mime_type):
        return  # Unsupported type: ignore the message, no further work

    # The file name is chosen by the sender.  Keep only its final component
    # so it cannot point the download or the output outside their folders.
    safeName = Path(document.file_name or "").name
    if safeName in ("", ".", ".."):
        return

    sourceDocumentPath = await downloadDocument(document.file_id, safeName)
    if not sourceDocumentPath:
        return  # Nothing was downloaded, so there is nothing to convert

    # Use unoconvert when unoserver was found at startup, else fall back to
    # a one-off LibreOffice run.
    if unoserverPath:
        outputDocumentPath = await unoConvertDocument(sourceDocumentPath, safeName)
    else:
        outputDocumentPath = await convertDocumentOneShot(sourceDocumentPath, safeName)

    await uploadDocument(outputDocumentPath, message.chat.id, message.id)
2025-01-21 16:16:10 +02:00
2025-01-21 17:02:53 +02:00
@app.on_message(filters.command("start"))
async def startResponder(client, message):
    """Answer the /start command with a description of the bot.

    Args:
        client: Client instance passed in by the handler machinery (unused).
        message: The incoming /start message; the answer is sent as a reply
            to it.
    """
    helpText = "This bot converts any document libreoffice supports into a pdf\nSource code: https://git.darkn.space/neko/tg2pdf\nYou can add it into your group and it should detect the appropriate files automatically\nList of supported file formats:\n.doc .docx .doc\n.xls .xlsx\n.odt .ods\n.rtf .txt\n.html .csv .tsv\n.epub\n.odp .odp\n.odg\n.vsd .svg\n.ppt .pptx\n.abw .wps\n.tex"
    # Reply directly to the command message.
    await message.reply(helpText, reply_to_message_id=message.id)
2025-01-21 16:16:10 +02:00
# Look for unoserver and try to launch it before the bot starts handling
# messages; unoStart() also sets the global that documentFetcher checks.
unoStart()
2025-01-16 16:43:35 +02:00
# Run the client with the handlers registered above.
# NOTE(review): presumably this call blocks until the bot is stopped — confirm.
app.run()