Files
hinpdof/app.py
Hadley Rich 6c6c837301
Some checks failed
Build and Publish Docker Image / build (push) Failing after 17s
Initial
2024-11-11 10:26:47 +13:00

103 lines
2.6 KiB
Python

import io
import logging
import re
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse, StreamingResponse
from pydantic import BaseModel, Field
from weasyprint import HTML
# Configure the root logger at import time with an INFO threshold; the
# module-level ``logging.info`` / ``logging.error`` calls below go through it.
logging.basicConfig(level=logging.INFO)
class PdfRequest(BaseModel):
    """Request body accepted by the ``POST /pdf`` endpoint."""

    # Markup to render. Required (``...``) and must contain at least one character.
    html: str = Field(..., min_length=1, description="HTML content to convert to PDF")
    # Optional base name for the download, without the ".pdf" suffix
    # (the endpoint appends it). ``None`` means the client did not supply one.
    filename: str | None = None
# ASGI application object; the route decorators below register against it.
app = FastAPI()

# Allow browser clients from any origin to call the API.
#
# Credentials (cookies / Authorization headers) are deliberately NOT allowed:
# the CORS specification forbids combining a wildcard origin with credentialed
# requests, and FastAPI's documentation states that allow_origins must not be
# ["*"] when allow_credentials is True. No endpoint in this module reads
# cookies or auth headers, so nothing here needs credentialed cross-origin calls.
# If credentials are ever required, replace the wildcard with an explicit list
# of trusted origins before turning allow_credentials back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Matches any single character that is NOT an ASCII letter, digit, underscore
# or hyphen. Compiled once at import time and shared by every call.
FILENAME_SANITIZE_REGEX = re.compile(r"[^a-zA-Z0-9_\-]")


def sanitize_filename(filename: str) -> str:
    """
    Replace every disallowed character in a filename with an underscore.

    Only ASCII letters, digits, ``_`` and ``-`` are kept; each other
    character (dots, slashes, spaces, non-ASCII letters, ...) becomes a
    single ``_``. The length of the string is unchanged.

    Args:
        filename (str): The original filename.

    Returns:
        str: The sanitized filename.
    """
    return re.sub(FILENAME_SANITIZE_REGEX, "_", filename)
async def pdf_generator(byte_string: bytes):
    """
    Asynchronously yield the PDF payload in fixed-size pieces.

    The bytes are wrapped in an in-memory stream and read 4096 bytes at a
    time; the final piece may be shorter. Nothing is yielded for empty input.

    Args:
        byte_string (bytes): The PDF content as bytes.

    Yields:
        bytes: Chunks of the PDF content.
    """
    chunk_size = 4096
    stream = io.BytesIO(byte_string)
    # read() returns b"" at end of stream, which ends the loop.
    while piece := stream.read(chunk_size):
        yield piece
@app.post("/pdf")
async def pdf(body: PdfRequest):
    """
    Endpoint to convert HTML content to a PDF file.

    The download name is taken from ``body.filename`` after trimming
    whitespace and sanitizing it; when the filename is missing, empty or
    whitespace-only the default name ``hinpdof`` is used. ``.pdf`` is
    always appended.

    Args:
        body (PdfRequest): Request body containing HTML content and an optional filename.

    Returns:
        StreamingResponse: A streaming response with the generated PDF file,
        sent as an ``application/pdf`` attachment.

    Raises:
        HTTPException: With status 400 if rendering the HTML raises any exception.
    """
    logging.info("Received request to generate PDF")
    try:
        # NOTE(review): write_pdf() is called synchronously inside this
        # coroutine, so the event loop is occupied while it renders.
        # Consider offloading to a worker thread if that becomes a problem.
        byte_string = HTML(string=body.html).write_pdf()
    except Exception as e:
        # logging.exception keeps the traceback so the real cause is not lost.
        logging.exception("Error generating PDF: %s", e)
        raise HTTPException(status_code=400, detail="Invalid HTML input") from e

    # Trim first, then decide: a whitespace-only filename is truthy but strips
    # to "", which would otherwise yield a download literally named ".pdf".
    requested_name = (body.filename or "").strip()
    filename = sanitize_filename(requested_name) if requested_name else "hinpdof"

    headers = {
        "Content-Type": "application/pdf",
        # Safe to interpolate: the sanitizer leaves only [A-Za-z0-9_-], so the
        # name cannot contain quotes or line breaks.
        "Content-Disposition": f'attachment; filename="{filename}.pdf"',
    }
    logging.info("PDF generated successfully: %s.pdf", filename)
    return StreamingResponse(pdf_generator(byte_string), headers=headers)
@app.get("/health")
async def health_check():
    """
    Report that the application is up.

    Always responds with the fixed payload ``{"status": "ok"}``; no
    dependencies are checked.

    Returns:
        JSONResponse: A JSON response with the status of the application.
    """
    payload = {"status": "ok"}
    return JSONResponse(content=payload)