This commit is contained in:
62
.dockerignore
Normal file
62
.dockerignore
Normal file
@@ -0,0 +1,62 @@
|
||||
# Ignore Python bytecode files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
# Ignore distribution / packaging files
|
||||
build/
|
||||
dist/
|
||||
*.egg-info/
|
||||
.eggs/
|
||||
wheels/
|
||||
|
||||
# Ignore virtual environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Ignore test and coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Ignore Jupyter Notebook checkpoints
|
||||
.ipynb_checkpoints
|
||||
|
||||
# Ignore IPython profile directories
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# Ignore pyenv files
|
||||
.python-version
|
||||
|
||||
# Keep the Poetry lock file in the build context: the Dockerfile copies
# pyproject.toml and poetry.lock to install pinned dependencies, and the
# COPY step fails when the lock file is excluded here.
!poetry.lock
|
||||
|
||||
# Ignore VS Code settings
|
||||
.vscode/
|
||||
|
||||
# Ignore Dockerfile and Dockerignore itself
|
||||
Dockerfile
|
||||
.dockerignore
|
||||
|
||||
# Ignore Git files
|
||||
.git/
|
||||
.gitignore
|
||||
|
||||
# Ignore logs and temporary files
|
||||
*.log
|
||||
*.tmp
|
||||
34
.github/workflows/build.yaml
vendored
Normal file
34
.github/workflows/build.yaml
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
# Builds the Docker image on every push and pull request targeting main.
# The image is only pushed to the registry for pushes to main; pull requests
# get a build-only run so they never publish ":latest" and never need the
# registry secret.
name: Build and Publish Docker Image

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Skipped for pull requests: nothing is pushed, so no login is needed.
      - name: Login to Docker registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: git.nice.net.nz
          username: hads
          password: ${{ secrets.PAT }}

      - name: Build and push Docker image
        uses: docker/build-push-action@v6
        with:
          context: .
          # Build always; publish only when the event is not a pull request.
          push: ${{ github.event_name != 'pull_request' }}
          tags: git.nice.net.nz/hads/hinpdof:latest
|
||||
97
.gitignore
vendored
Normal file
97
.gitignore
vendored
Normal file
@@ -0,0 +1,97 @@
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# poetry
# poetry.lock is deliberately tracked (not ignored): the Dockerfile copies it
# to install pinned dependency versions, so it must exist in a fresh checkout.
#poetry.lock
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# VS Code
|
||||
.vscode/
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/python
|
||||
25
.pre-commit-config.yaml
Normal file
25
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,25 @@
|
||||
# pre-commit configuration: generic file checks, ruff lint/format, and the
# test suite as a pre-push gate.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: check-toml
      - id: check-json
      - id: check-merge-conflict
      - id: debug-statements
      - id: end-of-file-fixer
      - id: trailing-whitespace
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.7.3
    hooks:
      - id: ruff
        args: [--fix]
      - id: ruff-format
  - repo: local
    hooks:
      - id: tests
        name: run tests
        require_serial: true
        # The test module (test_app.py) lives at the repository root, so let
        # pytest discover it instead of pointing at a "tests" directory.
        entry: pytest -v
        language: system
        types: [python]
        # Run the whole suite once; do not append the changed file names to
        # the pytest command line.
        pass_filenames: false
        stages: [pre-push]
|
||||
57
Dockerfile
Normal file
57
Dockerfile
Normal file
@@ -0,0 +1,57 @@
|
||||
# Shared base so the build and runtime stages use the same Python image.
FROM python:3.13-slim AS base

# ---- builder: install the locked dependencies into /app/.venv ----
FROM base AS builder

# Poetry is pinned for reproducible builds; --no-cache-dir keeps pip's
# download cache out of the layer.
RUN pip install --no-cache-dir poetry==1.8.3

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONPATH=/app \
    POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_IN_PROJECT=1 \
    POETRY_VIRTUALENVS_CREATE=1 \
    POETRY_CACHE_DIR=/tmp/poetry_cache

WORKDIR /app

# Copy only the dependency manifests so this layer stays cached until the
# dependencies themselves change.
COPY pyproject.toml poetry.lock ./

RUN poetry install --without dev --no-root --compile \
    && rm -rf "${POETRY_CACHE_DIR}"

# ---- runtime: system libraries, the virtualenv and the application ----
FROM base

# ARG is scoped to a single stage, so it is declared here, where the apt
# commands actually run. Being an ARG, it is not kept in the runtime env.
ARG DEBIAN_FRONTEND="noninteractive"

# Shared libraries required by WeasyPrint. apt-get is used because apt does
# not offer a stable command-line interface for scripts.
RUN apt-get update \
    && apt-get dist-upgrade -y \
    && apt-get install -y --no-install-recommends \
        libharfbuzz-subset0 \
        libpango-1.0-0 \
        libpangoft2-1.0-0 \
    && rm -rf /var/lib/apt/lists/*

RUN adduser --system --uid 1000 --group app
USER app

WORKDIR /app

# ENV values set in the builder stage do not carry over to this stage, so
# unbuffered output (for container logs) is enabled here as well.
ENV PYTHONUNBUFFERED=1 \
    VIRTUAL_ENV=/app/.venv \
    PATH="/app/.venv/bin:$PATH"

COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}

COPY app.py ./

ARG PORT=8080
ENV PORT=$PORT
EXPOSE $PORT

# Probe the application's own /health endpoint with the Python standard
# library, since the slim image ships neither curl nor wget.
HEALTHCHECK --interval=30s --timeout=5s --start-period=10s --retries=3 \
    CMD ["python", "-c", "import os, urllib.request; urllib.request.urlopen('http://127.0.0.1:%s/health' % os.environ.get('PORT', '8080'), timeout=3)"]

# The app is served with uvicorn, the ASGI server declared in pyproject.toml
# (gunicorn is not a project dependency, so it is absent from the virtualenv).
# "sh -c" is needed to expand ${PORT} at runtime; "exec" makes uvicorn replace
# the shell so it receives container stop signals directly.
CMD ["sh", "-c", \
     "exec uvicorn app:app \
        --host 0.0.0.0 \
        --port ${PORT} \
        --workers 2 \
        --proxy-headers \
        --forwarded-allow-ips '*'"]
|
||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2023 Bear Su
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
50
README.md
Normal file
50
README.md
Normal file
@@ -0,0 +1,50 @@
|
||||
# hinpdof
|
||||
|
||||
`hinpdof` is a FastAPI-based app that converts HTML content to PDF using WeasyPrint.
|
||||
|
||||
## Features
|
||||
|
||||
- Convert HTML content to PDF
|
||||
- Customizable PDF filenames
|
||||
- Health check endpoint
|
||||
|
||||
## Requirements
|
||||
|
||||
- Python 3.12+
|
||||
- FastAPI
|
||||
- WeasyPrint
|
||||
- Uvicorn
|
||||
|
||||
## Installation
|
||||
|
||||
1. Clone the repository:
|
||||
|
||||
```sh
|
||||
git clone https://git.nice.net.nz/hads/hinpdof.git
|
||||
cd hinpdof
|
||||
```
|
||||
|
||||
2. Install dependencies using Poetry:
|
||||
|
||||
```sh
|
||||
poetry install
|
||||
```
|
||||
|
||||
3. Run the application:
|
||||
|
||||
```sh
|
||||
poetry run uvicorn app:app --reload
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Convert HTML to PDF
|
||||
|
||||
Send a POST request to `/pdf` with the following JSON body:
|
||||
|
||||
```json
|
||||
{
|
||||
"html": "<h1>Hello, World!</h1>",
|
||||
"filename": "testfile"
|
||||
}
|
||||
```
|
||||
102
app.py
Normal file
102
app.py
Normal file
@@ -0,0 +1,102 @@
|
||||
import io
|
||||
import logging
|
||||
import re
|
||||
|
||||
from fastapi import FastAPI, HTTPException
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.responses import JSONResponse, StreamingResponse
|
||||
from pydantic import BaseModel, Field
|
||||
from weasyprint import HTML
|
||||
|
||||
# Initialize logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
|
||||
|
||||
class PdfRequest(BaseModel):
    # Required, non-empty HTML document to render.
    html: str = Field(
        ...,
        min_length=1,
        description="HTML content to convert to PDF",
    )
    # Optional base name for the generated file; None when not supplied.
    filename: str | None = None
|
||||
|
||||
|
||||
app = FastAPI()

# CORS: any origin may call the API. Credentials are disabled because a
# wildcard origin must not be combined with credentialed requests, and the
# endpoints in this file use neither cookies nor authentication.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
|
||||
# Pre-compiled pattern matching every character that is not an ASCII letter,
# a digit, an underscore or a hyphen.
FILENAME_SANITIZE_REGEX = re.compile(r"[^a-zA-Z0-9_\-]")


def sanitize_filename(filename: str) -> str:
    """
    Return ``filename`` with every disallowed character replaced by "_".

    Args:
        filename (str): The filename as supplied by the caller.

    Returns:
        str: The filename containing only ASCII letters, digits, "_" and "-".
    """
    cleaned = re.sub(FILENAME_SANITIZE_REGEX, "_", filename)
    return cleaned
|
||||
|
||||
|
||||
async def pdf_generator(byte_string: bytes):
    """
    Asynchronously yield the PDF content in consecutive 4096-byte pieces.

    Args:
        byte_string (bytes): The complete PDF content.

    Yields:
        bytes: Successive slices of at most 4096 bytes; nothing is yielded
            for empty input.
    """
    chunk_size = 4096
    for offset in range(0, len(byte_string), chunk_size):
        yield byte_string[offset : offset + chunk_size]
|
||||
|
||||
|
||||
@app.post("/pdf")
async def pdf(body: PdfRequest):
    """
    Convert the HTML in the request body to a PDF and return it as a download.

    Args:
        body (PdfRequest): Request body containing HTML content and an optional filename.

    Returns:
        StreamingResponse: The generated PDF, sent as an attachment named
            ``<filename>.pdf`` ("hinpdof.pdf" when no usable filename is given).

    Raises:
        HTTPException: With status 400 when rendering the HTML raises an error.
    """
    logging.info("Received request to generate PDF")
    try:
        # NOTE(review): body.html is caller-supplied and the renderer may fetch
        # resources referenced by the document -- confirm whether a restrictive
        # url_fetcher is needed for this deployment.
        byte_string = HTML(string=body.html).write_pdf()
    except Exception as e:
        # logging.exception records the traceback; "from e" keeps the cause
        # attached to the HTTP error.
        logging.exception("Error generating PDF: %s", e)
        raise HTTPException(status_code=400, detail="Invalid HTML input") from e

    # Fall back to the default name when the filename is missing or blank, so a
    # whitespace-only value cannot produce a nameless ".pdf" attachment.
    requested_name = (body.filename or "").strip()
    filename = sanitize_filename(requested_name) if requested_name else "hinpdof"

    headers = {
        "Content-Type": "application/pdf",
        "Content-Disposition": f'attachment; filename="{filename}.pdf"',
    }

    logging.info("PDF generated successfully: %s.pdf", filename)
    return StreamingResponse(pdf_generator(byte_string), headers=headers)
|
||||
|
||||
|
||||
@app.get("/health")
async def health_check():
    """
    Report that the application is up and able to answer requests.

    Returns:
        JSONResponse: A JSON body of ``{"status": "ok"}``.
    """
    status_payload = {"status": "ok"}
    return JSONResponse(content=status_payload)
|
||||
31
pyproject.toml
Normal file
31
pyproject.toml
Normal file
@@ -0,0 +1,31 @@
|
||||
[tool.poetry]
name = "hinpdof"
version = "0.1.0"
license = "MIT"
description = "Uses FastAPI to expose a REST API which takes HTML as input and converts to PDF output using Weasyprint"
authors = ["Hadley Rich <hads@nice.net.nz>"]
readme = "README.md"
package-mode = false

[tool.poetry.dependencies]
python = "^3.12"
fastapi = "^0.115.4"
weasyprint = "^63.0"
uvicorn = "^0.32.0"

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.3"
httpx = "^0.27.2"
pytest-cov = "^4.0.0"
# Matches the ruff-pre-commit rev (v0.7.3) in .pre-commit-config.yaml so the
# local ruff and the pre-commit hook apply the same rules.
ruff = "^0.7.3"
pre-commit = "^3.4.0"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.ruff]
line-length = 88
target-version = "py312"

# Rule selection belongs under the "lint" table; the top-level form is
# deprecated in current ruff releases.
[tool.ruff.lint]
select = ["E", "F", "W", "C", "N", "B"]
ignore = ["E501"]
|
||||
82
test_app.py
Normal file
82
test_app.py
Normal file
@@ -0,0 +1,82 @@
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from app import app
|
||||
|
||||
|
||||
@pytest.fixture
def client():
    """Provide a TestClient bound to the FastAPI application."""
    test_client = TestClient(app)
    return test_client
|
||||
|
||||
|
||||
def test_health_check(client):
    """GET /health answers 200 with the fixed status payload."""
    resp = client.get("/health")
    expected_body = {"status": "ok"}
    assert resp.status_code == 200
    assert resp.json() == expected_body
|
||||
|
||||
|
||||
def test_pdf_generation(client):
    """A valid request returns a PDF attachment named after the given filename."""
    payload = {"html": "<h1>Hello, World!</h1>", "filename": "testfile"}
    expected_disposition = 'attachment; filename="testfile.pdf"'

    resp = client.post("/pdf", json=payload)

    assert resp.status_code == 200
    assert resp.headers["Content-Type"] == "application/pdf"
    assert resp.headers["Content-Disposition"] == expected_disposition
|
||||
|
||||
|
||||
def test_pdf_generation_default_filename(client):
    """An explicit null filename falls back to the default attachment name."""
    payload = {"html": "<h1>Hello, World!</h1>", "filename": None}
    expected_disposition = 'attachment; filename="hinpdof.pdf"'

    resp = client.post("/pdf", json=payload)

    assert resp.status_code == 200
    assert resp.headers["Content-Type"] == "application/pdf"
    assert resp.headers["Content-Disposition"] == expected_disposition
|
||||
|
||||
|
||||
def test_pdf_generation_invalid_html(client):
    """An empty html string is rejected by request validation."""
    payload = {"html": "", "filename": "testfile"}
    resp = client.post("/pdf", json=payload)
    # Unprocessable Entity due to invalid input
    assert resp.status_code == 422
|
||||
|
||||
|
||||
def test_pdf_generation_missing_html(client):
    """A request without the html field is rejected by request validation."""
    payload = {"filename": "testfile"}
    resp = client.post("/pdf", json=payload)
    # Unprocessable Entity due to missing required field
    assert resp.status_code == 422
|
||||
|
||||
|
||||
def test_pdf_generation_large_html(client):
    """A heading repeated 1000 times is still converted successfully."""
    repeated_text = "Hello, World! " * 1000
    payload = {"html": f"<h1>{repeated_text}</h1>", "filename": "largefile"}
    expected_disposition = 'attachment; filename="largefile.pdf"'

    resp = client.post("/pdf", json=payload)

    assert resp.status_code == 200
    assert resp.headers["Content-Type"] == "application/pdf"
    assert resp.headers["Content-Disposition"] == expected_disposition
|
||||
|
||||
|
||||
def test_pdf_generation_invalid_filename(client):
    """A slash in the filename is replaced by an underscore in the header."""
    payload = {"html": "<h1>Hello, World!</h1>", "filename": "invalid/filename"}
    expected_disposition = 'attachment; filename="invalid_filename.pdf"'

    resp = client.post("/pdf", json=payload)

    assert resp.status_code == 200
    assert resp.headers["Content-Type"] == "application/pdf"
    assert resp.headers["Content-Disposition"] == expected_disposition
|
||||
|
||||
|
||||
def test_pdf_generation_missing_filename(client):
    """Omitting the filename field falls back to the default attachment name."""
    payload = {"html": "<h1>Hello, World!</h1>"}
    expected_disposition = 'attachment; filename="hinpdof.pdf"'

    resp = client.post("/pdf", json=payload)

    assert resp.status_code == 200
    assert resp.headers["Content-Type"] == "application/pdf"
    assert resp.headers["Content-Disposition"] == expected_disposition
|
||||
Reference in New Issue
Block a user