/ 内不动。"""
out = []
skip_depth = 0
for token in _TAG_SPLIT.split(html):
if not token:
continue
if token.startswith("<"):
m = _TAG_NAME_RE.match(token)
if m:
closing, name = m.group(1), m.group(2).lower()
if name in _SKIP_TAGS and not token.rstrip().endswith("/>"):
skip_depth += -1 if closing else 1
skip_depth = max(0, skip_depth)
out.append(token)
else:
out.append(token if skip_depth else _emit_links(_emit_chem(token)))
return "".join(out)
def _read_sections(src: Path) -> str:
    """Return the Markdown source text found at *src*.

    A non-directory path is read as a single UTF-8 file.  A directory is
    treated as a collection of sections: every ``*.md`` entry directly
    inside it is read in sorted path order and the pieces are joined with
    one blank line between them.

    Args:
        src: Markdown file, or directory containing ``*.md`` files.

    Returns:
        The (possibly concatenated) Markdown text.

    Raises:
        SystemExit: *src* is a directory that contains no ``*.md`` entries.
    """
    # Single-file case first so the directory logic below stays flat.
    if not src.is_dir():
        return src.read_text(encoding="utf-8")

    chunks = []
    for section in sorted(src.glob("*.md")):
        chunks.append(section.read_text(encoding="utf-8"))
    if not chunks:
        raise SystemExit(f"[render_pdf] no *.md under {src}")
    return "\n\n".join(chunks)
def _css(color: bool) -> str:
    """Build the print stylesheet for the rendered document.

    Args:
        color: When true, headings, table header, zebra rows, blockquote
            fill and links take their colours from the module constants
            ``PRIMARY``, ``TABLE_HEAD_FILL``, ``TABLE_ZEBRA``, ``TLDR_FILL``
            and ``LINK_BLUE``.  When false, those slots fall back to plain
            black (``#000000``) or white (``#FFFFFF``).

    Returns:
        The CSS source text (A4 page setup plus element rules).
    """
    if color:
        accent, th_bg, stripe, quote_bg, anchor = (
            PRIMARY, TABLE_HEAD_FILL, TABLE_ZEBRA, TLDR_FILL, LINK_BLUE,
        )
    else:
        # Black-and-white variant: black ink, white fills.
        accent = th_bg = anchor = "#000000"
        stripe = quote_bg = "#FFFFFF"
    return f"""
@page {{ size: A4; margin: 2.2cm 2cm; }}
* {{ -webkit-print-color-adjust: exact; print-color-adjust: exact; }}
body {{ font-family: 'Times New Roman','Noto Serif CJK SC','Noto Sans CJK SC',serif;
font-size: 12pt; line-height: 1.6; color: #000; }}
h1 {{ font-family: 'Noto Sans CJK SC',sans-serif; font-size: 19pt; color: {accent};
border-bottom: 2px solid {accent}; padding-bottom: 4pt; margin: 22pt 0 12pt; }}
h2 {{ font-family: 'Noto Sans CJK SC',sans-serif; font-size: 15pt; color: {accent}; margin: 20pt 0 8pt; }}
h3 {{ font-family: 'Noto Sans CJK SC',sans-serif; font-size: 13pt; color: {accent}; margin: 16pt 0 6pt; }}
p {{ text-align: justify; margin: 6pt 0; }}
a {{ color: {anchor}; text-decoration: underline; word-break: break-all; }}
sub {{ font-size: 0.72em; }}
table {{ border-collapse: collapse; width: 100%; margin: 12pt 0; font-size: 10.5pt; }}
th {{ background: {th_bg}; color: #fff; padding: 6pt 8pt; border: 1px solid #999; text-align: center; }}
td {{ padding: 5pt 8pt; border: 1px solid #999; }}
tr:nth-child(even) td {{ background: {stripe}; }}
blockquote {{ border-left: 4px solid {accent}; background: {quote_bg}; margin: 12pt 0;
padding: 8pt 12pt; font-size: 11pt; }}
blockquote p {{ margin: 3pt 0; }}
code {{ font-family: Consolas,monospace; font-size: 10pt; background: #f5f5f5; padding: 1pt 3pt; }}
ul,ol {{ margin: 6pt 0; padding-left: 22pt; }}
li {{ margin: 3pt 0; }}
"""
def _find_chromium() -> str:
    """Locate a Chromium/Chrome executable to drive in headless mode.

    Candidates are tried in this order: the ``CHROMIUM`` environment
    variable (or ``CHROME`` when ``CHROMIUM`` is unset or empty), followed
    by a fixed list of common command names and absolute paths.  Each
    candidate is first resolved with ``shutil.which``; if that fails it is
    still accepted when it names an existing regular file.

    Returns:
        The path resolved by ``shutil.which``, or the candidate string
        itself when it was accepted as an existing file.

    Raises:
        SystemExit: No candidate could be resolved.
    """
    override = os.environ.get("CHROMIUM") or os.environ.get("CHROME")
    candidates = [override] if override else []
    candidates += ["chromium", "chromium-browser", "google-chrome",
                   "/usr/bin/chromium", "/usr/bin/chromium-browser"]
    for cand in candidates:
        resolved = shutil.which(cand)
        if resolved:
            return resolved
        # Require a regular file: a directory that merely shares the name
        # (e.g. ./chromium/ in the working directory) is not a browser binary
        # and would only fail later when handed to subprocess.
        if Path(cand).is_file():
            return cand
    raise SystemExit("[render_pdf] chromium 不在沙盒里(镜像应已装,给 mermaid 用)。"
                     "确认 `which chromium` 或设 CHROMIUM 环境变量。")
def md_to_pdf(src: Path, out: Path, *, color: bool = True, profile: str = "") -> Path:
    """Render the Markdown at *src* to a PDF at *out* via headless Chromium.

    The Markdown is converted to HTML with the ``markdown`` package, passed
    through ``_enrich_html``, wrapped in a standalone UTF-8 HTML document
    that embeds the stylesheet from ``_css(color)``, and printed to PDF by
    Chromium from a temporary directory.

    Args:
        src: Markdown file or directory of ``*.md`` sections, as accepted by
            ``_read_sections``.
        out: Destination PDF path.  Missing parent directories are created.
        color: Forwarded to ``_css`` to select the coloured or the
            black-and-white stylesheet.
        profile: Accepted for interface compatibility; this function does
            not currently read it.

    Returns:
        *out*, once the PDF has been written.

    Raises:
        SystemExit: The ``markdown`` package is missing, no Chromium binary
            is found, Chromium times out, exits non-zero, or produces no
            (or an empty) PDF.
    """
    try:
        import markdown
    except ImportError:
        raise SystemExit("[render_pdf] 缺 `markdown` 包。基础镜像应已装(requirements.txt);"
                         "本地补:.venv/Scripts/python.exe -m pip install markdown") from None

    md_text = _read_sections(src)
    body = markdown.markdown(
        md_text, extensions=["tables", "fenced_code", "sane_lists", "attr_list"]
    )
    body = _enrich_html(body)
    # Explicit charset so the browser does not have to guess the encoding of
    # the UTF-8 file written below; the stylesheet is inlined so the document
    # is self-contained.
    html = (
        "<!DOCTYPE html>\n"
        '<html><head><meta charset="utf-8">'
        f"<style>{_css(color)}</style></head>\n"
        f"<body>{body}</body></html>\n"
    )

    chromium = _find_chromium()
    out.parent.mkdir(parents=True, exist_ok=True)
    timeout_s = 120
    with tempfile.TemporaryDirectory(prefix="render-pdf-") as tmp:
        workdir = Path(tmp)
        html_path = workdir / "doc.html"
        # Print into the scratch directory first: a stale PDF already sitting
        # at *out* must neither mask a failed run nor be destroyed by one.
        pdf_path = workdir / "doc.pdf"
        html_path.write_text(html, encoding="utf-8")
        cmd = [
            chromium, "--headless", "--disable-gpu", "--no-sandbox",
            "--disable-dev-shm-usage", f"--user-data-dir={tmp}/cr",
            "--no-pdf-header-footer",
            f"--print-to-pdf={pdf_path}", html_path.as_uri(),
        ]
        try:
            proc = subprocess.run(cmd, capture_output=True, timeout=timeout_s, check=False)
        except subprocess.TimeoutExpired as exc:
            # Report a hang the same way as every other failure of this tool.
            tail = (exc.stderr or exc.stdout or b"").decode("utf-8", "replace")[-600:]
            raise SystemExit(f"[render_pdf] chromium 转 PDF 超时({timeout_s}s):\n{tail}") from None
        if proc.returncode != 0 or not pdf_path.exists() or pdf_path.stat().st_size == 0:
            tail = (proc.stderr or proc.stdout or b"").decode("utf-8", "replace")[-600:]
            raise SystemExit(f"[render_pdf] chromium 转 PDF 失败(rc={proc.returncode}):\n{tail}")
        # Move out before the temporary directory is cleaned up.
        shutil.move(str(pdf_path), str(out))
    return out