references/convert_md_to_svg_html_only_dependency.ps1

273 lines
No EOL
8.6 KiB
PowerShell

# Positional arguments. Their meaning depends on the mode, which is chosen by
# the value of $param2:
#
#   Auto-insert-breaks mode ($param2 equals "auto-insert-breaks"):
#     $param1  Markdown file that is rewritten in place
#     $param3  target number of characters per page (3000 when omitted)
#
#   SVG creation mode (any other $param2):
#     $param1  input HTML file
#     $param2  base output path; page N is written to "<base>-N.svg"
#     $param3  page width  (emitted as millimetres in the SVG header)
#     $param4  page height (emitted as millimetres in the SVG header)
#     $param5  zoom factor applied through an SVG scale() transform
#
# $param6 is accepted but not read by the code shown in this file.
param($param1, $param2, $param3, $param4, $param5, $param6)
# Rewrites HTML void elements as self-closed tags (<br> becomes <br/>) so the
# markup can be embedded in an XML document such as an SVG <foreignObject>.
#
# Parameter:
#   $html  HTML text to convert.
# Returns:
#   The text with every opening void tag terminated by "/>". Tag names are
#   emitted in lower case (matching is case-insensitive); attribute text is
#   copied through verbatim. Tags that are already self-closed keep their
#   attributes and are only normalised (e.g. "<br />" becomes "<br/>").
#
# Limitation: the rewrite is regex based, so a literal ">" inside a quoted
# attribute value ends the match early.
function Fix-HtmlTags {
    param($html)

    # The HTML void elements, i.e. the tags that never have a closing tag.
    $voidTags = 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
                'link', 'meta', 'source', 'track', 'wbr'

    foreach ($tag in $voidTags) {
        # (?=[\s/>]) stops "<col" from matching "<colgroup", "<link" from
        # matching "<linker", and so on.
        # [^>]*? deliberately allows "/" inside the attribute text: URLs and
        # paths such as src="images/a.png" are the common case, and excluding
        # "/" would leave those tags unclosed.
        # The optional "/" before ">" makes the rewrite idempotent.
        $html = $html -replace "<${tag}(?=[\s/>])\s*([^>]*?)\s*/?>", "<${tag} `$1/>"

        # Attribute-less tags leave the pass above as "<br />"; tighten them
        # to the compact "<br/>" form.
        $html = $html -replace "<${tag}\s+/>", "<${tag}/>"
    }

    return $html
}
# ---------------------------------------------------------------------------
# Main script body. The mode is selected by $param2:
#
#   "auto-insert-breaks"  Rewrites the Markdown file $param1 in place: strips
#                         existing <!-- PAGE_BREAK --> markers, then inserts a
#                         new marker before the first heading reached after
#                         roughly $param3 characters (default 3000).
#   anything else         SVG creation: reads the HTML file $param1, splits
#                         its body at <!-- PAGE_BREAK --> markers and writes
#                         one "<$param2>-<n>.svg" file per page.
#
# Exit code: 0 on success, 1 when the selected mode fails inside its try block.
# ---------------------------------------------------------------------------

# Converts a PowerShell path (relative or absolute, existing or not) into an
# absolute file-system path. Needed because the .NET file APIs used below
# resolve relative paths against the process working directory, which is not
# necessarily the current PowerShell location.
function Resolve-FullPath {
    param($path)
    return $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($path)
}

# Detect mode based on parameters
$mode = $param2
if ($mode -eq "auto-insert-breaks") {
    # Auto-insert page breaks mode
    $mdFile = $param1
    $charsPerPage = if ($param3) { [int]$param3 } else { 3000 }
    Write-Host "=== Auto-inserting page breaks into $mdFile ==="
    Write-Host "Target: $charsPerPage chars per page"
    Write-Host ""
    try {
        # Read and write through the same absolute path. -ErrorAction Stop
        # turns "file not found" into a terminating error so the catch block
        # below actually sees it.
        $mdPath = Resolve-FullPath $mdFile
        $content = Get-Content -LiteralPath $mdPath -Raw -Encoding UTF8 -ErrorAction Stop
        if ($null -eq $content) {
            # Get-Content -Raw yields $null for an empty file.
            $content = ''
        }

        # Step 1: Remove all existing PAGE_BREAK markers AND surrounding blank lines
        Write-Host "Removing existing PAGE_BREAK markers..."
        $originalBreakCount = ([regex]::Matches($content, '<!--\s*PAGE_BREAK\s*-->')).Count
        # Remove: blank line + PAGE_BREAK + blank line (any combination),
        # most specific pattern first; the last replace catches inline markers.
        $content = $content -replace '(?m)^\s*\r?\n\s*<!--\s*PAGE_BREAK\s*-->\s*\r?\n\s*\r?\n', "`n"
        $content = $content -replace '(?m)^\s*<!--\s*PAGE_BREAK\s*-->\s*\r?\n\s*\r?\n', "`n"
        $content = $content -replace '(?m)^\s*\r?\n\s*<!--\s*PAGE_BREAK\s*-->\s*\r?\n', "`n"
        $content = $content -replace '(?m)^\s*<!--\s*PAGE_BREAK\s*-->\s*\r?\n', "`n"
        $content = $content -replace '<!--\s*PAGE_BREAK\s*-->', ''
        Write-Host " Removed $originalBreakCount existing markers"

        # Step 2: Extract YAML front matter (preserve it)
        $yamlMatch = [regex]::Match($content, '(?s)^---.*?---\s*')
        $yaml = ""
        $bodyContent = $content
        if ($yamlMatch.Success) {
            $yaml = $yamlMatch.Value
            $bodyContent = $content.Substring($yamlMatch.Length)
            Write-Host "Preserved YAML front matter"
        }

        # Step 3: Remove HTML comments from body for character counting.
        # Without (?s) the pattern only removes comments that open and close
        # on one line, so both splits below have the same number of lines.
        $bodyWithoutComments = $bodyContent -replace '<!--(?!.*PAGE_BREAK).*?-->', ''

        # Step 4: Split by lines and calculate where to insert breaks
        $lines = @($bodyContent -split "`r?`n")
        $linesWithoutComments = @($bodyWithoutComments -split "`r?`n")
        # A List avoids re-allocating the whole array on every appended line.
        $newLines = New-Object 'System.Collections.Generic.List[string]'
        $charCount = 0
        $breakCount = 0
        $needsBreak = $false
        # Opening fence run (e.g. "```") while inside a fenced code block,
        # $null otherwise.
        $openFence = $null

        for ($i = 0; $i -lt $lines.Count; $i++) {
            $line = $lines[$i]
            $lineWithoutComments = if ($i -lt $linesWithoutComments.Count) { $linesWithoutComments[$i] } else { $line }
            $lineChars = $lineWithoutComments.Length

            # Track fenced code blocks: a "# ..." line inside a fence is code
            # (for example a shell comment), not a heading, and a marker
            # inserted there would end up inside the code block.
            $fence = [regex]::Match($line, '^\s*(`{3,}|~{3,})(.*)$')
            if ($fence.Success) {
                $run = $fence.Groups[1].Value
                $rest = $fence.Groups[2].Value
                if ($null -eq $openFence) {
                    # A backtick fence cannot have backticks in its info
                    # string; such a line is inline code, not a fence.
                    if (-not ($run.StartsWith('`') -and $rest.Contains('`'))) {
                        $openFence = $run
                    }
                } elseif ($run[0] -eq $openFence[0] -and
                          $run.Length -ge $openFence.Length -and
                          $rest.Trim().Length -eq 0) {
                    # Closing fence: same character, at least as long,
                    # nothing but whitespace after it.
                    $openFence = $null
                }
            }

            # Check if we've exceeded the character limit (counting only non-comment text)
            if (($charCount + $lineChars) -gt $charsPerPage -and $newLines.Count -gt 0) {
                $needsBreak = $true
            }

            # ONLY insert break before headings (most safe location)
            if ($needsBreak) {
                $isHeading = ($null -eq $openFence) -and ($line -match '^\s*#+\s')
                if ($isHeading) {
                    # Insert break before this heading
                    $newLines.Add("")
                    $newLines.Add("<!-- PAGE_BREAK -->")
                    $newLines.Add("")
                    $breakCount++
                    Write-Host " Inserted break #$breakCount before heading at line $i (after $charCount chars)"
                    $charCount = 0
                    $needsBreak = $false
                }
            }

            $newLines.Add($line)
            $charCount += $lineChars
        }

        # Step 5: Reconstruct the file
        $newContent = $yaml + ($newLines -join "`n")

        # Step 6: Write back to file (UTF-8 without BOM)
        $utf8NoBom = New-Object System.Text.UTF8Encoding $false
        [System.IO.File]::WriteAllText($mdPath, $newContent, $utf8NoBom)
        Write-Host ""
        Write-Host "SUCCESS!"
        Write-Host " Inserted $breakCount new page breaks"
        Write-Host " This will create $($breakCount + 1) pages"
        Write-Host " File updated: $mdFile"
        exit 0
    } catch {
        Write-Host "ERROR: $_"
        Write-Host $_.Exception.Message
        exit 1
    }
} else {
    # SVG creation mode
    $htmlFile = $param1
    $baseOutputPath = $param2
    $pageWidth = $param3
    $pageHeight = $param4
    $zoom = $param5
    Write-Host "=== Processing HTML with manual page breaks ==="
    try {
        # -ErrorAction Stop: a missing input file must fail here instead of
        # silently producing a single empty page.
        $htmlPath = Resolve-FullPath $htmlFile
        $htmlContent = Get-Content -LiteralPath $htmlPath -Raw -Encoding UTF8 -ErrorAction Stop
        if ($null -eq $htmlContent) {
            # Get-Content -Raw yields $null for an empty file.
            $htmlContent = ''
        }

        # FIX: Apply XML tag fixes to entire HTML first
        Write-Host "Fixing HTML tags for XML compatibility..."
        $htmlContent = Fix-HtmlTags $htmlContent

        # Extract styles: concatenate the contents of every <style> element
        $allStyles = ""
        $styleMatches = [regex]::Matches($htmlContent, '(?si)<style[^>]*>(.*?)</style>')
        foreach ($match in $styleMatches) {
            $allStyles += $match.Groups[1].Value + "`n"
        }
        # Flatten a nested "li { p { ... } }" rule into a plain "li p" rule.
        $allStyles = $allStyles -replace 'li\s*\{\s*p\s*\{[^}]*\}\s*\}', 'li p { margin: 0px; }'

        # Extract body content (fall back to the whole document without <body>)
        $bodyContent = ""
        if ($htmlContent -match '(?si)<body[^>]*>(.*)</body>') {
            $bodyContent = $matches[1]
        } else {
            $bodyContent = $htmlContent
        }

        # Split by PAGE_BREAK markers
        $pageBreakPattern = '<!--\s*PAGE_BREAK\s*-->'
        $pages = [regex]::Split($bodyContent, $pageBreakPattern)
        $totalPages = $pages.Count
        Write-Host "Found $totalPages pages"

        $pageWidthNum = [double]$pageWidth
        $pageHeightNum = [double]$pageHeight
        $zoomNum = [double]$zoom
        # A missing argument casts to 0; reject it with a readable message
        # instead of a bare divide-by-zero error.
        if ($pageWidthNum -le 0 -or $pageHeightNum -le 0 -or $zoomNum -le 0) {
            throw "Page width, page height and zoom must be positive numbers (got width='$pageWidth', height='$pageHeight', zoom='$zoom')"
        }

        # Calculate dimensions: the content is drawn inside scale($zoomNum),
        # so the foreignObject is sized in pre-scale units.
        $scaledWidth = $pageWidthNum / $zoomNum
        $scaledHeight = $pageHeightNum / $zoomNum
        $contentWidth = $pageWidthNum

        # Loop-invariant: resolve the output base and build the encoder once.
        $outputBase = Resolve-FullPath $baseOutputPath
        $utf8NoBom = New-Object System.Text.UTF8Encoding $false

        # Create each page as a separate SVG
        for ($pageNum = 1; $pageNum -le $totalPages; $pageNum++) {
            $svgFile = "$outputBase-$pageNum.svg"
            $pageContent = $pages[$pageNum - 1]
            Write-Host "Creating page $pageNum of $totalPages..."
            # NOTE(review): the ".page-content html" / ".page-content body"
            # rules below select elements *inside* .page-content, but the
            # class is set on <body> itself, so they look like they never
            # match. Left untouched because changing them would alter the
            # rendered output - confirm the intended layout before editing.
            $svgContent = @"
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink"
width="${pageWidthNum}mm"
height="${pageHeightNum}mm"
viewBox="0 0 ${pageWidthNum} ${pageHeightNum}">
<defs>
<clipPath id="page-clip">
<rect x="0" y="0" width="${pageWidthNum}" height="${pageHeightNum}" />
</clipPath>
</defs>
<g clip-path="url(#page-clip)">
<g transform="scale(${zoomNum})">
<foreignObject x="0" y="0" width="${scaledWidth}" height="${scaledHeight}">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="UTF-8"/>
<style>
/* SCOPED STYLES - only affect this foreignObject */
.page-content * {
box-sizing: border-box;
}
.page-content html,
.page-content body {
margin: 0;
padding: 0;
overflow: visible;
}
.page-content body {
width: ${contentWidth}mm;
max-width: ${contentWidth}mm;
min-height: ${scaledHeight}mm;
padding: 5mm;
}
.page-content p,
.page-content h1,
.page-content h2,
.page-content h3,
.page-content h4,
.page-content h5,
.page-content h6,
.page-content li,
.page-content td,
.page-content th,
.page-content div,
.page-content span,
.page-content a {
max-width: 100%;
overflow-wrap: break-word;
word-wrap: break-word;
word-break: break-word;
}
.page-content table {
width: 100%;
max-width: 100%;
table-layout: fixed;
}
$allStyles
</style>
</head>
<body class="page-content">
$pageContent
</body>
</html>
</foreignObject>
</g>
</g>
</svg>
"@
            [System.IO.File]::WriteAllText($svgFile, $svgContent, $utf8NoBom)
            Write-Host " Created: $(Split-Path -Leaf $svgFile)"
        }
        Write-Host "SUCCESS: Created $totalPages SVG files"
        exit 0
    } catch {
        Write-Host "ERROR: $_"
        Write-Host $_.Exception.Message
        exit 1
    }
}