273 lines
No EOL
8.6 KiB
PowerShell
273 lines
No EOL
8.6 KiB
PowerShell
# Positional script parameters. Their meaning depends on the mode, which is
# selected by the value of $param2:
#
#   Auto-insert mode ($param2 is the literal "auto-insert-breaks"):
#     $param1 - Markdown file that is read and rewritten in place
#     $param3 - target number of characters per page (3000 when omitted)
#
#   SVG creation mode (any other $param2):
#     $param1 - input HTML file
#     $param2 - base output path; page N is written to "<base>-N.svg"
#     $param3 - page width  (emitted as millimetres in the SVG)
#     $param4 - page height (emitted as millimetres in the SVG)
#     $param5 - zoom factor applied via an SVG scale() transform
#
#   $param6 is accepted but not read anywhere in the visible script.
param($param1, $param2, $param3, $param4, $param5, $param6)
|
|
|
|
# Function to fix HTML tags for XML compatibility
#
# Rewrites HTML void elements (elements that never have a closing tag) into
# XML self-closing form so the markup can be embedded in an SVG
# <foreignObject>, e.g. <br> -> <br/> and <img src="a/b.png"> -> <img src="a/b.png"/>.
#
# Parameters:
#   $html - HTML text to fix (string; $null is treated as empty by -replace).
# Returns:
#   The HTML text with every not-yet-self-closed void element self-closed.
#   Matching is case-insensitive and the emitted tag name is lower-case.
#   Tags that already end in "/>" are left untouched.
#
# Limitation: this is regex based, so a literal ">" inside an attribute value
# ends the tag match early.
function Fix-HtmlTags {
    param($html)

    # HTML void elements (plus the legacy <param>); none of these may have content.
    $voidTags = 'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
                'link', 'meta', 'param', 'source', 'track', 'wbr'

    foreach ($tag in $voidTags) {
        # (?=[\s/>])  - the tag name must end here, so "col" does not match "colgroup"
        # ([^>]*?)    - attributes; "/" is allowed so values such as src="img/a.png" match
        # \s*(?<!/)>  - closing ">" that is not already preceded by "/"
        $pattern = '<' + $tag + '(?=[\s/>])([^>]*?)\s*(?<!/)>'
        $replacement = '<' + $tag + '${1}/>'
        $html = $html -replace $pattern, $replacement
    }

    return $html
}
|
|
|
|
# Detect mode based on parameters.
# $param2 doubles as the mode switch: the literal "auto-insert-breaks" selects
# the Markdown rewriting mode; any other value is treated as the base output
# path of SVG creation mode.
$mode = $param2

# UTF-8 without a byte-order mark, used for every file this script writes.
$utf8NoBom = New-Object System.Text.UTF8Encoding $false

if ($mode -eq "auto-insert-breaks") {
    # Auto-insert page breaks mode:
    # strips existing <!-- PAGE_BREAK --> markers from the Markdown file and
    # inserts new ones before headings once the running character count of the
    # current page exceeds the target. Exits 0 on success, 1 on any error.
    $mdFile = $param1

    Write-Host "=== Auto-inserting page breaks into $mdFile ==="

    try {
        # Converted inside the try so a non-numeric value is reported like any other error.
        $charsPerPage = if ($param3) { [int]$param3 } else { 3000 }

        Write-Host "Target: $charsPerPage chars per page"
        Write-Host ""

        # .NET file APIs resolve relative paths against the process working
        # directory, which can differ from PowerShell's current location, so
        # work with a fully qualified path for both reading and writing.
        $mdPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($mdFile)

        # -ErrorAction Stop turns a missing/unreadable file into a terminating
        # error so it reaches the catch block below.
        $content = Get-Content -LiteralPath $mdPath -Raw -Encoding UTF8 -ErrorAction Stop
        if ($null -eq $content) {
            # Get-Content -Raw yields $null for an empty file.
            $content = ""
        }

        # Preserve the file's own line-ending convention when rebuilding it.
        $newline = if ($content.Contains("`r`n")) { "`r`n" } else { "`n" }

        # Step 1: Remove all existing PAGE_BREAK markers AND surrounding blank lines
        Write-Host "Removing existing PAGE_BREAK markers..."
        $originalBreakCount = ([regex]::Matches($content, '<!--\s*PAGE_BREAK\s*-->')).Count

        # Remove: blank line + PAGE_BREAK + blank line (any combination),
        # from the most specific form down to a bare inline marker.
        $content = $content -replace '(?m)^\s*\r?\n\s*<!--\s*PAGE_BREAK\s*-->\s*\r?\n\s*\r?\n', $newline
        $content = $content -replace '(?m)^\s*<!--\s*PAGE_BREAK\s*-->\s*\r?\n\s*\r?\n', $newline
        $content = $content -replace '(?m)^\s*\r?\n\s*<!--\s*PAGE_BREAK\s*-->\s*\r?\n', $newline
        $content = $content -replace '(?m)^\s*<!--\s*PAGE_BREAK\s*-->\s*\r?\n', $newline
        $content = $content -replace '<!--\s*PAGE_BREAK\s*-->', ''

        Write-Host " Removed $originalBreakCount existing markers"

        # Step 2: Extract YAML front matter (preserve it).
        # Both delimiters must be a "---" line of their own, so a "---" that
        # appears in the middle of a front-matter value does not end the block.
        $yamlMatch = [regex]::Match($content, '(?s)\A---[ \t]*\r?\n(?:.*?\r?\n)?---[ \t]*(?:\r?\n|\z)\s*')
        $yaml = ""
        $bodyContent = $content

        if ($yamlMatch.Success) {
            $yaml = $yamlMatch.Value
            $bodyContent = $content.Substring($yamlMatch.Length)
            Write-Host "Preserved YAML front matter"
        }

        # Step 3: HTML comments are excluded from the character count.
        # The pattern cannot span lines, so it is applied per line in the loop.
        $commentPattern = '<!--(?!.*PAGE_BREAK).*?-->'

        # Step 4: Split by lines and calculate where to insert breaks
        $lines = @($bodyContent -split "`r?`n")

        # A generic list avoids re-copying the whole array on every append.
        $newLines = New-Object 'System.Collections.Generic.List[string]'
        $charCount = 0
        $breakCount = 0
        $needsBreak = $false
        $fence = $null   # opening marker of the fenced code block we are inside, if any

        for ($i = 0; $i -lt $lines.Count; $i++) {
            $line = $lines[$i]
            $lineChars = ($line -replace $commentPattern, '').Length

            # Check if we've exceeded the character limit (counting only non-comment text)
            if (($charCount + $lineChars) -gt $charsPerPage -and $newLines.Count -gt 0) {
                $needsBreak = $true
            }

            # Track fenced code blocks (``` or ~~~) so that "# ..." lines inside
            # them are not mistaken for headings.
            if ($line -match '^ {0,3}(`{3,}|~{3,})') {
                $marker = $Matches[1]
                if ($null -eq $fence) {
                    $fence = $marker
                } elseif ($marker[0] -eq $fence[0] -and
                          $marker.Length -ge $fence.Length -and
                          $line.Trim() -eq $marker) {
                    # A closing fence uses the same character, is at least as
                    # long as the opener and carries no other text.
                    $fence = $null
                }
            }

            # ONLY insert break before headings (most safe location)
            if ($needsBreak -and $null -eq $fence -and $line -match '^\s*#+\s') {
                # Insert break before this heading
                $newLines.Add("")
                $newLines.Add("<!-- PAGE_BREAK -->")
                $newLines.Add("")

                $breakCount++
                Write-Host " Inserted break #$breakCount before heading at line $i (after $charCount chars)"
                $charCount = 0
                $needsBreak = $false
            }

            $newLines.Add($line)
            $charCount += $lineChars
        }

        # Step 5: Reconstruct the file
        $newContent = $yaml + ($newLines.ToArray() -join $newline)

        # Step 6: Write back to file
        [System.IO.File]::WriteAllText($mdPath, $newContent, $utf8NoBom)

        Write-Host ""
        Write-Host "SUCCESS!"
        Write-Host " Inserted $breakCount new page breaks"
        Write-Host " This will create $($breakCount + 1) pages"
        Write-Host " File updated: $mdFile"

        exit 0

    } catch {
        Write-Host "ERROR: $_"
        Write-Host $_.Exception.Message
        exit 1
    }

} else {
    # SVG creation mode:
    # splits the HTML body at <!-- PAGE_BREAK --> markers and writes one SVG per
    # page ("<base>-N.svg"), each embedding its page as XHTML in a
    # <foreignObject>. Exits 0 on success, 1 on any error.
    $htmlFile = $param1
    $baseOutputPath = $param2
    $pageWidth = $param3
    $pageHeight = $param4
    $zoom = $param5

    Write-Host "=== Processing HTML with manual page breaks ==="

    try {
        # Validate the numeric inputs up front; a missing or zero zoom would
        # otherwise surface as a divide-by-zero or a nonsensical SVG.
        $pageWidthNum = [double]$pageWidth
        $pageHeightNum = [double]$pageHeight
        $zoomNum = [double]$zoom

        if ($pageWidthNum -le 0 -or $pageHeightNum -le 0 -or $zoomNum -le 0) {
            throw "Page width, height and zoom must be positive numbers (got width='$pageWidth', height='$pageHeight', zoom='$zoom')."
        }

        # Resolve against PowerShell's location and make read failures terminating
        # so they are handled by the catch block.
        $htmlPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($htmlFile)
        $htmlContent = Get-Content -LiteralPath $htmlPath -Raw -Encoding UTF8 -ErrorAction Stop
        if ($null -eq $htmlContent) {
            # Get-Content -Raw yields $null for an empty file.
            $htmlContent = ""
        }

        # FIX: Apply XML tag fixes to entire HTML first
        Write-Host "Fixing HTML tags for XML compatibility..."
        $htmlContent = Fix-HtmlTags $htmlContent

        # Extract styles: concatenate the contents of every <style> element.
        $allStyles = ""
        $styleMatches = [regex]::Matches($htmlContent, '(?si)<style[^>]*>(.*?)</style>')
        foreach ($match in $styleMatches) {
            $allStyles += $match.Groups[1].Value + "`n"
        }

        # Flatten a nested "li { p { ... } }" rule into a plain "li p" rule.
        $allStyles = $allStyles -replace 'li\s*\{\s*p\s*\{[^}]*\}\s*\}', 'li p { margin: 0px; }'

        # Extract body content; fall back to the whole document when there is no <body>.
        $bodyContent = ""
        if ($htmlContent -match '(?si)<body[^>]*>(.*)</body>') {
            $bodyContent = $Matches[1]
        } else {
            $bodyContent = $htmlContent
        }

        # Split by PAGE_BREAK markers
        $pageBreakPattern = '<!--\s*PAGE_BREAK\s*-->'
        $pages = [regex]::Split($bodyContent, $pageBreakPattern)

        $totalPages = $pages.Count
        Write-Host "Found $totalPages pages"

        # Calculate dimensions: the content is laid out in a box shrunk by the
        # zoom factor and then scaled back up by the scale() transform.
        $scaledWidth = $pageWidthNum / $zoomNum
        $scaledHeight = $pageHeightNum / $zoomNum
        $contentWidth = $pageWidthNum

        # NOTE(review): <body> itself carries class="page-content", so the
        # descendant selectors ".page-content html" / ".page-content body" in the
        # template below look like they can never match it. They are left
        # byte-for-byte as-is because activating them would change the rendered
        # layout - confirm the intent before switching to "body.page-content".

        # Create each page as a separate SVG
        for ($pageNum = 1; $pageNum -le $totalPages; $pageNum++) {
            $svgFile = "${baseOutputPath}-${pageNum}.svg"
            $pageContent = $pages[$pageNum - 1]

            Write-Host "Creating page $pageNum of $totalPages..."

            $svgContent = @"
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg"
     xmlns:xlink="http://www.w3.org/1999/xlink"
     width="${pageWidthNum}mm"
     height="${pageHeightNum}mm"
     viewBox="0 0 ${pageWidthNum} ${pageHeightNum}">

  <defs>
    <clipPath id="page-clip">
      <rect x="0" y="0" width="${pageWidthNum}" height="${pageHeightNum}" />
    </clipPath>
  </defs>

  <g clip-path="url(#page-clip)">
    <g transform="scale(${zoomNum})">
      <foreignObject x="0" y="0" width="${scaledWidth}" height="${scaledHeight}">
        <html xmlns="http://www.w3.org/1999/xhtml">
          <head>
            <meta charset="UTF-8"/>
            <style>
              /* SCOPED STYLES - only affect this foreignObject */
              .page-content * {
                box-sizing: border-box;
              }

              .page-content html,
              .page-content body {
                margin: 0;
                padding: 0;
                overflow: visible;
              }

              .page-content body {
                width: ${contentWidth}mm;
                max-width: ${contentWidth}mm;
                min-height: ${scaledHeight}mm;
                padding: 5mm;
              }

              .page-content p,
              .page-content h1,
              .page-content h2,
              .page-content h3,
              .page-content h4,
              .page-content h5,
              .page-content h6,
              .page-content li,
              .page-content td,
              .page-content th,
              .page-content div,
              .page-content span,
              .page-content a {
                max-width: 100%;
                overflow-wrap: break-word;
                word-wrap: break-word;
                word-break: break-word;
              }

              .page-content table {
                width: 100%;
                max-width: 100%;
                table-layout: fixed;
              }

              $allStyles
            </style>
          </head>
          <body class="page-content">
            $pageContent
          </body>
        </html>
      </foreignObject>
    </g>
  </g>

</svg>
"@

            # Write via a fully qualified path so a relative base path lands
            # relative to PowerShell's location, not the process working directory.
            $svgPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($svgFile)
            [System.IO.File]::WriteAllText($svgPath, $svgContent, $utf8NoBom)

            Write-Host " Created: $(Split-Path -Leaf $svgFile)"
        }

        Write-Host "SUCCESS: Created $totalPages SVG files"
        exit 0

    } catch {
        Write-Host "ERROR: $_"
        Write-Host $_.Exception.Message
        exit 1
    }
}