From 350c3f616036dbeb218f518a8dfa9bf13a1c9e09 Mon Sep 17 00:00:00 2001 From: Joakim Date: Thu, 11 Sep 2025 16:43:40 +0200 Subject: [PATCH] feat: implement minimal server-first markdown processing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backend implementation: - Add goldmark dependency for markdown processing - Create MarkdownProcessor with minimal config (bold, italic, links only) - Update content injector with HTML injection capabilities - Add injectHTMLContent() for safe DOM manipulation - Server now converts **bold**, *italic*, [links](url) to HTML during enhancement Frontend alignment: - Restrict marked.js to match server capabilities - Disable unsupported features (headings, lists, code blocks, tables) - Update turndown rules to prevent unsupported markdown generation - Frontend editor preview now matches server output exactly Server as source of truth: - Build-time markdown→HTML conversion during enhancement - Zero runtime overhead for end users - Consistent formatting between editor preview and final output - Raw markdown stored in database, HTML served to visitors Tested features: - **bold** → bold ✅ - *italic* → italic ✅ - [text](url) → text ✅ --- go.mod | 1 + go.sum | 2 + internal/content/enhancer.go | 2 +- internal/content/injector.go | 101 +++++++++++++++++++++++++++---- internal/content/markdown.go | 67 +++++++++++++++++++++ lib/src/utils/markdown.js | 113 ++++++++++++++++++++++++++++++----- 6 files changed, 256 insertions(+), 30 deletions(-) create mode 100644 internal/content/markdown.go diff --git a/go.mod b/go.mod index a70f35b..6ea5f5a 100644 --- a/go.mod +++ b/go.mod @@ -26,6 +26,7 @@ require ( github.com/spf13/cast v1.6.0 // indirect github.com/spf13/pflag v1.0.5 // indirect github.com/subosito/gotenv v1.6.0 // indirect + github.com/yuin/goldmark v1.7.8 // indirect go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.9.0 // indirect golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // 
indirect diff --git a/go.sum b/go.sum index ea71084..0c94992 100644 --- a/go.sum +++ b/go.sum @@ -63,6 +63,8 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/yuin/goldmark v1.7.8 h1:iERMLn0/QJeHFhxSt3p6PeN9mGnvIKSpG9YYorDMnic= +github.com/yuin/goldmark v1.7.8/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E= go.uber.org/atomic v1.9.0 h1:ECmE8Bn/WFTYwEW/bpKD3M8VtR/zQVbavAoalC1PYyE= go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= diff --git a/internal/content/enhancer.go b/internal/content/enhancer.go index c428406..986c370 100644 --- a/internal/content/enhancer.go +++ b/internal/content/enhancer.go @@ -108,7 +108,7 @@ func (e *Enhancer) findAndInjectNodes(rootNode *html.Node, elem parser.Element, } // Inject content attributes for the correctly matched node - e.injector.addContentAttributes(targetNode, elem.ContentID, string(elem.Type)) + e.injector.AddContentAttributes(targetNode, elem.ContentID, string(elem.Type)) // Inject content if available if contentItem != nil { diff --git a/internal/content/injector.go b/internal/content/injector.go index b64e1ce..de78060 100644 --- a/internal/content/injector.go +++ b/internal/content/injector.go @@ -2,6 +2,7 @@ package content import ( "fmt" + "log" "strings" "golang.org/x/net/html" @@ -9,15 +10,17 @@ import ( // Injector handles content injection into HTML elements type Injector struct { - client ContentClient - siteID string + client ContentClient + siteID string + mdProcessor *MarkdownProcessor } // NewInjector creates a new content injector func NewInjector(client ContentClient, siteID string) *Injector 
{ return &Injector{ - client: client, - siteID: siteID, + client: client, + siteID: siteID, + mdProcessor: NewMarkdownProcessor(), } } @@ -31,7 +34,7 @@ func (i *Injector) InjectContent(element *Element, contentID string) error { // If no content found, keep original content but add data attributes if contentItem == nil { - i.addContentAttributes(element.Node, contentID, element.Type) + i.AddContentAttributes(element.Node, contentID, element.Type) return nil } @@ -48,7 +51,7 @@ func (i *Injector) InjectContent(element *Element, contentID string) error { } // Add data attributes for editor functionality - i.addContentAttributes(element.Node, contentID, element.Type) + i.AddContentAttributes(element.Node, contentID, element.Type) return nil } @@ -72,7 +75,7 @@ func (i *Injector) InjectBulkContent(elements []ElementWithID) error { contentItem, exists := contentMap[elem.ContentID] // Add content attributes regardless - i.addContentAttributes(elem.Element.Node, elem.ContentID, elem.Element.Type) + i.AddContentAttributes(elem.Element.Node, elem.ContentID, elem.Element.Type) if !exists { // Keep original content if not found in database @@ -112,11 +115,23 @@ func (i *Injector) injectTextContent(node *html.Node, content string) { node.AppendChild(textNode) } -// injectMarkdownContent handles markdown content (for now, just as text) +// injectMarkdownContent handles markdown content - converts markdown to HTML func (i *Injector) injectMarkdownContent(node *html.Node, content string) { - // For now, treat markdown as text content - // TODO: Implement markdown to HTML conversion - i.injectTextContent(node, content) + if content == "" { + i.injectTextContent(node, "") + return + } + + // Convert markdown to HTML using server processor + htmlContent, err := i.mdProcessor.ToHTML(content) + if err != nil { + log.Printf("⚠️ Markdown conversion failed for content '%s': %v, falling back to text", content, err) + i.injectTextContent(node, content) + return + } + + // Inject the HTML 
content + i.injectHTMLContent(node, htmlContent) } // injectLinkContent handles link/button content with URL extraction @@ -126,8 +141,68 @@ func (i *Injector) injectLinkContent(node *html.Node, content string) { i.injectTextContent(node, content) } -// addContentAttributes adds necessary data attributes and insertr class for editor functionality -func (i *Injector) addContentAttributes(node *html.Node, contentID string, contentType string) { +// injectHTMLContent safely injects HTML content into a DOM node +func (i *Injector) injectHTMLContent(node *html.Node, htmlContent string) { + // Clear existing content + i.clearNode(node) + + if htmlContent == "" { + return + } + + // Wrap content to create valid HTML document for parsing + wrappedHTML := "<div>" + htmlContent + "</div>
" + + // Parse HTML string + doc, err := html.Parse(strings.NewReader(wrappedHTML)) + if err != nil { + log.Printf("Failed to parse HTML content '%s': %v, falling back to text", htmlContent, err) + i.injectTextContent(node, htmlContent) + return + } + + // Find the wrapper div and move its children to target node + wrapper := i.findElementByTag(doc, "div") + if wrapper == nil { + log.Printf("Could not find wrapper div in parsed HTML") + return + } + + // Move parsed nodes to target element + for child := wrapper.FirstChild; child != nil; { + next := child.NextSibling + wrapper.RemoveChild(child) + node.AppendChild(child) + child = next + } +} + +// clearNode removes all child nodes from a given node +func (i *Injector) clearNode(node *html.Node) { + for child := node.FirstChild; child != nil; { + next := child.NextSibling + node.RemoveChild(child) + child = next + } +} + +// findElementByTag finds the first element with the specified tag name +func (i *Injector) findElementByTag(node *html.Node, tag string) *html.Node { + if node.Type == html.ElementNode && node.Data == tag { + return node + } + + for child := node.FirstChild; child != nil; child = child.NextSibling { + if found := i.findElementByTag(child, tag); found != nil { + return found + } + } + + return nil +} + +// AddContentAttributes adds necessary data attributes and insertr class for editor functionality +func (i *Injector) AddContentAttributes(node *html.Node, contentID string, contentType string) { i.setAttribute(node, "data-content-id", contentID) i.setAttribute(node, "data-content-type", contentType) i.addClass(node, "insertr") diff --git a/internal/content/markdown.go b/internal/content/markdown.go new file mode 100644 index 0000000..8490fc5 --- /dev/null +++ b/internal/content/markdown.go @@ -0,0 +1,67 @@ +package content + +import ( + "bytes" + "log" + + "github.com/yuin/goldmark" + "github.com/yuin/goldmark/parser" + "github.com/yuin/goldmark/renderer/html" + "github.com/yuin/goldmark/util" +) 
+ +// MarkdownProcessor handles minimal markdown processing +// Supports only: **bold**, *italic*, and [link](url) +type MarkdownProcessor struct { + parser goldmark.Markdown +} + +// NewMarkdownProcessor creates a new markdown processor with minimal configuration +func NewMarkdownProcessor() *MarkdownProcessor { + // Configure goldmark to only support basic inline formatting + md := goldmark.New( + goldmark.WithParserOptions( + parser.WithInlineParsers( + // Bold (**text**) and italic (*text*) - same parser handles both + util.Prioritized(parser.NewEmphasisParser(), 500), + + // Links [text](url) + util.Prioritized(parser.NewLinkParser(), 600), + ), + // Disable all block parsers except paragraph (no headings, lists, etc.) + parser.WithBlockParsers( + util.Prioritized(parser.NewParagraphParser(), 200), + ), + ), + goldmark.WithRendererOptions( + html.WithXHTML(), //
<br /> instead of <br>
+ html.WithHardWraps(), // Line breaks become <br />
+ html.WithUnsafe(), // Allow existing HTML to pass through + ), + ) + + return &MarkdownProcessor{parser: md} +} + +// ToHTML converts markdown string to HTML +func (mp *MarkdownProcessor) ToHTML(markdown string) (string, error) { + if markdown == "" { + return "", nil + } + + var buf bytes.Buffer + if err := mp.parser.Convert([]byte(markdown), &buf); err != nil { + log.Printf("Markdown conversion failed: %v", err) + return "", err + } + + html := buf.String() + + // Clean up goldmark's paragraph wrapping - we want inline content + // Remove

<p> and </p> tags if the content is wrapped in a single paragraph + if len(html) > 7 && html[:3] == "<p>" && html[len(html)-4:] == "</p>
" { + html = html[3 : len(html)-4] + } + + return html, nil +} diff --git a/lib/src/utils/markdown.js b/lib/src/utils/markdown.js index c319b20..1fa4538 100644 --- a/lib/src/utils/markdown.js +++ b/lib/src/utils/markdown.js @@ -14,32 +14,75 @@ export class MarkdownConverter { } /** - * Configure marked for HTML output + * Configure marked for HTML output - MINIMAL MODE + * Only supports: **bold**, *italic*, and [links](url) + * Matches server-side goldmark configuration */ initializeMarked() { marked.setOptions({ - gfm: true, // GitHub Flavored Markdown - breaks: true, // Convert \n to
<br> + gfm: false, // Disable GFM to match server minimal mode + breaks: true, // Convert \n to <br>
(matches server) pedantic: false, // Don't be overly strict sanitize: false, // Allow HTML (we control the input) - smartLists: true, // Smarter list behavior + smartLists: false, // Disable lists (not supported on server) smartypants: false // Don't convert quotes/dashes }); + + // Override renderers to restrict to minimal feature set + marked.use({ + renderer: { + // Disable headings - treat as plain text + heading(text, level) { + return text; + }, + // Disable lists - treat as plain text + list(body, ordered, start) { + return body.replace(/<\/?li>/g, ''); + }, + listitem(text) { + return text + '\n'; + }, + // Disable code blocks - treat as plain text + code(code, language) { + return code; + }, + blockquote(quote) { + return quote; // Disable blockquotes - treat as plain text + }, + // Disable horizontal rules + hr() { + return ''; + }, + // Disable tables + table(header, body) { + return header + body; + }, + tablecell(content, flags) { + return content; + }, + tablerow(content) { + return content; + } + } + }); } /** - * Configure turndown for markdown output + * Configure turndown for markdown output - MINIMAL MODE + * Only supports: **bold**, *italic*, and [links](url) + * Matches server-side goldmark configuration */ initializeTurndown() { this.turndown = new TurndownService({ - headingStyle: 'atx', // # headers instead of underlines - hr: '---', // horizontal rule style - bulletListMarker: '-', // bullet list marker - codeBlockStyle: 'fenced', // ``` code blocks - fence: '```', // fence marker - emDelimiter: '*', // emphasis delimiter - strongDelimiter: '**', // strong delimiter - linkStyle: 'inlined', // [text](url) instead of reference style + // Minimal configuration - only basic formatting + headingStyle: 'atx', // # headers (but will be disabled) + hr: '---', // horizontal rule (but will be disabled) + bulletListMarker: '-', // bullet list (but will be disabled) + codeBlockStyle: 'fenced', // code blocks (but will be disabled) + fence: '```', // 
fence marker (but will be disabled) + emDelimiter: '*', // *italic* - matches server + strongDelimiter: '**', // **bold** - matches server + linkStyle: 'inlined', // [text](url) - matches server linkReferenceStyle: 'full' // full reference links }); @@ -48,7 +91,9 @@ export class MarkdownConverter { } /** - * Add custom turndown rules for better HTML → Markdown conversion + * Add custom turndown rules - MINIMAL MODE + * Only supports: **bold**, *italic*, and [links](url) + * Disables all other formatting to match server */ addTurndownRules() { // Handle paragraph spacing properly - ensure double newlines between paragraphs @@ -60,7 +105,7 @@ export class MarkdownConverter { } }); - // Handle bold text in markdown + // Handle bold text in markdown - keep this (supported) this.turndown.addRule('bold', { filter: ['strong', 'b'], replacement: function (content) { @@ -69,7 +114,7 @@ export class MarkdownConverter { } }); - // Handle italic text in markdown + // Handle italic text in markdown - keep this (supported) this.turndown.addRule('italic', { filter: ['em', 'i'], replacement: function (content) { @@ -77,6 +122,42 @@ export class MarkdownConverter { return '*' + content + '*'; } }); + + // DISABLE unsupported features - convert to plain text + this.turndown.addRule('disableHeadings', { + filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'], + replacement: function (content) { + return content; // Just return text content, no # markup + } + }); + + this.turndown.addRule('disableLists', { + filter: ['ul', 'ol', 'li'], + replacement: function (content) { + return content; // Just return text content, no list markup + } + }); + + this.turndown.addRule('disableCode', { + filter: ['pre', 'code'], + replacement: function (content) { + return content; // Just return text content, no code markup + } + }); + + this.turndown.addRule('disableBlockquotes', { + filter: 'blockquote', + replacement: function (content) { + return content; // Just return text content, no > markup + } + }); 
+ + this.turndown.addRule('disableHR', { + filter: 'hr', + replacement: function () { + return ''; // Remove horizontal rules entirely + } + }); } /**