From b7998a4b3c8e2f985b47b69d63796f15fda13bb6 Mon Sep 17 00:00:00 2001 From: Joakim Date: Fri, 19 Sep 2025 16:03:05 +0200 Subject: [PATCH] feat: Implement HTML-first style preservation system - Add StyleContext class for extracting and applying HTML attributes/styles - Enhance MarkdownConverter with style-aware conversion methods - Switch backend storage from markdown to HTML with 'html' content type - Update editor workflow to preserve CSS classes, IDs, and attributes - Maintain markdown editing UX while storing HTML for style preservation - Support complex attributes like rel, data-*, aria-*, etc. This enables editing styled content like text while preserving all styling attributes through the markdown editing process. --- AGENTS.md | 7 +- internal/api/handlers.go | 4 +- internal/engine/injector.go | 2 + lib/src/core/editor.js | 32 ++++- lib/src/ui/editor.js | 38 ++++-- lib/src/utils/markdown.js | 203 +++++++++++++++++++++++++++- lib/src/utils/style-context.js | 238 +++++++++++++++++++++++++++++++++ 7 files changed, 498 insertions(+), 26 deletions(-) create mode 100644 lib/src/utils/style-context.js diff --git a/AGENTS.md b/AGENTS.md index 850581a..23748a1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,14 +1,11 @@ # AGENTS.md - Developer Guide for Insertr ## Build/Test Commands -Let me handle running and building the server. I run just dev in a different terminal, so you can just test the api directly, without spinning up an instance. - `just dev` - Full-stack development (recommended) - `just build` - Build entire project (Go binary + JS library) - `just build-lib` - Build JS library only -- `just test` - Run tests (placeholder, no actual tests yet) -- `just lint` - Run linting (placeholder, no actual linting yet) -- `just air` - Hot reload Go backend only -- `go test ./...` - Run Go tests (when available) + +For running and testing our application read our justfile. 
## Code Style Guidelines diff --git a/internal/api/handlers.go b/internal/api/handlers.go index dd9877c..923b457 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -299,13 +299,13 @@ func (h *ContentHandler) CreateContent(w http.ResponseWriter, r *http.Request) { } } - // Determine content type: use provided type, fallback to existing type, default to "text" + // Determine content type: use provided type, fallback to existing type, default to "html" contentType := req.Type if contentType == "" && contentExists { contentType = h.getContentType(existingContent) } if contentType == "" { - contentType = "text" // default type for new content + contentType = "html" // default type for new content (changed from "text") } var content interface{} diff --git a/internal/engine/injector.go b/internal/engine/injector.go index 6c9b9e0..6729b42 100644 --- a/internal/engine/injector.go +++ b/internal/engine/injector.go @@ -103,6 +103,8 @@ func (i *Injector) InjectBulkContent(elements []ElementWithID) error { i.injectTextContent(elem.Element.Node, contentItem.Value) case "markdown": i.injectMarkdownContent(elem.Element.Node, contentItem.Value) + case "html": + i.injectHTMLContent(elem.Element.Node, contentItem.Value) case "link": i.injectLinkContent(elem.Element.Node, contentItem.Value) default: diff --git a/lib/src/core/editor.js b/lib/src/core/editor.js index fda7bed..90c2f6f 100644 --- a/lib/src/core/editor.js +++ b/lib/src/core/editor.js @@ -1,4 +1,5 @@ import { InsertrFormRenderer } from '../ui/form-renderer.js'; +import { markdownConverter } from '../utils/markdown.js'; /** * InsertrEditor - Content editing workflow and business logic @@ -114,16 +115,35 @@ export class InsertrEditor { try { // Extract content value based on type - let contentValue; + let markdownContent; if (meta.element.tagName.toLowerCase() === 'a') { // For links, save the text content (URL is handled separately if needed) - contentValue = formData.text || formData; + markdownContent 
= formData.text || formData; } else { - contentValue = formData.text || formData; + markdownContent = formData.text || formData; + } + + // Convert markdown to HTML with style preservation + let contentValue; + const contentType = this.determineContentType(meta.element); + + if (contentType === 'html') { + // Extract style context from original element and convert markdown to HTML + const { markdown, styleContext } = markdownConverter.htmlToMarkdownWithContext(meta.element.innerHTML, meta.element); + + if (styleContext && styleContext.hasPreservableContent) { + // Convert markdown back to HTML with style preservation + contentValue = markdownConverter.markdownToHtmlWithStyles(markdownContent, styleContext); + } else { + // No styles to preserve, simple conversion + contentValue = markdownConverter.markdownToHtml(markdownContent); + } + } else { + // For other content types (text, link), use markdown as-is + contentValue = markdownContent; } // Universal upsert - server handles ID extraction/generation from markup - const contentType = this.determineContentType(meta.element); const result = await this.apiClient.createContent( contentValue, contentType, @@ -155,8 +175,8 @@ export class InsertrEditor { return 'link'; } - // ALL text elements use markdown for consistent editing experience - return 'markdown'; + // ALL text elements use HTML storage with markdown editing interface + return 'html'; } handleCancel(meta) { diff --git a/lib/src/ui/editor.js b/lib/src/ui/editor.js index 97a9799..0689168 100644 --- a/lib/src/ui/editor.js +++ b/lib/src/ui/editor.js @@ -375,10 +375,11 @@ class EditContext { this.primaryElement = elements[0]; this.originalContent = null; this.currentContent = currentContent; + this.styleContext = null; // Store style context for preservation } /** - * Extract content from elements in markdown format + * Extract content from elements in markdown format with style preservation */ extractContent() { if (this.elements.length === 1) { @@ -386,22 
+387,30 @@ class EditContext { // Handle links specially if (element.tagName.toLowerCase() === 'a') { + // Extract with style context for links + const { markdown, styleContext } = markdownConverter.htmlToMarkdownWithContext(element.innerHTML, element); + this.styleContext = styleContext; + return { - text: markdownConverter.htmlToMarkdown(element.innerHTML), + text: markdown, url: element.href }; } - // Single element - convert to markdown - return markdownConverter.htmlToMarkdown(element.innerHTML); + // Single element - convert to markdown with style context + const { markdown, styleContext } = markdownConverter.htmlToMarkdownWithContext(element.innerHTML, element); + this.styleContext = styleContext; + return markdown; } else { - // Multiple elements - use group extraction - return markdownConverter.extractGroupMarkdown(this.elements); + // Multiple elements - use group extraction with style context + const { markdown, styleContext } = markdownConverter.extractGroupMarkdownWithContext(this.elements); + this.styleContext = styleContext; + return markdown; } } /** - * Apply content to elements from markdown/object + * Apply content to elements from markdown/object with style preservation */ applyContent(content) { if (this.elements.length === 1) { @@ -409,19 +418,24 @@ class EditContext { // Handle links specially if (element.tagName.toLowerCase() === 'a' && typeof content === 'object') { - element.innerHTML = markdownConverter.markdownToHtml(content.text || ''); + const html = this.styleContext ? + markdownConverter.markdownToHtmlWithStyles(content.text || '', this.styleContext) : + markdownConverter.markdownToHtml(content.text || ''); + element.innerHTML = html; if (content.url) { element.href = content.url; } return; } - // Single element - convert markdown to HTML - const html = markdownConverter.markdownToHtml(content); + // Single element - convert markdown to HTML with style restoration + const html = this.styleContext ? 
+ markdownConverter.markdownToHtmlWithStyles(content, this.styleContext) : + markdownConverter.markdownToHtml(content); element.innerHTML = html; } else { - // Multiple elements - use group update - markdownConverter.updateGroupElements(this.elements, content); + // Multiple elements - use group update with style preservation + markdownConverter.updateGroupElementsWithStyles(this.elements, content, this.styleContext); } } diff --git a/lib/src/utils/markdown.js b/lib/src/utils/markdown.js index 1fa4538..9229c64 100644 --- a/lib/src/utils/markdown.js +++ b/lib/src/utils/markdown.js @@ -1,8 +1,9 @@ /** - * Markdown conversion utilities using Marked and Turndown + * Markdown conversion utilities using Marked and Turndown with Style Preservation */ import { marked } from 'marked'; import TurndownService from 'turndown'; +import { StyleContext } from './style-context.js'; /** * MarkdownConverter - Handles bidirectional HTML ↔ Markdown conversion @@ -11,6 +12,7 @@ export class MarkdownConverter { constructor() { this.initializeMarked(); this.initializeTurndown(); + this.styleContext = new StyleContext(); } /** @@ -186,6 +188,37 @@ export class MarkdownConverter { } } + /** + * Convert HTML to Markdown with style context preservation + * @param {string} html - HTML string to convert + * @param {HTMLElement} originalElement - Original DOM element for context + * @returns {Object} - Object containing markdown and style context + */ + htmlToMarkdownWithContext(html, originalElement = null) { + if (!html || html.trim() === '') { + return { markdown: '', styleContext: null }; + } + + let styleContext = null; + + // Extract style context if original element provided + if (originalElement) { + styleContext = this.styleContext.extractStyleContext(originalElement); + } else { + // Create temporary element to analyze + const tempDiv = document.createElement('div'); + tempDiv.innerHTML = html; + styleContext = this.styleContext.extractStyleContext(tempDiv); + } + + const markdown = 
this.htmlToMarkdown(html); + + return { + markdown, + styleContext: styleContext.hasPreservableContent ? styleContext : null + }; + } + /** * Convert Markdown to HTML * @param {string} markdown - Markdown string to convert @@ -210,6 +243,51 @@ export class MarkdownConverter { } } + /** + * Convert Markdown to HTML with style context restoration + * @param {string} markdown - Markdown string to convert + * @param {Object} styleContext - Style context to restore + * @returns {string} - HTML string with styles restored + */ + markdownToHtmlWithStyles(markdown, styleContext) { + if (!markdown || markdown.trim() === '') { + return ''; + } + + // Convert markdown to basic HTML first + const basicHtml = this.markdownToHtml(markdown); + + // If no style context, return basic HTML + if (!styleContext || !this.styleContext.validateContext(styleContext)) { + return basicHtml; + } + + // Apply style context to the converted HTML + return this.applyStyleContextToHtml(basicHtml, styleContext); + } + + /** + * Apply style context to HTML string + * @param {string} html - HTML string to enhance + * @param {Object} styleContext - Style context to apply + * @returns {string} - Enhanced HTML with styles applied + */ + applyStyleContextToHtml(html, styleContext) { + try { + // Create temporary container + const tempDiv = document.createElement('div'); + tempDiv.innerHTML = html; + + // Apply style context + this.styleContext.applyStyleContext(tempDiv, styleContext); + + return tempDiv.innerHTML; + } catch (error) { + console.warn('Failed to apply style context:', error); + return html; // Return original HTML on error + } + } + /** * Extract HTML content from a group of elements * @param {HTMLElement[]} elements - Array of DOM elements @@ -235,6 +313,73 @@ export class MarkdownConverter { return htmlParts.join('\n'); } + /** + * Extract HTML content with style context from a group of elements + * @param {HTMLElement[]} elements - Array of DOM elements + * @returns {Object} - Object 
with HTML content and combined style context + */ + extractGroupHTMLWithContext(elements) { + const htmlParts = []; + const allStyleContexts = []; + + elements.forEach((element, index) => { + // Extract style context for this element + const elementContext = this.styleContext.extractStyleContext(element); + if (elementContext.hasPreservableContent) { + allStyleContexts.push({ + index, + context: elementContext + }); + } + + // Extract HTML content + const html = element.innerHTML.trim(); + if (html) { + if (element.tagName.toLowerCase() === 'p') { + htmlParts.push(element.outerHTML); + } else { + htmlParts.push(`

${html}

`); + } + } + }); + + // Combine all style contexts + const combinedContext = this.combineStyleContexts(allStyleContexts); + + return { + html: htmlParts.join('\n'), + styleContext: combinedContext + }; + } + + /** + * Combine multiple style contexts into a single context + * @param {Array} styleContexts - Array of style contexts with index info + * @returns {Object} - Combined style context + */ + combineStyleContexts(styleContexts) { + if (styleContexts.length === 0) { + return null; + } + + const combinedMap = new Map(); + let hasContent = false; + + styleContexts.forEach(({ index, context }) => { + // Adjust paths to include element index + for (const [path, elementInfo] of context.elementMap) { + const adjustedPath = `${index}.${path}`; + combinedMap.set(adjustedPath, elementInfo); + hasContent = true; + } + }); + + return hasContent ? { + elementMap: combinedMap, + hasPreservableContent: true + } : null; + } + /** * Convert HTML content from group elements to markdown * @param {HTMLElement[]} elements - Array of DOM elements @@ -246,6 +391,21 @@ export class MarkdownConverter { return markdown; } + /** + * Convert HTML content from group elements to markdown with style context + * @param {HTMLElement[]} elements - Array of DOM elements + * @returns {Object} - Object with markdown and style context + */ + extractGroupMarkdownWithContext(elements) { + const { html, styleContext } = this.extractGroupHTMLWithContext(elements); + const markdown = this.htmlToMarkdown(html); + + return { + markdown, + styleContext + }; + } + /** * Update group elements with markdown content * @param {HTMLElement[]} elements - Array of DOM elements to update @@ -282,6 +442,47 @@ export class MarkdownConverter { } } } + + /** + * Update group elements with markdown content and style context + * @param {HTMLElement[]} elements - Array of DOM elements to update + * @param {string} markdown - Markdown content to render + * @param {Object} styleContext - Style context to apply + */ + 
updateGroupElementsWithStyles(elements, markdown, styleContext) { + // Convert markdown to HTML with styles + const html = styleContext ? + this.markdownToHtmlWithStyles(markdown, styleContext) : + this.markdownToHtml(markdown); + + // Split HTML into paragraphs + const tempDiv = document.createElement('div'); + tempDiv.innerHTML = html; + + const paragraphs = Array.from(tempDiv.querySelectorAll('p, div, h1, h2, h3, h4, h5, h6')); + + // Handle case where we have more/fewer paragraphs than elements + const maxCount = Math.max(elements.length, paragraphs.length); + + for (let i = 0; i < maxCount; i++) { + if (i < elements.length && i < paragraphs.length) { + // Update existing element with corresponding paragraph + elements[i].innerHTML = paragraphs[i].innerHTML; + } else if (i < elements.length) { + // More elements than paragraphs - clear extra elements + elements[i].innerHTML = ''; + } else if (i < paragraphs.length) { + // More paragraphs than elements - create new element + const newElement = document.createElement('p'); + newElement.innerHTML = paragraphs[i].innerHTML; + + // Insert after the last existing element + const lastElement = elements[elements.length - 1]; + lastElement.parentNode.insertBefore(newElement, lastElement.nextSibling); + elements.push(newElement); // Add to our elements array for future updates + } + } + } } // Export singleton instance diff --git a/lib/src/utils/style-context.js b/lib/src/utils/style-context.js new file mode 100644 index 0000000..1cac7b9 --- /dev/null +++ b/lib/src/utils/style-context.js @@ -0,0 +1,238 @@ +/** + * Style Context Extraction System for Insertr + * + * Analyzes HTML elements to extract styling context for preservation + * during markdown editing. Focuses on attributes, classes, and inline styles + * that should be preserved when content is converted to/from markdown. 
/**
 * StyleContext - records which elements of an HTML fragment carry
 * preservable attributes so those attributes can be re-applied to freshly
 * generated HTML.
 *
 * Elements are addressed by their position in the element tree: a
 * dot-joined list of child indexes (e.g. "1.0" is the first child of the
 * second child). The element passed to extractStyleContext() itself is
 * addressed by the empty string.
 */
export class StyleContext {
    constructor() {
        // Attribute names preserved by exact (lower-case) match.
        // NOTE(review): `style` is not in this list, so inline styles are not
        // captured - confirm whether that is intentional.
        this.preservedAttributes = new Set([
            'class', 'id', 'rel', 'target', 'title', 'alt', 'href',
            'src', 'role', 'tabindex'
        ]);

        // Attribute families preserved by prefix match (data-*, aria-*).
        // Kept separate from the Set above because a wildcard string stored in
        // a Set can never equal a real attribute name.
        this.preservedPrefixes = ['data-', 'aria-'];
    }

    /**
     * Extract complete style context from an HTML element
     * @param {HTMLElement} element - The element to analyze
     * @returns {Object} Context with `elementMap` (Map of path -> element
     *   info, only for elements that have preservable attributes),
     *   `rootElement` (attribute-only structural clone of `element`) and
     *   `hasPreservableContent` (true when `elementMap` is non-empty)
     */
    extractStyleContext(element) {
        const context = {
            elementMap: new Map(),
            rootElement: this.cloneElementStructure(element),
            hasPreservableContent: false
        };

        this.analyzeElement(element, context, []);
        return context;
    }

    /**
     * Recursively analyze element and its descendants for style preservation
     * @param {HTMLElement} element - Current element
     * @param {Object} context - Style context being built (mutated in place)
     * @param {Array<string>} path - Child indexes leading to `element`
     */
    analyzeElement(element, context, path) {
        const elementInfo = this.extractElementInfo(element);

        if (elementInfo.hasPreservableAttributes) {
            context.hasPreservableContent = true;
            context.elementMap.set(path.join('.'), elementInfo);
        }

        // Recurse through the whole subtree; each level appends its child index.
        Array.from(element.children).forEach((child, index) => {
            this.analyzeElement(child, context, [...path, index.toString()]);
        });
    }

    /**
     * Extract styling information from a single element
     * @param {HTMLElement} element - Element to analyze
     * @returns {Object} Tag name, preserved attributes, whether any attribute
     *   was preserved, direct text content and whether it has child elements
     */
    extractElementInfo(element) {
        const attributes = this.extractAttributes(element);

        return {
            tagName: element.tagName.toLowerCase(),
            attributes,
            hasPreservableAttributes: Object.keys(attributes).length > 0,
            textContent: this.getDirectTextContent(element),
            hasChildren: element.children.length > 0
        };
    }

    /**
     * Extract relevant attributes from an element
     * @param {HTMLElement} element - Element to extract attributes from
     * @returns {Object} Lower-cased attribute name -> value, restricted to
     *   attributes accepted by shouldPreserveAttribute()
     */
    extractAttributes(element) {
        const attributes = {};

        for (const attr of element.attributes) {
            const name = attr.name.toLowerCase();

            if (this.shouldPreserveAttribute(name)) {
                attributes[name] = attr.value;
            }
        }

        return attributes;
    }

    /**
     * Check if an attribute should be preserved
     * @param {string} attributeName - Lower-case name of the attribute
     * @returns {boolean} True for an exact match in `preservedAttributes` or
     *   a name starting with one of `preservedPrefixes`
     */
    shouldPreserveAttribute(attributeName) {
        if (this.preservedAttributes.has(attributeName)) {
            return true;
        }

        return this.preservedPrefixes.some((prefix) => attributeName.startsWith(prefix));
    }

    /**
     * Get only the direct text content of an element (not from children)
     * @param {HTMLElement} element - Element to get text from
     * @returns {string} Concatenated text of the element's own text nodes, trimmed
     */
    getDirectTextContent(element) {
        // Numeric value of Node.TEXT_NODE; using the literal avoids depending
        // on the `Node` global, which only exists where a DOM is available.
        const TEXT_NODE = 3;

        let text = '';
        for (const node of element.childNodes) {
            if (node.nodeType === TEXT_NODE) {
                text += node.textContent;
            }
        }
        return text.trim();
    }

    /**
     * Create a structural clone of an element (attributes only, no content)
     * @param {HTMLElement} element - Element to clone structure of
     * @returns {Object} `{ tagName, attributes, children }`, recursively
     */
    cloneElementStructure(element) {
        return {
            tagName: element.tagName.toLowerCase(),
            attributes: this.extractAttributes(element),
            children: Array.from(element.children).map((child) =>
                this.cloneElementStructure(child)
            )
        };
    }

    /**
     * Apply style context back to an HTML element tree.
     *
     * Attributes are matched purely by tree position: the info stored under
     * path "1.0" is applied to whatever element sits at that position in
     * `element`, regardless of its tag name.
     *
     * @param {HTMLElement} element - Element to apply styles to
     * @param {Object} context - Style context to apply; a missing context or
     *   missing `elementMap` is treated as "nothing to apply"
     * @param {Array<string>} path - Current path in the element tree
     */
    applyStyleContext(element, context, path = []) {
        if (!element || !context || !context.elementMap) {
            return;
        }

        const elementInfo = context.elementMap.get(path.join('.'));
        if (elementInfo) {
            this.applyAttributes(element, elementInfo.attributes);
        }

        Array.from(element.children).forEach((child, index) => {
            this.applyStyleContext(child, context, [...path, index.toString()]);
        });
    }

    /**
     * Apply attributes to an element
     * @param {HTMLElement} element - Element to apply attributes to
     * @param {Object} attributes - Attribute name -> value; may be null/undefined
     */
    applyAttributes(element, attributes) {
        for (const [name, value] of Object.entries(attributes || {})) {
            element.setAttribute(name, value);
        }
    }

    /**
     * Generate markdown formatting options based on detected styles
     * @param {Object} context - Style context
     * @returns {Object} Options with fixed `preserve*` flags set to true and a
     *   `customElements` Map holding, per kind ('link', 'strong', 'span'), the
     *   attributes of the last matching element that has a class
     */
    generateFormattingOptions(context) {
        const options = {
            preserveLinks: true,
            preserveStrong: true,
            preserveEmphasis: true,
            customElements: new Map()
        };

        // Tag name -> key under which the element is registered.
        const customKeys = new Map([
            ['a', 'link'],
            ['strong', 'strong'],
            ['span', 'span']
        ]);

        for (const elementInfo of context.elementMap.values()) {
            const key = customKeys.get(elementInfo.tagName);

            if (key && elementInfo.attributes.class) {
                options.customElements.set(key, {
                    tagName: elementInfo.tagName,
                    attributes: elementInfo.attributes
                });
            }
        }

        return options;
    }

    /**
     * Decide whether a single attribute could execute script.
     * @param {string} name - Attribute name
     * @param {*} value - Attribute value; only strings are inspected
     * @returns {boolean} True for event-handler style names (`on...`), the
     *   name `javascript`, or a value containing a `javascript:` scheme
     */
    isDangerousAttribute(name, value) {
        const lowerName = String(name).toLowerCase();
        if (lowerName.startsWith('on') || lowerName === 'javascript') {
            return true;
        }

        if (typeof value !== 'string') {
            return false;
        }

        // URL parsing ignores letter case in the scheme and drops tab/newline
        // characters, so "JavaScript:" and "java\tscript:" must be caught too.
        return value.replace(/[\t\n\r]/g, '').toLowerCase().includes('javascript:');
    }

    /**
     * Validate that style context can be safely applied
     * @param {Object} context - Style context to validate
     * @returns {boolean} False when the context is missing, its `elementMap`
     *   is not a Map-like iterable, or any stored attribute is flagged by
     *   isDangerousAttribute(); true otherwise
     */
    validateContext(context) {
        if (!context || !context.elementMap) {
            return false;
        }

        const { elementMap } = context;
        // applyStyleContext() needs .get() and this method needs iteration;
        // reject anything else (e.g. a plain object) instead of throwing.
        if (typeof elementMap.get !== 'function' ||
            typeof elementMap[Symbol.iterator] !== 'function') {
            return false;
        }

        for (const [, elementInfo] of elementMap) {
            const attributes = (elementInfo && elementInfo.attributes) || {};

            for (const [attr, value] of Object.entries(attributes)) {
                if (this.isDangerousAttribute(attr, value)) {
                    console.warn(`Blocking potentially dangerous attribute: ${attr}="${value}"`);
                    return false;
                }
            }
        }

        return true;
    }
}

export default StyleContext;