feat: Implement HTML-first style preservation system

- Add StyleContext class for extracting and applying HTML attributes/styles
- Enhance MarkdownConverter with style-aware conversion methods
- Switch backend storage from markdown to HTML with 'html' content type
- Update editor workflow to preserve CSS classes, IDs, and attributes
- Maintain markdown editing UX while storing HTML for style preservation
- Support complex attributes like rel, data-*, aria-*, etc.

This enables editing styled content like <a class="fancy" rel="me">text</a>
while preserving all styling attributes through the markdown editing process.
This commit is contained in:
2025-09-19 16:03:05 +02:00
parent 00c2ba34e6
commit b7998a4b3c
7 changed files with 498 additions and 26 deletions

View File

@@ -1,8 +1,9 @@
/**
* Markdown conversion utilities using Marked and Turndown
* Markdown conversion utilities using Marked and Turndown with Style Preservation
*/
import { marked } from 'marked';
import TurndownService from 'turndown';
import { StyleContext } from './style-context.js';
/**
* MarkdownConverter - Handles bidirectional HTML ↔ Markdown conversion
@@ -11,6 +12,7 @@ export class MarkdownConverter {
constructor() {
// Run the two library initializers first; both are defined elsewhere in this
// class (presumably they configure Marked and Turndown — confirm there).
this.initializeMarked();
this.initializeTurndown();
// Helper used by the style-aware methods of this class to extract,
// validate and re-apply preserved element attributes.
this.styleContext = new StyleContext();
}
/**
@@ -186,6 +188,37 @@ export class MarkdownConverter {
}
}
/**
* Convert HTML to Markdown with style context preservation
* @param {string} html - HTML string to convert
* @param {HTMLElement} originalElement - Original DOM element for context
* @returns {Object} - Object containing markdown and style context
*/
htmlToMarkdownWithContext(html, originalElement = null) {
if (!html || html.trim() === '') {
return { markdown: '', styleContext: null };
}
let styleContext = null;
// Extract style context if original element provided
if (originalElement) {
styleContext = this.styleContext.extractStyleContext(originalElement);
} else {
// Create temporary element to analyze
const tempDiv = document.createElement('div');
tempDiv.innerHTML = html;
styleContext = this.styleContext.extractStyleContext(tempDiv);
}
const markdown = this.htmlToMarkdown(html);
return {
markdown,
styleContext: styleContext.hasPreservableContent ? styleContext : null
};
}
/**
* Convert Markdown to HTML
* @param {string} markdown - Markdown string to convert
@@ -210,6 +243,51 @@ export class MarkdownConverter {
}
}
/**
* Convert Markdown to HTML with style context restoration
* @param {string} markdown - Markdown string to convert
* @param {Object} styleContext - Style context to restore
* @returns {string} - HTML string with styles restored
*/
markdownToHtmlWithStyles(markdown, styleContext) {
if (!markdown || markdown.trim() === '') {
return '';
}
// Convert markdown to basic HTML first
const basicHtml = this.markdownToHtml(markdown);
// If no style context, return basic HTML
if (!styleContext || !this.styleContext.validateContext(styleContext)) {
return basicHtml;
}
// Apply style context to the converted HTML
return this.applyStyleContextToHtml(basicHtml, styleContext);
}
/**
* Apply style context to HTML string
* @param {string} html - HTML string to enhance
* @param {Object} styleContext - Style context to apply
* @returns {string} - Enhanced HTML with styles applied
*/
applyStyleContextToHtml(html, styleContext) {
try {
// Create temporary container
const tempDiv = document.createElement('div');
tempDiv.innerHTML = html;
// Apply style context
this.styleContext.applyStyleContext(tempDiv, styleContext);
return tempDiv.innerHTML;
} catch (error) {
console.warn('Failed to apply style context:', error);
return html; // Return original HTML on error
}
}
/**
* Extract HTML content from a group of elements
* @param {HTMLElement[]} elements - Array of DOM elements
@@ -235,6 +313,73 @@ export class MarkdownConverter {
return htmlParts.join('\n');
}
/**
* Extract HTML content with style context from a group of elements
* @param {HTMLElement[]} elements - Array of DOM elements
* @returns {Object} - Object with HTML content and combined style context
*/
extractGroupHTMLWithContext(elements) {
const htmlParts = [];
const allStyleContexts = [];
elements.forEach((element, index) => {
// Extract style context for this element
const elementContext = this.styleContext.extractStyleContext(element);
if (elementContext.hasPreservableContent) {
allStyleContexts.push({
index,
context: elementContext
});
}
// Extract HTML content
const html = element.innerHTML.trim();
if (html) {
if (element.tagName.toLowerCase() === 'p') {
htmlParts.push(element.outerHTML);
} else {
htmlParts.push(`<p>${html}</p>`);
}
}
});
// Combine all style contexts
const combinedContext = this.combineStyleContexts(allStyleContexts);
return {
html: htmlParts.join('\n'),
styleContext: combinedContext
};
}
/**
* Combine multiple style contexts into a single context
* @param {Array} styleContexts - Array of style contexts with index info
* @returns {Object} - Combined style context
*/
combineStyleContexts(styleContexts) {
if (styleContexts.length === 0) {
return null;
}
const combinedMap = new Map();
let hasContent = false;
styleContexts.forEach(({ index, context }) => {
// Adjust paths to include element index
for (const [path, elementInfo] of context.elementMap) {
const adjustedPath = `${index}.${path}`;
combinedMap.set(adjustedPath, elementInfo);
hasContent = true;
}
});
return hasContent ? {
elementMap: combinedMap,
hasPreservableContent: true
} : null;
}
/**
* Convert HTML content from group elements to markdown
* @param {HTMLElement[]} elements - Array of DOM elements
@@ -246,6 +391,21 @@ export class MarkdownConverter {
return markdown;
}
/**
* Convert HTML content from group elements to markdown with style context
* @param {HTMLElement[]} elements - Array of DOM elements
* @returns {Object} - Object with markdown and style context
*/
extractGroupMarkdownWithContext(elements) {
const { html, styleContext } = this.extractGroupHTMLWithContext(elements);
const markdown = this.htmlToMarkdown(html);
return {
markdown,
styleContext
};
}
/**
* Update group elements with markdown content
* @param {HTMLElement[]} elements - Array of DOM elements to update
@@ -282,6 +442,47 @@ export class MarkdownConverter {
}
}
}
/**
* Update group elements with markdown content and style context
* @param {HTMLElement[]} elements - Array of DOM elements to update
* @param {string} markdown - Markdown content to render
* @param {Object} styleContext - Style context to apply
*/
updateGroupElementsWithStyles(elements, markdown, styleContext) {
// Convert markdown to HTML with styles
const html = styleContext ?
this.markdownToHtmlWithStyles(markdown, styleContext) :
this.markdownToHtml(markdown);
// Split HTML into paragraphs
const tempDiv = document.createElement('div');
tempDiv.innerHTML = html;
const paragraphs = Array.from(tempDiv.querySelectorAll('p, div, h1, h2, h3, h4, h5, h6'));
// Handle case where we have more/fewer paragraphs than elements
const maxCount = Math.max(elements.length, paragraphs.length);
for (let i = 0; i < maxCount; i++) {
if (i < elements.length && i < paragraphs.length) {
// Update existing element with corresponding paragraph
elements[i].innerHTML = paragraphs[i].innerHTML;
} else if (i < elements.length) {
// More elements than paragraphs - clear extra elements
elements[i].innerHTML = '';
} else if (i < paragraphs.length) {
// More paragraphs than elements - create new element
const newElement = document.createElement('p');
newElement.innerHTML = paragraphs[i].innerHTML;
// Insert after the last existing element
const lastElement = elements[elements.length - 1];
lastElement.parentNode.insertBefore(newElement, lastElement.nextSibling);
elements.push(newElement); // Add to our elements array for future updates
}
}
}
}
// Export singleton instance

View File

@@ -0,0 +1,238 @@
/**
 * Style Context Extraction System for Insertr
 *
 * Analyzes HTML elements to extract the attributes (classes, ids, link
 * attributes, data-*, aria-*, ...) that would be lost when content is
 * converted to markdown, and re-applies them after the content has been
 * converted back to HTML.
 *
 * Elements are addressed by a path: the dot-joined child indices leading
 * from the analyzed root to the element ('' for the root itself, '0.2' for
 * the third child of the first child).
 */
export class StyleContext {
  constructor() {
    // Exact attribute names to preserve. The data-* and aria-* families are
    // matched by prefix in shouldPreserveAttribute(), so they are not listed.
    this.preservedAttributes = new Set([
      'class', 'id', 'rel', 'target', 'title', 'alt', 'href',
      'src', 'role', 'tabindex'
    ]);
  }

  /**
   * Extract complete style context from an HTML element
   * @param {HTMLElement} element - The element to analyze
   * @returns {Object} Context with `elementMap` (Map of path -> element info
   *   for every element carrying preservable attributes), `rootElement`
   *   (attribute-only structural snapshot) and `hasPreservableContent`
   */
  extractStyleContext(element) {
    const context = {
      elementMap: new Map(),
      rootElement: this.cloneElementStructure(element),
      hasPreservableContent: false
    };

    this.analyzeElement(element, context, []);
    return context;
  }

  /**
   * Recursively analyze an element and all of its descendants, recording
   * every element that carries preservable attributes.
   * @param {HTMLElement} element - Current element
   * @param {Object} context - Style context being built (mutated)
   * @param {Array<string>} path - Child-index path to the current element
   */
  analyzeElement(element, context, path) {
    const elementInfo = this.extractElementInfo(element);

    if (elementInfo.hasPreservableAttributes) {
      context.hasPreservableContent = true;
      context.elementMap.set(path.join('.'), elementInfo);
    }

    // Descend into every child; the recursion covers the whole subtree.
    Array.from(element.children).forEach((child, index) => {
      this.analyzeElement(child, context, [...path, index.toString()]);
    });
  }

  /**
   * Extract styling information from a single element
   * @param {HTMLElement} element - Element to analyze
   * @returns {Object} `{ tagName, attributes, hasPreservableAttributes,
   *   textContent, hasChildren }`
   */
  extractElementInfo(element) {
    const attributes = this.extractAttributes(element);

    return {
      tagName: element.tagName.toLowerCase(),
      attributes,
      hasPreservableAttributes: Object.keys(attributes).length > 0,
      textContent: this.getDirectTextContent(element),
      hasChildren: element.children.length > 0
    };
  }

  /**
   * Extract the preservable attributes of an element
   * @param {HTMLElement} element - Element to extract attributes from
   * @returns {Object} Map-like object of lower-cased attribute name -> value
   */
  extractAttributes(element) {
    const attributes = {};

    for (const attr of element.attributes) {
      const name = attr.name.toLowerCase();
      if (this.shouldPreserveAttribute(name)) {
        attributes[name] = attr.value;
      }
    }

    return attributes;
  }

  /**
   * Check if an attribute should be preserved
   * @param {string} attributeName - Lower-cased name of the attribute
   * @returns {boolean} True for names in `preservedAttributes` and for any
   *   name starting with `data-` or `aria-`
   */
  shouldPreserveAttribute(attributeName) {
    return this.preservedAttributes.has(attributeName) ||
      attributeName.startsWith('data-') ||
      attributeName.startsWith('aria-');
  }

  /**
   * Get only the direct text content of an element (not from children)
   * @param {HTMLElement} element - Element to get text from
   * @returns {string} Concatenated text of the element's own text nodes, trimmed
   */
  getDirectTextContent(element) {
    let text = '';

    for (const node of element.childNodes) {
      if (node.nodeType === Node.TEXT_NODE) {
        text += node.textContent;
      }
    }

    return text.trim();
  }

  /**
   * Create a structural clone of an element (tag names and preservable
   * attributes only, no text content)
   * @param {HTMLElement} element - Element to clone structure of
   * @returns {Object} `{ tagName, attributes, children }` tree
   */
  cloneElementStructure(element) {
    return {
      tagName: element.tagName.toLowerCase(),
      attributes: this.extractAttributes(element),
      children: Array.from(element.children).map(child =>
        this.cloneElementStructure(child)
      )
    };
  }

  /**
   * Apply style context back to an HTML element and its descendants.
   * Elements are matched purely by their child-index path; no tag check is made.
   * @param {HTMLElement} element - Element to apply styles to
   * @param {Object} context - Style context to apply
   * @param {Array<string>} path - Current path in the element tree
   */
  applyStyleContext(element, context, path = []) {
    const elementInfo = context.elementMap.get(path.join('.'));
    if (elementInfo) {
      this.applyAttributes(element, elementInfo.attributes);
    }

    Array.from(element.children).forEach((child, index) => {
      this.applyStyleContext(child, context, [...path, index.toString()]);
    });
  }

  /**
   * Apply attributes to an element
   * @param {HTMLElement} element - Element to apply attributes to
   * @param {Object} attributes - Attribute name -> value pairs; a missing
   *   object is treated as "nothing to apply"
   */
  applyAttributes(element, attributes) {
    for (const [name, value] of Object.entries(attributes || {})) {
      element.setAttribute(name, value);
    }
  }

  /**
   * Generate markdown formatting options based on detected styles
   * @param {Object} context - Style context
   * @returns {Object} Options with `customElements`, a Map holding the
   *   attributes of the last class-bearing <a> (key 'link'), <strong>
   *   (key 'strong') and <span> (key 'span') found in the context
   */
  generateFormattingOptions(context) {
    const options = {
      preserveLinks: true,
      preserveStrong: true,
      preserveEmphasis: true,
      customElements: new Map()
    };

    // Tag name -> key under which its attributes are reported.
    const optionKeyByTag = new Map([
      ['a', 'link'],
      ['strong', 'strong'],
      ['span', 'span']
    ]);

    for (const [, elementInfo] of context.elementMap) {
      const optionKey = optionKeyByTag.get(elementInfo.tagName);
      if (optionKey && elementInfo.attributes.class) {
        options.customElements.set(optionKey, {
          tagName: elementInfo.tagName,
          attributes: elementInfo.attributes
        });
      }
    }

    return options;
  }

  /**
   * Validate that style context can be safely applied.
   *
   * The whole context is rejected as soon as one attribute looks
   * script-related: an event-handler name (`on*`), the name `javascript`, or
   * a value containing a `javascript:` URL. The URL check ignores letter case
   * and embedded tab/newline characters, because URL parsing does too, so
   * `JavaScript:` or `java\tscript:` would otherwise pass the filter and
   * still execute.
   *
   * @param {Object} context - Style context to validate
   * @returns {boolean} Whether context is valid and safe
   */
  validateContext(context) {
    if (!context || !context.elementMap) {
      return false;
    }

    for (const [, elementInfo] of context.elementMap) {
      const attributes = (elementInfo && elementInfo.attributes) || {};

      for (const [attr, value] of Object.entries(attributes)) {
        const name = attr.toLowerCase();
        const hasScriptUrl = typeof value === 'string' &&
          value.replace(/[\t\n\r]/g, '').toLowerCase().includes('javascript:');

        if (name.startsWith('on') || name === 'javascript' || hasScriptUrl) {
          console.warn(`Blocking potentially dangerous attribute: ${attr}="${value}"`);
          return false;
        }
      }
    }

    return true;
  }
}

export default StyleContext;