Files
insertr/lib/src/utils/html-preservation.js
Joakim 67f9f242b5 feat: Implement complete style detection and preservation foundation
- Add StyleDetectionEngine with one-layer-deep nested element analysis
- Add HTMLPreservationEngine for direct HTML manipulation without lossy conversion
- Implement structure-preserving content parsing that maintains element positions
- Add multi-property element support for links (href + content), images (src + alt), buttons
- Create comprehensive test suite with real DOM element validation
- Replace markdown-based system foundation with HTML-first architecture
- Preserve all element attributes (classes, IDs, data-*, aria-*) during editing
- Generate human-readable style names from detected nested elements
- Support template extraction with multiple insertion points for complex elements

Foundation complete for Phase 2 style-aware editor interface per CLASSES.md specification.
2025-09-19 19:33:56 +02:00

372 lines
13 KiB
JavaScript

/**
 * HTMLPreservationEngine - Direct HTML manipulation preserving attributes and structure
 *
 * Handles the storage and application of HTML content while maintaining:
 * - Element attributes that are on the allow-list (class, id, data-*, aria-*, ...)
 * - Nested styled element structure
 * - The attributes of the edited container element itself
 *
 * Untrusted HTML is always parsed inside an inert document (see
 * createInertContainer) so that it cannot load resources or run inline event
 * handlers before it has been sanitized.
 */
export class HTMLPreservationEngine {
    /**
     * Build the allow-lists used by the sanitizer. All of them are plain Sets
     * on the instance so callers can extend them after construction.
     */
    constructor() {
        // Tags that may appear inside edited content.
        this.allowedTags = new Set([
            // Text formatting ('i' doubles as an icon holder)
            'strong', 'b', 'em', 'i', 'span', 'code', 'kbd', 'samp', 'var',
            // Links and interactive
            'a', 'button',
            // Structure
            'p', 'div', 'section', 'article', 'header', 'footer', 'nav',
            // Lists
            'ul', 'ol', 'li', 'dl', 'dt', 'dd',
            // Headings
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
            // Media
            'img', 'figure', 'figcaption',
            // Quotes and citations
            'blockquote', 'cite', 'q',
            // Tables
            'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
            // Inline elements
            'small', 'sub', 'sup', 'mark', 'del', 'ins'
        ]);

        // Attributes that may stay on nested elements. data-* and aria-* are
        // always allowed and are handled by prefix in sanitizeAttributes.
        this.allowedAttributes = new Set([
            // Universal attributes
            'class', 'id', 'title', 'lang', 'dir',
            // Link attributes
            'href', 'rel', 'target', 'download',
            // Media attributes
            'src', 'alt', 'width', 'height',
            // Form attributes
            'type', 'value', 'placeholder', 'disabled', 'readonly',
            // Table attributes
            'colspan', 'rowspan', 'scope',
            // Other semantic attributes
            'datetime', 'cite'
        ]);

        // Disallowed tags whose text is code rather than copy: they are removed
        // together with their content instead of being flattened to text.
        this.discardedContentTags = new Set(['script', 'style']);

        // URL schemes accepted in href values; scheme-less (relative) URLs are
        // accepted as well.
        this.allowedUrlSchemes = new Set(['http', 'https', 'mailto', 'tel']);
    }

    /**
     * Parse an HTML string into a <div> that belongs to an inert document.
     *
     * Assigning untrusted markup to innerHTML of an element created by the
     * page's own document starts image loads and can fire inline handlers
     * (e.g. <img onerror>) even while the element is detached. Elements of a
     * document made by createHTMLDocument have no browsing context, so the
     * markup stays passive until it has been sanitized.
     *
     * @param {string} html - Markup to parse
     * @returns {HTMLElement} - Detached container holding the parsed nodes
     */
    createInertContainer(html) {
        const inertDocument = document.implementation.createHTMLDocument('');
        const container = inertDocument.createElement('div');
        container.innerHTML = html;
        return container;
    }

    /**
     * Decide whether a URL may be kept in an href attribute.
     *
     * Scheme-less URLs (relative paths, "?query", "#fragment", "//host") are
     * accepted; URLs with a scheme are accepted only when the scheme is listed
     * in this.allowedUrlSchemes.
     *
     * @param {string} url - Raw attribute value
     * @returns {boolean} - True when the URL is considered safe
     */
    isSafeUrl(url) {
        // Browsers ignore tabs, newlines and leading control characters when
        // they resolve the scheme, so "java\tscript:" must be read as
        // "javascript:". Stripping every control/space character is stricter
        // than the URL parser and therefore errs on the safe side.
        const normalized = String(url)
            .replace(/[\u0000-\u0020\u007f-\u009f]/g, '')
            .toLowerCase();
        const schemeMatch = normalized.match(/^([a-z][a-z0-9+.-]*):/);
        if (!schemeMatch) {
            return true;
        }
        return this.allowedUrlSchemes.has(schemeMatch[1]);
    }

    /**
     * Extract content while preserving structure for editing
     *
     * @param {HTMLElement} element - The .insertr element to extract content from
     * @returns {Object} - html, text, containerAttributes, originalHTML,
     *                     elementTag (lower-cased) and hasNestedElements
     */
    extractForEditing(element) {
        const currentHTML = element.innerHTML;
        return {
            // Complete HTML content for rich editing
            html: currentHTML,
            // Plain text for simple editing fallback
            text: this.extractPlainTextWithStructure(element),
            // Element's own attributes (never modified by content editing)
            containerAttributes: this.extractElementAttributes(element),
            // Original state for restoration if needed
            originalHTML: currentHTML,
            // Metadata for validation
            elementTag: element.tagName.toLowerCase(),
            hasNestedElements: element.children.length > 0
        };
    }

    /**
     * Sanitize edited HTML and write it into the element.
     *
     * Only innerHTML is assigned; the attributes of the element itself are not
     * touched. Any error is logged and reported through the return value.
     *
     * @param {HTMLElement} element - Target element to update
     * @param {string} newHTML - New HTML content from editor
     * @returns {boolean} - True on success, false when an error was caught
     */
    applyFromEditing(element, newHTML) {
        try {
            element.innerHTML = this.validateAndSanitizeHTML(newHTML);
            return true;
        } catch (error) {
            console.error('Failed to apply HTML content:', error);
            return false;
        }
    }

    /**
     * Validate and sanitize HTML to ensure safety and structure preservation
     *
     * @param {string} html - HTML to validate
     * @returns {string} - Sanitized HTML
     */
    validateAndSanitizeHTML(html) {
        const container = this.createInertContainer(html);
        this.sanitizeElement(container);
        return container.innerHTML;
    }

    /**
     * Recursively sanitize the descendants of an element (the element itself
     * is not checked).
     *
     * - Tags in this.discardedContentTags are removed with their content.
     * - Other tags missing from this.allowedTags are replaced by a text node
     *   holding their textContent, which also flattens anything nested inside.
     * - Allowed tags get their attributes sanitized and are then descended into.
     *
     * @param {HTMLElement} element - Element whose children are sanitized
     */
    sanitizeElement(element) {
        // Snapshot the live collection because children are replaced in the loop.
        for (const child of Array.from(element.children)) {
            const tagName = child.tagName.toLowerCase();

            if (this.discardedContentTags.has(tagName)) {
                child.parentNode.removeChild(child);
                continue;
            }

            if (!this.allowedTags.has(tagName)) {
                // Create the text node in the child's own document so this
                // also works when no global document is involved.
                const textNode = child.ownerDocument.createTextNode(child.textContent);
                child.parentNode.replaceChild(textNode, child);
                continue;
            }

            this.sanitizeAttributes(child);
            this.sanitizeElement(child);
        }
    }

    /**
     * Remove every attribute that is not data-*, aria-* or allow-listed, and
     * remove href values that isSafeUrl rejects.
     *
     * NOTE(review): src and cite values are kept as-is; only href is checked.
     *
     * @param {HTMLElement} element - Element to sanitize attributes for
     */
    sanitizeAttributes(element) {
        const attributesToRemove = [];

        for (const attr of Array.from(element.attributes)) {
            const attrName = attr.name.toLowerCase();

            if (attrName.startsWith('data-') || attrName.startsWith('aria-')) {
                continue;
            }

            if (!this.allowedAttributes.has(attrName)) {
                attributesToRemove.push(attr.name);
                continue;
            }

            if (attrName === 'href' && !this.isSafeUrl(attr.value)) {
                attributesToRemove.push(attr.name);
            }
        }

        // Removal happens after iteration so the attribute list is not
        // modified while it is being walked.
        for (const name of attributesToRemove) {
            element.removeAttribute(name);
        }
    }

    /**
     * Extract the plain text of an element for simple editing interfaces.
     *
     * Without child elements the raw textContent is returned. Otherwise the
     * textContent of each direct text and element child is concatenated (other
     * node types such as comments are skipped) and the result is trimmed. No
     * separators are inserted between children.
     *
     * @param {HTMLElement} element - Element to extract text from
     * @returns {string} - Plain text
     */
    extractPlainTextWithStructure(element) {
        if (element.children.length === 0) {
            return element.textContent;
        }

        let text = '';
        for (const node of element.childNodes) {
            const isTextOrElement =
                node.nodeType === Node.TEXT_NODE || node.nodeType === Node.ELEMENT_NODE;
            if (isTextOrElement) {
                text += node.textContent;
            }
        }
        return text.trim();
    }

    /**
     * Extract all attributes from element for preservation
     *
     * @param {HTMLElement} element - Element to extract attributes from
     * @returns {Object} - Map of attribute name to attribute value
     */
    extractElementAttributes(element) {
        const attributes = {};
        for (const attr of element.attributes) {
            attributes[attr.name] = attr.value;
        }
        return attributes;
    }

    /**
     * Replace the element's attributes with a previously saved set.
     *
     * Every current attribute except contenteditable is removed, then each
     * saved entry is set.
     *
     * @param {HTMLElement} element - Element to restore attributes to
     * @param {Object} attributes - Attributes to restore (name -> value)
     * @throws {TypeError} When attributes is not an object; checked up front
     *                     so the element is never left stripped of attributes.
     */
    restoreElementAttributes(element, attributes) {
        if (attributes === null || typeof attributes !== 'object') {
            throw new TypeError('attributes must be an object of name/value pairs');
        }

        for (const attr of Array.from(element.attributes)) {
            if (attr.name !== 'contenteditable') { // Preserve editing state
                element.removeAttribute(attr.name);
            }
        }

        for (const [name, value] of Object.entries(attributes)) {
            element.setAttribute(name, value);
        }
    }

    /**
     * Check that HTML contains no script, object, embed or iframe element.
     *
     * This is a coarse check only: attributes are not inspected here.
     *
     * @param {string} html - HTML to validate
     * @returns {boolean} - False when such an element is found or parsing throws
     */
    isValidHTML(html) {
        try {
            const container = this.createInertContainer(html);
            return container.querySelector('script, object, embed, iframe') === null;
        } catch (error) {
            return false;
        }
    }

    /**
     * Create a copy of HTML content with unsafe attributes removed.
     *
     * Only attributes are sanitized; tags are left in place.
     *
     * @param {string} html - Original HTML
     * @returns {string} - Copy for editing
     */
    createEditableCopy(html) {
        const container = this.createInertContainer(html);
        container.querySelectorAll('*').forEach(element => {
            this.sanitizeAttributes(element);
        });
        return container.innerHTML;
    }

    /**
     * Merge edited content back while preserving specific styled elements.
     *
     * For every selector, the n-th match in the edited HTML is replaced by a
     * deep clone of the n-th match in the original HTML. Matches without a
     * counterpart are left alone. The result is not sanitized here.
     *
     * @param {string} originalHTML - Original HTML content
     * @param {string} editedHTML - Edited HTML content
     * @param {Array} preserveSelectors - CSS selectors for elements to preserve
     * @returns {string} - Merged HTML with preserved elements
     */
    mergeWithPreservation(originalHTML, editedHTML, preserveSelectors = []) {
        if (!preserveSelectors || preserveSelectors.length === 0) {
            return editedHTML;
        }

        const originalContainer = this.createInertContainer(originalHTML);
        const editedContainer = this.createInertContainer(editedHTML);

        preserveSelectors.forEach(selector => {
            const originalElements = originalContainer.querySelectorAll(selector);
            // Static NodeList: indices stay stable while nodes are replaced.
            const editedElements = editedContainer.querySelectorAll(selector);

            originalElements.forEach((originalEl, index) => {
                const editedEl = editedElements[index];
                if (editedEl) {
                    editedEl.replaceWith(originalEl.cloneNode(true));
                }
            });
        });

        return editedContainer.innerHTML;
    }

    /**
     * Extract an element's content and add an attribute-sanitized copy for the
     * editor.
     *
     * @param {HTMLElement} element - Element containing content to prepare
     * @returns {Object} - Result of extractForEditing plus editableHTML and
     *                     isComplex (same value as hasNestedElements)
     */
    prepareForEditing(element) {
        const extracted = this.extractForEditing(element);
        return {
            ...extracted,
            editableHTML: this.createEditableCopy(extracted.html),
            isComplex: extracted.hasNestedElements
        };
    }

    /**
     * Finalize edited content and apply it to the element.
     *
     * Accepts either an HTML string or an object with an html property. An
     * empty string is a valid edit (it clears the element).
     *
     * @param {HTMLElement} element - Target element
     * @param {string|Object} editedContent - Content from editor
     * @returns {boolean} - True when content was applied, false otherwise
     */
    finalizeEditing(element, editedContent) {
        try {
            if (typeof editedContent === 'string') {
                return this.applyFromEditing(element, editedContent);
            }

            // `!= null` rejects null and undefined only, so html: '' still counts.
            if (editedContent != null && editedContent.html != null) {
                return this.applyFromEditing(element, editedContent.html);
            }

            return false;
        } catch (error) {
            console.error('Failed to finalize editing:', error);
            return false;
        }
    }
}
// Shared module-level instance, constructed once when this module is evaluated.
// Consumers that need different allow-lists can instantiate the class themselves.
export const htmlPreservationEngine = new HTMLPreservationEngine();