/**
 * HTMLPreservationEngine - Direct HTML manipulation preserving all attributes and structure
 *
 * Handles the storage and application of HTML content while maintaining:
 * - All element attributes (classes, IDs, data-*, etc.)
 * - Nested styled element structure
 * - Developer-defined styling context
 *
 * This replaces the lossy markdown conversion system with perfect fidelity HTML operations.
 *
 * All methods that parse HTML rely on the browser globals `document` and `Node`.
 */
export class HTMLPreservationEngine {
  /**
   * Build the tag, attribute and URL-scheme allowlists used by the sanitizer.
   */
  constructor() {
    this.allowedTags = new Set([
      // Text formatting ('i' doubles as the usual icon-font element)
      'strong', 'b', 'em', 'i', 'span', 'code', 'kbd', 'samp', 'var',
      // Links and interactive
      'a', 'button',
      // Structure
      'p', 'div', 'section', 'article', 'header', 'footer', 'nav',
      // Lists
      'ul', 'ol', 'li', 'dl', 'dt', 'dd',
      // Headings
      'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
      // Media
      'img', 'figure', 'figcaption',
      // Quotes and citations
      'blockquote', 'cite', 'q',
      // Tables
      'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
      // Inline elements
      'small', 'sub', 'sup', 'mark', 'del', 'ins',
    ]);

    // data-* and aria-* are always allowed; see sanitizeAttributes().
    this.allowedAttributes = new Set([
      // Universal attributes
      'class', 'id', 'title', 'lang', 'dir',
      // Link attributes
      'href', 'rel', 'target', 'download',
      // Media attributes
      'src', 'alt', 'width', 'height',
      // Form attributes
      'type', 'value', 'placeholder', 'disabled', 'readonly',
      // Table attributes
      'colspan', 'rowspan', 'scope',
      // Other semantic attributes
      'datetime', 'cite',
    ]);

    // Schemes an href may carry. A value with no scheme is a relative URL and is always kept.
    this.allowedHrefSchemes = new Set(['http', 'https', 'mailto', 'tel']);

    // Disallowed tags whose text is source code rather than content. They are
    // removed outright instead of being flattened to visible text.
    this.discardedTags = new Set(['script', 'style']);
  }

  /**
   * Extract content while preserving structure for editing
   *
   * @param {HTMLElement} element - The .insertr element to extract content from
   * @returns {Object} - Extracted content with preservation metadata
   */
  extractForEditing(element) {
    return {
      // Complete HTML content for rich editing
      html: element.innerHTML,
      // Plain text for simple editing fallback
      text: this.extractPlainTextWithStructure(element),
      // Element's own attributes (never modified by content editing)
      containerAttributes: this.extractElementAttributes(element),
      // Original state for restoration if needed
      originalHTML: element.innerHTML,
      // Metadata for validation
      elementTag: element.tagName.toLowerCase(),
      hasNestedElements: element.children.length > 0,
    };
  }

  /**
   * Apply edited content while preserving structure and validating safety
   *
   * Only the element's innerHTML is replaced; the attributes on the element
   * itself (classes, IDs, ...) are never touched.
   *
   * @param {HTMLElement} element - Target element to update
   * @param {string} newHTML - New HTML content from editor
   * @returns {boolean} - True on success, false if sanitizing or applying threw
   */
  applyFromEditing(element, newHTML) {
    try {
      element.innerHTML = this.validateAndSanitizeHTML(newHTML);
      return true;
    } catch (error) {
      console.error('Failed to apply HTML content:', error);
      return false;
    }
  }

  /**
   * Validate and sanitize HTML to ensure safety and structure preservation
   *
   * @param {string} html - HTML to validate
   * @returns {string} - Sanitized HTML
   */
  validateAndSanitizeHTML(html) {
    // Parse into a detached container so nothing is attached to the live document.
    const tempDiv = document.createElement('div');
    tempDiv.innerHTML = html;

    this.sanitizeElement(tempDiv);

    return tempDiv.innerHTML;
  }

  /**
   * Recursively sanitize the descendants of an element (the element itself is not checked)
   *
   * - Tags in `discardedTags` (script, style) are removed together with their content.
   * - Any other tag missing from `allowedTags` is replaced by a text node holding
   *   its text content, so nested markup inside it is flattened.
   * - Allowed tags have their attributes sanitized and are then recursed into.
   *
   * @param {HTMLElement} element - Element whose descendants are sanitized in place
   */
  sanitizeElement(element) {
    // Snapshot the children: the loop mutates the live collection.
    const children = Array.from(element.children);

    for (const child of children) {
      const tagName = child.tagName.toLowerCase();

      if (this.discardedTags.has(tagName)) {
        // Script/style bodies are code, not content; never surface them as text.
        child.remove();
        continue;
      }

      if (!this.allowedTags.has(tagName)) {
        // Remove the disallowed tag but keep its text
        const textNode = document.createTextNode(child.textContent);
        child.parentNode.replaceChild(textNode, child);
        continue;
      }

      this.sanitizeAttributes(child);
      this.sanitizeElement(child);
    }
  }

  /**
   * Sanitize element attributes, removing dangerous ones
   *
   * Keeps every data-* and aria-* attribute, drops any other attribute that is
   * not in `allowedAttributes`, and drops `href` values that fail isSafeHref().
   *
   * @param {HTMLElement} element - Element to sanitize attributes for
   */
  sanitizeAttributes(element) {
    const attributesToRemove = [];

    // Collect first and remove afterwards so the attribute list is not mutated while iterating.
    for (const attr of Array.from(element.attributes)) {
      const attrName = attr.name.toLowerCase();

      // Always allow data-* and aria-* attributes
      if (attrName.startsWith('data-') || attrName.startsWith('aria-')) {
        continue;
      }

      if (!this.allowedAttributes.has(attrName)) {
        attributesToRemove.push(attrName);
        continue;
      }

      if (attrName === 'href' && !this.isSafeHref(attr.value)) {
        attributesToRemove.push(attrName);
      }
    }

    for (const attrName of attributesToRemove) {
      element.removeAttribute(attrName);
    }
  }

  /**
   * Decide whether an href value is safe to keep
   *
   * A value without a URL scheme (relative path, query, fragment, absolute path,
   * protocol-relative URL) is accepted. A value with a scheme is accepted only
   * if that scheme is in `allowedHrefSchemes`.
   *
   * @param {string} value - Raw attribute value
   * @returns {boolean} - True if the link target may be kept
   */
  isSafeHref(value) {
    // Strip whitespace and control characters before looking for a scheme so that
    // obfuscated values such as "java\tscript:" are still recognised. Stripping
    // more than a URL parser would only makes the check stricter.
    const normalized = String(value)
      .replace(/[\u0000-\u0020\u007f-\u009f]/g, '')
      .toLowerCase();

    const schemeMatch = /^([a-z][a-z0-9+.-]*):/.exec(normalized);
    if (schemeMatch === null) {
      return true;
    }

    return this.allowedHrefSchemes.has(schemeMatch[1]);
  }

  /**
   * Extract plain text while preserving some structural information
   * Used for simple editing interfaces
   *
   * Elements without child elements return their textContent untouched; otherwise
   * the text of all text and element child nodes is concatenated (comments and
   * other node types are skipped) and the result is trimmed.
   *
   * @param {HTMLElement} element - Element to extract text from
   * @returns {string} - Plain text with preserved structure
   */
  extractPlainTextWithStructure(element) {
    if (element.children.length === 0) {
      return element.textContent;
    }

    let text = '';
    for (const node of element.childNodes) {
      if (node.nodeType === Node.TEXT_NODE || node.nodeType === Node.ELEMENT_NODE) {
        text += node.textContent;
      }
    }

    return text.trim();
  }

  /**
   * Extract all attributes from element for preservation
   *
   * @param {HTMLElement} element - Element to extract attributes from
   * @returns {Object} - Map of attribute name to attribute value
   */
  extractElementAttributes(element) {
    const attributes = {};

    for (const attr of Array.from(element.attributes)) {
      attributes[attr.name] = attr.value;
    }

    return attributes;
  }

  /**
   * Restore element attributes (used for element-level preservation)
   *
   * Every current attribute except `contenteditable` is removed, then each saved
   * attribute is set. A saved `contenteditable` value therefore still overwrites
   * the current one.
   *
   * @param {HTMLElement} element - Element to restore attributes to
   * @param {Object} attributes - Attributes to restore
   */
  restoreElementAttributes(element, attributes) {
    // Snapshot first: removing attributes mutates the live NamedNodeMap.
    for (const attr of Array.from(element.attributes)) {
      // Preserve editing state
      if (attr.name !== 'contenteditable') {
        element.removeAttribute(attr.name);
      }
    }

    for (const [name, value] of Object.entries(attributes)) {
      element.setAttribute(name, value);
    }
  }

  /**
   * Check if HTML content is safe and maintains expected structure
   *
   * @param {string} html - HTML to validate
   * @returns {boolean} - False if the markup contains script, object, embed or
   *   iframe elements, or if parsing throws; true otherwise
   */
  isValidHTML(html) {
    try {
      const tempDiv = document.createElement('div');
      tempDiv.innerHTML = html;

      return tempDiv.querySelector('script, object, embed, iframe') === null;
    } catch (error) {
      return false;
    }
  }

  /**
   * Create a safe copy of HTML content for editing
   *
   * Only attributes are sanitized here; disallowed tags are left in place.
   *
   * @param {string} html - Original HTML
   * @returns {string} - Copy of the HTML with unsafe attributes removed
   */
  createEditableCopy(html) {
    const tempDiv = document.createElement('div');
    tempDiv.innerHTML = html;

    for (const element of tempDiv.querySelectorAll('*')) {
      this.sanitizeAttributes(element);
    }

    return tempDiv.innerHTML;
  }

  /**
   * Merge edited content back while preserving specific styled elements
   * Used for complex editing scenarios where certain elements must be preserved
   *
   * For each selector, the n-th match in the edited HTML is replaced by a deep
   * clone of the n-th match in the original HTML. Original matches without an
   * edited counterpart are ignored.
   *
   * @param {string} originalHTML - Original HTML content
   * @param {string} editedHTML - Edited HTML content
   * @param {Array} preserveSelectors - CSS selectors for elements to preserve
   * @returns {string} - Merged HTML with preserved elements
   */
  mergeWithPreservation(originalHTML, editedHTML, preserveSelectors = []) {
    if (preserveSelectors.length === 0) {
      return editedHTML;
    }

    const originalDiv = document.createElement('div');
    originalDiv.innerHTML = originalHTML;

    const editedDiv = document.createElement('div');
    editedDiv.innerHTML = editedHTML;

    for (const selector of preserveSelectors) {
      const originalElements = originalDiv.querySelectorAll(selector);
      // querySelectorAll returns a static list, so indices stay stable while replacing.
      const editedElements = editedDiv.querySelectorAll(selector);

      originalElements.forEach((originalEl, index) => {
        const editedEl = editedElements[index];
        if (editedEl) {
          editedEl.replaceWith(originalEl.cloneNode(true));
        }
      });
    }

    return editedDiv.innerHTML;
  }

  /**
   * Convert HTML content to safe editing format
   * Ensures content can be safely edited without losing essential structure
   *
   * @param {HTMLElement} element - Element containing content to prepare
   * @returns {Object} - Result of extractForEditing() plus `editableHTML`
   *   (attribute-sanitized copy) and `isComplex` (has nested elements)
   */
  prepareForEditing(element) {
    const extracted = this.extractForEditing(element);

    return {
      ...extracted,
      editableHTML: this.createEditableCopy(extracted.html),
      isComplex: extracted.hasNestedElements,
    };
  }

  /**
   * Finalize edited content and apply to element
   * Handles validation, sanitization, and safe application
   *
   * @param {HTMLElement} element - Target element
   * @param {string|Object} editedContent - HTML string, or an object with a string
   *   `html` property. An empty string is valid and clears the element.
   * @returns {boolean} - True if content was applied; false for unsupported
   *   payloads (null, missing or non-string `html`) or on failure
   */
  finalizeEditing(element, editedContent) {
    try {
      // Simple text or HTML string
      if (typeof editedContent === 'string') {
        return this.applyFromEditing(element, editedContent);
      }

      // Rich content object. Checked by type rather than truthiness so that
      // { html: '' } clears the element instead of being rejected.
      if (
        editedContent !== null &&
        typeof editedContent === 'object' &&
        typeof editedContent.html === 'string'
      ) {
        return this.applyFromEditing(element, editedContent.html);
      }

      return false;
    } catch (error) {
      console.error('Failed to finalize editing:', error);
      return false;
    }
  }
}

// Export singleton instance
export const htmlPreservationEngine = new HTMLPreservationEngine();