feat: Implement complete style detection and preservation foundation

- Add StyleDetectionEngine with one-layer-deep nested element analysis
- Add HTMLPreservationEngine for direct HTML manipulation without lossy conversion
- Implement structure-preserving content parsing that maintains element positions
- Add multi-property element support for links (href + content), images (src + alt), buttons
- Create comprehensive test suite with real DOM element validation
- Replace markdown-based system foundation with HTML-first architecture
- Preserve all element attributes (classes, IDs, data-*, aria-*) during editing
- Generate human-readable style names from detected nested elements
- Support template extraction with multiple insertion points for complex elements

Foundation complete for Phase 2 style-aware editor interface per CLASSES.md specification.
2025-09-19 19:33:56 +02:00
parent 968e64a57e
commit 67f9f242b5
6 changed files with 2032 additions and 3 deletions
--- a/lib/src/utils/html-preservation.js
+++ b/lib/src/utils/html-preservation.js
@@ -0,0 +1,372 @@
+/**
+ * HTMLPreservationEngine - Direct HTML manipulation preserving all attributes and structure
+ *
+ * Handles the storage and application of HTML content while maintaining:
+ * - All element attributes (classes, IDs, data-*, etc.)
+ * - Nested styled element structure
+ * - Developer-defined styling context
+ *
+ * This replaces the lossy markdown conversion system with perfect fidelity HTML operations.
+ *
+ * HTML strings handed to this class are treated as untrusted: they are parsed inside
+ * an inert document (so nothing loads or executes while parsing) and filtered against
+ * the tag/attribute allowlists below before they are written to a live element.
+ */
+export class HTMLPreservationEngine {
+    constructor() {
+        // Tags that may appear inside edited content. Any other element is flattened
+        // to its text content by sanitizeElement().
+        // NOTE(review): br, hr, pre, u, s, abbr and time are not listed and are
+        // therefore flattened too - confirm this is intended for editor output.
+        this.allowedTags = new Set([
+            // Text formatting ('i' is also commonly used for icons)
+            'strong', 'b', 'em', 'i', 'span', 'code', 'kbd', 'samp', 'var',
+            // Links and interactive
+            'a', 'button',
+            // Structure
+            'p', 'div', 'section', 'article', 'header', 'footer', 'nav',
+            // Lists
+            'ul', 'ol', 'li', 'dl', 'dt', 'dd',
+            // Headings
+            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+            // Media
+            'img', 'figure', 'figcaption',
+            // Quotes and citations
+            'blockquote', 'cite', 'q',
+            // Tables
+            'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
+            // Inline elements
+            'small', 'sub', 'sup', 'mark', 'del', 'ins'
+        ]);
+
+        // Attributes kept on allowed elements. data-* and aria-* are always kept
+        // and are handled by prefix in sanitizeAttributes().
+        this.allowedAttributes = new Set([
+            // Universal attributes
+            'class', 'id', 'title', 'lang', 'dir',
+            // Link attributes
+            'href', 'rel', 'target', 'download',
+            // Media attributes
+            'src', 'alt', 'width', 'height',
+            // Form attributes
+            'type', 'value', 'placeholder', 'disabled', 'readonly',
+            // Table attributes
+            'colspan', 'rowspan', 'scope',
+            // Other semantic attributes
+            'datetime', 'cite'
+        ]);
+
+        // URL schemes accepted in href values; scheme-less (relative) URLs are always accepted.
+        this.allowedUrlSchemes = new Set(['http', 'https', 'mailto', 'tel']);
+
+        // Elements whose text is code or CSS rather than content. They are removed
+        // together with their content instead of being flattened to visible text.
+        this.droppedTags = new Set(['script', 'style']);
+    }
+
+    /**
+     * Parse an HTML string into a detached <div> that belongs to an inert document.
+     *
+     * A <div> created from the live `document` would already fetch images and run
+     * inline handlers such as `onerror` while `innerHTML` is being assigned, i.e.
+     * before any sanitizing can happen. A document created through
+     * `document.implementation.createHTMLDocument` has no browsing context, so the
+     * markup can be inspected safely.
+     *
+     * @param {string} html - HTML to parse
+     * @returns {HTMLElement} - Detached container holding the parsed nodes
+     */
+    createInertContainer(html) {
+        const inertDocument = document.implementation.createHTMLDocument('');
+        const container = inertDocument.createElement('div');
+        container.innerHTML = html;
+        return container;
+    }
+
+    /**
+     * Extract content while preserving structure for editing
+     *
+     * @param {HTMLElement} element - The .insertr element to extract content from
+     * @returns {Object} - Extracted content with preservation metadata
+     */
+    extractForEditing(element) {
+        return {
+            // Complete HTML content for rich editing
+            html: element.innerHTML,
+            // Plain text for simple editing fallback
+            text: this.extractPlainTextWithStructure(element),
+            // Element's own attributes (never modified by content editing)
+            containerAttributes: this.extractElementAttributes(element),
+            // Original state for restoration if needed
+            originalHTML: element.innerHTML,
+            // Metadata for validation
+            elementTag: element.tagName.toLowerCase(),
+            hasNestedElements: element.children.length > 0
+        };
+    }
+
+    /**
+     * Apply edited content while preserving structure and validating safety
+     *
+     * Only the element's content is replaced; attributes on the element itself
+     * (classes, IDs on the .insertr element) are left untouched.
+     *
+     * @param {HTMLElement} element - Target element to update
+     * @param {string} newHTML - New HTML content from editor
+     * @returns {boolean} - Success status; false when newHTML is not a string or applying fails
+     */
+    applyFromEditing(element, newHTML) {
+        // Assigning a non-string to innerHTML would write text such as "undefined".
+        if (typeof newHTML !== 'string') {
+            console.error('Failed to apply HTML content:', new TypeError('newHTML must be a string'));
+            return false;
+        }
+
+        try {
+            element.innerHTML = this.validateAndSanitizeHTML(newHTML);
+            return true;
+        } catch (error) {
+            console.error('Failed to apply HTML content:', error);
+            return false;
+        }
+    }
+
+    /**
+     * Validate and sanitize HTML to ensure safety and structure preservation
+     *
+     * @param {string} html - HTML to validate
+     * @returns {string} - Sanitized HTML
+     */
+    validateAndSanitizeHTML(html) {
+        const container = this.createInertContainer(html);
+        this.sanitizeElement(container);
+        return container.innerHTML;
+    }
+
+    /**
+     * Recursively sanitize the descendants of an element (the element itself is not checked)
+     *
+     * - script/style elements are removed together with their content
+     * - other disallowed elements are replaced by a text node holding their text
+     * - allowed elements keep their place and get their attributes filtered
+     *
+     * @param {HTMLElement} element - Element whose children are sanitized in place
+     */
+    sanitizeElement(element) {
+        const droppedSelector = Array.from(this.droppedTags).join(', ');
+
+        // Snapshot the children: the live collection changes while nodes are replaced.
+        for (const child of Array.from(element.children)) {
+            const tagName = child.tagName.toLowerCase();
+
+            if (this.droppedTags.has(tagName)) {
+                child.remove();
+                continue;
+            }
+
+            if (!this.allowedTags.has(tagName)) {
+                // Keep code/CSS of nested script/style out of the flattened text.
+                child.querySelectorAll(droppedSelector).forEach(node => node.remove());
+                child.replaceWith(child.ownerDocument.createTextNode(child.textContent));
+                continue;
+            }
+
+            this.sanitizeAttributes(child);
+            this.sanitizeElement(child);
+        }
+    }
+
+    /**
+     * Sanitize element attributes, removing dangerous ones
+     *
+     * Keeps data-* and aria-* attributes plus everything in `allowedAttributes`;
+     * an `href` is additionally dropped when it uses a scheme outside `allowedUrlSchemes`.
+     *
+     * @param {HTMLElement} element - Element to sanitize attributes for
+     */
+    sanitizeAttributes(element) {
+        // Snapshot first: `element.attributes` is live and shrinks on removal.
+        for (const attr of Array.from(element.attributes)) {
+            const attrName = attr.name.toLowerCase();
+
+            if (attrName.startsWith('data-') || attrName.startsWith('aria-')) {
+                continue;
+            }
+
+            const isAllowed = this.allowedAttributes.has(attrName)
+                && (attrName !== 'href' || this.isSafeHref(attr.value));
+
+            if (!isAllowed) {
+                // Remove by the exact name: lookups on non-HTML (e.g. SVG) elements
+                // are case-sensitive, so the lower-cased name could miss.
+                element.removeAttribute(attr.name);
+            }
+        }
+    }
+
+    /**
+     * Decide whether an href value may be kept
+     *
+     * Values without a scheme (relative paths, "#fragment", "?query", "//host/path")
+     * are accepted; values with a scheme are accepted only for `allowedUrlSchemes`.
+     *
+     * @param {string} value - Raw attribute value
+     * @returns {boolean} - True when the link target is acceptable
+     */
+    isSafeHref(value) {
+        // URL parsers ignore tabs/newlines and surrounding whitespace/control
+        // characters, so "java\nscript:" must be recognised as "javascript:".
+        // Stripping them everywhere is deliberately stricter than the parser.
+        const compact = String(value).replace(/[\u0000-\u0020\u007f]/g, '').toLowerCase();
+        const schemeMatch = compact.match(/^([a-z][a-z0-9+.-]*):/);
+
+        if (!schemeMatch) {
+            return true;
+        }
+
+        return this.allowedUrlSchemes.has(schemeMatch[1]);
+    }
+
+    /**
+     * Extract plain text while preserving some structural information
+     * Used for simple editing interfaces
+     *
+     * Elements without child elements return their text unchanged; otherwise the
+     * text of all direct text and element children is concatenated and trimmed.
+     *
+     * @param {HTMLElement} element - Element to extract text from
+     * @returns {string} - Plain text with preserved structure
+     */
+    extractPlainTextWithStructure(element) {
+        if (element.children.length === 0) {
+            return element.textContent;
+        }
+
+        let text = '';
+        for (const node of element.childNodes) {
+            // Other node types (e.g. comments) contribute nothing.
+            if (node.nodeType === Node.TEXT_NODE || node.nodeType === Node.ELEMENT_NODE) {
+                text += node.textContent;
+            }
+        }
+
+        return text.trim();
+    }
+
+    /**
+     * Extract all attributes from element for preservation
+     *
+     * @param {HTMLElement} element - Element to extract attributes from
+     * @returns {Object} - Attribute name to value map
+     */
+    extractElementAttributes(element) {
+        const attributes = {};
+
+        for (const attr of element.attributes) {
+            attributes[attr.name] = attr.value;
+        }
+
+        return attributes;
+    }
+
+    /**
+     * Restore element attributes (used for element-level preservation)
+     *
+     * Removes every current attribute except `contenteditable` (the editing state)
+     * and then sets the saved ones.
+     *
+     * @param {HTMLElement} element - Element to restore attributes to
+     * @param {Object} attributes - Attributes to restore; null/undefined restores nothing
+     */
+    restoreElementAttributes(element, attributes) {
+        Array.from(element.attributes).forEach(attr => {
+            if (attr.name !== 'contenteditable') {
+                element.removeAttribute(attr.name);
+            }
+        });
+
+        // Tolerate a missing map instead of throwing after the element was cleared.
+        Object.entries(attributes || {}).forEach(([name, value]) => {
+            element.setAttribute(name, value);
+        });
+    }
+
+    /**
+     * Check whether HTML content is free of embedded executable elements
+     *
+     * Only looks for script, object, embed and iframe elements; attributes are not
+     * inspected here (see sanitizeAttributes for that).
+     *
+     * @param {string} html - HTML to validate
+     * @returns {boolean} - True if none of those elements is present
+     */
+    isValidHTML(html) {
+        try {
+            const container = this.createInertContainer(html);
+            return container.querySelector('script, object, embed, iframe') === null;
+        } catch (error) {
+            return false;
+        }
+    }
+
+    /**
+     * Create a safe copy of HTML content for editing
+     *
+     * Filters the attributes of every element; the elements themselves are kept.
+     *
+     * @param {string} html - Original HTML
+     * @returns {string} - Safe copy for editing
+     */
+    createEditableCopy(html) {
+        const container = this.createInertContainer(html);
+
+        container.querySelectorAll('*').forEach(element => {
+            this.sanitizeAttributes(element);
+        });
+
+        return container.innerHTML;
+    }
+
+    /**
+     * Merge edited content back while preserving specific styled elements
+     * Used for complex editing scenarios where certain elements must be preserved
+     *
+     * For each selector, the n-th match in the edited HTML is replaced by a copy of
+     * the n-th match in the original HTML. The result is not sanitized here.
+     *
+     * @param {string} originalHTML - Original HTML content
+     * @param {string} editedHTML - Edited HTML content
+     * @param {Array} preserveSelectors - CSS selectors for elements to preserve
+     * @returns {string} - Merged HTML with preserved elements
+     */
+    mergeWithPreservation(originalHTML, editedHTML, preserveSelectors = []) {
+        if (preserveSelectors.length === 0) {
+            return editedHTML;
+        }
+
+        const originalContainer = this.createInertContainer(originalHTML);
+        const editedContainer = this.createInertContainer(editedHTML);
+
+        preserveSelectors.forEach(selector => {
+            const originalElements = originalContainer.querySelectorAll(selector);
+            const editedElements = editedContainer.querySelectorAll(selector);
+
+            originalElements.forEach((originalEl, index) => {
+                if (editedElements[index]) {
+                    editedElements[index].replaceWith(originalEl.cloneNode(true));
+                }
+            });
+        });
+
+        return editedContainer.innerHTML;
+    }
+
+    /**
+     * Convert HTML content to safe editing format
+     * Ensures content can be safely edited without losing essential structure
+     *
+     * @param {HTMLElement} element - Element containing content to prepare
+     * @returns {Object} - Result of extractForEditing() plus `editableHTML` and `isComplex`
+     */
+    prepareForEditing(element) {
+        const extracted = this.extractForEditing(element);
+
+        return {
+            ...extracted,
+            editableHTML: this.createEditableCopy(extracted.html),
+            isComplex: extracted.hasNestedElements
+        };
+    }
+
+    /**
+     * Finalize edited content and apply to element
+     * Handles validation, sanitization, and safe application
+     *
+     * @param {HTMLElement} element - Target element
+     * @param {string|Object} editedContent - HTML string, or an object with a string `html` property
+     * @returns {boolean} - Success status; false for any other kind of input
+     */
+    finalizeEditing(element, editedContent) {
+        try {
+            if (typeof editedContent === 'string') {
+                return this.applyFromEditing(element, editedContent);
+            }
+
+            // Compare by type, not truthiness: an empty `html` string is a valid
+            // edit (the user cleared the content) and must still be applied.
+            const isRichContent = editedContent !== null
+                && typeof editedContent === 'object'
+                && typeof editedContent.html === 'string';
+
+            if (isRichContent) {
+                return this.applyFromEditing(element, editedContent.html);
+            }
+
+            return false;
+        } catch (error) {
+            console.error('Failed to finalize editing:', error);
+            return false;
+        }
+    }
+}
+
+// Export singleton instance: a shared, module-level engine created once when this
+// module is first evaluated. Callers can import it instead of constructing their own.
+export const htmlPreservationEngine = new HTMLPreservationEngine();