/**
 * HTMLPreservationEngine - Direct HTML manipulation preserving all attributes and structure
 *
 * Handles the storage and application of HTML content while maintaining:
 * - All element attributes (classes, IDs, data-*, etc.)
 * - Nested styled element structure
 * - Developer-defined styling context
 *
 * Editor-supplied HTML is treated as untrusted: it is parsed away from the
 * live page, filtered against tag/attribute allow-lists, and only then
 * written into the target element.
 */
export class HTMLPreservationEngine {
    constructor() {
        // Tags that may appear inside editable content.
        this.allowedTags = new Set([
            // Text formatting ('i' doubles as an icon holder)
            'strong', 'b', 'em', 'i', 'span', 'code', 'kbd', 'samp', 'var',
            // Links and interactive
            'a', 'button',
            // Structure
            'p', 'div', 'section', 'article', 'header', 'footer', 'nav',
            // Lists
            'ul', 'ol', 'li', 'dl', 'dt', 'dd',
            // Headings
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
            // Media
            'img', 'figure', 'figcaption',
            // Quotes and citations
            'blockquote', 'cite', 'q',
            // Tables
            'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
            // Inline elements
            'small', 'sub', 'sup', 'mark', 'del', 'ins'
        ]);

        // Tags removed together with their content. Unwrapping these to text
        // would put raw script/style source into the page as visible text.
        this.discardedTags = new Set([
            'script', 'style', 'iframe', 'object', 'embed', 'noscript', 'template'
        ]);

        // Attribute names kept on allowed tags. data-* and aria-* are always
        // kept and are handled by prefix in sanitizeAttributes().
        this.allowedAttributes = new Set([
            // Universal attributes
            'class', 'id', 'title', 'lang', 'dir',
            // Link attributes
            'href', 'rel', 'target', 'download',
            // Media attributes
            'src', 'alt', 'width', 'height',
            // Form attributes
            'type', 'value', 'placeholder', 'disabled', 'readonly',
            // Table attributes
            'colspan', 'rowspan', 'scope',
            // Other semantic attributes
            'datetime', 'cite'
        ]);

        // URL-valued attributes and the explicit schemes each may use.
        // Scheme-less (relative) URLs are always accepted.
        this.urlAttributeSchemes = new Map([
            ['href', new Set(['http', 'https', 'mailto', 'tel'])],
            ['src', new Set(['http', 'https', 'data', 'blob'])],
            ['cite', new Set(['http', 'https'])]
        ]);
    }

    /**
     * Extract content while preserving structure for editing
     *
     * @param {HTMLElement} element - The .insertr element to extract content from
     * @returns {Object} - Extracted content with preservation metadata
     */
    extractForEditing(element) {
        return {
            // Complete HTML content for rich editing
            html: element.innerHTML,
            // Plain text for simple editing fallback
            text: this.extractPlainTextWithStructure(element),
            // Element's own attributes (never modified by content editing)
            containerAttributes: this.extractElementAttributes(element),
            // Original state for restoration if needed
            originalHTML: element.innerHTML,
            // Metadata for validation
            elementTag: element.tagName.toLowerCase(),
            hasNestedElements: element.children.length > 0
        };
    }

    /**
     * Apply edited content after validating it for safety
     *
     * Only the element's inner HTML is replaced; attributes on the element
     * itself are not touched.
     *
     * @param {HTMLElement} element - Target element to update
     * @param {string} newHTML - New HTML content from editor
     * @returns {boolean} - True when the content was applied, false when
     *                      sanitizing or applying it threw
     */
    applyFromEditing(element, newHTML) {
        try {
            element.innerHTML = this.validateAndSanitizeHTML(newHTML);
            return true;
        } catch (error) {
            console.error('Failed to apply HTML content:', error);
            return false;
        }
    }

    /**
     * Parse an HTML string into a throwaway container element
     *
     * The container is created in a separate document obtained from
     * document.implementation.createHTMLDocument() when that API exists, so
     * parsing untrusted markup does not happen inside the live page. When the
     * API is missing (e.g. a minimal test double), the global document is
     * used instead.
     *
     * @param {string} html - HTML to parse
     * @returns {HTMLElement} - Detached <div> holding the parsed nodes
     */
    createInertContainer(html) {
        const implementation = document.implementation;
        const ownerDocument =
            implementation && typeof implementation.createHTMLDocument === 'function'
                ? implementation.createHTMLDocument('')
                : document;

        const container = ownerDocument.createElement('div');
        container.innerHTML = html;
        return container;
    }

    /**
     * Validate and sanitize HTML to ensure safety and structure preservation
     *
     * @param {string} html - HTML to validate
     * @returns {string} - Sanitized HTML
     */
    validateAndSanitizeHTML(html) {
        const container = this.createInertContainer(html);
        this.sanitizeElement(container);
        return container.innerHTML;
    }

    /**
     * Recursively sanitize the descendants of an element
     *
     * For each child element:
     * - tags in discardedTags are removed together with their content;
     * - other tags not in allowedTags are replaced by a text node holding
     *   their textContent;
     * - allowed tags have their attributes filtered and are then recursed into.
     * The element passed in is itself neither checked nor modified.
     *
     * @param {HTMLElement} element - Element whose descendants are sanitized
     */
    sanitizeElement(element) {
        // Snapshot first: the loop mutates the live children collection.
        for (const child of Array.from(element.children)) {
            const tagName = child.tagName.toLowerCase();

            if (this.discardedTags.has(tagName)) {
                child.parentNode.removeChild(child);
                continue;
            }

            if (!this.allowedTags.has(tagName)) {
                // Create the text node in the child's own document when known.
                const nodeFactory = child.ownerDocument || document;
                const textNode = nodeFactory.createTextNode(child.textContent);
                child.parentNode.replaceChild(textNode, child);
                continue;
            }

            this.sanitizeAttributes(child);
            this.sanitizeElement(child);
        }
    }

    /**
     * Decide whether a URL attribute value may be kept
     *
     * ASCII whitespace and control characters are stripped before looking for
     * a scheme, so values such as "java\tscript:..." are still recognised as
     * carrying a scheme. Values without a scheme are relative URLs and are
     * accepted.
     *
     * @param {string} value - Raw attribute value
     * @param {Set<string>} allowedSchemes - Lower-case schemes that are acceptable
     * @returns {boolean} - True when the value is relative or uses an allowed scheme
     */
    isSafeUrl(value, allowedSchemes) {
        const compact = String(value).replace(/[\u0000-\u0020\u007f]/g, '').toLowerCase();
        const schemeMatch = /^([a-z][a-z0-9+.-]*):/.exec(compact);

        if (schemeMatch === null) {
            return true;
        }

        return allowedSchemes.has(schemeMatch[1]);
    }

    /**
     * Sanitize element attributes, removing dangerous ones
     *
     * Keeps data-* and aria-* attributes, keeps names listed in
     * allowedAttributes, and additionally requires URL-valued attributes
     * (see urlAttributeSchemes) to be relative or use an allowed scheme.
     *
     * @param {HTMLElement} element - Element to sanitize attributes for
     */
    sanitizeAttributes(element) {
        const attributesToRemove = [];

        for (const attr of Array.from(element.attributes)) {
            const attrName = attr.name.toLowerCase();

            // Always allow data-* and aria-* attributes
            if (attrName.startsWith('data-') || attrName.startsWith('aria-')) {
                continue;
            }

            if (!this.allowedAttributes.has(attrName)) {
                attributesToRemove.push(attrName);
                continue;
            }

            const allowedSchemes = this.urlAttributeSchemes.get(attrName);
            if (allowedSchemes !== undefined && !this.isSafeUrl(attr.value, allowedSchemes)) {
                attributesToRemove.push(attrName);
            }
        }

        // Removal is deferred so the attribute list is not mutated mid-scan.
        attributesToRemove.forEach(attrName => {
            element.removeAttribute(attrName);
        });
    }

    /**
     * Extract plain text for simple editing interfaces
     *
     * Elements without child elements return their textContent unchanged.
     * Otherwise the textContent of every direct text and element child node
     * is concatenated (other node types are skipped) and the result is trimmed.
     *
     * @param {HTMLElement} element - Element to extract text from
     * @returns {string} - Plain text
     */
    extractPlainTextWithStructure(element) {
        if (element.children.length === 0) {
            return element.textContent;
        }

        let text = '';
        for (const node of element.childNodes) {
            if (node.nodeType === Node.TEXT_NODE || node.nodeType === Node.ELEMENT_NODE) {
                text += node.textContent;
            }
        }

        return text.trim();
    }

    /**
     * Extract all attributes from element for preservation
     *
     * @param {HTMLElement} element - Element to extract attributes from
     * @returns {Object} - Map of attribute name to attribute value
     */
    extractElementAttributes(element) {
        const attributes = {};

        for (const attr of element.attributes) {
            attributes[attr.name] = attr.value;
        }

        return attributes;
    }

    /**
     * Restore element attributes (used for element-level preservation)
     *
     * Every current attribute except `contenteditable` is removed, then each
     * saved attribute is set again.
     *
     * @param {HTMLElement} element - Element to restore attributes to
     * @param {Object} attributes - Attributes to restore
     */
    restoreElementAttributes(element, attributes) {
        Array.from(element.attributes).forEach(attr => {
            if (attr.name !== 'contenteditable') { // Preserve editing state
                element.removeAttribute(attr.name);
            }
        });

        Object.entries(attributes).forEach(([name, value]) => {
            element.setAttribute(name, value);
        });
    }

    /**
     * Check whether HTML is free of script/object/embed/iframe elements
     *
     * @param {string} html - HTML to validate
     * @returns {boolean} - False when such an element is present or parsing
     *                      throws, true otherwise
     */
    isValidHTML(html) {
        try {
            const container = this.createInertContainer(html);
            return !container.querySelector('script, object, embed, iframe');
        } catch (error) {
            return false;
        }
    }

    /**
     * Create a copy of HTML content with unsafe attributes stripped
     *
     * Only attributes are filtered here; elements themselves are left in place.
     *
     * @param {string} html - Original HTML
     * @returns {string} - Copy for editing
     */
    createEditableCopy(html) {
        const container = this.createInertContainer(html);

        container.querySelectorAll('*').forEach(element => {
            this.sanitizeAttributes(element);
        });

        return container.innerHTML;
    }

    /**
     * Merge edited content back while preserving specific styled elements
     *
     * For each selector, the i-th match in the edited HTML is replaced by a
     * deep clone of the i-th match in the original HTML. Original matches
     * without a counterpart in the edited HTML are ignored.
     *
     * @param {string} originalHTML - Original HTML content
     * @param {string} editedHTML - Edited HTML content
     * @param {Array} preserveSelectors - CSS selectors for elements to preserve
     * @returns {string} - Merged HTML with preserved elements
     */
    mergeWithPreservation(originalHTML, editedHTML, preserveSelectors = []) {
        if (preserveSelectors.length === 0) {
            return editedHTML;
        }

        const originalDiv = this.createInertContainer(originalHTML);
        const editedDiv = this.createInertContainer(editedHTML);

        preserveSelectors.forEach(selector => {
            const originalElements = originalDiv.querySelectorAll(selector);
            const editedElements = editedDiv.querySelectorAll(selector);

            originalElements.forEach((originalEl, index) => {
                if (editedElements[index]) {
                    editedElements[index].replaceWith(originalEl.cloneNode(true));
                }
            });
        });

        return editedDiv.innerHTML;
    }

    /**
     * Prepare an element's content for editing
     *
     * @param {HTMLElement} element - Element containing content to prepare
     * @returns {Object} - Result of extractForEditing() plus `editableHTML`
     *                     (attribute-sanitized copy) and `isComplex`
     */
    prepareForEditing(element) {
        const extracted = this.extractForEditing(element);

        return {
            ...extracted,
            editableHTML: this.createEditableCopy(extracted.html),
            isComplex: extracted.hasNestedElements
        };
    }

    /**
     * Finalize edited content and apply to element
     *
     * Accepts either an HTML string or an object whose `html` property is a
     * string (an empty string is valid and clears the element). Anything
     * else, including null and undefined, is rejected.
     *
     * @param {HTMLElement} element - Target element
     * @param {string|Object} editedContent - Content from editor
     * @returns {boolean} - Success status
     */
    finalizeEditing(element, editedContent) {
        try {
            if (typeof editedContent === 'string') {
                return this.applyFromEditing(element, editedContent);
            }

            const isRichContent =
                editedContent !== null &&
                typeof editedContent === 'object' &&
                typeof editedContent.html === 'string';

            if (isRichContent) {
                return this.applyFromEditing(element, editedContent.html);
            }

            return false;
        } catch (error) {
            console.error('Failed to finalize editing:', error);
            return false;
        }
    }
}

// Export singleton instance
export const htmlPreservationEngine = new HTMLPreservationEngine();
Extraction', () => { + test('should extract content with preservation metadata', () => { + const mockElement = createMockElement('p', { + innerHTML: 'Hello world!', + classes: ['insertr'], + attributes: { id: 'test-element' } + }); + + const extracted = engine.extractForEditing(mockElement); + + expect(extracted.html).toBe('Hello world!'); + expect(extracted.text).toBe('Hello world!'); + expect(extracted.containerAttributes.class).toBe('insertr'); + expect(extracted.containerAttributes.id).toBe('test-element'); + expect(extracted.elementTag).toBe('p'); + expect(extracted.hasNestedElements).toBe(true); + }); + + test('should handle simple text content', () => { + const mockElement = createMockElement('p', { + innerHTML: 'Simple text content', + textContent: 'Simple text content' + }); + + const extracted = engine.extractForEditing(mockElement); + + expect(extracted.html).toBe('Simple text content'); + expect(extracted.text).toBe('Simple text content'); + expect(extracted.hasNestedElements).toBe(false); + }); + }); + + describe('HTML Validation and Sanitization', () => { + test('should validate safe HTML', () => { + const safeHTML = 'Hello world!'; + expect(engine.isValidHTML(safeHTML)).toBe(true); + }); + + test('should reject dangerous HTML', () => { + const dangerousHTML = 'Hello world!'; + expect(engine.isValidHTML(dangerousHTML)).toBe(false); + }); + + test('should sanitize HTML by removing dangerous elements', () => { + const unsafeHTML = 'Hello safe'; + const sanitized = engine.validateAndSanitizeHTML(unsafeHTML); + + expect(sanitized).not.toContain(''; + + const success = engine.applyFromEditing(mockElement, invalidHTML); + + // Should handle gracefully (either sanitize or reject) + expect(typeof success).toBe('boolean'); + }); + }); + + describe('Plain Text Extraction', () => { + test('should extract plain text preserving structure', () => { + const mockElement = createMockElement('p', { + textContent: 'Hello world and welcome!' 
+ }); + + const plainText = engine.extractPlainTextWithStructure(mockElement); + expect(plainText).toBe('Hello world and welcome!'); + }); + + test('should handle complex nested content', () => { + const mockElement = createMockElement('p', { + children: [ + { nodeType: 3, textContent: 'Hello ' }, + { nodeType: 1, textContent: 'world' }, + { nodeType: 3, textContent: ' and welcome!' } + ], + childNodes: [ + { nodeType: 3, textContent: 'Hello ' }, + { nodeType: 1, textContent: 'world' }, + { nodeType: 3, textContent: ' and welcome!' } + ] + }); + + const plainText = engine.extractPlainTextWithStructure(mockElement); + expect(plainText).toBe('Hello world and welcome!'); + }); + }); + + describe('Editable Content Preparation', () => { + test('should prepare content for safe editing', () => { + const mockElement = createMockElement('p', { + innerHTML: 'Hello world!', + children: [{ tagName: 'STRONG' }] + }); + + const prepared = engine.prepareForEditing(mockElement); + + expect(prepared.html).toBe('Hello world!'); + expect(prepared.editableHTML).toBeDefined(); + expect(prepared.isComplex).toBe(true); + expect(prepared.originalHTML).toBe('Hello world!'); + }); + }); + + describe('Content Finalization', () => { + test('should finalize string content', () => { + const mockElement = createMockElement('p'); + const editedContent = 'New content'; + + const success = engine.finalizeEditing(mockElement, editedContent); + expect(success).toBe(true); + }); + + test('should finalize object content', () => { + const mockElement = createMockElement('p'); + const editedContent = { + html: 'New content', + text: 'New content' + }; + + const success = engine.finalizeEditing(mockElement, editedContent); + expect(success).toBe(true); + }); + + test('should handle invalid content gracefully', () => { + const mockElement = createMockElement('p'); + const invalidContent = null; + + const success = engine.finalizeEditing(mockElement, invalidContent); + expect(success).toBe(false); + }); + 
}); + + describe('Security and Safety', () => { + test('should allow safe tags', () => { + const safeTags = ['strong', 'em', 'a', 'span', 'p', 'div', 'h1', 'h2', 'h3']; + safeTags.forEach(tag => { + expect(engine.allowedTags.has(tag)).toBe(true); + }); + }); + + test('should allow safe attributes', () => { + const safeAttrs = ['class', 'id', 'href', 'title', 'data-test', 'aria-label']; + safeAttrs.forEach(attr => { + expect( + engine.allowedAttributes.has(attr) || + attr.startsWith('data-') || + attr.startsWith('aria-') + ).toBe(true); + }); + }); + + test('should create safe editable copy', () => { + const unsafeHTML = '

Safe content

'; + const safeCopy = engine.createEditableCopy(unsafeHTML); + + expect(safeCopy).toContain('

Safe content

'); + expect(safeCopy).not.toContain('