feat: Implement complete style detection and preservation foundation
- Add StyleDetectionEngine with one-layer-deep nested element analysis
- Add HTMLPreservationEngine for direct HTML manipulation without lossy conversion
- Implement structure-preserving content parsing that maintains element positions
- Add multi-property element support for links (href + content), images (src + alt), buttons
- Create comprehensive test suite with real DOM element validation
- Replace markdown-based system foundation with HTML-first architecture
- Preserve all element attributes (classes, IDs, data-*, aria-*) during editing
- Generate human-readable style names from detected nested elements
- Support template extraction with multiple insertion points for complex elements

Foundation complete for Phase 2 style-aware editor interface per CLASSES.md specification.
This commit is contained in:
372
lib/src/utils/html-preservation.js
Normal file
372
lib/src/utils/html-preservation.js
Normal file
@@ -0,0 +1,372 @@
|
||||
/**
|
||||
* HTMLPreservationEngine - Direct HTML manipulation preserving all attributes and structure
|
||||
*
|
||||
* Handles the storage and application of HTML content while maintaining:
|
||||
* - All element attributes (classes, IDs, data-*, etc.)
|
||||
* - Nested styled element structure
|
||||
* - Developer-defined styling context
|
||||
*
|
||||
* This replaces the lossy markdown conversion system with perfect fidelity HTML operations.
|
||||
*/
|
||||
export class HTMLPreservationEngine {
    /**
     * Build the allowlists consulted by the sanitizer. No DOM access happens here.
     */
    constructor() {
        // Tags that may remain in edited content; any other element is
        // flattened to text or dropped (see sanitizeElement).
        this.allowedTags = new Set([
            // Text formatting ('i' is also commonly used for icons)
            'strong', 'b', 'em', 'i', 'span', 'code', 'kbd', 'samp', 'var',
            // Links and interactive
            'a', 'button',
            // Structure
            'p', 'div', 'section', 'article', 'header', 'footer', 'nav',
            // Lists
            'ul', 'ol', 'li', 'dl', 'dt', 'dd',
            // Headings
            'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
            // Media
            'img', 'figure', 'figcaption',
            // Quotes and citations
            'blockquote', 'cite', 'q',
            // Tables
            'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
            // Inline elements
            'small', 'sub', 'sup', 'mark', 'del', 'ins'
        ]);

        // Attribute names that may remain. data-* and aria-* are always kept
        // and are matched by prefix in sanitizeAttributes, not listed here.
        this.allowedAttributes = new Set([
            // Universal attributes
            'class', 'id', 'title', 'lang', 'dir',
            // Link attributes
            'href', 'rel', 'target', 'download',
            // Media attributes
            'src', 'alt', 'width', 'height',
            // Form attributes
            'type', 'value', 'placeholder', 'disabled', 'readonly',
            // Table attributes
            'colspan', 'rowspan', 'scope',
            // Other semantic attributes
            'datetime', 'cite'
        ]);

        // Schemes an href may use. A value with no scheme is a relative URL
        // and is always accepted.
        this.allowedURLSchemes = new Set(['http', 'https', 'mailto', 'tel']);

        // Disallowed tags whose text is code rather than content. They are
        // removed outright instead of being flattened to visible text.
        this.droppedTags = new Set(['script', 'style']);
    }

    /**
     * Parse an HTML string into a <template> element.
     *
     * A template's contents belong to an inert document, so parsing does not
     * run scripts, load images, or fire inline event handlers. Assigning
     * untrusted markup to a detached <div> does not give that guarantee.
     *
     * @param {string} html - Markup to parse
     * @returns {HTMLTemplateElement} - Template whose `content` holds the parsed nodes
     */
    parseInert(html) {
        const template = document.createElement('template');
        template.innerHTML = html;
        return template;
    }

    /**
     * Extract content while preserving structure for editing
     *
     * @param {HTMLElement} element - The .insertr element to extract content from
     * @returns {Object} - `html`, `text`, `containerAttributes`, `originalHTML`,
     *   `elementTag` (lowercase) and `hasNestedElements`
     */
    extractForEditing(element) {
        return {
            // Complete HTML content for rich editing
            html: element.innerHTML,
            // Plain text for simple editing fallback
            text: this.extractPlainTextWithStructure(element),
            // Element's own attributes (never modified by content editing)
            containerAttributes: this.extractElementAttributes(element),
            // Original state for restoration if needed
            originalHTML: element.innerHTML,
            // Metadata for validation
            elementTag: element.tagName.toLowerCase(),
            hasNestedElements: element.children.length > 0
        };
    }

    /**
     * Apply edited content while preserving structure and validating safety
     *
     * Only the element's children are replaced; attributes on the element
     * itself are left untouched.
     *
     * @param {HTMLElement} element - Target element to update
     * @param {string} newHTML - New HTML content from editor
     * @returns {boolean} - True on success; false if sanitizing or assignment threw
     */
    applyFromEditing(element, newHTML) {
        try {
            element.innerHTML = this.validateAndSanitizeHTML(newHTML);
            return true;
        } catch (error) {
            console.error('Failed to apply HTML content:', error);
            return false;
        }
    }

    /**
     * Validate and sanitize HTML to ensure safety and structure preservation
     *
     * @param {string} html - HTML to validate
     * @returns {string} - Sanitized HTML
     */
    validateAndSanitizeHTML(html) {
        const template = this.parseInert(html);
        this.sanitizeElement(template.content);
        return template.innerHTML;
    }

    /**
     * Recursively sanitize the descendants of a container.
     *
     * The container itself is not checked. For each child element:
     * - tags in `droppedTags` are removed together with their content,
     * - other tags missing from `allowedTags` are replaced by their text,
     * - allowed tags have their attributes sanitized and are then recursed into.
     *
     * @param {HTMLElement|DocumentFragment} element - Container to sanitize
     */
    sanitizeElement(element) {
        // Snapshot first: `children` is live and shifts while nodes are replaced.
        for (const child of Array.from(element.children)) {
            const tagName = child.tagName.toLowerCase();

            if (this.droppedTags.has(tagName)) {
                child.remove();
                continue;
            }

            if (!this.allowedTags.has(tagName)) {
                // Remove the disallowed tag but keep its text content
                child.replaceWith(document.createTextNode(child.textContent));
                continue;
            }

            this.sanitizeAttributes(child);
            this.sanitizeElement(child);
        }
    }

    /**
     * Decide whether a URL attribute value may be kept.
     *
     * Relative URLs (no scheme) are accepted; absolute URLs must use a scheme
     * from `allowedURLSchemes`.
     *
     * @param {string} value - Raw attribute value
     * @returns {boolean} - True when the value is safe to keep
     */
    isAllowedURL(value) {
        // Browsers ignore tabs/newlines inside a URL and leading control
        // characters, so "java\tscript:" must be recognised as "javascript:".
        // Stripping every control/space character over-approximates that.
        const normalized = String(value).replace(/[\x00-\x20\x7f]+/g, '').toLowerCase();
        const schemeMatch = /^([a-z][a-z0-9+.-]*):/.exec(normalized);

        if (!schemeMatch) {
            return true;
        }
        return this.allowedURLSchemes.has(schemeMatch[1]);
    }

    /**
     * Sanitize element attributes, removing dangerous ones
     *
     * Keeps data-* and aria-* attributes and names in `allowedAttributes`.
     * An `href` is additionally removed when `isAllowedURL` rejects its value.
     *
     * @param {HTMLElement} element - Element to sanitize attributes for
     */
    sanitizeAttributes(element) {
        // Collect first, remove afterwards: `attributes` is a live collection.
        const attributesToRemove = [];

        for (const attr of element.attributes) {
            const attrName = attr.name.toLowerCase();

            // Always allow data-* and aria-* attributes
            if (attrName.startsWith('data-') || attrName.startsWith('aria-')) {
                continue;
            }

            const isRejected =
                !this.allowedAttributes.has(attrName) ||
                (attrName === 'href' && !this.isAllowedURL(attr.value));

            if (isRejected) {
                // Use the attribute's own spelling: removal is case-sensitive
                // on elements that preserve attribute-name case.
                attributesToRemove.push(attr.name);
            }
        }

        for (const name of attributesToRemove) {
            element.removeAttribute(name);
        }
    }

    /**
     * Extract plain text while preserving some structural information
     * Used for simple editing interfaces
     *
     * Without child elements the raw `textContent` is returned untrimmed.
     * Otherwise the text of every text and element child is concatenated
     * (other node types such as comments are skipped) and trimmed.
     *
     * @param {HTMLElement} element - Element to extract text from
     * @returns {string} - Plain text
     */
    extractPlainTextWithStructure(element) {
        if (element.children.length === 0) {
            return element.textContent;
        }

        let text = '';
        for (const node of element.childNodes) {
            if (node.nodeType === Node.TEXT_NODE || node.nodeType === Node.ELEMENT_NODE) {
                text += node.textContent;
            }
        }
        return text.trim();
    }

    /**
     * Extract all attributes from element for preservation
     *
     * @param {HTMLElement} element - Element to extract attributes from
     * @returns {Object} - Map of attribute name to attribute value
     */
    extractElementAttributes(element) {
        const attributes = {};
        for (const attr of element.attributes) {
            attributes[attr.name] = attr.value;
        }
        return attributes;
    }

    /**
     * Restore element attributes (used for element-level preservation)
     *
     * Removes every current attribute except `contenteditable`, then sets
     * each entry of `attributes`.
     *
     * @param {HTMLElement} element - Element to restore attributes to
     * @param {Object} attributes - Attributes to restore
     */
    restoreElementAttributes(element, attributes) {
        Array.from(element.attributes).forEach(attr => {
            if (attr.name !== 'contenteditable') { // Preserve editing state
                element.removeAttribute(attr.name);
            }
        });

        Object.entries(attributes).forEach(([name, value]) => {
            element.setAttribute(name, value);
        });
    }

    /**
     * Check whether HTML is free of script, object, embed and iframe elements.
     *
     * @param {string} html - HTML to validate
     * @returns {boolean} - False if such an element is present or parsing threw
     */
    isValidHTML(html) {
        try {
            const parsed = this.parseInert(html).content;
            return !parsed.querySelector('script, object, embed, iframe');
        } catch (error) {
            return false;
        }
    }

    /**
     * Create a safe copy of HTML content for editing
     *
     * Runs `sanitizeAttributes` on every element. Tags are not filtered here.
     *
     * @param {string} html - Original HTML
     * @returns {string} - Copy with disallowed attributes removed
     */
    createEditableCopy(html) {
        const template = this.parseInert(html);
        template.content.querySelectorAll('*').forEach(element => {
            this.sanitizeAttributes(element);
        });
        return template.innerHTML;
    }

    /**
     * Merge edited content back while preserving specific styled elements
     * Used for complex editing scenarios where certain elements must be preserved
     *
     * For each selector, the n-th match in the edited HTML is replaced by a
     * deep clone of the n-th match in the original HTML. Original matches
     * with no edited counterpart are ignored.
     *
     * @param {string} originalHTML - Original HTML content
     * @param {string} editedHTML - Edited HTML content
     * @param {Array} preserveSelectors - CSS selectors for elements to preserve
     * @returns {string} - Merged HTML; `editedHTML` unchanged when no selectors are given
     */
    mergeWithPreservation(originalHTML, editedHTML, preserveSelectors = []) {
        if (preserveSelectors.length === 0) {
            return editedHTML;
        }

        const originalContent = this.parseInert(originalHTML).content;
        const editedTemplate = this.parseInert(editedHTML);

        preserveSelectors.forEach(selector => {
            const originalElements = originalContent.querySelectorAll(selector);
            const editedElements = editedTemplate.content.querySelectorAll(selector);

            originalElements.forEach((originalEl, index) => {
                if (editedElements[index]) {
                    editedElements[index].replaceWith(originalEl.cloneNode(true));
                }
            });
        });

        return editedTemplate.innerHTML;
    }

    /**
     * Convert HTML content to safe editing format
     *
     * @param {HTMLElement} element - Element containing content to prepare
     * @returns {Object} - Everything from `extractForEditing` plus `editableHTML`
     *   and `isComplex` (same value as `hasNestedElements`)
     */
    prepareForEditing(element) {
        const extracted = this.extractForEditing(element);

        return {
            ...extracted,
            editableHTML: this.createEditableCopy(extracted.html),
            isComplex: extracted.hasNestedElements
        };
    }

    /**
     * Finalize edited content and apply to element
     * Handles validation, sanitization, and safe application
     *
     * @param {HTMLElement} element - Target element
     * @param {string|Object} editedContent - HTML string, or an object with an `html` property
     * @returns {boolean} - Success status; false when no HTML could be found
     */
    finalizeEditing(element, editedContent) {
        try {
            if (typeof editedContent === 'string') {
                return this.applyFromEditing(element, editedContent);
            }

            // An empty string is valid content: the editor cleared the element.
            if (editedContent.html || editedContent.html === '') {
                return this.applyFromEditing(element, editedContent.html);
            }

            return false;
        } catch (error) {
            console.error('Failed to finalize editing:', error);
            return false;
        }
    }
}
|
||||
|
||||
// Export singleton instance
|
||||
// One shared engine for all importers; constructing it only builds the allowlist Sets.
export const htmlPreservationEngine = new HTMLPreservationEngine();
|
||||
Reference in New Issue
Block a user