- Add StyleDetectionEngine with one-layer-deep nested element analysis
- Add HTMLPreservationEngine for direct HTML manipulation without lossy conversion
- Implement structure-preserving content parsing that maintains element positions
- Add multi-property element support for links (href + content), images (src + alt), buttons
- Create comprehensive test suite with real DOM element validation
- Replace markdown-based system foundation with HTML-first architecture
- Preserve all element attributes (classes, IDs, data-*, aria-*) during editing
- Generate human-readable style names from detected nested elements
- Support template extraction with multiple insertion points for complex elements

Foundation complete for Phase 2 style-aware editor interface per CLASSES.md specification.
372 lines
13 KiB
JavaScript
372 lines
13 KiB
JavaScript
/**
 * HTMLPreservationEngine - Direct HTML manipulation preserving all attributes and structure
 *
 * Handles the storage and application of HTML content while maintaining:
 * - All element attributes (classes, IDs, data-*, etc.)
 * - Nested styled element structure
 * - Developer-defined styling context
 *
 * This replaces the lossy markdown conversion system with perfect fidelity HTML operations.
 *
 * All methods that parse HTML strings rely on the global `document` and do the
 * parsing inside a separate document (see `createInertContainer`) so that
 * untrusted markup is never instantiated in the live page before it is cleaned.
 */
export class HTMLPreservationEngine {
  constructor() {
    // Tags that survive sanitization; anything else is flattened to plain text.
    this.allowedTags = new Set([
      // Text formatting ('i' doubles as the usual icon element)
      'strong', 'b', 'em', 'i', 'span', 'code', 'kbd', 'samp', 'var',
      // Links and interactive
      'a', 'button',
      // Structure
      'p', 'div', 'section', 'article', 'header', 'footer', 'nav',
      // Lists
      'ul', 'ol', 'li', 'dl', 'dt', 'dd',
      // Headings
      'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
      // Media
      'img', 'figure', 'figcaption',
      // Quotes and citations
      'blockquote', 'cite', 'q',
      // Tables
      'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td',
      // Inline elements
      'small', 'sub', 'sup', 'mark', 'del', 'ins',
    ]);

    // Attribute names that survive sanitization. data-* and aria-* are
    // additionally accepted by prefix in sanitizeAttributes().
    this.allowedAttributes = new Set([
      // Universal attributes
      'class', 'id', 'title', 'lang', 'dir',
      // Link attributes
      'href', 'rel', 'target', 'download',
      // Media attributes
      'src', 'alt', 'width', 'height',
      // Form attributes
      'type', 'value', 'placeholder', 'disabled', 'readonly',
      // Table attributes
      'colspan', 'rowspan', 'scope',
      // Other semantic attributes
      'datetime', 'cite',
    ]);

    // URL schemes accepted in href values; scheme-less (relative) URLs are
    // always accepted.
    this.allowedUrlSchemes = new Set(['http', 'https', 'mailto', 'tel']);

    // Disallowed tags whose text content is source code rather than page
    // content; these are removed together with their content.
    this.discardedTags = new Set(['script', 'style']);
  }

  /**
   * Parse an HTML string into a detached <div> that belongs to a separate,
   * freshly created document instead of the live page document.
   *
   * Parsing untrusted markup with `innerHTML` on an element owned by the live
   * document can start resource loads and run inline handlers such as
   * `<img onerror>` before any sanitization has happened; a document created
   * through `createHTMLDocument` has no browsing context, so the markup stays
   * inert while it is inspected.
   *
   * @param {string} html - HTML to parse
   * @returns {HTMLElement} - Container <div> holding the parsed nodes
   */
  createInertContainer(html) {
    const inertDocument = document.implementation.createHTMLDocument('');
    const container = inertDocument.createElement('div');
    container.innerHTML = html;
    return container;
  }

  /**
   * Extract content while preserving structure for editing
   *
   * @param {HTMLElement} element - The .insertr element to extract content from
   * @returns {Object} - Extracted content with preservation metadata
   */
  extractForEditing(element) {
    const currentHTML = element.innerHTML;

    return {
      // Complete HTML content for rich editing
      html: currentHTML,
      // Plain text for simple editing fallback
      text: this.extractPlainTextWithStructure(element),
      // Element's own attributes (never modified by content editing)
      containerAttributes: this.extractElementAttributes(element),
      // Original state for restoration if needed
      originalHTML: currentHTML,
      // Metadata for validation
      elementTag: element.tagName.toLowerCase(),
      hasNestedElements: element.children.length > 0,
    };
  }

  /**
   * Apply edited content while preserving structure and validating safety
   *
   * Only the element's inner HTML is replaced; attributes on the element
   * itself are not touched.
   *
   * @param {HTMLElement} element - Target element to update
   * @param {string} newHTML - New HTML content from editor
   * @returns {boolean} - Success status (false if sanitizing or applying threw)
   */
  applyFromEditing(element, newHTML) {
    try {
      element.innerHTML = this.validateAndSanitizeHTML(newHTML);
      return true;
    } catch (error) {
      console.error('Failed to apply HTML content:', error);
      return false;
    }
  }

  /**
   * Validate and sanitize HTML to ensure safety and structure preservation
   *
   * @param {string} html - HTML to validate
   * @returns {string} - Sanitized HTML
   */
  validateAndSanitizeHTML(html) {
    const container = this.createInertContainer(html);
    this.sanitizeElement(container);
    return container.innerHTML;
  }

  /**
   * Recursively sanitize the descendants of an element (the element itself is
   * not checked).
   *
   * - `<script>` / `<style>` are removed together with their content.
   * - Any other tag missing from `allowedTags` is replaced by its text
   *   content. The subtree is deliberately flattened instead of unwrapped so
   *   that no markup nested inside an unknown element survives.
   * - Allowed tags get their attributes sanitized and are then recursed into.
   *
   * @param {HTMLElement} element - Element whose children are sanitized
   */
  sanitizeElement(element) {
    // Snapshot the children: the live collection changes while we replace nodes.
    for (const child of Array.from(element.children)) {
      const tagName = child.tagName.toLowerCase();

      if (this.discardedTags.has(tagName)) {
        child.remove();
        continue;
      }

      if (!this.allowedTags.has(tagName)) {
        // replaceWith() turns a string argument into a text node.
        child.replaceWith(child.textContent);
        continue;
      }

      this.sanitizeAttributes(child);
      this.sanitizeElement(child);
    }
  }

  /**
   * Decide whether a URL attribute value is acceptable.
   *
   * A value without a scheme is a relative reference and is accepted; a value
   * with a scheme is accepted only if the scheme is in `allowedUrlSchemes`.
   *
   * @param {string} value - Raw attribute value
   * @returns {boolean} - True if the URL may be kept
   */
  isSafeUrl(value) {
    // URL parsers ignore tabs/newlines and leading control characters, so
    // strip them before looking for a scheme ("java\tscript:" must not pass).
    const normalized = String(value).replace(/[\u0000-\u0020]/g, '').toLowerCase();
    const schemeMatch = normalized.match(/^([a-z][a-z0-9+.-]*):/);

    if (!schemeMatch) {
      return true;
    }
    return this.allowedUrlSchemes.has(schemeMatch[1]);
  }

  /**
   * Sanitize element attributes, removing dangerous ones
   *
   * Keeps every data-* and aria-* attribute, keeps attributes listed in
   * `allowedAttributes`, and drops everything else (which includes inline
   * event handlers and `style`). `href` values are additionally checked with
   * `isSafeUrl`.
   *
   * @param {HTMLElement} element - Element to sanitize attributes for
   */
  sanitizeAttributes(element) {
    // Collect first, remove afterwards: removing while iterating would
    // disturb the live attribute collection.
    const attributesToRemove = [];

    for (const attr of element.attributes) {
      const attrName = attr.name.toLowerCase();

      if (attrName.startsWith('data-') || attrName.startsWith('aria-')) {
        continue;
      }

      const isAllowedName = this.allowedAttributes.has(attrName);
      const isUnsafeLink = attrName === 'href' && !this.isSafeUrl(attr.value);

      if (!isAllowedName || isUnsafeLink) {
        attributesToRemove.push(attrName);
      }
    }

    for (const attrName of attributesToRemove) {
      element.removeAttribute(attrName);
    }
  }

  /**
   * Extract plain text from an element for simple editing interfaces.
   *
   * Elements without child elements return their `textContent` unchanged;
   * elements with child elements return the concatenated text of their text
   * and element child nodes, trimmed.
   *
   * @param {HTMLElement} element - Element to extract text from
   * @returns {string} - Plain text
   */
  extractPlainTextWithStructure(element) {
    if (element.children.length === 0) {
      return element.textContent;
    }

    let text = '';
    for (const node of element.childNodes) {
      // Other node types (e.g. comments) contribute no text.
      if (node.nodeType === Node.TEXT_NODE || node.nodeType === Node.ELEMENT_NODE) {
        text += node.textContent;
      }
    }

    return text.trim();
  }

  /**
   * Extract all attributes from element for preservation
   *
   * @param {HTMLElement} element - Element to extract attributes from
   * @returns {Object} - Map of attribute name to attribute value
   */
  extractElementAttributes(element) {
    const attributes = {};

    for (const { name, value } of element.attributes) {
      attributes[name] = value;
    }

    return attributes;
  }

  /**
   * Restore element attributes (used for element-level preservation)
   *
   * Removes every current attribute except `contenteditable` (so an active
   * editing state survives) and then sets the saved ones.
   *
   * @param {HTMLElement} element - Element to restore attributes to
   * @param {Object} attributes - Attributes to restore (name -> value)
   */
  restoreElementAttributes(element, attributes) {
    for (const { name } of Array.from(element.attributes)) {
      if (name !== 'contenteditable') {
        element.removeAttribute(name);
      }
    }

    // A missing attribute map simply restores nothing.
    for (const [name, value] of Object.entries(attributes || {})) {
      element.setAttribute(name, value);
    }
  }

  /**
   * Check if HTML content is safe and maintains expected structure
   *
   * Currently this only rejects markup containing script, object, embed or
   * iframe elements.
   *
   * @param {string} html - HTML to validate
   * @returns {boolean} - True if HTML is valid and safe
   */
  isValidHTML(html) {
    try {
      const container = this.createInertContainer(html);
      return container.querySelector('script, object, embed, iframe') === null;
    } catch (error) {
      return false;
    }
  }

  /**
   * Create a copy of HTML content for editing with unsafe attributes removed.
   *
   * Only attributes are sanitized here; the element structure (including
   * tags outside `allowedTags`) is left as-is. Full tag sanitization happens
   * in `validateAndSanitizeHTML` when content is applied.
   *
   * @param {string} html - Original HTML
   * @returns {string} - Copy for editing
   */
  createEditableCopy(html) {
    const container = this.createInertContainer(html);

    for (const descendant of container.querySelectorAll('*')) {
      this.sanitizeAttributes(descendant);
    }

    return container.innerHTML;
  }

  /**
   * Merge edited content back while preserving specific styled elements
   * Used for complex editing scenarios where certain elements must be preserved
   *
   * For each selector, the n-th match in the edited HTML is replaced by a
   * deep clone of the n-th match in the original HTML. Original matches with
   * no counterpart in the edited HTML are ignored.
   *
   * @param {string} originalHTML - Original HTML content
   * @param {string} editedHTML - Edited HTML content
   * @param {Array} preserveSelectors - CSS selectors for elements to preserve
   * @returns {string} - Merged HTML with preserved elements
   */
  mergeWithPreservation(originalHTML, editedHTML, preserveSelectors = []) {
    if (!preserveSelectors || preserveSelectors.length === 0) {
      return editedHTML;
    }

    const originalContainer = this.createInertContainer(originalHTML);
    const editedContainer = this.createInertContainer(editedHTML);

    for (const selector of preserveSelectors) {
      const originalMatches = originalContainer.querySelectorAll(selector);
      const editedMatches = editedContainer.querySelectorAll(selector);

      originalMatches.forEach((originalEl, index) => {
        const editedEl = editedMatches[index];
        if (editedEl) {
          editedEl.replaceWith(originalEl.cloneNode(true));
        }
      });
    }

    return editedContainer.innerHTML;
  }

  /**
   * Convert HTML content to safe editing format
   * Ensures content can be safely edited without losing essential structure
   *
   * @param {HTMLElement} element - Element containing content to prepare
   * @returns {Object} - Result of extractForEditing plus `editableHTML` and `isComplex`
   */
  prepareForEditing(element) {
    const extracted = this.extractForEditing(element);

    return {
      ...extracted,
      editableHTML: this.createEditableCopy(extracted.html),
      isComplex: extracted.hasNestedElements,
    };
  }

  /**
   * Finalize edited content and apply to element
   * Handles validation, sanitization, and safe application
   *
   * Accepts either an HTML string or an object with a string `html`
   * property. An empty string is valid input and clears the element.
   *
   * @param {HTMLElement} element - Target element
   * @param {string|Object} editedContent - Content from editor
   * @returns {boolean} - Success status (false for unsupported input)
   */
  finalizeEditing(element, editedContent) {
    try {
      if (typeof editedContent === 'string') {
        return this.applyFromEditing(element, editedContent);
      }

      // Check the type rather than truthiness so that { html: '' } is applied.
      if (editedContent != null && typeof editedContent.html === 'string') {
        return this.applyFromEditing(element, editedContent.html);
      }

      return false;
    } catch (error) {
      console.error('Failed to finalize editing:', error);
      return false;
    }
  }
}
|
|
|
|
// Export a shared singleton instance of the engine
export const htmlPreservationEngine = new HTMLPreservationEngine();