- Problem: Element ID collisions between similar elements (logo h1 vs. hero h1) caused content to be injected into the wrong elements.
- Root cause: The enhancer used naive tag+class matching instead of the parser's sophisticated semantic analysis for element identification.

Systematic solution:
- Enhanced parser architecture with exported utilities (GetClasses, ContainsClass)
- Added FindElementInDocument() with content-based semantic matching
- Replaced naive findAndInjectNodes() with parser-based element matching
- Removed code duplication between parser and enhancer packages

Backend improvements:
- Moved ID generation to backend for single source of truth
- Added ElementContext struct for frontend-backend communication
- Updated API handlers to support context-based content ID generation

Frontend improvements:
- Enhanced getElementMetadata() to extract semantic context
- Updated save flow to handle both enhanced and non-enhanced elements
- Improved API client to use backend-generated content IDs

Result:
- Unique content IDs: navbar-logo-200530 vs. hero-title-a1de7b
- Precise element matching using content validation
- Single source of truth for DOM utilities in parser package
- Eliminated 40+ lines of duplicate code while fixing core bug
196 lines
4.6 KiB
Go
196 lines
4.6 KiB
Go
package parser
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// GetClasses extracts CSS classes from an HTML node
|
|
func GetClasses(node *html.Node) []string {
|
|
classAttr := getAttribute(node, "class")
|
|
if classAttr == "" {
|
|
return []string{}
|
|
}
|
|
|
|
classes := strings.Fields(classAttr)
|
|
return classes
|
|
}
|
|
|
|
// ContainsClass reports whether target appears in classes. The comparison is
// an exact, case-sensitive string match; a nil or empty list never matches.
func ContainsClass(classes []string, target string) bool {
	for i := 0; i < len(classes); i++ {
		if classes[i] == target {
			return true
		}
	}
	return false
}
|
|
|
|
// getAttribute gets an attribute value from an HTML node
|
|
func getAttribute(node *html.Node, key string) string {
|
|
for _, attr := range node.Attr {
|
|
if attr.Key == key {
|
|
return attr.Val
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// extractTextContent gets the text content from an HTML node
|
|
func extractTextContent(node *html.Node) string {
|
|
var text strings.Builder
|
|
extractTextRecursive(node, &text)
|
|
return strings.TrimSpace(text.String())
|
|
}
|
|
|
|
// extractTextRecursive recursively extracts text from node and children
|
|
func extractTextRecursive(node *html.Node, text *strings.Builder) {
|
|
if node.Type == html.TextNode {
|
|
text.WriteString(node.Data)
|
|
}
|
|
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
// Skip script and style elements
|
|
if child.Type == html.ElementNode &&
|
|
(child.Data == "script" || child.Data == "style") {
|
|
continue
|
|
}
|
|
extractTextRecursive(child, text)
|
|
}
|
|
}
|
|
|
|
// hasOnlyTextContent checks if a node contains only text content (no nested HTML elements)
|
|
func hasOnlyTextContent(node *html.Node) bool {
|
|
if node.Type != html.ElementNode {
|
|
return false
|
|
}
|
|
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
switch child.Type {
|
|
case html.ElementNode:
|
|
// Found a nested HTML element - not text-only
|
|
return false
|
|
case html.TextNode:
|
|
// Text nodes are fine, continue checking
|
|
continue
|
|
default:
|
|
// Comments, etc. - continue checking
|
|
continue
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// isContainer checks if a tag is typically used as a container element
|
|
func isContainer(node *html.Node) bool {
|
|
if node.Type != html.ElementNode {
|
|
return false
|
|
}
|
|
|
|
containerTags := map[string]bool{
|
|
"div": true,
|
|
"section": true,
|
|
"article": true,
|
|
"header": true,
|
|
"footer": true,
|
|
"main": true,
|
|
"aside": true,
|
|
"nav": true,
|
|
}
|
|
|
|
return containerTags[node.Data]
|
|
}
|
|
|
|
// findViableChildren finds all child elements that are viable for editing
|
|
func findViableChildren(node *html.Node) []*html.Node {
|
|
var viable []*html.Node
|
|
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
// Skip whitespace-only text nodes
|
|
if child.Type == html.TextNode {
|
|
if strings.TrimSpace(child.Data) == "" {
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Only consider element nodes
|
|
if child.Type != html.ElementNode {
|
|
continue
|
|
}
|
|
|
|
// Skip self-closing elements for now
|
|
if isSelfClosing(child) {
|
|
continue
|
|
}
|
|
|
|
// Check if element has only text content
|
|
if hasOnlyTextContent(child) {
|
|
viable = append(viable, child)
|
|
}
|
|
}
|
|
|
|
return viable
|
|
}
|
|
|
|
// isSelfClosing checks if an element is typically self-closing
|
|
func isSelfClosing(node *html.Node) bool {
|
|
if node.Type != html.ElementNode {
|
|
return false
|
|
}
|
|
|
|
selfClosingTags := map[string]bool{
|
|
"img": true,
|
|
"input": true,
|
|
"br": true,
|
|
"hr": true,
|
|
"meta": true,
|
|
"link": true,
|
|
"area": true,
|
|
"base": true,
|
|
"col": true,
|
|
"embed": true,
|
|
"source": true,
|
|
"track": true,
|
|
"wbr": true,
|
|
}
|
|
|
|
return selfClosingTags[node.Data]
|
|
}
|
|
|
|
// FindElementInDocument finds a parser element in HTML document tree using semantic matching
|
|
func FindElementInDocument(doc *html.Node, element Element) *html.Node {
|
|
return findElementWithContext(doc, element)
|
|
}
|
|
|
|
// findElementWithContext uses the parser's semantic understanding to find the correct element
|
|
func findElementWithContext(node *html.Node, target Element) *html.Node {
|
|
if node.Type == html.ElementNode && node.Data == target.Tag {
|
|
classes := GetClasses(node)
|
|
if ContainsClass(classes, "insertr") {
|
|
// Content-based validation for precise matching
|
|
textContent := extractTextContent(node)
|
|
nodeContent := strings.TrimSpace(textContent)
|
|
targetContent := strings.TrimSpace(target.Content)
|
|
|
|
if nodeContent == targetContent {
|
|
return node
|
|
}
|
|
}
|
|
}
|
|
|
|
// Recursively search children
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
if result := findElementWithContext(child, target); result != nil {
|
|
return result
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetAttribute gets an attribute value from an HTML node (exported version)
|
|
func GetAttribute(node *html.Node, key string) string {
|
|
return getAttribute(node, key)
|
|
}
|