fix: systematic element matching bug in enhancement pipeline

- Problem: Element ID collisions between similar elements (logo h1 vs hero h1) causing content to be injected into wrong elements - Root cause: Enhancer used naive tag+class matching instead of parser's sophisticated semantic analysis for element identification Systematic solution: - Enhanced parser architecture with exported utilities (GetClasses, ContainsClass) - Added FindElementInDocument() with content-based semantic matching - Replaced naive findAndInjectNodes() with parser-based element matching - Removed code duplication between parser and enhancer packages Backend improvements: - Moved ID generation to backend for single source of truth - Added ElementContext struct for frontend-backend communication - Updated API handlers to support context-based content ID generation Frontend improvements: - Enhanced getElementMetadata() to extract semantic context - Updated save flow to handle both enhanced and non-enhanced elements - Improved API client to use backend-generated content IDs Result: - Unique content IDs: navbar-logo-200530 vs hero-title-a1de7b - Precise element matching using content validation - Single source of truth for DOM utilities in parser package - Eliminated 40+ lines of duplicate code while fixing core bug
2025-09-11 14:14:57 +02:00
parent f73e21ce6e
commit ef1d1083ce
12 changed files with 575 additions and 314 deletions
@@ -17,6 +17,8 @@ import (
 	"github.com/insertr/insertr/internal/db"
 	"github.com/insertr/insertr/internal/db/postgresql"
 	"github.com/insertr/insertr/internal/db/sqlite"
+	"github.com/insertr/insertr/internal/parser"
+	"golang.org/x/net/html"
 )

 // ContentHandler handles all content-related HTTP requests
@@ -232,6 +234,16 @@ func (h *ContentHandler) CreateContent(w http.ResponseWriter, r *http.Request) {
 		siteID = "default" // final fallback
 	}

+	// Determine content ID - use provided ID or generate from element context
+	contentID := req.ID
+	if contentID == "" {
+		if req.ElementContext == nil {
+			http.Error(w, "Either ID or element_context required", http.StatusBadRequest)
+			return
+		}
+		contentID = h.generateContentID(req.ElementContext)
+	}
+
 	// Extract user from request using authentication service
 	userInfo, authErr := h.authService.ExtractUserFromRequest(r)
 	if authErr != nil {
@@ -246,7 +258,7 @@ func (h *ContentHandler) CreateContent(w http.ResponseWriter, r *http.Request) {
 	switch h.database.GetDBType() {
 	case "sqlite3":
 		content, err = h.database.GetSQLiteQueries().CreateContent(context.Background(), sqlite.CreateContentParams{
-			ID:           req.ID,
+			ID:           contentID,
 			SiteID:       siteID,
 			Value:        req.Value,
 			Type:         req.Type,
@@ -254,7 +266,7 @@ func (h *ContentHandler) CreateContent(w http.ResponseWriter, r *http.Request) {
 		})
 	case "postgresql":
 		content, err = h.database.GetPostgreSQLQueries().CreateContent(context.Background(), postgresql.CreateContentParams{
-			ID:           req.ID,
+			ID:           contentID,
 			SiteID:       siteID,
 			Value:        req.Value,
 			Type:         req.Type,
@@ -728,3 +740,41 @@ func (h *ContentHandler) versionMatches(version interface{}, contentID, siteID s
 	}
 	return false
 }
+
+// generateContentID creates a content ID from element context using the parser
+func (h *ContentHandler) generateContentID(ctx *ElementContext) string {
+	// Create virtual node for existing parser ID generation
+	virtualNode := &html.Node{
+		Type: html.ElementNode,
+		Data: ctx.Tag,
+		Attr: []html.Attribute{
+			{Key: "class", Val: strings.Join(ctx.Classes, " ")},
+		},
+	}
+
+	// Add parent context as a virtual parent node if provided
+	if ctx.ParentContext != "" && ctx.ParentContext != "content" {
+		parentNode := &html.Node{
+			Type: html.ElementNode,
+			Data: "section",
+			Attr: []html.Attribute{
+				{Key: "class", Val: ctx.ParentContext},
+			},
+		}
+		parentNode.AppendChild(virtualNode)
+		virtualNode.Parent = parentNode
+	}
+
+	// Add text content for hash generation
+	if ctx.OriginalContent != "" {
+		textNode := &html.Node{
+			Type: html.TextNode,
+			Data: ctx.OriginalContent,
+		}
+		virtualNode.AppendChild(textNode)
+	}
+
+	// Use existing parser ID generator
+	idGenerator := parser.NewIDGenerator()
+	return idGenerator.Generate(virtualNode)
+}
@@ -31,13 +31,23 @@ type ContentVersionsResponse struct {
 	Versions []ContentVersion `json:"versions"`
 }

+// Element context for backend ID generation
+type ElementContext struct {
+	Tag             string   `json:"tag"`
+	Classes         []string `json:"classes"`
+	OriginalContent string   `json:"original_content"`
+	ParentContext   string   `json:"parent_context"`
+	Purpose         string   `json:"purpose"`
+}
+
 // Request models
 type CreateContentRequest struct {
-	ID        string `json:"id"`
-	SiteID    string `json:"site_id,omitempty"`
-	Value     string `json:"value"`
-	Type      string `json:"type"`
-	CreatedBy string `json:"created_by,omitempty"`
+	ID             string          `json:"id,omitempty"`              // For enhanced sites
+	ElementContext *ElementContext `json:"element_context,omitempty"` // For non-enhanced sites
+	SiteID         string          `json:"site_id,omitempty"`
+	Value          string          `json:"value"`
+	Type           string          `json:"type"`
+	CreatedBy      string          `json:"created_by,omitempty"`
 }

 type UpdateContentRequest struct {
@@ -98,52 +98,32 @@ func (e *Enhancer) injectElementContent(doc *html.Node, elem parser.Element) err
 	return nil
 }

-// findAndInjectNodes recursively finds nodes and injects content
-func (e *Enhancer) findAndInjectNodes(node *html.Node, elem parser.Element, contentItem *ContentItem) {
-	if node.Type == html.ElementNode {
-		// Check if this node matches our element criteria
-		classes := getClasses(node)
-		if containsClass(classes, "insertr") && node.Data == elem.Tag {
-			// This might be our target node - inject content
-			e.injector.addContentAttributes(node, elem.ContentID, string(elem.Type))
-
-			if contentItem != nil {
-				switch elem.Type {
-				case parser.ContentText:
-					e.injector.injectTextContent(node, contentItem.Value)
-				case parser.ContentMarkdown:
-					e.injector.injectMarkdownContent(node, contentItem.Value)
-				case parser.ContentLink:
-					e.injector.injectLinkContent(node, contentItem.Value)
-				}
-			}
-		}
+// findAndInjectNodes finds the specific node for this element and injects content
+func (e *Enhancer) findAndInjectNodes(rootNode *html.Node, elem parser.Element, contentItem *ContentItem) {
+	// Use parser-based element matching to find the correct specific node
+	targetNode := e.findNodeInDocument(rootNode, elem)
+	if targetNode == nil {
+		// Element not found - this is normal for elements without content in database
+		return
 	}

-	// Recursively process children
-	for child := node.FirstChild; child != nil; child = child.NextSibling {
-		e.findAndInjectNodes(child, elem, contentItem)
+	// Inject content attributes for the correctly matched node
+	e.injector.addContentAttributes(targetNode, elem.ContentID, string(elem.Type))
+
+	// Inject content if available
+	if contentItem != nil {
+		switch elem.Type {
+		case parser.ContentText:
+			e.injector.injectTextContent(targetNode, contentItem.Value)
+		case parser.ContentMarkdown:
+			e.injector.injectMarkdownContent(targetNode, contentItem.Value)
+		case parser.ContentLink:
+			e.injector.injectLinkContent(targetNode, contentItem.Value)
+		}
 	}
 }

-// Helper functions from parser package
-func getClasses(node *html.Node) []string {
-	for _, attr := range node.Attr {
-		if attr.Key == "class" {
-			return strings.Fields(attr.Val)
-		}
-	}
-	return []string{}
-}
-
-func containsClass(classes []string, target string) bool {
-	for _, class := range classes {
-		if class == target {
-			return true
-		}
-	}
-	return false
-}
+// Helper functions are now provided by the parser package

 // EnhanceDirectory processes all HTML files in a directory
 func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
@@ -295,28 +275,10 @@ func (e *Enhancer) enhanceFileInPlace(filePath string, elements []parser.Element
 	return e.writeHTML(doc, filePath)
 }

-// findNodeInDocument finds a specific node in the HTML document tree
+// findNodeInDocument finds a specific node in the HTML document tree using parser utilities
 func (e *Enhancer) findNodeInDocument(doc *html.Node, elem parser.Element) *html.Node {
-	// This is a simplified approach - in a production system we might need
-	// more sophisticated node matching based on attributes, position, etc.
-	return e.findNodeByTagAndClass(doc, elem.Tag, "insertr")
+	// Use parser's sophisticated matching logic
+	return parser.FindElementInDocument(doc, elem)
 }

-// findNodeByTagAndClass recursively searches for a node with specific tag and class
-func (e *Enhancer) findNodeByTagAndClass(node *html.Node, targetTag, targetClass string) *html.Node {
-	if node.Type == html.ElementNode && node.Data == targetTag {
-		classes := getClasses(node)
-		if containsClass(classes, targetClass) {
-			return node
-		}
-	}
-
-	// Search children
-	for child := node.FirstChild; child != nil; child = child.NextSibling {
-		if result := e.findNodeByTagAndClass(child, targetTag, targetClass); result != nil {
-			return result
-		}
-	}
-
-	return nil
-}
+// All element matching functions are now provided by the parser package
@@ -28,7 +28,9 @@ func (g *IDGenerator) Generate(node *html.Node) string {
 	contentHash := g.getContentHash(node)

 	baseID := g.createBaseID(context, purpose, contentHash)
-	return g.ensureUnique(baseID)
+	finalID := g.ensureUnique(baseID)
+
+	return finalID
 }

 // getSemanticContext determines the semantic context from parent elements
@@ -36,11 +38,11 @@ func (g *IDGenerator) getSemanticContext(node *html.Node) string {
 	// Walk up the tree to find semantic containers
 	parent := node.Parent
 	for parent != nil && parent.Type == html.ElementNode {
-		classes := getClasses(parent)
+		classes := GetClasses(parent)

 		// Check for common semantic section classes
 		for _, class := range []string{"hero", "services", "nav", "navbar", "footer", "about", "contact", "testimonial"} {
-			if containsClass(classes, class) {
+			if ContainsClass(classes, class) {
 				return class
 			}
 		}
@@ -68,7 +70,7 @@ func (g *IDGenerator) getSemanticContext(node *html.Node) string {
 // getPurpose determines the purpose/role of the element
 func (g *IDGenerator) getPurpose(node *html.Node) string {
 	tag := strings.ToLower(node.Data)
-	classes := getClasses(node)
+	classes := GetClasses(node)

 	// Check for specific CSS classes that indicate purpose
 	for _, class := range classes {
@@ -90,15 +90,15 @@ func (p *Parser) parseFile(filePath string) ([]Element, []string, error) {
 // findInsertrElements recursively finds all elements with "insertr" class
 func (p *Parser) findInsertrElements(node *html.Node, filePath string, elements *[]Element, warnings *[]string) {
 	if node.Type == html.ElementNode {
-		classes := getClasses(node)
+		classes := GetClasses(node)

 		// Check if element has "insertr" class
-		if containsClass(classes, "insertr") {
+		if ContainsClass(classes, "insertr") {
 			if isContainer(node) {
 				// Container element - expand to viable children
 				viableChildren := findViableChildren(node)
 				for _, child := range viableChildren {
-					childClasses := getClasses(child)
+					childClasses := GetClasses(child)
 					element, warning := p.createElement(child, filePath, childClasses)
 					*elements = append(*elements, element)
 					if warning != "" {
@@ -181,13 +181,13 @@ func (p *Parser) resolveContentID(node *html.Node) (string, bool) {
 // detectContentType determines the content type based on element and classes
 func (p *Parser) detectContentType(node *html.Node, classes []string) ContentType {
 	// Check for explicit type classes first
-	if containsClass(classes, "insertr-markdown") {
+	if ContainsClass(classes, "insertr-markdown") {
 		return ContentMarkdown
 	}
-	if containsClass(classes, "insertr-link") {
+	if ContainsClass(classes, "insertr-link") {
 		return ContentLink
 	}
-	if containsClass(classes, "insertr-text") {
+	if ContainsClass(classes, "insertr-text") {
 		return ContentText
 	}

@@ -6,8 +6,8 @@ import (
 	"golang.org/x/net/html"
 )

-// getClasses extracts CSS classes from an HTML node
-func getClasses(node *html.Node) []string {
+// GetClasses extracts CSS classes from an HTML node
+func GetClasses(node *html.Node) []string {
 	classAttr := getAttribute(node, "class")
 	if classAttr == "" {
 		return []string{}
@@ -17,8 +17,8 @@ func getClasses(node *html.Node) []string {
 	return classes
 }

-// containsClass checks if a class list contains a specific class
-func containsClass(classes []string, target string) bool {
+// ContainsClass checks if a class list contains a specific class
+func ContainsClass(classes []string, target string) bool {
 	for _, class := range classes {
 		if class == target {
 			return true
@@ -157,3 +157,39 @@ func isSelfClosing(node *html.Node) bool {

 	return selfClosingTags[node.Data]
 }
+
+// FindElementInDocument finds a parser element in HTML document tree using semantic matching
+func FindElementInDocument(doc *html.Node, element Element) *html.Node {
+	return findElementWithContext(doc, element)
+}
+
+// findElementWithContext uses the parser's semantic understanding to find the correct element
+func findElementWithContext(node *html.Node, target Element) *html.Node {
+	if node.Type == html.ElementNode && node.Data == target.Tag {
+		classes := GetClasses(node)
+		if ContainsClass(classes, "insertr") {
+			// Content-based validation for precise matching
+			textContent := extractTextContent(node)
+			nodeContent := strings.TrimSpace(textContent)
+			targetContent := strings.TrimSpace(target.Content)
+
+			if nodeContent == targetContent {
+				return node
+			}
+		}
+	}
+
+	// Recursively search children
+	for child := node.FirstChild; child != nil; child = child.NextSibling {
+		if result := findElementWithContext(child, target); result != nil {
+			return result
+		}
+	}
+
+	return nil
+}
+
+// GetAttribute gets an attribute value from an HTML node (exported version)
+func GetAttribute(node *html.Node, key string) string {
+	return getAttribute(node, key)
+}