fix: systematic element matching bug in enhancement pipeline
- Problem: Element ID collisions between similar elements (logo h1 vs hero h1) causing content to be injected into wrong elements - Root cause: Enhancer used naive tag+class matching instead of parser's sophisticated semantic analysis for element identification Systematic solution: - Enhanced parser architecture with exported utilities (GetClasses, ContainsClass) - Added FindElementInDocument() with content-based semantic matching - Replaced naive findAndInjectNodes() with parser-based element matching - Removed code duplication between parser and enhancer packages Backend improvements: - Moved ID generation to backend for single source of truth - Added ElementContext struct for frontend-backend communication - Updated API handlers to support context-based content ID generation Frontend improvements: - Enhanced getElementMetadata() to extract semantic context - Updated save flow to handle both enhanced and non-enhanced elements - Improved API client to use backend-generated content IDs Result: - Unique content IDs: navbar-logo-200530 vs hero-title-a1de7b - Precise element matching using content validation - Single source of truth for DOM utilities in parser package - Eliminated 40+ lines of duplicate code while fixing core bug
This commit is contained in:
@@ -17,6 +17,8 @@ import (
|
||||
"github.com/insertr/insertr/internal/db"
|
||||
"github.com/insertr/insertr/internal/db/postgresql"
|
||||
"github.com/insertr/insertr/internal/db/sqlite"
|
||||
"github.com/insertr/insertr/internal/parser"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// ContentHandler handles all content-related HTTP requests
|
||||
@@ -232,6 +234,16 @@ func (h *ContentHandler) CreateContent(w http.ResponseWriter, r *http.Request) {
|
||||
siteID = "default" // final fallback
|
||||
}
|
||||
|
||||
// Determine content ID - use provided ID or generate from element context
|
||||
contentID := req.ID
|
||||
if contentID == "" {
|
||||
if req.ElementContext == nil {
|
||||
http.Error(w, "Either ID or element_context required", http.StatusBadRequest)
|
||||
return
|
||||
}
|
||||
contentID = h.generateContentID(req.ElementContext)
|
||||
}
|
||||
|
||||
// Extract user from request using authentication service
|
||||
userInfo, authErr := h.authService.ExtractUserFromRequest(r)
|
||||
if authErr != nil {
|
||||
@@ -246,7 +258,7 @@ func (h *ContentHandler) CreateContent(w http.ResponseWriter, r *http.Request) {
|
||||
switch h.database.GetDBType() {
|
||||
case "sqlite3":
|
||||
content, err = h.database.GetSQLiteQueries().CreateContent(context.Background(), sqlite.CreateContentParams{
|
||||
ID: req.ID,
|
||||
ID: contentID,
|
||||
SiteID: siteID,
|
||||
Value: req.Value,
|
||||
Type: req.Type,
|
||||
@@ -254,7 +266,7 @@ func (h *ContentHandler) CreateContent(w http.ResponseWriter, r *http.Request) {
|
||||
})
|
||||
case "postgresql":
|
||||
content, err = h.database.GetPostgreSQLQueries().CreateContent(context.Background(), postgresql.CreateContentParams{
|
||||
ID: req.ID,
|
||||
ID: contentID,
|
||||
SiteID: siteID,
|
||||
Value: req.Value,
|
||||
Type: req.Type,
|
||||
@@ -728,3 +740,41 @@ func (h *ContentHandler) versionMatches(version interface{}, contentID, siteID s
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// generateContentID creates a content ID from element context using the parser
|
||||
func (h *ContentHandler) generateContentID(ctx *ElementContext) string {
|
||||
// Create virtual node for existing parser ID generation
|
||||
virtualNode := &html.Node{
|
||||
Type: html.ElementNode,
|
||||
Data: ctx.Tag,
|
||||
Attr: []html.Attribute{
|
||||
{Key: "class", Val: strings.Join(ctx.Classes, " ")},
|
||||
},
|
||||
}
|
||||
|
||||
// Add parent context as a virtual parent node if provided
|
||||
if ctx.ParentContext != "" && ctx.ParentContext != "content" {
|
||||
parentNode := &html.Node{
|
||||
Type: html.ElementNode,
|
||||
Data: "section",
|
||||
Attr: []html.Attribute{
|
||||
{Key: "class", Val: ctx.ParentContext},
|
||||
},
|
||||
}
|
||||
parentNode.AppendChild(virtualNode)
|
||||
virtualNode.Parent = parentNode
|
||||
}
|
||||
|
||||
// Add text content for hash generation
|
||||
if ctx.OriginalContent != "" {
|
||||
textNode := &html.Node{
|
||||
Type: html.TextNode,
|
||||
Data: ctx.OriginalContent,
|
||||
}
|
||||
virtualNode.AppendChild(textNode)
|
||||
}
|
||||
|
||||
// Use existing parser ID generator
|
||||
idGenerator := parser.NewIDGenerator()
|
||||
return idGenerator.Generate(virtualNode)
|
||||
}
|
||||
|
||||
@@ -31,13 +31,23 @@ type ContentVersionsResponse struct {
|
||||
Versions []ContentVersion `json:"versions"`
|
||||
}
|
||||
|
||||
// Element context for backend ID generation
|
||||
type ElementContext struct {
|
||||
Tag string `json:"tag"`
|
||||
Classes []string `json:"classes"`
|
||||
OriginalContent string `json:"original_content"`
|
||||
ParentContext string `json:"parent_context"`
|
||||
Purpose string `json:"purpose"`
|
||||
}
|
||||
|
||||
// Request models
|
||||
type CreateContentRequest struct {
|
||||
ID string `json:"id"`
|
||||
SiteID string `json:"site_id,omitempty"`
|
||||
Value string `json:"value"`
|
||||
Type string `json:"type"`
|
||||
CreatedBy string `json:"created_by,omitempty"`
|
||||
ID string `json:"id,omitempty"` // For enhanced sites
|
||||
ElementContext *ElementContext `json:"element_context,omitempty"` // For non-enhanced sites
|
||||
SiteID string `json:"site_id,omitempty"`
|
||||
Value string `json:"value"`
|
||||
Type string `json:"type"`
|
||||
CreatedBy string `json:"created_by,omitempty"`
|
||||
}
|
||||
|
||||
type UpdateContentRequest struct {
|
||||
|
||||
@@ -98,52 +98,32 @@ func (e *Enhancer) injectElementContent(doc *html.Node, elem parser.Element) err
|
||||
return nil
|
||||
}
|
||||
|
||||
// findAndInjectNodes recursively finds nodes and injects content
|
||||
func (e *Enhancer) findAndInjectNodes(node *html.Node, elem parser.Element, contentItem *ContentItem) {
|
||||
if node.Type == html.ElementNode {
|
||||
// Check if this node matches our element criteria
|
||||
classes := getClasses(node)
|
||||
if containsClass(classes, "insertr") && node.Data == elem.Tag {
|
||||
// This might be our target node - inject content
|
||||
e.injector.addContentAttributes(node, elem.ContentID, string(elem.Type))
|
||||
|
||||
if contentItem != nil {
|
||||
switch elem.Type {
|
||||
case parser.ContentText:
|
||||
e.injector.injectTextContent(node, contentItem.Value)
|
||||
case parser.ContentMarkdown:
|
||||
e.injector.injectMarkdownContent(node, contentItem.Value)
|
||||
case parser.ContentLink:
|
||||
e.injector.injectLinkContent(node, contentItem.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
// findAndInjectNodes finds the specific node for this element and injects content
|
||||
func (e *Enhancer) findAndInjectNodes(rootNode *html.Node, elem parser.Element, contentItem *ContentItem) {
|
||||
// Use parser-based element matching to find the correct specific node
|
||||
targetNode := e.findNodeInDocument(rootNode, elem)
|
||||
if targetNode == nil {
|
||||
// Element not found - this is normal for elements without content in database
|
||||
return
|
||||
}
|
||||
|
||||
// Recursively process children
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
e.findAndInjectNodes(child, elem, contentItem)
|
||||
// Inject content attributes for the correctly matched node
|
||||
e.injector.addContentAttributes(targetNode, elem.ContentID, string(elem.Type))
|
||||
|
||||
// Inject content if available
|
||||
if contentItem != nil {
|
||||
switch elem.Type {
|
||||
case parser.ContentText:
|
||||
e.injector.injectTextContent(targetNode, contentItem.Value)
|
||||
case parser.ContentMarkdown:
|
||||
e.injector.injectMarkdownContent(targetNode, contentItem.Value)
|
||||
case parser.ContentLink:
|
||||
e.injector.injectLinkContent(targetNode, contentItem.Value)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Helper functions from parser package
|
||||
func getClasses(node *html.Node) []string {
|
||||
for _, attr := range node.Attr {
|
||||
if attr.Key == "class" {
|
||||
return strings.Fields(attr.Val)
|
||||
}
|
||||
}
|
||||
return []string{}
|
||||
}
|
||||
|
||||
func containsClass(classes []string, target string) bool {
|
||||
for _, class := range classes {
|
||||
if class == target {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
// Helper functions are now provided by the parser package
|
||||
|
||||
// EnhanceDirectory processes all HTML files in a directory
|
||||
func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
|
||||
@@ -295,28 +275,10 @@ func (e *Enhancer) enhanceFileInPlace(filePath string, elements []parser.Element
|
||||
return e.writeHTML(doc, filePath)
|
||||
}
|
||||
|
||||
// findNodeInDocument finds a specific node in the HTML document tree
|
||||
// findNodeInDocument finds a specific node in the HTML document tree using parser utilities
|
||||
func (e *Enhancer) findNodeInDocument(doc *html.Node, elem parser.Element) *html.Node {
|
||||
// This is a simplified approach - in a production system we might need
|
||||
// more sophisticated node matching based on attributes, position, etc.
|
||||
return e.findNodeByTagAndClass(doc, elem.Tag, "insertr")
|
||||
// Use parser's sophisticated matching logic
|
||||
return parser.FindElementInDocument(doc, elem)
|
||||
}
|
||||
|
||||
// findNodeByTagAndClass recursively searches for a node with specific tag and class
|
||||
func (e *Enhancer) findNodeByTagAndClass(node *html.Node, targetTag, targetClass string) *html.Node {
|
||||
if node.Type == html.ElementNode && node.Data == targetTag {
|
||||
classes := getClasses(node)
|
||||
if containsClass(classes, targetClass) {
|
||||
return node
|
||||
}
|
||||
}
|
||||
|
||||
// Search children
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
if result := e.findNodeByTagAndClass(child, targetTag, targetClass); result != nil {
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
// All element matching functions are now provided by the parser package
|
||||
|
||||
@@ -28,7 +28,9 @@ func (g *IDGenerator) Generate(node *html.Node) string {
|
||||
contentHash := g.getContentHash(node)
|
||||
|
||||
baseID := g.createBaseID(context, purpose, contentHash)
|
||||
return g.ensureUnique(baseID)
|
||||
finalID := g.ensureUnique(baseID)
|
||||
|
||||
return finalID
|
||||
}
|
||||
|
||||
// getSemanticContext determines the semantic context from parent elements
|
||||
@@ -36,11 +38,11 @@ func (g *IDGenerator) getSemanticContext(node *html.Node) string {
|
||||
// Walk up the tree to find semantic containers
|
||||
parent := node.Parent
|
||||
for parent != nil && parent.Type == html.ElementNode {
|
||||
classes := getClasses(parent)
|
||||
classes := GetClasses(parent)
|
||||
|
||||
// Check for common semantic section classes
|
||||
for _, class := range []string{"hero", "services", "nav", "navbar", "footer", "about", "contact", "testimonial"} {
|
||||
if containsClass(classes, class) {
|
||||
if ContainsClass(classes, class) {
|
||||
return class
|
||||
}
|
||||
}
|
||||
@@ -68,7 +70,7 @@ func (g *IDGenerator) getSemanticContext(node *html.Node) string {
|
||||
// getPurpose determines the purpose/role of the element
|
||||
func (g *IDGenerator) getPurpose(node *html.Node) string {
|
||||
tag := strings.ToLower(node.Data)
|
||||
classes := getClasses(node)
|
||||
classes := GetClasses(node)
|
||||
|
||||
// Check for specific CSS classes that indicate purpose
|
||||
for _, class := range classes {
|
||||
|
||||
@@ -90,15 +90,15 @@ func (p *Parser) parseFile(filePath string) ([]Element, []string, error) {
|
||||
// findInsertrElements recursively finds all elements with "insertr" class
|
||||
func (p *Parser) findInsertrElements(node *html.Node, filePath string, elements *[]Element, warnings *[]string) {
|
||||
if node.Type == html.ElementNode {
|
||||
classes := getClasses(node)
|
||||
classes := GetClasses(node)
|
||||
|
||||
// Check if element has "insertr" class
|
||||
if containsClass(classes, "insertr") {
|
||||
if ContainsClass(classes, "insertr") {
|
||||
if isContainer(node) {
|
||||
// Container element - expand to viable children
|
||||
viableChildren := findViableChildren(node)
|
||||
for _, child := range viableChildren {
|
||||
childClasses := getClasses(child)
|
||||
childClasses := GetClasses(child)
|
||||
element, warning := p.createElement(child, filePath, childClasses)
|
||||
*elements = append(*elements, element)
|
||||
if warning != "" {
|
||||
@@ -181,13 +181,13 @@ func (p *Parser) resolveContentID(node *html.Node) (string, bool) {
|
||||
// detectContentType determines the content type based on element and classes
|
||||
func (p *Parser) detectContentType(node *html.Node, classes []string) ContentType {
|
||||
// Check for explicit type classes first
|
||||
if containsClass(classes, "insertr-markdown") {
|
||||
if ContainsClass(classes, "insertr-markdown") {
|
||||
return ContentMarkdown
|
||||
}
|
||||
if containsClass(classes, "insertr-link") {
|
||||
if ContainsClass(classes, "insertr-link") {
|
||||
return ContentLink
|
||||
}
|
||||
if containsClass(classes, "insertr-text") {
|
||||
if ContainsClass(classes, "insertr-text") {
|
||||
return ContentText
|
||||
}
|
||||
|
||||
|
||||
@@ -6,8 +6,8 @@ import (
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// getClasses extracts CSS classes from an HTML node
|
||||
func getClasses(node *html.Node) []string {
|
||||
// GetClasses extracts CSS classes from an HTML node
|
||||
func GetClasses(node *html.Node) []string {
|
||||
classAttr := getAttribute(node, "class")
|
||||
if classAttr == "" {
|
||||
return []string{}
|
||||
@@ -17,8 +17,8 @@ func getClasses(node *html.Node) []string {
|
||||
return classes
|
||||
}
|
||||
|
||||
// containsClass checks if a class list contains a specific class
|
||||
func containsClass(classes []string, target string) bool {
|
||||
// ContainsClass checks if a class list contains a specific class
|
||||
func ContainsClass(classes []string, target string) bool {
|
||||
for _, class := range classes {
|
||||
if class == target {
|
||||
return true
|
||||
@@ -157,3 +157,39 @@ func isSelfClosing(node *html.Node) bool {
|
||||
|
||||
return selfClosingTags[node.Data]
|
||||
}
|
||||
|
||||
// FindElementInDocument finds a parser element in HTML document tree using semantic matching
|
||||
func FindElementInDocument(doc *html.Node, element Element) *html.Node {
|
||||
return findElementWithContext(doc, element)
|
||||
}
|
||||
|
||||
// findElementWithContext uses the parser's semantic understanding to find the correct element
|
||||
func findElementWithContext(node *html.Node, target Element) *html.Node {
|
||||
if node.Type == html.ElementNode && node.Data == target.Tag {
|
||||
classes := GetClasses(node)
|
||||
if ContainsClass(classes, "insertr") {
|
||||
// Content-based validation for precise matching
|
||||
textContent := extractTextContent(node)
|
||||
nodeContent := strings.TrimSpace(textContent)
|
||||
targetContent := strings.TrimSpace(target.Content)
|
||||
|
||||
if nodeContent == targetContent {
|
||||
return node
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively search children
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
if result := findElementWithContext(child, target); result != nil {
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetAttribute gets an attribute value from an HTML node (exported version)
|
||||
func GetAttribute(node *html.Node, key string) string {
|
||||
return getAttribute(node, key)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user