feat: implement structural IDs with database-first existence checking
- Remove content preview from ID generation for stable structural IDs
- Implement database-first approach to check content existence before creation
- Add enhanced DOM path, semantic context, and precise sibling indexing
- Replace HTML attribute checking with reliable database lookups
- Add collision handling with increment counters for similar elements

Fixes:
✅ UNIQUE constraint errors eliminated (multiple enhancement runs work)
✅ Structural stability (same element keeps same ID regardless of content changes)
✅ Database-driven workflow (single source of truth for content existence)
✅ Enhanced collision resistance with detailed structural differentiation

Results: No more 'Failed to store content' errors, stable enhance workflow.
This commit is contained in:
@@ -60,20 +60,12 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro
|
|||||||
processedElements := make([]ProcessedElement, len(elements))
|
processedElements := make([]ProcessedElement, len(elements))
|
||||||
|
|
||||||
for i, elem := range elements {
|
for i, elem := range elements {
|
||||||
// Check if element already has a data-content-id
|
// Generate structural ID (always deterministic)
|
||||||
existingID := e.getAttribute(elem.Node, "data-content-id")
|
id := e.idGenerator.Generate(elem.Node, input.FilePath)
|
||||||
var id string
|
|
||||||
var wasGenerated bool
|
|
||||||
|
|
||||||
if existingID != "" {
|
// Database-first approach: Check if content already exists
|
||||||
// Use existing ID from enhanced element
|
existingContent, err := e.client.GetContent(input.SiteID, id)
|
||||||
id = existingID
|
contentExists := (err == nil && existingContent != nil)
|
||||||
wasGenerated = false
|
|
||||||
} else {
|
|
||||||
// Generate new ID for unprocessed element
|
|
||||||
id = e.idGenerator.Generate(elem.Node, input.FilePath)
|
|
||||||
wasGenerated = true
|
|
||||||
}
|
|
||||||
|
|
||||||
generatedIDs[fmt.Sprintf("element_%d", i)] = id
|
generatedIDs[fmt.Sprintf("element_%d", i)] = id
|
||||||
|
|
||||||
@@ -81,7 +73,7 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro
|
|||||||
Node: elem.Node,
|
Node: elem.Node,
|
||||||
ID: id,
|
ID: id,
|
||||||
Type: elem.Type,
|
Type: elem.Type,
|
||||||
Generated: wasGenerated,
|
Generated: !contentExists, // Mark as generated only if new to database
|
||||||
Tag: elem.Node.Data,
|
Tag: elem.Node.Data,
|
||||||
Classes: GetClasses(elem.Node),
|
Classes: GetClasses(elem.Node),
|
||||||
}
|
}
|
||||||
@@ -89,8 +81,8 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro
|
|||||||
// Add/update content attributes to the node
|
// Add/update content attributes to the node
|
||||||
e.addContentAttributes(elem.Node, id, elem.Type)
|
e.addContentAttributes(elem.Node, id, elem.Type)
|
||||||
|
|
||||||
// Store content and template for newly discovered elements (first-pass)
|
// Store content only for truly new elements (database-first check)
|
||||||
if wasGenerated && (input.Mode == Enhancement || input.Mode == ContentInjection) {
|
if !contentExists && (input.Mode == Enhancement || input.Mode == ContentInjection) {
|
||||||
// Extract content and template from the unprocessed element
|
// Extract content and template from the unprocessed element
|
||||||
htmlContent := e.extractHTMLContent(elem.Node)
|
htmlContent := e.extractHTMLContent(elem.Node)
|
||||||
originalTemplate := e.extractOriginalTemplate(elem.Node)
|
originalTemplate := e.extractOriginalTemplate(elem.Node)
|
||||||
@@ -100,6 +92,8 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
// Log error but don't fail the enhancement - content just won't be stored
|
// Log error but don't fail the enhancement - content just won't be stored
|
||||||
fmt.Printf("⚠️ Failed to store content for %s: %v\n", id, err)
|
fmt.Printf("⚠️ Failed to store content for %s: %v\n", id, err)
|
||||||
|
} else {
|
||||||
|
fmt.Printf("✅ Created new content: %s (%s)\n", id, elem.Type)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -120,26 +120,21 @@ func (g *IDGenerator) buildPrefix(fileName, tag, primaryClass string, index int)
|
|||||||
|
|
||||||
// createDeterministicSignature creates a deterministic signature for element identification
|
// createDeterministicSignature creates a deterministic signature for element identification
|
||||||
func (g *IDGenerator) createDeterministicSignature(node *html.Node, filePath string) string {
|
func (g *IDGenerator) createDeterministicSignature(node *html.Node, filePath string) string {
|
||||||
// Build enhanced signature with 6 components for maximum differentiation
|
// Build structural signature for stable IDs across content changes
|
||||||
tag := node.Data
|
tag := node.Data
|
||||||
domPath := g.getSimpleDOMPath(node)
|
domPath := g.getDetailedDOMPath(node)
|
||||||
classes := strings.Join(GetClasses(node), " ")
|
allClasses := strings.Join(GetClasses(node), " ")
|
||||||
contentPreview := g.getContentPreview(node)
|
semanticContext := g.getSemanticContext(node)
|
||||||
siblingIndex := g.getSiblingIndex(node)
|
preciseIndex := g.getPreciseSiblingIndex(node)
|
||||||
|
|
||||||
// Normalize content preview to first 20 chars
|
// Create purely structural deterministic signature
|
||||||
if len(contentPreview) > 20 {
|
|
||||||
contentPreview = contentPreview[:20]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create comprehensive deterministic signature
|
|
||||||
signature := fmt.Sprintf("%s|%s|%s|%s|%s|%d",
|
signature := fmt.Sprintf("%s|%s|%s|%s|%s|%d",
|
||||||
filePath, // File context for uniqueness across files
|
filePath, // File context for uniqueness across files
|
||||||
domPath, // Structural position in DOM
|
domPath, // Detailed structural position in DOM
|
||||||
tag, // Element type
|
tag, // Element type
|
||||||
classes, // CSS classes for style differentiation
|
allClasses, // All CSS classes for style differentiation
|
||||||
contentPreview, // Content for similar-structure differentiation
|
semanticContext, // Semantic context (header/main/footer/nav)
|
||||||
siblingIndex, // Position among similar siblings
|
preciseIndex, // Precise position among exact siblings
|
||||||
)
|
)
|
||||||
|
|
||||||
// Create deterministic hash suffix (6 chars)
|
// Create deterministic hash suffix (6 chars)
|
||||||
@@ -179,6 +174,142 @@ func (g *IDGenerator) getSimpleDOMPath(node *html.Node) string {
|
|||||||
return strings.Join(pathParts, ">")
|
return strings.Join(pathParts, ">")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// getDetailedDOMPath creates a more detailed DOM path for enhanced structural differentiation
|
||||||
|
func (g *IDGenerator) getDetailedDOMPath(node *html.Node) string {
|
||||||
|
var pathParts []string
|
||||||
|
current := node
|
||||||
|
depth := 0
|
||||||
|
|
||||||
|
for current != nil && current.Type == html.ElementNode && depth < 5 {
|
||||||
|
part := current.Data
|
||||||
|
|
||||||
|
// Add all meaningful classes for maximum differentiation
|
||||||
|
classes := GetClasses(current)
|
||||||
|
var meaningfulClasses []string
|
||||||
|
for _, class := range classes {
|
||||||
|
if class != "insertr" && class != "" {
|
||||||
|
meaningfulClasses = append(meaningfulClasses, class)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(meaningfulClasses) > 0 {
|
||||||
|
part += "." + strings.Join(meaningfulClasses, ".")
|
||||||
|
}
|
||||||
|
|
||||||
|
pathParts = append([]string{part}, pathParts...)
|
||||||
|
current = current.Parent
|
||||||
|
depth++
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.Join(pathParts, ">")
|
||||||
|
}
|
||||||
|
|
||||||
|
// getSemanticContext identifies the semantic container (header, main, footer, nav)
|
||||||
|
func (g *IDGenerator) getSemanticContext(node *html.Node) string {
|
||||||
|
current := node.Parent
|
||||||
|
|
||||||
|
// Traverse up to find semantic containers
|
||||||
|
for current != nil && current.Type == html.ElementNode {
|
||||||
|
tag := strings.ToLower(current.Data)
|
||||||
|
|
||||||
|
// Direct semantic tags
|
||||||
|
switch tag {
|
||||||
|
case "header":
|
||||||
|
return "header"
|
||||||
|
case "main":
|
||||||
|
return "main"
|
||||||
|
case "footer":
|
||||||
|
return "footer"
|
||||||
|
case "nav":
|
||||||
|
return "nav"
|
||||||
|
case "aside":
|
||||||
|
return "aside"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Semantic classes
|
||||||
|
classes := GetClasses(current)
|
||||||
|
for _, class := range classes {
|
||||||
|
class = strings.ToLower(class)
|
||||||
|
if strings.Contains(class, "header") {
|
||||||
|
return "header"
|
||||||
|
}
|
||||||
|
if strings.Contains(class, "footer") {
|
||||||
|
return "footer"
|
||||||
|
}
|
||||||
|
if strings.Contains(class, "nav") {
|
||||||
|
return "nav"
|
||||||
|
}
|
||||||
|
if strings.Contains(class, "sidebar") || strings.Contains(class, "aside") {
|
||||||
|
return "aside"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
current = current.Parent
|
||||||
|
}
|
||||||
|
|
||||||
|
return "content"
|
||||||
|
}
|
||||||
|
|
||||||
|
// getPreciseSiblingIndex returns position among siblings with exact tag and class match
|
||||||
|
func (g *IDGenerator) getPreciseSiblingIndex(node *html.Node) int {
|
||||||
|
if node.Parent == nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
index := 0
|
||||||
|
tag := node.Data
|
||||||
|
classes := GetClasses(node)
|
||||||
|
|
||||||
|
// Sort classes for consistent comparison
|
||||||
|
sortedClasses := make([]string, len(classes))
|
||||||
|
copy(sortedClasses, classes)
|
||||||
|
for i := 0; i < len(sortedClasses); i++ {
|
||||||
|
for j := i + 1; j < len(sortedClasses); j++ {
|
||||||
|
if sortedClasses[i] > sortedClasses[j] {
|
||||||
|
sortedClasses[i], sortedClasses[j] = sortedClasses[j], sortedClasses[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for sibling := node.Parent.FirstChild; sibling != nil; sibling = sibling.NextSibling {
|
||||||
|
if sibling.Type == html.ElementNode && sibling.Data == tag {
|
||||||
|
siblingClasses := GetClasses(sibling)
|
||||||
|
|
||||||
|
// Sort sibling classes for comparison
|
||||||
|
sortedSiblingClasses := make([]string, len(siblingClasses))
|
||||||
|
copy(sortedSiblingClasses, siblingClasses)
|
||||||
|
for i := 0; i < len(sortedSiblingClasses); i++ {
|
||||||
|
for j := i + 1; j < len(sortedSiblingClasses); j++ {
|
||||||
|
if sortedSiblingClasses[i] > sortedSiblingClasses[j] {
|
||||||
|
sortedSiblingClasses[i], sortedSiblingClasses[j] = sortedSiblingClasses[j], sortedSiblingClasses[i]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if classes match exactly
|
||||||
|
if g.classSlicesEqual(sortedClasses, sortedSiblingClasses) {
|
||||||
|
if sibling == node {
|
||||||
|
return index
|
||||||
|
}
|
||||||
|
index++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return index
|
||||||
|
}
|
||||||
|
|
||||||
|
// classSlicesEqual compares two sorted class slices for equality
|
||||||
|
func (g *IDGenerator) classSlicesEqual(a, b []string) bool {
|
||||||
|
if len(a) != len(b) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
for i := range a {
|
||||||
|
if a[i] != b[i] {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
// getContentPreview extracts first 50 characters of text content for uniqueness
|
// getContentPreview extracts first 50 characters of text content for uniqueness
|
||||||
func (g *IDGenerator) getContentPreview(node *html.Node) string {
|
func (g *IDGenerator) getContentPreview(node *html.Node) string {
|
||||||
var text strings.Builder
|
var text strings.Builder
|
||||||
|
|||||||
Reference in New Issue
Block a user