feat: implement structural IDs with database-first existence checking

- Remove content preview from ID generation for stable structural IDs
- Implement database-first approach to check content existence before creation
- Add enhanced DOM path, semantic context, and precise sibling indexing
- Replace HTML attribute checking with reliable database lookups
- Add collision handling with increment counters for similar elements

Fixes:
- UNIQUE constraint errors eliminated (multiple enhancement runs now work)
- Structural stability (the same element keeps the same ID regardless of content changes)
- Database-driven workflow (single source of truth for content existence)
- Enhanced collision resistance with detailed structural differentiation

Results: No more 'Failed to store content' errors; the enhance workflow is now stable across repeated runs.
This commit is contained in:
2025-09-20 21:39:40 +02:00
parent 369d516381
commit b5225c1388
2 changed files with 158 additions and 33 deletions

View File

@@ -60,20 +60,12 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro
processedElements := make([]ProcessedElement, len(elements))
for i, elem := range elements {
// Check if element already has a data-content-id
existingID := e.getAttribute(elem.Node, "data-content-id")
var id string
var wasGenerated bool
// Generate structural ID (always deterministic)
id := e.idGenerator.Generate(elem.Node, input.FilePath)
if existingID != "" {
// Use existing ID from enhanced element
id = existingID
wasGenerated = false
} else {
// Generate new ID for unprocessed element
id = e.idGenerator.Generate(elem.Node, input.FilePath)
wasGenerated = true
}
// Database-first approach: Check if content already exists
existingContent, err := e.client.GetContent(input.SiteID, id)
contentExists := (err == nil && existingContent != nil)
generatedIDs[fmt.Sprintf("element_%d", i)] = id
@@ -81,7 +73,7 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro
Node: elem.Node,
ID: id,
Type: elem.Type,
Generated: wasGenerated,
Generated: !contentExists, // Mark as generated only if new to database
Tag: elem.Node.Data,
Classes: GetClasses(elem.Node),
}
@@ -89,8 +81,8 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro
// Add/update content attributes to the node
e.addContentAttributes(elem.Node, id, elem.Type)
// Store content and template for newly discovered elements (first-pass)
if wasGenerated && (input.Mode == Enhancement || input.Mode == ContentInjection) {
// Store content only for truly new elements (database-first check)
if !contentExists && (input.Mode == Enhancement || input.Mode == ContentInjection) {
// Extract content and template from the unprocessed element
htmlContent := e.extractHTMLContent(elem.Node)
originalTemplate := e.extractOriginalTemplate(elem.Node)
@@ -100,6 +92,8 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro
if err != nil {
// Log error but don't fail the enhancement - content just won't be stored
fmt.Printf("⚠️ Failed to store content for %s: %v\n", id, err)
} else {
fmt.Printf("✅ Created new content: %s (%s)\n", id, elem.Type)
}
}
}