Files
insertr/internal/engine/engine.go
Joakim b5225c1388 feat: implement structural IDs with database-first existence checking
- Remove content preview from ID generation for stable structural IDs
- Implement database-first approach to check content existence before creation
- Add enhanced DOM path, semantic context, and precise sibling indexing
- Replace HTML attribute checking with reliable database lookups
- Add collision handling with increment counters for similar elements

Fixes:
 UNIQUE constraint errors eliminated (multiple enhancement runs work)
 Structural stability (same element keeps same ID regardless of content changes)
 Database-driven workflow (single source of truth for content existence)
 Enhanced collision resistance with detailed structural differentiation

Results: No more 'Failed to store content' errors, stable enhance workflow.
2025-09-20 21:39:40 +02:00

258 lines
7.5 KiB
Go

package engine
import (
"fmt"
"strings"
"golang.org/x/net/html"
)
// AuthProvider describes which authentication provider the engine is
// configured with. The constructors in this file fall back to Type "mock"
// when no provider is supplied.
type AuthProvider struct {
// Type names the provider kind.
Type string // "mock", "jwt", "authentik"
}
// ContentEngine is the unified content processing engine. It bundles the ID
// generator, the content client used for lookups and writes, the auth
// provider configuration, and a shared injector.
type ContentEngine struct {
// idGenerator produces the ID assigned to each insertr element.
idGenerator *IDGenerator
// client is the backend queried with GetContent and written with CreateContent.
client ContentClient
// authProvider is handed to injectors created by the engine.
authProvider *AuthProvider
// injector is shared across calls; its siteID is overwritten per operation.
injector *Injector
}
// NewContentEngine builds a ContentEngine around client using the default
// "mock" auth provider. The injector is created with an empty site ID; the
// site ID is filled in per operation.
func NewContentEngine(client ContentClient) *ContentEngine {
	engine := new(ContentEngine)
	engine.idGenerator = NewIDGenerator()
	engine.client = client
	engine.authProvider = &AuthProvider{Type: "mock"}
	engine.injector = NewInjector(client, "")
	return engine
}
// NewContentEngineWithAuth builds a ContentEngine around client with the
// given auth configuration. A nil authProvider is replaced by a provider of
// Type "mock". The injector is created with an empty site ID; the site ID is
// filled in per operation.
func NewContentEngineWithAuth(client ContentClient, authProvider *AuthProvider) *ContentEngine {
	provider := authProvider
	if provider == nil {
		provider = &AuthProvider{Type: "mock"}
	}

	return &ContentEngine{
		idGenerator:  NewIDGenerator(),
		client:       client,
		authProvider: provider,
		injector:     NewInjectorWithAuth(client, "", provider),
	}
}
// ProcessContent parses input.HTML, assigns an ID to every element carrying
// the "insertr" class, and tags each such element with data-content-id and
// data-content-type attributes.
//
// In Enhancement and ContentInjection modes, elements whose ID is not yet
// known to the content client are stored via CreateContent, and content held
// by the client is then injected into the elements. In Enhancement mode the
// editor assets are injected into the document as well.
//
// It returns the parsed document, the processed elements, and a map from
// "element_<index>" to the generated ID. An error is returned when the HTML
// cannot be parsed or when content injection reports a failure; a failed
// CreateContent call is only printed and does not abort processing.
func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, error) {
	// 1. Parse HTML
	doc, err := html.Parse(strings.NewReader(string(input.HTML)))
	if err != nil {
		return nil, fmt.Errorf("parsing HTML: %w", err)
	}

	// Enhancement and ContentInjection both store new content and inject
	// stored content; evaluate the mode check once up front.
	usesStoredContent := input.Mode == Enhancement || input.Mode == ContentInjection

	// 2. Find insertr elements
	found := e.findInsertrElements(doc)

	// 3. Generate IDs for elements
	ids := make(map[string]string)
	processed := make([]ProcessedElement, len(found))
	for idx, item := range found {
		node := item.Node
		id := e.idGenerator.Generate(node, input.FilePath)

		// Database-first: the element counts as new when the lookup fails
		// or yields nothing. Any lookup error is treated as "not present".
		existing, lookupErr := e.client.GetContent(input.SiteID, id)
		isNew := lookupErr != nil || existing == nil

		ids[fmt.Sprintf("element_%d", idx)] = id
		processed[idx] = ProcessedElement{
			Node:      node,
			ID:        id,
			Type:      item.Type,
			Generated: isNew,
			Tag:       node.Data,
			Classes:   GetClasses(node),
		}

		// Add/update content attributes on the node
		e.addContentAttributes(node, id, item.Type)

		if !isNew || !usesStoredContent {
			continue
		}

		// NOTE(review): addContentAttributes has already run at this point,
		// so the rendered template is expected to include the
		// data-content-* attributes — confirm this is intended.
		htmlContent := e.extractHTMLContent(node)
		originalTemplate := e.extractOriginalTemplate(node)

		// Store in database via content client. A failure is reported but
		// does not fail the run; the content just won't be stored.
		if _, createErr := e.client.CreateContent(input.SiteID, id, htmlContent, originalTemplate, item.Type, "system"); createErr != nil {
			fmt.Printf("⚠️ Failed to store content for %s: %v\n", id, createErr)
			continue
		}
		fmt.Printf("✅ Created new content: %s (%s)\n", id, item.Type)
	}

	// 4. Inject content if required by mode
	if usesStoredContent {
		if injectErr := e.injectContent(processed, input.SiteID); injectErr != nil {
			return nil, fmt.Errorf("injecting content: %w", injectErr)
		}
	}

	// 5. Inject editor assets for enhancement mode (development)
	if input.Mode == Enhancement {
		assetInjector := NewInjectorWithAuth(e.client, input.SiteID, e.authProvider)
		assetInjector.InjectEditorAssets(doc, true, "")
	}

	result := &ContentResult{
		Document:     doc,
		Elements:     processed,
		GeneratedIDs: ids,
	}
	return result, nil
}
// InsertrElement represents an insertr element found in HTML: the matching
// node together with the content type determined for it.
type InsertrElement struct {
// Node is the element node that carries the "insertr" class.
Node *html.Node
// Type is the content type as returned by determineContentType.
Type string
}
// findInsertrElements walks the tree rooted at doc and collects, in visit
// order, every element node for which hasInsertrClass reports true, paired
// with its content type. The result is nil when nothing matches.
func (e *ContentEngine) findInsertrElements(doc *html.Node) []InsertrElement {
	var matches []InsertrElement

	collect := func(node *html.Node) {
		if node.Type != html.ElementNode || !e.hasInsertrClass(node) {
			return
		}
		matches = append(matches, InsertrElement{
			Node: node,
			Type: e.determineContentType(node),
		})
	}

	e.walkNodes(doc, collect)
	return matches
}
// walkNodes calls fn on n and then, recursively, on each of its children in
// sibling order (a pre-order traversal). n must not be nil.
func (e *ContentEngine) walkNodes(n *html.Node, fn func(*html.Node)) {
	fn(n)

	child := n.FirstChild
	for child != nil {
		e.walkNodes(child, fn)
		child = child.NextSibling
	}
}
// hasInsertrClass reports whether any class returned by GetClasses for node
// is exactly "insertr".
func (e *ContentEngine) hasInsertrClass(node *html.Node) bool {
	const marker = "insertr"

	for _, name := range GetClasses(node) {
		if name == marker {
			return true
		}
	}
	return false
}
// determineContentType maps an element to its content type based on the
// lower-cased tag name: "a" and "button" are "link", every other tag is
// "text".
func (e *ContentEngine) determineContentType(node *html.Node) string {
	switch strings.ToLower(node.Data) {
	case "a", "button":
		return "link"
	}
	return "text"
}
// addContentAttributes sets the data-content-id and data-content-type
// attributes on node, in that order, overwriting any existing values.
func (e *ContentEngine) addContentAttributes(node *html.Node, contentID, contentType string) {
	pairs := [...][2]string{
		{"data-content-id", contentID},
		{"data-content-type", contentType},
	}
	for _, kv := range pairs {
		e.setAttribute(node, kv[0], kv[1])
	}
}
// getAttribute returns the value of the first attribute on node whose key
// equals key, or the empty string when no such attribute exists.
func (e *ContentEngine) getAttribute(node *html.Node, key string) string {
	for i := range node.Attr {
		if node.Attr[i].Key == key {
			return node.Attr[i].Val
		}
	}
	return ""
}
// setAttribute sets key to value on node. If an attribute with that key
// already exists, the first one is overwritten in place; otherwise a new
// attribute is appended.
func (e *ContentEngine) setAttribute(node *html.Node, key, value string) {
	for i := range node.Attr {
		if node.Attr[i].Key != key {
			continue
		}
		// Existing attribute: update its value and stop.
		node.Attr[i].Val = value
		return
	}

	// No attribute with this key yet.
	node.Attr = append(node.Attr, html.Attribute{Key: key, Val: value})
}
// injectContent looks up each element's ID through the content client and,
// when content is returned, records its HTML on the element and injects it
// into the element's node via the shared injector.
//
// Elements whose lookup fails or returns nothing are skipped; every lookup
// error is treated this way, not only "not found". The function currently
// always returns nil.
func (e *ContentEngine) injectContent(elements []ProcessedElement, siteID string) error {
	for i := range elements {
		target := &elements[i]

		stored, err := e.client.GetContent(siteID, target.ID)
		if err != nil || stored == nil {
			// No stored content: leave the element as it is.
			continue
		}

		target.Content = stored.HTMLContent

		// The injector is shared, so point it at this site before injecting.
		e.injector.siteID = siteID
		e.injector.injectHTMLContent(target.Node, stored.HTMLContent)
	}
	return nil
}
// extractHTMLContent returns the inner HTML of node: each child is rendered
// in document order, the results are concatenated, and surrounding
// whitespace is trimmed. A node without children yields the empty string.
func (e *ContentEngine) extractHTMLContent(node *html.Node) string {
	var content strings.Builder

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		// Best effort: writes to a strings.Builder do not fail, so an error
		// here can only mean the renderer rejected this child. Whatever was
		// written is kept and the remaining children are still rendered.
		_ = html.Render(&content, child)
	}

	return strings.TrimSpace(content.String())
}
// extractOriginalTemplate renders node itself, including its subtree, and
// returns the resulting HTML. It returns the empty string when rendering
// fails.
func (e *ContentEngine) extractOriginalTemplate(node *html.Node) string {
	var rendered strings.Builder

	err := html.Render(&rendered, node)
	if err == nil {
		return rendered.String()
	}
	return ""
}