- Remove content preview from ID generation for stable structural IDs
- Implement database-first approach to check content existence before creation
- Add enhanced DOM path, semantic context, and precise sibling indexing
- Replace HTML attribute checking with reliable database lookups
- Add collision handling with increment counters for similar elements

Fixes:
✅ UNIQUE constraint errors eliminated (multiple enhancement runs work)
✅ Structural stability (same element keeps same ID regardless of content changes)
✅ Database-driven workflow (single source of truth for content existence)
✅ Enhanced collision resistance with detailed structural differentiation

Results: No more 'Failed to store content' errors, stable enhance workflow.
258 lines
7.5 KiB
Go
258 lines
7.5 KiB
Go
package engine
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// AuthProvider describes which authentication provider is in use.
type AuthProvider struct {
	// Type names the provider kind: "mock", "jwt" or "authentik".
	Type string
}
|
|
|
|
// ContentEngine is the unified content processing engine
|
|
type ContentEngine struct {
|
|
idGenerator *IDGenerator
|
|
client ContentClient
|
|
authProvider *AuthProvider
|
|
injector *Injector
|
|
}
|
|
|
|
// NewContentEngine creates a new content processing engine
|
|
func NewContentEngine(client ContentClient) *ContentEngine {
|
|
authProvider := &AuthProvider{Type: "mock"} // default
|
|
return &ContentEngine{
|
|
idGenerator: NewIDGenerator(),
|
|
client: client,
|
|
authProvider: authProvider,
|
|
injector: NewInjector(client, ""), // siteID will be set per operation
|
|
}
|
|
}
|
|
|
|
// NewContentEngineWithAuth creates a new content processing engine with auth config
|
|
func NewContentEngineWithAuth(client ContentClient, authProvider *AuthProvider) *ContentEngine {
|
|
if authProvider == nil {
|
|
authProvider = &AuthProvider{Type: "mock"}
|
|
}
|
|
return &ContentEngine{
|
|
idGenerator: NewIDGenerator(),
|
|
client: client,
|
|
authProvider: authProvider,
|
|
injector: NewInjectorWithAuth(client, "", authProvider), // siteID will be set per operation
|
|
}
|
|
}
|
|
|
|
// ProcessContent processes HTML content according to the specified mode
|
|
func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, error) {
|
|
// 1. Parse HTML
|
|
doc, err := html.Parse(strings.NewReader(string(input.HTML)))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parsing HTML: %w", err)
|
|
}
|
|
|
|
// 2. Find insertr elements
|
|
elements := e.findInsertrElements(doc)
|
|
|
|
// 3. Generate IDs for elements
|
|
generatedIDs := make(map[string]string)
|
|
processedElements := make([]ProcessedElement, len(elements))
|
|
|
|
for i, elem := range elements {
|
|
// Generate structural ID (always deterministic)
|
|
id := e.idGenerator.Generate(elem.Node, input.FilePath)
|
|
|
|
// Database-first approach: Check if content already exists
|
|
existingContent, err := e.client.GetContent(input.SiteID, id)
|
|
contentExists := (err == nil && existingContent != nil)
|
|
|
|
generatedIDs[fmt.Sprintf("element_%d", i)] = id
|
|
|
|
processedElements[i] = ProcessedElement{
|
|
Node: elem.Node,
|
|
ID: id,
|
|
Type: elem.Type,
|
|
Generated: !contentExists, // Mark as generated only if new to database
|
|
Tag: elem.Node.Data,
|
|
Classes: GetClasses(elem.Node),
|
|
}
|
|
|
|
// Add/update content attributes to the node
|
|
e.addContentAttributes(elem.Node, id, elem.Type)
|
|
|
|
// Store content only for truly new elements (database-first check)
|
|
if !contentExists && (input.Mode == Enhancement || input.Mode == ContentInjection) {
|
|
// Extract content and template from the unprocessed element
|
|
htmlContent := e.extractHTMLContent(elem.Node)
|
|
originalTemplate := e.extractOriginalTemplate(elem.Node)
|
|
|
|
// Store in database via content client
|
|
_, err := e.client.CreateContent(input.SiteID, id, htmlContent, originalTemplate, elem.Type, "system")
|
|
if err != nil {
|
|
// Log error but don't fail the enhancement - content just won't be stored
|
|
fmt.Printf("⚠️ Failed to store content for %s: %v\n", id, err)
|
|
} else {
|
|
fmt.Printf("✅ Created new content: %s (%s)\n", id, elem.Type)
|
|
}
|
|
}
|
|
}
|
|
|
|
// 4. Inject content if required by mode
|
|
if input.Mode == Enhancement || input.Mode == ContentInjection {
|
|
err = e.injectContent(processedElements, input.SiteID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("injecting content: %w", err)
|
|
}
|
|
}
|
|
|
|
// 5. Inject editor assets for enhancement mode (development)
|
|
if input.Mode == Enhancement {
|
|
injector := NewInjectorWithAuth(e.client, input.SiteID, e.authProvider)
|
|
injector.InjectEditorAssets(doc, true, "")
|
|
}
|
|
|
|
return &ContentResult{
|
|
Document: doc,
|
|
Elements: processedElements,
|
|
GeneratedIDs: generatedIDs,
|
|
}, nil
|
|
}
|
|
|
|
// InsertrElement represents an insertr element found in HTML
|
|
type InsertrElement struct {
|
|
Node *html.Node
|
|
Type string
|
|
}
|
|
|
|
// findInsertrElements finds all elements with class="insertr"
|
|
func (e *ContentEngine) findInsertrElements(doc *html.Node) []InsertrElement {
|
|
var elements []InsertrElement
|
|
e.walkNodes(doc, func(n *html.Node) {
|
|
if n.Type == html.ElementNode && e.hasInsertrClass(n) {
|
|
elementType := e.determineContentType(n)
|
|
elements = append(elements, InsertrElement{
|
|
Node: n,
|
|
Type: elementType,
|
|
})
|
|
}
|
|
})
|
|
return elements
|
|
}
|
|
|
|
// walkNodes walks through all nodes in the document
|
|
func (e *ContentEngine) walkNodes(n *html.Node, fn func(*html.Node)) {
|
|
fn(n)
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
e.walkNodes(c, fn)
|
|
}
|
|
}
|
|
|
|
// hasInsertrClass checks if node has class="insertr"
|
|
func (e *ContentEngine) hasInsertrClass(node *html.Node) bool {
|
|
classes := GetClasses(node)
|
|
for _, class := range classes {
|
|
if class == "insertr" {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// determineContentType determines the content type based on element
|
|
func (e *ContentEngine) determineContentType(node *html.Node) string {
|
|
tag := strings.ToLower(node.Data)
|
|
|
|
switch tag {
|
|
case "a", "button":
|
|
return "link"
|
|
case "h1", "h2", "h3", "h4", "h5", "h6":
|
|
return "text"
|
|
case "p", "div", "section", "article", "span":
|
|
return "text"
|
|
default:
|
|
return "text"
|
|
}
|
|
}
|
|
|
|
// addContentAttributes adds data-content-id and data-content-type attributes
|
|
func (e *ContentEngine) addContentAttributes(node *html.Node, contentID, contentType string) {
|
|
// Add data-content-id attribute
|
|
e.setAttribute(node, "data-content-id", contentID)
|
|
// Add data-content-type attribute
|
|
e.setAttribute(node, "data-content-type", contentType)
|
|
}
|
|
|
|
// getAttribute gets an attribute value from an HTML node
|
|
func (e *ContentEngine) getAttribute(node *html.Node, key string) string {
|
|
for _, attr := range node.Attr {
|
|
if attr.Key == key {
|
|
return attr.Val
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// setAttribute sets an attribute on an HTML node
|
|
func (e *ContentEngine) setAttribute(node *html.Node, key, value string) {
|
|
// Remove existing attribute if it exists
|
|
for i, attr := range node.Attr {
|
|
if attr.Key == key {
|
|
node.Attr[i].Val = value
|
|
return
|
|
}
|
|
}
|
|
// Add new attribute
|
|
node.Attr = append(node.Attr, html.Attribute{
|
|
Key: key,
|
|
Val: value,
|
|
})
|
|
}
|
|
|
|
// injectContent injects content from database into elements
|
|
func (e *ContentEngine) injectContent(elements []ProcessedElement, siteID string) error {
|
|
for i := range elements {
|
|
elem := &elements[i]
|
|
|
|
// Try to get content from database
|
|
contentItem, err := e.client.GetContent(siteID, elem.ID)
|
|
if err != nil {
|
|
// Content not found is not an error - element just won't have injected content
|
|
continue
|
|
}
|
|
|
|
if contentItem != nil {
|
|
// Inject the content into the element
|
|
elem.Content = contentItem.HTMLContent
|
|
|
|
// Update injector siteID for this operation
|
|
e.injector.siteID = siteID
|
|
e.injector.injectHTMLContent(elem.Node, contentItem.HTMLContent)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// extractHTMLContent extracts the inner HTML content from a node
|
|
func (e *ContentEngine) extractHTMLContent(node *html.Node) string {
|
|
var content strings.Builder
|
|
|
|
// Render all child nodes in order to preserve HTML structure
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
if err := html.Render(&content, child); err == nil {
|
|
// All nodes (text and element) rendered in correct order
|
|
}
|
|
}
|
|
|
|
return strings.TrimSpace(content.String())
|
|
}
|
|
|
|
// extractOriginalTemplate extracts the outer HTML of the element (including the element itself)
|
|
func (e *ContentEngine) extractOriginalTemplate(node *html.Node) string {
|
|
var buf strings.Builder
|
|
if err := html.Render(&buf, node); err != nil {
|
|
return ""
|
|
}
|
|
return buf.String()
|
|
}
|