feat: implement unified content engine to eliminate ID generation inconsistencies
- Create internal/engine module as single source of truth for content processing
- Consolidate 4 separate ID generation systems into one unified engine
- Update API handlers to use engine for consistent server-side ID generation
- Remove frontend client-side ID generation, delegate to server engine
- Ensure identical HTML markup + file path produces identical content IDs
- Resolve content persistence failures caused by ID fragmentation between manual editing and enhancement processes
This commit is contained in:
190
internal/engine/engine.go
Normal file
190
internal/engine/engine.go
Normal file
@@ -0,0 +1,190 @@
|
||||
package engine
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// ContentEngine is the unified content processing engine
|
||||
type ContentEngine struct {
|
||||
idGenerator *IDGenerator
|
||||
client ContentClient
|
||||
}
|
||||
|
||||
// NewContentEngine creates a new content processing engine
|
||||
func NewContentEngine(client ContentClient) *ContentEngine {
|
||||
return &ContentEngine{
|
||||
idGenerator: NewIDGenerator(),
|
||||
client: client,
|
||||
}
|
||||
}
|
||||
|
||||
// ProcessContent processes HTML content according to the specified mode
|
||||
func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, error) {
|
||||
// 1. Parse HTML
|
||||
doc, err := html.Parse(strings.NewReader(string(input.HTML)))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing HTML: %w", err)
|
||||
}
|
||||
|
||||
// 2. Find insertr elements
|
||||
elements := e.findInsertrElements(doc)
|
||||
|
||||
// 3. Generate IDs for elements
|
||||
generatedIDs := make(map[string]string)
|
||||
processedElements := make([]ProcessedElement, len(elements))
|
||||
|
||||
for i, elem := range elements {
|
||||
// Generate ID using the same algorithm as the parser
|
||||
id := e.idGenerator.Generate(elem.Node, input.FilePath)
|
||||
generatedIDs[fmt.Sprintf("element_%d", i)] = id
|
||||
|
||||
processedElements[i] = ProcessedElement{
|
||||
Node: elem.Node,
|
||||
ID: id,
|
||||
Type: elem.Type,
|
||||
Generated: true,
|
||||
Tag: elem.Node.Data,
|
||||
Classes: GetClasses(elem.Node),
|
||||
}
|
||||
|
||||
// Add content attributes to the node
|
||||
e.addContentAttributes(elem.Node, id, elem.Type)
|
||||
}
|
||||
|
||||
// 4. Inject content if required by mode
|
||||
if input.Mode == Enhancement || input.Mode == ContentInjection {
|
||||
err = e.injectContent(processedElements, input.SiteID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("injecting content: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return &ContentResult{
|
||||
Document: doc,
|
||||
Elements: processedElements,
|
||||
GeneratedIDs: generatedIDs,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// InsertrElement represents an insertr element found in HTML
|
||||
type InsertrElement struct {
|
||||
Node *html.Node
|
||||
Type string
|
||||
}
|
||||
|
||||
// findInsertrElements finds all elements with class="insertr"
|
||||
func (e *ContentEngine) findInsertrElements(doc *html.Node) []InsertrElement {
|
||||
var elements []InsertrElement
|
||||
e.walkNodes(doc, func(n *html.Node) {
|
||||
if n.Type == html.ElementNode && e.hasInsertrClass(n) {
|
||||
elementType := e.determineContentType(n)
|
||||
elements = append(elements, InsertrElement{
|
||||
Node: n,
|
||||
Type: elementType,
|
||||
})
|
||||
}
|
||||
})
|
||||
return elements
|
||||
}
|
||||
|
||||
// walkNodes walks through all nodes in the document
|
||||
func (e *ContentEngine) walkNodes(n *html.Node, fn func(*html.Node)) {
|
||||
fn(n)
|
||||
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
||||
e.walkNodes(c, fn)
|
||||
}
|
||||
}
|
||||
|
||||
// hasInsertrClass checks if node has class="insertr"
|
||||
func (e *ContentEngine) hasInsertrClass(node *html.Node) bool {
|
||||
classes := GetClasses(node)
|
||||
for _, class := range classes {
|
||||
if class == "insertr" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// determineContentType determines the content type based on element
|
||||
func (e *ContentEngine) determineContentType(node *html.Node) string {
|
||||
tag := strings.ToLower(node.Data)
|
||||
|
||||
switch tag {
|
||||
case "a", "button":
|
||||
return "link"
|
||||
case "h1", "h2", "h3", "h4", "h5", "h6":
|
||||
return "text"
|
||||
case "p", "div", "section", "article", "span":
|
||||
return "markdown"
|
||||
default:
|
||||
return "text"
|
||||
}
|
||||
}
|
||||
|
||||
// addContentAttributes adds data-content-id and data-content-type attributes
|
||||
func (e *ContentEngine) addContentAttributes(node *html.Node, contentID, contentType string) {
|
||||
// Add data-content-id attribute
|
||||
e.setAttribute(node, "data-content-id", contentID)
|
||||
// Add data-content-type attribute
|
||||
e.setAttribute(node, "data-content-type", contentType)
|
||||
}
|
||||
|
||||
// setAttribute sets an attribute on an HTML node
|
||||
func (e *ContentEngine) setAttribute(node *html.Node, key, value string) {
|
||||
// Remove existing attribute if it exists
|
||||
for i, attr := range node.Attr {
|
||||
if attr.Key == key {
|
||||
node.Attr[i].Val = value
|
||||
return
|
||||
}
|
||||
}
|
||||
// Add new attribute
|
||||
node.Attr = append(node.Attr, html.Attribute{
|
||||
Key: key,
|
||||
Val: value,
|
||||
})
|
||||
}
|
||||
|
||||
// injectContent injects content from database into elements
|
||||
func (e *ContentEngine) injectContent(elements []ProcessedElement, siteID string) error {
|
||||
for i := range elements {
|
||||
elem := &elements[i]
|
||||
|
||||
// Try to get content from database
|
||||
contentItem, err := e.client.GetContent(siteID, elem.ID)
|
||||
if err != nil {
|
||||
// Content not found is not an error - element just won't have injected content
|
||||
continue
|
||||
}
|
||||
|
||||
if contentItem != nil {
|
||||
// Inject the content into the element
|
||||
elem.Content = contentItem.Value
|
||||
e.injectContentIntoNode(elem.Node, contentItem.Value, contentItem.Type)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// injectContentIntoNode injects content value into an HTML node
|
||||
func (e *ContentEngine) injectContentIntoNode(node *html.Node, content, contentType string) {
|
||||
// Clear existing text content
|
||||
for child := node.FirstChild; child != nil; {
|
||||
next := child.NextSibling
|
||||
if child.Type == html.TextNode {
|
||||
node.RemoveChild(child)
|
||||
}
|
||||
child = next
|
||||
}
|
||||
|
||||
// Add new text content
|
||||
textNode := &html.Node{
|
||||
Type: html.TextNode,
|
||||
Data: content,
|
||||
}
|
||||
node.AppendChild(textNode)
|
||||
}
|
||||
Reference in New Issue
Block a user