- Create internal/engine module as single source of truth for content processing
- Consolidate 4 separate ID generation systems into one unified engine
- Update API handlers to use engine for consistent server-side ID generation
- Remove frontend client-side ID generation, delegate to server engine
- Ensure identical HTML markup + file path produces identical content IDs
- Resolve content persistence failures caused by ID fragmentation between manual editing and enhancement processes
191 lines
4.9 KiB
Go
191 lines
4.9 KiB
Go
package engine
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// ContentEngine is the unified content processing engine
|
|
type ContentEngine struct {
|
|
idGenerator *IDGenerator
|
|
client ContentClient
|
|
}
|
|
|
|
// NewContentEngine creates a new content processing engine
|
|
func NewContentEngine(client ContentClient) *ContentEngine {
|
|
return &ContentEngine{
|
|
idGenerator: NewIDGenerator(),
|
|
client: client,
|
|
}
|
|
}
|
|
|
|
// ProcessContent processes HTML content according to the specified mode
|
|
func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, error) {
|
|
// 1. Parse HTML
|
|
doc, err := html.Parse(strings.NewReader(string(input.HTML)))
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parsing HTML: %w", err)
|
|
}
|
|
|
|
// 2. Find insertr elements
|
|
elements := e.findInsertrElements(doc)
|
|
|
|
// 3. Generate IDs for elements
|
|
generatedIDs := make(map[string]string)
|
|
processedElements := make([]ProcessedElement, len(elements))
|
|
|
|
for i, elem := range elements {
|
|
// Generate ID using the same algorithm as the parser
|
|
id := e.idGenerator.Generate(elem.Node, input.FilePath)
|
|
generatedIDs[fmt.Sprintf("element_%d", i)] = id
|
|
|
|
processedElements[i] = ProcessedElement{
|
|
Node: elem.Node,
|
|
ID: id,
|
|
Type: elem.Type,
|
|
Generated: true,
|
|
Tag: elem.Node.Data,
|
|
Classes: GetClasses(elem.Node),
|
|
}
|
|
|
|
// Add content attributes to the node
|
|
e.addContentAttributes(elem.Node, id, elem.Type)
|
|
}
|
|
|
|
// 4. Inject content if required by mode
|
|
if input.Mode == Enhancement || input.Mode == ContentInjection {
|
|
err = e.injectContent(processedElements, input.SiteID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("injecting content: %w", err)
|
|
}
|
|
}
|
|
|
|
return &ContentResult{
|
|
Document: doc,
|
|
Elements: processedElements,
|
|
GeneratedIDs: generatedIDs,
|
|
}, nil
|
|
}
|
|
|
|
// InsertrElement represents an insertr element found in HTML
|
|
type InsertrElement struct {
|
|
Node *html.Node
|
|
Type string
|
|
}
|
|
|
|
// findInsertrElements finds all elements with class="insertr"
|
|
func (e *ContentEngine) findInsertrElements(doc *html.Node) []InsertrElement {
|
|
var elements []InsertrElement
|
|
e.walkNodes(doc, func(n *html.Node) {
|
|
if n.Type == html.ElementNode && e.hasInsertrClass(n) {
|
|
elementType := e.determineContentType(n)
|
|
elements = append(elements, InsertrElement{
|
|
Node: n,
|
|
Type: elementType,
|
|
})
|
|
}
|
|
})
|
|
return elements
|
|
}
|
|
|
|
// walkNodes walks through all nodes in the document
|
|
func (e *ContentEngine) walkNodes(n *html.Node, fn func(*html.Node)) {
|
|
fn(n)
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
e.walkNodes(c, fn)
|
|
}
|
|
}
|
|
|
|
// hasInsertrClass checks if node has class="insertr"
|
|
func (e *ContentEngine) hasInsertrClass(node *html.Node) bool {
|
|
classes := GetClasses(node)
|
|
for _, class := range classes {
|
|
if class == "insertr" {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// determineContentType determines the content type based on element
|
|
func (e *ContentEngine) determineContentType(node *html.Node) string {
|
|
tag := strings.ToLower(node.Data)
|
|
|
|
switch tag {
|
|
case "a", "button":
|
|
return "link"
|
|
case "h1", "h2", "h3", "h4", "h5", "h6":
|
|
return "text"
|
|
case "p", "div", "section", "article", "span":
|
|
return "markdown"
|
|
default:
|
|
return "text"
|
|
}
|
|
}
|
|
|
|
// addContentAttributes adds data-content-id and data-content-type attributes
|
|
func (e *ContentEngine) addContentAttributes(node *html.Node, contentID, contentType string) {
|
|
// Add data-content-id attribute
|
|
e.setAttribute(node, "data-content-id", contentID)
|
|
// Add data-content-type attribute
|
|
e.setAttribute(node, "data-content-type", contentType)
|
|
}
|
|
|
|
// setAttribute sets an attribute on an HTML node
|
|
func (e *ContentEngine) setAttribute(node *html.Node, key, value string) {
|
|
// Remove existing attribute if it exists
|
|
for i, attr := range node.Attr {
|
|
if attr.Key == key {
|
|
node.Attr[i].Val = value
|
|
return
|
|
}
|
|
}
|
|
// Add new attribute
|
|
node.Attr = append(node.Attr, html.Attribute{
|
|
Key: key,
|
|
Val: value,
|
|
})
|
|
}
|
|
|
|
// injectContent injects content from database into elements
|
|
func (e *ContentEngine) injectContent(elements []ProcessedElement, siteID string) error {
|
|
for i := range elements {
|
|
elem := &elements[i]
|
|
|
|
// Try to get content from database
|
|
contentItem, err := e.client.GetContent(siteID, elem.ID)
|
|
if err != nil {
|
|
// Content not found is not an error - element just won't have injected content
|
|
continue
|
|
}
|
|
|
|
if contentItem != nil {
|
|
// Inject the content into the element
|
|
elem.Content = contentItem.Value
|
|
e.injectContentIntoNode(elem.Node, contentItem.Value, contentItem.Type)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// injectContentIntoNode injects content value into an HTML node
|
|
func (e *ContentEngine) injectContentIntoNode(node *html.Node, content, contentType string) {
|
|
// Clear existing text content
|
|
for child := node.FirstChild; child != nil; {
|
|
next := child.NextSibling
|
|
if child.Type == html.TextNode {
|
|
node.RemoveChild(child)
|
|
}
|
|
child = next
|
|
}
|
|
|
|
// Add new text content
|
|
textNode := &html.Node{
|
|
Type: html.TextNode,
|
|
Data: content,
|
|
}
|
|
node.AppendChild(textNode)
|
|
}
|