- Replace content-aware extractCleanTemplate with structure-only extractStructureSignature for template comparison
- Add extractTemplateForStorage to preserve actual content for meaningful template display
- Update generateTemplateSignature to use purely structural comparison ignoring text content
- Remove redundant extractClassSignature function (functionality moved to extractStructureSignature)
- Resolves issue where identical DOM structures created multiple templates due to content differences
- Knowledge cards and other collections now correctly deduplicate to single templates while preserving content for previews
161 lines
4.6 KiB
Go
161 lines
4.6 KiB
Go
package engine
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"slices"
|
|
"sort"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// addContentAttributes adds data-content-id attribute only
|
|
func (e *ContentEngine) addContentAttributes(node *html.Node, contentID string) {
|
|
// Add data-content-id attribute
|
|
SetAttribute(node, "data-content-id", contentID)
|
|
}
|
|
|
|
// injectContent injects content from database into elements
|
|
func (e *ContentEngine) injectContent(elements []ProcessedElement, siteID string) error {
|
|
for i := range elements {
|
|
elem := &elements[i]
|
|
|
|
// Get content from database by ID - FIXED: Use context.Background() instead of nil
|
|
contentItem, err := e.client.GetContent(context.Background(), siteID, elem.ID)
|
|
if err != nil {
|
|
// Content not found - skip silently (enhancement mode should not fail on missing content)
|
|
continue
|
|
}
|
|
|
|
if contentItem != nil {
|
|
// Inject the content into the element
|
|
elem.Content = contentItem.HTMLContent
|
|
|
|
// Update injector siteID for this operation
|
|
// HACK: I do not like this. Injector refactor?
|
|
e.injector.siteID = siteID
|
|
e.injector.injectHTMLContent(elem.Node, contentItem.HTMLContent)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// extractHTMLContent extracts the inner HTML content from a node
|
|
func (e *ContentEngine) extractHTMLContent(node *html.Node) string {
|
|
var content strings.Builder
|
|
|
|
// Render all child nodes in order to preserve HTML structure
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
if err := html.Render(&content, child); err == nil {
|
|
// All nodes (text and element) rendered in correct order
|
|
}
|
|
}
|
|
|
|
return strings.TrimSpace(content.String())
|
|
}
|
|
|
|
// extractOriginalTemplate extracts the outer HTML of the element (including the element itself)
|
|
func (e *ContentEngine) extractOriginalTemplate(node *html.Node) string {
|
|
var buf strings.Builder
|
|
if err := html.Render(&buf, node); err != nil {
|
|
return ""
|
|
}
|
|
return buf.String()
|
|
}
|
|
|
|
// extractStructureSignature creates a content-agnostic signature for template comparison
|
|
// This only considers DOM structure and class attributes, completely ignoring text content
|
|
func (e *ContentEngine) extractStructureSignature(node *html.Node) string {
|
|
var signature strings.Builder
|
|
|
|
e.walkNodes(node, func(n *html.Node) {
|
|
if n.Type == html.ElementNode {
|
|
// Get classes for this element
|
|
classes := GetClasses(n)
|
|
if len(classes) > 0 {
|
|
// Sort classes for consistent comparison
|
|
sortedClasses := make([]string, len(classes))
|
|
copy(sortedClasses, classes)
|
|
sort.Strings(sortedClasses)
|
|
|
|
// Add to signature: element[class1,class2,...]
|
|
signature.WriteString(fmt.Sprintf("%s[%s];", n.Data, strings.Join(sortedClasses, ",")))
|
|
} else {
|
|
// Element with no classes
|
|
signature.WriteString(fmt.Sprintf("%s[];", n.Data))
|
|
}
|
|
}
|
|
// Completely ignore text nodes and their content
|
|
})
|
|
|
|
return signature.String()
|
|
}
|
|
|
|
// extractTemplateForStorage extracts template HTML while preserving content but removing data-content-id attributes
|
|
func (e *ContentEngine) extractTemplateForStorage(node *html.Node) string {
|
|
// Clone the node to avoid modifying the original
|
|
clonedNode := e.cloneNode(node)
|
|
|
|
// Remove all data-content-id attributes but preserve all content
|
|
e.walkNodes(clonedNode, func(n *html.Node) {
|
|
if n.Type == html.ElementNode {
|
|
// Remove data-content-id attribute
|
|
e.removeAttribute(n, "data-content-id")
|
|
}
|
|
})
|
|
|
|
var buf strings.Builder
|
|
if err := html.Render(&buf, clonedNode); err != nil {
|
|
return ""
|
|
}
|
|
return buf.String()
|
|
}
|
|
|
|
// removeAttribute removes an attribute from an HTML node
|
|
func (e *ContentEngine) removeAttribute(n *html.Node, key string) {
|
|
for i, attr := range n.Attr {
|
|
if attr.Key == key {
|
|
n.Attr = slices.Delete(n.Attr, i, i+1)
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// hasClass checks if a node has a specific class
|
|
func (e *ContentEngine) hasClass(n *html.Node, className string) bool {
|
|
for _, attr := range n.Attr {
|
|
if attr.Key == "class" {
|
|
classes := strings.Fields(attr.Val)
|
|
if slices.Contains(classes, className) {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// getPlaceholderForElement returns appropriate placeholder text for different element types
|
|
func (e *ContentEngine) getPlaceholderForElement(elementType string) string {
|
|
placeholders := map[string]string{
|
|
"h1": "Heading 1",
|
|
"h2": "Heading 2",
|
|
"h3": "Heading 3",
|
|
"h4": "Heading 4",
|
|
"h5": "Heading 5",
|
|
"h6": "Heading 6",
|
|
"p": "Paragraph text",
|
|
"span": "Text",
|
|
"div": "Content block",
|
|
"button": "Button",
|
|
"a": "Link text",
|
|
"li": "List item",
|
|
"blockquote": "Quote text",
|
|
}
|
|
|
|
if placeholder, exists := placeholders[elementType]; exists {
|
|
return placeholder
|
|
}
|
|
return "Enter content..."
|
|
}
|