Fix template deduplication by separating structure comparison from content storage
- Replace content-aware extractCleanTemplate with structure-only extractStructureSignature for template comparison
- Add extractTemplateForStorage to preserve actual content for meaningful template display
- Update generateTemplateSignature to use purely structural comparison, ignoring text content
- Remove redundant extractClassSignature function (functionality moved to extractStructureSignature)
- Resolves issue where identical DOM structures created multiple templates due to content differences
- Knowledge cards and other collections now correctly deduplicate to single templates while preserving content for previews
This commit is contained in:
@@ -2,7 +2,9 @@ package engine
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"slices"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
@@ -62,31 +64,44 @@ func (e *ContentEngine) extractOriginalTemplate(node *html.Node) string {
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// extractCleanTemplate extracts a clean template without data-content-id attributes and with placeholder content. Used for collection template variants.
|
||||
func (e *ContentEngine) extractCleanTemplate(node *html.Node) string {
|
||||
// extractStructureSignature creates a content-agnostic signature for template comparison
|
||||
// This only considers DOM structure and class attributes, completely ignoring text content
|
||||
func (e *ContentEngine) extractStructureSignature(node *html.Node) string {
|
||||
var signature strings.Builder
|
||||
|
||||
e.walkNodes(node, func(n *html.Node) {
|
||||
if n.Type == html.ElementNode {
|
||||
// Get classes for this element
|
||||
classes := GetClasses(n)
|
||||
if len(classes) > 0 {
|
||||
// Sort classes for consistent comparison
|
||||
sortedClasses := make([]string, len(classes))
|
||||
copy(sortedClasses, classes)
|
||||
sort.Strings(sortedClasses)
|
||||
|
||||
// Add to signature: element[class1,class2,...]
|
||||
signature.WriteString(fmt.Sprintf("%s[%s];", n.Data, strings.Join(sortedClasses, ",")))
|
||||
} else {
|
||||
// Element with no classes
|
||||
signature.WriteString(fmt.Sprintf("%s[];", n.Data))
|
||||
}
|
||||
}
|
||||
// Completely ignore text nodes and their content
|
||||
})
|
||||
|
||||
return signature.String()
|
||||
}
|
||||
|
||||
// extractTemplateForStorage extracts template HTML while preserving content but removing data-content-id attributes
|
||||
func (e *ContentEngine) extractTemplateForStorage(node *html.Node) string {
|
||||
// Clone the node to avoid modifying the original
|
||||
clonedNode := e.cloneNode(node)
|
||||
|
||||
// Remove all data-content-id attributes and replace content with placeholders
|
||||
// Remove all data-content-id attributes but preserve all content
|
||||
e.walkNodes(clonedNode, func(n *html.Node) {
|
||||
if n.Type == html.ElementNode {
|
||||
// Remove data-content-id attribute
|
||||
e.removeAttribute(n, "data-content-id")
|
||||
|
||||
// If this is an .insertr element, replace content with placeholder
|
||||
if e.hasClass(n, "insertr") {
|
||||
placeholderText := e.getPlaceholderForElement(n.Data)
|
||||
// Clear existing children and add placeholder text
|
||||
for child := n.FirstChild; child != nil; {
|
||||
next := child.NextSibling
|
||||
n.RemoveChild(child)
|
||||
child = next
|
||||
}
|
||||
n.AppendChild(&html.Node{
|
||||
Type: html.TextNode,
|
||||
Data: placeholderText,
|
||||
})
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user