Files
insertr/internal/engine/collection.go
Joakim 16ad759880 Fix template deduplication by separating structure comparison from content storage
- Replace content-aware extractCleanTemplate with structure-only extractStructureSignature for template comparison
- Add extractTemplateForStorage to preserve actual content for meaningful template display
- Update generateTemplateSignature to use purely structural comparison ignoring text content
- Remove redundant extractClassSignature function (functionality moved to extractStructureSignature)
- Resolves issue where identical DOM structures created multiple templates due to content differences
- Knowledge cards and other collections now correctly deduplicate to single templates while preserving content for previews
2025-11-01 23:09:46 +01:00

536 lines
20 KiB
Go
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package engine
import (
"context"
"fmt"
"strings"
"time"
"github.com/insertr/insertr/internal/db"
"golang.org/x/net/html"
)
// CollectionElement wraps an HTML node that was identified as an
// insertr-add collection container.
type CollectionElement struct {
	// Node is the parsed HTML node of the collection element.
	Node *html.Node
}
// hasInsertrAddClass reports whether node carries the "insertr-add" class,
// i.e. whether it should be treated as a collection container.
func (e *ContentEngine) hasInsertrAddClass(node *html.Node) bool {
	return ContainsClass(GetClasses(node), "insertr-add")
}
// processCollection synchronises one collection node with the database.
//
// If the collection is already stored, the node's children are rebuilt from
// the database (the database is the source of truth). Otherwise a collection
// record is created from the node's original template, and templates plus
// initial items are extracted from the node's current children.
//
// NOTE(review): any error from GetCollection is treated the same as "not
// found" and leads to the creation path — confirm this is intended for
// transient database failures.
func (e *ContentEngine) processCollection(collectionNode *html.Node, collectionID, siteID string) error {
	ctx := context.Background()

	stored, lookupErr := e.client.GetCollection(ctx, siteID, collectionID)
	if lookupErr == nil && stored != nil {
		// Known collection: always rebuild the DOM from stored items.
		if err := e.reconstructCollectionItems(collectionNode, collectionID, siteID); err != nil {
			return fmt.Errorf("failed to reconstruct collection %s: %w", collectionID, err)
		}

		// Best-effort item count, used only for the feedback line below;
		// a failure here simply reports zero items.
		storedItems, _ := e.client.GetCollectionItems(ctx, siteID, collectionID)
		fmt.Printf("✅ Reconstructed collection: %s from database (%d items)\n", collectionID, len(storedItems))
		return nil
	}

	// Unknown collection: persist the container markup first ...
	containerHTML := e.extractOriginalTemplate(collectionNode)
	if _, err := e.client.CreateCollection(ctx, siteID, collectionID, containerHTML, "system"); err != nil {
		return fmt.Errorf("failed to create collection %s: %w", collectionID, err)
	}

	// ... then derive templates and initial items from the existing children.
	if err := e.extractAndStoreTemplatesAndItems(collectionNode, collectionID, siteID); err != nil {
		return fmt.Errorf("failed to extract templates and items for collection %s: %w", collectionID, err)
	}

	fmt.Printf("✅ Created new collection: %s with templates and initial items\n", collectionID)
	return nil
}
// extractAndStoreTemplatesAndItems derives templates from the element children
// of collectionNode, stores those children as the collection's initial items,
// and finally rebuilds the node's children from the database.
//
// Children that produce the same template signature share a single template;
// the first template created is flagged as the default. When the node has no
// element children, a single "default" template with placeholder markup is
// created and nothing else is stored.
func (e *ContentEngine) extractAndStoreTemplatesAndItems(collectionNode *html.Node, collectionID, siteID string) error {
	ctx := context.Background()

	// Only element nodes count; text and comment nodes are ignored.
	elementCount := 0
	for n := collectionNode.FirstChild; n != nil; n = n.NextSibling {
		if n.Type == html.ElementNode {
			elementCount++
		}
	}

	if elementCount == 0 {
		if _, err := e.client.CreateCollectionTemplate(ctx, siteID, collectionID, "default", "<div>New item</div>", true); err != nil {
			return fmt.Errorf("failed to create default template: %w", err)
		}
		fmt.Printf("✅ Created default template for collection %s\n", collectionID)
		return nil
	}

	// perChildIDs holds one template ID per element child, in document order.
	var perChildIDs []int
	idBySignature := make(map[string]int) // template signature -> template ID
	createdCount := 0

	for n := collectionNode.FirstChild; n != nil; n = n.NextSibling {
		if n.Type != html.ElementNode {
			continue
		}

		storedHTML := e.extractTemplateForStorage(n)
		signature := e.generateTemplateSignature(n)

		if knownID, seen := idBySignature[signature]; seen {
			// Same signature as an earlier child: reuse its template.
			perChildIDs = append(perChildIDs, knownID)
			fmt.Printf("✅ Reusing existing template for identical structure+styling in collection %s\n", collectionID)
			continue
		}

		// First occurrence of this signature: create a template for it.
		name := e.generateTemplateNameFromSignature(n, createdCount+1)
		created, err := e.client.CreateCollectionTemplate(ctx, siteID, collectionID, name, storedHTML, createdCount == 0)
		if err != nil {
			return fmt.Errorf("failed to create template %s: %w", name, err)
		}

		idBySignature[signature] = created.TemplateID
		perChildIDs = append(perChildIDs, created.TemplateID)
		createdCount++
		fmt.Printf("✅ Created new template '%s' for collection %s\n", name, collectionID)
	}

	// Persist the original children as items before touching the DOM.
	if err := e.storeChildrenAsCollectionItems(collectionNode, collectionID, siteID, perChildIDs); err != nil {
		return fmt.Errorf("failed to store initial collection items: %w", err)
	}

	// Rebuild from the database so the DOM matches what was stored.
	if err := e.reconstructCollectionItems(collectionNode, collectionID, siteID); err != nil {
		return fmt.Errorf("failed to reconstruct initial collection items: %w", err)
	}

	return nil
}
// reconstructCollectionItems replaces the children of collectionNode with the
// items stored in the database for the given collection.
//
// For every stored item, in the order returned by the client:
//   - the item's HTMLContent is parsed and the first element child of <body>
//     is taken as the item's root;
//   - every descendant with class "insertr" and a non-empty data-content-id is
//     hydrated with the matching content record (lookup failures are ignored,
//     leaving the stored markup as is);
//   - data-item-id is set on the root, which is then appended to collectionNode.
//
// Items that reference an unknown template or whose HTML cannot be parsed are
// logged and skipped; items whose HTML yields no element are skipped silently.
// The final log line reports the number of stored items, including skipped ones.
//
// An error is returned only when items or templates cannot be loaded.
func (e *ContentEngine) reconstructCollectionItems(collectionNode *html.Node, collectionID, siteID string) error {
	// Get all items for this collection from database
	items, err := e.client.GetCollectionItems(context.Background(), siteID, collectionID)
	if err != nil {
		return fmt.Errorf("failed to get collection items: %w", err)
	}

	// Get templates for this collection
	templates, err := e.client.GetCollectionTemplates(context.Background(), siteID, collectionID)
	if err != nil {
		return fmt.Errorf("failed to get collection templates: %w", err)
	}

	// Build template lookup. Index into the slice instead of taking the
	// address of the range variable: before Go 1.22 the range variable is
	// shared across iterations, so every entry would alias the last template.
	templateLookup := make(map[int]*db.CollectionTemplateItem, len(templates))
	for i := range templates {
		templateLookup[templates[i].TemplateID] = &templates[i]
	}

	// Clear existing children from the collection node. The next sibling is
	// captured first because RemoveChild resets the removed node's links.
	for child := collectionNode.FirstChild; child != nil; {
		next := child.NextSibling
		collectionNode.RemoveChild(child)
		child = next
	}

	// Reconstruct items in order from database
	for _, item := range items {
		if _, exists := templateLookup[item.TemplateID]; !exists {
			fmt.Printf("⚠️ Template %d not found for item %s, skipping\n", item.TemplateID, item.ItemID)
			continue
		}

		// Parse the stored structural template HTML
		structuralDoc, err := html.Parse(strings.NewReader(item.HTMLContent))
		if err != nil {
			fmt.Printf("⚠️ Failed to parse structural template for item %s: %v\n", item.ItemID, err)
			continue
		}

		// html.Parse wraps fragments in <html><head><body>; the item's root is
		// the first element child of <body>.
		var structuralChild *html.Node
		e.walkNodes(structuralDoc, func(n *html.Node) {
			if n.Type == html.ElementNode && n.Data == "body" {
				for child := n.FirstChild; child != nil; child = child.NextSibling {
					if child.Type == html.ElementNode {
						structuralChild = child
						break
					}
				}
			}
		})
		if structuralChild == nil {
			continue
		}

		// Detach from the parsed document; AppendChild requires a parentless node.
		if structuralChild.Parent != nil {
			structuralChild.Parent.RemoveChild(structuralChild)
		}

		// Hydrate .insertr elements with their content from the content table.
		e.walkNodes(structuralChild, func(n *html.Node) {
			if n.Type != html.ElementNode || !HasClass(n, "insertr") {
				return
			}
			contentID := GetAttribute(n, "data-content-id")
			if contentID == "" {
				return
			}
			// Best effort: on lookup failure the stored markup is kept.
			contentItem, err := e.client.GetContent(context.Background(), siteID, contentID)
			if err == nil && contentItem != nil {
				e.injector.siteID = siteID
				e.injector.injectHTMLContent(n, contentItem.HTMLContent)
			}
		})

		// Inject data-item-id attribute for collection item identification
		if structuralChild.Type == html.ElementNode {
			SetAttribute(structuralChild, "data-item-id", item.ItemID)
		}

		collectionNode.AppendChild(structuralChild)
	}

	fmt.Printf("✅ Reconstructed %d items for collection %s\n", len(items), collectionID)
	return nil
}
// processChildElementsAsContent collects a ContentEntry for every element with
// class "insertr" found under childElement (as visited by walkNodes).
//
// Each matching element gets a content ID generated with the context
// "<itemID>-content", and that ID is written back onto the element as its
// data-content-id attribute. The element's markup is left in place so it keeps
// rendering; nothing is written to the database here. The returned error is
// always nil.
func (e *ContentEngine) processChildElementsAsContent(childElement *html.Node, siteID, itemID string) ([]ContentEntry, error) {
	var entries []ContentEntry

	// Including the item ID in the generator context keeps IDs distinct per item.
	idContext := fmt.Sprintf("%s-content", itemID)

	collect := func(n *html.Node) {
		if n.Type != html.ElementNode || !e.hasClass(n, "insertr") {
			return
		}

		id := e.idGenerator.Generate(n, idContext)
		entries = append(entries, ContentEntry{
			ID:          id,
			SiteID:      siteID,
			HTMLContent: e.extractHTMLContent(n),
			Template:    e.extractTemplateForStorage(n),
		})

		// Tag the element so it can be matched to its content record later.
		SetAttribute(n, "data-content-id", id)
	}
	e.walkNodes(childElement, collect)

	return entries, nil
}
// generateStructuralTemplateFromChild renders a copy of childElement in which
// the .insertr elements are tagged with the IDs from contentEntries.
//
// The i-th .insertr element visited receives the ID of contentEntries[i];
// elements beyond len(contentEntries) are left untagged. Element content is
// kept in the rendered markup. childElement itself is not modified because
// the work is done on a clone.
func (e *ContentEngine) generateStructuralTemplateFromChild(childElement *html.Node, contentEntries []ContentEntry) (string, error) {
	clone := e.cloneNode(childElement)

	nextEntry := 0
	e.walkNodes(clone, func(n *html.Node) {
		if n.Type != html.ElementNode || !e.hasClass(n, "insertr") {
			return
		}
		if nextEntry >= len(contentEntries) {
			return
		}
		SetAttribute(n, "data-content-id", contentEntries[nextEntry].ID)
		nextEntry++
	})

	var rendered strings.Builder
	err := html.Render(&rendered, clone)
	if err != nil {
		return "", fmt.Errorf("failed to render structural template: %w", err)
	}
	return rendered.String(), nil
}
// createVirtualElementFromTemplate parses templateHTML and returns the first
// element visited by walkNodes that is not one of the <html>, <head> or <body>
// wrappers.
//
// The returned node is still attached to the parsed document. An error is
// returned when parsing fails or when no such element exists.
func (e *ContentEngine) createVirtualElementFromTemplate(templateHTML string) (*html.Node, error) {
	doc, err := html.Parse(strings.NewReader(templateHTML))
	if err != nil {
		return nil, fmt.Errorf("failed to parse template HTML: %w", err)
	}

	var found *html.Node
	e.walkNodes(doc, func(n *html.Node) {
		// Keep only the first match.
		if found != nil || n.Type != html.ElementNode {
			return
		}
		switch n.Data {
		case "html", "head", "body":
			// Document wrappers, not template content.
			return
		}
		found = n
	})

	if found == nil {
		return nil, fmt.Errorf("no valid element found in template HTML")
	}
	return found, nil
}
// CreateCollectionItemFromTemplate creates a new collection item from template
// markup and appends it to the end of the collection.
//
// The template is parsed into an element, an item ID is generated for it, each
// .insertr element inside it is stored as its own content record, and the
// resulting structural markup is stored as a collection item at position
// (current max position + 1). lastEditedBy is recorded on the content records
// and on the item.
//
// The writes are issued one after another; if a later step fails, records
// created by earlier steps are not removed here.
func (e *ContentEngine) CreateCollectionItemFromTemplate(
	siteID, collectionID string,
	templateID int,
	templateHTML string,
	lastEditedBy string,
) (*db.CollectionItemWithTemplate, error) {
	ctx := context.Background()

	// Parse the template so IDs can be generated from a real element.
	element, err := e.createVirtualElementFromTemplate(templateHTML)
	if err != nil {
		return nil, fmt.Errorf("failed to create virtual element: %w", err)
	}

	// The same template yields the same base ID, so a time-derived suffix
	// (nanoseconds modulo 1,000,000) is appended to tell items apart.
	baseID := e.idGenerator.Generate(element, "collection-item")
	suffix := time.Now().UnixNano() % 1000000
	itemID := fmt.Sprintf("%s-%d", baseID, suffix)

	// Extract the editable pieces and persist each as a content record.
	entries, err := e.processChildElementsAsContent(element, siteID, itemID)
	if err != nil {
		return nil, fmt.Errorf("failed to process child elements: %w", err)
	}
	for _, entry := range entries {
		if _, err := e.client.CreateContent(ctx, entry.SiteID, entry.ID, entry.HTMLContent, entry.Template, lastEditedBy); err != nil {
			return nil, fmt.Errorf("failed to create content entry %s: %w", entry.ID, err)
		}
	}

	// Markup stored with the item, tagged with the content IDs from above.
	structure, err := e.generateStructuralTemplateFromChild(element, entries)
	if err != nil {
		return nil, fmt.Errorf("failed to generate structural template: %w", err)
	}

	// New items go after the current last position.
	maxPosition, err := e.client.GetMaxPosition(ctx, siteID, collectionID)
	if err != nil {
		return nil, fmt.Errorf("failed to get max position for collection %s: %w", collectionID, err)
	}
	nextPosition := maxPosition + 1
	fmt.Printf("🔢 Max position for collection %s: %d, assigning new item position: %d\n", collectionID, maxPosition, nextPosition)

	created, err := e.client.CreateCollectionItem(ctx,
		siteID, collectionID, itemID, templateID, structure, nextPosition, lastEditedBy,
	)
	if err != nil {
		return nil, fmt.Errorf("failed to create collection item: %w", err)
	}
	return created, nil
}
// storeChildrenAsCollectionItems stores each element child of collectionNode
// as a collection item, attributed to "system".
//
// templateIDs is matched to the element children by index; children beyond
// len(templateIDs) fall back to templateIDs[0]. For every child, its .insertr
// elements are stored as individual content records and the child's markup
// (tagged with those content IDs) is stored as the item at position i+1.
//
// Returns nil without storing anything when there are no element children.
// Returns an error when children exist but templateIDs is empty, or when any
// database write fails; records written before the failure are not removed here.
func (e *ContentEngine) storeChildrenAsCollectionItems(collectionNode *html.Node, collectionID, siteID string, templateIDs []int) error {
	var childElements []*html.Node

	// Walk through direct children of the collection
	for child := collectionNode.FirstChild; child != nil; child = child.NextSibling {
		if child.Type == html.ElementNode {
			childElements = append(childElements, child)
		}
	}

	if len(childElements) == 0 {
		fmt.Printf(" No children found to store as collection items for %s\n", collectionID)
		return nil
	}

	// The fallback below indexes templateIDs[0]; fail with an error rather
	// than panicking when no template IDs were supplied.
	if len(templateIDs) == 0 {
		return fmt.Errorf("no template IDs provided for collection %s", collectionID)
	}

	// Store each child as a collection item
	for i, childElement := range childElements {
		// Use corresponding template ID, or default to first template
		templateID := templateIDs[0]
		if i < len(templateIDs) {
			templateID = templateIDs[i]
		}

		// Generate item ID using unified generator with collection context
		itemID := e.idGenerator.Generate(childElement, "collection-item")

		// Process any .insertr elements within this child and store as content
		contentEntries, err := e.processChildElementsAsContent(childElement, siteID, itemID)
		if err != nil {
			return fmt.Errorf("failed to process child elements: %w", err)
		}

		// Store individual content entries in content table
		for _, entry := range contentEntries {
			if _, err := e.client.CreateContent(context.Background(), entry.SiteID, entry.ID, entry.HTMLContent, entry.Template, "system"); err != nil {
				return fmt.Errorf("failed to create content entry %s: %w", entry.ID, err)
			}
		}

		// Generate structural template for this collection item
		structuralTemplate, err := e.generateStructuralTemplateFromChild(childElement, contentEntries)
		if err != nil {
			return fmt.Errorf("failed to generate structural template: %w", err)
		}

		// Store structural template in collection_items (content lives in content table)
		if _, err := e.client.CreateCollectionItem(context.Background(), siteID, collectionID, itemID, templateID, structuralTemplate, i+1, "system"); err != nil {
			return fmt.Errorf("failed to create collection item %s: %w", itemID, err)
		}

		fmt.Printf("✅ Stored initial collection item: %s (template %d) with %d content entries\n", itemID, templateID, len(contentEntries))
	}

	return nil
}
// collectionProcessor is a thin wrapper that forwards collection processing
// calls to a ContentEngine.
type collectionProcessor struct {
	// engine is the ContentEngine that performs the actual work.
	engine *ContentEngine
}
// newCollectionProcessor returns a collectionProcessor bound to this engine.
func (e *ContentEngine) newCollectionProcessor() *collectionProcessor {
	processor := new(collectionProcessor)
	processor.engine = e
	return processor
}
// process runs the full collection workflow for collectionNode by forwarding
// to the engine's processCollection.
func (cp *collectionProcessor) process(collectionNode *html.Node, collectionID, siteID string) error {
	engine := cp.engine
	return engine.processCollection(collectionNode, collectionID, siteID)
}
// extractAndStoreTemplatesAndItems forwards to the engine method of the same
// name and returns its result unchanged.
func (cp *collectionProcessor) extractAndStoreTemplatesAndItems(collectionNode *html.Node, collectionID, siteID string) error {
	engine := cp.engine
	return engine.extractAndStoreTemplatesAndItems(collectionNode, collectionID, siteID)
}
// reconstructItems forwards to the engine's reconstructCollectionItems and
// returns its result unchanged.
func (cp *collectionProcessor) reconstructItems(collectionNode *html.Node, collectionID, siteID string) error {
	engine := cp.engine
	return engine.reconstructCollectionItems(collectionNode, collectionID, siteID)
}
// cloneNode returns a deep copy of node: its type, data, atom, namespace and
// attributes are copied, and every child is cloned recursively in order.
// The copy has no parent or siblings. node must not be nil.
func (e *ContentEngine) cloneNode(node *html.Node) *html.Node {
	dup := &html.Node{
		Type:      node.Type,
		Data:      node.Data,
		DataAtom:  node.DataAtom,
		Namespace: node.Namespace,
	}

	// Give the copy its own attribute slice so later edits do not leak back
	// into the source node. An attribute-less node keeps a nil slice.
	if len(node.Attr) > 0 {
		dup.Attr = make([]html.Attribute, len(node.Attr))
		copy(dup.Attr, node.Attr)
	}

	for c := node.FirstChild; c != nil; c = c.NextSibling {
		dup.AppendChild(e.cloneNode(c))
	}

	return dup
}
// generateTemplateSignature returns the key used to decide whether two
// collection children share a template. It is exactly the value produced by
// extractStructureSignature for element.
func (e *ContentEngine) generateTemplateSignature(element *html.Node) string {
	signature := e.extractStructureSignature(element)
	return signature
}
// generateTemplateNameFromSignature derives a human-readable template name
// from the classes on element's root.
//
// Resolution order:
//  1. If the root has classes outside the built-in "common" set, they are
//     joined with "_", dashes become "_", the result is limited to 20 bytes
//     and then lower-cased and title-cased.
//  2. Otherwise, if the root has more than one class, the last class that is
//     not "insertr"/"insertr-add" is used, formatted the same way (no length
//     limit is applied in this branch).
//  3. Otherwise "template_<fallbackIndex>" is returned.
func (e *ContentEngine) generateTemplateNameFromSignature(element *html.Node, fallbackIndex int) string {
	const maxNameBytes = 20

	rootClasses := GetClasses(element)
	if len(rootClasses) > 0 {
		// Classes that are too generic to tell template variants apart.
		commonClasses := map[string]bool{
			"insertr": true, "insertr-add": true,
			// Common base classes that don't indicate variants
			"testimonial-item": true, "card": true, "item": true, "post": true,
			"container": true, "wrapper": true, "content": true,
		}

		var distinctiveClasses []string
		for _, class := range rootClasses {
			if !commonClasses[class] {
				distinctiveClasses = append(distinctiveClasses, class)
			}
		}

		if len(distinctiveClasses) > 0 {
			name := strings.Join(distinctiveClasses, "_")
			name = strings.ReplaceAll(name, "-", "_")
			if len(name) > maxNameBytes {
				// A byte-offset cut can land inside a multi-byte rune; drop
				// the resulting partial sequence instead of letting it turn
				// into U+FFFD further down.
				name = strings.ToValidUTF8(name[:maxNameBytes], "")
			}
			// NOTE: strings.Title is deprecated; it is kept because the
			// suggested replacement lives outside the standard library.
			return strings.Title(strings.ToLower(name))
		}

		if len(rootClasses) > 1 {
			// Only common classes: use the last one that is not an insertr marker.
			for i := len(rootClasses) - 1; i >= 0; i-- {
				if rootClasses[i] != "insertr" && rootClasses[i] != "insertr-add" {
					name := strings.ReplaceAll(rootClasses[i], "-", "_")
					return strings.Title(strings.ToLower(name))
				}
			}
		}
	}

	// Fallback to numbered template
	return fmt.Sprintf("template_%d", fallbackIndex)
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}