diff --git a/internal/engine/engine.go b/internal/engine/engine.go index 383d0a4..1deadcd 100644 --- a/internal/engine/engine.go +++ b/internal/engine/engine.go @@ -60,20 +60,12 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro processedElements := make([]ProcessedElement, len(elements)) for i, elem := range elements { - // Check if element already has a data-content-id - existingID := e.getAttribute(elem.Node, "data-content-id") - var id string - var wasGenerated bool + // Generate structural ID (always deterministic) + id := e.idGenerator.Generate(elem.Node, input.FilePath) - if existingID != "" { - // Use existing ID from enhanced element - id = existingID - wasGenerated = false - } else { - // Generate new ID for unprocessed element - id = e.idGenerator.Generate(elem.Node, input.FilePath) - wasGenerated = true - } + // Database-first approach: Check if content already exists + existingContent, err := e.client.GetContent(input.SiteID, id) + contentExists := (err == nil && existingContent != nil) generatedIDs[fmt.Sprintf("element_%d", i)] = id @@ -81,7 +73,7 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro Node: elem.Node, ID: id, Type: elem.Type, - Generated: wasGenerated, + Generated: !contentExists, // Mark as generated only if new to database Tag: elem.Node.Data, Classes: GetClasses(elem.Node), } @@ -89,8 +81,8 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro // Add/update content attributes to the node e.addContentAttributes(elem.Node, id, elem.Type) - // Store content and template for newly discovered elements (first-pass) - if wasGenerated && (input.Mode == Enhancement || input.Mode == ContentInjection) { + // Store content only for truly new elements (database-first check) + if !contentExists && (input.Mode == Enhancement || input.Mode == ContentInjection) { // Extract content and template from the unprocessed element htmlContent := e.extractHTMLContent(elem.Node) originalTemplate := e.extractOriginalTemplate(elem.Node) @@ -100,6 +92,8 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro if err != nil { // Log error but don't fail the enhancement - content just won't be stored fmt.Printf("⚠️ Failed to store content for %s: %v\n", id, err) + } else { + fmt.Printf("✅ Created new content: %s (%s)\n", id, elem.Type) } } } diff --git a/internal/engine/id_generator.go b/internal/engine/id_generator.go index f65e026..badd7eb 100644 --- a/internal/engine/id_generator.go +++ b/internal/engine/id_generator.go @@ -120,26 +120,21 @@ func (g *IDGenerator) buildPrefix(fileName, tag, primaryClass string, index int) // createDeterministicSignature creates a deterministic signature for element identification func (g *IDGenerator) createDeterministicSignature(node *html.Node, filePath string) string { - // Build enhanced signature with 6 components for maximum differentiation + // Build structural signature for stable IDs across content changes tag := node.Data - domPath := g.getSimpleDOMPath(node) - classes := strings.Join(GetClasses(node), " ") - contentPreview := g.getContentPreview(node) - siblingIndex := g.getSiblingIndex(node) + domPath := g.getDetailedDOMPath(node) + allClasses := strings.Join(GetClasses(node), " ") + semanticContext := g.getSemanticContext(node) + preciseIndex := g.getPreciseSiblingIndex(node) - // Normalize content preview to first 20 chars - if len(contentPreview) > 20 { - contentPreview = contentPreview[:20] - } - - // Create comprehensive deterministic signature + // Create purely structural deterministic signature signature := fmt.Sprintf("%s|%s|%s|%s|%s|%d", - filePath, // File context for uniqueness across files - domPath, // Structural position in DOM - tag, // Element type - classes, // CSS classes for style differentiation - contentPreview, // Content for similar-structure differentiation - siblingIndex, // Position among similar siblings + filePath, // File context for uniqueness across files + domPath, // Detailed structural position in DOM + tag, // Element type + allClasses, // All CSS classes for style differentiation + semanticContext, // Semantic context (header/main/footer/nav) + preciseIndex, // Precise position among exact siblings ) // Create deterministic hash suffix (6 chars) @@ -179,6 +174,142 @@ func (g *IDGenerator) getSimpleDOMPath(node *html.Node) string { return strings.Join(pathParts, ">") } +// getDetailedDOMPath creates a more detailed DOM path for enhanced structural differentiation +func (g *IDGenerator) getDetailedDOMPath(node *html.Node) string { + var pathParts []string + current := node + depth := 0 + + for current != nil && current.Type == html.ElementNode && depth < 5 { + part := current.Data + + // Add all meaningful classes for maximum differentiation + classes := GetClasses(current) + var meaningfulClasses []string + for _, class := range classes { + if class != "insertr" && class != "" { + meaningfulClasses = append(meaningfulClasses, class) + } + } + if len(meaningfulClasses) > 0 { + part += "." + strings.Join(meaningfulClasses, ".") + } + + pathParts = append([]string{part}, pathParts...) + current = current.Parent + depth++ + } + + return strings.Join(pathParts, ">") +} + +// getSemanticContext identifies the semantic container (header, main, footer, nav) +func (g *IDGenerator) getSemanticContext(node *html.Node) string { + current := node.Parent + + // Traverse up to find semantic containers + for current != nil && current.Type == html.ElementNode { + tag := strings.ToLower(current.Data) + + // Direct semantic tags + switch tag { + case "header": + return "header" + case "main": + return "main" + case "footer": + return "footer" + case "nav": + return "nav" + case "aside": + return "aside" + } + + // Semantic classes + classes := GetClasses(current) + for _, class := range classes { + class = strings.ToLower(class) + if strings.Contains(class, "header") { + return "header" + } + if strings.Contains(class, "footer") { + return "footer" + } + if strings.Contains(class, "nav") { + return "nav" + } + if strings.Contains(class, "sidebar") || strings.Contains(class, "aside") { + return "aside" + } + } + + current = current.Parent + } + + return "content" +} + +// getPreciseSiblingIndex returns position among siblings with exact tag and class match +func (g *IDGenerator) getPreciseSiblingIndex(node *html.Node) int { + if node.Parent == nil { + return 0 + } + + index := 0 + tag := node.Data + classes := GetClasses(node) + + // Sort classes for consistent comparison + sortedClasses := make([]string, len(classes)) + copy(sortedClasses, classes) + for i := 0; i < len(sortedClasses); i++ { + for j := i + 1; j < len(sortedClasses); j++ { + if sortedClasses[i] > sortedClasses[j] { + sortedClasses[i], sortedClasses[j] = sortedClasses[j], sortedClasses[i] + } + } + } + + for sibling := node.Parent.FirstChild; sibling != nil; sibling = sibling.NextSibling { + if sibling.Type == html.ElementNode && sibling.Data == tag { + siblingClasses := GetClasses(sibling) + + // Sort sibling classes for comparison + sortedSiblingClasses := make([]string, len(siblingClasses)) + copy(sortedSiblingClasses, siblingClasses) + for i := 0; i < len(sortedSiblingClasses); i++ { + for j := i + 1; j < len(sortedSiblingClasses); j++ { + if sortedSiblingClasses[i] > sortedSiblingClasses[j] { + sortedSiblingClasses[i], sortedSiblingClasses[j] = sortedSiblingClasses[j], sortedSiblingClasses[i] + } + } + } + + // Check if classes match exactly + if g.classSlicesEqual(sortedClasses, sortedSiblingClasses) { + if sibling == node { + return index + } + index++ + } + } + } + return index +} + +// classSlicesEqual compares two sorted class slices for equality +func (g *IDGenerator) classSlicesEqual(a, b []string) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i] != b[i] { + return false + } + } + return true +} + // getContentPreview extracts first 50 characters of text content for uniqueness func (g *IDGenerator) getContentPreview(node *html.Node) string { var text strings.Builder