diff --git a/internal/engine/id_generator.go b/internal/engine/id_generator.go index c076411..f65e026 100644 --- a/internal/engine/id_generator.go +++ b/internal/engine/id_generator.go @@ -120,34 +120,30 @@ func (g *IDGenerator) buildPrefix(fileName, tag, primaryClass string, index int) // createDeterministicSignature creates a deterministic signature for element identification func (g *IDGenerator) createDeterministicSignature(node *html.Node, filePath string) string { - // Build signature from stable characteristics - var sigParts []string - - // 1. DOM path (simplified, max 3 levels) + // Build enhanced signature with 6 components for maximum differentiation + tag := node.Data domPath := g.getSimpleDOMPath(node) - if domPath != "" { - sigParts = append(sigParts, domPath) - } - - // 2. Sibling position - siblingIndex := g.getSiblingIndex(node) - sigParts = append(sigParts, fmt.Sprintf("pos%d", siblingIndex)) - - // 3. Content preview (first few chars for uniqueness) + classes := strings.Join(GetClasses(node), " ") contentPreview := g.getContentPreview(node) - if contentPreview != "" { - // Use first 20 chars for signature - if len(contentPreview) > 20 { - contentPreview = contentPreview[:20] - } - sigParts = append(sigParts, contentPreview) + siblingIndex := g.getSiblingIndex(node) + + // Normalize content preview to first 20 chars + if len(contentPreview) > 20 { + contentPreview = contentPreview[:20] } - // 4. Create hash of combined signature - combined := strings.Join(sigParts, "|") - hash := sha256.Sum256([]byte(combined)) + // Create comprehensive deterministic signature + signature := fmt.Sprintf("%s|%s|%s|%s|%s|%d", + filePath, // File context for uniqueness across files + domPath, // Structural position in DOM + tag, // Element type + classes, // CSS classes for style differentiation + contentPreview, // Content for similar-structure differentiation + siblingIndex, // Position among similar siblings + ) - // Use first 6 characters of hash for short, deterministic suffix + // Create deterministic hash suffix (6 chars) + hash := sha256.Sum256([]byte(signature)) return fmt.Sprintf("%x", hash)[:6] } @@ -157,17 +153,24 @@ func (g *IDGenerator) createSignature(node *html.Node, filePath string) string { return "" } -// getSimpleDOMPath creates a simple DOM path for uniqueness +// getSimpleDOMPath creates a simple but precise DOM path for uniqueness (max 3 levels) func (g *IDGenerator) getSimpleDOMPath(node *html.Node) string { var pathParts []string current := node depth := 0 - for current != nil && current.Type == html.ElementNode && depth < 5 { + for current != nil && current.Type == html.ElementNode && depth < 3 { part := current.Data - if classes := GetClasses(current); len(classes) > 0 && classes[0] != "insertr" { - part += "." + classes[0] + + // Add first meaningful class (not insertr) for better differentiation + classes := GetClasses(current) + for _, class := range classes { + if class != "insertr" && class != "" { + part += "." + class + break + } } + pathParts = append([]string{part}, pathParts...) current = current.Parent depth++ @@ -203,7 +206,7 @@ func (g *IDGenerator) extractTextContent(node *html.Node, text *strings.Builder) } } -// getSiblingIndex returns the position of this element among its siblings of the same type +// getSiblingIndex returns the position of this element among its siblings of the same type and class func (g *IDGenerator) getSiblingIndex(node *html.Node) int { if node.Parent == nil { return 0 @@ -213,15 +216,42 @@ func (g *IDGenerator) getSiblingIndex(node *html.Node) int { tag := node.Data classes := GetClasses(node) + // First try: match by tag + insertr class (most common case) + hasInsertr := false + for _, class := range classes { + if class == "insertr" { + hasInsertr = true + break + } + } + for sibling := node.Parent.FirstChild; sibling != nil; sibling = sibling.NextSibling { if sibling.Type == html.ElementNode && sibling.Data == tag { siblingClasses := GetClasses(sibling) - // Check if classes match (for more precise positioning) - if g.classesMatch(classes, siblingClasses) { - if sibling == node { - return index + + // For insertr elements, match by tag + insertr class + if hasInsertr { + siblingHasInsertr := false + for _, class := range siblingClasses { + if class == "insertr" { + siblingHasInsertr = true + break + } + } + if siblingHasInsertr { + if sibling == node { + return index + } + index++ + } + } else { + // For non-insertr elements, match by exact class list + if g.classesMatch(classes, siblingClasses) { + if sibling == node { + return index + } + index++ } - index++ } } }