diff --git a/internal/engine/id_generator.go b/internal/engine/id_generator.go index a656ade..c076411 100644 --- a/internal/engine/id_generator.go +++ b/internal/engine/id_generator.go @@ -1,11 +1,11 @@ package engine import ( + "crypto/sha256" "fmt" "path/filepath" "strings" - "github.com/google/uuid" "golang.org/x/net/html" ) @@ -23,7 +23,7 @@ func NewIDGenerator() *IDGenerator { } } -// Generate creates a content ID for an HTML element using lightweight hierarchical approach +// Generate creates a content ID for an HTML element using a deterministic approach (readable prefix followed by a hash-derived signature) func (g *IDGenerator) Generate(node *html.Node, filePath string) string { // 1. File context (minimal) fileName := g.getFileName(filePath) @@ -35,12 +35,19 @@ func (g *IDGenerator) Generate(node *html.Node, filePath string) string { // 3. Build readable prefix (deterministic, no runtime counting) prefix := g.buildDeterministicPrefix(fileName, tag, primaryClass) - // 5. Add UUID-based suffix for guaranteed uniqueness - uuidSuffix := uuid.New().String()[:6] // Use first 6 chars of UUID + // 4. 
Create deterministic suffix based on element characteristics + signature := g.createDeterministicSignature(node, filePath) - finalID := fmt.Sprintf("%s-%s", prefix, uuidSuffix) + finalID := fmt.Sprintf("%s-%s", prefix, signature) + + // Ensure uniqueness among the IDs already recorded in g.usedIDs by appending an increasing numeric counter to the original ID on collision + counter := 1 + originalID := finalID + for g.usedIDs[finalID] { + finalID = fmt.Sprintf("%s-%d", originalID, counter) + counter++ + } - // Ensure uniqueness (should be guaranteed by hash, but safety check) g.usedIDs[finalID] = true return finalID @@ -111,9 +118,42 @@ func (g *IDGenerator) buildPrefix(fileName, tag, primaryClass string, index int) return strings.Join(parts, "-") } -// createSignature creates a unique signature for collision resistance (DEPRECATED - using UUID now) +// createDeterministicSignature returns a 6-character hex signature derived from the element's DOM path, sibling position, and content preview (filePath is accepted but currently unused) +func (g *IDGenerator) createDeterministicSignature(node *html.Node, filePath string) string { + // Build signature from stable characteristics + var sigParts []string + + // 1. DOM path (simplified, max 3 levels) + domPath := g.getSimpleDOMPath(node) + if domPath != "" { + sigParts = append(sigParts, domPath) + } + + // 2. Sibling position + siblingIndex := g.getSiblingIndex(node) + sigParts = append(sigParts, fmt.Sprintf("pos%d", siblingIndex)) + + // 3. Content preview (first few chars for uniqueness) + contentPreview := g.getContentPreview(node) + if contentPreview != "" { + // Cap the preview at its first 20 bytes (this may split a multi-byte UTF-8 rune, which is harmless here because the value is only hashed) + if len(contentPreview) > 20 { + contentPreview = contentPreview[:20] + } + sigParts = append(sigParts, contentPreview) + } + + // 4. 
Create hash of combined signature + combined := strings.Join(sigParts, "|") + hash := sha256.Sum256([]byte(combined)) + + // Use the first 6 hex characters of the hash as a short, deterministic suffix + return fmt.Sprintf("%x", hash)[:6] +} + +// createSignature is a deprecated stub that always returns an empty string (DEPRECATED - superseded by createDeterministicSignature) func (g *IDGenerator) createSignature(node *html.Node, filePath string) string { - // This method is kept for compatibility but not used in UUID-based generation + // This method is kept for compatibility but is not used by the deterministic generation path return "" }