package engine import ( "crypto/sha256" "encoding/hex" "fmt" "path/filepath" "strings" "golang.org/x/net/html" ) // IDGenerator generates unique content IDs for elements using lightweight hierarchical approach type IDGenerator struct { usedIDs map[string]bool elementCounts map[string]int // Track counts per file+type for indexing } // NewIDGenerator creates a new ID generator func NewIDGenerator() *IDGenerator { return &IDGenerator{ usedIDs: make(map[string]bool), elementCounts: make(map[string]int), } } // Generate creates a content ID for an HTML element using lightweight hierarchical approach func (g *IDGenerator) Generate(node *html.Node, filePath string) string { // 1. File context (minimal) fileName := g.getFileName(filePath) // 2. Element identity (lightweight) tag := strings.ToLower(node.Data) primaryClass := g.getPrimaryClass(node) // 3. Build readable prefix (deterministic, no runtime counting) prefix := g.buildDeterministicPrefix(fileName, tag, primaryClass) // 5. Add collision-resistant suffix signature := g.createSignature(node, filePath) hash := sha256.Sum256([]byte(signature)) suffix := hex.EncodeToString(hash[:3]) finalID := fmt.Sprintf("%s-%s", prefix, suffix) // Ensure uniqueness (should be guaranteed by hash, but safety check) g.usedIDs[finalID] = true return finalID } // getFileName extracts filename without extension for ID prefix func (g *IDGenerator) getFileName(filePath string) string { base := filepath.Base(filePath) return strings.TrimSuffix(base, filepath.Ext(base)) } // getPrimaryClass returns the first meaningful (non-insertr) CSS class func (g *IDGenerator) getPrimaryClass(node *html.Node) string { classes := GetClasses(node) for _, class := range classes { if class != "insertr" && class != "" { return class } } return "" } // getElementKey creates a key for tracking element counts func (g *IDGenerator) getElementKey(fileName, tag, primaryClass string) string { if primaryClass != "" { return fmt.Sprintf("%s-%s", fileName, primaryClass) } return fmt.Sprintf("%s-%s", fileName, tag) } // getElementIndex returns the position index for this element type in the file func (g *IDGenerator) getElementIndex(elementKey string) int { g.elementCounts[elementKey]++ return g.elementCounts[elementKey] } // buildDeterministicPrefix creates human-readable prefix without runtime counting func (g *IDGenerator) buildDeterministicPrefix(fileName, tag, primaryClass string) string { var parts []string parts = append(parts, fileName) if primaryClass != "" { parts = append(parts, primaryClass) } else { parts = append(parts, tag) } // No runtime index - rely on hash for uniqueness return strings.Join(parts, "-") } // buildPrefix creates human-readable prefix for the ID (legacy method) func (g *IDGenerator) buildPrefix(fileName, tag, primaryClass string, index int) string { var parts []string parts = append(parts, fileName) if primaryClass != "" { parts = append(parts, primaryClass) } else { parts = append(parts, tag) } // Only add index if it's not the first element of this type if index > 1 { parts = append(parts, fmt.Sprintf("%d", index)) } return strings.Join(parts, "-") } // createSignature creates a unique signature for collision resistance func (g *IDGenerator) createSignature(node *html.Node, filePath string) string { // Minimal signature for uniqueness tag := node.Data classes := strings.Join(GetClasses(node), " ") domPath := g.getSimpleDOMPath(node) return fmt.Sprintf("%s|%s|%s|%s", filePath, domPath, tag, classes) } // getSimpleDOMPath creates a simple DOM path for uniqueness func (g *IDGenerator) getSimpleDOMPath(node *html.Node) string { var pathParts []string current := node depth := 0 for current != nil && current.Type == html.ElementNode && depth < 5 { part := current.Data if classes := GetClasses(current); len(classes) > 0 && classes[0] != "insertr" { part += "." + classes[0] } pathParts = append([]string{part}, pathParts...) current = current.Parent depth++ } return strings.Join(pathParts, ">") }