Files
insertr/internal/parser/id_generator.go
Joakim 72bd31b626 feat: implement collision-free lightweight hierarchical ID generation
- Replace content-hash based ID generation with position-based algorithm
- Use file + element identity + position index + hash for unique IDs
- Generate human-readable prefixes (e.g. index-lead-, index-p-2-)
- Add collision-resistant hash suffixes for guaranteed uniqueness
- Update Generate() to accept filePath parameter for context
- Fix ID collisions where hero and footer elements shared same ID
- Clean demo site files removing all data-content-id attributes
- Preserve insertr-gate elements for authentication functionality

Results: Hero gets 'index-lead-2-fc31f2', footer gets 'index-p-13-99fd13'
No more content cross-contamination between different elements.
2025-09-11 17:38:15 +02:00

134 lines
3.7 KiB
Go

package parser
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"path/filepath"
"strings"
"golang.org/x/net/html"
)
// IDGenerator generates content IDs for HTML elements using a lightweight
// hierarchical approach: a readable file/element prefix followed by a short
// hash suffix. It carries per-instance state, so IDs produced by one
// generator depend on the order of earlier Generate calls on that generator.
type IDGenerator struct {
// usedIDs records every ID that Generate has returned so far.
usedIDs map[string]bool
// elementCounts is keyed by "<file>-<primary class or tag>" (see getElementKey).
elementCounts map[string]int // Running count per key; the count becomes the element's position index
}
// NewIDGenerator returns an IDGenerator with empty, ready-to-use bookkeeping
// maps (no IDs recorded and no elements counted yet).
func NewIDGenerator() *IDGenerator {
	gen := new(IDGenerator)
	gen.usedIDs = map[string]bool{}
	gen.elementCounts = map[string]int{}
	return gen
}
// Generate creates a content ID for an HTML element using a lightweight
// hierarchical approach.
//
// The ID has the form "<file>-<class|tag>[-<index>]-<hash>", where <file> is
// the base name of filePath without its extension, <class|tag> is the
// element's primary class (or its lower-cased tag when it has none), <index>
// is the running count for that file+class/tag key (omitted for the first
// element), and <hash> is the hex encoding of the first 3 bytes of the SHA-256
// of the element signature.
//
// Every returned ID is recorded. If the computed ID has already been returned
// by this generator, a numeric disambiguator ("-2", "-3", ...) is appended so
// that no two calls on the same generator return the same ID.
//
// node must be non-nil. Each call advances the per-key element counter, so
// calling Generate twice for the same node yields two different IDs.
func (g *IDGenerator) Generate(node *html.Node, filePath string) string {
	// 1. File context (minimal)
	fileName := g.getFileName(filePath)

	// 2. Element identity (lightweight)
	tag := strings.ToLower(node.Data)
	primaryClass := g.getPrimaryClass(node)

	// 3. Position context (simple)
	elementKey := g.getElementKey(fileName, tag, primaryClass)
	index := g.getElementIndex(elementKey)

	// 4. Build readable prefix
	prefix := g.buildPrefix(fileName, tag, primaryClass, index)

	// 5. Add hash suffix derived from the element signature
	signature := g.createSignature(node, filePath)
	hash := sha256.Sum256([]byte(signature))
	suffix := hex.EncodeToString(hash[:3])

	baseID := fmt.Sprintf("%s-%s", prefix, suffix)

	// The 24-bit suffix does not guarantee uniqueness on its own: prefixes can
	// coincide (e.g. a class literally named "p-2" vs. the second plain <p>)
	// and distinct signatures can share a truncated hash. Actually consult the
	// set of issued IDs and disambiguate instead of silently returning a
	// duplicate.
	finalID := baseID
	for attempt := 2; g.usedIDs[finalID]; attempt++ {
		finalID = fmt.Sprintf("%s-%d", baseID, attempt)
	}
	g.usedIDs[finalID] = true
	return finalID
}
// getFileName returns the last path element of filePath with its final
// extension removed; the result is used as the leading part of an ID.
func (g *IDGenerator) getFileName(filePath string) string {
	name := filepath.Base(filePath)
	extension := filepath.Ext(name)
	return strings.TrimSuffix(name, extension)
}
// getPrimaryClass picks the first class reported by GetClasses for node that
// is neither empty nor the literal "insertr" marker. It returns "" when no
// such class exists.
func (g *IDGenerator) getPrimaryClass(node *html.Node) string {
	for _, name := range GetClasses(node) {
		switch name {
		case "", "insertr":
			// Not meaningful for identification; keep looking.
			continue
		}
		return name
	}
	return ""
}
// getElementKey builds the map key under which elements are counted:
// "<fileName>-<primaryClass>" when a primary class is present, otherwise
// "<fileName>-<tag>".
func (g *IDGenerator) getElementKey(fileName, tag, primaryClass string) string {
	identity := tag
	if primaryClass != "" {
		identity = primaryClass
	}
	return fmt.Sprintf("%s-%s", fileName, identity)
}
// getElementIndex advances the counter stored under elementKey and returns
// the new value, so the first call for a given key yields 1, the next 2, and
// so on.
func (g *IDGenerator) getElementIndex(elementKey string) int {
	next := g.elementCounts[elementKey] + 1
	g.elementCounts[elementKey] = next
	return next
}
// buildPrefix assembles the human-readable part of an ID by joining, with
// "-", the file name, the primary class (or the tag when there is no primary
// class), and the index. The index is left out unless it is greater than 1.
func (g *IDGenerator) buildPrefix(fileName, tag, primaryClass string, index int) string {
	label := tag
	if primaryClass != "" {
		label = primaryClass
	}

	segments := []string{fileName, label}
	if index > 1 {
		// The first element of a kind keeps the shorter, index-free prefix.
		segments = append(segments, fmt.Sprint(index))
	}
	return strings.Join(segments, "-")
}
// createSignature builds the string that Generate hashes into the ID suffix.
// It is "<filePath>|<domPath>|<tag>|<classes>", where domPath comes from
// getSimpleDOMPath, tag is node.Data as-is, and classes is the space-joined
// result of GetClasses(node).
func (g *IDGenerator) createSignature(node *html.Node, filePath string) string {
	classList := strings.Join(GetClasses(node), " ")
	ancestry := g.getSimpleDOMPath(node)
	return fmt.Sprintf("%s|%s|%s|%s", filePath, ancestry, node.Data, classList)
}
// getSimpleDOMPath describes node and up to four of its ancestors as a
// ">"-separated path ordered from outermost to innermost, e.g.
// "body>div.hero>p". The walk stops at the first nil or non-element node, or
// after five levels. Each segment is the node's Data, followed by "." and its
// first class when it has one and that first class is not "insertr".
func (g *IDGenerator) getSimpleDOMPath(node *html.Node) string {
	const maxDepth = 5

	// Gather segments leaf-first while climbing towards the root.
	var segments []string
	for n, level := node, 0; n != nil && n.Type == html.ElementNode && level < maxDepth; n, level = n.Parent, level+1 {
		segment := n.Data
		classes := GetClasses(n)
		if len(classes) > 0 && classes[0] != "insertr" {
			segment += "." + classes[0]
		}
		segments = append(segments, segment)
	}

	// Flip into root-first order for the final path.
	for i, j := 0, len(segments)-1; i < j; i, j = i+1, j-1 {
		segments[i], segments[j] = segments[j], segments[i]
	}
	return strings.Join(segments, ">")
}