Consolidates duplicate code and removes technical debt accumulated during rapid development. This cleanup improves maintainability while preserving all functionality.

Backend cleanup:
- Remove unused legacy function findViableChildrenLegacy()
- Consolidate duplicate SQL null string helper functions into shared utils
- Unify text extraction functions across utils, engine, and id_generator
- Consolidate duplicate attribute getter functions into single implementation

Frontend cleanup:
- Remove duplicate authentication methods (authenticateWithOAuth vs performOAuthFlow)
- Remove unused hasPermission() method from auth.js
- Centralize repetitive API endpoint construction in api-client.js
- Reduce excessive console logging while preserving important error logs

Impact: -144 lines of code, improved maintainability, no functionality changes. All tests pass and builds succeed.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
487 lines
13 KiB
Go
487 lines
13 KiB
Go
package engine
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"fmt"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// IDGenerator produces content IDs for HTML elements and keeps the
// bookkeeping needed to tell repeated results apart.
type IDGenerator struct {
	// usedIDs records every ID that Generate has already returned from this
	// generator.
	usedIDs map[string]bool

	// elementCounts holds one running counter per element key; it is
	// advanced by getElementIndex.
	elementCounts map[string]int
}
|
|
|
|
// NewIDGenerator creates a new ID generator
|
|
func NewIDGenerator() *IDGenerator {
|
|
return &IDGenerator{
|
|
usedIDs: make(map[string]bool),
|
|
elementCounts: make(map[string]int),
|
|
}
|
|
}
|
|
|
|
// Generate creates a content ID for an HTML element using deterministic approach
|
|
func (g *IDGenerator) Generate(node *html.Node, filePath string) string {
|
|
// 1. File context (minimal)
|
|
fileName := g.getFileName(filePath)
|
|
|
|
// 2. Element identity (lightweight)
|
|
tag := strings.ToLower(node.Data)
|
|
primaryClass := g.getPrimaryClass(node)
|
|
|
|
// 3. Build readable prefix (deterministic, no runtime counting)
|
|
prefix := g.buildDeterministicPrefix(fileName, tag, primaryClass)
|
|
|
|
// 4. Create deterministic suffix based on element characteristics
|
|
signature := g.createDeterministicSignature(node, filePath)
|
|
|
|
finalID := fmt.Sprintf("%s-%s", prefix, signature)
|
|
|
|
// Ensure uniqueness within this session
|
|
counter := 1
|
|
originalID := finalID
|
|
for g.usedIDs[finalID] {
|
|
finalID = fmt.Sprintf("%s-%d", originalID, counter)
|
|
counter++
|
|
}
|
|
|
|
g.usedIDs[finalID] = true
|
|
|
|
return finalID
|
|
}
|
|
|
|
// getFileName extracts filename without extension for ID prefix
|
|
func (g *IDGenerator) getFileName(filePath string) string {
|
|
base := filepath.Base(filePath)
|
|
return strings.TrimSuffix(base, filepath.Ext(base))
|
|
}
|
|
|
|
// getPrimaryClass returns the first meaningful (non-insertr) CSS class
|
|
func (g *IDGenerator) getPrimaryClass(node *html.Node) string {
|
|
classes := GetClasses(node)
|
|
for _, class := range classes {
|
|
if class != "insertr" && class != "" {
|
|
return class
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// getElementKey creates a key for tracking element counts
|
|
func (g *IDGenerator) getElementKey(fileName, tag, primaryClass string) string {
|
|
if primaryClass != "" {
|
|
return fmt.Sprintf("%s-%s", fileName, primaryClass)
|
|
}
|
|
return fmt.Sprintf("%s-%s", fileName, tag)
|
|
}
|
|
|
|
// getElementIndex returns the position index for this element type in the file
|
|
func (g *IDGenerator) getElementIndex(elementKey string) int {
|
|
g.elementCounts[elementKey]++
|
|
return g.elementCounts[elementKey]
|
|
}
|
|
|
|
// buildDeterministicPrefix creates human-readable prefix without runtime counting
|
|
func (g *IDGenerator) buildDeterministicPrefix(fileName, tag, primaryClass string) string {
|
|
var parts []string
|
|
parts = append(parts, fileName)
|
|
|
|
if primaryClass != "" {
|
|
parts = append(parts, primaryClass)
|
|
} else {
|
|
parts = append(parts, tag)
|
|
}
|
|
|
|
// No runtime index - rely on hash for uniqueness
|
|
return strings.Join(parts, "-")
|
|
}
|
|
|
|
// buildPrefix creates human-readable prefix for the ID (legacy method)
|
|
func (g *IDGenerator) buildPrefix(fileName, tag, primaryClass string, index int) string {
|
|
var parts []string
|
|
parts = append(parts, fileName)
|
|
|
|
if primaryClass != "" {
|
|
parts = append(parts, primaryClass)
|
|
} else {
|
|
parts = append(parts, tag)
|
|
}
|
|
|
|
// Only add index if it's not the first element of this type
|
|
if index > 1 {
|
|
parts = append(parts, fmt.Sprintf("%d", index))
|
|
}
|
|
|
|
return strings.Join(parts, "-")
|
|
}
|
|
|
|
// createDeterministicSignature creates a deterministic signature for element identification
|
|
func (g *IDGenerator) createDeterministicSignature(node *html.Node, filePath string) string {
|
|
// Build structural signature for stable IDs across content changes
|
|
tag := node.Data
|
|
domPath := g.getDetailedDOMPath(node)
|
|
allClasses := strings.Join(GetClasses(node), " ")
|
|
semanticContext := g.getSemanticContext(node)
|
|
parentContext := g.getParentContainerContext(node)
|
|
preciseIndex := g.getPreciseSiblingIndex(node)
|
|
|
|
// Create enhanced structural deterministic signature with parent context
|
|
signature := fmt.Sprintf("%s|%s|%s|%s|%s|%s|%d",
|
|
filePath, // File context for uniqueness across files
|
|
domPath, // Detailed structural position in DOM
|
|
tag, // Element type
|
|
allClasses, // All CSS classes for style differentiation
|
|
semanticContext, // Semantic context (header/main/footer/nav)
|
|
parentContext, // Parent container unique context
|
|
preciseIndex, // Precise position among exact siblings
|
|
)
|
|
|
|
// Create deterministic hash suffix (6 chars)
|
|
hash := sha256.Sum256([]byte(signature))
|
|
return fmt.Sprintf("%x", hash)[:6]
|
|
}
|
|
|
|
// createSignature creates a unique signature for collision resistance (DEPRECATED - using deterministic now)
|
|
func (g *IDGenerator) createSignature(node *html.Node, filePath string) string {
|
|
// This method is kept for compatibility but not used in deterministic generation
|
|
return ""
|
|
}
|
|
|
|
// getSimpleDOMPath creates a simple but precise DOM path for uniqueness (max 3 levels)
|
|
func (g *IDGenerator) getSimpleDOMPath(node *html.Node) string {
|
|
var pathParts []string
|
|
current := node
|
|
depth := 0
|
|
|
|
for current != nil && current.Type == html.ElementNode && depth < 3 {
|
|
part := current.Data
|
|
|
|
// Add first meaningful class (not insertr) for better differentiation
|
|
classes := GetClasses(current)
|
|
for _, class := range classes {
|
|
if class != "insertr" && class != "" {
|
|
part += "." + class
|
|
break
|
|
}
|
|
}
|
|
|
|
pathParts = append([]string{part}, pathParts...)
|
|
current = current.Parent
|
|
depth++
|
|
}
|
|
|
|
return strings.Join(pathParts, ">")
|
|
}
|
|
|
|
// getDetailedDOMPath creates a more detailed DOM path for enhanced structural differentiation
|
|
func (g *IDGenerator) getDetailedDOMPath(node *html.Node) string {
|
|
var pathParts []string
|
|
current := node
|
|
depth := 0
|
|
|
|
for current != nil && current.Type == html.ElementNode && depth < 5 {
|
|
part := current.Data
|
|
|
|
// Add all meaningful classes for maximum differentiation
|
|
classes := GetClasses(current)
|
|
var meaningfulClasses []string
|
|
for _, class := range classes {
|
|
if class != "insertr" && class != "" {
|
|
meaningfulClasses = append(meaningfulClasses, class)
|
|
}
|
|
}
|
|
if len(meaningfulClasses) > 0 {
|
|
part += "." + strings.Join(meaningfulClasses, ".")
|
|
}
|
|
|
|
pathParts = append([]string{part}, pathParts...)
|
|
current = current.Parent
|
|
depth++
|
|
}
|
|
|
|
return strings.Join(pathParts, ">")
|
|
}
|
|
|
|
// getSemanticContext identifies the semantic container (header, main, footer, nav)
|
|
func (g *IDGenerator) getSemanticContext(node *html.Node) string {
|
|
current := node.Parent
|
|
|
|
// Traverse up to find semantic containers
|
|
for current != nil && current.Type == html.ElementNode {
|
|
tag := strings.ToLower(current.Data)
|
|
|
|
// Direct semantic tags
|
|
switch tag {
|
|
case "header":
|
|
return "header"
|
|
case "main":
|
|
return "main"
|
|
case "footer":
|
|
return "footer"
|
|
case "nav":
|
|
return "nav"
|
|
case "aside":
|
|
return "aside"
|
|
}
|
|
|
|
// Semantic classes
|
|
classes := GetClasses(current)
|
|
for _, class := range classes {
|
|
class = strings.ToLower(class)
|
|
if strings.Contains(class, "header") {
|
|
return "header"
|
|
}
|
|
if strings.Contains(class, "footer") {
|
|
return "footer"
|
|
}
|
|
if strings.Contains(class, "nav") {
|
|
return "nav"
|
|
}
|
|
if strings.Contains(class, "sidebar") || strings.Contains(class, "aside") {
|
|
return "aside"
|
|
}
|
|
}
|
|
|
|
current = current.Parent
|
|
}
|
|
|
|
return "content"
|
|
}
|
|
|
|
// getPreciseSiblingIndex returns position among siblings with exact tag and class match
|
|
func (g *IDGenerator) getPreciseSiblingIndex(node *html.Node) int {
|
|
if node.Parent == nil {
|
|
return 0
|
|
}
|
|
|
|
index := 0
|
|
tag := node.Data
|
|
classes := GetClasses(node)
|
|
|
|
// Sort classes for consistent comparison
|
|
sortedClasses := make([]string, len(classes))
|
|
copy(sortedClasses, classes)
|
|
for i := 0; i < len(sortedClasses); i++ {
|
|
for j := i + 1; j < len(sortedClasses); j++ {
|
|
if sortedClasses[i] > sortedClasses[j] {
|
|
sortedClasses[i], sortedClasses[j] = sortedClasses[j], sortedClasses[i]
|
|
}
|
|
}
|
|
}
|
|
|
|
for sibling := node.Parent.FirstChild; sibling != nil; sibling = sibling.NextSibling {
|
|
if sibling.Type == html.ElementNode && sibling.Data == tag {
|
|
siblingClasses := GetClasses(sibling)
|
|
|
|
// Sort sibling classes for comparison
|
|
sortedSiblingClasses := make([]string, len(siblingClasses))
|
|
copy(sortedSiblingClasses, siblingClasses)
|
|
for i := 0; i < len(sortedSiblingClasses); i++ {
|
|
for j := i + 1; j < len(sortedSiblingClasses); j++ {
|
|
if sortedSiblingClasses[i] > sortedSiblingClasses[j] {
|
|
sortedSiblingClasses[i], sortedSiblingClasses[j] = sortedSiblingClasses[j], sortedSiblingClasses[i]
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check if classes match exactly
|
|
if g.classSlicesEqual(sortedClasses, sortedSiblingClasses) {
|
|
if sibling == node {
|
|
return index
|
|
}
|
|
index++
|
|
}
|
|
}
|
|
}
|
|
return index
|
|
}
|
|
|
|
// classSlicesEqual compares two sorted class slices for equality
|
|
func (g *IDGenerator) classSlicesEqual(a, b []string) bool {
|
|
if len(a) != len(b) {
|
|
return false
|
|
}
|
|
for i := range a {
|
|
if a[i] != b[i] {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// getParentContainerContext extracts unique context from parent containers and siblings
|
|
func (g *IDGenerator) getParentContainerContext(node *html.Node) string {
|
|
if node.Parent == nil {
|
|
return ""
|
|
}
|
|
|
|
// First check for unique context in immediate parent's children (siblings)
|
|
siblingContext := g.getSiblingContext(node)
|
|
if siblingContext != "" {
|
|
return "sibling:" + siblingContext
|
|
}
|
|
|
|
// Look through parent chain for unique identifiers
|
|
current := node.Parent
|
|
depth := 0
|
|
|
|
for current != nil && current.Type == html.ElementNode && depth < 3 {
|
|
// Check for ID attribute (most unique)
|
|
if id := GetAttribute(current, "id"); id != "" {
|
|
return "id:" + id
|
|
}
|
|
|
|
// Check for unique classes (not just "insertr" or common ones)
|
|
classes := GetClasses(current)
|
|
for _, class := range classes {
|
|
if class != "insertr" && class != "container" && class != "content" && class != "" {
|
|
return "class:" + class
|
|
}
|
|
}
|
|
|
|
// Look for unique text content in parent's children (like headings)
|
|
uniqueText := g.getParentUniqueText(current)
|
|
if uniqueText != "" {
|
|
return "text:" + uniqueText
|
|
}
|
|
|
|
current = current.Parent
|
|
depth++
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
// getSiblingContext looks for unique identifying content in sibling elements
|
|
func (g *IDGenerator) getSiblingContext(node *html.Node) string {
|
|
if node.Parent == nil {
|
|
return ""
|
|
}
|
|
|
|
// Look for heading siblings that provide unique context
|
|
for sibling := node.Parent.FirstChild; sibling != nil; sibling = sibling.NextSibling {
|
|
if sibling.Type == html.ElementNode && sibling != node {
|
|
tag := strings.ToLower(sibling.Data)
|
|
// Check for heading elements
|
|
if tag == "h1" || tag == "h2" || tag == "h3" || tag == "h4" || tag == "h5" || tag == "h6" {
|
|
content := ExtractTextContent(sibling)
|
|
if content != "" && len(content) > 3 {
|
|
// Return first 12 chars for uniqueness
|
|
if len(content) > 12 {
|
|
content = content[:12]
|
|
}
|
|
return content
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return ""
|
|
}
|
|
|
|
// getParentUniqueText extracts unique text from parent's child elements
|
|
func (g *IDGenerator) getParentUniqueText(parent *html.Node) string {
|
|
for child := parent.FirstChild; child != nil; child = child.NextSibling {
|
|
if child.Type == html.ElementNode {
|
|
tag := strings.ToLower(child.Data)
|
|
// Look for heading elements or elements with distinctive text
|
|
if tag == "h1" || tag == "h2" || tag == "h3" || tag == "h4" || tag == "h5" || tag == "h6" {
|
|
content := ExtractTextContent(child)
|
|
if content != "" && len(content) > 2 {
|
|
// Return first 15 chars of heading text for uniqueness
|
|
if len(content) > 15 {
|
|
content = content[:15]
|
|
}
|
|
return content
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
|
|
// getContentPreview extracts first 50 characters of text content for uniqueness
|
|
func (g *IDGenerator) getContentPreview(node *html.Node) string {
|
|
content := ExtractTextContent(node)
|
|
if len(content) > 50 {
|
|
content = content[:50]
|
|
}
|
|
// Remove newlines and normalize whitespace
|
|
content = strings.ReplaceAll(content, "\n", " ")
|
|
content = strings.ReplaceAll(content, "\t", " ")
|
|
for strings.Contains(content, " ") {
|
|
content = strings.ReplaceAll(content, " ", " ")
|
|
}
|
|
return content
|
|
}
|
|
|
|
|
|
// getSiblingIndex returns the position of this element among its siblings of the same type and class
|
|
func (g *IDGenerator) getSiblingIndex(node *html.Node) int {
|
|
if node.Parent == nil {
|
|
return 0
|
|
}
|
|
|
|
index := 0
|
|
tag := node.Data
|
|
classes := GetClasses(node)
|
|
|
|
// First try: match by tag + insertr class (most common case)
|
|
hasInsertr := false
|
|
for _, class := range classes {
|
|
if class == "insertr" {
|
|
hasInsertr = true
|
|
break
|
|
}
|
|
}
|
|
|
|
for sibling := node.Parent.FirstChild; sibling != nil; sibling = sibling.NextSibling {
|
|
if sibling.Type == html.ElementNode && sibling.Data == tag {
|
|
siblingClasses := GetClasses(sibling)
|
|
|
|
// For insertr elements, match by tag + insertr class
|
|
if hasInsertr {
|
|
siblingHasInsertr := false
|
|
for _, class := range siblingClasses {
|
|
if class == "insertr" {
|
|
siblingHasInsertr = true
|
|
break
|
|
}
|
|
}
|
|
if siblingHasInsertr {
|
|
if sibling == node {
|
|
return index
|
|
}
|
|
index++
|
|
}
|
|
} else {
|
|
// For non-insertr elements, match by exact class list
|
|
if g.classesMatch(classes, siblingClasses) {
|
|
if sibling == node {
|
|
return index
|
|
}
|
|
index++
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return index
|
|
}
|
|
|
|
// classesMatch checks if two class lists are equivalent
|
|
func (g *IDGenerator) classesMatch(classes1, classes2 []string) bool {
|
|
if len(classes1) != len(classes2) {
|
|
return false
|
|
}
|
|
for i, class := range classes1 {
|
|
if i >= len(classes2) || class != classes2[i] {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|