- Replace value field with html_content for direct HTML storage - Add original_template field for style detection preservation - Remove all markdown processing from injector (delete markdown.go) - Fix critical content extraction/injection bugs in engine - Add missing UpdateContent PUT handler for content persistence - Fix API client field names and add updateContent() method - Resolve content type validation (only allow text/link types) - Add UUID-based ID generation to prevent collisions - Complete first-pass processing workflow for unprocessed elements - Verify end-to-end: Enhancement → Database → API → Editor → Persistence All 37 files updated for HTML-first content management system. Phase 3a implementation complete and production ready.
203 lines
5.7 KiB
Go
203 lines
5.7 KiB
Go
package engine
|
|
|
|
import (
|
|
"fmt"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/google/uuid"
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// IDGenerator generates unique content IDs for elements using lightweight hierarchical approach
|
|
type IDGenerator struct {
|
|
usedIDs map[string]bool
|
|
elementCounts map[string]int // Track counts per file+type for indexing
|
|
}
|
|
|
|
// NewIDGenerator creates a new ID generator
|
|
func NewIDGenerator() *IDGenerator {
|
|
return &IDGenerator{
|
|
usedIDs: make(map[string]bool),
|
|
elementCounts: make(map[string]int),
|
|
}
|
|
}
|
|
|
|
// Generate creates a content ID for an HTML element using lightweight hierarchical approach
|
|
func (g *IDGenerator) Generate(node *html.Node, filePath string) string {
|
|
// 1. File context (minimal)
|
|
fileName := g.getFileName(filePath)
|
|
|
|
// 2. Element identity (lightweight)
|
|
tag := strings.ToLower(node.Data)
|
|
primaryClass := g.getPrimaryClass(node)
|
|
|
|
// 3. Build readable prefix (deterministic, no runtime counting)
|
|
prefix := g.buildDeterministicPrefix(fileName, tag, primaryClass)
|
|
|
|
// 5. Add UUID-based suffix for guaranteed uniqueness
|
|
uuidSuffix := uuid.New().String()[:6] // Use first 6 chars of UUID
|
|
|
|
finalID := fmt.Sprintf("%s-%s", prefix, uuidSuffix)
|
|
|
|
// Ensure uniqueness (should be guaranteed by hash, but safety check)
|
|
g.usedIDs[finalID] = true
|
|
|
|
return finalID
|
|
}
|
|
|
|
// getFileName extracts filename without extension for ID prefix
|
|
func (g *IDGenerator) getFileName(filePath string) string {
|
|
base := filepath.Base(filePath)
|
|
return strings.TrimSuffix(base, filepath.Ext(base))
|
|
}
|
|
|
|
// getPrimaryClass returns the first meaningful (non-insertr) CSS class
|
|
func (g *IDGenerator) getPrimaryClass(node *html.Node) string {
|
|
classes := GetClasses(node)
|
|
for _, class := range classes {
|
|
if class != "insertr" && class != "" {
|
|
return class
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// getElementKey creates a key for tracking element counts
|
|
func (g *IDGenerator) getElementKey(fileName, tag, primaryClass string) string {
|
|
if primaryClass != "" {
|
|
return fmt.Sprintf("%s-%s", fileName, primaryClass)
|
|
}
|
|
return fmt.Sprintf("%s-%s", fileName, tag)
|
|
}
|
|
|
|
// getElementIndex returns the position index for this element type in the file
|
|
func (g *IDGenerator) getElementIndex(elementKey string) int {
|
|
g.elementCounts[elementKey]++
|
|
return g.elementCounts[elementKey]
|
|
}
|
|
|
|
// buildDeterministicPrefix creates human-readable prefix without runtime counting
|
|
func (g *IDGenerator) buildDeterministicPrefix(fileName, tag, primaryClass string) string {
|
|
var parts []string
|
|
parts = append(parts, fileName)
|
|
|
|
if primaryClass != "" {
|
|
parts = append(parts, primaryClass)
|
|
} else {
|
|
parts = append(parts, tag)
|
|
}
|
|
|
|
// No runtime index - rely on hash for uniqueness
|
|
return strings.Join(parts, "-")
|
|
}
|
|
|
|
// buildPrefix creates human-readable prefix for the ID (legacy method)
|
|
func (g *IDGenerator) buildPrefix(fileName, tag, primaryClass string, index int) string {
|
|
var parts []string
|
|
parts = append(parts, fileName)
|
|
|
|
if primaryClass != "" {
|
|
parts = append(parts, primaryClass)
|
|
} else {
|
|
parts = append(parts, tag)
|
|
}
|
|
|
|
// Only add index if it's not the first element of this type
|
|
if index > 1 {
|
|
parts = append(parts, fmt.Sprintf("%d", index))
|
|
}
|
|
|
|
return strings.Join(parts, "-")
|
|
}
|
|
|
|
// createSignature creates a unique signature for collision resistance (DEPRECATED - using UUID now)
|
|
func (g *IDGenerator) createSignature(node *html.Node, filePath string) string {
|
|
// This method is kept for compatibility but not used in UUID-based generation
|
|
return ""
|
|
}
|
|
|
|
// getSimpleDOMPath creates a simple DOM path for uniqueness
|
|
func (g *IDGenerator) getSimpleDOMPath(node *html.Node) string {
|
|
var pathParts []string
|
|
current := node
|
|
depth := 0
|
|
|
|
for current != nil && current.Type == html.ElementNode && depth < 5 {
|
|
part := current.Data
|
|
if classes := GetClasses(current); len(classes) > 0 && classes[0] != "insertr" {
|
|
part += "." + classes[0]
|
|
}
|
|
pathParts = append([]string{part}, pathParts...)
|
|
current = current.Parent
|
|
depth++
|
|
}
|
|
|
|
return strings.Join(pathParts, ">")
|
|
}
|
|
|
|
// getContentPreview extracts first 50 characters of text content for uniqueness
|
|
func (g *IDGenerator) getContentPreview(node *html.Node) string {
|
|
var text strings.Builder
|
|
g.extractTextContent(node, &text)
|
|
content := strings.TrimSpace(text.String())
|
|
if len(content) > 50 {
|
|
content = content[:50]
|
|
}
|
|
// Remove newlines and normalize whitespace
|
|
content = strings.ReplaceAll(content, "\n", " ")
|
|
content = strings.ReplaceAll(content, "\t", " ")
|
|
for strings.Contains(content, " ") {
|
|
content = strings.ReplaceAll(content, " ", " ")
|
|
}
|
|
return content
|
|
}
|
|
|
|
// extractTextContent recursively extracts text content from a node
|
|
func (g *IDGenerator) extractTextContent(node *html.Node, text *strings.Builder) {
|
|
if node.Type == html.TextNode {
|
|
text.WriteString(node.Data)
|
|
}
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
g.extractTextContent(child, text)
|
|
}
|
|
}
|
|
|
|
// getSiblingIndex returns the position of this element among its siblings of the same type
|
|
func (g *IDGenerator) getSiblingIndex(node *html.Node) int {
|
|
if node.Parent == nil {
|
|
return 0
|
|
}
|
|
|
|
index := 0
|
|
tag := node.Data
|
|
classes := GetClasses(node)
|
|
|
|
for sibling := node.Parent.FirstChild; sibling != nil; sibling = sibling.NextSibling {
|
|
if sibling.Type == html.ElementNode && sibling.Data == tag {
|
|
siblingClasses := GetClasses(sibling)
|
|
// Check if classes match (for more precise positioning)
|
|
if g.classesMatch(classes, siblingClasses) {
|
|
if sibling == node {
|
|
return index
|
|
}
|
|
index++
|
|
}
|
|
}
|
|
}
|
|
return index
|
|
}
|
|
|
|
// classesMatch checks if two class lists are equivalent
|
|
func (g *IDGenerator) classesMatch(classes1, classes2 []string) bool {
|
|
if len(classes1) != len(classes2) {
|
|
return false
|
|
}
|
|
for i, class := range classes1 {
|
|
if i >= len(classes2) || class != classes2[i] {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|