🏗️ **Major Architecture Refactoring: Separate CLI + Server → Unified Binary**

**Key Changes:**
- ✅ **Unified Binary**: Single 'insertr' binary with subcommands (enhance, serve)
- ✅ **Preserved Database Architecture**: Maintained sophisticated sqlc multi-DB setup
- ✅ **Smart Configuration**: Viper + YAML config with CLI flag precedence
- ✅ **Updated Build System**: Unified justfile, Air, and npm scripts

**Command Structure:**
- `insertr enhance [input-dir]` - Build-time content injection
- `insertr serve` - HTTP API server (dev + production modes)
- `insertr --config insertr.yaml` - YAML configuration support

**Architecture Benefits:**
- **Shared Database Layer**: Single source of truth for content models
- **Flexible Workflows**: Local DB for dev, remote API for production
- **Simple Deployment**: One binary for all use cases
- **Better UX**: Consistent configuration across build and runtime

**Preserved Features:**
- Multi-database support (SQLite + PostgreSQL)
- sqlc code generation and type safety
- Version control system with rollback
- Professional API endpoints
- Content enhancement pipeline

**Development Workflow:**
- `just dev` - Full-stack development (API server + demo site)
- `just serve` - API server only
- `just enhance` - Build-time content injection
- `air` - Hot reload unified binary

**Migration:** Consolidated insertr-cli/ and insertr-server/ → unified root structure
168 lines
3.8 KiB
Go
168 lines
3.8 KiB
Go
package parser
|
|
|
|
import (
|
|
"crypto/sha1"
|
|
"fmt"
|
|
"regexp"
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// IDGenerator produces content IDs for HTML elements and remembers every ID
// it has returned so that later requests can be checked against them.
type IDGenerator struct {
	// usedIDs holds each ID already handed out; a true value marks it taken.
	usedIDs map[string]bool
}
|
|
|
|
// NewIDGenerator creates a new ID generator
|
|
func NewIDGenerator() *IDGenerator {
|
|
return &IDGenerator{
|
|
usedIDs: make(map[string]bool),
|
|
}
|
|
}
|
|
|
|
// Generate creates a content ID for an HTML element
|
|
func (g *IDGenerator) Generate(node *html.Node) string {
|
|
context := g.getSemanticContext(node)
|
|
purpose := g.getPurpose(node)
|
|
contentHash := g.getContentHash(node)
|
|
|
|
baseID := g.createBaseID(context, purpose, contentHash)
|
|
return g.ensureUnique(baseID)
|
|
}
|
|
|
|
// getSemanticContext determines the semantic context from parent elements
|
|
func (g *IDGenerator) getSemanticContext(node *html.Node) string {
|
|
// Walk up the tree to find semantic containers
|
|
parent := node.Parent
|
|
for parent != nil && parent.Type == html.ElementNode {
|
|
classes := getClasses(parent)
|
|
|
|
// Check for common semantic section classes
|
|
for _, class := range []string{"hero", "services", "nav", "navbar", "footer", "about", "contact", "testimonial"} {
|
|
if containsClass(classes, class) {
|
|
return class
|
|
}
|
|
}
|
|
|
|
// Check for semantic HTML elements
|
|
switch parent.Data {
|
|
case "nav":
|
|
return "nav"
|
|
case "header":
|
|
return "header"
|
|
case "footer":
|
|
return "footer"
|
|
case "main":
|
|
return "main"
|
|
case "aside":
|
|
return "aside"
|
|
}
|
|
|
|
parent = parent.Parent
|
|
}
|
|
|
|
return "content"
|
|
}
|
|
|
|
// getPurpose determines the purpose/role of the element
|
|
func (g *IDGenerator) getPurpose(node *html.Node) string {
|
|
tag := strings.ToLower(node.Data)
|
|
classes := getClasses(node)
|
|
|
|
// Check for specific CSS classes that indicate purpose
|
|
for _, class := range classes {
|
|
switch {
|
|
case strings.Contains(class, "title"):
|
|
return "title"
|
|
case strings.Contains(class, "headline"):
|
|
return "headline"
|
|
case strings.Contains(class, "description"):
|
|
return "description"
|
|
case strings.Contains(class, "subtitle"):
|
|
return "subtitle"
|
|
case strings.Contains(class, "cta"):
|
|
return "cta"
|
|
case strings.Contains(class, "button"):
|
|
return "button"
|
|
case strings.Contains(class, "logo"):
|
|
return "logo"
|
|
case strings.Contains(class, "lead"):
|
|
return "lead"
|
|
}
|
|
}
|
|
|
|
// Infer purpose from HTML tag
|
|
switch tag {
|
|
case "h1":
|
|
return "title"
|
|
case "h2":
|
|
return "subtitle"
|
|
case "h3", "h4", "h5", "h6":
|
|
return "heading"
|
|
case "p":
|
|
return "text"
|
|
case "a":
|
|
return "link"
|
|
case "button":
|
|
return "button"
|
|
default:
|
|
return "content"
|
|
}
|
|
}
|
|
|
|
// getContentHash creates a short hash of the content for ID generation
|
|
func (g *IDGenerator) getContentHash(node *html.Node) string {
|
|
text := extractTextContent(node)
|
|
|
|
// Create hash of the text content
|
|
hash := fmt.Sprintf("%x", sha1.Sum([]byte(text)))
|
|
|
|
// Return first 6 characters for brevity
|
|
return hash[:6]
|
|
}
|
|
|
|
// createBaseID creates the base ID from components
|
|
func (g *IDGenerator) createBaseID(context, purpose, contentHash string) string {
|
|
parts := []string{}
|
|
|
|
// Add context if meaningful
|
|
if context != "content" {
|
|
parts = append(parts, context)
|
|
}
|
|
|
|
// Add purpose
|
|
parts = append(parts, purpose)
|
|
|
|
// Always add content hash for uniqueness
|
|
parts = append(parts, contentHash)
|
|
|
|
baseID := strings.Join(parts, "-")
|
|
|
|
// Clean up the ID
|
|
baseID = regexp.MustCompile(`-+`).ReplaceAllString(baseID, "-")
|
|
baseID = strings.Trim(baseID, "-")
|
|
|
|
// Ensure it's not empty
|
|
if baseID == "" {
|
|
baseID = fmt.Sprintf("content-%s", contentHash)
|
|
}
|
|
|
|
return baseID
|
|
}
|
|
|
|
// ensureUnique makes sure the ID is unique by adding a suffix if needed
|
|
func (g *IDGenerator) ensureUnique(baseID string) string {
|
|
if !g.usedIDs[baseID] {
|
|
g.usedIDs[baseID] = true
|
|
return baseID
|
|
}
|
|
|
|
// If base ID is taken, add a hash suffix
|
|
hash := fmt.Sprintf("%x", sha1.Sum([]byte(baseID)))[:6]
|
|
uniqueID := fmt.Sprintf("%s-%s", baseID, hash)
|
|
|
|
g.usedIDs[uniqueID] = true
|
|
return uniqueID
|
|
}
|