refactor: implement unified binary architecture

🏗️ **Major Architecture Refactoring: Separate CLI + Server → Unified Binary**

**Key Changes:**
- **Unified Binary**: Single 'insertr' binary with subcommands (enhance, serve)
- **Preserved Database Architecture**: Maintained sophisticated sqlc multi-DB setup
- **Smart Configuration**: Viper + YAML config with CLI flag precedence
- **Updated Build System**: Unified justfile, Air, and npm scripts

**Command Structure:**
- `insertr enhance [input-dir]` - Build-time content injection
- `insertr serve` - HTTP API server (dev + production modes)
- `insertr --config insertr.yaml` - YAML configuration support

**Architecture Benefits:**
- **Shared Database Layer**: Single source of truth for content models
- **Flexible Workflows**: Local DB for dev, remote API for production
- **Simple Deployment**: One binary for all use cases
- **Better UX**: Consistent configuration across build and runtime

**Preserved Features:**
- Multi-database support (SQLite + PostgreSQL)
- sqlc code generation and type safety
- Version control system with rollback
- Professional API endpoints
- Content enhancement pipeline

**Development Workflow:**
- `just dev` - Full-stack development (API server + demo site)
- `just serve` - API server only
- `just enhance` - Build-time content injection
- `air` - Hot reload unified binary

**Migration:** Consolidated insertr-cli/ and insertr-server/ → unified root structure
This commit is contained in:
2025-09-09 00:39:35 +02:00
parent 4dc479ba9e
commit e28000fd33
43 changed files with 4339 additions and 75 deletions

View File

@@ -0,0 +1,167 @@
package parser
import (
"crypto/sha1"
"fmt"
"regexp"
"strings"
"golang.org/x/net/html"
)
// IDGenerator generates unique content IDs for elements.
//
// It remembers every ID it has returned in usedIDs so that later requests can
// be checked against earlier ones.
// NOTE(review): the map is read and written without any visible locking, so
// sharing one generator across goroutines is presumably unsafe — confirm.
type IDGenerator struct {
// usedIDs holds the IDs already handed out; a true value marks an ID as taken.
usedIDs map[string]bool
}
// NewIDGenerator returns a ready-to-use generator whose set of issued IDs
// starts out empty.
func NewIDGenerator() *IDGenerator {
	gen := new(IDGenerator)
	gen.usedIDs = map[string]bool{}
	return gen
}
// Generate builds a content ID for node.
//
// The ID is assembled from three ingredients derived from the node — its
// semantic context, its purpose and a short hash of its content — and is then
// passed through ensureUnique before being returned.
func (g *IDGenerator) Generate(node *html.Node) string {
	candidate := g.createBaseID(
		g.getSemanticContext(node),
		g.getPurpose(node),
		g.getContentHash(node),
	)
	return g.ensureUnique(candidate)
}
// getSemanticContext names the nearest enclosing semantic section of node.
//
// Starting at node's parent, it climbs through consecutive element ancestors.
// For each ancestor the well-known section classes are tried first, in the
// fixed order listed below, and then the ancestor's tag name. The first match
// is returned; "content" is the fallback when the climb ends without one.
func (g *IDGenerator) getSemanticContext(node *html.Node) string {
	// Section class names, checked in this order for every ancestor.
	sectionClasses := []string{"hero", "services", "nav", "navbar", "footer", "about", "contact", "testimonial"}

	for ancestor := node.Parent; ancestor != nil && ancestor.Type == html.ElementNode; ancestor = ancestor.Parent {
		ancestorClasses := getClasses(ancestor)
		for _, name := range sectionClasses {
			if containsClass(ancestorClasses, name) {
				return name
			}
		}

		// Semantic HTML elements map directly onto a context of the same name.
		switch tag := ancestor.Data; tag {
		case "nav", "header", "footer", "main", "aside":
			return tag
		}
	}
	return "content"
}
// getPurpose determines the purpose/role of the element.
//
// The element's CSS classes are consulted first, in the order getClasses
// returns them: the first class containing one of the known keywords decides
// the result. If no class matches, the purpose is inferred from the tag name,
// with "content" as the final fallback.
func (g *IDGenerator) getPurpose(node *html.Node) string {
	tag := strings.ToLower(node.Data)
	classes := getClasses(node)

	// Check for specific CSS classes that indicate purpose. Matching is by
	// substring, so a keyword that contains another keyword must be tested
	// before it.
	for _, class := range classes {
		switch {
		// "subtitle" contains "title"; testing it first keeps this case
		// reachable instead of being swallowed by the "title" case below.
		case strings.Contains(class, "subtitle"):
			return "subtitle"
		case strings.Contains(class, "title"):
			return "title"
		case strings.Contains(class, "headline"):
			return "headline"
		case strings.Contains(class, "description"):
			return "description"
		case strings.Contains(class, "cta"):
			return "cta"
		case strings.Contains(class, "button"):
			return "button"
		case strings.Contains(class, "logo"):
			return "logo"
		case strings.Contains(class, "lead"):
			return "lead"
		}
	}

	// Infer purpose from HTML tag.
	switch tag {
	case "h1":
		return "title"
	case "h2":
		return "subtitle"
	case "h3", "h4", "h5", "h6":
		return "heading"
	case "p":
		return "text"
	case "a":
		return "link"
	case "button":
		return "button"
	default:
		return "content"
	}
}
// getContentHash returns a six-character lowercase hex fingerprint of the
// node's text, as produced by extractTextContent.
func (g *IDGenerator) getContentHash(node *html.Node) string {
	digest := sha1.Sum([]byte(extractTextContent(node)))
	// Three bytes render as exactly six hex digits — the leading six
	// characters of the full hex digest.
	return fmt.Sprintf("%x", digest[:3])
}
// repeatedDashes matches any run of "-" characters. It is compiled once at
// package initialisation rather than on every createBaseID call.
var repeatedDashes = regexp.MustCompile(`-+`)

// createBaseID creates the base ID from its components.
//
// The parts are joined with "-" in the order context, purpose, contentHash.
// The generic context "content" carries no information and is left out. Runs
// of dashes (which arise when a part is empty) are collapsed to one, and
// leading/trailing dashes are trimmed. If nothing is left after clean-up, the
// result falls back to "content-<contentHash>".
func (g *IDGenerator) createBaseID(context, purpose, contentHash string) string {
	parts := make([]string, 0, 3)

	// Add context only if meaningful.
	if context != "content" {
		parts = append(parts, context)
	}
	// Purpose, then the content hash, which is always included for uniqueness.
	parts = append(parts, purpose, contentHash)

	baseID := strings.Join(parts, "-")

	// Clean up the ID.
	baseID = repeatedDashes.ReplaceAllString(baseID, "-")
	baseID = strings.Trim(baseID, "-")

	// Ensure it's not empty.
	if baseID == "" {
		baseID = fmt.Sprintf("content-%s", contentHash)
	}
	return baseID
}
// ensureUnique returns an ID derived from baseID that this generator has not
// returned before, and records it as used.
//
// An unused baseID is returned unchanged. Otherwise a six-hex-digit SHA-1
// suffix is appended: "<baseID>-<suffix>". The first collision hashes baseID
// itself; if that suffixed ID is taken as well, the hash input is salted with
// an attempt counter and the process repeats until a free ID is found, so
// repeated requests for the same baseID never yield the same ID twice.
func (g *IDGenerator) ensureUnique(baseID string) string {
	// Tolerate a zero-value generator: writing to a nil map would panic.
	if g.usedIDs == nil {
		g.usedIDs = make(map[string]bool)
	}

	candidate := baseID
	for attempt := 0; g.usedIDs[candidate]; attempt++ {
		seed := baseID
		if attempt > 0 {
			// Salt the hash input so each further attempt gets a new suffix.
			seed = fmt.Sprintf("%s#%d", baseID, attempt)
		}
		digest := sha1.Sum([]byte(seed))
		// Three digest bytes format as the first six hex characters.
		candidate = fmt.Sprintf("%s-%x", baseID, digest[:3])
	}

	g.usedIDs[candidate] = true
	return candidate
}