refactor: implement unified binary architecture

🏗️ **Major Architecture Refactoring: Separate CLI + Server → Unified Binary**

**Key Changes:**
✅ **Unified Binary**: Single 'insertr' binary with subcommands (enhance, serve)
✅ **Preserved Database Architecture**: Maintained sophisticated sqlc multi-DB setup
✅ **Smart Configuration**: Viper + YAML config with CLI flag precedence
✅ **Updated Build System**: Unified justfile, Air, and npm scripts

**Command Structure:**
- `insertr enhance [input-dir]` - Build-time content injection
- `insertr serve` - HTTP API server (dev + production modes)
- `insertr --config insertr.yaml` - YAML configuration support

**Architecture Benefits:**
- **Shared Database Layer**: Single source of truth for content models
- **Flexible Workflows**: Local DB for dev, remote API for production
- **Simple Deployment**: One binary for all use cases
- **Better UX**: Consistent configuration across build and runtime

**Preserved Features:**
- Multi-database support (SQLite + PostgreSQL)
- sqlc code generation and type safety
- Version control system with rollback
- Professional API endpoints
- Content enhancement pipeline

**Development Workflow:**
- `just dev` - Full-stack development (API server + demo site)
- `just serve` - API server only
- `just enhance` - Build-time content injection
- `air` - Hot reload unified binary

**Migration:** Consolidated insertr-cli/ and insertr-server/ → unified root structure
2025-09-09 00:39:35 +02:00
parent 4dc479ba9e
commit e28000fd33
43 changed files with 4339 additions and 75 deletions
--- a/internal/parser/utils.go
+++ b/internal/parser/utils.go
@@ -0,0 +1,159 @@
+package parser
+
+import (
+	"strings"
+
+	"golang.org/x/net/html"
+)
+
+// getClasses returns the CSS class names listed in node's "class" attribute.
+// The attribute value is split on runs of whitespace. When the attribute is
+// absent or empty, an empty, non-nil slice is returned.
+func getClasses(node *html.Node) []string {
+	raw := getAttribute(node, "class")
+	if raw != "" {
+		return strings.Fields(raw)
+	}
+	return []string{}
+}
+
+// containsClass reports whether target is an exact, case-sensitive match for
+// one of the entries in classes.
+func containsClass(classes []string, target string) bool {
+	found := false
+	for i := 0; i < len(classes) && !found; i++ {
+		found = classes[i] == target
+	}
+	return found
+}
+
+// getAttribute returns the value of node's first attribute named key, or "".
+func getAttribute(node *html.Node, key string) string {
+	for i := range node.Attr {
+		if a := node.Attr[i]; a.Key == key {
+			return a.Val
+		}
+	}
+	return ""
+}
+
+// extractTextContent gets the text content from an HTML node
+func extractTextContent(node *html.Node) string {
+	var text strings.Builder
+	extractTextRecursive(node, &text)
+	return strings.TrimSpace(text.String())
+}
+
+// extractTextRecursive appends the data of every text node in the subtree
+// rooted at node to text, in document order. Child elements named "script" or
+// "style" are skipped along with everything beneath them; node itself is
+// never filtered.
+func extractTextRecursive(node *html.Node, text *strings.Builder) {
+	if node.Type == html.TextNode {
+		text.WriteString(node.Data)
+	}
+	for c := node.FirstChild; c != nil; c = c.NextSibling {
+		skip := c.Type == html.ElementNode && (c.Data == "script" || c.Data == "style")
+		if !skip {
+			extractTextRecursive(c, text)
+		}
+	}
+}
+
+// hasOnlyTextContent reports whether node is an element that has no element
+// nodes among its direct children.
+//
+// Text nodes, comments and any other non-element children are allowed, so an
+// element with no children at all also qualifies. Only direct children are
+// inspected. A node that is not itself an element always yields false.
+func hasOnlyTextContent(node *html.Node) bool {
+	if node.Type != html.ElementNode {
+		return false
+	}
+
+	for c := node.FirstChild; c != nil; c = c.NextSibling {
+		// Text, comment and other non-element children are acceptable.
+		if c.Type == html.ElementNode {
+			// A nested element disqualifies the node immediately.
+			return false
+		}
+	}
+
+	return true
+}
+
+// isContainer reports whether node is an element whose tag is one of the
+// container tags recognised here: div, section, article, header, footer,
+// main, aside or nav.
+//
+// Nodes that are not elements are never containers. The tag is compared
+// exactly against node.Data.
+func isContainer(node *html.Node) bool {
+	if node.Type != html.ElementNode {
+		return false
+	}
+
+	switch node.Data {
+	case "div", "section", "article", "header",
+		"footer", "main", "aside", "nav":
+		return true
+	default:
+		return false
+	}
+}
+
+// findViableChildren returns the direct children of node that are candidates
+// for editing, in document order. Only direct children are examined; the
+// function does not descend further into the tree.
+//
+// A child is viable when all of the following hold:
+//   - it is an element node (text nodes, whitespace-only or otherwise, and
+//     every other non-element child are skipped);
+//   - isSelfClosing does not report it as a self-closing tag;
+//   - hasOnlyTextContent reports that it has no nested elements.
+//
+// The result is nil when no child qualifies.
+func findViableChildren(node *html.Node) []*html.Node {
+	var viable []*html.Node
+
+	for c := node.FirstChild; c != nil; c = c.NextSibling {
+		switch {
+		case c.Type != html.ElementNode:
+			// Not an element: nothing to edit here.
+		case isSelfClosing(c):
+			// Self-closing elements are skipped for now.
+		case hasOnlyTextContent(c):
+			// Element holding only non-element children: keep it.
+			viable = append(viable, c)
+		default:
+			// Element with nested elements: not viable at this level.
+		}
+	}
+
+	return viable
+}
+
+// isSelfClosing reports whether node is an element whose tag is one of the
+// self-closing tags listed here: img, input, br, hr, meta, link, area, base,
+// col, embed, source, track or wbr.
+//
+// Nodes that are not elements always yield false. The tag is compared
+// exactly against node.Data.
+//
+// findViableChildren relies on this to skip such elements.
+func isSelfClosing(node *html.Node) bool {
+	if node.Type != html.ElementNode {
+		// Non-element nodes are rejected before the tag is examined.
+		return false
+	}
+
+	switch node.Data {
+	case "img", "input", "br", "hr",
+		"meta", "link", "base",
+		"area", "col", "embed",
+		"source", "track", "wbr":
+		return true
+	default:
+		return false
+	}
+}