refactor: implement unified binary architecture

🏗️ **Major Architecture Refactoring: Separate CLI + Server → Unified Binary**

**Key Changes:**
 **Unified Binary**: Single 'insertr' binary with subcommands (enhance, serve)
 **Preserved Database Architecture**: Maintained sophisticated sqlc multi-DB setup
 **Smart Configuration**: Viper + YAML config with CLI flag precedence
 **Updated Build System**: Unified justfile, Air, and npm scripts

**Command Structure:**
- `insertr enhance [input-dir]` - Build-time content injection
- `insertr serve` - HTTP API server (dev + production modes)
- `insertr --config insertr.yaml` - YAML configuration support

**Architecture Benefits:**
- **Shared Database Layer**: Single source of truth for content models
- **Flexible Workflows**: Local DB for dev, remote API for production
- **Simple Deployment**: One binary for all use cases
- **Better UX**: Consistent configuration across build and runtime

**Preserved Features:**
- Multi-database support (SQLite + PostgreSQL)
- sqlc code generation and type safety
- Version control system with rollback
- Professional API endpoints
- Content enhancement pipeline

**Development Workflow:**
- `just dev` - Full-stack development (API server + demo site)
- `just serve` - API server only
- `just enhance` - Build-time content injection
- `air` - Hot reload unified binary

**Migration:** Consolidated insertr-cli/ and insertr-server/ → unified root structure
This commit is contained in:
2025-09-09 00:39:35 +02:00
parent 4dc479ba9e
commit e28000fd33
43 changed files with 4339 additions and 75 deletions

159
internal/parser/utils.go Normal file
View File

@@ -0,0 +1,159 @@
package parser
import (
"strings"
"golang.org/x/net/html"
)
// getClasses extracts CSS classes from an HTML node
func getClasses(node *html.Node) []string {
classAttr := getAttribute(node, "class")
if classAttr == "" {
return []string{}
}
classes := strings.Fields(classAttr)
return classes
}
// containsClass checks if a class list contains a specific class
func containsClass(classes []string, target string) bool {
for _, class := range classes {
if class == target {
return true
}
}
return false
}
// getAttribute gets an attribute value from an HTML node
func getAttribute(node *html.Node, key string) string {
for _, attr := range node.Attr {
if attr.Key == key {
return attr.Val
}
}
return ""
}
// extractTextContent gets the text content from an HTML node
func extractTextContent(node *html.Node) string {
var text strings.Builder
extractTextRecursive(node, &text)
return strings.TrimSpace(text.String())
}
// extractTextRecursive recursively extracts text from node and children
func extractTextRecursive(node *html.Node, text *strings.Builder) {
if node.Type == html.TextNode {
text.WriteString(node.Data)
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
// Skip script and style elements
if child.Type == html.ElementNode &&
(child.Data == "script" || child.Data == "style") {
continue
}
extractTextRecursive(child, text)
}
}
// hasOnlyTextContent checks if a node contains only text content (no nested HTML elements)
func hasOnlyTextContent(node *html.Node) bool {
if node.Type != html.ElementNode {
return false
}
for child := node.FirstChild; child != nil; child = child.NextSibling {
switch child.Type {
case html.ElementNode:
// Found a nested HTML element - not text-only
return false
case html.TextNode:
// Text nodes are fine, continue checking
continue
default:
// Comments, etc. - continue checking
continue
}
}
return true
}
// isContainer checks if a tag is typically used as a container element
func isContainer(node *html.Node) bool {
if node.Type != html.ElementNode {
return false
}
containerTags := map[string]bool{
"div": true,
"section": true,
"article": true,
"header": true,
"footer": true,
"main": true,
"aside": true,
"nav": true,
}
return containerTags[node.Data]
}
// findViableChildren finds all child elements that are viable for editing
func findViableChildren(node *html.Node) []*html.Node {
var viable []*html.Node
for child := node.FirstChild; child != nil; child = child.NextSibling {
// Skip whitespace-only text nodes
if child.Type == html.TextNode {
if strings.TrimSpace(child.Data) == "" {
continue
}
}
// Only consider element nodes
if child.Type != html.ElementNode {
continue
}
// Skip self-closing elements for now
if isSelfClosing(child) {
continue
}
// Check if element has only text content
if hasOnlyTextContent(child) {
viable = append(viable, child)
}
}
return viable
}
// isSelfClosing checks if an element is typically self-closing
func isSelfClosing(node *html.Node) bool {
if node.Type != html.ElementNode {
return false
}
selfClosingTags := map[string]bool{
"img": true,
"input": true,
"br": true,
"hr": true,
"meta": true,
"link": true,
"area": true,
"base": true,
"col": true,
"embed": true,
"source": true,
"track": true,
"wbr": true,
}
return selfClosingTags[node.Data]
}