refactor: implement unified binary architecture

🏗️ **Major Architecture Refactoring: Separate CLI + Server → Unified Binary** **Key Changes:** ✅ **Unified Binary**: Single 'insertr' binary with subcommands (enhance, serve) ✅ **Preserved Database Architecture**: Maintained sophisticated sqlc multi-DB setup ✅ **Smart Configuration**: Viper + YAML config with CLI flag precedence ✅ **Updated Build System**: Unified justfile, Air, and npm scripts **Command Structure:** - `insertr enhance [input-dir]` - Build-time content injection - `insertr serve` - HTTP API server (dev + production modes) - `insertr --config insertr.yaml` - YAML configuration support **Architecture Benefits:** - **Shared Database Layer**: Single source of truth for content models - **Flexible Workflows**: Local DB for dev, remote API for production - **Simple Deployment**: One binary for all use cases - **Better UX**: Consistent configuration across build and runtime **Preserved Features:** - Multi-database support (SQLite + PostgreSQL) - sqlc code generation and type safety - Version control system with rollback - Professional API endpoints - Content enhancement pipeline **Development Workflow:** - `just dev` - Full-stack development (API server + demo site) - `just serve` - API server only - `just enhance` - Build-time content injection - `air` - Hot reload unified binary **Migration:** Consolidated insertr-cli/ and insertr-server/ → unified root structure
2025-09-09 00:39:35 +02:00
parent 4dc479ba9e
commit e28000fd33
43 changed files with 4339 additions and 75 deletions
--- a/internal/parser/parser.go
+++ b/internal/parser/parser.go
@@ -0,0 +1,229 @@
+package parser
+
+import (
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"strings"
+
+	"golang.org/x/net/html"
+)
+
+// Parser parses HTML documents and collects the elements marked with the "insertr" class.
+type Parser struct {
+	idGenerator *IDGenerator // supplies a content ID for elements that have none of their own
+}
+
+// New returns a Parser equipped with a freshly created ID generator.
+func New() *Parser {
+	p := new(Parser)
+	p.idGenerator = NewIDGenerator()
+	return p
+}
+
+// ParseDirectory walks dir recursively and parses every file whose path ends
+// in ".html" (case-insensitive). A file that fails to parse is recorded as a
+// warning and skipped; only a failure of the walk itself is returned as an error.
+func (p *Parser) ParseDirectory(dir string) (*ParseResult, error) {
+	result := new(ParseResult)
+	result.Elements = []Element{}
+	result.Warnings = []string{}
+	result.Stats.TypeBreakdown = make(map[ContentType]int)
+
+	// visit handles a single entry reported by the directory walk.
+	visit := func(path string, entry fs.DirEntry, walkErr error) error {
+		switch {
+		case walkErr != nil:
+			return walkErr
+		case entry.IsDir():
+			return nil
+		case !strings.HasSuffix(strings.ToLower(path), ".html"):
+			return nil
+		}
+
+		found, notes, parseErr := p.parseFile(path)
+		if parseErr != nil {
+			// Keep going: one unreadable file must not abort the whole walk.
+			msg := fmt.Sprintf("Error parsing %s: %v", path, parseErr)
+			result.Warnings = append(result.Warnings, msg)
+			return nil
+		}
+
+		result.Stats.FilesProcessed++
+		result.Elements = append(result.Elements, found...)
+		result.Warnings = append(result.Warnings, notes...)
+		return nil
+	}
+
+	if err := filepath.WalkDir(dir, visit); err != nil {
+		return nil, fmt.Errorf("error walking directory: %w", err)
+	}
+
+	// Derive the aggregate counters once every file has been visited.
+	p.calculateStats(result)
+
+	return result, nil
+}
+
+// parseFile parses one HTML file and returns its insertr elements and warnings.
+func (p *Parser) parseFile(filePath string) ([]Element, []string, error) {
+	f, openErr := os.Open(filePath)
+	if openErr != nil {
+		return nil, nil, fmt.Errorf("error opening file: %w", openErr)
+	}
+	defer f.Close()
+
+	root, parseErr := html.Parse(f)
+	if parseErr != nil {
+		return nil, nil, fmt.Errorf("error parsing HTML: %w", parseErr)
+	}
+
+	var (
+		found []Element
+		notes []string
+	)
+	p.findInsertrElements(root, filePath, &found, &notes)
+	return found, notes, nil
+}
+
+// findInsertrElements walks the tree rooted at node depth-first and appends
+// an Element for every element node carrying the "insertr" class. When such
+// a node is a container, its viable children are recorded in its place and
+// the container's subtree is not searched any further.
+func (p *Parser) findInsertrElements(node *html.Node, filePath string, elements *[]Element, warnings *[]string) {
+	// record builds the Element for target and appends it, together with its
+	// warning when one was produced, to the caller-supplied slices.
+	record := func(target *html.Node, targetClasses []string) {
+		element, warning := p.createElement(target, filePath, targetClasses)
+		*elements = append(*elements, element)
+		if warning != "" {
+			*warnings = append(*warnings, warning)
+		}
+	}
+
+	// Classes are inspected on element nodes only; other nodes are just traversed.
+	if node.Type == html.ElementNode {
+		if classes := getClasses(node); containsClass(classes, "insertr") {
+			if !isContainer(node) {
+				// Plain element: record it, then fall through to scan its children.
+				record(node, classes)
+			} else {
+				// Container: record each viable child in its place, then stop
+				// so the container's subtree is not searched again.
+				for _, child := range findViableChildren(node) {
+					record(child, getClasses(child))
+				}
+				return
+			}
+		}
+	}
+
+	// Continue the search below this node.
+	for child := node.FirstChild; child != nil; child = child.NextSibling {
+		p.findInsertrElements(child, filePath, elements, warnings)
+	}
+}
+
+// createElement creates an Element from an HTML node
+func (p *Parser) createElement(node *html.Node, filePath string, classes []string) (Element, string) {
+	var warning string
+
+	// Resolve content ID (existing or generated)
+	contentID, hasExistingID := p.resolveContentID(node)
+	if !hasExistingID {
+		contentID = p.idGenerator.Generate(node)
+	}
+
+	// Detect content type
+	contentType := p.detectContentType(node, classes)
+
+	// Extract text content
+	content := extractTextContent(node)
+
+	element := Element{
+		FilePath:  filePath,
+		Node:      node,
+		ContentID: contentID,
+		Type:      contentType,
+		Tag:       strings.ToLower(node.Data),
+		Classes:   classes,
+		Content:   content,
+		HasID:     hasExistingID,
+		Generated: !hasExistingID,
+	}
+
+	// Generate warnings for edge cases
+	if content == "" {
+		warning = fmt.Sprintf("Element <%s> with id '%s' has no text content",
+			element.Tag, element.ContentID)
+	}
+
+	return element, warning
+}
+
+// resolveContentID looks for an ID already present on node. The "id"
+// attribute is consulted first, then "data-content-id"; an empty value counts
+// as absent. The boolean reports whether an existing ID was found.
+func (p *Parser) resolveContentID(node *html.Node) (string, bool) {
+	// Attribute names in priority order; the first one holding a non-empty
+	// value supplies the ID.
+	for _, name := range []string{"id", "data-content-id"} {
+		if value := getAttribute(node, name); value != "" {
+			return value, true
+		}
+	}
+
+	// Neither attribute yielded a value.
+	return "", false
+}
+
+// detectContentType picks the ContentType for node. An explicit type class
+// takes precedence, checked in the order insertr-markdown, insertr-link,
+// insertr-text; otherwise the type is inferred from the lower-cased tag
+// name, with text as the fallback for any tag not listed.
+func (p *Parser) detectContentType(node *html.Node, classes []string) ContentType {
+	// Explicit type classes override any inference; they are checked in
+	// priority order.
+	switch {
+	case containsClass(classes, "insertr-markdown"):
+		return ContentMarkdown
+	case containsClass(classes, "insertr-link"):
+		return ContentLink
+	case containsClass(classes, "insertr-text"):
+		return ContentText
+	}
+
+	// No override: infer from the tag.
+	tag := strings.ToLower(node.Data)
+	switch tag {
+	case "p", "div", "section":
+		// Paragraphs, divs and sections default to markdown for rich content.
+		return ContentMarkdown
+	case "a", "button":
+		// Anchors and buttons are classified as links.
+		return ContentLink
+	case "h1", "h2", "h3", "h4", "h5", "h6", "span":
+		return ContentText
+	default:
+		// Every other tag falls back to plain text.
+		return ContentText
+	}
+}
+
+// calculateStats fills result.Stats from result.Elements: the element total,
+// how many IDs pre-existed versus were generated, and a per-type tally.
+func (p *Parser) calculateStats(result *ParseResult) {
+	stats := &result.Stats
+	stats.TotalElements = len(result.Elements)
+
+	for i := range result.Elements {
+		element := &result.Elements[i]
+		if !element.HasID {
+			stats.GeneratedIDs++
+		} else {
+			stats.ExistingIDs++
+		}
+		stats.TypeBreakdown[element.Type]++
+	}
+}