refactor: implement unified binary architecture
🏗️ **Major Architecture Refactoring: Separate CLI + Server → Unified Binary** **Key Changes:** ✅ **Unified Binary**: Single 'insertr' binary with subcommands (enhance, serve) ✅ **Preserved Database Architecture**: Maintained sophisticated sqlc multi-DB setup ✅ **Smart Configuration**: Viper + YAML config with CLI flag precedence ✅ **Updated Build System**: Unified justfile, Air, and npm scripts **Command Structure:** - `insertr enhance [input-dir]` - Build-time content injection - `insertr serve` - HTTP API server (dev + production modes) - `insertr --config insertr.yaml` - YAML configuration support **Architecture Benefits:** - **Shared Database Layer**: Single source of truth for content models - **Flexible Workflows**: Local DB for dev, remote API for production - **Simple Deployment**: One binary for all use cases - **Better UX**: Consistent configuration across build and runtime **Preserved Features:** - Multi-database support (SQLite + PostgreSQL) - sqlc code generation and type safety - Version control system with rollback - Professional API endpoints - Content enhancement pipeline **Development Workflow:** - `just dev` - Full-stack development (API server + demo site) - `just serve` - API server only - `just enhance` - Build-time content injection - `air` - Hot reload unified binary **Migration:** Consolidated insertr-cli/ and insertr-server/ → unified root structure
This commit is contained in:
167
internal/parser/id_generator.go
Normal file
167
internal/parser/id_generator.go
Normal file
@@ -0,0 +1,167 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"crypto/sha1"
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// IDGenerator produces content IDs for HTML elements and remembers every ID
// it has handed out so that later requests can be de-duplicated.
type IDGenerator struct {
	// usedIDs is the set of IDs already returned by this generator.
	usedIDs map[string]bool
}
|
||||
|
||||
// NewIDGenerator creates a new ID generator
|
||||
func NewIDGenerator() *IDGenerator {
|
||||
return &IDGenerator{
|
||||
usedIDs: make(map[string]bool),
|
||||
}
|
||||
}
|
||||
|
||||
// Generate creates a content ID for an HTML element
|
||||
func (g *IDGenerator) Generate(node *html.Node) string {
|
||||
context := g.getSemanticContext(node)
|
||||
purpose := g.getPurpose(node)
|
||||
contentHash := g.getContentHash(node)
|
||||
|
||||
baseID := g.createBaseID(context, purpose, contentHash)
|
||||
return g.ensureUnique(baseID)
|
||||
}
|
||||
|
||||
// getSemanticContext determines the semantic context from parent elements
|
||||
func (g *IDGenerator) getSemanticContext(node *html.Node) string {
|
||||
// Walk up the tree to find semantic containers
|
||||
parent := node.Parent
|
||||
for parent != nil && parent.Type == html.ElementNode {
|
||||
classes := getClasses(parent)
|
||||
|
||||
// Check for common semantic section classes
|
||||
for _, class := range []string{"hero", "services", "nav", "navbar", "footer", "about", "contact", "testimonial"} {
|
||||
if containsClass(classes, class) {
|
||||
return class
|
||||
}
|
||||
}
|
||||
|
||||
// Check for semantic HTML elements
|
||||
switch parent.Data {
|
||||
case "nav":
|
||||
return "nav"
|
||||
case "header":
|
||||
return "header"
|
||||
case "footer":
|
||||
return "footer"
|
||||
case "main":
|
||||
return "main"
|
||||
case "aside":
|
||||
return "aside"
|
||||
}
|
||||
|
||||
parent = parent.Parent
|
||||
}
|
||||
|
||||
return "content"
|
||||
}
|
||||
|
||||
// getPurpose determines the purpose/role of the element
|
||||
func (g *IDGenerator) getPurpose(node *html.Node) string {
|
||||
tag := strings.ToLower(node.Data)
|
||||
classes := getClasses(node)
|
||||
|
||||
// Check for specific CSS classes that indicate purpose
|
||||
for _, class := range classes {
|
||||
switch {
|
||||
case strings.Contains(class, "title"):
|
||||
return "title"
|
||||
case strings.Contains(class, "headline"):
|
||||
return "headline"
|
||||
case strings.Contains(class, "description"):
|
||||
return "description"
|
||||
case strings.Contains(class, "subtitle"):
|
||||
return "subtitle"
|
||||
case strings.Contains(class, "cta"):
|
||||
return "cta"
|
||||
case strings.Contains(class, "button"):
|
||||
return "button"
|
||||
case strings.Contains(class, "logo"):
|
||||
return "logo"
|
||||
case strings.Contains(class, "lead"):
|
||||
return "lead"
|
||||
}
|
||||
}
|
||||
|
||||
// Infer purpose from HTML tag
|
||||
switch tag {
|
||||
case "h1":
|
||||
return "title"
|
||||
case "h2":
|
||||
return "subtitle"
|
||||
case "h3", "h4", "h5", "h6":
|
||||
return "heading"
|
||||
case "p":
|
||||
return "text"
|
||||
case "a":
|
||||
return "link"
|
||||
case "button":
|
||||
return "button"
|
||||
default:
|
||||
return "content"
|
||||
}
|
||||
}
|
||||
|
||||
// getContentHash creates a short hash of the content for ID generation
|
||||
func (g *IDGenerator) getContentHash(node *html.Node) string {
|
||||
text := extractTextContent(node)
|
||||
|
||||
// Create hash of the text content
|
||||
hash := fmt.Sprintf("%x", sha1.Sum([]byte(text)))
|
||||
|
||||
// Return first 6 characters for brevity
|
||||
return hash[:6]
|
||||
}
|
||||
|
||||
// createBaseID creates the base ID from components
|
||||
func (g *IDGenerator) createBaseID(context, purpose, contentHash string) string {
|
||||
parts := []string{}
|
||||
|
||||
// Add context if meaningful
|
||||
if context != "content" {
|
||||
parts = append(parts, context)
|
||||
}
|
||||
|
||||
// Add purpose
|
||||
parts = append(parts, purpose)
|
||||
|
||||
// Always add content hash for uniqueness
|
||||
parts = append(parts, contentHash)
|
||||
|
||||
baseID := strings.Join(parts, "-")
|
||||
|
||||
// Clean up the ID
|
||||
baseID = regexp.MustCompile(`-+`).ReplaceAllString(baseID, "-")
|
||||
baseID = strings.Trim(baseID, "-")
|
||||
|
||||
// Ensure it's not empty
|
||||
if baseID == "" {
|
||||
baseID = fmt.Sprintf("content-%s", contentHash)
|
||||
}
|
||||
|
||||
return baseID
|
||||
}
|
||||
|
||||
// ensureUnique makes sure the ID is unique by adding a suffix if needed
|
||||
func (g *IDGenerator) ensureUnique(baseID string) string {
|
||||
if !g.usedIDs[baseID] {
|
||||
g.usedIDs[baseID] = true
|
||||
return baseID
|
||||
}
|
||||
|
||||
// If base ID is taken, add a hash suffix
|
||||
hash := fmt.Sprintf("%x", sha1.Sum([]byte(baseID)))[:6]
|
||||
uniqueID := fmt.Sprintf("%s-%s", baseID, hash)
|
||||
|
||||
g.usedIDs[uniqueID] = true
|
||||
return uniqueID
|
||||
}
|
||||
229
internal/parser/parser.go
Normal file
229
internal/parser/parser.go
Normal file
@@ -0,0 +1,229 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// Parser handles HTML parsing and element detection
|
||||
type Parser struct {
|
||||
idGenerator *IDGenerator
|
||||
}
|
||||
|
||||
// New creates a new Parser instance
|
||||
func New() *Parser {
|
||||
return &Parser{
|
||||
idGenerator: NewIDGenerator(),
|
||||
}
|
||||
}
|
||||
|
||||
// ParseDirectory parses all HTML files in the given directory
|
||||
func (p *Parser) ParseDirectory(dir string) (*ParseResult, error) {
|
||||
result := &ParseResult{
|
||||
Elements: []Element{},
|
||||
Warnings: []string{},
|
||||
Stats: ParseStats{
|
||||
TypeBreakdown: make(map[ContentType]int),
|
||||
},
|
||||
}
|
||||
|
||||
err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Only process HTML files
|
||||
if d.IsDir() || !strings.HasSuffix(strings.ToLower(path), ".html") {
|
||||
return nil
|
||||
}
|
||||
|
||||
elements, warnings, err := p.parseFile(path)
|
||||
if err != nil {
|
||||
result.Warnings = append(result.Warnings,
|
||||
fmt.Sprintf("Error parsing %s: %v", path, err))
|
||||
return nil // Continue processing other files
|
||||
}
|
||||
|
||||
result.Elements = append(result.Elements, elements...)
|
||||
result.Warnings = append(result.Warnings, warnings...)
|
||||
result.Stats.FilesProcessed++
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error walking directory: %w", err)
|
||||
}
|
||||
|
||||
// Calculate statistics
|
||||
p.calculateStats(result)
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// parseFile parses a single HTML file
|
||||
func (p *Parser) parseFile(filePath string) ([]Element, []string, error) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("error opening file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
doc, err := html.Parse(file)
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("error parsing HTML: %w", err)
|
||||
}
|
||||
|
||||
var elements []Element
|
||||
var warnings []string
|
||||
|
||||
p.findInsertrElements(doc, filePath, &elements, &warnings)
|
||||
|
||||
return elements, warnings, nil
|
||||
}
|
||||
|
||||
// findInsertrElements recursively finds all elements with "insertr" class
|
||||
func (p *Parser) findInsertrElements(node *html.Node, filePath string, elements *[]Element, warnings *[]string) {
|
||||
if node.Type == html.ElementNode {
|
||||
classes := getClasses(node)
|
||||
|
||||
// Check if element has "insertr" class
|
||||
if containsClass(classes, "insertr") {
|
||||
if isContainer(node) {
|
||||
// Container element - expand to viable children
|
||||
viableChildren := findViableChildren(node)
|
||||
for _, child := range viableChildren {
|
||||
childClasses := getClasses(child)
|
||||
element, warning := p.createElement(child, filePath, childClasses)
|
||||
*elements = append(*elements, element)
|
||||
if warning != "" {
|
||||
*warnings = append(*warnings, warning)
|
||||
}
|
||||
}
|
||||
|
||||
// Don't process children recursively since we've handled the container's children
|
||||
return
|
||||
} else {
|
||||
// Regular element - process as before
|
||||
element, warning := p.createElement(node, filePath, classes)
|
||||
*elements = append(*elements, element)
|
||||
if warning != "" {
|
||||
*warnings = append(*warnings, warning)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively check children
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
p.findInsertrElements(child, filePath, elements, warnings)
|
||||
}
|
||||
}
|
||||
|
||||
// createElement creates an Element from an HTML node
|
||||
func (p *Parser) createElement(node *html.Node, filePath string, classes []string) (Element, string) {
|
||||
var warning string
|
||||
|
||||
// Resolve content ID (existing or generated)
|
||||
contentID, hasExistingID := p.resolveContentID(node)
|
||||
if !hasExistingID {
|
||||
contentID = p.idGenerator.Generate(node)
|
||||
}
|
||||
|
||||
// Detect content type
|
||||
contentType := p.detectContentType(node, classes)
|
||||
|
||||
// Extract text content
|
||||
content := extractTextContent(node)
|
||||
|
||||
element := Element{
|
||||
FilePath: filePath,
|
||||
Node: node,
|
||||
ContentID: contentID,
|
||||
Type: contentType,
|
||||
Tag: strings.ToLower(node.Data),
|
||||
Classes: classes,
|
||||
Content: content,
|
||||
HasID: hasExistingID,
|
||||
Generated: !hasExistingID,
|
||||
}
|
||||
|
||||
// Generate warnings for edge cases
|
||||
if content == "" {
|
||||
warning = fmt.Sprintf("Element <%s> with id '%s' has no text content",
|
||||
element.Tag, element.ContentID)
|
||||
}
|
||||
|
||||
return element, warning
|
||||
}
|
||||
|
||||
// resolveContentID gets the content ID from existing attributes
|
||||
func (p *Parser) resolveContentID(node *html.Node) (string, bool) {
|
||||
// 1. Check for existing HTML id attribute
|
||||
if id := getAttribute(node, "id"); id != "" {
|
||||
return id, true
|
||||
}
|
||||
|
||||
// 2. Check for data-content-id attribute
|
||||
if contentID := getAttribute(node, "data-content-id"); contentID != "" {
|
||||
return contentID, true
|
||||
}
|
||||
|
||||
// 3. No existing ID found
|
||||
return "", false
|
||||
}
|
||||
|
||||
// detectContentType determines the content type based on element and classes
|
||||
func (p *Parser) detectContentType(node *html.Node, classes []string) ContentType {
|
||||
// Check for explicit type classes first
|
||||
if containsClass(classes, "insertr-markdown") {
|
||||
return ContentMarkdown
|
||||
}
|
||||
if containsClass(classes, "insertr-link") {
|
||||
return ContentLink
|
||||
}
|
||||
if containsClass(classes, "insertr-text") {
|
||||
return ContentText
|
||||
}
|
||||
|
||||
// Infer from HTML tag and context
|
||||
tag := strings.ToLower(node.Data)
|
||||
switch tag {
|
||||
case "h1", "h2", "h3", "h4", "h5", "h6":
|
||||
return ContentText
|
||||
case "p":
|
||||
// Paragraphs default to markdown for rich content
|
||||
return ContentMarkdown
|
||||
case "a", "button":
|
||||
return ContentLink
|
||||
case "div", "section":
|
||||
// Default divs/sections to markdown for rich content
|
||||
return ContentMarkdown
|
||||
case "span":
|
||||
return ContentText
|
||||
default:
|
||||
return ContentText
|
||||
}
|
||||
}
|
||||
|
||||
// calculateStats computes statistics for the parse result
|
||||
func (p *Parser) calculateStats(result *ParseResult) {
|
||||
result.Stats.TotalElements = len(result.Elements)
|
||||
|
||||
for _, element := range result.Elements {
|
||||
// Count existing vs generated IDs
|
||||
if element.HasID {
|
||||
result.Stats.ExistingIDs++
|
||||
} else {
|
||||
result.Stats.GeneratedIDs++
|
||||
}
|
||||
|
||||
// Count content types
|
||||
result.Stats.TypeBreakdown[element.Type]++
|
||||
}
|
||||
}
|
||||
41
internal/parser/types.go
Normal file
41
internal/parser/types.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package parser
|
||||
|
||||
import "golang.org/x/net/html"
|
||||
|
||||
// ContentType names the kind of editable content an element holds.
type ContentType string

// The content types assigned to parsed elements.
const (
	// ContentText is plain text.
	ContentText ContentType = "text"
	// ContentMarkdown is rich content edited as markdown.
	ContentMarkdown ContentType = "markdown"
	// ContentLink is a link-like element.
	ContentLink ContentType = "link"
)
|
||||
|
||||
// Element represents a parsed editable element
|
||||
type Element struct {
|
||||
FilePath string `json:"file_path"`
|
||||
Node *html.Node `json:"-"` // Don't serialize HTML node
|
||||
ContentID string `json:"content_id"`
|
||||
Type ContentType `json:"type"`
|
||||
Tag string `json:"tag"`
|
||||
Classes []string `json:"classes"`
|
||||
Content string `json:"content"`
|
||||
HasID bool `json:"has_id"` // Whether element had existing ID
|
||||
Generated bool `json:"generated"` // Whether ID was generated
|
||||
}
|
||||
|
||||
// ParseResult contains the results of parsing HTML files
|
||||
type ParseResult struct {
|
||||
Elements []Element `json:"elements"`
|
||||
Warnings []string `json:"warnings"`
|
||||
Stats ParseStats `json:"stats"`
|
||||
}
|
||||
|
||||
// ParseStats provides statistics about the parsing operation
|
||||
type ParseStats struct {
|
||||
FilesProcessed int `json:"files_processed"`
|
||||
TotalElements int `json:"total_elements"`
|
||||
ExistingIDs int `json:"existing_ids"`
|
||||
GeneratedIDs int `json:"generated_ids"`
|
||||
TypeBreakdown map[ContentType]int `json:"type_breakdown"`
|
||||
}
|
||||
159
internal/parser/utils.go
Normal file
159
internal/parser/utils.go
Normal file
@@ -0,0 +1,159 @@
|
||||
package parser
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// getClasses extracts CSS classes from an HTML node
|
||||
func getClasses(node *html.Node) []string {
|
||||
classAttr := getAttribute(node, "class")
|
||||
if classAttr == "" {
|
||||
return []string{}
|
||||
}
|
||||
|
||||
classes := strings.Fields(classAttr)
|
||||
return classes
|
||||
}
|
||||
|
||||
// containsClass reports whether target appears in classes as an exact,
// case-sensitive match.
func containsClass(classes []string, target string) bool {
	for i := range classes {
		if classes[i] == target {
			return true
		}
	}
	return false
}
|
||||
|
||||
// getAttribute gets an attribute value from an HTML node
|
||||
func getAttribute(node *html.Node, key string) string {
|
||||
for _, attr := range node.Attr {
|
||||
if attr.Key == key {
|
||||
return attr.Val
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// extractTextContent gets the text content from an HTML node
|
||||
func extractTextContent(node *html.Node) string {
|
||||
var text strings.Builder
|
||||
extractTextRecursive(node, &text)
|
||||
return strings.TrimSpace(text.String())
|
||||
}
|
||||
|
||||
// extractTextRecursive recursively extracts text from node and children
|
||||
func extractTextRecursive(node *html.Node, text *strings.Builder) {
|
||||
if node.Type == html.TextNode {
|
||||
text.WriteString(node.Data)
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
// Skip script and style elements
|
||||
if child.Type == html.ElementNode &&
|
||||
(child.Data == "script" || child.Data == "style") {
|
||||
continue
|
||||
}
|
||||
extractTextRecursive(child, text)
|
||||
}
|
||||
}
|
||||
|
||||
// hasOnlyTextContent checks if a node contains only text content (no nested HTML elements)
|
||||
func hasOnlyTextContent(node *html.Node) bool {
|
||||
if node.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
switch child.Type {
|
||||
case html.ElementNode:
|
||||
// Found a nested HTML element - not text-only
|
||||
return false
|
||||
case html.TextNode:
|
||||
// Text nodes are fine, continue checking
|
||||
continue
|
||||
default:
|
||||
// Comments, etc. - continue checking
|
||||
continue
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// isContainer checks if a tag is typically used as a container element
|
||||
func isContainer(node *html.Node) bool {
|
||||
if node.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
|
||||
containerTags := map[string]bool{
|
||||
"div": true,
|
||||
"section": true,
|
||||
"article": true,
|
||||
"header": true,
|
||||
"footer": true,
|
||||
"main": true,
|
||||
"aside": true,
|
||||
"nav": true,
|
||||
}
|
||||
|
||||
return containerTags[node.Data]
|
||||
}
|
||||
|
||||
// findViableChildren finds all child elements that are viable for editing
|
||||
func findViableChildren(node *html.Node) []*html.Node {
|
||||
var viable []*html.Node
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
// Skip whitespace-only text nodes
|
||||
if child.Type == html.TextNode {
|
||||
if strings.TrimSpace(child.Data) == "" {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Only consider element nodes
|
||||
if child.Type != html.ElementNode {
|
||||
continue
|
||||
}
|
||||
|
||||
// Skip self-closing elements for now
|
||||
if isSelfClosing(child) {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if element has only text content
|
||||
if hasOnlyTextContent(child) {
|
||||
viable = append(viable, child)
|
||||
}
|
||||
}
|
||||
|
||||
return viable
|
||||
}
|
||||
|
||||
// isSelfClosing checks if an element is typically self-closing
|
||||
func isSelfClosing(node *html.Node) bool {
|
||||
if node.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
|
||||
selfClosingTags := map[string]bool{
|
||||
"img": true,
|
||||
"input": true,
|
||||
"br": true,
|
||||
"hr": true,
|
||||
"meta": true,
|
||||
"link": true,
|
||||
"area": true,
|
||||
"base": true,
|
||||
"col": true,
|
||||
"embed": true,
|
||||
"source": true,
|
||||
"track": true,
|
||||
"wbr": true,
|
||||
}
|
||||
|
||||
return selfClosingTags[node.Data]
|
||||
}
|
||||
Reference in New Issue
Block a user