Refactor architecture: eliminate auto-discovery and consolidate packages

- Remove auto-discovery entirely (~450 lines)
  * Delete internal/content/discoverer.go
  * Simplify enhancer to single-phase processing
  * Remove duplicate container expansion logic

- Consolidate repository implementations
  * Move internal/content/client.go → internal/db/http_client.go
  * Group all repository implementations in db/ package

- Add file utilities to engine following Go stdlib patterns
  * Add engine.ProcessFile() and ProcessDirectory() methods
  * Engine now handles both content processing AND file operations

- Move site management to dedicated package
  * Move internal/content/site_manager.go → internal/sites/manager.go
  * Clear separation of site lifecycle from content processing

- Preserve container expansion (syntactic sugar)
  * .insertr on containers still auto-applies to viable children
  * Container detection logic consolidated in engine/utils.go

Result: Clean architecture with single source of truth for .insertr processing
This commit is contained in:
2025-10-19 22:37:26 +02:00
parent 87b78a4a69
commit dbdd4361b7
8 changed files with 152 additions and 667 deletions

View File

@@ -9,6 +9,7 @@ import (
"github.com/spf13/cobra" "github.com/spf13/cobra"
"github.com/insertr/insertr/internal/config"
"github.com/insertr/insertr/internal/content" "github.com/insertr/insertr/internal/content"
"github.com/insertr/insertr/internal/db" "github.com/insertr/insertr/internal/db"
) )
@@ -95,7 +96,7 @@ func runEnhance(cmd *cobra.Command, args []string) {
var client db.ContentRepository var client db.ContentRepository
if cfg.API.URL != "" { if cfg.API.URL != "" {
fmt.Printf("🌐 Using content API: %s\n", cfg.API.URL) fmt.Printf("🌐 Using content API: %s\n", cfg.API.URL)
client = content.NewHTTPClient(cfg.API.URL, cfg.API.Key) client = db.NewHTTPClient(cfg.API.URL, cfg.API.Key)
} else if cfg.Database.Path != "" { } else if cfg.Database.Path != "" {
fmt.Printf("🗄️ Using database: %s\n", cfg.Database.Path) fmt.Printf("🗄️ Using database: %s\n", cfg.Database.Path)
database, err := db.NewDatabase(cfg.Database.Path) database, err := db.NewDatabase(cfg.Database.Path)
@@ -110,7 +111,7 @@ func runEnhance(cmd *cobra.Command, args []string) {
// Load site-specific configuration // Load site-specific configuration
enhancementConfig := content.EnhancementConfig{ enhancementConfig := content.EnhancementConfig{
Discovery: content.DiscoveryConfig{ Discovery: config.DiscoveryConfig{
Enabled: false, // Default: disabled for explicit class="insertr" markings only Enabled: false, // Default: disabled for explicit class="insertr" markings only
Aggressive: false, Aggressive: false,
Containers: true, Containers: true,

View File

@@ -17,9 +17,9 @@ import (
"github.com/insertr/insertr/internal/api" "github.com/insertr/insertr/internal/api"
"github.com/insertr/insertr/internal/auth" "github.com/insertr/insertr/internal/auth"
"github.com/insertr/insertr/internal/config" "github.com/insertr/insertr/internal/config"
"github.com/insertr/insertr/internal/content"
"github.com/insertr/insertr/internal/db" "github.com/insertr/insertr/internal/db"
"github.com/insertr/insertr/internal/engine" "github.com/insertr/insertr/internal/engine"
"github.com/insertr/insertr/internal/sites"
) )
var serveCmd = &cobra.Command{ var serveCmd = &cobra.Command{
@@ -76,14 +76,14 @@ func runServe(cmd *cobra.Command, args []string) {
} }
// Create legacy auth config for compatibility // Create legacy auth config for compatibility
authConfig := &auth.AuthConfig{ authConfig := &config.AuthConfig{
DevMode: cfg.Auth.DevMode, DevMode: cfg.Auth.DevMode,
Provider: cfg.Auth.Provider, Provider: cfg.Auth.Provider,
JWTSecret: cfg.Auth.JWTSecret, JWTSecret: cfg.Auth.JWTSecret,
} }
if cfg.Auth.OIDC != nil { if cfg.Auth.OIDC != nil {
authConfig.OIDC = &auth.OIDCConfig{ authConfig.OIDC = &config.OIDCConfig{
Endpoint: cfg.Auth.OIDC.Endpoint, Endpoint: cfg.Auth.OIDC.Endpoint,
ClientID: cfg.Auth.OIDC.ClientID, ClientID: cfg.Auth.OIDC.ClientID,
ClientSecret: cfg.Auth.OIDC.ClientSecret, ClientSecret: cfg.Auth.OIDC.ClientSecret,
@@ -107,7 +107,7 @@ func runServe(cmd *cobra.Command, args []string) {
// Initialize site manager with auth provider // Initialize site manager with auth provider
authProvider := &engine.AuthProvider{Type: cfg.Auth.Provider} authProvider := &engine.AuthProvider{Type: cfg.Auth.Provider}
siteManager := content.NewSiteManagerWithAuth(contentClient, cfg.Auth.DevMode, authProvider) siteManager := sites.NewSiteManagerWithAuth(contentClient, cfg.Auth.DevMode, authProvider)
// Convert config sites to legacy format and register // Convert config sites to legacy format and register
var legacySites []*config.SiteConfig var legacySites []*config.SiteConfig
@@ -120,7 +120,7 @@ func runServe(cmd *cobra.Command, args []string) {
AutoEnhance: site.AutoEnhance, AutoEnhance: site.AutoEnhance,
} }
if site.Discovery != nil { if site.Discovery != nil {
legacySite.Discovery = &content.DiscoveryConfig{ legacySite.Discovery = &config.DiscoveryConfig{
Enabled: site.Discovery.Enabled, Enabled: site.Discovery.Enabled,
Aggressive: site.Discovery.Aggressive, Aggressive: site.Discovery.Aggressive,
Containers: site.Discovery.Containers, Containers: site.Discovery.Containers,

View File

@@ -9,16 +9,16 @@ import (
"github.com/go-chi/chi/v5" "github.com/go-chi/chi/v5"
"github.com/insertr/insertr/internal/auth" "github.com/insertr/insertr/internal/auth"
"github.com/insertr/insertr/internal/content"
"github.com/insertr/insertr/internal/db" "github.com/insertr/insertr/internal/db"
"github.com/insertr/insertr/internal/engine" "github.com/insertr/insertr/internal/engine"
"github.com/insertr/insertr/internal/sites"
) )
// ContentHandler handles all content-related HTTP requests // ContentHandler handles all content-related HTTP requests
type ContentHandler struct { type ContentHandler struct {
repository db.ContentRepository repository db.ContentRepository
authService *auth.AuthService authService *auth.AuthService
siteManager *content.SiteManager siteManager *sites.SiteManager
engine *engine.ContentEngine engine *engine.ContentEngine
} }
@@ -36,7 +36,7 @@ func NewContentHandler(database *db.Database, authService *auth.AuthService) *Co
} }
// SetSiteManager sets the site manager for file enhancement // SetSiteManager sets the site manager for file enhancement
func (h *ContentHandler) SetSiteManager(siteManager *content.SiteManager) { func (h *ContentHandler) SetSiteManager(siteManager *sites.SiteManager) {
h.siteManager = siteManager h.siteManager = siteManager
} }

View File

@@ -1,462 +0,0 @@
package content
import (
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"github.com/insertr/insertr/internal/engine"
"golang.org/x/net/html"
)
// Discoverer finds editable elements in parsed HTML and marks them with the
// "insertr" class. It has no fields; all behaviour lives in its methods.
type Discoverer struct{}
// NewDiscoverer returns a pointer to a fresh, zero-valued Discoverer.
func NewDiscoverer() *Discoverer {
	return new(Discoverer)
}
// DiscoveryResult aggregates the outcome of a DiscoverDirectory run.
type DiscoveryResult struct {
	// Counters: FilesProcessed counts HTML files that were written in
	// enhanced form; the remaining three are the per-file counters summed
	// over those files.
	FilesProcessed, ElementsEnhanced, ContainersAdded, IndividualsAdded int

	// SkippedFiles holds the input paths whose discovery failed (they are
	// copied unchanged); EnhancedFiles holds the output paths that were
	// written in enhanced form.
	SkippedFiles, EnhancedFiles []string
}
// DiscoverDirectory walks inputDir and mirrors it into outputDir.
//
// Files whose lower-cased path ends in ".html" are run through discovery and
// written in enhanced form. Every other file is copied unchanged, as is any
// HTML file whose discovery fails (its input path is then recorded in
// SkippedFiles). The aggressive flag is forwarded to discoverFile.
//
// A failure to create outputDir returns a nil result. Any error raised during
// the walk is returned together with the statistics gathered up to that point.
func (disc *Discoverer) DiscoverDirectory(inputDir, outputDir string, aggressive bool) (*DiscoveryResult, error) {
	if mkErr := os.MkdirAll(outputDir, 0755); mkErr != nil {
		return nil, fmt.Errorf("failed to create output directory: %w", mkErr)
	}

	summary := &DiscoveryResult{
		SkippedFiles:  []string{},
		EnhancedFiles: []string{},
	}

	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case entry.IsDir():
			// Directories are created on demand when files are written.
			return nil
		case !strings.HasSuffix(strings.ToLower(path), ".html"):
			return disc.copyFile(path, inputDir, outputDir)
		}

		fileResult, discErr := disc.discoverFile(path, aggressive)
		if discErr != nil {
			// Fall back to the untouched original so the output tree stays complete.
			summary.SkippedFiles = append(summary.SkippedFiles, path)
			return disc.copyFile(path, inputDir, outputDir)
		}

		target := disc.getOutputPath(path, inputDir, outputDir)
		if writeErr := disc.writeEnhancedFile(target, fileResult); writeErr != nil {
			return fmt.Errorf("failed to write enhanced file %s: %w", target, writeErr)
		}

		summary.FilesProcessed++
		summary.ElementsEnhanced += fileResult.ElementsEnhanced
		summary.ContainersAdded += fileResult.ContainersAdded
		summary.IndividualsAdded += fileResult.IndividualsAdded
		summary.EnhancedFiles = append(summary.EnhancedFiles, target)
		return nil
	}

	return summary, filepath.WalkDir(inputDir, visit)
}
// FileDiscoveryResult carries the counters and the parsed document produced
// by running discovery over one HTML file.
type FileDiscoveryResult struct {
	// ElementsEnhanced and IndividualsAdded are bumped together for every
	// element that receives the insertr class. ContainersAdded counts
	// containers whose children were expanded. SugarTransformed counts
	// containers that carried .insertr themselves (syntactic sugar).
	ElementsEnhanced, ContainersAdded, IndividualsAdded, SugarTransformed int

	// Document is the parsed tree, modified in place by discovery.
	Document *html.Node
}
// discoverFile parses the HTML file at filePath, runs discovery over the
// resulting tree and returns the modified document with its counters. The
// file on disk is only read, never written.
func (disc *Discoverer) discoverFile(filePath string, aggressive bool) (*FileDiscoveryResult, error) {
	src, openErr := os.Open(filePath)
	if openErr != nil {
		return nil, fmt.Errorf("error opening file: %w", openErr)
	}
	defer src.Close()

	root, parseErr := html.Parse(src)
	if parseErr != nil {
		return nil, fmt.Errorf("error parsing HTML: %w", parseErr)
	}

	outcome := &FileDiscoveryResult{Document: root}
	disc.discoverNode(root, outcome, aggressive)
	return outcome, nil
}
// discoverNode walks the tree rooted at node and marks editable elements with
// the insertr class, updating the counters in result as it goes.
//
// Decision order for an element node:
//  1. It already carries .insertr: if it is also a good container with at
//     least one viable child, the class moves from the container to those
//     children (syntactic sugar). Either way the subtree is not descended.
//  2. It is a good container with enough viable children (two, or one when
//     aggressive is set): the children are marked, the container is not.
//  3. It is a good individual element: it is marked itself.
//  4. Otherwise its children are visited recursively.
//
// Non-element nodes are simply descended into.
func (disc *Discoverer) discoverNode(node *html.Node, result *FileDiscoveryResult, aggressive bool) {
	descend := func() {
		for kid := node.FirstChild; kid != nil; kid = kid.NextSibling {
			disc.discoverNode(kid, result, aggressive)
		}
	}

	// markAll tags every child that is not yet tagged and counts each new tag.
	markAll := func(children []*html.Node) {
		for _, kid := range children {
			if disc.hasInsertrClass(kid) {
				continue
			}
			disc.addInsertrClass(kid)
			result.IndividualsAdded++
			result.ElementsEnhanced++
		}
	}

	if node.Type != html.ElementNode {
		descend()
		return
	}

	if disc.hasInsertrClass(node) {
		if disc.isGoodContainer(node) {
			if viable := engine.FindViableChildren(node); len(viable) >= 1 {
				// Sugar: the container hands its .insertr to its viable children.
				disc.removeInsertrClass(node)
				markAll(viable)
				result.ContainersAdded++
				result.SugarTransformed++
			}
		}
		// Explicitly marked elements are never descended into, whether or
		// not the sugar transformation applied.
		return
	}

	if disc.isGoodContainer(node) {
		required := 2
		if aggressive {
			required = 1
		}
		if viable := engine.FindViableChildren(node); len(viable) >= required {
			markAll(viable)
			result.ContainersAdded++
			return
		}
	}

	if disc.isGoodIndividualElement(node) {
		disc.addInsertrClass(node)
		result.IndividualsAdded++
		result.ElementsEnhanced++
		return
	}

	descend()
}
// isGoodContainer reports whether node may act as an expansion container: it
// must be one of the structural tags listed below, must not be a non-content
// element, must not live under <head>, and none of its class names may
// contain a framework marker.
func (disc *Discoverer) isGoodContainer(node *html.Node) bool {
	switch strings.ToLower(node.Data) {
	case "div", "section", "article", "header", "footer", "main", "aside", "nav":
		// structural tag, keep checking
	default:
		return false
	}

	if disc.isNonContentElement(node) || disc.isInHead(node) {
		return false
	}

	// Substrings that identify framework-generated wrappers (Next.js and
	// similar) rather than authored content.
	frameworkMarkers := [...]string{"__next", "webpack", "hydration", "react", "gatsby"}
	for _, className := range disc.getClasses(node) {
		lowered := strings.ToLower(className)
		for _, marker := range frameworkMarkers {
			if strings.Contains(lowered, marker) {
				return false
			}
		}
	}
	return true
}
// isGoodIndividualElement reports whether node should receive the insertr
// class on its own. The exclusion checks run in a fixed order and the first
// match rejects the element; only then is the engine asked whether the
// element has editable content.
func (disc *Discoverer) isGoodIndividualElement(node *html.Node) bool {
	switch {
	case disc.isSelfClosing(node),
		disc.isNonContentElement(node),
		disc.isInHead(node),
		disc.hasNoMeaningfulContent(node):
		return false
	}
	return disc.hasEditableContent(node)
}
// hasEditableContent reports whether node has editable content. It is a thin
// wrapper that returns the result of engine.HasEditableContent unchanged, so
// the criteria themselves are defined in the engine package.
func (disc *Discoverer) hasEditableContent(node *html.Node) bool {
return engine.HasEditableContent(node)
}
// hasInsertrClass reports whether "insertr" appears among the node's class
// names. The comparison is exact and case-sensitive.
func (disc *Discoverer) hasInsertrClass(node *html.Node) bool {
	names := disc.getClasses(node)
	for idx := 0; idx < len(names); idx++ {
		if names[idx] == "insertr" {
			return true
		}
	}
	return false
}
// addInsertrClass appends "insertr" to the node's class list. It does not
// check for an existing entry; callers are expected to do so first.
func (disc *Discoverer) addInsertrClass(node *html.Node) {
	disc.setClasses(node, append(disc.getClasses(node), "insertr"))
}
// removeInsertrClass rewrites the node's class attribute without any
// "insertr" entries. When no other class remains, the attribute is set to an
// empty string rather than removed.
func (disc *Discoverer) removeInsertrClass(node *html.Node) {
	current := disc.getClasses(node)
	remaining := make([]string, 0, len(current))
	for _, name := range current {
		if name == "insertr" {
			continue
		}
		remaining = append(remaining, name)
	}
	disc.setClasses(node, remaining)
}
// getClasses returns the class names found in the node's first "class"
// attribute, split on whitespace. It returns an empty, non-nil slice when the
// node has no class attribute or the attribute is blank. The node is never
// modified.
func (disc *Discoverer) getClasses(node *html.Node) []string {
	for _, attr := range node.Attr {
		if attr.Key == "class" {
			// strings.Fields already yields an empty slice for an empty or
			// whitespace-only value, so no special case is needed. The
			// original also had a second, unreachable "class" branch that
			// pretended to update the attribute; it has been dropped.
			return strings.Fields(attr.Val)
		}
	}
	return []string{}
}
// setClasses stores classes, joined by single spaces, in the node's class
// attribute. The first existing "class" attribute is overwritten in place;
// if there is none, a new attribute is appended.
func (disc *Discoverer) setClasses(node *html.Node, classes []string) {
	joined := strings.Join(classes, " ")

	for idx := range node.Attr {
		if node.Attr[idx].Key != "class" {
			continue
		}
		node.Attr[idx].Val = joined
		return
	}

	node.Attr = append(node.Attr, html.Attribute{Key: "class", Val: joined})
}
// isSelfClosing reports whether the node's tag, compared case-insensitively,
// is one of the self-closing elements listed below.
func (disc *Discoverer) isSelfClosing(node *html.Node) bool {
	switch strings.ToLower(node.Data) {
	case "img", "input", "br", "hr",
		"meta", "link", "area", "base",
		"col", "embed", "source", "track", "wbr":
		return true
	}
	return false
}
// isNonContentElement reports whether the node's tag, compared
// case-insensitively, is on the list of elements that must never be editable.
func (disc *Discoverer) isNonContentElement(node *html.Node) bool {
	switch strings.ToLower(node.Data) {
	case
		// Code, styling and document metadata.
		"script", "style", "meta", "link", "title", "base",
		// Document skeleton; body is excluded as too broad.
		"head", "html", "body",
		// Fallback and template content.
		"noscript", "template",
		// Graphics, media and embedded content.
		"svg", "canvas", "iframe", "object", "embed", "video", "audio",
		"track", "source", "param",
		// Image maps.
		"map", "area",
		// Table column definitions.
		"col", "colgroup",
		// Word break opportunities.
		"wbr":
		return true
	}
	return false
}
// isInHead reports whether any ancestor of node is a <head> element. The node
// itself is not examined, only its parents up to the root.
func (disc *Discoverer) isInHead(node *html.Node) bool {
	for ancestor := node.Parent; ancestor != nil; ancestor = ancestor.Parent {
		if ancestor.Type != html.ElementNode {
			continue
		}
		if strings.ToLower(ancestor.Data) == "head" {
			return true
		}
	}
	return false
}
// hasNoMeaningfulContent reports whether node should be rejected for lack of
// real text. It returns true for non-element nodes, for text shorter than two
// bytes (which includes the empty string), and for text containing any
// substring that looks like code, markup or a template artifact.
//
// The text comes from engine.ExtractTextContent.
// NOTE(review): whether that helper trims whitespace is not visible here — confirm.
func (disc *Discoverer) hasNoMeaningfulContent(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return true
	}

	text := engine.ExtractTextContent(node)
	if len(text) < 2 {
		return true
	}

	artifactMarkers := [...]string{
		"$", "<!--", "-->", "{", "}", "[", "]",
		"function", "var ", "const ", "let ", "return",
		"import", "export", "require", "module.exports",
		"/*", "*/", "//", "<?", "?>", "<%", "%>",
	}
	for _, marker := range artifactMarkers {
		if strings.Contains(text, marker) {
			return true
		}
	}
	return false
}
// copyFile copies filePath to the matching location under outputDir, creating
// parent directories (mode 0755) as needed. The whole file is read into
// memory and written with mode 0644.
func (disc *Discoverer) copyFile(filePath, inputDir, outputDir string) error {
	dest := disc.getOutputPath(filePath, inputDir, outputDir)

	if mkErr := os.MkdirAll(filepath.Dir(dest), 0755); mkErr != nil {
		return mkErr
	}

	data, readErr := os.ReadFile(filePath)
	if readErr != nil {
		return readErr
	}
	return os.WriteFile(dest, data, 0644)
}
// getOutputPath maps filePath, which lives under inputDir, to the same
// relative location under outputDir.
//
// The error from filepath.Rel is discarded. When Rel fails, the relative part
// is empty and the result is just the cleaned outputDir.
func (disc *Discoverer) getOutputPath(filePath, inputDir, outputDir string) string {
	relative, _ := filepath.Rel(inputDir, filePath)
	return filepath.Join(outputDir, relative)
}
// writeEnhancedFile renders enhanced.Document as HTML into outputPath,
// creating parent directories (mode 0755) as needed and truncating any
// existing file.
//
// It returns the first error from creating the directory, creating the file,
// rendering, or closing the file. The close error is checked explicitly
// because a failed close on a written file can mean the data never reached
// disk; a deferred Close would drop that error silently.
func (disc *Discoverer) writeEnhancedFile(outputPath string, enhanced *FileDiscoveryResult) error {
	if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
		return err
	}

	file, err := os.Create(outputPath)
	if err != nil {
		return err
	}

	if err := html.Render(file, enhanced.Document); err != nil {
		// The render error is the one to report; the close here only
		// releases the descriptor.
		file.Close()
		return err
	}
	return file.Close()
}

View File

@@ -1,9 +1,6 @@
package content package content
import ( import (
"fmt"
"golang.org/x/net/html"
"os"
"path/filepath" "path/filepath"
"strings" "strings"
@@ -14,18 +11,14 @@ import (
// EnhancementConfig configures the enhancement pipeline // EnhancementConfig configures the enhancement pipeline
type EnhancementConfig struct { type EnhancementConfig struct {
Discovery DiscoveryConfig Discovery config.DiscoveryConfig
ContentInjection bool ContentInjection bool
GenerateIDs bool GenerateIDs bool
} }
// Type alias for backward compatibility
type DiscoveryConfig = config.DiscoveryConfig
// Enhancer combines discovery, ID generation, and content injection in unified pipeline // Enhancer combines discovery, ID generation, and content injection in unified pipeline
type Enhancer struct { type Enhancer struct {
engine *engine.ContentEngine engine *engine.ContentEngine
discoverer *Discoverer
config EnhancementConfig config EnhancementConfig
siteID string siteID string
} }
@@ -34,7 +27,6 @@ type Enhancer struct {
func NewEnhancer(client db.ContentRepository, siteID string, config EnhancementConfig) *Enhancer { func NewEnhancer(client db.ContentRepository, siteID string, config EnhancementConfig) *Enhancer {
return &Enhancer{ return &Enhancer{
engine: engine.NewContentEngine(client), engine: engine.NewContentEngine(client),
discoverer: NewDiscoverer(),
config: config, config: config,
siteID: siteID, siteID: siteID,
} }
@@ -44,7 +36,6 @@ func NewEnhancer(client db.ContentRepository, siteID string, config EnhancementC
func NewEnhancerWithAuth(client db.ContentRepository, siteID string, config EnhancementConfig, authProvider *engine.AuthProvider) *Enhancer { func NewEnhancerWithAuth(client db.ContentRepository, siteID string, config EnhancementConfig, authProvider *engine.AuthProvider) *Enhancer {
return &Enhancer{ return &Enhancer{
engine: engine.NewContentEngineWithAuth(client, authProvider), engine: engine.NewContentEngineWithAuth(client, authProvider),
discoverer: NewDiscoverer(),
config: config, config: config,
siteID: siteID, siteID: siteID,
} }
@@ -53,7 +44,7 @@ func NewEnhancerWithAuth(client db.ContentRepository, siteID string, config Enha
// NewDefaultEnhancer creates an enhancer with default configuration // NewDefaultEnhancer creates an enhancer with default configuration
func NewDefaultEnhancer(client db.ContentRepository, siteID string) *Enhancer { func NewDefaultEnhancer(client db.ContentRepository, siteID string) *Enhancer {
defaultConfig := EnhancementConfig{ defaultConfig := EnhancementConfig{
Discovery: DiscoveryConfig{ Discovery: config.DiscoveryConfig{
Enabled: true, Enabled: true,
Aggressive: false, Aggressive: false,
Containers: true, Containers: true,
@@ -65,138 +56,14 @@ func NewDefaultEnhancer(client db.ContentRepository, siteID string) *Enhancer {
return NewEnhancer(client, siteID, defaultConfig) return NewEnhancer(client, siteID, defaultConfig)
} }
// EnhanceFile processes a single HTML file through the complete pipeline // EnhanceFile processes a single HTML file through the engine
func (e *Enhancer) EnhanceFile(inputPath, outputPath string) error { func (e *Enhancer) EnhanceFile(inputPath, outputPath string) error {
// Read HTML file return e.engine.ProcessFile(inputPath, outputPath, e.siteID, engine.Enhancement)
htmlContent, err := os.ReadFile(inputPath)
if err != nil {
return fmt.Errorf("reading file %s: %w", inputPath, err)
} }
// Process through unified pipeline // EnhanceDirectory processes all files in a directory through the engine
processedHTML, err := e.processHTML(htmlContent, filepath.Base(inputPath))
if err != nil {
return fmt.Errorf("processing HTML %s: %w", inputPath, err)
}
// Create output directory
if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
return fmt.Errorf("creating output directory: %w", err)
}
// Write processed HTML
return os.WriteFile(outputPath, processedHTML, 0644)
}
// EnhanceDirectory processes all files in a directory through the unified pipeline
func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error { func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
// Create output directory return e.engine.ProcessDirectory(inputDir, outputDir, e.siteID, engine.Enhancement)
if err := os.MkdirAll(outputDir, 0755); err != nil {
return fmt.Errorf("creating output directory: %w", err)
}
// Walk input directory
return filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
// Calculate relative path and output path
relPath, err := filepath.Rel(inputDir, path)
if err != nil {
return err
}
outputPath := filepath.Join(outputDir, relPath)
// Handle directories
if info.IsDir() {
return os.MkdirAll(outputPath, info.Mode())
}
// Process HTML files through enhancement pipeline
if strings.HasSuffix(strings.ToLower(path), ".html") {
return e.EnhanceFile(path, outputPath)
}
// Copy non-HTML files as-is
return e.copyFile(path, outputPath)
})
}
// processHTML runs htmlContent through the enhancement pipeline and returns
// the resulting bytes.
//
// Element discovery runs first, but only when Discovery.Enabled is set. The
// engine pass (ID generation and/or content injection) runs second, and only
// when GenerateIDs or ContentInjection is set. With every option off the
// input is returned unchanged.
func (e *Enhancer) processHTML(htmlContent []byte, filePath string) ([]byte, error) {
	current := htmlContent

	if e.config.Discovery.Enabled {
		discovered, err := e.discoverElements(current, filePath)
		if err != nil {
			return nil, fmt.Errorf("element discovery: %w", err)
		}
		current = discovered
	}

	if !e.config.GenerateIDs && !e.config.ContentInjection {
		return current, nil
	}

	enhanced, err := e.enhanceWithEngine(current, filePath)
	if err != nil {
		return nil, fmt.Errorf("engine enhancement: %w", err)
	}
	return enhanced, nil
}
// discoverElements parses htmlContent, lets the discoverer mark viable
// elements with the insertr class, and returns the re-rendered HTML. The
// discovery counters are collected but discarded, and filePath is not used.
func (e *Enhancer) discoverElements(htmlContent []byte, filePath string) ([]byte, error) {
	root, parseErr := html.Parse(strings.NewReader(string(htmlContent)))
	if parseErr != nil {
		return nil, fmt.Errorf("parsing HTML: %w", parseErr)
	}

	// The tree is modified in place; the stats object only satisfies the
	// discoverNode signature.
	stats := &FileDiscoveryResult{Document: root}
	e.discoverer.discoverNode(root, stats, e.config.Discovery.Aggressive)

	var rendered strings.Builder
	if renderErr := html.Render(&rendered, root); renderErr != nil {
		return nil, fmt.Errorf("rendering HTML: %w", renderErr)
	}
	return []byte(rendered.String()), nil
}
// enhanceWithEngine uses the unified engine for ID generation and content injection
func (e *Enhancer) enhanceWithEngine(htmlContent []byte, filePath string) ([]byte, error) {
// Determine processing mode
var mode engine.ProcessMode
if e.config.ContentInjection {
mode = engine.Enhancement // ID generation + content injection
} else {
mode = engine.IDGeneration // ID generation only
}
// Process with engine
result, err := e.engine.ProcessContent(engine.ContentInput{
HTML: htmlContent,
FilePath: filePath,
SiteID: e.siteID,
Mode: mode,
})
if err != nil {
return nil, fmt.Errorf("engine processing: %w", err)
}
// Render enhanced document
var buf strings.Builder
if err := html.Render(&buf, result.Document); err != nil {
return nil, fmt.Errorf("rendering enhanced HTML: %w", err)
}
return []byte(buf.String()), nil
} }
// SetSiteID sets the site ID for the enhancer // SetSiteID sets the site ID for the enhancer
@@ -261,20 +128,3 @@ func deriveDemoSiteID(sitePath string) string {
return dirName return dirName
} }
// copyFile duplicates src at dst, creating dst's parent directories (mode
// 0755) first. The whole file is read into memory and written with mode 0644.
func (e *Enhancer) copyFile(src, dst string) error {
	if mkErr := os.MkdirAll(filepath.Dir(dst), 0755); mkErr != nil {
		return mkErr
	}

	contents, readErr := os.ReadFile(src)
	if readErr != nil {
		return readErr
	}
	return os.WriteFile(dst, contents, 0644)
}

View File

@@ -1,4 +1,4 @@
package content package db
import ( import (
"context" "context"
@@ -9,11 +9,9 @@ import (
"net/url" "net/url"
"strings" "strings"
"time" "time"
"github.com/insertr/insertr/internal/db"
) )
// HTTPClient implements db.ContentRepository for HTTP API access // HTTPClient implements ContentRepository for HTTP API access
type HTTPClient struct { type HTTPClient struct {
BaseURL string BaseURL string
APIKey string APIKey string
@@ -32,7 +30,7 @@ func NewHTTPClient(baseURL, apiKey string) *HTTPClient {
} }
// GetContent fetches a single content item by ID // GetContent fetches a single content item by ID
func (c *HTTPClient) GetContent(ctx context.Context, siteID, contentID string) (*db.ContentItem, error) { func (c *HTTPClient) GetContent(ctx context.Context, siteID, contentID string) (*ContentItem, error) {
url := fmt.Sprintf("%s/api/content/%s?site_id=%s", c.BaseURL, contentID, siteID) url := fmt.Sprintf("%s/api/content/%s?site_id=%s", c.BaseURL, contentID, siteID)
req, err := http.NewRequest("GET", url, nil) req, err := http.NewRequest("GET", url, nil)
@@ -63,7 +61,7 @@ func (c *HTTPClient) GetContent(ctx context.Context, siteID, contentID string) (
return nil, fmt.Errorf("reading response: %w", err) return nil, fmt.Errorf("reading response: %w", err)
} }
var item db.ContentItem var item ContentItem
if err := json.Unmarshal(body, &item); err != nil { if err := json.Unmarshal(body, &item); err != nil {
return nil, fmt.Errorf("parsing response: %w", err) return nil, fmt.Errorf("parsing response: %w", err)
} }
@@ -72,9 +70,9 @@ func (c *HTTPClient) GetContent(ctx context.Context, siteID, contentID string) (
} }
// GetBulkContent fetches multiple content items by IDs // GetBulkContent fetches multiple content items by IDs
func (c *HTTPClient) GetBulkContent(ctx context.Context, siteID string, contentIDs []string) (map[string]db.ContentItem, error) { func (c *HTTPClient) GetBulkContent(ctx context.Context, siteID string, contentIDs []string) (map[string]ContentItem, error) {
if len(contentIDs) == 0 { if len(contentIDs) == 0 {
return make(map[string]db.ContentItem), nil return make(map[string]ContentItem), nil
} }
// Build query parameters // Build query parameters
@@ -110,13 +108,13 @@ func (c *HTTPClient) GetBulkContent(ctx context.Context, siteID string, contentI
return nil, fmt.Errorf("reading response: %w", err) return nil, fmt.Errorf("reading response: %w", err)
} }
var response db.ContentResponse var response ContentResponse
if err := json.Unmarshal(body, &response); err != nil { if err := json.Unmarshal(body, &response); err != nil {
return nil, fmt.Errorf("parsing response: %w", err) return nil, fmt.Errorf("parsing response: %w", err)
} }
// Convert slice to map for easy lookup // Convert slice to map for easy lookup
result := make(map[string]db.ContentItem) result := make(map[string]ContentItem)
for _, item := range response.Content { for _, item := range response.Content {
result[item.ID] = item result[item.ID] = item
} }
@@ -125,7 +123,7 @@ func (c *HTTPClient) GetBulkContent(ctx context.Context, siteID string, contentI
} }
// GetAllContent fetches all content for a site // GetAllContent fetches all content for a site
func (c *HTTPClient) GetAllContent(ctx context.Context, siteID string) (map[string]db.ContentItem, error) { func (c *HTTPClient) GetAllContent(ctx context.Context, siteID string) (map[string]ContentItem, error) {
url := fmt.Sprintf("%s/api/content?site_id=%s", c.BaseURL, siteID) url := fmt.Sprintf("%s/api/content?site_id=%s", c.BaseURL, siteID)
req, err := http.NewRequest("GET", url, nil) req, err := http.NewRequest("GET", url, nil)
@@ -152,13 +150,13 @@ func (c *HTTPClient) GetAllContent(ctx context.Context, siteID string) (map[stri
return nil, fmt.Errorf("reading response: %w", err) return nil, fmt.Errorf("reading response: %w", err)
} }
var response db.ContentResponse var response ContentResponse
if err := json.Unmarshal(body, &response); err != nil { if err := json.Unmarshal(body, &response); err != nil {
return nil, fmt.Errorf("parsing response: %w", err) return nil, fmt.Errorf("parsing response: %w", err)
} }
// Convert slice to map for easy lookup // Convert slice to map for easy lookup
result := make(map[string]db.ContentItem) result := make(map[string]ContentItem)
for _, item := range response.Content { for _, item := range response.Content {
result[item.ID] = item result[item.ID] = item
} }
@@ -167,50 +165,50 @@ func (c *HTTPClient) GetAllContent(ctx context.Context, siteID string) (map[stri
} }
// CreateContent creates a new content item via HTTP API // CreateContent creates a new content item via HTTP API
func (c *HTTPClient) CreateContent(ctx context.Context, siteID, contentID, htmlContent, originalTemplate, lastEditedBy string) (*db.ContentItem, error) { func (c *HTTPClient) CreateContent(ctx context.Context, siteID, contentID, htmlContent, originalTemplate, lastEditedBy string) (*ContentItem, error) {
// For now, HTTPClient CreateContent is not implemented for enhancer use // For now, HTTPClient CreateContent is not implemented for enhancer use
// This would typically be used in API-driven enhancement scenarios // This would typically be used in API-driven enhancement scenarios
return nil, fmt.Errorf("CreateContent not implemented for HTTPClient - use DatabaseClient for enhancement") return nil, fmt.Errorf("CreateContent not implemented for HTTPClient - use DatabaseClient for enhancement")
} }
// Collection method stubs - TODO: Implement these for HTTP API // Collection method stubs - TODO: Implement these for HTTP API
func (c *HTTPClient) GetCollection(ctx context.Context, siteID, collectionID string) (*db.CollectionItem, error) { func (c *HTTPClient) GetCollection(ctx context.Context, siteID, collectionID string) (*CollectionItem, error) {
return nil, fmt.Errorf("collection operations not implemented in HTTPClient") return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
} }
func (c *HTTPClient) CreateCollection(ctx context.Context, siteID, collectionID, containerHTML, lastEditedBy string) (*db.CollectionItem, error) { func (c *HTTPClient) CreateCollection(ctx context.Context, siteID, collectionID, containerHTML, lastEditedBy string) (*CollectionItem, error) {
return nil, fmt.Errorf("collection operations not implemented in HTTPClient") return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
} }
func (c *HTTPClient) GetCollectionItems(ctx context.Context, siteID, collectionID string) ([]db.CollectionItemWithTemplate, error) { func (c *HTTPClient) GetCollectionItems(ctx context.Context, siteID, collectionID string) ([]CollectionItemWithTemplate, error) {
return nil, fmt.Errorf("collection operations not implemented in HTTPClient") return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
} }
func (c *HTTPClient) CreateCollectionTemplate(ctx context.Context, siteID, collectionID, name, htmlTemplate string, isDefault bool) (*db.CollectionTemplateItem, error) { func (c *HTTPClient) CreateCollectionTemplate(ctx context.Context, siteID, collectionID, name, htmlTemplate string, isDefault bool) (*CollectionTemplateItem, error) {
return nil, fmt.Errorf("collection operations not implemented in HTTPClient") return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
} }
func (c *HTTPClient) GetCollectionTemplates(ctx context.Context, siteID, collectionID string) ([]db.CollectionTemplateItem, error) { func (c *HTTPClient) GetCollectionTemplates(ctx context.Context, siteID, collectionID string) ([]CollectionTemplateItem, error) {
return nil, fmt.Errorf("collection operations not implemented in HTTPClient") return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
} }
func (c *HTTPClient) CreateCollectionItem(ctx context.Context, siteID, collectionID, itemID string, templateID int, htmlContent string, position int, lastEditedBy string) (*db.CollectionItemWithTemplate, error) { func (c *HTTPClient) CreateCollectionItem(ctx context.Context, siteID, collectionID, itemID string, templateID int, htmlContent string, position int, lastEditedBy string) (*CollectionItemWithTemplate, error) {
return nil, fmt.Errorf("collection operations not implemented in HTTPClient") return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
} }
func (c *HTTPClient) CreateCollectionItemAtomic(ctx context.Context, siteID, collectionID string, templateID int, lastEditedBy string) (*db.CollectionItemWithTemplate, error) { func (c *HTTPClient) CreateCollectionItemAtomic(ctx context.Context, siteID, collectionID string, templateID int, lastEditedBy string) (*CollectionItemWithTemplate, error) {
return nil, fmt.Errorf("collection operations not implemented in HTTPClient") return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
} }
func (c *HTTPClient) UpdateContent(ctx context.Context, siteID, contentID, htmlContent, lastEditedBy string) (*db.ContentItem, error) { func (c *HTTPClient) UpdateContent(ctx context.Context, siteID, contentID, htmlContent, lastEditedBy string) (*ContentItem, error) {
return nil, fmt.Errorf("content update operations not implemented in HTTPClient") return nil, fmt.Errorf("content update operations not implemented in HTTPClient")
} }
func (c *HTTPClient) ReorderCollectionItems(ctx context.Context, siteID, collectionID string, items []db.CollectionItemPosition, lastEditedBy string) error { func (c *HTTPClient) ReorderCollectionItems(ctx context.Context, siteID, collectionID string, items []CollectionItemPosition, lastEditedBy string) error {
return fmt.Errorf("collection reordering not implemented in HTTPClient") return fmt.Errorf("collection reordering not implemented in HTTPClient")
} }
// WithTransaction executes a function within a transaction (not supported for HTTP client) // WithTransaction executes a function within a transaction (not supported for HTTP client)
func (c *HTTPClient) WithTransaction(ctx context.Context, fn func(db.ContentRepository) error) error { func (c *HTTPClient) WithTransaction(ctx context.Context, fn func(ContentRepository) error) error {
return fmt.Errorf("transactions not supported for HTTP client") return fmt.Errorf("transactions not supported for HTTP client")
} }

97
internal/engine/file.go Normal file
View File

@@ -0,0 +1,97 @@
package engine
import (
"fmt"
"os"
"path/filepath"
"strings"
"golang.org/x/net/html"
)
// ProcessFile runs a single HTML file through the engine and writes the
// resulting document to outputPath.
//
// The file at inputPath is read in full and handed to ProcessContent together
// with the file's base name (not its full path), siteID and mode. The Document
// of the returned result is then rendered to outputPath by writeHTMLDocument.
//
// Read and processing failures are returned wrapped with context; a failure
// while writing the output is returned as reported by writeHTMLDocument.
func (e *ContentEngine) ProcessFile(inputPath, outputPath, siteID string, mode ProcessMode) error {
	raw, readErr := os.ReadFile(inputPath)
	if readErr != nil {
		return fmt.Errorf("reading file %s: %w", inputPath, readErr)
	}

	// Only the base name is passed on as the content's file path.
	input := ContentInput{
		HTML:     raw,
		FilePath: filepath.Base(inputPath),
		SiteID:   siteID,
		Mode:     mode,
	}

	processed, procErr := e.ProcessContent(input)
	if procErr != nil {
		return fmt.Errorf("processing content: %w", procErr)
	}

	return writeHTMLDocument(outputPath, processed.Document)
}
// ProcessDirectory walks inputDir and mirrors its tree under outputDir.
//
// For every entry visited by filepath.Walk:
//   - directories are recreated under outputDir using the source entry's mode;
//   - files whose path ends in ".html" (case-insensitive) go through ProcessFile;
//   - every other file is copied unchanged with copyFile.
//
// outputDir itself is created up front (mode 0755). The walk stops at the
// first error, which is returned to the caller.
func (e *ContentEngine) ProcessDirectory(inputDir, outputDir, siteID string, mode ProcessMode) error {
	if mkErr := os.MkdirAll(outputDir, 0755); mkErr != nil {
		return fmt.Errorf("failed to create output directory: %w", mkErr)
	}

	visit := func(srcPath string, entry os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}

		// Map the source path onto the same relative location in outputDir.
		rel, relErr := filepath.Rel(inputDir, srcPath)
		if relErr != nil {
			return relErr
		}
		target := filepath.Join(outputDir, rel)

		switch {
		case entry.IsDir():
			return os.MkdirAll(target, entry.Mode())
		case strings.HasSuffix(strings.ToLower(srcPath), ".html"):
			return e.ProcessFile(srcPath, target, siteID, mode)
		default:
			return copyFile(srcPath, target)
		}
	}

	return filepath.Walk(inputDir, visit)
}
// writeHTMLDocument renders doc as HTML into the file at outputPath.
//
// The parent directory of outputPath is created if needed (mode 0755) and the
// file itself is created, or truncated if it already exists, via os.Create.
//
// It returns an error if the directory or file cannot be created, if
// rendering fails, or if closing the file fails. The close error is reported
// only when rendering succeeded, so the first failure is the one returned.
func writeHTMLDocument(outputPath string, doc *html.Node) (err error) {
	if mkErr := os.MkdirAll(filepath.Dir(outputPath), 0755); mkErr != nil {
		return fmt.Errorf("creating output directory: %w", mkErr)
	}

	file, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("creating output file: %w", err)
	}
	// Close can surface a deferred write failure on a file opened for
	// writing, so its error must not be dropped.
	defer func() {
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = fmt.Errorf("closing output file: %w", closeErr)
		}
	}()

	return html.Render(file, doc)
}
// copyFile copies the contents of the file at src to dst.
//
// The parent directory of dst is created if needed (mode 0755). The source is
// read fully into memory and then written to dst with mode 0644, creating the
// file or truncating an existing one. The source file's own mode is not
// carried over.
//
// Each failure is wrapped with the step that failed; the underlying error
// remains reachable through errors.Is / errors.As.
func copyFile(src, dst string) error {
	if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
		return fmt.Errorf("creating directory for %s: %w", dst, err)
	}

	data, err := os.ReadFile(src)
	if err != nil {
		return fmt.Errorf("reading source file: %w", err)
	}

	if err := os.WriteFile(dst, data, 0644); err != nil {
		return fmt.Errorf("writing destination file: %w", err)
	}
	return nil
}

View File

@@ -1,4 +1,4 @@
package content package sites
import ( import (
"fmt" "fmt"
@@ -9,6 +9,7 @@ import (
"sync" "sync"
"github.com/insertr/insertr/internal/config" "github.com/insertr/insertr/internal/config"
"github.com/insertr/insertr/internal/content"
"github.com/insertr/insertr/internal/db" "github.com/insertr/insertr/internal/db"
"github.com/insertr/insertr/internal/engine" "github.com/insertr/insertr/internal/engine"
"maps" "maps"
@@ -17,7 +18,7 @@ import (
// SiteManager handles registration and enhancement of static sites // SiteManager handles registration and enhancement of static sites
type SiteManager struct { type SiteManager struct {
sites map[string]*config.SiteConfig sites map[string]*config.SiteConfig
enhancer *Enhancer enhancer *content.Enhancer
mutex sync.RWMutex mutex sync.RWMutex
devMode bool devMode bool
contentClient db.ContentRepository contentClient db.ContentRepository
@@ -28,7 +29,7 @@ type SiteManager struct {
func NewSiteManager(contentClient db.ContentRepository, devMode bool) *SiteManager { func NewSiteManager(contentClient db.ContentRepository, devMode bool) *SiteManager {
return &SiteManager{ return &SiteManager{
sites: make(map[string]*config.SiteConfig), sites: make(map[string]*config.SiteConfig),
enhancer: NewDefaultEnhancer(contentClient, ""), // siteID will be set per operation enhancer: content.NewDefaultEnhancer(contentClient, ""), // siteID will be set per operation
devMode: devMode, devMode: devMode,
contentClient: contentClient, contentClient: contentClient,
authProvider: &engine.AuthProvider{Type: "mock"}, // default authProvider: &engine.AuthProvider{Type: "mock"}, // default
@@ -152,7 +153,7 @@ func (sm *SiteManager) EnhanceSite(siteID string) error {
// Create enhancer with auth provider for this operation // Create enhancer with auth provider for this operation
// Discovery disabled by default - developers should explicitly mark elements with class="insertr" // Discovery disabled by default - developers should explicitly mark elements with class="insertr"
discoveryConfig := DiscoveryConfig{ discoveryConfig := config.DiscoveryConfig{
Enabled: false, // Changed from true - respect developer intent Enabled: false, // Changed from true - respect developer intent
Aggressive: false, Aggressive: false,
Containers: true, Containers: true,
@@ -166,12 +167,12 @@ func (sm *SiteManager) EnhanceSite(siteID string) error {
siteID, discoveryConfig.Enabled, discoveryConfig.Aggressive) siteID, discoveryConfig.Enabled, discoveryConfig.Aggressive)
} }
config := EnhancementConfig{ config := content.EnhancementConfig{
Discovery: discoveryConfig, Discovery: discoveryConfig,
ContentInjection: true, ContentInjection: true,
GenerateIDs: true, GenerateIDs: true,
} }
enhancer := NewEnhancerWithAuth(sm.contentClient, siteID, config, sm.authProvider) enhancer := content.NewEnhancerWithAuth(sm.contentClient, siteID, config, sm.authProvider)
// Perform enhancement from source to output // Perform enhancement from source to output
if err := enhancer.EnhanceDirectory(sourcePath, outputPath); err != nil { if err := enhancer.EnhanceDirectory(sourcePath, outputPath); err != nil {