From dbdd4361b7f5785359313a78a4018150796b1e8a Mon Sep 17 00:00:00 2001 From: Joakim Date: Sun, 19 Oct 2025 22:37:26 +0200 Subject: [PATCH] Refactor architecture: eliminate auto-discovery and consolidate packages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove auto-discovery entirely (~450 lines) * Delete internal/content/discoverer.go * Simplify enhancer to single-phase processing * Remove duplicate container expansion logic - Consolidate repository implementations * Move internal/content/client.go → internal/db/http_client.go * Group all repository implementations in db/ package - Add file utilities to engine following Go stdlib patterns * Add engine.ProcessFile() and ProcessDirectory() methods * Engine now handles both content processing AND file operations - Move site management to dedicated package * Move internal/content/site_manager.go → internal/sites/manager.go * Clear separation of site lifecycle from content processing - Preserve container expansion (syntactic sugar) * .insertr on containers still auto-applies to viable children * Container detection logic consolidated in engine/utils.go Result: Clean architecture with single source of truth for .insertr processing --- cmd/enhance.go | 5 +- cmd/serve.go | 10 +- internal/api/handlers.go | 6 +- internal/content/discoverer.go | 462 ------------------ internal/content/enhancer.go | 180 +------ .../{content/client.go => db/http_client.go} | 46 +- internal/engine/file.go | 97 ++++ .../site_manager.go => sites/manager.go} | 13 +- 8 files changed, 152 insertions(+), 667 deletions(-) delete mode 100644 internal/content/discoverer.go rename internal/{content/client.go => db/http_client.go} (83%) create mode 100644 internal/engine/file.go rename internal/{content/site_manager.go => sites/manager.go} (94%) diff --git a/cmd/enhance.go b/cmd/enhance.go index 62583ed..d1d69d6 100644 --- a/cmd/enhance.go +++ b/cmd/enhance.go @@ -9,6 +9,7 @@ import ( "github.com/spf13/cobra" + "github.com/insertr/insertr/internal/config" "github.com/insertr/insertr/internal/content" "github.com/insertr/insertr/internal/db" ) @@ -95,7 +96,7 @@ func runEnhance(cmd *cobra.Command, args []string) { var client db.ContentRepository if cfg.API.URL != "" { fmt.Printf("🌐 Using content API: %s\n", cfg.API.URL) - client = content.NewHTTPClient(cfg.API.URL, cfg.API.Key) + client = db.NewHTTPClient(cfg.API.URL, cfg.API.Key) } else if cfg.Database.Path != "" { fmt.Printf("🗄️ Using database: %s\n", cfg.Database.Path) database, err := db.NewDatabase(cfg.Database.Path) @@ -110,7 +111,7 @@ func runEnhance(cmd *cobra.Command, args []string) { // Load site-specific configuration enhancementConfig := content.EnhancementConfig{ - Discovery: content.DiscoveryConfig{ + Discovery: config.DiscoveryConfig{ Enabled: false, // Default: disabled for explicit class="insertr" markings only Aggressive: false, Containers: true, diff --git a/cmd/serve.go b/cmd/serve.go index 383834f..5b5b161 100644 --- a/cmd/serve.go +++ b/cmd/serve.go @@ -17,9 +17,9 @@ import ( "github.com/insertr/insertr/internal/api" "github.com/insertr/insertr/internal/auth" "github.com/insertr/insertr/internal/config" - "github.com/insertr/insertr/internal/content" "github.com/insertr/insertr/internal/db" "github.com/insertr/insertr/internal/engine" + "github.com/insertr/insertr/internal/sites" ) var serveCmd = &cobra.Command{ @@ -76,14 +76,14 @@ func runServe(cmd *cobra.Command, args []string) { } // Create legacy auth config for compatibility - authConfig := &auth.AuthConfig{ + authConfig := &config.AuthConfig{ DevMode: cfg.Auth.DevMode, Provider: cfg.Auth.Provider, JWTSecret: cfg.Auth.JWTSecret, } if cfg.Auth.OIDC != nil { - authConfig.OIDC = &auth.OIDCConfig{ + authConfig.OIDC = &config.OIDCConfig{ Endpoint: cfg.Auth.OIDC.Endpoint, ClientID: cfg.Auth.OIDC.ClientID, ClientSecret: cfg.Auth.OIDC.ClientSecret, @@ -107,7 +107,7 @@ func runServe(cmd *cobra.Command, args []string) { // Initialize site manager with auth provider authProvider := &engine.AuthProvider{Type: cfg.Auth.Provider} - siteManager := content.NewSiteManagerWithAuth(contentClient, cfg.Auth.DevMode, authProvider) + siteManager := sites.NewSiteManagerWithAuth(contentClient, cfg.Auth.DevMode, authProvider) // Convert config sites to legacy format and register var legacySites []*config.SiteConfig @@ -120,7 +120,7 @@ func runServe(cmd *cobra.Command, args []string) { AutoEnhance: site.AutoEnhance, } if site.Discovery != nil { - legacySite.Discovery = &content.DiscoveryConfig{ + legacySite.Discovery = &config.DiscoveryConfig{ Enabled: site.Discovery.Enabled, Aggressive: site.Discovery.Aggressive, Containers: site.Discovery.Containers, diff --git a/internal/api/handlers.go b/internal/api/handlers.go index c2a0229..a6c3ed8 100644 --- a/internal/api/handlers.go +++ b/internal/api/handlers.go @@ -9,16 +9,16 @@ import ( "github.com/go-chi/chi/v5" "github.com/insertr/insertr/internal/auth" - "github.com/insertr/insertr/internal/content" "github.com/insertr/insertr/internal/db" "github.com/insertr/insertr/internal/engine" + "github.com/insertr/insertr/internal/sites" ) // ContentHandler handles all content-related HTTP requests type ContentHandler struct { repository db.ContentRepository authService *auth.AuthService - siteManager *content.SiteManager + siteManager *sites.SiteManager engine *engine.ContentEngine } @@ -36,7 +36,7 @@ func NewContentHandler(database *db.Database, authService *auth.AuthService) *Co } // SetSiteManager sets the site manager for file enhancement -func (h *ContentHandler) SetSiteManager(siteManager *content.SiteManager) { +func (h *ContentHandler) SetSiteManager(siteManager *sites.SiteManager) { h.siteManager = siteManager } diff --git a/internal/content/discoverer.go b/internal/content/discoverer.go deleted file mode 100644 index 22c8a08..0000000 --- a/internal/content/discoverer.go +++ /dev/null @@ -1,462 +0,0 @@ -package content - -import ( - "fmt" - "io/fs" - "os" - "path/filepath" - "strings" - - "github.com/insertr/insertr/internal/engine" - "golang.org/x/net/html" -) - -// Discoverer handles automatic discovery of editable elements in HTML -type Discoverer struct { - // Element discovery is now self-contained and configurable -} - -// NewDiscoverer creates a new Discoverer instance -func NewDiscoverer() *Discoverer { - return &Discoverer{} -} - -// DiscoveryResult contains statistics about element discovery -type DiscoveryResult struct { - FilesProcessed int - ElementsEnhanced int - ContainersAdded int - IndividualsAdded int - SkippedFiles []string - EnhancedFiles []string -} - -// DiscoverDirectory discovers editable elements in all HTML files in a directory -func (disc *Discoverer) DiscoverDirectory(inputDir, outputDir string, aggressive bool) (*DiscoveryResult, error) { - result := &DiscoveryResult{ - SkippedFiles: []string{}, - EnhancedFiles: []string{}, - } - - // Create output directory if it doesn't exist - if err := os.MkdirAll(outputDir, 0755); err != nil { - return nil, fmt.Errorf("failed to create output directory: %w", err) - } - - err := filepath.WalkDir(inputDir, func(path string, d fs.DirEntry, err error) error { - if err != nil { - return err - } - - // Skip directories - if d.IsDir() { - return nil - } - - // Only process HTML files - if !strings.HasSuffix(strings.ToLower(path), ".html") { - // Copy non-HTML files as-is - return disc.copyFile(path, inputDir, outputDir) - } - - // Discover elements in HTML file - enhanced, err := disc.discoverFile(path, aggressive) - if err != nil { - result.SkippedFiles = append(result.SkippedFiles, path) - // Copy original file on error - return disc.copyFile(path, inputDir, outputDir) - } - - // Write enhanced file - outputPath := disc.getOutputPath(path, inputDir, outputDir) - if err := disc.writeEnhancedFile(outputPath, enhanced); err != nil { - return fmt.Errorf("failed to write enhanced file %s: %w", outputPath, err) - } - - result.FilesProcessed++ - result.ElementsEnhanced += enhanced.ElementsEnhanced - result.ContainersAdded += enhanced.ContainersAdded - result.IndividualsAdded += enhanced.IndividualsAdded - result.EnhancedFiles = append(result.EnhancedFiles, outputPath) - - return nil - }) - - return result, err -} - -// FileDiscoveryResult contains details about a single file discovery -type FileDiscoveryResult struct { - ElementsEnhanced int - ContainersAdded int - IndividualsAdded int - SugarTransformed int // Count of syntactic sugar transformations - Document *html.Node -} - -// discoverFile processes a single HTML file and adds insertr classes -func (disc *Discoverer) discoverFile(filePath string, aggressive bool) (*FileDiscoveryResult, error) { - file, err := os.Open(filePath) - if err != nil { - return nil, fmt.Errorf("error opening file: %w", err) - } - defer file.Close() - - doc, err := html.Parse(file) - if err != nil { - return nil, fmt.Errorf("error parsing HTML: %w", err) - } - - result := &FileDiscoveryResult{Document: doc} - - // Find candidates for enhancement - disc.discoverNode(doc, result, aggressive) - - return result, nil -} - -// discoverNode recursively discovers editable nodes in the document -func (disc *Discoverer) discoverNode(node *html.Node, result *FileDiscoveryResult, aggressive bool) { - if node.Type != html.ElementNode { - // Recursively check children - for child := node.FirstChild; child != nil; child = child.NextSibling { - disc.discoverNode(child, result, aggressive) - } - return - } - - // Handle syntactic sugar: containers with .insertr class - if disc.hasInsertrClass(node) && disc.isGoodContainer(node) { - // Syntactic sugar transformation: remove .insertr from container, add to viable children - viableChildren := engine.FindViableChildren(node) - if len(viableChildren) >= 1 { - disc.removeInsertrClass(node) // Remove from container - for _, child := range viableChildren { - if !disc.hasInsertrClass(child) { - disc.addInsertrClass(child) - result.IndividualsAdded++ - result.ElementsEnhanced++ - } - } - result.ContainersAdded++ - result.SugarTransformed++ // Track sugar transformations - - // Don't process children since we just processed them - return - } - // If no viable children, leave .insertr on the element (individual editing) - return - } - - // Skip if already has insertr class (individual editing elements) - if disc.hasInsertrClass(node) { - return - } - - // Check if this is a container that should use expansion - if disc.isGoodContainer(node) { - viableChildren := engine.FindViableChildren(node) - if len(viableChildren) >= 2 || (aggressive && len(viableChildren) >= 1) { - // Container expansion: add insertr class to each viable child, not the container - for _, child := range viableChildren { - if !disc.hasInsertrClass(child) { - disc.addInsertrClass(child) - result.IndividualsAdded++ - result.ElementsEnhanced++ - } - } - result.ContainersAdded++ - - // Don't process children since we just processed them - return - } - } - - // Check if this individual element should be enhanced - if disc.isGoodIndividualElement(node) { - disc.addInsertrClass(node) - result.IndividualsAdded++ - result.ElementsEnhanced++ - - // Don't process children of enhanced individual elements - return - } - - // Recursively check children - for child := node.FirstChild; child != nil; child = child.NextSibling { - disc.discoverNode(child, result, aggressive) - } -} - -// isGoodContainer checks if an element is a good candidate for container expansion -func (disc *Discoverer) isGoodContainer(node *html.Node) bool { - containerTags := map[string]bool{ - "div": true, - "section": true, - "article": true, - "header": true, - "footer": true, - "main": true, - "aside": true, - "nav": true, - } - - tag := strings.ToLower(node.Data) - if !containerTags[tag] { - return false - } - - // Skip containers that are clearly non-content - if disc.isNonContentElement(node) { - return false - } - - // Skip containers in the head section - if disc.isInHead(node) { - return false - } - - // Skip containers with technical/framework-specific classes that suggest they're not content - classes := disc.getClasses(node) - for _, class := range classes { - lowerClass := strings.ToLower(class) - // Skip Next.js internal classes and other framework artifacts - if strings.Contains(lowerClass, "__next") || - strings.Contains(lowerClass, "webpack") || - strings.Contains(lowerClass, "hydration") || - strings.Contains(lowerClass, "react") || - strings.Contains(lowerClass, "gatsby") { - return false - } - } - - return true -} - -// isGoodIndividualElement checks if an element is a good candidate for individual enhancement -func (disc *Discoverer) isGoodIndividualElement(node *html.Node) bool { - // Skip self-closing elements - if disc.isSelfClosing(node) { - return false - } - - // Skip non-content elements that should never be editable - if disc.isNonContentElement(node) { - return false - } - - // Skip elements inside head section - if disc.isInHead(node) { - return false - } - - // Skip elements with no meaningful content - if disc.hasNoMeaningfulContent(node) { - return false - } - - // Check if element has editable content - return disc.hasEditableContent(node) -} - -// hasEditableContent uses the engine's enhanced detection logic -func (disc *Discoverer) hasEditableContent(node *html.Node) bool { - return engine.HasEditableContent(node) -} - -// hasInsertrClass checks if a node already has the insertr class -func (disc *Discoverer) hasInsertrClass(node *html.Node) bool { - classes := disc.getClasses(node) - for _, class := range classes { - if class == "insertr" { - return true - } - } - return false -} - -// addInsertrClass adds the insertr class to a node -func (disc *Discoverer) addInsertrClass(node *html.Node) { - classes := disc.getClasses(node) - classes = append(classes, "insertr") - disc.setClasses(node, classes) -} - -// removeInsertrClass removes the insertr class from a node -func (disc *Discoverer) removeInsertrClass(node *html.Node) { - classes := disc.getClasses(node) - var filteredClasses []string - for _, class := range classes { - if class != "insertr" { - filteredClasses = append(filteredClasses, class) - } - } - disc.setClasses(node, filteredClasses) -} - -// getClasses extracts CSS classes from a node -func (disc *Discoverer) getClasses(node *html.Node) []string { - for i, attr := range node.Attr { - if attr.Key == "class" { - if attr.Val == "" { - return []string{} - } - return strings.Fields(attr.Val) - } - // Update existing class attribute - if attr.Key == "class" { - node.Attr[i] = attr - return strings.Fields(attr.Val) - } - } - return []string{} -} - -// setClasses sets CSS classes on a node -func (disc *Discoverer) setClasses(node *html.Node, classes []string) { - classValue := strings.Join(classes, " ") - - // Update existing class attribute or add new one - for i, attr := range node.Attr { - if attr.Key == "class" { - node.Attr[i].Val = classValue - return - } - } - - // Add new class attribute - node.Attr = append(node.Attr, html.Attribute{ - Key: "class", - Val: classValue, - }) -} - -// isSelfClosing checks if an element is self-closing -func (disc *Discoverer) isSelfClosing(node *html.Node) bool { - selfClosingTags := map[string]bool{ - "img": true, "input": true, "br": true, "hr": true, - "meta": true, "link": true, "area": true, "base": true, - "col": true, "embed": true, "source": true, "track": true, "wbr": true, - } - return selfClosingTags[strings.ToLower(node.Data)] -} - -// isNonContentElement checks if an element should never be editable -func (disc *Discoverer) isNonContentElement(node *html.Node) bool { - nonContentTags := map[string]bool{ - "script": true, // JavaScript code - "style": true, // CSS styles - "meta": true, // Metadata - "link": true, // Links to resources - "title": true, // Document title (handled separately) - "head": true, // Document head - "html": true, // Root element - "body": true, // Body element (too broad) - "noscript": true, // Fallback content - "template": true, // HTML templates - "svg": true, // SVG graphics (complex) - "canvas": true, // Canvas graphics - "iframe": true, // Embedded content - "object": true, // Embedded objects - "embed": true, // Embedded content - "video": true, // Video elements (complex) - "audio": true, // Audio elements (complex) - "map": true, // Image maps - "area": true, // Image map areas - "base": true, // Base URL - "col": true, // Table columns - "colgroup": true, // Table column groups - "track": true, // Video/audio tracks - "source": true, // Media sources - "param": true, // Object parameters - "wbr": true, // Word break opportunities - } - return nonContentTags[strings.ToLower(node.Data)] -} - -// isInHead checks if a node is inside the document head -func (disc *Discoverer) isInHead(node *html.Node) bool { - current := node.Parent - for current != nil { - if current.Type == html.ElementNode && strings.ToLower(current.Data) == "head" { - return true - } - current = current.Parent - } - return false -} - -// hasNoMeaningfulContent checks if an element has no meaningful text content -func (disc *Discoverer) hasNoMeaningfulContent(node *html.Node) bool { - if node.Type != html.ElementNode { - return true - } - - // Extract text content - content := engine.ExtractTextContent(node) - - // Empty or whitespace-only content - if content == "" { - return true - } - - // Very short content that's likely not meaningful - if len(content) < 2 { - return true - } - - // Content that looks like technical artifacts - technicalPatterns := []string{ - "$", "", "{", "}", "[", "]", - "function", "var ", "const ", "let ", "return", - "import", "export", "require", "module.exports", - "/*", "*/", "//", "", "<%", "%>", - } - - for _, pattern := range technicalPatterns { - if strings.Contains(content, pattern) { - return true - } - } - - return false -} - -// copyFile copies a file from input to output directory -func (disc *Discoverer) copyFile(filePath, inputDir, outputDir string) error { - outputPath := disc.getOutputPath(filePath, inputDir, outputDir) - - // Create output directory for the file - if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { - return err - } - - input, err := os.ReadFile(filePath) - if err != nil { - return err - } - - return os.WriteFile(outputPath, input, 0644) -} - -// getOutputPath converts input path to output path -func (disc *Discoverer) getOutputPath(filePath, inputDir, outputDir string) string { - relPath, _ := filepath.Rel(inputDir, filePath) - return filepath.Join(outputDir, relPath) -} - -// writeEnhancedFile writes the enhanced HTML document to a file -func (disc *Discoverer) writeEnhancedFile(outputPath string, enhanced *FileDiscoveryResult) error { - // Create output directory - if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { - return err - } - - file, err := os.Create(outputPath) - if err != nil { - return err - } - defer file.Close() - - return html.Render(file, enhanced.Document) -} diff --git a/internal/content/enhancer.go b/internal/content/enhancer.go index 5013d12..5506b14 100644 --- a/internal/content/enhancer.go +++ b/internal/content/enhancer.go @@ -1,9 +1,6 @@ package content import ( - "fmt" - "golang.org/x/net/html" - "os" "path/filepath" "strings" @@ -14,46 +11,40 @@ import ( // EnhancementConfig configures the enhancement pipeline type EnhancementConfig struct { - Discovery DiscoveryConfig + Discovery config.DiscoveryConfig ContentInjection bool GenerateIDs bool } -// Type alias for backward compatibility -type DiscoveryConfig = config.DiscoveryConfig - // Enhancer combines discovery, ID generation, and content injection in unified pipeline type Enhancer struct { - engine *engine.ContentEngine - discoverer *Discoverer - config EnhancementConfig - siteID string + engine *engine.ContentEngine + config EnhancementConfig + siteID string } // NewEnhancer creates a new HTML enhancer with unified pipeline func NewEnhancer(client db.ContentRepository, siteID string, config EnhancementConfig) *Enhancer { return &Enhancer{ - engine: engine.NewContentEngine(client), - discoverer: NewDiscoverer(), - config: config, - siteID: siteID, + engine: engine.NewContentEngine(client), + config: config, + siteID: siteID, } } // NewEnhancerWithAuth creates a new HTML enhancer with auth provider func NewEnhancerWithAuth(client db.ContentRepository, siteID string, config EnhancementConfig, authProvider *engine.AuthProvider) *Enhancer { return &Enhancer{ - engine: engine.NewContentEngineWithAuth(client, authProvider), - discoverer: NewDiscoverer(), - config: config, - siteID: siteID, + engine: engine.NewContentEngineWithAuth(client, authProvider), + config: config, + siteID: siteID, } } // NewDefaultEnhancer creates an enhancer with default configuration func NewDefaultEnhancer(client db.ContentRepository, siteID string) *Enhancer { defaultConfig := EnhancementConfig{ - Discovery: DiscoveryConfig{ + Discovery: config.DiscoveryConfig{ Enabled: true, Aggressive: false, Containers: true, @@ -65,138 +56,14 @@ func NewDefaultEnhancer(client db.ContentRepository, siteID string) *Enhancer { return NewEnhancer(client, siteID, defaultConfig) } -// EnhanceFile processes a single HTML file through the complete pipeline +// EnhanceFile processes a single HTML file through the engine func (e *Enhancer) EnhanceFile(inputPath, outputPath string) error { - // Read HTML file - htmlContent, err := os.ReadFile(inputPath) - if err != nil { - return fmt.Errorf("reading file %s: %w", inputPath, err) - } - - // Process through unified pipeline - processedHTML, err := e.processHTML(htmlContent, filepath.Base(inputPath)) - if err != nil { - return fmt.Errorf("processing HTML %s: %w", inputPath, err) - } - - // Create output directory - if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { - return fmt.Errorf("creating output directory: %w", err) - } - - // Write processed HTML - return os.WriteFile(outputPath, processedHTML, 0644) + return e.engine.ProcessFile(inputPath, outputPath, e.siteID, engine.Enhancement) } -// EnhanceDirectory processes all files in a directory through the unified pipeline +// EnhanceDirectory processes all files in a directory through the engine func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error { - // Create output directory - if err := os.MkdirAll(outputDir, 0755); err != nil { - return fmt.Errorf("creating output directory: %w", err) - } - - // Walk input directory - return filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - // Calculate relative path and output path - relPath, err := filepath.Rel(inputDir, path) - if err != nil { - return err - } - outputPath := filepath.Join(outputDir, relPath) - - // Handle directories - if info.IsDir() { - return os.MkdirAll(outputPath, info.Mode()) - } - - // Process HTML files through enhancement pipeline - if strings.HasSuffix(strings.ToLower(path), ".html") { - return e.EnhanceFile(path, outputPath) - } - - // Copy non-HTML files as-is - return e.copyFile(path, outputPath) - }) -} - -// processHTML implements the unified enhancement pipeline -func (e *Enhancer) processHTML(htmlContent []byte, filePath string) ([]byte, error) { - var processedHTML []byte = htmlContent - - // Phase 1: Element Discovery (if enabled) - if e.config.Discovery.Enabled { - discoveredHTML, err := e.discoverElements(processedHTML, filePath) - if err != nil { - return nil, fmt.Errorf("element discovery: %w", err) - } - processedHTML = discoveredHTML - } - - // Phase 2 & 3: ID Generation + Content Injection (via engine) - if e.config.GenerateIDs || e.config.ContentInjection { - enhancedHTML, err := e.enhanceWithEngine(processedHTML, filePath) - if err != nil { - return nil, fmt.Errorf("engine enhancement: %w", err) - } - processedHTML = enhancedHTML - } - - return processedHTML, nil -} - -// discoverElements adds insertr classes to viable elements -func (e *Enhancer) discoverElements(htmlContent []byte, filePath string) ([]byte, error) { - // Parse HTML - doc, err := html.Parse(strings.NewReader(string(htmlContent))) - if err != nil { - return nil, fmt.Errorf("parsing HTML: %w", err) - } - - // Find and mark viable elements - result := &FileDiscoveryResult{Document: doc} - e.discoverer.discoverNode(doc, result, e.config.Discovery.Aggressive) - - // Render back to HTML - var buf strings.Builder - if err := html.Render(&buf, doc); err != nil { - return nil, fmt.Errorf("rendering HTML: %w", err) - } - - return []byte(buf.String()), nil -} - -// enhanceWithEngine uses the unified engine for ID generation and content injection -func (e *Enhancer) enhanceWithEngine(htmlContent []byte, filePath string) ([]byte, error) { - // Determine processing mode - var mode engine.ProcessMode - if e.config.ContentInjection { - mode = engine.Enhancement // ID generation + content injection - } else { - mode = engine.IDGeneration // ID generation only - } - - // Process with engine - result, err := e.engine.ProcessContent(engine.ContentInput{ - HTML: htmlContent, - FilePath: filePath, - SiteID: e.siteID, - Mode: mode, - }) - if err != nil { - return nil, fmt.Errorf("engine processing: %w", err) - } - - // Render enhanced document - var buf strings.Builder - if err := html.Render(&buf, result.Document); err != nil { - return nil, fmt.Errorf("rendering enhanced HTML: %w", err) - } - - return []byte(buf.String()), nil + return e.engine.ProcessDirectory(inputDir, outputDir, e.siteID, engine.Enhancement) } // SetSiteID sets the site ID for the enhancer @@ -261,20 +128,3 @@ func deriveDemoSiteID(sitePath string) string { return dirName } - -// copyFile copies a file from src to dst -func (e *Enhancer) copyFile(src, dst string) error { - // Create directory for destination - if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil { - return err - } - - // Read source - data, err := os.ReadFile(src) - if err != nil { - return err - } - - // Write destination - return os.WriteFile(dst, data, 0644) -} diff --git a/internal/content/client.go b/internal/db/http_client.go similarity index 83% rename from internal/content/client.go rename to internal/db/http_client.go index 0221152..a0ad514 100644 --- a/internal/content/client.go +++ b/internal/db/http_client.go @@ -1,4 +1,4 @@ -package content +package db import ( "context" @@ -9,11 +9,9 @@ import ( "net/url" "strings" "time" - - "github.com/insertr/insertr/internal/db" ) -// HTTPClient implements db.ContentRepository for HTTP API access +// HTTPClient implements ContentRepository for HTTP API access type HTTPClient struct { BaseURL string APIKey string @@ -32,7 +30,7 @@ func NewHTTPClient(baseURL, apiKey string) *HTTPClient { } // GetContent fetches a single content item by ID -func (c *HTTPClient) GetContent(ctx context.Context, siteID, contentID string) (*db.ContentItem, error) { +func (c *HTTPClient) GetContent(ctx context.Context, siteID, contentID string) (*ContentItem, error) { url := fmt.Sprintf("%s/api/content/%s?site_id=%s", c.BaseURL, contentID, siteID) req, err := http.NewRequest("GET", url, nil) @@ -63,7 +61,7 @@ func (c *HTTPClient) GetContent(ctx context.Context, siteID, contentID string) ( return nil, fmt.Errorf("reading response: %w", err) } - var item db.ContentItem + var item ContentItem if err := json.Unmarshal(body, &item); err != nil { return nil, fmt.Errorf("parsing response: %w", err) } @@ -72,9 +70,9 @@ func (c *HTTPClient) GetContent(ctx context.Context, siteID, contentID string) ( } // GetBulkContent fetches multiple content items by IDs -func (c *HTTPClient) GetBulkContent(ctx context.Context, siteID string, contentIDs []string) (map[string]db.ContentItem, error) { +func (c *HTTPClient) GetBulkContent(ctx context.Context, siteID string, contentIDs []string) (map[string]ContentItem, error) { if len(contentIDs) == 0 { - return make(map[string]db.ContentItem), nil + return make(map[string]ContentItem), nil } // Build query parameters @@ -110,13 +108,13 @@ func (c *HTTPClient) GetBulkContent(ctx context.Context, siteID string, contentI return nil, fmt.Errorf("reading response: %w", err) } - var response db.ContentResponse + var response ContentResponse if err := json.Unmarshal(body, &response); err != nil { return nil, fmt.Errorf("parsing response: %w", err) } // Convert slice to map for easy lookup - result := make(map[string]db.ContentItem) + result := make(map[string]ContentItem) for _, item := range response.Content { result[item.ID] = item } @@ -125,7 +123,7 @@ func (c *HTTPClient) GetBulkContent(ctx context.Context, siteID string, contentI } // GetAllContent fetches all content for a site -func (c *HTTPClient) GetAllContent(ctx context.Context, siteID string) (map[string]db.ContentItem, error) { +func (c *HTTPClient) GetAllContent(ctx context.Context, siteID string) (map[string]ContentItem, error) { url := fmt.Sprintf("%s/api/content?site_id=%s", c.BaseURL, siteID) req, err := http.NewRequest("GET", url, nil) @@ -152,13 +150,13 @@ func (c *HTTPClient) GetAllContent(ctx context.Context, siteID string) (map[stri return nil, fmt.Errorf("reading response: %w", err) } - var response db.ContentResponse + var response ContentResponse if err := json.Unmarshal(body, &response); err != nil { return nil, fmt.Errorf("parsing response: %w", err) } // Convert slice to map for easy lookup - result := make(map[string]db.ContentItem) + result := make(map[string]ContentItem) for _, item := range response.Content { result[item.ID] = item } @@ -167,50 +165,50 @@ func (c *HTTPClient) GetAllContent(ctx context.Context, siteID string) (map[stri } // CreateContent creates a new content item via HTTP API -func (c *HTTPClient) CreateContent(ctx context.Context, siteID, contentID, htmlContent, originalTemplate, lastEditedBy string) (*db.ContentItem, error) { +func (c *HTTPClient) CreateContent(ctx context.Context, siteID, contentID, htmlContent, originalTemplate, lastEditedBy string) (*ContentItem, error) { // For now, HTTPClient CreateContent is not implemented for enhancer use // This would typically be used in API-driven enhancement scenarios return nil, fmt.Errorf("CreateContent not implemented for HTTPClient - use DatabaseClient for enhancement") } // Collection method stubs - TODO: Implement these for HTTP API -func (c *HTTPClient) GetCollection(ctx context.Context, siteID, collectionID string) (*db.CollectionItem, error) { +func (c *HTTPClient) GetCollection(ctx context.Context, siteID, collectionID string) (*CollectionItem, error) { return nil, fmt.Errorf("collection operations not implemented in HTTPClient") } -func (c *HTTPClient) CreateCollection(ctx context.Context, siteID, collectionID, containerHTML, lastEditedBy string) (*db.CollectionItem, error) { +func (c *HTTPClient) CreateCollection(ctx context.Context, siteID, collectionID, containerHTML, lastEditedBy string) (*CollectionItem, error) { return nil, fmt.Errorf("collection operations not implemented in HTTPClient") } -func (c *HTTPClient) GetCollectionItems(ctx context.Context, siteID, collectionID string) ([]db.CollectionItemWithTemplate, error) { +func (c *HTTPClient) GetCollectionItems(ctx context.Context, siteID, collectionID string) ([]CollectionItemWithTemplate, error) { return nil, fmt.Errorf("collection operations not implemented in HTTPClient") } -func (c *HTTPClient) CreateCollectionTemplate(ctx context.Context, siteID, collectionID, name, htmlTemplate string, isDefault bool) (*db.CollectionTemplateItem, error) { +func (c *HTTPClient) CreateCollectionTemplate(ctx context.Context, siteID, collectionID, name, htmlTemplate string, isDefault bool) (*CollectionTemplateItem, error) { return nil, fmt.Errorf("collection operations not implemented in HTTPClient") } -func (c *HTTPClient) GetCollectionTemplates(ctx context.Context, siteID, collectionID string) ([]db.CollectionTemplateItem, error) { +func (c *HTTPClient) GetCollectionTemplates(ctx context.Context, siteID, collectionID string) ([]CollectionTemplateItem, error) { return nil, fmt.Errorf("collection operations not implemented in HTTPClient") } -func (c *HTTPClient) CreateCollectionItem(ctx context.Context, siteID, collectionID, itemID string, templateID int, htmlContent string, position int, lastEditedBy string) (*db.CollectionItemWithTemplate, error) { +func (c *HTTPClient) CreateCollectionItem(ctx context.Context, siteID, collectionID, itemID string, templateID int, htmlContent string, position int, lastEditedBy string) (*CollectionItemWithTemplate, error) { return nil, fmt.Errorf("collection operations not implemented in HTTPClient") } -func (c *HTTPClient) CreateCollectionItemAtomic(ctx context.Context, siteID, collectionID string, templateID int, lastEditedBy string) (*db.CollectionItemWithTemplate, error) { +func (c *HTTPClient) CreateCollectionItemAtomic(ctx context.Context, siteID, collectionID string, templateID int, lastEditedBy string) (*CollectionItemWithTemplate, error) { return nil, fmt.Errorf("collection operations not implemented in HTTPClient") } -func (c *HTTPClient) UpdateContent(ctx context.Context, siteID, contentID, htmlContent, lastEditedBy string) (*db.ContentItem, error) { +func (c *HTTPClient) UpdateContent(ctx context.Context, siteID, contentID, htmlContent, lastEditedBy string) (*ContentItem, error) { return nil, fmt.Errorf("content update operations not implemented in HTTPClient") } -func (c *HTTPClient) ReorderCollectionItems(ctx context.Context, siteID, collectionID string, items []db.CollectionItemPosition, lastEditedBy string) error { +func (c *HTTPClient) ReorderCollectionItems(ctx context.Context, siteID, collectionID string, items []CollectionItemPosition, lastEditedBy string) error { return fmt.Errorf("collection reordering not implemented in HTTPClient") } // WithTransaction executes a function within a transaction (not supported for HTTP client) -func (c *HTTPClient) WithTransaction(ctx context.Context, fn func(db.ContentRepository) error) error { +func (c *HTTPClient) WithTransaction(ctx context.Context, fn func(ContentRepository) error) error { return fmt.Errorf("transactions not supported for HTTP client") } diff --git a/internal/engine/file.go b/internal/engine/file.go new file mode 100644 index 0000000..4a5f296 --- /dev/null +++ b/internal/engine/file.go @@ -0,0 +1,97 @@ +package engine + +import ( + "fmt" + "os" + "path/filepath" + "strings" + + "golang.org/x/net/html" +) + +// ProcessFile processes a single HTML file (following Go stdlib pattern like os.ReadFile/WriteFile) +func (e *ContentEngine) ProcessFile(inputPath, outputPath, siteID string, mode ProcessMode) error { + htmlContent, err := os.ReadFile(inputPath) + if err != nil { + return fmt.Errorf("reading file %s: %w", inputPath, err) + } + + result, err := e.ProcessContent(ContentInput{ + HTML: htmlContent, + FilePath: filepath.Base(inputPath), + SiteID: siteID, + Mode: mode, + }) + if err != nil { + return fmt.Errorf("processing content: %w", err) + } + + return writeHTMLDocument(outputPath, result.Document) +} + +// ProcessDirectory processes all HTML files in a directory (following filepath.Walk pattern) +func (e *ContentEngine) ProcessDirectory(inputDir, outputDir, siteID string, mode ProcessMode) error { + // Create output directory if it doesn't exist + if err := os.MkdirAll(outputDir, 0755); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + + return filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Calculate relative path for output + relPath, err := filepath.Rel(inputDir, path) + if err != nil { + return err + } + outputPath := filepath.Join(outputDir, relPath) + + // Handle directories + if info.IsDir() { + return os.MkdirAll(outputPath, info.Mode()) + } + + // Process HTML files + if strings.HasSuffix(strings.ToLower(path), ".html") { + return e.ProcessFile(path, outputPath, siteID, mode) + } + + // Copy non-HTML files as-is + return copyFile(path, outputPath) + }) +} + +// writeHTMLDocument writes an HTML document to a file +func writeHTMLDocument(outputPath string, doc *html.Node) error { + // Create output directory + if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { + return fmt.Errorf("creating output directory: %w", err) + } + + file, err := os.Create(outputPath) + if err != nil { + return fmt.Errorf("creating output file: %w", err) + } + defer file.Close() + + return html.Render(file, doc) +} + +// copyFile copies a file from src to dst +func copyFile(src, dst string) error { + // Create directory for destination + if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil { + return err + } + + // Read source + data, err := os.ReadFile(src) + if err != nil { + return err + } + + // Write destination + return os.WriteFile(dst, data, 0644) +} diff --git a/internal/content/site_manager.go b/internal/sites/manager.go similarity index 94% rename from internal/content/site_manager.go rename to internal/sites/manager.go index f41f1ae..c44a6e8 100644 --- a/internal/content/site_manager.go +++ b/internal/sites/manager.go @@ -1,4 +1,4 @@ -package content +package sites import ( "fmt" @@ -9,6 +9,7 @@ import ( "sync" "github.com/insertr/insertr/internal/config" + "github.com/insertr/insertr/internal/content" "github.com/insertr/insertr/internal/db" "github.com/insertr/insertr/internal/engine" "maps" @@ -17,7 +18,7 @@ import ( // SiteManager handles registration and enhancement of static sites type SiteManager struct { sites map[string]*config.SiteConfig - enhancer *Enhancer + enhancer *content.Enhancer mutex sync.RWMutex devMode bool contentClient db.ContentRepository @@ -28,7 +29,7 @@ type SiteManager struct { func NewSiteManager(contentClient db.ContentRepository, devMode bool) *SiteManager { return &SiteManager{ sites: make(map[string]*config.SiteConfig), - enhancer: NewDefaultEnhancer(contentClient, ""), // siteID will be set per operation + enhancer: content.NewDefaultEnhancer(contentClient, ""), // siteID will be set per operation devMode: devMode, contentClient: contentClient, authProvider: &engine.AuthProvider{Type: "mock"}, // default @@ -152,7 +153,7 @@ func (sm *SiteManager) EnhanceSite(siteID string) error { // Create enhancer with auth provider for this operation // Discovery disabled by default - developers should explicitly mark elements with class="insertr" - discoveryConfig := DiscoveryConfig{ + discoveryConfig := config.DiscoveryConfig{ Enabled: false, // Changed from true - respect developer intent Aggressive: false, Containers: true, @@ -166,12 +167,12 @@ func (sm *SiteManager) EnhanceSite(siteID string) error { siteID, discoveryConfig.Enabled, discoveryConfig.Aggressive) } - config := EnhancementConfig{ + config := content.EnhancementConfig{ Discovery: discoveryConfig, ContentInjection: true, GenerateIDs: true, } - enhancer := NewEnhancerWithAuth(sm.contentClient, siteID, config, sm.authProvider) + enhancer := content.NewEnhancerWithAuth(sm.contentClient, siteID, config, sm.authProvider) // Perform enhancement from source to output if err := enhancer.EnhanceDirectory(sourcePath, outputPath); err != nil {