diff --git a/cmd/auto_enhance.go b/cmd/auto_enhance.go index f3b9656..a085d64 100644 --- a/cmd/auto_enhance.go +++ b/cmd/auto_enhance.go @@ -53,11 +53,11 @@ func runAutoEnhance(cmd *cobra.Command, args []string) error { } fmt.Printf("\n") - // Create auto enhancer - enhancer := content.NewAutoEnhancer() + // Create discoverer + discoverer := content.NewDiscoverer() - // Run auto enhancement - result, err := enhancer.EnhanceDirectory(inputDir, autoEnhanceOutput, autoEnhanceAggressive) + // Run element discovery + result, err := discoverer.DiscoverDirectory(inputDir, autoEnhanceOutput, autoEnhanceAggressive) if err != nil { return fmt.Errorf("auto-enhancement failed: %w", err) } diff --git a/cmd/enhance.go b/cmd/enhance.go index f0ff5b9..b6fe93f 100644 --- a/cmd/enhance.go +++ b/cmd/enhance.go @@ -68,7 +68,7 @@ func runEnhance(cmd *cobra.Command, args []string) { } // Create enhancer - enhancer := content.NewEnhancer(client, siteID) + enhancer := content.NewDefaultEnhancer(client, siteID) fmt.Printf("🚀 Starting enhancement process...\n") fmt.Printf("📁 Input: %s\n", inputDir) diff --git a/internal/content/auto_enhancer.go b/internal/content/discoverer.go similarity index 73% rename from internal/content/auto_enhancer.go rename to internal/content/discoverer.go index e84a93c..d0dbed6 100644 --- a/internal/content/auto_enhancer.go +++ b/internal/content/discoverer.go @@ -11,18 +11,18 @@ import ( "golang.org/x/net/html" ) -// AutoEnhancer handles automatic enhancement of HTML files -type AutoEnhancer struct { - // Remove parser dependency - auto enhancement is now self-contained +// Discoverer handles automatic discovery of editable elements in HTML +type Discoverer struct { + // Element discovery is now self-contained and configurable } -// NewAutoEnhancer creates a new AutoEnhancer instance -func NewAutoEnhancer() *AutoEnhancer { - return &AutoEnhancer{} +// NewDiscoverer creates a new Discoverer instance +func NewDiscoverer() *Discoverer { + return &Discoverer{} } -// AutoEnhanceResult contains statistics about auto-enhancement -type AutoEnhanceResult struct { +// DiscoveryResult contains statistics about element discovery +type DiscoveryResult struct { FilesProcessed int ElementsEnhanced int ContainersAdded int @@ -31,9 +31,9 @@ type AutoEnhanceResult struct { EnhancedFiles []string } -// EnhanceDirectory automatically enhances all HTML files in a directory -func (ae *AutoEnhancer) EnhanceDirectory(inputDir, outputDir string, aggressive bool) (*AutoEnhanceResult, error) { - result := &AutoEnhanceResult{ +// DiscoverDirectory discovers editable elements in all HTML files in a directory +func (disc *Discoverer) DiscoverDirectory(inputDir, outputDir string, aggressive bool) (*DiscoveryResult, error) { + result := &DiscoveryResult{ SkippedFiles: []string{}, EnhancedFiles: []string{}, } @@ -56,20 +56,20 @@ func (ae *AutoEnhancer) EnhanceDirectory(inputDir, outputDir string, aggressive // Only process HTML files if !strings.HasSuffix(strings.ToLower(path), ".html") { // Copy non-HTML files as-is - return ae.copyFile(path, inputDir, outputDir) + return disc.copyFile(path, inputDir, outputDir) } - // Enhance HTML file - enhanced, err := ae.enhanceFile(path, aggressive) + // Discover elements in HTML file + enhanced, err := disc.discoverFile(path, aggressive) if err != nil { result.SkippedFiles = append(result.SkippedFiles, path) // Copy original file on error - return ae.copyFile(path, inputDir, outputDir) + return disc.copyFile(path, inputDir, outputDir) } // Write enhanced file - outputPath := ae.getOutputPath(path, inputDir, outputDir) - if err := ae.writeEnhancedFile(outputPath, enhanced); err != nil { + outputPath := disc.getOutputPath(path, inputDir, outputDir) + if err := disc.writeEnhancedFile(outputPath, enhanced); err != nil { return fmt.Errorf("failed to write enhanced file %s: %w", outputPath, err) } @@ -85,16 +85,16 @@ func (ae *AutoEnhancer) EnhanceDirectory(inputDir, outputDir string, aggressive return result, err } -// EnhancementResult contains details about a single file enhancement -type EnhancementResult struct { +// FileDiscoveryResult contains details about a single file discovery +type FileDiscoveryResult struct { ElementsEnhanced int ContainersAdded int IndividualsAdded int Document *html.Node } -// enhanceFile processes a single HTML file and adds insertr classes -func (ae *AutoEnhancer) enhanceFile(filePath string, aggressive bool) (*EnhancementResult, error) { +// discoverFile processes a single HTML file and adds insertr classes +func (disc *Discoverer) discoverFile(filePath string, aggressive bool) (*FileDiscoveryResult, error) { file, err := os.Open(filePath) if err != nil { return nil, fmt.Errorf("error opening file: %w", err) @@ -106,35 +106,35 @@ func (ae *AutoEnhancer) enhanceFile(filePath string, aggressive bool) (*Enhancem return nil, fmt.Errorf("error parsing HTML: %w", err) } - result := &EnhancementResult{Document: doc} + result := &FileDiscoveryResult{Document: doc} // Find candidates for enhancement - ae.enhanceNode(doc, result, aggressive) + disc.discoverNode(doc, result, aggressive) return result, nil } -// enhanceNode recursively enhances nodes in the document -func (ae *AutoEnhancer) enhanceNode(node *html.Node, result *EnhancementResult, aggressive bool) { +// discoverNode recursively discovers editable nodes in the document +func (disc *Discoverer) discoverNode(node *html.Node, result *FileDiscoveryResult, aggressive bool) { if node.Type != html.ElementNode { // Recursively check children for child := node.FirstChild; child != nil; child = child.NextSibling { - ae.enhanceNode(child, result, aggressive) + disc.discoverNode(child, result, aggressive) } return } // Skip if already has insertr class - if ae.hasInsertrClass(node) { + if disc.hasInsertrClass(node) { return } // Check if this is a container that should use expansion - if ae.isGoodContainer(node) { + if disc.isGoodContainer(node) { viableChildren := engine.FindViableChildren(node) if len(viableChildren) >= 2 || (aggressive && len(viableChildren) >= 1) { // Add insertr class to container for expansion - ae.addInsertrClass(node) + disc.addInsertrClass(node) result.ContainersAdded++ result.ElementsEnhanced += len(viableChildren) @@ -144,8 +144,8 @@ func (ae *AutoEnhancer) enhanceNode(node *html.Node, result *EnhancementResult, } // Check if this individual element should be enhanced - if ae.isGoodIndividualElement(node) { - ae.addInsertrClass(node) + if disc.isGoodIndividualElement(node) { + disc.addInsertrClass(node) result.IndividualsAdded++ result.ElementsEnhanced++ @@ -155,12 +155,12 @@ func (ae *AutoEnhancer) enhanceNode(node *html.Node, result *EnhancementResult, // Recursively check children for child := node.FirstChild; child != nil; child = child.NextSibling { - ae.enhanceNode(child, result, aggressive) + disc.discoverNode(child, result, aggressive) } } // isGoodContainer checks if an element is a good candidate for container expansion -func (ae *AutoEnhancer) isGoodContainer(node *html.Node) bool { +func (disc *Discoverer) isGoodContainer(node *html.Node) bool { containerTags := map[string]bool{ "div": true, "section": true, @@ -178,17 +178,17 @@ func (ae *AutoEnhancer) isGoodContainer(node *html.Node) bool { } // Skip containers that are clearly non-content - if ae.isNonContentElement(node) { + if disc.isNonContentElement(node) { return false } // Skip containers in the head section - if ae.isInHead(node) { + if disc.isInHead(node) { return false } // Skip containers with technical/framework-specific classes that suggest they're not content - classes := ae.getClasses(node) + classes := disc.getClasses(node) for _, class := range classes { lowerClass := strings.ToLower(class) // Skip Next.js internal classes and other framework artifacts @@ -205,39 +205,39 @@ func (ae *AutoEnhancer) isGoodContainer(node *html.Node) bool { } // isGoodIndividualElement checks if an element is a good candidate for individual enhancement -func (ae *AutoEnhancer) isGoodIndividualElement(node *html.Node) bool { +func (disc *Discoverer) isGoodIndividualElement(node *html.Node) bool { // Skip self-closing elements - if ae.isSelfClosing(node) { + if disc.isSelfClosing(node) { return false } // Skip non-content elements that should never be editable - if ae.isNonContentElement(node) { + if disc.isNonContentElement(node) { return false } // Skip elements inside head section - if ae.isInHead(node) { + if disc.isInHead(node) { return false } // Skip elements with no meaningful content - if ae.hasNoMeaningfulContent(node) { + if disc.hasNoMeaningfulContent(node) { return false } // Check if element has editable content - return ae.hasEditableContent(node) + return disc.hasEditableContent(node) } // hasEditableContent uses the engine's enhanced detection logic -func (ae *AutoEnhancer) hasEditableContent(node *html.Node) bool { +func (disc *Discoverer) hasEditableContent(node *html.Node) bool { return engine.HasEditableContent(node) } // hasInsertrClass checks if a node already has the insertr class -func (ae *AutoEnhancer) hasInsertrClass(node *html.Node) bool { - classes := ae.getClasses(node) +func (disc *Discoverer) hasInsertrClass(node *html.Node) bool { + classes := disc.getClasses(node) for _, class := range classes { if class == "insertr" { return true @@ -247,14 +247,14 @@ func (ae *AutoEnhancer) hasInsertrClass(node *html.Node) bool { } // addInsertrClass adds the insertr class to a node -func (ae *AutoEnhancer) addInsertrClass(node *html.Node) { - classes := ae.getClasses(node) +func (disc *Discoverer) addInsertrClass(node *html.Node) { + classes := disc.getClasses(node) classes = append(classes, "insertr") - ae.setClasses(node, classes) + disc.setClasses(node, classes) } // getClasses extracts CSS classes from a node -func (ae *AutoEnhancer) getClasses(node *html.Node) []string { +func (disc *Discoverer) getClasses(node *html.Node) []string { for i, attr := range node.Attr { if attr.Key == "class" { if attr.Val == "" { @@ -272,7 +272,7 @@ func (ae *AutoEnhancer) getClasses(node *html.Node) []string { } // setClasses sets CSS classes on a node -func (ae *AutoEnhancer) setClasses(node *html.Node, classes []string) { +func (disc *Discoverer) setClasses(node *html.Node, classes []string) { classValue := strings.Join(classes, " ") // Update existing class attribute or add new one @@ -291,7 +291,7 @@ func (ae *AutoEnhancer) setClasses(node *html.Node, classes []string) { } // isSelfClosing checks if an element is self-closing -func (ae *AutoEnhancer) isSelfClosing(node *html.Node) bool { +func (disc *Discoverer) isSelfClosing(node *html.Node) bool { selfClosingTags := map[string]bool{ "img": true, "input": true, "br": true, "hr": true, "meta": true, "link": true, "area": true, "base": true, @@ -301,7 +301,7 @@ func (ae *AutoEnhancer) isSelfClosing(node *html.Node) bool { } // isNonContentElement checks if an element should never be editable -func (ae *AutoEnhancer) isNonContentElement(node *html.Node) bool { +func (disc *Discoverer) isNonContentElement(node *html.Node) bool { nonContentTags := map[string]bool{ "script": true, // JavaScript code "style": true, // CSS styles @@ -334,7 +334,7 @@ func (ae *AutoEnhancer) isNonContentElement(node *html.Node) bool { } // isInHead checks if a node is inside the document head -func (ae *AutoEnhancer) isInHead(node *html.Node) bool { +func (disc *Discoverer) isInHead(node *html.Node) bool { current := node.Parent for current != nil { if current.Type == html.ElementNode && strings.ToLower(current.Data) == "head" { @@ -346,14 +346,14 @@ func (ae *AutoEnhancer) isInHead(node *html.Node) bool { } // hasNoMeaningfulContent checks if an element has no meaningful text content -func (ae *AutoEnhancer) hasNoMeaningfulContent(node *html.Node) bool { +func (disc *Discoverer) hasNoMeaningfulContent(node *html.Node) bool { if node.Type != html.ElementNode { return true } // Extract text content var text strings.Builder - ae.extractTextRecursive(node, &text) + disc.extractTextRecursive(node, &text) content := strings.TrimSpace(text.String()) // Empty or whitespace-only content @@ -384,7 +384,7 @@ func (ae *AutoEnhancer) hasNoMeaningfulContent(node *html.Node) bool { } // extractTextRecursive extracts text content from a node and its children -func (ae *AutoEnhancer) extractTextRecursive(node *html.Node, text *strings.Builder) { +func (disc *Discoverer) extractTextRecursive(node *html.Node, text *strings.Builder) { if node.Type == html.TextNode { text.WriteString(node.Data) return @@ -398,13 +398,13 @@ func (ae *AutoEnhancer) extractTextRecursive(node *html.Node, text *strings.Buil continue } } - ae.extractTextRecursive(child, text) + disc.extractTextRecursive(child, text) } } // copyFile copies a file from input to output directory -func (ae *AutoEnhancer) copyFile(filePath, inputDir, outputDir string) error { - outputPath := ae.getOutputPath(filePath, inputDir, outputDir) +func (disc *Discoverer) copyFile(filePath, inputDir, outputDir string) error { + outputPath := disc.getOutputPath(filePath, inputDir, outputDir) // Create output directory for the file if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { @@ -420,13 +420,13 @@ func (ae *AutoEnhancer) copyFile(filePath, inputDir, outputDir string) error { } // getOutputPath converts input path to output path -func (ae *AutoEnhancer) getOutputPath(filePath, inputDir, outputDir string) string { +func (disc *Discoverer) getOutputPath(filePath, inputDir, outputDir string) string { relPath, _ := filepath.Rel(inputDir, filePath) return filepath.Join(outputDir, relPath) } // writeEnhancedFile writes the enhanced HTML document to a file -func (ae *AutoEnhancer) writeEnhancedFile(outputPath string, enhanced *EnhancementResult) error { +func (disc *Discoverer) writeEnhancedFile(outputPath string, enhanced *FileDiscoveryResult) error { // Create output directory if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { return err diff --git a/internal/content/enhancer.go b/internal/content/enhancer.go index 041045c..64ad0e7 100644 --- a/internal/content/enhancer.go +++ b/internal/content/enhancer.go @@ -2,49 +2,93 @@ package content import ( "fmt" + "golang.org/x/net/html" "os" "path/filepath" + "strings" "github.com/insertr/insertr/internal/engine" ) -// Enhancer combines parsing and content injection using unified engine +// EnhancementConfig configures the enhancement pipeline +type EnhancementConfig struct { + Discovery DiscoveryConfig + ContentInjection bool + GenerateIDs bool +} + +// DiscoveryConfig configures element discovery +type DiscoveryConfig struct { + Enabled bool + Aggressive bool + Containers bool + Individual bool +} + +// Enhancer combines discovery, ID generation, and content injection in unified pipeline type Enhancer struct { - engine *engine.ContentEngine - // injector functionality will be integrated into engine + engine *engine.ContentEngine + discoverer *Discoverer + config EnhancementConfig + siteID string } -// NewEnhancer creates a new HTML enhancer using unified engine -func NewEnhancer(client engine.ContentClient, siteID string) *Enhancer { - // Create database client for engine - var engineClient engine.ContentClient - if dbClient, ok := client.(*DatabaseClient); ok { - engineClient = engine.NewDatabaseClient(dbClient.db) - } else { - // For non-database clients, we'll implement proper handling later - engineClient = engine.NewDatabaseClient(nil) // This will need to be fixed - } - +// NewEnhancer creates a new HTML enhancer with unified pipeline +func NewEnhancer(client engine.ContentClient, siteID string, config EnhancementConfig) *Enhancer { return &Enhancer{ - engine: engine.NewContentEngine(engineClient), + engine: engine.NewContentEngine(client), + discoverer: NewDiscoverer(), + config: config, + siteID: siteID, } } -// EnhanceFile processes an HTML file and injects content -func (e *Enhancer) EnhanceFile(inputPath, outputPath string) error { - // TODO: Implement with unified engine - // For now, just copy the file to maintain functionality - return e.copyFile(inputPath, outputPath) +// NewDefaultEnhancer creates an enhancer with default configuration +func NewDefaultEnhancer(client engine.ContentClient, siteID string) *Enhancer { + defaultConfig := EnhancementConfig{ + Discovery: DiscoveryConfig{ + Enabled: true, + Aggressive: false, + Containers: true, + Individual: true, + }, + ContentInjection: true, + GenerateIDs: true, + } + return NewEnhancer(client, siteID, defaultConfig) } -// EnhanceDirectory processes all HTML files in a directory +// EnhanceFile processes a single HTML file through the complete pipeline +func (e *Enhancer) EnhanceFile(inputPath, outputPath string) error { + // Read HTML file + htmlContent, err := os.ReadFile(inputPath) + if err != nil { + return fmt.Errorf("reading file %s: %w", inputPath, err) + } + + // Process through unified pipeline + processedHTML, err := e.processHTML(htmlContent, filepath.Base(inputPath)) + if err != nil { + return fmt.Errorf("processing HTML %s: %w", inputPath, err) + } + + // Create output directory + if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { + return fmt.Errorf("creating output directory: %w", err) + } + + // Write processed HTML + return os.WriteFile(outputPath, processedHTML, 0644) +} + +// EnhanceDirectory processes all files in a directory through the unified pipeline func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error { // Create output directory if err := os.MkdirAll(outputDir, 0755); err != nil { return fmt.Errorf("creating output directory: %w", err) } - // Walk input directory and copy files for now + // Walk input directory return filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -62,16 +106,96 @@ func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error { return os.MkdirAll(outputPath, info.Mode()) } - // Copy files (HTML processing will be implemented later) + // Process HTML files through enhancement pipeline + if strings.HasSuffix(strings.ToLower(path), ".html") { + return e.EnhanceFile(path, outputPath) + } + + // Copy non-HTML files as-is return e.copyFile(path, outputPath) }) } +// processHTML implements the unified enhancement pipeline +func (e *Enhancer) processHTML(htmlContent []byte, filePath string) ([]byte, error) { + var processedHTML []byte = htmlContent + + // Phase 1: Element Discovery (if enabled) + if e.config.Discovery.Enabled { + discoveredHTML, err := e.discoverElements(processedHTML, filePath) + if err != nil { + return nil, fmt.Errorf("element discovery: %w", err) + } + processedHTML = discoveredHTML + } + + // Phase 2 & 3: ID Generation + Content Injection (via engine) + if e.config.GenerateIDs || e.config.ContentInjection { + enhancedHTML, err := e.enhanceWithEngine(processedHTML, filePath) + if err != nil { + return nil, fmt.Errorf("engine enhancement: %w", err) + } + processedHTML = enhancedHTML + } + + return processedHTML, nil +} + +// discoverElements adds insertr classes to viable elements +func (e *Enhancer) discoverElements(htmlContent []byte, filePath string) ([]byte, error) { + // Parse HTML + doc, err := html.Parse(strings.NewReader(string(htmlContent))) + if err != nil { + return nil, fmt.Errorf("parsing HTML: %w", err) + } + + // Find and mark viable elements + result := &FileDiscoveryResult{Document: doc} + e.discoverer.discoverNode(doc, result, e.config.Discovery.Aggressive) + + // Render back to HTML + var buf strings.Builder + if err := html.Render(&buf, doc); err != nil { + return nil, fmt.Errorf("rendering HTML: %w", err) + } + + return []byte(buf.String()), nil +} + +// enhanceWithEngine uses the unified engine for ID generation and content injection +func (e *Enhancer) enhanceWithEngine(htmlContent []byte, filePath string) ([]byte, error) { + // Determine processing mode + var mode engine.ProcessMode + if e.config.ContentInjection { + mode = engine.Enhancement // ID generation + content injection + } else { + mode = engine.IDGeneration // ID generation only + } + + // Process with engine + result, err := e.engine.ProcessContent(engine.ContentInput{ + HTML: htmlContent, + FilePath: filePath, + SiteID: e.siteID, + Mode: mode, + }) + if err != nil { + return nil, fmt.Errorf("engine processing: %w", err) + } + + // Render enhanced document + var buf strings.Builder + if err := html.Render(&buf, result.Document); err != nil { + return nil, fmt.Errorf("rendering enhanced HTML: %w", err) + } + + return []byte(buf.String()), nil +} + // EnhanceInPlace performs in-place enhancement of static site files func (e *Enhancer) EnhanceInPlace(sitePath string, siteID string) error { - // TODO: Implement with unified engine - // For now, just log that enhancement was requested - fmt.Printf("📄 Enhancement requested for site %s at %s (stub implementation)\n", siteID, sitePath) + // TODO: Implement in-place enhancement using the unified pipeline + fmt.Printf("📄 Enhancement requested for site %s at %s (unified pipeline implementation needed)\n", siteID, sitePath) return nil } diff --git a/internal/content/site_manager.go b/internal/content/site_manager.go index 0d16714..772acd7 100644 --- a/internal/content/site_manager.go +++ b/internal/content/site_manager.go @@ -37,7 +37,7 @@ func NewSiteManager(contentClient engine.ContentClient, backupDir string, devMod return &SiteManager{ sites: make(map[string]*SiteConfig), - enhancer: NewEnhancer(contentClient, ""), // siteID will be set per operation + enhancer: NewDefaultEnhancer(contentClient, ""), // siteID will be set per operation backupDir: backupDir, devMode: devMode, }