Implement unified Discoverer + Enhancer architecture

- Rename AutoEnhancer to Discoverer with clear element discovery focus
- Implement unified enhancement pipeline in Enhancer:
  * Phase 1: Element Discovery (configurable, respects existing insertr classes)
  * Phase 2: ID Generation via engine
  * Phase 3: Content Injection via engine
- Add EnhancementConfig and DiscoveryConfig for flexible configuration
- Update all method names and references (discoverNode, DiscoveryResult, etc.)
- Support both manual class insertion and automatic discovery
- Maintain single enhance command interface while providing unified internal pipeline
- Update all constructors to use new configuration-based approach

This establishes the clean Discoverer + Enhancer architecture discussed, with discovery as a configurable first phase and enhancement as a unified pipeline.
This commit is contained in:
2025-09-16 16:50:07 +02:00
parent d877366be0
commit 35855ac0f5
5 changed files with 217 additions and 93 deletions

View File

@@ -53,11 +53,11 @@ func runAutoEnhance(cmd *cobra.Command, args []string) error {
}
fmt.Printf("\n")
// Create auto enhancer
enhancer := content.NewAutoEnhancer()
// Create discoverer
discoverer := content.NewDiscoverer()
// Run auto enhancement
result, err := enhancer.EnhanceDirectory(inputDir, autoEnhanceOutput, autoEnhanceAggressive)
// Run element discovery
result, err := discoverer.DiscoverDirectory(inputDir, autoEnhanceOutput, autoEnhanceAggressive)
if err != nil {
return fmt.Errorf("auto-enhancement failed: %w", err)
}

View File

@@ -68,7 +68,7 @@ func runEnhance(cmd *cobra.Command, args []string) {
}
// Create enhancer
enhancer := content.NewEnhancer(client, siteID)
enhancer := content.NewDefaultEnhancer(client, siteID)
fmt.Printf("🚀 Starting enhancement process...\n")
fmt.Printf("📁 Input: %s\n", inputDir)

View File

@@ -11,18 +11,18 @@ import (
"golang.org/x/net/html"
)
// AutoEnhancer handles automatic enhancement of HTML files
type AutoEnhancer struct {
// Remove parser dependency - auto enhancement is now self-contained
// Discoverer handles automatic discovery of editable elements in HTML.
// Its methods walk a parsed document and add the "insertr" class to elements
// judged to be editable. The type currently carries no fields.
type Discoverer struct {
// Element discovery is now self-contained and configurable
}
// NewAutoEnhancer creates a new AutoEnhancer instance
func NewAutoEnhancer() *AutoEnhancer {
return &AutoEnhancer{}
// NewDiscoverer returns a pointer to a freshly allocated, zero-valued
// Discoverer.
func NewDiscoverer() *Discoverer {
	return new(Discoverer)
}
// AutoEnhanceResult contains statistics about auto-enhancement
type AutoEnhanceResult struct {
// DiscoveryResult contains statistics about element discovery
type DiscoveryResult struct {
FilesProcessed int
ElementsEnhanced int
ContainersAdded int
@@ -31,9 +31,9 @@ type AutoEnhanceResult struct {
EnhancedFiles []string
}
// EnhanceDirectory automatically enhances all HTML files in a directory
func (ae *AutoEnhancer) EnhanceDirectory(inputDir, outputDir string, aggressive bool) (*AutoEnhanceResult, error) {
result := &AutoEnhanceResult{
// DiscoverDirectory discovers editable elements in all HTML files in a directory
func (disc *Discoverer) DiscoverDirectory(inputDir, outputDir string, aggressive bool) (*DiscoveryResult, error) {
result := &DiscoveryResult{
SkippedFiles: []string{},
EnhancedFiles: []string{},
}
@@ -56,20 +56,20 @@ func (ae *AutoEnhancer) EnhanceDirectory(inputDir, outputDir string, aggressive
// Only process HTML files
if !strings.HasSuffix(strings.ToLower(path), ".html") {
// Copy non-HTML files as-is
return ae.copyFile(path, inputDir, outputDir)
return disc.copyFile(path, inputDir, outputDir)
}
// Enhance HTML file
enhanced, err := ae.enhanceFile(path, aggressive)
// Discover elements in HTML file
enhanced, err := disc.discoverFile(path, aggressive)
if err != nil {
result.SkippedFiles = append(result.SkippedFiles, path)
// Copy original file on error
return ae.copyFile(path, inputDir, outputDir)
return disc.copyFile(path, inputDir, outputDir)
}
// Write enhanced file
outputPath := ae.getOutputPath(path, inputDir, outputDir)
if err := ae.writeEnhancedFile(outputPath, enhanced); err != nil {
outputPath := disc.getOutputPath(path, inputDir, outputDir)
if err := disc.writeEnhancedFile(outputPath, enhanced); err != nil {
return fmt.Errorf("failed to write enhanced file %s: %w", outputPath, err)
}
@@ -85,16 +85,16 @@ func (ae *AutoEnhancer) EnhanceDirectory(inputDir, outputDir string, aggressive
return result, err
}
// EnhancementResult contains details about a single file enhancement
type EnhancementResult struct {
// FileDiscoveryResult contains details about a single file discovery.
// The counters are updated by discoverNode as it walks Document.
type FileDiscoveryResult struct {
// ElementsEnhanced grows by one for each individually marked element and by
// the number of viable children for each marked container.
ElementsEnhanced int
// ContainersAdded counts container elements that received the insertr class.
ContainersAdded int
// IndividualsAdded counts individual elements that received the insertr class.
IndividualsAdded int
// Document is the parsed HTML tree that discovery walks; class attributes
// are modified on its nodes in place.
Document *html.Node
}
// enhanceFile processes a single HTML file and adds insertr classes
func (ae *AutoEnhancer) enhanceFile(filePath string, aggressive bool) (*EnhancementResult, error) {
// discoverFile processes a single HTML file and adds insertr classes
func (disc *Discoverer) discoverFile(filePath string, aggressive bool) (*FileDiscoveryResult, error) {
file, err := os.Open(filePath)
if err != nil {
return nil, fmt.Errorf("error opening file: %w", err)
@@ -106,35 +106,35 @@ func (ae *AutoEnhancer) enhanceFile(filePath string, aggressive bool) (*Enhancem
return nil, fmt.Errorf("error parsing HTML: %w", err)
}
result := &EnhancementResult{Document: doc}
result := &FileDiscoveryResult{Document: doc}
// Find candidates for enhancement
ae.enhanceNode(doc, result, aggressive)
disc.discoverNode(doc, result, aggressive)
return result, nil
}
// enhanceNode recursively enhances nodes in the document
func (ae *AutoEnhancer) enhanceNode(node *html.Node, result *EnhancementResult, aggressive bool) {
// discoverNode recursively discovers editable nodes in the document
func (disc *Discoverer) discoverNode(node *html.Node, result *FileDiscoveryResult, aggressive bool) {
if node.Type != html.ElementNode {
// Recursively check children
for child := node.FirstChild; child != nil; child = child.NextSibling {
ae.enhanceNode(child, result, aggressive)
disc.discoverNode(child, result, aggressive)
}
return
}
// Skip if already has insertr class
if ae.hasInsertrClass(node) {
if disc.hasInsertrClass(node) {
return
}
// Check if this is a container that should use expansion
if ae.isGoodContainer(node) {
if disc.isGoodContainer(node) {
viableChildren := engine.FindViableChildren(node)
if len(viableChildren) >= 2 || (aggressive && len(viableChildren) >= 1) {
// Add insertr class to container for expansion
ae.addInsertrClass(node)
disc.addInsertrClass(node)
result.ContainersAdded++
result.ElementsEnhanced += len(viableChildren)
@@ -144,8 +144,8 @@ func (ae *AutoEnhancer) enhanceNode(node *html.Node, result *EnhancementResult,
}
// Check if this individual element should be enhanced
if ae.isGoodIndividualElement(node) {
ae.addInsertrClass(node)
if disc.isGoodIndividualElement(node) {
disc.addInsertrClass(node)
result.IndividualsAdded++
result.ElementsEnhanced++
@@ -155,12 +155,12 @@ func (ae *AutoEnhancer) enhanceNode(node *html.Node, result *EnhancementResult,
// Recursively check children
for child := node.FirstChild; child != nil; child = child.NextSibling {
ae.enhanceNode(child, result, aggressive)
disc.discoverNode(child, result, aggressive)
}
}
// isGoodContainer checks if an element is a good candidate for container expansion
func (ae *AutoEnhancer) isGoodContainer(node *html.Node) bool {
func (disc *Discoverer) isGoodContainer(node *html.Node) bool {
containerTags := map[string]bool{
"div": true,
"section": true,
@@ -178,17 +178,17 @@ func (ae *AutoEnhancer) isGoodContainer(node *html.Node) bool {
}
// Skip containers that are clearly non-content
if ae.isNonContentElement(node) {
if disc.isNonContentElement(node) {
return false
}
// Skip containers in the head section
if ae.isInHead(node) {
if disc.isInHead(node) {
return false
}
// Skip containers with technical/framework-specific classes that suggest they're not content
classes := ae.getClasses(node)
classes := disc.getClasses(node)
for _, class := range classes {
lowerClass := strings.ToLower(class)
// Skip Next.js internal classes and other framework artifacts
@@ -205,39 +205,39 @@ func (ae *AutoEnhancer) isGoodContainer(node *html.Node) bool {
}
// isGoodIndividualElement checks if an element is a good candidate for individual enhancement
func (ae *AutoEnhancer) isGoodIndividualElement(node *html.Node) bool {
func (disc *Discoverer) isGoodIndividualElement(node *html.Node) bool {
// Skip self-closing elements
if ae.isSelfClosing(node) {
if disc.isSelfClosing(node) {
return false
}
// Skip non-content elements that should never be editable
if ae.isNonContentElement(node) {
if disc.isNonContentElement(node) {
return false
}
// Skip elements inside head section
if ae.isInHead(node) {
if disc.isInHead(node) {
return false
}
// Skip elements with no meaningful content
if ae.hasNoMeaningfulContent(node) {
if disc.hasNoMeaningfulContent(node) {
return false
}
// Check if element has editable content
return ae.hasEditableContent(node)
return disc.hasEditableContent(node)
}
// hasEditableContent uses the engine's enhanced detection logic
func (ae *AutoEnhancer) hasEditableContent(node *html.Node) bool {
func (disc *Discoverer) hasEditableContent(node *html.Node) bool {
return engine.HasEditableContent(node)
}
// hasInsertrClass checks if a node already has the insertr class
func (ae *AutoEnhancer) hasInsertrClass(node *html.Node) bool {
classes := ae.getClasses(node)
func (disc *Discoverer) hasInsertrClass(node *html.Node) bool {
classes := disc.getClasses(node)
for _, class := range classes {
if class == "insertr" {
return true
@@ -247,14 +247,14 @@ func (ae *AutoEnhancer) hasInsertrClass(node *html.Node) bool {
}
// addInsertrClass adds the insertr class to a node
func (ae *AutoEnhancer) addInsertrClass(node *html.Node) {
classes := ae.getClasses(node)
func (disc *Discoverer) addInsertrClass(node *html.Node) {
classes := disc.getClasses(node)
classes = append(classes, "insertr")
ae.setClasses(node, classes)
disc.setClasses(node, classes)
}
// getClasses extracts CSS classes from a node
func (ae *AutoEnhancer) getClasses(node *html.Node) []string {
func (disc *Discoverer) getClasses(node *html.Node) []string {
for i, attr := range node.Attr {
if attr.Key == "class" {
if attr.Val == "" {
@@ -272,7 +272,7 @@ func (ae *AutoEnhancer) getClasses(node *html.Node) []string {
}
// setClasses sets CSS classes on a node
func (ae *AutoEnhancer) setClasses(node *html.Node, classes []string) {
func (disc *Discoverer) setClasses(node *html.Node, classes []string) {
classValue := strings.Join(classes, " ")
// Update existing class attribute or add new one
@@ -291,7 +291,7 @@ func (ae *AutoEnhancer) setClasses(node *html.Node, classes []string) {
}
// isSelfClosing checks if an element is self-closing
func (ae *AutoEnhancer) isSelfClosing(node *html.Node) bool {
func (disc *Discoverer) isSelfClosing(node *html.Node) bool {
selfClosingTags := map[string]bool{
"img": true, "input": true, "br": true, "hr": true,
"meta": true, "link": true, "area": true, "base": true,
@@ -301,7 +301,7 @@ func (ae *AutoEnhancer) isSelfClosing(node *html.Node) bool {
}
// isNonContentElement checks if an element should never be editable
func (ae *AutoEnhancer) isNonContentElement(node *html.Node) bool {
func (disc *Discoverer) isNonContentElement(node *html.Node) bool {
nonContentTags := map[string]bool{
"script": true, // JavaScript code
"style": true, // CSS styles
@@ -334,7 +334,7 @@ func (ae *AutoEnhancer) isNonContentElement(node *html.Node) bool {
}
// isInHead checks if a node is inside the document head
func (ae *AutoEnhancer) isInHead(node *html.Node) bool {
func (disc *Discoverer) isInHead(node *html.Node) bool {
current := node.Parent
for current != nil {
if current.Type == html.ElementNode && strings.ToLower(current.Data) == "head" {
@@ -346,14 +346,14 @@ func (ae *AutoEnhancer) isInHead(node *html.Node) bool {
}
// hasNoMeaningfulContent checks if an element has no meaningful text content
func (ae *AutoEnhancer) hasNoMeaningfulContent(node *html.Node) bool {
func (disc *Discoverer) hasNoMeaningfulContent(node *html.Node) bool {
if node.Type != html.ElementNode {
return true
}
// Extract text content
var text strings.Builder
ae.extractTextRecursive(node, &text)
disc.extractTextRecursive(node, &text)
content := strings.TrimSpace(text.String())
// Empty or whitespace-only content
@@ -384,7 +384,7 @@ func (ae *AutoEnhancer) hasNoMeaningfulContent(node *html.Node) bool {
}
// extractTextRecursive extracts text content from a node and its children
func (ae *AutoEnhancer) extractTextRecursive(node *html.Node, text *strings.Builder) {
func (disc *Discoverer) extractTextRecursive(node *html.Node, text *strings.Builder) {
if node.Type == html.TextNode {
text.WriteString(node.Data)
return
@@ -398,13 +398,13 @@ func (ae *AutoEnhancer) extractTextRecursive(node *html.Node, text *strings.Buil
continue
}
}
ae.extractTextRecursive(child, text)
disc.extractTextRecursive(child, text)
}
}
// copyFile copies a file from input to output directory
func (ae *AutoEnhancer) copyFile(filePath, inputDir, outputDir string) error {
outputPath := ae.getOutputPath(filePath, inputDir, outputDir)
func (disc *Discoverer) copyFile(filePath, inputDir, outputDir string) error {
outputPath := disc.getOutputPath(filePath, inputDir, outputDir)
// Create output directory for the file
if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
@@ -420,13 +420,13 @@ func (ae *AutoEnhancer) copyFile(filePath, inputDir, outputDir string) error {
}
// getOutputPath converts input path to output path
func (ae *AutoEnhancer) getOutputPath(filePath, inputDir, outputDir string) string {
// getOutputPath maps filePath, which is expected to live under inputDir, to
// the corresponding location under outputDir.
//
// If filePath cannot be expressed relative to inputDir (for example when one
// path is absolute and the other is not), the file's base name is used
// instead, so the result is still a file path inside outputDir rather than
// outputDir itself.
func (disc *Discoverer) getOutputPath(filePath, inputDir, outputDir string) string {
	relPath, err := filepath.Rel(inputDir, filePath)
	if err != nil {
		// Best-effort fallback: keep the file name, drop the unknown prefix.
		relPath = filepath.Base(filePath)
	}
	return filepath.Join(outputDir, relPath)
}
// writeEnhancedFile writes the enhanced HTML document to a file
func (ae *AutoEnhancer) writeEnhancedFile(outputPath string, enhanced *EnhancementResult) error {
func (disc *Discoverer) writeEnhancedFile(outputPath string, enhanced *FileDiscoveryResult) error {
// Create output directory
if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
return err

View File

@@ -2,49 +2,93 @@ package content
import (
"fmt"
"golang.org/x/net/html"
"os"
"path/filepath"
"strings"
"github.com/insertr/insertr/internal/engine"
)
// Enhancer combines parsing and content injection using unified engine
// EnhancementConfig configures the enhancement pipeline.
// It is stored on the Enhancer and consulted by processHTML to decide which
// phases run for each HTML document.
type EnhancementConfig struct {
// Discovery holds the settings for the element discovery phase.
Discovery DiscoveryConfig
// ContentInjection, when true, makes the engine run in Enhancement mode;
// otherwise the engine runs in IDGeneration mode.
ContentInjection bool
// GenerateIDs requests the engine phase; that phase runs when either
// GenerateIDs or ContentInjection is true.
GenerateIDs bool
}
// DiscoveryConfig configures element discovery.
type DiscoveryConfig struct {
// Enabled gates the discovery phase in processHTML.
Enabled bool
// Aggressive is forwarded to discoverNode, where it lowers the container
// threshold from two viable children to one.
Aggressive bool
// NOTE(review): Containers is not read by any code visible here; presumably
// it is meant to toggle container marking — confirm.
Containers bool
// NOTE(review): Individual is not read by any code visible here; presumably
// it is meant to toggle individual-element marking — confirm.
Individual bool
}
// Enhancer combines discovery, ID generation, and content injection in unified pipeline.
// Discovery is delegated to the Discoverer; ID generation and content
// injection are delegated to the content engine.
type Enhancer struct {
// engine runs ProcessContent for the ID generation / content injection phases.
engine *engine.ContentEngine
// injector functionality will be integrated into engine
discoverer *Discoverer // marks candidate elements during the discovery phase
config EnhancementConfig // selects which pipeline phases run
siteID string // forwarded to the engine as ContentInput.SiteID
}
// NewEnhancer creates a new HTML enhancer using unified engine
func NewEnhancer(client engine.ContentClient, siteID string) *Enhancer {
// Create database client for engine
var engineClient engine.ContentClient
if dbClient, ok := client.(*DatabaseClient); ok {
engineClient = engine.NewDatabaseClient(dbClient.db)
} else {
// For non-database clients, we'll implement proper handling later
engineClient = engine.NewDatabaseClient(nil) // This will need to be fixed
}
// NewEnhancer creates a new HTML enhancer with unified pipeline
func NewEnhancer(client engine.ContentClient, siteID string, config EnhancementConfig) *Enhancer {
return &Enhancer{
engine: engine.NewContentEngine(engineClient),
engine: engine.NewContentEngine(client),
discoverer: NewDiscoverer(),
config: config,
siteID: siteID,
}
}
// EnhanceFile processes an HTML file and injects content
func (e *Enhancer) EnhanceFile(inputPath, outputPath string) error {
// TODO: Implement with unified engine
// For now, just copy the file to maintain functionality
return e.copyFile(inputPath, outputPath)
// NewDefaultEnhancer builds an Enhancer with every pipeline phase switched
// on: discovery of both containers and individual elements in non-aggressive
// mode, ID generation, and content injection.
func NewDefaultEnhancer(client engine.ContentClient, siteID string) *Enhancer {
	var cfg EnhancementConfig

	// Discovery phase: enabled, conservative thresholds.
	cfg.Discovery.Enabled = true
	cfg.Discovery.Aggressive = false
	cfg.Discovery.Containers = true
	cfg.Discovery.Individual = true

	// Engine phases.
	cfg.ContentInjection = true
	cfg.GenerateIDs = true

	return NewEnhancer(client, siteID, cfg)
}
// EnhanceDirectory processes all HTML files in a directory
// EnhanceFile processes a single HTML file through the complete pipeline.
//
// It reads inputPath, runs the content through processHTML, creates the
// parent directory of outputPath if needed, and writes the result there.
// Every failure is returned wrapped with the step and path that failed.
func (e *Enhancer) EnhanceFile(inputPath, outputPath string) error {
	// Read HTML file
	htmlContent, err := os.ReadFile(inputPath)
	if err != nil {
		return fmt.Errorf("reading file %s: %w", inputPath, err)
	}

	// Process through unified pipeline.
	// NOTE(review): only the base name is handed to the pipeline; if the engine
	// derives IDs from the file path, same-named files in different
	// directories may collide — confirm against the engine.
	processedHTML, err := e.processHTML(htmlContent, filepath.Base(inputPath))
	if err != nil {
		return fmt.Errorf("processing HTML %s: %w", inputPath, err)
	}

	// Create output directory
	if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
		return fmt.Errorf("creating output directory: %w", err)
	}

	// Write processed HTML, wrapping the error like every other step above.
	if err := os.WriteFile(outputPath, processedHTML, 0644); err != nil {
		return fmt.Errorf("writing file %s: %w", outputPath, err)
	}
	return nil
}
// EnhanceDirectory processes all files in a directory through the unified pipeline
func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
// Create output directory
if err := os.MkdirAll(outputDir, 0755); err != nil {
return fmt.Errorf("creating output directory: %w", err)
}
// Walk input directory and copy files for now
// Walk input directory
return filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
@@ -62,16 +106,96 @@ func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
return os.MkdirAll(outputPath, info.Mode())
}
// Copy files (HTML processing will be implemented later)
// Process HTML files through enhancement pipeline
if strings.HasSuffix(strings.ToLower(path), ".html") {
return e.EnhanceFile(path, outputPath)
}
// Copy non-HTML files as-is
return e.copyFile(path, outputPath)
})
}
// processHTML runs htmlContent through the phases enabled in the Enhancer's
// configuration and returns the resulting bytes. The input is returned
// unchanged when no phase is enabled.
func (e *Enhancer) processHTML(htmlContent []byte, filePath string) ([]byte, error) {
	cfg := e.config
	current := htmlContent
	var err error

	// Phase 1: element discovery, when switched on.
	if cfg.Discovery.Enabled {
		if current, err = e.discoverElements(current, filePath); err != nil {
			return nil, fmt.Errorf("element discovery: %w", err)
		}
	}

	// Phases 2 and 3 are both handled by the engine; skip it if neither is wanted.
	if !cfg.GenerateIDs && !cfg.ContentInjection {
		return current, nil
	}

	if current, err = e.enhanceWithEngine(current, filePath); err != nil {
		return nil, fmt.Errorf("engine enhancement: %w", err)
	}
	return current, nil
}
// discoverElements parses htmlContent, lets the Discoverer mark viable
// elements with the insertr class, and returns the re-rendered document.
// filePath is accepted for symmetry with the other pipeline steps and is not
// used here.
func (e *Enhancer) discoverElements(htmlContent []byte, filePath string) ([]byte, error) {
	root, parseErr := html.Parse(strings.NewReader(string(htmlContent)))
	if parseErr != nil {
		return nil, fmt.Errorf("parsing HTML: %w", parseErr)
	}

	// The walk mutates the tree in place; the counters are not needed here.
	stats := &FileDiscoveryResult{Document: root}
	aggressive := e.config.Discovery.Aggressive
	e.discoverer.discoverNode(root, stats, aggressive)

	var out strings.Builder
	renderErr := html.Render(&out, root)
	if renderErr != nil {
		return nil, fmt.Errorf("rendering HTML: %w", renderErr)
	}
	return []byte(out.String()), nil
}
// enhanceWithEngine hands htmlContent to the content engine and returns the
// rendered form of the document it produces. The engine runs in Enhancement
// mode when content injection is configured and in IDGeneration mode
// otherwise.
func (e *Enhancer) enhanceWithEngine(htmlContent []byte, filePath string) ([]byte, error) {
	input := engine.ContentInput{
		HTML:     htmlContent,
		FilePath: filePath,
		SiteID:   e.siteID,
		Mode:     engine.IDGeneration, // ID generation only
	}
	if e.config.ContentInjection {
		input.Mode = engine.Enhancement // ID generation + content injection
	}

	processed, procErr := e.engine.ProcessContent(input)
	if procErr != nil {
		return nil, fmt.Errorf("engine processing: %w", procErr)
	}

	var out strings.Builder
	renderErr := html.Render(&out, processed.Document)
	if renderErr != nil {
		return nil, fmt.Errorf("rendering enhanced HTML: %w", renderErr)
	}
	return []byte(out.String()), nil
}
// EnhanceInPlace performs in-place enhancement of static site files
func (e *Enhancer) EnhanceInPlace(sitePath string, siteID string) error {
// TODO: Implement with unified engine
// For now, just log that enhancement was requested
fmt.Printf("📄 Enhancement requested for site %s at %s (stub implementation)\n", siteID, sitePath)
// TODO: Implement in-place enhancement using the unified pipeline
fmt.Printf("📄 Enhancement requested for site %s at %s (unified pipeline implementation needed)\n", siteID, sitePath)
return nil
}

View File

@@ -37,7 +37,7 @@ func NewSiteManager(contentClient engine.ContentClient, backupDir string, devMod
return &SiteManager{
sites: make(map[string]*SiteConfig),
enhancer: NewEnhancer(contentClient, ""), // siteID will be set per operation
enhancer: NewDefaultEnhancer(contentClient, ""), // siteID will be set per operation
backupDir: backupDir,
devMode: devMode,
}