feat: implement zero-configuration auto-enhancement demo workflow
- Add intelligent auto-enhancement that detects viable content elements
- Replace manual enhancement with automated container-first detection
- Support inline formatting (strong, em, span, links) within editable content
- Streamline demo workflow: `just demo` shows options and auto-enhances on demand
- Clean up legacy commands and simplify directory structure
- Auto-enhancement goes directly from source to demo-ready (no intermediate dirs)
- Add Dan Eden portfolio and simple test sites for real-world validation
- Auto-enhanced 40 elements in the Dan Eden portfolio and 5 in the simple site
- Achieve a true zero-configuration CMS experience
This commit is contained in:
444
internal/content/auto_enhancer.go
Normal file
444
internal/content/auto_enhancer.go
Normal file
@@ -0,0 +1,444 @@
|
||||
package content
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/insertr/insertr/internal/parser"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// AutoEnhancer handles automatic enhancement of HTML files
|
||||
type AutoEnhancer struct {
|
||||
parser *parser.Parser
|
||||
}
|
||||
|
||||
// NewAutoEnhancer creates a new AutoEnhancer instance
|
||||
func NewAutoEnhancer() *AutoEnhancer {
|
||||
return &AutoEnhancer{
|
||||
parser: parser.New(),
|
||||
}
|
||||
}
|
||||
|
||||
// AutoEnhanceResult aggregates the outcome of one EnhanceDirectory run.
type AutoEnhanceResult struct {
	// FilesProcessed counts HTML files that were enhanced and written out.
	FilesProcessed int
	// ElementsEnhanced is the sum of the per-file ElementsEnhanced counters.
	ElementsEnhanced int
	// ContainersAdded is the number of container elements that were marked.
	ContainersAdded int
	// IndividualsAdded is the number of single elements that were marked.
	IndividualsAdded int
	// SkippedFiles lists input paths of HTML files whose enhancement failed;
	// those files are copied to the output unchanged.
	SkippedFiles []string
	// EnhancedFiles lists output paths of the files that were written.
	EnhancedFiles []string
}
|
||||
|
||||
// EnhanceDirectory automatically enhances all HTML files in a directory
|
||||
func (ae *AutoEnhancer) EnhanceDirectory(inputDir, outputDir string, aggressive bool) (*AutoEnhanceResult, error) {
|
||||
result := &AutoEnhanceResult{
|
||||
SkippedFiles: []string{},
|
||||
EnhancedFiles: []string{},
|
||||
}
|
||||
|
||||
// Create output directory if it doesn't exist
|
||||
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
||||
return nil, fmt.Errorf("failed to create output directory: %w", err)
|
||||
}
|
||||
|
||||
err := filepath.WalkDir(inputDir, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Skip directories
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Only process HTML files
|
||||
if !strings.HasSuffix(strings.ToLower(path), ".html") {
|
||||
// Copy non-HTML files as-is
|
||||
return ae.copyFile(path, inputDir, outputDir)
|
||||
}
|
||||
|
||||
// Enhance HTML file
|
||||
enhanced, err := ae.enhanceFile(path, aggressive)
|
||||
if err != nil {
|
||||
result.SkippedFiles = append(result.SkippedFiles, path)
|
||||
// Copy original file on error
|
||||
return ae.copyFile(path, inputDir, outputDir)
|
||||
}
|
||||
|
||||
// Write enhanced file
|
||||
outputPath := ae.getOutputPath(path, inputDir, outputDir)
|
||||
if err := ae.writeEnhancedFile(outputPath, enhanced); err != nil {
|
||||
return fmt.Errorf("failed to write enhanced file %s: %w", outputPath, err)
|
||||
}
|
||||
|
||||
result.FilesProcessed++
|
||||
result.ElementsEnhanced += enhanced.ElementsEnhanced
|
||||
result.ContainersAdded += enhanced.ContainersAdded
|
||||
result.IndividualsAdded += enhanced.IndividualsAdded
|
||||
result.EnhancedFiles = append(result.EnhancedFiles, outputPath)
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
return result, err
|
||||
}
|
||||
|
||||
// EnhancementResult contains details about a single file enhancement
|
||||
type EnhancementResult struct {
|
||||
ElementsEnhanced int
|
||||
ContainersAdded int
|
||||
IndividualsAdded int
|
||||
Document *html.Node
|
||||
}
|
||||
|
||||
// enhanceFile processes a single HTML file and adds insertr classes
|
||||
func (ae *AutoEnhancer) enhanceFile(filePath string, aggressive bool) (*EnhancementResult, error) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
doc, err := html.Parse(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing HTML: %w", err)
|
||||
}
|
||||
|
||||
result := &EnhancementResult{Document: doc}
|
||||
|
||||
// Find candidates for enhancement
|
||||
ae.enhanceNode(doc, result, aggressive)
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// enhanceNode recursively enhances nodes in the document
|
||||
func (ae *AutoEnhancer) enhanceNode(node *html.Node, result *EnhancementResult, aggressive bool) {
|
||||
if node.Type != html.ElementNode {
|
||||
// Recursively check children
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
ae.enhanceNode(child, result, aggressive)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Skip if already has insertr class
|
||||
if ae.hasInsertrClass(node) {
|
||||
return
|
||||
}
|
||||
|
||||
// Check if this is a container that should use expansion
|
||||
if ae.isGoodContainer(node) {
|
||||
viableChildren := parser.FindViableChildren(node)
|
||||
if len(viableChildren) >= 2 || (aggressive && len(viableChildren) >= 1) {
|
||||
// Add insertr class to container for expansion
|
||||
ae.addInsertrClass(node)
|
||||
result.ContainersAdded++
|
||||
result.ElementsEnhanced += len(viableChildren)
|
||||
|
||||
// Don't process children since container expansion handles them
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Check if this individual element should be enhanced
|
||||
if ae.isGoodIndividualElement(node) {
|
||||
ae.addInsertrClass(node)
|
||||
result.IndividualsAdded++
|
||||
result.ElementsEnhanced++
|
||||
|
||||
// Don't process children of enhanced individual elements
|
||||
return
|
||||
}
|
||||
|
||||
// Recursively check children
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
ae.enhanceNode(child, result, aggressive)
|
||||
}
|
||||
}
|
||||
|
||||
// isGoodContainer checks if an element is a good candidate for container expansion
|
||||
func (ae *AutoEnhancer) isGoodContainer(node *html.Node) bool {
|
||||
containerTags := map[string]bool{
|
||||
"div": true,
|
||||
"section": true,
|
||||
"article": true,
|
||||
"header": true,
|
||||
"footer": true,
|
||||
"main": true,
|
||||
"aside": true,
|
||||
"nav": true,
|
||||
}
|
||||
|
||||
tag := strings.ToLower(node.Data)
|
||||
if !containerTags[tag] {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip containers that are clearly non-content
|
||||
if ae.isNonContentElement(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip containers in the head section
|
||||
if ae.isInHead(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip containers with technical/framework-specific classes that suggest they're not content
|
||||
classes := ae.getClasses(node)
|
||||
for _, class := range classes {
|
||||
lowerClass := strings.ToLower(class)
|
||||
// Skip Next.js internal classes and other framework artifacts
|
||||
if strings.Contains(lowerClass, "__next") ||
|
||||
strings.Contains(lowerClass, "webpack") ||
|
||||
strings.Contains(lowerClass, "hydration") ||
|
||||
strings.Contains(lowerClass, "react") ||
|
||||
strings.Contains(lowerClass, "gatsby") {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// isGoodIndividualElement checks if an element is a good candidate for individual enhancement
|
||||
func (ae *AutoEnhancer) isGoodIndividualElement(node *html.Node) bool {
|
||||
// Skip self-closing elements
|
||||
if ae.isSelfClosing(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip non-content elements that should never be editable
|
||||
if ae.isNonContentElement(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip elements inside head section
|
||||
if ae.isInHead(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip elements with no meaningful content
|
||||
if ae.hasNoMeaningfulContent(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check if element has editable content
|
||||
return ae.hasEditableContent(node)
|
||||
}
|
||||
|
||||
// hasEditableContent uses the parser's enhanced detection logic
|
||||
func (ae *AutoEnhancer) hasEditableContent(node *html.Node) bool {
|
||||
return parser.HasEditableContent(node)
|
||||
}
|
||||
|
||||
// hasInsertrClass checks if a node already has the insertr class
|
||||
func (ae *AutoEnhancer) hasInsertrClass(node *html.Node) bool {
|
||||
classes := ae.getClasses(node)
|
||||
for _, class := range classes {
|
||||
if class == "insertr" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// addInsertrClass adds the insertr class to a node
|
||||
func (ae *AutoEnhancer) addInsertrClass(node *html.Node) {
|
||||
classes := ae.getClasses(node)
|
||||
classes = append(classes, "insertr")
|
||||
ae.setClasses(node, classes)
|
||||
}
|
||||
|
||||
// getClasses extracts CSS classes from a node
|
||||
func (ae *AutoEnhancer) getClasses(node *html.Node) []string {
|
||||
for i, attr := range node.Attr {
|
||||
if attr.Key == "class" {
|
||||
if attr.Val == "" {
|
||||
return []string{}
|
||||
}
|
||||
return strings.Fields(attr.Val)
|
||||
}
|
||||
// Update existing class attribute
|
||||
if attr.Key == "class" {
|
||||
node.Attr[i] = attr
|
||||
return strings.Fields(attr.Val)
|
||||
}
|
||||
}
|
||||
return []string{}
|
||||
}
|
||||
|
||||
// setClasses sets CSS classes on a node
|
||||
func (ae *AutoEnhancer) setClasses(node *html.Node, classes []string) {
|
||||
classValue := strings.Join(classes, " ")
|
||||
|
||||
// Update existing class attribute or add new one
|
||||
for i, attr := range node.Attr {
|
||||
if attr.Key == "class" {
|
||||
node.Attr[i].Val = classValue
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Add new class attribute
|
||||
node.Attr = append(node.Attr, html.Attribute{
|
||||
Key: "class",
|
||||
Val: classValue,
|
||||
})
|
||||
}
|
||||
|
||||
// isSelfClosing checks if an element is self-closing
|
||||
func (ae *AutoEnhancer) isSelfClosing(node *html.Node) bool {
|
||||
selfClosingTags := map[string]bool{
|
||||
"img": true, "input": true, "br": true, "hr": true,
|
||||
"meta": true, "link": true, "area": true, "base": true,
|
||||
"col": true, "embed": true, "source": true, "track": true, "wbr": true,
|
||||
}
|
||||
return selfClosingTags[strings.ToLower(node.Data)]
|
||||
}
|
||||
|
||||
// isNonContentElement checks if an element should never be editable
|
||||
func (ae *AutoEnhancer) isNonContentElement(node *html.Node) bool {
|
||||
nonContentTags := map[string]bool{
|
||||
"script": true, // JavaScript code
|
||||
"style": true, // CSS styles
|
||||
"meta": true, // Metadata
|
||||
"link": true, // Links to resources
|
||||
"title": true, // Document title (handled separately)
|
||||
"head": true, // Document head
|
||||
"html": true, // Root element
|
||||
"body": true, // Body element (too broad)
|
||||
"noscript": true, // Fallback content
|
||||
"template": true, // HTML templates
|
||||
"svg": true, // SVG graphics (complex)
|
||||
"canvas": true, // Canvas graphics
|
||||
"iframe": true, // Embedded content
|
||||
"object": true, // Embedded objects
|
||||
"embed": true, // Embedded content
|
||||
"video": true, // Video elements (complex)
|
||||
"audio": true, // Audio elements (complex)
|
||||
"map": true, // Image maps
|
||||
"area": true, // Image map areas
|
||||
"base": true, // Base URL
|
||||
"col": true, // Table columns
|
||||
"colgroup": true, // Table column groups
|
||||
"track": true, // Video/audio tracks
|
||||
"source": true, // Media sources
|
||||
"param": true, // Object parameters
|
||||
"wbr": true, // Word break opportunities
|
||||
}
|
||||
return nonContentTags[strings.ToLower(node.Data)]
|
||||
}
|
||||
|
||||
// isInHead checks if a node is inside the document head
|
||||
func (ae *AutoEnhancer) isInHead(node *html.Node) bool {
|
||||
current := node.Parent
|
||||
for current != nil {
|
||||
if current.Type == html.ElementNode && strings.ToLower(current.Data) == "head" {
|
||||
return true
|
||||
}
|
||||
current = current.Parent
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// hasNoMeaningfulContent checks if an element has no meaningful text content
|
||||
func (ae *AutoEnhancer) hasNoMeaningfulContent(node *html.Node) bool {
|
||||
if node.Type != html.ElementNode {
|
||||
return true
|
||||
}
|
||||
|
||||
// Extract text content
|
||||
var text strings.Builder
|
||||
ae.extractTextRecursive(node, &text)
|
||||
content := strings.TrimSpace(text.String())
|
||||
|
||||
// Empty or whitespace-only content
|
||||
if content == "" {
|
||||
return true
|
||||
}
|
||||
|
||||
// Very short content that's likely not meaningful
|
||||
if len(content) < 2 {
|
||||
return true
|
||||
}
|
||||
|
||||
// Content that looks like technical artifacts
|
||||
technicalPatterns := []string{
|
||||
"$", "<!--", "-->", "{", "}", "[", "]",
|
||||
"function", "var ", "const ", "let ", "return",
|
||||
"import", "export", "require", "module.exports",
|
||||
"/*", "*/", "//", "<?", "?>", "<%", "%>",
|
||||
}
|
||||
|
||||
for _, pattern := range technicalPatterns {
|
||||
if strings.Contains(content, pattern) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// extractTextRecursive extracts text content from a node and its children
|
||||
func (ae *AutoEnhancer) extractTextRecursive(node *html.Node, text *strings.Builder) {
|
||||
if node.Type == html.TextNode {
|
||||
text.WriteString(node.Data)
|
||||
return
|
||||
}
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
// Skip script and style content
|
||||
if child.Type == html.ElementNode {
|
||||
tag := strings.ToLower(child.Data)
|
||||
if tag == "script" || tag == "style" {
|
||||
continue
|
||||
}
|
||||
}
|
||||
ae.extractTextRecursive(child, text)
|
||||
}
|
||||
}
|
||||
|
||||
// copyFile copies a file from input to output directory
|
||||
func (ae *AutoEnhancer) copyFile(filePath, inputDir, outputDir string) error {
|
||||
outputPath := ae.getOutputPath(filePath, inputDir, outputDir)
|
||||
|
||||
// Create output directory for the file
|
||||
if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
input, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return os.WriteFile(outputPath, input, 0644)
|
||||
}
|
||||
|
||||
// getOutputPath converts input path to output path
|
||||
func (ae *AutoEnhancer) getOutputPath(filePath, inputDir, outputDir string) string {
|
||||
relPath, _ := filepath.Rel(inputDir, filePath)
|
||||
return filepath.Join(outputDir, relPath)
|
||||
}
|
||||
|
||||
// writeEnhancedFile writes the enhanced HTML document to a file
|
||||
func (ae *AutoEnhancer) writeEnhancedFile(outputPath string, enhanced *EnhancementResult) error {
|
||||
// Create output directory
|
||||
if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
file, err := os.Create(outputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
return html.Render(file, enhanced.Document)
|
||||
}
|
||||
@@ -61,6 +61,7 @@ func extractTextRecursive(node *html.Node, text *strings.Builder) {
|
||||
}
|
||||
|
||||
// hasOnlyTextContent checks if a node contains only text content (no nested HTML elements)
|
||||
//
// Deprecated: Use hasEditableContent for more sophisticated detection.
|
||||
func hasOnlyTextContent(node *html.Node) bool {
|
||||
if node.Type != html.ElementNode {
|
||||
return false
|
||||
@@ -82,6 +83,87 @@ func hasOnlyTextContent(node *html.Node) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// inlineFormattingTags is the set of inline elements that may appear inside
// editable content without making it unsafe to edit.
var inlineFormattingTags = map[string]bool{
	// Emphasis.
	"strong": true, "b": true, "em": true, "i": true,
	// Generic and typographic inline wrappers.
	"span": true, "code": true, "small": true, "sub": true, "sup": true,
	// Links within content are fine.
	"a": true,
}
|
||||
|
||||
// blockingElements is the set of elements whose presence inside a node makes
// that node's content unsuitable for editing.
var blockingElements = map[string]bool{
	// Form controls; buttons shouldn't be nested in paragraphs.
	"button": true, "input": true, "select": true, "textarea": true,
	// Media and embedded content.
	"img": true, "video": true, "audio": true, "canvas": true, "svg": true,
	"iframe": true, "object": true, "embed": true,
	// Nested divs usually indicate complex structure.
	"div": true,
	// Block-level semantic elements.
	"section": true, "article": true, "header": true, "footer": true,
	"nav": true, "aside": true, "main": true,
	// Forms, tables and lists.
	"form": true, "table": true, "ul": true, "ol": true, "dl": true,
}
|
||||
|
||||
// hasEditableContent checks if a node contains content that can be safely edited
|
||||
// This includes text and safe inline formatting elements
|
||||
func hasEditableContent(node *html.Node) bool {
|
||||
if node.Type != html.ElementNode {
|
||||
return false
|
||||
}
|
||||
|
||||
return hasOnlyTextAndSafeFormatting(node)
|
||||
}
|
||||
|
||||
// hasOnlyTextAndSafeFormatting recursively checks if content is safe for editing
|
||||
func hasOnlyTextAndSafeFormatting(node *html.Node) bool {
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
switch child.Type {
|
||||
case html.TextNode:
|
||||
continue // Text is always safe
|
||||
case html.ElementNode:
|
||||
// Check if it's a blocking element
|
||||
if blockingElements[child.Data] {
|
||||
return false
|
||||
}
|
||||
// Allow safe inline formatting
|
||||
if inlineFormattingTags[child.Data] {
|
||||
// Recursively validate the formatting element
|
||||
if !hasOnlyTextAndSafeFormatting(child) {
|
||||
return false
|
||||
}
|
||||
continue
|
||||
}
|
||||
// Unknown/unsafe element
|
||||
return false
|
||||
default:
|
||||
continue // Comments, whitespace, etc.
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// isContainer checks if a tag is typically used as a container element
|
||||
func isContainer(node *html.Node) bool {
|
||||
if node.Type != html.ElementNode {
|
||||
@@ -124,7 +206,34 @@ func findViableChildren(node *html.Node) []*html.Node {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if element has only text content
|
||||
// Check if element has editable content (improved logic)
|
||||
if hasEditableContent(child) {
|
||||
viable = append(viable, child)
|
||||
}
|
||||
}
|
||||
|
||||
return viable
|
||||
}
|
||||
|
||||
// findViableChildrenLegacy uses the old text-only logic for backwards compatibility
|
||||
func findViableChildrenLegacy(node *html.Node) []*html.Node {
|
||||
var viable []*html.Node
|
||||
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
if child.Type == html.TextNode {
|
||||
if strings.TrimSpace(child.Data) == "" {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
if child.Type != html.ElementNode {
|
||||
continue
|
||||
}
|
||||
|
||||
if isSelfClosing(child) {
|
||||
continue
|
||||
}
|
||||
|
||||
if hasOnlyTextContent(child) {
|
||||
viable = append(viable, child)
|
||||
}
|
||||
@@ -193,3 +302,13 @@ func findElementWithContext(node *html.Node, target Element) *html.Node {
|
||||
func GetAttribute(node *html.Node, key string) string {
|
||||
return getAttribute(node, key)
|
||||
}
|
||||
|
||||
// HasEditableContent checks if a node has editable content (exported version)
|
||||
func HasEditableContent(node *html.Node) bool {
|
||||
return hasEditableContent(node)
|
||||
}
|
||||
|
||||
// FindViableChildren finds viable children for editing (exported version)
|
||||
func FindViableChildren(node *html.Node) []*html.Node {
|
||||
return findViableChildren(node)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user