- Add syntactic sugar for container transformation: .insertr containers → children get .insertr - Fix discovery auto-running when disabled with site-specific config loading - Add comprehensive styling test examples for HTML attribute preservation - Include test input for syntactic sugar validation - Update discovery defaults to respect developer intent (disabled by default)
484 lines
13 KiB
Go
484 lines
13 KiB
Go
package content
|
|
|
|
import (
|
|
"fmt"
|
|
"io/fs"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"github.com/insertr/insertr/internal/engine"
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// Discoverer finds editable elements in HTML documents and marks them with the
// "insertr" class. The type holds no fields; everything it needs is passed to
// its methods.
type Discoverer struct{}
|
|
|
|
// NewDiscoverer creates a new Discoverer instance
|
|
func NewDiscoverer() *Discoverer {
|
|
return &Discoverer{}
|
|
}
|
|
|
|
// DiscoveryResult summarizes a directory-wide discovery run.
type DiscoveryResult struct {
	// FilesProcessed counts HTML files that were enhanced and written.
	FilesProcessed int
	// ElementsEnhanced is the total number of elements that received the class.
	ElementsEnhanced int
	// ContainersAdded counts containers whose children were expanded.
	ContainersAdded int
	// IndividualsAdded counts elements marked one by one.
	IndividualsAdded int
	// SkippedFiles holds input paths of HTML files whose discovery failed.
	SkippedFiles []string
	// EnhancedFiles holds output paths of the enhanced files that were written.
	EnhancedFiles []string
}
|
|
|
|
// DiscoverDirectory discovers editable elements in all HTML files in a directory
|
|
func (disc *Discoverer) DiscoverDirectory(inputDir, outputDir string, aggressive bool) (*DiscoveryResult, error) {
|
|
result := &DiscoveryResult{
|
|
SkippedFiles: []string{},
|
|
EnhancedFiles: []string{},
|
|
}
|
|
|
|
// Create output directory if it doesn't exist
|
|
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
|
return nil, fmt.Errorf("failed to create output directory: %w", err)
|
|
}
|
|
|
|
err := filepath.WalkDir(inputDir, func(path string, d fs.DirEntry, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Skip directories
|
|
if d.IsDir() {
|
|
return nil
|
|
}
|
|
|
|
// Only process HTML files
|
|
if !strings.HasSuffix(strings.ToLower(path), ".html") {
|
|
// Copy non-HTML files as-is
|
|
return disc.copyFile(path, inputDir, outputDir)
|
|
}
|
|
|
|
// Discover elements in HTML file
|
|
enhanced, err := disc.discoverFile(path, aggressive)
|
|
if err != nil {
|
|
result.SkippedFiles = append(result.SkippedFiles, path)
|
|
// Copy original file on error
|
|
return disc.copyFile(path, inputDir, outputDir)
|
|
}
|
|
|
|
// Write enhanced file
|
|
outputPath := disc.getOutputPath(path, inputDir, outputDir)
|
|
if err := disc.writeEnhancedFile(outputPath, enhanced); err != nil {
|
|
return fmt.Errorf("failed to write enhanced file %s: %w", outputPath, err)
|
|
}
|
|
|
|
result.FilesProcessed++
|
|
result.ElementsEnhanced += enhanced.ElementsEnhanced
|
|
result.ContainersAdded += enhanced.ContainersAdded
|
|
result.IndividualsAdded += enhanced.IndividualsAdded
|
|
result.EnhancedFiles = append(result.EnhancedFiles, outputPath)
|
|
|
|
return nil
|
|
})
|
|
|
|
return result, err
|
|
}
|
|
|
|
// FileDiscoveryResult contains details about a single file discovery
|
|
type FileDiscoveryResult struct {
|
|
ElementsEnhanced int
|
|
ContainersAdded int
|
|
IndividualsAdded int
|
|
SugarTransformed int // Count of syntactic sugar transformations
|
|
Document *html.Node
|
|
}
|
|
|
|
// discoverFile processes a single HTML file and adds insertr classes
|
|
func (disc *Discoverer) discoverFile(filePath string, aggressive bool) (*FileDiscoveryResult, error) {
|
|
file, err := os.Open(filePath)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error opening file: %w", err)
|
|
}
|
|
defer file.Close()
|
|
|
|
doc, err := html.Parse(file)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("error parsing HTML: %w", err)
|
|
}
|
|
|
|
result := &FileDiscoveryResult{Document: doc}
|
|
|
|
// Find candidates for enhancement
|
|
disc.discoverNode(doc, result, aggressive)
|
|
|
|
return result, nil
|
|
}
|
|
|
|
// discoverNode recursively discovers editable nodes in the document
|
|
func (disc *Discoverer) discoverNode(node *html.Node, result *FileDiscoveryResult, aggressive bool) {
|
|
if node.Type != html.ElementNode {
|
|
// Recursively check children
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
disc.discoverNode(child, result, aggressive)
|
|
}
|
|
return
|
|
}
|
|
|
|
// Handle syntactic sugar: containers with .insertr class
|
|
if disc.hasInsertrClass(node) && disc.isGoodContainer(node) {
|
|
// Syntactic sugar transformation: remove .insertr from container, add to viable children
|
|
viableChildren := engine.FindViableChildren(node)
|
|
if len(viableChildren) >= 1 {
|
|
disc.removeInsertrClass(node) // Remove from container
|
|
for _, child := range viableChildren {
|
|
if !disc.hasInsertrClass(child) {
|
|
disc.addInsertrClass(child)
|
|
result.IndividualsAdded++
|
|
result.ElementsEnhanced++
|
|
}
|
|
}
|
|
result.ContainersAdded++
|
|
result.SugarTransformed++ // Track sugar transformations
|
|
|
|
// Don't process children since we just processed them
|
|
return
|
|
}
|
|
// If no viable children, leave .insertr on the element (individual editing)
|
|
return
|
|
}
|
|
|
|
// Skip if already has insertr class (individual editing elements)
|
|
if disc.hasInsertrClass(node) {
|
|
return
|
|
}
|
|
|
|
// Check if this is a container that should use expansion
|
|
if disc.isGoodContainer(node) {
|
|
viableChildren := engine.FindViableChildren(node)
|
|
if len(viableChildren) >= 2 || (aggressive && len(viableChildren) >= 1) {
|
|
// Container expansion: add insertr class to each viable child, not the container
|
|
for _, child := range viableChildren {
|
|
if !disc.hasInsertrClass(child) {
|
|
disc.addInsertrClass(child)
|
|
result.IndividualsAdded++
|
|
result.ElementsEnhanced++
|
|
}
|
|
}
|
|
result.ContainersAdded++
|
|
|
|
// Don't process children since we just processed them
|
|
return
|
|
}
|
|
}
|
|
|
|
// Check if this individual element should be enhanced
|
|
if disc.isGoodIndividualElement(node) {
|
|
disc.addInsertrClass(node)
|
|
result.IndividualsAdded++
|
|
result.ElementsEnhanced++
|
|
|
|
// Don't process children of enhanced individual elements
|
|
return
|
|
}
|
|
|
|
// Recursively check children
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
disc.discoverNode(child, result, aggressive)
|
|
}
|
|
}
|
|
|
|
// isGoodContainer checks if an element is a good candidate for container expansion
|
|
func (disc *Discoverer) isGoodContainer(node *html.Node) bool {
|
|
containerTags := map[string]bool{
|
|
"div": true,
|
|
"section": true,
|
|
"article": true,
|
|
"header": true,
|
|
"footer": true,
|
|
"main": true,
|
|
"aside": true,
|
|
"nav": true,
|
|
}
|
|
|
|
tag := strings.ToLower(node.Data)
|
|
if !containerTags[tag] {
|
|
return false
|
|
}
|
|
|
|
// Skip containers that are clearly non-content
|
|
if disc.isNonContentElement(node) {
|
|
return false
|
|
}
|
|
|
|
// Skip containers in the head section
|
|
if disc.isInHead(node) {
|
|
return false
|
|
}
|
|
|
|
// Skip containers with technical/framework-specific classes that suggest they're not content
|
|
classes := disc.getClasses(node)
|
|
for _, class := range classes {
|
|
lowerClass := strings.ToLower(class)
|
|
// Skip Next.js internal classes and other framework artifacts
|
|
if strings.Contains(lowerClass, "__next") ||
|
|
strings.Contains(lowerClass, "webpack") ||
|
|
strings.Contains(lowerClass, "hydration") ||
|
|
strings.Contains(lowerClass, "react") ||
|
|
strings.Contains(lowerClass, "gatsby") {
|
|
return false
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
// isGoodIndividualElement checks if an element is a good candidate for individual enhancement
|
|
func (disc *Discoverer) isGoodIndividualElement(node *html.Node) bool {
|
|
// Skip self-closing elements
|
|
if disc.isSelfClosing(node) {
|
|
return false
|
|
}
|
|
|
|
// Skip non-content elements that should never be editable
|
|
if disc.isNonContentElement(node) {
|
|
return false
|
|
}
|
|
|
|
// Skip elements inside head section
|
|
if disc.isInHead(node) {
|
|
return false
|
|
}
|
|
|
|
// Skip elements with no meaningful content
|
|
if disc.hasNoMeaningfulContent(node) {
|
|
return false
|
|
}
|
|
|
|
// Check if element has editable content
|
|
return disc.hasEditableContent(node)
|
|
}
|
|
|
|
// hasEditableContent uses the engine's enhanced detection logic
|
|
func (disc *Discoverer) hasEditableContent(node *html.Node) bool {
|
|
return engine.HasEditableContent(node)
|
|
}
|
|
|
|
// hasInsertrClass checks if a node already has the insertr class
|
|
func (disc *Discoverer) hasInsertrClass(node *html.Node) bool {
|
|
classes := disc.getClasses(node)
|
|
for _, class := range classes {
|
|
if class == "insertr" {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// addInsertrClass adds the insertr class to a node
|
|
func (disc *Discoverer) addInsertrClass(node *html.Node) {
|
|
classes := disc.getClasses(node)
|
|
classes = append(classes, "insertr")
|
|
disc.setClasses(node, classes)
|
|
}
|
|
|
|
// removeInsertrClass removes the insertr class from a node
|
|
func (disc *Discoverer) removeInsertrClass(node *html.Node) {
|
|
classes := disc.getClasses(node)
|
|
var filteredClasses []string
|
|
for _, class := range classes {
|
|
if class != "insertr" {
|
|
filteredClasses = append(filteredClasses, class)
|
|
}
|
|
}
|
|
disc.setClasses(node, filteredClasses)
|
|
}
|
|
|
|
// getClasses extracts CSS classes from a node
|
|
func (disc *Discoverer) getClasses(node *html.Node) []string {
|
|
for i, attr := range node.Attr {
|
|
if attr.Key == "class" {
|
|
if attr.Val == "" {
|
|
return []string{}
|
|
}
|
|
return strings.Fields(attr.Val)
|
|
}
|
|
// Update existing class attribute
|
|
if attr.Key == "class" {
|
|
node.Attr[i] = attr
|
|
return strings.Fields(attr.Val)
|
|
}
|
|
}
|
|
return []string{}
|
|
}
|
|
|
|
// setClasses sets CSS classes on a node
|
|
func (disc *Discoverer) setClasses(node *html.Node, classes []string) {
|
|
classValue := strings.Join(classes, " ")
|
|
|
|
// Update existing class attribute or add new one
|
|
for i, attr := range node.Attr {
|
|
if attr.Key == "class" {
|
|
node.Attr[i].Val = classValue
|
|
return
|
|
}
|
|
}
|
|
|
|
// Add new class attribute
|
|
node.Attr = append(node.Attr, html.Attribute{
|
|
Key: "class",
|
|
Val: classValue,
|
|
})
|
|
}
|
|
|
|
// isSelfClosing checks if an element is self-closing
|
|
func (disc *Discoverer) isSelfClosing(node *html.Node) bool {
|
|
selfClosingTags := map[string]bool{
|
|
"img": true, "input": true, "br": true, "hr": true,
|
|
"meta": true, "link": true, "area": true, "base": true,
|
|
"col": true, "embed": true, "source": true, "track": true, "wbr": true,
|
|
}
|
|
return selfClosingTags[strings.ToLower(node.Data)]
|
|
}
|
|
|
|
// isNonContentElement checks if an element should never be editable
|
|
func (disc *Discoverer) isNonContentElement(node *html.Node) bool {
|
|
nonContentTags := map[string]bool{
|
|
"script": true, // JavaScript code
|
|
"style": true, // CSS styles
|
|
"meta": true, // Metadata
|
|
"link": true, // Links to resources
|
|
"title": true, // Document title (handled separately)
|
|
"head": true, // Document head
|
|
"html": true, // Root element
|
|
"body": true, // Body element (too broad)
|
|
"noscript": true, // Fallback content
|
|
"template": true, // HTML templates
|
|
"svg": true, // SVG graphics (complex)
|
|
"canvas": true, // Canvas graphics
|
|
"iframe": true, // Embedded content
|
|
"object": true, // Embedded objects
|
|
"embed": true, // Embedded content
|
|
"video": true, // Video elements (complex)
|
|
"audio": true, // Audio elements (complex)
|
|
"map": true, // Image maps
|
|
"area": true, // Image map areas
|
|
"base": true, // Base URL
|
|
"col": true, // Table columns
|
|
"colgroup": true, // Table column groups
|
|
"track": true, // Video/audio tracks
|
|
"source": true, // Media sources
|
|
"param": true, // Object parameters
|
|
"wbr": true, // Word break opportunities
|
|
}
|
|
return nonContentTags[strings.ToLower(node.Data)]
|
|
}
|
|
|
|
// isInHead checks if a node is inside the document head
|
|
func (disc *Discoverer) isInHead(node *html.Node) bool {
|
|
current := node.Parent
|
|
for current != nil {
|
|
if current.Type == html.ElementNode && strings.ToLower(current.Data) == "head" {
|
|
return true
|
|
}
|
|
current = current.Parent
|
|
}
|
|
return false
|
|
}
|
|
|
|
// hasNoMeaningfulContent checks if an element has no meaningful text content
|
|
func (disc *Discoverer) hasNoMeaningfulContent(node *html.Node) bool {
|
|
if node.Type != html.ElementNode {
|
|
return true
|
|
}
|
|
|
|
// Extract text content
|
|
var text strings.Builder
|
|
disc.extractTextRecursive(node, &text)
|
|
content := strings.TrimSpace(text.String())
|
|
|
|
// Empty or whitespace-only content
|
|
if content == "" {
|
|
return true
|
|
}
|
|
|
|
// Very short content that's likely not meaningful
|
|
if len(content) < 2 {
|
|
return true
|
|
}
|
|
|
|
// Content that looks like technical artifacts
|
|
technicalPatterns := []string{
|
|
"$", "<!--", "-->", "{", "}", "[", "]",
|
|
"function", "var ", "const ", "let ", "return",
|
|
"import", "export", "require", "module.exports",
|
|
"/*", "*/", "//", "<?", "?>", "<%", "%>",
|
|
}
|
|
|
|
for _, pattern := range technicalPatterns {
|
|
if strings.Contains(content, pattern) {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// extractTextRecursive extracts text content from a node and its children
|
|
func (disc *Discoverer) extractTextRecursive(node *html.Node, text *strings.Builder) {
|
|
if node.Type == html.TextNode {
|
|
text.WriteString(node.Data)
|
|
return
|
|
}
|
|
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
// Skip script and style content
|
|
if child.Type == html.ElementNode {
|
|
tag := strings.ToLower(child.Data)
|
|
if tag == "script" || tag == "style" {
|
|
continue
|
|
}
|
|
}
|
|
disc.extractTextRecursive(child, text)
|
|
}
|
|
}
|
|
|
|
// copyFile copies a file from input to output directory
|
|
func (disc *Discoverer) copyFile(filePath, inputDir, outputDir string) error {
|
|
outputPath := disc.getOutputPath(filePath, inputDir, outputDir)
|
|
|
|
// Create output directory for the file
|
|
if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
|
|
return err
|
|
}
|
|
|
|
input, err := os.ReadFile(filePath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return os.WriteFile(outputPath, input, 0644)
|
|
}
|
|
|
|
// getOutputPath converts input path to output path
|
|
func (disc *Discoverer) getOutputPath(filePath, inputDir, outputDir string) string {
|
|
relPath, _ := filepath.Rel(inputDir, filePath)
|
|
return filepath.Join(outputDir, relPath)
|
|
}
|
|
|
|
// writeEnhancedFile writes the enhanced HTML document to a file
|
|
func (disc *Discoverer) writeEnhancedFile(outputPath string, enhanced *FileDiscoveryResult) error {
|
|
// Create output directory
|
|
if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
|
|
return err
|
|
}
|
|
|
|
file, err := os.Create(outputPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer file.Close()
|
|
|
|
return html.Render(file, enhanced.Document)
|
|
}
|