// insertr/internal/engine/utils.go

package engine

import (
	"strings"

	"golang.org/x/net/html"
)

// GetClasses returns the whitespace-separated tokens of node's "class"
// attribute. When the attribute is missing or empty it returns an empty,
// non-nil slice.
func GetClasses(node *html.Node) []string {
	if raw := getAttribute(node, "class"); raw != "" {
		// strings.Fields splits on any run of whitespace and drops empty tokens.
		return strings.Fields(raw)
	}
	return []string{}
}

// ContainsClass reports whether target appears in classes. The comparison is
// an exact, case-sensitive string match; a nil or empty slice never matches.
func ContainsClass(classes []string, target string) bool {
	for i := 0; i < len(classes); i++ {
		if classes[i] != target {
			continue
		}
		return true
	}
	return false
}

// getAttribute returns the value of the first attribute on node whose Key
// equals key. It returns "" when no such attribute exists, so a missing
// attribute cannot be told apart from one with an empty value.
func getAttribute(node *html.Node, key string) string {
	attrs := node.Attr
	for i := range attrs {
		if attrs[i].Key == key {
			return attrs[i].Val
		}
	}
	return ""
}

// extractTextContent returns the text gathered from node by
// extractTextRecursive, with leading and trailing whitespace removed.
func extractTextContent(node *html.Node) string {
	collected := new(strings.Builder)
	extractTextRecursive(node, collected)

	trimmed := strings.TrimSpace(collected.String())
	return trimmed
}

// extractTextRecursive appends the data of node (when it is a text node) and
// of every text node beneath it to text, in document order. Child <script>
// and <style> elements are not descended into, so their contents are left out.
func extractTextRecursive(node *html.Node, text *strings.Builder) {
	if node.Type == html.TextNode {
		text.WriteString(node.Data)
	}

	for c := node.FirstChild; c != nil; c = c.NextSibling {
		if c.Type == html.ElementNode {
			switch c.Data {
			case "script", "style":
				// Not human-readable content; move on to the next sibling.
				continue
			}
		}
		extractTextRecursive(c, text)
	}
}

// hasOnlyTextContent reports whether node is an element with no element
// children. Text nodes, comments and any other non-element children are
// allowed; only direct children are examined.
//
// Deprecated: use hasEditableContent, which also accepts safe inline
// formatting elements.
func hasOnlyTextContent(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}

	for c := node.FirstChild; c != nil; c = c.NextSibling {
		// A single nested element is enough to disqualify the node.
		if c.Type == html.ElementNode {
			return false
		}
	}
	return true
}

// Tag tables consulted when deciding whether an element's content is editable.
var (
	// inlineFormattingTags holds the inline formatting elements that are
	// safe to appear inside editable content.
	inlineFormattingTags = map[string]bool{
		"strong": true,
		"b":      true,
		"em":     true,
		"i":      true,
		"span":   true,
		"code":   true,
		"small":  true,
		"sub":    true,
		"sup":    true,
		"a":      true, // links inside content are acceptable
	}

	// blockingElements holds the elements that must NOT be nested within
	// editable content.
	blockingElements = map[string]bool{
		"button":   true, // buttons should not sit inside paragraphs
		"input":    true,
		"select":   true,
		"textarea": true,
		"img":      true,
		"video":    true,
		"audio":    true,
		"canvas":   true,
		"svg":      true,
		"iframe":   true,
		"object":   true,
		"embed":    true,
		"div":      true, // a nested div usually signals complex structure
		"section":  true, // block-level semantic elements
		"article":  true,
		"header":   true,
		"footer":   true,
		"nav":      true,
		"aside":    true,
		"main":     true,
		"form":     true,
		"table":    true,
		"ul":       true,
		"ol":       true,
		"dl":       true,
	}
)

// hasEditableContent reports whether node is an element whose content can be
// edited safely, as judged by hasOnlyTextAndSafeFormatting. Non-element nodes
// are never editable.
func hasEditableContent(node *html.Node) bool {
	return node.Type == html.ElementNode && hasOnlyTextAndSafeFormatting(node)
}

// hasOnlyTextAndSafeFormatting reports whether every element beneath node, at
// any depth, is listed in inlineFormattingTags and not in blockingElements.
// Non-element children (text, comments, and so on) are always accepted. The
// type of node itself is not inspected.
func hasOnlyTextAndSafeFormatting(node *html.Node) bool {
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		if c.Type != html.ElementNode {
			// Text, comments and other non-element nodes are always safe.
			continue
		}

		tag := c.Data
		if blockingElements[tag] || !inlineFormattingTags[tag] {
			// Either explicitly blocked or simply not a known-safe tag.
			return false
		}

		// The formatting element is allowed; its own content must be safe too.
		if !hasOnlyTextAndSafeFormatting(c) {
			return false
		}
	}
	return true
}

// isContainer reports whether node is an element whose tag is one of the
// structural container tags: div, section, article, header, footer, main,
// aside or nav. Non-element nodes are never containers.
func isContainer(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}

	// A switch over constant tag names needs no per-call allocation.
	switch node.Data {
	case "div", "section", "article", "header", "footer", "main", "aside", "nav":
		return true
	default:
		return false
	}
}

// findViableChildren returns the direct children of node that are viable for
// editing, in sibling order.
//
// A child is viable when it is an element node, isSelfClosing reports false
// for it, and hasEditableContent reports true for it. Text, comment and other
// non-element children are ignored. The result is nil when no child qualifies.
func findViableChildren(node *html.Node) []*html.Node {
	var viable []*html.Node

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		// Only element nodes can be edit targets. This also covers the
		// whitespace-only text nodes that sit between elements.
		if child.Type != html.ElementNode {
			continue
		}

		// Self-closing elements have no children, so hasEditableContent
		// would accept them; exclude them explicitly.
		if isSelfClosing(child) {
			continue
		}

		if hasEditableContent(child) {
			viable = append(viable, child)
		}
	}

	return viable
}

// findViableChildrenLegacy returns the direct children of node that are
// viable for editing under the older text-only rule, in sibling order. It is
// kept for backwards compatibility.
//
// A child is viable when it is an element node, isSelfClosing reports false
// for it, and hasOnlyTextContent reports true for it (it has no element
// children). Text, comment and other non-element children are ignored. The
// result is nil when no child qualifies.
func findViableChildrenLegacy(node *html.Node) []*html.Node {
	var viable []*html.Node

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		// Only element nodes can be edit targets. This also covers the
		// whitespace-only text nodes that sit between elements.
		if child.Type != html.ElementNode {
			continue
		}

		// Self-closing elements have no children, so hasOnlyTextContent
		// would accept them; exclude them explicitly.
		if isSelfClosing(child) {
			continue
		}

		if hasOnlyTextContent(child) {
			viable = append(viable, child)
		}
	}

	return viable
}

// isSelfClosing reports whether node is an element whose tag is typically
// self-closing: img, input, br, hr, meta, link, area, base, col, embed,
// source, track or wbr. Non-element nodes are never self-closing.
func isSelfClosing(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}

	// A switch over constant tag names needs no per-call allocation.
	switch node.Data {
	case "img", "input", "br", "hr", "meta", "link", "area",
		"base", "col", "embed", "source", "track", "wbr":
		return true
	default:
		return false
	}
}

// FindElementInDocument searches the tree rooted at doc for an element by tag
// and text content. It delegates to findElementWithContent, which accepts an
// element named tag that carries the "insertr" class and whose trimmed text
// content equals the trimmed content argument. It returns the first such node
// found, or nil when there is none.
func FindElementInDocument(doc *html.Node, tag, content string) *html.Node {
	return findElementWithContent(doc, tag, content)
}

// findElementWithContent walks the tree rooted at node depth-first, parents
// before children, and returns the first element that
//   - has the tag name targetTag,
//   - carries the "insertr" class, and
//   - has text content equal to targetContent once both are whitespace-trimmed.
//
// It returns nil when no element satisfies all three conditions.
func findElementWithContent(node *html.Node, targetTag, targetContent string) *html.Node {
	want := strings.TrimSpace(targetContent)

	var walk func(n *html.Node) *html.Node
	walk = func(n *html.Node) *html.Node {
		// Text is only extracted for elements that already match on tag
		// and class.
		isCandidate := n.Type == html.ElementNode &&
			n.Data == targetTag &&
			ContainsClass(GetClasses(n), "insertr")
		if isCandidate && strings.TrimSpace(extractTextContent(n)) == want {
			return n
		}

		for c := n.FirstChild; c != nil; c = c.NextSibling {
			if found := walk(c); found != nil {
				return found
			}
		}
		return nil
	}

	return walk(node)
}

// GetAttribute is the exported form of getAttribute. It returns the value of
// the first attribute on node whose key equals key, or "" when node has no
// such attribute.
func GetAttribute(node *html.Node, key string) string {
	return getAttribute(node, key)
}

// HasEditableContent is the exported form of hasEditableContent. It reports
// whether node is an element containing only text and safe inline formatting
// elements.
func HasEditableContent(node *html.Node) bool {
	return hasEditableContent(node)
}

// FindViableChildren is the exported form of findViableChildren. It returns
// the direct element children of node that are not self-closing and have
// editable content.
func FindViableChildren(node *html.Node) []*html.Node {
	return findViableChildren(node)
}