package engine

import (
	"strings"

	"golang.org/x/net/html"
)

// GetClasses returns the whitespace-separated entries of node's "class"
// attribute. When the attribute is missing or empty it returns an empty,
// non-nil slice.
func GetClasses(node *html.Node) []string {
	classAttr := getAttribute(node, "class")
	if classAttr == "" {
		// Keep the result non-nil so callers that distinguish nil from
		// empty (for example when encoding) see the same value as before.
		return []string{}
	}
	return strings.Fields(classAttr)
}

// ContainsClass reports whether target is present in classes. The comparison
// is an exact, case-sensitive string match.
func ContainsClass(classes []string, target string) bool {
	for _, class := range classes {
		if class == target {
			return true
		}
	}
	return false
}

// getAttribute returns the value of the first attribute on node whose key
// equals key. It returns "" when node is nil or no such attribute exists, so
// a missing attribute and an attribute with an empty value are not
// distinguishable by the caller.
func getAttribute(node *html.Node, key string) string {
	if node == nil {
		return ""
	}
	for _, attr := range node.Attr {
		if attr.Key == key {
			return attr.Val
		}
	}
	return ""
}

// extractTextContent returns the concatenated text found in node and its
// descendants, with leading and trailing whitespace trimmed. Text nested
// inside <script> and <style> child elements is left out.
func extractTextContent(node *html.Node) string {
	var text strings.Builder
	extractTextRecursive(node, &text)
	return strings.TrimSpace(text.String())
}

// extractTextRecursive appends the data of node (when it is a text node) and
// of every text node below it to text, in document order. A nil node
// contributes nothing.
//
// The script/style filter is applied to children only: if node itself is a
// <script> or <style> element, its own text children are still collected.
func extractTextRecursive(node *html.Node, text *strings.Builder) {
	if node == nil {
		return
	}
	if node.Type == html.TextNode {
		text.WriteString(node.Data)
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		// Script and style bodies are code, not readable content.
		if child.Type == html.ElementNode && (child.Data == "script" || child.Data == "style") {
			continue
		}
		extractTextRecursive(child, text)
	}
}

// hasOnlyTextContent reports whether node is an element with no direct
// element children. Text nodes, comments and any other non-element children
// are allowed. It returns false for a nil or non-element node.
//
// Deprecated: use hasEditableContent, which also accepts safe inline
// formatting elements.
func hasOnlyTextContent(node *html.Node) bool {
	if node == nil || node.Type != html.ElementNode {
		return false
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		if child.Type == html.ElementNode {
			// A nested element means the content is not text-only.
			return false
		}
	}
	return true
}

var (
	// inlineFormattingTags lists inline elements that may appear inside
	// editable content.
	inlineFormattingTags = map[string]bool{
		"strong": true,
		"b":      true,
		"em":     true,
		"i":      true,
		"span":   true,
		"code":   true,
		"small":  true,
		"sub":    true,
		"sup":    true,
		"a":      true, // Links within content are fine
	}

	// blockingElements lists elements that must not be nested within
	// editable content.
	blockingElements = map[string]bool{
		"button":   true, // Buttons shouldn't be nested in paragraphs
		"input":    true,
		"select":   true,
		"textarea": true,
		"img":      true,
		"video":    true,
		"audio":    true,
		"canvas":   true,
		"svg":      true,
		"iframe":   true,
		"object":   true,
		"embed":    true,
		"div":      true, // Nested divs usually indicate complex structure
		"section":  true, // Block-level semantic elements
		"article":  true,
		"header":   true,
		"footer":   true,
		"nav":      true,
		"aside":    true,
		"main":     true,
		"form":     true,
		"table":    true,
		"ul":       true,
		"ol":       true,
		"dl":       true,
	}
)

// hasEditableContent reports whether node is an element whose descendants
// consist solely of text, other non-element nodes, and elements listed in
// inlineFormattingTags. It returns false for a nil or non-element node.
func hasEditableContent(node *html.Node) bool {
	if node == nil || node.Type != html.ElementNode {
		return false
	}
	return hasOnlyTextAndSafeFormatting(node)
}

// hasOnlyTextAndSafeFormatting walks the children of node and reports whether
// every element found at any depth is an allowed inline formatting tag.
// Non-element children (text, comments, ...) never cause a rejection.
func hasOnlyTextAndSafeFormatting(node *html.Node) bool {
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		if child.Type != html.ElementNode {
			continue
		}

		// Explicitly blocked elements and elements that are simply not on
		// the allow-list are rejected alike.
		if blockingElements[child.Data] || !inlineFormattingTags[child.Data] {
			return false
		}

		// An allowed formatting element must itself contain only safe content.
		if !hasOnlyTextAndSafeFormatting(child) {
			return false
		}
	}
	return true
}

// isContainer reports whether node is an element whose tag is one of div,
// section, article, header, footer, main, aside or nav. It returns false for
// a nil or non-element node.
func isContainer(node *html.Node) bool {
	if node == nil || node.Type != html.ElementNode {
		return false
	}

	// A switch avoids building a lookup map on every call.
	switch node.Data {
	case "div", "section", "article", "header", "footer", "main", "aside", "nav":
		return true
	default:
		return false
	}
}

// findViableChildren returns the direct element children of node that are not
// self-closing and satisfy hasEditableContent, in document order. The result
// is nil when there are none.
func findViableChildren(node *html.Node) []*html.Node {
	return filterElementChildren(node, hasEditableContent)
}

// findViableChildrenLegacy is the backwards-compatible variant of
// findViableChildren that selects children with the older text-only check,
// hasOnlyTextContent.
func findViableChildrenLegacy(node *html.Node) []*html.Node {
	return filterElementChildren(node, hasOnlyTextContent)
}

// filterElementChildren collects the direct children of node that are
// elements, are not self-closing, and for which keep returns true. Every
// non-element child (text, comments, ...) is ignored. A nil node yields nil.
func filterElementChildren(node *html.Node, keep func(*html.Node) bool) []*html.Node {
	if node == nil {
		return nil
	}

	var viable []*html.Node
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		if child.Type != html.ElementNode || isSelfClosing(child) {
			continue
		}
		if keep(child) {
			viable = append(viable, child)
		}
	}
	return viable
}

// isSelfClosing reports whether node is an element whose tag is one of img,
// input, br, hr, meta, link, area, base, col, embed, source, track or wbr.
// It returns false for a nil or non-element node.
func isSelfClosing(node *html.Node) bool {
	if node == nil || node.Type != html.ElementNode {
		return false
	}

	// A switch avoids building a lookup map on every call; this function is
	// invoked once per child while scanning for viable children.
	switch node.Data {
	case "img", "input", "br", "hr", "meta", "link", "area",
		"base", "col", "embed", "source", "track", "wbr":
		return true
	default:
		return false
	}
}

// Note: the FindElementInDocument functions were removed; they will be
// reimplemented in engine if needed.

// GetAttribute is the exported form of getAttribute: it returns the value of
// the attribute named key on node, or "" when node is nil or the attribute is
// absent.
func GetAttribute(node *html.Node, key string) string {
	return getAttribute(node, key)
}

// HasEditableContent is the exported form of hasEditableContent: it reports
// whether node is an element containing only text and safe inline formatting.
func HasEditableContent(node *html.Node) bool {
	return hasEditableContent(node)
}

// FindViableChildren is the exported form of findViableChildren: it returns
// the direct element children of node that are viable for editing.
func FindViableChildren(node *html.Node) []*html.Node {
	return findViableChildren(node)
}