package engine

import (
	"strings"

	"golang.org/x/net/html"
)

// GetClasses returns the whitespace-separated entries of node's "class"
// attribute. It returns an empty slice when the attribute is missing or empty
// (or when node is nil).
func GetClasses(node *html.Node) []string {
	classAttr := getAttribute(node, "class")
	if classAttr == "" {
		return []string{}
	}
	return strings.Fields(classAttr)
}

// ContainsClass reports whether target appears in classes. The comparison is
// an exact, case-sensitive string match.
func ContainsClass(classes []string, target string) bool {
	for _, class := range classes {
		if class == target {
			return true
		}
	}
	return false
}

// getAttribute returns the value of the first attribute on node whose key
// equals key, or "" when there is no such attribute or node is nil.
func getAttribute(node *html.Node, key string) string {
	if node == nil {
		return ""
	}
	for _, attr := range node.Attr {
		if attr.Key == key {
			return attr.Val
		}
	}
	return ""
}

// hasOnlyTextContent reports whether node is an element whose direct children
// contain no element nodes. Text, comments and other non-element children are
// all accepted.
//
// Deprecated: Use hasEditableContent for more sophisticated detection.
func hasOnlyTextContent(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		// Any nested HTML element means the node is not text-only.
		if child.Type == html.ElementNode {
			return false
		}
	}
	return true
}

// inlineFormattingTags lists inline formatting elements that are safe to keep
// inside editable content.
var inlineFormattingTags = map[string]bool{
	"strong": true,
	"b":      true,
	"em":     true,
	"i":      true,
	"span":   true,
	"code":   true,
	"small":  true,
	"sub":    true,
	"sup":    true,
	"a":      true, // Links within content are fine
}

// blockingElements lists elements that should NOT be nested within editable
// content.
var blockingElements = map[string]bool{
	"button":   true, // Buttons shouldn't be nested in paragraphs
	"input":    true,
	"select":   true,
	"textarea": true,
	"img":      true,
	"video":    true,
	"audio":    true,
	"canvas":   true,
	"svg":      true,
	"iframe":   true,
	"object":   true,
	"embed":    true,
	"div":      true, // Nested divs usually indicate complex structure
	"section":  true, // Block-level semantic elements
	"article":  true,
	"header":   true,
	"footer":   true,
	"nav":      true,
	"aside":    true,
	"main":     true,
	"form":     true,
	"table":    true,
	"ul":       true,
	"ol":       true,
	"dl":       true,
}

// hasEditableContent reports whether node is an element whose descendants
// consist only of text and the inline formatting elements listed in
// inlineFormattingTags. It returns false for nil and for non-element nodes.
func hasEditableContent(node *html.Node) bool {
	if node == nil || node.Type != html.ElementNode {
		return false
	}
	return hasOnlyTextAndSafeFormatting(node)
}

// hasOnlyTextAndSafeFormatting recursively checks the children of node. It
// returns false as soon as it meets an element that is in blockingElements or
// that is not in inlineFormattingTags; non-element children are ignored.
func hasOnlyTextAndSafeFormatting(node *html.Node) bool {
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		if child.Type != html.ElementNode {
			// Text, comments, etc. never disqualify the content.
			continue
		}
		if blockingElements[child.Data] {
			return false
		}
		if !inlineFormattingTags[child.Data] {
			// Unknown/unsafe element.
			return false
		}
		// A formatting element is only safe if its own content is safe too.
		if !hasOnlyTextAndSafeFormatting(child) {
			return false
		}
	}
	return true
}

// isContainer reports whether node is an element whose tag is one of the
// container tags below (structural elements and lists).
func isContainer(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}
	switch node.Data {
	case "div", "section", "article", "header", "footer", "main", "aside", "nav",
		"ul", "ol": // Lists are containers
		return true
	}
	return false
}

// findViableChildren returns the descendant elements of node that should get
// the .insertr class, in document order. The result is nil when there are
// none.
func findViableChildren(node *html.Node) []*html.Node {
	var viable []*html.Node
	traverseForViableElements(node, &viable)
	return viable
}

// traverseForViableElements walks the descendants of node and appends every
// element accepted by shouldGetInsertrClass to *viable. It does not descend
// into elements that already carry the insertr class, into deferred elements
// (see isDeferredElement), or into elements it has just accepted. A nil node
// is a no-op.
func traverseForViableElements(node *html.Node, viable *[]*html.Node) {
	if node == nil {
		return
	}
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		switch {
		case child.Type != html.ElementNode:
			// Only element nodes are considered.
		case hasInsertrClass(child):
			// BOUNDARY: element already has the .insertr class.
		case isDeferredElement(child):
			// Complex elements (tables, forms) are skipped entirely.
		case shouldGetInsertrClass(child):
			// Accepted; its children are not traversed separately.
			*viable = append(*viable, child)
		default:
			// Neither accepted nor excluded: keep searching below it.
			traverseForViableElements(child, viable)
		}
	}
}

// isBlockElement reports whether node.Data is one of the tags classified as
// block-level. Only node.Data is inspected; the node type is not checked.
func isBlockElement(node *html.Node) bool {
	switch node.Data {
	case "h1", "h2", "h3", "h4", "h5", "h6", "p", "div", "article", "section",
		"nav", "header", "footer", "main", "aside", // Content blocks
		"ul", "ol", "li", // Lists
		"button", "a", "img", "video", "audio": // Interactive/media
		return true
	}
	return false
}

// isInlineElement reports whether node.Data is an inline formatting tag. Such
// elements never get .insertr (see shouldGetInsertrClass).
func isInlineElement(node *html.Node) bool {
	switch node.Data {
	case "strong", "b", "em", "i", "span", "code", "small", "sub", "sup", "br",
		"mark", "kbd":
		return true
	}
	return false
}

// isContextSensitive reports whether node.Data is "a" or "button", the tags
// whose treatment depends on their parent (see isInBlockContext).
func isContextSensitive(node *html.Node) bool {
	switch node.Data {
	case "a", "button":
		return true
	}
	return false
}

// isInBlockContext reports whether node should be treated as block-level
// based on its parent. It returns true when the parent is nil or not an
// element, and false when the parent is a text-content element (p, h1-h6, li,
// td, th); any other parent yields true.
func isInBlockContext(node *html.Node) bool {
	parent := node.Parent
	if parent == nil || parent.Type != html.ElementNode {
		return true
	}
	switch parent.Data {
	case "p", "h1", "h2", "h3", "h4", "h5", "h6", "li", "td", "th":
		// Inside a content element the node acts as inline formatting.
		return false
	}
	return true
}

// shouldGetInsertrClass reports whether node should receive the .insertr
// class: context-sensitive elements (a, button) only when in block context,
// other block elements always, everything else never.
func shouldGetInsertrClass(node *html.Node) bool {
	switch {
	case isContextSensitive(node):
		// a/button depend on where they appear.
		return isInBlockContext(node)
	case isBlockElement(node):
		return true
	case isInlineElement(node):
		return false
	case isSelfClosing(node):
		// NOTE(review): "img" is already accepted by isBlockElement above, so
		// this expression is always false here and img gets .insertr even
		// inside a paragraph. Confirm whether img was meant to be
		// context-sensitive before changing this.
		return node.Data == "img" && isInBlockContext(node)
	default:
		return false
	}
}

// isDeferredElement reports whether node.Data is a table- or form-related tag.
// traverseForViableElements skips these elements and everything below them.
func isDeferredElement(node *html.Node) bool {
	switch node.Data {
	case "table", "tr", "td", "th", "thead", "tbody", "tfoot",
		"form", "input", "textarea", "select", "option":
		return true
	}
	return false
}

// hasInsertrClass reports whether node's class attribute contains "insertr".
func hasInsertrClass(node *html.Node) bool {
	return ContainsClass(GetClasses(node), "insertr")
}

// isSelfClosing reports whether node is an element whose tag is one of the
// void/self-closing tags listed below.
func isSelfClosing(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}
	switch node.Data {
	case "img", "input", "br", "hr", "meta", "link", "area", "base", "col",
		"embed", "source", "track", "wbr":
		return true
	}
	return false
}

// FindElementInDocument returns the first element in the tree rooted at doc
// (depth-first, document order) that has the given tag, carries the insertr
// class, and whose trimmed text content equals the trimmed content. It returns
// nil when no element matches or doc is nil.
func FindElementInDocument(doc *html.Node, tag, content string) *html.Node {
	return findElementWithContent(doc, tag, content)
}

// findElementWithContent is the recursive implementation behind
// FindElementInDocument. node itself is tested before its children.
func findElementWithContent(node *html.Node, targetTag, targetContent string) *html.Node {
	if node == nil {
		return nil
	}
	normalizedTarget := strings.TrimSpace(targetContent)

	if node.Type == html.ElementNode && node.Data == targetTag && hasInsertrClass(node) {
		// ExtractTextContent already trims surrounding whitespace, so its
		// result can be compared with the normalized target directly.
		if ExtractTextContent(node) == normalizedTarget {
			return node
		}
	}

	// Recursively search children.
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		if result := findElementWithContent(child, targetTag, normalizedTarget); result != nil {
			return result
		}
	}
	return nil
}

// GetAttribute returns the value of the attribute named key on node, or ""
// when it is absent. It is the exported form of getAttribute.
func GetAttribute(node *html.Node, key string) string {
	return getAttribute(node, key)
}

// HasEditableContent reports whether node has editable content. It is the
// exported form of hasEditableContent.
func HasEditableContent(node *html.Node) bool {
	return hasEditableContent(node)
}

// FindViableChildren returns the descendants of node that are viable for
// editing. It is the exported form of findViableChildren.
func FindViableChildren(node *html.Node) []*html.Node {
	return findViableChildren(node)
}

// Text extraction utility functions

// ExtractTextContent concatenates the data of every text node at or below
// node, in document order, and returns the result with leading and trailing
// whitespace trimmed. Descendant script and style elements are skipped. A nil
// node yields "".
func ExtractTextContent(node *html.Node) string {
	var text strings.Builder
	extractTextRecursiveUnified(node, &text)
	return strings.TrimSpace(text.String())
}

// extractTextRecursiveUnified appends the text found at or below node to text.
// The script/style filter is applied to children only, so a script or style
// node passed in directly is still traversed.
func extractTextRecursiveUnified(node *html.Node, text *strings.Builder) {
	if node == nil {
		return
	}
	if node.Type == html.TextNode {
		text.WriteString(node.Data)
	}
	for child := node.FirstChild; child != nil; child = child.NextSibling {
		// Skip script and style elements.
		if child.Type == html.ElementNode && (child.Data == "script" || child.Data == "style") {
			continue
		}
		extractTextRecursiveUnified(child, text)
	}
}