package parser import ( "strings" "golang.org/x/net/html" ) // GetClasses extracts CSS classes from an HTML node func GetClasses(node *html.Node) []string { classAttr := getAttribute(node, "class") if classAttr == "" { return []string{} } classes := strings.Fields(classAttr) return classes } // ContainsClass checks if a class list contains a specific class func ContainsClass(classes []string, target string) bool { for _, class := range classes { if class == target { return true } } return false } // getAttribute gets an attribute value from an HTML node func getAttribute(node *html.Node, key string) string { for _, attr := range node.Attr { if attr.Key == key { return attr.Val } } return "" } // extractTextContent gets the text content from an HTML node func extractTextContent(node *html.Node) string { var text strings.Builder extractTextRecursive(node, &text) return strings.TrimSpace(text.String()) } // extractTextRecursive recursively extracts text from node and children func extractTextRecursive(node *html.Node, text *strings.Builder) { if node.Type == html.TextNode { text.WriteString(node.Data) } for child := node.FirstChild; child != nil; child = child.NextSibling { // Skip script and style elements if child.Type == html.ElementNode && (child.Data == "script" || child.Data == "style") { continue } extractTextRecursive(child, text) } } // hasOnlyTextContent checks if a node contains only text content (no nested HTML elements) func hasOnlyTextContent(node *html.Node) bool { if node.Type != html.ElementNode { return false } for child := node.FirstChild; child != nil; child = child.NextSibling { switch child.Type { case html.ElementNode: // Found a nested HTML element - not text-only return false case html.TextNode: // Text nodes are fine, continue checking continue default: // Comments, etc. - continue checking continue } } return true } // isContainer checks if a tag is typically used as a container element func isContainer(node *html.Node) bool { if node.Type != html.ElementNode { return false } containerTags := map[string]bool{ "div": true, "section": true, "article": true, "header": true, "footer": true, "main": true, "aside": true, "nav": true, } return containerTags[node.Data] } // findViableChildren finds all child elements that are viable for editing func findViableChildren(node *html.Node) []*html.Node { var viable []*html.Node for child := node.FirstChild; child != nil; child = child.NextSibling { // Skip whitespace-only text nodes if child.Type == html.TextNode { if strings.TrimSpace(child.Data) == "" { continue } } // Only consider element nodes if child.Type != html.ElementNode { continue } // Skip self-closing elements for now if isSelfClosing(child) { continue } // Check if element has only text content if hasOnlyTextContent(child) { viable = append(viable, child) } } return viable } // isSelfClosing checks if an element is typically self-closing func isSelfClosing(node *html.Node) bool { if node.Type != html.ElementNode { return false } selfClosingTags := map[string]bool{ "img": true, "input": true, "br": true, "hr": true, "meta": true, "link": true, "area": true, "base": true, "col": true, "embed": true, "source": true, "track": true, "wbr": true, } return selfClosingTags[node.Data] } // FindElementInDocument finds a parser element in HTML document tree using semantic matching func FindElementInDocument(doc *html.Node, element Element) *html.Node { return findElementWithContext(doc, element) } // findElementWithContext uses the parser's semantic understanding to find the correct element func findElementWithContext(node *html.Node, target Element) *html.Node { if node.Type == html.ElementNode && node.Data == target.Tag { classes := GetClasses(node) if ContainsClass(classes, "insertr") { // Content-based validation for precise matching textContent := extractTextContent(node) nodeContent := strings.TrimSpace(textContent) targetContent := strings.TrimSpace(target.Content) if nodeContent == targetContent { return node } } } // Recursively search children for child := node.FirstChild; child != nil; child = child.NextSibling { if result := findElementWithContext(child, target); result != nil { return result } } return nil } // GetAttribute gets an attribute value from an HTML node (exported version) func GetAttribute(node *html.Node, key string) string { return getAttribute(node, key) }