- Move addClass and setAttribute from ContentEngine/Injector to utils.go - Remove duplicate hasInsertrClass implementation - Add RemoveClass and HasClass utilities for completeness - Eliminates 74+ lines of exact duplication across files
453 lines
11 KiB
Go
453 lines
11 KiB
Go
package engine
|
|
|
|
import (
|
|
"strings"
|
|
|
|
"golang.org/x/net/html"
|
|
"slices"
|
|
)
|
|
|
|
func GetClasses(node *html.Node) []string {
|
|
classAttr := GetAttribute(node, "class")
|
|
if classAttr == "" {
|
|
return []string{}
|
|
}
|
|
|
|
classes := strings.Fields(classAttr)
|
|
return classes
|
|
}
|
|
|
|
// ContainsClass reports whether target appears in classes as an exact,
// case-sensitive match.
func ContainsClass(classes []string, target string) bool {
	return slices.Index(classes, target) >= 0
}
|
|
|
|
// GetAttribute gets an attribute value from an HTML node
|
|
func GetAttribute(node *html.Node, key string) string {
|
|
for _, attr := range node.Attr {
|
|
if attr.Key == key {
|
|
return attr.Val
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// SetAttribute sets an attribute on an HTML node
|
|
func SetAttribute(node *html.Node, key, value string) {
|
|
// Check for existing attribute and update in place
|
|
for i, attr := range node.Attr {
|
|
if attr.Key == key {
|
|
node.Attr[i].Val = value
|
|
return
|
|
}
|
|
}
|
|
// Add new attribute if not found
|
|
node.Attr = append(node.Attr, html.Attribute{
|
|
Key: key,
|
|
Val: value,
|
|
})
|
|
}
|
|
|
|
// AddClass safely adds a class to an HTML node
|
|
func AddClass(node *html.Node, className string) {
|
|
var classAttr *html.Attribute
|
|
var classIndex int = -1
|
|
|
|
// Find existing class attribute
|
|
for idx, attr := range node.Attr {
|
|
if attr.Key == "class" {
|
|
classAttr = &attr
|
|
classIndex = idx
|
|
break
|
|
}
|
|
}
|
|
|
|
var classes []string
|
|
if classAttr != nil {
|
|
classes = strings.Fields(classAttr.Val)
|
|
}
|
|
|
|
// Check if class already exists
|
|
if slices.Contains(classes, className) {
|
|
return
|
|
}
|
|
|
|
// Add new class
|
|
classes = append(classes, className)
|
|
newClassValue := strings.Join(classes, " ")
|
|
|
|
if classIndex >= 0 {
|
|
// Update existing class attribute
|
|
node.Attr[classIndex].Val = newClassValue
|
|
} else {
|
|
// Add new class attribute
|
|
node.Attr = append(node.Attr, html.Attribute{
|
|
Key: "class",
|
|
Val: newClassValue,
|
|
})
|
|
}
|
|
}
|
|
|
|
// RemoveClass safely removes a class from an HTML node
|
|
func RemoveClass(node *html.Node, className string) {
|
|
var classIndex int = -1
|
|
|
|
// Find existing class attribute
|
|
for idx, attr := range node.Attr {
|
|
if attr.Key == "class" {
|
|
classIndex = idx
|
|
break
|
|
}
|
|
}
|
|
|
|
if classIndex == -1 {
|
|
return // No class attribute found
|
|
}
|
|
|
|
// Parse existing classes
|
|
classes := strings.Fields(node.Attr[classIndex].Val)
|
|
|
|
// Filter out the target class
|
|
var newClasses []string
|
|
for _, class := range classes {
|
|
if class != className {
|
|
newClasses = append(newClasses, class)
|
|
}
|
|
}
|
|
|
|
// Update or remove class attribute
|
|
if len(newClasses) == 0 {
|
|
// Remove class attribute entirely if no classes remain
|
|
node.Attr = slices.Delete(node.Attr, classIndex, classIndex+1)
|
|
} else {
|
|
// Update class attribute with remaining classes
|
|
node.Attr[classIndex].Val = strings.Join(newClasses, " ")
|
|
}
|
|
}
|
|
|
|
// HasClass checks if a node has a specific class
|
|
func HasClass(node *html.Node, className string) bool {
|
|
for _, attr := range node.Attr {
|
|
if attr.Key == "class" {
|
|
classes := strings.Fields(attr.Val)
|
|
if slices.Contains(classes, className) {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// inlineFormattingTags is the set of inline elements that
// hasOnlyTextAndSafeFormatting accepts inside editable content.
var inlineFormattingTags = map[string]bool{
	// Emphasis and generic inline wrappers
	"strong": true, "b": true, "em": true, "i": true, "span": true,
	// Code and size/position variants
	"code": true, "small": true, "sub": true, "sup": true,
	// Links within content are fine
	"a": true,
}
|
|
|
|
// blockingElements is the set of tags that must NOT appear nested within
// editable content; hasOnlyTextAndSafeFormatting rejects content containing
// any of them.
var blockingElements = map[string]bool{
	// Form controls (buttons shouldn't be nested in paragraphs)
	"button": true, "input": true, "select": true, "textarea": true,
	// Media and embedded content
	"img": true, "video": true, "audio": true, "canvas": true, "svg": true,
	"iframe": true, "object": true, "embed": true,
	// Nested divs usually indicate complex structure
	"div": true,
	// Block-level semantic elements
	"section": true, "article": true, "header": true, "footer": true,
	"nav": true, "aside": true, "main": true,
	// Forms, tables and lists
	"form": true, "table": true, "ul": true, "ol": true, "dl": true,
}
|
|
|
|
// HasEditableContent checks if a node contains content that can be safely edited
|
|
// This includes text and safe inline formatting elements
|
|
func HasEditableContent(node *html.Node) bool {
|
|
if node.Type != html.ElementNode {
|
|
return false
|
|
}
|
|
|
|
return hasOnlyTextAndSafeFormatting(node)
|
|
}
|
|
|
|
// hasOnlyTextAndSafeFormatting recursively checks if content is safe for editing
|
|
func hasOnlyTextAndSafeFormatting(node *html.Node) bool {
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
switch child.Type {
|
|
case html.TextNode:
|
|
continue // Text is always safe
|
|
case html.ElementNode:
|
|
// Check if it's a blocking element
|
|
if blockingElements[child.Data] {
|
|
return false
|
|
}
|
|
// Allow safe inline formatting
|
|
if inlineFormattingTags[child.Data] {
|
|
// Recursively validate the formatting element
|
|
if !hasOnlyTextAndSafeFormatting(child) {
|
|
return false
|
|
}
|
|
continue
|
|
}
|
|
// Unknown/unsafe element
|
|
return false
|
|
default:
|
|
continue // Comments, whitespace, etc.
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
|
|
// isContainer checks if a tag is typically used as a container element
|
|
func isContainer(node *html.Node) bool {
|
|
if node.Type != html.ElementNode {
|
|
return false
|
|
}
|
|
|
|
containerTags := map[string]bool{
|
|
"div": true,
|
|
"section": true,
|
|
"article": true,
|
|
"header": true,
|
|
"footer": true,
|
|
"main": true,
|
|
"aside": true,
|
|
"nav": true,
|
|
"ul": true,
|
|
"ol": true,
|
|
}
|
|
|
|
return containerTags[node.Data]
|
|
}
|
|
|
|
// FindViableChildren finds all descendant elements that should get .insertr class
|
|
func FindViableChildren(node *html.Node) []*html.Node {
|
|
var viable []*html.Node
|
|
traverseForViableElements(node, &viable)
|
|
return viable
|
|
}
|
|
|
|
// traverseForViableElements recursively traverses all descendants, stopping at .insertr boundaries
|
|
func traverseForViableElements(node *html.Node, viable *[]*html.Node) {
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
// Only consider element nodes
|
|
if child.Type != html.ElementNode {
|
|
continue
|
|
}
|
|
|
|
// BOUNDARY: Stop if element already has .insertr class
|
|
if hasInsertrClass(child) {
|
|
continue
|
|
}
|
|
|
|
// Skip deferred complex elements (tables, forms)
|
|
if isDeferredElement(child) {
|
|
continue
|
|
}
|
|
|
|
// Determine if this element should get .insertr
|
|
if shouldGetInsertrClass(child) {
|
|
*viable = append(*viable, child)
|
|
// Don't traverse children - they're handled by this element's expansion
|
|
continue
|
|
}
|
|
|
|
// Continue traversing if this is just a container
|
|
traverseForViableElements(child, viable)
|
|
}
|
|
}
|
|
|
|
// Phase 3: Block vs Inline element classification
|
|
func isBlockElement(node *html.Node) bool {
|
|
blockTags := map[string]bool{
|
|
// Content blocks
|
|
"h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
|
|
"p": true, "div": true, "article": true, "section": true, "nav": true,
|
|
"header": true, "footer": true, "main": true, "aside": true,
|
|
// Lists
|
|
"ul": true, "ol": true, "li": true,
|
|
// Interactive (when at block level)
|
|
"button": true, "a": true, "img": true, "video": true, "audio": true,
|
|
}
|
|
|
|
return blockTags[node.Data]
|
|
}
|
|
|
|
// isInlineElement checks if element is inline formatting (never gets .insertr)
|
|
func isInlineElement(node *html.Node) bool {
|
|
inlineTags := map[string]bool{
|
|
"strong": true, "b": true, "em": true, "i": true, "span": true,
|
|
"code": true, "small": true, "sub": true, "sup": true, "br": true,
|
|
"mark": true, "kbd": true,
|
|
}
|
|
|
|
return inlineTags[node.Data]
|
|
}
|
|
|
|
// isContextSensitive checks if element can be block or inline (a, button)
|
|
func isContextSensitive(node *html.Node) bool {
|
|
contextTags := map[string]bool{
|
|
"a": true,
|
|
"button": true,
|
|
}
|
|
|
|
return contextTags[node.Data]
|
|
}
|
|
|
|
// isInBlockContext determines if context-sensitive element should be treated as block
|
|
func isInBlockContext(node *html.Node) bool {
|
|
parent := node.Parent
|
|
if parent == nil || parent.Type != html.ElementNode {
|
|
return true
|
|
}
|
|
|
|
// If parent is a content element, this is inline formatting
|
|
contentElements := map[string]bool{
|
|
"p": true, "h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true,
|
|
"li": true, "td": true, "th": true,
|
|
}
|
|
|
|
return !contentElements[parent.Data]
|
|
}
|
|
|
|
// shouldGetInsertrClass determines if element should receive .insertr class
|
|
func shouldGetInsertrClass(node *html.Node) bool {
|
|
// Always block elements get .insertr
|
|
if isBlockElement(node) && !isContextSensitive(node) {
|
|
return true
|
|
}
|
|
|
|
// Context-sensitive elements depend on parent context
|
|
if isContextSensitive(node) {
|
|
return isInBlockContext(node)
|
|
}
|
|
|
|
// Inline elements never get .insertr
|
|
if isInlineElement(node) {
|
|
return false
|
|
}
|
|
|
|
// Self-closing elements - only img gets .insertr when block-level
|
|
if isSelfClosing(node) {
|
|
return node.Data == "img" && isInBlockContext(node)
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// isDeferredElement checks for complex elements that need separate planning
|
|
func isDeferredElement(node *html.Node) bool {
|
|
deferredTags := map[string]bool{
|
|
"table": true, "tr": true, "td": true, "th": true,
|
|
"thead": true, "tbody": true, "tfoot": true,
|
|
"form": true, "input": true, "textarea": true, "select": true, "option": true,
|
|
}
|
|
|
|
return deferredTags[node.Data]
|
|
}
|
|
|
|
// hasInsertrClass checks if node has class="insertr"
|
|
func hasInsertrClass(node *html.Node) bool {
|
|
classes := GetClasses(node)
|
|
return slices.Contains(classes, "insertr")
|
|
}
|
|
|
|
// isSelfClosing checks if an element is typically self-closing
|
|
func isSelfClosing(node *html.Node) bool {
|
|
if node.Type != html.ElementNode {
|
|
return false
|
|
}
|
|
|
|
selfClosingTags := map[string]bool{
|
|
"img": true,
|
|
"input": true,
|
|
"br": true,
|
|
"hr": true,
|
|
"meta": true,
|
|
"link": true,
|
|
"area": true,
|
|
"base": true,
|
|
"col": true,
|
|
"embed": true,
|
|
"source": true,
|
|
"track": true,
|
|
"wbr": true,
|
|
}
|
|
|
|
return selfClosingTags[node.Data]
|
|
}
|
|
|
|
// FindElementInDocument finds an element in HTML document tree using content matching
|
|
func FindElementInDocument(doc *html.Node, tag, content string) *html.Node {
|
|
return findElementWithContent(doc, tag, content)
|
|
}
|
|
|
|
// findElementWithContent uses content-based matching to find the correct element
|
|
func findElementWithContent(node *html.Node, targetTag, targetContent string) *html.Node {
|
|
normalizedTarget := strings.TrimSpace(targetContent)
|
|
|
|
if node.Type == html.ElementNode && node.Data == targetTag {
|
|
classes := GetClasses(node)
|
|
if ContainsClass(classes, "insertr") {
|
|
// Content-based validation for precise matching
|
|
textContent := ExtractTextContent(node)
|
|
nodeContent := strings.TrimSpace(textContent)
|
|
|
|
if nodeContent == normalizedTarget {
|
|
return node
|
|
}
|
|
}
|
|
}
|
|
|
|
// Recursively search children
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
if result := findElementWithContent(child, targetTag, normalizedTarget); result != nil {
|
|
return result
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// ExtractTextContent extracts all text content from an HTML node recursively
|
|
func ExtractTextContent(node *html.Node) string {
|
|
var text strings.Builder
|
|
extractTextRecursiveUnified(node, &text)
|
|
return strings.TrimSpace(text.String())
|
|
}
|
|
|
|
func extractTextRecursiveUnified(node *html.Node, text *strings.Builder) {
|
|
if node.Type == html.TextNode {
|
|
text.WriteString(node.Data)
|
|
}
|
|
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
|
// Skip script and style elements
|
|
if child.Type == html.ElementNode &&
|
|
(child.Data == "script" || child.Data == "style") {
|
|
continue
|
|
}
|
|
extractTextRecursiveUnified(child, text)
|
|
}
|
|
}
|