Files
insertr/internal/engine/utils.go
Joakim b75eda2a87 feat: complete HTML-first architecture implementation (Phase 1 & 2)
Major architectural simplification removing content type complexity:

Database Schema:
- Remove 'type' field from content and content_versions tables
- Simplify to pure HTML storage with html_content + original_template
- Regenerate all sqlc models for SQLite and PostgreSQL

API Simplification:
- Remove content type routing and validation
- Eliminate type-specific handlers (text/markdown/structured)
- Unified HTML-first approach for all content operations
- Simplify CreateContent and UpdateContent to HTML-only

Backend Enhancements:
- Update enhancer to only generate data-content-id (no data-content-type)
- Improve container expansion utilities with comprehensive block/inline rules
- Add Phase 3 preparation with boundary-respecting traversal logic
- Strengthen element classification for viable children detection

Documentation:
- Update TODO.md to reflect Phase 1-3 completion status
- Add WORKING_ON.md documenting the architectural transformation
- Mark container expansion and HTML-first architecture as complete

This completes the transition to a unified HTML-first content management system
with automatic style detection and element-based behavior, eliminating the
complex multi-type system in favor of semantic HTML-driven editing.
2025-09-21 19:23:54 +02:00

425 lines
11 KiB
Go

package engine
import (
"strings"
"golang.org/x/net/html"
)
// GetClasses returns the CSS class names listed in node's "class" attribute,
// split on whitespace. When the attribute is absent or empty the result is an
// empty, non-nil slice.
func GetClasses(node *html.Node) []string {
	if raw := getAttribute(node, "class"); raw != "" {
		return strings.Fields(raw)
	}
	return []string{}
}
// ContainsClass reports whether target occurs in classes. Entries are compared
// with exact, case-sensitive string equality.
func ContainsClass(classes []string, target string) bool {
	for i := range classes {
		if classes[i] == target {
			return true
		}
	}
	return false
}
// getAttribute returns the value of the first attribute on node whose key is
// exactly key. It returns the empty string when no attribute matches.
func getAttribute(node *html.Node, key string) string {
	attrs := node.Attr
	for i := 0; i < len(attrs); i++ {
		if attrs[i].Key != key {
			continue
		}
		return attrs[i].Val
	}
	return ""
}
// extractTextContent collects the text found under node (via
// extractTextRecursive) and returns it with leading and trailing whitespace
// removed.
func extractTextContent(node *html.Node) string {
	buf := new(strings.Builder)
	extractTextRecursive(node, buf)
	collected := buf.String()
	return strings.TrimSpace(collected)
}
// extractTextRecursive appends the data of node (when it is a text node) and
// of every text node beneath it to text, in document order. Child <script>
// and <style> elements are not descended into; the check applies to children
// only, not to node itself.
func extractTextRecursive(node *html.Node, text *strings.Builder) {
	if node.Type == html.TextNode {
		text.WriteString(node.Data)
	}

	child := node.FirstChild
	for child != nil {
		skip := false
		if child.Type == html.ElementNode {
			switch child.Data {
			case "script", "style":
				// Script and style bodies are not content text.
				skip = true
			}
		}
		if !skip {
			extractTextRecursive(child, text)
		}
		child = child.NextSibling
	}
}
// hasOnlyTextContent reports whether node is an element none of whose direct
// children is an element. Text, comments and other non-element children are
// allowed, and an element without children also qualifies.
//
// Deprecated: use hasEditableContent for more sophisticated detection.
func hasOnlyTextContent(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		if c.Type == html.ElementNode {
			// A nested element means the content is not text-only.
			return false
		}
	}
	return true
}
// inlineFormattingTags is the set of inline elements that may appear inside
// editable content without making it unsafe to edit.
var inlineFormattingTags = map[string]bool{
	// Emphasis and weight.
	"strong": true, "b": true, "em": true, "i": true,
	// Generic and typographic wrappers.
	"span": true, "code": true, "small": true, "sub": true, "sup": true,
	// Links within content are fine.
	"a": true,
}
// blockingElements is the set of elements whose presence inside a node means
// the node's content must not be treated as directly editable.
var blockingElements = map[string]bool{
	// Form controls; buttons shouldn't be nested in paragraphs.
	"button": true, "input": true, "select": true, "textarea": true,
	// Media and embedded content.
	"img": true, "video": true, "audio": true, "canvas": true,
	"svg": true, "iframe": true, "object": true, "embed": true,
	// Nested divs usually indicate complex structure.
	"div": true,
	// Block-level semantic elements.
	"section": true, "article": true, "header": true, "footer": true,
	"nav": true, "aside": true, "main": true,
	// Forms, tables and lists.
	"form": true, "table": true, "ul": true, "ol": true, "dl": true,
}
// hasEditableContent reports whether node is an element whose descendants are
// limited to text and safe inline formatting elements, as decided by
// hasOnlyTextAndSafeFormatting. Non-element nodes are never editable.
func hasEditableContent(node *html.Node) bool {
	isElement := node.Type == html.ElementNode
	return isElement && hasOnlyTextAndSafeFormatting(node)
}
// hasOnlyTextAndSafeFormatting reports whether every element found beneath
// node is an allowed inline formatting tag. Non-element children (text,
// comments, ...) are always accepted. An element child is rejected when it is
// listed in blockingElements or is missing from inlineFormattingTags; accepted
// formatting elements are validated recursively.
func hasOnlyTextAndSafeFormatting(node *html.Node) bool {
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		if c.Type != html.ElementNode {
			continue
		}
		tag := c.Data
		// Blocking elements and anything not explicitly allowed are unsafe.
		if blockingElements[tag] || !inlineFormattingTags[tag] {
			return false
		}
		// The formatting element itself must contain only safe content.
		if !hasOnlyTextAndSafeFormatting(c) {
			return false
		}
	}
	return true
}
// isContainer reports whether node is an element whose tag is typically used
// as a container: div, section, article, header, footer, main, aside, nav, ul
// or ol. Non-element nodes are never containers.
//
// The tag set is matched with a switch so that no map is built on each call.
func isContainer(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}
	switch node.Data {
	case "div", "section", "article", "header", "footer", "main", "aside", "nav",
		"ul", "ol": // Phase 3: lists are containers
		return true
	}
	return false
}
// findViableChildren returns the descendants of node that should receive the
// .insertr class, in document order. The result is nil when none are found.
// Phase 3: the search is recursive, classifies elements as block or inline,
// and respects existing .insertr boundaries (see traverseForViableElements).
func findViableChildren(node *html.Node) []*html.Node {
	found := new([]*html.Node)
	traverseForViableElements(node, found)
	return *found
}
// traverseForViableElements walks the children of node and appends to viable
// every element that should receive the .insertr class. For each child the
// first matching rule applies:
//
//   - non-element nodes are ignored;
//   - elements that already carry the insertr class are a boundary and are
//     neither collected nor descended into;
//   - deferred elements (tables, forms) are skipped entirely;
//   - elements accepted by shouldGetInsertrClass are collected without
//     descending, since their children are handled by that element's own
//     expansion;
//   - anything else is treated as a plain container and searched recursively.
func traverseForViableElements(node *html.Node, viable *[]*html.Node) {
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		switch {
		case c.Type != html.ElementNode:
			// Only element nodes are considered.
		case hasInsertrClass(c):
			// BOUNDARY: already marked, stop here.
		case isDeferredElement(c):
			// Complex element handled separately.
		case shouldGetInsertrClass(c):
			*viable = append(*viable, c)
		default:
			traverseForViableElements(c, viable)
		}
	}
}
// isBlockElement reports whether node's tag belongs to the block-level set
// used by the Phase 3 block/inline classification. Only node.Data is
// inspected; the node type is not checked here.
//
// The tag set is matched with a switch so that no map is built on each call,
// which matters because this runs for every element visited during traversal.
func isBlockElement(node *html.Node) bool {
	switch node.Data {
	case
		// Content blocks
		"h1", "h2", "h3", "h4", "h5", "h6",
		"p", "div", "article", "section", "nav",
		"header", "footer", "main", "aside",
		// Lists
		"ul", "ol", "li",
		// Interactive (when at block level)
		"button", "a", "img", "video", "audio":
		return true
	}
	return false
}
// isInlineElement reports whether node's tag is an inline formatting element
// (one that never gets .insertr). Only node.Data is inspected.
//
// The tag set is matched with a switch so that no map is built on each call.
func isInlineElement(node *html.Node) bool {
	switch node.Data {
	case "strong", "b", "em", "i", "span",
		"code", "small", "sub", "sup", "br",
		"mark", "kbd":
		return true
	}
	return false
}
// isContextSensitive reports whether node's tag is one that can act as either
// a block or an inline element depending on where it appears: "a" or
// "button". Only node.Data is inspected.
//
// Two direct comparisons replace the map that was previously built per call.
func isContextSensitive(node *html.Node) bool {
	return node.Data == "a" || node.Data == "button"
}
// isInBlockContext reports whether a context-sensitive element should be
// treated as block-level, judged by its parent. It returns false when the
// parent is a content element (p, h1-h6, li, td, th), because the node is then
// inline formatting inside that content. It returns true otherwise, including
// when the node has no parent or the parent is not an element.
//
// The parent tag is matched with a switch so that no map is built on each call.
func isInBlockContext(node *html.Node) bool {
	parent := node.Parent
	if parent == nil || parent.Type != html.ElementNode {
		return true
	}
	switch parent.Data {
	case "p", "h1", "h2", "h3", "h4", "h5", "h6",
		"li", "td", "th":
		// Parent is a content element: this node is inline within it.
		return false
	}
	return true
}
// shouldGetInsertrClass reports whether node should receive the .insertr
// class.
//
// Rules, in order:
//
//   - nil and non-element nodes never qualify;
//   - context-sensitive elements (a, button) and img qualify only when
//     isInBlockContext says they sit at block level;
//   - every other block element qualifies;
//   - everything else (inline formatting, other self-closing tags, unknown
//     tags) does not.
//
// img is also classified as a block element by isBlockElement, so its context
// check has to run before the block check. Previously the block check came
// first and returned true for every img, which left the "img only when
// block-level" rule unreachable.
func shouldGetInsertrClass(node *html.Node) bool {
	if node == nil || node.Type != html.ElementNode {
		return false
	}

	// Elements whose role depends on where they appear.
	if isContextSensitive(node) || node.Data == "img" {
		return isInBlockContext(node)
	}

	// Remaining block elements always get .insertr; inline, self-closing and
	// unknown elements never do.
	return isBlockElement(node)
}
// isDeferredElement reports whether node's tag is one of the complex elements
// that need separate planning: table parts (table, tr, td, th, thead, tbody,
// tfoot) and form parts (form, input, textarea, select, option). Only
// node.Data is inspected.
//
// The tag set is matched with a switch so that no map is built on each call.
func isDeferredElement(node *html.Node) bool {
	switch node.Data {
	case "table", "tr", "td", "th",
		"thead", "tbody", "tfoot",
		"form", "input", "textarea", "select", "option":
		return true
	}
	return false
}
// hasInsertrClass reports whether "insertr" is one of the class names on node.
// Other classes may be present alongside it.
func hasInsertrClass(node *html.Node) bool {
	names := GetClasses(node)
	for i := 0; i < len(names); i++ {
		if names[i] == "insertr" {
			return true
		}
	}
	return false
}
// findViableChildrenLegacy returns the direct children of node selected by the
// old text-only logic, kept for backwards compatibility. A child is selected
// when it is an element, is not self-closing, and has only text content (no
// nested elements). Text nodes and other non-element children are ignored.
// The result is nil when nothing is selected.
func findViableChildrenLegacy(node *html.Node) []*html.Node {
	var viable []*html.Node
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		eligible := c.Type == html.ElementNode &&
			!isSelfClosing(c) &&
			hasOnlyTextContent(c)
		if eligible {
			viable = append(viable, c)
		}
	}
	return viable
}
// isSelfClosing reports whether node is an element whose tag is typically
// self-closing: img, input, br, hr, meta, link, area, base, col, embed,
// source, track or wbr. Non-element nodes are never self-closing.
//
// The tag set is matched with a switch so that no map is built on each call.
func isSelfClosing(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}
	switch node.Data {
	case "img", "input", "br", "hr", "meta", "link", "area",
		"base", "col", "embed", "source", "track", "wbr":
		return true
	}
	return false
}
// FindElementInDocument searches the tree rooted at doc for an element with
// the given tag whose text content matches content, delegating to
// findElementWithContent. It returns nil when nothing matches.
func FindElementInDocument(doc *html.Node, tag, content string) *html.Node {
	match := findElementWithContent(doc, tag, content)
	return match
}
// findElementWithContent performs a depth-first, document-order search of the
// tree rooted at node and returns the first element that
//
//   - has tag targetTag,
//   - carries the "insertr" class, and
//   - has text content equal to targetContent once both sides are trimmed of
//     surrounding whitespace.
//
// It returns nil when no element satisfies all three conditions.
func findElementWithContent(node *html.Node, targetTag, targetContent string) *html.Node {
	want := strings.TrimSpace(targetContent)

	isCandidate := node.Type == html.ElementNode && node.Data == targetTag
	if isCandidate && ContainsClass(GetClasses(node), "insertr") {
		// Content-based validation for precise matching.
		if strings.TrimSpace(extractTextContent(node)) == want {
			return node
		}
	}

	// Not this node: try each subtree in order.
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		found := findElementWithContent(c, targetTag, want)
		if found != nil {
			return found
		}
	}
	return nil
}
// GetAttribute returns the value of the attribute named key on node, or the
// empty string when the attribute is absent. It is the exported form of
// getAttribute.
func GetAttribute(node *html.Node, key string) string {
	value := getAttribute(node, key)
	return value
}
// HasEditableContent reports whether node has content that can be safely
// edited. It is the exported form of hasEditableContent.
func HasEditableContent(node *html.Node) bool {
	editable := hasEditableContent(node)
	return editable
}
// FindViableChildren returns the descendants of node that are viable for
// editing. It is the exported form of findViableChildren.
func FindViableChildren(node *html.Node) []*html.Node {
	children := findViableChildren(node)
	return children
}