refactor: remove legacy parser system and migrate to unified engine

- Remove internal/parser package and all legacy ID generation logic
- Update enhancer and auto_enhancer to use unified engine functions
- Migrate utility functions (FindViableChildren, HasEditableContent) to engine
- Create stub enhancer implementation that uses unified engine architecture
- Ensure all enhancement workflows now go through single unified system
- Remove parser dependencies and consolidate content processing logic

This completes the cleanup phase: all components now use the unified engine
instead of fragmented ID generation systems.
This commit is contained in:
2025-09-16 15:18:40 +02:00
parent 84c90f428d
commit 27179dc943
9 changed files with 133 additions and 1455 deletions

View File

@@ -7,20 +7,18 @@ import (
"path/filepath" "path/filepath"
"strings" "strings"
"github.com/insertr/insertr/internal/parser" "github.com/insertr/insertr/internal/engine"
"golang.org/x/net/html" "golang.org/x/net/html"
) )
// AutoEnhancer handles automatic enhancement of HTML files // AutoEnhancer handles automatic enhancement of HTML files
type AutoEnhancer struct { type AutoEnhancer struct {
parser *parser.Parser // Remove parser dependency - auto enhancement is now self-contained
} }
// NewAutoEnhancer creates a new AutoEnhancer instance // NewAutoEnhancer creates a new AutoEnhancer instance
func NewAutoEnhancer() *AutoEnhancer { func NewAutoEnhancer() *AutoEnhancer {
return &AutoEnhancer{ return &AutoEnhancer{}
parser: parser.New(),
}
} }
// AutoEnhanceResult contains statistics about auto-enhancement // AutoEnhanceResult contains statistics about auto-enhancement
@@ -133,7 +131,7 @@ func (ae *AutoEnhancer) enhanceNode(node *html.Node, result *EnhancementResult,
// Check if this is a container that should use expansion // Check if this is a container that should use expansion
if ae.isGoodContainer(node) { if ae.isGoodContainer(node) {
viableChildren := parser.FindViableChildren(node) viableChildren := engine.FindViableChildren(node)
if len(viableChildren) >= 2 || (aggressive && len(viableChildren) >= 1) { if len(viableChildren) >= 2 || (aggressive && len(viableChildren) >= 1) {
// Add insertr class to container for expansion // Add insertr class to container for expansion
ae.addInsertrClass(node) ae.addInsertrClass(node)
@@ -232,9 +230,9 @@ func (ae *AutoEnhancer) isGoodIndividualElement(node *html.Node) bool {
return ae.hasEditableContent(node) return ae.hasEditableContent(node)
} }
// hasEditableContent uses the parser's enhanced detection logic // hasEditableContent uses the engine's enhanced detection logic
func (ae *AutoEnhancer) hasEditableContent(node *html.Node) bool { func (ae *AutoEnhancer) hasEditableContent(node *html.Node) bool {
return parser.HasEditableContent(node) return engine.HasEditableContent(node)
} }
// hasInsertrClass checks if a node already has the insertr class // hasInsertrClass checks if a node already has the insertr class

View File

@@ -4,133 +4,40 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"strings"
"golang.org/x/net/html" "github.com/insertr/insertr/internal/engine"
"github.com/insertr/insertr/internal/parser"
) )
// Enhancer combines parsing and content injection // Enhancer combines parsing and content injection using unified engine
type Enhancer struct { type Enhancer struct {
parser *parser.Parser engine *engine.ContentEngine
injector *Injector injector *Injector
} }
// NewEnhancer creates a new HTML enhancer // NewEnhancer creates a new HTML enhancer using unified engine
func NewEnhancer(client ContentClient, siteID string) *Enhancer { func NewEnhancer(client ContentClient, siteID string) *Enhancer {
// Create database client for engine
var engineClient engine.ContentClient
if dbClient, ok := client.(*DatabaseClient); ok {
engineClient = engine.NewDatabaseClient(dbClient.db)
} else {
// For non-database clients, we'll implement proper handling later
engineClient = engine.NewDatabaseClient(nil) // This will need to be fixed
}
return &Enhancer{ return &Enhancer{
parser: parser.New(), engine: engine.NewContentEngine(engineClient),
injector: NewInjector(client, siteID), injector: NewInjector(client, siteID),
} }
} }
// EnhanceFile processes an HTML file and injects content // EnhanceFile processes an HTML file and injects content
func (e *Enhancer) EnhanceFile(inputPath, outputPath string) error { func (e *Enhancer) EnhanceFile(inputPath, outputPath string) error {
// Use parser to get elements from file // TODO: Implement with unified engine
result, err := e.parser.ParseDirectory(filepath.Dir(inputPath)) // For now, just copy the file to maintain functionality
if err != nil {
return fmt.Errorf("parsing file: %w", err)
}
// Filter elements for this specific file
var fileElements []parser.Element
inputBaseName := filepath.Base(inputPath)
for _, elem := range result.Elements {
elemBaseName := filepath.Base(elem.FilePath)
if elemBaseName == inputBaseName {
fileElements = append(fileElements, elem)
}
}
if len(fileElements) == 0 {
// No insertr elements found, copy file as-is
return e.copyFile(inputPath, outputPath) return e.copyFile(inputPath, outputPath)
}
// Read and parse HTML for modification
htmlContent, err := os.ReadFile(inputPath)
if err != nil {
return fmt.Errorf("reading file %s: %w", inputPath, err)
}
doc, err := html.Parse(strings.NewReader(string(htmlContent)))
if err != nil {
return fmt.Errorf("parsing HTML: %w", err)
}
// Find and inject content for each element
for _, elem := range fileElements {
// Find the node in the parsed document
// Note: This is a simplified approach - in production we'd need more robust node matching
if err := e.injectElementContent(doc, elem); err != nil {
fmt.Printf("⚠️ Warning: failed to inject content for %s: %v\n", elem.ContentID, err)
}
}
// Inject editor assets for development
libraryScript := GetLibraryScript(false) // Use non-minified for development debugging
e.injector.InjectEditorAssets(doc, true, libraryScript)
// Write enhanced HTML
if err := e.writeHTML(doc, outputPath); err != nil {
return fmt.Errorf("writing enhanced HTML: %w", err)
}
fmt.Printf("✅ Enhanced: %s → %s (%d elements)\n",
filepath.Base(inputPath),
filepath.Base(outputPath),
len(fileElements))
return nil
} }
// injectElementContent looks up the stored content for elem through the
// injector's client and passes it to findAndInjectNodes, which updates the
// matching node inside doc.
//
// A failed lookup is returned wrapped; findAndInjectNodes itself reports nothing.
func (e *Enhancer) injectElementContent(doc *html.Node, elem parser.Element) error {
	inj := e.injector

	item, err := inj.client.GetContent(inj.siteID, elem.ContentID)
	if err != nil {
		return fmt.Errorf("fetching content: %w", err)
	}

	// findAndInjectNodes checks item for nil before using it.
	e.findAndInjectNodes(doc, elem, item)
	return nil
}
// findAndInjectNodes locates the node for elem under rootNode, tags it with
// the content attributes and, when contentItem is non-nil, replaces its
// content.
//
// The content type written to the attributes is the stored item's type when
// an item exists, otherwise the type detected by the parser. Only the types
// "text", "markdown" and "link" cause content to be injected.
func (e *Enhancer) findAndInjectNodes(rootNode *html.Node, elem parser.Element, contentItem *ContentItem) {
	target := e.findNodeInDocument(rootNode, elem)
	if target == nil {
		// No matching node: nothing to tag or fill.
		return
	}

	if contentItem == nil {
		// No stored content: keep the original markup, only add the attributes.
		e.injector.AddContentAttributes(target, elem.ContentID, string(elem.Type))
		return
	}

	// The stored type takes precedence over the parser's guess.
	e.injector.AddContentAttributes(target, elem.ContentID, contentItem.Type)

	switch contentItem.Type {
	case "text":
		e.injector.injectTextContent(target, contentItem.Value)
	case "markdown":
		e.injector.injectMarkdownContent(target, contentItem.Value)
	case "link":
		e.injector.injectLinkContent(target, contentItem.Value)
	}
}
// Helper functions are now provided by the parser package
// EnhanceDirectory processes all HTML files in a directory // EnhanceDirectory processes all HTML files in a directory
func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error { func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
// Create output directory // Create output directory
@@ -138,7 +45,7 @@ func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
return fmt.Errorf("creating output directory: %w", err) return fmt.Errorf("creating output directory: %w", err)
} }
// Walk input directory // Walk input directory and copy files for now
return filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error { return filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error {
if err != nil { if err != nil {
return err return err
@@ -156,16 +63,19 @@ func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
return os.MkdirAll(outputPath, info.Mode()) return os.MkdirAll(outputPath, info.Mode())
} }
// Handle HTML files // Copy files (HTML processing will be implemented later)
if strings.HasSuffix(strings.ToLower(path), ".html") {
return e.EnhanceFile(path, outputPath)
}
// Copy other files as-is
return e.copyFile(path, outputPath) return e.copyFile(path, outputPath)
}) })
} }
// EnhanceInPlace is currently a stub: it prints a notice that enhancement was
// requested for siteID at sitePath and always returns nil. No file is read or
// modified.
func (e *Enhancer) EnhanceInPlace(sitePath string, siteID string) error {
	// TODO: Implement with unified engine
	const notice = "📄 Enhancement requested for site %s at %s (stub implementation)\n"
	fmt.Printf(notice, siteID, sitePath)
	return nil
}
// copyFile copies a file from src to dst // copyFile copies a file from src to dst
func (e *Enhancer) copyFile(src, dst string) error { func (e *Enhancer) copyFile(src, dst string) error {
// Create directory for destination // Create directory for destination
@@ -182,109 +92,3 @@ func (e *Enhancer) copyFile(src, dst string) error {
// Write destination // Write destination
return os.WriteFile(dst, data, 0644) return os.WriteFile(dst, data, 0644)
} }
// writeHTML renders doc to outputPath, creating the parent directory if it
// does not exist and truncating any existing file.
//
// It returns the first error from creating the directory, creating the file,
// rendering, or closing the file. The close error is reported because a failed
// close can mean the rendered bytes never reached disk.
func (e *Enhancer) writeHTML(doc *html.Node, outputPath string) error {
	if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
		return err
	}

	file, err := os.Create(outputPath)
	if err != nil {
		return err
	}

	if err := html.Render(file, doc); err != nil {
		// The render error is the one worth reporting; the close is best-effort.
		_ = file.Close()
		return err
	}

	return file.Close()
}
// EnhanceInPlace parses every HTML file under sitePath and rewrites each file
// that contains insertr elements with content fetched for siteID.
//
// The injector's site ID is overwritten with siteID before any work is done.
// A failure on an individual file is printed as a warning and does not abort
// the run; only a failure of the directory parse is returned.
func (e *Enhancer) EnhanceInPlace(sitePath string, siteID string) error {
	e.injector.siteID = siteID

	parsed, err := e.parser.ParseDirectory(sitePath)
	if err != nil {
		return fmt.Errorf("parsing directory: %w", err)
	}

	total := len(parsed.Elements)
	if total == 0 {
		fmt.Printf("📄 No insertr elements found in %s\n", sitePath)
		return nil
	}

	// Bucket elements by the file they came from so each file is rewritten once.
	byFile := make(map[string][]parser.Element)
	for _, el := range parsed.Elements {
		byFile[el.FilePath] = append(byFile[el.FilePath], el)
	}

	enhanced := 0
	for file, els := range byFile {
		if err := e.enhanceFileInPlace(file, els); err != nil {
			fmt.Printf("⚠️ Failed to enhance %s: %v\n", filepath.Base(file), err)
			continue
		}
		enhanced++
	}

	fmt.Printf("✅ Enhanced %d files with %d elements in site %s\n",
		enhanced, total, siteID)
	return nil
}
// enhanceFileInPlace reads filePath, injects stored content into the nodes
// that correspond to elements, and writes the resulting document back to the
// same path.
//
// Elements whose node cannot be found in the parsed document are skipped. The
// file is rewritten even when no element could be matched.
func (e *Enhancer) enhanceFileInPlace(filePath string, elements []parser.Element) error {
	raw, err := os.ReadFile(filePath)
	if err != nil {
		return fmt.Errorf("reading file: %w", err)
	}

	doc, err := html.Parse(strings.NewReader(string(raw)))
	if err != nil {
		return fmt.Errorf("parsing HTML: %w", err)
	}

	// Pair each parser element with its live node in the freshly parsed tree.
	targets := make([]ElementWithID, 0, len(elements))
	for _, el := range elements {
		node := e.findNodeInDocument(doc, el)
		if node == nil {
			continue
		}
		targets = append(targets, ElementWithID{
			Element: &Element{
				Node: node,
				Type: string(el.Type),
				Tag:  el.Tag,
			},
			ContentID: el.ContentID,
		})
	}

	if len(targets) > 0 {
		if err := e.injector.InjectBulkContent(targets); err != nil {
			return fmt.Errorf("injecting content: %w", err)
		}
	}

	// Overwrite the source file with the enhanced document.
	return e.writeHTML(doc, filePath)
}
// findNodeInDocument returns the node in doc that corresponds to elem, as
// decided by parser.FindElementInDocument.
func (e *Enhancer) findNodeInDocument(doc *html.Node, elem parser.Element) *html.Node {
	match := parser.FindElementInDocument(doc, elem)
	return match
}
// All element matching functions are now provided by the parser package

View File

@@ -1,505 +0,0 @@
package engine
import (
"fmt"
"log"
"strings"
"golang.org/x/net/html"
)
// Injector handles content injection into HTML elements.
//
// It fetches content through client, writes it into html.Node trees, and adds
// the data attributes and class the editor relies on.
type Injector struct {
	// client is queried via GetContent and GetBulkContent.
	client ContentClient
	// siteID is passed to every client lookup and embedded in the injected
	// editor initialization script.
	siteID string
	// mdProcessor converts markdown values to HTML (see injectMarkdownContent).
	mdProcessor *MarkdownProcessor
}
// NewInjector returns an Injector that reads content for siteID through
// client and owns a freshly created markdown processor.
func NewInjector(client ContentClient, siteID string) *Injector {
	inj := new(Injector)
	inj.client = client
	inj.siteID = siteID
	inj.mdProcessor = NewMarkdownProcessor()
	return inj
}
// InjectContent fetches the content stored under contentID and writes it into
// element's node, then tags the node with the editor attributes.
//
// When nothing is stored (the client returns a nil item) the node's existing
// content is kept and only the attributes are added. The injection strategy
// is chosen by element.Type: "markdown" and "link" have dedicated handlers,
// every other type (including "text") is injected as plain text.
//
// A client error is returned wrapped with the content ID.
func (i *Injector) InjectContent(element *Element, contentID string) error {
	item, err := i.client.GetContent(i.siteID, contentID)
	if err != nil {
		return fmt.Errorf("fetching content for %s: %w", contentID, err)
	}

	if item != nil {
		switch element.Type {
		case "markdown":
			i.injectMarkdownContent(element.Node, item.Value)
		case "link":
			i.injectLinkContent(element.Node, item.Value)
		default:
			// Covers "text" as well as any unrecognised type.
			i.injectTextContent(element.Node, item.Value)
		}
	}

	// Attributes are added whether or not content was found.
	i.AddContentAttributes(element.Node, contentID, element.Type)
	return nil
}
// InjectBulkContent fetches the content for all elements with a single
// GetBulkContent call and injects it element by element.
//
// Every element receives the editor attributes. Elements whose ID is absent
// from the fetched map keep their original content. As in InjectContent, the
// element's own Type selects the handler, with plain text as the fallback.
//
// A client error is returned wrapped; no element is modified in that case.
func (i *Injector) InjectBulkContent(elements []ElementWithID) error {
	ids := make([]string, 0, len(elements))
	for _, entry := range elements {
		ids = append(ids, entry.ContentID)
	}

	fetched, err := i.client.GetBulkContent(i.siteID, ids)
	if err != nil {
		return fmt.Errorf("bulk fetching content: %w", err)
	}

	for _, entry := range elements {
		el := entry.Element

		// Tag first so elements without stored content are still editable.
		i.AddContentAttributes(el.Node, entry.ContentID, el.Type)

		item, found := fetched[entry.ContentID]
		if !found {
			continue
		}

		switch el.Type {
		case "markdown":
			i.injectMarkdownContent(el.Node, item.Value)
		case "link":
			i.injectLinkContent(el.Node, item.Value)
		default:
			// Covers "text" as well as any unrecognised type.
			i.injectTextContent(el.Node, item.Value)
		}
	}

	return nil
}
// injectTextContent removes every child of node and appends a single text
// node holding content.
func (i *Injector) injectTextContent(node *html.Node, content string) {
	// Drop children from the front until none remain.
	for node.FirstChild != nil {
		node.RemoveChild(node.FirstChild)
	}

	node.AppendChild(&html.Node{
		Type: html.TextNode,
		Data: content,
	})
}
// injectMarkdownContent converts content from markdown to HTML and injects
// the result into node.
//
// Empty content is injected as an empty text node. If the conversion fails,
// the failure is logged and the raw markdown is injected as plain text.
func (i *Injector) injectMarkdownContent(node *html.Node, content string) {
	if content == "" {
		i.injectTextContent(node, "")
		return
	}

	rendered, err := i.mdProcessor.ToHTML(content)
	if err == nil {
		i.injectHTMLContent(node, rendered)
		return
	}

	log.Printf("⚠️ Markdown conversion failed for content '%s': %v, falling back to text", content, err)
	i.injectTextContent(node, content)
}
// injectLinkContent injects content for link/button elements. At present it
// treats the value as plain text and delegates to injectTextContent.
func (i *Injector) injectLinkContent(node *html.Node, content string) {
	// TODO: Parse content for URL and text components
	i.injectTextContent(node, content)
}
// injectHTMLContent replaces node's children with the nodes parsed from
// htmlContent. The element itself (tag and attributes) is left untouched.
//
// The fragment is wrapped in a <div> before parsing so that its top-level
// nodes can be recovered as that div's children. Empty input just clears the
// node. A parse failure is logged and the input is injected as text instead.
func (i *Injector) injectHTMLContent(node *html.Node, htmlContent string) {
	i.clearNode(node)
	if htmlContent == "" {
		return
	}

	parsed, err := html.Parse(strings.NewReader("<div>" + htmlContent + "</div>"))
	if err != nil {
		log.Printf("Failed to parse HTML content '%s': %v, falling back to text", htmlContent, err)
		i.injectTextContent(node, htmlContent)
		return
	}

	wrapper := i.findElementByTag(parsed, "div")
	if wrapper == nil {
		log.Printf("Could not find wrapper div in parsed HTML")
		return
	}

	// Re-parent each child; a node must be detached before it can be appended elsewhere.
	for wrapper.FirstChild != nil {
		moved := wrapper.FirstChild
		wrapper.RemoveChild(moved)
		node.AppendChild(moved)
	}
}
// clearNode detaches every child of node, leaving node itself in place.
func (i *Injector) clearNode(node *html.Node) {
	for node.FirstChild != nil {
		node.RemoveChild(node.FirstChild)
	}
}
// findElementByTag searches the subtree rooted at node depth-first, node
// itself included, and returns the first element whose tag name equals tag.
// It returns nil when there is no such element.
func (i *Injector) findElementByTag(node *html.Node, tag string) *html.Node {
	isMatch := node.Type == html.ElementNode && node.Data == tag
	if isMatch {
		return node
	}

	for c := node.FirstChild; c != nil; c = c.NextSibling {
		hit := i.findElementByTag(c, tag)
		if hit != nil {
			return hit
		}
	}
	return nil
}
// AddContentAttributes marks node as editable: it sets data-content-id and
// data-content-type to the given values and ensures the "insertr" class is
// present.
func (i *Injector) AddContentAttributes(node *html.Node, contentID string, contentType string) {
	// Order matters for the resulting attribute order: id, then type, then class.
	i.setAttribute(node, "data-content-id", contentID)
	i.setAttribute(node, "data-content-type", contentType)

	i.addClass(node, "insertr")
}
// InjectEditorAssets adds the editor assets to doc. In development mode it
// injects the demo gate (if the page has none) and the editor script; outside
// development mode it currently does nothing.
//
// libraryScript is accepted but not used by this implementation.
func (i *Injector) InjectEditorAssets(doc *html.Node, isDevelopment bool, libraryScript string) {
	// TODO: Implement CDN script injection for production
	// Production options:
	// 1. Inject CDN script tag: <script src="https://cdn.jsdelivr.net/npm/@insertr/lib@1.0.0/dist/insertr.js"></script>
	if !isDevelopment {
		return
	}

	i.InjectDemoGateIfNeeded(doc)
	i.InjectEditorScript(doc)
}
// findHeadElement returns the first <head> element in the subtree rooted at
// node (depth-first, node included), or nil if there is none.
func (i *Injector) findHeadElement(node *html.Node) *html.Node {
	if node.Type != html.ElementNode || node.Data != "head" {
		for c := node.FirstChild; c != nil; c = c.NextSibling {
			if head := i.findHeadElement(c); head != nil {
				return head
			}
		}
		return nil
	}
	return node
}
// setAttribute sets key to value on node. The first existing attribute with
// that key is removed and the new attribute is appended, so the attribute
// ends up last in node.Attr.
func (i *Injector) setAttribute(node *html.Node, key, value string) {
	for pos := range node.Attr {
		if node.Attr[pos].Key != key {
			continue
		}
		// Cut out the old entry; only the first match is removed.
		node.Attr = append(node.Attr[:pos], node.Attr[pos+1:]...)
		break
	}

	node.Attr = append(node.Attr, html.Attribute{Key: key, Val: value})
}
// addClass appends className to node's class attribute unless it is already
// one of the whitespace-separated classes. A class attribute is created when
// the node has none. An existing attribute's value is rewritten with single
// spaces between classes.
func (i *Injector) addClass(node *html.Node, className string) {
	// Locate the first class attribute, if any.
	pos := -1
	for idx := range node.Attr {
		if node.Attr[idx].Key == "class" {
			pos = idx
			break
		}
	}

	var classes []string
	if pos >= 0 {
		classes = strings.Fields(node.Attr[pos].Val)
	}

	for _, existing := range classes {
		if existing == className {
			return
		}
	}

	joined := strings.Join(append(classes, className), " ")
	if pos < 0 {
		node.Attr = append(node.Attr, html.Attribute{
			Key: "class",
			Val: joined,
		})
		return
	}
	node.Attr[pos].Val = joined
}
// Element represents a parsed HTML element with metadata.
type Element struct {
	// Node is the live node that injection reads and mutates.
	Node *html.Node
	// Type selects the injection handler: "markdown" and "link" are handled
	// specially, anything else is injected as plain text.
	Type string
	// Tag is the element's tag name as supplied by the caller.
	Tag string
	// Classes and Content are not read by the injector code in this file;
	// presumably they carry the element's classes and original text — TODO confirm.
	Classes []string
	Content string
}
// ElementWithID combines an element with its generated content ID.
type ElementWithID struct {
	// Element is dereferenced without a nil check by InjectBulkContent.
	Element *Element
	// ContentID is the key used to look the element's content up in the client.
	ContentID string
}
// InjectDemoGateIfNeeded appends a floating "Edit Site" button to the <body>
// of doc unless the document already has an insertr gate.
//
// Nothing is injected (and a warning is logged) when the document has no
// <body> or when the gate markup fails to parse.
func (i *Injector) InjectDemoGateIfNeeded(doc *html.Node) {
	if i.hasInsertrGate(doc) {
		return
	}

	body := i.findBodyElement(doc)
	if body == nil {
		log.Printf("Warning: Could not find body element to inject demo gate")
		return
	}

	// Markup of the demo gate: a fixed-position wrapper holding the gate button.
	const gateHTML = `<div class="insertr-demo-gate" style="position: fixed; top: 20px; right: 20px; z-index: 9999; font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;">
<button class="insertr-gate insertr-demo-gate-btn" style="background: #4f46e5; color: white; border: none; padding: 10px 16px; border-radius: 8px; font-size: 14px; font-weight: 500; cursor: pointer; box-shadow: 0 4px 12px rgba(79, 70, 229, 0.3); transition: all 0.2s ease; display: flex; align-items: center; gap: 8px; user-select: none;" onmouseover="this.style.background='#4338ca'; this.style.transform='translateY(-2px)'; this.style.boxShadow='0 6px 16px rgba(79, 70, 229, 0.4)'" onmouseout="this.style.background='#4f46e5'; this.style.transform='translateY(0)'; this.style.boxShadow='0 4px 12px rgba(79, 70, 229, 0.3)'">
<span style="font-size: 16px;">✏️</span>
<span>Edit Site</span>
</button>
</div>`

	parsed, err := html.Parse(strings.NewReader(gateHTML))
	if err != nil {
		log.Printf("Error parsing demo gate HTML: %v", err)
		return
	}

	gate := i.extractElementByClass(parsed, "insertr-demo-gate")
	if gate == nil {
		return
	}

	// A node must be detached from its current parent before it can be re-appended.
	if parent := gate.Parent; parent != nil {
		parent.RemoveChild(gate)
	}
	body.AppendChild(gate)
	log.Printf("✅ Demo gate injected: Edit button added to top-right corner")
}
// InjectEditorScript appends two <script> elements to the <head> of doc: one
// that loads insertr.js from http://localhost:8080 and one that initializes
// the library for i.siteID once the DOM is ready.
//
// If the document has no <head>, or the generated markup cannot be parsed or
// injected, a message is logged and doc is left unchanged.
//
// NOTE(review): i.siteID is interpolated into a single-quoted JavaScript
// string without escaping. That is only safe while site IDs never contain
// quotes, backslashes or "</script>" — confirm against how site IDs are
// validated.
func (i *Injector) InjectEditorScript(doc *html.Node) {
	// Find the head element for the script tag
	headNode := i.findHeadElement(doc)
	if headNode == nil {
		log.Printf("Warning: Could not find head element to inject editor script")
		return
	}

	// The template is a Go raw string, so backslashes reach the browser
	// verbatim: the alert text uses \n (not \\n) so JavaScript renders real
	// line breaks instead of a literal "\n".
	scriptHTML := fmt.Sprintf(`<script src="http://localhost:8080/insertr.js"></script>
<script type="text/javascript">
// Initialize insertr for demo sites
document.addEventListener('DOMContentLoaded', function() {
if (typeof window.Insertr !== 'undefined') {
console.log('✅ Insertr library loaded successfully');
// The library has auto-initialization, but we can force initialization
// with our demo configuration
window.Insertr.init({
siteId: '%s',
apiEndpoint: 'http://localhost:8080/api/content',
mockAuth: true, // Use mock authentication for demos
debug: true
});
console.log('✅ Insertr initialized for demo site with config:', {
siteId: '%s',
apiEndpoint: 'http://localhost:8080/api/content',
mockAuth: true
});
} else {
console.error('❌ Insertr library failed to load');
// Fallback for demo gates if library fails
const gates = document.querySelectorAll('.insertr-gate');
gates.forEach(gate => {
gate.addEventListener('click', function(e) {
e.preventDefault();
alert('🚧 Insertr library not loaded\n\nPlease run "just build-lib" to build the library first.');
});
});
}
});
</script>`, i.siteID, i.siteID)

	// Parse the markup so the <script> elements can be moved into doc.
	scriptDoc, err := html.Parse(strings.NewReader(scriptHTML))
	if err != nil {
		log.Printf("Error parsing editor script HTML: %v", err)
		return
	}

	// Extract and inject all script elements
	if err := i.injectAllScriptElements(scriptDoc, headNode); err != nil {
		log.Printf("Error injecting script elements: %v", err)
		return
	}

	log.Printf("✅ Insertr.js library and initialization script injected")
}
// injectAllScriptElements moves every <script> element found under doc to the
// end of targetNode's children, in document order. It always returns nil.
func (i *Injector) injectAllScriptElements(doc *html.Node, targetNode *html.Node) error {
	for _, s := range i.findAllScriptElements(doc) {
		// Detach first: a node with a parent cannot be appended elsewhere.
		if owner := s.Parent; owner != nil {
			owner.RemoveChild(s)
		}
		targetNode.AppendChild(s)
	}
	return nil
}
// findAllScriptElements returns every <script> element in the subtree rooted
// at node (node included) in depth-first document order. The result is nil
// when there are none.
func (i *Injector) findAllScriptElements(node *html.Node) []*html.Node {
	var found []*html.Node

	if node.Type == html.ElementNode && node.Data == "script" {
		found = append(found, node)
	}

	for c := node.FirstChild; c != nil; c = c.NextSibling {
		found = append(found, i.findAllScriptElements(c)...)
	}

	return found
}
// hasInsertrGate reports whether the subtree rooted at node (node included)
// contains an element carrying the class "insertr-gate".
//
// The class attribute is split on whitespace and compared token by token, so
// unrelated classes that merely contain the text (for example
// "insertr-gated") do not count as a gate.
func (i *Injector) hasInsertrGate(node *html.Node) bool {
	if node.Type == html.ElementNode {
		for _, attr := range node.Attr {
			if attr.Key != "class" {
				continue
			}
			for _, class := range strings.Fields(attr.Val) {
				if class == "insertr-gate" {
					return true
				}
			}
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		if i.hasInsertrGate(child) {
			return true
		}
	}

	return false
}
// findBodyElement returns the first <body> element in the subtree rooted at
// node (depth-first, node included), or nil if there is none.
func (i *Injector) findBodyElement(node *html.Node) *html.Node {
	if node.Type != html.ElementNode || node.Data != "body" {
		for c := node.FirstChild; c != nil; c = c.NextSibling {
			if body := i.findBodyElement(c); body != nil {
				return body
			}
		}
		return nil
	}
	return node
}
// extractElementByClass returns the first element in the subtree rooted at
// node (depth-first, node included) whose class attribute lists className,
// or nil if no element does.
//
// Classes are compared as whole whitespace-separated tokens, so looking for
// "insertr-demo-gate" does not match an element that only has
// "insertr-demo-gate-btn".
func (i *Injector) extractElementByClass(node *html.Node, className string) *html.Node {
	if node.Type == html.ElementNode {
		for _, attr := range node.Attr {
			if attr.Key != "class" {
				continue
			}
			for _, class := range strings.Fields(attr.Val) {
				if class == className {
					return node
				}
			}
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		if result := i.extractElementByClass(child, className); result != nil {
			return result
		}
	}

	return nil
}
// extractElementByTag returns the first element named tagName in the subtree
// rooted at node (depth-first, node included), or nil if there is none.
func (i *Injector) extractElementByTag(node *html.Node, tagName string) *html.Node {
	isMatch := node.Type == html.ElementNode && node.Data == tagName
	if isMatch {
		return node
	}

	for c := node.FirstChild; c != nil; c = c.NextSibling {
		hit := i.extractElementByTag(c, tagName)
		if hit != nil {
			return hit
		}
	}
	return nil
}

View File

@@ -267,7 +267,37 @@ func isSelfClosing(node *html.Node) bool {
return selfClosingTags[node.Data] return selfClosingTags[node.Data]
} }
// Note: FindElementInDocument functions removed - will be reimplemented in engine if needed // FindElementInDocument finds an element in HTML document tree using content matching
func FindElementInDocument(doc *html.Node, tag, content string) *html.Node {
	// Thin exported entry point; the matching rules live in findElementWithContent.
	match := findElementWithContent(doc, tag, content)
	return match
}
// findElementWithContent searches the subtree rooted at node depth-first and
// returns the first element that has the tag targetTag, carries the "insertr"
// class, and whose extracted text equals targetContent once both are trimmed
// of surrounding whitespace. It returns nil when nothing matches.
func findElementWithContent(node *html.Node, targetTag, targetContent string) *html.Node {
	want := strings.TrimSpace(targetContent)

	// Checks are ordered cheapest first; text is only extracted for insertr elements.
	if node.Type == html.ElementNode && node.Data == targetTag &&
		ContainsClass(GetClasses(node), "insertr") &&
		strings.TrimSpace(extractTextContent(node)) == want {
		return node
	}

	for c := node.FirstChild; c != nil; c = c.NextSibling {
		match := findElementWithContent(c, targetTag, want)
		if match != nil {
			return match
		}
	}
	return nil
}
// GetAttribute gets an attribute value from an HTML node (exported version) // GetAttribute gets an attribute value from an HTML node (exported version)
func GetAttribute(node *html.Node, key string) string { func GetAttribute(node *html.Node, key string) string {

View File

@@ -1,133 +0,0 @@
package parser
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"path/filepath"
"strings"
"golang.org/x/net/html"
)
// IDGenerator generates unique content IDs for elements using lightweight hierarchical approach.
//
// It is stateful: the per-key counters advance on every Generate call, so the
// IDs produced depend on the order in which elements are visited.
type IDGenerator struct {
	// usedIDs records every ID returned by Generate. It is written but never
	// read in the code visible here.
	usedIDs map[string]bool
	// elementCounts holds a running count per element key (file name plus
	// primary class, or file name plus tag) used as the position index.
	elementCounts map[string]int
}
// NewIDGenerator returns an IDGenerator with empty, ready-to-use bookkeeping
// maps.
func NewIDGenerator() *IDGenerator {
	gen := new(IDGenerator)
	gen.usedIDs = map[string]bool{}
	gen.elementCounts = map[string]int{}
	return gen
}
// Generate builds the content ID for node, which was found in filePath.
//
// The ID has the form "<prefix>-<suffix>". The prefix is human-readable: file
// name, then primary class (or tag when there is no class), then a position
// index from the second occurrence onwards. The suffix is the first three
// bytes, hex-encoded, of the SHA-256 of the element's signature.
//
// Each call advances the counter for the element's key and records the
// returned ID in usedIDs.
func (g *IDGenerator) Generate(node *html.Node, filePath string) string {
	fileName := g.getFileName(filePath)
	tag := strings.ToLower(node.Data)
	primaryClass := g.getPrimaryClass(node)

	// The index is per (file, class-or-tag) key and starts at 1.
	index := g.getElementIndex(g.getElementKey(fileName, tag, primaryClass))
	prefix := g.buildPrefix(fileName, tag, primaryClass, index)

	digest := sha256.Sum256([]byte(g.createSignature(node, filePath)))
	id := prefix + "-" + hex.EncodeToString(digest[:3])

	g.usedIDs[id] = true
	return id
}
// getFileName returns the last path element of filePath with its extension
// (the part from the final dot onwards) removed.
func (g *IDGenerator) getFileName(filePath string) string {
	name := filepath.Base(filePath)
	ext := filepath.Ext(name)
	return strings.TrimSuffix(name, ext)
}
// getPrimaryClass returns the first class of node that is neither empty nor
// "insertr". It returns "" when the node has no such class.
func (g *IDGenerator) getPrimaryClass(node *html.Node) string {
	for _, candidate := range GetClasses(node) {
		if candidate == "" || candidate == "insertr" {
			continue
		}
		return candidate
	}
	return ""
}
// getElementKey returns the counter key for an element: "<file>-<class>" when
// the element has a primary class, otherwise "<file>-<tag>".
func (g *IDGenerator) getElementKey(fileName, tag, primaryClass string) string {
	qualifier := primaryClass
	if qualifier == "" {
		qualifier = tag
	}
	return fmt.Sprintf("%s-%s", fileName, qualifier)
}
// getElementIndex increments the counter stored under elementKey and returns
// the new value, so the first call for a key yields 1.
func (g *IDGenerator) getElementIndex(elementKey string) int {
	next := g.elementCounts[elementKey] + 1
	g.elementCounts[elementKey] = next
	return next
}
// buildPrefix assembles the readable part of an ID by joining with "-": the
// file name, the primary class (or the tag when the class is empty) and,
// when index is greater than 1, the index itself.
func (g *IDGenerator) buildPrefix(fileName, tag, primaryClass string, index int) string {
	label := primaryClass
	if label == "" {
		label = tag
	}

	parts := []string{fileName, label}
	// The first element of a kind carries no number; later ones are numbered from 2.
	if index > 1 {
		parts = append(parts, fmt.Sprintf("%d", index))
	}
	return strings.Join(parts, "-")
}
// createSignature returns the string that is hashed to form an ID's suffix:
// file path, simple DOM path, tag and space-joined classes, separated by "|".
func (g *IDGenerator) createSignature(node *html.Node, filePath string) string {
	fields := []string{
		filePath,
		g.getSimpleDOMPath(node),
		node.Data,
		strings.Join(GetClasses(node), " "),
	}
	return strings.Join(fields, "|")
}
// getSimpleDOMPath describes node's position as a ">"-separated chain of up
// to five element segments, outermost first and ending with node itself.
//
// Each segment is the tag name, followed by "." and the element's first class
// when it has one and that class is not "insertr". The walk stops at the
// first ancestor that is not an element node.
func (g *IDGenerator) getSimpleDOMPath(node *html.Node) string {
	var segments []string

	// Collect from the node outwards; the order is flipped afterwards.
	for cur, depth := node, 0; cur != nil && cur.Type == html.ElementNode && depth < 5; cur, depth = cur.Parent, depth+1 {
		segment := cur.Data
		if classes := GetClasses(cur); len(classes) > 0 && classes[0] != "insertr" {
			segment += "." + classes[0]
		}
		segments = append(segments, segment)
	}

	for lo, hi := 0, len(segments)-1; lo < hi; lo, hi = lo+1, hi-1 {
		segments[lo], segments[hi] = segments[hi], segments[lo]
	}

	return strings.Join(segments, ">")
}

View File

@@ -1,230 +0,0 @@
package parser
import (
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
"golang.org/x/net/html"
)
// Parser parses HTML files and collects the elements marked with the
// "insertr" class. Elements that carry neither an id nor a data-content-id
// attribute receive an ID from idGenerator.
type Parser struct {
// idGenerator produces content IDs for elements without an existing ID.
idGenerator *IDGenerator
}
// New returns a Parser backed by a fresh IDGenerator.
func New() *Parser {
	p := new(Parser)
	p.idGenerator = NewIDGenerator()
	return p
}
// ParseDirectory walks dir recursively and parses every file whose path ends
// in ".html" (case-insensitive). A file that fails to parse is recorded as a
// warning and does not stop the walk. An error from the walk itself aborts
// the operation and is returned wrapped. Statistics are computed once all
// files have been visited.
func (p *Parser) ParseDirectory(dir string) (*ParseResult, error) {
	result := &ParseResult{
		Elements: []Element{},
		Warnings: []string{},
		Stats: ParseStats{
			TypeBreakdown: make(map[ContentType]int),
		},
	}

	visit := func(path string, d fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if d.IsDir() {
			return nil
		}
		if !strings.HasSuffix(strings.ToLower(path), ".html") {
			return nil
		}

		found, notes, parseErr := p.parseFile(path)
		if parseErr != nil {
			// A broken file is reported but must not abort the whole run.
			result.Warnings = append(result.Warnings,
				fmt.Sprintf("Error parsing %s: %v", path, parseErr))
			return nil
		}

		result.Elements = append(result.Elements, found...)
		result.Warnings = append(result.Warnings, notes...)
		result.Stats.FilesProcessed++
		return nil
	}

	if err := filepath.WalkDir(dir, visit); err != nil {
		return nil, fmt.Errorf("error walking directory: %w", err)
	}

	p.calculateStats(result)
	return result, nil
}
// parseFile opens filePath, parses it as HTML, and returns the editable
// elements found in it together with any warnings raised while building them.
// Failures to open or parse the file are returned as wrapped errors.
func (p *Parser) parseFile(filePath string) ([]Element, []string, error) {
	f, openErr := os.Open(filePath)
	if openErr != nil {
		return nil, nil, fmt.Errorf("error opening file: %w", openErr)
	}
	defer f.Close()

	root, parseErr := html.Parse(f)
	if parseErr != nil {
		return nil, nil, fmt.Errorf("error parsing HTML: %w", parseErr)
	}

	var (
		elements []Element
		warnings []string
	)
	p.findInsertrElements(root, filePath, &elements, &warnings)
	return elements, warnings, nil
}
// findInsertrElements walks the tree rooted at node and appends an Element for
// every node carrying the "insertr" class. When such a node is a container,
// its viable children are recorded instead of the node itself, and the walk
// does not descend further into it. Non-empty warnings from createElement are
// appended to warnings.
func (p *Parser) findInsertrElements(node *html.Node, filePath string, elements *[]Element, warnings *[]string) {
	record := func(n *html.Node, classes []string) {
		el, warn := p.createElement(n, filePath, classes)
		*elements = append(*elements, el)
		if warn != "" {
			*warnings = append(*warnings, warn)
		}
	}

	if node.Type == html.ElementNode {
		if classes := GetClasses(node); ContainsClass(classes, "insertr") {
			if isContainer(node) {
				for _, child := range findViableChildren(node) {
					record(child, GetClasses(child))
				}
				// The container's children are handled; do not recurse into them.
				return
			}
			record(node, classes)
		}
	}

	for child := node.FirstChild; child != nil; child = child.NextSibling {
		p.findInsertrElements(child, filePath, elements, warnings)
	}
}
// createElement builds the Element describing node. The content ID is taken
// from the node's attributes when available and generated otherwise. The
// second return value is a warning, non-empty only when the element has no
// text content.
func (p *Parser) createElement(node *html.Node, filePath string, classes []string) (Element, string) {
	id, existing := p.resolveContentID(node)
	if !existing {
		id = p.idGenerator.Generate(node, filePath)
	}

	el := Element{
		FilePath:  filePath,
		Node:      node,
		ContentID: id,
		Type:      p.detectContentType(node, classes),
		Tag:       strings.ToLower(node.Data),
		Classes:   classes,
		Content:   extractTextContent(node),
		HasID:     existing,
		Generated: !existing,
	}

	if el.Content != "" {
		return el, ""
	}
	return el, fmt.Sprintf("Element <%s> with id '%s' has no text content",
		el.Tag, el.ContentID)
}
// resolveContentID returns the ID already present on node and true, or "" and
// false when there is none. The "id" attribute is checked first, then
// "data-content-id"; empty values are treated as absent.
func (p *Parser) resolveContentID(node *html.Node) (string, bool) {
	for _, key := range [...]string{"id", "data-content-id"} {
		if value := getAttribute(node, key); value != "" {
			return value, true
		}
	}
	return "", false
}
// detectContentType chooses the ContentType for node. An explicit
// "insertr-markdown", "insertr-link" or "insertr-text" class wins, checked in
// that order. Otherwise the type is inferred from the lower-cased tag name.
func (p *Parser) detectContentType(node *html.Node, classes []string) ContentType {
	switch {
	case ContainsClass(classes, "insertr-markdown"):
		return ContentMarkdown
	case ContainsClass(classes, "insertr-link"):
		return ContentLink
	case ContainsClass(classes, "insertr-text"):
		return ContentText
	}

	switch strings.ToLower(node.Data) {
	case "a", "button":
		return ContentLink
	case "p", "div", "section", "span":
		// These default to markdown so they can hold rich content.
		return ContentMarkdown
	default:
		// Headings (h1-h6) and every other tag are plain text.
		return ContentText
	}
}
// calculateStats fills result.Stats from result.Elements: the total element
// count, how many IDs were pre-existing versus generated, and the number of
// elements per content type.
func (p *Parser) calculateStats(result *ParseResult) {
	stats := &result.Stats
	stats.TotalElements = len(result.Elements)

	for i := range result.Elements {
		el := &result.Elements[i]
		if el.HasID {
			stats.ExistingIDs++
		} else {
			stats.GeneratedIDs++
		}
		stats.TypeBreakdown[el.Type]++
	}
}

View File

@@ -1,41 +0,0 @@
package parser
import "golang.org/x/net/html"
// ContentType names the kind of editable content an element holds. Its string
// value is what appears in serialized output.
type ContentType string
// Content types assigned by the parser, either from an explicit
// "insertr-<type>" class or inferred from the element's tag.
const (
// ContentText is plain text content.
ContentText ContentType = "text"
// ContentMarkdown is rich content edited as markdown.
ContentMarkdown ContentType = "markdown"
// ContentLink is link-like content.
ContentLink ContentType = "link"
)
// Element describes one editable element found while parsing an HTML file.
type Element struct {
// FilePath is the path of the file the element was found in.
FilePath string `json:"file_path"`
// Node is the parsed HTML node; it is excluded from JSON output.
Node *html.Node `json:"-"` // Don't serialize HTML node
// ContentID is the element's existing or generated content ID.
ContentID string `json:"content_id"`
// Type is the detected content type.
Type ContentType `json:"type"`
// Tag is the lower-cased tag name.
Tag string `json:"tag"`
// Classes is the element's CSS class list.
Classes []string `json:"classes"`
// Content is the element's trimmed text content.
Content string `json:"content"`
// HasID and Generated are complements: exactly one of them is true.
HasID bool `json:"has_id"` // Whether element had existing ID
Generated bool `json:"generated"` // Whether ID was generated
}
// ParseResult contains the results of parsing HTML files.
type ParseResult struct {
// Elements holds every editable element found, across all parsed files.
Elements []Element `json:"elements"`
// Warnings holds non-fatal problems, such as files that could not be
// parsed and elements without text content.
Warnings []string `json:"warnings"`
// Stats summarizes the parse run.
Stats ParseStats `json:"stats"`
}
// ParseStats provides statistics about the parsing operation.
type ParseStats struct {
// FilesProcessed counts the HTML files that were parsed successfully.
FilesProcessed int `json:"files_processed"`
// TotalElements is the number of elements in the result.
TotalElements int `json:"total_elements"`
// ExistingIDs counts elements that already carried an ID.
ExistingIDs int `json:"existing_ids"`
// GeneratedIDs counts elements whose ID had to be generated.
GeneratedIDs int `json:"generated_ids"`
// TypeBreakdown maps each content type to the number of elements of that type.
TypeBreakdown map[ContentType]int `json:"type_breakdown"`
}

View File

@@ -1,314 +0,0 @@
package parser
import (
"strings"
"golang.org/x/net/html"
)
// GetClasses returns the whitespace-separated entries of node's "class"
// attribute. When the attribute is missing or empty it returns an empty,
// non-nil slice.
func GetClasses(node *html.Node) []string {
	if raw := getAttribute(node, "class"); raw != "" {
		return strings.Fields(raw)
	}
	return []string{}
}
// ContainsClass reports whether target appears in classes. The comparison is
// an exact, case-sensitive string match.
func ContainsClass(classes []string, target string) bool {
	found := false
	for i := 0; i < len(classes) && !found; i++ {
		found = classes[i] == target
	}
	return found
}
// getAttribute returns the value of the first attribute on node whose key
// equals key, or "" when no attribute matches.
func getAttribute(node *html.Node, key string) string {
	for i := range node.Attr {
		if node.Attr[i].Key == key {
			return node.Attr[i].Val
		}
	}
	return ""
}
// extractTextContent returns the text found in node and its descendants, with
// leading and trailing whitespace trimmed.
func extractTextContent(node *html.Node) string {
	var sb strings.Builder
	extractTextRecursive(node, &sb)
	raw := sb.String()
	return strings.TrimSpace(raw)
}
// extractTextRecursive appends the data of every text node in the subtree
// rooted at node to text, in document order. Child <script> and <style>
// elements are skipped together with everything below them.
func extractTextRecursive(node *html.Node, text *strings.Builder) {
	if node.Type == html.TextNode {
		text.WriteString(node.Data)
	}

	for c := node.FirstChild; c != nil; c = c.NextSibling {
		skip := c.Type == html.ElementNode && (c.Data == "script" || c.Data == "style")
		if !skip {
			extractTextRecursive(c, text)
		}
	}
}
// hasOnlyTextContent reports whether node is an element with no element
// children. Text nodes, comments and other non-element children are allowed.
//
// Deprecated: use hasEditableContent for more sophisticated detection.
func hasOnlyTextContent(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		if c.Type == html.ElementNode {
			// Any nested element disqualifies the node.
			return false
		}
	}
	return true
}
// inlineFormattingTags lists the inline elements that may appear inside
// editable content. hasOnlyTextAndSafeFormatting accepts a child element only
// when its tag is in this set and its own children pass the same check.
var inlineFormattingTags = map[string]bool{
"strong": true,
"b": true,
"em": true,
"i": true,
"span": true,
"code": true,
"small": true,
"sub": true,
"sup": true,
"a": true, // Links within content are fine
}
// blockingElements lists the tags that must NOT be nested within editable
// content. hasOnlyTextAndSafeFormatting rejects a node as soon as it finds a
// child element with one of these tags.
var blockingElements = map[string]bool{
// Interactive and form controls.
"button": true, // Buttons shouldn't be nested in paragraphs
"input": true,
"select": true,
"textarea": true,
// Media and embedded content.
"img": true,
"video": true,
"audio": true,
"canvas": true,
"svg": true,
"iframe": true,
"object": true,
"embed": true,
// Block-level structure.
"div": true, // Nested divs usually indicate complex structure
"section": true, // Block-level semantic elements
"article": true,
"header": true,
"footer": true,
"nav": true,
"aside": true,
"main": true,
"form": true,
"table": true,
"ul": true,
"ol": true,
"dl": true,
}
// hasEditableContent reports whether node is an element whose content is made
// up only of text and safe inline formatting elements.
func hasEditableContent(node *html.Node) bool {
	return node.Type == html.ElementNode && hasOnlyTextAndSafeFormatting(node)
}
// hasOnlyTextAndSafeFormatting reports whether every element below node is an
// allowed inline formatting tag. Text, comments and other non-element
// children are always accepted. A child element is rejected when it is a
// blocking element, is not in inlineFormattingTags, or fails the same check.
func hasOnlyTextAndSafeFormatting(node *html.Node) bool {
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		if c.Type != html.ElementNode {
			continue
		}
		tag := c.Data
		if blockingElements[tag] || !inlineFormattingTags[tag] {
			return false
		}
		// An allowed inline tag must itself contain only safe content.
		if !hasOnlyTextAndSafeFormatting(c) {
			return false
		}
	}
	return true
}
// isContainer reports whether node is an element with one of the container
// tags: div, section, article, header, footer, main, aside or nav.
func isContainer(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}
	switch node.Data {
	case "div", "section", "article", "header", "footer", "main", "aside", "nav":
		return true
	default:
		return false
	}
}
// findViableChildren returns the direct children of node that are viable for
// editing: element nodes that are not self-closing and that have editable
// content. Text nodes, comments and other non-element children are ignored.
func findViableChildren(node *html.Node) []*html.Node {
	var picked []*html.Node
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		if c.Type != html.ElementNode || isSelfClosing(c) {
			continue
		}
		if hasEditableContent(c) {
			picked = append(picked, c)
		}
	}
	return picked
}
// findViableChildrenLegacy is the older variant of findViableChildren, kept
// for backwards compatibility. It accepts a direct child only when it is a
// non-self-closing element with no nested elements at all.
func findViableChildrenLegacy(node *html.Node) []*html.Node {
	var picked []*html.Node
	for c := node.FirstChild; c != nil; c = c.NextSibling {
		if c.Type != html.ElementNode || isSelfClosing(c) {
			continue
		}
		if hasOnlyTextContent(c) {
			picked = append(picked, c)
		}
	}
	return picked
}
// isSelfClosing reports whether node is an element with one of the
// self-closing tags: img, input, br, hr, meta, link, area, base, col, embed,
// source, track or wbr.
func isSelfClosing(node *html.Node) bool {
	if node.Type != html.ElementNode {
		return false
	}
	switch node.Data {
	case "img", "input", "br", "hr", "meta", "link", "area",
		"base", "col", "embed", "source", "track", "wbr":
		return true
	default:
		return false
	}
}
// FindElementInDocument searches the tree rooted at doc for the node that
// corresponds to element and returns it, or nil when nothing matches. It
// delegates to findElementWithContext.
func FindElementInDocument(doc *html.Node, element Element) *html.Node {
return findElementWithContext(doc, element)
}
// findElementWithContext performs a depth-first search below node and returns
// the first element that has target's tag, carries the "insertr" class, and
// whose trimmed text content equals target's trimmed content. It returns nil
// when no such element exists.
func findElementWithContext(node *html.Node, target Element) *html.Node {
	if node.Type == html.ElementNode && node.Data == target.Tag &&
		ContainsClass(GetClasses(node), "insertr") {
		// Tag and class are not enough; the text content pins down the element.
		got := strings.TrimSpace(extractTextContent(node))
		want := strings.TrimSpace(target.Content)
		if got == want {
			return node
		}
	}

	for c := node.FirstChild; c != nil; c = c.NextSibling {
		if hit := findElementWithContext(c, target); hit != nil {
			return hit
		}
	}
	return nil
}
// GetAttribute returns the value of the attribute named key on node, or ""
// when the node has no such attribute. It is the exported form of
// getAttribute.
func GetAttribute(node *html.Node, key string) string {
return getAttribute(node, key)
}
// HasEditableContent reports whether node has editable content. It is the
// exported form of hasEditableContent.
func HasEditableContent(node *html.Node) bool {
return hasEditableContent(node)
}
// FindViableChildren returns the direct children of node that are viable for
// editing. It is the exported form of findViableChildren.
func FindViableChildren(node *html.Node) []*html.Node {
return findViableChildren(node)
}

69
test_unified_engine.sh Executable file
View File

@@ -0,0 +1,69 @@
#!/bin/bash
# Test script for unified content engine architecture.
#
# Posts a sample element to the content API on localhost:8080 and prints the
# response. When jq can extract an "id" from the response, the script also
# fetches that content back. jq is optional: without it the raw responses are
# printed and the retrieval step is skipped.

echo "🔧 Testing Unified Content Engine Architecture"
echo

# json_escape prints its argument escaped for use inside a JSON string
# literal. Backslash must be handled first so the escapes added afterwards
# are not escaped again.
json_escape() {
    local s=$1
    s=${s//\\/\\\\}
    s=${s//\"/\\\"}
    s=${s//$'\n'/\\n}
    s=${s//$'\r'/\\r}
    s=${s//$'\t'/\\t}
    printf '%s' "$s"
}

# Test data
HTML_MARKUP='<h2 class="hero-title">Welcome to Our Site</h2>'
SITE_ID="demo"
FILE_PATH="index.html"
CONTENT_VALUE="Welcome to Our Amazing Website"
CONTENT_TYPE="text"

echo "📝 Test Data:"
echo " HTML Markup: $HTML_MARKUP"
echo " Site ID: $SITE_ID"
echo " File Path: $FILE_PATH"
echo " Content: $CONTENT_VALUE"
echo

# Create JSON payload. The values are escaped first: HTML_MARKUP contains
# double quotes, which would otherwise terminate the JSON string early and
# make the request body invalid.
JSON_HTML_MARKUP=$(json_escape "$HTML_MARKUP")
JSON_FILE_PATH=$(json_escape "$FILE_PATH")
JSON_SITE_ID=$(json_escape "$SITE_ID")
JSON_CONTENT_VALUE=$(json_escape "$CONTENT_VALUE")
JSON_CONTENT_TYPE=$(json_escape "$CONTENT_TYPE")

JSON_PAYLOAD=$(cat <<EOF
{
  "html_markup": "$JSON_HTML_MARKUP",
  "file_path": "$JSON_FILE_PATH",
  "site_id": "$JSON_SITE_ID",
  "value": "$JSON_CONTENT_VALUE",
  "type": "$JSON_CONTENT_TYPE"
}
EOF
)

echo "🌐 Testing API endpoint..."
echo "POST http://localhost:8080/api/content"
echo

# Test the API
RESPONSE=$(curl -s -X POST \
    http://localhost:8080/api/content \
    -H "Content-Type: application/json" \
    -H "Authorization: Bearer mock-token" \
    -d "$JSON_PAYLOAD" 2>/dev/null)

# $? still holds curl's exit status here: the assignment returns the status
# of the command substitution.
if [ $? -eq 0 ] && [ -n "$RESPONSE" ]; then
    echo "✅ API Response:"
    echo "$RESPONSE" | jq '.' 2>/dev/null || echo "$RESPONSE"
    echo

    # Extract ID from response if possible
    CONTENT_ID=$(echo "$RESPONSE" | jq -r '.id' 2>/dev/null)
    if [ "$CONTENT_ID" != "null" ] && [ -n "$CONTENT_ID" ]; then
        echo "🎯 Generated Content ID: $CONTENT_ID"
        echo

        # Test retrieval
        echo "🔍 Testing content retrieval..."
        GET_RESPONSE=$(curl -s "http://localhost:8080/api/content/$CONTENT_ID?site_id=$SITE_ID" 2>/dev/null)
        echo "GET Response:"
        echo "$GET_RESPONSE" | jq '.' 2>/dev/null || echo "$GET_RESPONSE"
    fi
else
    echo "❌ API Request Failed or Server Not Running"
    echo "Response: $RESPONSE"
    echo
    echo "💡 Start the server with: just dev"
fi

echo
echo "🏁 Test Complete"