diff --git a/internal/content/auto_enhancer.go b/internal/content/auto_enhancer.go
index e0b363b..e84a93c 100644
--- a/internal/content/auto_enhancer.go
+++ b/internal/content/auto_enhancer.go
@@ -7,20 +7,18 @@ import (
"path/filepath"
"strings"
- "github.com/insertr/insertr/internal/parser"
+ "github.com/insertr/insertr/internal/engine"
"golang.org/x/net/html"
)
// AutoEnhancer handles automatic enhancement of HTML files
type AutoEnhancer struct {
- parser *parser.Parser
+ // No fields: the parser dependency was removed; enhancement calls engine package helpers directly
}
// NewAutoEnhancer creates a new AutoEnhancer instance
func NewAutoEnhancer() *AutoEnhancer {
- return &AutoEnhancer{
- parser: parser.New(),
- }
+ return &AutoEnhancer{}
}
// AutoEnhanceResult contains statistics about auto-enhancement
@@ -133,7 +131,7 @@ func (ae *AutoEnhancer) enhanceNode(node *html.Node, result *EnhancementResult,
// Check if this is a container that should use expansion
if ae.isGoodContainer(node) {
- viableChildren := parser.FindViableChildren(node)
+ viableChildren := engine.FindViableChildren(node)
if len(viableChildren) >= 2 || (aggressive && len(viableChildren) >= 1) {
// Add insertr class to container for expansion
ae.addInsertrClass(node)
@@ -232,9 +230,9 @@ func (ae *AutoEnhancer) isGoodIndividualElement(node *html.Node) bool {
return ae.hasEditableContent(node)
}
-// hasEditableContent uses the parser's enhanced detection logic
+// hasEditableContent reports whether node has editable content by delegating to engine.HasEditableContent
func (ae *AutoEnhancer) hasEditableContent(node *html.Node) bool {
- return parser.HasEditableContent(node)
+ return engine.HasEditableContent(node)
}
// hasInsertrClass checks if a node already has the insertr class
diff --git a/internal/content/enhancer.go b/internal/content/enhancer.go
index 013cae5..a4fe587 100644
--- a/internal/content/enhancer.go
+++ b/internal/content/enhancer.go
@@ -4,133 +4,40 @@ import (
"fmt"
"os"
"path/filepath"
- "strings"
- "golang.org/x/net/html"
-
- "github.com/insertr/insertr/internal/parser"
+ "github.com/insertr/insertr/internal/engine"
)
-// Enhancer combines parsing and content injection
+// Enhancer combines parsing and content injection using the unified engine
type Enhancer struct {
- parser *parser.Parser
+ engine *engine.ContentEngine
injector *Injector
}
-// NewEnhancer creates a new HTML enhancer
+// NewEnhancer creates an Enhancer backed by the unified engine plus an Injector built from client and siteID
func NewEnhancer(client ContentClient, siteID string) *Enhancer {
+ // Build the engine's client; reuse the caller's database handle when client is a *DatabaseClient
+ var engineClient engine.ContentClient
+ if dbClient, ok := client.(*DatabaseClient); ok {
+ engineClient = engine.NewDatabaseClient(dbClient.db)
+ } else {
+ // TODO: non-database clients are not supported yet; the engine client is built from nil here
+ engineClient = engine.NewDatabaseClient(nil) // FIXME: nil placeholder, replace once non-database clients are handled
+ }
+
return &Enhancer{
- parser: parser.New(),
+ engine: engine.NewContentEngine(engineClient),
injector: NewInjector(client, siteID),
}
}
// EnhanceFile processes an HTML file and injects content
func (e *Enhancer) EnhanceFile(inputPath, outputPath string) error {
- // Use parser to get elements from file
- result, err := e.parser.ParseDirectory(filepath.Dir(inputPath))
- if err != nil {
- return fmt.Errorf("parsing file: %w", err)
- }
-
- // Filter elements for this specific file
- var fileElements []parser.Element
- inputBaseName := filepath.Base(inputPath)
- for _, elem := range result.Elements {
- elemBaseName := filepath.Base(elem.FilePath)
- if elemBaseName == inputBaseName {
- fileElements = append(fileElements, elem)
- }
- }
-
- if len(fileElements) == 0 {
- // No insertr elements found, copy file as-is
- return e.copyFile(inputPath, outputPath)
- }
-
- // Read and parse HTML for modification
- htmlContent, err := os.ReadFile(inputPath)
- if err != nil {
- return fmt.Errorf("reading file %s: %w", inputPath, err)
- }
-
- doc, err := html.Parse(strings.NewReader(string(htmlContent)))
- if err != nil {
- return fmt.Errorf("parsing HTML: %w", err)
- }
-
- // Find and inject content for each element
- for _, elem := range fileElements {
- // Find the node in the parsed document
- // Note: This is a simplified approach - in production we'd need more robust node matching
- if err := e.injectElementContent(doc, elem); err != nil {
- fmt.Printf("⚠️ Warning: failed to inject content for %s: %v\n", elem.ContentID, err)
- }
- }
-
- // Inject editor assets for development
- libraryScript := GetLibraryScript(false) // Use non-minified for development debugging
- e.injector.InjectEditorAssets(doc, true, libraryScript)
-
- // Write enhanced HTML
- if err := e.writeHTML(doc, outputPath); err != nil {
- return fmt.Errorf("writing enhanced HTML: %w", err)
- }
-
- fmt.Printf("✅ Enhanced: %s → %s (%d elements)\n",
- filepath.Base(inputPath),
- filepath.Base(outputPath),
- len(fileElements))
-
- return nil
+ // TODO: Implement with unified engine
+ // For now, copy the file unchanged; no content or editor assets are injected
+ return e.copyFile(inputPath, outputPath)
}
-// injectElementContent finds and injects content for a specific element
-func (e *Enhancer) injectElementContent(doc *html.Node, elem parser.Element) error {
- // Fetch content from database
- contentItem, err := e.injector.client.GetContent(e.injector.siteID, elem.ContentID)
- if err != nil {
- return fmt.Errorf("fetching content: %w", err)
- }
-
- // Find nodes with insertr class and inject content
- e.findAndInjectNodes(doc, elem, contentItem)
- return nil
-}
-
-// findAndInjectNodes finds the specific node for this element and injects content
-func (e *Enhancer) findAndInjectNodes(rootNode *html.Node, elem parser.Element, contentItem *ContentItem) {
- // Use parser-based element matching to find the correct specific node
- targetNode := e.findNodeInDocument(rootNode, elem)
- if targetNode == nil {
- // Element not found - this is normal for elements without content in database
- return
- }
-
- // Determine content type: use database type if available, otherwise parser type
- contentType := string(elem.Type)
- if contentItem != nil {
- contentType = contentItem.Type // Database is source of truth
- }
-
- // Inject content attributes for the correctly matched node
- e.injector.AddContentAttributes(targetNode, elem.ContentID, contentType)
-
- // Inject content if available
- if contentItem != nil {
- switch contentItem.Type { // Use database type, not parser type
- case "text":
- e.injector.injectTextContent(targetNode, contentItem.Value)
- case "markdown":
- e.injector.injectMarkdownContent(targetNode, contentItem.Value)
- case "link":
- e.injector.injectLinkContent(targetNode, contentItem.Value)
- }
- }
-}
-
-// Helper functions are now provided by the parser package
-
// EnhanceDirectory processes all HTML files in a directory
func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
// Create output directory
@@ -138,7 +45,7 @@ func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
return fmt.Errorf("creating output directory: %w", err)
}
- // Walk input directory
+ // Walk input directory and copy files for now
return filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
@@ -156,16 +63,19 @@ func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
return os.MkdirAll(outputPath, info.Mode())
}
- // Handle HTML files
- if strings.HasSuffix(strings.ToLower(path), ".html") {
- return e.EnhanceFile(path, outputPath)
- }
-
- // Copy other files as-is
+ // Copy every file as-is, including HTML (HTML processing will be implemented later)
return e.copyFile(path, outputPath)
})
}
+// EnhanceInPlace is currently a stub: it logs the request for siteID at sitePath and returns nil without touching any files
+func (e *Enhancer) EnhanceInPlace(sitePath string, siteID string) error {
+ // TODO: Implement with unified engine
+ // For now, just log that enhancement was requested; no file is read or written
+ fmt.Printf("📄 Enhancement requested for site %s at %s (stub implementation)\n", siteID, sitePath)
+ return nil
+}
+
// copyFile copies a file from src to dst
func (e *Enhancer) copyFile(src, dst string) error {
// Create directory for destination
@@ -182,109 +92,3 @@ func (e *Enhancer) copyFile(src, dst string) error {
// Write destination
return os.WriteFile(dst, data, 0644)
}
-
-// writeHTML writes an HTML document to a file
-func (e *Enhancer) writeHTML(doc *html.Node, outputPath string) error {
- // Create directory for output
- if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
- return err
- }
-
- // Create output file
- file, err := os.Create(outputPath)
- if err != nil {
- return err
- }
- defer file.Close()
-
- // Write HTML
- return html.Render(file, doc)
-}
-
-// EnhanceInPlace performs in-place enhancement of static site files
-func (e *Enhancer) EnhanceInPlace(sitePath string, siteID string) error {
- // Update the injector with the correct siteID
- e.injector.siteID = siteID
-
- // Use existing parser logic to discover elements
- result, err := e.parser.ParseDirectory(sitePath)
- if err != nil {
- return fmt.Errorf("parsing directory: %w", err)
- }
-
- if len(result.Elements) == 0 {
- fmt.Printf("📄 No insertr elements found in %s\n", sitePath)
- return nil
- }
-
- // Group elements by file for efficient processing
- fileElements := make(map[string][]parser.Element)
- for _, elem := range result.Elements {
- fileElements[elem.FilePath] = append(fileElements[elem.FilePath], elem)
- }
-
- // Process each file in-place
- enhancedCount := 0
- for filePath, elements := range fileElements {
- if err := e.enhanceFileInPlace(filePath, elements); err != nil {
- fmt.Printf("⚠️ Failed to enhance %s: %v\n", filepath.Base(filePath), err)
- } else {
- enhancedCount++
- }
- }
-
- fmt.Printf("✅ Enhanced %d files with %d elements in site %s\n",
- enhancedCount, len(result.Elements), siteID)
-
- return nil
-}
-
-// enhanceFileInPlace modifies an HTML file in-place with database content
-func (e *Enhancer) enhanceFileInPlace(filePath string, elements []parser.Element) error {
- // Read original file
- htmlContent, err := os.ReadFile(filePath)
- if err != nil {
- return fmt.Errorf("reading file: %w", err)
- }
-
- // Parse HTML
- doc, err := html.Parse(strings.NewReader(string(htmlContent)))
- if err != nil {
- return fmt.Errorf("parsing HTML: %w", err)
- }
-
- // Convert parser elements to injector format with content IDs
- elementIDs := make([]ElementWithID, 0, len(elements))
- for _, elem := range elements {
- // Find the corresponding node in the parsed document
- node := e.findNodeInDocument(doc, elem)
- if node != nil {
- elementIDs = append(elementIDs, ElementWithID{
- Element: &Element{
- Node: node,
- Type: string(elem.Type),
- Tag: elem.Tag,
- },
- ContentID: elem.ContentID,
- })
- }
- }
-
- // Use existing bulk injection logic for efficiency
- if len(elementIDs) > 0 {
- if err := e.injector.InjectBulkContent(elementIDs); err != nil {
- return fmt.Errorf("injecting content: %w", err)
- }
- }
-
- // Write enhanced HTML back to the same file (in-place update)
- return e.writeHTML(doc, filePath)
-}
-
-// findNodeInDocument finds a specific node in the HTML document tree using parser utilities
-func (e *Enhancer) findNodeInDocument(doc *html.Node, elem parser.Element) *html.Node {
- // Use parser's sophisticated matching logic
- return parser.FindElementInDocument(doc, elem)
-}
-
-// All element matching functions are now provided by the parser package
diff --git a/internal/engine/injector.go.backup b/internal/engine/injector.go.backup
deleted file mode 100644
index 6829eef..0000000
--- a/internal/engine/injector.go.backup
+++ /dev/null
@@ -1,505 +0,0 @@
-package engine
-
-import (
- "fmt"
- "log"
- "strings"
-
- "golang.org/x/net/html"
-)
-
-// Injector handles content injection into HTML elements
-type Injector struct {
- client ContentClient
- siteID string
- mdProcessor *MarkdownProcessor
-}
-
-// NewInjector creates a new content injector
-func NewInjector(client ContentClient, siteID string) *Injector {
- return &Injector{
- client: client,
- siteID: siteID,
- mdProcessor: NewMarkdownProcessor(),
- }
-}
-
-// InjectContent replaces element content with database values and adds content IDs
-func (i *Injector) InjectContent(element *Element, contentID string) error {
- // Fetch content from database/API
- contentItem, err := i.client.GetContent(i.siteID, contentID)
- if err != nil {
- return fmt.Errorf("fetching content for %s: %w", contentID, err)
- }
-
- // If no content found, keep original content but add data attributes
- if contentItem == nil {
- i.AddContentAttributes(element.Node, contentID, element.Type)
- return nil
- }
-
- // Replace element content based on type
- switch element.Type {
- case "text":
- i.injectTextContent(element.Node, contentItem.Value)
- case "markdown":
- i.injectMarkdownContent(element.Node, contentItem.Value)
- case "link":
- i.injectLinkContent(element.Node, contentItem.Value)
- default:
- i.injectTextContent(element.Node, contentItem.Value)
- }
-
- // Add data attributes for editor functionality
- i.AddContentAttributes(element.Node, contentID, element.Type)
-
- return nil
-}
-
-// InjectBulkContent efficiently injects multiple content items
-func (i *Injector) InjectBulkContent(elements []ElementWithID) error {
- // Extract content IDs for bulk fetch
- contentIDs := make([]string, len(elements))
- for idx, elem := range elements {
- contentIDs[idx] = elem.ContentID
- }
-
- // Bulk fetch content
- contentMap, err := i.client.GetBulkContent(i.siteID, contentIDs)
- if err != nil {
- return fmt.Errorf("bulk fetching content: %w", err)
- }
-
- // Inject each element
- for _, elem := range elements {
- contentItem, exists := contentMap[elem.ContentID]
-
- // Add content attributes regardless
- i.AddContentAttributes(elem.Element.Node, elem.ContentID, elem.Element.Type)
-
- if !exists {
- // Keep original content if not found in database
- continue
- }
-
- // Replace content based on type
- switch elem.Element.Type {
- case "text":
- i.injectTextContent(elem.Element.Node, contentItem.Value)
- case "markdown":
- i.injectMarkdownContent(elem.Element.Node, contentItem.Value)
- case "link":
- i.injectLinkContent(elem.Element.Node, contentItem.Value)
- default:
- i.injectTextContent(elem.Element.Node, contentItem.Value)
- }
- }
-
- return nil
-}
-
-// injectTextContent replaces text content in an element
-func (i *Injector) injectTextContent(node *html.Node, content string) {
- // Remove all child nodes
- for child := node.FirstChild; child != nil; {
- next := child.NextSibling
- node.RemoveChild(child)
- child = next
- }
-
- // Add new text content
- textNode := &html.Node{
- Type: html.TextNode,
- Data: content,
- }
- node.AppendChild(textNode)
-}
-
-// injectMarkdownContent handles markdown content - converts markdown to HTML
-func (i *Injector) injectMarkdownContent(node *html.Node, content string) {
- if content == "" {
- i.injectTextContent(node, "")
- return
- }
-
- // Convert markdown to HTML using server processor
- htmlContent, err := i.mdProcessor.ToHTML(content)
- if err != nil {
- log.Printf("⚠️ Markdown conversion failed for content '%s': %v, falling back to text", content, err)
- i.injectTextContent(node, content)
- return
- }
-
- // Inject the HTML content
- i.injectHTMLContent(node, htmlContent)
-}
-
-// injectLinkContent handles link/button content with URL extraction
-func (i *Injector) injectLinkContent(node *html.Node, content string) {
- // For now, just inject the text content
- // TODO: Parse content for URL and text components
- i.injectTextContent(node, content)
-}
-
-// injectHTMLContent safely injects HTML content into a DOM node
-// Preserves the original element and only replaces its content
-func (i *Injector) injectHTMLContent(node *html.Node, htmlContent string) {
- // Clear existing content but preserve the element itself
- i.clearNode(node)
-
- if htmlContent == "" {
- return
- }
-
- // Wrap content for safe parsing
- wrappedHTML := "<div>" + htmlContent + "</div>"
-
- // Parse HTML string
- doc, err := html.Parse(strings.NewReader(wrappedHTML))
- if err != nil {
- log.Printf("Failed to parse HTML content '%s': %v, falling back to text", htmlContent, err)
- i.injectTextContent(node, htmlContent)
- return
- }
-
- // Find the wrapper div and move its children to target node
- wrapper := i.findElementByTag(doc, "div")
- if wrapper == nil {
- log.Printf("Could not find wrapper div in parsed HTML")
- return
- }
-
- // Move parsed nodes to target element (preserving original element)
- for child := wrapper.FirstChild; child != nil; {
- next := child.NextSibling
- wrapper.RemoveChild(child)
- node.AppendChild(child)
- child = next
- }
-}
-
-// clearNode removes all child nodes from a given node
-func (i *Injector) clearNode(node *html.Node) {
- for child := node.FirstChild; child != nil; {
- next := child.NextSibling
- node.RemoveChild(child)
- child = next
- }
-}
-
-// findElementByTag finds the first element with the specified tag name
-func (i *Injector) findElementByTag(node *html.Node, tag string) *html.Node {
- if node.Type == html.ElementNode && node.Data == tag {
- return node
- }
-
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- if found := i.findElementByTag(child, tag); found != nil {
- return found
- }
- }
-
- return nil
-}
-
-// AddContentAttributes adds necessary data attributes and insertr class for editor functionality
-func (i *Injector) AddContentAttributes(node *html.Node, contentID string, contentType string) {
- i.setAttribute(node, "data-content-id", contentID)
- i.setAttribute(node, "data-content-type", contentType)
- i.addClass(node, "insertr")
-}
-
-// InjectEditorAssets adds editor JavaScript to HTML document and injects demo gate if needed
-func (i *Injector) InjectEditorAssets(doc *html.Node, isDevelopment bool, libraryScript string) {
- // Inject demo gate if no gates exist and add script for functionality
- if isDevelopment {
- i.InjectDemoGateIfNeeded(doc)
- i.InjectEditorScript(doc)
- }
-
- // TODO: Implement CDN script injection for production
- // Production options:
- // 1. Inject CDN script tag:
-}
-
-// findHeadElement finds the <head> element in the document
-func (i *Injector) findHeadElement(node *html.Node) *html.Node {
- if node.Type == html.ElementNode && node.Data == "head" {
- return node
- }
-
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- if result := i.findHeadElement(child); result != nil {
- return result
- }
- }
-
- return nil
-}
-
-// setAttribute safely sets an attribute on an HTML node
-func (i *Injector) setAttribute(node *html.Node, key, value string) {
- // Remove existing attribute if present
- for idx, attr := range node.Attr {
- if attr.Key == key {
- node.Attr = append(node.Attr[:idx], node.Attr[idx+1:]...)
- break
- }
- }
-
- // Add new attribute
- node.Attr = append(node.Attr, html.Attribute{
- Key: key,
- Val: value,
- })
-}
-
-// addClass safely adds a class to an HTML node
-func (i *Injector) addClass(node *html.Node, className string) {
- var classAttr *html.Attribute
- var classIndex int = -1
-
- // Find existing class attribute
- for idx, attr := range node.Attr {
- if attr.Key == "class" {
- classAttr = &attr
- classIndex = idx
- break
- }
- }
-
- var classes []string
- if classAttr != nil {
- classes = strings.Fields(classAttr.Val)
- }
-
- // Check if class already exists
- for _, class := range classes {
- if class == className {
- return // Class already exists
- }
- }
-
- // Add new class
- classes = append(classes, className)
- newClassValue := strings.Join(classes, " ")
-
- if classIndex >= 0 {
- // Update existing class attribute
- node.Attr[classIndex].Val = newClassValue
- } else {
- // Add new class attribute
- node.Attr = append(node.Attr, html.Attribute{
- Key: "class",
- Val: newClassValue,
- })
- }
-}
-
-// Element represents a parsed HTML element with metadata
-type Element struct {
- Node *html.Node
- Type string
- Tag string
- Classes []string
- Content string
-}
-
-// ElementWithID combines an element with its generated content ID
-type ElementWithID struct {
- Element *Element
- ContentID string
-}
-
-// InjectDemoGateIfNeeded injects a demo gate element if no .insertr-gate elements exist
-func (i *Injector) InjectDemoGateIfNeeded(doc *html.Node) {
- // Check if any .insertr-gate elements already exist
- if i.hasInsertrGate(doc) {
- return
- }
-
- // Find the body element
- bodyNode := i.findBodyElement(doc)
- if bodyNode == nil {
- log.Printf("Warning: Could not find body element to inject demo gate")
- return
- }
-
- // Create demo gate HTML structure
- gateHTML := `
-
-
`
-
- // Parse the gate HTML and inject it into the body
- gateDoc, err := html.Parse(strings.NewReader(gateHTML))
- if err != nil {
- log.Printf("Error parsing demo gate HTML: %v", err)
- return
- }
-
- // Extract and inject the gate element
- if gateDiv := i.extractElementByClass(gateDoc, "insertr-demo-gate"); gateDiv != nil {
- if gateDiv.Parent != nil {
- gateDiv.Parent.RemoveChild(gateDiv)
- }
- bodyNode.AppendChild(gateDiv)
- log.Printf("✅ Demo gate injected: Edit button added to top-right corner")
- }
-}
-
-// InjectEditorScript injects the insertr.js library and initialization script
-func (i *Injector) InjectEditorScript(doc *html.Node) {
- // Find the head element for the script tag
- headNode := i.findHeadElement(doc)
- if headNode == nil {
- log.Printf("Warning: Could not find head element to inject editor script")
- return
- }
-
- // Create script element that loads insertr.js from our server
- scriptHTML := fmt.Sprintf(`
-`, i.siteID, i.siteID)
-
- // Parse and inject the script
- scriptDoc, err := html.Parse(strings.NewReader(scriptHTML))
- if err != nil {
- log.Printf("Error parsing editor script HTML: %v", err)
- return
- }
-
- // Extract and inject all script elements
- if err := i.injectAllScriptElements(scriptDoc, headNode); err != nil {
- log.Printf("Error injecting script elements: %v", err)
- return
- }
-
- log.Printf("✅ Insertr.js library and initialization script injected")
-}
-
-// injectAllScriptElements finds and injects all script elements from parsed HTML
-func (i *Injector) injectAllScriptElements(doc *html.Node, targetNode *html.Node) error {
- scripts := i.findAllScriptElements(doc)
-
- for _, script := range scripts {
- // Remove from original parent
- if script.Parent != nil {
- script.Parent.RemoveChild(script)
- }
- // Add to target node
- targetNode.AppendChild(script)
- }
-
- return nil
-}
-
-// findAllScriptElements recursively finds all script elements
-func (i *Injector) findAllScriptElements(node *html.Node) []*html.Node {
- var scripts []*html.Node
-
- if node.Type == html.ElementNode && node.Data == "script" {
- scripts = append(scripts, node)
- }
-
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- childScripts := i.findAllScriptElements(child)
- scripts = append(scripts, childScripts...)
- }
-
- return scripts
-}
-
-// hasInsertrGate checks if document has .insertr-gate elements
-func (i *Injector) hasInsertrGate(node *html.Node) bool {
- if node.Type == html.ElementNode {
- for _, attr := range node.Attr {
- if attr.Key == "class" && strings.Contains(attr.Val, "insertr-gate") {
- return true
- }
- }
- }
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- if i.hasInsertrGate(child) {
- return true
- }
- }
- return false
-}
-
-// findBodyElement finds the <body> element
-func (i *Injector) findBodyElement(node *html.Node) *html.Node {
- if node.Type == html.ElementNode && node.Data == "body" {
- return node
- }
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- if result := i.findBodyElement(child); result != nil {
- return result
- }
- }
- return nil
-}
-
-// extractElementByClass finds element with specific class
-func (i *Injector) extractElementByClass(node *html.Node, className string) *html.Node {
- if node.Type == html.ElementNode {
- for _, attr := range node.Attr {
- if attr.Key == "class" && strings.Contains(attr.Val, className) {
- return node
- }
- }
- }
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- if result := i.extractElementByClass(child, className); result != nil {
- return result
- }
- }
- return nil
-}
-
-// extractElementByTag finds element with specific tag
-func (i *Injector) extractElementByTag(node *html.Node, tagName string) *html.Node {
- if node.Type == html.ElementNode && node.Data == tagName {
- return node
- }
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- if result := i.extractElementByTag(child, tagName); result != nil {
- return result
- }
- }
- return nil
-}
diff --git a/internal/engine/utils.go b/internal/engine/utils.go
index 8e2408c..e52d877 100644
--- a/internal/engine/utils.go
+++ b/internal/engine/utils.go
@@ -267,7 +267,37 @@ func isSelfClosing(node *html.Node) bool {
return selfClosingTags[node.Data]
}
-// Note: FindElementInDocument functions removed - will be reimplemented in engine if needed
+// FindElementInDocument returns the first tag element under doc that has the "insertr" class and whose trimmed text equals the trimmed content, or nil if none matches
+func FindElementInDocument(doc *html.Node, tag, content string) *html.Node {
+ return findElementWithContent(doc, tag, content)
+}
+
+// findElementWithContent searches node and then its descendants for a targetTag element with the "insertr" class whose trimmed text equals the trimmed targetContent; returns nil if none is found
+func findElementWithContent(node *html.Node, targetTag, targetContent string) *html.Node {
+ normalizedTarget := strings.TrimSpace(targetContent)
+
+ if node.Type == html.ElementNode && node.Data == targetTag {
+ classes := GetClasses(node)
+ if ContainsClass(classes, "insertr") {
+ // Compare whitespace-trimmed text so leading/trailing whitespace does not prevent a match
+ textContent := extractTextContent(node)
+ nodeContent := strings.TrimSpace(textContent)
+
+ if nodeContent == normalizedTarget {
+ return node
+ }
+ }
+ }
+
+ // No match at this node: search children in sibling order and return the first hit
+ for child := node.FirstChild; child != nil; child = child.NextSibling {
+ if result := findElementWithContent(child, targetTag, normalizedTarget); result != nil {
+ return result
+ }
+ }
+
+ return nil
+}
// GetAttribute gets an attribute value from an HTML node (exported version)
func GetAttribute(node *html.Node, key string) string {
diff --git a/internal/parser/id_generator.go b/internal/parser/id_generator.go
deleted file mode 100644
index 09964d1..0000000
--- a/internal/parser/id_generator.go
+++ /dev/null
@@ -1,133 +0,0 @@
-package parser
-
-import (
- "crypto/sha256"
- "encoding/hex"
- "fmt"
- "path/filepath"
- "strings"
-
- "golang.org/x/net/html"
-)
-
-// IDGenerator generates unique content IDs for elements using lightweight hierarchical approach
-type IDGenerator struct {
- usedIDs map[string]bool
- elementCounts map[string]int // Track counts per file+type for indexing
-}
-
-// NewIDGenerator creates a new ID generator
-func NewIDGenerator() *IDGenerator {
- return &IDGenerator{
- usedIDs: make(map[string]bool),
- elementCounts: make(map[string]int),
- }
-}
-
-// Generate creates a content ID for an HTML element using lightweight hierarchical approach
-func (g *IDGenerator) Generate(node *html.Node, filePath string) string {
- // 1. File context (minimal)
- fileName := g.getFileName(filePath)
-
- // 2. Element identity (lightweight)
- tag := strings.ToLower(node.Data)
- primaryClass := g.getPrimaryClass(node)
-
- // 3. Position context (simple)
- elementKey := g.getElementKey(fileName, tag, primaryClass)
- index := g.getElementIndex(elementKey)
-
- // 4. Build readable prefix
- prefix := g.buildPrefix(fileName, tag, primaryClass, index)
-
- // 5. Add collision-resistant suffix
- signature := g.createSignature(node, filePath)
- hash := sha256.Sum256([]byte(signature))
- suffix := hex.EncodeToString(hash[:3])
-
- finalID := fmt.Sprintf("%s-%s", prefix, suffix)
-
- // Ensure uniqueness (should be guaranteed by hash, but safety check)
- g.usedIDs[finalID] = true
-
- return finalID
-}
-
-// getFileName extracts filename without extension for ID prefix
-func (g *IDGenerator) getFileName(filePath string) string {
- base := filepath.Base(filePath)
- return strings.TrimSuffix(base, filepath.Ext(base))
-}
-
-// getPrimaryClass returns the first meaningful (non-insertr) CSS class
-func (g *IDGenerator) getPrimaryClass(node *html.Node) string {
- classes := GetClasses(node)
- for _, class := range classes {
- if class != "insertr" && class != "" {
- return class
- }
- }
- return ""
-}
-
-// getElementKey creates a key for tracking element counts
-func (g *IDGenerator) getElementKey(fileName, tag, primaryClass string) string {
- if primaryClass != "" {
- return fmt.Sprintf("%s-%s", fileName, primaryClass)
- }
- return fmt.Sprintf("%s-%s", fileName, tag)
-}
-
-// getElementIndex returns the position index for this element type in the file
-func (g *IDGenerator) getElementIndex(elementKey string) int {
- g.elementCounts[elementKey]++
- return g.elementCounts[elementKey]
-}
-
-// buildPrefix creates human-readable prefix for the ID
-func (g *IDGenerator) buildPrefix(fileName, tag, primaryClass string, index int) string {
- var parts []string
- parts = append(parts, fileName)
-
- if primaryClass != "" {
- parts = append(parts, primaryClass)
- } else {
- parts = append(parts, tag)
- }
-
- // Only add index if it's not the first element of this type
- if index > 1 {
- parts = append(parts, fmt.Sprintf("%d", index))
- }
-
- return strings.Join(parts, "-")
-}
-
-// createSignature creates a unique signature for collision resistance
-func (g *IDGenerator) createSignature(node *html.Node, filePath string) string {
- // Minimal signature for uniqueness
- tag := node.Data
- classes := strings.Join(GetClasses(node), " ")
- domPath := g.getSimpleDOMPath(node)
-
- return fmt.Sprintf("%s|%s|%s|%s", filePath, domPath, tag, classes)
-}
-
-// getSimpleDOMPath creates a simple DOM path for uniqueness
-func (g *IDGenerator) getSimpleDOMPath(node *html.Node) string {
- var pathParts []string
- current := node
- depth := 0
-
- for current != nil && current.Type == html.ElementNode && depth < 5 {
- part := current.Data
- if classes := GetClasses(current); len(classes) > 0 && classes[0] != "insertr" {
- part += "." + classes[0]
- }
- pathParts = append([]string{part}, pathParts...)
- current = current.Parent
- depth++
- }
-
- return strings.Join(pathParts, ">")
-}
diff --git a/internal/parser/parser.go b/internal/parser/parser.go
deleted file mode 100644
index eb706a1..0000000
--- a/internal/parser/parser.go
+++ /dev/null
@@ -1,230 +0,0 @@
-package parser
-
-import (
- "fmt"
- "io/fs"
- "os"
- "path/filepath"
- "strings"
-
- "golang.org/x/net/html"
-)
-
-// Parser handles HTML parsing and element detection
-type Parser struct {
- idGenerator *IDGenerator
-}
-
-// New creates a new Parser instance
-func New() *Parser {
- return &Parser{
- idGenerator: NewIDGenerator(),
- }
-}
-
-// ParseDirectory parses all HTML files in the given directory
-func (p *Parser) ParseDirectory(dir string) (*ParseResult, error) {
- result := &ParseResult{
- Elements: []Element{},
- Warnings: []string{},
- Stats: ParseStats{
- TypeBreakdown: make(map[ContentType]int),
- },
- }
-
- err := filepath.WalkDir(dir, func(path string, d fs.DirEntry, err error) error {
- if err != nil {
- return err
- }
-
- // Only process HTML files
- if d.IsDir() || !strings.HasSuffix(strings.ToLower(path), ".html") {
- return nil
- }
-
- elements, warnings, err := p.parseFile(path)
- if err != nil {
- result.Warnings = append(result.Warnings,
- fmt.Sprintf("Error parsing %s: %v", path, err))
- return nil // Continue processing other files
- }
-
- result.Elements = append(result.Elements, elements...)
- result.Warnings = append(result.Warnings, warnings...)
- result.Stats.FilesProcessed++
-
- return nil
- })
-
- if err != nil {
- return nil, fmt.Errorf("error walking directory: %w", err)
- }
-
- // Calculate statistics
- p.calculateStats(result)
-
- return result, nil
-}
-
-// parseFile parses a single HTML file
-func (p *Parser) parseFile(filePath string) ([]Element, []string, error) {
- file, err := os.Open(filePath)
- if err != nil {
- return nil, nil, fmt.Errorf("error opening file: %w", err)
- }
- defer file.Close()
-
- doc, err := html.Parse(file)
- if err != nil {
- return nil, nil, fmt.Errorf("error parsing HTML: %w", err)
- }
-
- var elements []Element
- var warnings []string
-
- p.findInsertrElements(doc, filePath, &elements, &warnings)
-
- return elements, warnings, nil
-}
-
-// findInsertrElements recursively finds all elements with "insertr" class
-func (p *Parser) findInsertrElements(node *html.Node, filePath string, elements *[]Element, warnings *[]string) {
- if node.Type == html.ElementNode {
- classes := GetClasses(node)
-
- // Check if element has "insertr" class
- if ContainsClass(classes, "insertr") {
- if isContainer(node) {
- // Container element - expand to viable children
- viableChildren := findViableChildren(node)
- for _, child := range viableChildren {
- childClasses := GetClasses(child)
- element, warning := p.createElement(child, filePath, childClasses)
- *elements = append(*elements, element)
- if warning != "" {
- *warnings = append(*warnings, warning)
- }
- }
-
- // Don't process children recursively since we've handled the container's children
- return
- } else {
- // Regular element - process as before
- element, warning := p.createElement(node, filePath, classes)
- *elements = append(*elements, element)
- if warning != "" {
- *warnings = append(*warnings, warning)
- }
- }
- }
- }
-
- // Recursively check children
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- p.findInsertrElements(child, filePath, elements, warnings)
- }
-}
-
-// createElement creates an Element from an HTML node
-func (p *Parser) createElement(node *html.Node, filePath string, classes []string) (Element, string) {
- var warning string
-
- // Resolve content ID (existing or generated)
- contentID, hasExistingID := p.resolveContentID(node)
- if !hasExistingID {
- contentID = p.idGenerator.Generate(node, filePath)
- }
-
- // Detect content type
- contentType := p.detectContentType(node, classes)
-
- // Extract text content
- content := extractTextContent(node)
-
- element := Element{
- FilePath: filePath,
- Node: node,
- ContentID: contentID,
- Type: contentType,
- Tag: strings.ToLower(node.Data),
- Classes: classes,
- Content: content,
- HasID: hasExistingID,
- Generated: !hasExistingID,
- }
-
- // Generate warnings for edge cases
- if content == "" {
- warning = fmt.Sprintf("Element <%s> with id '%s' has no text content",
- element.Tag, element.ContentID)
- }
-
- return element, warning
-}
-
-// resolveContentID gets the content ID from existing attributes
-func (p *Parser) resolveContentID(node *html.Node) (string, bool) {
- // 1. Check for existing HTML id attribute
- if id := getAttribute(node, "id"); id != "" {
- return id, true
- }
-
- // 2. Check for data-content-id attribute
- if contentID := getAttribute(node, "data-content-id"); contentID != "" {
- return contentID, true
- }
-
- // 3. No existing ID found
- return "", false
-}
-
-// detectContentType determines the content type based on element and classes
-func (p *Parser) detectContentType(node *html.Node, classes []string) ContentType {
- // Check for explicit type classes first
- if ContainsClass(classes, "insertr-markdown") {
- return ContentMarkdown
- }
- if ContainsClass(classes, "insertr-link") {
- return ContentLink
- }
- if ContainsClass(classes, "insertr-text") {
- return ContentText
- }
-
- // Infer from HTML tag and context
- tag := strings.ToLower(node.Data)
- switch tag {
- case "h1", "h2", "h3", "h4", "h5", "h6":
- return ContentText
- case "p":
- // Paragraphs default to markdown for rich content
- return ContentMarkdown
- case "a", "button":
- return ContentLink
- case "div", "section":
- // Default divs/sections to markdown for rich content
- return ContentMarkdown
- case "span":
- // Default spans to markdown for rich inline content
- return ContentMarkdown
- default:
- return ContentText
- }
-}
-
-// calculateStats computes statistics for the parse result
-func (p *Parser) calculateStats(result *ParseResult) {
- result.Stats.TotalElements = len(result.Elements)
-
- for _, element := range result.Elements {
- // Count existing vs generated IDs
- if element.HasID {
- result.Stats.ExistingIDs++
- } else {
- result.Stats.GeneratedIDs++
- }
-
- // Count content types
- result.Stats.TypeBreakdown[element.Type]++
- }
-}
diff --git a/internal/parser/types.go b/internal/parser/types.go
deleted file mode 100644
index ad1d22e..0000000
--- a/internal/parser/types.go
+++ /dev/null
@@ -1,41 +0,0 @@
-package parser
-
-import "golang.org/x/net/html"
-
-// ContentType represents the type of editable content
-type ContentType string
-
-const (
- ContentText ContentType = "text"
- ContentMarkdown ContentType = "markdown"
- ContentLink ContentType = "link"
-)
-
-// Element represents a parsed editable element
-type Element struct {
- FilePath string `json:"file_path"`
- Node *html.Node `json:"-"` // Don't serialize HTML node
- ContentID string `json:"content_id"`
- Type ContentType `json:"type"`
- Tag string `json:"tag"`
- Classes []string `json:"classes"`
- Content string `json:"content"`
- HasID bool `json:"has_id"` // Whether element had existing ID
- Generated bool `json:"generated"` // Whether ID was generated
-}
-
-// ParseResult contains the results of parsing HTML files
-type ParseResult struct {
- Elements []Element `json:"elements"`
- Warnings []string `json:"warnings"`
- Stats ParseStats `json:"stats"`
-}
-
-// ParseStats provides statistics about the parsing operation
-type ParseStats struct {
- FilesProcessed int `json:"files_processed"`
- TotalElements int `json:"total_elements"`
- ExistingIDs int `json:"existing_ids"`
- GeneratedIDs int `json:"generated_ids"`
- TypeBreakdown map[ContentType]int `json:"type_breakdown"`
-}
diff --git a/internal/parser/utils.go b/internal/parser/utils.go
deleted file mode 100644
index d9f447b..0000000
--- a/internal/parser/utils.go
+++ /dev/null
@@ -1,314 +0,0 @@
-package parser
-
-import (
- "strings"
-
- "golang.org/x/net/html"
-)
-
-// GetClasses extracts CSS classes from an HTML node
-func GetClasses(node *html.Node) []string {
- classAttr := getAttribute(node, "class")
- if classAttr == "" {
- return []string{}
- }
-
- classes := strings.Fields(classAttr)
- return classes
-}
-
-// ContainsClass checks if a class list contains a specific class
-func ContainsClass(classes []string, target string) bool {
- for _, class := range classes {
- if class == target {
- return true
- }
- }
- return false
-}
-
-// getAttribute gets an attribute value from an HTML node
-func getAttribute(node *html.Node, key string) string {
- for _, attr := range node.Attr {
- if attr.Key == key {
- return attr.Val
- }
- }
- return ""
-}
-
-// extractTextContent gets the text content from an HTML node
-func extractTextContent(node *html.Node) string {
- var text strings.Builder
- extractTextRecursive(node, &text)
- return strings.TrimSpace(text.String())
-}
-
-// extractTextRecursive recursively extracts text from node and children
-func extractTextRecursive(node *html.Node, text *strings.Builder) {
- if node.Type == html.TextNode {
- text.WriteString(node.Data)
- }
-
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- // Skip script and style elements
- if child.Type == html.ElementNode &&
- (child.Data == "script" || child.Data == "style") {
- continue
- }
- extractTextRecursive(child, text)
- }
-}
-
-// hasOnlyTextContent checks if a node contains only text content (no nested HTML elements)
-// DEPRECATED: Use hasEditableContent for more sophisticated detection
-func hasOnlyTextContent(node *html.Node) bool {
- if node.Type != html.ElementNode {
- return false
- }
-
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- switch child.Type {
- case html.ElementNode:
- // Found a nested HTML element - not text-only
- return false
- case html.TextNode:
- // Text nodes are fine, continue checking
- continue
- default:
- // Comments, etc. - continue checking
- continue
- }
- }
- return true
-}
-
-// Inline formatting elements that are safe for editing
-var inlineFormattingTags = map[string]bool{
- "strong": true,
- "b": true,
- "em": true,
- "i": true,
- "span": true,
- "code": true,
- "small": true,
- "sub": true,
- "sup": true,
- "a": true, // Links within content are fine
-}
-
-// Elements that should NOT be nested within editable content
-var blockingElements = map[string]bool{
- "button": true, // Buttons shouldn't be nested in paragraphs
- "input": true,
- "select": true,
- "textarea": true,
- "img": true,
- "video": true,
- "audio": true,
- "canvas": true,
- "svg": true,
- "iframe": true,
- "object": true,
- "embed": true,
- "div": true, // Nested divs usually indicate complex structure
- "section": true, // Block-level semantic elements
- "article": true,
- "header": true,
- "footer": true,
- "nav": true,
- "aside": true,
- "main": true,
- "form": true,
- "table": true,
- "ul": true,
- "ol": true,
- "dl": true,
-}
-
-// hasEditableContent checks if a node contains content that can be safely edited
-// This includes text and safe inline formatting elements
-func hasEditableContent(node *html.Node) bool {
- if node.Type != html.ElementNode {
- return false
- }
-
- return hasOnlyTextAndSafeFormatting(node)
-}
-
-// hasOnlyTextAndSafeFormatting recursively checks if content is safe for editing
-func hasOnlyTextAndSafeFormatting(node *html.Node) bool {
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- switch child.Type {
- case html.TextNode:
- continue // Text is always safe
- case html.ElementNode:
- // Check if it's a blocking element
- if blockingElements[child.Data] {
- return false
- }
- // Allow safe inline formatting
- if inlineFormattingTags[child.Data] {
- // Recursively validate the formatting element
- if !hasOnlyTextAndSafeFormatting(child) {
- return false
- }
- continue
- }
- // Unknown/unsafe element
- return false
- default:
- continue // Comments, whitespace, etc.
- }
- }
- return true
-}
-
-// isContainer checks if a tag is typically used as a container element
-func isContainer(node *html.Node) bool {
- if node.Type != html.ElementNode {
- return false
- }
-
- containerTags := map[string]bool{
- "div": true,
- "section": true,
- "article": true,
- "header": true,
- "footer": true,
- "main": true,
- "aside": true,
- "nav": true,
- }
-
- return containerTags[node.Data]
-}
-
-// findViableChildren finds all child elements that are viable for editing
-func findViableChildren(node *html.Node) []*html.Node {
- var viable []*html.Node
-
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- // Skip whitespace-only text nodes
- if child.Type == html.TextNode {
- if strings.TrimSpace(child.Data) == "" {
- continue
- }
- }
-
- // Only consider element nodes
- if child.Type != html.ElementNode {
- continue
- }
-
- // Skip self-closing elements for now
- if isSelfClosing(child) {
- continue
- }
-
- // Check if element has editable content (improved logic)
- if hasEditableContent(child) {
- viable = append(viable, child)
- }
- }
-
- return viable
-}
-
-// findViableChildrenLegacy uses the old text-only logic for backwards compatibility
-func findViableChildrenLegacy(node *html.Node) []*html.Node {
- var viable []*html.Node
-
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- if child.Type == html.TextNode {
- if strings.TrimSpace(child.Data) == "" {
- continue
- }
- }
-
- if child.Type != html.ElementNode {
- continue
- }
-
- if isSelfClosing(child) {
- continue
- }
-
- if hasOnlyTextContent(child) {
- viable = append(viable, child)
- }
- }
-
- return viable
-}
-
-// isSelfClosing checks if an element is typically self-closing
-func isSelfClosing(node *html.Node) bool {
- if node.Type != html.ElementNode {
- return false
- }
-
- selfClosingTags := map[string]bool{
- "img": true,
- "input": true,
- "br": true,
- "hr": true,
- "meta": true,
- "link": true,
- "area": true,
- "base": true,
- "col": true,
- "embed": true,
- "source": true,
- "track": true,
- "wbr": true,
- }
-
- return selfClosingTags[node.Data]
-}
-
-// FindElementInDocument finds a parser element in HTML document tree using semantic matching
-func FindElementInDocument(doc *html.Node, element Element) *html.Node {
- return findElementWithContext(doc, element)
-}
-
-// findElementWithContext uses the parser's semantic understanding to find the correct element
-func findElementWithContext(node *html.Node, target Element) *html.Node {
- if node.Type == html.ElementNode && node.Data == target.Tag {
- classes := GetClasses(node)
- if ContainsClass(classes, "insertr") {
- // Content-based validation for precise matching
- textContent := extractTextContent(node)
- nodeContent := strings.TrimSpace(textContent)
- targetContent := strings.TrimSpace(target.Content)
-
- if nodeContent == targetContent {
- return node
- }
- }
- }
-
- // Recursively search children
- for child := node.FirstChild; child != nil; child = child.NextSibling {
- if result := findElementWithContext(child, target); result != nil {
- return result
- }
- }
-
- return nil
-}
-
-// GetAttribute gets an attribute value from an HTML node (exported version)
-func GetAttribute(node *html.Node, key string) string {
- return getAttribute(node, key)
-}
-
-// HasEditableContent checks if a node has editable content (exported version)
-func HasEditableContent(node *html.Node) bool {
- return hasEditableContent(node)
-}
-
-// FindViableChildren finds viable children for editing (exported version)
-func FindViableChildren(node *html.Node) []*html.Node {
- return findViableChildren(node)
-}
diff --git a/test_unified_engine.sh b/test_unified_engine.sh
new file mode 100755
index 0000000..eeef51d
--- /dev/null
+++ b/test_unified_engine.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+# Test script for unified content engine architecture
+echo "🔧 Testing Unified Content Engine Architecture"
+echo
+
+# Test data
+HTML_MARKUP='Welcome to Our Site
'
+SITE_ID="demo"
+FILE_PATH="index.html"
+CONTENT_VALUE="Welcome to Our Amazing Website"
+CONTENT_TYPE="text"
+
+echo "📝 Test Data:"
+echo " HTML Markup: $HTML_MARKUP"
+echo " Site ID: $SITE_ID"
+echo " File Path: $FILE_PATH"
+echo " Content: $CONTENT_VALUE"
+echo
+
+# Create JSON payload
+JSON_PAYLOAD=$(cat </dev/null)
+
+if [ $? -eq 0 ] && [ -n "$RESPONSE" ]; then
+ echo "✅ API Response:"
+ echo "$RESPONSE" | jq '.' 2>/dev/null || echo "$RESPONSE"
+ echo
+
+ # Extract ID from response if possible
+ CONTENT_ID=$(echo "$RESPONSE" | jq -r '.id' 2>/dev/null)
+ if [ "$CONTENT_ID" != "null" ] && [ -n "$CONTENT_ID" ]; then
+ echo "🎯 Generated Content ID: $CONTENT_ID"
+ echo
+
+ # Test retrieval
+ echo "🔍 Testing content retrieval..."
+ GET_RESPONSE=$(curl -s "http://localhost:8080/api/content/$CONTENT_ID?site_id=$SITE_ID" 2>/dev/null)
+ echo "GET Response:"
+ echo "$GET_RESPONSE" | jq '.' 2>/dev/null || echo "$GET_RESPONSE"
+ fi
+else
+ echo "❌ API Request Failed or Server Not Running"
+ echo "Response: $RESPONSE"
+ echo
+ echo "💡 Start the server with: just dev"
+fi
+
+echo
+echo "🏁 Test Complete"
\ No newline at end of file