Refactor architecture: eliminate auto-discovery and consolidate packages
- Remove auto-discovery entirely (~450 lines)
  * Delete internal/content/discoverer.go
  * Simplify enhancer to single-phase processing
  * Remove duplicate container expansion logic
- Consolidate repository implementations
  * Move internal/content/client.go → internal/db/http_client.go
  * Group all repository implementations in db/ package
- Add file utilities to engine following Go stdlib patterns
  * Add engine.ProcessFile() and ProcessDirectory() methods
  * Engine now handles both content processing AND file operations
- Move site management to dedicated package
  * Move internal/content/site_manager.go → internal/sites/manager.go
  * Clear separation of site lifecycle from content processing
- Preserve container expansion (syntactic sugar)
  * .insertr on containers still auto-applies to viable children
  * Container detection logic consolidated in engine/utils.go

Result: Clean architecture with a single source of truth for .insertr processing
This commit is contained in:
@@ -1,216 +0,0 @@
|
||||
package content
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/insertr/insertr/internal/db"
|
||||
)
|
||||
|
||||
// HTTPClient is the db.ContentRepository implementation that reads content
// from a remote service over its HTTP API.
type HTTPClient struct {
	// BaseURL is the API root that request paths are appended to.
	BaseURL string

	// APIKey, when non-empty, is sent as a Bearer token on every request.
	APIKey string

	// HTTPClient is the underlying client used to perform the requests.
	HTTPClient *http.Client
}
|
||||
|
||||
// NewHTTPClient creates a new HTTP content client
|
||||
func NewHTTPClient(baseURL, apiKey string) *HTTPClient {
|
||||
return &HTTPClient{
|
||||
BaseURL: strings.TrimSuffix(baseURL, "/"),
|
||||
APIKey: apiKey,
|
||||
HTTPClient: &http.Client{
|
||||
Timeout: 30 * time.Second,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// GetContent fetches a single content item by ID
|
||||
func (c *HTTPClient) GetContent(ctx context.Context, siteID, contentID string) (*db.ContentItem, error) {
|
||||
url := fmt.Sprintf("%s/api/content/%s?site_id=%s", c.BaseURL, contentID, siteID)
|
||||
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating request: %w", err)
|
||||
}
|
||||
|
||||
if c.APIKey != "" {
|
||||
req.Header.Set("Authorization", "Bearer "+c.APIKey)
|
||||
}
|
||||
|
||||
resp, err := c.HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode == 404 {
|
||||
return nil, nil // Content not found, return nil without error
|
||||
}
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("API error: %s", resp.Status)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading response: %w", err)
|
||||
}
|
||||
|
||||
var item db.ContentItem
|
||||
if err := json.Unmarshal(body, &item); err != nil {
|
||||
return nil, fmt.Errorf("parsing response: %w", err)
|
||||
}
|
||||
|
||||
return &item, nil
|
||||
}
|
||||
|
||||
// GetBulkContent fetches multiple content items by IDs
|
||||
func (c *HTTPClient) GetBulkContent(ctx context.Context, siteID string, contentIDs []string) (map[string]db.ContentItem, error) {
|
||||
if len(contentIDs) == 0 {
|
||||
return make(map[string]db.ContentItem), nil
|
||||
}
|
||||
|
||||
// Build query parameters
|
||||
params := url.Values{}
|
||||
params.Set("site_id", siteID)
|
||||
for _, id := range contentIDs {
|
||||
params.Add("ids", id)
|
||||
}
|
||||
|
||||
url := fmt.Sprintf("%s/api/content/bulk?%s", c.BaseURL, params.Encode())
|
||||
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating request: %w", err)
|
||||
}
|
||||
|
||||
if c.APIKey != "" {
|
||||
req.Header.Set("Authorization", "Bearer "+c.APIKey)
|
||||
}
|
||||
|
||||
resp, err := c.HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("API error: %s", resp.Status)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading response: %w", err)
|
||||
}
|
||||
|
||||
var response db.ContentResponse
|
||||
if err := json.Unmarshal(body, &response); err != nil {
|
||||
return nil, fmt.Errorf("parsing response: %w", err)
|
||||
}
|
||||
|
||||
// Convert slice to map for easy lookup
|
||||
result := make(map[string]db.ContentItem)
|
||||
for _, item := range response.Content {
|
||||
result[item.ID] = item
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// GetAllContent fetches all content for a site
|
||||
func (c *HTTPClient) GetAllContent(ctx context.Context, siteID string) (map[string]db.ContentItem, error) {
|
||||
url := fmt.Sprintf("%s/api/content?site_id=%s", c.BaseURL, siteID)
|
||||
|
||||
req, err := http.NewRequest("GET", url, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating request: %w", err)
|
||||
}
|
||||
|
||||
if c.APIKey != "" {
|
||||
req.Header.Set("Authorization", "Bearer "+c.APIKey)
|
||||
}
|
||||
|
||||
resp, err := c.HTTPClient.Do(req)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("making request: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("API error: %s", resp.Status)
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading response: %w", err)
|
||||
}
|
||||
|
||||
var response db.ContentResponse
|
||||
if err := json.Unmarshal(body, &response); err != nil {
|
||||
return nil, fmt.Errorf("parsing response: %w", err)
|
||||
}
|
||||
|
||||
// Convert slice to map for easy lookup
|
||||
result := make(map[string]db.ContentItem)
|
||||
for _, item := range response.Content {
|
||||
result[item.ID] = item
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// CreateContent creates a new content item via HTTP API
|
||||
func (c *HTTPClient) CreateContent(ctx context.Context, siteID, contentID, htmlContent, originalTemplate, lastEditedBy string) (*db.ContentItem, error) {
|
||||
// For now, HTTPClient CreateContent is not implemented for enhancer use
|
||||
// This would typically be used in API-driven enhancement scenarios
|
||||
return nil, fmt.Errorf("CreateContent not implemented for HTTPClient - use DatabaseClient for enhancement")
|
||||
}
|
||||
|
||||
// Collection method stubs - TODO: Implement these for HTTP API
|
||||
func (c *HTTPClient) GetCollection(ctx context.Context, siteID, collectionID string) (*db.CollectionItem, error) {
|
||||
return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
|
||||
}
|
||||
|
||||
func (c *HTTPClient) CreateCollection(ctx context.Context, siteID, collectionID, containerHTML, lastEditedBy string) (*db.CollectionItem, error) {
|
||||
return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
|
||||
}
|
||||
|
||||
func (c *HTTPClient) GetCollectionItems(ctx context.Context, siteID, collectionID string) ([]db.CollectionItemWithTemplate, error) {
|
||||
return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
|
||||
}
|
||||
|
||||
func (c *HTTPClient) CreateCollectionTemplate(ctx context.Context, siteID, collectionID, name, htmlTemplate string, isDefault bool) (*db.CollectionTemplateItem, error) {
|
||||
return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
|
||||
}
|
||||
|
||||
func (c *HTTPClient) GetCollectionTemplates(ctx context.Context, siteID, collectionID string) ([]db.CollectionTemplateItem, error) {
|
||||
return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
|
||||
}
|
||||
|
||||
func (c *HTTPClient) CreateCollectionItem(ctx context.Context, siteID, collectionID, itemID string, templateID int, htmlContent string, position int, lastEditedBy string) (*db.CollectionItemWithTemplate, error) {
|
||||
return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
|
||||
}
|
||||
|
||||
func (c *HTTPClient) CreateCollectionItemAtomic(ctx context.Context, siteID, collectionID string, templateID int, lastEditedBy string) (*db.CollectionItemWithTemplate, error) {
|
||||
return nil, fmt.Errorf("collection operations not implemented in HTTPClient")
|
||||
}
|
||||
|
||||
func (c *HTTPClient) UpdateContent(ctx context.Context, siteID, contentID, htmlContent, lastEditedBy string) (*db.ContentItem, error) {
|
||||
return nil, fmt.Errorf("content update operations not implemented in HTTPClient")
|
||||
}
|
||||
|
||||
func (c *HTTPClient) ReorderCollectionItems(ctx context.Context, siteID, collectionID string, items []db.CollectionItemPosition, lastEditedBy string) error {
|
||||
return fmt.Errorf("collection reordering not implemented in HTTPClient")
|
||||
}
|
||||
|
||||
// WithTransaction executes a function within a transaction (not supported for HTTP client)
|
||||
func (c *HTTPClient) WithTransaction(ctx context.Context, fn func(db.ContentRepository) error) error {
|
||||
return fmt.Errorf("transactions not supported for HTTP client")
|
||||
}
|
||||
@@ -1,462 +0,0 @@
|
||||
package content
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/insertr/insertr/internal/engine"
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
// Discoverer finds editable elements in HTML documents and marks them with
// the "insertr" class. It carries no state of its own.
type Discoverer struct {
	// Element discovery is now self-contained and configurable
}
|
||||
|
||||
// NewDiscoverer creates a new Discoverer instance
|
||||
func NewDiscoverer() *Discoverer {
|
||||
return &Discoverer{}
|
||||
}
|
||||
|
||||
// DiscoveryResult aggregates the statistics of one DiscoverDirectory run.
type DiscoveryResult struct {
	// FilesProcessed counts the HTML files that were enhanced and written.
	FilesProcessed int

	// ElementsEnhanced is the sum of per-file ElementsEnhanced counts.
	ElementsEnhanced int

	// ContainersAdded is the sum of per-file ContainersAdded counts.
	ContainersAdded int

	// IndividualsAdded is the sum of per-file IndividualsAdded counts.
	IndividualsAdded int

	// SkippedFiles lists input paths whose discovery failed; those files are
	// copied through unchanged.
	SkippedFiles []string

	// EnhancedFiles lists the output paths of the enhanced files.
	EnhancedFiles []string
}
|
||||
|
||||
// DiscoverDirectory discovers editable elements in all HTML files in a directory
|
||||
func (disc *Discoverer) DiscoverDirectory(inputDir, outputDir string, aggressive bool) (*DiscoveryResult, error) {
|
||||
result := &DiscoveryResult{
|
||||
SkippedFiles: []string{},
|
||||
EnhancedFiles: []string{},
|
||||
}
|
||||
|
||||
// Create output directory if it doesn't exist
|
||||
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
||||
return nil, fmt.Errorf("failed to create output directory: %w", err)
|
||||
}
|
||||
|
||||
err := filepath.WalkDir(inputDir, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Skip directories
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Only process HTML files
|
||||
if !strings.HasSuffix(strings.ToLower(path), ".html") {
|
||||
// Copy non-HTML files as-is
|
||||
return disc.copyFile(path, inputDir, outputDir)
|
||||
}
|
||||
|
||||
// Discover elements in HTML file
|
||||
enhanced, err := disc.discoverFile(path, aggressive)
|
||||
if err != nil {
|
||||
result.SkippedFiles = append(result.SkippedFiles, path)
|
||||
// Copy original file on error
|
||||
return disc.copyFile(path, inputDir, outputDir)
|
||||
}
|
||||
|
||||
// Write enhanced file
|
||||
outputPath := disc.getOutputPath(path, inputDir, outputDir)
|
||||
if err := disc.writeEnhancedFile(outputPath, enhanced); err != nil {
|
||||
return fmt.Errorf("failed to write enhanced file %s: %w", outputPath, err)
|
||||
}
|
||||
|
||||
result.FilesProcessed++
|
||||
result.ElementsEnhanced += enhanced.ElementsEnhanced
|
||||
result.ContainersAdded += enhanced.ContainersAdded
|
||||
result.IndividualsAdded += enhanced.IndividualsAdded
|
||||
result.EnhancedFiles = append(result.EnhancedFiles, outputPath)
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
return result, err
|
||||
}
|
||||
|
||||
// FileDiscoveryResult contains details about a single file discovery
|
||||
type FileDiscoveryResult struct {
|
||||
ElementsEnhanced int
|
||||
ContainersAdded int
|
||||
IndividualsAdded int
|
||||
SugarTransformed int // Count of syntactic sugar transformations
|
||||
Document *html.Node
|
||||
}
|
||||
|
||||
// discoverFile processes a single HTML file and adds insertr classes
|
||||
func (disc *Discoverer) discoverFile(filePath string, aggressive bool) (*FileDiscoveryResult, error) {
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening file: %w", err)
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
doc, err := html.Parse(file)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing HTML: %w", err)
|
||||
}
|
||||
|
||||
result := &FileDiscoveryResult{Document: doc}
|
||||
|
||||
// Find candidates for enhancement
|
||||
disc.discoverNode(doc, result, aggressive)
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// discoverNode recursively discovers editable nodes in the document
|
||||
func (disc *Discoverer) discoverNode(node *html.Node, result *FileDiscoveryResult, aggressive bool) {
|
||||
if node.Type != html.ElementNode {
|
||||
// Recursively check children
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
disc.discoverNode(child, result, aggressive)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// Handle syntactic sugar: containers with .insertr class
|
||||
if disc.hasInsertrClass(node) && disc.isGoodContainer(node) {
|
||||
// Syntactic sugar transformation: remove .insertr from container, add to viable children
|
||||
viableChildren := engine.FindViableChildren(node)
|
||||
if len(viableChildren) >= 1 {
|
||||
disc.removeInsertrClass(node) // Remove from container
|
||||
for _, child := range viableChildren {
|
||||
if !disc.hasInsertrClass(child) {
|
||||
disc.addInsertrClass(child)
|
||||
result.IndividualsAdded++
|
||||
result.ElementsEnhanced++
|
||||
}
|
||||
}
|
||||
result.ContainersAdded++
|
||||
result.SugarTransformed++ // Track sugar transformations
|
||||
|
||||
// Don't process children since we just processed them
|
||||
return
|
||||
}
|
||||
// If no viable children, leave .insertr on the element (individual editing)
|
||||
return
|
||||
}
|
||||
|
||||
// Skip if already has insertr class (individual editing elements)
|
||||
if disc.hasInsertrClass(node) {
|
||||
return
|
||||
}
|
||||
|
||||
// Check if this is a container that should use expansion
|
||||
if disc.isGoodContainer(node) {
|
||||
viableChildren := engine.FindViableChildren(node)
|
||||
if len(viableChildren) >= 2 || (aggressive && len(viableChildren) >= 1) {
|
||||
// Container expansion: add insertr class to each viable child, not the container
|
||||
for _, child := range viableChildren {
|
||||
if !disc.hasInsertrClass(child) {
|
||||
disc.addInsertrClass(child)
|
||||
result.IndividualsAdded++
|
||||
result.ElementsEnhanced++
|
||||
}
|
||||
}
|
||||
result.ContainersAdded++
|
||||
|
||||
// Don't process children since we just processed them
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Check if this individual element should be enhanced
|
||||
if disc.isGoodIndividualElement(node) {
|
||||
disc.addInsertrClass(node)
|
||||
result.IndividualsAdded++
|
||||
result.ElementsEnhanced++
|
||||
|
||||
// Don't process children of enhanced individual elements
|
||||
return
|
||||
}
|
||||
|
||||
// Recursively check children
|
||||
for child := node.FirstChild; child != nil; child = child.NextSibling {
|
||||
disc.discoverNode(child, result, aggressive)
|
||||
}
|
||||
}
|
||||
|
||||
// isGoodContainer checks if an element is a good candidate for container expansion
|
||||
func (disc *Discoverer) isGoodContainer(node *html.Node) bool {
|
||||
containerTags := map[string]bool{
|
||||
"div": true,
|
||||
"section": true,
|
||||
"article": true,
|
||||
"header": true,
|
||||
"footer": true,
|
||||
"main": true,
|
||||
"aside": true,
|
||||
"nav": true,
|
||||
}
|
||||
|
||||
tag := strings.ToLower(node.Data)
|
||||
if !containerTags[tag] {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip containers that are clearly non-content
|
||||
if disc.isNonContentElement(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip containers in the head section
|
||||
if disc.isInHead(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip containers with technical/framework-specific classes that suggest they're not content
|
||||
classes := disc.getClasses(node)
|
||||
for _, class := range classes {
|
||||
lowerClass := strings.ToLower(class)
|
||||
// Skip Next.js internal classes and other framework artifacts
|
||||
if strings.Contains(lowerClass, "__next") ||
|
||||
strings.Contains(lowerClass, "webpack") ||
|
||||
strings.Contains(lowerClass, "hydration") ||
|
||||
strings.Contains(lowerClass, "react") ||
|
||||
strings.Contains(lowerClass, "gatsby") {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// isGoodIndividualElement checks if an element is a good candidate for individual enhancement
|
||||
func (disc *Discoverer) isGoodIndividualElement(node *html.Node) bool {
|
||||
// Skip self-closing elements
|
||||
if disc.isSelfClosing(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip non-content elements that should never be editable
|
||||
if disc.isNonContentElement(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip elements inside head section
|
||||
if disc.isInHead(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Skip elements with no meaningful content
|
||||
if disc.hasNoMeaningfulContent(node) {
|
||||
return false
|
||||
}
|
||||
|
||||
// Check if element has editable content
|
||||
return disc.hasEditableContent(node)
|
||||
}
|
||||
|
||||
// hasEditableContent uses the engine's enhanced detection logic
|
||||
func (disc *Discoverer) hasEditableContent(node *html.Node) bool {
|
||||
return engine.HasEditableContent(node)
|
||||
}
|
||||
|
||||
// hasInsertrClass checks if a node already has the insertr class
|
||||
func (disc *Discoverer) hasInsertrClass(node *html.Node) bool {
|
||||
classes := disc.getClasses(node)
|
||||
for _, class := range classes {
|
||||
if class == "insertr" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// addInsertrClass adds the insertr class to a node
|
||||
func (disc *Discoverer) addInsertrClass(node *html.Node) {
|
||||
classes := disc.getClasses(node)
|
||||
classes = append(classes, "insertr")
|
||||
disc.setClasses(node, classes)
|
||||
}
|
||||
|
||||
// removeInsertrClass removes the insertr class from a node
|
||||
func (disc *Discoverer) removeInsertrClass(node *html.Node) {
|
||||
classes := disc.getClasses(node)
|
||||
var filteredClasses []string
|
||||
for _, class := range classes {
|
||||
if class != "insertr" {
|
||||
filteredClasses = append(filteredClasses, class)
|
||||
}
|
||||
}
|
||||
disc.setClasses(node, filteredClasses)
|
||||
}
|
||||
|
||||
// getClasses extracts CSS classes from a node
|
||||
func (disc *Discoverer) getClasses(node *html.Node) []string {
|
||||
for i, attr := range node.Attr {
|
||||
if attr.Key == "class" {
|
||||
if attr.Val == "" {
|
||||
return []string{}
|
||||
}
|
||||
return strings.Fields(attr.Val)
|
||||
}
|
||||
// Update existing class attribute
|
||||
if attr.Key == "class" {
|
||||
node.Attr[i] = attr
|
||||
return strings.Fields(attr.Val)
|
||||
}
|
||||
}
|
||||
return []string{}
|
||||
}
|
||||
|
||||
// setClasses sets CSS classes on a node
|
||||
func (disc *Discoverer) setClasses(node *html.Node, classes []string) {
|
||||
classValue := strings.Join(classes, " ")
|
||||
|
||||
// Update existing class attribute or add new one
|
||||
for i, attr := range node.Attr {
|
||||
if attr.Key == "class" {
|
||||
node.Attr[i].Val = classValue
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Add new class attribute
|
||||
node.Attr = append(node.Attr, html.Attribute{
|
||||
Key: "class",
|
||||
Val: classValue,
|
||||
})
|
||||
}
|
||||
|
||||
// isSelfClosing checks if an element is self-closing
|
||||
func (disc *Discoverer) isSelfClosing(node *html.Node) bool {
|
||||
selfClosingTags := map[string]bool{
|
||||
"img": true, "input": true, "br": true, "hr": true,
|
||||
"meta": true, "link": true, "area": true, "base": true,
|
||||
"col": true, "embed": true, "source": true, "track": true, "wbr": true,
|
||||
}
|
||||
return selfClosingTags[strings.ToLower(node.Data)]
|
||||
}
|
||||
|
||||
// isNonContentElement checks if an element should never be editable
|
||||
func (disc *Discoverer) isNonContentElement(node *html.Node) bool {
|
||||
nonContentTags := map[string]bool{
|
||||
"script": true, // JavaScript code
|
||||
"style": true, // CSS styles
|
||||
"meta": true, // Metadata
|
||||
"link": true, // Links to resources
|
||||
"title": true, // Document title (handled separately)
|
||||
"head": true, // Document head
|
||||
"html": true, // Root element
|
||||
"body": true, // Body element (too broad)
|
||||
"noscript": true, // Fallback content
|
||||
"template": true, // HTML templates
|
||||
"svg": true, // SVG graphics (complex)
|
||||
"canvas": true, // Canvas graphics
|
||||
"iframe": true, // Embedded content
|
||||
"object": true, // Embedded objects
|
||||
"embed": true, // Embedded content
|
||||
"video": true, // Video elements (complex)
|
||||
"audio": true, // Audio elements (complex)
|
||||
"map": true, // Image maps
|
||||
"area": true, // Image map areas
|
||||
"base": true, // Base URL
|
||||
"col": true, // Table columns
|
||||
"colgroup": true, // Table column groups
|
||||
"track": true, // Video/audio tracks
|
||||
"source": true, // Media sources
|
||||
"param": true, // Object parameters
|
||||
"wbr": true, // Word break opportunities
|
||||
}
|
||||
return nonContentTags[strings.ToLower(node.Data)]
|
||||
}
|
||||
|
||||
// isInHead checks if a node is inside the document head
|
||||
func (disc *Discoverer) isInHead(node *html.Node) bool {
|
||||
current := node.Parent
|
||||
for current != nil {
|
||||
if current.Type == html.ElementNode && strings.ToLower(current.Data) == "head" {
|
||||
return true
|
||||
}
|
||||
current = current.Parent
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// hasNoMeaningfulContent checks if an element has no meaningful text content
|
||||
func (disc *Discoverer) hasNoMeaningfulContent(node *html.Node) bool {
|
||||
if node.Type != html.ElementNode {
|
||||
return true
|
||||
}
|
||||
|
||||
// Extract text content
|
||||
content := engine.ExtractTextContent(node)
|
||||
|
||||
// Empty or whitespace-only content
|
||||
if content == "" {
|
||||
return true
|
||||
}
|
||||
|
||||
// Very short content that's likely not meaningful
|
||||
if len(content) < 2 {
|
||||
return true
|
||||
}
|
||||
|
||||
// Content that looks like technical artifacts
|
||||
technicalPatterns := []string{
|
||||
"$", "<!--", "-->", "{", "}", "[", "]",
|
||||
"function", "var ", "const ", "let ", "return",
|
||||
"import", "export", "require", "module.exports",
|
||||
"/*", "*/", "//", "<?", "?>", "<%", "%>",
|
||||
}
|
||||
|
||||
for _, pattern := range technicalPatterns {
|
||||
if strings.Contains(content, pattern) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// copyFile copies a file from input to output directory
|
||||
func (disc *Discoverer) copyFile(filePath, inputDir, outputDir string) error {
|
||||
outputPath := disc.getOutputPath(filePath, inputDir, outputDir)
|
||||
|
||||
// Create output directory for the file
|
||||
if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
input, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return os.WriteFile(outputPath, input, 0644)
|
||||
}
|
||||
|
||||
// getOutputPath converts input path to output path
|
||||
func (disc *Discoverer) getOutputPath(filePath, inputDir, outputDir string) string {
|
||||
relPath, _ := filepath.Rel(inputDir, filePath)
|
||||
return filepath.Join(outputDir, relPath)
|
||||
}
|
||||
|
||||
// writeEnhancedFile writes the enhanced HTML document to a file
|
||||
func (disc *Discoverer) writeEnhancedFile(outputPath string, enhanced *FileDiscoveryResult) error {
|
||||
// Create output directory
|
||||
if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
file, err := os.Create(outputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer file.Close()
|
||||
|
||||
return html.Render(file, enhanced.Document)
|
||||
}
|
||||
@@ -1,9 +1,6 @@
|
||||
package content
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"golang.org/x/net/html"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
@@ -14,46 +11,40 @@ import (
|
||||
|
||||
// EnhancementConfig configures the enhancement pipeline
|
||||
type EnhancementConfig struct {
|
||||
Discovery DiscoveryConfig
|
||||
Discovery config.DiscoveryConfig
|
||||
ContentInjection bool
|
||||
GenerateIDs bool
|
||||
}
|
||||
|
||||
// Type alias for backward compatibility
|
||||
type DiscoveryConfig = config.DiscoveryConfig
|
||||
|
||||
// Enhancer combines discovery, ID generation, and content injection in unified pipeline
|
||||
type Enhancer struct {
|
||||
engine *engine.ContentEngine
|
||||
discoverer *Discoverer
|
||||
config EnhancementConfig
|
||||
siteID string
|
||||
engine *engine.ContentEngine
|
||||
config EnhancementConfig
|
||||
siteID string
|
||||
}
|
||||
|
||||
// NewEnhancer creates a new HTML enhancer with unified pipeline
|
||||
func NewEnhancer(client db.ContentRepository, siteID string, config EnhancementConfig) *Enhancer {
|
||||
return &Enhancer{
|
||||
engine: engine.NewContentEngine(client),
|
||||
discoverer: NewDiscoverer(),
|
||||
config: config,
|
||||
siteID: siteID,
|
||||
engine: engine.NewContentEngine(client),
|
||||
config: config,
|
||||
siteID: siteID,
|
||||
}
|
||||
}
|
||||
|
||||
// NewEnhancerWithAuth creates a new HTML enhancer with auth provider
|
||||
func NewEnhancerWithAuth(client db.ContentRepository, siteID string, config EnhancementConfig, authProvider *engine.AuthProvider) *Enhancer {
|
||||
return &Enhancer{
|
||||
engine: engine.NewContentEngineWithAuth(client, authProvider),
|
||||
discoverer: NewDiscoverer(),
|
||||
config: config,
|
||||
siteID: siteID,
|
||||
engine: engine.NewContentEngineWithAuth(client, authProvider),
|
||||
config: config,
|
||||
siteID: siteID,
|
||||
}
|
||||
}
|
||||
|
||||
// NewDefaultEnhancer creates an enhancer with default configuration
|
||||
func NewDefaultEnhancer(client db.ContentRepository, siteID string) *Enhancer {
|
||||
defaultConfig := EnhancementConfig{
|
||||
Discovery: DiscoveryConfig{
|
||||
Discovery: config.DiscoveryConfig{
|
||||
Enabled: true,
|
||||
Aggressive: false,
|
||||
Containers: true,
|
||||
@@ -65,138 +56,14 @@ func NewDefaultEnhancer(client db.ContentRepository, siteID string) *Enhancer {
|
||||
return NewEnhancer(client, siteID, defaultConfig)
|
||||
}
|
||||
|
||||
// EnhanceFile processes a single HTML file through the complete pipeline
|
||||
// EnhanceFile processes a single HTML file through the engine
|
||||
func (e *Enhancer) EnhanceFile(inputPath, outputPath string) error {
|
||||
// Read HTML file
|
||||
htmlContent, err := os.ReadFile(inputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading file %s: %w", inputPath, err)
|
||||
}
|
||||
|
||||
// Process through unified pipeline
|
||||
processedHTML, err := e.processHTML(htmlContent, filepath.Base(inputPath))
|
||||
if err != nil {
|
||||
return fmt.Errorf("processing HTML %s: %w", inputPath, err)
|
||||
}
|
||||
|
||||
// Create output directory
|
||||
if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil {
|
||||
return fmt.Errorf("creating output directory: %w", err)
|
||||
}
|
||||
|
||||
// Write processed HTML
|
||||
return os.WriteFile(outputPath, processedHTML, 0644)
|
||||
return e.engine.ProcessFile(inputPath, outputPath, e.siteID, engine.Enhancement)
|
||||
}
|
||||
|
||||
// EnhanceDirectory processes all files in a directory through the unified pipeline
|
||||
// EnhanceDirectory processes all files in a directory through the engine
|
||||
func (e *Enhancer) EnhanceDirectory(inputDir, outputDir string) error {
|
||||
// Create output directory
|
||||
if err := os.MkdirAll(outputDir, 0755); err != nil {
|
||||
return fmt.Errorf("creating output directory: %w", err)
|
||||
}
|
||||
|
||||
// Walk input directory
|
||||
return filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Calculate relative path and output path
|
||||
relPath, err := filepath.Rel(inputDir, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
outputPath := filepath.Join(outputDir, relPath)
|
||||
|
||||
// Handle directories
|
||||
if info.IsDir() {
|
||||
return os.MkdirAll(outputPath, info.Mode())
|
||||
}
|
||||
|
||||
// Process HTML files through enhancement pipeline
|
||||
if strings.HasSuffix(strings.ToLower(path), ".html") {
|
||||
return e.EnhanceFile(path, outputPath)
|
||||
}
|
||||
|
||||
// Copy non-HTML files as-is
|
||||
return e.copyFile(path, outputPath)
|
||||
})
|
||||
}
|
||||
|
||||
// processHTML implements the unified enhancement pipeline
|
||||
func (e *Enhancer) processHTML(htmlContent []byte, filePath string) ([]byte, error) {
|
||||
var processedHTML []byte = htmlContent
|
||||
|
||||
// Phase 1: Element Discovery (if enabled)
|
||||
if e.config.Discovery.Enabled {
|
||||
discoveredHTML, err := e.discoverElements(processedHTML, filePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("element discovery: %w", err)
|
||||
}
|
||||
processedHTML = discoveredHTML
|
||||
}
|
||||
|
||||
// Phase 2 & 3: ID Generation + Content Injection (via engine)
|
||||
if e.config.GenerateIDs || e.config.ContentInjection {
|
||||
enhancedHTML, err := e.enhanceWithEngine(processedHTML, filePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("engine enhancement: %w", err)
|
||||
}
|
||||
processedHTML = enhancedHTML
|
||||
}
|
||||
|
||||
return processedHTML, nil
|
||||
}
|
||||
|
||||
// discoverElements adds insertr classes to viable elements
|
||||
func (e *Enhancer) discoverElements(htmlContent []byte, filePath string) ([]byte, error) {
|
||||
// Parse HTML
|
||||
doc, err := html.Parse(strings.NewReader(string(htmlContent)))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing HTML: %w", err)
|
||||
}
|
||||
|
||||
// Find and mark viable elements
|
||||
result := &FileDiscoveryResult{Document: doc}
|
||||
e.discoverer.discoverNode(doc, result, e.config.Discovery.Aggressive)
|
||||
|
||||
// Render back to HTML
|
||||
var buf strings.Builder
|
||||
if err := html.Render(&buf, doc); err != nil {
|
||||
return nil, fmt.Errorf("rendering HTML: %w", err)
|
||||
}
|
||||
|
||||
return []byte(buf.String()), nil
|
||||
}
|
||||
|
||||
// enhanceWithEngine uses the unified engine for ID generation and content injection
|
||||
func (e *Enhancer) enhanceWithEngine(htmlContent []byte, filePath string) ([]byte, error) {
|
||||
// Determine processing mode
|
||||
var mode engine.ProcessMode
|
||||
if e.config.ContentInjection {
|
||||
mode = engine.Enhancement // ID generation + content injection
|
||||
} else {
|
||||
mode = engine.IDGeneration // ID generation only
|
||||
}
|
||||
|
||||
// Process with engine
|
||||
result, err := e.engine.ProcessContent(engine.ContentInput{
|
||||
HTML: htmlContent,
|
||||
FilePath: filePath,
|
||||
SiteID: e.siteID,
|
||||
Mode: mode,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("engine processing: %w", err)
|
||||
}
|
||||
|
||||
// Render enhanced document
|
||||
var buf strings.Builder
|
||||
if err := html.Render(&buf, result.Document); err != nil {
|
||||
return nil, fmt.Errorf("rendering enhanced HTML: %w", err)
|
||||
}
|
||||
|
||||
return []byte(buf.String()), nil
|
||||
return e.engine.ProcessDirectory(inputDir, outputDir, e.siteID, engine.Enhancement)
|
||||
}
|
||||
|
||||
// SetSiteID sets the site ID for the enhancer
|
||||
@@ -261,20 +128,3 @@ func deriveDemoSiteID(sitePath string) string {
|
||||
|
||||
return dirName
|
||||
}
|
||||
|
||||
// copyFile copies a file from src to dst
|
||||
func (e *Enhancer) copyFile(src, dst string) error {
|
||||
// Create directory for destination
|
||||
if err := os.MkdirAll(filepath.Dir(dst), 0755); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Read source
|
||||
data, err := os.ReadFile(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write destination
|
||||
return os.WriteFile(dst, data, 0644)
|
||||
}
|
||||
|
||||
@@ -1,226 +0,0 @@
|
||||
package content
|
||||
|
||||
import (
	"errors"
	"fmt"
	"log"
	"maps"
	"os"
	"path/filepath"
	"strings"
	"sync"

	"github.com/insertr/insertr/internal/config"
	"github.com/insertr/insertr/internal/db"
	"github.com/insertr/insertr/internal/engine"
)
|
||||
|
||||
// SiteManager handles registration and enhancement of static sites
|
||||
type SiteManager struct {
|
||||
sites map[string]*config.SiteConfig
|
||||
enhancer *Enhancer
|
||||
mutex sync.RWMutex
|
||||
devMode bool
|
||||
contentClient db.ContentRepository
|
||||
authProvider *engine.AuthProvider
|
||||
}
|
||||
|
||||
// NewSiteManager creates a new site manager
|
||||
func NewSiteManager(contentClient db.ContentRepository, devMode bool) *SiteManager {
|
||||
return &SiteManager{
|
||||
sites: make(map[string]*config.SiteConfig),
|
||||
enhancer: NewDefaultEnhancer(contentClient, ""), // siteID will be set per operation
|
||||
devMode: devMode,
|
||||
contentClient: contentClient,
|
||||
authProvider: &engine.AuthProvider{Type: "mock"}, // default
|
||||
}
|
||||
}
|
||||
|
||||
// NewSiteManagerWithAuth creates a new site manager with auth provider
|
||||
func NewSiteManagerWithAuth(contentClient db.ContentRepository, devMode bool, authProvider *engine.AuthProvider) *SiteManager {
|
||||
if authProvider == nil {
|
||||
authProvider = &engine.AuthProvider{Type: "mock"}
|
||||
}
|
||||
return &SiteManager{
|
||||
sites: make(map[string]*config.SiteConfig),
|
||||
contentClient: contentClient,
|
||||
authProvider: authProvider,
|
||||
devMode: devMode,
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterSite adds a site to the manager
|
||||
func (sm *SiteManager) RegisterSite(config *config.SiteConfig) error {
|
||||
sm.mutex.Lock()
|
||||
defer sm.mutex.Unlock()
|
||||
|
||||
// Validate site configuration
|
||||
if config.SiteID == "" {
|
||||
return fmt.Errorf("site_id is required")
|
||||
}
|
||||
if config.Path == "" {
|
||||
return fmt.Errorf("path is required for site %s", config.SiteID)
|
||||
}
|
||||
|
||||
// Check if path exists, auto-create enhancement directories
|
||||
if _, err := os.Stat(config.Path); os.IsNotExist(err) {
|
||||
// Auto-create directory if it appears to be an enhancement target
|
||||
if strings.HasSuffix(config.Path, "_enhanced") {
|
||||
log.Printf("📁 Creating enhancement directory: %s", config.Path)
|
||||
if err := os.MkdirAll(config.Path, 0755); err != nil {
|
||||
return fmt.Errorf("failed to create enhancement directory %s: %w", config.Path, err)
|
||||
}
|
||||
} else {
|
||||
return fmt.Errorf("site path does not exist: %s", config.Path)
|
||||
}
|
||||
}
|
||||
|
||||
// Convert to absolute path
|
||||
absPath, err := filepath.Abs(config.Path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to resolve absolute path for %s: %w", config.Path, err)
|
||||
}
|
||||
config.Path = absPath
|
||||
|
||||
sm.sites[config.SiteID] = config
|
||||
log.Printf("📁 Registered site %s at %s", config.SiteID, config.Path)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RegisterSites bulk registers multiple sites from configuration
|
||||
func (sm *SiteManager) RegisterSites(configs []*config.SiteConfig) error {
|
||||
for _, config := range configs {
|
||||
if err := sm.RegisterSite(config); err != nil {
|
||||
return fmt.Errorf("failed to register site %s: %w", config.SiteID, err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetSite returns a registered site configuration
|
||||
func (sm *SiteManager) GetSite(siteID string) (*config.SiteConfig, bool) {
|
||||
sm.mutex.RLock()
|
||||
defer sm.mutex.RUnlock()
|
||||
|
||||
site, exists := sm.sites[siteID]
|
||||
return site, exists
|
||||
}
|
||||
|
||||
// GetAllSites returns all registered sites
|
||||
func (sm *SiteManager) GetAllSites() map[string]*config.SiteConfig {
|
||||
sm.mutex.RLock()
|
||||
defer sm.mutex.RUnlock()
|
||||
|
||||
// Return a copy to prevent external modification
|
||||
result := make(map[string]*config.SiteConfig)
|
||||
maps.Copy(result, sm.sites)
|
||||
return result
|
||||
}
|
||||
|
||||
// IsAutoEnhanceEnabled checks if a site has auto-enhancement enabled
|
||||
func (sm *SiteManager) IsAutoEnhanceEnabled(siteID string) bool {
|
||||
sm.mutex.RLock()
|
||||
defer sm.mutex.RUnlock()
|
||||
|
||||
site, exists := sm.sites[siteID]
|
||||
return exists && site.AutoEnhance
|
||||
}
|
||||
|
||||
// EnhanceSite performs enhancement from source to output directory
|
||||
func (sm *SiteManager) EnhanceSite(siteID string) error {
|
||||
sm.mutex.RLock()
|
||||
site, exists := sm.sites[siteID]
|
||||
sm.mutex.RUnlock()
|
||||
|
||||
if !exists {
|
||||
return fmt.Errorf("site %s is not registered", siteID)
|
||||
}
|
||||
|
||||
// Use source path if available, otherwise use main path (for backwards compatibility)
|
||||
sourcePath := site.SourcePath
|
||||
if sourcePath == "" {
|
||||
sourcePath = site.Path
|
||||
}
|
||||
outputPath := site.Path
|
||||
|
||||
log.Printf("🔄 Enhancing site %s from %s to %s", siteID, sourcePath, outputPath)
|
||||
|
||||
// Create output directory if it doesn't exist
|
||||
if err := os.MkdirAll(outputPath, 0755); err != nil {
|
||||
return fmt.Errorf("failed to create output directory %s: %w", outputPath, err)
|
||||
}
|
||||
|
||||
// Create enhancer with auth provider for this operation
|
||||
// Discovery disabled by default - developers should explicitly mark elements with class="insertr"
|
||||
discoveryConfig := DiscoveryConfig{
|
||||
Enabled: false, // Changed from true - respect developer intent
|
||||
Aggressive: false,
|
||||
Containers: true,
|
||||
Individual: true,
|
||||
}
|
||||
|
||||
// Override with site-specific discovery config if provided
|
||||
if site.Discovery != nil {
|
||||
discoveryConfig = *site.Discovery
|
||||
log.Printf("🔧 Using site-specific discovery config for %s: enabled=%v, aggressive=%v",
|
||||
siteID, discoveryConfig.Enabled, discoveryConfig.Aggressive)
|
||||
}
|
||||
|
||||
config := EnhancementConfig{
|
||||
Discovery: discoveryConfig,
|
||||
ContentInjection: true,
|
||||
GenerateIDs: true,
|
||||
}
|
||||
enhancer := NewEnhancerWithAuth(sm.contentClient, siteID, config, sm.authProvider)
|
||||
|
||||
// Perform enhancement from source to output
|
||||
if err := enhancer.EnhanceDirectory(sourcePath, outputPath); err != nil {
|
||||
return fmt.Errorf("failed to enhance site %s: %w", siteID, err)
|
||||
}
|
||||
|
||||
log.Printf("✅ Successfully enhanced site %s", siteID)
|
||||
return nil
|
||||
}
|
||||
|
||||
// EnhanceAllSites enhances all registered sites that have auto-enhancement enabled
|
||||
func (sm *SiteManager) EnhanceAllSites() error {
|
||||
sm.mutex.RLock()
|
||||
sites := make([]*config.SiteConfig, 0, len(sm.sites))
|
||||
for _, site := range sm.sites {
|
||||
if site.AutoEnhance {
|
||||
sites = append(sites, site)
|
||||
}
|
||||
}
|
||||
sm.mutex.RUnlock()
|
||||
|
||||
var errors []error
|
||||
for _, site := range sites {
|
||||
if err := sm.EnhanceSite(site.SiteID); err != nil {
|
||||
errors = append(errors, err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(errors) > 0 {
|
||||
return fmt.Errorf("enhancement failed for some sites: %v", errors)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetStats returns statistics about registered sites
|
||||
func (sm *SiteManager) GetStats() map[string]any {
|
||||
sm.mutex.RLock()
|
||||
defer sm.mutex.RUnlock()
|
||||
|
||||
autoEnhanceCount := 0
|
||||
for _, site := range sm.sites {
|
||||
if site.AutoEnhance {
|
||||
autoEnhanceCount++
|
||||
}
|
||||
}
|
||||
|
||||
return map[string]interface{}{
|
||||
"total_sites": len(sm.sites),
|
||||
"auto_enhance_sites": autoEnhanceCount,
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user