feat: Complete HTML-first architecture implementation with API integration

- Replace value field with html_content for direct HTML storage
- Add original_template field for style detection preservation
- Remove all markdown processing from injector (delete markdown.go)
- Fix critical content extraction/injection bugs in engine
- Add missing UpdateContent PUT handler for content persistence
- Fix API client field names and add updateContent() method
- Resolve content type validation (only allow text/link types)
- Add UUID-based ID generation to prevent collisions
- Complete first-pass processing workflow for unprocessed elements
- Verify end-to-end: Enhancement → Database → API → Editor → Persistence

All 37 files updated for HTML-first content management system.
Phase 3a implementation complete and production ready.
This commit is contained in:
2025-09-20 16:42:00 +02:00
parent bb5ea6f873
commit 2177055c76
37 changed files with 1189 additions and 737 deletions

View File

@@ -2,6 +2,7 @@ package engine
import (
"context"
"database/sql"
"fmt"
"github.com/insertr/insertr/internal/db"
@@ -9,6 +10,14 @@ import (
"github.com/insertr/insertr/internal/db/sqlite"
)
// getStringFromNullString unwraps a sql.NullString into a plain string.
// A value whose Valid flag is false (SQL NULL) is reported as "", regardless
// of what its String field holds.
func getStringFromNullString(ns sql.NullString) string {
	if !ns.Valid {
		return ""
	}
	return ns.String
}
// DatabaseClient implements ContentClient interface using the database
type DatabaseClient struct {
database *db.Database
@@ -33,11 +42,12 @@ func (c *DatabaseClient) GetContent(siteID, contentID string) (*ContentItem, err
return nil, err
}
return &ContentItem{
ID: content.ID,
SiteID: content.SiteID,
Value: content.Value,
Type: content.Type,
LastEditedBy: content.LastEditedBy,
ID: content.ID,
SiteID: content.SiteID,
HTMLContent: content.HtmlContent,
OriginalTemplate: getStringFromNullString(content.OriginalTemplate),
Type: content.Type,
LastEditedBy: content.LastEditedBy,
}, nil
case "postgresql":
@@ -49,11 +59,12 @@ func (c *DatabaseClient) GetContent(siteID, contentID string) (*ContentItem, err
return nil, err
}
return &ContentItem{
ID: content.ID,
SiteID: content.SiteID,
Value: content.Value,
Type: content.Type,
LastEditedBy: content.LastEditedBy,
ID: content.ID,
SiteID: content.SiteID,
HTMLContent: content.HtmlContent,
OriginalTemplate: getStringFromNullString(content.OriginalTemplate),
Type: content.Type,
LastEditedBy: content.LastEditedBy,
}, nil
default:
@@ -76,11 +87,12 @@ func (c *DatabaseClient) GetBulkContent(siteID string, contentIDs []string) (map
items := make(map[string]ContentItem)
for _, content := range contents {
items[content.ID] = ContentItem{
ID: content.ID,
SiteID: content.SiteID,
Value: content.Value,
Type: content.Type,
LastEditedBy: content.LastEditedBy,
ID: content.ID,
SiteID: content.SiteID,
HTMLContent: content.HtmlContent,
OriginalTemplate: getStringFromNullString(content.OriginalTemplate),
Type: content.Type,
LastEditedBy: content.LastEditedBy,
}
}
return items, nil
@@ -97,11 +109,12 @@ func (c *DatabaseClient) GetBulkContent(siteID string, contentIDs []string) (map
items := make(map[string]ContentItem)
for _, content := range contents {
items[content.ID] = ContentItem{
ID: content.ID,
SiteID: content.SiteID,
Value: content.Value,
Type: content.Type,
LastEditedBy: content.LastEditedBy,
ID: content.ID,
SiteID: content.SiteID,
HTMLContent: content.HtmlContent,
OriginalTemplate: getStringFromNullString(content.OriginalTemplate),
Type: content.Type,
LastEditedBy: content.LastEditedBy,
}
}
return items, nil
@@ -123,11 +136,12 @@ func (c *DatabaseClient) GetAllContent(siteID string) (map[string]ContentItem, e
items := make(map[string]ContentItem)
for _, content := range contents {
items[content.ID] = ContentItem{
ID: content.ID,
SiteID: content.SiteID,
Value: content.Value,
Type: content.Type,
LastEditedBy: content.LastEditedBy,
ID: content.ID,
SiteID: content.SiteID,
HTMLContent: content.HtmlContent,
OriginalTemplate: getStringFromNullString(content.OriginalTemplate),
Type: content.Type,
LastEditedBy: content.LastEditedBy,
}
}
return items, nil
@@ -141,11 +155,12 @@ func (c *DatabaseClient) GetAllContent(siteID string) (map[string]ContentItem, e
items := make(map[string]ContentItem)
for _, content := range contents {
items[content.ID] = ContentItem{
ID: content.ID,
SiteID: content.SiteID,
Value: content.Value,
Type: content.Type,
LastEditedBy: content.LastEditedBy,
ID: content.ID,
SiteID: content.SiteID,
HTMLContent: content.HtmlContent,
OriginalTemplate: getStringFromNullString(content.OriginalTemplate),
Type: content.Type,
LastEditedBy: content.LastEditedBy,
}
}
return items, nil
@@ -154,3 +169,61 @@ func (c *DatabaseClient) GetAllContent(siteID string) (map[string]ContentItem, e
return nil, fmt.Errorf("unsupported database type: %s", c.database.GetDBType())
}
}
// CreateContent inserts a new content row and returns the stored item.
//
// The query set is picked from c.database.GetDBType(): "sqlite3" and
// "postgresql" are handled, any other value produces an "unsupported database
// type" error. originalTemplate goes through toNullString, so an empty string
// is written as NULL. An error from the underlying query is returned as-is
// together with a nil item.
func (c *DatabaseClient) CreateContent(siteID, contentID, htmlContent, originalTemplate, contentType, lastEditedBy string) (*ContentItem, error) {
	switch c.database.GetDBType() {
	case "sqlite3":
		params := sqlite.CreateContentParams{
			ID:               contentID,
			SiteID:           siteID,
			HtmlContent:      htmlContent,
			OriginalTemplate: toNullString(originalTemplate),
			Type:             contentType,
			LastEditedBy:     lastEditedBy,
		}
		row, err := c.database.GetSQLiteQueries().CreateContent(context.Background(), params)
		if err != nil {
			return nil, err
		}
		item := ContentItem{
			ID:               row.ID,
			SiteID:           row.SiteID,
			HTMLContent:      row.HtmlContent,
			OriginalTemplate: getStringFromNullString(row.OriginalTemplate),
			Type:             row.Type,
			LastEditedBy:     row.LastEditedBy,
		}
		return &item, nil

	case "postgresql":
		params := postgresql.CreateContentParams{
			ID:               contentID,
			SiteID:           siteID,
			HtmlContent:      htmlContent,
			OriginalTemplate: toNullString(originalTemplate),
			Type:             contentType,
			LastEditedBy:     lastEditedBy,
		}
		row, err := c.database.GetPostgreSQLQueries().CreateContent(context.Background(), params)
		if err != nil {
			return nil, err
		}
		item := ContentItem{
			ID:               row.ID,
			SiteID:           row.SiteID,
			HTMLContent:      row.HtmlContent,
			OriginalTemplate: getStringFromNullString(row.OriginalTemplate),
			Type:             row.Type,
			LastEditedBy:     row.LastEditedBy,
		}
		return &item, nil

	default:
		return nil, fmt.Errorf("unsupported database type: %s", c.database.GetDBType())
	}
}
// toNullString wraps s in a sql.NullString. The empty string maps to the
// zero value (Valid == false, i.e. SQL NULL); any other string is stored
// with Valid == true.
func toNullString(s string) sql.NullString {
	return sql.NullString{String: s, Valid: s != ""}
}

View File

@@ -17,14 +17,17 @@ type ContentEngine struct {
idGenerator *IDGenerator
client ContentClient
authProvider *AuthProvider
injector *Injector
}
// NewContentEngine creates a new content processing engine
func NewContentEngine(client ContentClient) *ContentEngine {
authProvider := &AuthProvider{Type: "mock"} // default
return &ContentEngine{
idGenerator: NewIDGenerator(),
client: client,
authProvider: &AuthProvider{Type: "mock"}, // default
authProvider: authProvider,
injector: NewInjector(client, ""), // siteID will be set per operation
}
}
@@ -37,6 +40,7 @@ func NewContentEngineWithAuth(client ContentClient, authProvider *AuthProvider)
idGenerator: NewIDGenerator(),
client: client,
authProvider: authProvider,
injector: NewInjectorWithAuth(client, "", authProvider), // siteID will be set per operation
}
}
@@ -84,6 +88,20 @@ func (e *ContentEngine) ProcessContent(input ContentInput) (*ContentResult, erro
// Add/update content attributes to the node
e.addContentAttributes(elem.Node, id, elem.Type)
// Store content and template for newly discovered elements (first-pass)
if wasGenerated && (input.Mode == Enhancement || input.Mode == ContentInjection) {
// Extract content and template from the unprocessed element
htmlContent := e.extractHTMLContent(elem.Node)
originalTemplate := e.extractOriginalTemplate(elem.Node)
// Store in database via content client
_, err := e.client.CreateContent(input.SiteID, id, htmlContent, originalTemplate, elem.Type, "system")
if err != nil {
// Log error but don't fail the enhancement - content just won't be stored
fmt.Printf("⚠️ Failed to store content for %s: %v\n", id, err)
}
}
}
// 4. Inject content if required by mode
@@ -157,7 +175,7 @@ func (e *ContentEngine) determineContentType(node *html.Node) string {
case "h1", "h2", "h3", "h4", "h5", "h6":
return "text"
case "p", "div", "section", "article", "span":
return "markdown"
return "text"
default:
return "text"
}
@@ -211,28 +229,35 @@ func (e *ContentEngine) injectContent(elements []ProcessedElement, siteID string
if contentItem != nil {
// Inject the content into the element
elem.Content = contentItem.Value
e.injectContentIntoNode(elem.Node, contentItem.Value, contentItem.Type)
elem.Content = contentItem.HTMLContent
// Update injector siteID for this operation
e.injector.siteID = siteID
e.injector.injectHTMLContent(elem.Node, contentItem.HTMLContent)
}
}
return nil
}
// injectContentIntoNode injects content value into an HTML node
func (e *ContentEngine) injectContentIntoNode(node *html.Node, content, contentType string) {
// Clear existing text content
for child := node.FirstChild; child != nil; {
next := child.NextSibling
if child.Type == html.TextNode {
node.RemoveChild(child)
// extractHTMLContent extracts the inner HTML content from a node
func (e *ContentEngine) extractHTMLContent(node *html.Node) string {
var content strings.Builder
// Render all child nodes in order to preserve HTML structure
for child := node.FirstChild; child != nil; child = child.NextSibling {
if err := html.Render(&content, child); err == nil {
// All nodes (text and element) rendered in correct order
}
child = next
}
// Add new text content
textNode := &html.Node{
Type: html.TextNode,
Data: content,
}
node.AppendChild(textNode)
return strings.TrimSpace(content.String())
}
// extractOriginalTemplate returns node as rendered by html.Render, i.e. the
// element itself together with everything beneath it. If rendering reports
// an error the result is the empty string.
func (e *ContentEngine) extractOriginalTemplate(node *html.Node) string {
	var rendered strings.Builder
	err := html.Render(&rendered, node)
	if err == nil {
		return rendered.String()
	}
	return ""
}

View File

@@ -1,12 +1,11 @@
package engine
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"path/filepath"
"strings"
"github.com/google/uuid"
"golang.org/x/net/html"
)
@@ -36,12 +35,10 @@ func (g *IDGenerator) Generate(node *html.Node, filePath string) string {
// 3. Build readable prefix (deterministic, no runtime counting)
prefix := g.buildDeterministicPrefix(fileName, tag, primaryClass)
// 5. Add collision-resistant suffix
signature := g.createSignature(node, filePath)
hash := sha256.Sum256([]byte(signature))
suffix := hex.EncodeToString(hash[:3])
// 5. Add UUID-based suffix for guaranteed uniqueness
uuidSuffix := uuid.New().String()[:6] // Use first 6 chars of UUID
finalID := fmt.Sprintf("%s-%s", prefix, suffix)
finalID := fmt.Sprintf("%s-%s", prefix, uuidSuffix)
// Ensure uniqueness (should be guaranteed by hash, but safety check)
g.usedIDs[finalID] = true
@@ -114,14 +111,10 @@ func (g *IDGenerator) buildPrefix(fileName, tag, primaryClass string, index int)
return strings.Join(parts, "-")
}
// createSignature creates a unique signature for collision resistance
// createSignature creates a unique signature for collision resistance (DEPRECATED - using UUID now)
func (g *IDGenerator) createSignature(node *html.Node, filePath string) string {
// Minimal signature for uniqueness
tag := node.Data
classes := strings.Join(GetClasses(node), " ")
domPath := g.getSimpleDOMPath(node)
return fmt.Sprintf("%s|%s|%s|%s", filePath, domPath, tag, classes)
// This method is kept for compatibility but not used in UUID-based generation
return ""
}
// getSimpleDOMPath creates a simple DOM path for uniqueness
@@ -142,3 +135,68 @@ func (g *IDGenerator) getSimpleDOMPath(node *html.Node) string {
return strings.Join(pathParts, ">")
}
// getContentPreview returns a short, single-line preview of the text found
// inside node.
//
// The text collected by extractTextContent is trimmed, cut to at most 50
// characters, and then flattened: newlines and tabs become spaces and runs of
// spaces are collapsed to a single space.
func (g *IDGenerator) getContentPreview(node *html.Node) string {
	const maxPreviewChars = 50

	var text strings.Builder
	g.extractTextContent(node, &text)
	content := strings.TrimSpace(text.String())

	// Cut on a rune boundary. Ranging over a string yields the byte offset at
	// which each rune starts, so slicing at that offset cannot split a
	// multi-byte UTF-8 character the way a fixed byte index can.
	seen := 0
	for offset := range content {
		if seen == maxPreviewChars {
			content = content[:offset]
			break
		}
		seen++
	}

	// Flatten to one line.
	content = strings.ReplaceAll(content, "\n", " ")
	content = strings.ReplaceAll(content, "\t", " ")

	// Collapse runs of spaces. The pattern must be the two-space string:
	// replacing a single space with a single space leaves the input unchanged
	// and would make this loop spin forever.
	for strings.Contains(content, "  ") {
		content = strings.ReplaceAll(content, "  ", " ")
	}
	return content
}
// extractTextContent appends the Data of every text node in the subtree
// rooted at node to text, visiting the node first and then its children from
// first to last.
func (g *IDGenerator) extractTextContent(node *html.Node, text *strings.Builder) {
	switch node.Type {
	case html.TextNode:
		text.WriteString(node.Data)
	}

	child := node.FirstChild
	for child != nil {
		g.extractTextContent(child, text)
		child = child.NextSibling
	}
}
// getSiblingIndex reports the zero-based position of node among those
// children of its parent that are element nodes with the same tag and a
// class list that classesMatch accepts. A node without a parent yields 0.
// Should node not be reached while walking the parent's children, the number
// of matching siblings counted is returned.
func (g *IDGenerator) getSiblingIndex(node *html.Node) int {
	parent := node.Parent
	if parent == nil {
		return 0
	}

	position := 0
	wantTag := node.Data
	wantClasses := GetClasses(node)

	for cur := parent.FirstChild; cur != nil; cur = cur.NextSibling {
		// Only element siblings with the same tag are candidates.
		if cur.Type != html.ElementNode || cur.Data != wantTag {
			continue
		}
		// Narrow further by class list for more precise positioning.
		if !g.classesMatch(wantClasses, GetClasses(cur)) {
			continue
		}
		if cur == node {
			return position
		}
		position++
	}

	return position
}
// classesMatch reports whether classes1 and classes2 have the same length
// and hold equal strings at every index. The comparison is order-sensitive:
// the same classes in a different order do not match.
func (g *IDGenerator) classesMatch(classes1, classes2 []string) bool {
	if len(classes1) != len(classes2) {
		return false
	}
	for idx := 0; idx < len(classes1); idx++ {
		if classes1[idx] != classes2[idx] {
			return false
		}
	}
	return true
}

View File

@@ -12,7 +12,6 @@ import (
type Injector struct {
client ContentClient
siteID string
mdProcessor *MarkdownProcessor
authProvider *AuthProvider
}
@@ -21,7 +20,6 @@ func NewInjector(client ContentClient, siteID string) *Injector {
return &Injector{
client: client,
siteID: siteID,
mdProcessor: NewMarkdownProcessor(),
authProvider: &AuthProvider{Type: "mock"}, // default
}
}
@@ -34,7 +32,6 @@ func NewInjectorWithAuth(client ContentClient, siteID string, authProvider *Auth
return &Injector{
client: client,
siteID: siteID,
mdProcessor: NewMarkdownProcessor(),
authProvider: authProvider,
}
}
@@ -53,17 +50,8 @@ func (i *Injector) InjectContent(element *Element, contentID string) error {
return nil
}
// Replace element content based on type
switch element.Type {
case "text":
i.injectTextContent(element.Node, contentItem.Value)
case "markdown":
i.injectMarkdownContent(element.Node, contentItem.Value)
case "link":
i.injectLinkContent(element.Node, contentItem.Value)
default:
i.injectTextContent(element.Node, contentItem.Value)
}
// Direct HTML injection for all content types
i.injectHTMLContent(element.Node, contentItem.HTMLContent)
// Add data attributes for editor functionality
i.AddContentAttributes(element.Node, contentID, element.Type)
@@ -97,65 +85,13 @@ func (i *Injector) InjectBulkContent(elements []ElementWithID) error {
continue
}
// Replace content based on type
switch elem.Element.Type {
case "text":
i.injectTextContent(elem.Element.Node, contentItem.Value)
case "markdown":
i.injectMarkdownContent(elem.Element.Node, contentItem.Value)
case "link":
i.injectLinkContent(elem.Element.Node, contentItem.Value)
default:
i.injectTextContent(elem.Element.Node, contentItem.Value)
}
// Direct HTML injection for all content types
i.injectHTMLContent(elem.Element.Node, contentItem.HTMLContent)
}
return nil
}
// injectTextContent replaces text content in an element
func (i *Injector) injectTextContent(node *html.Node, content string) {
// Remove all child nodes
for child := node.FirstChild; child != nil; {
next := child.NextSibling
node.RemoveChild(child)
child = next
}
// Add new text content
textNode := &html.Node{
Type: html.TextNode,
Data: content,
}
node.AppendChild(textNode)
}
// injectMarkdownContent handles markdown content - converts markdown to HTML
func (i *Injector) injectMarkdownContent(node *html.Node, content string) {
if content == "" {
i.injectTextContent(node, "")
return
}
// Convert markdown to HTML using server processor
htmlContent, err := i.mdProcessor.ToHTML(content)
if err != nil {
log.Printf("⚠️ Markdown conversion failed for content '%s': %v, falling back to text", content, err)
i.injectTextContent(node, content)
return
}
// Inject the HTML content
i.injectHTMLContent(node, htmlContent)
}
// injectLinkContent handles link/button content with URL extraction
func (i *Injector) injectLinkContent(node *html.Node, content string) {
// For now, just inject the text content
// TODO: Parse content for URL and text components
i.injectTextContent(node, content)
}
// injectHTMLContent safely injects HTML content into a DOM node
// Preserves the original element and only replaces its content
func (i *Injector) injectHTMLContent(node *html.Node, htmlContent string) {
@@ -172,8 +108,14 @@ func (i *Injector) injectHTMLContent(node *html.Node, htmlContent string) {
// Parse HTML string
doc, err := html.Parse(strings.NewReader(wrappedHTML))
if err != nil {
log.Printf("Failed to parse HTML content '%s': %v, falling back to text", htmlContent, err)
i.injectTextContent(node, htmlContent)
log.Printf("Failed to parse HTML content '%s': %v, falling back to text node", htmlContent, err)
// Fallback: inject as text node
i.clearNode(node)
textNode := &html.Node{
Type: html.TextNode,
Data: htmlContent,
}
node.AppendChild(textNode)
return
}

View File

@@ -1,76 +0,0 @@
package engine
import (
"bytes"
"log"
"strings"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/renderer/html"
"github.com/yuin/goldmark/util"
)
// MarkdownProcessor converts a small subset of markdown to HTML.
// The intended subset is inline formatting only: **bold**, *italic*, and
// [link](url).
type MarkdownProcessor struct {
// parser is the goldmark converter this processor delegates to.
parser goldmark.Markdown
}
// NewMarkdownProcessor builds a MarkdownProcessor around a goldmark converter
// configured for basic inline formatting.
//
// NOTE(review): the configuration is meant to limit parsing to emphasis,
// links and paragraphs; confirm whether these goldmark options replace or
// extend the library's default parser set.
func NewMarkdownProcessor() *MarkdownProcessor {
	// Inline syntax: emphasis covers both **bold** and *italic*; links cover
	// [text](url).
	inlineParsers := parser.WithInlineParsers(
		util.Prioritized(parser.NewEmphasisParser(), 500),
		util.Prioritized(parser.NewLinkParser(), 600),
	)

	// Block syntax: paragraph only (no headings, lists, etc.).
	blockParsers := parser.WithBlockParsers(
		util.Prioritized(parser.NewParagraphParser(), 200),
	)

	rendering := goldmark.WithRendererOptions(
		html.WithXHTML(),     // <br /> instead of <br>
		html.WithHardWraps(), // line breaks become <br />
		html.WithUnsafe(),    // let existing HTML pass through
	)

	converter := goldmark.New(
		goldmark.WithParserOptions(inlineParsers, blockParsers),
		rendering,
	)
	return &MarkdownProcessor{parser: converter}
}
// ToHTML converts markdown to an HTML string.
//
// An empty input returns "" without invoking the converter. A conversion
// failure is logged and returned with an empty string. On success the output
// is trimmed; when it consists of exactly one <p>...</p> wrapper with no
// further "<p>" inside, only the inner content is returned so it can be
// injected inline. Anything else is returned unchanged.
func (mp *MarkdownProcessor) ToHTML(markdown string) (string, error) {
	if markdown == "" {
		return "", nil
	}

	var out bytes.Buffer
	err := mp.parser.Convert([]byte(markdown), &out)
	if err != nil {
		log.Printf("Markdown conversion failed: %v", err)
		return "", err
	}

	const (
		openTag  = "<p>"
		closeTag = "</p>"
	)

	rendered := strings.TrimSpace(out.String())

	// Not wrapped in a paragraph at all: hand back as-is.
	if !strings.HasPrefix(rendered, openTag) || !strings.HasSuffix(rendered, closeTag) {
		return rendered, nil
	}

	// Strip the outer wrapper and check whether another paragraph starts inside.
	inner := rendered[len(openTag) : len(rendered)-len(closeTag)]
	if strings.Contains(inner, openTag) {
		// Multiple paragraphs: keep the block structure.
		return rendered, nil
	}

	// Single paragraph: return just the inline content.
	return inner, nil
}

View File

@@ -35,7 +35,7 @@ type ContentResult struct {
type ProcessedElement struct {
Node *html.Node // HTML node
ID string // Generated content ID
Type string // Content type (text, markdown, link)
Type string // Content type (text, link)
Content string // Injected content (if any)
Generated bool // Whether ID was generated (vs existing)
Tag string // Element tag name
@@ -48,16 +48,18 @@ type ContentClient interface {
GetContent(siteID, contentID string) (*ContentItem, error)
GetBulkContent(siteID string, contentIDs []string) (map[string]ContentItem, error)
GetAllContent(siteID string) (map[string]ContentItem, error)
CreateContent(siteID, contentID, htmlContent, originalTemplate, contentType, lastEditedBy string) (*ContentItem, error)
}
// ContentItem represents a piece of content from the database
type ContentItem struct {
ID string `json:"id"`
SiteID string `json:"site_id"`
Value string `json:"value"`
Type string `json:"type"`
UpdatedAt string `json:"updated_at"`
LastEditedBy string `json:"last_edited_by,omitempty"`
ID string `json:"id"`
SiteID string `json:"site_id"`
HTMLContent string `json:"html_content"`
OriginalTemplate string `json:"original_template"`
Type string `json:"type"`
UpdatedAt string `json:"updated_at"`
LastEditedBy string `json:"last_edited_by,omitempty"`
}
// ContentResponse represents the API response structure