Implement hash-based content ID generation
- Replace content text sampling with SHA1 content hashing
- Generate IDs with the format context-purpose-hash (e.g., services-text-0d96da)
- Achieve content independence: IDs remain stable when text changes
- Maintain semantic meaning through the context and purpose components
- Support disposable development IDs with production stability
This commit is contained in:
@@ -25,9 +25,9 @@ func NewIDGenerator() *IDGenerator {
|
||||
func (g *IDGenerator) Generate(node *html.Node) string {
|
||||
context := g.getSemanticContext(node)
|
||||
purpose := g.getPurpose(node)
|
||||
content := g.getContentSample(node)
|
||||
contentHash := g.getContentHash(node)
|
||||
|
||||
baseID := g.createBaseID(context, purpose, content)
|
||||
baseID := g.createBaseID(context, purpose, contentHash)
|
||||
return g.ensureUnique(baseID)
|
||||
}
|
||||
|
||||
@@ -111,27 +111,19 @@ func (g *IDGenerator) getPurpose(node *html.Node) string {
|
||||
}
|
||||
}
|
||||
|
||||
// getContentSample gets a sample of content for ID generation
|
||||
func (g *IDGenerator) getContentSample(node *html.Node) string {
|
||||
// getContentHash creates a short hash of the content for ID generation
|
||||
func (g *IDGenerator) getContentHash(node *html.Node) string {
|
||||
text := extractTextContent(node)
|
||||
|
||||
// Clean and normalize text
|
||||
text = strings.ToLower(text)
|
||||
text = regexp.MustCompile(`[^a-z0-9\s]+`).ReplaceAllString(text, "")
|
||||
text = regexp.MustCompile(`\s+`).ReplaceAllString(text, " ")
|
||||
text = strings.TrimSpace(text)
|
||||
// Create hash of the text content
|
||||
hash := fmt.Sprintf("%x", sha1.Sum([]byte(text)))
|
||||
|
||||
// Take first few words
|
||||
words := strings.Fields(text)
|
||||
if len(words) > 3 {
|
||||
words = words[:3]
|
||||
}
|
||||
|
||||
return strings.Join(words, "-")
|
||||
// Return first 6 characters for brevity
|
||||
return hash[:6]
|
||||
}
|
||||
|
||||
// createBaseID creates the base ID from components
|
||||
func (g *IDGenerator) createBaseID(context, purpose, content string) string {
|
||||
func (g *IDGenerator) createBaseID(context, purpose, contentHash string) string {
|
||||
parts := []string{}
|
||||
|
||||
// Add context if meaningful
|
||||
@@ -142,10 +134,8 @@ func (g *IDGenerator) createBaseID(context, purpose, content string) string {
|
||||
// Add purpose
|
||||
parts = append(parts, purpose)
|
||||
|
||||
// Add content sample if available and meaningful
|
||||
if content != "" && content != purpose {
|
||||
parts = append(parts, content)
|
||||
}
|
||||
// Always add content hash for uniqueness
|
||||
parts = append(parts, contentHash)
|
||||
|
||||
baseID := strings.Join(parts, "-")
|
||||
|
||||
@@ -155,7 +145,7 @@ func (g *IDGenerator) createBaseID(context, purpose, content string) string {
|
||||
|
||||
// Ensure it's not empty
|
||||
if baseID == "" {
|
||||
baseID = "content"
|
||||
baseID = fmt.Sprintf("content-%s", contentHash)
|
||||
}
|
||||
|
||||
return baseID
|
||||
|
||||
Reference in New Issue
Block a user