/**
 * Markdown conversion utilities using Marked and Turndown
 */
import { marked } from 'marked';
import TurndownService from 'turndown';

/**
 * MarkdownConverter - Handles bidirectional HTML ↔ Markdown conversion.
 *
 * Runs in "minimal mode": only **bold**, *italic*, and [links](url) are
 * meant to survive a round trip; every other construct is flattened to
 * plain text. The constructor configures the shared `marked` import (so the
 * settings apply to every user of that import) and creates a private
 * Turndown instance stored on `this.turndown`.
 */
export class MarkdownConverter {
  constructor() {
    this.initializeMarked();
    this.initializeTurndown();
  }

  /**
   * Configure marked for HTML output - MINIMAL MODE
   * Only supports: **bold**, *italic*, and [links](url)
   * Matches server-side goldmark configuration
   */
  initializeMarked() {
    // NOTE(review): `sanitize`, `smartLists` and `smartypants` are legacy
    // options in newer marked releases - confirm against the installed version.
    marked.setOptions({
      gfm: false, // Disable GFM to match server minimal mode
      breaks: true, // Convert \n to <br> (matches server)
      pedantic: false, // Don't be overly strict
      sanitize: false, // Allow HTML (we control the input)
      smartLists: false, // Disable lists (not supported on server)
      smartypants: false // Don't convert quotes/dashes
    });

    // Override renderers to restrict output to the minimal feature set.
    // NOTE(review): the block-level overrides below return bare text with no
    // wrapping element, so updateGroupElements (which only collects
    // p/div/h1-h6 elements) will not pick that text up - confirm intended.
    marked.use({
      renderer: {
        // Disable headings - treat as plain text
        heading(text) {
          return text;
        },

        // Disable lists - treat as plain text
        list(body) {
          return body.replace(/<\/?li>/g, '');
        },
        listitem(text) {
          return text + '\n';
        },

        // Disable code blocks - treat as plain text
        code(code) {
          return code;
        },

        // Disable blockquotes - treat as plain text
        blockquote(quote) {
          return quote;
        },

        // Disable horizontal rules
        hr() {
          return '';
        },

        // Disable tables - emit cell contents only
        table(header, body) {
          return header + body;
        },
        tablecell(content) {
          return content;
        },
        tablerow(content) {
          return content;
        }
      }
    });
  }

  /**
   * Configure turndown for markdown output - MINIMAL MODE
   * Only supports: **bold**, *italic*, and [links](url)
   * Matches server-side goldmark configuration
   */
  initializeTurndown() {
    this.turndown = new TurndownService({
      // Minimal configuration - only basic formatting
      headingStyle: 'atx', // # headers (but will be disabled)
      hr: '---', // horizontal rule (but will be disabled)
      bulletListMarker: '-', // bullet list (but will be disabled)
      codeBlockStyle: 'fenced', // code blocks (but will be disabled)
      fence: '```', // fence marker (but will be disabled)
      emDelimiter: '*', // *italic* - matches server
      strongDelimiter: '**', // **bold** - matches server
      linkStyle: 'inlined', // [text](url) - matches server
      linkReferenceStyle: 'full' // full reference links
    });

    // Add custom rules for better conversion
    this.addTurndownRules();
  }

  /**
   * Add custom turndown rules - MINIMAL MODE
   * Only supports: **bold**, *italic*, and [links](url)
   * Disables all other formatting to match server
   */
  addTurndownRules() {
    // Returns the converted children unchanged, dropping the element's markup.
    const passThrough = function (content) {
      return content;
    };

    // Handle paragraph spacing - ensure double newlines between paragraphs
    this.turndown.addRule('paragraph', {
      filter: 'p',
      replacement: function (content) {
        if (!content.trim()) return '';
        return content.trim() + '\n\n';
      }
    });

    // Bold text - supported
    this.turndown.addRule('bold', {
      filter: ['strong', 'b'],
      replacement: function (content) {
        if (!content.trim()) return '';
        return '**' + content + '**';
      }
    });

    // Italic text - supported
    this.turndown.addRule('italic', {
      filter: ['em', 'i'],
      replacement: function (content) {
        if (!content.trim()) return '';
        return '*' + content + '*';
      }
    });

    // DISABLE unsupported features - convert to plain text
    this.turndown.addRule('disableHeadings', {
      filter: ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'],
      replacement: passThrough // no # markup
    });

    this.turndown.addRule('disableLists', {
      filter: ['ul', 'ol', 'li'],
      replacement: passThrough // no list markup
    });

    this.turndown.addRule('disableCode', {
      filter: ['pre', 'code'],
      replacement: passThrough // no code markup
    });

    this.turndown.addRule('disableBlockquotes', {
      filter: 'blockquote',
      replacement: passThrough // no > markup
    });

    this.turndown.addRule('disableHR', {
      filter: 'hr',
      replacement: function () {
        return ''; // Remove horizontal rules entirely
      }
    });
  }

  /**
   * Convert HTML to Markdown
   * @param {string} html - HTML string to convert
   * @returns {string} - Markdown string; '' for empty or whitespace-only
   *   input. If Turndown throws, a warning is logged and the plain text
   *   content of the HTML is returned instead.
   */
  htmlToMarkdown(html) {
    if (!html || html.trim() === '') {
      return '';
    }

    try {
      const markdown = this.turndown.turndown(html);

      // Collapse runs of 3+ newlines to a single blank line, then strip
      // leading/trailing whitespace (which includes newlines).
      return markdown.replace(/\n{3,}/g, '\n\n').trim();
    } catch (error) {
      console.warn('HTML to Markdown conversion failed:', error);

      // Without a DOM the text-extraction fallback below would itself throw,
      // so degrade to a plain tag strip instead of crashing inside the handler.
      if (typeof document === 'undefined') {
        return html.replace(/<[^>]*>/g, '');
      }

      // Fallback: extract text content
      const tempDiv = document.createElement('div');
      tempDiv.innerHTML = html;
      return tempDiv.textContent || tempDiv.innerText || '';
    }
  }

  /**
   * Convert Markdown to HTML
   * @param {string} markdown - Markdown string to convert
   * @returns {string} - HTML string; '' for empty or whitespace-only input.
   *   If marked throws, a warning is logged and each blank-line-separated
   *   chunk is wrapped in a <p> element instead.
   */
  markdownToHtml(markdown) {
    if (!markdown || markdown.trim() === '') {
      return '';
    }

    try {
      return marked(markdown);
    } catch (error) {
      console.warn('Markdown to HTML conversion failed:', error);

      // Fallback: convert blank-line-separated chunks to paragraphs
      return markdown
        .split(/\n\s*\n/)
        .filter(p => p.trim())
        .map(p => `<p>${p.trim()}</p>`)
        .join('');
    }
  }

  /**
   * Extract HTML content from a group of elements
   * @param {HTMLElement[]} elements - Array of DOM elements
   * @returns {string} - One paragraph per non-empty element, joined by '\n'
   */
  extractGroupHTML(elements) {
    const htmlParts = [];

    for (const element of elements) {
      const html = element.innerHTML.trim();
      if (!html) {
        continue; // Skip elements with no content
      }

      if (element.tagName.toLowerCase() === 'p') {
        // Already a paragraph - keep it as is
        htmlParts.push(element.outerHTML);
      } else {
        // Wrap in paragraph tags so each element converts to its own paragraph
        htmlParts.push(`<p>${html}</p>`);
      }
    }

    return htmlParts.join('\n');
  }

  /**
   * Convert HTML content from group elements to markdown
   * @param {HTMLElement[]} elements - Array of DOM elements
   * @returns {string} - Markdown representation
   */
  extractGroupMarkdown(elements) {
    return this.htmlToMarkdown(this.extractGroupHTML(elements));
  }

  /**
   * Update group elements with markdown content.
   *
   * Renders the markdown, then maps the resulting paragraphs onto `elements`
   * by position. Surplus elements are emptied. Surplus paragraphs are
   * inserted as new <p> elements after the last element and appended to the
   * `elements` array (the caller's array is mutated).
   *
   * @param {HTMLElement[]} elements - Array of DOM elements to update
   * @param {string} markdown - Markdown content to render
   */
  updateGroupElements(elements, markdown) {
    const html = this.markdownToHtml(markdown);

    // Split HTML into paragraphs
    const tempDiv = document.createElement('div');
    tempDiv.innerHTML = html;
    const matches = Array.from(
      tempDiv.querySelectorAll('p, div, h1, h2, h3, h4, h5, h6')
    );

    // Keep only outermost matches: a nested match (e.g. <p> inside <div>) is
    // already part of its ancestor's innerHTML and would be written twice.
    const paragraphs = matches.filter(
      candidate =>
        !matches.some(other => other !== candidate && other.contains(candidate))
    );

    const sharedCount = Math.min(elements.length, paragraphs.length);

    // Update existing elements with their corresponding paragraphs
    for (let i = 0; i < sharedCount; i++) {
      elements[i].innerHTML = paragraphs[i].innerHTML;
    }

    // More elements than paragraphs - clear the extra elements
    for (let i = sharedCount; i < elements.length; i++) {
      elements[i].innerHTML = '';
    }

    // More paragraphs than elements - create new elements after the last one
    for (let i = sharedCount; i < paragraphs.length; i++) {
      const lastElement = elements[elements.length - 1];
      if (!lastElement || !lastElement.parentNode) {
        // No attached element to insert after (empty group or detached node)
        console.warn(
          'Cannot insert extra paragraphs: no attached element to anchor to'
        );
        break;
      }

      const newElement = document.createElement('p');
      newElement.innerHTML = paragraphs[i].innerHTML;
      lastElement.parentNode.insertBefore(newElement, lastElement.nextSibling);
      elements.push(newElement); // Track it so the next insert lands after it
    }
  }
}

// Export singleton instance
export const markdownConverter = new MarkdownConverter();