import TurndownService from "turndown";

import { htmlTags } from "./html-tags";

// Tag names removed from `htmlTags` when building `filterTags` below, so the
// "keep" rule (which uses `filterTags` as its filter) does not apply to them.
const excludeTags = ["pre", "code"];

// Shared Turndown instance used by this module; code blocks are emitted as
// fenced blocks delimited by triple backticks.
const turndownService = new TurndownService({
  codeBlockStyle: "fenced",
  fence: "```",
});

// Every entry of `htmlTags` except the excluded ones.
// `excludeTags` is a module-level constant array, so no optional chaining is needed.
const filterTags = htmlTags.filter((item) => !excludeTags.includes(item));

/**
 * Custom rule
 * https://github.com/mixmark-io/turndown/issues/241#issuecomment-400591362
 *
 * For nodes matched by `filterTags`, the replacement returns the node's own
 * `outerHTML` instead of the converted `content`. When `node.isBlock` is
 * truthy the HTML is surrounded by blank lines; otherwise it is returned as-is.
 */
turndownService.addRule("keep", {
  filter: filterTags,
  replacement(content, node) {
    // `content` is intentionally unused: the raw HTML of the node is emitted.
    const { isBlock, outerHTML } = node;

    return isBlock ? `\n\n${outerHTML}\n\n` : outerHTML;
  },
});

// ----------------------------------------------------------------------

/**
 * Converts HTML to Markdown with the module-level Turndown instance.
 *
 * @param {string} html - Input forwarded unchanged to `turndownService.turndown`.
 * @returns {string} The value returned by `turndownService.turndown`.
 */
export function htmlToMarkdown(html) {
  const markdown = turndownService.turndown(html);

  return markdown;
}

// ----------------------------------------------------------------------

/**
 * Patterns that each recognise one Markdown (or Markdown-embedded HTML) construct.
 *
 * Built once at module load; none of them uses the `g`/`y` flag, so `.test()`
 * is stateless and the list can be shared safely between calls.
 *
 * Line-anchored patterns carry the `m` flag so that `^` / `$` match at every
 * line boundary, not only at the very start / end of the whole content.
 */
const MARKDOWN_PATTERNS = [
  /* YAML Front Matter (Common in Jekyll, Hugo, etc.) */
  /^---\s*\n([\s\S]*?)\n---\s*$/m,

  /* Heading (H1-H6) */
  /^#{1,6}\s.+/m,

  /* Setext-style Headings (Underlined H1 and H2) */
  /^(.+)\n(=+|-{2,})$/m,

  /* Unordered List (Bullets: *, -, +) */
  /^(\s*)[*+-] [^\r\n]+/m,

  /* Ordered List (Numbers with dots) */
  /^(\s*)\d+\.\s[^\r\n]+/m,

  /* Nested Lists (Detects indentation-based nesting) */
  /^(\s{2,})?[*+-]\s+.+/m,

  /* Task List (Checkbox) */
  /^(\s*)[-*]\s\[(\s|x)\]\s[^\r\n]+/m,

  /* Blockquote (Supports nested `>>>` syntax) */
  /^(>{1,})\s.+/m,

  /* Horizontal Rule (---, ***, ___) */
  /^(-{3,}|\*{3,}|_{3,})$/m,

  /* Fenced Code Block (Triple Backticks or Tildes with Optional Language) */
  /^(```|~~~)([a-zA-Z0-9+#-]*)?\s*$/m,

  /* Indented Code Block (Four Spaces or a Tab) */
  /^(\t| {4})[^\r\n]+/m,

  /* Inline Code (Single Backticks, Escaped) */
  /`[^`\r\n]+`/,

  /* Strikethrough (~~text~~) */
  /~~(.*?)~~/,

  /* Bold (Double Asterisks or Underscores) */
  /(\*\*|__)(.*?)\1/,

  /* Italics (Single Asterisks or Underscores, but not in bold) */
  /(^|[^\w])(\*|_)([^\s].*?[^\s])\2([^\w]|$)/,

  /* Bold + Italics (Triple Asterisks or Underscores) */
  /(\*\*\*|___)(.*?)\1/,

  /* Image (With Alt Text) */
  /!\[([^\]]*)\]\(([^)]+)\)/,

  /* Links (Standard Markdown Links) */
  /\[([^\]]+)\]\(([^)]+)\)/,

  /* Reference Links */
  /^\[.+?\]:\s+<?(https?:\/\/[^>\s]+)>?\s*(".*?")?/m,

  /* Footnote Definition */
  /\[\^([^\]]+)\]:\s+(.*)/,

  /* Inline Footnote Reference */
  /\[\^([^\]]+)\]/,

  /* Table (Pipe-separated) */
  // `.+\|` accepts exactly the same rows as the nested `(.+\|)+` form, but
  // without exponential backtracking on rows that do not end with a pipe.
  /^\|.+\|\s*$/m,

  /* Table Separator Row (---, :---:, etc.) */
  /^\|?(\s*:?-+:?\s*\|)+\s*$/m,

  /* Definition List (Term: Definition) */
  /^(.+)\n:\s+.+/m,

  /* HTML Block Elements (Handles multi-line HTML blocks) */
  // `[^<]*` matches the same text as the nested `([^<]+)*` form, but cannot
  // backtrack exponentially when the closing tag is missing.
  /^<([a-z]+)[^<]*(?:>([\s\S]*?)<\/\1>|\s+\/>)$/m,

  /* HTML Inline Elements (Handles single-line tags) */
  // Same `[^<]*` rewrite as the block pattern above, for the same reason.
  /<([a-z]+)[^<]*(?:>(.*?)<\/\1>|\s+\/>)/,

  /* Escaped Characters (Markdown Special Characters) */
  /\\[\\`*_{}\[\]()#+\-.!]/,

  /* Automatic Links (<http://example.com> or <email@example.com>) */
  /<([a-zA-Z]+:\/\/[^>]+)>|<([^@\s]+@[^@\s]+\.[^@\s]+)>/,

  /* HTML Comments */
  /<!--[\s\S]*?-->/,

  /* Table of Contents Placeholder */
  /^\[TOC\]$/im,
];

/**
 * Heuristically checks whether `content` contains Markdown-specific syntax.
 *
 * The content is tested against every entry of `MARKDOWN_PATTERNS`; the first
 * match short-circuits the check. Note that paired HTML tags, self-closing
 * tags written with ` />`, and HTML comments also count as a match.
 *
 * @param {string} content - Text to inspect. Non-string values are coerced to
 *   a string by `RegExp.prototype.test`.
 * @returns {boolean} `true` when at least one pattern matches, otherwise `false`.
 */
export function isMarkdownContent(content) {
  return MARKDOWN_PATTERNS.some((pattern) => pattern.test(content));
}
