// fix-llms-txt.cjs — post-processes the generated llms.txt and llms-full.txt files.

  1. const fs = require('fs');
  2. const path = require('path');
  3. const { globSync } = require('glob');
  // Root of the documentation sources; the other locations below hang off it.
  const docsDir = path.join(__dirname, '../docs');
  // VitePress build output that contains the generated llms*.txt files.
  const distDir = path.join(docsDir, '.vitepress/dist');
  // Example projects referenced by <FileExplorer rootFilesDir="..."/> tags.
  const codesSrcDir = path.join(docsDir, 'codes');
  // Generated files (relative to distDir) that get post-processed.
  const files = ['llms.txt', 'llms-full.txt'];
  /**
   * Normalize a title for plain-text output.
   *
   * Strips markdown heading anchors such as `{#my-anchor}`, decodes a small
   * set of HTML entities, converts non-breaking spaces to regular spaces and
   * trims surrounding whitespace.
   *
   * @param {string} title - Raw title text.
   * @returns {string} The cleaned title.
   */
  function cleanTitle(title) {
    const entityText = {
      nbsp: ' ',
      lt: '<',
      gt: '>',
      amp: '&',
      quot: '"',
      apos: "'",
    };

    return title
      // Remove markdown anchors like {#...}
      .replace(/\s*\{#[^}]+\}\s*/g, '')
      // Decode every entity in a single pass so that already-escaped text such
      // as "&amp;quot;" becomes "&quot;" rather than being decoded twice.
      .replace(/&(nbsp|lt|gt|amp|quot|apos);/g, (_entity, name) => entityText[name])
      // A literal non-breaking space is treated like &nbsp;.
      .replace(/\u00A0/g, ' ')
      .trim();
  }
  /**
   * Render the files of one example project as a markdown fragment.
   *
   * Looks up `rootFilesDir` below `codesSrcDir`, collects every file whose name
   * contains a dot (minus the ignore list) and emits one fenced code block per
   * file, with a root-level `xmake.lua` listed first.
   *
   * @param {string} rootFilesDir - Project directory relative to `codesSrcDir`.
   * @returns {string} Markdown starting with a "Code Examples" heading, or ''
   *   when the directory does not exist or contains no matching files.
   */
  function getProjectCodes(rootFilesDir) {
    const projectRoot = path.join(codesSrcDir, rootFilesDir);
    if (!fs.existsSync(projectRoot)) {
      return '';
    }

    // Build outputs, dotfiles, binaries and tooling artifacts are skipped.
    const ignored = [
      '**/build/**',
      '**/.*',
      '**/*.o',
      '**/*.obj',
      '**/*.exe',
      '**/*.bin',
      '**/test.lua',
      '**/*.cache/**',
      '**/*.gcm',
      '**/compile_commands.json',
      '**/compile_command.json'
    ];
    const found = globSync('**/*.*', { cwd: projectRoot, ignore: ignored, nodir: true });
    if (found.length === 0) {
      return '';
    }

    // The root xmake.lua goes first; everything else is ordered by name.
    found.sort((left, right) => {
      if (left === 'xmake.lua') return -1;
      if (right === 'xmake.lua') return 1;
      return left.localeCompare(right);
    });

    const cFamily = ['c', 'cpp', 'h', 'hpp', 'm', 'mm'];
    const sections = found.map((relFile) => {
      const source = fs.readFileSync(path.join(projectRoot, relFile), 'utf-8');
      const ext = path.extname(relFile).toLowerCase().substring(1);

      // Fence language: lua for *.lua (which includes xmake.lua), cpp for the
      // C-family extensions, otherwise the bare extension.
      let lang;
      if (ext === 'lua') {
        lang = 'lua';
      } else if (cFamily.includes(ext)) {
        lang = 'cpp';
      } else {
        lang = ext || 'text';
      }

      return `\n#### ${relFile}\n\n\`\`\`${lang}\n${source}\n\`\`\`\n`;
    });

    return `\n\n### Code Examples (${rootFilesDir})\n` + sections.join('');
  }
  /**
   * Collect the rendered example code for every
   * `<FileExplorer rootFilesDir="..."/>` tag in the markdown source that a
   * section URL maps to.
   *
   * @param {string} url - Section URL as written in the generated file.
   * @returns {string} Code fragments joined by newlines, or '' when the URL does
   *   not map to a markdown file or the file references no example projects.
   */
  function collectFileExplorerCodes(url) {
    const relPath = url.startsWith('/') ? url.substring(1) : url;
    // URLs normally already end in .md; map .html back to its markdown source.
    const mdPath = path.join(docsDir, relPath.replace(/\.html$/, '.md'));

    // A URL such as "/" resolves to a directory, which exists but cannot be
    // read as a file, so require a regular file.
    if (!fs.existsSync(mdPath) || !fs.statSync(mdPath).isFile()) {
      return '';
    }

    const mdContent = fs.readFileSync(mdPath, 'utf8');
    const tagPattern = /<FileExplorer\s+rootFilesDir="([^"]+)"\s*\/>/g;
    const fragments = [];
    for (const [, rootFilesDir] of mdContent.matchAll(tagPattern)) {
      const codes = getProjectCodes(rootFilesDir);
      if (codes) {
        fragments.push(codes);
      }
    }
    return fragments.join('\n');
  }

  /**
   * Post-process one generated file inside `distDir`, rewriting it in place.
   *
   * Steps:
   *  1. Append example project code to every `--- url: ... ---` section whose
   *     markdown source contains FileExplorer tags.
   *  2. Clean the titles of TOC entries of the form `- [Title](link)`.
   *  3. Prefix the content with a UTF-8 BOM if it does not already start with one.
   *
   * Headings inside the content are left unchanged. Section delimiters are
   * written back with LF line endings. A missing file is logged and skipped.
   *
   * @param {string} filename - File name relative to `distDir`.
   * @returns {void}
   */
  function processFile(filename) {
    const filePath = path.join(distDir, filename);
    if (!fs.existsSync(filePath)) {
      console.log(`File not found: ${filePath}`);
      return;
    }

    let content = fs.readFileSync(filePath, 'utf8');

    // Splitting on a pattern with a capture group yields
    // [preamble, url1, body1, url2, body2, ...].
    const parts = content.split(/^---\r?\nurl: (.*?)\r?\n---/gm);
    if (parts.length > 1) {
      let rebuilt = parts[0];
      for (let i = 1; i < parts.length; i += 2) {
        const url = parts[i];
        const sectionContent = parts[i + 1] + collectFileExplorerCodes(url);
        rebuilt += `---\nurl: ${url}\n---${sectionContent}`;
      }
      content = rebuilt;
    }

    // Fix TOC titles in format: - [Title](link)
    content = content.replace(
      /^-\s+\[(.*?)\]\((.*?)\)/gm,
      (_entry, title, link) => `- [${cleanTitle(title)}](${link})`
    );

    // Add BOM if not present
    if (!content.startsWith('\uFEFF')) {
      content = '\uFEFF' + content;
    }

    fs.writeFileSync(filePath, content, 'utf8');
    console.log(`Fixed ${filename}`);
  }
  // Entry point: post-process each generated file in turn.
  for (const filename of files) {
    processFile(filename);
  }