# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Script to fix broken Markdown links and front matter in language-specific directories zh, ko, ja, ru, de, fr, es, pt.

This script processes markdown files in language-specific directories (like /zh/). It finds Markdown links and checks
their existence. If a link is broken and does not exist in the language-specific directory but exists in the /en/
directory, the script updates the link to point to the corresponding file in the /en/ directory.

It also ensures that front matter keywords like 'comments:', 'description:', and 'keywords:' are not translated and
remain in English, normalizes admonition types to their English names, resets iframe 'allow' attributes to the
English permission list, and adds placeholder alt text to images that have none.
"""

import re
from pathlib import Path

# Front matter keys, in the same order as every list in _FRONT_MATTER_TRANSLATIONS
_FRONT_MATTER_KEYS = ["comments", "description", "keywords"]
_FRONT_MATTER_TRANSLATIONS = {
    "zh": ["评论", "描述", "关键词"],  # Mandarin Chinese (Simplified) warning, sometimes translates as 关键字
    "es": ["comentarios", "descripción", "palabras clave"],  # Spanish
    "ru": ["комментарии", "описание", "ключевые слова"],  # Russian
    "pt": ["comentários", "descrição", "palavras-chave"],  # Portuguese
    "fr": ["commentaires", "description", "mots-clés"],  # French
    "de": ["kommentare", "beschreibung", "schlüsselwörter"],  # German
    "ja": ["コメント", "説明", "キーワード"],  # Japanese
    "ko": ["댓글", "설명", "키워드"],  # Korean
    "hi": ["टिप्पणियाँ", "विवरण", "कीवर्ड"],  # Hindi
    "ar": ["التعليقات", "الوصف", "الكلمات الرئيسية"],  # Arabic
}

# Admonition types, in the same order as every list in _ADMONITION_TRANSLATIONS
_ADMONITIONS = [
    "Note", "Summary", "Tip", "Info", "Success", "Question", "Warning", "Failure",
    "Danger", "Bug", "Example", "Quote", "Abstract", "Seealso", "Admonition"
]
_ADMONITION_TRANSLATIONS = {
    "en": _ADMONITIONS,
    "zh": [
        "笔记", "摘要", "提示", "信息", "成功", "问题", "警告", "失败",
        "危险", "故障", "示例", "引用", "摘要", "另见", "警告"
    ],
    "es": [
        "Nota", "Resumen", "Consejo", "Información", "Éxito", "Pregunta", "Advertencia", "Fracaso",
        "Peligro", "Error", "Ejemplo", "Cita", "Abstracto", "Véase También", "Amonestación"
    ],
    "ru": [
        "Заметка", "Сводка", "Совет", "Информация", "Успех", "Вопрос", "Предупреждение", "Неудача",
        "Опасность", "Ошибка", "Пример", "Цитата", "Абстракт", "См. Также", "Предостережение"
    ],
    "pt": [
        "Nota", "Resumo", "Dica", "Informação", "Sucesso", "Questão", "Aviso", "Falha",
        "Perigo", "Bug", "Exemplo", "Citação", "Abstrato", "Veja Também", "Advertência"
    ],
    "fr": [
        "Note", "Résumé", "Conseil", "Info", "Succès", "Question", "Avertissement", "Échec",
        "Danger", "Bug", "Exemple", "Citation", "Abstrait", "Voir Aussi", "Admonestation"
    ],
    "de": [
        "Hinweis", "Zusammenfassung", "Tipp", "Info", "Erfolg", "Frage", "Warnung", "Ausfall",
        "Gefahr", "Fehler", "Beispiel", "Zitat", "Abstrakt", "Siehe Auch", "Ermahnung"
    ],
    "ja": [
        "ノート", "要約", "ヒント", "情報", "成功", "質問", "警告", "失敗",
        "危険", "バグ", "例", "引用", "抄録", "参照", "訓告"
    ],
    "ko": [
        "노트", "요약", "팁", "정보", "성공", "질문", "경고", "실패",
        "위험", "버그", "예제", "인용", "추상", "참조", "경고"
    ],
    "hi": [
        "नोट", "सारांश", "सुझाव", "जानकारी", "सफलता", "प्रश्न", "चेतावनी", "विफलता",
        "खतरा", "बग", "उदाहरण", "उद्धरण", "सार", "देखें भी", "आगाही"
    ],
    "ar": [
        "ملاحظة", "ملخص", "نصيحة", "معلومات", "نجاح", "سؤال", "تحذير", "فشل",
        "خطر", "عطل", "مثال", "اقتباس", "ملخص", "انظر أيضاً", "تحذير"
    ],
}

_IFRAME_ALLOW = "accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
# Any allow="..." value that does not start with the English permission list
_IFRAME_ALLOW_RE = re.compile(f'allow="(?!{re.escape(_IFRAME_ALLOW)}).+?"')

_ALT_PLACEHOLDER = "MISSING"
_SELF_CLOSING_TAG_RE = re.compile(r"<([^>]+?)\s*/>")
_MD_IMAGE_RE = re.compile(r"!\[(.*?)\]\((.*?)\)")
# <img ...> tags that have a src attribute but no alt attribute anywhere inside the tag
_HTML_IMG_NO_ALT_RE = re.compile(r'<img(?![^>]*alt=)[^>]*src=["\'](.*?)["\'][^>]*>')


class MarkdownLinkFixer:
    """Class to fix Markdown links and front matter in language-specific directories."""

    def __init__(self, base_dir, update_links=True, update_text=True):
        """
        Initialize the MarkdownLinkFixer with the base directory.

        Args:
            base_dir: Path (str or Path) of the docs root that contains the two-letter language directories.
            update_links: If True, redirect broken relative '.md' links to the /en/ counterpart.
            update_text: If True, fix front matter keys, admonitions, iframe permissions and HTML tags.
        """
        # Resolved so that it is comparable with the resolved link targets built in link_replacer()
        self.base_dir = Path(base_dir).resolve()
        self.update_links = update_links
        self.update_text = update_text
        # [text](relative/path.md); the ':' exclusion skips URLs with a scheme such as https://
        self.md_link_regex = re.compile(r"\[([^]]+)]\(([^:)]+)\.md\)")

    @staticmethod
    def replace_front_matter(content: str, lang_dir: Path) -> str:
        """
        Ensure front matter keywords remain in English.

        Translated 'comments', 'description' and 'keywords' keys are replaced by their English names. The whole
        'comments' entry is rewritten to 'comments: true'. Both the ASCII colon and the fullwidth colon (U+FF1A)
        are accepted after a key. Matching is case-insensitive and applies to the whole text, not only to the
        front matter block.

        Args:
            content: Markdown text of one file.
            lang_dir: Language directory; its stem (e.g. 'zh') selects the translation table.

        Returns:
            The updated text, or the input unchanged when the language has no translation table.
        """
        terms = _FRONT_MATTER_TRANSLATIONS.get(lang_dir.stem, [])
        for term, eng_key in zip(terms, _FRONT_MATTER_KEYS):
            # Escape the term so it is matched literally; \uFF1A is the fullwidth colon
            key_rx = rf"{re.escape(term)} *[:\uFF1A]"
            if eng_key == "comments":
                content = re.sub(rf"{key_rx}.*", f"{eng_key}: true", content, flags=re.IGNORECASE)
            else:
                content = re.sub(rf"{key_rx} *", f"{eng_key}: ", content, flags=re.IGNORECASE)
        return content

    @staticmethod
    def replace_admonitions(content: str, lang_dir: Path) -> str:
        """
        Ensure admonition types remain in English while keeping a translated title.

        For non-English directories, a bare '!!! Note' or '!!! <translated term>' line becomes
        '!!! Note "<translated term>"'. Any remaining '!!! <translated term>' is replaced by '!!! <English type>',
        and an admonition that has only a quoted title ('!!! "Title"') is given the 'Example' type.

        Args:
            content: Markdown text of one file.
            lang_dir: Language directory; its stem (e.g. 'zh') selects the translation table.

        Returns:
            The updated text, or the input unchanged when the language has no translation table.
        """
        terms = _ADMONITION_TRANSLATIONS.get(lang_dir.stem, [])
        is_translated = lang_dir.stem != "en"

        for term, eng_key in zip(terms, _ADMONITIONS):
            term_rx = re.escape(term)  # terms such as "См. Также" contain regex metacharacters
            if is_translated:
                # !!! Note -> !!! Note "Translation"
                content = re.sub(rf"!!! *{eng_key} *\n", f'!!! {eng_key} "{term}"\n', content, flags=re.IGNORECASE)
                # !!! Translation -> !!! Note "Translation"
                content = re.sub(rf"!!! *{term_rx} *\n", f'!!! {eng_key} "{term}"\n', content, flags=re.IGNORECASE)
            # !!! Translation "Title" -> !!! Note "Title"
            content = re.sub(rf"!!! *{term_rx}", f"!!! {eng_key}", content, flags=re.IGNORECASE)

        if terms:
            # Title-only admonitions get an explicit type; applied only for known languages
            content = re.sub(r'!!! *"', '!!! Example "', content, flags=re.IGNORECASE)
        return content

    @staticmethod
    def update_iframe(content: str) -> str:
        """Update the 'allow' attribute of iframe if it does not contain the specific English permissions."""
        return _IFRAME_ALLOW_RE.sub(f'allow="{_IFRAME_ALLOW}"', content)

    def link_replacer(self, match: re.Match, parent_dir: Path, lang_dir: Path, use_abs_link: bool = False) -> str:
        """
        Replace broken links with corresponding links in the /en/ directory.

        Args:
            match: Match of self.md_link_regex; group 1 is the link text, group 2 the target without '.md'.
            parent_dir: Directory of the Markdown file that contains the link.
            lang_dir: Language directory that is being processed.
            use_abs_link: Build a docs-root based link instead of a relative one. WARNING: BUGS, DO NOT USE.

        Returns:
            The redirected link when the target exists only under /en/, otherwise the original link text.
        """
        text, path = match.groups()
        # Append the suffix instead of with_suffix(), which would clobber dotted names such as 'yolo.v8'
        linked_path = (parent_dir / f"{path}.md").resolve()

        if not linked_path.exists():
            en_linked_path = Path(str(linked_path).replace(str(lang_dir), str(lang_dir.parent / "en")))
            if en_linked_path.exists():
                if use_abs_link:
                    # Use absolute links WARNING: BUGS, DO NOT USE
                    docs_root_relative_path = en_linked_path.relative_to(lang_dir.parent)
                    updated_path = str(docs_root_relative_path).replace("en/", "/../")
                else:
                    # Use relative links: climb to the docs root, then drop the 'en' component
                    steps_up = len(parent_dir.relative_to(self.base_dir).parts)
                    updated_path = Path("../" * steps_up) / en_linked_path.relative_to(self.base_dir)
                    # as_posix() keeps forward slashes on every platform so the replacement below applies
                    updated_path = updated_path.as_posix().replace("/en/", "/")

                print(f"Redirecting link '[{text}]({path})' from {parent_dir} to {updated_path}")
                return f"[{text}]({updated_path})"
            else:
                print(f"Warning: Broken link '[{text}]({path})' found in {parent_dir} does not exist in /docs/en/.")

        return match.group(0)

    @staticmethod
    def update_html_tags(content: str) -> str:
        """
        Updates HTML tags in docs.

        Removes the closing slash of self-closing tags, gives Markdown images with empty alt text a placeholder,
        and adds a placeholder alt attribute to HTML <img> tags that have none.
        """
        # Remove closing slashes from self-closing HTML tags
        content = _SELF_CLOSING_TAG_RE.sub(r"<\1>", content)

        # Find all images without alt tags and add placeholder alt text
        content = _MD_IMAGE_RE.sub(lambda m: f"![{m.group(1) or _ALT_PLACEHOLDER}]({m.group(2)})", content)

        # Add missing alt tags to HTML images; only the tag's closing '>' is rewritten
        content = _HTML_IMG_NO_ALT_RE.sub(
            lambda m: m.group(0).replace(">", f' alt="{_ALT_PLACEHOLDER}">', 1), content
        )
        return content

    def process_markdown_file(self, md_file_path: Path, lang_dir: Path) -> None:
        """Process each markdown file in the language directory, rewriting it only when its content changes."""
        print(f"Processing file: {md_file_path}")

        with open(md_file_path, encoding="utf-8") as file:
            original = file.read()
        content = original

        if self.update_links:
            content = self.md_link_regex.sub(lambda m: self.link_replacer(m, md_file_path.parent, lang_dir), content)

        if self.update_text:
            content = self.replace_front_matter(content, lang_dir)
            content = self.replace_admonitions(content, lang_dir)
            content = self.update_iframe(content)
            content = self.update_html_tags(content)

        if content != original:
            with open(md_file_path, "w", encoding="utf-8") as file:
                file.write(content)

    def process_language_directory(self, lang_dir: Path) -> None:
        """Process each language-specific directory."""
        print(f"Processing language directory: {lang_dir}")
        for md_file in lang_dir.rglob("*.md"):
            self.process_markdown_file(md_file, lang_dir)

    def run(self) -> None:
        """Run the link fixing and front matter updating process for each language-specific directory."""
        # Sorted for a deterministic processing order; language directories have two-character names
        for subdir in sorted(self.base_dir.iterdir()):
            if subdir.is_dir() and re.fullmatch(r"\w\w", subdir.name):
                self.process_language_directory(subdir)


if __name__ == "__main__":
    # Set the path to your MkDocs 'docs' directory here
    docs_dir = str(Path(__file__).parent.resolve())
    fixer = MarkdownLinkFixer(docs_dir, update_links=True, update_text=True)
    fixer.run()