You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

345 lines
12 KiB

# Ultralytics YOLO 🚀, AGPL-3.0 license
"""
Script to fix broken Markdown links and front matter in language-specific directories zh, ko, ja, ru, de, fr, es, pt, hi, ar.
This script processes markdown files in language-specific directories (like /zh/). It finds Markdown links and checks
their existence. If a link is broken and does not exist in the language-specific directory but exists in the /en/
directory, the script updates the link to point to the corresponding file in the /en/ directory.
It also ensures that front matter keywords like 'comments:', 'description:', and 'keywords:' are not translated and
remain in English.
"""
import re
from pathlib import Path
class MarkdownLinkFixer:
    """
    Fix Markdown links, front matter and admonitions in language-specific docs directories.

    Attributes:
        base_dir (Path): Root directory whose two-character subdirectories (e.g. 'zh', 'en') are processed.
        update_links (bool): If True, broken relative '.md' links are redirected to the matching /en/ file.
        update_text (bool): If True, front matter, admonitions, iframes and HTML tags are normalized.
        md_link_regex (re.Pattern): Matches '[text](path.md)' where the path contains neither ':' nor ')'.
    """

    def __init__(self, base_dir, update_links=True, update_text=True):
        """
        Initialize the MarkdownLinkFixer with the base directory.

        Args:
            base_dir (str | Path): Directory containing the language subdirectories.
            update_links (bool): Enable link redirection in processed files.
            update_text (bool): Enable front matter, admonition, iframe and HTML tag updates.
        """
        self.base_dir = Path(base_dir)
        self.update_links = update_links
        self.update_text = update_text
        self.md_link_regex = re.compile(r"\[([^]]+)]\(([^:)]+)\.md\)")

    @staticmethod
    def replace_front_matter(content, lang_dir):
        """
        Rewrite translated front matter keys back to their English names.

        The substitutions are applied to the whole text, not only to the front matter block.

        Args:
            content (str): Markdown text to process.
            lang_dir (Path): Language directory; its stem (e.g. 'zh') selects the translation table.

        Returns:
            (str): Text with translated 'comments', 'description' and 'keywords' keys replaced. The text is returned
                unchanged when the language has no translation table.
        """
        english = ["comments", "description", "keywords"]
        translations = {
            "zh": ["评论", "描述", "关键词"],  # Mandarin Chinese (Simplified) warning, sometimes translates as 关键字
            "es": ["comentarios", "descripción", "palabras clave"],  # Spanish
            "ru": ["комментарии", "описание", "ключевые слова"],  # Russian
            "pt": ["comentários", "descrição", "palavras-chave"],  # Portuguese
            "fr": ["commentaires", "description", "mots-clés"],  # French
            "de": ["kommentare", "beschreibung", "schlüsselwörter"],  # German
            "ja": ["コメント", "説明", "キーワード"],  # Japanese
            "ko": ["댓글", "설명", "키워드"],  # Korean
            "hi": ["िपणि", "िवरण", "वर"],  # Hindi
            "ar": ["التعليقات", "الوصف", "الكلمات الرئيسية"],  # Arabic
        }  # front matter translations for comments, description, keyword

        for term, eng_key in zip(translations.get(lang_dir.stem, []), english):
            # Match the term literally, followed by either an ASCII colon or a full-width colon (U+FF1A)
            key_re = rf"{re.escape(term)} *[:\uFF1A]"
            if eng_key == "comments":
                # The rest of the line is dropped: the value is always forced to 'true'
                content = re.sub(rf"{key_re}.*", f"{eng_key}: true", content, flags=re.IGNORECASE)
            else:
                # Only the key and its separator are rewritten; the translated value is kept
                content = re.sub(rf"{key_re} *", f"{eng_key}: ", content, flags=re.IGNORECASE)
        return content

    @staticmethod
    def replace_admonitions(content, lang_dir):
        """
        Normalize admonition types to English, keeping the translated word as the admonition title.

        For non-English languages a bare admonition such as '!!! Note' or '!!! <translated>' becomes
        '!!! Note "<translated>"'. Any remaining translated type is replaced by its English type, and an admonition
        that has only a quoted title ('!!! "Title"') is given the 'Example' type.

        Args:
            content (str): Markdown text to process.
            lang_dir (Path): Language directory; its stem (e.g. 'de') selects the translation table.

        Returns:
            (str): Text with normalized admonitions. The text is returned unchanged when the language has no
                translation table.
        """
        english = [
            "Note", "Summary", "Tip", "Info", "Success", "Question", "Warning", "Failure",
            "Danger", "Bug", "Example", "Quote", "Abstract", "Seealso", "Admonition",
        ]  # fmt: skip
        # Each list is positionally aligned with `english`.
        # NOTE(review): some entries are empty strings; they are skipped by the loop below.
        translations = {
            "en": english,
            "zh": [
                "笔记", "摘要", "提示", "信息", "成功", "问题", "警告", "失败",
                "危险", "故障", "示例", "引用", "摘要", "另见", "警告",
            ],
            "es": [
                "Nota", "Resumen", "Consejo", "Información", "Éxito", "Pregunta", "Advertencia", "Fracaso",
                "Peligro", "Error", "Ejemplo", "Cita", "Abstracto", "Véase También", "Amonestación",
            ],
            "ru": [
                "Заметка", "Сводка", "Совет", "Информация", "Успех", "Вопрос", "Предупреждение", "Неудача",
                "Опасность", "Ошибка", "Пример", "Цитата", "Абстракт", "См. Также", "Предостережение",
            ],
            "pt": [
                "Nota", "Resumo", "Dica", "Informação", "Sucesso", "Questão", "Aviso", "Falha",
                "Perigo", "Bug", "Exemplo", "Citação", "Abstrato", "Veja Também", "Advertência",
            ],
            "fr": [
                "Note", "Résumé", "Conseil", "Info", "Succès", "Question", "Avertissement", "Échec",
                "Danger", "Bug", "Exemple", "Citation", "Abstrait", "Voir Aussi", "Admonestation",
            ],
            "de": [
                "Hinweis", "Zusammenfassung", "Tipp", "Info", "Erfolg", "Frage", "Warnung", "Ausfall",
                "Gefahr", "Fehler", "Beispiel", "Zitat", "Abstrakt", "Siehe Auch", "Ermahnung",
            ],
            "ja": [
                "ノート", "要約", "ヒント", "情報", "成功", "質問", "警告", "失敗",
                "危険", "バグ", "", "引用", "抄録", "参照", "訓告",
            ],
            "ko": [
                "노트", "요약", "", "정보", "성공", "질문", "경고", "실패",
                "위험", "버그", "예제", "인용", "추상", "참조", "경고",
            ],
            "hi": [
                "", "", "", "नक", "सफलत", "रश", "वन", "िफलत",
                "खतर", "बग", "उदहरण", "उदधरण", "", "", "आग",
            ],
            "ar": [
                "ملاحظة", "ملخص", "نصيحة", "معلومات", "نجاح", "سؤال", "تحذير", "فشل",
                "خطر", "عطل", "مثال", "اقتباس", "ملخص", "انظر أيضاً", "تحذير",
            ],
        }  # fmt: skip

        lang = lang_dir.stem
        terms = translations.get(lang)
        if not terms:
            return content  # no table for this language: leave the text untouched

        for term, eng_key in zip(terms, english):
            if not term:
                # An empty term would reduce the patterns below to a bare '!!! *', which matches every
                # admonition marker and would prefix all of them with this (unrelated) English type.
                continue
            term_re = re.escape(term)  # terms such as 'См. Также' contain regex metacharacters
            if lang != "en":
                # Untitled admonitions get the translated word as their title
                titled = f'!!! {eng_key} "{term}"\n'
                content = re.sub(rf"!!! *{eng_key} *\n", titled, content, flags=re.IGNORECASE)
                content = re.sub(rf"!!! *{term_re} *\n", titled, content, flags=re.IGNORECASE)
            # Any remaining use of the translated (or differently-cased) type becomes the English type
            content = re.sub(rf"!!! *{term_re}", f"!!! {eng_key}", content, flags=re.IGNORECASE)

        # Admonitions that carry only a quoted title are given the 'Example' type
        return re.sub(r'!!! *"', '!!! Example "', content)

    @staticmethod
    def update_iframe(content):
        """
        Replace iframe 'allow' attribute values that do not start with the expected English permission list.

        Args:
            content (str): Markdown/HTML text to process.

        Returns:
            (str): Text in which every non-empty 'allow="..."' value not beginning with the English permission
                list has been replaced by that list.
        """
        english = "accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"
        # The negative lookahead leaves attributes that already begin with the English list untouched
        pattern = re.compile(f'allow="(?!{re.escape(english)}).+?"')
        return pattern.sub(f'allow="{english}"', content)

    def link_replacer(self, match, parent_dir, lang_dir, use_abs_link=False):
        """
        Replace a broken link with the corresponding link into the /en/ directory.

        Args:
            match (re.Match): Match of `md_link_regex`; group 1 is the link text, group 2 the path without '.md'.
            parent_dir (Path): Directory of the Markdown file that contains the link.
            lang_dir (Path): Language directory being processed (e.g. '<base_dir>/zh').
            use_abs_link (bool): Build a docs-root based link instead of a relative one. Flagged as buggy below.

        Returns:
            (str): The rewritten '[text](path)' link when the target is missing locally but exists under /en/,
                otherwise the original matched text.
        """
        text, path = match.groups()
        # Append '.md' rather than using with_suffix(), which would truncate dotted stems ('v1.5' -> 'v1.md')
        linked_path = (parent_dir / f"{path}.md").resolve()
        if not linked_path.exists():
            en_linked_path = Path(str(linked_path).replace(str(lang_dir), str(lang_dir.parent / "en")))
            if en_linked_path.exists():
                if use_abs_link:
                    # Use absolute links WARNING: BUGS, DO NOT USE
                    docs_root_relative_path = en_linked_path.relative_to(lang_dir.parent)
                    updated_path = str(docs_root_relative_path).replace("en/", "/../")
                else:
                    # Use relative links: climb from the file's directory to base_dir, then descend to the target
                    steps_up = len(parent_dir.relative_to(self.base_dir).parts)
                    updated_path = Path("../" * steps_up) / en_linked_path.relative_to(self.base_dir)
                    # as_posix() keeps '/' separators on every platform so the '/en/' segment can be dropped
                    updated_path = updated_path.as_posix().replace("/en/", "/")

                print(f"Redirecting link '[{text}]({path})' from {parent_dir} to {updated_path}")
                return f"[{text}]({updated_path})"
            else:
                print(f"Warning: Broken link '[{text}]({path})' found in {parent_dir} does not exist in /docs/en/.")

        return match.group(0)

    @staticmethod
    def update_html_tags(content):
        """
        Update HTML tags and image alt text in docs.

        Removes the closing slash from self-closing tags, gives Markdown images with empty alt text a placeholder,
        and adds a placeholder alt attribute to HTML <img> tags that have a quoted src but no alt attribute.

        Args:
            content (str): Markdown/HTML text to process.

        Returns:
            (str): The updated text.
        """
        alt_tag = "MISSING"

        # Remove closing slashes from self-closing HTML tags
        content = re.sub(r"<([^>]+?)\s*/>", r"<\1>", content)

        # Find all images without alt tags and add placeholder alt text
        content = re.sub(
            r"!\[(.*?)\]\((.*?)\)",
            lambda match: f"![{match.group(1) or alt_tag}]({match.group(2)})",
            content,
        )

        # Add missing alt tags to HTML images. The lookahead is confined to the tag itself ([^>]*) and looks for
        # an actual 'alt=' attribute, so text after the tag on the same line cannot suppress the fix.
        content = re.sub(
            r'<img\s+(?![^>]*\balt\s*=)[^>]*src=["\'](.*?)["\'][^>]*>',
            lambda match: match.group(0).replace(">", f' alt="{alt_tag}">', 1),
            content,
        )

        return content

    def process_markdown_file(self, md_file_path, lang_dir):
        """
        Process a single Markdown file in place.

        Args:
            md_file_path (str | Path): File to read, update and write back (UTF-8).
            lang_dir (Path): Language directory the file belongs to.
        """
        print(f"Processing file: {md_file_path}")
        md_file_path = Path(md_file_path)
        content = md_file_path.read_text(encoding="utf-8")

        if self.update_links:
            content = self.md_link_regex.sub(lambda m: self.link_replacer(m, md_file_path.parent, lang_dir), content)

        if self.update_text:
            content = self.replace_front_matter(content, lang_dir)
            content = self.replace_admonitions(content, lang_dir)
            content = self.update_iframe(content)
            content = self.update_html_tags(content)

        md_file_path.write_text(content, encoding="utf-8")

    def process_language_directory(self, lang_dir):
        """
        Process every '*.md' file found recursively under a language directory.

        Args:
            lang_dir (Path): Language directory to walk.
        """
        print(f"Processing language directory: {lang_dir}")
        for md_file in lang_dir.rglob("*.md"):
            self.process_markdown_file(md_file, lang_dir)

    def run(self):
        """Run link fixing and text updates for each two-character subdirectory of `base_dir`."""
        # Sorted for a deterministic processing order; fullmatch rejects names with a trailing newline
        for subdir in sorted(self.base_dir.iterdir()):
            if subdir.is_dir() and re.fullmatch(r"\w\w", subdir.name):
                self.process_language_directory(subdir)
if __name__ == "__main__":
    # The directory containing this script is used as the MkDocs 'docs' directory; change it here if needed
    docs_root = Path(__file__).parent.resolve()
    MarkdownLinkFixer(str(docs_root), update_links=True, update_text=True).run()