|
|
# Ultralytics YOLO 🚀, AGPL-3.0 license |
|
|
""" |
|
|
Script to fix broken Markdown links and front matter in language-specific directories (zh, ko, ja, ru, de, fr, es, pt, hi, ar).
|
|
|
|
|
This script processes markdown files in language-specific directories (like /zh/). It finds Markdown links and checks |
|
|
their existence. If a link is broken and does not exist in the language-specific directory but exists in the /en/ |
|
|
directory, the script updates the link to point to the corresponding file in the /en/ directory. |
|
|
|
|
|
It also ensures that front matter keywords like 'comments:', 'description:', and 'keywords:' are not translated and |
|
|
remain in English. |
|
|
""" |
|
|
|
|
|
import re |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
class MarkdownLinkFixer:
    """
    Fix Markdown links, front matter, admonitions and HTML snippets in language-specific docs directories.

    Attributes:
        base_dir (Path): Directory whose two-character subdirectories (e.g. 'zh', 'en') are processed by run().
        update_links (bool): If True, broken '.md' links are redirected to their /en/ counterpart when one exists.
        update_text (bool): If True, front matter keys, admonitions, iframe 'allow' values and HTML tags are fixed.
        md_link_regex (re.Pattern): Matches '[text](path.md)' links whose path contains no ':' (i.e. no URLs).
    """

    # English front matter keys, in the same order as every list in _FRONT_MATTER_TRANSLATIONS
    _FRONT_MATTER_KEYS = ('comments', 'description', 'keywords')
    _FRONT_MATTER_TRANSLATIONS = {
        'zh': ['评论', '描述', '关键词'],  # Mandarin Chinese (Simplified) warning, sometimes translates as 关键字
        'es': ['comentarios', 'descripción', 'palabras clave'],  # Spanish
        'ru': ['комментарии', 'описание', 'ключевые слова'],  # Russian
        'pt': ['comentários', 'descrição', 'palavras-chave'],  # Portuguese
        'fr': ['commentaires', 'description', 'mots-clés'],  # French
        'de': ['kommentare', 'beschreibung', 'schlüsselwörter'],  # German
        'ja': ['コメント', '説明', 'キーワード'],  # Japanese
        'ko': ['댓글', '설명', '키워드'],  # Korean
        'hi': ['टिप्पणियाँ', 'विवरण', 'कीवर्ड'],  # Hindi
        'ar': ['التعليقات', 'الوصف', 'الكلمات الرئيسية'],  # Arabic
    }

    # English admonition types, in the same order as every list in _ADMONITION_TRANSLATIONS
    _ADMONITION_KEYS = (
        'Note', 'Summary', 'Tip', 'Info', 'Success', 'Question', 'Warning', 'Failure', 'Danger', 'Bug', 'Example',
        'Quote', 'Abstract', 'Seealso', 'Admonition')
    _ADMONITION_TRANSLATIONS = {
        'en': list(_ADMONITION_KEYS),
        'zh': ['笔记', '摘要', '提示', '信息', '成功', '问题', '警告', '失败', '危险', '故障', '示例', '引用', '摘要', '另见', '警告'],
        'es': [
            'Nota', 'Resumen', 'Consejo', 'Información', 'Éxito', 'Pregunta', 'Advertencia', 'Fracaso', 'Peligro',
            'Error', 'Ejemplo', 'Cita', 'Abstracto', 'Véase También', 'Amonestación'],
        'ru': [
            'Заметка', 'Сводка', 'Совет', 'Информация', 'Успех', 'Вопрос', 'Предупреждение', 'Неудача', 'Опасность',
            'Ошибка', 'Пример', 'Цитата', 'Абстракт', 'См. Также', 'Предостережение'],
        'pt': [
            'Nota', 'Resumo', 'Dica', 'Informação', 'Sucesso', 'Questão', 'Aviso', 'Falha', 'Perigo', 'Bug',
            'Exemplo', 'Citação', 'Abstrato', 'Veja Também', 'Advertência'],
        'fr': [
            'Note', 'Résumé', 'Conseil', 'Info', 'Succès', 'Question', 'Avertissement', 'Échec', 'Danger', 'Bug',
            'Exemple', 'Citation', 'Abstrait', 'Voir Aussi', 'Admonestation'],
        'de': [
            'Hinweis', 'Zusammenfassung', 'Tipp', 'Info', 'Erfolg', 'Frage', 'Warnung', 'Ausfall', 'Gefahr',
            'Fehler', 'Beispiel', 'Zitat', 'Abstrakt', 'Siehe Auch', 'Ermahnung'],
        'ja': ['ノート', '要約', 'ヒント', '情報', '成功', '質問', '警告', '失敗', '危険', 'バグ', '例', '引用', '抄録', '参照', '訓告'],
        'ko': ['노트', '요약', '팁', '정보', '성공', '질문', '경고', '실패', '위험', '버그', '예제', '인용', '추상', '참조', '경고'],
        'hi': [
            'नोट', 'सारांश', 'सुझाव', 'जानकारी', 'सफलता', 'प्रश्न', 'चेतावनी', 'विफलता', 'खतरा', 'बग', 'उदाहरण',
            'उद्धरण', 'सार', 'देखें भी', 'आगाही'],
        'ar': [
            'ملاحظة', 'ملخص', 'نصيحة', 'معلومات', 'نجاح', 'سؤال', 'تحذير', 'فشل', 'خطر', 'عطل', 'مثال', 'اقتباس',
            'ملخص', 'انظر أيضاً', 'تحذير'],
    }

    def __init__(self, base_dir, update_links=True, update_text=True):
        """
        Initialize the MarkdownLinkFixer with the base directory.

        Args:
            base_dir (str | Path): Directory that contains the language subdirectories.
            update_links (bool): Enable redirecting broken '.md' links to the /en/ directory.
            update_text (bool): Enable the front matter, admonition, iframe and HTML tag fixes.
        """
        self.base_dir = Path(base_dir)
        self.update_links = update_links
        self.update_text = update_text
        self.md_link_regex = re.compile(r'\[([^]]+)]\(([^:)]+)\.md\)')

    @staticmethod
    def replace_front_matter(content, lang_dir):
        """
        Ensure front matter keywords remain in English.

        Translated 'comments' lines are rewritten to 'comments: true' (the rest of the line is discarded), while
        translated 'description' and 'keywords' keys are renamed and their values kept. Matching is
        case-insensitive and is applied to the whole text, not only to the front matter block.

        Args:
            content (str): Markdown text to fix.
            lang_dir (Path): Language directory; its stem (e.g. 'zh') selects the translation table.

        Returns:
            (str): The text with English front matter keys. Unknown languages are returned unchanged.
        """
        terms = MarkdownLinkFixer._FRONT_MATTER_TRANSLATIONS.get(lang_dir.stem, ())
        for term, eng_key in zip(terms, MarkdownLinkFixer._FRONT_MATTER_KEYS):
            term_re = re.escape(term)  # terms are literal text, never regex syntax
            # Accept both the ASCII colon and the full-width colon '：' that CJK translations tend to emit
            if eng_key == 'comments':
                content = re.sub(rf'{term_re} *[:：].*', f'{eng_key}: true', content, flags=re.IGNORECASE)
            else:
                content = re.sub(rf'{term_re} *[:：] *', f'{eng_key}: ', content, flags=re.IGNORECASE)
        return content

    @staticmethod
    def replace_admonitions(content, lang_dir):
        """
        Ensure admonition types ('!!! Note', '!!! Example', ...) remain in English.

        For non-English languages a bare admonition, written with either the English or the translated type, gets
        the translated term as its quoted title (e.g. '!!! Beispiel' -> '!!! Example "Beispiel"'). Any remaining
        translated type is replaced by the English one, and an admonition that starts directly with a quoted
        title is given the type 'Example'. Matching is case-insensitive.

        Args:
            content (str): Markdown text to fix.
            lang_dir (Path): Language directory; its stem (e.g. 'de') selects the translation table.

        Returns:
            (str): The text with English admonition types. Unknown languages are returned unchanged.
        """
        lang = lang_dir.stem
        terms = MarkdownLinkFixer._ADMONITION_TRANSLATIONS.get(lang, ())
        if not terms:
            return content

        for term, eng_key in zip(terms, MarkdownLinkFixer._ADMONITION_KEYS):
            term_re = re.escape(term)  # e.g. 'См. Также' contains '.', which must match literally
            if lang != 'en':
                # !!! Example -> !!! Example "Beispiel"
                content = re.sub(rf'!!! *{re.escape(eng_key)} *\n', f'!!! {eng_key} "{term}"\n', content,
                                 flags=re.IGNORECASE)
                # !!! Beispiel -> !!! Example "Beispiel"
                content = re.sub(rf'!!! *{term_re} *\n', f'!!! {eng_key} "{term}"\n', content, flags=re.IGNORECASE)
            # !!! Beispiel "Titel" -> !!! Example "Titel" (also normalizes spacing and case)
            content = re.sub(rf'!!! *{term_re}', f'!!! {eng_key}', content, flags=re.IGNORECASE)

        # Loop-invariant, so applied once: !!! "Title" -> !!! Example "Title"
        return re.sub(r'!!! *"', '!!! Example "', content)

    @staticmethod
    def update_iframe(content):
        """
        Update the 'allow' attribute of iframe if it does not contain the specific English permissions.

        Args:
            content (str): Markdown/HTML text to fix.

        Returns:
            (str): The text in which every non-empty allow="..." value that does not start with the English
                permission list is replaced by that list.
        """
        english = 'accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share'
        # [^"]+ keeps the match inside one attribute value; '.+?' could run past an empty allow="" into the
        # next attribute and corrupt the tag.
        pattern = re.compile(f'allow="(?!{re.escape(english)})[^"]+"')
        return pattern.sub(f'allow="{english}"', content)

    def link_replacer(self, match, parent_dir, lang_dir, use_abs_link=False):
        """
        Replace broken links with corresponding links in the /en/ directory.

        Args:
            match (re.Match): Match of md_link_regex; group 1 is the link text, group 2 the path without '.md'.
            parent_dir (Path): Directory of the Markdown file that contains the link.
            lang_dir (Path): Language directory the file belongs to; its sibling 'en' is the fallback location.
            use_abs_link (bool): Build the link relative to the docs root instead (marked as buggy, do not use).

        Returns:
            (str): The original link text if the target exists or has no /en/ counterpart, otherwise a link to
                the /en/ file written relative to parent_dir with the 'en/' component removed.
        """
        text, path = match.groups()
        original_link = match.group(0)

        # Append '.md' instead of using with_suffix(), which would replace an existing dotted part of the file
        # name (e.g. 'yolo-v1.5' -> 'yolo-v1.md') and make valid links look broken.
        linked_path = (parent_dir / f'{path}.md').resolve()
        if linked_path.exists():
            return original_link

        # Compare resolved paths only: linked_path is resolved, so lang_dir and base_dir must be as well
        lang_root = lang_dir.resolve()
        try:
            en_linked_path = lang_root.parent / 'en' / linked_path.relative_to(lang_root)
        except ValueError:
            en_linked_path = None  # the link points outside the language directory, nothing to redirect to

        if en_linked_path is None or not en_linked_path.exists():
            print(f"Warning: Broken link '[{text}]({path})' found in {parent_dir} does not exist in /docs/en/.")
            return original_link

        if use_abs_link:
            # Use absolute links WARNING: BUGS, DO NOT USE
            docs_root_relative_path = en_linked_path.relative_to(lang_root.parent)
            updated_path = str(docs_root_relative_path).replace('en/', '/../')
        else:
            # Use relative links: climb from the file's directory to base_dir, then descend into the target
            base_dir = self.base_dir.resolve()
            steps_up = len(parent_dir.resolve().relative_to(base_dir).parts)
            updated_path = (Path('../' * steps_up) / en_linked_path.relative_to(base_dir)).as_posix()
            # Drop the 'en' component from the link; as_posix() keeps forward slashes on every platform
            updated_path = updated_path.replace('/en/', '/', 1)

        print(f"Redirecting link '[{text}]({path})' from {parent_dir} to {updated_path}")
        return f'[{text}]({updated_path})'

    @staticmethod
    def update_html_tags(content):
        """
        Updates HTML tags in docs.

        Removes the closing slash of self-closing tags ('<br/>' -> '<br>'), gives Markdown images with empty alt
        text the placeholder 'MISSING', and appends alt="MISSING" to HTML <img> tags that have a src attribute
        but no alt attribute.

        Args:
            content (str): Markdown/HTML text to fix.

        Returns:
            (str): The updated text.
        """
        alt_tag = 'MISSING'

        # Remove closing slashes from self-closing HTML tags
        content = re.sub(r'<([^>]+?)\s*/>', r'<\1>', content)

        # Find all images without alt tags and add placeholder alt text
        content = re.sub(r'!\[(.*?)\]\((.*?)\)', lambda m: f'![{m.group(1) or alt_tag}]({m.group(2)})', content)

        # Add missing alt tags to HTML images. The lookahead is limited to the tag itself ([^>]*) so that the
        # word 'alt' appearing later on the same line does not suppress the fix.
        content = re.sub(r'<img\s+(?![^>]*\balt\s*=)[^>]*src=["\'](.*?)["\'][^>]*>',
                         lambda m: m.group(0).replace('>', f' alt="{alt_tag}">', 1), content)

        return content

    def process_markdown_file(self, md_file_path, lang_dir):
        """
        Process each markdown file in the language directory.

        Applies the enabled fixes to the file's text and writes the file back only if the text changed.

        Args:
            md_file_path (Path): Markdown file to fix in place (read and written as UTF-8).
            lang_dir (Path): Language directory the file belongs to.
        """
        print(f'Processing file: {md_file_path}')
        md_file_path = Path(md_file_path)
        original = md_file_path.read_text(encoding='utf-8')
        content = original

        if self.update_links:
            content = self.md_link_regex.sub(lambda m: self.link_replacer(m, md_file_path.parent, lang_dir), content)

        if self.update_text:
            content = self.replace_front_matter(content, lang_dir)
            content = self.replace_admonitions(content, lang_dir)
            content = self.update_iframe(content)
            content = self.update_html_tags(content)

        if content != original:  # leave untouched files alone
            md_file_path.write_text(content, encoding='utf-8')

    def process_language_directory(self, lang_dir):
        """
        Process each language-specific directory.

        Args:
            lang_dir (Path): Directory that is searched recursively for '*.md' files.
        """
        print(f'Processing language directory: {lang_dir}')
        for md_file in sorted(lang_dir.rglob('*.md')):  # sorted for a reproducible processing order
            self.process_markdown_file(md_file, lang_dir)

    def run(self):
        """Run the link fixing and front matter updating process for each language-specific directory."""
        for subdir in sorted(self.base_dir.iterdir()):  # sorted for a reproducible processing order
            # Language directories are the ones whose name is exactly two word characters, e.g. 'zh' or 'en'
            if subdir.is_dir() and re.fullmatch(r'\w\w', subdir.name):
                self.process_language_directory(subdir)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Defaults to the directory containing this script; point it at your MkDocs 'docs' directory if that differs
    docs_dir = str(Path(__file__).parent.resolve())

    # Fix links and text (front matter, admonitions, iframes, HTML tags) in every language directory
    MarkdownLinkFixer(docs_dir, update_links=True, update_text=True).run()
|
|
|
|