My vault is accumulating too many images that are not used anymore and never will be again.
So I was looking for a way to have a list of unlinked images so I could check them one by one and delete them by hand.
I haven’t really found any plugin that works for me, because I use an image library with the Excalidraw plugin, and all my images are linked from that library. So I need to be able to exclude some files from the link search.
I don’t know enough JavaScript, but I do know a bit of Python, so I wanted to write a Python script that browses the images in my images folder and looks for links to them throughout my vault. If an image has no link anywhere, the script adds it to a list, which it then writes to a Markdown file.
This seemed to work, but the result is wrong in both directions: some images are listed even though they are linked elsewhere, and some images that are not linked anywhere are missing from the list…
I tried using ChatGPT, but it didn’t really help.
So if anyone knows a bit about python and can look at my code, thanks in advance.
import os
import re
from pathlib import Path
# Root folder of the vault; Markdown files are searched for under this folder
dossier_racine = Path("D:/Base_de_connaissance")
# Root media folder (the folder name contains an emoji)
dossier_media = dossier_racine / "10🖼️ Médias"
# Output Markdown file that receives the list of unused images
fichier_sortie = "D:\\Base_de_connaissance\\unused_images.md"
# Set of link targets found in the scanned Markdown files, i.e. the images
# that ARE used (filled in by parcourir_fichiers_markdown)
images_utilisees = set()
# Regular expression matching wikilinks and embeds such as [[target]] or
# ![[target]], optionally followed by a single "|alias" part.
# Group 1 is the opening marker, group 2 is the link target.
regex_lien_image = re.compile(r"(\!?\[\[!?)\s*([^|\]]+)(?:\|[^|\]]+)?\s*]]")
# Function for extracting image names from a Markdown file
def extraire_noms_images(contenu):
    """Return the file names targeted by the wikilinks found in *contenu*.

    Each link target is reduced to a bare file name so that it can be
    compared with the ``Path.name`` values collected from the media folder:

    * a ``#heading`` / ``#^block`` fragment is dropped;
    * any folder prefix is dropped (``[[folder/image.png]]`` -> ``image.png``);
    * surrounding whitespace is removed.

    Args:
        contenu: Full text of a Markdown file.

    Returns:
        A list of file names, in order of appearance (duplicates are kept).
    """
    noms_images = []
    # findall yields (opening marker, target) tuples; only the target matters.
    for _marqueur, cible in regex_lien_image.findall(contenu):
        # A fragment after "#" points inside the target, it is not part of
        # the file name.
        cible = cible.split("#", 1)[0]
        # Links may be written with a vault-relative path; only the last
        # component can match a name taken from the media folder listing.
        nom = cible.replace("\\", "/").rsplit("/", 1)[-1].strip()
        if nom:
            noms_images.append(nom)
    return noms_images
# Recursive function for browsing Markdown files
def parcourir_fichiers_markdown(dossier):
    """Scan every Markdown file under *dossier* and record the linked images.

    Files listed in ``fichiers_a_exclure`` are skipped.  The names returned by
    ``extraire_noms_images`` for every other file are added to the
    module-level ``images_utilisees`` set.

    Args:
        dossier: ``pathlib.Path`` of the folder to search recursively.
    """
    # Normalise the exclusion entries to Path objects.  glob() yields Path
    # objects, and a Path never compares equal to a plain string, so testing
    # against the raw entries would silently exclude nothing.
    exclus = {Path(chemin) for chemin in fichiers_a_exclure}

    for chemin_fichier in dossier.glob("**/*.md"):
        if chemin_fichier in exclus:
            continue
        contenu = chemin_fichier.read_text(encoding="utf-8")
        # Mark every image name found in this file as used.
        images_utilisees.update(extraire_noms_images(contenu))
# Names (with extension) of the files found under the media folder.
# Markdown notes stored there (such as the Excalidraw library files excluded
# below) are not images, so they are left out of the candidate list.
images_dossier_media = {
    f.name
    for f in dossier_media.glob("**/*")
    if f.is_file() and f.suffix.lower() != ".md"
}

# Markdown files whose links must not count as "usage".
# Entries are Path objects so that they compare equal to the Path objects
# produced by glob() while browsing the vault; plain strings never would.
fichiers_a_exclure = [
    Path(dossier_media, "Excalidraw/Librairie d'image.excalidraw.md"),
    Path(dossier_media, "Excalidraw/Librairie d'icone.excalidraw.md"),
    # The report written by a previous run links every image it lists.
    Path(fichier_sortie),
]

# Browse the Markdown files of the vault to collect the used image names
parcourir_fichiers_markdown(dossier_racine)

# Unused images: present in the media folder but never linked
images_non_utilisees = images_dossier_media - images_utilisees

# Write one wikilink per unused image, sorted so the report is stable
# from one run to the next
with open(fichier_sortie, 'w', encoding='utf-8') as sortie:
    sortie.writelines(f"[[{image}]]\n" for image in sorted(images_non_utilisees))

print(f"Les liens d'images non utilisées (avec extension) ont été enregistrés dans {fichier_sortie}")