I’ve been using Diigo for 11 years to keep my bookmarks after del.icio.us was decommissioned.
I have almost 4,500 bookmarks there with about 2,000 tags.
Today, I thought to move those bookmarks as notes in separate Obsidian Vault.
Here are the steps:
- From Diigo Tools, export all bookmarks to csv format
- Download and uncompress
- Run python script to convert every record in the csv into a note, where:
a. Bookmark title = filename and first header in the file,
b. Bookmark tags = [[link]] and not #tag, to emphasize the power of the Obsidian graph,
c. If a bookmark URL doesn’t have a tag, [[no_tag]] is added,
d. Adding a new “URL Response” bullet, where it says if links still exist. Script checks the URL response HTTP status code (200, 404, …).
e. Other fields written as bullets in the file
Here is the python script:
import csv, os, re, requests, unicodedata
def slugify(value, allow_unicode=False):
    """
    Adapted from https://github.com/django/django/blob/master/django/utils/text.py
    Convert to ASCII if 'allow_unicode' is False. Convert spaces or repeated
    dashes to single dashes. Remove characters that aren't alphanumerics,
    underscores, or hyphens. Convert to lowercase. Also strip leading and
    trailing whitespace, dashes, and underscores.
    """
    value = str(value)
    if allow_unicode:
        value = unicodedata.normalize('NFKC', value)
    else:
        # Transliterate to the closest ASCII equivalent and drop the rest.
        value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode('ascii')
    # Lowercase, then drop anything that isn't alphanumeric, whitespace or a hyphen.
    # (The previous version never lowercased, despite the docstring.)
    value = re.sub(r'[^\w\s-]', '', value.lower())
    # Collapse runs of whitespace (including newlines) and dashes into a single
    # dash, then trim leading/trailing dashes and underscores.  The previous
    # pattern '[-]+' left spaces in filenames, contradicting the docstring.
    return re.sub(r'[-\s]+', '-', value).strip('-_')
def checkURL(url):
    """Return the HTTP status code for *url*, or 0 when unreachable.

    Uses a HEAD request so only headers (not the page body) are transferred.
    Any request failure — DNS error, refused connection, timeout, malformed
    URL — maps to 0 so one bad bookmark can't abort the whole export.
    """
    try:
        # Without a timeout a dead host would hang the entire conversion run.
        r = requests.head(url, timeout=10)
        return r.status_code
    except requests.RequestException:
        # Broader than ConnectionError: also covers Timeout, InvalidURL, etc.,
        # which previously crashed the script.
        return 0  # failed to connect
def main(file=r'C:\Archives\Diigo\csv_2021_11_12.csv'):
    """Convert each bookmark row of a Diigo CSV export into a Markdown note.

    Expected columns: title, url, tags, description, comments, annotations,
    created_at.  One ``.md`` file is written per data row in the current
    directory; duplicate titles get a `` (n)`` suffix instead of being
    silently overwritten.

    Args:
        file: Path to the Diigo CSV export.  Defaults to the original
            hard-coded location for backward compatibility.
    """
    line_count = 0
    # newline='' is the documented way to hand a file to the csv module;
    # explicit utf-8 avoids mojibake for non-ASCII titles on Windows.
    with open(file, newline='', encoding='utf-8') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader, None)  # skip the header row
        for row in csv_reader:
            line_count += 1
            filename = slugify(row[0], True)
            if os.path.isfile(filename + ".md"):
                # Duplicate title: disambiguate with the data-row number.
                filename = f"{filename} ({line_count}).md"
            else:
                filename += ".md"
            # 'with' guarantees the note is closed even if a write fails.
            with open(filename, "w", encoding='utf-8') as f:
                f.write("# " + row[0] + "\n")
                # Turn "tag1,tag2" into "[[tag1]], [[tag2]]" so tags appear
                # as links in the Obsidian graph; ':' is illegal in links.
                tags = row[2] or "no_tag"
                links = ", ".join(
                    f"[[{tag}]]" for tag in tags.replace(':', '_').split(',')
                )
                f.write("\n- Tags: " + links)
                f.write("\n- Description: " + row[3])
                f.write("\n- Comments: " + row[4])
                f.write("\n- Annotations: " + row[5])
                f.write("\n- Created_at: " + row[6])
                f.write("\n- URL Response: #" + str(checkURL(row[1])))
                f.write("\n- URL: " + row[1])
    # Previously the header was counted too; now this is data rows only.
    print(f'Processed {line_count} lines.')
if __name__ == "__main__":
    main()