Hi!
New to Obsidian and new to this forum, so forgive me if this has already been discussed (I did a quick search but couldn't find anything about what I'm about to write).
The topic, of course, is migrating from Roam to Obsidian: I have a fairly big graph "locked" in an EDN/JSON export.
I tried importing my whole graph into Logseq (don't ask how I stumbled onto this workaround... long story) and then opening the Logseq directory with Obsidian.
Everything is there and correctly (back)linked!!!
I did a quick "tour" of my notes and only found some minor glitches with {{alias:SOMETHING… and with (arbitrary) IDs appended to blocks by a Table of Contents plugin I had installed, but this seems to me the easiest path from Roam to Obsidian.
Has anyone else tried this? How did you manage to fix the glitches?
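In case it helps anyone following the same route, here is a minimal cleanup sketch for those two glitches, assuming the leftover IDs look like Logseq-style id:: <uuid> property lines and the alias remnants are Roam-style {{alias: ...}} snippets (both assumptions, so check them against what is actually in your vault and run this on a copy first):

import re
from pathlib import Path

VAULT = Path("path/to/your/vault")  # placeholder: point this at the imported vault

# Assumed patterns: a standalone Logseq id:: property line and a Roam {{alias: ...}} embed.
ID_LINE = re.compile(r"^\s*id:: [0-9a-fA-F-]+\s*$")
ALIAS = re.compile(r"\{\{alias:\s*(?:\(\([^)]*\)\))?\s*([^}]*)\}\}")

for md_file in VAULT.rglob("*.md"):
    text = md_file.read_text(encoding="utf-8")
    # Drop standalone id:: lines, then unwrap {{alias: ...}} to its display text.
    kept = [line for line in text.splitlines() if not ID_LINE.match(line)]
    cleaned = ALIAS.sub(r"\1", "\n".join(kept))
    if cleaned != text:
        md_file.write_text(cleaned, encoding="utf-8")
        print(f"cleaned {md_file}")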
The migration flow above from Standard Notes to Obsidian didn't work for me.
So I wrote a simple Python script that migrates the notes and tag structure.
Sharing:
import json
from argparse import ArgumentParser
from pathlib import Path
import os
def _parse_args():
    description = """Migrates a Standard Notes unencrypted backup folder to an Obsidian-compatible Markdown folder tree.
    If tags are enabled, they are converted to YAML front matter.
    The first tag found on a note becomes the note's folder, preserving the tag hierarchy in the file system.
    """
    argument_parser = ArgumentParser(description=description)
    argument_parser.add_argument('--output', type=str, help='Output folder. Defaults to input folder', default=None)
    argument_parser.add_argument('--skip-tags', dest='skip_tags', action='store_true', help='Ignores tags.', default=False)
    argument_parser.add_argument('input', type=str, help='Standard Notes backup folder')
    args = argument_parser.parse_args()
    return args
def prepare_notes_content(items, root_folder):
    notes = {}
    for item in items:
        if item['content_type'] != 'Note':
            continue
        content = item['content']['text']
        for clean_rich_text_words in [
            "<p>",
            "</p>",
            "<br>",
            '<p dir="auto">',
        ]:
            content = content.replace(clean_rich_text_words, "")
        notes[item['uuid']] = {
            'title': item['content']['title'],
            'content': content,
            "tags": [],
            "folder": ""
        }
    return notes
def _prepare_tags_parents(tag_folder):
    tag_parents = {}
    for tag_file in tag_folder.glob("*.txt"):
        with open(tag_file, "r") as f:
            tag_data = json.load(f)
        tag_uuid = tag_file.name.replace("Tag-", "").replace(".txt", "")
        tag_parents[tag_uuid] = {
            "parent": None,
            "title": tag_data['title']
        }
        for reference in tag_data['references']:
            if reference['content_type'] != "Tag":
                continue
            parent_uuid = reference['uuid'].split("-")[0]
            tag_parents[tag_uuid]['parent'] = parent_uuid
    return tag_parents
def prepare_tags(backup_folder, notes):
    tag_folder = Path(backup_folder) / "Items" / "Tag"
    if not os.path.isdir(tag_folder):
        return
    tag_parents = _prepare_tags_parents(tag_folder)
    for tag_file in tag_folder.glob("*.txt"):
        with open(tag_file, "r") as f:
            tag_data = json.load(f)
        references = tag_data['references']
        for reference in references:
            if reference['content_type'] != 'Note':
                continue
            try:
                item = notes[reference['uuid']]
            except KeyError:
                print(f'Wrong tag reference: missing uuid {reference["uuid"]}')
                continue  # skip dangling references instead of reusing the previous item
            tag_uuid = tag_file.name.replace("Tag-", "").replace(".txt", "")
            current_tag = tag_parents[tag_uuid]
            folder = current_tag['title']
            # Walk up the parent chain to rebuild the full tag path.
            while current_tag['parent']:
                current_tag = tag_parents[current_tag['parent']]
                folder = f"{current_tag['title']}/{folder}"
            if not item['folder']:
                item['folder'] = folder
            item['tags'].append(folder)
def write_output(output, notes):
    output_destination = Path(output) / "markdown_migration"
    create_folder(output_destination)
    for item in notes.values():
        create_folder(output_destination / item['folder'])
        file_name = (output_destination / item['folder'] / item['title'].replace(
            " ", "_"
        ).replace("/", "_")).with_suffix('.md')
        with open(file_name, "w") as f:
            if item['tags']:
                f.write("---\n")
                f.write("tags:\n")
                for tag in item['tags']:
                    f.write(f"  - {tag}\n")
                f.write("---\n\n")
            f.write(item['content'])
    print(f"Output written to {output_destination}")
def create_folder(folder):
    if os.path.isdir(folder):
        return
    try:
        os.makedirs(folder)
    except Exception:
        pass
def main(backup_folder, output=None, skip_tags=False):
    if output is None:
        output = backup_folder
    with open(Path(backup_folder) / "Standard Notes Backup and Import File.txt", "r") as f:
        backup = json.load(f)
    items = backup['items']
    notes = prepare_notes_content(items, output)
    if not skip_tags:
        prepare_tags(backup_folder, notes)
    write_output(output, notes)


if __name__ == '__main__':
    args = _parse_args()
    main(args.input, output=args.output, skip_tags=args.skip_tags)
Just found your question.
The script uses a Standard Notes backup folder as input, so first save a backup folder somewhere and then pass it to the script as input.
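For example, assuming the script above has been saved as migrate_standardnotes.py (a file name made up here), the call would look something like: python migrate_standardnotes.py /path/to/standardnotes-backup --output /path/to/obsidian-vault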
Exporting notes from Diaro to .txt creates one long .txt file where notes are separated by ----------------------------------------- and tags are included as Tags: <tag1>, <tag2>
I have created some simple Python scripts that help parse the txt file into individual Markdown files and modify the tags. E.g., I had a bunch of notes relating to the corona pandemic that I wanted to aggregate using Dataview. The notes only had a Diaro tag and a title, so I created a script that searches my notes for the ones Diaro tagged with Corona and then adds a Dataview inline field summary:: with the title of the note.
Use at your own risk!!
import re
import os
import uuid
from datetime import datetime
def parse_big_file():
    # Expects a parsed/ folder to exist next to the script.
    with open('entryExport.txt', 'rt') as fo:
        counter = 0
        pattern = re.compile(r'----+')  # regex pattern matching the dashed separator
        for group in re.split(pattern, fo.read()):
            # re.split splits the export text into one group per entry
            with open("parsed/" + str(counter) + '.txt', 'a+') as opf:
                if counter > 0:  # drop the line break left over from the separator (not needed for the first entry)
                    group = group[2:]
                opf.write(group)
            counter += 1
def rename_files_in_dir(dir):
    for filename in os.listdir(dir):
        with open(f"./parsed/{filename}") as openfile:
            firstline = openfile.readline()
        # firstline looks like: 08 July 2022, Friday 04:16 PM
        uuid_long = uuid.uuid4()
        # %I (12-hour clock) is needed here so the PM marker is actually applied
        date_time_obj = datetime.strptime(firstline.strip(), '%d %B %Y, %A %I:%M %p')
        # fname is built but unused; the rename below uses the full timestamp instead
        fname = str(date_time_obj.year) + '-' + str(date_time_obj.month) + '-' + str(date_time_obj.day) + str(uuid_long)[0:6]
        os.rename(f"./parsed/{filename}", f"./renamed/{str(date_time_obj)}".replace(":", "") + '.md')
if __name__ == "__main__":
    parse_big_file()
    rename_files_in_dir("./parsed")
import os
from glob import glob
all_md_files = [f for f in glob("daily/**", recursive=True) if os.path.isfile(f)]
substring1 = "hvad::"
substring2 = "Corona"
for filename in all_md_files:
    with open(filename, 'r+', encoding='cp437') as fp:
        lines = fp.readlines()
        # str.find() returns -1 (truthy) when nothing is found, so use `in` for the checks
        if any(substring1 in l and substring2 in l for l in lines):
            if not any("summary::" in l for l in lines):
                # append a Dataview inline field with the note title (third line, minus its leading '#')
                fp.write(f"summary:: {lines[2][1:]}")
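Not part of the scripts above, but since the export keeps tags on a Tags: <tag1>, <tag2> line, here is a minimal sketch of how that line could be turned into Obsidian front matter for each file produced by the first script (the parsed/ folder and .txt extension match that script; everything else is an assumption to adapt):

import re
from pathlib import Path

TAGS_LINE = re.compile(r"^Tags:\s*(.+)$", re.MULTILINE)

for note in Path("parsed").glob("*.txt"):
    text = note.read_text()
    match = TAGS_LINE.search(text)
    if not match:
        continue
    # Split the comma-separated tag list and make the names tag-friendly.
    tags = [t.strip().replace(" ", "_") for t in match.group(1).split(",") if t.strip()]
    # Remove the original Tags: line and prepend YAML front matter instead.
    body = TAGS_LINE.sub("", text, count=1)
    front_matter = "---\ntags:\n" + "".join(f"  - {t}\n" for t in tags) + "---\n\n"
    note.write_text(front_matter + body.lstrip("\n"))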