Meta - Migration Workflows

Parse notes from Diaro to Markdown.

Exporting notes from Diaro to .txt creates one long .txt file in which notes are separated by `-----------------------------------------` and tags are included as `Tags: <tag1>, <tag2>`.

I have created some simple Python scripts that help parse the .txt file into individual Markdown files and modify the tags. E.g., I had a bunch of notes relating to the corona pandemic that I wanted to aggregate using Dataview. The notes only had a tag (added by Diaro) and a title. I created a script that searches my notes for ones tagged with Corona by Diaro, then adds a Dataview inline field `summary::` containing the title of the note.

Use at your own risk!!

import re
import os
import uuid
from datetime import datetime

def parse_big_file(source='entryExport.txt', out_dir='parsed', encoding=None):
    """Split a Diaro text export into one numbered ``.txt`` file per entry.

    The export is a single file in which entries are separated by a run of
    dashes. Every chunk between two separators is written to
    ``<out_dir>/<index>.txt``, where ``index`` is the chunk's position in
    the export (starting at 0).

    Args:
        source: Path of the exported text file.
        out_dir: Directory that receives the per-entry files; created when
            it does not exist yet.
        encoding: Text encoding used for reading and writing. ``None``
            keeps the platform default.
    """
    # NOTE(review): any run of four or more dashes inside an entry body is
    # also treated as a separator -- confirm this cannot occur in the export.
    separator = re.compile(r'----+')
    with open(source, 'rt', encoding=encoding) as export:
        chunks = separator.split(export.read())

    os.makedirs(out_dir, exist_ok=True)
    for index, chunk in enumerate(chunks):
        if index > 0:
            # Fix the header of every entry but the first: drop the two
            # characters that follow the separator (presumably line breaks).
            chunk = chunk[2:]
        if not chunk.strip():
            # A whitespace-only chunk (e.g. after a trailing separator) has
            # no timestamp header and would only produce an empty file.
            continue
        # 'w' rather than 'a+': re-running must not append a second copy of
        # the entry to a file left over from a previous run.
        target = os.path.join(out_dir, f'{index}.txt')
        with open(target, 'w', encoding=encoding) as entry:
            entry.write(chunk)

def rename_files_in_dir(dir, dest="./renamed"):
    """Move every entry file in *dir* to *dest*, named after its timestamp.

    The first line of each file must be a Diaro timestamp such as
    ``08 July 2022, Friday 04:16 PM``. The file is moved to
    ``<dest>/<YYYY-MM-DD HHMMSS>.md``. When that name is already taken
    (two entries written in the same minute), ``-1``, ``-2``, ... is
    appended so no entry is overwritten.

    Args:
        dir: Directory holding the per-entry files.
        dest: Directory that receives the renamed files; created when it
            does not exist yet.

    Raises:
        ValueError: If the first line of a file is not a timestamp in the
            expected format. The message names the offending file.
    """
    os.makedirs(dest, exist_ok=True)

    # Sorted so that collision suffixes are assigned reproducibly.
    for filename in sorted(os.listdir(dir)):
        source = os.path.join(dir, filename)
        if not os.path.isfile(source):
            continue

        with open(source) as entry:
            firstline = entry.readline().strip()

        try:
            # %I (12-hour clock) is required here: with %H the %p directive
            # is ignored and "04:16 PM" would be read as 04:16.
            stamp = datetime.strptime(firstline, '%d %B %Y, %A %I:%M %p')
        except ValueError as err:
            raise ValueError(
                f"{source}: first line is not a Diaro timestamp: {firstline!r}"
            ) from err

        # Colons are stripped from the name only (they are not allowed in
        # Windows file names); the destination path is left untouched.
        base = str(stamp).replace(":", "")
        target = os.path.join(dest, base + '.md')
        suffix = 0
        while os.path.exists(target):
            suffix += 1
            target = os.path.join(dest, f"{base}-{suffix}.md")

        os.rename(source, target)

if __name__ == "__main__":
    # Step 1: split the Diaro export into numbered per-entry files.
    parse_big_file()
    # Step 2: rename the per-entry files found in ./parsed after the
    # timestamp on their first line. Must run after step 1.
    rename_files_in_dir("./parsed")
import os
from glob import glob
# A note is selected when a single line contains both of these substrings.
substring1 = "hvad::"
substring2 = "Corona"


def add_summary_field(filename, key=substring1, value=substring2):
    """Append a ``summary::`` field to one note if it carries the wanted tag.

    A note qualifies when some line contains both *key* and *value* and no
    line contains ``summary::`` yet. The summary text is the third line of
    the note without its first character.

    Args:
        filename: Path of the note to inspect and, if it qualifies, extend.
        key: Substring that marks the tag line.
        value: Substring that must appear on the same line as *key*.

    Returns:
        True if a summary line was appended, False if the note was left
        untouched.
    """
    # cp437 assigns a character to every byte value, so reading cannot fail
    # on unexpected bytes and existing content is never re-encoded.
    with open(filename, 'r+', encoding='cp437') as fp:
        lines = fp.readlines()

        # Use `in`, not str.find(): find() returns -1 (truthy) on a miss
        # and 0 (falsy) for a match at the start of the line.
        tagged = any(key in line and value in line for line in lines)
        if not tagged or any("summary::" in line for line in lines):
            return False
        if len(lines) < 3:
            # No third line to take the title from.
            return False

        # NOTE(review): assumes line 3 is the title and its first character
        # is a marker to drop -- confirm against the exported notes.
        title = lines[2][1:]

        fp.seek(0, os.SEEK_END)
        # Start on a fresh line when the note does not end with a newline.
        lead = "" if lines[-1].endswith("\n") else "\n"
        fp.write(f"{lead}summary:: {title}")
        return True


# os.path.join instead of "daily\**": the backslash form is an invalid
# escape sequence and only works as a path separator on Windows.
all_md_files = [f for f in glob(os.path.join("daily", "**"), recursive=True) if os.path.isfile(f)]
for filename in all_md_files:
    add_summary_field(filename)

3 Likes