Dataview JS Getting The Tree Root

After some major headache, and twists and turns, I finally got a version which is able to pick roots automagically, detect multiple presences of a node in the tree (and avoid re-presenting it), detect loops, and finally lets you specify which nodes you want to present as root nodes. This latter option does, however, require the roots to be among the nodes already collected by the automated search.

The only thing missing now, is to get the nodes to collapse, which should be doable by changing from a dataviewjs script into a Templater execution command template. But that’s for another day. The code is presented as a standalone script, but I strongly recommend to make it into a dv.view() script, and pass the parameters at the top into the view script.

The full script in all its glory(?)
```dataviewjs

// ---- User settings: change these to suit your liking ----

// Name of the page field that lists a note's children.
const childrenKey = 'child';
// Text appended to a tree entry that closes a loop.
const loopIndicator = " ➰";
// Change to true if you want to see some debug output in the console.
const consoleDebug = false;

// Leave empty to let the script pick the roots itself. If you want to
// use predefined roots, uncomment the assignment below and adapt it to
// list your note names.
let myRootNames = [];
// myRootNames = [ "J", "Shapes" ]

// ---- Shared state used by the functions further down ----

// Node cache: node ID -> { value, parents, occurences, children, type }.
const nodes = {};
// Running counter that getNodeId() converts into letter IDs.
let nodeIdCounter = 0;
// Lookup key (as built by getNodeId) -> node ID.
const allIds = {};
// Markdown collected while rendering; handed to dv.paragraph() at the end.
let markdownOutput = "";

if (consoleDebug) {
  console.log(`\n\n\n******   New run ******\n\n`);
}

// Query all notes once and for all, i.e. build the node cache from
// every page that has a value in the children field.
const notesWithChildren = dv.pages().where((page) => page[childrenKey]);
notesWithChildren.forEach((note) => handleNote(note));


// Do some setup in order to build the tree

// IDs of nodes reached more than once. Filled during the dry run and
// deliberately kept for the real run, so those entries get a footnote.
const multipleVisitation = new Set();
// IDs of every node visited so far in the current run.
const allVisitedNodes = new Set();
// Per root node ID: the set renderNode() uses to detect loops.
let loopDetection = {};
// ID of the root whose tree is currently being rendered.
let currentRootNodeId;
const DRY_RUN = true;

// Predefined roots are used as soon as at least one name is listed;
// in that mode nodes outside the chosen trees are ignored.
const usePredefinedRoots = myRootNames.length > 0;
let rootNodeIds;

if (usePredefinedRoots) {
  // Build the nodeId list of your predefined names
  rootNodeIds = [];
  for (const name of myRootNames) {
    const page = await dv.page(name);
    if (consoleDebug) {
      console.log("name: " + name + ", page: ", page);
    }

    // Only look the ID up (third argument false), never create one: a
    // root has to be part of the node cache built earlier. A name that
    // does not resolve to a page is skipped instead of throwing.
    const rootPath = page?.file?.link?.path;
    const nodeId = rootPath ? getNodeId(rootPath, false, false) : false;
    if (nodeId) {
      rootNodeIds.push(nodeId);
    } else {
      console.warn(`Tree view: ignoring root "${name}", it is not among the collected nodes`);
    }
  }

  if (consoleDebug) {
    console.log("Pre-defined root nodes: ", rootNodeIds);
  }
} else {
  // Find nodes with no parents
  rootNodeIds = Object.keys(nodes)
    .filter((id) => nodes[id].parents.length === 0);

  if (consoleDebug) {
    console.log("Automated root nodes: ", rootNodeIds);
  }
}

// Do a dry run to check which nodes are presented multiple times.
// These are stored in the multipleVisitation set.
loopRootNodes(rootNodeIds, usePredefinedRoots, DRY_RUN);

if (consoleDebug) {
  console.log("loopDetection: ", loopDetection);
}

// Reset some variables before the real run.
// We're leaving multipleVisitation as is from the dry run.
allVisitedNodes.clear();
loopDetection = {};

loopRootNodes(rootNodeIds, usePredefinedRoots);

/*********************************************/
/*  Function definitions                     */
/*********************************************/

/**
 * Register one note, and every entry of its children field, in the
 * node cache.
 *
 * Strings become text nodes and Link values become link nodes, each
 * recorded as a child of the note. Any other kind of value is skipped,
 * because no node can be created for it.
 *
 * @param {object} note - Page object; reads note.file.path,
 *   note.file.link and note[childrenKey]. The note is not modified.
 */
function handleNote(note) {
  // Normalise the children field into something we can loop over: a
  // lone string/Link (or any non-iterable value) is wrapped in an array.
  const rawChildren = note[childrenKey];
  const isSingleValue =
    typeof rawChildren === "string" ||
    rawChildren instanceof Link ||
    typeof rawChildren?.[Symbol.iterator] !== "function";
  const connections = isSingleValue ? [rawChildren] : rawChildren;

  // Parent node
  const filePath = note.file.path;
  const fileId = insertNode(note.file.link);
  if (consoleDebug) {
    console.log(`\n${ fileId } (${ nodes[fileId].type }) = ${ nodes[fileId].value }`);
  }

  for (const connection of connections) {
    let nodeId;

    if (typeof connection === "string") {
      nodeId = insertNode(connection, filePath, true);
    } else if (connection instanceof Link) {
      nodeId = insertNode(connection, filePath);
    } else {
      // Neither text nor a link: pushing an undefined ID into the
      // children list would make the rendering fail later on.
      if (consoleDebug) {
        console.log("  skipping unsupported child value: ", connection);
      }
      continue;
    }

    nodes[fileId].children.push(nodeId);

    if (consoleDebug) {
      console.log(`  ${ nodeId } (${ nodes[nodeId].type }) = ${ nodes[nodeId].value }`);
    }
  }
}

/**
 * Add a value to the node cache, or count another occurrence of it
 * when it is already cached.
 *
 * @param {*} value - The text itself for a text node, otherwise an
 *   object whose `path` identifies the node.
 * @param {?string} parentPath - When given, appended to the node's
 *   parents list.
 * @param {boolean} textNode - True when `value` is plain text.
 * @returns {string} The ID of the (new or existing) node.
 */
function insertNode(value, parentPath = null, textNode = false) {
  const lookupKey = textNode ? value : value.path;
  const nodeId = getNodeId(lookupKey, textNode);

  if (!(nodeId in nodes)) {
    // First encounter: classify the node and create its cache entry.
    let nodeType;
    if (textNode) {
      nodeType = "text";
    } else if (value.path?.endsWith(".md")) {
      nodeType = "link";
    } else {
      nodeType = "newlink";
    }

    nodes[nodeId] = {
      value,
      parents: [],
      occurences: 0, // raised to 1 just below
      children: [],
      type: nodeType,
    };
  }
  nodes[nodeId].occurences += 1;

  if (parentPath) {
    nodes[nodeId].parents.push(parentPath);
  }

  return nodeId;
}

/**
 * Render one node and, recursively, its children as an indented
 * markdown list appended to markdownOutput.
 *
 * A node that was already visited in this run is shown again but its
 * children are not traversed; it is also recorded in
 * multipleVisitation. A node that is found on the path from the
 * current root down to itself is marked with loopIndicator.
 *
 * @param {string} nodeId - ID of the node to render.
 * @param {number} currentLevel - Depth in the tree; 0 starts a new root.
 * @param {boolean} isDryRun - When true nothing is written to
 *   markdownOutput, only the bookkeeping sets are updated.
 */
function renderNode(nodeId, currentLevel, isDryRun = false) {
  const node = nodes[nodeId];
  if (!node) {
    // Unknown ID (e.g. an undefined child): nothing to render.
    return;
  }

  if (currentLevel === 0) {
    currentRootNodeId = nodeId;
    loopDetection[currentRootNodeId] = new Set();
  }
  // IDs on the path from the current root down to this call.
  const currentPath = loopDetection[currentRootNodeId];

  // Check for multiple visitations
  const previouslyRendered = allVisitedNodes.has(nodeId);
  if (previouslyRendered) {
    // Don't traverse children, but tag it as visited multiple times
    multipleVisitation.add(nodeId);
  } else {
    allVisitedNodes.add(nodeId);
  }

  // Check for loops
  const loopDetected = currentPath.has(nodeId);
  if (!loopDetected) {
    currentPath.add(nodeId);
  }

  // Present this node
  if (!isDryRun) {
    let itemText = "   ".repeat(currentLevel);
    itemText += "- ";
    itemText += node.value;

    if (multipleVisitation.has(nodeId)) {
      itemText += ` [^${ nodeId }]`;
    }

    if (loopDetected) {
      itemText += loopIndicator;
    }

    markdownOutput += itemText + "\n";
    if (consoleDebug) {
      console.log(itemText);
    }
  }

  if (!previouslyRendered) {
    for (const child of node.children) {
      renderNode(child, currentLevel + 1, isDryRun);
    }
  }

  // Leave the path again, but only if this call put the node on it.
  // When a loop was detected the entry belongs to an ancestor call that
  // is still running; removing it here would hide later loops back to
  // that same ancestor.
  if (!loopDetected) {
    currentPath.delete(nodeId);
  }
}

/**
 * Render a tree for every given root and, unless this is a dry run,
 * output the collected markdown followed by the footnote list.
 *
 * @param {string[]} rootNodes - IDs of the nodes to start a tree from.
 * @param {boolean} ignoreOthers - When false, nodes not reached from
 *   the given roots are rendered as additional roots until every
 *   cached node has been visited.
 * @param {boolean} dryRun - When true nothing is added to
 *   markdownOutput and nothing is handed to dv.paragraph().
 */
function loopRootNodes(rootNodes, ignoreOthers = true, dryRun = false) {
  // Render one tree, followed by a blank separator line.
  const renderAsRoot = (nodeId) => {
    renderNode(nodeId, 0, dryRun);
    if (!dryRun) {
      markdownOutput += "\n";
    }
  };

  for (const rootNode of rootNodes) {
    renderAsRoot(rootNode);
  }

  if (!ignoreOthers) {
    const isUnvisited = (nodeId) => !allVisitedNodes.has(nodeId);
    // Keep promoting the first unvisited node to a root until the
    // visited count matches the number of cached nodes.
    while (Object.keys(nodes).length !== allVisitedNodes.size) {
      renderAsRoot(Object.keys(nodes).find(isUnvisited));
    }
  }

  if (!dryRun) {
    // Prepare for footnote output: one definition per node that is
    // presented more than once.
    markdownOutput += "\n";
    multipleVisitation.forEach((nodeId) => {
      const footnoteText = `[^${ nodeId }]: ` + nodes[nodeId].value;
      markdownOutput += footnoteText + "\n";
    });
    dv.paragraph(markdownOutput);
  }

  if (consoleDebug) {
    console.log("The final output: \n", markdownOutput);
  }
}

/**
 * Look up, or create, the node ID that belongs to a key.
 *
 * Text keys are stored with a "///" prefix, which keeps them apart
 * from link keys, and their IDs start with "_".
 *
 * @param {string} key - Link path, or the text of a text node.
 * @param {boolean} textNode - True when `key` is plain text.
 * @param {boolean} assumeInCache - When true (the default) an unknown
 *   key is assigned the next letter ID. When false nothing is created
 *   and an unknown key yields `false`.
 * @returns {string|false} The node ID, or `false` for an unknown key
 *   when `assumeInCache` is false.
 */
function getNodeId(key, textNode, assumeInCache = true) {
  const cacheKey = textNode ? "///" + key : key;
  const keyPrefix = textNode ? "_" : "";

  // Own-property check: the `in` operator would also match names
  // inherited from Object.prototype (e.g. a link called "constructor"
  // or "toString") and hand back a function instead of an ID.
  const isKnown = Object.prototype.hasOwnProperty.call(allIds, cacheKey);

  if (!isKnown) {
    if (!assumeInCache) {
      return false;
    }
    nodeIdCounter += 1;
    allIds[cacheKey] = keyPrefix + toLetters(nodeIdCounter);
  }

  return allIds[cacheKey];
}

/**
 * Convert a running number into a letter key: 1 -> A, 26 -> Z,
 * 27 -> AA, 28 -> AB, and so on.
 *
 * @param {number} num - The counter value to convert.
 * @returns {string} The letter representation.
 */
function toLetters(num) {
  let higherDigits = (num / 26) | 0;
  const remainder = num % 26;

  let lastLetter;
  if (remainder) {
    lastLetter = String.fromCharCode(64 + remainder);
  } else {
    // An exact multiple of 26 ends in Z and borrows one from the
    // higher digits, since there is no letter for zero.
    higherDigits -= 1;
    lastLetter = 'Z';
  }

  if (!higherDigits) {
    return lastLetter;
  }
  return toLetters(higherDigits) + lastLetter;
}

```

A few notes on this script:

  • It doesn’t fold since it uses markdown output through dv.paragraph(). I haven’t found a better way to output it as markdown within dataviewjs. Any ideas will be welcomed

  • The script runs in several phases:

    • First it builds the nodes list by traversing all pages with the childrenKey, and this cache is used throughout the rest of the script
    • Then it chooses whether to use predefined roots, and ignore other nodes in the list, or to search for nodes without a parent (and then complement with other nodes until all nodes are presented)
    • When it’s ready to do the root nodes, it’ll first do a dry run with no output, to check whether any nodes will be presented multiple times, and whether any loops exist in the tree
    • After the dry run, it’ll build the markdownOutput, and add footnote references to all nodes being presented multiple times, and loop indicators if that’s needed. It’ll also now build the footnote list, so one can check where the different presentation of a node is given
      • In the tree view you’ll see references like “2”, “2-1”, “2-2”, when it’s referencing the same node from multiple places
      • Through the footnote list, you can access each of these places by clicking the various arrow links after the link presentation
  • It allows for children fields to be all of the following:

    • A pure link
    • A link to a non-existing file
    • A link with either a path or an alias (and it’ll keep the alias)
    • Pure simple text
    • and even embedded image links (to some extent (that is as good as dataview handles them… )
  • A little note on the building of the nodes list: it builds a node for each of the fields referenced by childrenKey, and this node holds the following information:

    • “the key” – It’s a running numerical key as letters, that is A, B, C, … Z, AA, AB, … This format is chosen for ease of reference (and debugging), and for use when building the footnote references
    • value - This holds either a full Link variable, or the plain text of the field
    • parents[] – An array with one entry for each note that lists this node as a child (the script stores the file path of the parent note here)
    • children[] – An array of all the nodeId’s to all the children of this node
    • occurences – An initial attempt to keep track of how many times we’ve encountered this node when building the tree. It’s now deprecated, and will most likely be removed
    • type – Either “text” to denote the plain text of the node, or “link” or “newlink” to denote either an existing link or non-existing link. In the end, this also turned out to be futile, as when presenting a full Link object, it handles the difference between them by itself.
    • There is a redundancy in the node list with regards to keeping track of both the children and the parents, but it makes some of the later decisions a lot easier, so I kept using that. Surely a few bytes of memory are wasted, but not a whole lot. It keeps the logic simpler

So there you have it, a fully automated tree builder based on the definition of children fields scattered around your vault. To finish off this rather longish post, here are two views of my (somewhat evil) test setup. One using a mermaid graph, and the other as the tree list presented by this script.

Mermaid graph of my test setup

Treeview of my test setup

The image in the middle is a downscaled (through alias notation on the embed) children of “Shapes”.

5 Likes