Pandoc Lua Filter to pass image size

Hi,

I would like to share this Lua filter I wrote to carry Obsidian's image-resize syntax (`![caption|WIDTHxHEIGHT](path)`) over to Pandoc.
It should be robust for all(?) cases:

  • space and no space between separator and size
  • space and no space between width and height
  • if only width is passed
    Of course, the size values themselves still have to be valid.

I am not an expert in Lua, so there might be room for improvement. If you see any holes in my code, suggestions and ideas are welcome!

Enjoy :slight_smile:

pandoc.utils = require 'pandoc.utils'

--- Pandoc filter: convert an Obsidian-style size suffix in an image caption
-- ("caption|WIDTH" or "caption|WIDTHxHEIGHT") into width/height attributes.
--
-- Whitespace around the "|" separator and around the "x" between width and
-- height is optional. The separator and everything after it are removed from
-- the caption. An image whose caption contains no "|" is returned unchanged.
-- The size values are not validated; they are passed on as written.
--
-- NOTE(review): this only edits the Image element itself. With pandoc >= 3 a
-- stand-alone image is presumably wrapped in a Figure that carries its own
-- caption, which this function does not touch -- confirm whether a Figure
-- handler is needed as well.
--
-- @param img the Image element handed to the filter by pandoc
-- @return the (possibly modified) Image element
function Image (img)
  local stringify = pandoc.utils.stringify
  local caption = img.caption

  -- Locate the first caption inline that contains the separator. All state
  -- is kept in locals so nothing leaks from one image to the next.
  local index, last_word, size
  for i, v in ipairs(caption) do
    local caption_string = stringify(v)
    local pipe_pos = string.find(caption_string, "|", 1, true) -- plain search
    if pipe_pos then
      index = i
      last_word = string.sub(caption_string, 1, pipe_pos - 1) -- text glued to the left of "|"
      size = string.sub(caption_string, pipe_pos + 1) -- text glued to the right of "|"
      break
    end
  end

  -- No separator: there is no size to extract, leave the image alone.
  if not index then
    return img
  end

  -- Gather all text that follows the separator and split it on "x" and
  -- whitespace, so "100x200", "100 x 200", "100 x200" and "100" all work.
  local pieces = { size }
  for i = index + 1, #caption do
    pieces[#pieces + 1] = stringify(caption[i])
  end
  local size_sep = {}
  for w in string.gmatch(table.concat(pieces, " "), "[^x%s]+") do
    size_sep[#size_sep + 1] = w
  end

  -- Remove the separator inline and everything after it (always popping the
  -- last element, so no index shifting is involved).
  for _ = index, #caption do
    table.remove(caption)
  end
  if last_word ~= "" then
    -- Put back the word that was glued to the separator.
    caption[#caption + 1] = pandoc.Str(last_word)
  else
    -- The separator stood alone: drop the space(s) left dangling before it.
    while #caption > 0 and caption[#caption].t == "Space" do
      table.remove(caption)
    end
  end
  img.caption = caption

  -- Width is the first value, height (optional) the second.
  if size_sep[1] then
    img.attributes.width = size_sep[1]
  end
  if size_sep[2] then
    img.attributes.height = size_sep[2]
  end
  return img
end
2 Likes

Hi, thanks for this, it looks like exactly what I need!
Everything works fine except that the image caption keeps the "| size" part.
I have run tons of tests without any success. Is your filter still working with the latest Pandoc (3.1.1)? I am starting to wonder whether there is a bug with image captions.

1 Like

I have exactly the same problem.
Did you find a solution?

I’ve been using this:

In beamer, you have to add a backslash “\” after your image, to make pandoc treat it like an inline image:

[[path/to/image.jpg| 200]] \

otherwise it doesn’t get passed to the filter.

I was using the filter from the original post, but it stopped working properly when I switched from MD to wikilinks.

I’m currently using this one, which works with wikilinks, but so far I haven’t been able to find a way to fix the caption display with either of them.

NOTE: This was generated by chatgpt, so might have issues that I haven’t noticed xD

pandoc.utils = require 'pandoc.utils'

-- Return `s` without its leading and trailing whitespace.
local function trim(s)
  -- The anchored pattern always matches the whole string; the lazy capture
  -- in the middle is everything between the outer whitespace runs.
  local stripped = s:match("^%s*(.-)%s*$")
  return stripped
end

--- Pandoc filter: derive image width/height from an Obsidian-style size
-- found in the image caption.
--
-- Two caption shapes are recognised:
--   * the whole caption is a size ("400", "400x300", "400 X 300"), as
--     produced by wikilinks such as `![[image.png|400]]`; the caption is then
--     replaced by the file name of the image (or emptied for data URIs and
--     when no file name can be derived);
--   * the caption contains a pipe ("my caption|400x300", "my caption | 400");
--     the pipe and everything after it are removed from the caption and the
--     text before the pipe is kept.
-- Captions matching neither shape are returned unchanged.
--
-- NOTE(review): this only edits the Image element itself. With pandoc >= 3 a
-- stand-alone image is presumably wrapped in a Figure that carries its own
-- caption, which this function does not touch -- confirm whether a Figure
-- handler is needed as well.
--
-- @param img the Image element handed to the filter by pandoc
-- @return the (possibly modified) Image element
function Image(img)
  local stringify = pandoc.utils.stringify

  -- Combine all caption inlines into one string. Trim *before* matching:
  -- the patterns below are anchored, so surrounding whitespace would
  -- otherwise prevent a match.
  local cap = trim(stringify(img.caption or ""))

  -- First, check if the caption is a pure size specification.
  local width, height = cap:match("^(%d+)%s*[xX]%s*(%d+)$") -- e.g. "400x300"
  if not width then
    width = cap:match("^%d+$") -- e.g. "400"
  end

  if width then
    img.attributes.width = width
    if height then
      img.attributes.height = height
    end
    -- For accessibility, prefer the file name over a bare number as alt
    -- text. The image path lives in the `src` field of an Image element.
    local src = img.src or ""
    local filename = ""
    if not src:match("^data:") then
      filename = src:match("([^/\\]+)$") or ""
    end
    if filename ~= "" then
      img.caption = { pandoc.Str(filename) }
    else
      img.caption = {}
    end
    return img
  end

  -- Otherwise, look for a pipe inside the caption (regular markdown images).
  local index, caption_before, size_info
  for i, v in ipairs(img.caption) do
    local caption_string = stringify(v)
    local pipe_pos = caption_string:find("|", 1, true) -- plain search
    if pipe_pos then
      index = i
      caption_before = trim(caption_string:sub(1, pipe_pos - 1))
      size_info = caption_string:sub(pipe_pos + 1)
      break
    end
  end
  if not index then
    return img
  end

  -- Collect all text after the pipe (the size may sit in later inlines when
  -- spaces are used) and split it on "x"/"X" and whitespace.
  local pieces = { size_info }
  for i = index + 1, #img.caption do
    pieces[#pieces + 1] = stringify(img.caption[i])
  end
  local size_sep = {}
  for token in string.gmatch(table.concat(pieces, " "), "[^xX%s]+") do
    size_sep[#size_sep + 1] = token
  end

  -- Rebuild the caption from everything that precedes the pipe.
  local kept = {}
  for i = 1, index - 1 do
    kept[i] = img.caption[i]
  end
  if caption_before ~= "" then
    kept[#kept + 1] = pandoc.Str(caption_before)
  else
    -- The pipe stood alone: drop the space(s) left dangling before it.
    while #kept > 0 and kept[#kept].t == "Space" do
      kept[#kept] = nil
    end
  end
  img.caption = kept

  -- Width is the first value, height (optional) the second.
  if size_sep[1] then
    img.attributes.width = size_sep[1]
  end
  if size_sep[2] then
    img.attributes.height = size_sep[2]
  end
  return img
end

EDIT: The one from jeetsukumaran’s comment also works well