I would like to share this Lua filter I made to carry the Obsidian image-resize syntax over to Pandoc.
It should be robust in all(?) cases:
with or without a space between the separator and the size
with or without a space between the width and the height
when only the width is given
Of course, the size values themselves must be valid.
I am not an expert in Lua, so there might be room for improvement. If you see any holes in my code, suggestions and ideas are welcome!
Enjoy
pandoc.utils = require 'pandoc.utils'
--- Pandoc Lua filter for Obsidian-style image sizes.
-- Recognizes a trailing `|WIDTH` or `|WIDTHxHEIGHT` in an image caption
-- (e.g. `![cat|400x300](cat.png)`), with optional spaces around the `|` and
-- around the `x`. The size suffix is stripped from the caption and stored in
-- the image's `width` / `height` attributes. An image whose caption has no
-- `|`, or whose text after the `|` is not a numeric size, is returned
-- untouched.
-- NOTE(review): this only edits the Image's own caption inlines. If the
-- pandoc version in use wraps standalone images in a separate figure element
-- with its own caption, that caption is presumably not affected -- confirm.
-- @param img the Image element handed in by pandoc
-- @return the same element, possibly modified
function Image (img)
  local stringify = pandoc.utils.stringify
  local caption = img.caption

  -- Find the last inline holding a separator and split it at its last `|`.
  -- All state lives in locals so nothing leaks from one image to the next.
  local sep_index, before, after
  for i = #caption, 1, -1 do
    before, after = stringify(caption[i]):match("^(.*)|(.*)$")
    if before then
      sep_index = i
      break
    end
  end
  if not sep_index then
    return img -- No separator: this is a plain caption.
  end

  -- The size may continue in the following inlines ("| 400 x 300"), so
  -- collect everything after the separator into one string before parsing.
  local pieces = { after }
  for i = sep_index + 1, #caption do
    pieces[#pieces + 1] = stringify(caption[i])
  end
  local tail = table.concat(pieces)

  local width, height = tail:match("^%s*(%d+)%s*[xX]%s*(%d+)%s*$")
  if not width then
    width = tail:match("^%s*(%d+)%s*$")
  end
  if not width then
    return img -- Text after `|` is not a size; leave the caption alone.
  end

  -- Iterate from the end so successive removals do not shift pending indices.
  for i = #caption, sep_index, -1 do
    caption:remove(i)
  end
  if before ~= "" then
    -- Put back the word that was glued to the separator ("word|400").
    caption[#caption + 1] = pandoc.Str(before)
  else
    -- The separator stood alone ("word | 400"): drop the space left behind.
    while #caption > 0 and caption[#caption].t == "Space" do
      caption:remove(#caption)
    end
  end
  -- Write the list back so the result does not depend on in-place mutation.
  img.caption = caption

  img.attributes.width = width
  if height then
    img.attributes.height = height
  end
  return img
end
Hi, thanks for this, it looks like exactly what I need!
Everything is working fine, except that the image caption keeps the "| size" part.
I have done tons of tests without any success. Is your filter still working with the latest Pandoc 3.1.1? I am starting to wonder if there is a bug with image captions.
I was using the filter from the original post, but it stopped working properly when I switched from MD to wikilinks.
I’m currently using this one, which works with wikilinks, but so far I haven’t been able to find a way to fix the caption display with any of them.
NOTE: This was generated by ChatGPT, so it might have issues that I haven’t noticed xD
pandoc.utils = require 'pandoc.utils'
--- Strip leading and trailing whitespace from a string.
-- @param s string to clean up
-- @return `s` without surrounding whitespace
local function trim(s)
  -- The lazy capture keeps the shortest middle part, so trailing whitespace
  -- is consumed by the final `%s*` rather than by the capture.
  local stripped = s:match("^%s*(.-)%s*$")
  return stripped
end
--- Parse a size specification such as "400" or "400x300" / "400 X 300".
-- @param text candidate size string, already trimmed
-- @return width and height as strings; height is nil when only a width is
--   given; returns nil when `text` is not a size
local function parse_size(text)
  local width, height = text:match("^(%d+)%s*[xX]%s*(%d+)$")
  if width then
    return width, height
  end
  return text:match("^(%d+)$")
end

--- Pandoc Lua filter for Obsidian-style image sizes.
-- Two caption shapes are handled:
--   * the caption is only a size ("400", "400x300"): the size is applied and
--     the caption is replaced by the file name taken from the image source
--     (or emptied for data URIs), so the alt text is not a bare number;
--   * the caption ends in "|size" ("My cat|400", "My cat | 400 x 300"): the
--     size is applied and the caption becomes the text before the last `|`.
-- Anything else is returned untouched, including captions whose text after
-- the `|` is not a numeric size.
-- The rewritten caption is a single Str, so inline formatting inside the
-- original caption is flattened to plain text.
-- @param img the Image element handed in by pandoc
-- @return the same element, possibly modified
function Image(img)
  -- Work on the whole caption at once; looking at single inlines would lose
  -- words before the `|` and miss sizes separated from it by spaces.
  local cap = trim(pandoc.utils.stringify(img.caption or ""))

  local new_caption
  local width, height = parse_size(cap)
  if width then
    -- The image location is read from `src`; prefer its last path component
    -- as alt text unless it is an inline data URI.
    local src = img.src or ""
    local filename = ""
    if not src:match("^data:") then
      filename = src:match("([^/\\]+)$") or ""
    end
    new_caption = filename ~= "" and { pandoc.Str(filename) } or {}
  else
    -- Greedy first capture: split at the LAST `|`, since the size comes last.
    local before, size_text = cap:match("^(.*)|(.*)$")
    if not before then
      return img
    end
    width, height = parse_size(trim(size_text))
    if not width then
      return img -- Not a size: keep the caption exactly as it was.
    end
    before = trim(before)
    new_caption = before ~= "" and { pandoc.Str(before) } or {}
  end

  img.caption = new_caption
  img.attributes.width = width
  if height then
    img.attributes.height = height
  end
  return img
end
EDIT: The one from jeetsukumaran’s comment also works well