Module:Convert

From Random Island Wiki
Revision as of 23:35, 9 September 2012 by >Johnuniq (add names to SIprefix table (preparing to add table of unit names) and change its lookup)
Jump to navigation Jump to search

Documentation for this module may be created at Module:Convert/doc

--[[
TODO Too many items to list, but following are some points:
- Output needs &nbsp; rather than space in several places.
- Some conversions require two outputs: {{convert|55|nmi|km mi}}.
- Some units have two values: {{convert|3.21|m|ftin}}.
- Remove commas from input numbers; add to output (always?).
- Use U+2212 MINUS SIGN for input + output number, not '-'.
]]--

--[[-----BEGIN DATA TABLE-----
Plan to write a program to generate the conversion tables below.
The input would be a text file in human-friendly format, and
the output would be the following tables.
When a lot of data is added, it might be useful to put this in another module.
Values from http://en.wikipedia.org/wiki/Conversion_of_units
Check with  http://en.wikipedia.org/wiki/Template:Convert/list_of_units
]]--

-- Prefixes that may precede the symbol of a unit which accepts prefixes.
-- Each key is the prefix as typed in a unit code (the 'k' in 'km');
-- 'exponent' is the power of ten the prefix stands for and 'name' is the
-- prefix spelled out. Names carry no padding so they can be joined directly
-- to a unit name; column alignment is done outside the quotes.
local SIprefixes = {
    ['Y']  = { exponent =  24, name = 'yotta' },
    ['Z']  = { exponent =  21, name = 'zetta' },
    ['E']  = { exponent =  18, name = 'exa'   },
    ['P']  = { exponent =  15, name = 'peta'  },
    ['T']  = { exponent =  12, name = 'tera'  },
    ['G']  = { exponent =   9, name = 'giga'  },
    ['M']  = { exponent =   6, name = 'mega'  },
    ['k']  = { exponent =   3, name = 'kilo'  },
    ['H']  = { exponent =   2, name = 'hecto' },  -- not an SI prefix, but allow for people typing this
    ['h']  = { exponent =   2, name = 'hecto' },
    ['da'] = { exponent =   1, name = 'deca'  },
    ['D']  = { exponent =   1, name = 'deca'  },  -- not an SI prefix, but allow for people typing this
    ['d']  = { exponent =  -1, name = 'deci'  },
    ['c']  = { exponent =  -2, name = 'centi' },
    ['m']  = { exponent =  -3, name = 'milli' },
    ['µ']  = { exponent =  -6, name = 'micro' },
    ['u']  = { exponent =  -6, name = 'micro' },  -- not an SI prefix, but allow for people typing this
    ['n']  = { exponent =  -9, name = 'nano'  },
    ['p']  = { exponent = -12, name = 'pico'  },
    ['f']  = { exponent = -15, name = 'femto' },
    ['a']  = { exponent = -18, name = 'atto'  },
    ['z']  = { exponent = -21, name = 'zepto' },
    ['y']  = { exponent = -24, name = 'yocto' },
}

-- Table of known units, keyed by unit symbol, plus the lookup method.
-- Each unit entry has:
--   utype    = kind of quantity; only units of the same utype can be converted
--   scale    = multiplier converting the unit to the intermediary unit of its utype
--   offset   = additive term used by the conversion (non-zero for temperatures)
--   prefixes = true if the symbol may be preceded by a prefix from SIprefixes
local units = {
    lookup = function (self, unit)
        -- Return true, t where t is the unit's converter table (or false, message).
        -- Given 'unit' is a symbol (like 'g'), with an optional SI prefix (as in 'kg').
        -- If, for example, 'kg' is in this table, that entry is used; otherwise prefix is applied.
        -- Only table entries count as units: this table also holds the
        -- 'lookup' function itself, which must not be mistaken for a unit
        -- when someone passes 'lookup' (or 'klookup') as a unit code.
        local t = self[unit]
        if type(t) == 'table' then
            return true, { utype = t.utype, scale = t.scale, offset = t.offset, baseunit = unit, prefix = "" }
        end
        for plen = 2, 1, -1 do
            -- Check for longer prefix first ('dam' is decametre).
            local prefix = string.sub(unit, 1, plen)
            local si = SIprefixes[prefix]
            if si ~= nil then
                local baseunit = unit:sub(plen+1)
                local base = self[baseunit]
                if type(base) == 'table' and base.prefixes == true then
                    return true, { utype = base.utype, scale = base.scale * 10^si.exponent, offset = base.offset, baseunit = baseunit, prefix = prefix }
                end
            end
        end
        local msg = 'Unit %s is not known.[[Category:Convert unknown unit]]'
        return false, msg:format(unit)
    end,
    -- The scales and offsets for mass convert to kilogramme as the intermediary unit.
    ['g'] =   { utype = 'mass',         scale = 0.001,          offset = 0,             prefixes = true},
    ['lb'] =  { utype = 'mass',         scale = 0.45359237,     offset = 0,             },
    ['oz'] =  { utype = 'mass',         scale = 0.45359237/16,  offset = 0,             },
    ['toz'] = { utype = 'mass',         scale = 0.0311034768,   offset = 0,             },
    -- The scales and offsets for length convert to metre as the intermediary unit.
    ['m'] =   { utype = 'length',       scale = 1,              offset = 0,             prefixes = true},
    ['mi'] =  { utype = 'length',       scale = 1609.344,       offset = 0,             },
    ['ft'] =  { utype = 'length',       scale = 0.3048,         offset = 0,             },
    ['yd'] =  { utype = 'length',       scale = 0.3048 * 3,     offset = 0,             },
    ['in'] =  { utype = 'length',       scale = 0.0254,         offset = 0,             },
    -- The scales and offsets for temperature convert to Kelvin as the intermediary unit.
    ['K'] =   { utype = 'temperature',  scale = 1,              offset = 0,             },
    ['C'] =   { utype = 'temperature',  scale = 1,              offset = -273.15,       },
    ['F'] =   { utype = 'temperature',  scale = 5/9,            offset = 32-273.15*(9/5),},
    -- The scales and offsets for area convert to square metre as the intermediary unit.
    ['m2'] =  { utype = 'area',         scale = 1,              offset = 0,             },
    ['a'] =   { utype = 'area',         scale = 100,            offset = 0,             prefixes = true},
    ['sqyd'] ={ utype = 'area',         scale = 0.83612736,     offset = 0,             },
    -- The scales and offsets for volume convert to cubic metre as the intermediary unit.
    ['m3'] =  { utype = 'volume',       scale = 1,              offset = 0,             },
    ['L'] =   { utype = 'volume',       scale = 0.001,          offset = 0,             prefixes = true},
}
-- Aliases: the degree-sign spellings share the entry of the plain symbol.
units['°K'] = units['K']
units['°C'] = units['C']
units['°F'] = units['F']

-- Default output unit for each input unit, plus the lookup method.
-- An entry with prefixes = true also serves every prefixed form of that
-- unit which has no entry of its own.
local defaultunits = {
    lookup = function (self, unit_table)
        -- Return true, s where s = name of unit's default output unit (or false, message).
        -- unit_table is a converter table with 'prefix' and 'baseunit' fields.
        -- The full code (prefix joined to base unit) is tried first; the
        -- bare base unit is used only when its entry allows prefixes.
        local base = unit_table.baseunit
        local code = unit_table.prefix .. base
        local entry = self[code]
        if entry == nil then
            local generic = self[base]
            if generic ~= nil and generic.prefixes == true then
                entry = generic
            end
        end
        if entry ~= nil then
            return true, entry.defaultunit
        end
        return false, ('Unit %s has no default target conversion.[[Category:Convert unknown unit]]'):format(code)
    end,

    -- Non-metric units default to one metric equivalent.
    ft       = { defaultunit = 'm' },
    yd       = { defaultunit = 'm' },
    ['in']   = { defaultunit = 'cm' },
    mi       = { defaultunit = 'km' },
    lb       = { defaultunit = 'kg' },
    oz       = { defaultunit = 'g' },
    toz      = { defaultunit = 'g' },
    F        = { defaultunit = 'C' },
    ['°F']   = { defaultunit = '°C' },
    sqyd     = { defaultunit = 'm2' },

    -- Metric units default to various non-metric units, according to SI prefix.
    -- Mass.
    g        = { defaultunit = 'lb', prefixes = true },
    -- Length.
    mm       = { defaultunit = 'in' },
    cm       = { defaultunit = 'in' },
    dm       = { defaultunit = 'ft' },
    m        = { defaultunit = 'ft', prefixes = true },
    dam      = { defaultunit = 'yd' },
    Hm       = { defaultunit = 'yd' },
    km       = { defaultunit = 'mi' },
    -- Temperature.
    K        = { defaultunit = 'C' },
    C        = { defaultunit = 'F' },
    ['°K']   = { defaultunit = '°C' },
    ['°C']   = { defaultunit = '°F' },
    -- Area.
    m2       = { defaultunit = 'sqyd' },
    a        = { defaultunit = 'acre', prefixes = true },
    -- Volume.
    m3       = { defaultunit = 'cuyd' },
    mL       = { defaultunit = 'floz' },
    cL       = { defaultunit = 'floz' },
    dL       = { defaultunit = 'floz' },
    L        = { defaultunit = 'pint', prefixes = true },
}

-------END DATA TABLE-----

-- Configuration options to keep magic values in one location.
-- Starts out empty; p.convert replaces it with the table returned by
-- get_config at the start of each invocation, and the formatting and
-- parsing helpers read config.numdot, config.numsep and config.maxsigfig.
local config = {}

local function get_config(frame)
    -- Build and return the table of configuration options for one call.
    -- Unclear if this is currently needed, but it may help if adapting
    -- code for a different wiki.
    -- The table is assembled in three layers, later layers winning:
    --   1. defaults that the template may override;
    --   2. every argument given in the template's {{#invoke:}};
    --   3. mandatory settings that cannot be overridden (to limit abuse).
    local options = {
        numdot = '.',       -- decimal mark before fractional digits
        numsep = ',',       -- thousands separator for numbers (',', '.', or nil)
    }
    for name, setting in frame:argumentPairs() do
        options[name] = setting
    end
    options.maxsigfig = 20  -- maximum number of significant figures
    return options
end

local function withseparator(text)
    -- Return string for a number with thousand separators inserted.
    -- Parameter text is a string like "-12345" or "12345.6789".
    -- Separator is inserted only in the integer part (not in fraction).
    -- Four-digit integer parts have a separator (like "1,234").
    -- The separator is config.numsep; when that is nil or empty the text
    -- is returned unchanged. The integer part ends just before the first
    -- config.numdot (or at the end of the text if there is none).
    -- Text that already contains separators is not detected; strip them
    -- before calling.
    local numsep = config.numsep
    if numsep == nil or #numsep == 0 then
        return text
    end
    local function insert(digits, first, last)
        -- Return digits[first..last] with numsep before each full group of
        -- three digits counted from the right.
        local result = ''
        while last >= first do
            if last >= first + 3 then
                result = numsep .. digits:sub(last-2, last) .. result
                last = last - 3
            else
                return digits:sub(first, last) .. result
            end
        end
        return result
    end
    local first = 1
    local sign = text:sub(1, 1)
    if sign == '+' or sign == '-' then
        -- To handle Unicode minus (multibyte), perhaps use following:
        -- first = text:find('%d')
        first = 2
    else
        sign = ''
    end
    -- Plain find, so a numdot of '.' is not treated as a pattern.
    local last = text:find(config.numdot, first, true)
    if last == nil then
        last = #text
    else
        last = last - 1
    end
    return sign .. insert(text, first, last) .. text:sub(last+1)
end

local function formatnumber(value, sigfig)
    -- Return result of converting number 'value' to a string,
    -- rounded to 'sigfig' significant figures.
    -- sigfig is clamped to the range 1 .. config.maxsigfig.
    -- The result never uses exponent notation, uses config.numdot as the
    -- decimal mark, and keeps significant trailing zeros (1 to three
    -- figures is '1.00'; 1234 to two figures is '1200').
    -- The digits come from a '%e' conversion of the value itself, so a
    -- round-up into the next power of ten (0.999 to two figures) gives
    -- '1.0' instead of tripping over an unexpected digit layout.
    local format = string.format
    local rep = string.rep
    local numdot = config.numdot
    if sigfig <= 0 then
        sigfig = 1
    elseif sigfig > config.maxsigfig then
        sigfig = config.maxsigfig
    end
    if value == 0 then
        if sigfig == 1 then
            return '0'
        end
        return '0' .. numdot .. rep('0', sigfig - 1)
    end
    local sign = ''
    if value < 0 then
        sign = '-'  -- need proper Unicode minus
        value = -value
    end
    -- One digit before the mark and sigfig-1 after it, like '1.23e+02'.
    local sci = format(format('%%.%de', sigfig - 1), value)
    -- Accept '.' or ',' as the mark in case of a non-C locale; the mark is
    -- absent when sigfig is 1.
    local lead, rest, exponent = sci:match('^(%d)[.,]?(%d*)[eE]([-+]%d+)$')
    if lead == nil then
        return sign .. sci  -- infinity or NaN: nothing to lay out
    end
    local digits = lead .. rest
    local exp = tonumber(exponent) + 1  -- adjust so dot is before digits
    if exp >= #digits then
        digits = digits .. rep('0', exp - #digits)  -- result has no dot
    elseif exp <= 0 then
        digits = '0' .. numdot .. rep('0', -exp) .. digits
    else
        digits = digits:sub(1, exp) .. numdot .. digits:sub(exp+1)
    end
    return sign .. digits
end

local function require_number(value, missing, invalid)
    -- Return true, n where n = number equivalent to given value (or false, message).
    -- value   = number, numeric string, or nil
    -- missing = message returned when value is nil
    -- invalid = format string (one %s) for the message returned when the
    --           value is not a number
    -- Thousand separators (valid or not) are first removed.
    if value == nil then return false, missing end
    if type(value) == 'number' then return true, value end
    local numsep = config.numsep
    if numsep ~= nil and #numsep > 0 then
        -- Escape punctuation so the separator is matched literally; an
        -- unescaped '.' is a pattern that matches every character.
        local literal = numsep:gsub('%p', '%%%0')
        value = string.gsub(value, literal, '')
    end
    local number = tonumber(value)
    if number == nil then return false, invalid:format(value) end
    return true, number
end

local function require_integer(value, missing, invalid)
    -- Return true, n where n = integer equivalent to given value (or false, message).
    -- Parsing is delegated to require_number, whose failure is passed
    -- straight back; a number with a fractional part is then rejected
    -- with the 'invalid' message formatted with the original value.
    local ok, result = require_number(value, missing, invalid)
    if ok then
        if result == math.floor(result) then
            return true, result
        end
        return false, invalid:format(value)
    end
    return ok, result
end

local function get_parms(pframe)
    -- Return true, t where t holds every argument passed to the template
    -- (or false, message). Positional arguments are also stored under names:
    --   in_text, in_text2 = the strings given for value, value2
    --   value, in_unit, out_unit, value2, range, round_to
    -- Accepted positional layouts:
    --   value | in_unit | out_unit | round_to
    --   value | range word | value2 | in_unit | out_unit | round_to
    -- Apart from range (nil or a table), the names added here could
    -- equally be supplied by the user of the template.
    local separators = {  -- range word -> {input separator, output separator}
        ['and'] = {' and ', ' and '},
        ['by'] = {' by ', ' by '},
        ['to'] = {' to ', ' to '},
        ['-'] = {'–', '–'},
        ['to(-)'] = {' to ', '–'},
        ['x'] = {' by ', ' × '},
        ['+/-'] = {' ± ', ' ± '},
    }
    local named = {}
    for key, val in pframe:argumentPairs() do
        named[key] = val
    end

    named.in_text = named[1]
    local ok, result = require_number(named.in_text, 'Need value', 'Value "%s" must be a number')
    if not ok then
        return ok, result
    end
    named.value = result

    -- The input unit is argument 2, unless argument 2 is a range word, in
    -- which case a second value follows and the unit moves to argument 4.
    local unit_index = 2
    local range = separators[named[2]]
    if range ~= nil then
        named.in_text2 = named[3]
        ok, result = require_number(named.in_text2, 'Need second value', 'Second value "%s" must be a number')
        if not ok then
            return ok, result
        end
        named.value2 = result
        unit_index = 4
    end

    local in_unit = named[unit_index]
    if in_unit == nil then
        return false, 'Need input unit'
    end
    named.in_unit = in_unit
    named.out_unit = named[unit_index + 1]
    named.range = range
    named.round_to = named.round_to or named[unit_index + 2]  -- allow named parameter
    return true, named
end

local function default_roundto(intext, factor)
    -- Return a default value for round_to (an integer like 2, 0, -2).
    -- intext = the input number as the user typed it
    -- factor = output value / input value, or nil to skip compensation
    -- prec = (precision implied in intext)
    --      = (#digits after dot, or negative of #zeroes before dot)
    -- If conversion is multiplication by a factor, and
    -- if factor >= 0.02, compensate prec by adding N where:
    --     N    factor is in range
    --     1     .02  :   .2   =    .1/5 :   .1*2
    --     0     .2   :   2    =    1/5  :   1*2
    --    -1     2    :  20    =   10/5  :  10*2
    --    -2    20    : 200    =  100/5  : 100*2  etc.
    -- TODO Exception required for temperature.
    -- math.log10 is missing from some Lua versions; fall back to math.log.
    local log10 = math.log10 or function (x) return math.log(x, 10) end
    local prec = 0
    local dot = intext:find('.', 1, true)
    if dot ~= nil then
        prec = intext:sub(dot+1):len()
        if prec == 0 then
            -- Text ends with the dot ('12.'): drop it so that trailing
            -- zeroes of the integer part are counted below.
            intext = intext:sub(1, -2)
        end
    end
    if prec == 0 then
        prec = -intext:match('0*$'):len()
    end
    if factor ~= nil and factor >= 0.02 then
        prec = prec - math.floor(log10(factor*5))
    end
    return prec
end

local function scaled(value, in_unit, out_unit)
    -- Return value converted from in_unit to out_unit for a simple convert.
    -- Both units are converter tables with 'scale' and 'offset' fields:
    -- the input offset is subtracted, the result is multiplied by the
    -- ratio of the two scales, and the output offset is added.
    local ratio = in_unit.scale / out_unit.scale
    local shifted = value - in_unit.offset
    return shifted * ratio + out_unit.offset
end

local function cvtround(invalue, intext, parms)
    -- Return true, s where s = rounded, formatted string from converting invalue,
    -- using the rounding specified in parms (s = '' if invalue == nil),
    -- or false, message if a rounding option is invalid.
    -- intext is the input number as typed; it sets the default precision.
    -- Rounding is chosen from the first of these that is present:
    --   parms.round_to   digits after the dot (negative: digits to zero before it)
    --   parms.sigfig     number of significant figures
    --   parms.disp=='5'  round to the nearest multiple of 5
    --   otherwise        precision from default_roundto
    -- This code combines convert/round because some rounding requires
    -- knowledge of what we are converting.
    -- TODO Lots of checking required. Will need tweaks for special cases
    -- handled by old Template:Convert.
    -- math.log10 is missing from some Lua versions; fall back to math.log.
    local log10 = math.log10 or function (x) return math.log(x, 10) end
    -- string.format rejects a precision of more than two digits, so a
    -- huge round_to (like 999) is capped instead of raising an error.
    local max_places = 99
    local text = ''
    if invalue == nil then return true, text end
    local outvalue = scaled(invalue, parms.in_unit_table, parms.out_unit_table)
    local round_to = parms.round_to
    local sigfig = parms.sigfig
    local disp = parms.disp
    local success
    if round_to then
        -- Ignore sigfig, disp.
        success, round_to = require_integer(round_to, 'Need value', 'round_to "%s" must be an integer')
        if not success then return success, round_to end
    elseif sigfig then
        -- Ignore disp.
        success, sigfig = require_integer(sigfig, 'Need value', 'sigfig "%s" must be an integer')
        if not success then return success, sigfig end
        if sigfig <= 0 then
            local msg = 'sigfig "%s" must be positive'
            return false, msg:format(parms.sigfig)
        end
        text = formatnumber(outvalue, sigfig)
    elseif disp == '5' then
        -- Round the magnitude half-up, then restore the sign.
        local negative = false
        if outvalue < 0 then
            negative = true
            outvalue = -outvalue
        end
        outvalue = math.floor((outvalue / 5) + 0.5) * 5
        if negative and outvalue ~= 0 then  -- never produce '-0'
            outvalue = -outvalue
        end
        text = string.format('%.0f', outvalue)
    else
        -- Using default rounding.
        -- TODO If conversion is not multiplication by a number, need factor = nil.
        local factor = outvalue / invalue
        round_to = default_roundto(intext, factor)
    end
    if round_to then
        if round_to >= 0 then
            -- TODO For default rounding: no less than two significant figures.
            if round_to > max_places then
                round_to = max_places
            end
            -- It seems format('%d', x) uses modf to extract integer from x
            -- with result '0' if x is 0 or -0 (negative zero).
            -- Using format('%.0f', x) gives '-0' if x is negative zero.
            local fmt = '%.' .. string.format('%d', round_to) .. 'f'
            text = string.format(fmt, outvalue)
        else
            -- This always keeps two sig figs. Should that be done if not auto?
            round_to = -round_to  -- #digits that want to zero
            local maxzeroes = 0  -- maximum #digits that should be zeroed
            -- Use the magnitude so negative values are treated like positive ones.
            local magnitude = math.abs(outvalue)
            if magnitude > 100 then
                maxzeroes = math.modf(log10(magnitude)) - 1
            end
            if round_to > maxzeroes then
                round_to = maxzeroes
            end
            if round_to > 0 then
                local head = string.format('%.0f', outvalue/(10^round_to))
                text = head .. string.rep('0', round_to)
            else
                text = formatnumber(outvalue, 2)  -- can't zero digits; keep 2 sig figs
            end
        end
    end
    return true, withseparator(text)
end

-- Output layouts for a single value, keyed by the 'disp' option.
-- process fills the four %s slots with, in order: input text, input unit,
-- output text, output unit. 'b' (brackets) is the fallback layout.
local disp_single = {
    b = '%s %s (%s %s)',
    comma = '%s %s, %s %s',
    sqbr = '%s %s [%s %s]',
    ['or'] = '%s %s or %s %s',  -- 'or' is a keyword, so the key needs brackets
}

-- Output layouts for a range of two values, keyed by the 'disp' option.
-- Each layout has eight %s slots: first value, separator, second value and
-- unit for the input, followed by the same four for the output.
local disp_double = {
    b = '%s%s%s %s (%s%s%s %s)',
    comma = '%s%s%s %s, %s%s%s %s',
    sqbr = '%s%s%s %s [%s%s%s %s]',
    ['or'] = '%s%s%s %s or %s%s%s %s',  -- 'or' is a keyword, so the key needs brackets
}

local function without_separator(text)
    -- Return text with every thousand separator (config.numsep) removed, so
    -- that input typed as '1,234' is not given a second set of separators.
    -- nil is returned unchanged.
    local numsep = config.numsep
    if text == nil or numsep == nil or #numsep == 0 then
        return text
    end
    -- Escape punctuation so the separator is matched literally.
    local literal = numsep:gsub('%p', '%%%0')
    return (tostring(text):gsub(literal, ''))
end

local function process(parms)
    -- Return true, s where s = final wikitext result (or false, message).
    -- parms is the table built by get_parms; in_unit_table, out_unit_table
    -- and, when it was not given, out_unit are stored into it here.
    local success, t
    success, t = units:lookup(parms.in_unit)
    if not success then return success, t end
    parms.in_unit_table = t
    -- An empty positional argument ({{convert|5|km||2}}) means "no output
    -- unit given", the same as a missing one.
    if parms.out_unit == nil or parms.out_unit == '' then
        success, t = defaultunits:lookup(parms.in_unit_table)
        if not success then return success, t end
        parms.out_unit = t
    end
    success, t = units:lookup(parms.out_unit)
    if not success then return success, t end
    parms.out_unit_table = t
    if parms.in_unit_table.utype ~= parms.out_unit_table.utype then
        local msg = 'Cannot convert %s to %s.[[Category:Convert dimension mismatch]]'
        return false, msg:format(parms.in_unit_table.utype, parms.out_unit_table.utype)
    end
    local intext1, outtext1 = parms.in_text, nil
    local intext2, outtext2 = parms.in_text2, nil
    success, outtext1 = cvtround(parms.value, intext1, parms)
    if not success then return success, outtext1 end
    success, outtext2 = cvtround(parms.value2, intext2, parms)
    if not success then return success, outtext2 end
    local range = parms.range
    -- A range needs the eight-slot layouts; a single value the four-slot ones.
    local layouts = (range == nil) and disp_single or disp_double
    local wikitext = layouts[parms.disp] or layouts['b']
    intext1 = withseparator(without_separator(intext1))
    if range == nil then
        wikitext = wikitext:format(intext1, parms.in_unit, outtext1, parms.out_unit)
    else
        intext2 = withseparator(without_separator(intext2))
        wikitext = wikitext:format(intext1, range[1], intext2, parms.in_unit, outtext1, range[2], outtext2, parms.out_unit)
    end
    return true, wikitext
end

-- Used by template {{convert2}}.
-- We will have to keep old {{convert}} for a long time, and run
-- {{convert2}} in parallel with {{convert}} while testing/developing.
-- p is the export table returned at the end of this module; its 'convert'
-- field is the entry point.
local p = {}
-- 'bodge' itself is never used; the module is loaded only for its effect.
local bodge = require "Module:mw" -- This fixes up mw.text.tag for us.

function p.convert(frame)
    -- Entry point: return the wikitext for one conversion.
    -- frame is the frame of the {{#invoke:}}; its arguments set the
    -- configuration, while the conversion parameters come from the parent
    -- frame (the arguments given to the template).
    -- On failure the message is returned inside a highlighted span that
    -- links to the module's talk page.
    config = get_config(frame)
    local pframe = frame:getParent()
    local text
    local success, parms = get_parms(pframe)
    if success then
        success, text = process(parms)
    else
        -- On failure get_parms returns its message as the second result.
        text = parms
    end
    if not success then
        local params = {style="color:black; background-color:orange;"}
        text=mw.text.tag({name="span", contents="[[Module talk:Convert|Conversion error]]: " .. text, params=params})
    end
    return text
end

return p