Module:Convert

From Random Island Wiki
Revision as of 05:55, 12 September 2012 by >WOSlinker (disp_double fix)
Jump to navigation Jump to search

Documentation for this module may be created at Module:Convert/doc

--[[
TODO Too many items to list, but following are some points:
- Output needs &nbsp; (non-breaking space) rather than a plain space in several places.
- Some conversions require two outputs: {{convert|55|nmi|km mi}}.
- Some units have two values: {{convert|3.21|m|ftin}}.
- Remove commas from input numbers; add to output (always?).
- Use U+2212 MINUS SIGN for input + output number, not '-'.
]]--

--[[-----BEGIN DATA TABLE-----
Plan to write a program to generate the conversion tables below.
The input would be a text file in human-friendly format, and
the output would be the following tables.
When a lot of data is added, it might be useful to put this in another module.
Values from http://en.wikipedia.org/wiki/Conversion_of_units
Check with  http://en.wikipedia.org/wiki/Template:Convert/list_of_units
]]--

-- SI prefixes accepted in front of units that allow prefixes.
-- Each entry maps the prefix symbol to:
--   exponent: power of ten applied to the base unit's scale
--   name:     spelled-out prefix (no padding, so it can be joined directly
--             to a unit name such as 'metre')
-- A few non-SI spellings are accepted as aliases because people type them.
local SIprefixes = {
    ['Y']  = { exponent =  24, name = 'yotta' },
    ['Z']  = { exponent =  21, name = 'zetta' },
    ['E']  = { exponent =  18, name = 'exa'   },
    ['P']  = { exponent =  15, name = 'peta'  },
    ['T']  = { exponent =  12, name = 'tera'  },
    ['G']  = { exponent =   9, name = 'giga'  },
    ['M']  = { exponent =   6, name = 'mega'  },
    ['k']  = { exponent =   3, name = 'kilo'  },
    ['H']  = { exponent =   2, name = 'hecto' },  -- not an SI prefix, but allow for people typing this
    ['h']  = { exponent =   2, name = 'hecto' },
    ['da'] = { exponent =   1, name = 'deca'  },
    ['D']  = { exponent =   1, name = 'deca'  },  -- not an SI prefix, but allow for people typing this
    ['d']  = { exponent =  -1, name = 'deci'  },
    ['c']  = { exponent =  -2, name = 'centi' },
    ['m']  = { exponent =  -3, name = 'milli' },
    ['µ']  = { exponent =  -6, name = 'micro' },
    ['u']  = { exponent =  -6, name = 'micro' },  -- not an SI prefix, but allow for people typing this
    ['n']  = { exponent =  -9, name = 'nano'  },
    ['p']  = { exponent = -12, name = 'pico'  },
    ['f']  = { exponent = -15, name = 'femto' },
    ['a']  = { exponent = -18, name = 'atto'  },
    ['z']  = { exponent = -21, name = 'zepto' },
    ['y']  = { exponent = -24, name = 'yocto' },
}

-- Table of known units, keyed by unit symbol, plus a lookup method.
-- Each unit entry holds:
--   utype:    kind of quantity ('mass', 'length', ...); only units with the
--             same utype can be converted into each other
--   scale:    multiplier converting the unit to the intermediary unit
--   offset:   additive offset (non-zero only for temperatures)
--   prefixes: true if SI prefixes may be put in front of the symbol
local units = {
    lookup = function (self, unit)
        -- Return true, t where t is the unit's converter table (or false, message).
        -- Given 'unit' is a symbol (like 'g'), with an optional SI prefix (as in 'kg').
        -- If, for example, 'kg' is in this table, that entry is used; otherwise prefix is applied.
        local function converter(symbol)
            -- Only table entries are units. This table also holds the
            -- 'lookup' function itself, so a user typing the unit 'lookup'
            -- must be reported as unknown rather than indexing a function.
            local entry = self[symbol]
            if type(entry) == 'table' then
                return entry
            end
            return nil
        end
        local t = converter(unit)
        if t ~= nil then
            return true, { utype = t.utype, scale = t.scale, offset = t.offset, baseunit = unit, prefix = "" }
        end
        for plen = 2, 1, -1 do
            -- Check for longer prefix first ('dam' is decametre).
            local prefix = string.sub(unit, 1, plen)
            local si = SIprefixes[prefix]
            if si ~= nil then
                local baseunit = unit:sub(plen+1)
                local base = converter(baseunit)
                if base ~= nil and base.prefixes == true then
                    return true, { utype = base.utype, scale = base.scale * 10^si.exponent, offset = base.offset, baseunit = baseunit, prefix = prefix }
                end
            end
        end
        local msg = 'Unit %s is not known.[[Category:Convert unknown unit]]'
        return false, msg:format(unit)
    end,
    -- The scales and offsets for mass convert to kilogramme as the intermediary unit.
    ['g'] =   { utype = 'mass',         scale = 0.001,          offset = 0,             prefixes = true},
    ['lb'] =  { utype = 'mass',         scale = 0.45359237,     offset = 0,             },
    ['oz'] =  { utype = 'mass',         scale = 0.45359237/16,  offset = 0,             },
    ['toz'] = { utype = 'mass',         scale = 0.0311034768,   offset = 0,             },
    -- The scales and offsets for length convert to metre as the intermediary unit.
    ['m'] =   { utype = 'length',       scale = 1,              offset = 0,             prefixes = true},
    ['mi'] =  { utype = 'length',       scale = 1609.344,       offset = 0,             },
    ['ft'] =  { utype = 'length',       scale = 0.3048,         offset = 0,             },
    ['yd'] =  { utype = 'length',       scale = 0.3048 * 3,     offset = 0,             },
    ['in'] =  { utype = 'length',       scale = 0.0254,         offset = 0,             },
    -- The scales and offsets for temperature convert to Kelvin as the intermediary unit.
    ['K'] =   { utype = 'temperature',  scale = 1,              offset = 0,             },
    ['C'] =   { utype = 'temperature',  scale = 1,              offset = -273.15,       },
    ['F'] =   { utype = 'temperature',  scale = 5/9,            offset = 32-273.15*(9/5),},
    -- The scales and offsets for area convert to square metre as the intermediary unit.
    ['m2'] =  { utype = 'area',         scale = 1,              offset = 0,             },
    ['a'] =   { utype = 'area',         scale = 100,            offset = 0,             prefixes = true},
    ['sqft'] ={ utype = 'area',         scale = 0.09290304,     offset = 0,             },
    ['sqyd'] ={ utype = 'area',         scale = 0.83612736,     offset = 0,             },
    ['acre'] ={ utype = 'area',         scale = 4046.8564224,   offset = 0,             },
    -- The scales and offsets for volume convert to cubic metre as the intermediary unit.
    ['m3'] =  { utype = 'volume',       scale = 1,              offset = 0,             },
    ['L'] =   { utype = 'volume',       scale = 0.001,          offset = 0,             prefixes = true},
    ['cuyd'] ={ utype = 'volume',       scale = 0.764554857984, offset = 0,             },
    ['USgal']={ utype = 'volume',       scale = 0.003785411784, offset = 0,             },
    ['USfloz']={ utype = 'volume',      scale = 0.003785411784/128, offset = 0,         },
    ['USpint']={ utype = 'volume',      scale = 0.003785411784/8, offset = 0,         },
    ['gal']=  { utype = 'volume',       scale = 0.00454609,     offset = 0,             },
    ['floz']= { utype = 'volume',       scale = 0.00454609/160, offset = 0,             },
    ['pint']= { utype = 'volume',       scale = 0.00454609/8,   offset = 0,             },
}
-- Aliases: the degree-sign spellings share the converter tables of the plain symbols.
units['°K'] = units['K']
units['°C'] = units['C']
units['°F'] = units['F']

-- Default output unit for each input unit, plus a lookup method.
-- Each entry is a table whose [1] is the default output unit symbol.
-- Entries with prefixes = true may also hold per-prefix overrides keyed by
-- the SI prefix symbol (for example k = 'mi' makes km default to miles).
local defaultunits = {
    lookup = function (self, unit_table)
        -- Return true, s where s = name of unit's default output unit (or false, message).
        -- unit_table is a converter table with fields 'baseunit' and 'prefix'.
        local baseunit = unit_table.baseunit
        local prefix = unit_table.prefix
        local unit = prefix .. baseunit
        -- An exact entry for the full symbol wins over prefix handling.
        local t = self[unit]
        if t ~= nil then return true, t[1] end
        t = self[baseunit]
        if t ~= nil and t.prefixes == true then
            -- Use the override for this prefix if there is one.
            local defaultunit = t[prefix] or t[1]
            return true, defaultunit
        end
        local msg = 'Unit %s has no default target conversion.[[Category:Convert unknown unit]]'
        return false, msg:format(unit)
    end,
    -- Non-metric units default to one metric equivalent.
    ['ft'] =  { 'm' },
    ['yd'] =  { 'm' },
    ['in'] =  { 'cm' },
    ['lb'] =  { 'kg' },
    ['oz'] =  { 'g' },
    ['toz'] = { 'g' },
    ['mi'] =  { 'km' },
    ['F'] =   { 'C' },
    ['°F'] =  { '°C' },
    ['sqft'] ={ 'm2' },
    ['sqyd'] ={ 'm2' },
    ['sqmi'] ={ 'km2' },
    ['USgal']={ 'L' },
    ['USfloz']={ 'cL' },
    ['USpint']={ 'dL' },
    ['gal']=  { 'L' },
    ['floz']= { 'cL' },
    ['pint']= { 'dL' },
    -- Metric units default to various non-metric units, according to SI prefix.
    -- Both spellings of hecto (h, H) and deca (da, D) are listed so that an
    -- alias gives the same default as the proper prefix.
    ['g'] =   { 'lb',     prefixes = true},
    ['m'] =   { 'ft',     prefixes = true,  m = 'in', c = 'in', d = 'in', da = 'yd', D = 'yd', h = 'yd', H = 'yd', k = 'mi', M = 'mi', G = 'mi' },
    ['K'] =   { 'C' },
    ['C'] =   { 'F' },
    ['°K'] =  { '°C' },
    ['°C'] =  { '°F' },
    ['m2'] =  { 'sqyd' },
    ['a'] =   { 'acre',   prefixes = true},
    ['m3'] =  { 'cuyd' },
    ['L'] =   { 'pint',   prefixes = true, m = 'floz', c = 'floz', d = 'floz', da = 'gal', D = 'gal', h = 'gal', H = 'gal'},
}

-------END DATA TABLE-----

-- Module-wide settings; replaced by the result of get_config on each call
-- so that magic values live in one place.
local config = {}

local function get_config(frame)
    -- Build and return the table of configuration options for this call.
    -- May not be strictly needed at present, but it eases adapting the
    -- code to a different wiki.
    -- Start with the overridable defaults:
    --   numdot = decimal mark before fractional digits
    --   numsep = thousands separator for numbers
    local options = { numdot = '.', numsep = ',' }
    -- Every argument given in the template's {{#invoke:}} overrides or
    -- extends the defaults.
    for name, setting in frame:argumentPairs() do
        options[name] = setting
    end
    -- Mandatory settings are applied last so a template cannot change them
    -- (to limit abuse): maximum number of significant figures.
    options.maxsigfig = 20
    return options
end

local function withseparator(text)
    -- Return string for a number with thousand separators inserted.
    -- Parameter text is a string like "-12345" or "12345.6789".
    -- Separator is inserted only in the integer part (not in fraction).
    -- Four-digit integer parts have a separator (like "1,234").
    -- Separators already present in the integer part are removed first, so
    -- text that was typed as "1,234" is not turned into "1,,234".
    -- If no separator is configured (nil or empty), text is returned as is.
    local numsep = config.numsep
    if numsep == nil or #numsep == 0 then
        return text
    end
    local numdot = config.numdot or '.'
    local first = 1
    local sign = text:sub(1, 1)
    if sign == '+' or sign == '-' then
        -- To handle Unicode minus (multibyte), perhaps use following:
        -- first = text:find('%d')
        first = 2
    else
        sign = ''
    end
    -- Split off everything from the decimal mark onwards (plain find, so a
    -- '.' decimal mark is not treated as a pattern).
    local dot = text:find(numdot, first, true)
    local last = #text
    if dot ~= nil then
        last = dot - 1
    end
    local fraction = text:sub(last + 1)
    -- Escape punctuation so the separator is matched literally by gsub.
    local escaped = numsep:gsub('%p', '%%%0')
    local integer = text:sub(first, last):gsub(escaped, '')
    -- Peel groups of three digits off the right while more than three remain.
    local grouped = ''
    local pos = #integer
    while pos > 3 do
        grouped = numsep .. integer:sub(pos - 2, pos) .. grouped
        pos = pos - 3
    end
    return sign .. integer:sub(1, pos) .. grouped .. fraction
end

local function formatnumber(value, sigfig)
    -- Return result of converting number 'value' to a string,
    -- rounded to 'sigfig' significant figures.
    -- sigfig is clamped to the range 1 .. config.maxsigfig.
    -- The result is in plain (non-exponent) notation and uses
    -- config.numdot as the decimal mark; a negative value gets a leading '-'.
    local format = string.format
    local rep = string.rep
    local numdot = config.numdot
    if sigfig <= 0 then
        sigfig = 1
    elseif sigfig > config.maxsigfig then
        sigfig = config.maxsigfig
    end
    if value == 0 then
        -- Zero has no leading digit to scale from: show '0' padded with
        -- zeroes after the dot to make up the requested figures.
        if sigfig <= 1 then
            return '0'
        end
        return '0' .. numdot .. rep('0', sigfig - 1)
    end
    local sign = ''
    if value < 0 then
        sign = '-'  -- need proper Unicode minus
        value = -value
    end
    -- Let '%e' do the rounding: it yields exactly sigfig digits and already
    -- accounts for a carry into the next power of ten (9.99 at two figures
    -- becomes 1.0e+01), which hand-made log10 arithmetic gets wrong.
    local scientific = format('%.' .. format('%d', sigfig - 1) .. 'e', value)
    -- The mark inside 'scientific' comes from string.format (normally '.'),
    -- not from config.numdot; accept ',' too in case of a locale.
    local lead, rest, exponent = scientific:match('^(%d)[%.,]?(%d*)[eE]([+-]?%d+)$')
    if lead == nil then
        -- Not a finite number (inf or nan): nothing to lay out.
        return sign .. scientific
    end
    local digits = lead .. rest
    local exp = tonumber(exponent) + 1  -- adjust so dot is before digits
    if exp >= #digits then
        digits = digits .. rep('0', exp - #digits)  -- result has no dot
    elseif exp <= 0 then
        digits = '0' .. numdot .. rep('0', -exp) .. digits
    else
        digits = digits:sub(1, exp) .. numdot .. digits:sub(exp+1)
    end
    return sign .. digits
end

local function require_number(value, missing, invalid)
    -- Return true, n where n = number equivalent to given value (or false, message).
    --   value:   string (or number) to interpret; nil gives the 'missing' message
    --   missing: message returned when value is nil
    --   invalid: format string (with one %s for the given value) returned
    --            when value is not a finite number
    -- Thousand separators (valid or not) are first removed, and the
    -- configured decimal mark is accepted in place of '.'.
    if value == nil then return false, missing end
    if type(value) == 'number' then return true, value end
    local text = value
    local numsep = config.numsep
    if numsep ~= nil and #numsep > 0 then
        -- Escape punctuation: gsub takes a pattern, and an unescaped '.'
        -- separator would delete every character of the value.
        text = (text:gsub(numsep:gsub('%p', '%%%0'), ''))
    end
    local numdot = config.numdot
    if numdot ~= nil and #numdot > 0 and numdot ~= '.' then
        -- tonumber only understands '.', so translate the first decimal mark.
        text = (text:gsub(numdot:gsub('%p', '%%%0'), '.', 1))
    end
    local number = tonumber(text)
    -- Reject nan and infinities as well: tonumber can accept 'inf'/'nan'
    -- on some platforms and they cannot be formatted sensibly.
    if number == nil or number ~= number or number == math.huge or number == -math.huge then
        return false, invalid:format(value)
    end
    return true, number
end

local function require_integer(value, missing, invalid)
    -- Return true, n where n = integer equivalent to given value (or false, message).
    -- The value is first interpreted by require_number (whose failure is
    -- passed straight back); a number with a fractional part gives the
    -- 'invalid' message formatted with the given value.
    local ok, result = require_number(value, missing, invalid)
    if ok then
        if result == math.floor(result) then
            return true, result
        end
        return false, invalid:format(value)
    end
    return ok, result
end

local function get_parms(pframe)
    -- Return true, t where t is a table holding every argument passed to
    -- the template (or false, message if a required argument is bad).
    -- The positional arguments are interpreted and stored as named ones:
    --   in_text, in_text2 (strings given for value, value2)
    --   value, in_unit, out_unit, value2, range, round_to
    -- Apart from range (nil or a table), a user of the template could give
    -- these named arguments directly.
    -- Positional layout:   value, in_unit, out_unit, round_to
    -- or, for a range:     value, range word, value2, in_unit, out_unit, round_to
    local separators = {  -- text to separate input, output ranges
        ['and'] = {' and ', ' and '},
        ['by'] = {' by ', ' by '},
        ['to'] = {' to ', ' to '},
        ['-'] = {'–', '–'},
        ['to(-)'] = {' to ', '–'},
        ['x'] = {' by ', ' × '},
        ['+/-'] = {' ± ', ' ± '},
    }
    local args = {}                         -- arguments passed to template
    for name, setting in pframe:argumentPairs() do
        args[name] = setting
    end

    local ok, result
    args.in_text = args[1]
    ok, result = require_number(args.in_text, 'Need value', 'Value "%s" must be a number')
    if not ok then
        return ok, result
    end
    args.value = result

    -- The second argument is either the input unit or a range word; a
    -- range word shifts the unit (and all that follows) two places along.
    local unit_index = 2
    local range = separators[args[2]]
    if range ~= nil then
        args.in_text2 = args[3]
        ok, result = require_number(args.in_text2, 'Need second value', 'Second value "%s" must be a number')
        if not ok then
            return ok, result
        end
        args.value2 = result
        unit_index = 4
    end

    local in_unit = args[unit_index]
    if in_unit == nil then
        return false, 'Need input unit'
    end
    local out_unit = args[unit_index + 1]
    local positional_round_to = args[unit_index + 2]
    args.in_unit = in_unit
    args.out_unit = out_unit
    args.range = range
    args.round_to = args.round_to or positional_round_to  -- allow named parameter
    return true, args
end

local function default_roundto(intext, factor)
    -- Return a default value for round_to (an integer like 2, 0, -2).
    --   intext: the input number as typed (its precision is what matters)
    --   factor: output/input ratio, or nil if the conversion is not a
    --           simple multiplication
    -- prec = (precision implied in intext)
    --      = (#digits after dot, or negative of #zeroes before dot)
    -- If conversion is multiplication by a factor, and
    -- if factor >= 0.02, compensate prec by adding N where:
    --     N    factor is in range
    --     1     .02  :   .2   =    .1/5 :   .1*2
    --     0     .2   :   2    =    1/5  :   1*2
    --    -1     2    :  20    =   10/5  :  10*2
    --    -2    20    : 200    =  100/5  : 100*2  etc.
    -- TODO Exception required for temperature.
    -- Both working variables are local so that nothing leaks into (or is
    -- picked up from) the global environment.
    local prec = 0
    local dot = intext:find('.', 1, true)
    if dot ~= nil then
        prec = intext:sub(dot+1):len()
        if prec == 0 then
            -- Text like "120." has a dot but no fraction: drop the dot so
            -- the trailing zeroes can be counted below.
            intext = intext:sub(1, -2)
        end
    end
    if prec == 0 then
        prec = -intext:match('0*$'):len()
    end
    if factor ~= nil and factor >= 0.02 then
        prec = prec - math.floor(math.log10(factor*5))
    end
    return prec
end

local function scaled(value, in_unit, out_unit)
    -- Return the value converted from in_unit to out_unit (simple convert).
    -- Each unit table supplies 'scale' and 'offset': the input offset is
    -- removed, the ratio of the scales applied, and the output offset added.
    local ratio = in_unit.scale / out_unit.scale
    local shifted = value - in_unit.offset
    return shifted * ratio + out_unit.offset
end

local function cvtround(invalue, intext, parms)
    -- Return true, s where s = rounded, formatted string from converting invalue,
    -- using the rounding specified in parms (s = '' if invalue == nil).
    -- Return false, message if a rounding parameter is invalid.
    --   invalue: number to convert (nil when there is no second value)
    --   intext:  the text the user typed for invalue (gives default precision)
    --   parms:   table with in_unit_table, out_unit_table and the optional
    --            round_to, sigfig and disp settings
    -- Rounding is chosen in this order: round_to, sigfig, disp == '5',
    -- otherwise a default derived from the precision of intext.
    -- This code combines convert/round because some rounding requires
    -- knowledge of what we are converting.
    -- TODO Lots of checking required. Will need tweaks for special cases
    -- handled by old Template:Convert.
    local text = ''
    if invalue == nil then return true, text end
    local outvalue = scaled(invalue, parms.in_unit_table, parms.out_unit_table)
    local round_to = parms.round_to
    local sigfig = parms.sigfig
    local disp = parms.disp
    local auto = false
    local success
    -- An empty parameter (as in {{convert|5|m|ft|}}) means "not given".
    if round_to == '' then round_to = nil end
    if sigfig == '' then sigfig = nil end
    if round_to then
        -- Ignore sigfig, disp.
        success, round_to = require_integer(round_to, 'Need value', 'round_to "%s" must be an integer')
        if not success then return success, round_to end
    elseif sigfig then
        -- Ignore disp.
        success, sigfig = require_integer(sigfig, 'Need value', 'sigfig "%s" must be an integer')
        if not success then return success, sigfig end
        if sigfig <= 0 then
            local msg = 'sigfig "%s" must be positive'
            return false, msg:format(parms.sigfig)
        end
        text = formatnumber(outvalue, sigfig)
    elseif disp == '5' then
        -- Round to the nearest multiple of 5, halves away from zero.
        local rounded = math.floor((math.abs(outvalue) / 5) + 0.5) * 5
        if outvalue < 0 and rounded ~= 0 then
            -- Do not negate zero: '%.0f' would print negative zero as '-0'.
            rounded = -rounded
        end
        text = string.format('%.0f', rounded)
    else
        auto = true  -- using default rounding
        -- TODO If conversion is not multiplication by a number, need factor = nil.
        local factor = outvalue / invalue
        round_to = default_roundto(intext, factor)
    end
    if round_to then
        if round_to >= 0 then
            if auto then
                -- TODO No less than two significant figures.
            end
            -- Cap the number of decimals: string.format raises an error
            -- for a very large precision, and digits beyond the configured
            -- limit are meaningless for a double anyway.
            local maxdecimals = config.maxsigfig or 20
            if round_to > maxdecimals then
                round_to = maxdecimals
            end
            -- It seems format('%d', x) uses modf to extract integer from x
            -- with result '0' if x is 0 or -0 (negative zero).
            -- Using format('%.0f', x) gives '-0' if x is negative zero.
            local fmt = '%.' .. string.format('%d', round_to) .. 'f'
            text = string.format(fmt, outvalue)
        else
            -- This always keeps two sig figs. Should that be done if not auto?
            local zeroes = -round_to  -- #digits that want to zero
            local maxzeroes = 0  -- maximum #digits that should be zeroed
            -- Judge the size by magnitude so negative results are rounded
            -- the same way as positive ones.
            local magnitude = math.abs(outvalue)
            if magnitude > 100 then
                maxzeroes = math.modf(math.log10(magnitude)) - 1
            end
            if zeroes > maxzeroes then
                zeroes = maxzeroes
            end
            if zeroes > 0 then
                local head = string.format('%.0f', outvalue/(10^zeroes))
                text = head .. string.rep('0', zeroes)
            else
                text = formatnumber(outvalue, 2)  -- can't zero digits; keep 2 sig figs
            end
        end
    end
    return true, withseparator(text)
end

-- Output layouts for a single value, selected by the 'disp' parameter.
-- Each is a format string taking, in order:
-- input value, input unit, output value, output unit.
local disp_single = {
    b = '%s %s (%s %s)',
    comma = '%s %s, %s %s',
    sqbr = '%s %s [%s %s]',
    ['or'] = '%s %s or %s %s',
}

-- Output layouts for a range of two values, selected by the 'disp' parameter.
-- Each is a format string taking, in order:
-- input value 1, input range text, input value 2, input unit,
-- output value 1, output range text, output value 2, output unit.
local disp_double = {
    b = '%s%s%s %s (%s%s%s %s)',
    comma = '%s%s%s %s, %s%s%s %s',
    sqbr = '%s%s%s %s [%s%s%s %s]',
    ['or'] = '%s%s%s %s or %s%s%s %s',
}

local function process(parms)
    -- Return true, s where s = final wikitext result (or false, message).
    -- parms is the table built by get_parms; the unit converter tables
    -- (in_unit_table, out_unit_table) and, if it was not given, the
    -- default out_unit are stored into it along the way.
    local success, t
    success, t = units:lookup(parms.in_unit)
    if not success then return success, t else parms.in_unit_table = t end
    -- A missing output unit, or an empty one as in {{convert|5|m||2}},
    -- selects the default output unit for the input unit.
    if parms.out_unit == nil or parms.out_unit == '' then
        success, t = defaultunits:lookup(parms.in_unit_table)
        if not success then return success, t else parms.out_unit = t end
    end
    success, t = units:lookup(parms.out_unit)
    if not success then return success, t else parms.out_unit_table = t end
    if parms.in_unit_table.utype ~= parms.out_unit_table.utype then
        local msg = 'Cannot convert %s to %s.[[Category:Convert dimension mismatch]]'
        return false, msg:format(parms.in_unit_table.utype, parms.out_unit_table.utype)
    end
    local intext1, outtext1 = parms.in_text, nil
    local intext2, outtext2 = parms.in_text2, nil
    -- Convert using the text exactly as typed, since its precision sets
    -- the default rounding; separators for display are added afterwards.
    success, outtext1 = cvtround(parms.value, intext1, parms)
    if not success then return success, outtext1 end
    success, outtext2 = cvtround(parms.value2, intext2, parms)
    if not success then return success, outtext2 end
    local range = parms.range
    local disp = parms.disp
    local wikitext
    intext1 = withseparator(intext1)  -- TODO what if intext1 already has commas?
    if range == nil then
        wikitext = disp_single[disp] or disp_single['b']
        wikitext = wikitext:format(intext1, parms.in_unit, outtext1, parms.out_unit)
    else
        -- Format the second input value the same way as the first.
        intext2 = withseparator(intext2)
        wikitext = disp_double[disp] or disp_double['b']
        wikitext = wikitext:format(intext1, range[1], intext2, parms.in_unit, outtext1, range[2], outtext2, parms.out_unit)
    end
    return true, wikitext
end

-- Used by template {{convert2}}.
-- We will have to keep old {{convert}} for a long time, and run
-- {{convert2}} in parallel with {{convert}} while testing/developing.
local p = {}
local bodge = require "Module:mw" -- This fixes up mw.text.tag for us.

function p.convert(frame)
    -- Entry point for {{#invoke:}}: return the wikitext for one conversion,
    -- or a highlighted error message if the arguments are not usable.
    -- The settings come from the invoking frame; the values to convert come
    -- from the parent frame (the template call).
    config = get_config(frame)
    -- getParent gives nothing when the module is invoked directly rather
    -- than through a template; fall back to the frame's own arguments.
    local pframe = frame:getParent() or frame
    local success, parms, text
    success, parms = get_parms(pframe)
    if success then
        success, text = process(parms)
    else
        -- On failure get_parms returns its message in place of the table.
        text = parms
    end
    if not success then
        local params = {style="color:black; background-color:orange;"}
        text=mw.text.tag({name="span", contents="[[Module talk:Convert|Conversion error]]: " .. text, params=params})
    end
    return text
end

return p