-- Modul:ConvertNumeric/sandbox

-- Module for converting between different representations of numbers. See talk page for user documentation.
-- For unit tests see: [[Module:ConvertNumeric/tests]]

-- Cardinal words for 0 through 19, indexed by value.
-- Index 0 is given explicitly; the positional entries that follow occupy
-- indices 1..19 in order.
local ones_position = {
    [0] = 'nol',
    'satu', 'dua', 'tiga', 'empat', 'lima',
    'enam', 'tujuh', 'delapan', 'sembilan', 'sepuluh',
    'sebelas', 'dua belas', 'tiga belas', 'empat belas', 'lima belas',
    'enam belas', 'tujuh belas', 'delapan belas', 'sembilan belas',
}

-- Ordinal words for 0 through 19, indexed by value.
-- Index 0 is given explicitly; the positional entries that follow occupy
-- indices 1..19 in order.
local ones_position_ord = {
    [0] = 'ke-nol',
    'kesatu', 'kedua', 'ketiga', 'keempat', 'kelima',
    'keenam', 'ketujuh', 'kedelapan', 'kesembilan', 'kesepuluh',
    'kesebelas', 'kedua belas', 'ketiga belas', 'keempat belas', 'kelima belas',
    'keenam belas', 'ketujuh belas', 'kedelapan belas', 'kesembilan belas',
}

-- Plural ("-an" suffixed) words for 0 through 19, indexed by value.
-- Index 0 is given explicitly; the positional entries that follow occupy
-- indices 1..19 in order.
local ones_position_plural = {
    [0] = 'nolan',
    'satuan', 'duaan', 'tigaan', 'empatan', 'limaan',
    'enaman', 'tujuhan', 'delapanan', 'sembilanan', 'sepuluhan',
    'sebelasan', 'dua belasan', 'tiga belasan', 'empat belasan', 'lima belasan',
    'enam belasan', 'tujuh belasan', 'delapan belasan', 'sembilan belasan',
}

-- Cardinal words for round tens, indexed by the tens digit (2..9).
-- Index 10 additionally holds the word for one hundred.
local tens_position = {}
for offset, unit in ipairs({ 'dua', 'tiga', 'empat', 'lima', 'enam', 'tujuh', 'delapan', 'sembilan' }) do
    -- The first unit word ('dua') belongs to tens digit 2, hence the +1.
    tens_position[offset + 1] = unit .. ' puluh'
end
tens_position[10] = 'seratus'

-- Ordinal words for round tens, indexed by the tens digit (2..9).
local tens_position_ord = {
    [2] = 'kedua puluh',
    [3] = 'ketiga puluh',
    [4] = 'keempat puluh',
    [5] = 'kelima puluh',   -- was misspelled 'kelimah puluh'
    [6] = 'keenam puluh',
    [7] = 'ketujuh puluh',
    [8] = 'kedelapan puluh',
    [9] = 'kesembilan puluh'
}

-- Plural ("-an" suffixed) words for round tens, indexed by the tens digit (2..9).
local tens_position_plural = {}
for offset, unit in ipairs({ 'dua', 'tiga', 'empat', 'lima', 'enam', 'tujuh', 'delapan', 'sembilan' }) do
    -- The first unit word ('dua') belongs to tens digit 2, hence the +1.
    tens_position_plural[offset + 1] = unit .. ' puluhan'
end

-- Names of the powers of one thousand, indexed by group number n (value 10^(3n)).
-- The table is sparse above index 41: only selected group numbers have a name,
-- so lookups for other indices yield nil.
-- NOTE(review): entries 201 and above still carry the English "-illion"
-- spelling; confirm the preferred Indonesian forms before changing them.
local groups = {
    [1] = 'ribu',
    [2] = 'juta',
    [3] = 'milyar',
    [4] = 'triliun',
    [5] = 'quadriliun',
    [6] = 'quintiliun',
    [7] = 'sekstiliun',
    [8] = 'septiliun',
    [9] = 'oktiliun',
    [10] = 'noniliun',
    [11] = 'desiliun',
    [12] = 'undesiliun',
    [13] = 'duodesiliun',
    [14] = 'tredesiliun',
    [15] = 'quattuordesiliun',
    [16] = 'quindesiliun',
    [17] = 'seksdesiliun',
    [18] = 'septendesiliun',
    [19] = 'oktodesiliun',
    [20] = 'novemdesiliun',
    [21] = 'vigintiliun',
    [22] = 'unvigintiliun',
    [23] = 'duovigintiliun',
    [24] = 'tresvigintiliun',
    [25] = 'quattuorvigintiliun',
    [26] = 'quinquavigintiliun',
    [27] = 'sesvigintiliun',
    [28] = 'septemvigintiliun',
    [29] = 'octovigintiliun',
    [30] = 'novemvigintiliun',
    [31] = 'trigintiliun',
    [32] = 'untrigintiliun',
    [33] = 'duotrigintiliun',
    [34] = 'trestrigintiliun',
    [35] = 'quattuortrigintiliun',
    [36] = 'quinquatrigintiliun',
    [37] = 'sestrigintiliun',
    [38] = 'septentrigintiliun',
    [39] = 'octotrigintiliun',
    [40] = 'noventrigintiliun',
    [41] = 'quadragintiliun',
    [51] = 'quinquagintiliun',
    [61] = 'sexagintiliun',
    [71] = 'septuagintiliun',
    [81] = 'octogintiliun',
    [91] = 'nonagintiliun',
    [101] = 'sentiliun',
    [102] = 'unsentiliun',
    [103] = 'duosentiliun',   -- was 'duosentillion', inconsistent with its neighbours
    [104] = 'tresentiliun',
    [111] = 'desisentiliun',
    [112] = 'undesisentiliun',
    [121] = 'vigintisentiliun',
    [122] = 'unvigintisentiliun',
    [131] = 'trigintasentiliun',
    [141] = 'quadragintasentiliun',
    [151] = 'quinquagintasentiliun',
    [161] = 'sexagintasentiliun',
    [171] = 'septuagintasentiliun',
    [181] = 'octogintasentiliun',
    [191] = 'nonagintasentiliun',
    [201] = 'ducentillion',
    [301] = 'trecentillion',
    [401] = 'quadringentillion',
    [501] = 'quingentillion',
    [601] = 'sescentillion',
    [701] = 'septingentillion',
    [801] = 'octingentillion',
    [901] = 'nongentillion',
    [1001] = 'millinillion',
}

-- Value of each Roman numeral symbol, keyed by the upper-case symbol.
-- Deliberately left as a global, exactly as in the original definition.
roman_numerals = { I = 1, V = 5, X = 10, L = 50, C = 100, D = 500, M = 1000 }

-- Converts a valid Roman numeral (and some invalid ones) to a number.
-- Returns the numeric value, or -1 plus an error string on failure.
function roman_to_numeral(roman)
    if type(roman) ~= "string" then
        return -1, "angka Romawi bukan suatu string"
    end

    local total = 0
    local previous = 0
    local adding = true
    -- Walk the symbols from right to left: a symbol larger than the one to
    -- its right is added, a smaller one is subtracted, and an equal one
    -- repeats whatever was done for the symbol to its right.
    for pos = #roman, 1, -1 do
        local symbol = roman:sub(pos, pos)
        local value = roman_numerals[symbol]
        if value == nil then
            return -1, "angka Romawi memuat kesalahan karakter " .. symbol
        end
        if value ~= previous then
            adding = value > previous
        end
        if adding then
            total = total + value
        else
            total = total - value
        end
        previous = value
    end
    return total
end

        
            
    
    

-- Converts an integer between 0 and 100 into Indonesian words (e.g. 47 -> empat puluh tujuh).
-- num (number): the integer to convert
-- ordinal (boolean): produce the ordinal form (e.g. 47 -> keempat puluh tujuh);
--                    takes precedence over plural
-- plural (boolean): produce the plural form (e.g. 30 -> tiga puluhan)
-- Returns the words, or nil when the selected table has no entry for num
-- (only the cardinal tens table has an entry for 100).
function numeral_to_indonesian_less_100(num, ordinal, plural)
    if ordinal then
        -- The ordinal prefix "ke" attaches to the FIRST word of the number,
        -- so compound values take it on the tens word and keep a cardinal ones word.
        if num < 20 then
            return ones_position_ord[num]
        elseif num % 10 == 0 then
            return tens_position_ord[num / 10]
        end
        return tens_position_ord[math.floor(num / 10)] .. ' ' .. ones_position[num % 10]
    end

    local terminal_ones, terminal_tens = ones_position, tens_position
    if plural then
        -- The plural suffix attaches to the LAST word of the number.
        terminal_ones, terminal_tens = ones_position_plural, tens_position_plural
    end

    if num < 20 then
        return terminal_ones[num]
    elseif num % 10 == 0 then
        return terminal_tens[num / 10]
    else
        return tens_position[math.floor(num / 10)] .. ' ' .. terminal_ones[num % 10]
    end
end

-- Suffix appended to a round number: a single space when either the ordinal
-- or the plural form is requested, otherwise the empty string.
function standard_suffix(ordinal, plural)
    return (ordinal or plural) and ' ' or ''
end

-- Converts an integer between 0 and 1000 into Indonesian words (e.g. 147 -> seratus empat puluh tujuh).
-- num (number or numeric string): the integer to convert
-- use_and (boolean): accepted for interface compatibility; no connecting word
--                    is inserted between the hundreds and the remainder
-- ordinal (boolean): produce the ordinal form (e.g. 147 -> keseratus empat puluh tujuh)
-- plural (boolean): produce the plural form
function numeral_to_indonesian_less_1000(num, use_and, ordinal, plural)
    num = tonumber(num)
    if num < 100 then
        return numeral_to_indonesian_less_100(num, ordinal, plural)
    end

    local hundreds = math.floor(num / 100)
    local remainder = num % 100

    local head
    if hundreds == 1 then
        head = 'seratus'
    else
        head = ones_position[hundreds] .. ' ratus'
    end
    -- The ordinal prefix "ke" attaches to the first word of the number, so it
    -- goes on the hundreds word and the remainder stays cardinal.
    if ordinal then head = 'ke' .. head end

    if remainder == 0 then
        -- Exactly 100 has never carried the standard suffix.
        if hundreds == 1 then return head end
        return head .. standard_suffix(ordinal, plural)
    end

    -- Previously `plural` was passed in the `ordinal` slot and the real plural
    -- flag was dropped; pass both flags in their proper positions.
    return head .. ' ' .. numeral_to_indonesian_less_100(remainder, false, plural)
end

-- Converts a number expressed as a string in scientific notation to a string in
-- standard decimal notation, e.g. "1.23E5" -> "123000." and "1.23E-5" -> ".0000123".
-- The conversion is exact; no rounding is performed.
-- num (string): the number to convert
-- Returns num unchanged when it is not in scientific notation. Converted
-- results always contain a decimal point, possibly as the last character.
function scientific_notation_to_decimal(num)
    local exponent, subs = num:gsub("^%-?%d*%.?%d*%-?[Ee]([+%-]?%d+)$", "%1")
    if subs == 0 then return num end  -- Input is not in scientific notation, nothing to do
    exponent = tonumber(exponent)

    local negative = num:find("^%-")
    -- Declared local: these used to leak into the global environment.
    local _, decimal_pos = num:find("%.")
    -- The mantissa consists of all the significand digits without the decimal point
    local mantissa = num:gsub("^%-?(%d*)%.?(%d*)%-?[Ee][+%-]?%d+$", "%1%2")
    -- decimal_pos is an index into the mantissa, which has no leading sign
    if negative and decimal_pos then decimal_pos = decimal_pos - 1 end
    if not decimal_pos then decimal_pos = #mantissa + 1 end

    -- Remove leading zeros unless decimal point is in first position
    while decimal_pos > 1 and mantissa:sub(1,1) == '0' do
        mantissa = mantissa:sub(2)
        decimal_pos = decimal_pos - 1
    end
    -- Shift decimal point right for exponent > 0
    while exponent > 0 do
        decimal_pos = decimal_pos + 1
        exponent = exponent - 1
        if decimal_pos > #mantissa + 1 then mantissa = mantissa .. '0' end
        -- Remove leading zeros unless decimal point is in first position
        while decimal_pos > 1 and mantissa:sub(1,1) == '0' do
            mantissa = mantissa:sub(2)
            decimal_pos = decimal_pos - 1
        end
    end
    -- Shift decimal point left for exponent < 0
    while exponent < 0 do
        if decimal_pos == 1 then
            mantissa = '0' .. mantissa
        else
            decimal_pos = decimal_pos - 1
        end
        exponent = exponent + 1
    end

    -- Insert decimal point in correct position and return
    return (negative and '-' or '') .. mantissa:sub(1, decimal_pos - 1) .. '.' .. mantissa:sub(decimal_pos)
end

-- Rounds a number to the nearest integer; a fractional part of exactly one
-- half rounds towards positive infinity.
function round_num(x)
    local round_up = (x % 1) >= 0.5
    return round_up and math.ceil(x) or math.floor(x)
end

-- Rounds a number to the nearest two-word number.
-- num (string): decimal number, optionally with a leading minus sign
-- round (string): 'up', 'down', or 'on' (round to nearest)
-- Numbers with at most two digits before the decimal point are rounded to an
-- integer as specified by round. Larger numbers are rounded so that only the
-- leading digit (or the leading two digits when they are below 20 and sit at
-- the front of a three-digit group) is nonzero.
-- The negative sign is preserved; rounding is applied to the magnitude.
-- Input that cannot be parsed as a number is returned unchanged.
function round_for_english(num, round)
    -- If an integer with at most two digits, just return
    if num:find("^%-?%d?%d%.?$") then return num end

    local negative = num:find("^%-")
    if negative then
        -- We're rounding magnitude so flip it
        if round == 'up' then round = 'down' elseif round == 'down' then round = 'up' end
    end

    -- If at most two digits before decimal, round to integer and return
    local small_int, trailing_digits, first_decimal = num:match("^%-?(%d?%d?)%.((%d)%d*)$")
    if small_int then
        if small_int == '' then small_int = '0' end
        if (round == 'up' and trailing_digits:find('[1-9]')) or (round == 'on' and tonumber(first_decimal) >= 5) then
            small_int = tostring(tonumber(small_int) + 1)
        end
        return (negative and '-' or '') .. small_int
    end

    -- When rounding up, any number with > 1 nonzero digit will round up (e.g. 1000000.001 rounds up to 2000000)
    -- string.gmatch replaces string.gfind, which is deprecated and absent from Lua 5.2+.
    local nonzero_digits = 0
    for _ in num:gmatch("[1-9]") do
        nonzero_digits = nonzero_digits + 1
    end

    local integer_part = num:gsub("%.%d*$", "") -- Remove decimal part
    -- Second digit used to determine which way to round lead digit
    local lead_digit, round_digit, round_digit_2, rest = integer_part:match("^%-?(%d)(%d)(%d)(%d*)$")
    -- Not a number of three or more digits: hand the input back untouched
    -- instead of failing on a nil concatenation.
    if not lead_digit then return num end

    if tonumber(lead_digit .. round_digit) < 20 and (1 + #rest) % 3 == 0 then
        -- Numbers below 20 are a single term, so put 2 digits in lead and round based on 3rd
        lead_digit = lead_digit .. round_digit
        round_digit = round_digit_2
    else
        rest = round_digit_2 .. rest
    end

    if (round == 'up' and nonzero_digits > 1) or (round == 'on' and tonumber(round_digit) >= 5) then
        lead_digit = tostring(tonumber(lead_digit) + 1)
    end
    -- All digits but lead digit will turn to zero
    rest = rest:gsub("%d", "0")
    return (negative and '-' or '') .. lead_digit .. '0' .. rest
end

-- Takes a decimal number and converts it into Indonesian words.
-- num (string): the number to convert. Can be an arbitrarily large decimal, such as "-123456789123456789.345",
--               and can use scientific notation (e.g. "1.23E5"). Fails for very large numbers whose
--               group has no entry in "groups", such as "1E4000".
-- capitalize (boolean): whether to capitalize the first letter of the result
-- use_and (boolean): whether to put the word 'dan' between the higher groups and a final part of at most two digits
-- hyphenate (boolean): whether to replace every whitespace character in the result with a hyphen
-- ordinal (boolean): whether to produce an ordinal (e.g. 'kelima' instead of 'lima')
-- plural (boolean): whether to pluralize the resulting number
-- links: nil or '': do not add any links; 'on': link "milyar" and larger to the Orders of magnitude article;
--        any other text: list of group names to link (e.g. milyar,triliun)
-- negative_word: word to use for the negative sign
-- round: nil or '': no rounding; 'on': round to nearest two-word number; 'up'/'down': round up/down to two-word number
-- Returns the words, or a short error message string for an invalid round mode or an invalid number.
function _numeral_to_indonesian(num, capitalize, use_and, hyphenate, ordinal, plural, links, negative_word, round)
    num = scientific_notation_to_decimal(num)

    if round and round ~= '' and round ~= 'on' and round ~= 'up' and round ~= 'down' then return 'Mode pembulatan invalid' end
    if round and round ~= '' then num = round_for_english(num, round) end

    -- Split into the negative sign, num (digits before the decimal point) and decimal_places (digits after it)
    local negative = num:find("^%-")
    local decimal_places, subs = num:gsub("^%-?%d*%.(%d+)$", "%1")
    if subs == 0 then decimal_places = nil end
    num, subs = num:gsub("^%-?(%d*)%.?%d*$", "%1")
    if num == '' and decimal_places then num = '0' end
    if subs == 0 or num == '' then return 'Angka desimal invalid' end

    -- nil is documented as "no links"; normalise it so the lookups below cannot fail on it
    links = links or ''

    -- For each group of 3 digits except the last one, print with appropriate group name (e.g. juta)
    local s = ''
    while #num > 3 do
        if s ~= '' then s = s .. ' ' end
        local group_num = math.floor((#num - 1) / 3)
        local group = groups[group_num]
        if group == nil then
            -- Known limitation: not every group number has a name. Fail with a clear message.
            error('ConvertNumeric: no group name defined for 10^' .. group_num * 3)
        end
        local group_digits = #num - group_num*3
        s = s .. numeral_to_indonesian_less_1000(num:sub(1, group_digits), false, false, false) .. ' '
        -- Plain find: the group name is literal text, not a pattern
        local wants_link = (links == 'on' and group_num >= 3) or links:find(group, 1, true)
        if wants_link and group_num <= 13 then
            s = s .. '[[Orders_of_magnitude_(numbers)#10' .. group_num*3 .. '|' .. group .. ']]'
        else
            s = s .. group
        end
        num = num:sub(1 + group_digits)
        num = num:gsub("^0*", "")   -- Strip leading zeros
    end

    -- Handle final three digits of integer part
    local has_higher_groups = s ~= ''
    if has_higher_groups and num ~= '' then
        -- The original tested an undefined global here, so use_and = false was never honoured
        if #num > 2 or not use_and then
            s = s .. ' '
        else
            s = s .. ' dan '
        end
    end
    if not has_higher_groups or num ~= '' then
        -- When higher groups exist the ordinal prefix belongs on the very first
        -- word (added below), so the final part is rendered as a cardinal.
        s = s .. numeral_to_indonesian_less_1000(num, use_and, ordinal and not has_higher_groups, plural)
    elseif ordinal or plural then
        -- Round numbers like "satu juta" take the standard suffix for ordinal/plural
        s = s .. standard_suffix(ordinal, plural)
    end
    -- The ordinal prefix "ke" attaches to the first word of the whole number
    if ordinal and has_higher_groups then s = 'ke' .. s end

    -- For decimal places (if any) output the separator word followed by spelling out digit by digit
    -- NOTE(review): 'point' looks like an untranslated English leftover ('koma' expected) -- confirm before changing output
    if decimal_places then
        s = s .. ' point'
        for i = 1, #decimal_places do
            s = s .. ' ' .. ones_position[tonumber(decimal_places:sub(i,i))]
        end
    end

    s = s:gsub("^%s*(.-)%s*$", "%1")   -- Trim whitespace
    -- NOTE(review): this 's' suffix is inherited from the English module and is probably meaningless here -- confirm
    if ordinal and plural then s = s .. 's' end
    -- Negative zero is still just zero; the word produced for zero is 'nol'
    if negative and s ~= 'nol' then s = negative_word .. ' ' .. s end
    if hyphenate then s = s:gsub("%s", "-") end
    if capitalize then s = s:gsub("^%l", string.upper) end

    return s
end

local p = {}

-- Template entry point: spells out the number given as the first argument.
-- Reads the frame arguments 1 (the number or an expression), case, sp, adj,
-- ord, pl, lk, negative and round, and forwards them to _numeral_to_indonesian.
-- A missing first argument is treated as an empty string rather than raising an error.
function p.numeral_to_indonesian(frame)
    local num = frame.args[1] or ''
    num = num:gsub("^%s*(.-)%s*$", "%1")   -- Trim whitespace
    num = num:gsub(",", "")   -- Remove commas
    if not num:find("^%-?%d*%.?%d*%-?[Ee]?[+%-]?%d*$") then
        -- Input not in a valid format, try to pass it through #expr to see if that produces a number
        -- (e.g. "3 + 5" will become 8)
        num = frame:preprocess('{{#expr: ' .. num .. '}}')
    end

    -- Convert all args passed through frame into normal arguments to helper function
    local case = frame.args['case']
    local sp = frame.args['sp']
    local adj = frame.args['adj']
    local ord = frame.args['ord']
    local pl = frame.args['pl']
    local lk = frame.args['lk'] or ''
    -- NOTE(review): the default negative word is still English -- confirm the intended Indonesian word
    local negative = frame.args['negative'] or 'negative'
    local round = frame.args['round']
    return _numeral_to_indonesian(num, case == 'U' or case == 'u', sp ~= 'us', adj == 'on', ord == 'on', pl == 'on', lk, negative, round)
end

---- Recursive helper for p.decToHex: renders a non-negative integer (a number
---- or a numeric string) as upper-case hexadecimal digits.
---- The recursive call is not a tail call, because its result is concatenated
---- with the last digit afterwards.
function decToHexDigit(dec)
   local hex_digits = {"0","1","2","3","4","5","6","7","8","9","A","B","C","D","E","F"}
   local quotient = math.floor(dec/16)
   local remainder = dec-(16*quotient)
   -- The digit table is 1-based, hence the +1
   local last_digit = hex_digits[remainder+1]
   if quotient >= 1 then
      -- Higher-order digits first, then this one
      return decToHexDigit(quotient)..last_digit
   end
   return last_digit
end
---- Finds all the decimal numbers in the input text and replaces each of them
---- with its hexadecimal form; all other text is passed through unchanged.
---- Arguments (taken from the frame, falling back to the parent frame):
----   1: the text to process (defaults to the empty string)
----   minlength: minimum number of hex digits per number, left-padded with
----              zeros (defaults to 1, also when the value is not numeric)
function p.decToHex(frame)
   local args=frame.args
   local parent=frame.getParent(frame)
   local pargs={}
   if parent then pargs=parent.args end
   local text=args[1] or pargs[1] or ""
   -- Fall back to 1 when minlength is absent or not a number, instead of
   -- failing later on a comparison with nil
   local minlength=tonumber(args.minlength or pargs.minlength) or 1
   local pieces={}
   -- Each match yields the non-digit text before a number, then the number itself
   for chaff,dec in mw.ustring.gmatch(text,"(.-)(%d+)") do
      local hex=decToHexDigit(dec)
      while (mw.ustring.len(hex)<minlength) do hex="0"..hex end
      pieces[#pieces+1]=chaff
      pieces[#pieces+1]=hex
   end
   -- Text after the last number is not covered by the matches above
   local tail=mw.ustring.match(text,"(%D+)$") or ""
   pieces[#pieces+1]=tail
   return table.concat(pieces)
end

return p