Module:Wikidata

From Random Island Wiki
Revision as of 12:30, 7 May 2015 by >Izkala (`time' is now (?) in ISO format, somebody can properly integrate this with the rest of the code later)
Jump to navigation Jump to search

Documentation for this module may be created at Module:Wikidata/doc

local p = {}

-- module local variables
local wiki = 
{
	langcode = mw.language.getContentLanguage().code
}

-- internationalisation
local i18n = {
    ["errors"] = {
        ["property-not-found"] = "Property not found.",
        ["entity-not-found"] = "Wikidata entity not found.",
        ["unknown-claim-type"] = "Unknown claim type.",
        ["unknown-entity-type"] = "Unknown entity type.",
        ["qualifier-not-found"] = "Qualifier not found.",
        ["site-not-found"] = "Wikimedia project not found.",
    },
    ["datetime"] =
	{
		-- $1 is a placeholder for the actual number
		[0] = "$1 billion years",	-- precision: billion years
		[1] = "$100 million years",	-- precision: hundred million years
		[2] = "$10 million years",	-- precision: ten million years
		[3] = "$1 million years",	-- precision: million years
		[4] = "$100,000 years",		-- precision: hundred thousand years
		[5] = "$10,000 years",		-- precision: ten thousand years
		[6] = "$1  millenium",	 	-- precision: millennium
		[7] = "$1 century",			-- precision: century
		[8] = "$1s",				-- precision: decade
		-- the following use the format of #time parser function
		[9]  = "Y",					-- precision: year, 
		[10] = "F Y",				-- precision: month
		[11] = "F j, Y",			-- precision: day
		[12] = "F j, Y ga",			-- precision: hour
		[13] = "F j, Y g:ia",		-- precision: minute
		[14] = "F j, Y g:i:sa",		-- precision: second
		["beforenow"] = "$1 BCE",	-- how to format negative numbers for precisions 0 to 5
		["afternow"] = "$1 CE",		-- how to format positive numbers for precisions 0 to 5
		["bc"] = '$1 "BCE"',		-- how print negative years
		["ad"] = "$1"				-- how print positive years
	},
	["monolingualtext"] = '<span lang="%language">%text</span>',
	["warnDump"] = "[[Category:Called function 'Dump' from module Wikidata]]"
}

function p.descriptionIn(frame)
	local langcode = frame.args[1]
	local id = frame.args[2]	-- "id" must be nil, as access to other Wikidata objects is disabled in Mediawiki configuration
	-- return description of a Wikidata entity in the given language or the default language of this Wikipedia site
	return mw.wikibase.getEntityObject(id).descriptions[langcode or wiki.langcode].value
end

function p.labelIn(frame)
	local langcode = frame.args[1]
	local id = frame.args[2]	-- "id" must be nil, as access to other Wikidata objects is disabled in Mediawiki configuration
	-- return label of a Wikidata entity in the given language or the default language of this Wikipedia site
	return mw.wikibase.getEntityObject(id).labels[langcode or wiki.langcode].value
end

-- This is used to get a value, or a comma separated list of them if multiple values exist
p.getValue = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")
	local input_parm = mw.text.trim(frame.args[2] or "")
	if input_parm == "FETCH_WIKIDATA" then
		local entity = mw.wikibase.getEntityObject()
		local claims = entity.claims[propertyID]
		if claims then
			-- if wiki-linked value output as link if possible
			if (claims[1] and claims[1].mainsnak.snaktype == "value" and claims[1].mainsnak.datavalue.type == "wikibase-entityid") then
				local out = {}
				for k, v in pairs(claims) do
					local sitelink = mw.wikibase.sitelink("Q" .. v.mainsnak.datavalue.value["numeric-id"])
					local label = mw.wikibase.label("Q" .. v.mainsnak.datavalue.value["numeric-id"])
					if label == nil then label = "Q" .. v.mainsnak.datavalue.value["numeric-id"] end
							
					if sitelink then
						out[#out + 1] = "[[" .. sitelink .. "|" .. label .. "]]"
					else
						out[#out + 1] = "[[:d:Q" .. v.mainsnak.datavalue.value["numeric-id"] .. "|" .. label .. "]]<abbr title='Article is not yet available in this wiki'>[*]</abbr>"
					end
				end
				return table.concat(out, ", ")
			else
				return entity:formatPropertyValues(propertyID, mw.wikibase.entity.claimRanks).value
			end
		else
			return ""
		end
	else
		return input_parm
	end
end

p.getQualifierValue = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")
	local qualifierID = mw.text.trim(frame.args[2] or "")
	local input_parm = mw.text.trim(frame.args[3] or "")
	if input_parm == "FETCH_WIKIDATA" then
		local entity = mw.wikibase.getEntityObject()
		if entity.claims[propertyID] ~= nil then
			local out = {}
			for k, v in pairs(entity.claims[propertyID]) do
				for k2, v2 in pairs(v.qualifiers[qualifierID]) do
					if v2.snaktype == 'value' then
						if (mw.wikibase.sitelink("Q" .. v2.datavalue.value["numeric-id"])) then
							out[#out + 1] = "[[" .. mw.wikibase.sitelink("Q" .. v2.datavalue.value["numeric-id"]) .. "]]"
						else
							out[#out + 1] = "[[:d:Q" .. v2.datavalue.value["numeric-id"] .. "|" .. mw.wikibase.label("Q" .. v2.datavalue.value["numeric-id"]) .. "]]<abbr title='Article is not yet available in this wiki'>[*]</abbr>"
						end
					end
				end
			end
			return table.concat(out, ", ")
		else
			return ""
		end
	else
		return input_parm
	end
end

-- This is used to get a value like 'male' (for property p21) which won't be linked and numbers without the thousand separators
p.getRawValue = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")
	local input_parm = mw.text.trim(frame.args[2] or "")
	if input_parm == "FETCH_WIKIDATA" then
		local entity = mw.wikibase.getEntityObject()
		if entity then claims = entity.claims[propertyID] end
		if claims then
			local result = entity:formatPropertyValues(propertyID, mw.wikibase.entity.claimRanks).value
		
			-- if number type: remove thousand separators
			if (claims[1] and claims[1].mainsnak.snaktype == "value" and claims[1].mainsnak.datavalue.type == "quantity") then
				result = mw.ustring.gsub(result, "(%d),(%d)", "%1%2")
			end
			return result
		else
			return ""
		end
	else
		return input_parm
	end
end

p.getRawQualifierValue = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")
	local qualifierID = mw.text.trim(frame.args[2] or "")
	local input_parm = mw.text.trim(frame.args[3] or "")
	if input_parm == "FETCH_WIKIDATA" then
		local entity = mw.wikibase.getEntityObject()
		if entity.claims[propertyID] ~= nil then
			local out = {}
			for k, v in pairs(entity.claims[propertyID]) do
				for k2, v2 in pairs(v.qualifiers[qualifierID]) do
					if v2.snaktype == 'value' then
						if v2.datavalue.value["numeric-id"] then
							out[#out + 1] = mw.wikibase.label("Q" .. v2.datavalue.value["numeric-id"])
						else
							out[#out + 1] = v2.datavalue.value
						end
					end
				end
			end
			local ret = table.concat(out, ", ")
			return string.upper(string.sub(ret, 1, 1)) .. string.sub(ret, 2)
		else
			return ""
		end
	else
		return input_parm
	end
end

-- This is used to get a date value for date_of_birth (p569), etc. which won't be linked -- consolidate by testing if entity.claims[propertyID].mainsnak.datavalue.type is "time"
-- Dates are stored as 28 characters if the year  >99 -- e.g. +00000002014-01-01T00:00:00Z for 2014
-- Dates are stored as 26 characters if the year =<99 -- e.g. +000000050-01-01T00:00:00Z   for 50 CE
p.getDateValue = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")
	local input_parm = mw.text.trim(frame.args[2] or "")
	local date_format = mw.text.trim(frame.args[3] or "dmy")
	if input_parm == "FETCH_WIKIDATA" then
		local entity = mw.wikibase.getEntityObject()
		if entity.claims[propertyID] ~= nil then
			local out = {}
			local dt = {}
			for k, v in pairs(entity.claims[propertyID]) do
				if v.mainsnak.snaktype == 'value' then
					local d = v.mainsnak.datavalue.value.time
					if #d > 26 then
						dt.year = string.sub(d, 9, 12)
						dt.month = string.sub(d, 14, 15)
						dt.day = string.sub(d, 17, 18)
					else
						dt.year = string.sub(d, 9, 10)
						dt.month = string.sub(d, 12, 13)
						dt.day = string.sub(d, 15, 16)
					end
					if date_format == "mdy" then
						out[#out + 1] = os.date("%B %e, %Y", os.time(dt))
					elseif date_format == "my" then
						out[#out + 1] = os.date("%B %Y", os.time(dt))
					elseif date_format == "y" then
						out[#out + 1] = os.date("%Y", os.time(dt))
					else
						out[#out + 1] = os.date("%e %B %Y", os.time(dt))
					end
				end
			end
			return table.concat(out, ", ")
		else
			return ""
		end
	else
		return input_parm
	end
end

p.getQualifierDateValue = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")
	local qualifierID = mw.text.trim(frame.args[2] or "")
	local input_parm = mw.text.trim(frame.args[3] or "")
	local date_format = mw.text.trim(frame.args[4] or "dmy")
	if input_parm == "FETCH_WIKIDATA" then
		local entity = mw.wikibase.getEntityObject()
		if entity.claims[propertyID] ~= nil then
			local out = {}
			for k, v in pairs(entity.claims[propertyID]) do
				for k2, v2 in pairs(v.qualifiers[qualifierID]) do
					if v2.snaktype == 'value' then
						local d = v2.datavalue.value.time
						if date_format == "mdy" then
							out[#out + 1] = mw.language.new(wiki.langcode):formatDate("F j, Y", d)
						elseif date_format == "my" then
							out[#out + 1] = mw.language.new(wiki.langcode):formatDate("F Y", d)
						elseif date_format == "y" then
							out[#out + 1] = mw.language.new(wiki.langcode):formatDate("Y", d)
						else
							out[#out + 1] = mw.language.new(wiki.langcode):formatDate("j F Y", d)
						end
					end
				end
			end
			return table.concat(out, ", ")
		else
			return ""
		end
	else
		return input_parm
	end
end

-- This is used to get the TA98 (Terminologia Anatomica first edition 1998) values like 'A01.1.00.005' (property P1323)
-- which are then linked to http://www.unifr.ch/ifaa/Public/EntryPage/TA98%20Tree/Entity%20TA98%20EN/01.1.00.005%20Entity%20TA98%20EN.htm
-- uses the newer mw.wikibase calls instead of directly using the snaks
-- formatPropertyValues returns a table with the P1323 values concatenated with ", " so we have to split them out into a table in order to construct the return string
p.getTAValue = function(frame)
	local ent = mw.wikibase.getEntityObject()
	local props = ent:formatPropertyValues('P1323')
	local out = {}
	local t = {}
	for k, v in pairs(props) do
		if k == 'value' then
			t = mw.text.split( v, ", ")
			for k2, v2 in pairs(t) do
				out[#out + 1] = "[http://www.unifr.ch/ifaa/Public/EntryPage/TA98%20Tree/Entity%20TA98%20EN/" .. string.sub(v2, 2) .. "%20Entity%20TA98%20EN.htm " .. v2 .. "]"
			end
		end
	end
	ret = table.concat(out, "<br> ")
	if #ret == 0 then
		ret = "Invalid TA"
	end
	return ret
end

-- returns the page id (Q...) of the current page or nothing of the page is not connected to Wikidata
function p.pageId(frame)
	local entity = mw.wikibase.getEntityObject()
	if not entity then return nil else return entity.id end
end

-- the "qualifiers" and "snaks" field have a respective "qualifiers-order" and "snaks-order" field
-- use these as the second parameter and this function instead of the built-in "pairs" function
-- to iterate over all qualifiers and snaks in the intended order.
local function orderedpairs(array, order)
	if not order then return pairs(array) end
 
	-- return iterator function
    local i = 0
    return function()
        i = i + 1
        if order[i] then
            return order[i], array[order[i]]
        end
    end	
end

-- precision: 0 - billion years, 1 - hundred million years, ..., 6 - millennia, 7 - century, 8 - decade, 9 - year, 10 - month, 11 - day, 12 - hour, 13 - minute, 14 - second
local function normalizeDate(date)
	date = mw.text.trim(date, "+")
	-- extract year
	local yearstr = mw.ustring.match(date, "^\-?%d+")
	local year = tonumber(yearstr)
	-- remove leading zeros of year
	return year .. mw.ustring.sub(date, #yearstr + 1), year
end

function formatDate(date, precision, timezone)
	precision = precision or 11
	date, year = normalizeDate(date)
	if year == 0 and precision <= 9 then return "" end
 
 	-- precision is 10000 years or more
	if precision <= 5 then
		local factor = 10 ^ ((5 - precision) + 4)
		local y2 = math.ceil(math.abs(year) / factor)
		local relative = mw.ustring.gsub(i18n.datetime[precision], "$1", tostring(y2))
		if year < 0 then
			relative = mw.ustring.gsub(i18n.datetime.beforenow, "$1", relative)
		else
			relative = mw.ustring.gsub(i18n.datetime.afternow, "$1", relative)
		end			
		return relative
	end
 
 	-- precision is decades, centuries and millennia
	local era
	if precision == 6 then era = mw.ustring.gsub(i18n.datetime[6], "$1", tostring(math.floor((math.abs(year) - 1) / 1000) + 1)) end
	if precision == 7 then era = mw.ustring.gsub(i18n.datetime[7], "$1", tostring(math.floor((math.abs(year) - 1) / 100) + 1)) end
	if precision == 8 then era = mw.ustring.gsub(i18n.datetime[8], "$1", tostring(math.floor(math.abs(year) / 10) * 10)) end
	if era then
		if year < 0 then era = mw.ustring.gsub(mw.ustring.gsub(i18n.datetime.bc, '"', ""), "$1", era)
		elseif year > 0 then era = mw.ustring.gsub(mw.ustring.gsub(i18n.datetime.ad, '"', ""), "$1", era) end
		return era
	end
 
 	-- precision is year
 	if precision == 9 then
 		return year
 	end
 
	-- precision is less than years
	if precision > 9 then
		--[[ the following code replaces the UTC suffix with the given negated timezone to convert the global time to the given local time
		timezone = tonumber(timezone)
		if timezone and timezone ~= 0 then
			timezone = -timezone
			timezone = string.format("%.2d%.2d", timezone / 60, timezone % 60)
			if timezone[1] ~= '-' then timezone = "+" .. timezone end
			date = mw.text.trim(date, "Z") .. " " .. timezone
		end
		]]--
 
		local formatstr = i18n.datetime[precision]
		if year == 0 then formatstr = mw.ustring.gsub(formatstr, i18n.datetime[9], "")
		elseif year < 0 then
			-- Mediawiki formatDate doesn't support negative years
			date = mw.ustring.sub(date, 2)
			formatstr = mw.ustring.gsub(formatstr, i18n.datetime[9], mw.ustring.gsub(i18n.datetime.bc, "$1", i18n.datetime[9]))
		elseif year > 0 and i18n.datetime.ad ~= "$1" then
			formatstr = mw.ustring.gsub(formatstr, i18n.datetime[9], mw.ustring.gsub(i18n.datetime.ad, "$1", i18n.datetime[9]))
		end
		return mw.language.new(wiki.langcode):formatDate(formatstr, date)
	end
end

local function printDatavalueEntity(data, parameter)
	-- data fields: entity-type [string], numeric-id [int, Wikidata id]
	local id
	
	if data["entity-type"] == "item" then id = "Q" .. data["numeric-id"]
	elseif data["entity-type"] == "property" then id = "P" .. data["numeric-id"]
	else return printError("unknown-entity-type")
	end
	
	if parameter then
		if parameter == "link" then
			local linkTarget = mw.wikibase.sitelink(id)
			local linkName = mw.wikibase.label(id)
			if linkTarget then
				-- if there is a local Wikipedia article link to it using the label or the article title
				return "[[" .. linkTarget  .. "|" .. (linkName or linkTarget) .. "]]"
			else
				-- if there is no local Wikipedia article output the label or link to the Wikidata object to let the user input a proper label
				if linkName then return linkName else return "[[:d:" .. id .. "|" .. id .. "]]" end
			end
		else
			return data[parameter]
		end
	else
		return mw.wikibase.label(id) or id
	end
end

local function printDatavalueTime(data, parameter)
	-- data fields: time [ISO 8601 time], timezone [int in minutes], before [int], after [int], precision [int], calendarmodel [wikidata URI]
	--   precision: 0 - billion years, 1 - hundred million years, ..., 6 - millennia, 7 - century, 8 - decade, 9 - year, 10 - month, 11 - day, 12 - hour, 13 - minute, 14 - second
	--   calendarmodel: e.g. http://www.wikidata.org/entity/Q1985727 for the proleptic Gregorian calendar or http://www.wikidata.org/wiki/Q11184 for the Julian calendar]
	if parameter then
		if parameter == "calendarmodel" then data.calendarmodel = mw.ustring.match(data.calendarmodel, "Q%d+") -- extract entity id from the calendar model URI
		elseif parameter == "time" then data.time = normalizeDate(data.time) end
		return data[parameter]
	else
		return formatDate(data.time, data.precision, data.timezone)
	end
end

local function printDatavalueMonolingualText(data, parameter)
	-- data fields: language [string], text [string]
	if parameter then
		return data[parameter]
	else
		local result = mw.ustring.gsub(mw.ustring.gsub(i18n.monolingualtext, "%%language", data["language"]), "%%text", data["text"])
		return result
	end
end

function findClaims(entity, property)
	if not property or not entity or not entity.claims then return end
 
	if mw.ustring.match(property, "^P%d+$") then
		-- if the property is given by an id (P..) access the claim list by this id
		return entity.claims[property]
	else
		-- otherwise, iterate over all properties, fetch their labels and compare this to the given property name
		for k, v in pairs(entity.claims) do
			if mw.wikibase.label(k) == property then return v end
		end
		return
	end
end

function getSnakValue(snak, parameter)
	if snak.snaktype == "value" then
		-- call the respective snak parser
		if snak.datavalue.type == "string" then return snak.datavalue.value
		elseif snak.datavalue.type == "globecoordinate" then return printDatavalueCoordinate(snak.datavalue.value, parameter)
		elseif snak.datavalue.type == "quantity" then return printDatavalueQuantity(snak.datavalue.value, parameter)
		elseif snak.datavalue.type == "time" then return printDatavalueTime(snak.datavalue.value, parameter)
		elseif snak.datavalue.type == "wikibase-entityid" then return printDatavalueEntity(snak.datavalue.value, parameter)
		elseif snak.datavalue.type == "monolingualtext" then return printDatavalueMonolingualText(snak.datavalue.value, parameter)
		end
	end
	return mw.wikibase.renderSnak(snak)
end
 
function getQualifierSnak(claim, qualifierId)
	-- a "snak" is Wikidata terminology for a typed key/value pair
	-- a claim consists of a main snak holding the main information of this claim,
	-- as well as a list of attribute snaks and a list of references snaks
	if qualifierId then
		-- search the attribute snak with the given qualifier as key
		if claim.qualifiers then
			local qualifier = claim.qualifiers[qualifierId]
			if qualifier then return qualifier[1] end
		end
		return nil, printError("qualifier-not-found")
	else
		-- otherwise return the main snak
		return claim.mainsnak
	end
end
 
function getValueOfClaim(claim, qualifierId, parameter)
	local error
	local snak
	snak, error = getQualifierSnak(claim, qualifierId)
	if snak then
		return getSnakValue(snak, parameter)
	else
		return nil, error
	end
end

function getReferences(frame, claim)
	local result = ""
	-- traverse through all references
	for ref in pairs(claim.references or {}) do
		local refparts
		-- traverse through all parts of the current reference
		for snakkey, snakval in orderedpairs(claim.references[ref].snaks or {}, claim.references[ref]["snaks-order"]) do
			if refparts then refparts = refparts .. ", " else refparts = "" end
			-- output the label of the property of the reference part, e.g. "imported from" for P143
			refparts = refparts .. tostring(mw.wikibase.label(snakkey)) .. ": " 
			-- output all values of this reference part, e.g. "German Wikipedia" and "English Wikipedia" if the referenced claim was imported from both sites
			for snakidx = 1, #snakval do
				if snakidx > 1 then refparts = refparts .. ", " end
				refparts = refparts .. getSnakValue(snakval[snakidx])
			end
		end
		if refparts then result = result .. frame:extensionTag("ref", refparts) end
	end
	return result
end

function p.claim(frame)
	local property = frame.args[1] or ""
	local id = frame.args["id"]	-- "id" must be nil, as access to other Wikidata objects is disabled in Mediawiki configuration
	local qualifierId = frame.args["qualifier"]
	local parameter = frame.args["parameter"]
	local list = frame.args["list"]
	local references = frame.args["references"]
	local showerrors = frame.args["showerrors"]
	local default = frame.args["default"]
	if default then showerrors = nil end
 
	-- get wikidata entity
	local entity = mw.wikibase.getEntityObject(id)
	if not entity then
		if showerrors then return printError("entity-not-found") else return default end
	end
	-- fetch the first claim of satisfying the given property
	local claims = findClaims(entity, property)
	if not claims or not claims[1] then
		if showerrors then return printError("property-not-found") else return default end
	end
 
	-- get initial sort indices
	local sortindices = {}
	for idx in pairs(claims) do
		sortindices[#sortindices + 1] = idx
	end
	-- sort by claim rank
	local comparator = function(a, b)
		local rankmap = { deprecated = 2, normal = 1, preferred = 0 }
		local ranka = rankmap[claims[a].rank or "normal"] ..  string.format("%08d", a)
		local rankb = rankmap[claims[b].rank or "normal"] ..  string.format("%08d", b)
		return ranka < rankb
 	end
	table.sort(sortindices, comparator)
 
	local result
	local error
	if list then
		local value
		-- iterate over all elements and return their value (if existing)
		result = {}
		for idx in pairs(claims) do
			local claim = claims[sortindices[idx]]
			value, error =  getValueOfClaim(claim, qualifierId, parameter)
			if not value and showerrors then value = error end
			if value and references then value = value .. getReferences(frame, claim) end
			result[#result + 1] = value
		end
		result = table.concat(result, list)
	else
		-- return first element	
		local claim = claims[sortindices[1]]
		result, error = getValueOfClaim(claim, qualifierId, parameter)
		if result and references then result = result .. getReferences(frame, claim) end
	end
 
	if result then return result else
		if showerrors then return error else return default end
	end
end

-- look into entity object
function p.ViewSomething(frame)
	local data = mw.wikibase.getEntityObject()
	if not data then
		return nil
	end

	local f = frame.args[1] and frame or frame:getParent()

	local i = 1
	while true do
		local index = f.args[i]
		if not index then
			return tostring(data)
		end
		
		data = data[index] or data[tonumber(index)]
		if not data then
			return
		end
		
		i = i + 1
	end
end

function p.Dump(frame)
	local data = mw.wikibase.getEntityObject()
	if not data then
		return i18n.warnDump
	end

	local f = frame.args[1] and frame or frame:getParent()

	local i = 1
	while true do
		local index = f.args[i]
		if not index then
			return "<pre>"..mw.dumpObject(data).."</pre>".. i18n.warnDump
		end

		data = data[index] or data[tonumber(index)]
		if not data then
			return i18n.warnDump
		end

		i = i + 1
	end
end

return p