Module:WikidataIB

From Random Island Wiki
Revision as of 18:56, 1 July 2017 by >RexxS (get logic right)
Jump to navigation Jump to search

Documentation for this module may be created at Module:WikidataIB/doc

-- Module to try out use of a blacklist and whitelist for infobox fields
-- can take a named parameter |qid which is the Wikidata ID for the article. This will not normally be used
-- Fields in blacklist are never to be displayed, i.e. module must return nil in all circumstances
-- Fields in whitelist return local value if it exists or the Wikidata value otherwise
-- The name of the field that this function is called from is passed in named parameter |name
-- The name is compulsory when blacklist or whitelist is used, so the module returns nil if it is not supplied
-- blacklist is passed in named parameter |suppressfields
-- whitelist is passed in named parameter |fetchwikidata

-- Table that collects the functions this module exports.
local p = {}

-- Translatable strings used by the module:
--   errors  messages, keyed by a short error identifier
--   months  month names in calendar order, so months[n] names month number n
local i18n = {
	errors = {
		["property-not-found"] = "Property not found.",
		["entity-not-found"] = "Wikidata entity not found.",
		["unknown-claim-type"] = "Unknown claim type.",
		["unknown-entity-type"] = "Unknown entity type.",
		["qualifier-not-found"] = "Qualifier not found.",
		["site-not-found"] = "Wikimedia project not found.",
		["unknown-datetime-format"] = "Unknown datetime format.",
		["local-article-not-found"] = "Article is available on Wikidata, but not on Wikipedia",
	},
	months = {
		"January", "February", "March", "April", "May", "June",
		"July", "August", "September", "October", "November", "December",
	},
}

-------------------------------------------------------------------------------
-- Private functions
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
-- format_Date takes a date string like "1 August 30 BCE" (day month year,
-- month year, or year, optionally followed by BC/BCE) and re-formats it.
--   datetime    the date string; nil is replaced by "1 August 30 BCE".
--               Only the first run of letters, digits and spaces is used, so
--               further values and any hours, minutes, etc. are dropped.
--   dateformat  "dmy" / "mdy" / "y" (case-insensitive), default "dmy"
--   bc          "BC" / "BCE" (case-insensitive), default "BCE"; anything other
--               than "BC" is treated as "BCE"
-- Returns the formatted date, with the era suffix only when the input carried
-- BC or BCE. An input with no letters or digits at all yields "".
-- This is the local version; p.formatDate exports it.
local format_Date = function(datetime, dateformat, bc)
	datetime = datetime or "1 August 30 BCE" -- in case of nil value
	-- chop off multiple values and/or any hours, mins, etc.
	-- keep anything before punctuation - we just want a single date.
	-- string.match returns nil when nothing matches (e.g. an empty string),
	-- so fall back to "" rather than passing nil on to string.gmatch
	local dateval = string.match(datetime, "[%w ]+") or ""

	dateformat = string.lower(dateformat or "dmy") -- default to dmy

	-- we only want to accept two possibilities: BC or default to BCE
	-- the suffix carries its own leading space. **internationalise later**
	local suffix = " BCE"
	if string.upper(bc or "") == "BC" then
		suffix = " BC"
	end

	local postchrist = true -- start by assuming no BCE
	local dateparts = {}
	for word in string.gmatch(dateval, "%w+") do
		if word == "BCE" or word == "BC" then -- **internationalise later**
			postchrist = false
		else
			-- we'll keep the parts that are not 'BCE' in a table
			dateparts[#dateparts + 1] = word
		end
	end
	if postchrist then suffix = "" end -- set AD dates to no suffix **internationalise later**

	-- separator used when the parts are re-ordered
	-- NOTE(review): this was previously commented as an nbsp, but the literal
	-- reads as an ordinary space - confirm which is intended
	local sep = " "
	local fdate = table.concat(dateparts, " ") -- formatted date defaults to same order as input

	-- if we have day month year, check dateformat
	if #dateparts == 3 then
		if dateformat == "y" then
			fdate = dateparts[3]
		elseif dateformat == "mdy" then
			fdate = dateparts[2] .. sep .. dateparts[1] .. "," .. sep .. dateparts[3]
		end
	elseif #dateparts == 2 and dateformat == "y" then
		-- month year: keep just the year
		fdate = dateparts[2]
	end

	return fdate .. suffix
end

-------------------------------------------------------------------------------
-- parseParam turns a (string) parameter, e.g. one of the frame arguments,
-- into a boolean:
--   "false", "no" and "0" (in any letter case) give false
--   any other non-empty string gives true
--   nil and the empty string give the value passed as default,
--   so a parameter can be true or false by default.
local parseParam = function(param, default)
	-- missing or blank: the caller's default applies
	if not param or #param == 0 then
		return default
	end
	local negatives = { ["false"] = true, ["no"] = true, ["0"] = true }
	return not negatives[param:lower()]
end

-------------------------------------------------------------------------------
-- The label in a Wikidata item is subject to vulnerabilities
-- that an attacker might try to exploit,
-- so it is passed through mw.text.nowiki before use.
-- labelOrId returns that sanitised label for the given id,
-- or the id itself when mw.wikibase.label gives nothing back.
local labelOrId = function (id)
	local label = mw.wikibase.label(id)
	if not label then
		return id
	end
	return mw.text.nowiki(label)
end

-------------------------------------------------------------------------------
-- sourced takes a table representing a statement that may or may not have references.
-- Each reference is rendered with mw.wikibase.renderSnaks and counts as valid
-- when the rendered text does not contain the word "Wikipedia".
-- It returns a boolean: true if there is at least one valid reference,
-- false if there are none or the statement has no references at all.
local sourced = function(claim)
	local references = claim.references
	if not references then
		return false
	end
	local valid = 0
	for _, reference in pairs(references) do
		local rendered = mw.wikibase.renderSnaks(reference.snaks)
		if rendered:find("Wikipedia") == nil then
			valid = valid + 1
		end
	end
	return valid > 0
end

-- parseInput processes the Q-id, the blacklist and the whitelist.
-- It reads these named parameters from frame.args:
--   qid             Wikidata ID to fetch; blank is treated as not supplied
--   suppressfields  the blacklist
--   fetchwikidata   the whitelist; the value 'ALL' whitelists every field
--   name            the name of the field the call is made for
-- input_parm is the locally supplied value (nil when none was given).
-- It returns two values:
--   false, nil         the field is blacklisted, or a list that needs the
--                      field name was supplied without one
--   false, input_parm  a local value was supplied, or the field is not whitelisted
--   true, entity       the caller should continue; entity is whatever
--                      mw.wikibase.getEntityObject(qid) returned
-- Field names are looked up in the lists as plain text, not as Lua patterns,
-- so names containing characters such as "-" or "." match themselves.
local parseInput = function(frame, input_parm)
	local args = frame.args

	-- can take a named parameter |qid which is the Wikidata ID for the article.
	-- This will not normally be used because it's an expensive call.
	local qid = args.qid
	if qid and (#qid == 0) then qid = nil end

	-- The blacklist is passed in named parameter |suppressfields
	local blacklist = args.suppressfields

	-- The whitelist is passed in named parameter |fetchwikidata
	local whitelist = args.fetchwikidata

	-- The name of the field that this function is called from is passed in named parameter |name
	local fieldname = args.name
	local hasname = (fieldname ~= nil) and (#fieldname > 0)

	if blacklist then
		-- The name is compulsory when blacklist is used, so return nil if it is not supplied
		if not hasname then return false, nil end
		-- If this field is on the blacklist, then return nil
		if blacklist:find(fieldname, 1, true) then return false, nil end
	end

	-- If we got this far then we're not on the blacklist
	-- The blacklist overrides any locally supplied parameter as well
	-- If a non-blank input parameter was supplied return it
	if input_parm then return false, input_parm end

	-- Otherwise see if this field is on the whitelist.
	-- From here on input_parm is nil, so "not whitelisted" returns nil.
	if not whitelist then return false, nil end
	if whitelist ~= 'ALL' then
		-- a specific whitelist needs a field name to look up;
		-- without one the field cannot be on it
		if not hasname then return false, nil end
		if not whitelist:find(fieldname, 1, true) then return false, nil end
	end

	-- See what's on Wikidata:
	local entity = mw.wikibase.getEntityObject(qid)
	return true, entity
end
-------------------------------------------------------------------------------
-- Public functions
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
-- getValue is used to get a value, or a comma separated list of them if multiple values exist
--
-- Parameters read from frame.args:
--   1            the property ID whose values are wanted
--   2            a locally supplied value; blank counts as not supplied
--   onlysourced  boolean, default true: return only values sourced to other than Wikipedia
--   noicon       boolean, default false: suppress the trailing "edit at Wikidata" icon
--   wdlinks      boolean, default false: link to Wikidata when no local article exists
--   sorted       boolean, default false: sort the values returned
--   sep          separator for multiple values, default ", "
--   list         "hlist" or "ubl" to output via the Hlist / Unbulleted list templates
--   df, bc       date format and era preferences, passed on to format_Date
--   qid, name, suppressfields, fetchwikidata   handled by parseInput
-- Returns wikitext for the value(s); or whatever parseInput decided on
-- (the local value or nil); or nil when nothing qualified.
-- Statements whose main snak carries no datavalue (no value / unknown value)
-- are skipped in the item and date branches.
p.getValue = function(frame)
	local args = frame.args
	local propertyID = mw.text.trim(args[1] or "")

	-- There may be a local parameter supplied, if it's blank, set it to nil
	local input_parm = mw.text.trim(args[2] or "")
	if #input_parm == 0 then input_parm = nil end

	-- onlysourced is a boolean passed to return only values sourced to other than Wikipedia
	-- if nothing or an empty string is passed set it true
	-- if "false" or "no" or "0" is passed set it false
	local onlysrc = parseParam(args.onlysourced, true)

	-- noicon is a boolean passed to suppress the trailing "edit at Wikidata" icon
	-- for use when the value is processed further by the infobox
	-- if nothing or an empty string is passed set it false
	-- if "false" or "no" or "0" is passed set it false
	local noic = parseParam(args.noicon, false)

	-- wdlinks is a boolean passed to enable links to Wikidata when no article exists
	-- if nothing or an empty string is passed set it false
	-- if "false" or "no" or "0" is passed set it false
	local wdl = parseParam(args.wdlinks, false)

	-- sorted is a boolean passed to enable sorting of the values returned
	-- if nothing or an empty string is passed set it false
	-- if "false" or "no" or "0" is passed set it false
	local sorted = parseParam(args.sorted, false)

	-- separator is a string that is used to separate multiple returned values
	-- if nothing or an empty string is passed set it to the default
	-- any double-quotes " are stripped out, so that spaces may be passed
	-- e.g. |sep=" - "
	local sepdefault = ", " -- **internationalise later**
	local separator = string.gsub(args.sep or "", '"', '')
	if #separator == 0 then
		separator = sepdefault
	end

	-- list is a string that may be "", "hlist" or "ubl"
	-- this controls whether multiple values are output as comma-separated
	-- as a horizontal list (hlist) or unbulleted list (ubl)
	local list = args.list or ""
	if list ~= "hlist" and list ~= "ubl" then list = "" end

	local success, errorOrEntity = parseInput(frame, input_parm)
	if not success then
		return errorOrEntity
	end
	local entity = errorOrEntity
	if not (entity and entity.claims) then
		-- there's no such entity or no claims for the entity
		return input_parm
	end

	-- Make sure it actually has the property requested
	local props = entity.claims[propertyID]
	if not props or not props[1] then
		return input_parm
	end

	-- So now we have something to return: deal with (1) a Wikibase-entity; (2) a date; (3) anything else
	local lang = mw.language.getContentLanguage().code
	local thisQid = entity.id
	-- table 'out' is going to store the return value(s):
	local out = {}
	local icon = " [[File:Blue pencil.svg |frameless |text-top |10px |alt=Edit this on Wikidata |link=https://www.wikidata.org/wiki/" .. thisQid .. "?uselang=" .. lang .. "#" .. propertyID .. "|Edit this on Wikidata]]"

	-- a statement is used when it carries a value and, if only sourced
	-- values are wanted, has a valid reference
	local wanted = function(statement)
		return statement.mainsnak.datavalue ~= nil
			and ((onlysrc == false) or sourced(statement))
	end

	-- The first statement decides how the whole property is treated.
	-- Its datavalue is missing when it is a "no value" / "unknown value"
	-- statement, so fall back to the declared datatype in that case.
	local firstsnak = props[1].mainsnak
	local firstvalue = firstsnak.datavalue
	local isitem
	if firstvalue then
		isitem = (firstvalue.type == "wikibase-entityid")
	else
		isitem = (firstsnak.datatype == "wikibase-item")
	end

	if isitem then
		-- it's wiki-linked value, so output as link if possible
		for _, v in ipairs(props) do
			if wanted(v) then -- has valid refs or all values required
				local qnumber = "Q" .. v.mainsnak.datavalue.value["numeric-id"]
				local sitelink = mw.wikibase.sitelink(qnumber)
				local label = labelOrId(qnumber)
				if sitelink then
					out[#out + 1] = "[[" .. sitelink .. "|" .. label .. "]]"
				else
					-- no sitelink, so check first for a redirect with that label
					-- mw.title.new gives nil when the label is not a valid title,
					-- which is treated the same as "no such page"
					local artitle = mw.title.new(label, 0)
					if artitle and artitle.id > 0 then
						if artitle.isRedirect then
							-- no sitelink, but there's a redirect with the same title as the label
							-- let's link to that
							out[#out + 1] = "[[" .. label .. "]]"
						else
							-- no sitelink and not a redirect but an article exists with the same title as the label
							-- that's probably a dab page, so output the plain label
							out[#out + 1] = label
						end
					elseif wdl then
						-- no article or redirect with the same title as the label
						-- show that there's a Wikidata entry available
						out[#out + 1] = "[[:d:" .. qnumber .. "|" .. label .. "]]&nbsp;<span title='" .. i18n["errors"]["local-article-not-found"] .. "'>[[File:Wikidata-logo.svg|16px|alt=|link=]]</span>"
					else
						-- no wikidata links required, so just give the plain label
						out[#out + 1] = label
					end
				end
			end
		end
	elseif firstsnak.datatype == "time" then
		-- it's a date value, so output according to formatting preferences
		for _, v in ipairs(props) do
			if wanted(v) then
				local timevalue = v.mainsnak.datavalue.value
				-- A year can be stored like this: "+1872-00-00T00:00:00Z".
				-- Rewrite a month 0, day 0 timestamp to 1 January of that year:
				local timestamp = timevalue.time:gsub("%-00%-00T", "-01-01T")
				local dateprecision = timevalue.precision
				-- year only, unless the precision says a month is present
				local dateformat = "y"
				-- characters 2-5 are read as the year, i.e. this assumes a
				-- sign followed by a four-digit year
				local fpvdate = tonumber(timestamp:sub(2, 5))
				if timestamp:sub(1, 1) == "-" then
					fpvdate = fpvdate .. " BCE"
				end
				if dateprecision > 9 then -- add month
					fpvdate = i18n.months[tonumber(timestamp:sub(7, 8))] .. " " .. fpvdate
					dateformat = args.df
				end
				if dateprecision > 10 then -- add day
					fpvdate = tonumber(timestamp:sub(10, 11)) .. " " .. fpvdate
				end
				out[#out + 1] = format_Date(fpvdate, dateformat, args.bc)
			end
		end
	else
		-- not a linkable article title or a date
		-- this needs to be expanded to cater for multiple values:
		-- the formatted text of all values is output as one item, provided
		-- all values are required or at least one statement has a valid reference
		local anysourced = false
		if onlysrc ~= false then
			for _, v in ipairs(props) do
				if sourced(v) then
					anysourced = true
					break
				end
			end
		end
		if anysourced or (onlysrc == false) then
			out[#out + 1] = entity:formatPropertyValues(propertyID).value
		end
	end

	-- if there's anything to return, then return a list
	-- comma-separated by default, but may be specified by the sep parameter
	-- optionally specify a hlist or ubl via the list parameter
	-- (the edit icon is only added to the separator-joined form)
	if #out == 0 then
		return nil -- no items had valid reference
	end
	if sorted then table.sort(out) end
	if list == "hlist" then
		return frame:expandTemplate{title = 'Hlist', args = out}
	elseif list == "ubl" then
		return frame:expandTemplate{title = 'Unbulleted list', args = out}
	elseif noic then
		return table.concat(out, separator)
	else
		return table.concat(out, separator) .. icon
	end
end


-------------------------------------------------------------------------------
-- getSourcedValue is used to get a value, or a comma separated list of them if multiple values exist
-- but only values that are sourced are returned.
-- It is redundant to getValue with onlysourced=true and is kept for backwards compatibility:
-- it sets the onlysourced argument to "yes" and hands the frame to getValue.
--
p.getSourcedValue = function(frame)
	local args = frame.args
	args.onlysourced = "yes"
	return p.getValue(frame)
end


-------------------------------------------------------------------------------
-- formatDate exports the private function format_Date:
-- the first unnamed argument is the date, |df the date format and |bc the era style
p.formatDate = function(frame)
	local args = frame.args
	local datetime, df, bc = args[1], args.df, args.bc
	return format_Date(datetime, df, bc)
end

-------------------------------------------------------------------------------
-- getCoords is used to get coordinates (property P625) for display in an infobox
-- whitelist and blacklist are implemented (via parseInput)
-- the first unnamed argument is a locally supplied value; blank counts as not supplied
-- optional 'display' parameter is allowed, defaults to "inline, title"
-- Returns whatever parseInput decided on (local value or nil), the expanded
-- 'coord' template, or nil when the entity has no P625 claims.
--
p.getCoords = function(frame)
	local propertyID = "P625"
	local args = frame.args

	local input_parm = mw.text.trim(args[1] or "")
	if #input_parm == 0 then input_parm = nil end

	-- use the 'display' parameter when one is supplied, else the default
	local disp = args.display
	if not disp or #disp == 0 then
		disp = "inline, title"
	end

	local success, errorOrEntity = parseInput(frame, input_parm)
	if not success then
		return errorOrEntity
	end

	local entity = errorOrEntity
	local props = entity and entity.claims and entity.claims[propertyID]
	if not props then
		-- no coords in Wikidata for this article
		return nil
	end

	local coords = entity:formatPropertyValues(propertyID).value
	-- the latitude and longitude are returned like this: nn°nn&#39;nn.n&#34;
	-- so the html entities have to go before the numbers can be read
	local lat = mw.ustring.match(coords, "^[^,]*")  -- from the start up to the first comma
	local long = mw.ustring.match(coords, "[^ ]*$") -- from after the last space to the end
	lat = lat:gsub("&#%d%d;", ":")
	long = long:gsub("&#%d%d;", ":")

	-- build the positional arguments for the template:
	-- the latitude numbers, then its last character (N/S),
	-- the longitude numbers, then its last character (E/W)
	local coordargs = {}
	for _, part in ipairs({ lat, long }) do
		for num in mw.ustring.gmatch(part, "%d+%.?%d*") do
			coordargs[#coordargs + 1] = num
		end
		coordargs[#coordargs + 1] = part:sub(-1)
	end
	-- add named parameter for display
	coordargs["display"] = disp

	-- invoke template Coord with the values stored in the table
	return frame:expandTemplate{title = 'coord', args = coordargs}
end


-------------------------------------------------------------------------------
-- getQualifierValue is used to get a formatted value of a qualifier
--
-- The call needs:	a property (the unnamed parameter or 1=)
-- 					a target value for that property (pval=), given as an item id
--					a qualifier for that target value (qual=), given as a property id
-- The usual whitelisting and blacklisting of the property is implemented
-- The boolean onlysourced= parameter can be set to return nothing
-- when the property is unsourced (or only sourced to Wikipedia)
--
-- Only the first statement that matches the target and has qualifiers is
-- considered. Item-valued qualifiers are returned as a ", "-separated list of
-- links; qualifier snaks carrying no value are skipped. Any other kind of
-- qualifier is rendered with mw.wikibase.renderSnaks.
-- Returns nil when nothing can be returned.
--
p.getQualifierValue = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")

	-- The item id of the target value of the property
	-- whose qualifier is to be returned is passed in named parameter |pval=
	local propvalue = frame.args.pval

	-- The PropertyID of the qualifier
	-- whose value is to be returned is passed in named parameter |qual=
	local qualifierID = frame.args.qual

	-- onlysourced is a boolean passed to return qualifiers
	-- only when property values are sourced to something other than Wikipedia
	-- if nothing or an empty string is passed set it false
	-- if "false" or "no" or "0" is passed set it false
	local onlysrc = parseParam(frame.args.onlysourced, false)

	local success, errorOrEntity = parseInput(frame, nil)
	if not success then
		return errorOrEntity
	end

	local entity = errorOrEntity
	local props = entity and entity.claims and entity.claims[propertyID]
	if not props then
		return nil
	end

	-- Scan through the values of the property
	-- we want something like property is P793, significant event (in propertyID)
	-- whose value is something like Q385378, construction (in propvalue)
	-- then we can return the value(s) of a qualifier such as P580, start time (in qualifierID)
	for _, v1 in ipairs(props) do
		local mainsnak = v1.mainsnak
		-- It must be a wiki-linked value, be the target (in propvalue)
		-- and have qualifiers
		if mainsnak.snaktype == "value"
			and mainsnak.datavalue.type == "wikibase-entityid"
			and mainsnak.datavalue.value.id == propvalue
			and v1.qualifiers then
			if onlysrc and not sourced(v1) then
				return nil
			end
			-- if we've got this far, we have a (sourced) claim with qualifiers
			-- which matches the target, so find the value(s) of the qualifier we want
			local quals = v1.qualifiers[qualifierID]
			if quals then
				if quals[1].datatype ~= "wikibase-item" then
					return mw.wikibase.renderSnaks(quals)
				end
				local out = {}
				for _, v3 in ipairs(quals) do
					-- a "no value" / "unknown value" qualifier has no datavalue
					-- and so nothing to link to
					if v3.datavalue then
						local valueID = v3.datavalue.value.id
						local sitelink = mw.wikibase.sitelink(valueID)
						local label = labelOrId(valueID)
						if sitelink then
							out[#out + 1] = "[[" .. sitelink .. "|" .. label .. "]]"
						else
							out[#out + 1] = "[[:d:" .. valueID .. "|" .. label .. "]]&nbsp;<span title='" .. i18n["errors"]["local-article-not-found"] .. "'>[[File:Wikidata-logo.svg|16px|alt=|link=]]</span>"
						end
					end
				end
				if #out > 0 then
					return table.concat(out, ", ")
				end
				return nil
			end
		end
	end -- of loop through values of propertyID
	return nil
end

-------------------------------------------------------------------------------
-- getLink returns the label for a Qid wiki-linked to the local article (if the article exists)
-- if label doesn't exist, it uses the Qid as the link text instead
-- if there is no local article, it returns just the label (or Qid) unlinked
-- a blank Qid returns nothing
--
p.getLink = function(frame)
	local itemID = mw.text.trim(frame.args[1] or "")
	if #itemID == 0 then return end
	local sitelink = mw.wikibase.sitelink(itemID)
	local label = labelOrId(itemID)
	if not sitelink then
		return label
	end
	return string.format("[[%s|%s]]", sitelink, label)
end


-------------------------------------------------------------------------------
-- getLabel returns the label for a Qid
-- if label doesn't exist, it returns the Qid
-- a blank Qid returns nothing
--
p.getLabel = function(frame)
	local itemID = mw.text.trim(frame.args[1] or "")
	if itemID ~= "" then
		return labelOrId(itemID)
	end
end

-------------------------------------------------------------------------------
-- checkBlacklist allows a test to check whether a named field is suppressed
-- {{#if:{{#invoke:WikidataIB |checkBlacklist |name=Joe |suppressfields=Dave; Joe; Fred}} | not blacklisted | blacklisted}}
-- displays "blacklisted"
-- {{#if:{{#invoke:WikidataIB |checkBlacklist |name=Jim |suppressfields=Dave; Joe; Fred}} | not blacklisted | blacklisted}}
-- displays "not blacklisted"
--
-- Returns true when both |name and |suppressfields are supplied and the name
-- does not occur in the blacklist; returns nil otherwise.
-- The name is looked up as plain text, not as a Lua pattern, so names
-- containing characters such as "-" or "." match themselves.
--
p.checkBlacklist = function(frame)
	local blacklist = frame.args.suppressfields
	local fieldname = frame.args.name
	if blacklist and fieldname then
		if blacklist:find(fieldname, 1, true) then return nil end
		return true
	end
end


-- Return the table of public functions so they can be called via #invoke
return p