Module:WikidataIB

From Random Island Wiki
Revision as of 11:56, 16 May 2017 by >RexxS (large overhaul to sort out problems caused by having multiple dates, some of which may not be referenced)
Jump to navigation Jump to search

Documentation for this module may be created at Module:WikidataIB/doc

-- Module to try out use of a blacklist and whitelist for infobox fields
-- can take a named parameter |qid which is the Wikidata ID for the article. This will not normally be used
-- Fields in blacklist are never to be displayed, i.e. module must return nil in all circumstances
-- Fields in whitelist return local value if it exists or the Wikidata value otherwise
-- The name of the field that this function is called from is passed in named parameter |name
-- The name is compulsory when blacklist or whitelist is used, so the module returns nil if it is not supplied
-- blacklist is passed in named parameter |suppressfields
-- whitelist is passed in named parameter |fetchwikidata

-- Table of functions exported by this module
local p = {}

-- User-visible strings collected in one place so they can be translated later:
--   errors: messages keyed by a short error identifier
--   months: month names indexed 1..12
local i18n = {
	errors = {
		["property-not-found"] = "Property not found.",
		["entity-not-found"] = "Wikidata entity not found.",
		["unknown-claim-type"] = "Unknown claim type.",
		["unknown-entity-type"] = "Unknown entity type.",
		["qualifier-not-found"] = "Qualifier not found.",
		["site-not-found"] = "Wikimedia project not found.",
		["unknown-datetime-format"] = "Unknown datetime format.",
		["local-article-not-found"] = "Article is available on Wikidata, but not on Wikipedia",
	},
	months = {
		"January", "February", "March", "April", "May", "June",
		"July", "August", "September", "October", "November", "December",
	},
}

-------------------------------------------------------------------------------
-- Private functions
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
-- formatDate takes a datetime of the usual format from mw.wikibase.entity:formatPropertyValues
-- like "1 August 30 BCE" as parameter 1 and formats it according to the df (date format) and bc parameters
-- df = ["dmy" / "mdy" / "y"] default will be "dmy"
-- bc = ["BC" / "BCE"] default will be "BCE"
-- first the local version
local format_Date = function(datetime, dateformat, bc)
	-- Parameters:
	--   datetime   string such as "1 August 30 BCE"; nil falls back to that sample value
	--   dateformat "dmy" (default), "mdy" or "y"; case-insensitive
	--   bc         "BC" selects the " BC" suffix, anything else gives " BCE"
	-- Returns the reformatted date string; an input with no letters or digits gives ""
	datetime = datetime or "1 August 30 BCE" -- in case of nil value
	-- chop off multiple values and/or any hours, mins, etc.
	-- keep anything before punctuation - we just want a single date.
	-- string.match returns nil when nothing matches (e.g. an empty string),
	-- so fall back to "" rather than passing nil on to gmatch:
	local dateval = string.match(datetime, "[%w ]+") or ""

	dateformat = string.lower(dateformat or "dmy") -- default to dmy

	-- we only want to accept two possibilities: BC or default to BCE
	-- the leading space is part of the suffix. **internationalise later**
	local suffix = " BCE"
	if string.upper(bc or "") == "BC" then
		suffix = " BC"
	end

	local postchrist = true -- start by assuming no BCE
	local dateparts = {}
	for word in string.gmatch(dateval, "%w+") do
		if word == "BCE" or word == "BC" then -- **internationalise later**
			postchrist = false
		else
			-- we'll keep the parts that are not 'BCE' in a table
			dateparts[#dateparts + 1] = word
		end
	end
	if postchrist then suffix = "" end -- set AD dates to no suffix **internationalise later**

	-- separator is a non-breaking space, written as its UTF-8 bytes (U+00A0)
	-- so that it stays visible in the source and survives editors that normalise whitespace
	local sep = "\194\160"
	local fdate = table.concat(dateparts, " ") -- formatted date defaults to same order as input

	-- if we have day month year, check dateformat
	if #dateparts == 3 then
		if dateformat == "y" then
			fdate = dateparts[3]
		elseif dateformat == "mdy" then
			fdate = dateparts[2] .. sep .. dateparts[1] .. "," .. sep .. dateparts[3]
		end
	elseif #dateparts == 2 and dateformat == "y" then
		-- month and year only: the year is the second part
		fdate = dateparts[2]
	end

	return fdate .. suffix
end


-------------------------------------------------------------------------------
-- parseParam takes a (string) parameter, e.g. from the list of frame arguments,
-- and makes "false", "no", and "0" into the (boolean) false
-- it makes the empty string and nil into the (boolean) value passed as default
-- allowing the parameter to be true or false by default.
local parseParam = function(param, default)
	-- a missing or empty parameter means "not supplied": hand back the default
	if not param or #param == 0 then
		return default
	end
	-- compare case-insensitively against the three spellings of "off"
	local lowered = param:lower()
	local negative = (lowered == "false") or (lowered == "no") or (lowered == "0")
	-- anything else that was supplied counts as true
	return not negative
end

-------------------------------------------------------------------------------
-- The label in a Wikidata item is subject to vulnerabilities 
-- that an attacker might try to exploit.
-- It needs to be 'sanitised' by removing any wikitext before use.
-- If it doesn't exist, just return the id for the item
local labelOrId = function (id)
	local rawlabel = mw.wikibase.label(id)
	if not rawlabel then
		-- no label available, so the id itself is the best we can show
		return id
	end
	-- escape any wikitext in the label before it is used in output
	return mw.text.nowiki(rawlabel)
end

-------------------------------------------------------------------------------
-- Public functions
-------------------------------------------------------------------------------

-------------------------------------------------------------------------------
-- getValue is used to get a value, or a comma separated list of them if multiple values exist
--
p.getValue = function(frame)
	-- Arguments read from frame.args:
	--   1               the property ID to fetch, e.g. "P569"
	--   2               a locally supplied value; if non-blank it is returned as-is (unless blacklisted)
	--   qid             optional Wikidata ID of the entity to read
	--   suppressfields  the blacklist of field names
	--   fetchwikidata   the whitelist of field names, or 'ALL'
	--   name            the field name checked against both lists
	--   onlysourced     boolean, default true
	--   noicon          boolean, default false
	--   wdlinks         boolean, default false
	--   sorted          boolean, default false
	--   df, bc          passed through to format_Date for date values
	-- Returns a string, or nil when there is nothing to display
	local args = frame.args
	local propertyID = mw.text.trim(args[1] or "")

	-- There may be a local parameter supplied, if it's blank, set it to nil
	local input_parm = mw.text.trim(args[2] or "")
	if #input_parm == 0 then input_parm = nil end

	-- can take a named parameter |qid which is the Wikidata ID for the article.
	-- This will not normally be used because it's an expensive call.
	local qid = args.qid
	if qid and (#qid == 0) then qid = nil end

	-- The blacklist is passed in named parameter |suppressfields
	local blacklist = args.suppressfields

	-- The whitelist is passed in named parameter |fetchwikidata
	local whitelist = args.fetchwikidata

	-- The name of the field that this function is called from is passed in named parameter |name
	local fieldname = args.name
	local hasname = (fieldname ~= nil) and (#fieldname > 0)

	-- onlysourced is a boolean passed to return only values sourced to other than Wikipedia
	-- if nothing or an empty string is passed set it true
	-- if "false" or "no" or "0" is passed set it false
	local onlysrc = parseParam(args.onlysourced, true)

	-- noicon is a boolean passed to suppress the trailing "edit at Wikidata" icon
	-- for use when the value is processed further by the infobox
	-- if nothing or an empty string is passed set it false
	-- if "false" or "no" or "0" is passed set it false
	local noic = parseParam(args.noicon, false)

	-- wdlinks is a boolean passed to enable links to Wikidata when no article exists
	-- if nothing or an empty string is passed set it false
	-- if "false" or "no" or "0" is passed set it false
	local wdl = parseParam(args.wdlinks, false)

	-- sorted is a boolean passed to enable sorting of the values returned
	-- if nothing or an empty string is passed set it false
	-- if "true" or "yes" or "1" is passed set it true; otherwise false
	-- (note this is stricter than parseParam, which treats any other text as true)
	local sorted = (args.sorted or ""):lower()
	sorted = (sorted == "true") or (sorted == "yes") or (sorted == "1")

	if blacklist then
		-- The name is compulsory when blacklist is used, so return nil if it is not supplied
		if not hasname then return nil end
		-- If this field is on the blacklist, then return nil
		-- plain search: field names such as "birth-date" contain Lua pattern magic characters
		if blacklist:find(fieldname, 1, true) then return nil end
	end

	-- If we got this far then we're not on the blacklist
	-- The blacklist overrides any locally supplied parameter as well
	-- If a non-blank input parameter was supplied return it
	if input_parm then return input_parm end

	-- Otherwise see if this field is on the whitelist.
	-- The name is compulsory unless the whitelist is 'ALL'; a missing name means "not listed"
	if not whitelist then return nil end
	if whitelist ~= 'ALL' and not (hasname and whitelist:find(fieldname, 1, true)) then
		return nil
	end

	-- See what's on Wikidata:
	local entity = mw.wikibase.getEntityObject(qid)
	if not (entity and entity.claims) then
		-- there's no such entity or no claims for the entity
		return nil
	end

	-- Make sure it actually has the property requested
	local props = entity.claims[propertyID]
	if not props or not props[1] then
		return nil
	end

	-- counts the references of a claim whose rendered text does not mention Wikipedia
	local function countSourcedRefs(claim)
		local refs = 0
		if claim.references then
			for _, ref in pairs(claim.references) do
				local rendered = mw.wikibase.renderSnaks(ref.snaks)
				if not rendered:find("Wikipedia") then refs = refs + 1 end
			end
		end
		return refs
	end

	-- a claim is wanted if unsourced values are allowed or it has at least one valid reference
	local function wanted(claim)
		return (not onlysrc) or (countSourcedRefs(claim) > 0)
	end

	-- So now we have something to return: deal with (1) a Wikibase-entity; (2) a date; (3) anything else
	local lang = mw.language.getContentLanguage().code
	local thisQid = entity.id
	-- table 'out' is going to store the return value(s):
	local out = {}
	local icon = " [[File:Blue pencil.svg |frameless |text-top |10px |alt=Edit this on Wikidata |link=https://www.wikidata.org/wiki/" .. thisQid .. "?uselang=" .. lang .. "#" .. propertyID .. "|Edit this on Wikidata]]"

	-- the first claim decides how the whole property is treated;
	-- its datavalue is absent when the snak is "novalue" or "somevalue"
	local firstsnak = props[1].mainsnak
	local firstvalue = firstsnak.datavalue
	local isEntity = (firstvalue and firstvalue.type == "wikibase-entityid") or (firstsnak.datatype == "wikibase-item")

	if isEntity then
		-- it's wiki-linked value, so output as link if possible
		for _, v in ipairs(props) do
			local dv = v.mainsnak.datavalue
			-- skip claims with no actual value, and claims without valid references when those are required
			if dv and dv.value and wanted(v) then
				local qnumber = "Q" .. dv.value["numeric-id"]
				local sitelink = mw.wikibase.sitelink(qnumber)
				local label = labelOrId(qnumber)
				if sitelink then
					out[#out + 1] = "[[" .. sitelink .. "|" .. label .. "]]"
				else
					-- no sitelink, so check first for a redirect with that label
					-- mw.title.new returns nil when the label is not a valid page title
					local artitle = mw.title.new(label, 0)
					if artitle and artitle.id > 0 then
						if artitle.isRedirect then
							-- no sitelink, but there's a redirect with the same title as the label
							-- let's link to that
							out[#out + 1] = "[[" .. label .. "]]"
						end
						-- otherwise: no sitelink and not a redirect but an article exists
						-- with the same title as the label; that's odd, so ignore it for now
					elseif wdl then
						-- no article or redirect with the same title as the label
						-- show that there's a Wikidata entry available
						out[#out + 1] = "[[:d:" .. qnumber .. "|" .. label .. "]]&nbsp;<span title='" .. i18n["errors"]["local-article-not-found"] .. "'>[[File:Wikidata-logo.svg|16px|alt=|link=]]</span>"
					else
						-- no wikidata links required, so just give the plain label
						out[#out + 1] = label
					end
				end
			end
		end
	elseif firstsnak.datatype == "time" then
		-- it's a date value, so output according to formatting preferences
		for _, v in ipairs(props) do
			local dv = v.mainsnak.datavalue
			if dv and dv.value and dv.value.time and wanted(v) then
				-- timestamps look like "+1872-05-00T00:00:00Z"; the month and day may be 00
				-- when the date is only known to the year or month
				local timestamp = dv.value.time
				-- Wikibase precision: 11 = day, 10 = month, 9 = year (smaller = coarser)
				-- a missing precision is treated as a full date
				local dateprecision = tonumber(dv.value.precision) or 11
				local y = tonumber(timestamp:sub(2, 5))
				local m = tonumber(timestamp:sub(7, 8))
				local d = tonumber(timestamp:sub(10, 11))
				local month = m and i18n.months[m] -- nil for month 00
				if y then
					-- construct the sort of date that formatPropertyValues returns,
					-- showing no more detail than the stored precision and timestamp support
					local fpvdate
					if dateprecision >= 11 and month and d and d > 0 then
						fpvdate = d .. " " .. month .. " " .. y
					elseif dateprecision >= 10 and month then
						fpvdate = month .. " " .. y
					else
						fpvdate = tostring(y)
					end
					if timestamp:sub(1, 1) == "-" then
						fpvdate = fpvdate .. " BCE"
					end
					out[#out + 1] = format_Date(fpvdate, args.df, args.bc)
				end
			end
		end
	else
		-- not a linkable article title or a date
		-- this needs to be expanded to cater for multiple values
		-- references are counted across all the claims of the property
		local refs = 0
		if onlysrc then
			for _, v in ipairs(props) do
				refs = refs + countSourcedRefs(v)
			end
		end
		if (refs > 0) or (not onlysrc) then -- has valid refs or all values required
			-- the icon is added once, below, for every kind of value
			out[#out + 1] = entity:formatPropertyValues(propertyID).value
		end
	end

	-- if there's anything to return, then return a comma-separated list
	if #out == 0 then
		return nil -- no items had valid reference
	end
	if sorted then table.sort(out) end
	local list = table.concat(out, ", ")
	if noic then
		return list
	end
	return list .. icon
end


-------------------------------------------------------------------------------
-- getSourcedValue is used to get a value, or a comma separated list of them if multiple values exist
-- but only values that are sourced are returned
-- redundant to getValue with onlysourced=true but kept for backwards compatibility
-- now defined via getValue
--
p.getSourcedValue = function(frame)
	-- force the onlysourced option on, then let getValue do all the work
	local args = frame.args
	args["onlysourced"] = "yes"
	return p.getValue(frame)
end


-------------------------------------------------------------------------------
-- formatDate is a wrapper to export the private function format_Date
p.formatDate = function(frame)
	-- unnamed parameter 1 is the date text; |df and |bc select the output format
	local args = frame.args
	local datetime, df, bc = args[1], args.df, args.bc
	return format_Date(datetime, df, bc)
end

-------------------------------------------------------------------------------
-- getCoords is used to get coordinates for display in an infobox
-- whitelist and blacklist are implemented
-- optional 'display' parameter is allowed, defaults to "inline, title"
--
p.getCoords = function(frame)
	-- Arguments read from frame.args:
	--   1               a locally supplied value; if non-blank it is returned as-is (unless blacklisted)
	--   qid             optional Wikidata ID of the entity to read
	--   display         passed to the coord template, defaults to "inline, title"
	--   suppressfields  the blacklist of field names
	--   fetchwikidata   the whitelist of field names, or 'ALL'
	--   name            the field name checked against both lists
	-- Returns the expanded coord template, the local value, or nil
	local args = frame.args
	local propertyID = "P625"

	local input_parm = mw.text.trim(args[1] or "")
	if #input_parm == 0 then input_parm = nil end

	local qid = args.qid
	if qid and (#qid == 0) then qid = nil end

	-- if there is a 'display' parameter supplied, use it
	-- otherwise default to "inline, title"
	local disp = args.display
	if (not disp) or (#disp == 0) then
		disp = "inline, title"
	end

	local blacklist = args.suppressfields

	local whitelist = args.fetchwikidata

	-- The name of the field that this function is called from is passed in named parameter |name
	-- it's probably 'coords' but we can't be certain
	local fieldname = args.name
	local hasname = (fieldname ~= nil) and (#fieldname > 0)

	if blacklist then
		-- the name is compulsory when a blacklist is used
		if not hasname then return nil end
		-- plain search, so that punctuation in the field name is not read as a Lua pattern
		if blacklist:find(fieldname, 1, true) then return nil end
	end

	if input_parm then return input_parm end

	-- not on the whitelist (or no name to look up in it), so there is nothing to return
	if not whitelist then return nil end
	if whitelist ~= 'ALL' and not (hasname and whitelist:find(fieldname, 1, true)) then
		return nil
	end

	local entity = mw.wikibase.getEntityObject(qid)
	local props = entity and entity.claims and entity.claims[propertyID]
	if not props or not props[1] then
		-- no coords in Wikidata for this article
		return nil
	end

	local lat_long = {}
	local coords = entity:formatPropertyValues(propertyID).value
	-- the latitude and longitude are returned like this: nn°nn&#39;nn.n&#34;
	-- using html entities with hex values really screws up parsing the numbers - thanks devs
	-- latitude: everything from the start to before the first comma
	local lat = mw.ustring.match(coords, "^[^,]*")
	-- longitude: the text between the first comma and the next one (or the end), so that
	-- when several coordinates are listed both halves come from the same (first) value;
	-- if there is no comma at all, fall back to everything after the last space
	local long = mw.ustring.match(coords, "^[^,]*,%s*([^,]*)") or mw.ustring.match(coords, "[^ ]*$")
	lat = lat:gsub("&#%d%d;", ":")                  -- clean out the html entities
	long = long:gsub("&#%d%d;", ":")                -- clean out the html entities
	-- read the latitude numbers into a table
	for num in mw.ustring.gmatch(lat, "%d+%.?%d*") do
		lat_long[#lat_long + 1] = num
	end
	-- add the N/S
	lat_long[#lat_long + 1] = lat:sub(-1)
	-- read the longitude numbers into a table
	for num in mw.ustring.gmatch(long, "%d+%.?%d*") do
		lat_long[#lat_long + 1] = num
	end
	-- add E/W for long
	lat_long[#lat_long + 1] = long:sub(-1)
	-- add named parameter for display
	lat_long["display"] = disp
	-- invoke template Coord with the values stored in the table
	return frame:expandTemplate{title = 'coord', args = lat_long}
end


-------------------------------------------------------------------------------
-- getQualifierValue is used to get a formatted value of a qualifier
-- 
-- The call needs:	a property (the unnamed parameter or 1=)
-- 					a target value for that property (pval=)
--					a qualifier for that target value (qual=)
-- The usual whitelisting and blacklisting of the property is implemented
-- The boolean onlysourced= parameter can be set to return nothing
-- when the property is unsourced (or only sourced to Wikipedia)
-- 
p.getQualifierValue = function(frame)
	-- Arguments read from frame.args:
	--   1               the property ID, e.g. "P793"
	--   pval            the entity ID of the target value of that property, e.g. "Q385378"
	--   qual            the property ID of the qualifier to return, e.g. "P580"
	--   qid             optional Wikidata ID of the entity to read
	--   suppressfields  the blacklist of field names
	--   fetchwikidata   the whitelist of field names, or 'ALL'
	--   name            the field name checked against both lists
	--   onlysourced     boolean, default false
	-- Returns the formatted qualifier value(s) as a string, or nil
	local args = frame.args
	local propertyID = mw.text.trim(args[1] or "")

	-- The ID of the target value of the property
	-- whose qualifier is to be returned is passed in named parameter |pval=
	local propvalue = args.pval

	-- The PropertyID of the qualifier
	-- whose value is to be returned is passed in named parameter |qual=
	local qualifierID = args.qual

	-- Can take a named parameter |qid which is the Wikidata ID for the article.
	-- This will not normally be used because it's an expensive call.
	local qid = args.qid
	if qid and (#qid == 0) then qid = nil end

	-- The blacklist is passed in named parameter |suppressfields=
	local blacklist = args.suppressfields

	-- The whitelist is passed in named parameter |fetchwikidata=
	local whitelist = args.fetchwikidata

	-- The name of the field to check against the whitelist and blacklist
	-- is passed in named parameter |name
	local fieldname = args.name
	local hasname = (fieldname ~= nil) and (#fieldname > 0)

	-- onlysourced is a boolean passed to return qualifiers
	-- only when property values are sourced to something other than Wikipedia
	-- if nothing or an empty string is passed set it false
	-- if "false" or "no" or "0" is passed set it false
	-- (frame arguments are strings, so the comparison has to be with "0", which parseParam does)
	local onlysrc = parseParam(args.onlysourced, false)

	if blacklist then
		-- The name is compulsory when blacklist is used, so return nil if it is not supplied
		if not hasname then return nil end
		-- If this field is on the blacklist, then return nil
		-- plain search, so that punctuation in the field name is not read as a Lua pattern
		if blacklist:find(fieldname, 1, true) then return nil end
	end

	-- If we got this far then we're not on the blacklist
	-- So see if this field is on the whitelist; a missing name counts as "not listed"
	if not whitelist then return nil end
	if whitelist ~= 'ALL' and not (hasname and whitelist:find(fieldname, 1, true)) then
		return nil
	end

	local entity = mw.wikibase.getEntityObject(qid)
	local props = entity and entity.claims and entity.claims[propertyID]
	if not props then return nil end

	-- Scan through the values of the property
	-- we want something like property is P793, significant event (in propertyID)
	-- whose value is something like Q385378, construction (in propvalue)
	-- then we can return the value(s) of a qualifier such as P580, start time (in qualifierID)
	for _, claim in ipairs(props) do
		local snak = claim.mainsnak
		local dv = snak.datavalue
		-- It must be a wiki-linked value that is the target (in propvalue) and has qualifiers
		local matches = snak.snaktype == "value"
			and dv ~= nil
			and dv.type == "wikibase-entityid"
			and dv.value.id == propvalue
			and claim.qualifiers ~= nil

		if matches and onlysrc then
			-- count how many refs are sourced to something other than Wikipedia
			local numrefs = 0
			for _, ref in pairs(claim.references or {}) do
				local rendered = mw.wikibase.renderSnaks(ref.snaks)
				if not rendered:find("Wikipedia") then numrefs = numrefs + 1 end
			end
			-- an unsourced claim is skipped, but a later matching claim may still qualify
			matches = numrefs > 0
		end

		-- if we've got this far, we have a (sourced) claim with qualifiers
		-- which matches the target, so find the value(s) of the qualifier we want
		local quals = matches and claim.qualifiers[qualifierID]
		if quals and quals[1] then
			if quals[1].datatype == "wikibase-item" then
				local out = {}
				for _, qsnak in ipairs(quals) do
					-- qualifiers set to "no value" / "unknown value" carry no datavalue
					local qdv = qsnak.datavalue
					if qdv and qdv.value then
						local valueID = qdv.value.id
						local sitelink = mw.wikibase.sitelink(valueID)
						local label = labelOrId(valueID)
						if sitelink then
							out[#out + 1] = "[[" .. sitelink .. "|" .. label .. "]]"
						else
							out[#out + 1] = "[[:d:" .. valueID .. "|" .. label .. "]]&nbsp;<span title='" .. i18n["errors"]["local-article-not-found"] .. "'>[[File:Wikidata-logo.svg|16px|alt=|link=]]</span>"
						end
					end
				end
				if #out > 0 then
					return table.concat(out, ", ")
				end
			else
				return mw.wikibase.renderSnaks(quals)
			end
		end
	end -- of loop through values of propertyID
	return nil
end

-------------------------------------------------------------------------------
-- getLink returns the label for a Qid wiki-linked to the local article (if the article exists)
-- if label doesn't exist, it returns the Qid wiki-linked to the local article (if the article exists)
--
p.getLink = function(frame)
	-- unnamed parameter 1 is the Qid; nothing is returned when it is blank
	local itemID = mw.text.trim(frame.args[1] or "")
	if itemID == "" then return end
	local target = mw.wikibase.sitelink(itemID)
	-- sanitised label, or the Qid itself when the item has no label
	local text = labelOrId(itemID)
	if not target then
		-- no local article, so the text is returned unlinked
		return text
	end
	return "[[" .. target .. "|" .. text .. "]]"
end


-------------------------------------------------------------------------------
-- getLabel returns the label for a Qid
-- if label doesn't exist, it returns the Qid
--
p.getLabel = function(frame)
	-- unnamed parameter 1 is the Qid; a blank one produces no return value
	local itemID = mw.text.trim(frame.args[1] or "")
	if itemID ~= "" then
		return labelOrId(itemID)
	end
end


return p