Module:WikidataIB

From Random Island Wiki
Revision as of 09:03, 16 May 2017 by >RexxS (internal documentation)
Jump to navigation Jump to search

Documentation for this module may be created at Module:WikidataIB/doc

-- Module to try out use of a blacklist and whitelist for infobox fields
-- can take a named parameter |qid which is the Wikidata ID for the article. This will not normally be used
-- Fields in blacklist are never to be displayed, i.e. module must return nil in all circumstances
-- Fields in whitelist return local value if it exists or the Wikidata value otherwise
-- The name of the field that this function is called from is passed in named parameter |name
-- The name is compulsory when blacklist or whitelist is used, so the module returns nil if it is not supplied
-- blacklist is passed in named parameter |suppressfields
-- whitelist is passed in named parameter |fetchwikidata

-- Table that collects the module's exported functions; it is returned on the last line
local p = {}

-- Message texts used by the module, grouped by category so they can be
-- translated in one place. Only the "errors" category exists at present.
local i18n = {
	errors = {
		["property-not-found"] = "Property not found.",
		["entity-not-found"] = "Wikidata entity not found.",
		["unknown-claim-type"] = "Unknown claim type.",
		["unknown-entity-type"] = "Unknown entity type.",
		["qualifier-not-found"] = "Qualifier not found.",
		["site-not-found"] = "Wikimedia project not found.",
		["unknown-datetime-format"] = "Unknown datetime format.",
		-- shown as a tooltip when a value has a Wikidata item but no local page
		["local-article-not-found"] = "Article is available on Wikidata, but not on Wikipedia",
	},
}

-------------------------------------------------------------------------------
-- Private functions
-------------------------------------------------------------------------------

-- formatDate takes a datetime of the usual format from mw.wikibase.entity:formatPropertyValues
-- like "1 August 30 BCE" as parameter 1 and formats it according to the df (date format) and bc parameters
-- df = ["dmy" / "mdy" / "y"] default will be "dmy"
-- bc = ["BC" / "BCE"] default will be "BCE"
-- first the local version
local format_Date = function(datetime, dateformat, bc)
	-- Reformats a single date such as "1 August 30 BCE".
	--   datetime    text of the date. Only the first run of letters, digits and
	--               spaces is used, so additional values or a time of day that
	--               follow any punctuation are dropped. A nil value is replaced
	--               by the sample date "1 August 30 BCE".
	--   dateformat  "dmy" (default), "mdy" or "y"; case-insensitive
	--   bc          "BC" (case-insensitive) selects the suffix " BC" for dates
	--               before the common era; anything else selects " BCE"
	-- Returns the formatted date as a string. Input that contains no letters
	-- or digits yields the empty string instead of raising an error.
	local rawdate = datetime or "1 August 30 BCE"

	-- string.match returns nil when nothing matches (e.g. an empty string),
	-- so fall back to "" to keep the gmatch below from failing
	local dateval = string.match(rawdate, "[%w ]+") or ""

	local order = string.lower(dateformat or "dmy")

	-- only two suffixes are accepted: BC, or the default BCE
	-- **internationalise later**
	local suffix = " BCE"
	if string.upper(bc or "") == "BC" then
		suffix = " BC"
	end

	-- split the date into words, noting (and dropping) a "BCE" marker
	local before_era = false
	local dateparts = {}
	for word in string.gmatch(dateval, "%w+") do
		if word == "BCE" then -- **internationalise later**
			before_era = true
		else
			dateparts[#dateparts + 1] = word
		end
	end
	-- dates in the common era carry no suffix **internationalise later**
	if not before_era then suffix = "" end

	-- The separator used when the parts are re-ordered is a non-breaking space.
	-- It is written as the UTF-8 byte escape for U+00A0 so that it cannot be
	-- silently turned into an ordinary space by an editor or a copy-paste.
	local sep = "\194\160"
	-- by default the parts stay in the order in which they were supplied
	local fdate = table.concat(dateparts, " ")

	if #dateparts == 3 then
		-- day month year
		if order == "y" then
			fdate = dateparts[3]
		elseif order == "mdy" then
			fdate = dateparts[2] .. sep .. dateparts[1] .. "," .. sep .. dateparts[3]
		end
	elseif #dateparts == 2 and order == "y" then
		-- month year
		fdate = dateparts[2]
	end

	return fdate .. suffix
end

-- now wrap it up to export it
function p.formatDate(frame)
	-- Exported wrapper around format_Date:
	-- the first unnamed argument is the date, |df= the date format, |bc= the era suffix
	local args = frame.args
	return format_Date(args[1], args.df, args.bc)
end

-- parseParam takes a (string) parameter, e.g. from the list of frame arguments,
-- and makes "false", "no", and "0" into the (boolean) false
-- it makes the empty string and nil into the (boolean) value passed as default
-- allowing the parameter to be true or false by default.
local parseParam = function(param, default)
	-- Converts a text switch into a boolean.
	--   param    the text to interpret (may be nil)
	--   default  value handed back unchanged when param is nil or empty
	-- Returns false for "false", "no" and "0" (in any letter case),
	-- true for every other non-empty text.
	if not param or #param == 0 then
		return default
	end
	local lowered = param:lower()
	return not (lowered == "false" or lowered == "no" or lowered == "0")
end

-- The label in a Wikidata item is subject to vulnerabilities 
-- that an attacker might try to exploit.
-- It needs to be 'sanitised' by removing any wikitext before use.
-- If it doesn't exist, just return the id for the item
local labelOrId = function (id)
	-- Looks up the label for a Wikidata id.
	-- A label that exists is passed through mw.text.nowiki before it is returned;
	-- when there is no label the id itself is returned unchanged.
	local label = mw.wikibase.label(id)
	if not label then
		return id
	end
	return mw.text.nowiki(label)
end

-------------------------------------------------------------------------------
-- Public functions
-------------------------------------------------------------------------------

-- getValue is used to get a value, or a comma separated list of them if multiple values exist
--
p.getValue = function(frame)
	-- Returns the value of a Wikidata property for display in an infobox field,
	-- or a comma separated list of values when several exist.
	--
	-- Arguments read from frame.args:
	--   1               the property ID
	--   2               a locally supplied value; when non-blank it is returned
	--                   as it is (unless the field is suppressed)
	--   qid             Wikidata ID of the item to read; when absent
	--                   mw.wikibase.getEntityObject is called without an id
	--   name            name of the infobox field making the call
	--   suppressfields  blacklist: a field named in it always returns nil
	--   fetchwikidata   whitelist: "ALL", or text containing the field names
	--   onlysourced     boolean, default true: only accept values that have a
	--                   reference whose rendered text does not contain "Wikipedia"
	--   noicon          boolean, default false: omit the trailing edit icon
	--   wdlinks         boolean, default false: link to Wikidata when no local
	--                   page is available for a value
	--   sorted          true only for "true"/"yes"/"1": sort the list of values
	--   df, bc          passed on to format_Date for values of datatype "time"
	--
	-- Returns wikitext, or nil when there is nothing to display.
	-- Field names are looked up in the lists as plain text (not as Lua
	-- patterns), so names containing characters such as "-" or "." work.
	local args = frame.args
	local propertyID = mw.text.trim(args[1] or "")

	-- a blank local value counts as no local value
	local input_parm = mw.text.trim(args[2] or "")
	if #input_parm == 0 then input_parm = nil end

	local qid = args.qid
	if qid and (#qid == 0) then qid = nil end

	local blacklist = args.suppressfields
	local whitelist = args.fetchwikidata

	-- a blank field name counts as no field name
	local fieldname = args.name
	if fieldname and (#fieldname == 0) then fieldname = nil end

	local onlysrc = parseParam(args.onlysourced, true)
	local noic = parseParam(args.noicon, false)
	local wdl = parseParam(args.wdlinks, false)

	-- unlike the switches above, sorted has to be switched on explicitly
	local sorted = false
	local sortarg = args.sorted
	if sortarg and (#sortarg > 0) then
		sortarg = sortarg:lower()
		sorted = (sortarg == "true") or (sortarg == "yes") or (sortarg == "1")
	end

	if blacklist then
		-- The name is compulsory when a blacklist is used
		if not fieldname then return nil end
		-- The blacklist overrides a locally supplied value as well
		if blacklist:find(fieldname, 1, true) then return nil end
	end

	-- Not suppressed, so a local value wins over Wikidata
	if input_parm then return input_parm end

	-- Without a whitelist entry for this field nothing is fetched.
	-- The name is compulsory here too, except when everything is whitelisted.
	if not whitelist then return nil end
	if whitelist ~= 'ALL' then
		if not fieldname then return nil end
		if not whitelist:find(fieldname, 1, true) then return nil end
	end

	local entity = mw.wikibase.getEntityObject(qid)
	local props = entity and entity.claims and entity.claims[propertyID]
	-- no property stored for this item (or an empty list of claims)
	if not props or not props[1] then return nil end

	local lang = mw.language.getContentLanguage().code
	local thisQid = qid or entity.id
	local icon = " [[File:Blue pencil.svg |frameless |text-top |10px |alt=Edit this on Wikidata |link=https://www.wikidata.org/wiki/" .. thisQid .. "?uselang=" .. lang .. "#" .. propertyID .. "|Edit this on Wikidata]]"

	-- true when the claim has at least one reference whose rendered text
	-- does not contain "Wikipedia"
	local function hasSourcedRef(claim)
		if claim.references then
			for _, ref in ipairs(claim.references) do
				local rendered = mw.wikibase.renderSnaks(ref.snaks)
				if not rendered:find("Wikipedia", 1, true) then return true end
			end
		end
		return false
	end

	local first = props[1].mainsnak
	if first.snaktype == "value" and first.datavalue.type == "wikibase-entityid" then
		-- the values are Wikidata items, so output each as a link if possible
		local out = {}
		for _, claim in ipairs(props) do
			local snak = claim.mainsnak
			-- claims without a concrete value have no datavalue to link to, so skip them;
			-- otherwise accept the claim if it is sourced or if all values are wanted
			if snak.snaktype == "value" and ((not onlysrc) or hasSourcedRef(claim)) then
				local qnumber = "Q" .. snak.datavalue.value["numeric-id"]
				local sitelink = mw.wikibase.sitelink(qnumber)
				local label = labelOrId(qnumber)
				if sitelink then
					out[#out + 1] = "[[" .. sitelink .. "|" .. label .. "]]"
				else
					-- no sitelink, so check for a local page with the label as title;
					-- mw.title.new returns nil when the label is not a valid title
					local artitle = mw.title.new(label, 0)
					if artitle and artitle.id > 0 then
						if artitle.isRedirect then
							-- a redirect with the same title as the label: link to that
							out[#out + 1] = "[[" .. label .. "]]"
						end
						-- a page that is not a redirect exists under the label's title
						-- without being linked to the item: the value is left out, as before
					elseif wdl then
						-- show that there's a Wikidata entry available
						out[#out + 1] = "[[:d:" .. qnumber .. "|" .. label .. "]]&nbsp;<span title='" .. i18n["errors"]["local-article-not-found"] .. "'>[[File:Wikidata-logo.svg|16px|alt=|link=]]</span>"
					else
						-- no wikidata links required, so just give the plain label
						out[#out + 1] = label
					end
				end
			end
		end
		-- nothing left, e.g. because no value had a valid reference
		if #out == 0 then return nil end
		if sorted then table.sort(out) end
		local list = table.concat(out, ", ")
		if noic then return list end
		return list .. icon
	end

	-- Not a linkable value: the whole property is formatted in one go,
	-- so it is accepted when any of its claims is sourced
	if onlysrc then
		local sourced = false
		for _, claim in ipairs(props) do
			if hasSourcedRef(claim) then
				sourced = true
				break
			end
		end
		if not sourced then return nil end
	end

	local propertyValue = entity:formatPropertyValues(propertyID).value
	if first.datatype == "time" then
		propertyValue = format_Date(propertyValue, args.df, args.bc)
	end
	if noic then return propertyValue end
	return propertyValue .. icon
end


-------------------------------------------------------------------------------
-- getSourcedValue is used to get a value, or a comma separated list of them if multiple values exist
-- but only values that are sourced are returned
-- redundant to getValue with onlysourced=true but kept for backwards compatibility
-- now defined via getValue
--
function p.getSourcedValue(frame)
	-- Same as getValue, with the onlysourced switch forced on:
	-- the switch is written into the frame's arguments before delegating
	local args = frame.args
	args.onlysourced = "yes"
	return p.getValue(frame)
end


-------------------------------------------------------------------------------
-- getCoords is used to get coordinates for display in an infobox
-- whitelist and blacklist are implemented
-- optional 'display' parameter is allowed, defaults to "inline, title"
--
p.getCoords = function(frame)
	-- Returns the coordinates (property P625) of an item, rendered through
	-- the 'coord' template, for display in an infobox.
	--
	-- Arguments read from frame.args:
	--   1               a locally supplied value; when non-blank it is returned
	--                   as it is (unless the field is suppressed)
	--   qid             Wikidata ID of the item to read; when absent
	--                   mw.wikibase.getEntityObject is called without an id
	--   display         passed to the template as |display=, default "inline, title"
	--   name            name of the infobox field making the call
	--   suppressfields  blacklist: a field named in it always returns nil
	--   fetchwikidata   whitelist: "ALL", or text containing the field names
	--
	-- Returns wikitext, or nil when there is nothing to display.
	-- Field names are looked up in the lists as plain text (not as Lua
	-- patterns). When the property has several values only the first
	-- latitude/longitude pair is used.
	local propertyID = "P625"
	local args = frame.args

	-- a blank local value counts as no local value
	local input_parm = mw.text.trim(args[1] or "")
	if #input_parm == 0 then input_parm = nil end

	local qid = args.qid
	if qid and (#qid == 0) then qid = nil end

	local disp = args.display
	if (not disp) or (#disp == 0) then
		disp = "inline, title"
	end

	local blacklist = args.suppressfields
	local whitelist = args.fetchwikidata

	-- it's probably 'coords' but we can't be certain;
	-- a blank field name counts as no field name
	local fieldname = args.name
	if fieldname and (#fieldname == 0) then fieldname = nil end

	if blacklist then
		-- The name is compulsory when a blacklist is used
		if not fieldname then return nil end
		if blacklist:find(fieldname, 1, true) then return nil end
	end

	if input_parm then return input_parm end

	-- The name is compulsory for the whitelist too, except when everything is whitelisted
	if not whitelist then return nil end
	if whitelist ~= 'ALL' then
		if not fieldname then return nil end
		if not whitelist:find(fieldname, 1, true) then return nil end
	end

	local entity = mw.wikibase.getEntityObject(qid)
	local props = entity and entity.claims and entity.claims[propertyID]
	-- no coords in Wikidata for this item
	if not props then return nil end

	-- latitude and longitude arrive like nn°nn&#39;nn.n&#34;N, nn°nn&#39;nn.n&#34;W
	-- NOTE(review): assumes values are separated by ", " as the original code did - confirm
	local coords = entity:formatPropertyValues(propertyID).value
	-- latitude: from the start to the first comma
	-- longitude: what follows that comma, up to the next comma or space,
	-- so that a second coordinate value cannot get mixed into the pair
	local lat, long = mw.ustring.match(coords, "^([^,]*),%s*([^,%s]*)")
	if not lat or lat == "" or long == "" then return nil end

	-- turn the numeric html entities for the minute and second marks into ":"
	-- so that their digits are not mistaken for parts of the coordinate
	lat = (lat:gsub("&#%d%d;", ":"))
	long = (long:gsub("&#%d%d;", ":"))

	-- unnamed template parameters: the numbers of the latitude, N/S,
	-- the numbers of the longitude, E/W
	local lat_long = {}
	local function appendParts(part)
		for num in mw.ustring.gmatch(part, "%d+%.?%d*") do
			lat_long[#lat_long + 1] = num
		end
		-- the hemisphere letter is the last character
		lat_long[#lat_long + 1] = part:sub(-1)
	end
	appendParts(lat)
	appendParts(long)

	-- add named parameter for display
	lat_long["display"] = disp
	-- invoke template Coord with the values stored in the table
	return frame:expandTemplate{title = 'coord', args = lat_long}
end


-------------------------------------------------------------------------------
-- getQualifierValue is used to get a formatted value of a qualifier
-- 
-- The call needs:	a property (the unnamed parameter or 1=)
-- 					a target value for that property (pval=)
--					a qualifier for that target value (qual=)
-- The usual whitelisting and blacklisting of the property is implemented
-- The boolean onlysourced= parameter can be set to return nothing
-- when the property is unsourced (or only sourced to Wikipedia)
-- 
p.getQualifierValue = function(frame)
	-- Returns the formatted value(s) of a qualifier of one particular claim.
	--
	-- Arguments read from frame.args:
	--   1               the property ID, e.g. P793 (significant event)
	--   pval            the item ID that the property must have as its value,
	--                   e.g. Q385378 (construction)
	--   qual            the property ID of the qualifier to return,
	--                   e.g. P580 (start time)
	--   qid             Wikidata ID of the item to read; when absent
	--                   mw.wikibase.getEntityObject is called without an id
	--   name            name of the infobox field making the call
	--   suppressfields  blacklist: a field named in it always returns nil
	--   fetchwikidata   whitelist: "ALL", or text containing the field names
	--   onlysourced     boolean, default false ("false", "no" and "0" switch it
	--                   off): return nothing when the matching claim has no
	--                   reference whose rendered text does not contain "Wikipedia"
	--
	-- Returns wikitext, or nil when there is nothing to display.
	-- Field names are looked up in the lists as plain text (not as Lua patterns).
	local args = frame.args
	local propertyID = mw.text.trim(args[1] or "")
	local propvalue = args.pval
	local qualifierID = args.qual

	local qid = args.qid
	if qid and (#qid == 0) then qid = nil end

	local blacklist = args.suppressfields
	local whitelist = args.fetchwikidata

	-- a blank field name counts as no field name
	local fieldname = args.name
	if fieldname and (#fieldname == 0) then fieldname = nil end

	-- parseParam compares against the text "0", so |onlysourced=0 switches
	-- the check off as documented
	local onlysrc = parseParam(args.onlysourced, false)

	if blacklist then
		-- The name is compulsory when a blacklist is used
		if not fieldname then return nil end
		if blacklist:find(fieldname, 1, true) then return nil end
	end

	-- The name is compulsory for the whitelist too, except when everything is whitelisted
	if not whitelist then return nil end
	if whitelist ~= 'ALL' then
		if not fieldname then return nil end
		if not whitelist:find(fieldname, 1, true) then return nil end
	end

	local entity = mw.wikibase.getEntityObject(qid)
	local props = entity and entity.claims and entity.claims[propertyID]
	if not props then return nil end

	-- Scan through the values of the property for the target item
	for _, claim in ipairs(props) do
		local snak = claim.mainsnak
		if snak.snaktype == "value"
			and snak.datavalue.type == "wikibase-entityid"
			and snak.datavalue.value.id == propvalue
			and claim.qualifiers
		then
			if onlysrc then
				-- count the references that are not sourced to Wikipedia
				local numrefs = 0
				if claim.references then
					for _, ref in ipairs(claim.references) do
						local rendered = mw.wikibase.renderSnaks(ref.snaks)
						if not rendered:find("Wikipedia", 1, true) then numrefs = numrefs + 1 end
					end
				end
				-- sourcing is required and the matching claim has none
				if numrefs == 0 then return nil end
			end

			-- a (sourced) claim with qualifiers that matches the target:
			-- find the value(s) of the wanted qualifier
			local quals = claim.qualifiers[qualifierID]
			if quals and quals[1] then
				if quals[1].datatype == "wikibase-item" then
					local out = {}
					for _, qsnak in ipairs(quals) do
						-- qualifiers without a concrete value have no datavalue, so skip them
						if qsnak.snaktype == "value" then
							local valueID = qsnak.datavalue.value.id
							local sitelink = mw.wikibase.sitelink(valueID)
							local label = labelOrId(valueID)
							if sitelink then
								out[#out + 1] = "[[" .. sitelink .. "|" .. label .. "]]"
							else
								out[#out + 1] = "[[:d:" .. valueID .. "|" .. label .. "]]&nbsp;<span title='" .. i18n["errors"]["local-article-not-found"] .. "'>[[File:Wikidata-logo.svg|16px|alt=|link=]]</span>"
							end
						end
					end
					if #out == 0 then return nil end
					return table.concat(out, ", ")
				else
					return mw.wikibase.renderSnaks(quals)
				end
			end
			-- the wanted qualifier is missing here: keep looking at further claims
		end
	end
	return nil
end

-------------------------------------------------------------------------------
-- getLink returns the label for a Qid wiki-linked to the local article (if the article exists)
-- if label doesn't exist, it returns the Qid wiki-linked to the local article (if the article exists)
--
function p.getLink(frame)
	-- The first unnamed argument is the Wikidata id; a blank id returns nothing.
	-- The link text is the sanitised label, or the id when no label exists.
	-- With a sitelink the text is piped to the local page, otherwise the bare text is returned.
	local qid = mw.text.trim(frame.args[1] or "")
	if #qid == 0 then return end
	local target = mw.wikibase.sitelink(qid)
	local text = labelOrId(qid)
	if not target then
		return text
	end
	return "[[" .. target .. "|" .. text .. "]]"
end


-------------------------------------------------------------------------------
-- getLabel returns the label for a Qid
-- if label doesn't exist, it returns the Qid
--
function p.getLabel(frame)
	-- The first unnamed argument is the Wikidata id; a blank id returns nothing.
	-- Otherwise the result is whatever labelOrId gives for the id.
	local qid = mw.text.trim(frame.args[1] or "")
	if qid ~= "" then
		return labelOrId(qid)
	end
end


-- Hand the table of exported functions back to whatever loaded this module
return p