Moduł:Vespertviro10000/parseEO

Z Wikisłownika – wolnego słownika wielojęzycznego

Dokumentacja dla tego modułu może zostać utworzona pod nazwą Moduł:Vespertviro10000/parseEO/opis

-- Values of the `contentType` field of the items built by w.splitIntoItems:
-- a run of separator characters (true) or a run of non-separator characters (false).
local itemSeparator = true
local itemNotSeparator = false

-- Values accepted as `examplesMode` by w.itemToWikiText / w.itemsToText.
local withExamplesCheck = true
local withoutExamplesCheck = false

-- Values accepted as `usageMode` by w.itemToWikiText / w.itemsToText.
local withUsageCheck = true
local withoutUsageCheck = false

-- Module table: the functions below are attached to it.
local w = {
	-- Page wikitext keyed by page name, filled by w.getPageContent
	-- ('' is stored when no content could be read).
	pagesCache = {},
	-- Word items keyed by their content, filled by w.parseItems.
	targetsCache = {},
	
	-- Suffix table read by w.applyEndings: a word ending in one of the listed
	-- suffixes gets that suffix replaced by the key of the list.
	endings = {
		a = { 'an', 'aj', 'ajn' },
		o = { 'on', 'oj', 'ojn' },
		i = { 'as', 'is', 'os', 'us', 'u', 'anta', 'inta', 'onta', 'ante', 'inte', 'onte', 'ata', 'ita', 'ota' },
		e = { 'en' }
	}
}

-- Characters that count as part of a word. w.findSeparators removes every one
-- of them from a text; whatever is left is taken as the separator characters.
local eoInWordCharacters = {
	"A", "B", "C", "Ĉ", "D", "E", "F", "G", "Ĝ", "H", "Ĥ", "I", "J", "Ĵ", "K", "L", "M", "N", "O", "P", "R", "S", "Ŝ", "T", "U", "Ŭ", "V", "Z",
	"a", "b", "c", "ĉ", "d", "e", "f", "g", "ĝ", "h", "ĥ", "i", "j", "ĵ", "k", "l", "m", "n", "o", "p", "r", "s", "ŝ", "t", "u", "ŭ", "v", "z",
	"0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
	"-"
}

-- Return the wikitext of a page, reading it at most once per page name.
-- pageName: name handed to mw.title.makeTitle together with an empty
--           namespace argument.
-- Returns the page content, or '' when no title object could be built or the
-- title yields no content. The result (including '') is stored in w.pagesCache.
function w.getPageContent(pageName)
	local cached = w.pagesCache[pageName]
	if cached ~= nil then
		return cached
	end

	local text = nil
	local titleObject = mw.title.makeTitle( "", pageName )
	if titleObject ~= nil then
		text = titleObject:getContent()
	end
	if text == nil then
		-- Remember the failure as an empty page so it is not retried.
		text = ''
	end

	w.pagesCache[pageName] = text
	return text
end

-- Tell whether the page's wikitext contains the literal '{{esperanto}}'.
-- pageName: page name as accepted by w.getPageContent.
-- Returns true or false.
function w.existsInEO(pageName)
	-- Only the first result of w.find (the start position) is of interest.
	local position = w.find( w.getPageContent(pageName), '{{esperanto}}' )
	return position ~= nil
end

-- Plain-text (no pattern matching) search from the start of mainString.
-- Returns whatever mw.ustring.find returns for that search; callers in this
-- file compare the first result against nil or 1.
function w.find( mainString, subString )
	local startAt = 1
	local plainSearch = true
	return mw.ustring.find( mainString, subString, startAt, plainSearch )
end

-- Collect the distinct characters of mainString that are not listed in
-- eoInWordCharacters.
-- Returns a string holding each such character once, in order of first
-- appearance.
function w.findSeparators( mainString )
	-- Step 1: delete every in-word character by splitting on it and gluing
	-- the pieces back together.
	local leftover = mainString
	for _, letter in pairs(eoInWordCharacters) do
		local pieces = mw.text.split( leftover, letter, true )
		leftover = table.concat(pieces)
	end

	-- Step 2: keep the first occurrence of each remaining character.
	local distinct = ''
	for index = 1, mw.ustring.len(leftover) do
		local character = mw.ustring.sub(leftover, index, index)
		local alreadyKnown = w.find( distinct, character ) ~= nil
		if not alreadyKnown then
			distinct = distinct .. character
		end
	end
	return distinct
end

-- Classify a character against a string of separator characters.
-- Returns itemSeparator when `character` occurs in `separators`,
-- itemNotSeparator otherwise.
function w.getCharacterType( character , separators)
	if w.find( separators , character ) == nil then
		return itemNotSeparator
	end
	return itemSeparator
end

-- Cut mainString into maximal runs of characters of the same type.
-- separators: string of characters classified as separators
--             (see w.getCharacterType).
-- Returns a list of tables { content = <run>, contentType = <type> } in text
-- order; neighbouring entries always differ in contentType.
function w.splitIntoItems( mainString, separators )
	local items = {}
	for position = 1, mw.ustring.len(mainString) do
		local character = mw.ustring.sub(mainString, position, position)
		local kind = w.getCharacterType( character , separators )
		local previous = items[ #items ]
		if previous ~= nil and previous.contentType == kind then
			-- Same type as the run in progress: swap it for a longer run.
			items[ #items ] = { content = previous.content .. character , contentType = kind }
		else
			-- First character, or the type changed: open a new run.
			items[ #items + 1 ] = { content = character , contentType = kind }
		end
	end
	return items
end

-- Look for a page accepted by w.existsInEO under the given name, then under
-- the same name with its first character lower-cased.
-- Returns the first name that passes, or nil when neither does.
function w.tryInEO( target )
	if not w.existsInEO(target) then
		local head = mw.ustring.sub( target, 1, 1 )
		local tail = mw.ustring.sub( target , 2 )
		local lowered = mw.ustring.lower( head ) .. tail
		if w.existsInEO(lowered) then
			return lowered
		end
		return nil
	end
	return target
end

-- Replace a suffix listed in w.endings by the key it is listed under.
-- Only words strictly longer than the suffix are rewritten.
-- Returns the rewritten word for the first suffix that matches, or `word`
-- unchanged when none does.
function w.applyEndings( word )
	local wordLength = mw.ustring.len(word)
	for replacement, suffixes in pairs( w.endings ) do
		for _, suffix in pairs( suffixes ) do
			local stemLength = wordLength - mw.ustring.len(suffix)
			if stemLength > 0 then
				local stem = mw.ustring.sub( word, 1 , stemLength )
				local tail = mw.ustring.sub( word, stemLength + 1 )
				if tail == suffix then
					return stem .. replacement
				end
			end
		end
	end
	return word
end

-- Choose the page a word should link to.
-- Tries the word itself first (w.tryInEO), then the word after
-- w.applyEndings. When neither lookup succeeds, the w.applyEndings result is
-- returned anyway.
function w.findTarget( label )
	local direct = w.tryInEO( label )
	if direct ~= nil then
		return direct
	end

	local normalized = w.applyEndings( label )
	local found = w.tryInEO( normalized )
	if found == nil then
		return normalized
	end
	return found
end

-- Split item.content into link text and trailing ending.
-- When item.content starts with item.target, the remainder after the target
-- is stored in item.ending and item.content is replaced by the target;
-- otherwise item.ending is '' and item.content is left alone.
-- Modifies and returns the same item table.
function w.findEnding( item )
	local startsWithTarget = ( w.find( item.content , item.target ) == 1 )
	if startsWithTarget then
		item.ending = mw.ustring.sub( item.content, mw.ustring.len(item.target) + 1 )
		item.content = item.target
	else
		item.ending = ''
	end
	return item
end

-- Resolve every non-separator item to its link target, sharing one item
-- table per distinct word.
-- items: list as produced by w.splitIntoItems.
-- Returns a new list in the same order. Separator items are passed through
-- unchanged. Each word is represented by the item table built on its first
-- occurrence, carrying `word` (the text as written), `target`, `content`,
-- `ending` and `used` (number of occurrences seen so far).
--
-- The cache is keyed by the word as written. w.findEnding may shorten
-- item.content to the target, so using item.content as the key after that
-- call would file e.g. "hundoj" under "hundo"; a later plain "hundo" would
-- then be rendered with the ending of "hundoj", and a repeated "hundoj"
-- would never be found again.
function w.parseItems( items )
	local parsedItems = {}
	-- ipairs: the output order must follow the text order.
	for _, item in ipairs( items ) do
		if item.contentType == itemNotSeparator then
			local word = item.content
			local cachedItem = w.targetsCache[ word ]
			if cachedItem ~= nil and cachedItem.word ~= word then
				-- The entry was recorded for a different word whose
				-- rewritten content happens to equal this word: not a hit.
				cachedItem = nil
			end
			if cachedItem == nil then
				item.word = word
				item.target = w.findTarget( word )
				item = w.findEnding( item )
				item.used = 1
				w.targetsCache[ word ] = item
				-- Keep an entry reachable under the rewritten content as
				-- well, for code that looks items up by item.content.
				if w.targetsCache[ item.content ] == nil then
					w.targetsCache[ item.content ] = item
				end
			else
				cachedItem.used = cachedItem.used + 1
				item = cachedItem
			end
		end
		parsedItems[ #parsedItems + 1 ] = item
	end
	return parsedItems
end

-- Return the part of s located after the first occurrence of s1 and before
-- the next occurrence of s2 that follows it.
-- Returns '' when s1 is not found, or when s2 is not found after it.
function w.between( s , s1 , s2 )
	local openAt = w.find( s , s1 )
	if openAt ~= nil then
		local remainder = mw.ustring.sub( s , openAt + mw.ustring.len(s1) )
		local closeAt = w.find( remainder , s2 )
		if closeAt ~= nil then
			return mw.ustring.sub( remainder , 1, closeAt - 1 )
		end
	end
	return ''
end

-- Replace every plain-text occurrence of s1 in s by s2
-- (split on s1, then join the pieces with s2).
function w.replace( s , s1 , s2 )
	local pieces = mw.text.split( s, s1, true )
	return table.concat( pieces, s2 )
end

-- Count numbered meanings and numbered examples in the part of a page
-- between '{{esperanto}}' and '{{źródła}}'.
-- target: page name.
-- Returns a table { meanings = <number>, examples = <number> }; both are 0
-- when w.existsInEO( target ) is false. Meanings are counted per line,
-- examples per distinct number.
function w.countMeaningsAndExamples( target )
	-- todo regexp
	local counts = { meanings = 0, examples = 0 }
	if not w.existsInEO( target ) then
		return counts
	end

	local section = w.between( w.getPageContent( target ) , "{{esperanto}}", "{{źródła}}" )
	-- Drop lines consisting only of ': (1.1)'.
	section = w.replace( section , '\n: (1.1)\n' , '\n' )
	-- Mark the remaining ': (' line starts with an 'x', so the number can be
	-- picked out below as the text between 'x' and ')'.
	section = w.replace( section , '\n: (' , '\n:(x' )

	local meaningLines = mw.text.split( w.between( section , "{{znaczenia}}", "{{odmiana}}" ) , '\n:(', true )
	for _, line in pairs(meaningLines) do
		if w.between( line , 'x' , ')' ) ~= '' then
			counts.meanings = counts.meanings + 1
		end
	end

	local exampleLines = mw.text.split( w.between( section , "{{przykłady}}", "{{składnia}}" ) , '\n:(', true )
	local seenNumbers = {}
	for _, line in pairs(exampleLines) do
		local number = w.between( line , 'x' , ')' )
		if number ~= '' and not seenNumbers[ number ] then
			-- Several example lines may carry the same number; count it once.
			seenNumbers[ number ] = true
			counts.examples = counts.examples + 1
		end
	end
	return counts
end

-- Render one word item as a wikilink followed by its ending.
-- item:         table with `content`, `target` and `ending` (and normally
--               `used`), as built by w.parseItems.
-- examplesMode: withExamplesCheck appends a green "<examples>/<meanings>"
--               superscript when the target page has more meanings than
--               examples.
-- usageMode:    withUsageCheck appends a gray subscript with the usage
--               counter when it is greater than 1.
-- Returns the wikitext string.
function w.itemToWikiText( item , examplesMode , usageMode )
	-- `local` is required here: a bare assignment would create a global.
	local out = '[['
	if item.content ~= item.target then
		out = out .. item.target .. '|'
	end
	out = out .. item.content .. ']]' .. item.ending
	if examplesMode == withExamplesCheck and w.existsInEO(item.target) then
		local count = w.countMeaningsAndExamples( item.target )
		if count.meanings > count.examples then
			out = out .. '<sup style="color:green;">' .. count.examples .. '/' .. count.meanings .. '</sup>'
		end
	end
	if usageMode == withUsageCheck then
		-- Prefer the counter stored on the item itself: the cache entry
		-- under item.content may belong to another word with the same
		-- rewritten content, or may be missing.
		local used = item.used
		if used == nil then
			local cached = w.targetsCache[ item.content ]
			if cached ~= nil then
				used = cached.used
			end
		end
		if used ~= nil and used > 1 then
			out = out .. '<sub style="color:gray;">' .. used .. '</sub>'
		end
	end
	return out
end

-- Concatenate a list of items into one string: separator items contribute
-- their content verbatim, all other items go through w.itemToWikiText with
-- the given modes.
function w.itemsToText( items , examplesMode , usageMode )
	local pieces = {}
	for _, item in pairs( items ) do
		local piece
		if item.contentType ~= itemSeparator then
			piece = w.itemToWikiText( item , examplesMode , usageMode )
		else
			piece = item.content
		end
		pieces[ #pieces + 1 ] = piece
	end
	return table.concat( pieces )
end

-- Entry point: turn a text into wikilinked text.
-- frame.args[1] is first tried as a page name; when no title object can be
-- built from it or the title yields no content, the argument itself is used
-- as the text to process.
-- Returns three sections separated by <hr>: the text as read, the linked
-- text with example/usage markers, and the plain linked text wrapped in a
-- nowiki tag.
function w.parseEO( frame )
	local pageName = frame.args[ 1 ] or ''

	local content = nil
	local title = mw.title.makeTitle( "", pageName )
	if title ~= nil then
		content = title:getContent()
	end
	if content == nil then
		content = pageName
	end

	local items = w.splitIntoItems( content , w.findSeparators( content ) )
	local parsedItems = w.parseItems( items )
	local linked = w.itemsToText( parsedItems , withExamplesCheck , withUsageCheck )
	local plain = w.itemsToText( parsedItems , withoutExamplesCheck , withoutUsageCheck )
	local escaped = frame:callParserFunction( '#tag', 'nowiki', plain )

	return table.concat( {
		'\n<hr><small>org.</small>\n', content,
		'\n\n<hr><small>wikilinks</small>\n', linked,
		'\n\n<hr><small>wikitext</small>\n', escaped,
		'\n\n<hr>\n'
	} )
end

-- Export the module table.
return w