-- Module:User:Sarri.greek/grk-stems/data
-- (page copied from Wiktionary, the free dictionary; the "Jump to content" navigation link was web-page residue)
-- tests at [[Module talk:User:Sarri.greek/grk-stems]]
--[=[
-- Explanations, at bottom of this page.
-- See everything veryyyy big, zoom your browser 200% or see at [[wikt:en:User:Sarri.greek/fonts#big]]
-- check codes at  [[wikt:en:Module:grc-utilities]] & [[wikt:en:Module:grc-utilities/data]]
INSTRUCTIONS
	Load this module using require(), not using mw.loadData().
	USE e.g.: local m_data = require("Module:XXX/data")
		IF: local module_path = 'Module:Yyyy'
		USE: local m_data = require(module_path .."/data")
	DO NOT USE: local m_data = mw.loadData("XXX")
	HOW to call it:
		m_data.xxxxxx e.g. m_data.unaccented_to_accented

CONTENTS
	a) simple sequences
	b) conversions
	 accented_to_unaccented // unaccented_to_oxia // perispomeni_to_oxia // oxia_to_perispomeni
	c) diphthongs and digraphs (2-vowel-sequences)
	 digraphs // digraphs_accent_back // digraphs_accented_to_unaccented

PROBLEMS SOLVED
UNORTHODOX characters: They exist only in some old editions, where whole words are in capitals, retaining their diacritics
	= Principle: when you do not use unicode at one part, then do not use unicode to the other part either
	= write those at a .txt, show it at .htm and copypaste
* CAPITAL+diaeresis+tonos. 
	= copy the capital.with.diaeresis (as one character) & copy next to it the invisible tonos
	= the tonos unicodes are invisible
	Example: IOTA.with.diaeresis+tonos as Ϊ + the invisible oxeia  - ◌́ (U+0301)
* CAPITAL+prosdiegrammeno iota+tonos
	Example Άͅ ALPHA.prosdiegrammene+tonos as ᾼ + the invisible oxia (U+0301, i.e. & # x 0 3 0 1 ; without the spaces): write at .txt, show it at .htm and copy
PROBLEMS cf ??
* FORBID all family-fonts that present the accent tonos or oxia as a small vertical line. E.g. Verdana.
	in case a reader has a personal css with such a font. Can this be controlled?
* Do it with that U? unicode? Show how it is written.
* For dichronon_oxia I do not know how to write all prosodies.
]=]--

-- Table of everything this data module exposes; it is filled in below and returned at the end of the file.
local export = {}

-- NEED: FORBID all family-fonts that present the accent tonos or oxia as a small vertical line.

--------------------------------------------------------------------------
--                         a) SIMPLE SEQUENCES                          --
--------------------------------------------------------------------------
--?? DO i need all UNORTHODOX in here? bahh


-- Bracket set of every vowel that carries a perispomeni (circumflex).
-- Such vowels are always long (macron), so no prosody marks are needed.
-- Enlarged view: [[wikt:en:User:Sarri.greek/fonts#vowel+perispomeni]]
export.vowel_perispomeni = table.concat({
	'[',
	'ᾶᾷἆᾆἇᾇ',   -- alpha
	'ῆῇἦᾖἧᾗ',   -- eta
	'ῖἶἷῗ',     -- iota
	'ῦὖὗῧ',     -- upsilon
	'ῶῷὦᾦὧᾧ',   -- omega
	'ἎᾎἏᾏ',     -- capital alpha
	'ἮᾞἯᾟ',     -- capital eta
	'ἾἿ',       -- capital iota
	'Ὗ',        -- capital upsilon
	'ὮᾮὯᾯ',     -- capital omega
	']',
})

-- Bracket set of the short (brachy) vowels ε and ο carrying an oxia ([[acute]])
-- or a baria ([[grave]]); both accents go by the name tonos.
-- No prosody marks are involved.
export.brachy_oxia = '['
	.. 'έὲἔἒἕἓ'   -- epsilon
	.. 'όὸὄὂὅὃ'   -- omicron
	.. 'ΈῈἜἚἝἛ'   -- capital epsilon
	.. 'ΌῸὌὊὍὋ'   -- capital omicron
	.. ']'

-- Bracket set of the long (macron) vowels η and ω carrying an oxia ([[acute]])
-- or a baria ([[grave]]); both accents go by the name tonos.
-- No prosody marks are involved.
-- Capital omega with tonos (Ώ) is included alongside Ὼ, mirroring Ή/Ὴ for eta,
-- and every character is listed exactly once.
export.macron_oxia = '['
	.. 'ήῄὴῂἤᾔἢᾒἥᾕἣᾓ'   -- eta
	.. 'ώῴὼῲὤᾤὢᾢὥᾥὣᾣ'   -- omega
	.. 'ΉῊἬᾜἪᾚἭᾝἫᾛ'     -- capital eta
	.. 'ΏῺὬᾬὪᾪὭᾭὫᾫ'     -- capital omega
	.. ']'

-- Two-character pattern for a diphthong (2 vowels together) whose second vowel
-- carries any tonos (okseia, bareia or perispomene).
-- NOT the dialytics ΐῒῗΰῢῧ: those split the two vowels.
-- These are always macra (macron), no need for prosody marks.
-- The second set holds accented ι/υ only; forms with just a breathing
-- (ἰ ἱ ὐ ὑ) carry no accent and are therefore left out.
export.diphthong_tonos = '[αΑεΕηΗοΟ][ίὶῖἴἲἶἵἳἷΊῚἼἺἾἽἻἿύὺῦὔὒὖὕὓὗΎῪὝὛὟ]'
		-- NOT ALL of them: [ίὶῖἰἴἲἶἱἵἳἷϊΐῒῗῐῑΙΊῚἸἼἺἾἹἽἻἿῙῘυύὺῦὐὔὒὖὑὕὓὗϋΰῢῧῠῡΥΎῪὙὝὛὟῩῨ]

-- ?? Do I NEED to write IN the function the ones with prosodies?
--[=[
-- The 3 ambiguous dichrona (dichronon = with 2 possible prosodies) are α ι υ
-- Here, we also need the characters with BOTH PROSODIES
short alpha+tonos	ᾰ̓́  -  Ᾰ̓́  -  ᾰ̔́  -  Ᾰ̔́ 	iota	upsilon	 copypaste from a .txt
long alpha+tonos	..	iota	upsilon	 copypaste from a .txt
]=]--
-- Bracket set of the dichrona (α, ι, υ) carrying an oxia ([[acute]]) or a
-- baria ([[grave]]); both accents go by the name tonos.
-- Every character is listed exactly once.
export.dichr_oxia = '['
	.. 'άᾴὰᾲἄᾄἂᾂἅᾅἃᾃ'   -- alpha
	.. 'ίὶἴἲἵἳΐῒ'       -- iota
	.. 'ύὺὔὒὕὓΰῢ'       -- upsilon
	.. 'ΆᾺἌᾌἊᾊἍᾍἋᾋ'     -- capital alpha
	.. 'ΊῚἼἺἽἻ'         -- capital iota
	.. 'ΎῪὝὛ'           -- capital upsilon
	.. ']'

-- Bracket set of every vowel carrying any kind of tonos accent:
-- oxia, baria or perispomeni.
-- Capital omega with tonos (Ώ) is included alongside Ὼ, and every character
-- is listed exactly once.
export.tonos = '['
	-- vowel + perispomeni
	.. 'ᾶᾷἆᾆἇᾇῆῇἦᾖἧᾗῖἶἷῗῦὖὗῧῶῷὦᾦὧᾧἎᾎἏᾏἮᾞἯᾟἾἿὟὮᾮὯᾯ'
	-- short vowel (ε, ο) + oxia or baria
	.. 'έὲἔἒἕἓόὸὄὂὅὃΈῈἜἚἝἛΌῸὌὊὍὋ'
	-- long vowel (η, ω) + oxia or baria
	.. 'ήῄὴῂἤᾔἢᾒἥᾕἣᾓώῴὼῲὤᾤὢᾢὥᾥὣᾣΉῊἬᾜἪᾚἭᾝἫᾛΏῺὬᾬὪᾪὭᾭὫᾫ'
	-- dichronon (α, ι, υ) + oxia or baria
	.. 'άᾴὰᾲἄᾄἂᾂἅᾅἃᾃίὶἴἲἵἳΐῒύὺὔὒὕὓΰῢΆᾺἌᾌἊᾊἍᾍἋᾋΊῚἼἺἽἻΎῪὝὛ'
	.. ']'


--------------------------------------------------------------------------
--           b) CONVERSIONS (change the characters)                     --
--------------------------------------------------------------------------
-- to see them, zoom in 170% or 200%

--------------------------------------------------------------------------
-- ? please write notes for unicodes or whatever code too
-- Remove the accent (oxia/tonos or perispomeni) from an accented vowel.
-- Key = accented character, value = the same character without the accent;
-- breathing (psile/dasia), iota subscript and dialytics are kept.
-- Forms with baria (grave) are not listed.
-- "UNORTHODOX" marks capitals that keep their diacritics (all-capital words in
-- some old editions); most of those entries are still TODO.
export.accented_to_unaccented = {
	-- α alpha (ambiguous dichronon) -- TODO: are entries with prosody marks needed here?
	-- no breathing
	['ά'] = 'α', ['Ά'] = 'Α',
	['ᾴ'] = 'ᾳ',
	['Άͅ'] = 'ᾼ', -- UNORTHODOX: ALPHA.with.iota + invisible combining tonos; write it at .txt, show it at .htm and copy-paste
	['ᾶ'] = 'α',
	-- TODO: ALPHA + perispomeni -- UNORTHODOX
	['ᾷ'] = 'ᾳ',
	-- TODO: ALPHA.with.iota + perispomeni -- UNORTHODOX
	-- with psile
	['ἄ'] = 'ἀ', ['Ἄ'] = 'Ἀ', ['ᾄ'] = 'ᾀ', ['ᾌ'] = 'ᾈ',
	['ἆ'] = 'ἀ', ['Ἆ'] = 'Ἀ', ['ᾆ'] = 'ᾀ', ['ᾎ'] = 'ᾈ',
	-- with dasia
	['ἅ'] = 'ἁ', ['Ἅ'] = 'Ἁ', ['ᾅ'] = 'ᾁ', ['ᾍ'] = 'ᾉ',
	['ἇ'] = 'ἁ', ['Ἇ'] = 'Ἁ', ['ᾇ'] = 'ᾁ', ['ᾏ'] = 'ᾉ',

	-- ε epsilon (always brachy = short, so never a perispomeni)
	['έ'] = 'ε', ['Έ'] = 'Ε',
	['ἔ'] = 'ἐ', ['Ἔ'] = 'Ἐ',
	['ἕ'] = 'ἑ', ['Ἕ'] = 'Ἑ',

	-- η eta (always macron = long)
	['ή'] = 'η', ['Ή'] = 'Η',
	['ῄ'] = 'ῃ',
	-- TODO: ETA.with.iota + oxia -- UNORTHODOX
	['ῆ'] = 'η',
	-- TODO: ETA + perispomeni -- UNORTHODOX
	['ῇ'] = 'ῃ',
	-- TODO: ETA.with.iota + perispomeni -- UNORTHODOX
	-- with psile
	['ἤ'] = 'ἠ', ['Ἤ'] = 'Ἠ', ['ᾔ'] = 'ᾐ', ['ᾜ'] = 'ᾘ',
	['ἦ'] = 'ἠ', ['Ἦ'] = 'Ἠ', ['ᾖ'] = 'ᾐ', ['ᾞ'] = 'ᾘ',
	-- with dasia (the last pair of each row is the capital with iota, as in
	-- the psile rows; a repeated 'Ἥ'/'Ἧ' key would leave ᾝ and ᾟ unconverted)
	['ἥ'] = 'ἡ', ['Ἥ'] = 'Ἡ', ['ᾕ'] = 'ᾑ', ['ᾝ'] = 'ᾙ',
	['ἧ'] = 'ἡ', ['Ἧ'] = 'Ἡ', ['ᾗ'] = 'ᾑ', ['ᾟ'] = 'ᾙ',

	-- ι iota (ambiguous dichronon) -- TODO: are entries with prosody marks needed here?
	-- no breathing; dialytics (diaeresis) possible
	['ί'] = 'ι', ['Ί'] = 'Ι',
	['ΐ'] = 'ϊ',
	-- UNORTHODOX IOTA + dialytics + tonos.
	-- Lowercase reference: https://www.compart.com/en/unicode/U+0390
	-- What worked for the capital: copy-paste IOTA.with.dialytics (Ϊ, one
	-- character) and copy-paste the invisible combining tonos (U+0301) after it.
	-- Principle: when you do not use unicode escapes for one part, do not use
	-- them for the other part either. Typing both parts as codes FAILED.
	['Ϊ́'] = 'Ϊ',
	['ῖ'] = 'ι',
	-- TODO: IOTA + perispomeni -- UNORTHODOX
	['ῗ'] = 'ϊ',
	-- TODO: IOTA.with.dialytics + perispomeni -- UNORTHODOX
	-- with psile
	-- TODO: psile+okseia / psile+perispomeni does not convert to
	-- IOTA WITH PSILI (U+1F38) in accent shifts
	['ἴ'] = 'ἰ', ['Ἴ'] = 'Ἰ',
	['ἶ'] = 'ἰ', ['Ἶ'] = 'Ἰ',
	-- with dasia
	['ἵ'] = 'ἱ', ['Ἵ'] = 'Ἱ',
	['ἷ'] = 'ἱ', ['Ἷ'] = 'Ἱ',

	-- ο omicron (always brachy = short, so never a perispomeni)
	['ό'] = 'ο', ['Ό'] = 'Ο',
	['ὄ'] = 'ὀ', ['Ὄ'] = 'Ὀ',
	['ὅ'] = 'ὁ', ['Ὅ'] = 'Ὁ',

	-- υ upsilon (ambiguous dichronon) -- TODO: are entries with prosody marks needed here?
	-- no breathing; dialytics (diaeresis) possible
	['ύ'] = 'υ', ['Ύ'] = 'Υ',
	['ΰ'] = 'ϋ',
	-- TODO: UPSILON.with.diaeresis + oxia -- UNORTHODOX
	['ῦ'] = 'υ',
	-- TODO: UPSILON + perispomeni -- UNORTHODOX
	['ῧ'] = 'ϋ', -- counterpart of ['ῗ'] = 'ϊ'
	-- TODO: UPSILON.with.diaeresis + perispomeni -- UNORTHODOX
	-- with psile
	['ὔ'] = 'ὐ', ['ὖ'] = 'ὐ',
	-- with dasia
	['ὕ'] = 'ὑ', ['Ὕ'] = 'Ὑ',
	['ὗ'] = 'ὑ', ['Ὗ'] = 'Ὑ',

	-- ω omega (always macron = long)
	['ώ'] = 'ω', ['Ώ'] = 'Ω',
	['ῴ'] = 'ῳ',
	-- TODO: OMEGA.with.iota + oxia -- UNORTHODOX
	['ῶ'] = 'ω',
	['ῷ'] = 'ῳ',
	-- with psile
	['ὤ'] = 'ὠ', ['Ὤ'] = 'Ὠ', ['ᾤ'] = 'ᾠ', ['ᾬ'] = 'ᾨ',
	['ὦ'] = 'ὠ', ['Ὦ'] = 'Ὠ', ['ᾦ'] = 'ᾠ', ['ᾮ'] = 'ᾨ',
	-- with dasia
	['ὥ'] = 'ὡ', ['Ὥ'] = 'Ὡ', ['ᾥ'] = 'ᾡ', ['ᾭ'] = 'ᾩ',
	['ὧ'] = 'ὡ', ['Ὧ'] = 'Ὡ', ['ᾧ'] = 'ᾡ', ['ᾯ'] = 'ᾩ',
}

--------------------------------------------------------------------------
-- Place an accent (okseia) on an unaccented vowel.
-- Key = unaccented character, value = the same character with oxia.
-- For unaccented-to-perispomeni (circumflex, polytonic): see oxia_to_perispomeni.
-- TODO: more pairs are still needed, including all UNORTHODOX ones.
export.unaccented_to_oxia = {
	-- alpha
	['α'] = 'ά',
	['Α'] = 'Ά',
	['ᾳ'] = 'ᾴ',
	['ἀ'] = 'ἄ', -- with psile
	['ἁ'] = 'ἅ', -- with daseia

	-- epsilon
	['ε'] = 'έ',
	['Ε'] = 'Έ',
	['ἐ'] = 'ἔ', -- with psile
	['ἑ'] = 'ἕ', -- with daseia

	-- eta
	['η'] = 'ή',
	['Η'] = 'Ή',
	['ῃ'] = 'ῄ',
	['ἠ'] = 'ἤ', -- with psile
	['ἡ'] = 'ἥ', -- with daseia

	-- iota
	['ι'] = 'ί',
	['Ι'] = 'Ί',
	['ϊ'] = 'ΐ',
	['ἰ'] = 'ἴ', -- with psile
	['ἱ'] = 'ἵ', -- with daseia

	-- omicron
	['ο'] = 'ό',
	['Ο'] = 'Ό',
	['ὀ'] = 'ὄ', -- with psile
	['ὁ'] = 'ὅ', -- with daseia

	-- upsilon
	['υ'] = 'ύ',
	['Υ'] = 'Ύ', -- must be the oxia form Ύ, not the baria (grave) form Ὺ
	['ϋ'] = 'ΰ',
	['ὐ'] = 'ὔ', -- with psile
	['ὑ'] = 'ὕ', -- with daseia

	-- omega
	['ω'] = 'ώ',
	['Ω'] = 'Ώ',
	['ῳ'] = 'ῴ',
	['ὠ'] = 'ὤ', -- with psile
	['ᾠ'] = 'ᾤ', -- with psile and iota, as in [[ᾤα]]
	['ὡ'] = 'ὥ', -- with daseia
}

--------------------------------------------------------------------------
-- Replace a perispomeni (circumflex) with an okseia (acute). Polytonic only.
-- Key = vowel with perispomeni, value = the same vowel with oxia; breathing,
-- iota subscript and dialytics are kept.
-- Every character listed in export.vowel_perispomeni has an entry here,
-- including the dialytics (ῗ ῧ) and the capitals with iota.
export.perispomeni_to_oxia = {
	-- alpha
	['ᾶ'] = 'ά',
	['ᾷ'] = 'ᾴ',
	-- with psile
	['ἆ'] = 'ἄ', ['Ἆ'] = 'Ἄ',
	['ᾆ'] = 'ᾄ', ['ᾎ'] = 'ᾌ',
	-- with daseia
	['ἇ'] = 'ἅ', ['Ἇ'] = 'Ἅ',
	['ᾇ'] = 'ᾅ', ['ᾏ'] = 'ᾍ',

	-- eta
	['ῆ'] = 'ή',
	['ῇ'] = 'ῄ',
	-- with psile
	['ἦ'] = 'ἤ', ['Ἦ'] = 'Ἤ',
	['ᾖ'] = 'ᾔ', ['ᾞ'] = 'ᾜ',
	-- with daseia
	['ἧ'] = 'ἥ', ['Ἧ'] = 'Ἥ',
	['ᾗ'] = 'ᾕ', ['ᾟ'] = 'ᾝ',

	-- iota
	['ῖ'] = 'ί',
	['ῗ'] = 'ΐ', -- with dialytics
	-- with psile
	['ἶ'] = 'ἴ',
	['Ἶ'] = 'Ἴ', -- capital with psile + perispomeni is U+1F3E
	-- with daseia
	['ἷ'] = 'ἵ',
	['Ἷ'] = 'Ἵ', -- capital with daseia + perispomeni is U+1F3F

	-- upsilon
	['ῦ'] = 'ύ',
	['ῧ'] = 'ΰ', -- with dialytics
	-- with psile
	['ὖ'] = 'ὔ',
	-- with daseia
	['ὗ'] = 'ὕ', ['Ὗ'] = 'Ὕ',

	-- omega
	['ῶ'] = 'ώ',
	['ῷ'] = 'ῴ',
	-- with psile
	['ὦ'] = 'ὤ', ['Ὦ'] = 'Ὤ',
	['ᾦ'] = 'ᾤ', ['ᾮ'] = 'ᾬ',
	-- with daseia
	['ὧ'] = 'ὥ', ['Ὧ'] = 'Ὥ',
	['ᾧ'] = 'ᾥ', ['ᾯ'] = 'ᾭ',
}

--------------------------------------------------------------------------
-- Replace an oxeia (acute) with a perispomene (circumflex).
-- Key = vowel with oxia, value = the same vowel with perispomeni; breathing,
-- iota subscript and dialytics are kept.
-- Only α, η, ι, υ, ω are listed: ε and ο are always short and never take a
-- perispomeni.
-- Capitals are listed where they carry a breathing, and upsilon with
-- dialytics (ΰ) is listed next to iota with dialytics (ΐ).
-- TODO: add the unorthodox capitals?
export.oxia_to_perispomeni = {
	-- alpha
	['ά'] = 'ᾶ', ['ᾴ'] = 'ᾷ',
	['ἄ'] = 'ἆ', ['Ἄ'] = 'Ἆ', ['ᾄ'] = 'ᾆ', ['ᾌ'] = 'ᾎ', -- with psile
	['ἅ'] = 'ἇ', ['Ἅ'] = 'Ἇ', ['ᾅ'] = 'ᾇ', ['ᾍ'] = 'ᾏ', -- with daseia

	-- eta
	['ή'] = 'ῆ', ['ῄ'] = 'ῇ',
	['ἤ'] = 'ἦ', ['Ἤ'] = 'Ἦ', ['ᾔ'] = 'ᾖ', ['ᾜ'] = 'ᾞ', -- with psile
	['ἥ'] = 'ἧ', ['Ἥ'] = 'Ἧ', ['ᾕ'] = 'ᾗ', ['ᾝ'] = 'ᾟ', -- with daseia

	-- iota
	['ί'] = 'ῖ',
	['ΐ'] = 'ῗ', -- with dialytics
	['ἴ'] = 'ἶ', ['Ἴ'] = 'Ἶ', -- with psile
	['ἵ'] = 'ἷ', ['Ἵ'] = 'Ἷ', -- with daseia

	-- upsilon
	['ύ'] = 'ῦ',
	['ΰ'] = 'ῧ', -- with dialytics
	['ὔ'] = 'ὖ', -- with psile
	['ὕ'] = 'ὗ', ['Ὕ'] = 'Ὗ', -- with daseia

	-- omega
	['ώ'] = 'ῶ', ['ῴ'] = 'ῷ',
	['ὤ'] = 'ὦ', ['Ὤ'] = 'Ὦ', ['ᾤ'] = 'ᾦ', ['ᾬ'] = 'ᾮ', -- with psile
	['ὥ'] = 'ὧ', ['Ὥ'] = 'Ὧ', ['ᾥ'] = 'ᾧ', ['ᾭ'] = 'ᾯ', -- with daseia
}


--------------------------------------------------------------------------
--          c) diphthongs and digraphs (2-vowel-sequences)              --
--------------------------------------------------------------------------

--------------------------------------------------------------------------
-- The [[diphthong]]s: two vowels written together and counted as one.
-- TODO: υι is a diphthong too, but only in polytonic.
-- The modern synizeses εια, ειο, υα (as in [[γυαλί]]) are not listed.
export.digraphs = {
	'αι',
	'ει',
	'οι',
	'αυ',
	'ευ',
	'ηυ',
	'ου',
}

--------------------------------------------------------------------------
-- Move the accent one vowel backwards ([[recessive]] accent).
-- Key = sequence with the accent on its last vowel, value = the same sequence
-- with the accent one vowel earlier.
-- TODO: is this needed? Polytonic has both
--     αΐ to άι
--     αΐ to άϊ (with redundant, needless dialytics on the second letter).
-- For the moment this does what monotonic does.
-- TODO: oυϊ with accent only in polytonic?
export.digraphs_accent_back = {
	-- accented iota with dialytics
	['αΐ'] = 'άι', ['εΐ'] = 'έι', ['οΐ'] = 'όι',
	-- accented upsilon with dialytics
	['αΰ'] = 'άυ', ['εΰ'] = 'έυ',
	-- ου followed by accented iota
	['ουί'] = 'ούι',
}

--------------------------------------------------------------------------
-- Convert modern Greek diphthongs (pronounced as one syllable) to two separate vowels.
-- Key = sequence with the accent on its first vowel, value = the unaccented
-- sequence with dialytics on its second vowel.
-- TODO: ήυ ??
-- ['ύι'] = 'υϊ' is left out: it does not occur in nouns, only in the adjective
-- δρύινος. On the other hand, it would put dialytics on βούισμα, βουΐσματα.
-- Polytonic?
export.digraphs_accented_to_unaccented = {
	-- second vowel iota
	['άι'] = 'αϊ', ['έι'] = 'εϊ', ['όι'] = 'οϊ',
	-- second vowel upsilon
	['άυ'] = 'αϋ', ['έυ'] = 'εϋ', ['όυ'] = 'οϋ',
}
	
-- Hand the filled-in data table to whoever loaded this module.
return export


--[=[
EXPLANATIONS
Conversions of greek characters unaccented <--> accented vowels or digraphs
i) for [[monotonic]] script: only one accent: oxia [[acute]] ⟨ ΄ ⟩
ii) for [[polytonic]] script: The diacritics: 
Accents:
	[[τόνος]] tonos ([[οξεία]] oxia, acute) ⟨ ´ ⟩ is now accepted as identical to the modern accent TONOS and the latin acute accent: ⟨ ´ ⟩.
		So, polytonic includes the functions of monotonic.
		CAREFUL: here, ALL tonos = oxia must NEVER be a VERTICAL line
		FORBID all font-families that present tonos with a little vertical line (like Verdana)
	[[περισπωμένη]] perispomeni ( ῀ ) similar but not identical to the latin circumflex  ( ˆ )
	(The [[βαρεία]], grave accent ( ˋ )  is used only in texts, not isolated words)
Breathings [[πνεύματα]]: 
	[[ψιλή]], psile, soft breathing ( ᾿ ) 
	[[δασεία]], daseia, rough breathing ( ῾ )
[[διαίρεσις]] [[diaeresis]] or [[διαλυτικά]] dialytics:  splits digraph-vowels
[[υπογεγραμμένη]] subscript [[ιώτα]] iota
For more, see https://en.wiktionary.org/wiki/Module:grc-utilities
Prosody is used visibly only for Ancient Greek (and Hellenistic Koine)
* μακρόν (macron) or βραχύ (breve)

Ref
* https://en.wiktionary.org/wiki/Module:grc-utilities/data
* https://www.fileformat.info/info/unicode/block/greek_extended/list.htm
* https://en.wikipedia.org/wiki/Greek_script_in_Unicode
* https://en.wikipedia.org/wiki/Greek_alphabet#Greek_in_Unicode
]=]--