Fix decoding decomp info+include rune equivalence

This commit is contained in:
bloeys
2022-07-05 06:39:32 +04:00
parent a762283b1d
commit c40d1dfdf1
5 changed files with 139 additions and 136 deletions

View File

@ -21,6 +21,10 @@ const (
invalidRune = unicode.ReplacementChar invalidRune = unicode.ReplacementChar
) )
var (
RuneInfos map[rune]RuneInfo
)
type GlyphRend struct { type GlyphRend struct {
Atlas *FontAtlas Atlas *FontAtlas
AtlasTex *assets.Texture AtlasTex *assets.Texture
@ -144,62 +148,44 @@ func (gr *GlyphRend) glyphFromRunes(curr, prev, next rune) *FontAtlasGlyph {
PosCtx_start PosCtx = iota PosCtx_start PosCtx = iota
PosCtx_mid PosCtx_mid
PosCtx_end PosCtx_end
PosCtx_isolated
) )
if prev == invalidRune && next == invalidRune { prevIsLetter := unicode.IsLetter(prev)
nextIsLetter := unicode.IsLetter(next)
//Isolated case
if !prevIsLetter && !nextIsLetter {
g := gr.Atlas.Glyphs[curr] g := gr.Atlas.Glyphs[curr]
return &g return &g
} }
ctx := PosCtx_mid ctx := PosCtx_mid
if prev == invalidRune { if prevIsLetter && nextIsLetter {
ctx = PosCtx_mid
} else if nextIsLetter {
ctx = PosCtx_start ctx = PosCtx_start
} else if next == invalidRune { } else {
ctx = PosCtx_end ctx = PosCtx_end
} }
switch ctx { switch ctx {
case PosCtx_start: case PosCtx_start:
mappings := runeInfos[curr].DecompMappings equivRunes := RuneInfos[curr].EquivalentRunes
for mappedRune := range mappings { for i := 0; i < len(equivRunes); i++ {
mri := runeInfos[mappedRune] otherRune := equivRunes[i]
if mri.IsLigature || mri.DecompTag != CharDecompMap_initial { otherRuneInfo := RuneInfos[otherRune]
continue if otherRuneInfo.DecompTag == DecompTags_initial {
curr = otherRune
break
} }
curr = mappedRune
break
} }
case PosCtx_mid: case PosCtx_mid:
mappings := runeInfos[curr].DecompMappings
for mappedRune := range mappings {
mri := runeInfos[mappedRune]
if mri.IsLigature || mri.DecompTag != CharDecompMap_medial {
continue
}
curr = mappedRune
break
}
case PosCtx_end: case PosCtx_end:
mappings := runeInfos[curr].DecompMappings
for mappedRune := range mappings {
mri := runeInfos[mappedRune]
if mri.IsLigature || mri.DecompTag != CharDecompMap_final {
continue
}
curr = mappedRune
break
}
} }
g := gr.Atlas.Glyphs[curr] g := gr.Atlas.Glyphs[curr]
@ -389,13 +375,14 @@ func NewGlyphRend(fontFile string, fontOptions *truetype.Options, screenWidth, s
gr.SetScreenSize(screenWidth, screenHeight) gr.SetScreenSize(screenWidth, screenHeight)
//TODO: Move this RuneInfos, err = ParseUnicodeData("./unicode-data.txt")
runeInfos, _ = loadUnicodeData("./unicode-data.txt") if err != nil {
return nil, err
}
return gr, nil return gr, nil
} }
var runeInfos map[rune]runeInfo
func roundF32(x float32) float32 { func roundF32(x float32) float32 {
return float32(math.Round(float64(x))) return float32(math.Round(float64(x)))
} }

View File

@ -77,85 +77,94 @@ const (
BidiCategory_ON // Other Neutrals BidiCategory_ON // Other Neutrals
) )
type CharDecompMapTag uint8 type DecompTag uint8
const ( const (
CharDecompMap_font CharDecompMapTag = iota // A font variant (e.g. a blackletter form). DecompTag_font DecompTag = iota // A font variant (e.g. a blackletter form).
CharDecompMap_noBreak // A no-break version of a space or hyphen. DecompTag_noBreak // A no-break version of a space or hyphen.
CharDecompMap_initial // An initial presentation form (Arabic). DecompTags_initial // An initial presentation form (Arabic).
CharDecompMap_medial // A medial presentation form (Arabic). DecompTag_medial // A medial presentation form (Arabic).
CharDecompMap_final // A final presentation form (Arabic). DecompTag_final // A final presentation form (Arabic).
CharDecompMap_isolated // An isolated presentation form (Arabic). DecompTag_isolated // An isolated presentation form (Arabic).
CharDecompMap_circle // An encircled form. DecompTag_circle // An encircled form.
CharDecompMap_super // A superscript form. DecompTag_super // A superscript form.
CharDecompMap_sub // A subscript form. DecompTag_sub // A subscript form.
CharDecompMap_vertical // A vertical layout presentation form. DecompTag_vertical // A vertical layout presentation form.
CharDecompMap_wide // A wide (or zenkaku) compatibility character. DecompTag_wide // A wide (or zenkaku) compatibility character.
CharDecompMap_narrow // A narrow (or hankaku) compatibility character. DecompTag_narrow // A narrow (or hankaku) compatibility character.
CharDecompMap_small // A small variant form (CNS compatibility). DecompTag_small // A small variant form (CNS compatibility).
CharDecompMap_square // A CJK squared font variant. DecompTag_square // A CJK squared font variant.
CharDecompMap_fraction // A vulgar fraction form. DecompTag_fraction // A vulgar fraction form.
CharDecompMap_compat // Otherwise unspecified compatibility character. DecompTag_compat // Otherwise unspecified compatibility character.
CharDecompMap_NONE // No decomposition mapping tag, which indicates canonical form. DecompTag_NONE // No decomposition mapping tag, which indicates canonical form.
) )
func (cd CharDecompMapTag) String() string { func (cd DecompTag) String() string {
switch cd { switch cd {
case CharDecompMap_font: case DecompTag_font:
return "font" return "font"
case CharDecompMap_noBreak: case DecompTag_noBreak:
return "noBreak" return "noBreak"
case CharDecompMap_initial: case DecompTags_initial:
return "initial" return "initial"
case CharDecompMap_medial: case DecompTag_medial:
return "medial" return "medial"
case CharDecompMap_final: case DecompTag_final:
return "final" return "final"
case CharDecompMap_isolated: case DecompTag_isolated:
return "isolated" return "isolated"
case CharDecompMap_circle: case DecompTag_circle:
return "circle" return "circle"
case CharDecompMap_super: case DecompTag_super:
return "super" return "super"
case CharDecompMap_sub: case DecompTag_sub:
return "sub" return "sub"
case CharDecompMap_vertical: case DecompTag_vertical:
return "vertical" return "vertical"
case CharDecompMap_wide: case DecompTag_wide:
return "wide" return "wide"
case CharDecompMap_narrow: case DecompTag_narrow:
return "narrow" return "narrow"
case CharDecompMap_small: case DecompTag_small:
return "small" return "small"
case CharDecompMap_square: case DecompTag_square:
return "square" return "square"
case CharDecompMap_fraction: case DecompTag_fraction:
return "fraction" return "fraction"
case CharDecompMap_compat: case DecompTag_compat:
return "compat" return "compat"
case CharDecompMap_NONE: case DecompTag_NONE:
return "NONE" return "NONE"
default: default:
panic(fmt.Sprint("unknown CharDecompMapTag value:", uint8(cd))) panic(fmt.Sprint("unknown CharDecompMapTag value:", uint8(cd)))
} }
} }
type runeInfo struct { type RuneInfo struct {
Name string Name string
Cat Category Cat Category
BidiCat BidiCategory BidiCat BidiCategory
DecompTag DecompTag
DecompTag CharDecompMapTag IsLigature bool
IsLigature bool
DecompMappings map[rune]struct{} //Decomp is the ordered set of runes this rune decomposes into
//as defined by unicodeData.txt
Decomp []rune
//EquivalentRunes are runes that are canonically or compatibility equivalent to this rune
EquivalentRunes []rune
} }
//loadUnicodeData decodes a 'UnicodeData' file according //ParseUnicodeData decodes a 'UnicodeData' file according
//to http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html //to http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html and returns a map containing information
//on all runes within the passed ranges.
// //
//The latest file is at https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt //If no ranges are passed then the full unicode data file will be decoded
func loadUnicodeData(unicodeFile string) (map[rune]runeInfo, error) { //
//The latest file can be found at https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
func ParseUnicodeData(unicodeFile string, rangesToLoad ...*unicode.RangeTable) (map[rune]RuneInfo, error) {
type field int type field int
const ( const (
@ -181,27 +190,30 @@ func loadUnicodeData(unicodeFile string) (map[rune]runeInfo, error) {
return nil, err return nil, err
} }
ris := make(map[rune]runeInfo) ris := make(map[rune]RuneInfo)
lines := strings.Split(string(fBytes), "\n") lines := strings.Split(string(fBytes), "\n")
for _, l := range lines { for _, l := range lines {
fields := strings.SplitN(l, ";", 15) fields := strings.SplitN(l, ";", 15)
r := runeFromHexCodeString(fields[field_codeValue]) r := runeFromHexCodeString(fields[field_codeValue])
if !unicode.Is(unicode.Arabic, r) { if rangesToLoad != nil && !unicode.In(r, rangesToLoad...) {
continue continue
} }
ri := ris[r] ri := ris[r]
ri = runeInfo{ ri = RuneInfo{
Name: fields[field_charName], Name: fields[field_charName],
Cat: categoryStringToCategory(fields[field_generalCategory]), Cat: categoryStringToCategory(fields[field_generalCategory]),
BidiCat: bidiCategoryStringToBidiCategory(fields[field_bidiCategory]), BidiCat: bidiCategoryStringToBidiCategory(fields[field_bidiCategory]),
DecompTag: CharDecompMap_NONE, DecompTag: DecompTag_NONE,
//NOTE: This is not perfect (NamesList.txt notes some additional ligatures), but good enough :)
IsLigature: strings.Contains(fields[field_charName], "LIGATURE"),
} }
//This field might already be set by another char mapping to us //This might already be created for us by a previous rune
if ri.DecompMappings == nil { if ri.EquivalentRunes == nil {
ri.DecompMappings = make(map[rune]struct{}) ri.EquivalentRunes = []rune{}
} }
if len(fields[field_charDecomp]) > 0 { if len(fields[field_charDecomp]) > 0 {
@ -209,38 +221,38 @@ func loadUnicodeData(unicodeFile string) (map[rune]runeInfo, error) {
fieldItems := strings.Split(fields[field_charDecomp], " ") fieldItems := strings.Split(fields[field_charDecomp], " ")
if fieldItems[0][0] == '<' { if fieldItems[0][0] == '<' {
ri.DecompTag = charDecompMapStringToCharDecompMap(fieldItems[0]) ri.DecompTag = charDecompMapStringToCharDecompMap(fieldItems[0])
fieldItems = fieldItems[1:]
} }
//We consider a unicode codepoint that decomposes to more than one char a ligature //One character decomposition indicates equivalence
ri.IsLigature = len(fieldItems) >= 3 if len(fieldItems) == 1 {
for i := 1; i < len(fieldItems); i++ {
mappedRune := runeFromHexCodeString(fieldItems[i]) decompRune := runeFromHexCodeString(fieldItems[0])
ri.DecompMappings[mappedRune] = struct{}{} ri.Decomp = []rune{decompRune}
ri.EquivalentRunes = append(ri.EquivalentRunes, decompRune)
//Add this rune as a map of the other rune as well //Add this rune as equivalent to decomposed rune
otherRi, ok := ris[mappedRune] decompRuneInfo := ris[decompRune]
if !ok { if decompRuneInfo.EquivalentRunes == nil {
otherRi.DecompMappings = make(map[rune]struct{}) decompRuneInfo.EquivalentRunes = []rune{r}
} else {
decompRuneInfo.EquivalentRunes = append(decompRuneInfo.EquivalentRunes, r)
}
ris[decompRune] = decompRuneInfo
} else {
ri.Decomp = make([]rune, len(fieldItems))
for i := 0; i < len(fieldItems); i++ {
ri.Decomp[i] = runeFromHexCodeString(fieldItems[i])
} }
otherRi.DecompMappings[r] = struct{}{}
ris[mappedRune] = otherRi
} }
} }
ris[r] = ri ris[r] = ri
} }
// meemRi := ris['م']
// for mappedRune := range meemRi.DecompMappings {
// mappedRuneInfo := ris[mappedRune]
// if mappedRuneInfo.IsLigature {
// continue
// }
// fmt.Printf("Meem mapping: %c. code=%x. Type=%s\n", mappedRune, mappedRune, mappedRuneInfo.DecompTag.String())
// }
return ris, nil return ris, nil
} }
@ -248,7 +260,7 @@ func runeFromHexCodeString(c string) rune {
codepointU64, err := strconv.ParseUint(c, 16, 32) codepointU64, err := strconv.ParseUint(c, 16, 32)
if err != nil { if err != nil {
return invalidRune panic("Invalid rune: " + c)
} }
return rune(codepointU64) return rune(codepointU64)
@ -382,43 +394,43 @@ func bidiCategoryStringToBidiCategory(c string) BidiCategory {
} }
} }
func charDecompMapStringToCharDecompMap(c string) CharDecompMapTag { func charDecompMapStringToCharDecompMap(c string) DecompTag {
switch c { switch c {
case "<font>": case "<font>":
return CharDecompMap_font return DecompTag_font
case "<noBreak>": case "<noBreak>":
return CharDecompMap_noBreak return DecompTag_noBreak
case "<initial>": case "<initial>":
return CharDecompMap_initial return DecompTags_initial
case "<medial>": case "<medial>":
return CharDecompMap_medial return DecompTag_medial
case "<final>": case "<final>":
return CharDecompMap_final return DecompTag_final
case "<isolated>": case "<isolated>":
return CharDecompMap_isolated return DecompTag_isolated
case "<circle>": case "<circle>":
return CharDecompMap_circle return DecompTag_circle
case "<super>": case "<super>":
return CharDecompMap_super return DecompTag_super
case "<sub>": case "<sub>":
return CharDecompMap_sub return DecompTag_sub
case "<vertical>": case "<vertical>":
return CharDecompMap_vertical return DecompTag_vertical
case "<wide>": case "<wide>":
return CharDecompMap_wide return DecompTag_wide
case "<narrow>": case "<narrow>":
return CharDecompMap_narrow return DecompTag_narrow
case "<small>": case "<small>":
return CharDecompMap_small return DecompTag_small
case "<square>": case "<square>":
return CharDecompMap_square return DecompTag_square
case "<fraction>": case "<fraction>":
return CharDecompMap_fraction return DecompTag_fraction
case "<compat>": case "<compat>":
return CharDecompMap_compat return DecompTag_compat
case "": case "":
return CharDecompMap_NONE return DecompTag_NONE
default: default:
panic("unknown charDecomMap string: " + c) panic("unknown charDecomMap string: " + c)
} }

1
go.mod
View File

@ -14,4 +14,5 @@ require (
require ( require (
github.com/bloeys/assimp-go v0.4.2 // indirect github.com/bloeys/assimp-go v0.4.2 // indirect
github.com/inkyblackness/imgui-go/v4 v4.3.0 // indirect github.com/inkyblackness/imgui-go/v4 v4.3.0 // indirect
golang.org/x/text v0.3.7 // indirect
) )

1
go.sum
View File

@ -23,5 +23,6 @@ github.com/veandco/go-sdl2 v0.4.10 h1:8QoD2bhWl7SbQDflIAUYWfl9Vq+mT8/boJFAUzAScg
github.com/veandco/go-sdl2 v0.4.10/go.mod h1:OROqMhHD43nT4/i9crJukyVecjPNYYuCofep6SNiAjY= github.com/veandco/go-sdl2 v0.4.10/go.mod h1:OROqMhHD43nT4/i9crJukyVecjPNYYuCofep6SNiAjY=
golang.org/x/image v0.0.0-20220617043117-41969df76e82 h1:KpZB5pUSBvrHltNEdK/tw0xlPeD13M6M6aGP32gKqiw= golang.org/x/image v0.0.0-20220617043117-41969df76e82 h1:KpZB5pUSBvrHltNEdK/tw0xlPeD13M6M6aGP32gKqiw=
golang.org/x/image v0.0.0-20220617043117-41969df76e82/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY= golang.org/x/image v0.0.0-20220617043117-41969df76e82/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

View File

@ -100,6 +100,9 @@ func (p *program) Init() {
p.gridMat = materials.NewMaterial("grid", "./res/shaders/grid.glsl") p.gridMat = materials.NewMaterial("grid", "./res/shaders/grid.glsl")
p.handleWindowResize() p.handleWindowResize()
fmt.Printf("Beh equivalents: %v\n", string(glyphs.RuneInfos['ب'].EquivalentRunes))
// engine.Quit()
} }
func (p *program) Update() { func (p *program) Update() {
@ -187,12 +190,11 @@ func (p *program) Render() {
} }
textColor := gglm.NewVec4(r, g, b, 1) textColor := gglm.NewVec4(r, g, b, 1)
// str := " مرحبا كب" str := " مرحبا بك"
str := " ijojo\n\n Hello there, friend|. pq?\n ABCDEFG\tHIJKLMNOPQRSTUVWXYZ\nمرحبا بك" // str := " ijojo\n\n Hello there, friend|. pq?\n ABCDEFG\tHIJKLMNOPQRSTUVWXYZ\nمرحبا بك"
// str := " ijojo\n\n Hello there, friend|. pq?\n ABCDEFG\tHIJKLMNOPQRSTUVWXYZ" // str := " ijojo\n\n Hello there, friend|. pq?\n ABCDEFG\tHIJKLMNOPQRSTUVWXYZ"
p.GlyphRend.DrawTextOpenGLAbs(str, gglm.NewVec3(xOff, float32(p.GlyphRend.Atlas.LineHeight)*5+yOff, 0), textColor) p.GlyphRend.DrawTextOpenGLAbs(str, gglm.NewVec3(xOff, float32(p.GlyphRend.Atlas.LineHeight)*5+yOff, 0), textColor)
// strLen := len(str) // strLen := len(str)
// const charsPerFrame = 10_000 // const charsPerFrame = 10_000
// for i := 0; i < charsPerFrame/strLen; i++ { // for i := 0; i < charsPerFrame/strLen; i++ {