Fix decoding of decomposition info and include rune equivalence

This commit is contained in:
bloeys
2022-07-05 06:39:32 +04:00
parent a762283b1d
commit c40d1dfdf1
5 changed files with 139 additions and 136 deletions

View File

@ -21,6 +21,10 @@ const (
invalidRune = unicode.ReplacementChar
)
var (
RuneInfos map[rune]RuneInfo
)
type GlyphRend struct {
Atlas *FontAtlas
AtlasTex *assets.Texture
@ -144,62 +148,44 @@ func (gr *GlyphRend) glyphFromRunes(curr, prev, next rune) *FontAtlasGlyph {
PosCtx_start PosCtx = iota
PosCtx_mid
PosCtx_end
PosCtx_isolated
)
if prev == invalidRune && next == invalidRune {
prevIsLetter := unicode.IsLetter(prev)
nextIsLetter := unicode.IsLetter(next)
//Isolated case
if !prevIsLetter && !nextIsLetter {
g := gr.Atlas.Glyphs[curr]
return &g
}
ctx := PosCtx_mid
if prev == invalidRune {
if prevIsLetter && nextIsLetter {
ctx = PosCtx_mid
} else if nextIsLetter {
ctx = PosCtx_start
} else if next == invalidRune {
} else {
ctx = PosCtx_end
}
switch ctx {
case PosCtx_start:
mappings := runeInfos[curr].DecompMappings
for mappedRune := range mappings {
equivRunes := RuneInfos[curr].EquivalentRunes
for i := 0; i < len(equivRunes); i++ {
mri := runeInfos[mappedRune]
if mri.IsLigature || mri.DecompTag != CharDecompMap_initial {
continue
}
curr = mappedRune
otherRune := equivRunes[i]
otherRuneInfo := RuneInfos[otherRune]
if otherRuneInfo.DecompTag == DecompTags_initial {
curr = otherRune
break
}
}
case PosCtx_mid:
mappings := runeInfos[curr].DecompMappings
for mappedRune := range mappings {
mri := runeInfos[mappedRune]
if mri.IsLigature || mri.DecompTag != CharDecompMap_medial {
continue
}
curr = mappedRune
break
}
case PosCtx_end:
mappings := runeInfos[curr].DecompMappings
for mappedRune := range mappings {
mri := runeInfos[mappedRune]
if mri.IsLigature || mri.DecompTag != CharDecompMap_final {
continue
}
curr = mappedRune
break
}
}
g := gr.Atlas.Glyphs[curr]
@ -389,12 +375,13 @@ func NewGlyphRend(fontFile string, fontOptions *truetype.Options, screenWidth, s
gr.SetScreenSize(screenWidth, screenHeight)
//TODO: Move this
runeInfos, _ = loadUnicodeData("./unicode-data.txt")
return gr, nil
RuneInfos, err = ParseUnicodeData("./unicode-data.txt")
if err != nil {
return nil, err
}
var runeInfos map[rune]runeInfo
return gr, nil
}
func roundF32(x float32) float32 {
return float32(math.Round(float64(x)))

View File

@ -77,85 +77,94 @@ const (
BidiCategory_ON // Other Neutrals
)
type CharDecompMapTag uint8
type DecompTag uint8
const (
CharDecompMap_font CharDecompMapTag = iota // A font variant (e.g. a blackletter form).
CharDecompMap_noBreak // A no-break version of a space or hyphen.
CharDecompMap_initial // An initial presentation form (Arabic).
CharDecompMap_medial // A medial presentation form (Arabic).
CharDecompMap_final // A final presentation form (Arabic).
CharDecompMap_isolated // An isolated presentation form (Arabic).
CharDecompMap_circle // An encircled form.
CharDecompMap_super // A superscript form.
CharDecompMap_sub // A subscript form.
CharDecompMap_vertical // A vertical layout presentation form.
CharDecompMap_wide // A wide (or zenkaku) compatibility character.
CharDecompMap_narrow // A narrow (or hankaku) compatibility character.
CharDecompMap_small // A small variant form (CNS compatibility).
CharDecompMap_square // A CJK squared font variant.
CharDecompMap_fraction // A vulgar fraction form.
CharDecompMap_compat // Otherwise unspecified compatibility character.
CharDecompMap_NONE // Not decomposition mapping tag, which indicates canonical form.
DecompTag_font DecompTag = iota // A font variant (e.g. a blackletter form).
DecompTag_noBreak // A no-break version of a space or hyphen.
DecompTags_initial // An initial presentation form (Arabic).
DecompTag_medial // A medial presentation form (Arabic).
DecompTag_final // A final presentation form (Arabic).
DecompTag_isolated // An isolated presentation form (Arabic).
DecompTag_circle // An encircled form.
DecompTag_super // A superscript form.
DecompTag_sub // A subscript form.
DecompTag_vertical // A vertical layout presentation form.
DecompTag_wide // A wide (or zenkaku) compatibility character.
DecompTag_narrow // A narrow (or hankaku) compatibility character.
DecompTag_small // A small variant form (CNS compatibility).
DecompTag_square // A CJK squared font variant.
DecompTag_fraction // A vulgar fraction form.
DecompTag_compat // Otherwise unspecified compatibility character.
DecompTag_NONE // No decomposition mapping tag, which indicates canonical form.
)
func (cd CharDecompMapTag) String() string {
func (cd DecompTag) String() string {
switch cd {
case CharDecompMap_font:
case DecompTag_font:
return "font"
case CharDecompMap_noBreak:
case DecompTag_noBreak:
return "noBreak"
case CharDecompMap_initial:
case DecompTags_initial:
return "initial"
case CharDecompMap_medial:
case DecompTag_medial:
return "medial"
case CharDecompMap_final:
case DecompTag_final:
return "final"
case CharDecompMap_isolated:
case DecompTag_isolated:
return "isolated"
case CharDecompMap_circle:
case DecompTag_circle:
return "circle"
case CharDecompMap_super:
case DecompTag_super:
return "super"
case CharDecompMap_sub:
case DecompTag_sub:
return "sub"
case CharDecompMap_vertical:
case DecompTag_vertical:
return "vertical"
case CharDecompMap_wide:
case DecompTag_wide:
return "wide"
case CharDecompMap_narrow:
case DecompTag_narrow:
return "narrow"
case CharDecompMap_small:
case DecompTag_small:
return "small"
case CharDecompMap_square:
case DecompTag_square:
return "square"
case CharDecompMap_fraction:
case DecompTag_fraction:
return "fraction"
case CharDecompMap_compat:
case DecompTag_compat:
return "compat"
case CharDecompMap_NONE:
case DecompTag_NONE:
return "NONE"
default:
panic(fmt.Sprint("unknown CharDecompMapTag value:", uint8(cd)))
}
}
type runeInfo struct {
type RuneInfo struct {
Name string
Cat Category
BidiCat BidiCategory
DecompTag DecompTag
DecompTag CharDecompMapTag
IsLigature bool
DecompMappings map[rune]struct{}
//Decomp is the ordered set of runes this rune decomposes into
//as defined by unicodeData.txt
Decomp []rune
//EquivalentRunes are runes that are canonically or compatibility equivalent to this rune
EquivalentRunes []rune
}
//loadUnicodeData decodes a 'UnicodeData' file according
//to http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html
//ParseUnicodeData decodes a 'UnicodeData' file according
//to http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html and returns a map containing information
//on all runes within the passed ranges.
//
//The latest file is at https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
func loadUnicodeData(unicodeFile string) (map[rune]runeInfo, error) {
//If no ranges are passed then the full unicode data file will be decoded
//
//The latest file can be found at https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
func ParseUnicodeData(unicodeFile string, rangesToLoad ...*unicode.RangeTable) (map[rune]RuneInfo, error) {
type field int
const (
@ -181,27 +190,30 @@ func loadUnicodeData(unicodeFile string) (map[rune]runeInfo, error) {
return nil, err
}
ris := make(map[rune]runeInfo)
ris := make(map[rune]RuneInfo)
lines := strings.Split(string(fBytes), "\n")
for _, l := range lines {
fields := strings.SplitN(l, ";", 15)
r := runeFromHexCodeString(fields[field_codeValue])
if !unicode.Is(unicode.Arabic, r) {
if rangesToLoad != nil && !unicode.In(r, rangesToLoad...) {
continue
}
ri := ris[r]
ri = runeInfo{
ri = RuneInfo{
Name: fields[field_charName],
Cat: categoryStringToCategory(fields[field_generalCategory]),
BidiCat: bidiCategoryStringToBidiCategory(fields[field_bidiCategory]),
DecompTag: CharDecompMap_NONE,
DecompTag: DecompTag_NONE,
//NOTE: This is not perfect (NamesList.txt notes some additional ligatures), but good enough :)
IsLigature: strings.Contains(fields[field_charName], "LIGATURE"),
}
//This field might already be set by another char mapping to us
if ri.DecompMappings == nil {
ri.DecompMappings = make(map[rune]struct{})
//This might already be created for us by a previous rune
if ri.EquivalentRunes == nil {
ri.EquivalentRunes = []rune{}
}
if len(fields[field_charDecomp]) > 0 {
@ -209,38 +221,38 @@ func loadUnicodeData(unicodeFile string) (map[rune]runeInfo, error) {
fieldItems := strings.Split(fields[field_charDecomp], " ")
if fieldItems[0][0] == '<' {
ri.DecompTag = charDecompMapStringToCharDecompMap(fieldItems[0])
fieldItems = fieldItems[1:]
}
//We consider a unicode codepoint that decomposes to more than one char a ligature
ri.IsLigature = len(fieldItems) >= 3
for i := 1; i < len(fieldItems); i++ {
//One character decomposition indicates equivalence
if len(fieldItems) == 1 {
mappedRune := runeFromHexCodeString(fieldItems[i])
ri.DecompMappings[mappedRune] = struct{}{}
decompRune := runeFromHexCodeString(fieldItems[0])
ri.Decomp = []rune{decompRune}
ri.EquivalentRunes = append(ri.EquivalentRunes, decompRune)
//Add this rune as a map of the other rune as well
otherRi, ok := ris[mappedRune]
if !ok {
otherRi.DecompMappings = make(map[rune]struct{})
//Add this rune as equivalent to decomposed rune
decompRuneInfo := ris[decompRune]
if decompRuneInfo.EquivalentRunes == nil {
decompRuneInfo.EquivalentRunes = []rune{r}
} else {
decompRuneInfo.EquivalentRunes = append(decompRuneInfo.EquivalentRunes, r)
}
ris[decompRune] = decompRuneInfo
} else {
ri.Decomp = make([]rune, len(fieldItems))
for i := 0; i < len(fieldItems); i++ {
ri.Decomp[i] = runeFromHexCodeString(fieldItems[i])
}
otherRi.DecompMappings[r] = struct{}{}
ris[mappedRune] = otherRi
}
}
ris[r] = ri
}
// meemRi := ris['م']
// for mappedRune := range meemRi.DecompMappings {
// mappedRuneInfo := ris[mappedRune]
// if mappedRuneInfo.IsLigature {
// continue
// }
// fmt.Printf("Meem mapping: %c. code=%x. Type=%s\n", mappedRune, mappedRune, mappedRuneInfo.DecompTag.String())
// }
return ris, nil
}
@ -248,7 +260,7 @@ func runeFromHexCodeString(c string) rune {
codepointU64, err := strconv.ParseUint(c, 16, 32)
if err != nil {
return invalidRune
panic("Invalid rune: " + c)
}
return rune(codepointU64)
@ -382,43 +394,43 @@ func bidiCategoryStringToBidiCategory(c string) BidiCategory {
}
}
func charDecompMapStringToCharDecompMap(c string) CharDecompMapTag {
func charDecompMapStringToCharDecompMap(c string) DecompTag {
switch c {
case "<font>":
return CharDecompMap_font
return DecompTag_font
case "<noBreak>":
return CharDecompMap_noBreak
return DecompTag_noBreak
case "<initial>":
return CharDecompMap_initial
return DecompTags_initial
case "<medial>":
return CharDecompMap_medial
return DecompTag_medial
case "<final>":
return CharDecompMap_final
return DecompTag_final
case "<isolated>":
return CharDecompMap_isolated
return DecompTag_isolated
case "<circle>":
return CharDecompMap_circle
return DecompTag_circle
case "<super>":
return CharDecompMap_super
return DecompTag_super
case "<sub>":
return CharDecompMap_sub
return DecompTag_sub
case "<vertical>":
return CharDecompMap_vertical
return DecompTag_vertical
case "<wide>":
return CharDecompMap_wide
return DecompTag_wide
case "<narrow>":
return CharDecompMap_narrow
return DecompTag_narrow
case "<small>":
return CharDecompMap_small
return DecompTag_small
case "<square>":
return CharDecompMap_square
return DecompTag_square
case "<fraction>":
return CharDecompMap_fraction
return DecompTag_fraction
case "<compat>":
return CharDecompMap_compat
return DecompTag_compat
case "":
return CharDecompMap_NONE
return DecompTag_NONE
default:
panic("unknown charDecomMap string: " + c)
}

1
go.mod
View File

@ -14,4 +14,5 @@ require (
require (
github.com/bloeys/assimp-go v0.4.2 // indirect
github.com/inkyblackness/imgui-go/v4 v4.3.0 // indirect
golang.org/x/text v0.3.7 // indirect
)

1
go.sum
View File

@ -23,5 +23,6 @@ github.com/veandco/go-sdl2 v0.4.10 h1:8QoD2bhWl7SbQDflIAUYWfl9Vq+mT8/boJFAUzAScg
github.com/veandco/go-sdl2 v0.4.10/go.mod h1:OROqMhHD43nT4/i9crJukyVecjPNYYuCofep6SNiAjY=
golang.org/x/image v0.0.0-20220617043117-41969df76e82 h1:KpZB5pUSBvrHltNEdK/tw0xlPeD13M6M6aGP32gKqiw=
golang.org/x/image v0.0.0-20220617043117-41969df76e82/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=

View File

@ -100,6 +100,9 @@ func (p *program) Init() {
p.gridMat = materials.NewMaterial("grid", "./res/shaders/grid.glsl")
p.handleWindowResize()
fmt.Printf("Beh equivalents: %v\n", string(glyphs.RuneInfos['ب'].EquivalentRunes))
// engine.Quit()
}
func (p *program) Update() {
@ -187,12 +190,11 @@ func (p *program) Render() {
}
textColor := gglm.NewVec4(r, g, b, 1)
// str := " مرحبا كب"
str := " ijojo\n\n Hello there, friend|. pq?\n ABCDEFG\tHIJKLMNOPQRSTUVWXYZ\nمرحبا بك"
str := " مرحبا بك"
// str := " ijojo\n\n Hello there, friend|. pq?\n ABCDEFG\tHIJKLMNOPQRSTUVWXYZ\nمرحبا بك"
// str := " ijojo\n\n Hello there, friend|. pq?\n ABCDEFG\tHIJKLMNOPQRSTUVWXYZ"
p.GlyphRend.DrawTextOpenGLAbs(str, gglm.NewVec3(xOff, float32(p.GlyphRend.Atlas.LineHeight)*5+yOff, 0), textColor)
// strLen := len(str)
// const charsPerFrame = 10_000
// for i := 0; i < charsPerFrame/strLen; i++ {