diff --git a/glyphs/glyphs.go b/glyphs/glyphs.go index ce6d662..0a06284 100755 --- a/glyphs/glyphs.go +++ b/glyphs/glyphs.go @@ -21,6 +21,10 @@ const ( invalidRune = unicode.ReplacementChar ) +var ( + RuneInfos map[rune]RuneInfo +) + type GlyphRend struct { Atlas *FontAtlas AtlasTex *assets.Texture @@ -144,62 +148,44 @@ func (gr *GlyphRend) glyphFromRunes(curr, prev, next rune) *FontAtlasGlyph { PosCtx_start PosCtx = iota PosCtx_mid PosCtx_end + PosCtx_isolated ) - if prev == invalidRune && next == invalidRune { + prevIsLetter := unicode.IsLetter(prev) + nextIsLetter := unicode.IsLetter(next) + + //Isolated case + if !prevIsLetter && !nextIsLetter { g := gr.Atlas.Glyphs[curr] return &g } ctx := PosCtx_mid - if prev == invalidRune { + if prevIsLetter && nextIsLetter { + ctx = PosCtx_mid + } else if nextIsLetter { ctx = PosCtx_start - } else if next == invalidRune { + } else { ctx = PosCtx_end } switch ctx { case PosCtx_start: - mappings := runeInfos[curr].DecompMappings - for mappedRune := range mappings { + equivRunes := RuneInfos[curr].EquivalentRunes + for i := 0; i < len(equivRunes); i++ { - mri := runeInfos[mappedRune] - if mri.IsLigature || mri.DecompTag != CharDecompMap_initial { - continue + otherRune := equivRunes[i] + otherRuneInfo := RuneInfos[otherRune] + if otherRuneInfo.DecompTag == DecompTags_initial { + curr = otherRune + break } - - curr = mappedRune - break } case PosCtx_mid: - mappings := runeInfos[curr].DecompMappings - for mappedRune := range mappings { - - mri := runeInfos[mappedRune] - if mri.IsLigature || mri.DecompTag != CharDecompMap_medial { - continue - } - - curr = mappedRune - break - } - case PosCtx_end: - - mappings := runeInfos[curr].DecompMappings - for mappedRune := range mappings { - - mri := runeInfos[mappedRune] - if mri.IsLigature || mri.DecompTag != CharDecompMap_final { - continue - } - - curr = mappedRune - break - } } g := gr.Atlas.Glyphs[curr] @@ -389,13 +375,14 @@ func NewGlyphRend(fontFile string, fontOptions *truetype.Options, screenWidth, s gr.SetScreenSize(screenWidth, screenHeight) - //TODO: Move this - runeInfos, _ = loadUnicodeData("./unicode-data.txt") + RuneInfos, err = ParseUnicodeData("./unicode-data.txt") + if err != nil { + return nil, err + } + return gr, nil } -var runeInfos map[rune]runeInfo - func roundF32(x float32) float32 { return float32(math.Round(float64(x))) } diff --git a/glyphs/unicode.go b/glyphs/unicode.go index 3b4a27b..6b06674 100755 --- a/glyphs/unicode.go +++ b/glyphs/unicode.go @@ -77,85 +77,94 @@ const ( BidiCategory_ON // Other Neutrals ) -type CharDecompMapTag uint8 +type DecompTag uint8 const ( - CharDecompMap_font CharDecompMapTag = iota // A font variant (e.g. a blackletter form). - CharDecompMap_noBreak // A no-break version of a space or hyphen. - CharDecompMap_initial // An initial presentation form (Arabic). - CharDecompMap_medial // A medial presentation form (Arabic). - CharDecompMap_final // A final presentation form (Arabic). - CharDecompMap_isolated // An isolated presentation form (Arabic). - CharDecompMap_circle // An encircled form. - CharDecompMap_super // A superscript form. - CharDecompMap_sub // A subscript form. - CharDecompMap_vertical // A vertical layout presentation form. - CharDecompMap_wide // A wide (or zenkaku) compatibility character. - CharDecompMap_narrow // A narrow (or hankaku) compatibility character. - CharDecompMap_small // A small variant form (CNS compatibility). - CharDecompMap_square // A CJK squared font variant. - CharDecompMap_fraction // A vulgar fraction form. - CharDecompMap_compat // Otherwise unspecified compatibility character. - CharDecompMap_NONE // Not decomposition mapping tag, which indicates canonical form. + DecompTag_font DecompTag = iota // A font variant (e.g. a blackletter form). + DecompTag_noBreak // A no-break version of a space or hyphen. + DecompTags_initial // An initial presentation form (Arabic). + DecompTag_medial // A medial presentation form (Arabic). + DecompTag_final // A final presentation form (Arabic). + DecompTag_isolated // An isolated presentation form (Arabic). + DecompTag_circle // An encircled form. + DecompTag_super // A superscript form. + DecompTag_sub // A subscript form. + DecompTag_vertical // A vertical layout presentation form. + DecompTag_wide // A wide (or zenkaku) compatibility character. + DecompTag_narrow // A narrow (or hankaku) compatibility character. + DecompTag_small // A small variant form (CNS compatibility). + DecompTag_square // A CJK squared font variant. + DecompTag_fraction // A vulgar fraction form. + DecompTag_compat // Otherwise unspecified compatibility character. + DecompTag_NONE // Not decomposition mapping tag, which indicates canonical form. ) -func (cd CharDecompMapTag) String() string { +func (cd DecompTag) String() string { switch cd { - case CharDecompMap_font: + case DecompTag_font: return "font" - case CharDecompMap_noBreak: + case DecompTag_noBreak: return "noBreak" - case CharDecompMap_initial: + case DecompTags_initial: return "initial" - case CharDecompMap_medial: + case DecompTag_medial: return "medial" - case CharDecompMap_final: + case DecompTag_final: return "final" - case CharDecompMap_isolated: + case DecompTag_isolated: return "isolated" - case CharDecompMap_circle: + case DecompTag_circle: return "circle" - case CharDecompMap_super: + case DecompTag_super: return "super" - case CharDecompMap_sub: + case DecompTag_sub: return "sub" - case CharDecompMap_vertical: + case DecompTag_vertical: return "vertical" - case CharDecompMap_wide: + case DecompTag_wide: return "wide" - case CharDecompMap_narrow: + case DecompTag_narrow: return "narrow" - case CharDecompMap_small: + case DecompTag_small: return "small" - case CharDecompMap_square: + case DecompTag_square: return "square" - case CharDecompMap_fraction: + case DecompTag_fraction: return "fraction" - case CharDecompMap_compat: + case DecompTag_compat: return "compat" - case CharDecompMap_NONE: + case DecompTag_NONE: return "NONE" default: panic(fmt.Sprint("unknown CharDecompMapTag value:", uint8(cd))) } } -type runeInfo struct { - Name string - Cat Category - BidiCat BidiCategory +type RuneInfo struct { + Name string + Cat Category + BidiCat BidiCategory + DecompTag DecompTag - DecompTag CharDecompMapTag - IsLigature bool - DecompMappings map[rune]struct{} + IsLigature bool + + //Decomp is the ordered set of runes this rune decomposes into + //as defined by unicodeData.txt + Decomp []rune + + //EquivalentRunes are runes that are canonically or compatiability equivalent to this rune + EquivalentRunes []rune } -//loadUnicodeData decodes a 'UnicodeData' file according -//to http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html +//ParseUnicodeData decodes a 'UnicodeData' file according +//to http://www.unicode.org/Public/3.0-Update/UnicodeData-3.0.0.html and returns a map containing information +//on all runes within the passed ranges. // -//The latest file is at https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt -func loadUnicodeData(unicodeFile string) (map[rune]runeInfo, error) { +//If no ranges are passed then the full unicode data file will be decoded +// +//The latest file can be found at https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt +func ParseUnicodeData(unicodeFile string, rangesToLoad ...*unicode.RangeTable) (map[rune]RuneInfo, error) { type field int const ( @@ -181,27 +190,30 @@ func loadUnicodeData(unicodeFile string) (map[rune]runeInfo, error) { return nil, err } - ris := make(map[rune]runeInfo) + ris := make(map[rune]RuneInfo) lines := strings.Split(string(fBytes), "\n") for _, l := range lines { fields := strings.SplitN(l, ";", 15) r := runeFromHexCodeString(fields[field_codeValue]) - if !unicode.Is(unicode.Arabic, r) { + if rangesToLoad != nil && !unicode.In(r, rangesToLoad...) { continue } ri := ris[r] - ri = runeInfo{ + ri = RuneInfo{ Name: fields[field_charName], Cat: categoryStringToCategory(fields[field_generalCategory]), BidiCat: bidiCategoryStringToBidiCategory(fields[field_bidiCategory]), - DecompTag: CharDecompMap_NONE, + DecompTag: DecompTag_NONE, + + //NOTE: This is not perfect (NamesList.txt notes some additional ligatures), but good enough :) + IsLigature: strings.Contains(fields[field_charName], "LIGATURE"), } - //This field might already be set by another char mapping to us - if ri.DecompMappings == nil { - ri.DecompMappings = make(map[rune]struct{}) + //This might already be created for us by a previous ruen + if ri.EquivalentRunes == nil { + ri.EquivalentRunes = []rune{} } if len(fields[field_charDecomp]) > 0 { @@ -209,38 +221,38 @@ func loadUnicodeData(unicodeFile string) (map[rune]runeInfo, error) { fieldItems := strings.Split(fields[field_charDecomp], " ") if fieldItems[0][0] == '<' { ri.DecompTag = charDecompMapStringToCharDecompMap(fieldItems[0]) + fieldItems = fieldItems[1:] } - //We consider a unicode codepoint that decomposes to more than one char a ligature - ri.IsLigature = len(fieldItems) >= 3 - for i := 1; i < len(fieldItems); i++ { + //One character decomposition indicates equivalence + if len(fieldItems) == 1 { - mappedRune := runeFromHexCodeString(fieldItems[i]) - ri.DecompMappings[mappedRune] = struct{}{} + decompRune := runeFromHexCodeString(fieldItems[0]) + ri.Decomp = []rune{decompRune} + ri.EquivalentRunes = append(ri.EquivalentRunes, decompRune) - //Add this rune as a map of the other rune as well - otherRi, ok := ris[mappedRune] - if !ok { - otherRi.DecompMappings = make(map[rune]struct{}) + //Add this rune as equivalent to decomposed rune + decompRuneInfo := ris[decompRune] + if decompRuneInfo.EquivalentRunes == nil { + decompRuneInfo.EquivalentRunes = []rune{r} + } else { + decompRuneInfo.EquivalentRunes = append(decompRuneInfo.EquivalentRunes, r) + } + + ris[decompRune] = decompRuneInfo + + } else { + + ri.Decomp = make([]rune, len(fieldItems)) + for i := 0; i < len(fieldItems); i++ { + ri.Decomp[i] = runeFromHexCodeString(fieldItems[i]) } - otherRi.DecompMappings[r] = struct{}{} - ris[mappedRune] = otherRi } } ris[r] = ri } - // meemRi := ris['م'] - // for mappedRune := range meemRi.DecompMappings { - - // mappedRuneInfo := ris[mappedRune] - // if mappedRuneInfo.IsLigature { - // continue - // } - // fmt.Printf("Meem mapping: %c. code=%x. Type=%s\n", mappedRune, mappedRune, mappedRuneInfo.DecompTag.String()) - // } - return ris, nil } @@ -248,7 +260,7 @@ func runeFromHexCodeString(c string) rune { codepointU64, err := strconv.ParseUint(c, 16, 32) if err != nil { - return invalidRune + panic("Invalid rune: " + c) } return rune(codepointU64) @@ -382,43 +394,43 @@ func bidiCategoryStringToBidiCategory(c string) BidiCategory { } } -func charDecompMapStringToCharDecompMap(c string) CharDecompMapTag { +func charDecompMapStringToCharDecompMap(c string) DecompTag { switch c { case "": - return CharDecompMap_font + return DecompTag_font case "": - return CharDecompMap_noBreak + return DecompTag_noBreak case "": - return CharDecompMap_initial + return DecompTags_initial case "": - return CharDecompMap_medial + return DecompTag_medial case "": - return CharDecompMap_final + return DecompTag_final case "": - return CharDecompMap_isolated + return DecompTag_isolated case "": - return CharDecompMap_circle + return DecompTag_circle case "": - return CharDecompMap_super + return DecompTag_super case "": - return CharDecompMap_sub + return DecompTag_sub case "": - return CharDecompMap_vertical + return DecompTag_vertical case "": - return CharDecompMap_wide + return DecompTag_wide case "": - return CharDecompMap_narrow + return DecompTag_narrow case "": - return CharDecompMap_small + return DecompTag_small case "": - return CharDecompMap_square + return DecompTag_square case "": - return CharDecompMap_fraction + return DecompTag_fraction case "": - return CharDecompMap_compat + return DecompTag_compat case "": - return CharDecompMap_NONE + return DecompTag_NONE default: panic("unknown charDecomMap string: " + c) } diff --git a/go.mod b/go.mod index a1ee36f..e21b628 100755 --- a/go.mod +++ b/go.mod @@ -14,4 +14,5 @@ require ( require ( github.com/bloeys/assimp-go v0.4.2 // indirect github.com/inkyblackness/imgui-go/v4 v4.3.0 // indirect + golang.org/x/text v0.3.7 // indirect ) diff --git a/go.sum b/go.sum index 2570ef8..48de0ca 100755 --- a/go.sum +++ b/go.sum @@ -23,5 +23,6 @@ github.com/veandco/go-sdl2 v0.4.10 h1:8QoD2bhWl7SbQDflIAUYWfl9Vq+mT8/boJFAUzAScg github.com/veandco/go-sdl2 v0.4.10/go.mod h1:OROqMhHD43nT4/i9crJukyVecjPNYYuCofep6SNiAjY= golang.org/x/image v0.0.0-20220617043117-41969df76e82 h1:KpZB5pUSBvrHltNEdK/tw0xlPeD13M6M6aGP32gKqiw= golang.org/x/image v0.0.0-20220617043117-41969df76e82/go.mod h1:doUCurBvlfPMKfmIpRIywoHmhN3VyhnoFDbvIEWF4hY= +golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/main.go b/main.go index 8eb2d2d..5b00055 100755 --- a/main.go +++ b/main.go @@ -100,6 +100,9 @@ func (p *program) Init() { p.gridMat = materials.NewMaterial("grid", "./res/shaders/grid.glsl") p.handleWindowResize() + + fmt.Printf("Beh equivalents: %v\n", string(glyphs.RuneInfos['ب'].EquivalentRunes)) + // engine.Quit() } func (p *program) Update() { @@ -187,12 +190,11 @@ func (p *program) Render() { } textColor := gglm.NewVec4(r, g, b, 1) - // str := " مرحبا كب" - str := " ijojo\n\n Hello there, friend|. pq?\n ABCDEFG\tHIJKLMNOPQRSTUVWXYZ\nمرحبا بك" + str := " مرحبا بك" + // str := " ijojo\n\n Hello there, friend|. pq?\n ABCDEFG\tHIJKLMNOPQRSTUVWXYZ\nمرحبا بك" // str := " ijojo\n\n Hello there, friend|. pq?\n ABCDEFG\tHIJKLMNOPQRSTUVWXYZ" p.GlyphRend.DrawTextOpenGLAbs(str, gglm.NewVec3(xOff, float32(p.GlyphRend.Atlas.LineHeight)*5+yOff, 0), textColor) - // strLen := len(str) // const charsPerFrame = 10_000 // for i := 0; i < charsPerFrame/strLen; i++ {