edo

Experimental text editor.
Log | Files | Refs | LICENSE

commit c5781e38a6882a6e45858d9296e776a5771ccad5
parent 9687b8002c9f51a29fdc403668f63721ff1c9a56
Author: Claudio Alessi <smoppy@gmail.com>
Date:   Mon,  5 Jan 2026 00:33:09 +0100

Improve UTF-8 handling and term inconsistences.

Improve Regional Indicator code and add partial support for ambiguous
characters. Now a visual indicator is shown when such characters are at
the end of the line with a trailing space.

Cleanups are needed and likely a bit of refactoring.

Diffstat:
Medo.c | 5+++++
Mtui.c | 141+++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------------------
2 files changed, 108 insertions(+), 38 deletions(-)

diff --git a/edo.c b/edo.c @@ -496,6 +496,11 @@ run(void) { Line *l = vcur->buf->lines[vcur->line_idx]; fprintf(stderr, "debug current line (%d):\n", vcur->line_idx); fprintf(stderr, "=== START LINE ===\n"); + + unsigned int cp; + utf8_decode(l->buf, l->len, &cp); + + fprintf(stderr, "cp=%d\n", cp); for(int i = 0; i < l->len; i++) { if(!(i % 10)) fprintf(stderr, "\n"); fprintf(stderr, " 0x%0x", l->buf[i]); diff --git a/tui.c b/tui.c @@ -20,6 +20,10 @@ #define CLEARRIGHT "\33[0K" #define CURHIDE "\33[?25l" #define CURSHOW "\33[?25h" +#define ERASECHAR "\33[1X" + +#define IS_RIS(c) ((c) >= 0x1F1E6 && (c) <= 0x1F1FF) +#define IS_AMBI(c) ((c) >= 0x2100 && (c) <= 0x26FF) typedef struct { char *buf; @@ -135,21 +139,27 @@ tui_text_width(char *s, int len, int x) { w += tabstop - x % tabstop; continue; } - if(compat_mode && cp == 0x200D) { - w += 6; // <200d> - continue; - } - wc = wcwidth(cp); - if(!compat_mode) { + + wc = -1; + if(compat_mode) { + if(cp == 0x200D) { + w += 6; + continue; + } + if(IS_RIS(cp)) wc = 2; + } else { /* force 2 cells width for emoji followed by VS16 */ int nxi = i + step; if(nxi < len) { unsigned int nxcp; utf8_decode(s + nxi, len - nxi, &nxcp); + if(nxcp == 0xFE0F && ((cp >= 0x203C && cp <= 0x3299) || cp >= 0x1F000)) wc = 2; } } + + if(wc == -1) wc = wcwidth(cp); if(wc > 0) w += wc; } return w; @@ -179,11 +189,11 @@ tui_move_cursor(int c, int r) { } void -tui_draw_line(UI *ui, int x, int y, Cell *cells, int count) { - assert(x < ws.ws_col && y < ws.ws_row); +tui_draw_line_compat(UI *ui, int x, int y, Cell *cells, int count) { char *txt; unsigned int cp = 0; int was_emoji = 0; + int was_ambi = 0; int i; tui_move_cursor(x, y); @@ -191,47 +201,102 @@ tui_draw_line(UI *ui, int x, int y, Cell *cells, int count) { x += cells[i].width; txt = cell_get_text(cells + i, ui->pool.data); + int cw = cells[i].width; + /* TODO: temp code for testing, we'll se how to deal with this later */ - if(txt[0] == '\t') + if(txt[0] == '\t') { ab_printf(&frame, "%*s", cells[i].width, " "); - else { - if(compat_mode) { - int o = 0; - while(o < cells[i].len) { - int step = utf8_decode(txt + o, cells[i].len - o, &cp); - - if(cp == 0x200D) { - ab_write(&frame, "<200d>", cells[i].width); - } - else { - if(was_emoji) { - const char t[] = "\x1b[0;48;5;232m"; - ab_write(&frame, t, sizeof t); - } - ab_write(&frame, txt + o, step); - if(was_emoji) { - const char t[] = "\x1b[0m"; - ab_write(&frame, t, sizeof t); + } else { + int o = 0; + + while(o < cells[i].len) { + int step = utf8_decode(txt + o, cells[i].len - o, &cp); + + if(cp == 0x200D) { + ab_write(&frame, "<200d>", cells[i].width); + } else { + int w = wcwidth(cp); + if(w > 0) cw = w; + + if(was_emoji) { + if(was_ambi && i == count - 1) { + const char t2[] = "\x1b[48;5;1m"; + ab_write(&frame, t2, sizeof t2 - 1); + } else { + const char t[] = "\x1b[48;5;232m"; + ab_write(&frame, t, sizeof t - 1); + + /* clear eventual garbage state */ + ab_write(&frame, ERASECHAR, strlen(ERASECHAR)); } } - o += step; + ab_write(&frame, txt + o, step); + + if(was_emoji) { + const char t[] = "\x1b[0m"; + ab_write(&frame, t, sizeof t - 1); + } } + o += step; } - else - ab_write(&frame, txt, cells[i].len); } - /* Instead of check for 0xFE0F (or detect other problematic emojis) just reposition - * the cursor for any multi-byte grapheme cluster. This may be further optimized in - * the future but for now it's a robust way to solve any eventual incongruences. - * It's important to skip this for "glue" char to avoid breaking sequences which - * must always be adiacent (e.g. regional indicator symbols). */ - int is_glue = (cp >= 0x1F1E6 && cp <= 0x1F1FF); - if(cells[i].len > 1 && !is_glue) tui_move_cursor(x, y); + /* pad if needed */ + if(cw < cells[i].width) { + tui_move_cursor(x - cells[i].width + cw, y); + + const char t[] = "\x1b[48;5;233m"; + ab_write(&frame, t, sizeof t - 1); + + while(cw++ < cells[i].width) ab_write(&frame, " ", 1); + + const char t2[] = "\x1b[0m"; + ab_write(&frame, t2, sizeof t2 - 1); + } + + was_emoji = cells[i].len > 1 || IS_RIS(cp); + if(was_emoji) tui_move_cursor(x, y); + was_ambi = IS_AMBI(cp); + } + ab_write(&frame, CLEARRIGHT, strlen(CLEARRIGHT)); +} + + +void +tui_draw_line(UI *ui, int x, int y, Cell *cells, int count) { + assert(x < ws.ws_col && y < ws.ws_row); + + if(compat_mode) { + tui_draw_line_compat(ui, x, y, cells, count); + return; + } + + char *txt; + unsigned int cp; + int was_ambi = 0, i; + + tui_move_cursor(x, y); + for(i = 0; i < count; i++) { + x += cells[i].width; + txt = cell_get_text(cells + i, ui->pool.data); + + /* TODO: temp code for testing, we'll se how to deal with this later */ + if(txt[0] == '\t') + ab_printf(&frame, "%*s", cells[i].width, " "); + else { + utf8_decode(txt, cells[i].len, &cp); + if(was_ambi && i == count - 1) { + const char t[] = "\x1b[48;5;1m"; + ab_write(&frame, t, sizeof t - 1); + } - if(compat_mode) was_emoji = cells[i].len > 1 && !is_glue; + ab_write(&frame, txt, cells[i].len); + const char t[] = "\x1b[0m"; + ab_write(&frame, t, sizeof t - 1); + } + was_ambi = IS_AMBI(cp); } ab_write(&frame, CLEARRIGHT, strlen(CLEARRIGHT)); }