100 lines
2.9 KiB
Diff
100 lines
2.9 KiB
Diff
|
From d59b98d934815e54320ad000eebfdaaf8fee344d Mon Sep 17 00:00:00 2001
|
||
|
From: Silvan Jegen <s.jegen@gmail.com>
|
||
|
Date: Sat, 10 Apr 2021 13:40:51 +0200
|
||
|
Subject: [PATCH 1/2] vis-menu: try to preserve valid Unicode code points
|
||
|
|
||
|
Previously, we did not take non-ASCII characters into account properly. With
|
||
|
this patch we still mix byte counts and "grapheme cluster" (i.e. complete
|
||
|
glyphs that are rendered in a terminal cell) counts but the code should
|
||
|
be less broken in the more common case now.
|
||
|
---
|
||
|
vis-complete | 2 +-
|
||
|
vis-menu.c | 47 +++++++++++++++++++++++++++++++++++++++++++----
|
||
|
2 files changed, 44 insertions(+), 5 deletions(-)
|
||
|
|
||
|
--- a/vis-complete
|
||
|
+++ b/vis-complete
|
||
|
@@ -29,7 +29,7 @@ while [ $# -gt 0 ]; do
|
||
|
done
|
||
|
|
||
|
if [ $COMPLETE_WORD = 1 ]; then
|
||
|
- tr -cs '[:alnum:]_' '\n' |
|
||
|
+ tr -s '[:blank:]_' '\n' |
|
||
|
grep "^$(basic_regex_quote "$PATTERN")." |
|
||
|
sort -u
|
||
|
else
|
||
|
--- a/vis-menu.c
|
||
|
+++ b/vis-menu.c
|
||
|
@@ -84,12 +84,46 @@ appenditem(Item *item, Item **list, Item
|
||
|
|
||
|
static size_t
|
||
|
textwn(const char *s, int l) {
|
||
|
- int b, c; /* bytes and UTF-8 characters */
|
||
|
+ int c;
|
||
|
|
||
|
- for(b=c=0; s && s[b] && (l<0 || b<l); b++) if((s[b] & 0xc0) != 0x80) c++;
|
||
|
+ for(c=0; s && s[c] && (l<0 || c<l); ) c++;
|
||
|
return c+4; /* Accomodate for the leading and trailing spaces */
|
||
|
}
|
||
|
|
||
|
+/*
|
||
|
+ * textvalidn returns the largest number of bytes <= l of string s that
|
||
|
+ * only contains valid Unicode code points. This is used to make sure we don't
|
||
|
+ * cut off any valid UTF-8-encoded Unicode code point in case there is not
|
||
|
+ * enough space to render the whole text string.
|
||
|
+*/
|
||
|
+static ssize_t
|
||
|
+textvalidn(const char *s, int l) {
|
||
|
+ int c, utfcharbytes; /* byte count and UTF-8 codepoint length */
|
||
|
+
|
||
|
+ for (c=0; s && s[c] && (l<0 || c<l); ) {
|
||
|
+ utfcharbytes = 0;
|
||
|
+ if ((s[c] & 0x80) == 0) {
|
||
|
+ utfcharbytes = 1;
|
||
|
+ } else if ((s[c] & 0xf0) == 0xf0) {
|
||
|
+ utfcharbytes = 4;
|
||
|
+ } else if ((s[c] & 0xf0) == 0xe0) {
|
||
|
+ utfcharbytes = 3;
|
||
|
+ } else if ((s[c] & 0xe0) == 0xc0) {
|
||
|
+ utfcharbytes = 2;
|
||
|
+ } else {
|
||
|
+ return -1;
|
||
|
+ }
|
||
|
+
|
||
|
+ if ((l>0 && c + utfcharbytes >= l)) {
|
||
|
+ break;
|
||
|
+ }
|
||
|
+
|
||
|
+ c += utfcharbytes;
|
||
|
+ }
|
||
|
+
|
||
|
+ return c;
|
||
|
+}
|
||
|
+
|
||
|
static size_t
|
||
|
textw(const char *s) {
|
||
|
return textwn(s, -1);
|
||
|
@@ -130,6 +164,7 @@ static void
|
||
|
drawtext(const char *t, size_t w, Color col) {
|
||
|
const char *prestr, *poststr;
|
||
|
size_t i, tw;
|
||
|
+ ssize_t valid;
|
||
|
char *buf;
|
||
|
|
||
|
if (w<5) return; /* This is the minimum size needed to write a label: 1 char + 4 padding spaces */
|
||
|
@@ -148,8 +183,12 @@ drawtext(const char *t, size_t w, Color
|
||
|
memset(buf, ' ', tw);
|
||
|
buf[tw] = '\0';
|
||
|
memcpy(buf, t, MIN(strlen(t), tw));
|
||
|
- if (textw(t) > w) /* Remember textw returns the width WITH padding */
|
||
|
- for (i = MAX((tw-4), 0); i < tw; i++) buf[i] = '.';
|
||
|
+ if (textw(t) > w) {/* Remember textw returns the width WITH padding */
|
||
|
+ valid = textvalidn(t, w-4);
|
||
|
+ if (valid < 0)
|
||
|
+ die("invalid UTF-8 sequence");
|
||
|
+ for (i = MAX(valid, 0); i < tw; i++) buf[i] = '.';
|
||
|
+ }
|
||
|
|
||
|
fprintf(stderr, "%s %s %s", prestr, buf, poststr);
|
||
|
free(buf);
|