vis-master/946-non-ASCII-completion.patch

100 lines
2.9 KiB
Diff

From d59b98d934815e54320ad000eebfdaaf8fee344d Mon Sep 17 00:00:00 2001
From: Silvan Jegen <s.jegen@gmail.com>
Date: Sat, 10 Apr 2021 13:40:51 +0200
Subject: [PATCH 1/2] vis-menu: try to preserve valid Unicode points
Previously, non-ASCII characters were not handled properly. With this
patch we still mix byte counts and "grapheme cluster" counts (i.e. counts
of complete glyphs, each rendered in one terminal cell), but the code
should be less broken in the more common case now.
---
vis-complete | 2 +-
vis-menu.c | 47 +++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 44 insertions(+), 5 deletions(-)
--- a/vis-complete
+++ b/vis-complete
@@ -29,7 +29,7 @@ while [ $# -gt 0 ]; do
done
if [ $COMPLETE_WORD = 1 ]; then
- tr -cs '[:alnum:]_' '\n' |
+ tr -s '[:blank:]_' '\n' |
grep "^$(basic_regex_quote "$PATTERN")." |
sort -u
else
--- a/vis-menu.c
+++ b/vis-menu.c
@@ -84,12 +84,46 @@ appenditem(Item *item, Item **list, Item
static size_t
textwn(const char *s, int l) {
- int b, c; /* bytes and UTF-8 characters */
+ int c; /* number of bytes counted so far */
- for(b=c=0; s && s[b] && (l<0 || b<l); b++) if((s[b] & 0xc0) != 0x80) c++;
+ for(c=0; s && s[c] && (l<0 || c<l); ) c++; /* one step per byte, until the NUL or (when l >= 0) until l bytes were counted */
return c+4; /* Accomodate for the leading and trailing spaces */
}
+/*
+ * textvalidn steps through s one UTF-8 sequence at a time (length taken from
+ * the lead byte only) and returns the byte offset where it stopped, so that
+ * well-formed text cut there keeps every code point whole. A NULL s yields 0.
+ * With l < 0 it runs to the NUL; with l > 0 the result is always < l.
+ * Returns -1 if a continuation byte is found where a lead byte is expected.
+*/
+static ssize_t
+textvalidn(const char *s, int l) {
+ int c, utfcharbytes; /* c: byte offset into s; utfcharbytes: length of the sequence starting at s[c] */
+
+ for (c=0; s && s[c] && (l<0 || c<l); ) {
+ utfcharbytes = 0;
+ if ((s[c] & 0x80) == 0) { /* 0xxxxxxx: single byte (ASCII range) */
+ utfcharbytes = 1;
+ } else if ((s[c] & 0xf0) == 0xf0) { /* 1111xxxx: taken as a 4-byte lead; NOTE(review): this mask also matches 0xf8..0xff */
+ utfcharbytes = 4;
+ } else if ((s[c] & 0xf0) == 0xe0) { /* 1110xxxx: 3-byte lead */
+ utfcharbytes = 3;
+ } else if ((s[c] & 0xe0) == 0xc0) { /* 110xxxxx: 2-byte lead */
+ utfcharbytes = 2;
+ } else { /* only 10xxxxxx is left: a continuation byte at a sequence start */
+ return -1;
+ }
+
+ if ((l>0 && c + utfcharbytes >= l)) { /* stop before a sequence that would reach or pass byte l */
+ break;
+ }
+
+ c += utfcharbytes; /* NOTE(review): the skipped continuation bytes are never inspected, not even for a NUL */
+ }
+
+ return c;
+}
+
static size_t
textw(const char *s) {
return textwn(s, -1);
@@ -130,6 +164,7 @@ static void
drawtext(const char *t, size_t w, Color col) {
const char *prestr, *poststr;
size_t i, tw;
+ ssize_t valid;
char *buf;
if (w<5) return; /* This is the minimum size needed to write a label: 1 char + 4 padding spaces */
@@ -148,8 +183,12 @@ drawtext(const char *t, size_t w, Color
memset(buf, ' ', tw);
buf[tw] = '\0';
memcpy(buf, t, MIN(strlen(t), tw));
- if (textw(t) > w) /* Remember textw returns the width WITH padding */
- for (i = MAX((tw-4), 0); i < tw; i++) buf[i] = '.';
+ if (textw(t) > w) {/* Remember textw returns the width WITH padding */
+ valid = textvalidn(t, w-4);
+ if (valid < 0)
+ die("invalid UTF-8 sequence");
+ for (i = MAX(valid, 0); i < tw; i++) buf[i] = '.';
+ }
fprintf(stderr, "%s %s %s", prestr, buf, poststr);
free(buf);