From 1d0ba25a660483da1272a31dd077ed94441e3d9f Mon Sep 17 00:00:00 2001 From: Tatsuya Kinoshita Date: Sat, 2 Jan 2021 09:20:37 +0900 Subject: New upstream version 0.5.3+git20210102 --- entity.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'entity.c') diff --git a/entity.c b/entity.c index fdd8f64..67b8cfb 100644 --- a/entity.c +++ b/entity.c @@ -44,6 +44,8 @@ conv_entity(unsigned int c) return " "; if (c == 0xa0) return NBSP; + if (c == 0xad) /* SOFT HYPHEN */ + return ""; if (c < 0x100) { /* Latin1 (ISO 8859-1) */ if (UseAltEntity) return alt_latin1[c - 0xa0]; @@ -56,11 +58,23 @@ conv_entity(unsigned int c) #ifdef USE_M17N #ifdef USE_UNICODE if (c <= WC_C_UCS4_END) { /* Unicode */ + char *chk; wc_uchar utf8[7]; wc_ucs_to_utf8(c, utf8); - return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr; + /* we eventually need to display it so check DisplayCharset */ + chk = wc_conv((char *)utf8, WC_CES_UTF_8, DisplayCharset ? DisplayCharset : WC_CES_US_ASCII)->ptr; + if (strcmp(chk, "?") != 0) + return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr; } #endif #endif + if (c == 0x201c || c == 0x201f || c == 0x201d || c == 0x2033) + return "\""; + if (c == 0x2018 || c == 0x201b || c == 0x2019 || c == 0x2032) + return "'"; + if (c >= 0x2010 && c < 0x2014) + return "-"; + if (c == 0x2014) + return "--"; return "?"; } -- cgit v1.2.3