aboutsummaryrefslogtreecommitdiffstats
path: root/entity.c
diff options
context:
space:
mode:
author: Ambrose Li <ambrose.li@gmail.com> 2020-08-24 02:20:43 +0000
committer: Ambrose Li <ambrose.li@gmail.com> 2020-08-24 02:20:43 +0000
commit: 9f18e67a9bb5ff0387f76e5a2870b49558f868ad (patch)
tree: feb45c9c4e518c0ab784b82b25b6ea85158e0455 /entity.c
parent: Update ChangeLog (diff)
download: w3m-9f18e67a9bb5ff0387f76e5a2870b49558f868ad.tar.gz
w3m-9f18e67a9bb5ff0387f76e5a2870b49558f868ad.zip
Cleaned version of the 20200823_q branch. Changes the behaviour of the q tag (when m17n and Unicode are configured) to use "smart" quotes if the display charset can handle them. Falls back to the old behaviour (ASCII quotes, with 6/0 (`) and 2/7 (') given left/right quote semantics) if the display charset is us-ascii.
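Also changes the behaviour of conv_entity() so that left/right quotes and some dashes are converted to ASCII approximations (", ', - and --) when the display charset cannot represent them; this is needed because the new code for the q tag relies on named entities.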
Diffstat (limited to 'entity.c')
-rw-r--r-- entity.c 14
1 files changed, 13 insertions, 1 deletions
diff --git a/entity.c b/entity.c
index 45dc95e..67b8cfb 100644
--- a/entity.c
+++ b/entity.c
@@ -58,11 +58,23 @@ conv_entity(unsigned int c)
#ifdef USE_M17N
#ifdef USE_UNICODE
if (c <= WC_C_UCS4_END) { /* Unicode */
+ char *chk;
wc_uchar utf8[7];
wc_ucs_to_utf8(c, utf8);
- return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr;
+ /* we eventually need to display it so check DisplayCharset */
+ chk = wc_conv((char *)utf8, WC_CES_UTF_8, DisplayCharset ? DisplayCharset : WC_CES_US_ASCII)->ptr;
+ if (strcmp(chk, "?") != 0)
+ return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr;
}
#endif
#endif
+ if (c == 0x201c || c == 0x201f || c == 0x201d || c == 0x2033)
+ return "\"";
+ if (c == 0x2018 || c == 0x201b || c == 0x2019 || c == 0x2032)
+ return "'";
+ if (c >= 0x2010 && c < 0x2014)
+ return "-";
+ if (c == 0x2014)
+ return "--";
return "?";
}