diff options
author | Ambrose Li <ambrose.li@gmail.com> | 2020-08-24 02:20:43 +0000 |
---|---|---|
committer | Ambrose Li <ambrose.li@gmail.com> | 2020-08-24 02:20:43 +0000 |
commit | 9f18e67a9bb5ff0387f76e5a2870b49558f868ad (patch) | |
tree | feb45c9c4e518c0ab784b82b25b6ea85158e0455 /entity.c | |
parent | Update ChangeLog (diff) | |
download | w3m-9f18e67a9bb5ff0387f76e5a2870b49558f868ad.tar.gz w3m-9f18e67a9bb5ff0387f76e5a2870b49558f868ad.zip |
Cleaned version of 20200823_q branch. Changes the behaviour of the q tag (when m17n and Unicode are configured) to use "smart" quotes if the display charset can handle them. Falls back to old behaviour (ASCII quotes with left/right quote semantics for 6/0 (`) and 2/7 (')) if the display charset is us-ascii.
Also changes the behaviour of conv_entity() to convert left/right quotes and some dashes to ASCII equivalents (", ', - and --) when the display charset cannot represent them, because named entities are needed for the new code for the q tag.
Diffstat (limited to 'entity.c')
-rw-r--r-- | entity.c | 14 |
1 files changed, 13 insertions, 1 deletions
@@ -58,11 +58,23 @@ conv_entity(unsigned int c) #ifdef USE_M17N #ifdef USE_UNICODE if (c <= WC_C_UCS4_END) { /* Unicode */ + char *chk; wc_uchar utf8[7]; wc_ucs_to_utf8(c, utf8); - return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr; + /* we eventually need to display it so check DisplayCharset */ + chk = wc_conv((char *)utf8, WC_CES_UTF_8, DisplayCharset ? DisplayCharset : WC_CES_US_ASCII)->ptr; + if (strcmp(chk, "?") != 0) + return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr; } #endif #endif + if (c == 0x201c || c == 0x201f || c == 0x201d || c == 0x2033) + return "\""; + if (c == 0x2018 || c == 0x201b || c == 0x2019 || c == 0x2032) + return "'"; + if (c >= 0x2010 && c < 0x2014) + return "-"; + if (c == 0x2014) + return "--"; return "?"; } |