merge m17n patch

add libwc
author: Fumitoshi UKAI <ukai@debian.or.jp> 2003-09-22 21:02:15 +0000
committer: Fumitoshi UKAI <ukai@debian.or.jp> 2003-09-22 21:02:15 +0000
commit: 604c11affe988bab23c87598c02248fff1d73f43 (patch)
tree: 6252cbbfd3cf703691a8ddbf1fdee5c1246b5faa /entity.c
parent: * version.c.in: cvs version (diff)
download: w3m-604c11affe988bab23c87598c02248fff1d73f43.tar.gz
w3m-604c11affe988bab23c87598c02248fff1d73f43.zip
1 files changed, 48 insertions, 115 deletions
diff --git a/entity.c b/entity.c
index 58e33ec..d6db6c9 100644
--- a/entity.c
+++ b/entity.c
@@ -1,133 +1,66 @@
-/* $Id: entity.c,v 1.4 2003/04/07 16:27:10 ukai Exp $ */
-#include "fm.h"
-#include <stdio.h>
-#include "indep.h"
+/* $Id: entity.c,v 1.5 2003/09/22 21:02:18 ukai Exp $ */
+#ifdef DUMMY
 #include "Str.h"
-#include <gc.h>
-
-typedef struct {
-    short ucs;
-    char *ptr;
-} entity_map;
-
-#ifdef JP_CHARSET
-#include "ucs_eucjp.h"
-
-static char *latin1_eucjp_map[96] = {
-    NBSP, "!", "¢", "£", "CUR", "￥", "|", "§",	/* 32- 39 */
-    "¨", "(C)", "-a", "≪", "¬", "-", "(R)", "￣",	/* 40- 47 */
-    "°", "±", "^2", "^3", "'", "μ", "¶", "・",	/* 48- 55 */
-    ",", "^1", "-o", "≫", "1/4", "1/2", "3/4", "?",	/* 56- 63 */
-    "A`", "A'", "A^", "A~", "A:", "Å", "AE", "C,",	/* 64- 71 */
-    "E`", "E'", "E^", "E", "I`", "I'", "I^", "I:",	/* 72- 79 */
-    "D-", "N~", "O`", "O'", "O^", "O~", "Oe", "×",	/* 80- 87 */
-    "φ", "U`", "U'", "U^", "U:", "Y'", "th", "ss",	/* 88- 95 */
-    "a`", "a'", "a^", "a~", "a:", "a", "ae", "c",	/* 96-103 */
-    "e`", "e'", "e^", "e:", "i`", "i'", "i^", "i:",	/* 104-111 */
-    "d-", "n~", "o`", "o'", "o^", "o~", "oe", "÷",	/* 112-119 */
-    "φ", "u`", "u'", "u^", "u:", "y'", "th", "y:"	/* 120-127 */
-};
-
-#else
-#ifdef __EMX__
-/*
- * Character conversion table
- * ( to code page 850 from iso-8859-1 )
- *
- * Following character constants are in code page 850.
- */
-static char *latin1_cp850_map[96] = {
-    NBSP, "\255", "\275", "\234", "\317", "\276", "\335", "\365",
-    "\371", "\270", "\246", "\256", "\252", "\360", "\251", "\356",
-    "\370", "\361", "\375", "\374", "\357", "\346", "\364", "\372",
-    "\367", "\373", "\247", "\257", "\254", "\253", "\363", "\250",
-    "\267", "\265", "\266", "\307", "\216", "\217", "\222", "\200",
-    "\324", "\220", "\322", "\323", "\336", "\326", "\327", "\330",
-    "\321", "\245", "\343", "\340", "\342", "\345", "\231", "\236",
-    "\235", "\353", "\351", "\352", "\232", "\355", "\350", "\341",
-    "\205", "\240", "\203", "\306", "\204", "\206", "\221", "\207",
-    "\212", "\202", "\210", "\211", "\215", "\241", "\214", "\213",
-    "\320", "\244", "\225", "\242", "\223", "\344", "\224", "\366",
-    "\233", "\227", "\243", "\226", "\201", "\354", "\347", "\230"
-};
+#define NBSP " "
+#define UseAltEntity 1
+#undef USE_M17N
+#else /* DUMMY */
+#include "fm.h"
+#ifdef USE_M17N
+#ifdef USE_UNICODE
+#include "ucs.h"
+#include "utf8.h"
 #endif
 #endif
-#include "ucs_latin1.h"
+#endif /* DUMMY */
 
-static char *latin1_ascii_map[96] = {
-    NBSP, "!", "-c-", "-L-", "CUR", "=Y=", "|", "S:",	/* 32- 39 */
-    "\"", "(C)", "-a", "<<", "NOT", "-", "(R)", "￣",	/* 40- 47 */
-    "DEG", "+-", "^2", "^3", "'", "u", "P:", ".",	/* 48- 55 */
-    ",", "^1", "-o", ">>", "1/4", "1/2", "3/4", "?",	/* 56- 63 */
-    "A`", "A'", "A^", "A~", "A:", "AA", "AE", "C,",	/* 64- 71 */
-    "E`", "E'", "E^", "E", "I`", "I'", "I^", "I:",	/* 72- 79 */
-    "D-", "N~", "O`", "O'", "O^", "O~", "Oe", "x",	/* 80- 87 */
-    "O/", "U`", "U'", "U^", "U:", "Y'", "th", "ss",	/* 88- 95 */
-    "a`", "a'", "a^", "a~", "a:", "a", "ae", "c",	/* 96-103 */
-    "e`", "e'", "e^", "e:", "i`", "i'", "i^", "i:",	/* 104-111 */
-    "d-", "n~", "o`", "o'", "o^", "o~", "oe", "-:",	/* 112-119 */
-    "o/", "u`", "u'", "u^", "u:", "y'", "th", "y:"	/* 120-127 */
+/* *INDENT-OFF* */
+static char *alt_latin1[ 96 ] = {
+    NBSP,  "!",   "-c-", "-L-", "CUR", "=Y=",  "|",  "S:",
+    "\"",  "(C)", "-a",  "<<",  "NOT", "-",   "(R)", "-",
+    "DEG", "+-",  "^2",  "^3",   "'",  "u",   "P:",  ".",
+    ",",   "^1",  "-o",  ">>",  "1/4", "1/2", "3/4", "?", 
+    "A`",  "A'",  "A^",  "A~",  "A:",  "AA",  "AE",  "C,",
+    "E`",  "E'",  "E^",  "E:",  "I`",  "I'",  "I^",  "I:",
+    "D-",  "N~",  "O`",  "O'",  "O^",  "O~",  "O:",  "x",
+    "O/",  "U`",  "U'",  "U^",  "U:",  "Y'",  "TH",  "ss",
+    "a`",  "a'",  "a^",  "a~",  "a:",  "aa",  "ae",  "c,", 
+    "e`",  "e'",  "e^",  "e:",  "i`",  "i'",  "i^",  "i:",
+    "d-",  "n~",  "o`",  "o'",  "o^",  "o~",  "o:",  "-:",
+    "o/",  "u`",  "u'",  "u^",  "u:",  "y'",  "th",  "y:"
 };
-
-char UseAltEntity = FALSE;
-
-static int
-map_cmp(const void *a, const void *b)
-{
-    return *(int *)a - ((entity_map *) b)->ucs;
-}
-
-static char *
-map_search(int c, entity_map * map, size_t n)
-{
-    entity_map *m;
-
-    m = (entity_map *) bsearch((void *)&c, (void *)map, n,
-			       sizeof(entity_map), map_cmp);
-    return m ? m->ptr : NULL;
-}
+/* *INDENT-ON* */
 
 char *
-conv_entity(int c)
+conv_entity(unsigned int c)
 {
-    static char buf[] = { 0, 0 };
-    char *p;
+    char b = c & 0xff;		/* low byte of c; only read on the c < 0x100 paths */
 
-    if (c < 0)			/* error */
-	return "?";
-    if (c < 0x80) {		/* US-ASCII */
-	buf[0] = (char)c;
-	return buf;
-    }
-    if (c < 0xa0)		/* C1 */
-	return "?";
-    if (c == 0xa0)		/* NBSP */
+    if (c < 0x20)		/* C0 */
+	return " ";
+    if (c < 0x7f)		/* ASCII */
+	return Strnew_charp_n(&b, 1)->ptr;
+    if (c < 0xa0)		/* DEL, C1 */
+	return " ";
+    if (c == 0xa0)
 	return NBSP;
-    if (c < 0x100) {		/* Latin 1 (ISO-8859-1) */
+    if (c < 0x100) {		/* Latin1 (ISO 8859-1) */
 	if (UseAltEntity)
-	    return latin1_ascii_map[c - 0xa0];
-#ifdef JP_CHARSET
-	return latin1_eucjp_map[c - 0xa0];
+	    return alt_latin1[c - 0xa0];
+#ifdef USE_M17N
+	return wc_conv_n(&b, 1, WC_CES_ISO_8859_1, InnerCharset)->ptr;
 #else
-#ifdef __EMX__
-	if (CodePage == 850)
-	    return latin1_cp850_map[c - 0xa0];
-#endif
-	buf[0] = (char)c;
-	return buf;
+	return Strnew_charp_n(&b, 1)->ptr;
 #endif
     }
-    /* Unicode */
-#ifdef JP_CHARSET
-    if (!UseAltEntity) {
-	p = map_search(c, ucs_eucjp_map,
-		       sizeof(ucs_eucjp_map) / sizeof(entity_map));
-	return p ? p : "?";
+#ifdef USE_M17N
+#ifdef USE_UNICODE
+    if (c <= WC_C_UCS4_END) {	/* Unicode */
+	wc_uchar utf8[7];
+	wc_ucs_to_utf8(c, utf8);
+	return wc_conv((char *)utf8, WC_CES_UTF_8, InnerCharset)->ptr;
     }
 #endif
-    p = map_search(c, ucs_latin1_map,
-		   sizeof(ucs_latin1_map) / sizeof(entity_map));
-    if (p && *p & 0x80)		/* ISO-8859-1 */
-	return conv_entity((int)(*p & 0xff));
+#endif
-    return p ? p : "?";
+    return "?";			/* no branch above produced a conversion */
 }
author	Fumitoshi UKAI <ukai@debian.or.jp>	2003-09-22 21:02:15 +0000
committer	Fumitoshi UKAI <ukai@debian.or.jp>	2003-09-22 21:02:15 +0000
commit	604c11affe988bab23c87598c02248fff1d73f43 (patch)
tree	6252cbbfd3cf703691a8ddbf1fdee5c1246b5faa /entity.c
parent	* version.c.in: cvs version (diff)
download	w3m-604c11affe988bab23c87598c02248fff1d73f43.tar.gz w3m-604c11affe988bab23c87598c02248fff1d73f43.zip