From 919adb4b57977d5e375dab0fa943b6e81fa145ab Mon Sep 17 00:00:00 2001 From: Ito Hiroyuki Date: Tue, 24 Aug 2010 10:11:51 +0000 Subject: [w3m-dev 04393] [patch] locale-related character management --- libwc/map/mk_ucs_case_map.pl | 48 + libwc/map/mk_ucs_isdigit_map.pl | 56 + libwc/map/ucs_case.map | 2600 +++++++++++++++++++++++++++++++++++++++ libwc/map/ucs_isalpha.map | 469 +++++++ libwc/map/ucs_isdigit.map | 30 + libwc/map/ucs_islower.map | 471 +++++++ libwc/map/ucs_isupper.map | 455 +++++++ libwc/ucs.c | 75 ++ libwc/ucs.h | 8 + 9 files changed, 4212 insertions(+) create mode 100644 libwc/map/mk_ucs_case_map.pl create mode 100644 libwc/map/mk_ucs_isdigit_map.pl create mode 100644 libwc/map/ucs_case.map create mode 100644 libwc/map/ucs_isalpha.map create mode 100644 libwc/map/ucs_isdigit.map create mode 100644 libwc/map/ucs_islower.map create mode 100644 libwc/map/ucs_isupper.map (limited to 'libwc') diff --git a/libwc/map/mk_ucs_case_map.pl b/libwc/map/mk_ucs_case_map.pl new file mode 100644 index 0000000..ee48826 --- /dev/null +++ b/libwc/map/mk_ucs_case_map.pl @@ -0,0 +1,48 @@ + +open(MAP, "> ucs_case.map"); +print MAP <) { + chop; + ($name, $col) = split; + + @cp = (); + + open(UCD, "< private/UnicodeData-4.1.0.txt"); + while() { + chop; + @entry = split(';'); + last if $entry[0] =~ m/.{5,}/; + if ($entry[$col] ne '') { + push (@cp, $entry[0]); + $map{$entry[0]} = $entry[$col]; + } + } + close UCD; + + $nocp = @cp; + + print MAP < ucs_isdigit.map"); +print MAP <) { + chop; + ($name, $class) = split; + + @cp = (); + + open(UCD, "< private/UnicodeData-4.1.0.txt"); + while() { + chop; + @entry = split(';'); + last if $entry[0] =~ m/.{5,}/; + if ($entry[2] eq $class) { + push (@cp, $entry[0]); + } + } + close UCD; + + @bs = (); + $last = -1; + $seq = -1; + for my $e (@cp) { + if (++$last != hex $e) { + $seq = $e; + $last = hex $e; + push (@bs, $seq); + } + $end{$seq} = $e; + } + $nobs = @bs; + + print MAP < WC_F_CS94_END) return WC_C_UCS4_ERROR; map = cs94_ucs_map[f - WC_F_ISO_BASE]; @@ -558,6 +565,74 @@ wc_is_ucs_hangul(wc_uint32 ucs) ucs_hangul_map, N_ucs_hangul_map) != NULL); } +wc_bool +wc_is_ucs_alpha(wc_uint32 ucs) +{ + return (ucs <= WC_C_UCS2_END && + wc_map_range_search((wc_uint16)ucs, + ucs_isalpha_map, N_ucs_isalpha_map) != NULL); +} + +wc_bool +wc_is_ucs_digit(wc_uint32 ucs) +{ + return (ucs <= WC_C_UCS2_END && + wc_map_range_search((wc_uint16)ucs, + ucs_isdigit_map, N_ucs_isdigit_map) != NULL); +} + +wc_bool +wc_is_ucs_alnum(wc_uint32 ucs) +{ + return (wc_is_ucs_alpha(ucs) || wc_is_ucs_digit(ucs)); +} + +wc_bool +wc_is_ucs_lower(wc_uint32 ucs) +{ + return (ucs <= WC_C_UCS2_END && + wc_map_range_search((wc_uint16)ucs, + ucs_islower_map, N_ucs_islower_map) != NULL); +} + +wc_bool +wc_is_ucs_upper(wc_uint32 ucs) +{ + return (ucs <= WC_C_UCS2_END && + wc_map_range_search((wc_uint16)ucs, + ucs_isupper_map, N_ucs_isupper_map) != NULL); +} + +wc_uint32 +wc_ucs_toupper(wc_uint32 ucs) +{ + wc_map *conv = NULL; + if (ucs <= WC_C_UCS2_END) + conv = wc_map_search((wc_uint16)ucs, + ucs_toupper_map, N_ucs_toupper_map); + return conv ? (wc_uint32)(conv->code2) : ucs; +} + +wc_uint32 +wc_ucs_tolower(wc_uint32 ucs) +{ + wc_map *conv = NULL; + if (ucs <= WC_C_UCS2_END) + conv = wc_map_search((wc_uint16)ucs, + ucs_tolower_map, N_ucs_tolower_map); + return conv ? (wc_uint32)(conv->code2) : ucs; +} + +wc_uint32 +wc_ucs_totitle(wc_uint32 ucs) +{ + wc_map *conv = NULL; + if (ucs <= WC_C_UCS2_END) + conv = wc_map_search((wc_uint16)ucs, + ucs_totitle_map, N_ucs_totitle_map); + return conv ? (wc_uint32)(conv->code2) : ucs; +} + wc_uint32 wc_ucs_precompose(wc_uint32 ucs1, wc_uint32 ucs2) { diff --git a/libwc/ucs.h b/libwc/ucs.h index 5a3138f..261351e 100644 --- a/libwc/ucs.h +++ b/libwc/ucs.h @@ -48,6 +48,14 @@ extern wc_bool wc_is_ucs_ambiguous_width(wc_uint32 ucs); extern wc_bool wc_is_ucs_wide(wc_uint32 ucs); extern wc_bool wc_is_ucs_combining(wc_uint32 ucs); extern wc_bool wc_is_ucs_hangul(wc_uint32 ucs); +extern wc_bool wc_is_ucs_alpha(wc_uint32 ucs); +extern wc_bool wc_is_ucs_digit(wc_uint32 ucs); +extern wc_bool wc_is_ucs_alnum(wc_uint32 ucs); +extern wc_bool wc_is_ucs_lower(wc_uint32 ucs); +extern wc_bool wc_is_ucs_upper(wc_uint32 ucs); +extern wc_uint32 wc_ucs_toupper(wc_uint32 ucs); +extern wc_uint32 wc_ucs_tolower(wc_uint32 ucs); +extern wc_uint32 wc_ucs_totitle(wc_uint32 ucs); extern wc_uint32 wc_ucs_precompose(wc_uint32 ucs1, wc_uint32 ucs2); extern wc_uint32 wc_ucs_to_fullwidth(wc_uint32 ucs); extern int wc_ucs_put_tag(char *tag); -- cgit v1.2.3