%PDF- %PDF-
Direktori : /usr/local/share/namazu/pl/ |
Current File : //usr/local/share/namazu/pl/extutf8.pl |
# # -*- Perl -*- # $Id: extutf8.pl,v 1.1.4.5 2008-07-26 06:14:37 opengl2772 Exp $ # Copyright (C) 2008 Tadamasa Teranishi All rights reserved. # Copyright (C) 2008 Namazu Project All rights reserved. # This is free software with ABSOLUTELY NO WARRANTY. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either versions 2, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA # 02111-1307, USA # # This file must be encoded in EUC-JP encoding # package extutf8; use strict; use English; require 'util.pl'; eval 'require NKF;'; use vars qw($utf8_to_eucjp $codeconv_document); $utf8_to_eucjp = undef; $codeconv_document = undef; my $_converter = ''; my $_utfconvpath = undef; my $_nkfversion = 0.00; sub setup() { return 'yes' if (defined $codeconv_document); if (!util::islang("ja")) { $utf8_to_eucjp = \&_utf8_to_usascii; $codeconv_document = \&_codeconv_document_usascii; $_converter = 'to_usascii'; return 'yes'; } eval '$_nkfversion = $NKF::VERSION;'; $_nkfversion = 0.00 if (!defined $_nkfversion); if ($_nkfversion >= 2.04) { $utf8_to_eucjp = \&_nkf_utf8_to_eucjp; $codeconv_document = \&_nkf_codeconv_document; $_converter = 'module_nkf'; return 'yes'; } if ($English::PERL_VERSION >= 5.008) { eval 'use Encode qw/from_to Unicode JP/;'; $utf8_to_eucjp = \&_encode_utf8_to_eucjp; $codeconv_document = \&_encode_codeconv_document; $_converter = 'module_encode'; return 'yes'; } $_utfconvpath = util::checkcmd('lv'); if (defined $_utfconvpath) { $utf8_to_eucjp = \&_lv_utf8_to_eucjp; $codeconv_document = \&_lv_codeconv_document; $_converter = 'lv'; return 'yes'; } if (defined util::checklib('unicode.pl')) { eval require 'unicode.pl'; $utf8_to_eucjp = \&_unicode_utf8_to_eucjp; $codeconv_document = \&_unicode_codeconv_document; $_converter = 'unicode.pl'; return 'yes'; } return 'no'; } sub converter() { return $_converter; } # # # sub _utf8_to_usascii($) { my ($contref) = @_; codeconv::tousascii($contref); return $$contref; } sub _codeconv_document_usascii($) { my ($contref) = @_; _utf8_to_usascii($contref); codeconv::normalize_document($contref); return $$contref; } # # Conversion by "Encode" # Perl 5.8 or later # sub _encode_utf8_to_eucjp($) { my ($contref) = @_; Encode::from_to($$contref, "utf-8", "euc-jp"); return $$contref; } sub _encode_codeconv_document($) { my ($contref) = @_; Encode::from_to($$contref, "utf-8", "euc-jp"); codeconv::normalize_eucjp_document($contref); return $$contref; } # # Conversion by "NKF Perl module" # version 2.04 or later # sub _nkf_utf8_to_eucjp($) { my ($contref) = @_; $$contref = NKF::nkf("-Wex", $$contref); return $$contref; } sub _nkf_codeconv_document($) { my ($contref) = @_; $$contref = NKF::nkf("-WemXZ1", $$contref); codeconv::normalize_document($contref); return $$contref; } # # [obsolute] Conversion by "lv" # It is slow. # sub _lv_utf8_to_eucjp($) { my ($contref) = @_; my $tmpfile = util::tmpnam('NMZ.tmp.utf8'); { my $fh = util::efopen("> $tmpfile"); print $fh $$contref; util::fclose($fh); } $$contref = ""; my @cmd = ($_utfconvpath, "-Iu8", "-Oej", $tmpfile); my $status = util::syscmd( command => \@cmd, option => { "stdout" => $contref, "stderr" => "/dev/null", }, ); unlink $tmpfile; return $$contref; } sub _lv_codeconv_document($) { my ($contref) = @_; _lv_utf8_to_eucjp($contref); codeconv::normalize_eucjp_document($contref); return $$contref; } # # [obsolute] Conversion by "unicode.pl" # sub _unicode_utf8_to_eucjp($) { my ($contref) = @_; my @unicodeList = unicode::UTF8toUTF16($$contref); $$contref = unicode::u2e(@unicodeList); $$contref =~ s/\00//g; return $$contref; } sub _unicode_codeconv_document($) { my ($contref) = @_; _unicode_utf8_to_eucjp($contref); codeconv::normalize_eucjp_document($contref); return $$contref; } 1;