#! /usr/bin/perl # # Samba Multibyte convert tool # copyright (c) 2000, Hiroshi MIURA # All rights reserved by Hiroshi MIURA # # Changelog --> see README # # TODO: # - implement UTF-8 and EUC with JIS X0212(so-called 3byte EUC) # # usage: commandname [-l][-h][-p ][-d] # [ [ .... ]] # # -p: default path => same path with original. # # Available command name is.... # euctohex, euctocap, euctojis, euctosjis # jistohex, jistocap, jistoeuc, jistosjis # captohex, captoeuc, captojis, captosjis # hextoeuc, hextocap, hextojis, hextosjis # sjistohex, sjistocap, sjistojis, sjistoeuc # sjistosjis, captocap, hextohex, euctoeuc, jistojis # # these're 25 patterns. :-`) # require 5; use Getopt::Std; use File::stat; # now, I use jcode.pl, does someone knows how to use Jcode.pm?? # # require "jcode.pl"; $version='2.2'; getopts("dhlp:a:"); &Show_Usage($comn) if ($opt_h); # ----------------------------------------- # definition of important table and constants, regex. # @sjisconv= ( 0xfa40, 0xfa49, 0xeeef, 0xfa4a, 0xfa53, 0x8754, 0xfa54, 0xfa54, 0x81ca, 0xfa55, 0xfa57, 0xeefa, 0xfa58, 0xfa58, 0x878a, 0xfa59, 0xfa59, 0x8782, 0xfa5a, 0xfa5a, 0x8784, 0xfa5b, 0xfa5b, 0x81e6, 0xfa5c, 0xfa7e, 0xed40, 0xfa80, 0xfa9b, 0xed63, 0xfa9c, 0xfafc, 0xed80, 0xfb40, 0xfb5b, 0xede1, 0xfb5c, 0xfb7e, 0xee40, 0xfb80, 0xfb9b, 0xee63, 0xfb9c, 0xfbfc, 0xee80, 0xfc40, 0xfc4b, 0xeee1, ); $sjisconvlen=($#sjisconv+1)/3-1; @sjisreg = ( # 0x8470, 0x847e, 0x8440, # if Win95 compatible, # 0x8480, 0x8491, 0x844f, # validate these lines 0x8754, 0x875d, 0xfa4a, 0x8782, 0x8782, 0xfa59, 0x8784, 0x8784, 0xfa5a, 0x878a, 0x878a, 0xfa58, 0x8790, 0x8790, 0x81e0, 0x8791, 0x8791, 0x81df, 0x8792, 0x8792, 0x81e7, 0x8795, 0x8795, 0x81e3, 0x8796, 0x8796, 0x81db, 0x8797, 0x8797, 0x81da, 0x879a, 0x879a, 0x81e6, 0x879b, 0x879b, 0x81bf, 0x879c, 0x879c, 0x81be, 0xed40, 0xed62, 0xfa5c, 0xed63, 0xed7e, 0xfa80, 0xed80, 0xede0, 0xfa9c, 0xede1, 0xedfc, 0xfb40, 0xee40, 0xee62, 0xfb5c, 0xee63, 0xee7e, 0xfb80, 0xee80, 0xeee0, 0xfb9c, 0xeee1, 0xeeec, 0xfc40, 0xeeef, 0xeef8, 0xfa40, 0xeef9, 0xeef9, 0x81ca, 0xeefa, 0xeefc, 0xfa55, 0xfa54, 0xfa54, 0x81ca, 0xfa5b, 0xfa5b, 0x81e6, ); $sjisreglen=($#sjisreg+1)/3-1; %match = ( SJIS_C => '[\x80-\x9f\xe0-\xfc][\x40-\xfc]', SJIS_KANA => '[\xa1-\xdf]', SJIS_O => '[\x2f\x80-\xfc]', SJIS_S => '[\x81-\x9f\xe0-\ec][\x40-\xfc]', SKOS_D => '[\xed-\xfc][\x40-\xfc]' ); $GETA=sprintf "%c%c",0x81,0xac; #--------------------------------------------- # check command name which it called. # if ($opt_a) { $comn=$opt_a; } else { $comn=$0; $comn=~s|(.*/)*(.+)|$2|; } ## who am I? :) ## definition of mb convert if ($comn eq "euctohex"){ $convmb= sub { local(*nname) = @_; my $name=$nname; jcode::euc2sjis(*nname); &sjis2rsjis(*nname); $nname =~ s/($match{SJIS_C})/sprintf ":%2x:%2x",ord($1),ord(substr($1,1))/geo; $nname =~ s/($match{SJIS_KANA})/sprintf ":%2x",ord($1)/geo; ($name ne $nname); }; }elsif ($comn eq "euctocap"){ $convmb= sub { local(*nname) = @_; my $name=$nname; jcode::euc2sjis(*nname); &sjis2rsjis(*nname); $nname=~s/($match{SJIS_O})/sprintf ":%2x",ord($1)/geo; ($name ne $nname); }; }elsif ($comn eq "euctojis"){ $convmb= sub { local(*nname) = @_; my $name=$nname; jcode::euc2jis(*nname); ($name ne $nname); }; }elsif ($comn eq "euctosjis"){ $convmb= sub { local(*nname) = @_; my $name=$nname; jcode::euc2jis(*nname); ($name ne $nname); }; }elsif ($comn eq "hextoeuc"){ $convmb= sub { local(*nname) = @_; my $name=$nname; $nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo; &sjis2csjis(*nname); return 0 if ($name eq $nname); jcode::sjis2euc(*nname); }; }elsif ($comn eq "hextojis"){ $convmb= sub { local(*nname) = @_; my $name=$nname; $nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo; &sjis2csjis(*nname); return 0 if ($name eq $nname); jcode::sjis2jis(*nname); }; }elsif (($comn eq "hextocap")||($comn eq "captocap")){ $convmb= sub { local(*nname) = @_; my $name=$nname; $nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo; return 0 if ($name eq $nname); &sjis2rsjis(*nname); $nname=~s/($match{SJIS_O})/sprintf ":%2x",ord($1)/geo; ($name ne $nname); }; }elsif ($comn eq "hextosjis"){ $convmb= sub { local(*nname) = @_; my $name=$nname; $nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo; &sjis2rsjis(*nname); ($name ne $nname); }; }elsif ($comn eq "captoeuc"){ $convmb= sub { local(*nname) = @_; my $name=$nname; $nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo; &sjis2csjis(*nname); return 0 if ($name eq $nname); jcode::sjis2euc(*nname); }; }elsif ($comn eq "captojis"){ $convmb= sub { local(*nname) = @_; my $name=$nname; $nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo; &sjis2csjis(*nname); return 0 if ($name eq $nname); jcode::sjis2jis(*nname); }; }elsif ($comn eq "captosjis"){ $convmb= sub { local(*nname) = @_; my $name=$nname; $nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo; return 0 if ($name eq $nname); &sjis2rsjis(*nname); }; }elsif (($comn eq "captohex")||($comn eq "hextohex")){ $convmb= sub { local(*nname) = @_; my $name=$nname; $nname=~ s/\:([0-9a-f][0-9a-f])/sprintf "%c",hex($1)/geo; &sjis2rsjis(*nname); return 0 if ($name eq $nname); $nname =~ s/($match{SJIS_C})/sprintf ":%2x:%2x",ord($1),ord(substr($1,1))/geo; $nname =~ s/($match{SJIS_KANA})/sprintf ":%2x",ord($1)/geo; }; }elsif ($comn eq "jistocap"){ $convmb= sub { local(*nname) = @_; my $name=$nname; jcode::jis2jis(*nname); &sjis2rsjis(*nname); $nname=~s/($match{SJIS_O})/sprintf ":%2x",ord($1)/geo; ($name ne $nname); }; }elsif ($comn eq "jistohex"){ $convmb= sub { local(*nname) = @_; my $name=$nname; jcode::jis2sjis(*nname); &sjis2rsjis(*nname); $nname =~ s/($match{SJIS_C})/sprintf ":%2x:%2x",ord($1),ord(substr($1,1))/geo; $nname =~ s/($match{SJIS_KANA})/sprintf ":%2x",ord($1)/geo; ($name ne $nname); }; }elsif ($comn eq "jistoeuc"){ $convmb= sub { local(*nname) = @_; my $name=$nname; jcode::jis2euc(*nname); ($name ne $nname); }; }elsif ($comn eq "jistosjis"){ $convmb= sub { local(*nname) = @_; my $name=$nname; jcode::jis2sjis(*nname); &sjis2rsjis(*nname); ($name ne $nname); }; }elsif ($comn eq "sjistocap"){ $convmb= sub { local(*nname) = @_; my $name=$nname; &sjis2rsjis(*nname); $nname=~s/($match{SJIS_O})/sprintf ":%2x",ord($1)/geo; ($name ne $nname); }; }elsif ($comn eq "sjistohex"){ $convmb= sub { local(*nname) = @_; my $name=$nname; &sjis2rsjis(*nname); $nname =~ s/($match{SJIS_C})/sprintf ":%2x:%2x",ord($1),ord(substr($1,1))/geo; $nname =~ s/($match{SJIS_KANA})/sprintf ":%2x",ord($1)/geo; ($name ne $nname); }; }elsif ($comn eq "sjistoeuc"){ $convmb= sub { local(*nname) = @_; my $name=$nname; &sjis2csjis(*nname); jcode::sjis2euc(*nname); ($name ne $nname); }; }elsif ($comn eq "sjistojis"){ $convmb= sub { local(*nname) = @_; my $name=$nname; &sjis2csjis(*nname); jcode::sjis2jis(*nname); ($name ne $nname); }; }elsif ($comn eq "sjistosjis"){ $convmb= sub { local(*nname) = @_; my $name=$nname; &sjis2rsjis(*nname); ($name ne $nname); }; }elsif ($comn eq "euctoeuc"){ $convmb= sub { local(*nname) = @_; my $name=$nname; jcode::euc2sjis(*nname); &sjis2csjis(*nname); jcode::sjis2euc(*nname); ($name ne $nname); }; }elsif ($comn eq "jistojis"){ $convmb= sub { local(*nname) = @_; my $name=$nname; jcode::jis2sjis(*nname); &sjis2csjis(*nname); jcode::sjis2jis(*nname); ($name ne $nname); }; }else { &Show_Usage(); exit 1; } #======================================================== # main routine. # umask 0000; if ($opt_l || $opt_p) { # when want to link files &lnconv(@ARGV); } else { # when rename files while ($arg = shift @ARGV) { # $arg =~ s/(\.|\.\.)\/+/\1/o; &fnconv($arg); } } exit; #=========================================================== # subroutines. # #------------------------------------------- # link arguments with file name conversion. # sub lnconv() { open(FILEN, "find @_ -print |") or die "Cannot open directories\n"; while (){ chomp(); $nname=$name=$_; &$convmb(*nname); $nname="$opt_p/$nname" if ($opt_p); if ( -l $name ) { # that is link # only make symbolic link to original pointed. # it may be convert the linked file/directory name? $linked = readlink $name; symlink $linked, $nname; } elsif ( -d $name ) { my $st = stat($name); if ($opt_d) { printf STDERR "mkdir %s %o\n", $nname, $st->mode & 07777; } else { if (mkdir $nname, $st->mode & 07777) { utime $st->atime, $st->mtime, $nname; } else { print STDERR $!; } } } elsif ( -f $name ) { if ($opt_d) { print STDERR "convert $name to $nname\n"; } else { unless (link $name, $nname) { print STDERR "fail link: operation not permitted.\n"; } } } else { # pipe or device file # ignore it } } } #-------------------------------------------------------- # rename argument's basename with &$convmb # sub fnconv() { local($name)=@_; if ( -l $name ) { #only ignore it } elsif ( -d $name ) { $athash{"$name"}=stat($name)->atime; $mthash{"$name"}=stat($name)->mtime; local($dh)="DH_$name"; opendir $dh, $name; while ($dentory=readdir $dh) { next if ($dentory eq "." || $dentory eq ".."); # ignore "." and ".." &fnconv("$name/$dentory"); # recursive call } closedir $dh; $nname = &renfn($name); if ($opt_d) { printf "utime %d, %d, %s\n", $athash{"$name"},$mthash{"$name"},$nname unless (($name eq ".") || ($name eq "..")); } else { utime $athash{"$name"}, $mthash{"$name"}, $nname unless (($name eq ".") || ($name eq "..")); } delete $mthash{"$name"}; delete $athash{"$name"}; } elsif ( -f $name ) { # if name is file then only rename it. &renfn($name); } else { # another case # only ignore it for safety. } } #-------------------------------------------------------- # rename argument's basename with &$convmb # # sub renfn() { local($name)=@_; local($basedir, $nname)=("", $name); if ($name =~ m|(.*/)([^/]*)|o) { # split dir and fname $basedir="$1"; $nname ="$2"; } return $name unless (&$convmb(*nname)); # only fname converted $nname=$basedir . $nname; if ($opt_d) { print STDERR "rename $name to $nname\n"; } else { unless (rename $name, $nname) { print STDERR "fail rename: operation not permitted.\n"; } } $nname; } #--------------------------------------------------------- # convert regular sjis to compressed sjis. # using binary snearch method. # sub sjis2csjis { local(*line) = @_; $line =~ s/($match{SJIS_C})/&s2c($1)/geo; } sub s2c { local($char) = @_; $mb=ord($char)*256+ord(substr($char,1)); return $char if ($mb < $sjisconv[0]); # checkk if it is return $GETA if ($sjisconv[$sjisconvlen*3+1] < $mb); # target to conv? $min=0;$max=$sjisconvlen; # OK is will be conv while($max >= $min){ $j=$min+($max-$min)%2; if ($sjisconv[$j*3+0] > $mb){ $max=$j-1; } elsif ($mb > $sjisconv[$j*3+1]) { $min=$j+1; } else { $mb=$sjisconv[$j*3+2]+$mb-$sjisconv[$j*3+0]; $char=sprintf "%c%c",($mb >> 8) & 0xff,$mb & 0xff; break; } } $char=$GETA if ( $mb > 0xf000); $char; } #--------------------------------------------------------- # # convert regular sjis to regular sjis. # using binary snearch method. # sub sjis2rsjis { local(*line) = @_; $line =~ s/($match{SJIS_C})/&s2r($1)/geo; } sub s2r { local($char) = @_; $mb=ord($char)*256 + ord(substr($char,1)); return $char if ($mb < $sjisreg[0]); # checkk if it is if ($sjisreg[$sjisreglen*3+1] < $mb){ # target to conv? $char=$GETA if ($mb > 0xfc4b); return $char; }; $min=0;$max=$sjisreglen; # OK is will be conv while($max >= $min){ $j=$min+($max-$min)%2; if ($sjisreg[$j*3+0] > $mb){ $max=$j-1; } elsif ($mb > $sjisreg[$j*3+1]) { $min=$j+1; } else { # hit it! $mb=$sjisreg[$j*3+2]+$mb-$sjisreg[$j*3+0]; $char=sprintf "%c%c",($mb >> 8) & 0xff, $mb & 0xff; break; } } $char; } #--------------------------------------------------------- sub Show_Usage { printf "This is samba companion tool. smbchartool Ver.%s\n",$version; print <<__EOL__; This product is distoributed under the GNU Public License Vertion 2. Usage: command [-h][-d][-l][-a ] [-p ] -h: show this help message -d: debug option. Don't actual move and removing, only display how to do -l: make hard link, not move it -p: make link to another directory (it may be use with -l) -a: point out an action Available command names or actions are.... euctohex, euctocap, euctojis, euctosjis jistohex, jistocap, jistoeuc, jistosjis captohex, captoeuc, captojis, captosjis hextoeuc, hextocap, hextojis, hextosjis sjistohex, sjistocap, sjistojis, sjistoeuc sjistosjis, captocap, hextohex, euctoeuc, jistojis Copyright (c) 2000, Hiroshi MIURA __EOL__ exit; }