# jcode.rb - ruby code to handle japanese (EUC/SJIS) string

# In verbose mode, tell the user that no multibyte encoding has been selected.
# (The message is a plain literal; "$KCODE" is not interpolated.)
warn "Warning: $KCODE is NONE." if $VERBOSE && $KCODE == "NONE"

# Remember the current verbosity in $vsave and switch warnings off for the
# code that follows; the saved value is assigned back to $VERBOSE at the end
# of the file.
$vsave, $VERBOSE = $VERBOSE, false

# Reopens String to add (and override) character-oriented operations whose
# behaviour is selected by the global $KCODE setting: succ/succ!, tr/tr!,
# delete/delete!, squeeze/squeeze!, tr_s/tr_s!, chop/chop!, jlength/jsize,
# jcount, each_char, mbchar? and end_regexp.
class String
  warn "feel free for some warnings:\n" if $VERBOSE

  # Prepares +str+ (a tr-style character specification) for use inside a
  # regexp character class:
  # * a backslash followed by one of [ ] - \ is kept as is,
  # * a backslash followed by any other character is reduced to that character,
  # * a bare [ ] or \ gets a backslash put in front of it.
  def _regex_quote(str)
    str.gsub(/(\\[\[\]\-\\])|\\(.)|([\[\]\\])/) do
      $1 || $2 || '\\' + $3
    end
  end
  private :_regex_quote

  # Regexp sources matching one multibyte character in each encoding.
  PATTERN_SJIS = '[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]'
  PATTERN_EUC = '[\xa1-\xfe][\xa1-\xfe]'
  PATTERN_UTF8 = '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'

  # Compiled, encoding-less ("n") versions of the patterns above. Written as
  # /.../n literals, the same form end_regexp uses, instead of the
  # three-argument Regexp.new call.
  RE_SJIS = /#{PATTERN_SJIS}/n
  RE_EUC = /#{PATTERN_EUC}/n
  RE_UTF8 = /#{PATTERN_UTF8}/n

  # SUCC[k] is the increment table used by succ! for the encoding whose
  # lower-cased $KCODE initial is k ('s', 'e' or 'u'). Keys are one-byte
  # strings; any key that is not listed yields the default increment of 1.
  # The listed entries jump from a byte outside the trailing-byte ranges of
  # the matching PATTERN_* to the next boundary (e.g. 0x00..0x3f -> 0x40 and
  # 0x7e -> 0x80 for SJIS).
  SUCC = {}

  SUCC['s'] = Hash.new(1)
  (0..0x3f).each { |i| SUCC['s'][i.chr] = 0x40 - i }
  SUCC['s']["\x7e"] = 0x80 - 0x7e
  SUCC['s']["\xfd"] = 0x100 - 0xfd
  SUCC['s']["\xfe"] = 0x100 - 0xfe
  SUCC['s']["\xff"] = 0x100 - 0xff

  SUCC['e'] = Hash.new(1)
  (0..0xa0).each { |i| SUCC['e'][i.chr] = 0xa1 - i }
  SUCC['e']["\xfe"] = 2

  SUCC['u'] = Hash.new(1)
  (0..0x7f).each { |i| SUCC['u'][i.chr] = 0x80 - i }
  SUCC['u']["\xbf"] = 0x100 - 0xbf

  # Matches the receiver against the multibyte pattern selected by the first
  # character of $KCODE. Returns the match position, or nil when there is no
  # match or $KCODE does not start with s/e/u (either case).
  def mbchar?
    case $KCODE[0]
    when ?s, ?S
      self =~ RE_SJIS
    when ?e, ?E
      self =~ RE_EUC
    when ?u, ?U
      self =~ RE_UTF8
    else
      nil
    end
  end

  # Returns a regexp that matches one character at the end of a line/string:
  # the multibyte pattern for the encoding selected by $KCODE, or /.$/ for
  # any other $KCODE value. The "o" flag makes each literal interpolate once.
  def end_regexp
    case $KCODE[0]
    when ?s, ?S
      /#{PATTERN_SJIS}$/on
    when ?e, ?E
      /#{PATTERN_EUC}$/on
    when ?u, ?U
      /#{PATTERN_UTF8}$/on
    else
      /.$/on
    end
  end

  # Keep the built-in implementations reachable as private fallbacks.
  alias original_succ! succ!
  private :original_succ!

  alias original_succ succ
  private :original_succ

  # In-place successor. When $KCODE is not 'NONE' and the receiver ends in a
  # character matched by end_regexp, the last byte is advanced (carrying into
  # the byte before it when it wraps to 0) until the string again ends in a
  # character matched by end_regexp. Otherwise the built-in succ! is used.
  # Returns the receiver.
  def succ!
    reg = end_regexp
    if $KCODE != 'NONE' && self =~ reg
      succ_table = SUCC[$KCODE[0,1].downcase]
      begin
        self[-1] += succ_table[self[-1]]
        self[-2] += 1 if self[-1] == 0
      end while self !~ reg
      self
    else
      original_succ!
    end
  end

  # Non-destructive counterpart of succ!: returns the successor as a new string.
  def succ
    str = self.dup
    str.succ! or str
  end

  private

  # Expands a tr-style character list into an array of single characters.
  # "x-y" becomes every character from x through y; a backslash makes the next
  # character literal. A range whose two endpoints differ in length is
  # dropped. Returns the array (empty for an empty +str+).
  def _expand_ch(str)
    a = []
    str.scan(/(?:\\(.)|([^\\]))-(?:\\(.)|([^\\]))|(?:\\(.)|(.))/m) do
      from = $1 || $2
      to = $3 || $4
      one = $5 || $6
      if one
        a.push one
      elsif from.length != to.length
        next # endpoints of different lengths cannot form a range
      elsif from.length == 1
        from[0].upto(to[0]) { |c| a.push c.chr }
      else
        from.upto(to) { |c| a.push c }
      end
    end
    # Return the collected characters; scan itself returns the receiver.
    a
  end

  # Builds the translation table for tr!/tr_s!: each character of the expanded
  # +from+ list maps to the character at the same position in the expanded
  # +to+ list, or to the last character of +to+ when +to+ is shorter.
  # Returns the Hash.
  def expand_ch_hash(from, to)
    h = {}
    afrom = _expand_ch(from)
    ato = _expand_ch(to)
    afrom.each_with_index { |x, i| h[x] = ato[i] || ato[-1] }
    # Return the table; each_with_index itself returns afrom.
    h
  end

  # Memoized translation tables and compiled patterns, keyed by the
  # specification strings they were built from.
  HashCache = {}
  TrPatternCache = {}
  DeletePatternCache = {}
  SqueezePatternCache = {}
  # tr_s! needs /(..)\1*/ while squeeze! needs /(..)\1+/ for the same key, so
  # tr_s! gets its own cache instead of sharing SqueezePatternCache.
  TrSqueezePatternCache = {}

  public

  # Translates, in place, every character listed in +from+ into the
  # corresponding character of +to+. A +from+ starting with "^" replaces every
  # character NOT listed with the last character of +to+. An empty +to+
  # deletes the +from+ characters instead.
  # Returns the receiver, or nil if +from+ is empty or nothing was changed.
  def tr!(from, to)
    return nil if from == ""
    return self.delete!(from) if to == ""

    pattern = TrPatternCache[from] ||= /[#{_regex_quote(from)}]/
    if from[0] == ?^
      last = /.$/.match(to)[0]
      self.gsub!(pattern, last)
    else
      # "1-0" is the separator between the two specifications in the cache key.
      h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
      self.gsub!(pattern) { |c| h[c] }
    end
  end

  # Non-destructive tr!: always returns a string (a copy of the receiver).
  def tr(from, to)
    (str = self.dup).tr!(from, to) or str
  end

  # Removes, in place, every character listed in +del+.
  # Returns the receiver, or nil if +del+ is empty or nothing was removed.
  def delete!(del)
    return nil if del == ""
    self.gsub!(DeletePatternCache[del] ||= /[#{_regex_quote(del)}]+/, '')
  end

  # Non-destructive delete!: always returns a string.
  def delete(del)
    (str = self.dup).delete!(del) or str
  end

  # Collapses, in place, runs of the same character into one character.
  # With +del+, only the characters listed in it are collapsed.
  # Returns the receiver, or nil if +del+ is "" or nothing was changed.
  def squeeze!(del=nil)
    return nil if del == ""
    pattern =
      if del
        SqueezePatternCache[del] ||= /([#{_regex_quote(del)}])\1+/
      else
        /(.|\n)\1+/
      end
    self.gsub!(pattern, '\1')
  end

  # Non-destructive squeeze!: always returns a string.
  def squeeze(del=nil)
    (str = self.dup).squeeze!(del) or str
  end

  # Like tr!, but each run of one repeated +from+ character is replaced by a
  # single translated character. An empty +to+ deletes the +from+ characters.
  # Returns the receiver, or nil if +from+ is empty or nothing was changed.
  def tr_s!(from, to)
    return nil if from == "" # same guard as tr!; avoids an empty char class
    return self.delete!(from) if to.length == 0

    pattern = TrSqueezePatternCache[from] ||= /([#{_regex_quote(from)}])\1*/
    if from[0] == ?^
      last = /.$/.match(to)[0]
      self.gsub!(pattern, last)
    else
      h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
      # $1 is the single character the matched run consists of.
      self.gsub!(pattern) { h[$1] }
    end
  end

  # Non-destructive tr_s!: always returns a string.
  def tr_s(from, to)
    (str = self.dup).tr_s!(from,to) or str
  end

  # Removes, in place, the last character; a trailing "\n" or "\r\n" counts as
  # one character. Returns the receiver, or nil if nothing was removed.
  def chop!
    self.gsub!(/(?:.|\r?\n)\z/, '')
  end

  # Non-destructive chop!: always returns a string.
  def chop
    (str = self.dup).chop! or str
  end

  # Returns the length of the receiver after every match of the pattern below
  # (a word character other than an ASCII letter, digit or underscore) has
  # been replaced by a single space.
  # NOTE(review): presumably this counts a multibyte character as 1 under a
  # multibyte $KCODE - confirm against the target Ruby version.
  def jlength
    self.gsub(/[^\Wa-zA-Z_\d]/, ' ').length
  end
  alias jsize jlength

  # Returns the jlength of what remains after deleting every character that
  # is NOT listed in +str+.
  def jcount(str)
    self.delete("^#{str}").jlength
  end

  # With a block, yields each character (as matched by /./m) in turn; without
  # one, returns the characters as an array.
  def each_char
    if block_given?
      scan(/./m) { |x| yield x }
    else
      scan(/./m)
    end
  end
end

# Put back the verbosity level that was stored in $vsave near the top of the
# file, before String was reopened.
$VERBOSE = $vsave