# CSV -- module for generating/parsing CSV data.
# Copyright (C) 2000-2004 NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>.
# $Id: csv.rb 11708 2007-02-12 23:01:19Z shyouhei $
# This program is copyrighted free software by NAKAMURA, Hiroshi. You can
# redistribute it and/or modify it under the same terms of Ruby's license;
# either the dual license version in 2003, or any later version.
# Exception for CSV data in an illegal format. CSV.parse_line and
# CSV.parse_row rescue it.
class IllegalFormatError < RuntimeError
end
# Builds the object by handing a string to the superclass constructor.
# data: initial content; "" by default.
# is_null: when truthy, "" is passed to super instead of data.
# NOTE(review): the enclosing class is not visible here -- presumably a
# String subclass; confirm.
def initialize(data = "", is_null = false)
super(is_null ? "" : data)
# Open a CSV formatted file for reading or writing.
#
# For reading.
#
# EXAMPLE 1
#   CSV.open('csvfile.csv', 'r') do |row|
#     # ... use row ...
#   end
#
# EXAMPLE 2
#   reader = CSV.open('csvfile.csv', 'r')
#
# ARGS
#   path (filename): filename to parse.
#   fs (col_sep): Column separator. ?, by default. If you want to separate
#     fields with semicolon, give ?; here.
#   rs (row_sep): Row separator. nil by default. nil means "\r\n or \n". If
#     you want to separate records with \r, give ?\r here.
#
# RETURNS
#   reader instance. To get parse result, see CSV::Reader#each.
#
# For writing.
#
# EXAMPLE 1
#   CSV.open('csvfile.csv', 'w') do |writer|
#     writer << ['r1c1', 'r1c2']
#     writer << ['r2c1', 'r2c2']
#   end
#
# EXAMPLE 2
#   writer = CSV.open('csvfile.csv', 'w')
#   writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
#
# ARGS
#   path (filename): filename to generate.
#   fs (col_sep): Column separator. ?, by default. If you want to separate
#     fields with semicolon, give ?; here.
#   rs (row_sep): Row separator. nil by default. nil means "\r\n or \n". If
#     you want to separate records with \r, give ?\r here.
#
# RETURNS
#   writer instance. See CSV::Writer#<< and CSV::Writer#add_row to know how
#   to generate CSV string.
#
# mode must be one of 'r', 'rb', 'w' or 'wb'; ArgumentError is raised with
# that message otherwise.
def CSV.open(path, mode, fs = nil, rs = nil, &block)
# Read modes are delegated to open_reader, write modes to open_writer.
if mode == 'r' or mode == 'rb'
open_reader(path, mode, fs, rs, &block)
elsif mode == 'w' or mode == 'wb'
open_writer(path, mode, fs, rs, &block)
# NOTE(review): no `else` is visible before this raise -- confirm it is only
# reached for unsupported modes.
raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
# Opens path for reading through open_reader and forwards the block to it.
# The column separator is fixed to ','; rs is the row separator (nil by
# default).
def CSV.foreach(path, rs = nil, &block)
open_reader(path, 'r', ',', rs, &block)
# Reads path with IO.read(path, length, offset) and hands the resulting data
# to CSV.parse, using CSV.parse's default separators.
# length, offset: passed straight to IO.read; both nil by default.
def CSV.read(path, length = nil, offset = nil)
CSV.parse(IO.read(path, length, offset))
# Opens path for reading through open_reader (column separator ',') and
# gathers every row of the reader with collect.
# rs: row separator handed to open_reader; nil by default.
def CSV.readlines(path, rs = nil)
reader = open_reader(path, 'r', ',', rs)
reader.collect { |row| row }
# Opens path for writing (mode 'w') through open_writer, forwarding fs, rs
# and the block.
def CSV.generate(path, fs = nil, rs = nil, &block)
open_writer(path, 'w', fs, rs, &block)
# Parse lines from given string or stream. Return rows as an Array of Arrays.
#
# str_or_readable: CSV data to parse. If it names an existing file, the
#   deprecated filename behaviour below is used instead.
# fs, rs: column / row separators, forwarded to the reader; nil by default.
def CSV.parse(str_or_readable, fs = nil, rs = nil, &block)
# Deprecated path: when the argument names an existing file, warn on STDERR
# and read that file through open_reader.
if File.exist?(str_or_readable)
STDERR.puts("CSV.parse(filename) is deprecated." +
" Use CSV.open(filename, 'r') instead.")
return open_reader(str_or_readable, 'r', fs, rs, &block)
# Rows are either streamed to a block via CSV::Reader.parse ...
CSV::Reader.parse(str_or_readable, fs, rs) do |row|
# ... or gathered with collect from a reader made by CSV::Reader.create.
# NOTE(review): presumably chosen by whether a block was given -- confirm.
CSV::Reader.create(str_or_readable, fs, rs).collect { |row| row }
# Parse a line from given string. Bear in mind it parses ONE LINE. The rest
# of the string is ignored; for example, "a,b\r\nc,d" => ['a', 'b'] and the
# second line 'c,d' is ignored.
#
# If you don't know whether a target string to parse is exactly 1 line or
# not, use CSV.parse_row instead of this method.
#
# src: CSV data; handed to parse_body together with the running index.
# fs, rs: column / row separators; nil by default.
def CSV.parse_line(src, fs = nil, rs = nil)
# A row separator given as a Fixnum is detected here.
# NOTE(review): Fixnum is deprecated since Ruby 2.4 and removed in 3.2
# (Integer replaces it) -- confirm the Ruby version this file targets.
if !rs.nil? and rs.is_a?(Fixnum)
# Keep reading cells for as long as parse_body reports a column separator.
while res_type == :DT_COLSEP
res_type, idx, cell = parse_body(src, idx, fs, rs)
# NOTE(review): handler for malformed input -- confirm what is returned to
# the caller in that case.
rescue IllegalFormatError
# Create a line from cells. Each cell is stringified by to_s.
#
# row: cells to emit; read by index (row[idx]).
# fs, rs: column / row separators, passed on to generate_body and
#   generate_separator; nil by default.
def CSV.generate_line(row, fs = nil, rs = nil)
# A row separator given as a Fixnum is detected here.
# NOTE(review): Fixnum is deprecated since Ruby 2.4 and removed in 3.2
# (Integer replaces it) -- confirm the Ruby version this file targets.
if !rs.nil? and rs.is_a?(Fixnum)
# Cell text and column separators are both appended to result_str.
generate_body(row[idx], result_str, fs, rs)
generate_separator(:DT_COLSEP, result_str, fs, rs)
# Parse a line from string. Consider using CSV.parse_line instead.
# To parse lines in CSV string, see EXAMPLE below.
#
# EXAMPLE
#   src = "a,b\r\nc,d\r\ne,f"
#   idx = 0
#   begin
#     parsed = []
#     parsed_cells, idx = CSV.parse_row(src, idx, parsed)
#     puts "Parsed #{ parsed_cells } cells."
#   end while parsed_cells > 0
#
# ARGS
#   src: a CSV data to be parsed. Must respond '[](idx)'.
#     src[](idx) must return a char. (Not a string such as 'a', but 97).
#     src[](idx_out_of_bounds) must return nil. A String satisfies this
#     requirement.
#   idx: index of parsing location of 'src'. 0 origin.
#   out_dev: buffer for parsed cells. Must respond '<<(aString)'.
#   fs (col_sep): Column separator. ?, by default. If you want to separate
#     fields with semicolon, give ?; here.
#   rs (row_sep): Row separator. nil by default. nil means "\r\n or \n". If
#     you want to separate records with \r, give ?\r here.
#
# RETURNS
#   parsed_cells: num of parsed cells.
#   idx: index of next parsing location of 'src'.
def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil)
# A row separator given as a Fixnum is detected here.
# NOTE(review): Fixnum is deprecated since Ruby 2.4 and removed in 3.2
# (Integer replaces it) -- confirm the Ruby version this file targets.
if !rs.nil? and rs.is_a?(Fixnum)
# Read cells through parse_body until it reports a row separator.
while res_type != :DT_ROWSEP
res_type, idx, cell = parse_body(src, idx, fs, rs)
# parse_body handed back the same index as idx_backup (presumably the index
# saved before parsing started -- confirm).
if idx == idx_backup #((parsed_cells == 0) and cell.nil?)
# NOTE(review): handler for malformed input -- confirm what is returned to
# the caller in that case.
rescue IllegalFormatError
# Convert a line from cells data to string. Consider using CSV.generate_line
# instead. To generate multi-row CSV string, see EXAMPLE below.
#
# EXAMPLE
#   src = [row1, row2, row3]
#   buf = ''
#   src.each do |row|
#     parsed_cells = CSV.generate_row(row, 2, buf)
#     puts "Created #{ parsed_cells } cells."
#   end
#
# ARGS
#   src: an Array of String to be converted to CSV string. Must respond to
#     'size' and '[](idx)'. src[idx] must return String.
#   cells: num of cells in a line.
#   out_dev: buffer for generated CSV string. Must respond to '<<(string)'.
#   fs (col_sep): Column separator. ?, by default. If you want to separate
#     fields with semicolon, give ?; here.
#   rs (row_sep): Row separator. nil by default. nil means "\r\n or \n". If
#     you want to separate records with \r, give ?\r here.
#
# RETURNS
#   parsed_cells: num of converted cells.
def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil)
# A row separator given as a Fixnum is detected here.
# NOTE(review): Fixnum is deprecated since Ruby 2.4 and removed in 3.2
# (Integer replaces it) -- confirm the Ruby version this file targets.
if !rs.nil? and rs.is_a?(Fixnum)
# NOTE(review): the guard for this early row separator is not visible here --
# presumably the empty-row case; confirm.
generate_separator(:DT_ROWSEP, out_dev, fs, rs)
# First cell: written without a leading column separator.
generate_body(src[parsed_cells], out_dev, fs, rs)
# Remaining cells: each one is preceded by a column separator. The loop stops
# once `cells` cells are done or parsed_cells reaches src_size.
while ((parsed_cells < cells) and (parsed_cells != src_size))
generate_separator(:DT_COLSEP, out_dev, fs, rs)
generate_body(src[parsed_cells], out_dev, fs, rs)
# All requested cells were written: finish the row with a row separator.
if (parsed_cells == cells)
generate_separator(:DT_ROWSEP, out_dev, fs, rs)
# NOTE(review): presumably the alternative branch (fewer cells written than
# requested) -- confirm.
generate_separator(:DT_COLSEP, out_dev, fs, rs)
# Opens path with File.open(path, mode) and reads CSV rows from that file.
# fs, rs: column / row separators handed to CSV::Reader.
def open_reader(path, mode, fs, rs, &block)
file = File.open(path, mode)
# Rows are streamed to a block through CSV::Reader.parse ...
CSV::Reader.parse(file, fs, rs) do |row|
# ... or a reader is built with CSV::Reader.create and close_on_terminate is
# called on it.
# NOTE(review): presumably chosen by whether a block was given -- confirm.
reader = CSV::Reader.create(file, fs, rs)
reader.close_on_terminate
# Opens path with File.open(path, mode) and writes CSV rows to that file.
# fs, rs: column / row separators handed to CSV::Writer.
def open_writer(path, mode, fs, rs, &block)
file = File.open(path, mode)
# A writer is handed to a block through CSV::Writer.generate ...
CSV::Writer.generate(file, fs, rs) do |writer|
# ... or a writer is built with CSV::Writer.create and close_on_terminate is
# called on it.
# NOTE(review): presumably chosen by whether a block was given -- confirm.
writer = CSV::Writer.create(file, fs, rs)
writer.close_on_terminate
# Scans src starting at idx and extracts one cell.
# Returns three values: a result type, an index, and the cell. The result
# type is `sep` on the separator return paths (with idx + 1) and :DT_EOS on
# the final return path (with idx unchanged). Callers compare the result type
# against :DT_COLSEP and :DT_ROWSEP.
# fs, rs: column / row separators.
def parse_body(src, idx, fs, rs)
# Does the current char c match the separator string at the current match
# offset (fs_idx / rs_idx)?
fschar = (c == fs_str[fs_idx])
rschar = (c == rs_str[rs_idx])
# simple 1 char backtrack
# c did not match at the current offset but equals the separator's first char.
if !fschar and c == fs_str[0]
if !rschar and c == rs_str[0]
# Append the pending slice of src (from last_idx up to idx) to the cell.
cell << src[last_idx, (idx - last_idx)]
# In the start state with a partial row-separator match longer than the
# column-separator match: the slice length is reduced by (fs_size - 1).
if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx
cell << src[last_idx, (idx - last_idx - (fs_size - 1))]
# Mirror case: the slice length is reduced by (rs_size - 1).
if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx
cell << src[last_idx, (idx - last_idx - (rs_size - 1))]
# Separator found: report it and continue after the current char.
return sep, idx + 1, cell;
return sep, idx + 1, cell;
elsif rs.nil? and c == ?\r
# special \r treatment for backward compatibility
cell << src[last_idx, (idx - last_idx)]
# Final handling while still in the data or start state.
if state == :ST_DATA or state == :ST_START
# A separator match was in progress: append the pending slice to the cell.
if fs_idx > 0 or rs_idx > 0
cell << src[last_idx, (idx - last_idx)]
return :DT_EOS, idx, cell
# Appends one cell to out_dev.
# The text in row_data is written wrapped in double quotes. The visible
# quoting conditions are: a '"' was present (gsub! doubles it to '""' and
# returns nil when nothing was replaced), row_data contains rs, or row_data
# contains \r or \n.
# NOTE(review): the condition list looks incomplete in this view -- confirm
# the remaining condition and the unquoted branch.
def generate_body(cell, out_dev, fs, rs)
if (row_data.gsub!('"', '""') or
(rs and row_data.index(rs)) or
(/[\r\n]/ =~ row_data) or
out_dev << '"' << row_data << '"'