# frozen_string_literal: false
# Error reporting XML that is not well formed.
class NotWellFormedError < Error
# Readers for @line and @element.
attr_reader :line, :element
# Create a new NotWellFormedError for an error at +line+
# in +element+. Both are optional; each one is mentioned in
# the message only when it is given. If a block is given, its
# return value is appended to the error message on a new line.
def initialize(line=nil, element=nil)
# String literals are mutable in this file (frozen_string_literal:
# false), so the pieces below are appended in place with <<.
message = "This is not well formed XML"
message << "\nerror occurred"
message << " in #{element}" if element
message << " at about #{line} line" if line
message << "\n#{yield}" if block_given?
# Error whose message reports that no XML parser was found among
# AVAILABLE_PARSER_LIBRARIES.
class XMLParserNotFound < Error
# The message embeds the inspected list of candidate libraries.
super("available XML parser was not found in " <<
"#{AVAILABLE_PARSER_LIBRARIES.inspect}.")
# Error whose message names the rejected parser and lists
# AVAILABLE_PARSERS.
class NotValidXMLParser < Error
# "s are" / " is" is chosen so the sentence agrees in number with
# the size of AVAILABLE_PARSERS.
super("#{parser} is not an available XML parser. " <<
"Available XML parser" <<
(AVAILABLE_PARSERS.size > 1 ? "s are " : " is ") <<
"#{AVAILABLE_PARSERS.inspect}.")
# Error for a tag whose prefix is not associated with the required
# namespace URI.
class NSError < InvalidRSSError
# tag::    the tag name passed to the constructor
# prefix:: the prefix passed to the constructor
# uri::    the required URI passed to the constructor
attr_reader :tag, :prefix, :uri
# Store +tag+, +prefix+ and +require_uri+ (exposed as #uri) and build
# a message that names all three.
def initialize(tag, prefix, require_uri)
@tag, @prefix, @uri = tag, prefix, require_uri
super("prefix <#{prefix}> doesn't associate uri " <<
"<#{require_uri}> in tag <#{tag}>")
# Use the explicitly configured parser, or fall back to the first
# entry of AVAILABLE_PARSERS when none has been set.
@@default_parser || AVAILABLE_PARSERS.first
# Set @@default_parser to new_value if it is one of the
# available parsers. Otherwise raise a NotValidXMLParser error.
def default_parser=(new_value)
if AVAILABLE_PARSERS.include?(new_value)
@@default_parser = new_value
raise NotValidXMLParser.new(new_value)
# Each flag is resolved by boolean_argument from a positional argument
# (args[0] / args[1]), the matching option key, and a default of true.
do_validate = boolean_argument(args[0], options[:validate], true)
boolean_argument(args[1], options[:ignore_unknown_element], true)
# Parser class: third positional argument, then options[:parser_class],
# then default_parser.
parser_class = args[2] || options[:parser_class] || default_parser
# Build the parser and copy both flags onto it.
parser = new(rss, parser_class)
parser.do_validate = do_validate
parser.ignore_unknown_element = ignore_unknown_element
# Resolve a flag from a positional value, an option value and a
# default. +default+ is used when the value is still nil at the end.
# NOTE(review): presumably `value` starts out as positioned_value and
# takes option_value when the first test holds — confirm.
def boolean_argument(positioned_value, option_value, default)
if value.nil? and not option_value.nil?
value = default if value.nil?
# Forward the listed methods to the wrapped @parser object.
def_delegators(:@parser, :parse, :rss,
:ignore_unknown_element=, :do_validate,
# Wrap an instance of +parser_class+ built from the normalized +rss+
# source. +parser_class+ defaults to the class-level default_parser.
def initialize(rss, parser_class=self.class.default_parser)
@parser = parser_class.new(normalize_rss(rss))
# Try to get the XML associated with +rss+.
# Return +rss+ unchanged if it already looks like XML; otherwise
# treat it as a URI or as a file path to read the XML from.
return rss if maybe_xml?(rss)
# A URI object that can be read directly is handled first.
if uri.respond_to?(:read)
# Otherwise read +rss+ as a local file when it is readable. The
# tainted? check is skipped when RUBY_VERSION >= '2.7'.
# NOTE(review): RUBY_VERSION is compared as a String, so the test is
# lexicographic (e.g. "10.0" >= "2.7" is false) — confirm acceptable.
elsif (RUBY_VERSION >= '2.7' || !rss.tainted?) and File.readable?(rss)
File.open(rss) {|f| f.read}
# maybe_xml? tests if source is a string that looks like XML,
# i.e. a String that contains a "<" character.
source.is_a?(String) and /</ =~ source
# Attempt to convert rss to a URI. A value that already is a
# ::URI::Generic is returned as-is.
return rss if rss.is_a?(::URI::Generic)
# Delegate the undefined-entity policy question to +listener+.
def raise_for_undefined_entity?
listener.raise_for_undefined_entity?
# Each parser instance gets its own listener, created from the
# class-level +listener+.
@listener = self.class.listener.new
# Read the listener's ignore_unknown_element flag.
def ignore_unknown_element
@listener.ignore_unknown_element
# Set the listener's ignore_unknown_element flag to +new_value+.
def ignore_unknown_element=(new_value)
@listener.ignore_unknown_element = new_value
# Set the listener's do_validate flag to +new_value+.
def do_validate=(new_value)
@listener.do_validate = new_value
# return the setter for the uri, tag_name pair, or nil.
def setter(uri, tag_name)
# The getter name is looked up first; presumably the setter name is
# derived from it — confirm.
_getter = getter(uri, tag_name)
# return the accessor base stored for the uri, tag_name pair, or nil
# when nothing is registered for that uri or tag_name.
def getter(uri, tag_name)
(@@accessor_bases[uri] || {})[tag_name]
# return the tag_names for setters associated with uri
# (an empty list when the uri has no entries).
(@@accessor_bases[uri] || {}).keys
# register uri against this name.
# @@registered_uris maps name => { uri => nil }; only the presence of
# the uri key is significant.
def register_uri(uri, name)
@@registered_uris[name] ||= {}
@@registered_uris[name][uri] = nil
# test if this uri is registered against this name
# NOTE(review): when +name+ has never been passed to register_uri,
# @@registered_uris[name] is presumably nil and has_key? would raise
# NoMethodError — confirm callers always register the name first.
def uri_registered?(uri, name)
@@registered_uris[name].has_key?(uri)
# record class_name for the supplied uri and tag_name
def install_class_name(uri, tag_name, class_name)
@@class_names[uri] ||= {}
@@class_names[uri][tag_name] = class_name
# retrieve class_name for the supplied uri and tag_name
# If it doesn't exist, derive one from the tag_name in CamelCase.
def class_name(uri, tag_name)
name = (@@class_names[uri] || {})[tag_name]
# Drop each "_" or "-" and upcase the lowercase letter that follows
# it (if any), then upcase the first character of the result.
tag_name = tag_name.gsub(/[_\-]([a-z]?)/) {$1.upcase}
tag_name[0, 1].upcase + tag_name[1..-1]
# Record +accessor_base+ for the uri, name pair and define the
# start handler for +name+ via def_get_text_element. The caller's
# file and line (from get_file_and_line_from_caller(1)) are passed on.
def install_get_text_element(uri, name, accessor_base)
install_accessor_base(uri, name, accessor_base)
def_get_text_element(uri, name, *get_file_and_line_from_caller(1))
# Policy predicate for undefined entities.
def raise_for_undefined_entity?
# set the accessor for the uri, tag_name pair
# A trailing "=" on +accessor_base+ is stripped, so the base name
# is stored rather than the writer name.
def install_accessor_base(uri, tag_name, accessor_base)
@@accessor_bases[uri] ||= {}
@@accessor_bases[uri][tag_name] = accessor_base.chomp("=")
# Register +uri+ for +element_name+ and, unless a private method of
# that name already exists, define start_<element_name>.
# The generated handler calls start_get_text_element when +uri+ is
# registered for the element; start_else_element is the other call
# in its body.
def def_get_text_element(uri, element_name, file, line)
register_uri(uri, element_name)
method_name = "start_#{element_name}"
unless private_method_defined?(method_name)
define_method(method_name) do |name, prefix, attrs, ns|
if self.class.uri_registered?(uri, element_name)
start_get_text_element(name, prefix, ns, uri)
start_else_element(name, prefix, attrs, ns)
# Flag consulted when an unknown element or namespace is met.
attr_accessor :ignore_unknown_element
# Flag controlling validation.
attr_accessor :do_validate
# Initial state: unknown elements are ignored, only the "xml" prefix
# is bound on the namespace stack, and no XML declaration has been seen.
@ignore_unknown_element = true
@ns_stack = [{"xml" => :xml}]
@version = @encoding = @standalone = nil
# set instance vars for version, encoding, standalone
# from the three values passed in.
def xmldecl(version, encoding, standalone)
@version, @encoding, @standalone = version, encoding, standalone
# Handle a processing instruction. Only "xml-stylesheet" is acted on:
# its content is parsed into params and, when an "href" key is
# present, an XMLStyleSheet built from them is appended to
# @xml_stylesheets.
def instruction(name, content)
if name == "xml-stylesheet"
params = parse_pi_content(content)
if params.has_key?("href")
@xml_stylesheets << XMLStyleSheet.new(params)
# Handle the start of element +name+ with its raw +attributes+.
def tag_start(name, attributes)
# Attributes named "xmlns" or "xmlns:<something>" are singled out
# from the rest.
attributes.each do |n, v|
if /\Axmlns(?:\z|:)/ =~ n
# Split the qualified name and record the pair
# [_ns(ns, prefix), local] on the current tag-stack level.
prefix, local = split_name(name)
@tag_stack.last.push([_ns(ns, prefix), local])
# Build the XML::Element for this tag. Every binding in +ns+ except
# "xml" is added as an xmlns / xmlns:<prefix> attribute unless the
# element already carries that key.
previous = @last_xml_element
element_attrs = attributes.dup
ns.each do |ns_prefix, value|
next if ns_prefix == "xml"
key = ns_prefix.empty? ? "xmlns" : "xmlns:#{ns_prefix}"
element_attrs[key] ||= value
# An empty prefix is passed on as nil.
next_element = XML::Element.new(local,
prefix.empty? ? nil : prefix,
# Attach the new element to the previous one, if any, and make it
# the current element.
previous << next_element if previous
@last_xml_element = next_element
# Callback that makes +previous+ the current XML element again.
pr = Proc.new do |text, tags|
@last_xml_element = previous
@xml_element = @last_xml_element
# Dispatch by local name: while @rss is nil an initial_start_<local>
# method is tried first, then start_<local>; start_else_element is
# the remaining call. respond_to?(..., true) also sees private methods.
if @rss.nil? and respond_to?("initial_start_#{local}", true)
__send__("initial_start_#{local}", local, prefix, attrs, ns.dup)
elsif respond_to?("start_#{local}", true)
__send__("start_#{local}", local, prefix, attrs, ns.dup)
start_else_element(local, prefix, attrs, ns.dup)
# Invoke the callback with the text and tags when there is one.
pr.call(text, tags) unless pr.nil?
# Append character data to the current XML element, if any.
@last_xml_element << data if @last_xml_element
# Matches one name="value" or name='value' pseudo-attribute.
# Captures: 1 = name, 2 = quote character, 3 = value.
# The value runs up to the next occurrence of the opening quote and
# may be empty or span lines (hence /m). A negative lookahead is used
# because a backreference cannot appear inside a character class:
# there "\2" is the octal escape for U+0002, so a class such as
# [^\2] would let the value swallow its own closing quote.
CONTENT_PATTERN = /\s*([^=]+)=(["'])((?:(?!\2).)*?)\2/m
# Extract the name="value" pairs from content; every match of
# CONTENT_PATTERN is visited. Values may be quoted with either
# single or double quotes. Return a Hash.
def parse_pi_content(content)
# Each match yields the name, the quote character and the value.
content.scan(CONTENT_PATTERN) do |name, quote, value|
# Fallback start handler for an element without a dedicated
# start_<name> method.
# The class name for the element is looked up from its namespace and
# local name; if the class of @last_element knows a constant of that
# name, it is used as the next element class. Otherwise the element is
# either handled as an unknown element (validation off or unknown
# elements ignored) or reported with NotExpectedTagError.
def start_else_element(local, prefix, attrs, ns)
class_name = self.class.class_name(_ns(ns, prefix), local)
current_class = @last_element.class
if known_class?(current_class, class_name)
next_class = current_class.const_get(class_name)
start_have_something_element(local, prefix, attrs, ns, next_class)
if !@do_validate or @ignore_unknown_element
@proc_stack.push(setup_next_element_in_unknown_element)
# Placeholder parent name, replaced by the current class's tag_name
# when it has one.
parent = "ROOT ELEMENT???"
if current_class.tag_name
parent = current_class.tag_name
raise NotExpectedTagError.new(local, _ns(ns, prefix), parent)
# Two definitions of known_class? are provided; which one is used
# depends on whether Module#const_defined? accepts optional arguments
# (arity -1).
if Module.method(:const_defined?).arity == -1
# Variant that passes false as the second argument of const_defined?
# and compares against +constants+ using a Symbol.
def known_class?(target_class, class_name)
(target_class.const_defined?(class_name, false) or
target_class.constants.include?(class_name.to_sym))
# Variant that calls const_defined? with the name only and compares
# against +constants+ using class_name as given.
def known_class?(target_class, class_name)
(target_class.const_defined?(class_name) or
target_class.constants.include?(class_name))
# Splits a qualified name: capture 1 is the optional prefix before the
# last-matched ":" separator, capture 2 is the remaining name.
NAMESPLIT = /^(?:([\w:][-\w.]*):)?([\w:][-\w.]*)/
# Check that +prefix+ resolves (via _ns) to +require_uri+.
# When it does not, the outcome depends on +ignore_unknown_element+,
# which defaults to @ignore_unknown_element when nil is passed;
# NSError is the error raised for a mismatch.
def check_ns(tag_name, prefix, ns, require_uri, ignore_unknown_element=nil)
if _ns(ns, prefix) == require_uri
if ignore_unknown_element.nil?
ignore_unknown_element = @ignore_unknown_element
if ignore_unknown_element
raise NSError.new(tag_name, prefix, require_uri)
# Force bind required URI with prefix
@ns_stack.last[prefix] = require_uri
# Start handler for a text-only element: builds a callback that
# stores the element's text on @last_element.
def start_get_text_element(tag_name, prefix, ns, required_uri)
pr = Proc.new do |text, tags|
# Look up the setter registered for this uri/tag pair and use it
# only when @last_element responds to it.
setter = self.class.setter(required_uri, tag_name)
if setter and @last_element.respond_to?(setter)
# A truthy value from the getter means the element was already
# set, which is reported as TooMuchTagError.
getter = self.class.getter(required_uri, tag_name)
if @last_element.__send__(getter)
raise TooMuchTagError.new(tag_name, @last_element.tag_name)
@last_element.__send__(setter, text.to_s)
# With validation on and unknown elements not ignored, a tag with
# no usable setter is reported as NotExpectedTagError.
if @do_validate and !@ignore_unknown_element
raise NotExpectedTagError.new(tag_name, _ns(ns, prefix),
# Start handler for an element backed by the class +klass+.
# When the namespace check against klass.required_uri passes, the
# element's attributes are collected and a setup callback for the
# next element is pushed on @proc_stack; the other push installs the
# callback for an element inside an unknown element.
def start_have_something_element(tag_name, prefix, attrs, ns, klass)
if check_ns(tag_name, prefix, ns, klass.required_uri)
attributes = collect_attributes(tag_name, prefix, attrs, ns, klass)
@proc_stack.push(setup_next_element(tag_name, klass, attributes))
@proc_stack.push(setup_next_element_in_unknown_element)
def collect_attributes(tag_name, prefix, attrs, ns, klass)
klass.get_attributes.each do |a_name, a_uri, required, element_name|
if a_uri.is_a?(String) or !a_uri.respond_to?(:include?)