# frozen_string_literal: true
# Allows the opening of various resources including URIs.
# If the first argument responds to the 'open' method, 'open' is called on
# it with the rest of the arguments.
# If the first argument is a string that begins with <code>(protocol)://</code>, it is parsed by
# URI.parse. If the parsed object responds to the 'open' method,
# 'open' is called on it with the rest of the arguments.
# Otherwise, Kernel#open is called.
# OpenURI::OpenRead#open provides URI::HTTP#open, URI::HTTPS#open and
# URI::FTP#open, Kernel#open.
# We can accept URIs and strings that begin with http://, https:// and
# ftp://. In these cases, the opened file object is extended by OpenURI::Meta.
def self.open(name, *rest, &block)
# NOTE(review): the branch bodies are not visible in this excerpt; only the
# dispatch conditions are documented here.
# Case 1: +name+ itself responds to +open+.
if name.respond_to?(:open)
# Case 2: +name+ is string-like, starts with a URI scheme followed by "://",
# and the object URI.parse builds from it (kept in +uri+) responds to +open+.
elsif name.respond_to?(:to_str) &&
%r{\A[A-Za-z][A-Za-z0-9+\-\.]*://} =~ name &&
(uri = URI.parse(name)).respond_to?(:open)
# OpenURI is an easy-to-use wrapper for Net::HTTP, Net::HTTPS and Net::FTP.
# It is possible to open an http, https or ftp URL as though it were a file:
# URI.open("http://www.ruby-lang.org/") {|f|
# f.each_line {|line| p line}
# The opened file has several getter methods for its meta-information, as
# follows, since it is extended by OpenURI::Meta.
# URI.open("http://www.ruby-lang.org/en") {|f|
# f.each_line {|line| p line}
# p f.base_uri # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
# p f.content_type # "text/html"
# p f.charset # "iso-8859-1"
# p f.content_encoding # []
# p f.last_modified # Thu Dec 05 02:45:02 UTC 2002
# Additional header fields can be specified by an optional hash argument.
# URI.open("http://www.ruby-lang.org/en/",
# "User-Agent" => "Ruby/#{RUBY_VERSION}",
# "From" => "foo@bar.invalid",
# "Referer" => "http://www.ruby-lang.org/") {|f|
# The environment variables such as http_proxy, https_proxy and ftp_proxy
# are in effect by default. Here we disable proxy:
# URI.open("http://www.ruby-lang.org/en/", :proxy => nil) {|f|
# See OpenURI::OpenRead.open and URI.open for more on available options.
# URI objects can be opened in a similar way.
# uri = URI.parse("http://www.ruby-lang.org/en/")
# URI objects can be read directly. The returned string is also extended by
# OpenURI::Meta.
# Author:: Tanaka Akira <akr@m17n.org>
:proxy_http_basic_authentication => true,
:content_length_proc => true,
:http_basic_authentication => true,
:ftp_active_mode => false,
# Validates option keys against the Options table and raises ArgumentError
# ("unrecognized option: ...") for a key that Options does not include.
# NOTE(review): the loop that binds +k+ is not visible in this excerpt;
# presumably it walks the keys of +options+ -- confirm against the full file.
def OpenURI.check_options(options) # :nodoc:
unless Options.include? k
raise ArgumentError, "unrecognized option: #{k}"
# Examines the leading positional arguments collected in +rest+.
# NOTE(review): only the two type tests are visible here; what is done with a
# matching argument and the return value are not shown. OpenURI.open_uri
# destructures the result as <tt>mode, _, rest</tt>.
def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
# First test: the leading argument is a String or an Integer.
if !rest.empty? && (String === rest.first || Integer === rest.first)
# Second test: the leading argument is an Integer.
if !rest.empty? && Integer === rest.first
# Opens +name+ for reading. +name+ is a URI::Generic or a value that
# URI.parse accepts. +rest+ may hold a mode (extracted by
# scan_open_optional_arguments) followed by an options Hash; anything left
# over raises ArgumentError ("extra arguments").
# NOTE(review): several lines (conditions, +end+) are elided in this excerpt.
def OpenURI.open_uri(name, *rest) # :nodoc:
# Use an already-parsed URI as is; parse anything else.
uri = URI::Generic === name ? name : URI.parse(name)
# The second element of the scan result is ignored here.
mode, _, rest = OpenURI.scan_open_optional_arguments(*rest)
options = rest.shift if !rest.empty? && Hash === rest.first
raise ArgumentError.new("extra arguments") if !rest.empty?
# Reject option keys that OpenURI does not recognize.
OpenURI.check_options(options)
# Mode "r" or "rb", optionally followed by ":<name>"; the name lands in $1.
if /\Arb?(?:\Z|:([^:]+))/ =~ mode
# +encoding+ receives the captured name; the Encoding.find result is dropped
# by the multiple assignment (presumably called only so that an unknown name
# raises -- confirm).
encoding, = $1,Encoding.find($1) if $1
# The :encoding option supplies the encoding as an alternative to the mode.
# NOTE(review): the condition guarding the "specified twice" error is not
# visible in this excerpt.
if options.has_key? :encoding
raise ArgumentError, "encoding specified twice"
encoding = Encoding.find(options[:encoding])
# Only the read modes 'r' and 'rb' pass; any other mode is an ArgumentError.
mode == 'r' || mode == 'rb' ||
raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
# Delegate to open_loop, then apply the requested encoding to the result.
io = open_loop(uri, options)
io.set_encoding(encoding) if encoding
# NOTE(review): only the capability check for close! is visible; the branch
# bodies are elided.
if io.respond_to? :close!
# Opens +uri+, choosing a proxy from +options+ and handling HTTP redirects.
# NOTE(review): this excerpt omits several lines (loop header, case/else
# branches, +end+); the comments below describe the visible statements only.
def OpenURI.open_loop(uri, options) # :nodoc:
# Record which of the two proxy-related options were supplied.
proxy_opts << :proxy_http_basic_authentication if options.include? :proxy_http_basic_authentication
proxy_opts << :proxy if options.include? :proxy
# The message shows that the two options are mutually exclusive (the guard
# itself is not visible here).
raise ArgumentError, "multiple proxy options specified"
# Authenticated proxy: the option value is destructured into
# (proxy, user, password) and to_str is called on both credentials.
when :proxy_http_basic_authentication
opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication)
proxy_user = proxy_user.to_str
proxy_pass = proxy_pass.to_str
raise ArgumentError.new("Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}")
opt_proxy = options.fetch(:proxy)
# find_proxy maps a URI to [proxy_uri, user, password] or to nil. The
# conditions selecting between the variants below are not visible here.
# Variant: ask the URI itself (u.find_proxy); no credentials.
find_proxy = lambda {|u| pxy = u.find_proxy; pxy ? [pxy, nil, nil] : nil}
# Variant: never use a proxy.
find_proxy = lambda {|u| nil}
# Variant: a fixed proxy that is parsed into a URI first (presumably given
# as a String -- confirm).
opt_proxy = URI.parse(opt_proxy)
find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
# Variant: a fixed proxy used as given.
find_proxy = lambda {|u| [opt_proxy, proxy_user, proxy_pass]}
raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
# OpenURI.open_http signals a redirect with
# <tt>throw :open_uri_redirect, loc_uri</tt>; the thrown value becomes
# +redirect+.
redirect = catch(:open_uri_redirect) {
uri.buffer_open(buf, find_proxy.call(uri), options)
# Although it violates RFC2616, Location: field may have relative
# URI. It is converted to absolute URI using uri as a base URI.
redirect = uri + redirect
# With <tt>:redirect => false</tt> the redirect is handed to the caller as an
# HTTPRedirect carrying the status text, the io and the target URI.
if !options.fetch(:redirect, true)
raise HTTPRedirect.new(buf.io.status.join(' '), buf.io, redirect)
# Refuse scheme changes that OpenURI.redirectable? does not allow.
unless OpenURI.redirectable?(uri, redirect)
raise "redirection forbidden: #{uri} -> #{redirect}"
if options.include? :http_basic_authentication
# send authentication only for the URI directly specified.
options.delete :http_basic_authentication
# uri_set is consulted for the string form of the current URI; a hit is
# reported as a redirection loop (where uri_set is filled is not shown).
raise "HTTP redirection loop: #{uri}" if uri_set.include? uri.to_s
# Returns a truthy value when a redirect from +uri1+ to +uri2+ is allowed.
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
# This test is intended to forbid a redirection from http://... to
# file:///etc/passwd, file:///dev/zero, etc. CVE-2011-1521
# https to http redirect is also forbidden intentionally.
# It avoids sending secure cookie or referer by non-secure HTTP protocol.
# (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3)
# However this is ad hoc. It should be extensible/configurable.
#
# Allowed: identical schemes (compared case-insensitively), or a source
# scheme of http/ftp redirecting to http, https or ftp.
uri1.scheme.downcase == uri2.scheme.downcase ||
(/\A(?:http|ftp)\z/i =~ uri1.scheme && /\A(?:https?|ftp)\z/i =~ uri2.scheme)
# Issues an HTTP GET for +target+ and publishes status and header fields on
# the io associated with +buf+.
# +proxy+ is nil or a [proxy_uri, proxy_user, proxy_pass] triple; +options+
# holds OpenURI options under Symbol keys and extra request header fields
# under String keys.
# NOTE(review): this excerpt omits lines (conditions, else/end); the comments
# describe the visible statements only.
def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
proxy_uri, proxy_user, proxy_pass = proxy
# Only a proxy URI whose class is exactly URI::HTTP is accepted.
raise "Non-HTTP proxy URI: #{proxy_uri}" if proxy_uri.class != URI::HTTP
# NOTE(review): the guard for this error is not visible in this excerpt.
raise ArgumentError, "userinfo not supported. [RFC3986]"
# String keys of +options+ are request header fields; copy them to +header+.
options.each {|k, v| header[k] = v if String === k }
# Without explicit credentials, fall back to the userinfo part of the proxy
# URI, split at ':'.
unless proxy_user && proxy_pass
proxy_user, proxy_pass = proxy_uri.userinfo.split(':') if proxy_uri.userinfo
# Build the connection class through Net::HTTP::Proxy, passing the
# credentials when both are known.
if proxy_user && proxy_pass
klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port, proxy_user, proxy_pass)
klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port)
# Connect to the target itself and request target.request_uri.
target_host = target.hostname
target_port = target.port
request_uri = target.request_uri
# Alternative (selecting condition not shown): connect to the proxy host and
# port and request the full target URI string.
target_host = proxy_uri.hostname
target_port = proxy_uri.port
request_uri = target.to_s
# Basic proxy credentials, Base64-encoded without line breaks ('m0').
if proxy_user && proxy_pass
header["Proxy-Authorization"] =
'Basic ' + ["#{proxy_user}:#{proxy_pass}"].pack('m0')
http = proxy ? klass.new(target_host, target_port) : klass.new(target_host, target_port, nil)
# HTTPS target: certificate verification defaults to VERIFY_PEER unless
# :ssl_verify_mode is given.
if target.class == URI::HTTPS
http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
store = OpenSSL::X509::Store.new
# Array() lets :ssl_ca_cert be a single value or a list (loop body elided).
Array(options[:ssl_ca_cert]).each do |cert|
# Optional timeouts are forwarded to the Net::HTTP object.
if options.include? :read_timeout
http.read_timeout = options[:read_timeout]
if options.include? :open_timeout
http.open_timeout = options[:open_timeout]
# GET request with the collected header fields; optional HTTP basic
# authentication from a (user, password) pair.
req = Net::HTTP::Get.new(request_uri, header)
if options.include? :http_basic_authentication
user, pass = options[:http_basic_authentication]
req.basic_auth user, pass
http.request(req) {|response|
# On success, report the content length: the Content-Length header as an
# Integer when present. The call with nil is presumably the else branch
# (not shown).
if options[:content_length_proc] && Net::HTTPSuccess === resp
if resp.key?('Content-Length')
options[:content_length_proc].call(resp['Content-Length'].to_i)
options[:content_length_proc].call(nil)
# On success, :progress_proc is called with the current buf.size.
if options[:progress_proc] && Net::HTTPSuccess === resp
options[:progress_proc].call(buf.size)
# Publish the status pair and every response header field.
io.status = [resp.code, resp.message]
resp.each_name {|name| buf.io.meta_add_field2 name, resp.get_fields(name) }
# Redirect statuses (the list is partly elided here): parse the Location
# field; an unparsable value is reported as an HTTPError.
when Net::HTTPMovedPermanently, # 301
Net::HTTPTemporaryRedirect # 307
loc_uri = URI.parse(resp['location'])
rescue URI::InvalidURIError
raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io)
# Unwinds to the catch(:open_uri_redirect) in OpenURI.open_loop with the new
# location.
throw :open_uri_redirect, loc_uri
# Presumably the fallback branch: the error carries the status text and io.
raise OpenURI::HTTPError.new(io.status.join(' '), io)
# Error raised by OpenURI.open_http; it is constructed with the status text as
# +message+ and the io object as +io+.
class HTTPError < StandardError
# NOTE(review): the body of initialize is not visible in this excerpt.
def initialize(message, io)
# Raised only when the +redirect+ option for HTTP is +false+.
# OpenURI.open_loop constructs it with the status text, the io and the
# redirect target URI.
class HTTPRedirect < HTTPError
# +uri+ is the redirect target passed by OpenURI.open_loop.
# NOTE(review): the body of initialize is not visible in this excerpt.
def initialize(message, io, uri)
# Wrapper around @io; only the spill-over check and the Meta initialization
# are visible in this excerpt.
class Buffer # :nodoc: all
# Once a StringIO-backed buffer is larger than StringMax, a Tempfile with the
# 'open-uri' prefix is created (presumably to replace @io -- the swap itself
# is not shown).
if StringIO === @io && StringMax < @size
io = Tempfile.new('open-uri')
# Carry the meta-information of the old io over to the new one.
Meta.init io, @io if Meta === @io
# Initialize Meta on @io unless @io already is a Meta.
Meta.init @io unless Meta === @io
# Mixin for holding meta-information.
def Meta.init(obj, src=nil) # :nodoc:
# NOTE(review): lines are elided here; the copy from +src+ below is
# presumably guarded by a check that +src+ was given -- confirm.
@meta = {} # name to string. legacy.
@metas = {} # name to array of strings.
# Copy the base URI, then re-add every header field of +src+ through
# meta_add_field2.
obj.base_uri = src.base_uri
src.metas.each {|name, values|
obj.meta_add_field2(name, values)
# returns an Array that consists of status code and message.
# returns a URI that is the base of relative URIs in the data.
# It may differ from the URI supplied by a user due to redirection.
# returns a Hash that represents header fields.
# The Hash keys are downcased for canonicalization.
# Each Hash value is a field body.
# If there are multiple fields with the same field name,
# the field values are concatenated with a comma.
# returns a Hash that represents header fields.
# The Hash keys are downcased for canonicalization.
# Each Hash value is an array of field values.
def meta_setup_encoding # :nodoc:
# Look up the Encoding named by +charset+.
# NOTE(review): lines are elided; how a missing or unknown charset is handled
# before this point is not visible.
enc = Encoding.find(charset)
# Fall back to binary (ASCII-8BIT) when no encoding was determined.
enc = Encoding::ASCII_8BIT unless enc
# Receivers that support force_encoding (branch body elided here).
if self.respond_to? :force_encoding
# Receivers that expose an underlying String through #string.
elsif self.respond_to? :string
self.string.force_encoding(enc)
def meta_add_field2(name, values) # :nodoc:
# Legacy table: all values of a field joined into one ", "-separated string.
@meta[name] = values.join(', ')
# Setting the content-type field re-runs the encoding setup (presumably
# because the charset is derived from that field -- confirm).
meta_setup_encoding if name == 'content-type'
# Single-value convenience wrapper: wraps +value+ in an Array and delegates
# to meta_add_field2.
def meta_add_field(name, value) # :nodoc:
meta_add_field2(name, [value])
# returns a Time that represents the Last-Modified field.
if vs = @metas['last-modified']
# Lexical pieces used by content_type_parse to match the Content-Type field.
# All three patterns carry the /n flag.
#
# A token: one or more characters other than \x00 through space, the listed
# separator characters, and \x7f.
RE_TOKEN = %r{[^\x00- ()<>@,;:\\"/\[\]?={}\x7f]+}n
# A double-quoted string; inside it a backslash may be followed by any
# character in \x00-\x7f.
RE_QUOTED_STRING = %r{"(?:[\r\n\t !#-\[\]-~\x80-\xff]|\\[\x00-\x7f])*"}n
# Zero or more ";name=value" parameters, where the value is a token or a
# quoted string and RE_LWS may optionally appear around the parts.
# RE_LWS is referenced here but its definition is not visible in this excerpt.
RE_PARAMETERS = %r{(?:;#{RE_LWS}?#{RE_TOKEN}#{RE_LWS}?=#{RE_LWS}?(?:#{RE_TOKEN}|#{RE_QUOTED_STRING})#{RE_LWS}?)*}n
def content_type_parse # :nodoc:
vs = @metas['content-type']
# The last (?:;#{RE_LWS}?)? matches extra ";" which violates RFC2045.
if vs && %r{\A#{RE_LWS}?(#{RE_TOKEN})#{RE_LWS}?/(#{RE_TOKEN})#{RE_LWS}?(#{RE_PARAMETERS})(?:;#{RE_LWS}?)?\z}no =~ vs.join(', ')