Object
URI::Parser.new([opts])
The constructor accepts a hash as options for parser. Keys of options are pattern names of URI components and values of options are pattern strings. The constructor generetes set of regexps for parsing URIs.
You can use the following keys:
* <tt>:ESCAPED</tt> (URI::PATTERN::ESCAPED in default) * <tt>:UNRESERVED</tt> (URI::PATTERN::UNRESERVED in default) * <tt>:DOMLABEL</tt> (URI::PATTERN::DOMLABEL in default) * <tt>:TOPLABEL</tt> (URI::PATTERN::TOPLABEL in default) * <tt>:HOSTNAME</tt> (URI::PATTERN::HOSTNAME in default)
p = URI::Parser.new(:ESCPAED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})" u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP:0xb78cf4f8 URL:http://example.jp/%uABCD> URI.parse(u.to_s) #=> raises URI::InvalidURIError s = "http://examle.com/ABCD" u1 = p.parse(s) #=> #<URI::HTTP:0xb78c3220 URL:http://example.com/ABCD> u2 = URI.parse(s) #=> #<URI::HTTP:0xb78b6d54 URL:http://example.com/ABCD> u1 == u2 #=> true u1.eql?(u2) #=> false
# File uri/common.rb, line 89
def initialize(opts = {})
@pattern = initialize_pattern(opts)
@pattern.each_value {|v| v.freeze}
@pattern.freeze
@regexp = initialize_regexp(@pattern)
@regexp.each_value {|v| v.freeze}
@regexp.freeze
end
# File uri/common.rb, line 214
def escape(str, unsafe = @regexp[:UNSAFE])
unless unsafe.kind_of?(Regexp)
# perhaps unsafe is String object
unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false)
end
str.gsub(unsafe) do
us = $&
tmp = ''
us.each_byte do |uc|
tmp << sprintf('%%%02X', uc)
end
tmp
end.force_encoding(Encoding::US_ASCII)
end
# File uri/common.rb, line 195
def extract(str, schemes = nil, &block)
if block_given?
str.scan(make_regexp(schemes)) { yield $& }
nil
else
result = []
str.scan(make_regexp(schemes)) { result.push $& }
result
end
end
# File uri/common.rb, line 234
def inspect
@@to_s.bind(self).call
end
# File uri/common.rb, line 187
def join(*str)
u = self.parse(str[0])
str[1 .. -1].each do |x|
u = u.merge(x)
end
u
end
# File uri/common.rb, line 206
def make_regexp(schemes = nil)
unless schemes
@regexp[:ABS_URI_REF]
else
/(?=#{Regexp.union(*schemes)}:)#{@pattern[:X_ABS_URI]}/
end
end
# File uri/common.rb, line 172
def parse(uri)
scheme, userinfo, host, port,
registry, path, opaque, query, fragment = self.split(uri)
if scheme && URI.scheme_list.include?(scheme.upcase)
URI.scheme_list[scheme.upcase].new(scheme, userinfo, host, port,
registry, path, opaque, query,
fragment, self)
else
Generic.new(scheme, userinfo, host, port,
registry, path, opaque, query,
fragment, self)
end
end
# File uri/common.rb, line 100
def split(uri)
case uri
when ''
# null uri
when @regexp[:ABS_URI]
scheme, opaque, userinfo, host, port,
registry, path, query, fragment = $~[1..-1]
# URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
# absoluteURI = scheme ":" ( hier_part | opaque_part )
# hier_part = ( net_path | abs_path ) [ "?" query ]
# opaque_part = uric_no_slash *uric
# abs_path = "/" path_segments
# net_path = "//" authority [ abs_path ]
# authority = server | reg_name
# server = [ [ userinfo "@" ] hostport ]
if !scheme
raise InvalidURIError,
"bad URI(absolute but no scheme): #{uri}"
end
if !opaque && (!path && (!host && !registry))
raise InvalidURIError,
"bad URI(absolute but no path): #{uri}"
end
when @regexp[:REL_URI]
scheme = nil
opaque = nil
userinfo, host, port, registry,
rel_segment, abs_path, query, fragment = $~[1..-1]
if rel_segment && abs_path
path = rel_segment + abs_path
elsif rel_segment
path = rel_segment
elsif abs_path
path = abs_path
end
# URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
# relativeURI = ( net_path | abs_path | rel_path ) [ "?" query ]
# net_path = "//" authority [ abs_path ]
# abs_path = "/" path_segments
# rel_path = rel_segment [ abs_path ]
# authority = server | reg_name
# server = [ [ userinfo "@" ] hostport ]
else
raise InvalidURIError, "bad URI(is not URI?): #{uri}"
end
path = '' if !path && !opaque # (see RFC2396 Section 5.2)
ret = [
scheme,
userinfo, host, port, # X
registry, # X
path, # Y
opaque, # Y
query,
fragment
]
return ret
end