Skip to content

Commit

Permalink
Merge branch 'content_type_charset_support' of https://github.com/benmmurphy/httparty into benmmurphy-content_type_charset_support
Browse files (browse the repository at this point in the history)
  • Loading branch information
jnunemaker committed May 7, 2013
2 parents 2a7553e + 9c6dd9b commit 82e351f
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 0 deletions.
71 changes: 71 additions & 0 deletions lib/httparty/request.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def initialize(http_method, path, o={})
self.path = path
self.options = {
:limit => o.delete(:no_follow) ? 1 : 5,
:assume_utf16_is_big_endian => true,
:default_params => {},
:follow_redirects => true,
:parser => Parser,
Expand Down Expand Up @@ -172,6 +173,75 @@ def query_string(uri)
query_string_parts.size > 0 ? query_string_parts.join('&') : nil
end

# Extracts the charset parameter from the last response's Content-Type header.
#
# Two spellings are recognised, both case-insensitively and with optional
# whitespace around "=":
#   * a bare token           -- text/plain; charset=utf-8
#   * a double-quoted string -- text/plain; charset="utf-8"
# For the quoted form, backslash escapes inside the quotes are unescaped.
#
# Returns the charset name as a String, or nil when the header is absent or
# contains no charset parameter.
def get_charset
  header = last_response["content-type"]
  return nil if header.nil?

  bare_token    = /;\s*charset\s*=\s*([^=,;"\s]+)/i
  quoted_string = /;\s*charset\s*=\s*"((\\.|[^\\"])+)"/i

  if header =~ bare_token
    Regexp.last_match(1)
  elsif header =~ quoted_string
    # Drop the backslash of every escaped pair, keeping the escaped character.
    Regexp.last_match(1).gsub(/\\(.)/, '\1')
  end
end

# Tags +body+ with the Ruby encoding named by +charset+.
#
# The encoding is looked up with Encoding.find and applied in place with
# force_encoding. If either step raises a StandardError (for example the
# charset name is not known to Ruby), the error is swallowed and +body+ is
# handed back as it was.
#
# Returns +body+ itself in both cases.
def encode_with_ruby_encoding(body, charset)
  body.force_encoding(Encoding.find(charset))
rescue
  body
end

# Reads the :assume_utf16_is_big_endian entry from this request's options.
#
# Returns whatever value is stored under that key.
def assume_utf16_is_big_endian
  options[:assume_utf16_is_big_endian]
end

# Tags +body+ as UTF-16LE or UTF-16BE.
#
# The first two bytes are inspected for a byte order mark:
#   FF FE -> UTF-16LE
#   FE FF -> UTF-16BE
# When the body is shorter than two bytes or starts with neither mark, the
# byte order is taken from assume_utf16_is_big_endian instead.
#
# The bytes are not modified (a leading BOM stays in place); only the
# string's encoding is changed, via force_encoding.
#
# Returns +body+ itself.
def encode_utf_16(body)
  leading_pair = body.bytesize >= 2 ? [body.getbyte(0), body.getbyte(1)] : nil

  encoding_name =
    case leading_pair
    when [0xFF, 0xFE] then "UTF-16LE"
    when [0xFE, 0xFF] then "UTF-16BE"
    else
      # No BOM to go by: fall back to the configured assumption.
      assume_utf16_is_big_endian ? "UTF-16BE" : "UTF-16LE"
    end

  body.force_encoding(encoding_name)
end

# Applies the charset announced by the response to +body+.
#
# * No charset (get_charset returns nil): +body+ is returned as is.
# * Charset is "utf-16" in any letter case: delegated to encode_utf_16,
#   which picks the byte order.
# * Anything else: delegated to encode_with_ruby_encoding.
#
# Returns the result of whichever branch was taken.
def _encode_body(body)
  declared = get_charset
  return body if declared.nil?

  generic_utf16 = "utf-16".casecmp(declared) == 0
  generic_utf16 ? encode_utf_16(body) : encode_with_ruby_encoding(body, declared)
end

# Entry point for charset handling of a response body.
#
# When String does not respond to #encoding there is nothing to tag, so
# +body+ is returned unchanged; otherwise the work is done by _encode_body.
def encode_body(body)
  return body unless "".respond_to?(:encoding)

  _encode_body(body)
end

def handle_response(body, &block)
if response_redirects?
options[:limit] -= 1
Expand All @@ -182,6 +252,7 @@ def handle_response(body, &block)
perform(&block)
else
body = body || last_response.body
body = encode_body(body)
Response.new(self, last_response, lambda { parse_response(body) }, :body => body)
end
end
Expand Down
71 changes: 71 additions & 0 deletions spec/httparty/request_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,77 @@
@request.perform.headers.should == { "key" => ["value"] }
end

# Charset examples: they inspect String#encoding on the response body, so
# they are only defined when String responds to #encoding.
if "".respond_to?(:encoding)

  # --- charset parameter parsing -------------------------------------------

  it "should process charset in content type properly" do
    stub_response("Content").initialize_http_header("Content-Type" => "text/plain;charset = utf-8")
    @request.perform.body.encoding.should == Encoding.find("UTF-8")
  end

  it "should process charset in content type properly if it has a different case" do
    stub_response("Content").initialize_http_header("Content-Type" => "text/plain;CHARSET = utf-8")
    @request.perform.body.encoding.should == Encoding.find("UTF-8")
  end

  it "should process quoted charset in content type properly" do
    stub_response("Content").initialize_http_header("Content-Type" => "text/plain;charset = \"utf-8\"")
    @request.perform.body.encoding.should == Encoding.find("UTF-8")
  end

  # --- utf-16 byte order -----------------------------------------------------
  # In the two BOM examples the option is set to the opposite byte order, so
  # they only pass if the BOM takes precedence over the option.

  it "should process utf-16 charset with little endian bom correctly" do
    @request.options[:assume_utf16_is_big_endian] = true

    stub_response("\xFF\xFEC\x00o\x00n\x00t\x00e\x00n\x00t\x00").
      initialize_http_header("Content-Type" => "text/plain;charset = utf-16")
    @request.perform.body.encoding.should == Encoding.find("UTF-16LE")
  end

  it "should process utf-16 charset with big endian bom correctly" do
    @request.options[:assume_utf16_is_big_endian] = false

    stub_response("\xFE\xFF\x00C\x00o\x00n\x00t\x00e\x00n\x00t").
      initialize_http_header("Content-Type" => "text/plain;charset = utf-16")
    @request.perform.body.encoding.should == Encoding.find("UTF-16BE")
  end

  it "should assume utf-16 little endian if options has been chosen" do
    @request.options[:assume_utf16_is_big_endian] = false

    # No BOM here, so the option alone decides the byte order.
    stub_response("C\x00o\x00n\x00t\x00e\x00n\x00t\x00").
      initialize_http_header("Content-Type" => "text/plain;charset = utf-16")
    @request.perform.body.encoding.should == Encoding.find("UTF-16LE")
  end

  # --- bodies that must be left alone ---------------------------------------

  it "should perform no encoding if the charset is not available" do
    stub_response("Content").initialize_http_header("Content-Type" => "text/plain;charset = utf-lols")
    result = @request.perform
    result.body.should == "Content"
    result.body.encoding.should == "Content".encoding
  end

  it "should perform no encoding if the content type is specified but no charset is specified" do
    stub_response("Content").initialize_http_header("Content-Type" => "text/plain")
    result = @request.perform
    result.body.should == "Content"
    result.body.encoding.should == "Content".encoding
  end
end


describe 'with non-200 responses' do
context "3xx responses" do
it 'returns a valid object for 304 not modified' do
Expand Down

0 comments on commit 82e351f

Please sign in to comment.