diff options
author | 2015-02-24 00:54:59 +0100 | |
---|---|---|
committer | 2015-02-24 00:55:20 +0100 | |
commit | d42179d8d444cc35698e06d2d7520e6f7b383f17 (patch) | |
tree | cc9ac41ea63279d82f910d5cd33fb91f62ebe5ae /lib | |
parent | Fix displayname extraction (diff) | |
download | backend-d42179d8d444cc35698e06d2d7520e6f7b383f17.tar.gz backend-d42179d8d444cc35698e06d2d7520e6f7b383f17.tar.bz2 backend-d42179d8d444cc35698e06d2d7520e6f7b383f17.zip |
Fall back to charlock_holmes; also add stub encoding extraction from the message for later on
Diffstat (limited to 'lib')
-rw-r--r-- | lib/rendering.rb | 22 | ||||
-rw-r--r-- | lib/utils.rb | 4 |
2 files changed, 18 insertions, 8 deletions
diff --git a/lib/rendering.rb b/lib/rendering.rb index 3e77414..7649fcf 100644 --- a/lib/rendering.rb +++ b/lib/rendering.rb @@ -8,33 +8,43 @@ module Ag::Rendering content_type = mime_split(mail.parts.first.content_type) if content_type == 'text/plain' or content_type == 'text/html' - to_content(content_type, mail.parts.first.decoded) + to_content(content_type, mail.parts.first.decoded, get_encoding(mail.parts.first)) else # Nested multipart? if mail.parts.first.multipart? content_type = mime_split(mail.parts.first.parts.first.content_type) if content_type == 'text/plain' or content_type == 'text/html' - to_content(content_type, mail.parts.first.parts.first.decoded) + to_content(content_type, mail.parts.first.parts.first.decoded, get_encoding(mail.parts.first.parts.first)) else raise "Cannot find body: #{mail.message_id}" end # Specialty: Gnus/Emacs signed emails with no explicit multipart type elsif mime_split(mail.content_type) == 'multipart/signed' - to_content('text/plain', mail.parts.first.decoded) + to_content('text/plain', mail.parts.first.decoded, get_encoding(mail.parts.first)) end end else # No Content-Type, assume plain text (git-send-email) if mail.content_type == nil - to_content('text/plain', mail.body.decoded) + to_content('text/plain', mail.body.decoded, get_encoding(mail)) else - to_content(mime_split(mail.content_type), mail.body.decoded) + to_content(mime_split(mail.content_type), mail.body.decoded, get_encoding(mail)) end end end - def self.to_content(content_type, content) + def self.get_encoding(part) + if part.content_type_parameters + part.content_type_parameters['charset'] + else + nil + end + end + + def self.to_content(content_type, content, charset = nil) + #content = content.force_encoding(charset) if charset + if content_type == 'text/plain' escaped_content = CGI::escapeHTML(content) escaped_content.lines.map do |line| diff --git a/lib/utils.rb b/lib/utils.rb index f8d546e..38349e0 100644 --- a/lib/utils.rb +++ b/lib/utils.rb @@ -8,13 +8,13 @@ module Ag module Utils module_function - def fix_encoding(str) + def fix_encoding2(str) s = str.encode('UTF-8', 'UTF-8', invalid: :replace, replace: '') s = s.unpack('C*').pack('U*') unless s.valid_encoding? s end - def fix_encoding_old(str, fail_hard = false) + def fix_encoding(str, fail_hard = false) detection = CharlockHolmes::EncodingDetector.detect(str) CharlockHolmes::Converter.convert(str, detection[:encoding], 'UTF-8') rescue => e |