don't try to decode input that is already unicode

* includes tests for decoding charset
This commit is contained in:
Rossen Georgiev 2015-03-21 23:08:29 +00:00
parent 7a168b2163
commit 9fa62c433a
3 changed files with 46 additions and 8 deletions

View File

@ -79,15 +79,16 @@ def parse(packet):
"""
# attempt to detect encoding
try:
packet = packet.decode('utf-8')
except UnicodeDecodeError:
res = chardet.detect(packet)
if isinstance(packet, str):
try:
packet = packet.decode('utf-8')
except UnicodeDecodeError:
res = chardet.detect(packet)
if res['confidence'] > 0.7:
packet = packet.decode(res['encoding'])
else:
packet = packet.decode('latin-1')
if res['confidence'] > 0.7:
packet = packet.decode(res['encoding'])
else:
packet = packet.decode('latin-1')
packet = packet.rstrip("\r\n")
logger.debug("Parsing: %s", packet)

View File

@ -2,3 +2,4 @@ pylint
nose
coverage
mox
chardet

36
tests/test_parse.py Normal file
View File

@ -0,0 +1,36 @@
# encoding: utf-8
import unittest
from aprslib.parse import parse
class ParseTestCase(unittest.TestCase):
    """Checks that ``parse`` returns the status field as unicode text."""

    def test_unicode(self):
        """Parse packets given as byte strings and as unicode; compare status."""
        # Each entry pairs a raw packet with the status text expected back.
        samples = (
            # plain 7-bit ASCII byte string
            ("A>B:>status", u"status"),
            # byte string ending in a degree sign (byte 0xb0)
            ("A>B:>status\xb0", u"status\xb0"),
            # byte string containing non-ASCII (Cyrillic) text
            ("A>B:>статус", u"статус"),
            # input that is already a unicode object
            (u"A>B:>статус", u"статус"),
        )

        for raw_packet, expected_status in samples:
            status = parse(raw_packet)['status']
            self.assertIsInstance(status, unicode)
            self.assertEqual(status, expected_status)
# Run the test cases in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()