Don't try to decode input that is already unicode
* includes tests for decoding charset
This commit is contained in:
parent
7a168b2163
commit
9fa62c433a
|
|
@ -79,15 +79,16 @@ def parse(packet):
|
|||
"""
|
||||
|
||||
# attempt to detect encoding
|
||||
try:
|
||||
packet = packet.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
res = chardet.detect(packet)
|
||||
if isinstance(packet, str):
|
||||
try:
|
||||
packet = packet.decode('utf-8')
|
||||
except UnicodeDecodeError:
|
||||
res = chardet.detect(packet)
|
||||
|
||||
if res['confidence'] > 0.7:
|
||||
packet = packet.decode(res['encoding'])
|
||||
else:
|
||||
packet = packet.decode('latin-1')
|
||||
if res['confidence'] > 0.7:
|
||||
packet = packet.decode(res['encoding'])
|
||||
else:
|
||||
packet = packet.decode('latin-1')
|
||||
|
||||
packet = packet.rstrip("\r\n")
|
||||
logger.debug("Parsing: %s", packet)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,36 @@
|
|||
# encoding: utf-8
|
||||
|
||||
import unittest
|
||||
|
||||
from aprslib.parse import parse
|
||||
|
||||
|
||||
class ParseTestCase(unittest.TestCase):
|
||||
def test_unicode(self):
|
||||
# 7bit ascii
|
||||
result = parse("A>B:>status")
|
||||
|
||||
self.assertIsInstance(result['status'], unicode)
|
||||
self.assertEqual(result['status'], u"status")
|
||||
|
||||
# string with degree sign
|
||||
result = parse("A>B:>status\xb0")
|
||||
|
||||
self.assertIsInstance(result['status'], unicode)
|
||||
self.assertEqual(result['status'], u"status\xb0")
|
||||
|
||||
# str with unicode
|
||||
result = parse("A>B:>статус")
|
||||
|
||||
self.assertIsInstance(result['status'], unicode)
|
||||
self.assertEqual(result['status'], u"статус")
|
||||
|
||||
# unicode input
|
||||
result = parse(u"A>B:>статус")
|
||||
|
||||
self.assertIsInstance(result['status'], unicode)
|
||||
self.assertEqual(result['status'], u"статус")
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
Loading…
Reference in New Issue