From 6ec5e1e8e531a91a7141ee704fe3b5df247c7c7c Mon Sep 17 00:00:00 2001 From: Rossen Georgiev Date: Tue, 10 Mar 2015 13:33:21 +0000 Subject: [PATCH 1/5] tiny syntax fix in parse.py --- aprslib/parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aprslib/parse.py b/aprslib/parse.py index 37b47d9..f56d770 100644 --- a/aprslib/parse.py +++ b/aprslib/parse.py @@ -93,7 +93,7 @@ def parse(packet): try: parsed.update(_parse_header(head)) - except ParseError, msg: + except ParseError as msg: raise ParseError(str(msg), packet) packet_type = body[0] From 829f1fc8944028d8edc532f2d863d19b32859a93 Mon Sep 17 00:00:00 2001 From: Rossen Georgiev Date: Tue, 10 Mar 2015 17:29:35 +0000 Subject: [PATCH 2/5] fix mic-e dstcall decoding bug --- aprslib/parse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aprslib/parse.py b/aprslib/parse.py index f56d770..b1222e9 100644 --- a/aprslib/parse.py +++ b/aprslib/parse.py @@ -477,7 +477,7 @@ def _parse_mice(dstcall, body): elif ord(i) > 76: # P-Y tmpdstcall += chr(ord(i) - 32) elif ord(i) > 57: # A-J - tmpdstcall += chr(ord(i) - 16) + tmpdstcall += chr(ord(i) - 17) else: # 0-9 tmpdstcall += i From bd8b1867aa7b85f59fa9224726005b3effe3ca3b Mon Sep 17 00:00:00 2001 From: Rossen Georgiev Date: Tue, 10 Mar 2015 17:55:30 +0000 Subject: [PATCH 3/5] improved _parse_timestamp incorrect timestamps will now return timestamp 0 and get clipped from the body so further parsing can occur. stopped using utc.second to fill the second for z/ formats --- aprslib/parse.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/aprslib/parse.py b/aprslib/parse.py index b1222e9..1020251 100644 --- a/aprslib/parse.py +++ b/aprslib/parse.py @@ -331,7 +331,7 @@ def _parse_timestamp(body, packet_type=''): if packet_type == '>' and form != 'z': pass - if form in "hz/": + else: body = body[7:] try: @@ -341,7 +341,9 @@ def _parse_timestamp(body, packet_type=''): # zulu ddhhmm format # '/' local ddhhmm format elif form in 'z/': - timestamp = "%d%02d%s%02d" % (utc.year, utc.month, ts, utc.second) + timestamp = "%d%02d%s%02d" % (utc.year, utc.month, ts, 0) + else: + timestamp = "19700101000000" timestamp = utc.strptime(timestamp, "%Y%m%d%H%M%S") timestamp = time.mktime(timestamp.timetuple()) From 00bb2f4be7d6cb5322253b109827e60a5f4d5d99 Mon Sep 17 00:00:00 2001 From: Rossen Georgiev Date: Tue, 10 Mar 2015 19:48:47 +0000 Subject: [PATCH 4/5] charset detection, unicode for all --- README.rst | 15 +++++++++++---- aprslib/base91.py | 4 ++-- aprslib/parse.py | 21 +++++++++++++++++++++ tests/test_base91.py | 3 ++- 4 files changed, 36 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index bb2d7a6..6a1428c 100644 --- a/README.rst +++ b/README.rst @@ -4,17 +4,24 @@ APRS library for Python |Build Status| |Coverage Status| A tiny library for dealing with APRS. It can be used to connect and listen to the APRS-IS feed as well as upload. -Parsing of packets is also possible, but the entire spec is not fully implemeneted yet. +Parsing of packets is also possible, but the entire spec is not fully implemented yet. The following is supported: - normal/compressed position reports - objects - mic-e position report - messages (inc. telemetry, bulletins, etc) -- base91 comment telemetry extention -- altitude extention +- base91 comment telemetry extension +- altitude extension - beacons +Packets can often contain characters outside of 7-bit ASCII. +``aprslib.parse()`` will attempt to guess the charset and return ``unicode`` strings using these steps and in that order: + +1. Attempt to decode string as ``utf-8`` +2. Attempt to guess the charset using ``chardet`` module (if installed), decode if confidence factor is sufficient +3. Finally, decode as ``latin-1`` + Install ----------- @@ -116,7 +123,7 @@ Here is a simple example: DEBUG:aprslib.parse:Parsed ok. ... -Uploading packets to APRS-IS is posible through the ``sendall()`` method in ``IS``. +Uploading packets to APRS-IS is possible through the ``sendall()`` method in ``IS``. The method assumes a single line/packet per call. The parameters may end with ``\r\n``, but it's not required. .. code:: python diff --git a/aprslib/base91.py b/aprslib/base91.py index b184170..15392c0 100644 --- a/aprslib/base91.py +++ b/aprslib/base91.py @@ -29,8 +29,8 @@ def to_decimal(text): Takes a base91 char string and returns decimal """ - if not isinstance(text, str): - raise TypeError("expected str") + if not isinstance(text, basestring): + raise TypeError("expected str or unicode, %s given" % type(text)) if findall(r"[\x00-\x20\x7c-\xff]", text): raise ValueError("invalid character in sequence") diff --git a/aprslib/parse.py b/aprslib/parse.py index 1020251..8ee2f1e 100644 --- a/aprslib/parse.py +++ b/aprslib/parse.py @@ -24,6 +24,16 @@ import math import logging from datetime import datetime +try: + import chardet +except ImportError: + # create fake chardet + + class chardet: + @staticmethod + def detect(x): + return {'confidence': 0.0, 'encoding': 'windows-1252'} + from .exceptions import (UnknownFormat, ParseError) from . import base91 @@ -68,6 +78,17 @@ def parse(packet): * status message """ + # attempt to detect encoding + try: + packet = packet.decode('utf-8') + except UnicodeDecodeError: + res = chardet.detect(packet) + + if res['confidence'] > 0.7: + packet = packet.decode(res['encoding']) + else: + packet = packet.decode('latin-1') + packet = packet.rstrip("\r\n") logger.debug("Parsing: %s", packet) diff --git a/tests/test_base91.py b/tests/test_base91.py index c013e1a..ba3cb6c 100644 --- a/tests/test_base91.py +++ b/tests/test_base91.py @@ -64,12 +64,13 @@ class b_ToDecimal(unittest.TestCase): # 91**3 = "!!! # etc testData += [[91**i, '"' + '!'*i] for i in xrange(20)] + testData += [[91**i, u'"' + u'!'*i] for i in xrange(20)] for expected, n in testData: self.assertEqual(expected, base91.to_decimal(n)) def test_invalid_input_type(self): - testData = [-1, 0, 5, unicode('a'), None, ['d']] + testData = [-1, 0, 5, None, ['d']] for n in testData: self.assertRaises(TypeError, base91.to_decimal, n) From 73306aab2cc9eaadc261d83093735808669ceeb5 Mon Sep 17 00:00:00 2001 From: Rossen Georgiev Date: Tue, 10 Mar 2015 20:09:00 +0000 Subject: [PATCH 5/5] bump to v0.6.37 --- aprslib/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aprslib/__init__.py b/aprslib/__init__.py index 01dc51e..a181640 100644 --- a/aprslib/__init__.py +++ b/aprslib/__init__.py @@ -37,7 +37,7 @@ from datetime import date as _date __date__ = str(_date.today()) del _date -__version__ = "0.6.36" +__version__ = "0.6.37" __author__ = "Rossen Georgiev" __all__ = ['IS', 'parse', 'passcode']