| Summary: | UnicodeDecodeError: 'utf8' codec can't decode bytes in position 77-80: invalid data | ||
|---|---|---|---|
| Product: | Sisyphus | Reporter: | Andrey V Khavryuchenko <akhavr> |
| Component: | python-module-BeautifulSoup | Assignee: | Vitaly Lipatov <lav> |
| Status: | CLOSED FIXED | QA Contact: | qa-sisyphus |
| Severity: | normal | ||
| Priority: | P2 | CC: | antohami, cas, cow, darktemplar, enp, evg, george, grenka, imz, lav, mithraen, nbr, obirvalger, qa_viy, real.altlinux.org, rider, sem, shaba, sin, slev, vitty, viy |
| Version: | unstable | ||
| Hardware: | all | ||
| OS: | Linux | ||
| Bug Depends on: | 14975 | ||
| Bug Blocks: | |||
Похоже, что эту багу нужно перецепить на python-module-BeautifulSoup, т.к. падают
даже тесты из feedparser:
======================================================================
ERROR: ./tests/wellformed/encoding/encoding_attribute_crash_2.xml: crashes
----------------------------------------------------------------------
Traceback (most recent call last):
File "feedparsertest.py", line 164, in <lambda>
method(self, evalString, feedparser.parse(xmlfile))
File "/home/akhavr/src/feedparser/feedparser/feedparser.py", line 3529, in parse
feedparser.feed(data)
File "/home/akhavr/src/feedparser/feedparser/feedparser.py", line 1662, in feed
sgmllib.SGMLParser.feed(self, data)
File "/usr/lib/python2.5/sgmllib.py", line 99, in feed
self.goahead(0)
File "/usr/lib/python2.5/sgmllib.py", line 138, in goahead
k = self.parse_endtag(i)
File "/usr/lib/python2.5/sgmllib.py", line 315, in parse_endtag
self.finish_endtag(tag)
File "/usr/lib/python2.5/sgmllib.py", line 355, in finish_endtag
self.unknown_endtag(tag)
File "/home/akhavr/src/feedparser/feedparser/feedparser.py", line 569, in unknown_endtag
method()
File "/home/akhavr/src/feedparser/feedparser/feedparser.py", line 1414, in _end_description
value = self.popContent('description')
File "/home/akhavr/src/feedparser/feedparser/feedparser.py", line 849, in popContent
value = self.pop(tag)
File "/home/akhavr/src/feedparser/feedparser/feedparser.py", line 764, in pop
mfresults = _parseMicroformats(output, self.baseuri, self.encoding)
File "/home/akhavr/src/feedparser/feedparser/feedparser.py", line 2222, in _parseMicroformats
p = _MicroformatsParser(htmlSource, baseURI, encoding)
File "/home/akhavr/src/feedparser/feedparser/feedparser.py", line 1824, in __init__
self.document = BeautifulSoup.BeautifulSoup(data)
File "/usr/lib/python2.5/site-packages/BeautifulSoup.py", line 1282, in __init__
BeautifulStoneSoup.__init__(self, *args, **kwargs)
File "/usr/lib/python2.5/site-packages/BeautifulSoup.py", line 946, in __init__
self._feed()
File "/usr/lib/python2.5/site-packages/BeautifulSoup.py", line 971, in _feed
SGMLParser.feed(self, markup)
File "/usr/lib/python2.5/sgmllib.py", line 99, in feed
self.goahead(0)
File "/usr/lib/python2.5/sgmllib.py", line 133, in goahead
k = self.parse_starttag(i)
File "/usr/lib/python2.5/sgmllib.py", line 285, in parse_starttag
self._convert_ref, attrvalue)
UnicodeDecodeError: 'ascii' codec can't decode byte 0x80 in position 0: ordinal not in range(128)
----------------------------------------------------------------------
Ran 4271 tests in 40.250s
Как это сделать в багзилле, не заводя новый дефект?
Изменить компонент в поле Component. Перевесил. Похоже, больше не актуально.
akhavr@t40 ~/src/feedparser $ python
Python 2.5.1 (r251:54863, Feb 8 2008, 15:19:12)
[GCC 4.1.1 20070105 (ALT Linux, build 4.1.1-alt12)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import feedparser
>>> f = feedparser.parse('http://feeds.feedburner.com/semanticfocus/')
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/lib/python2.5/site-packages/feedparser.py", line 2623, in parse
feedparser.feed(data)
File "/usr/lib/python2.5/site-packages/feedparser.py", line 1441, in feed
sgmllib.SGMLParser.feed(self, data)
File "/usr/lib/python2.5/sgmllib.py", line 99, in feed
self.goahead(0)
File "/usr/lib/python2.5/sgmllib.py", line 138, in goahead
k = self.parse_endtag(i)
File "/usr/lib/python2.5/sgmllib.py", line 315, in parse_endtag
self.finish_endtag(tag)
File "/usr/lib/python2.5/sgmllib.py", line 355, in finish_endtag
self.unknown_endtag(tag)
File "/usr/lib/python2.5/site-packages/feedparser.py", line 476, in unknown_endtag
method()
File "/usr/lib/python2.5/site-packages/feedparser.py", line 1217, in _end_description
value = self.popContent('description')
File "/usr/lib/python2.5/site-packages/feedparser.py", line 700, in popContent
value = self.pop(tag)
File "/usr/lib/python2.5/site-packages/feedparser.py", line 641, in pop
output = _resolveRelativeURIs(output, self.baseuri, self.encoding)
File "/usr/lib/python2.5/site-packages/feedparser.py", line 1594, in _resolveRelativeURIs
p.feed(htmlSource)
File "/usr/lib/python2.5/site-packages/feedparser.py", line 1441, in feed
sgmllib.SGMLParser.feed(self, data)
File "/usr/lib/python2.5/sgmllib.py", line 99, in feed
self.goahead(0)
File "/usr/lib/python2.5/sgmllib.py", line 133, in goahead
k = self.parse_starttag(i)
File "/usr/lib/python2.5/sgmllib.py", line 291, in parse_starttag
self.finish_starttag(tag, attrs)
File "/usr/lib/python2.5/sgmllib.py", line 333, in finish_starttag
self.unknown_starttag(tag, attrs)
File "/usr/lib/python2.5/site-packages/feedparser.py", line 1589, in unknown_starttag
_BaseHTMLProcessor.unknown_starttag(self, tag, attrs)
File "/usr/lib/python2.5/site-packages/feedparser.py", line 1458, in unknown_starttag
value = unicode(value, self.encoding)
UnicodeDecodeError: 'utf8' codec can't decode bytes in position 77-80: invalid data