Python论坛的帖子：

星期日十一月 26 16:30:10 HKT 2006

代码如下：
import re

class GetTextBet2Tags:
    """Collect the text captured between a start tag and an end tag.

    Results from successive getText() calls accumulate in ``self.text``.
    """

    def __init__(self):
        # Captures from every getText() call, in the order they were found.
        self.text = []

    def getText(self, startTag, endTag, input):
        """Find every span between startTag and endTag and store it.

        The tags are spliced into the pattern unescaped, so they are
        interpreted as regular-expression fragments. The span itself is
        matched non-greedily, and no flags are passed to ``re.findall``.

        Args:
            startTag: pattern text placed before the captured span.
            endTag: pattern text placed after the captured span.
            input: the string to search.

        Returns:
            None; matches are appended to ``self.text``.
        """
        pattern = ''.join((startTag, '(.*?)', endTag))
        self.text.extend(re.findall(pattern, input))

# Script entry point (Python 2: relies on urllib.urlopen and the print
# statement). Downloads a page, collects regex captures from it, and prints
# the resulting list.
if __name__ == "__main__":
    import urllib
    usock = urllib.urlopen("http://www.blogjava.net/")
    # Decode the response body as UTF-8, then re-encode it as GBK; 'ignore'
    # drops any character that GBK cannot represent.
    input = usock.read().decode('utf-8').encode('gbk', 'ignore')
    g = GetTextBet2Tags()
    # NOTE(review): both tag arguments are empty strings here, so the pattern
    # is just '(.*?)' -- the real tags were possibly lost when the post was
    # archived; confirm against the original message.
    g.getText('', '', input)
    usock.close()
    # Alternative output: print each captured string on its own line.
    #for t in g.text: print t
    # Printing the list itself shows the repr() of every element, so the GBK
    # bytes appear as \x.. escapes rather than readable characters.
    print g.text

我想输出整个list，但打印出类似这样的：
\xdaXML\xb6\xaf\xcc\xac\xbc\xd3\xd4\xd8\xb5\xc4JS\xca\
如果执行for t in g.text: print t，则输出正常。但我的需要是希望得到整个list.
麻烦大家。
-------------- 下一部分 --------------
一个HTML附件被移除...
URL: http://python.cn/pipermail/python-chinese/attachments/20061126/fdcdfc17/attachment.html

标题：[python-chinese] list中文编码问题

', '

', '