Python论坛  - 讨论区

标题:[python-chinese] libxml2+python win32下内存泄露问题

2007年07月03日 星期二 12:54

追风逐月 chinesexu在gmail.com
星期二 七月 3 12:54:02 HKT 2007

有人在python中使用过libxml2吗,我发现每次解析一个xml文件,都会增加4k左右的虚拟内存,长期使用,内存不堪重负啊。请问有什么好的解决方法没?
注:是在windows下使用


下面是测试代码:
#!/usr/bin/python -u
import libxml2

#------------------------------------------------------------------------------


# Memory debug specific


#------------------------------------------------------------------------------

def _escape(data):
        """Escape data for XML"""
        data=data.replace("&","&")
        data=data.replace("<","<")
        data=data.replace(">",">")
        data=data.replace("'","'")
        data=data.replace('"',""")
        return data
class callback:
    """SAX-style handler object handed to ``libxml2.createPushParser``.

    The element handlers rebuild the incoming markup as text: the level-0
    element becomes ``_head``/``_tail`` (opening and closing tag) and every
    complete level-1 element is collected in ``_current`` and re-parsed as
    its own small document.

    NOTE(review): ``startDocument``, ``startElement`` and ``endElement`` all
    begin with a bare ``return``, so as written none of that logic runs and
    the handler does nothing. This looks like a deliberate switch-off to show
    that memory grows even with empty callbacks -- confirm before removing
    the early returns.
    """

    def __init__(self):
        self._head = ""       # opening tag text of the level-0 element
        self._tail = ""       # closing tag text of the level-0 element
        self._current = ""    # markup collected for the current level-1 element
        self._level = 0       # current element nesting depth
        self._doc = None      # document parsed from _head + _tail
        self._root = None     # never assigned a node in this version

    def startDocument(self):
        """Document-start callback (currently disabled by the early return)."""
        return
        print(".")

    def endElement(self, tag):
        """Element-end callback (currently disabled by the early return).

        Appends the closing tag for ``tag``; when a level-1 element has just
        been closed, re-parses it wrapped in the level-0 tags, copies its
        node into ``self._doc`` and frees the copy again. When the level-0
        element closes, frees ``self._doc``.
        """
        return
        self._current += "</%s>" % (tag,)
        self._level -= 1
        if self._level > 1:
            return
        if self._level == 1:
            xml = self._head + self._current + self._tail
            doc = libxml2.parseDoc(xml)
            try:
                node = doc.getRootElement().children
                # Create the copy before entering the cleanup scope so the
                # finally clause never touches an unbound name.
                node1 = node.docCopyNode(self._doc, 1)
                try:
                    # Stanza hand-off (self._root.addChild(node1) /
                    # self._handler.stanza(self._doc, node1)) is switched
                    # off in this test; the copy is only made and released.
                    pass
                finally:
                    # Single release point: the copy is freed exactly once.
                    node1.unlinkNode()
                    node1.freeNode()
            finally:
                doc.freeDoc()
        else:
            print('level:%d' % self._level)
            xml = self._head + self._tail
            doc = libxml2.parseDoc(xml)
            try:
                # self._handler.stream_end(self._doc) is switched off here.
                if self._doc is not None:
                    self._doc.freeDoc()
                self._doc = None
                self._root = None
            finally:
                doc.freeDoc()

    def startElement(self, tag, attrs):
        """Element-start callback (currently disabled by the early return).

        Rebuilds the opening tag text from ``tag`` and ``attrs`` (values are
        passed through ``_escape``). At level 0 it also records the matching
        closing tag and parses the empty root element into ``self._doc``.
        """
        return
        s = "<" + tag
        if attrs:
            for a, v in attrs.items():
                s += " %s='%s'" % (a, _escape(v))
        s += ">"
        if self._level == 0:
            self._head = s
            self._tail = "</%s>" % (tag,)
            xml = self._head + self._tail
            # NOTE(review): a previous self._doc is overwritten here without
            # being freed (the guard that did so was commented out in the
            # original) -- confirm whether that is intended.
            self._doc = libxml2.parseDoc(xml)
        elif self._level == 1:
            self._current = s
        else:
            self._current += s
        self._level += 1

    def characters(self, data):
        """Character-data callback; ignored."""
        pass

    def warning(self, msg):
        """Parser warning callback; ignored."""
        pass

    def error(self, msg):
        """Parser error callback; ignored."""
        pass

    def fatalError(self, msg):
        """Parser fatal-error callback; ignored."""
        pass

#------------------------------------------------------------------------------
#------------------------------------------------------------------------------

import os
import sys

# Driver: push-parse every .htm*/.html file found in the directory given on
# the command line, NUM_ITERS times per round, printing libxml2's memory
# counter after each file so growth between iterations is visible.

programName = os.path.basename(sys.argv[0])

if len(sys.argv) != 2:
    # NOTE(review): the usage text appears to have lost its argument
    # placeholder (probably an angle-bracketed name); left unchanged.
    print("Use: %s " % programName)
    sys.exit(1)

inputPath = sys.argv[1]

if not os.path.exists(inputPath):
    print("Error: directory does not exist")
    sys.exit(1)

# Switch on libxml2's memory accounting before any parsing happens.
libxml2.debugMemory(1)

inputFileNames = []
dirContent = os.listdir(inputPath)
for fichero in dirContent:
    # Keep names whose last extension starts with ".htm" (".htm", ".html").
    dot = fichero.rfind(".")
    if dot != -1 and fichero.startswith(".htm", dot):
        inputFileNames.append(fichero)

if not inputFileNames:
    print("Error: no input files")
    sys.exit(1)


handler = callback()
NUM_ITERS = 20
isrun = True
while isrun:
    for i in range(NUM_ITERS):
        for inputFileName in inputFileNames:
            ctxt = libxml2.createPushParser(handler, "", 0, inputFileName)
            # os.path.join supplies the separator, so the directory may be
            # given with or without a trailing slash.
            inputFilePath = os.path.join(inputPath, inputFileName)
            f = open(inputFilePath)
            try:
                data = f.read()
            finally:
                f.close()

            # Feed the whole file as one final chunk (terminate flag = 1).
            ctxt.parseChunk(data, len(data), 1)
            ctxt.clearParserCtxt()
            print(libxml2.memoryUsed())
            # Drop the only reference to the parser context before the
            # next file is processed.
            del ctxt
            ctxt = None

    s = raw_input('Quit?')
    isrun = s != 'q'
s = raw_input('Press any key...')


# Memory debug specific
libxml2.cleanupParser()
leaked = libxml2.debugMemory(1)
if leaked == 0:
    print("OK")
else:
    print("Memory leak %d bytes" % leaked)
    libxml2.dumpMemory()

-- 
python c# and opensource
blog:http://www.chyni.cn
-------------- 下一部分 --------------
一个HTML附件被移除...
URL: http://python.cn/pipermail/python-chinese/attachments/20070703/2d91522d/attachment.html 

[导入自Mailman归档:http://www.zeuux.org/pipermail/zeuux-python]

2007年07月03日 星期二 14:00

大郎 iexper在gmail.com
星期二 七月 3 14:00:09 HKT 2007

libxml2(http://xmlsoft.org/python.html)

C语言版Libxml2(业界标准了)的一个python封装,据说速度非常快. 功能非常强,支持几乎所有的XML处理要求。 包括对Relax
NG等的支持。

µ«Êǽӿڲ»¹»pythonic£¬ÐèÒª¿¼ÂÇÄÚ´æ´¦Àí£¬ ÔÚWindowsÉÏ»á³öÏÖÎ޹ʹҵôµÄÇé¿ö£¬²»Îȶ¨¡£
------------------------------
from 潘俊勇的Blog <http://blog.czug.org/panjy>


On 7/3/07, 追风逐月 <chinesexu在gmail.com> wrote:
>
>
> 有人在python中使用过libxml2吗,我发现每次解析一个xml文件,都会增加4k左右的虚拟内存,长期使用,内存不堪重负啊。请问有什么好的解决方法没?
> 注:是在windows下使用
>
>
> 下面是测试代码:
> #!/usr/bin/python -u
> import libxml2
>
> #------------------------------------------------------------------------------
>
>
>
> # Memory debug specific
>
>
>
> #------------------------------------------------------------------------------
>
> def _escape(data):
>         """Escape data for XML"""
>         data=data.replace("&","&")
>         data=data.replace("<","<")
>         data=data.replace(">",">")
>         data=data.replace ("'","'")
>         data=data.replace('"',""")
>         return data
> class callback:
>     def __init__(self):
>             self._head = ""
>             self._tail = ""
>             self._current = ""
>             self._level = 0
>             self._doc = None
>             self._root = None
>
>     def startDocument(self):
>             return
>             print "."
>
>     def endElement(self, tag):
>             return
>             self._current+="" % (tag,)
>             self._level -= 1
>             if self._level > 1:
>                 return
>             if self._level==1:
>                 xml=self._head+self._current+self._tail
>                 doc=libxml2.parseDoc(xml)
>                 try:
>                     node = doc.getRootElement ().children
>                     try:
>                         node1 = node.docCopyNode(self._doc, 1)
>                         try:
>                             pass
>                             #self._root.addChild(node1)
>                             #self._handler.stanza(self._doc, node1)
>                         except:
>                             node1.unlinkNode()
>                             node1.freeNode()
>                             del node1
>                             pass
>                     finally:
>                         node1.unlinkNode()
>                         node1.freeNode()
>                         del node1
>                         #del node
>                 finally:
>                     doc.freeDoc()
>             else:
>                 print 'level:%d'%self._level
>                 xml=self._head+self._tail
>                 doc=libxml2.parseDoc (xml)
>                 try:
>                     #self._handler.stream_end(self._doc)
>                     self._doc.freeDoc()
>                     self._doc = None
>                     self._root = None
>                 finally:
>                     doc.freeDoc()
>
>     def startElement(self, tag, attrs):
>             return
>             #print 'startElement_____________'
>             s = "<"+tag
>             if attrs:
>                 for a,v in attrs.items():
>                     s+=" %s='%s'" % (a,_escape(v))
>             s += ">"
>             if self._level == 0:
>                 self._head = s
>                 self._tail = "" % (tag,)
>                 xml=self._head+self._tail
> ##                if self._doc:
> ##                    self._doc.freeDoc()
> ##                    self._doc=None
>
>                 self._doc = libxml2.parseDoc(xml)
>                 #self._handler.stream_start(self._doc)
>                 #self._root = self._doc.getRootElement()
>             elif self._level == 1:
>                 self._current = s
>             else:
>                 self._current += s
>             self._level += 1
>             #print self._level
>
>
>
>
>     def characters(self, data):
>         pass
>
>     def warning(self, msg):
>         pass
>
>     def error(self, msg):
>         pass
>
>     def fatalError(self, msg):
>         pass
>
>
> #------------------------------------------------------------------------------
> #------------------------------------------------------------------------------
>
>
> import os
> import sys
>
> programName = os.path.basename(sys.argv[0])
>
> if len(sys.argv) != 2:
>   print "Use: %s " % programName
>   sys.exit(1)
>
> inputPath = sys.argv [1]
>
> if not os.path.exists (inputPath):
>   print "Error: directory does not exist"
>   sys.exit(1)
>
> libxml2.debugMemory(1)
>
> inputFileNames = []
> dirContent = os.listdir(inputPath)
> for fichero in dirContent:
>   extension1=fichero.rfind(".htm")
>   extension2=fichero.rfind(".html")
>   dot = fichero.rfind(".")
>   extension = max(extension1,extension2)
>   if extension != -1 and extension == dot:
>       inputFileNames.append (fichero)
>
> if len(inputFileNames) == 0:
>   print "Error: no input files"
>   sys.exit(1)
>
>
> handler = callback()
> NUM_ITERS = 20
> isrun=True
> while isrun:
>     for i in range(NUM_ITERS):
>       for inputFileName in inputFileNames:
>         ctxt = libxml2.createPushParser(handler, "", 0, inputFileName)
>         #libxml2.initParser()
>         #print inputFileName
>         inputFilePath = inputPath + inputFileName
>         f = open(inputFilePath)
>         data = f.read()
>         #print data
>         f.close()
>
>
>         ctxt.parseChunk(data, len(data), 1)
>         #libxml2.pythonCleanupParser()
>         #libxml2.cleanupParser()
>         ctxt.clearParserCtxt()
>         #ctxt = None
>         print libxml2.memoryUsed()
>         del ctxt
>         ctxt = None
>
>     s=raw_input('Quit?')
>     isrun=s!='q'
> s=raw_input('Press any key...')
>
>
> # Memory debug specific
> libxml2.cleanupParser()
> if libxml2.debugMemory(1) == 0:
>     print "OK"
> else:
>     print "Memory leak %d bytes" % ( libxml2.debugMemory(1))
>     libxml2.dumpMemory()
>
> --
> python c# and opensource
> blog:http://www.chyni.cn
> _______________________________________________
> python-chinese
> Post: send python-chinese在lists.python.cn
> Subscribe: send subscribe to python-chinese-request在lists.python.cn
> Unsubscribe: send unsubscribe to  python-chinese-request在lists.python.cn
> Detail Info: http://python.cn/mailman/listinfo/python-chinese
>



-- 
我走到一个陌生的地方, 告诉别人 我要去流浪
哦,我要去疗伤……

Gtalk: iexper(at)gmail.com
域名过期了
-------------- 下一部分 --------------
一个HTML附件被移除...
URL: http://python.cn/pipermail/python-chinese/attachments/20070703/e5755163/attachment-0001.html 

[导入自Mailman归档:http://www.zeuux.org/pipermail/zeuux-python]

如下红色区域有误,请重新填写。

    你的回复:

    请 登录 后回复。还没有在Zeuux哲思注册吗?现在 注册 !

    Zeuux © 2025

    京ICP备05028076号