Python论坛的帖子：

星期二七月 3 12:54:02 HKT 2007

有人在python中使用过libxml2吗，我发现每次解析一个xml文件，都会增加4k左右的虚拟内存，长期使用，内存不堪重负啊。请问有什么好的解决方法没？
注：是在windows下使用


下面是测试代码：
#!/usr/bin/python -u
import libxml2

#------------------------------------------------------------------------------


# Memory debug specific


#------------------------------------------------------------------------------

def _escape(data):
        """Escape data for XML"""
        data=data.replace("&","&")
        data=data.replace("<","<")
        data=data.replace(">",">")
        data=data.replace("'","'")
        data=data.replace('"',""")
        return data
class callback:
    """SAX-style handler handed to ``libxml2.createPushParser`` by this script.

    When enabled, the handler rebuilds the markup of each element nested
    directly below the root, parses that fragment into a temporary
    document, copies its node into ``self._doc`` and frees everything
    again.  Character data is ignored (``characters`` is a no-op).

    The element handlers are switched off by default (``handlers_enabled``
    is False), so parsing drives the callbacks without any handler-side
    allocation.  Set ``handlers_enabled = True`` on the class or on an
    instance to exercise the fragment handling.
    """

    # Master switch for startDocument/startElement/endElement.
    handlers_enabled = False

    def __init__(self):
        self._head = ""       # opening tag text of the level-0 (root) element
        self._tail = ""       # matching closing tag text of the root element
        self._current = ""    # tag text collected for the current level-1 element
        self._level = 0       # current element nesting depth
        self._doc = None      # document parsed from the bare root element
        self._root = None     # never assigned a node in this script

    def startDocument(self):
        """Print a progress dot when the handlers are enabled."""
        if not self.handlers_enabled:
            return
        print(".")

    def endElement(self, tag):
        """Close *tag*; when a level-1 element ends, parse and release it."""
        if not self.handlers_enabled:
            return
        self._current += "</%s>" % (tag,)
        self._level -= 1
        if self._level > 1:
            # Still inside a level-1 element: keep accumulating.
            return
        if self._level == 1:
            xml = self._head + self._current + self._tail
            doc = libxml2.parseDoc(xml)
            try:
                node = doc.getRootElement().children
                node1 = node.docCopyNode(self._doc, 1)
                # The copy is never attached to a tree here, so it is
                # released exactly once, immediately.
                node1.unlinkNode()
                node1.freeNode()
            finally:
                doc.freeDoc()
        else:
            # The root element itself has been closed.
            print('level:%d' % self._level)
            xml = self._head + self._tail
            doc = libxml2.parseDoc(xml)
            try:
                if self._doc is not None:
                    self._doc.freeDoc()
                self._doc = None
                self._root = None
            finally:
                doc.freeDoc()

    def startElement(self, tag, attrs):
        """Record the opening tag text of *tag* with its attributes."""
        if not self.handlers_enabled:
            return
        s = "<" + tag
        if attrs:
            for a, v in attrs.items():
                s += " %s='%s'" % (a, _escape(v))
        s += ">"
        if self._level == 0:
            # Root element: remember its open/close tags and parse the
            # empty root into the document that node copies are made for.
            self._head = s
            self._tail = "</%s>" % (tag,)
            xml = self._head + self._tail
            self._doc = libxml2.parseDoc(xml)
        elif self._level == 1:
            # A new level-1 element starts a fresh fragment.
            self._current = s
        else:
            self._current += s
        self._level += 1

    def characters(self, data):
        """Ignore character data."""
        pass

    def warning(self, msg):
        """Ignore parser warnings."""
        pass

    def error(self, msg):
        """Ignore parser errors."""
        pass

    def fatalError(self, msg):
        """Ignore fatal parser errors."""
        pass

#------------------------------------------------------------------------------
#------------------------------------------------------------------------------

import os
import sys

programName = os.path.basename(sys.argv[0])

# Exactly one argument is expected: the directory holding the input files.
if len(sys.argv) != 2:
    print("Use: %s <directory>" % programName)
    sys.exit(1)

inputPath = sys.argv[1]

# The path is handed to os.listdir() further down, so it must be an
# existing directory; a regular file is rejected here as well.
if not os.path.isdir(inputPath):
    print("Error: directory does not exist")
    sys.exit(1)

libxml2.debugMemory(1)

# Collect the names of all files in inputPath whose name ends in ".htm"
# or ".html".  The suffix is tested explicitly so that names such as
# "page.htmx" or "page.htmlbak" are not picked up.
inputFileNames = []
dirContent = os.listdir(inputPath)
for fichero in dirContent:
    if fichero.endswith(".htm") or fichero.endswith(".html"):
        inputFileNames.append(fichero)

if not inputFileNames:
    print("Error: no input files")
    sys.exit(1)


# Parse every input file NUM_ITERS times with one shared handler, printing
# libxml2's memory counter after each parse; repeat until the user types 'q'.
handler = callback()
NUM_ITERS = 20
isrun = True
while isrun:
    for i in range(NUM_ITERS):
        for inputFileName in inputFileNames:
            ctxt = libxml2.createPushParser(handler, "", 0, inputFileName)
            # Join with the platform separator so inputPath does not need
            # to end in a slash.
            inputFilePath = os.path.join(inputPath, inputFileName)
            f = open(inputFilePath)
            try:
                data = f.read()
            finally:
                # Close the file even if reading fails.
                f.close()

            # Feed the whole file as a single, terminating chunk.
            ctxt.parseChunk(data, len(data), 1)
            ctxt.clearParserCtxt()
            print(libxml2.memoryUsed())
            # Drop the reference to the parser context before the next file.
            del ctxt

    s = raw_input('Quit?')
    isrun = s != 'q'
s = raw_input('Press any key...')


# Memory debug specific
# Release the parser's global state, then report what libxml2 still
# counts as allocated.
libxml2.cleanupParser()
if libxml2.debugMemory(1) != 0:
    print("Memory leak %d bytes" % (libxml2.debugMemory(1),))
    libxml2.dumpMemory()
else:
    print("OK")

-- 
python c# and opensource
blog:http://www.chyni.cn
-------------- 下一部分 --------------
一个HTML附件被移除...
URL: http://python.cn/pipermail/python-chinese/attachments/20070703/2d91522d/attachment.html

标题：[python-chinese] libxml2+python win32下内存泄露问题