Python论坛  - 讨论区

标题:[python-chinese] 想请教一下python中文件读写的问题。

2007年09月16日 星期日 11:34

fei.He hefei267在gmail.com
星期日 九月 16 11:34:05 HKT 2007

我才开始学习python，碰到个问题请教一下大家：python是不是能够以二进制的方式读取随机存取文件中的内容？可能有点说的不清楚啊，呵呵。实际上我就是想说，python中有没有什么方法可以完成c语言中的fwrite，fread和fseek这三个函数的功能？

谢谢了！
-------------- 下一部分 --------------
一个HTML附件被移除...
URL: http://python.cn/pipermail/python-chinese/attachments/20070916/b4c60a56/attachment-0001.htm 

[导入自Mailman归档:http://www.zeuux.org/pipermail/zeuux-python]

2007年09月16日 星期日 11:51

junyi sun ccnusjy在gmail.com
星期日 九月 16 11:51:24 HKT 2007

可以啊，file对象有write,read,seek等方法。file对象可以通过open(..)函数得到。

贴一点儿我读写文件的代码片段吧。

# -*- coding: cp936 -*-
import struct
import bsddb,os
class CRecord:
    """Store crawled pages as fixed-size records plus a separate page file.

    Every record in RECORD_FILE is one struct packed with FMT; its size and
    pageptr fields locate the page bytes inside PAGE_FILE.  Two bsddb B-tree
    indexes map URL -> record offset and record offset -> URL.

    struct.pack truncates strings that are longer than their field
    (256/256/64 bytes), and updatetime is stored as a 4-byte float, so it is
    kept with reduced precision.
    """
    FMT="256s256s64sLfL" # record layout: {url,title,host,size,updatetime,pageptr}
    # Anchor the data directory on the absolute module path.  When the script
    # is started by bare file name, os.path.split(__file__)[0] is "" and
    # ""+"/data/..." would point at the filesystem root.
    _DATA_DIR=os.path.join(os.path.dirname(os.path.abspath(__file__)),"data")
    RECORD_FILE=os.path.join(_DATA_DIR,"record.dat")
    PAGE_FILE=os.path.join(_DATA_DIR,"page.dat")
    _NUL=struct.pack("x") # one NUL pad byte; a byte string on Python 2 and 3

    def __init__(self):
        """Open (creating if missing) both data files and both indexes."""
        # records: url, title, host, size, update time, page pointer
        self.f1=self._open_rw(CRecord.RECORD_FILE)
        # raw page contents
        self.f2=self._open_rw(CRecord.PAGE_FILE)
        # {URL => position of record}
        self.urlix=bsddb.btopen(os.path.join(CRecord._DATA_DIR,"url.idx"),"c")
        # {position of record => URL}
        self.posix=bsddb.btopen(os.path.join(CRecord._DATA_DIR,"pos.idx"),"c")

    def _open_rw(self,path):
        """Open path for binary read/write without truncating existing data."""
        if os.path.exists(path):
            return open(path,"rb+")
        return open(path,"wb+")

    def close(self):
        """Close the data files and indexes; safe to call more than once."""
        for name in ("f1","f2","urlix","posix"):
            handle=getattr(self,name,None) # may be unset if __init__ failed
            if handle is not None:
                handle.close()
                setattr(self,name,None)

    def __del__(self):
        # __del__ (not __delete__) is the hook run when the object is destroyed.
        self.close()

    def __delete__(self):
        # Kept only so that existing explicit rs.__delete__() calls still work;
        # __delete__ is the descriptor hook and is never invoked on destruction.
        self.close()

    def _append_page(self,_page):
        """Append _page to the page file and return the offset it starts at."""
        self.f2.seek(0,2)
        pageptr=self.f2.tell()
        self.f2.write(_page)
        return pageptr

    def add(self,_url,_title,_host,_uptime,_page):
        """Insert the page for _url, or update it if _url is already indexed.

        _url, _title, _host and _page are byte strings; _uptime is a number
        stored in the record's float field.  Returns None.
        """
        size=len(_page) # size of the page content
        if _url not in self.urlix:
            pageptr=self._append_page(_page)
            self.f1.seek(0,2)
            pos=self.f1.tell() # offset of the record about to be written
            self.f1.write(struct.pack(CRecord.FMT,_url,_title,_host,size,_uptime,pageptr))
            # Flush the data files before the indexes are synced so that an
            # index entry never refers to bytes still sitting in a buffer.
            self.f1.flush()
            self.f2.flush()
            self.urlix[_url]=str(pos)
            self.posix[str(pos)]=_url
            self.urlix.sync()
            self.posix.sync()
        else: # update an existing record
            pos=int(self.urlix[_url])
            self.f1.seek(pos)
            old=struct.unpack(CRecord.FMT,self.f1.read(struct.calcsize(CRecord.FMT)))
            oldsize,pageptr=old[3],old[5]
            if size>oldsize:
                # The new page does not fit in the old slot; writing it in
                # place would overwrite the pages stored after it.
                pageptr=self._append_page(_page)
            else:
                self.f2.seek(pageptr)
                self.f2.write(_page)
            self.f1.seek(pos)
            self.f1.write(struct.pack(CRecord.FMT,_url,_title,_host,size,_uptime,pageptr))
            self.f1.flush()
            self.f2.flush()

    def get(self,pos):
        """Return (url,title,host,size,updatetime,page) for the record at pos.

        pos is a byte offset into the record file.  struct.error is raised
        when fewer than one full record can be read at that offset.
        """
        self.f1.seek(pos)
        tmp=struct.unpack(CRecord.FMT,self.f1.read(struct.calcsize(CRecord.FMT)))
        size,pageptr=tmp[3],tmp[5]
        self.f2.seek(pageptr)
        page=self.f2.read(size)
        nul=CRecord._NUL # strip the NUL padding struct adds to short strings
        return (tmp[0].rstrip(nul),tmp[1].rstrip(nul),tmp[2].rstrip(nul),size,tmp[4],page)

    def amount(self):
        """Return the total number of indexed records."""
        return len(self.urlix)

if __name__=="__main__":
    # Manual smoke test: open the store, print one record and the record count.
    import time
    rs=CRecord()
    # Sample insert, left disabled (time is imported only for this call).
    #rs.add("http://www.bbb.com/s2.html","hello","www.bbb.com",time.time(),"人民玩水".decode('gbk').encode('utf-8'))
    # Reads the record stored at byte offset calcsize(FMT).
    # NOTE(review): that is the second record slot, so this presumably needs
    # at least two stored records -- confirm.
    # print(x) with a single argument prints the same on Python 2 and 3.
    print(rs.get(struct.calcsize(CRecord.FMT)))
    print(rs.amount())




On 9/16/07, fei. He <hefei267在gmail.com> wrote:
>
>
> 我才开始学习python，碰到个问题请教一下大家：python是不是能够以二进制的方式读取随机存取文件中的内容？可能有点说的不清楚啊，呵呵。实际上我就是想说，python中有没有什么方法可以完成c语言中的fwrite，fread和fseek这三个函数的功能？
>
> 谢谢了！
>
> _______________________________________________
> python-chinese
> Post: send python-chinese在lists.python.cn
> Subscribe: send subscribe to python-chinese-request在lists.python.cn
> Unsubscribe: send unsubscribe to  python-chinese-request在lists.python.cn
> Detail Info: http://python.cn/mailman/listinfo/python-chinese
>
-------------- 下一部分 --------------
一个HTML附件被移除...
URL: http://python.cn/pipermail/python-chinese/attachments/20070916/2658b2f5/attachment.html 

[导入自Mailman归档:http://www.zeuux.org/pipermail/zeuux-python]

2007年09月16日 星期日 12:59

fei.He hefei267在gmail.com
星期日 九月 16 12:59:14 HKT 2007

但是好像python的文档里说file
object的write方法写入的是一个字符串啊，那如果我要精确的控制写入一个int型的数据（占4个字节），或者写入一个char型的数据（占1个字节）该怎么做啊？又或者一个数据的值是123（int型的占4个字节），但是我非要把它写成一个long类型（占8个字节）那又该怎么做呢？

谢谢了。

在07-9-16，junyi sun <ccnusjy在gmail.com> 写道：
>
> 可以啊，file对象有write,read,seek等方法。file对象可以通过open(..)函数得到。
>
> 贴一点儿我读写文件的代码片段吧。
>
> # -*- coding: cp936 -*-
> import struct
> import bsddb,os
> class CRecord:
>     FMT="256s256s64sLfL" #´æ´¢¸ñʽ{url,title,size,updatetime,pageptr}
>     RECORD_FILE=os.path.split(__file__)[0]+"/data/record.dat"
>     PAGE_FILE=os.path.split(__file__)[0]+"/data/page.dat"
>     def __init__(self):
>
>         if not os.path.exists(CRecord.RECORD_FILE ):
>             self.f1=open(CRecord.RECORD_FILE,"wb+") #´æÍøÖ·¡¢±êÌâ¡¢¸üÐÎʱ¼ä¡¢ÄÚÈÝÖ¸Õë
>         else:
>             self.f1=open(CRecord.RECORD_FILE,"rb+") #´æÍøÖ·¡¢±êÌâ¡¢¸üÐÎʱ¼ä¡¢ÄÚÈÝÖ¸Õë
>
>         if not os.path.exists (CRecord.PAGE_FILE):
>             self.f2=open(CRecord.PAGE_FILE,'wb+') #´æÍøÒ³ÄÚÈÝ
>         else:
>             self.f2=open(CRecord.PAGE_FILE,'rb+') #´æÍøÒ³ÄÚÈÝ
>
>         self.urlix=bsddb.btopen( os.path.split(__file__)[0]+"/data/url.idx","c");
> #{URL=>position of record}
>         self.posix=bsddb.btopen(os.path.split(__file__)[0]+"/data/pos.idx","c");
> #{position of record=>URL}
>
>     def __delete__(self):
>         self.f1.close()
>         self.f2.close()
>         self.urlix.close()
>         self.posix.close()
>
>     def add(self,_url,_title,_host,_uptime,_page):
>         if not self.urlix.has_key(_url):
>             self.f1.seek(0,2)
>             self.f2.seek(0,2)
>             size=len(_page) #ÍøÒ³ÄÚÈݵĴóС
>             self.f2.write(_page)
>             pageptr=self.f2.tell()-size #µÃµ½ÍøÒ³ÄÚÈÝÖ¸Õë
>             dat=struct.pack(CRecord.FMT,_url,_title,_host,size,_uptime,pageptr)
> #ÐγÉÒ»¸örecord
>             self.f1.write(dat) #°ÑrecordдÈëÎļþ
>             pos=self.f1.tell()-struct.calcsize(CRecord.FMT) #µÃµ½recordµÄÖ¸Õë
>             self.urlix[_url]=str(pos)
>             self.posix[str(pos)]=_url
>             self.urlix.sync()
>             self.posix.sync()
>
>         else:#¸üÐÂ
>             pos=int(self.urlix[_url])
>             self.f1.seek(pos)
>             dat=self.f1.read(struct.calcsize(CRecord.FMT))
>             tmp=struct.unpack(CRecord.FMT,dat)
>             pageptr=tmp[5]
>
>             self.f2.seek(pageptr)
>             self.f2.write(_page)
>
>             self.f1.seek(pos)
>             size=len(_page)
>             dat=struct.pack(CRecord.FMT
> ,_url,_title,_host,size,_uptime,pageptr)
>             self.f1.write (dat)
>
>     def get(self,pos):#»ñȡijһλÖÃÉϵļǼ
>         self.f1.seek(pos)
>         dat=self.f1.read(struct.calcsize(CRecord.FMT))
>         tmp=struct.unpack(CRecord.FMT,dat)
>         pageptr=tmp[5]
>         self.f2.seek(pageptr)
>         size=tmp[3]
>         page=self.f2.read(size)
>         return
> (tmp[0].rstrip('\0'),tmp[1].rstrip('\0'),tmp[2].rstrip('\0'),tmp[3],tmp[4],page)
>
>     def amount(self):#¼Ç¼µÄ×ÜÌõÊý
>         return len(self.urlix)
>
> if __name__=="__main__":
>     import time
>     rs=CRecord()
>     #rs.add("http://www.bbb.com/s2.html","hello"," www.bbb.com",time.time
> (),"ÈËÃñÍæË®".decode('gbk').encode('utf-8'))
>     print rs.get(struct.calcsize(CRecord.FMT))
>     print rs.amount()
>
>
>
>
> On 9/16/07, fei. He <hefei267在gmail.com> wrote:
> >
> >
> > 我才开始学习python，碰到个问题请教一下大家：python是不是能够以二进制的方式读取随机存取文件中的内容？可能有点说的不清楚啊，呵呵。实际上我就是想说，python中有没有什么方法可以完成c语言中的fwrite，fread和fseek这三个函数的功能？
> >
> > 谢谢了！
> >
> > _______________________________________________
> > python-chinese
> > Post: send python-chinese在lists.python.cn
> > Subscribe: send subscribe to python-chinese-request在lists.python.cn
> > Unsubscribe: send unsubscribe to
> > python-chinese-request在lists.python.cn
> > Detail Info: http://python.cn/mailman/listinfo/python-chinese
> >
>
>
> _______________________________________________
> python-chinese
> Post: send python-chinese在lists.python.cn
> Subscribe: send subscribe to python-chinese-request在lists.python.cn
> Unsubscribe: send unsubscribe to  python-chinese-request在lists.python.cn
> Detail Info: http://python.cn/mailman/listinfo/python-chinese
>
-------------- 下一部分 --------------
一个HTML附件被移除...
URL: http://python.cn/pipermail/python-chinese/attachments/20070916/6c4ee210/attachment.html 

[导入自Mailman归档:http://www.zeuux.org/pipermail/zeuux-python]

2007年09月16日 星期日 13:05

lvhongqing2008 lvhongqing2008在sohu.com
星期日 九月 16 13:05:04 HKT 2007

看一下struct模块就知道了.





lvhongqing2008
2007-09-16



发件人： fei.He
发送时间： 2007-09-16 12:59:35
收件人： python-chinese在lists.python.cn
抄送：
主题： Re: [python-chinese]想请教一下python中文件读写的问题。

但是好像python的文档里说file object的write方法写入的是一个字符串啊，那如果我要精确的控制写入一个int型的数据（占4个字节），或者写入一个char型的数据（占1个字节）该怎么做啊？又或者一个数据的值是123（int型的占4个字节），但是我非要把它写成一个long类型（占8个字节）那又该怎么做呢？

谢谢了。


在07-9-16， junyi sun <ccnusjy在gmail.com> 写道：
可以啊，file对象有write,read,seek等方法。file对象可以通过open(..)函数得到。

贴一点儿我读写文件的代码片段吧。

# -*- coding: cp936 -*-
import struct
import bsddb,os
class CRecord:
    """Store crawled pages as fixed-size records plus a separate page file.

    Every record in RECORD_FILE is one struct packed with FMT; its size and
    pageptr fields locate the page bytes inside PAGE_FILE.  Two bsddb B-tree
    indexes map URL -> record offset and record offset -> URL.

    struct.pack truncates strings that are longer than their field
    (256/256/64 bytes), and updatetime is stored as a 4-byte float, so it is
    kept with reduced precision.
    """
    FMT="256s256s64sLfL" # record layout: {url,title,host,size,updatetime,pageptr}
    # Anchor the data directory on the absolute module path.  When the script
    # is started by bare file name, os.path.split(__file__)[0] is "" and
    # ""+"/data/..." would point at the filesystem root.
    _DATA_DIR=os.path.join(os.path.dirname(os.path.abspath(__file__)),"data")
    RECORD_FILE=os.path.join(_DATA_DIR,"record.dat")
    PAGE_FILE=os.path.join(_DATA_DIR,"page.dat")
    _NUL=struct.pack("x") # one NUL pad byte; a byte string on Python 2 and 3

    def __init__(self):
        """Open (creating if missing) both data files and both indexes."""
        # records: url, title, host, size, update time, page pointer
        self.f1=self._open_rw(CRecord.RECORD_FILE)
        # raw page contents
        self.f2=self._open_rw(CRecord.PAGE_FILE)
        # {URL => position of record}
        self.urlix=bsddb.btopen(os.path.join(CRecord._DATA_DIR,"url.idx"),"c")
        # {position of record => URL}
        self.posix=bsddb.btopen(os.path.join(CRecord._DATA_DIR,"pos.idx"),"c")

    def _open_rw(self,path):
        """Open path for binary read/write without truncating existing data."""
        if os.path.exists(path):
            return open(path,"rb+")
        return open(path,"wb+")

    def close(self):
        """Close the data files and indexes; safe to call more than once."""
        for name in ("f1","f2","urlix","posix"):
            handle=getattr(self,name,None) # may be unset if __init__ failed
            if handle is not None:
                handle.close()
                setattr(self,name,None)

    def __del__(self):
        # __del__ (not __delete__) is the hook run when the object is destroyed.
        self.close()

    def __delete__(self):
        # Kept only so that existing explicit rs.__delete__() calls still work;
        # __delete__ is the descriptor hook and is never invoked on destruction.
        self.close()

    def _append_page(self,_page):
        """Append _page to the page file and return the offset it starts at."""
        self.f2.seek(0,2)
        pageptr=self.f2.tell()
        self.f2.write(_page)
        return pageptr

    def add(self,_url,_title,_host,_uptime,_page):
        """Insert the page for _url, or update it if _url is already indexed.

        _url, _title, _host and _page are byte strings; _uptime is a number
        stored in the record's float field.  Returns None.
        """
        size=len(_page) # size of the page content
        if _url not in self.urlix:
            pageptr=self._append_page(_page)
            self.f1.seek(0,2)
            pos=self.f1.tell() # offset of the record about to be written
            self.f1.write(struct.pack(CRecord.FMT,_url,_title,_host,size,_uptime,pageptr))
            # Flush the data files before the indexes are synced so that an
            # index entry never refers to bytes still sitting in a buffer.
            self.f1.flush()
            self.f2.flush()
            self.urlix[_url]=str(pos)
            self.posix[str(pos)]=_url
            self.urlix.sync()
            self.posix.sync()
        else: # update an existing record
            pos=int(self.urlix[_url])
            self.f1.seek(pos)
            old=struct.unpack(CRecord.FMT,self.f1.read(struct.calcsize(CRecord.FMT)))
            oldsize,pageptr=old[3],old[5]
            if size>oldsize:
                # The new page does not fit in the old slot; writing it in
                # place would overwrite the pages stored after it.
                pageptr=self._append_page(_page)
            else:
                self.f2.seek(pageptr)
                self.f2.write(_page)
            self.f1.seek(pos)
            self.f1.write(struct.pack(CRecord.FMT,_url,_title,_host,size,_uptime,pageptr))
            self.f1.flush()
            self.f2.flush()

    def get(self,pos):
        """Return (url,title,host,size,updatetime,page) for the record at pos.

        pos is a byte offset into the record file.  struct.error is raised
        when fewer than one full record can be read at that offset.
        """
        self.f1.seek(pos)
        tmp=struct.unpack(CRecord.FMT,self.f1.read(struct.calcsize(CRecord.FMT)))
        size,pageptr=tmp[3],tmp[5]
        self.f2.seek(pageptr)
        page=self.f2.read(size)
        nul=CRecord._NUL # strip the NUL padding struct adds to short strings
        return (tmp[0].rstrip(nul),tmp[1].rstrip(nul),tmp[2].rstrip(nul),size,tmp[4],page)

    def amount(self):
        """Return the total number of indexed records."""
        return len(self.urlix)

if __name__=="__main__":
    # Manual smoke test: open the store, print one record and the record count.
    import time
    store=CRecord()
    # Sample insert, left disabled (time is imported only for this call).
    #store.add(" http://www.bbb.com/s2.html","hello"," www.bbb.com",time.time(),"人民玩水".decode('gbk').encode('utf-8'))
    # Byte offset of the record to show: exactly one record length into the file.
    record_offset=struct.calcsize(CRecord.FMT)
    print(store.get(record_offset))
    print(store.amount())
    




On 9/16/07, fei. He <hefei267在gmail.com > wrote:
我才开始学习python，碰到个问题请教一下大家：python是不是能够以二进制的方式读取随机存取文件中的内容？可能有点说的不清楚啊，呵呵。实际上我就是想说，python中有没有什么方法可以完成c语言中的fwrite，fread和fseek这三个函数的功能？

谢谢了！

_______________________________________________
python-chinese
Post: send python-chinese在lists.python.cn 
Subscribe: send subscribe to python-chinese-request在lists.python.cn
Unsubscribe: send unsubscribe to   python-chinese-request在lists.python.cn
Detail Info: http://python.cn/mailman/listinfo/python-chinese



_______________________________________________
python-chinese
Post: send python-chinese在lists.python.cn 
Subscribe: send subscribe to python-chinese-request在lists.python.cn
Unsubscribe: send unsubscribe to   python-chinese-request在lists.python.cn
Detail Info: http://python.cn/mailman/listinfo/python-chinese
-------------- 下一部分 --------------
一个HTML附件被移除...
URL: http://python.cn/pipermail/python-chinese/attachments/20070916/f4d9cfb2/attachment.html 

[导入自Mailman归档:http://www.zeuux.org/pipermail/zeuux-python]

如下红色区域有误,请重新填写。

    你的回复:

    请 登录 后回复。还没有在Zeuux哲思注册吗?现在 注册 !

    Zeuux © 2025

    京ICP备05028076号