罗万华

罗万华的博客

他的个人主页  他的博客

URL Parser C 实现

罗万华  2009年10月28日 星期三 23:57 | 2609次浏览 | 8条评论

没有事儿，正好手头上有个小项目要做，需要解析 URL，就随便做了个，大家见笑了。很多地方不完善，基本功能实现了，什么年代了，先将就着用吧。

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// One query-string parameter, kept as a node of a singly linked list.
struct GET_PARAM
{
    char name[255];             // text of the token introduced by '?' or '&'
    char value[255];            // text of the token introduced by '='
    struct GET_PARAM *m_pNext;  // following parameter, NULL at the end of the list
};
// Result of parsing one URL; created by PaserURL and destroyed by ReleaseURL.
struct URL_DATA
{
    char PROTOCOL_NAME[32];        // upper-case scheme name; PaserURL starts it at "HTTP"
    char hostname[255];            // first '/'-introduced token that is stored
    char path[255];                // later '/'-introduced tokens, joined with '/'
    unsigned int port;             // PaserURL starts it at 80
    struct GET_PARAM *pParamList;  // head of the query-parameter list, may be NULL
};
// Token types stored in Word::nType. Apart from WORD_FIRST, the type is the
// delimiter character that the lexer consumed immediately before the word.
// The lexer also recognizes '\\' as a delimiter, but no constant is defined
// for it here.

// Word that was not preceded by any delimiter.
#define WORD_FIRST 0
// Word preceded by ':' (PaserURL reads it as a port number).
#define WORD_COLON ':'
// Word preceded by '/' (PaserURL reads it as host name or path segment).
#define WORD_DIR '/'
// Word preceded by '?' (PaserURL reads it as a parameter name).
#define WORD_PARAM_FIRST '?'
// Word preceded by '&' (PaserURL reads it as a parameter name).
#define WORD_PARAM '&'
// Word preceded by '=' (PaserURL reads it as a parameter value).
#define WORDPARAM_VALUE '='

// One lexer token, kept as a node of a singly linked list.
struct Word
{
    char value[255];       // token text; may be empty
    int nType;             // 0, or the delimiter character that preceded the text
    struct Word *m_pNext;  // following token, NULL at the end of the list
};

// Parses strURL into a newly allocated URL_DATA; release it with ReleaseURL.
URL_DATA * PaserURL(char * strURL);
// Appends pParam to the end of pData's parameter list and returns pData.
URL_DATA * AddGET_PARAM(URL_DATA * pData, GET_PARAM * pParam );
// Deletes every GET_PARAM in pData's parameter list, then pData itself.
void ReleaseURL(URL_DATA * pData);
// Tokenizes string, appending the tokens to the list pHead (which may be
// NULL), and returns the head of the resulting list.
Word * Lexier(char *string,Word * pHead);
// Deletes every node of the token list starting at pHead.
void ReleaseWords (Word * pHead ) ;
// Appends pNode to the end of the list pHead and returns the list head
// (pNode itself when pHead is NULL).
Word * AddWord (Word * pHead,Word * pNode );
// Appends pParam to the end of pData's query-parameter list.
//
// The node is linked in as-is: its own m_pNext is left untouched, so any
// nodes already chained behind pParam come along with it.
// Returns pData.
URL_DATA * AddGET_PARAM(URL_DATA * pData, GET_PARAM * pParam )
{
    // Follow the chain of links until the one that is still NULL; for an
    // empty list that link is the list head itself.
    GET_PARAM **ppLink = &pData->pParamList;
    while (*ppLink != NULL)
    {
        ppLink = &(*ppLink)->m_pNext;
    }

    *ppLink = pParam;
    return pData;
}
// Appends pNode to the end of the token list that starts at pHead.
//
// Returns the head of the resulting list: pHead when the list was not empty,
// otherwise pNode. pNode's own m_pNext is left untouched.
Word * AddWord (Word * pHead,Word * pNode )
{
    // Follow the chain of links until the one that is still NULL; for an
    // empty list that link is the local head pointer itself.
    Word **ppLink = &pHead;
    while (*ppLink != NULL)
    {
        ppLink = &(*ppLink)->m_pNext;
    }

    *ppLink = pNode;
    return pHead;
}
// Deletes every node of the token list starting at pHead.
// A NULL pHead is accepted and does nothing.
void ReleaseWords(Word * pHead )
{
    for (Word *pNode = pHead; pNode != NULL; )
    {
        // Read the successor before the node is destroyed.
        Word *pFollowing = pNode->m_pNext;
        delete pNode;
        pNode = pFollowing;
    }
}
// Returns non-zero when c is one of the characters that starts a new token.
static int IsUrlDelimiter(int c)
{
    // strchr also matches the terminating NUL, so exclude 0 explicitly.
    return c != 0 && strchr("\\/?:&=", c) != NULL;
}

// Splits a URL string into tokens and appends them to the list pHead.
//
// Each token is an optional leading delimiter (one of \ / ? : & =) followed
// by a run of non-delimiter, non-whitespace characters. The delimiter, or 0
// when there is none, is stored in Word::nType and the run in Word::value.
// The run may be empty, e.g. for the first '/' of "//".
//
// Whitespace between tokens is skipped. Text longer than the value buffer is
// truncated to fit; the excess characters are consumed and discarded.
//
// string - NUL-terminated input; NULL is treated like an empty string.
// pHead  - existing list to extend, or NULL to start a new one.
//
// Returns the head of the resulting list (pHead unchanged when it was not
// NULL). Nodes are allocated with new; free the list with ReleaseWords.
Word * Lexier(char *string,Word * pHead)
{
    if (string == NULL)
        return pHead;

    // Find the current tail once so that every append below is O(1).
    Word *pTail = pHead;
    while (pTail != NULL && pTail->m_pNext != NULL)
        pTail = pTail->m_pNext;

    // <ctype.h> functions require values representable as unsigned char.
    const unsigned char *p = (const unsigned char *)string;

    for (;;)
    {
        while (isspace(*p))
            ++p;
        if (*p == 0)
            break;

        int nType = 0;
        if (IsUrlDelimiter(*p))
        {
            nType = *p;
            ++p;
        }

        Word *pWord = new Word;
        memset(pWord, 0, sizeof(Word));
        pWord->nType = nType;

        // Every pass consumes a delimiter and/or at least one word character,
        // so the outer loop always makes progress.
        size_t len = 0;
        while (*p != 0 && !isspace(*p) && !IsUrlDelimiter(*p))
        {
            // Keep one byte for the terminator that memset already wrote.
            if (len < sizeof(pWord->value) - 1)
                pWord->value[len++] = (char)*p;
            ++p;
        }

        if (pTail == NULL)
            pHead = pWord;
        else
            pTail->m_pNext = pWord;
        pTail = pWord;
    }

    return pHead;
}
URL_DATA * PaserURL(char * strURL)
{
 URL_DATA * pData = new URL_DATA ;
 memset (pData , 0 ,sizeof (URL_DATA) );
 strcpy (pData->PROTOCOL_NAME,"HTTP");
 pData->port = 80;

 GET_PARAM * pParam = NULL;
 Word * pWord = NULL ;
 Word * pWords = Lexier(strURL,NULL);

 char * PROTOCOLS [] =
 {
  "FTP",
  "HTTP",
  "FILE",
 };
 int PORTS [] = {21,80,0};
 pWord = pWords;
 while (pWord)
 {
  switch (pWord->nType)
  {
  case WORD_FIRST:// 0
   {
    char buffer[255] = {0};
    strcpy (buffer,pWord->value);
    strupr(buffer);
    for (int i = 0 ; i <  sizeof (PROTOCOLS )/ sizeof ( PROTOCOLS[0]);i++)
    {
     if (strcmp (PROTOCOLS[i],buffer)== 0 )
     {
      strcpy(pData->PROTOCOL_NAME,buffer);
      pData->port = PORTS[i];
      break;
     }
    }
   }
   break;
  case WORD_COLON:// ':'
   {
    if (strlen(pWord->value) != 0)
    {
     pData->port = atoi (pWord->value);

    }
    break;
   }
   break;
  case WORD_DIR: //'/'
   if (strlen (pData->hostname) == 0)
   {
    strcpy(pData->hostname,pWord->value);
   }
   else
   {
    char buffer [1024] = {0};
    strcpy (buffer,pData->path);
    sprintf (pData->path,"%s/%s",buffer,pWord->value);
   }
   break;
  case WORD_PARAM_FIRST:// '?'
  case WORD_PARAM: //'&'
   {
    pParam = new GET_PARAM;
    memset (pParam,0,sizeof(GET_PARAM));
    strcpy (pParam->name,pWord->value);
   }
   break;
  case WORDPARAM_VALUE:// '='
   if (pParam)
   {
    strcpy (pParam->value,pWord->value);
    AddGET_PARAM(pData,pParam);
    pParam = NULL;
   }
   break;
  }
  pWord = pWord->m_pNext;
 }
 ReleaseWords(pWords);
 return pData;
}
// Frees a URL_DATA returned by PaserURL: every GET_PARAM in its parameter
// list, then the URL_DATA itself.
// A NULL pData is accepted and does nothing, matching ReleaseWords.
void ReleaseURL(URL_DATA * pData)
{
    if (pData == NULL)
        return;

    GET_PARAM * pParam = pData->pParamList;
    while (pParam != NULL)
    {
        // Read the successor before the node is destroyed.
        GET_PARAM * pNext = pParam->m_pNext;
        delete pParam;
        pParam = pNext;
    }
    delete pData;
}


int _tmain(int argc, _TCHAR* argv[])
{
 char url [] = " http://www.163.com/vvv.aspx?acc=1&bdd=2&cvvv=3 ";
 URL_DATA * pData = PaserURL(url);
 printf ("\n %s ",url );
 printf ("\n\tPROTOCOL:%s",pData->PROTOCOL_NAME);
 printf ("\n\tPORT:   %d",pData->port);
 printf ("\n\tHostname:%s",pData->hostname);
 printf ("\n\tPath  :%s\n",pData->path);
 GET_PARAM * pParam = pData->pParamList;
 printf ("\n==PARAM_LIST====");
 while (pParam)
 {
  printf ("\n\tPARAM:%s VALUE %s",pParam->name,pParam->value);
  pParam = pParam->m_pNext;
 }
 printf ("\n==PARAM_LIST====\n");
 ReleaseURL(pData);
 return 0;
}

评论

我的评论:

发表评论

请 登录 后发表评论。还没有在Zeuux哲思注册吗?现在 注册 !
刘磊(V.L.)

回复 刘磊(V.L.)  2009年10月29日 星期四 08:42

同意楼下,没有对%20、‘+’这种转义编码进行解码

6条回复

王依依

回复 王依依  2009年10月29日 星期四 07:11

貌似不是 rfc 规范的。。。。。

0条回复

暂时没有评论

Zeuux © 2024

京ICP备05028076号