C/C++ url 解析/拆解 演算法初始化版本

C/C++ url 解析/拆解 演算法初始化版本

C/C++ url 解析/拆解 演算法初始化版本


資料來源:https://mp.weixin.qq.com/s/dfcFyPTdZOLuVnRopcoxSg

https://gitee.com/yikoulinux/url


code:

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>			 /* strncasecmp */

#include <sys/types.h>			 /* See NOTES */
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netdb.h>


#define _URL_DEBUG 0

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))


#define HEAD_FTP_P "ftp://"
#define HEAD_FTPS_P "ftps://"   
#define HEAD_FTPES_P "ftpes://"
#define HEAD_HTTP_P "http://"
#define HEAD_HTTPS_P "https://"


#define PORT_FTP 21
#define PORT_FTPS_I 990 //implicit
#define PORT_FTPS_E 21 //explicit
#define PORT_HTTP 80
#define PORT_HTTPS 443

#define URL_ERROR -1
#define URL_OK 0


struct  pro_port {
	char  pro_s [ 32 ];
	unsigned  short  port ;
};

/* Table of recognised URL schemes and the port recorded for each one. */
struct pro_port g_pro_port[] = {
	{ .pro_s = HEAD_FTP_P,   .port = PORT_FTP    },
	{ .pro_s = HEAD_FTPS_P,  .port = PORT_FTPS_I },
	{ .pro_s = HEAD_FTPES_P, .port = PORT_FTPS_E },
	{ .pro_s = HEAD_HTTP_P,  .port = PORT_HTTP   },
	{ .pro_s = HEAD_HTTPS_P, .port = PORT_HTTPS  },
};

/*
 * Buffer sizes, in bytes, used as char-array dimensions in this file.
 * A NUL-terminated string stored in such an array can hold one character
 * fewer than the value given here.
 */
#define MAX_COMM_NAME_LEN 128	/* general-purpose scratch buffer size */

#define MAX_URL_LEN 1024		/* working copy of a URL */
/* NOTE(review): the socket headers usually define INET_ADDRSTRLEN as well;
 * this definition looks redundant and is presumably only accepted because the
 * values match - confirm on the target platform. */
#define INET_ADDRSTRLEN 16
#define INET_DOMAINSTRLEN 128	/* host name / domain buffer */
#define MAX_PORT_LEN 6			/* port number as text */
#define MAX_PATH_FILE_LEN 256	/* path component of the URL */
#define MAX_IP_STR_LEN 32		/* IP address as text */

#define MAX_USER_LEN 32			/* user name buffer */
#define MAX_PASS_LEN 32			/* password buffer */


/* Decomposed URL: filled in by parse_url() / parse_domain_dir(); svr_ip is
 * written separately (main() passes it to dns_resoulve()). */
typedef  struct
{
	char  user [ MAX_USER_LEN ];	/* user name, empty when the URL has none */
	char  pass [ MAX_PASS_LEN ];	/* password, empty when the URL has none */
	char  domain [ INET_DOMAINSTRLEN ]; /* domain name (host part of the URL) */
	char  svr_dir [ MAX_PATH_FILE_LEN ];  /* file path on the server */
	char  svr_ip [ MAX_IP_STR_LEN ];	/* server IP address as text */
	int  port ;						/* port number */
} URL_RESULT_T ;


/* File-scope result object; main() hands it to parse_url() and prints its fields. */
URL_RESULT_T  url_result_t ;



/*
 * Store the len bytes starting at src into the dst_size-byte array dst as a
 * NUL-terminated string and zero-fill the rest of dst.
 * Returns 0 on success, -1 (dst untouched) when the text does not fit.
 */
static int url_store_field(char *dst, size_t dst_size, const char *src, size_t len)
{
	if (len >= dst_size) {
		return -1;
	}
	memset(dst, 0, dst_size);
	memcpy(dst, src, len);
	return 0;
}

/*
 * Rewrite path into out as "/seg1/seg2...". Empty segments (repeated or
 * trailing '/') are dropped and an empty path becomes "/".
 * Returns 0 on success, -1 when the result does not fit in out_size bytes.
 */
static int url_build_dir(const char *path, char *out, size_t out_size)
{
	size_t used = 0;

	if (out_size < 2) {
		return -1;
	}

	for (;;) {
		size_t seg_len;

		path += strspn(path, "/");		/* skip a run of separators */
		seg_len = strcspn(path, "/");
		if (seg_len == 0) {
			break;						/* reached the end of the string */
		}
		/* need room for '/', the segment and the final NUL */
		if (used + 1 + seg_len >= out_size) {
			return -1;
		}
		out[used++] = '/';
		memcpy(out + used, path, seg_len);
		used += seg_len;
		path += seg_len;
	}

	if (used == 0) {
		out[used++] = '/';				/* no path given: default to "/" */
	}
	out[used] = '\0';
	return 0;
}

/*
 * Split the part of a URL that follows the scheme prefix, i.e.
 *     [user[:pass]@]host[:port][/dir/file]
 * into the fields of *result.
 *
 * url    - NUL-terminated text such as "peng:pass@baidu.com:8080/dir/index.html".
 *          It is only read, never modified.
 * result - receives user, pass, domain and svr_dir. result->port is written
 *          only when the URL names a port, so a default stored by the caller
 *          beforehand survives. result->svr_ip is not touched.
 *
 * Returns URL_OK on success. Returns URL_ERROR, leaving *result unchanged,
 * when an argument is NULL, the text is MAX_URL_LEN bytes or longer, the host
 * is missing, a user name is empty, the port is not a decimal number in
 * 0..65535, or any component does not fit its destination field.
 *
 * Notes:
 *  - Only the authority (everything before the first '/') is searched for
 *    '@' and ':', so those characters may appear in the path. The last '@'
 *    of the authority ends the user information.
 *  - The path is stored with empty segments removed ("/a//b/" -> "/a/b").
 *  - Bracketed IPv6 literals ("[::1]:80") are not supported and are rejected.
 */
int parse_domain_dir(char *url, URL_RESULT_T *result)
{
	const char *authority_end;
	const char *cursor;
	const char *at = NULL;
	const char *colon;
	const char *user_s = "";
	const char *pass_s = "";
	const char *host_s;
	const char *port_s;
	size_t user_len = 0;
	size_t pass_len = 0;
	size_t host_len;
	size_t port_len = 0;
	size_t i;
	long port = 0;
	char dir_str[MAX_PATH_FILE_LEN];

	if (url == NULL || result == NULL) {
		printf("%s() %d \n ", __func__, __LINE__);
		return URL_ERROR;
	}
	if (strlen(url) >= MAX_URL_LEN) {
		printf("%s() %d url too long\n", __func__, __LINE__);
		return URL_ERROR;
	}

	/* "baidu.com:8080/dir/index.html": the authority stops at the first '/' */
	authority_end = url + strcspn(url, "/");

	/* locate the last '@' inside the authority, if there is one */
	for (cursor = url; cursor < authority_end; cursor++) {
		if (*cursor == '@') {
			at = cursor;
		}
	}

	/* extract user name and password */
	host_s = url;
	if (at != NULL) {
		colon = memchr(url, ':', (size_t)(at - url));
		user_s = url;
		user_len = (size_t)((colon != NULL ? colon : at) - url);
		if (colon != NULL) {
			pass_s = colon + 1;
			pass_len = (size_t)(at - pass_s);
		}
		if (user_len == 0 || user_len >= sizeof(result->user) ||
		    pass_len >= sizeof(result->pass)) {
			printf("%s() %d \n ", __func__, __LINE__);
			return URL_ERROR;
		}
		host_s = at + 1;
	}

	/* extract host name and optional port */
	colon = memchr(host_s, ':', (size_t)(authority_end - host_s));
	host_len = (size_t)((colon != NULL ? colon : authority_end) - host_s);
	if (host_len == 0) {
		printf("%s() %d there is no domain \n ", __func__, __LINE__);
		return URL_ERROR;
	}
	if (host_len >= sizeof(result->domain)) {
		printf("%s() %d invalid daomain length \n ", __func__, __LINE__);
		return URL_ERROR;
	}
	if (colon != NULL) {
		port_s = colon + 1;
		port_len = (size_t)(authority_end - port_s);
		if (port_len >= MAX_PORT_LEN) {
			printf("%s() %d invalid port length \n ", __func__, __LINE__);
			return URL_ERROR;
		}
		/* at most MAX_PORT_LEN - 1 digits, so the sum cannot overflow a long */
		for (i = 0; i < port_len; i++) {
			if (!isdigit((unsigned char)port_s[i])) {
				printf("%s() %d invalid port\n", __func__, __LINE__);
				return URL_ERROR;
			}
			port = port * 10 + (port_s[i] - '0');
		}
		if (port > 65535) {
			printf("%s() %d invalid port\n", __func__, __LINE__);
			return URL_ERROR;
		}
	}

	/* extract the path; "baidu.com" without a path yields "/" */
	if (url_build_dir(authority_end, dir_str, sizeof(dir_str)) != 0 ||
	    strlen(dir_str) >= sizeof(result->svr_dir)) {
		printf("%s() %d path too long\n", __func__, __LINE__);
		return URL_ERROR;
	}

	/* everything has been validated: the stores below cannot fail */
	(void)url_store_field(result->user, sizeof(result->user), user_s, user_len);
	(void)url_store_field(result->pass, sizeof(result->pass), pass_s, pass_len);
	(void)url_store_field(result->domain, sizeof(result->domain), host_s, host_len);
	(void)url_store_field(result->svr_dir, sizeof(result->svr_dir), dir_str, strlen(dir_str));
	if (port_len > 0) {
		result->port = (int)port;
	}

#if _URL_DEBUG

	printf("user:%s \n ", result->user);
	printf("pass:%s \n ", result->pass);
	printf("port:%d \n ", result->port);
	printf("domain:%s \n ", result->domain);
	printf("svr_dir:%s \n\n\n ", result->svr_dir);
#endif
	return URL_OK;
}



/*
 * Delete every double-quote character (") from the NUL-terminated string
 * input, in place. All other characters keep their relative order.
 *
 * The string only ever shrinks, so no scratch buffer is needed and input may
 * be of any length. A NULL input is ignored.
 */
void remove_quotation_mark(char *input)
{
	const char *src;
	char *dst;

	if (input == NULL) {
		return;
	}

	/* dst trails src: every kept character is moved down over removed quotes */
	dst = input;
	for (src = input; *src != '\0'; src++) {
		if (*src != '"') {
			*dst++ = *src;
		}
	}
	*dst = '\0';
}


/*
 * Classify a host string.
 *
 * domain - NUL-terminated host text taken from a URL.
 *
 * Returns:
 *    1  domain is an IPv4 address in dotted-decimal form (inet_pton() accepts it)
 *    0  domain is anything else, presumably a host name that needs DNS
 *   -1  invalid parameter: NULL, empty, or longer than INET_DOMAINSTRLEN
 */
int check_is_ipv4(char *domain)
{
	struct in_addr addr;
	size_t len;

	if (domain == NULL) {
		printf("invalid domain length! \n ");
		return -1;
	}

	len = strlen(domain);
	if (len == 0 || len > INET_DOMAINSTRLEN) {
		printf("invalid domain length! \n ");
		return -1;
	}

	/* inet_pton() returns 1 only for a well-formed IPv4 address */
	if (inet_pton(AF_INET, domain, &addr) == 1) {
		return 1;
	}
	return 0;
}


/*
 * Parse a complete URL such as "ftp://user:pass@host:8080/dir/file".
 *
 * raw_url - NUL-terminated URL; it is copied before any processing and is not
 *           modified. Double-quote characters anywhere in it are ignored.
 * result  - receives the decomposed URL. result->port is first set to the
 *           port that g_pro_port pairs with the scheme; parse_domain_dir()
 *           then fills in the remaining fields from the text after the scheme.
 *
 * Returns URL_OK on success. Returns URL_ERROR when an argument is NULL, the
 * URL is MAX_URL_LEN bytes or longer, the scheme is not listed in g_pro_port,
 * or parse_domain_dir() rejects the remainder.
 */
int parse_url(char *raw_url, URL_RESULT_T *result)
{
	size_t i;
	size_t head_len = 0;
	char *body;
	char out_buf[MAX_URL_LEN];	/* private, writable copy of the url */

	if (raw_url == NULL || result == NULL) {
		printf("%s: Error: invalid parameter\n", __func__);
		return URL_ERROR;
	}
	if (strlen(raw_url) >= sizeof(out_buf)) {
		printf("%s: Error: url too long\n", __func__);
		return URL_ERROR;
	}
	strcpy(out_buf, raw_url);	/* length checked above */

	/* strip quotes, e.g. when the URL was passed as "\"ftp://host/x\"" */
	if (strchr(out_buf, '"') != NULL) {
		remove_quotation_mark(out_buf);
	}

	/* find the scheme prefix; the comparison ignores letter case */
	for (i = 0; i < ARRAY_SIZE(g_pro_port); i++) {
		head_len = strlen(g_pro_port[i].pro_s);
		if (strncasecmp(g_pro_port[i].pro_s, out_buf, head_len) == 0) {
			break;
		}
	}
	if (i == ARRAY_SIZE(g_pro_port)) {
		printf("%s: Error: invalid protocol %s \n ", __func__, out_buf);
		return URL_ERROR;
	}

	/* default port for this scheme; an explicit port in the URL overrides it */
	result->port = g_pro_port[i].port;
	body = out_buf + head_len;

#if _URL_DEBUG
	printf("%s(): \n body: %s \n ", __func__, body);
#endif

	/* parse domain name, port number and file directory */
	if (parse_domain_dir(body, result) == URL_ERROR) {
		printf("parse_domain_dir() err \n ");
		return URL_ERROR;
	}
	return URL_OK;
}



/*
 * Resolve a host name with gethostbyname() and write the first address
 * returned, in text form, to svr_ip.
 *
 * svr_ip - output buffer; must provide at least MAX_IP_STR_LEN bytes.
 * domain - NUL-terminated host name to look up.
 *
 * Returns URL_OK when svr_ip was filled in. Returns URL_ERROR, leaving svr_ip
 * untouched, when an argument is NULL, the lookup fails, the address family
 * is neither AF_INET nor AF_INET6, no address is returned, or the address
 * text does not fit in MAX_IP_STR_LEN bytes.
 *
 * NOTE(review): gethostbyname() returns a pointer to static storage, so this
 * function is presumably not safe to call from several threads at once.
 */
int dns_resoulve(char *svr_ip, const char *domain)
{
	struct hostent *hptr;
	char str[MAX_IP_STR_LEN];
#if _URL_DEBUG
	char **pptr;
	const char *text;
#endif

	if (svr_ip == NULL || domain == NULL) {
		printf("%s: invalid parameter\n", __func__);
		return URL_ERROR;
	}

	hptr = gethostbyname(domain);
	if (hptr == NULL) {
		printf(" gethostbyname error for host:%s \n ", domain);
		return URL_ERROR;
	}

#if _URL_DEBUG
	printf("official hostname:%s \n ", hptr->h_name);
	for (pptr = hptr->h_aliases; *pptr != NULL; pptr++) {
		printf(" alias:%s \n ", *pptr);
	}
#endif

	if (hptr->h_addrtype != AF_INET && hptr->h_addrtype != AF_INET6) {
		printf("unknown address type \n ");
		return URL_ERROR;
	}
	if (hptr->h_addr_list == NULL || hptr->h_addr_list[0] == NULL) {
		printf("%s: no address for host:%s\n", __func__, domain);
		return URL_ERROR;
	}

#if _URL_DEBUG
	for (pptr = hptr->h_addr_list; *pptr != NULL; pptr++) {
		text = inet_ntop(hptr->h_addrtype, *pptr, str, sizeof(str));
		printf(" address:%s \n ", text != NULL ? text : "(unprintable)");
	}
	text = inet_ntop(hptr->h_addrtype, hptr->h_addr_list[0], str, sizeof(str));
	printf(" first address: %s \n ", text != NULL ? text : "(unprintable)");
#endif

	/* inet_ntop() returns NULL when str is too small, e.g. a long IPv6 address */
	if (inet_ntop(hptr->h_addrtype, hptr->h_addr_list[0], str, sizeof(str)) == NULL) {
		printf("%s: cannot format address for host:%s\n", __func__, domain);
		return URL_ERROR;
	}
	snprintf(svr_ip, MAX_IP_STR_LEN, "%s", str);

	return URL_OK;
}


void  main ( void )
{
	int  ret ;
	
	char  url_str1 [ 256 ] = "ftp://peng:pass@baidu.com:8080/dir/index.html" ;
	char  url_str2 [ 256 ] = "ftp://baidu.com:8080/dir/index.html" ;
	char  url_str3 [ 256 ] = "ftp://peng:pass@baidu.com:8080" ;
	char  url_str4 [ 256 ] = "ftp://peng:pass@baidu.com/dir/index.html" ;
/*	
	parse_url(url_str1,&url_result_t);
	parse_url(url_str2,&url_result_t);
	parse_url(url_str3,&url_result_t);
	parse_url(url_str4,&url_result_t);
*/
	parse_url ( url_str4 , & url_result_t );

	ret  =  check_is_ipv4 ( url_result_t . domain );	
	if ( ret  !=  1 )
	{	
		//dns
		dns_resoulve ( url_result_t . svr_ip , url_result_t . domain );
	}
	printf ( " \n -------------result--------------- \n " );

	printf ( "user:%s \n " , url_result_t . user );
	printf ( "pass:%s \n " , url_result_t . pass );
	printf ( "port:%d \n " , url_result_t . port );
	printf ( "domain:%s \n " , url_result_t . domain );
	printf ( "svr_dir:%s \n " , url_result_t . svr_dir );
	printf ( "svr_ip:%s \n " , url_result_t . svr_ip );

	printf ( "-------------end--------------- \n " );
}

發表迴響

你的電子郵件位址並不會被公開。 必要欄位標記為 *