C/C++ url 解析/拆解 演算法初始化版本
C/C++ url 解析/拆解 演算法初始化版本
資料來源:https://mp.weixin.qq.com/s/dfcFyPTdZOLuVnRopcoxSg
https://gitee.com/yikoulinux/url
code:
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <ctype.h>
#include <sys/types.h> /* See NOTES */
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netdb.h>
#define _URL_DEBUG 0
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define HEAD_FTP_P "ftp://"
#define HEAD_FTPS_P "ftps://"
#define HEAD_FTPES_P "ftpes://"
#define HEAD_HTTP_P "http://"
#define HEAD_HTTPS_P "https://"
#define PORT_FTP 21
#define PORT_FTPS_I 990 //implicit
#define PORT_FTPS_E 21 //explicit
#define PORT_HTTP 80
#define PORT_HTTPS 443
#define URL_ERROR -1
#define URL_OK 0
struct pro_port {
char pro_s [ 32 ];
unsigned short port ;
};
struct pro_port g_pro_port [] = {
{ HEAD_FTP_P , PORT_FTP },
{ HEAD_FTPS_P , PORT_FTPS_I },
{ HEAD_FTPES_P , PORT_FTPS_E },
{ HEAD_HTTP_P , PORT_HTTP },
{ HEAD_HTTPS_P , PORT_HTTPS },
};
#define MAX_COMM_NAME_LEN 128
#define MAX_URL_LEN 1024
#define INET_ADDRSTRLEN 16
#define INET_DOMAINSTRLEN 128
#define MAX_PORT_LEN 6
#define MAX_PATH_FILE_LEN 256
#define MAX_IP_STR_LEN 32
#define MAX_USER_LEN 32
#define MAX_PASS_LEN 32
typedef struct
{
char user [ MAX_USER_LEN ];
char pass [ MAX_PASS_LEN ];
char domain [ INET_DOMAINSTRLEN ]; //域名
char svr_dir [ MAX_PATH_FILE_LEN ]; //文件路徑
char svr_ip [ MAX_IP_STR_LEN ];
int port ;
} URL_RESULT_T ;
URL_RESULT_T url_result_t ;
int parse_domain_dir ( char * url , URL_RESULT_T * result )
{
char * p ;
char * token = NULL ;
char buf [ MAX_URL_LEN ] = { 0 };
char dir_str [ MAX_PATH_FILE_LEN * 2 ] = { 0 };
char svr_dir [ MAX_COMM_NAME_LEN ] = { 0 };
char ip_port [ MAX_COMM_NAME_LEN * 2 ] = { 0 }; //域名:port
char domain [ INET_DOMAINSTRLEN ] = { 0 };
char port_s [ MAX_PORT_LEN ] = { 0 };
char user_pass [ MAX_USER_LEN + MAX_PASS_LEN ] = { 0 };
char user [ MAX_USER_LEN ] = { 0 };
char pass [ MAX_PASS_LEN ] = { 0 };
int test ;
p = buf ;
//strtok後buf中內容會被修改,所以先把url內容拷貝出來
strncpy ( buf , url , sizeof ( buf ));
//"baidu.com:8080/dir/index.html"
token = strtok ( buf , "@" );
if ( token != NULL ) {
#if _URL_DEBUG
printf ( "token=%s \n " , token );
#endif
if ( strlen ( token ) != strlen ( url )) //找不到@後,返回值指向頭
{
if ( strlen ( user_pass ) == 0 ) {
strncpy ( user_pass , token , sizeof ( user_pass ));
p += strlen ( user_pass ) + 1 ; //指向@後字符串
}
}
}
#if _URL_DEBUG
printf ( "user_pass=%s \n " , user_pass );
#endif
//提取用戶名密碼
if ( strlen ( user_pass ) > 0 )
{
token = strtok ( user_pass , ":" );
while ( token != NULL ) {
if ( strlen ( user ) == 0 ) {
strncpy ( user , token , sizeof ( user ));
if (( strlen ( user ) == 0 ) || ( strlen ( user ) > MAX_USER_LEN ))
{
printf ( "%s() %d \n " , __func__ , __LINE__ );
return URL_ERROR ;
}
} else {
strcat ( pass , token );
}
token = strtok ( NULL , "/" );
}
}
//192.168.1.100:990/LE2.0 192.168.20.10/dir1/dir2/index.html baidu.com/down baidu.com
//body: baidu.com:8080/dir/index.html
//獲取域名+端口號
token = strtok ( p , "/" );
while ( token != NULL ) {
if ( strlen ( ip_port ) == 0 ) {
strncpy ( ip_port , token , sizeof ( ip_port ));
if (( strlen ( ip_port ) == 0 ) || ( strlen ( ip_port ) > INET_DOMAINSTRLEN + MAX_PORT_LEN + 1 ))
{
printf ( "%s() %d \n " , __func__ , __LINE__ );
return URL_ERROR ;
}
} else {
strcat ( dir_str , "/" );
strcat ( dir_str , token );
}
token = strtok ( NULL , "/" );
}
#if _URL_DEBUG
printf ( "ip_port=%s \n " , ip_port );
printf ( "svr_dir:%s \n\n " , dir_str );
#endif
token = strtok ( ip_port , ":" );
while ( token != NULL ) {
if ( strlen ( domain ) == 0 ) {
strncpy ( domain , token , sizeof ( domain ));
if (( strlen ( domain ) == 0 ) || ( strlen ( domain ) > INET_DOMAINSTRLEN ))
{
printf ( "%s() %d invalid daomain length \n " , __func__ , __LINE__ );
return URL_ERROR ;
}
} else if ( strlen ( port_s ) == 0 ){
strncpy ( port_s , token , sizeof ( port_s ));
if (( strlen ( port_s ) == 0 ) || ( strlen ( port_s ) > MAX_PORT_LEN ))
{
printf ( "%s() %d invalid port length \n " , __func__ , __LINE__ );
return URL_ERROR ;
}
}
token = strtok ( NULL , ":" );
}
if ( strlen ( domain ) == 0 )
{
printf ( "%s() %d there is no domain \n " , __func__ , __LINE__ );
return URL_ERROR ;
}
if ( strlen ( port_s ) > 0 )
{
result -> port = atoi ( port_s );
}
if ( strlen ( dir_str ) == 0 ){
//baidu.com 沒有路徑用默認/
strcat ( dir_str , "/" );
}
strncpy ( result -> user , user , sizeof ( result -> user ));
strncpy ( result -> pass , pass , sizeof ( result -> pass ));
strncpy ( result -> domain , domain , sizeof ( result -> domain ));
strncpy ( result -> svr_dir , dir_str , sizeof ( svr_dir ));
#if _URL_DEBUG
printf ( "user:%s \n " , result -> user );
printf ( "pass:%s \n " , result -> pass );
printf ( "port:%d \n " , result -> port );
printf ( "domain:%s \n " , result -> domain );
printf ( "svr_dir:%s \n\n\n " , result -> svr_dir );
#endif
return URL_OK ;
}
void remove_quotation_mark ( char * input )
{
int i = 0 ;
char tmp_buf [ MAX_URL_LEN ];
char * tmp_ptr = input ;
while ( * tmp_ptr != '\0' ) {
if ( * tmp_ptr != '"' ) {
tmp_buf [ i ] = * tmp_ptr ;
i ++ ;
}
tmp_ptr ++ ;
}
tmp_buf [ i ] = '\0' ;
strcpy ( input , tmp_buf );
}
/*
return code
-1 : invalid param 0:domain 1 :ipv4
*/
int check_is_ipv4 ( char * domain )
{
struct in_addr s ;
char IPdotdec [ 20 ] = { 0 };
if (( strlen ( domain ) == 0 ) || ( strlen ( domain ) > INET_DOMAINSTRLEN ))
{
printf ( "invalid domain length! \n " );
return - 1 ;
}
if ( inet_pton ( AF_INET , domain , ( void * ) & s ) == 1 )
{
inet_ntop ( AF_INET , ( void * ) & s , IPdotdec , 16 );
//printf("fota_check_is_ipv4 %s\n", IPdotdec);
return 1 ;
} else {
//printf("may be domain\n");
return - 1 ;
}
}
int parse_url ( char * raw_url , URL_RESULT_T * result )
{
int i ;
int ret ;
char * p = NULL ;
char out_buf [ MAX_URL_LEN ]; //url
char body [ MAX_URL_LEN ];
strcpy ( out_buf , "" );
strcpy ( body , "" );
strncpy ( out_buf , raw_url , sizeof ( out_buf ));
p = out_buf ;
if ( strstr ( out_buf , " \" " ) != NULL )
{
remove_quotation_mark ( out_buf );
}
for ( i = 0 ; i < ARRAY_SIZE ( g_pro_port ); i ++ )
{
if ( strncasecmp ( g_pro_port [ i ]. pro_s , p , strlen ( g_pro_port [ i ]. pro_s )) == 0 )
{
p += strlen ( g_pro_port [ i ]. pro_s );
strncpy ( body , p , sizeof ( body ));
result -> port = g_pro_port [ i ]. port ;
break ;
}
}
if ( i == ARRAY_SIZE ( g_pro_port ))
{
printf ( "%s: Error: invalid protocol %s \n " , __FUNCTION__ , out_buf );
return URL_ERROR ;
}
#if _URL_DEBUG
printf ( "%s(): \n body: %s \n " , __FUNCTION__ , body );
#endif
if ( strstr ( body , " \" " ) != NULL )
{
remove_quotation_mark ( body );
}
//解析域名、端口號、文件目錄
ret = parse_domain_dir ( body , result );
if ( ret == URL_ERROR )
{
ret = printf ( "parse_domain_dir() err \n " );
return URL_ERROR ;
}
return ret ;
}
int dns_resoulve ( char * svr_ip , const char * domain )
{
char ** pptr ;
struct hostent * hptr ;
char str [ MAX_IP_STR_LEN ];
if (( hptr = gethostbyname ( domain )) == NULL )
{
printf ( " gethostbyname error for host:%s \n " , domain );
return URL_ERROR ;
}
#if _URL_DEBUG
printf ( "official hostname:%s \n " , hptr -> h_name );
for ( pptr = hptr -> h_aliases ; * pptr != NULL ; pptr ++ )
printf ( " alias:%s \n " , * pptr );
#endif
switch ( hptr -> h_addrtype )
{
case AF_INET :
case AF_INET6 :
pptr = hptr -> h_addr_list ;
#if _URL_DEBUG
for (; * pptr != NULL ; pptr ++ )
{
printf ( " address:%s \n " , inet_ntop ( hptr -> h_addrtype , * pptr , str , sizeof ( str )));
}
printf ( " first address: %s \n " , inet_ntop ( hptr -> h_addrtype , hptr -> h_addr , str , sizeof ( str )));
#else
inet_ntop ( hptr -> h_addrtype , * pptr , str , sizeof ( str ));
#endif
strncpy ( svr_ip , str , MAX_IP_STR_LEN );
break ;
default:
printf ( "unknown address type \n " );
break ;
}
return URL_OK ;
}
void main ( void )
{
int ret ;
char url_str1 [ 256 ] = "ftp://peng:pass@baidu.com:8080/dir/index.html" ;
char url_str2 [ 256 ] = "ftp://baidu.com:8080/dir/index.html" ;
char url_str3 [ 256 ] = "ftp://peng:pass@baidu.com:8080" ;
char url_str4 [ 256 ] = "ftp://peng:pass@baidu.com/dir/index.html" ;
/*
parse_url(url_str1,&url_result_t);
parse_url(url_str2,&url_result_t);
parse_url(url_str3,&url_result_t);
parse_url(url_str4,&url_result_t);
*/
parse_url ( url_str4 , & url_result_t );
ret = check_is_ipv4 ( url_result_t . domain );
if ( ret != 1 )
{
//dns
dns_resoulve ( url_result_t . svr_ip , url_result_t . domain );
}
printf ( " \n -------------result--------------- \n " );
printf ( "user:%s \n " , url_result_t . user );
printf ( "pass:%s \n " , url_result_t . pass );
printf ( "port:%d \n " , url_result_t . port );
printf ( "domain:%s \n " , url_result_t . domain );
printf ( "svr_dir:%s \n " , url_result_t . svr_dir );
printf ( "svr_ip:%s \n " , url_result_t . svr_ip );
printf ( "-------------end--------------- \n " );
}