项目设计文档——爬虫项目(爬取天气预报)

一、项目背景以及项目意义

项目背景:

爬虫技术的核心目的是自动化地从互联网上采集,提取和存储数据。网络爬虫是一种自动化程序,用于从互联网上抓取数据并进行处理。C语言因其高效性和接近硬件的特性,常被用于开发高性能的网络爬虫,尤其是在资源受限或需要极致性能的场景中。

项目意义:

本项目综合运用了所学的TCP协议、套接字编程和HTTP协议,与网络服务端建立连接,并从指定网址当中爬取自己想要的相关数据。

二、项目实现的功能

1.通过TCP协议建立联系

2.通过http协议获得响应报文以及正文

3.解析HTTP响应报文及其中的JSON正文内容

4.提取自己想要的数据

三、项目的整体框图以及各部分如何实现

四、项目中的流程图及说明

五、项目中遇到的问题及解决方法

1.在查询https://sapi.k780.com/的端口和IP地址时,只找到了https协议的地址,导致无法爬取

解决方法:通过在网上搜集资料了解到,应该将网址中https的s和sapi的s去掉(即改用http://api.k780.com/),这样即可通过http协议访问

2.在将请求报文发送时,不能将所想要查询的城市输入到请求报文当中

解决方法:通过主函数传参以及利用了snprintf函数将请求报文保存到了一个数组当中

3.在将回复的响应报文保存到文本当中后,不知道怎么将文本当中的数据读到cjson当中

解决方法:利用strstr函数对文本当中的数据进行定位,将指针定位到JSON正文开头的{"处,再用cJSON解析报文

4.在解析cjson报文当中不知道如何解析嵌套的JSON数据

解决方法:先解析外层的JSON数据,然后再依次往里层进行解析;其响应报文中的数字、汉字、字母等都是字符串类型,要用valuestring来取值。

5.在JSON数组当中解析JSON数据

解决方法:解析JSON数组时,应该先用cJSON_GetArraySize获得该数组的大小,然后进行循环,用cJSON_GetArrayItem依次取出每个数组元素(item),再从元素当中获取想要的数据

#include "pachong.h"
#include "cJSON.h"

/*
 * Program entry point.
 *
 * Expects exactly one argument, the city name. Connects to the weather
 * server, shows a three-item menu, reads the choice from stdin and then
 * either queries today's weather (1), queries the forecast (2) or exits (3).
 *
 * Returns 0 on success and -1 on a usage error, a connection failure,
 * unreadable menu input, or when any step of the chosen query fails.
 */
int main(int argc,const char *argv[])
{
    if(argc != 2)
    {
        printf("Usage : ./a.out <城市名字>\n");
        return -1;
    }

    int sockfd = create_tcp_connect();
    if(sockfd < 0)
    {
        /* create_tcp_connect() has already reported the failing call with
         * perror(); errno may be stale by now, so do not report it again. */
        return -1;
    }

    printf("-----------------------------\n");
    printf("查询实时天气或天气预报-------\n");
    printf("1.查询实时天气---------------\n");
    printf("2.查询天气预报---------------\n");
    printf("3.退出-----------------------\n");
    printf("-----------------------------\n");

    /* scanf() leaves its target untouched on a matching failure, so the
     * result must be checked before the value is used. */
    int choose = 0;
    if(scanf("%d",&choose) != 1)
    {
        fprintf(stderr,"invalid menu choice\n");
        close(sockfd);
        return -1;
    }

    int ret = 0;
    switch(choose)
    {
        case 1:
            /* Each step depends on the previous one; stop at the first failure. */
            if(send_http_request_weather_today(sockfd,argv[1]) < 0
               || recv_http_response(sockfd) < 0
               || find_today_weather() < 0)
            {
                ret = -1;
            }
            break;
        case 2:
            if(send_http_requesr_weather_prediction(sockfd,argv[1]) < 0
               || recv_http_response(sockfd) < 0
               || find_prediction_weather() < 0)
            {
                ret = -1;
            }
            break;
        case 3:
            /* Closes the socket and terminates the process; does not return. */
            exit_program(sockfd);
            break;
        default:
            break;
    }

    close(sockfd);
    return ret;
}


#include "pachong.h"
#include "cJSON.h"
#define SER_PORT 80
#define SER_IP "8.129.233.227"

/*
 * Open a TCP connection to SER_IP:SER_PORT.
 *
 * Returns the connected socket descriptor, which the caller must close(),
 * or -1 on failure after printing a diagnostic. No descriptor is left open
 * on the failure paths.
 */
int create_tcp_connect(void)
{
    int sockfd = socket(AF_INET,SOCK_STREAM,0);
    if(sockfd < 0)
    {
        perror("socket error");
        return -1;
    }

    struct sockaddr_in seraddr;
    /* Zero the whole structure so padding (sin_zero) is not left uninitialized. */
    memset(&seraddr,0,sizeof(seraddr));
    seraddr.sin_family = AF_INET;
    seraddr.sin_port = htons(SER_PORT);
    /* inet_pton() reports a malformed address explicitly. */
    if(inet_pton(AF_INET,SER_IP,&seraddr.sin_addr) != 1)
    {
        fprintf(stderr,"invalid server address: %s\n",SER_IP);
        close(sockfd);
        return -1;
    }

    if(connect(sockfd,(struct sockaddr *)&seraddr,sizeof(seraddr)) < 0)
    {
        /* Report first: close() may overwrite errno. */
        perror("connect error");
        close(sockfd);
        return -1;
    }

    return sockfd;
}
/* Return non-zero when c is an RFC 3986 "unreserved" character. */
static int is_unreserved_char(unsigned char c)
{
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
        || (c >= '0' && c <= '9')
        || c == '-' || c == '_' || c == '.' || c == '~';
}

/* Return non-zero when c is a hexadecimal digit. */
static int is_hex_digit(unsigned char c)
{
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}

/*
 * Percent-encode src into dst so it can be placed in a URL query value.
 *
 * Unreserved characters are copied as-is and every other byte becomes %XX.
 * A '%' that already starts a valid %XX escape is copied through, so a
 * caller that passes an already-encoded name keeps working.
 *
 * Returns 0 on success, -1 when the encoded text does not fit in dstsize
 * bytes including the terminating NUL.
 */
static int encode_query_value(const char *src,char *dst,size_t dstsize)
{
    static const char hex[] = "0123456789ABCDEF";
    const unsigned char *p = (const unsigned char *)src;
    size_t out = 0;

    if(dstsize == 0)
    {
        return -1;
    }

    for(;*p != '\0';p++)
    {
        int keep = is_unreserved_char(*p)
                || (*p == '%' && is_hex_digit(p[1]) && is_hex_digit(p[2]));
        if(keep)
        {
            if(out + 1 >= dstsize)
            {
                return -1;
            }
            dst[out++] = (char)*p;
        }
        else
        {
            if(out + 3 >= dstsize)
            {
                return -1;
            }
            dst[out++] = '%';
            dst[out++] = hex[*p >> 4];
            dst[out++] = hex[*p & 0x0F];
        }
    }
    dst[out] = '\0';
    return 0;
}

/* Send exactly len bytes, retrying after short sends. Returns 0 or -1. */
static int send_all(int sockfd,const char *data,size_t len)
{
    size_t sent = 0;
    while(sent < len)
    {
        ssize_t cnt = send(sockfd,data + sent,len - sent,0);
        if(cnt <= 0)
        {
            perror("send error");
            return -1;
        }
        sent += (size_t)cnt;
    }
    return 0;
}

/*
 * Build and send the GET request for the given API application name
 * ("weather.today" or "weather.future") and city.
 *
 * Returns 0 on success, -1 on a NULL/over-long city name, a request that
 * does not fit the buffer, or a send failure.
 */
static int send_weather_request(int sockfd,const char *app,const char *cityname)
{
    if(cityname == NULL)
    {
        fprintf(stderr,"city name is missing\n");
        return -1;
    }

    /* The city name comes from the command line; escape it so spaces,
     * '&', CR/LF and similar bytes cannot alter the request. */
    char city[1024];
    if(encode_query_value(cityname,city,sizeof(city)) < 0)
    {
        fprintf(stderr,"city name is too long\n");
        return -1;
    }

    char buff[4096] = {0};
    /* "Accept-Encoding: identity": the response is later parsed as plain
     * text, so a compressed body must not be requested. */
    int len = snprintf(buff,sizeof(buff),
                "GET /?app=%s&cityNm=%s&appkey=77275&sign=3b1921902ede0a45608c50b0f1b919c3&format=json HTTP/1.1\r\n"
                "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7\r\n"
                "Accept-Encoding: identity\r\n"
                "Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6\r\n"
                "Cache-Control: max-age=0\r\n"
                "Connection: close\r\n"
                "Host: api.k780.com\r\n"
                "Upgrade-Insecure-Requests: 1\r\n"
                "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/139.0.0.0 Safari/537.36 Edg/139.0.0.0\r\n"
                "\r\n",app,city);
    /* Never send a silently truncated request. */
    if(len < 0 || (size_t)len >= sizeof(buff))
    {
        fprintf(stderr,"request does not fit in the send buffer\n");
        return -1;
    }

    return send_all(sockfd,buff,(size_t)len);
}

/*
 * Send the "weather.today" request for cityname on sockfd.
 * Returns 0 on success, -1 on failure.
 */
int send_http_request_weather_today(int sockfd,const char *cityname)
{
    return send_weather_request(sockfd,"weather.today",cityname);
}

/*
 * Close sockfd (when it is a valid descriptor), print a farewell message
 * and terminate the process with status 0. Does not return.
 */
void exit_program(int sockfd)
{
    if(sockfd >= 0)
    {
        close(sockfd);
    }
    printf("已退出\n");

    exit(0);
}

/*
 * Send the "weather.future" (forecast) request for cityname on sockfd.
 * Returns 0 on success, -1 on failure.
 */
int send_http_requesr_weather_prediction(int sockfd,const char *cityname)
{
    return send_weather_request(sockfd,"weather.future",cityname);
}
/*
 * Receive everything the peer sends on sockfd until it closes the
 * connection, and store the raw bytes in ./1.txt (created or truncated).
 *
 * Prints "off" when the peer closes the connection.
 * Returns 0 on success, -1 if the file cannot be opened or a recv/write/
 * close call fails. The file descriptor is closed on every path.
 */
int recv_http_response(int sockfd)
{
    int fd = open("./1.txt",O_WRONLY | O_CREAT | O_TRUNC,0664);
    if(fd < 0)
    {
        perror("open error");
        return -1;
    }

    char buff[1024];
    int ret = 0;
    while(0 == ret)
    {
        ssize_t cnt = recv(sockfd,buff,sizeof(buff),0);
        if(cnt < 0)
        {
            perror("recv error");
            ret = -1;
            break;
        }
        if(0 == cnt)
        {
            printf("off\n");
            break;
        }

        /* write() may store fewer bytes than requested; keep going until
         * the whole chunk is on disk. */
        ssize_t done = 0;
        while(done < cnt)
        {
            ssize_t n = write(fd,buff + done,(size_t)(cnt - done));
            if(n < 0)
            {
                perror("write error");
                ret = -1;
                break;
            }
            done += n;
        }
    }

    if(close(fd) < 0 && 0 == ret)
    {
        perror("close error");
        ret = -1;
    }
    return ret;
}
/*
 * Read the saved HTTP response from 1.txt, skip the header section and
 * parse the JSON object that follows.
 *
 * Returns the parsed tree, which the caller must release with
 * cJSON_Delete(), or NULL after printing a diagnostic. At most 40959 bytes
 * of the file are read; a longer response is truncated before parsing.
 */
static cJSON *load_weather_json(void)
{
    int fd = open("1.txt",O_RDONLY);
    if(fd < 0)
    {
        perror("open error");
        return NULL;
    }

    char buff[40960];
    size_t used = 0;
    for(;;)
    {
        ssize_t cnt = read(fd,buff + used,sizeof(buff) - 1 - used);
        if(cnt < 0)
        {
            perror("read error");
            close(fd);
            return NULL;
        }
        if(0 == cnt)
        {
            break;      /* end of file, or the buffer is full */
        }
        used += (size_t)cnt;
    }
    close(fd);

    if(0 == used)
    {
        fprintf(stderr,"read error: 1.txt is empty\n");
        return NULL;
    }
    buff[used] = '\0';

    /* Start looking for the JSON object after the blank line that ends the
     * HTTP headers; fall back to the whole buffer when there is none. */
    const char *body = strstr(buff,"\r\n\r\n");
    body = (body != NULL) ? body + 4 : buff;

    const char *json_start = strstr(body,"{\"");
    if(NULL == json_start)
    {
        printf("json error : no JSON object found in response\n");
        return NULL;
    }

    cJSON *json = cJSON_Parse(json_start);
    if(NULL == json)
    {
        /* The error pointer may be NULL; never hand NULL to %s. */
        const char *where = cJSON_GetErrorPtr();
        printf("json error : %s\n",(where != NULL) ? where : "(unknown position)");
    }
    return json;
}

/*
 * Print every known weather field of obj that is present as a JSON string,
 * one "label:value" line each, in a fixed order. Missing or non-string
 * fields are skipped silently.
 */
static void print_weather_fields(cJSON *obj)
{
    static const struct
    {
        const char *key;
        const char *label;
    } fields[] = {
        {"days","日期"},
        {"citynm","城市"},
        {"week","星期"},
        {"temperature","最高温度/最低温度"},
        {"temperature_curr","当前温度"},
        {"humidity","湿度"},
        {"weather","今天天气"},
        {"weather_curr","当前天气"},
        {"wind","风向"},
        {"winp","风力"},
    };

    for(size_t i = 0;i < sizeof(fields) / sizeof(fields[0]);i++)
    {
        cJSON *value = cJSON_GetObjectItem(obj,fields[i].key);
        if(cJSON_IsString(value))
        {
            printf("%s:%s\n",fields[i].label,value->valuestring);
        }
    }
}

/*
 * Parse the saved response of a "today" query and print the fields of its
 * "result" object.
 *
 * Returns 0 on success, -1 when the response cannot be read or parsed or
 * has no "result" member. The JSON tree is released on every path.
 */
int find_today_weather(void)
{
    cJSON *json = load_weather_json();
    if(NULL == json)
    {
        return -1;
    }

    int ret = 0;
    cJSON *json_result = cJSON_GetObjectItem(json,"result");
    if(NULL == json_result)
    {
        printf("json_result error\n");
        ret = -1;
    }
    else
    {
        print_weather_fields(json_result);
    }

    cJSON_Delete(json);
    return ret;
}

/*
 * Parse the saved response of a forecast query and print the fields of
 * every object in its "result" array, followed by a blank line per entry.
 *
 * Returns 0 on success, -1 when the response cannot be read or parsed, has
 * no "result" member, or an array element cannot be fetched. The JSON tree
 * is released on every path.
 */
int find_prediction_weather(void)
{
    cJSON *json = load_weather_json();
    if(NULL == json)
    {
        return -1;
    }

    int ret = 0;
    cJSON *json_result = cJSON_GetObjectItem(json,"result");
    if(NULL == json_result)
    {
        printf("json_result error\n");
        ret = -1;
    }
    else
    {
        int size = cJSON_GetArraySize(json_result);
        for(int i = 0;i < size;i++)
        {
            cJSON *item = cJSON_GetArrayItem(json_result,i);
            if(NULL == item)
            {
                printf("item error\n");
                ret = -1;
                break;
            }
            if(!cJSON_IsObject(item))
            {
                continue;
            }

            /* Flag an entry without a date before its other fields are
             * listed. */
            if(!cJSON_IsString(cJSON_GetObjectItem(item,"days")))
            {
                printf("error\n");
            }
            print_weather_fields(item);
            printf("\n");
        }
    }

    cJSON_Delete(json);
    return ret;
}
/* Public interface of the weather crawler. */
#ifndef PACHONG_H
#define PACHONG_H

#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>          /* See NOTES */
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h> 
#include <arpa/inet.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* Parse the response saved in 1.txt for a "today" query and print it.
 * Returns 0 on success, -1 on failure. */
extern int find_today_weather(void);

/* Close sockfd if it is valid, print a farewell message and exit(0). */
extern void exit_program(int sockfd);

/* Send the forecast ("weather.future") request for cityname on sockfd.
 * Returns 0 on success, -1 on failure. */
extern int send_http_requesr_weather_prediction(int sockfd,const char *cityname);

/* Send the "weather.today" request for cityname on sockfd.
 * Returns 0 on success, -1 on failure. */
extern int send_http_request_weather_today(int sockfd,const char *cityname);

/* Receive the response on sockfd until the peer closes the connection and
 * store it in ./1.txt. Returns 0 on success, -1 on failure. */
extern int recv_http_response(int sockfd);

/* Create a TCP socket and connect it to the weather server.
 * Returns the socket descriptor, or -1 on failure. */
extern int create_tcp_connect(void);

/* Parse the response saved in 1.txt for a forecast query and print every
 * entry. Returns 0 on success, -1 on failure. */
extern int find_prediction_weather(void);

#endif /* PACHONG_H */

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值