#!/usr/bin/python3
"""SinaWeiboAPI: fetch a Weibo user's profile and posts through m.weibo.cn.

Originally published on 2019-03-10 (category: scripts) by Conyyon at
https://conyyon.ren/posts/36281/.  Unless stated otherwise, all posts on
that blog are licensed BY-NC-SA; please credit the source when reposting.
The original article is marked "to be continued".

Update 2019.04.04: Weibo added an "only posts from the last six months are
visible" setting.  When an account enables it, only the posts of the last
six months can be fetched.

Typical uses: analysing or backing up a personal Weibo account.
"""
# Author: Conyyon
# Date: 2019.03.10 17:26
# Software: PyCharm
import itertools
import time
from pprint import pprint

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'
}

# UID of the Weibo user to crawl.  To find it: open the user's profile page in
# a desktop browser, view the page source and search for $CONFIG['oid']; the
# value that follows is the UID.
UID = '1642591402'

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 10
# Seconds to pause between two page requests.
PAGE_DELAY = 3
# card_type of cards that hold one of the user's own posts; other values are
# recommendations, liked posts, etc.
WEIBO_CARD_TYPE = 9
# Title of the profile tab whose containerid gives access to the post list.
WEIBO_TAB_TITLE = '微博'


def get_url(uid=None):
    """
    Return the URL of the getIndex API endpoint for a user.

    :param uid: Weibo user id; the module-level ``UID`` is used when omitted.
    :return: URL
    """
    target_uid = UID if uid is None else uid
    return 'https://m.weibo.cn/api/container/getIndex?type=uid&value={}'.format(target_uid)


def fetch_json(url):
    """
    GET ``url`` with the module ``headers`` and return the decoded JSON body.

    :param url: address to request
    :return: decoded JSON payload
    :raises requests.RequestException: on timeout, connection or HTTP error
    """
    # Imported here so the pure parsing helpers below stay importable (and
    # testable) on machines where the HTTP client is not installed.
    import requests

    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def build_person_info(user_info):
    """
    Pick the profile fields of interest out of a ``userInfo`` mapping.

    Fields absent from ``user_info`` are reported as ``None``.

    :param user_info: the ``data.userInfo`` object of a getIndex response
    :return: dict of profile fields
    """
    return {
        # High-definition avatar
        'avatar_hd': user_info.get('avatar_hd'),
        # Background image of the mobile profile
        'cover_image_phone': user_info.get('cover_image_phone'),
        # Self description
        'description': user_info.get('description'),
        # Number of accounts the user follows (this used to read the
        # 'description' field by mistake)
        'follow': user_info.get('follow_count'),
        # Total number of posts
        'statuses_count': user_info.get('statuses_count'),
        # Whether this user follows the currently logged-in user
        'follow_me': user_info.get('follow_me'),
        # Number of followers
        'followers_count': user_info.get('followers_count'),
        # Gender -- m: male, f: female, n: unknown
        'gender': user_info.get('gender'),
        # id
        'id': user_info.get('id'),
        # Avatar URL (medium size), 50x50 pixels
        'profile_image_url': user_info.get('profile_image_url'),
        # Canonical profile URL
        'profile_url': user_info.get('profile_url'),
        # Nickname
        'screen_name': user_info.get('screen_name'),
        # Whether the account is verified (a "V" user)
        'verified': user_info.get('verified'),
        # Reason given for the verification
        'verified_reason': user_info.get('verified_reason'),
    }


def find_weibo_containerid(index_data):
    """
    Return the containerid of the tab that lists the user's posts.

    :param index_data: the ``data`` object of a getIndex response
    :return: containerid of the first tab titled ``WEIBO_TAB_TITLE``
    :raises IndexError: when no such tab exists
    """
    for tab in index_data['tabsInfo']['tabs']:
        if tab['title'] == WEIBO_TAB_TITLE:
            return tab['containerid']
    # IndexError is what the previous "[...][0]" lookup raised in this case;
    # keep the type, add a message.
    raise IndexError('no tab titled {!r} in the profile response'.format(WEIBO_TAB_TITLE))


def get_user_info(uid=None):
    """
    Fetch the user's profile.

    :param uid: Weibo user id; the module-level ``UID`` is used when omitted.
    :return: tuple ``(containerid, person_info)``; the containerid is the
        parameter required to request the user's posts
    """
    index_data = fetch_json(get_url(uid))['data']
    person_info = build_person_info(index_data['userInfo'])
    containerid = find_weibo_containerid(index_data)
    return containerid, person_info


def extract_page_info(mblog):
    """
    Return the extra fields of a post that carries a video or web-page card.

    For a repost the card is read from ``retweeted_status``, otherwise from
    the post itself.  Missing pieces yield ``None`` values (or no extra
    fields at all when there is no ``page_info``) instead of an exception, so
    one irregular post cannot abort a whole crawl.

    :param mblog: the ``mblog`` object of a card
    :return: dict with ``content``, ``page_url`` and ``type``, plus
        ``mp4_720p_mp4`` and ``play_count`` when media information exists
    """
    if not mblog.get('obj_ext'):
        return {}
    source = mblog.get('retweeted_status') or mblog
    page_info = source.get('page_info') or {}
    if not page_info:
        return {}
    media_info = page_info.get('media_info')

    # Text attached to the card
    extra = {
        'content': (page_info.get('content1') or '') + (page_info.get('content2') or ''),
    }
    if media_info:
        # Video address
        extra['mp4_720p_mp4'] = media_info.get('mp4_720p_mp4')
    # Original address of the card
    extra['page_url'] = page_info.get('page_url')
    if media_info:
        # Number of plays
        extra['play_count'] = page_info.get('play_count')
    # Card type ("video", "webpage", ...)
    extra['type'] = page_info.get('type')
    return extra


def build_blog(card):
    """
    Convert one post card of the post list into a flat record.

    :param card: a card whose ``card_type`` is ``WEIBO_CARD_TYPE``
    :return: dict describing the post
    """
    # A card without an mblog yields a record of None values rather than an
    # AttributeError.
    mblog = card.get('mblog') or {}
    blog = {
        # Post id
        'itemid': card.get('itemid'),
        # Whether the post can be edited
        'can_edit': mblog.get('can_edit'),
        # Time of the last edit
        'edit_at': mblog.get('edit_at'),
        # Number of edits
        'edit_count': mblog.get('edit_count'),
        # Number of attitudes (likes)
        'attitudes_count': mblog.get('attitudes_count'),
        # Number of comments
        'comments_count': mblog.get('comments_count'),
        # Number of reposts
        'reposts_count': mblog.get('reposts_count'),
        # Publication date
        'created_at': mblog.get('created_at'),
        # Client the post was published from
        'source': mblog.get('source'),
        # Post content
        'text': mblog.get('text'),
        # Text length
        'textLength': mblog.get('textLength'),
        # Pictures of the post
        'pics': mblog.get('pics'),
        # Address of the post
        'scheme': card.get('scheme'),
        # Whether the post is pinned
        'isTop': mblog.get('isTop'),
        # Whether the post is paid content
        'is_paid': mblog.get('is_paid'),
    }
    blog.update(extract_page_info(mblog))
    return blog


def get_weibo_info(uid=None, delay=PAGE_DELAY, max_pages=None):
    """
    Fetch all posts published by the user.

    Pages are requested one after another (about ten posts each) until the
    server returns a page without cards.

    :param uid: Weibo user id; the module-level ``UID`` is used when omitted.
    :param delay: seconds to sleep after each page
    :param max_pages: stop after this many pages; ``None`` means no limit
    :return: list of post records, see ``build_blog``
    """
    index_url = get_url(uid)
    # The containerid parameter is required by the post-list endpoint.
    containerid = get_user_info(uid)[0]
    blogs = []
    pages = itertools.count(1) if max_pages is None else range(1, max_pages + 1)
    for page in pages:
        url = index_url + '&containerid={}&page={}'.format(containerid, page)
        weibo_ret = fetch_json(url)
        # An empty or missing card list means everything has been fetched.
        cards = (weibo_ret.get('data') or {}).get('cards')
        if not cards:
            break
        print('=' * 50, '第 {} 页'.format(page), '=' * 50)
        for card in cards:
            # Keep the user's own posts; skip recommendations, liked posts...
            if card.get('card_type') != WEIBO_CARD_TYPE:
                continue
            blog = build_blog(card)
            print(blog)
            blogs.append(blog)
        time.sleep(delay)
    return blogs


if __name__ == '__main__':
    pprint(get_user_info()[1])
    print(len(get_weibo_info()))