SinaWeiboAPI

WeiboAPI

  • 2019.04.04 更新:微博新增“仅半年内微博可见”选项,若用户设置了此选项,则只能获取到其最近半年内的微博信息。
  • 具体可用于个人微博数据分析,个人微博备份等。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/python3
# Author: Conyyon
# Date: 2019.03.10 17:26
# Software: PyCharm

import time
import requests
from pprint import pprint

# HTTP headers sent with every API request in this script; the User-Agent
# is that of a desktop Chrome browser.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/72.0.3626.109 Safari/537.36'
    ),
}

# UID of the Weibo user to query. How to find it: open the user's profile
# page in a desktop browser, view the page source and search for
# $CONFIG['oid'] -- the value that follows it is the UID to put here.
UID = '1642591402'


def get_url(uid=None):
    """
    Build the URL of the m.weibo.cn ``getIndex`` API endpoint for a user.

    :param uid: id of the Weibo user to query. When omitted (``None``) the
        module-level ``UID`` constant is used, so existing zero-argument
        calls keep working unchanged.
    :return: the request URL as a string
    """
    if uid is None:
        # Fall back to the user configured at the top of the file.
        uid = UID
    return 'https://m.weibo.cn/api/container/getIndex?type=uid&value={}'.format(uid)


def get_user_info():
    """
    Fetch the configured user's profile from the getIndex API.

    :return: a ``(containerid, person_info)`` tuple -- note the order.
        ``containerid`` is the id of the tab titled '微博' and is required
        as a request parameter when listing the user's posts;
        ``person_info`` is a dict of selected profile fields (a field the
        response does not contain is stored as ``None``).
    :raises KeyError: if the response has no ``data``/``userInfo``/
        ``tabsInfo``/``tabs`` entry, or a tab lacks ``title``/``containerid``
    :raises IndexError: if no tab is titled '微博'
    """
    index_ret = requests.get(get_url(), headers=headers).json()
    data = index_ret['data']
    userInfo = data['userInfo']
    # Profile fields
    person_info = {
        # Avatar, high definition
        'avatar_hd': userInfo.get('avatar_hd'),
        # Background image used by the mobile client
        'cover_image_phone': userInfo.get('cover_image_phone'),
        # Self description
        'description': userInfo.get('description'),
        # Number of accounts this user follows. The output key stays
        # 'follow'; the value is read from 'follow_count' (it used to be a
        # copy of 'description').
        # NOTE(review): field name taken from observed responses -- confirm.
        'follow': userInfo.get('follow_count'),
        # Total number of posts
        'statuses_count': userInfo.get('statuses_count'),
        # Whether this user follows the currently logged-in user
        'follow_me': userInfo.get('follow_me'),
        # Number of followers
        'followers_count': userInfo.get('followers_count'),
        # Gender -- m: male, f: female, n: unknown
        'gender': userInfo.get('gender'),
        # User id
        'id': userInfo.get('id'),
        # Avatar URL (medium size), 50x50 pixels
        'profile_image_url': userInfo.get('profile_image_url'),
        # Canonical URL of the user's profile
        'profile_url': userInfo.get('profile_url'),
        # Nickname
        'screen_name': userInfo.get('screen_name'),
        # Whether the account is verified (a "V" user)
        'verified': userInfo.get('verified'),
        # Reason given for the verification
        'verified_reason': userInfo.get('verified_reason'),
    }
    # The containerid of the '微博' tab is read from this same response; it
    # is a required parameter for the post-listing requests made elsewhere.
    containerid = [
        tab['containerid']
        for tab in data['tabsInfo']['tabs']
        if tab['title'] == '微博'
    ][0]
    return containerid, person_info


def _page_info_fields(mblog):
    """
    Collect the extra fields carried by the ``page_info`` attached to a post.

    For a repost the ``page_info`` of the retweeted post is used, otherwise
    the post's own. Missing values are reported as ``None`` (or ``''`` for
    the text) instead of raising.

    :param mblog: the ``mblog`` dict of one card
    :return: dict with ``content``, ``page_url`` and ``type``; for a repost,
        or when ``media_info`` is present, also ``mp4_720p_mp4`` and
        ``play_count``. Empty dict when there is no ``page_info`` at all.
    """
    retweeted = mblog.get('retweeted_status')
    page_info = (retweeted or mblog).get('page_info')
    if not page_info:
        return {}
    media_info = page_info.get('media_info')
    fields = {
        # Text content of the attachment
        'content': (page_info.get('content1') or '') + (page_info.get('content2') or ''),
        # Video address
        'mp4_720p_mp4': (media_info or {}).get('mp4_720p_mp4'),
        # Original address of the post
        'page_url': page_info.get('page_url'),
        # Video play count
        'play_count': page_info.get('play_count'),
        # Attachment type
        'type': page_info.get('type'),
    }
    if not (retweeted or media_info):
        # Non-video attachment (e.g. a web page): drop the video-only keys.
        for key in ('mp4_720p_mp4', 'play_count'):
            del fields[key]
    return fields


def get_weibo_info():
    """
    Fetch every post published by the configured user.

    Pages of the getIndex API are requested one after another, starting at
    page 1, until a page comes back without cards. Each post is printed as
    it is collected, and the loop sleeps 3 seconds after each page.

    :return: list of dicts, one per post (cards whose ``card_type`` is 9)
    """
    index_url = get_url()
    # The containerid parameter comes from the user-info response.
    containerid = get_user_info()[0]
    blogs = []
    page = 1
    while True:
        # Build the URL of the post-listing request for this page.
        url = index_url + '&containerid={}&page={}'.format(containerid, page)
        weibo_ret = requests.get(url, headers=headers).json()
        cards = (weibo_ret.get('data') or {}).get('cards')
        # No cards (or no data at all) means everything has been fetched;
        # stop and keep what was collected so far.
        if not cards:
            break
        print('=' * 50, '第 {} 页'.format(page), '=' * 50)
        for card in cards:
            # card_type 9 marks the user's own posts; other cards
            # (recommendations, liked posts, ...) are filtered out.
            if card.get('card_type') != 9:
                continue
            mblog = card.get('mblog')
            if not mblog:
                # A card without a post body carries nothing to extract.
                continue
            blog = {
                # Post id
                'itemid': card.get('itemid'),
                # Whether the post can be edited
                'can_edit': mblog.get('can_edit'),
                # Time of the last edit
                'edit_at': mblog.get('edit_at'),
                # Number of edits
                'edit_count': mblog.get('edit_count'),
                # Number of attitudes (likes)
                'attitudes_count': mblog.get('attitudes_count'),
                # Number of comments
                'comments_count': mblog.get('comments_count'),
                # Number of reposts
                'reposts_count': mblog.get('reposts_count'),
                # Publication date
                'created_at': mblog.get('created_at'),
                # Publishing source
                'source': mblog.get('source'),
                # Post content
                'text': mblog.get('text'),
                # Text length
                'textLength': mblog.get('textLength'),
                # Pictures of the post
                'pics': mblog.get('pics'),
                # Address of the post
                'scheme': card.get('scheme'),
                # Whether the post is pinned to the top
                'isTop': mblog.get('isTop'),
                # Whether the post is paid content
                'is_paid': mblog.get('is_paid'),
            }
            # obj_ext is used as the marker for a post that contains a video.
            if mblog.get('obj_ext'):
                blog.update(_page_info_fields(mblog))
            print(blog)
            blogs.append(blog)
        # Pause between pages.
        time.sleep(3)
        page += 1
    return blogs


if __name__ == '__main__':
    # Script entry point: pretty-print the profile fields, then fetch all
    # posts and print how many were collected.
    _, profile = get_user_info()
    pprint(profile)
    collected = get_weibo_info()
    print(len(collected))

未完待续…