基本信息
源码名称:python微博爬虫(scrapy)示例源码
源码大小:0.02M
文件格式:.zip
开发语言:Python
更新时间:2019-03-12
   友情提示:(无需注册或充值,赞助后即可获取资源下载链接)

     嘿,亲!知识可是无价之宝呢,但咱这精心整理的资料也耗费了不少心血呀。小小地破费一下,绝对物超所值哦!如有下载和支付问题,请联系我们QQ(微信同号):813200300

本次赞助数额为: 2 元 
   源码介绍


class WeiboSpider(Spider):
    name = 'weibocn'   allowed_domains = ['m.weibo.cn']
    
    user_url = 'https://m.weibo.cn/api/container/getIndex?uid={uid}&type=uid&value={uid}&containerid=100505{uid}'   follow_url = 'https://m.weibo.cn/api/container/getIndex?containerid=231051_-_followers_-_{uid}&page={page}'   fan_url = 'https://m.weibo.cn/api/container/getIndex?containerid=231051_-_fans_-_{uid}&page={page}'   weibo_url = 'https://m.weibo.cn/api/container/getIndex?uid={uid}&type=uid&page={page}&containerid=107603{uid}'   start_users = ['3217179555', '1742566624', '2282991915', '1288739185', '3952070245', '5878659096'] def start_requests(self): for uid in self.start_users: yield Request(self.user_url.format(uid=uid), callback=self.parse_user) def parse_user(self, response): """  解析用户信息  :param response: Response对象  """  self.logger.debug(response)
        result = json.loads(response.text) if result.get('data').get('userInfo'):
            user_info = result.get('data').get('userInfo')
            user_item = UserItem()
            field_map = { 'id': 'id', 'name': 'screen_name', 'avatar': 'profile_image_url', 'cover': 'cover_image_phone', 'gender': 'gender', 'description': 'description', 'fans_count': 'followers_count', 'follows_count': 'follow_count', 'weibos_count': 'statuses_count', 'verified': 'verified', 'verified_reason': 'verified_reason', 'verified_type': 'verified_type'  } for field, attr in field_map.items():
                user_item[field] = user_info.get(attr) yield user_item # 关注  uid = user_info.get('id') yield Request(self.follow_url.format(uid=uid, page=1), callback=self.parse_follows,
                          meta={'page': 1, 'uid': uid}) # 粉丝  yield Request(self.fan_url.format(uid=uid, page=1), callback=self.parse_fans,
                          meta={'page': 1, 'uid': uid}) # 微博  yield Request(self.weibo_url.format(uid=uid, page=1), callback=self.parse_weibos,
                          meta={'page': 1, 'uid': uid}) def parse_follows(self, response): """  解析用户关注  :param response: Response对象  """  result = json.loads(response.text) if result.get('ok') and result.get('data').get('cards') and len(result.get('data').get('cards')) and result.get('data').get('cards')[-1].get( 'card_group'): # 解析用户  follows = result.get('data').get('cards')[-1].get('card_group') for follow in follows: if follow.get('user'):
                    uid = follow.get('user').get('id') yield Request(self.user_url.format(uid=uid), callback=self.parse_user)
            
            uid = response.meta.get('uid') # 关注列表  user_relation_item = UserRelationItem()
            follows = [{'id': follow.get('user').get('id'), 'name': follow.get('user').get('screen_name')} for follow in  follows]
            user_relation_item['id'] = uid
            user_relation_item['follows'] = follows
            user_relation_item['fans'] = [] yield user_relation_item # 下一页关注  page = response.meta.get('page') 1  yield Request(self.follow_url.format(uid=uid, page=page),
                          callback=self.parse_follows, meta={'page': page, 'uid': uid}) def parse_fans(self, response): """  解析用户粉丝  :param response: Response对象  """  result = json.loads(response.text) if result.get('ok') and result.get('data').get('cards') and len(result.get('data').get('cards')) and result.get('data').get('cards')[-1].get( 'card_group'): # 解析用户  fans = result.get('data').get('cards')[-1].get('card_group') for fan in fans: if fan.get('user'):
                    uid = fan.get('user').get('id') yield Request(self.user_url.format(uid=uid), callback=self.parse_user)
            
            uid = response.meta.get('uid') # 粉丝列表  user_relation_item = UserRelationItem()
            fans = [{'id': fan.get('user').get('id'), 'name': fan.get('user').get('screen_name')} for fan in  fans]
            user_relation_item['id'] = uid
            user_relation_item['fans'] = fans
            user_relation_item['follows'] = [] yield user_relation_item # 下一页粉丝  page = response.meta.get('page') 1  yield Request(self.fan_url.format(uid=uid, page=page),
                          callback=self.parse_fans, meta={'page': page, 'uid': uid}) def parse_weibos(self, response): """  解析微博列表  :param response: Response对象  """  result = json.loads(response.text) if result.get('ok') and result.get('data').get('cards'):
            weibos = result.get('data').get('cards') for weibo in weibos:
                mblog = weibo.get('mblog') if mblog:
                    weibo_item = WeiboItem()
                    field_map = { 'id': 'id', 'attitudes_count': 'attitudes_count', 'comments_count': 'comments_count', 'reposts_count': 'reposts_count', 'picture': 'original_pic', 'pictures': 'pics', 'created_at': 'created_at', 'source': 'source', 'text': 'text', 'raw_text': 'raw_text', 'thumbnail': 'thumbnail_pic',
                    } for field, attr in field_map.items():
                        weibo_item[field] = mblog.get(attr)
                    weibo_item['user'] = response.meta.get('uid') yield weibo_item # 下一页微博  uid = response.meta.get('uid')
            page = response.meta.get('page') 1  yield Request(self.weibo_url.format(uid=uid, page=page), callback=self.parse_weibos,
                          meta={'uid': uid, 'page': page})