
League of Legends Python Scraper

Table of Contents

  • League of Legends Python Scraper
  • 1. Scraping Heroes
  • 2. Fetching All Hero Info via JS
  • 3. Scraping Match Data
  • Scraping the First LOL Site
  • Scraping the Second LOL Site
  • Scraping the Third LOL Site
  • 4. Multithreaded Scraping of LOL Hero Skin Images

    Hero main page on lol.qq.com:

    https://lol.qq.com/data/info-heros.shtml

    1. Scraping Heroes

    https://lol.qq.com/data/info-heros.shtml

    A GET request fetches the info page of a specific hero:

    https://lol.qq.com/data/info-heros.shtml?id=xxx

    where the id=xxx query parameter selects the hero.
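    As a minimal sketch of that request (id=1 is used purely as an example value; the detail page is filled in by JavaScript, which is why the next section reads the JS data endpoint instead):

    import requests
    from faker import Factory

    f = Factory.create()
    # Request the hero detail page for one hero; the id value here is only an example.
    r = requests.get('https://lol.qq.com/data/info-heros.shtml',
                     params={'id': 1},
                     headers={'user-agent': f.user_agent()})
    print(r.status_code, len(r.text))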

    2. Fetching All Hero Info via JS

    import json
    import requests
    from faker import Factory

    f = Factory.create()


    def get_all_heros():
        url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
        headers = {'user-agent': f.user_agent()}
        r = requests.get(url, headers=headers)
        r.encoding = r.apparent_encoding
        c = r.text
        l = json.loads(c)['hero']
        for i in l[:50]:
            print("ID: {0} Name: {1} Alias: {2}".format(i['heroId'], i['name'], i['alias']))


    if __name__ == '__main__':
        get_all_heros()

    Output: the script prints the ID, name, and alias of the first 50 heroes.
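    As a small follow-up sketch, the same hero_list.js response (using only the heroId and alias fields printed above) can be turned into an alias-to-ID lookup table:

    import json
    import requests
    from faker import Factory

    f = Factory.create()
    r = requests.get('https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js',
                     headers={'user-agent': f.user_agent()})
    r.encoding = r.apparent_encoding
    # Map each hero's alias to its heroId for quick lookups later.
    alias_to_id = {h['alias']: h['heroId'] for h in json.loads(r.text)['hero']}
    print(len(alias_to_id))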

    3. Scraping Match Data

    Scraping the First LOL Site

    http://www.wanplus.com/lol/playerstats

    This site uses a CSRF token: the POST request only needs to carry the csrf-token derived from the Set-Cookie response header.
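    Isolating just that token step, a minimal sketch might look like this (assuming, as the full script below does, that the wanplus_csrf cookie keeps its current format of a 9-character prefix followed by a number):

    import requests
    from faker import Factory

    f = Factory.create()
    r = requests.get('http://www.wanplus.com/lol/playerstats',
                     headers={'user-agent': f.user_agent()},
                     allow_redirects=False)
    cookies = r.cookies.get_dict()
    # Strip the 9-character prefix and add the fixed offset used by the site's JS.
    token = str(int(cookies['wanplus_csrf'][9:]) + 16777216)
    print(token)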

    import json
    import requests
    from faker import Factory
    from urllib import parse

    f = Factory.create()


    def get_token():
        url = 'http://www.wanplus.com/lol/playerstats'
        headers = {
            'user-agent': f.user_agent(),
            'Referer': 'http://www.wanplus.com/lol/teamstats',
            'Host': 'www.wanplus.com',
        }
        r = requests.get(url, headers=headers, allow_redirects=False)
        r.encoding = r.apparent_encoding
        c = r.cookies
        r.close()
        myCookies = c.get_dict()
        # The CSRF token is the numeric part of the wanplus_csrf cookie plus a fixed offset.
        return str(int(c.get('wanplus_csrf')[9:]) + 16777216), myCookies


    def get_competition():
        url = 'http://www.wanplus.com/ajax/stats/list'
        token, myCookies = get_token()
        headers = {
            'user-agent': f.user_agent(),
            'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'Host': 'www.wanplus.com',
            'Origin': 'http://www.wanplus.com',
            'Referer': 'http://www.wanplus.com/lol/playerstats',
            'X-CSRF-Token': token,
            'X-Requested-With': 'XMLHttpRequest',
        }
        # DataTables-style column definitions. The original request body lists every
        # columns[i][...] field by hand; building them in a loop produces the same pairs.
        column_names = [
            'order', 'playername', 'teamname', 'meta', 'appearedTimes', 'kda',
            'attendrate', 'killsPergame', 'mostkills', 'deathsPergame', 'mostdeaths',
            'assistsPergame', 'mostassists', 'goldsPermin', 'lasthitPermin',
            'damagetoheroPermin', 'damagetoheroPercent', 'damagetakenPermin',
            'damagetakenPercent', 'wardsplacedPermin', 'wardskilledPermin',
        ]
        formdata = {'_gtk': token, 'draw': '1'}
        for idx, col in enumerate(column_names):
            formdata['columns[{}][data]'.format(idx)] = col
            formdata['columns[{}][name]'.format(idx)] = ''
            formdata['columns[{}][searchable]'.format(idx)] = 'true'
            # Only the stat columns (index 4 and up) are orderable.
            formdata['columns[{}][orderable]'.format(idx)] = 'false' if idx < 4 else 'true'
            formdata['columns[{}][search][value]'.format(idx)] = ''
            formdata['columns[{}][search][regex]'.format(idx)] = 'false'
        formdata.update({
            'order[0][column]': '4',
            'order[0][dir]': 'desc',
            'start': '0',
            'length': '20',
            'search[value]': '',
            'search[regex]': 'false',
            'area': '',
            'eid': '1065',
            'type': 'player',
            'gametype': '2',
            'filter': '{"team":{},"player":{},"meta":{}}',
        })
        # Convert the dict into k1=v1&k2=v2 form.
        data = parse.urlencode(formdata)
        r = requests.post(url, cookies=myCookies, data=data, headers=headers, allow_redirects=False)
        r.encoding = r.apparent_encoding
        c = r.text
        if len(c) < 100:
            print('Fetch failed, retrying!')
            return False
        print('Fetch succeeded!')
        l = json.loads(c)['data']
        for i in l[:20]:
            print('Team ID: {0} Team: {1} Player: {2}'.format(i['teamid'], i['teamname'], i['playername']))
        return True


    def cookie_to_dic(mycookie):
        # Helper (unused above): turn a raw Cookie header string into a dict.
        dic = {}
        for i in mycookie.split('; '):
            dic[i.split('=')[0]] = i.split('=')[1]
        return dic


    if __name__ == '__main__':
        while 1:
            ok = get_competition()
            if ok is True:
                break

    Scraping the Second LOL Site

    http://lol.admin.pentaq.com/

    This one has no anti-scraping measures and no CSRF token check:

    from faker import Factory
    import requests
    import json

    f = Factory.create()


    def fun():
        url = 'http://lol.admin.pentaq.com/api/tournament_team_data?tour=29&patch='
        headers = {'user-agent': f.user_agent()}
        r = requests.get(url, headers=headers)
        r.encoding = r.apparent_encoding
        c = r.text
        r.close()
        l = json.loads(c)['data']['teams_data']
        for i in l[:20]:
            print("Team name: {0} Team ID: {1} Wins: {2}".format(i['team_full_name'], i['team_id'], i['win']))


    if __name__ == '__main__':
        fun()

    Scraping the Third LOL Site

    http://www.op.gg/champion/statistics

    BeautifulSoup is enough to parse this page:

    from faker import Factory
    import requests
    from bs4 import BeautifulSoup

    f = Factory.create()


    def fun():
        url = 'http://www.op.gg/champion/statistics'
        headers = {
            'user-agent': f.user_agent(),
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
        }
        r = requests.get(url, headers=headers)
        r.encoding = r.apparent_encoding
        if r.status_code != 200:
            return False
        c = r.text
        r.close()
        if len(c) < 10000:  # a short body means the page was not served properly
            return False
        html = BeautifulSoup(c, 'html.parser')
        l = html.find('tbody', class_='tabItem champion-trend-tier-TOP').find_all('tr')
        for x in l[:5]:
            a = x.find_all('td')
            b = a[3].find_all('div')
            name = b[0].text
            pos = b[1].text.replace('\t', '').replace('\n', '')
            print('rank: {0} name: {1} pos: {2} win rate: {3} pick rate: {4}'.format(
                a[0].text, name, pos, a[4].text, a[5].text))
        return True


    if __name__ == '__main__':
        while True:
            ok = fun()
            if ok:
                break

    4. Multithreaded Scraping of LOL Hero Skin Images

    1. get_url_list() collects the hero IDs used to build the per-hero URLs.

    2. download() saves each skin image into a per-hero folder.

    3. main() starts a thread pool to run the scraping tasks.

    import requests
    import json
    import os
    import time
    from faker import Factory
    from multiprocessing.dummy import Pool as ThreadPool

    f = Factory.create()
    headers = {'user-agent': f.user_agent()}


    def get_url_list():
        # Fetch the hero list (156 heroes) and collect their IDs.
        url = 'https://game.gtimg.cn/images/lol/act/img/js/heroList/hero_list.js'
        r = requests.get(url, headers=headers)
        r.encoding = r.apparent_encoding
        heros = json.loads(r.text)["hero"]
        idList = [hero["heroId"] for hero in heros]
        return idList


    def spider(url):
        r = requests.get(url, headers=headers)
        r.encoding = r.apparent_encoding
        c = r.text
        r.close()
        res_dict = json.loads(c)
        skins = res_dict["skins"]  # all skins of this hero
        for index, hero in enumerate(skins):  # enumerate gives an index used in the image file name
            item = {}
            item['name'] = hero["heroName"]
            item['skin_name'] = hero["name"]
            if hero["mainImg"] == '':
                continue
            item['imgLink'] = hero["mainImg"]
            download(index + 1, item)


    def download(index, contdict):
        name = contdict['name']
        path = "皮肤/" + name
        if not os.path.exists(path):
            os.makedirs(path)
        content = requests.get(contdict['imgLink'], headers=headers).content
        with open('./皮肤/' + name + '/' + contdict['skin_name'] + str(index) + '.jpg', 'wb') as fp:
            fp.write(content)


    def main():
        start = time.time()
        pool = ThreadPool(6)
        page = []
        for i in range(1, 11):  # hero detail files 1.js .. 10.js for this demo
            newpage = 'https://game.gtimg.cn/images/lol/act/img/js/hero/{}.js'.format(i)
            print(newpage)
            page.append(newpage)
        pool.map(spider, page)
        pool.close()
        pool.join()
        end = time.time()
        print('Elapsed:', end - start)


    if __name__ == '__main__':
        main()
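    The demo above hard-codes hero files 1.js through 10.js. As a small usage variation (a sketch reusing get_url_list, spider, and ThreadPool from the script above, and assuming the per-hero endpoint keeps the same URL pattern), the full ID list could drive the pool instead:

    # Build one URL per heroId instead of the fixed range(1, 11).
    pages = ['https://game.gtimg.cn/images/lol/act/img/js/hero/{}.js'.format(hid)
             for hid in get_url_list()]
    pool = ThreadPool(6)
    pool.map(spider, pages)
    pool.close()
    pool.join()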

