时隔多年,开盘啦APP龙虎榜席位标签爬虫,再上路,代码如下,非专业开发,很业余,数据解析存储中间还有很多不到位的地方,欢迎留言交流:
# -*- coding:utf-8 -*-
"""Crawl seat labels for dragon-tiger-list stocks from the Kaipanla app API.

For every distinct (t_date, v_code) pair read from ``dwd_stock_special_lhb``
the script posts to the Kaipanla endpoint, flattens the returned sell/buy seat
lists into rows and appends them to ``ods_basic_department_info``.
"""
import datetime
import json
import logging

import akshare as ak
import pandas as pd
import pymysql
import requests
from sqlalchemy import create_engine

logger = logging.getLogger(__name__)

# NOTE(review): the trailing " HTTP/1.1" looks like residue from a captured
# request line and ends up inside the VerSion query value. It is kept
# byte-identical here because the server's tolerance is unknown -- confirm
# before removing it.
LHB_API_URL = 'https://lhb.kaipanla.com/w1/api/index.php?apiv=w28&PhoneOSNew=1&VerSion=5.2.0.1 HTTP/1.1'

REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 7.1.2; VOG-AL00 Build/N2G48H; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/68.0.3440.70 Mobile Safari/537.36;kaipanla 5.2.0.1',
}

# Seconds to wait for the API before giving up on one (date, code) pair.
REQUEST_TIMEOUT_SECONDS = 10

TARGET_TABLE = 'ods_basic_department_info'

# Output column -> key of the seat entry returned by the API.
# Insertion order defines the column order of the stored frame.
SEAT_FIELDS = {
    '营业部ID': 'ID',
    '营业部名称': 'Name',
    '营业部标签': 'YouZiIcon',
    '资金席位ID': 'GroupID',
    '资金席位名称': 'GroupIcon',
}


def _first_record(payload):
    """Return the first entry of ``payload['List']`` or None if it is absent."""
    if not isinstance(payload, dict):
        return None
    records = payload.get('List')
    if not isinstance(records, list) or not records:
        return None
    first = records[0]
    return first if isinstance(first, dict) else None


def _seat_rows(detail):
    """Build one row dict per seat, sell side first and then buy side."""
    seats = list(detail.get('SellList') or []) + list(detail.get('BuyList') or [])
    return [
        {column: seat.get(field) for column, field in SEAT_FIELDS.items()}
        for seat in seats
        if isinstance(seat, dict)
    ]


def spider_lhb_sales_department(date, code, engine=None):
    """Fetch the seat list of one stock on one day and append it to the ODS table.

    The call is best-effort: network, decoding and database failures are
    logged and the function returns normally, so a batch loop keeps running.

    Args:
        date: Trade date sent as the ``Time`` form field. ``datetime.date``
            instances (including ``datetime.datetime`` and pandas Timestamps)
            are formatted as ``YYYY-MM-DD``; any other value is sent as given.
        code: Stock code sent as the ``StockID`` form field.
        engine: SQLAlchemy engine or connection handed to
            ``DataFrame.to_sql``. When omitted, the module-level ``engine1``
            created by the ``__main__`` block is used.

    Returns:
        None.

    Raises:
        NameError: If ``engine`` is omitted and no global ``engine1`` exists.
    """
    if engine is None:
        # Backward compatibility: older callers rely on the global engine.
        engine = engine1

    if isinstance(date, datetime.date):
        date = date.strftime('%Y-%m-%d')

    form = {
        'c': 'Stock',
        'a': 'GetNewOneStockInfo',
        'Type': 0,
        'Time': date,
        'StockID': code,
        'DeviceID': 'ffffffff-f916-2186-0000-00000cdf9093'
    }

    try:
        response = requests.post(
            url=LHB_API_URL,
            data=form,
            headers=REQUEST_HEADERS,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        # Decode as JSON; never eval() text that came from the network.
        payload = response.json()
    except (requests.RequestException, ValueError):
        logger.exception('Request failed for date=%s code=%s', date, code)
        return

    # Start parsing.
    detail = _first_record(payload)
    if detail is None:
        logger.warning('No List entry in response for date=%s code=%s', date, code)
        return

    rows = _seat_rows(detail)
    if not rows:
        logger.info('No seats returned for date=%s code=%s', date, code)
        return

    # A list of row dicts avoids both the removed DataFrame.append and the
    # "all scalar values" ValueError raised by DataFrame(dict_of_scalars).
    df = pd.DataFrame(rows, columns=list(SEAT_FIELDS))
    print(df)

    try:
        df.to_sql(TARGET_TABLE, con=engine, if_exists='append', index=False)
    except Exception:
        # Batch boundary: record the failure and let the next pair proceed.
        logger.exception('Could not store seats for date=%s code=%s', date, code)


if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    # Engines for the ODS (write) and DWD (read) databases.
    engine1 = create_engine('mysql+pymysql://root:123456@localhost/stock_ods_db?charset=utf8')
    engine2 = create_engine('mysql+pymysql://root:123456@localhost/stock_dwd_db?charset=utf8')

    # Load the dragon-tiger list: one row per distinct (date, code) pair.
    lhb_df = pd.read_sql('select distinct t_date,v_code from dwd_stock_special_lhb', con=engine2)

    # Walk the pairs row by row. Nesting two loops over the columns would
    # request every date for every code (a Cartesian product).
    pairs = lhb_df[['t_date', 'v_code']].dropna()
    for date, code in pairs.itertuples(index=False, name=None):
        print('开始')
        spider_lhb_sales_department(date, code, engine=engine1)
        print('结束')