python获取涨停板历史数据

获取A股每天的涨停板数据,起始日期和截止日期都可以自定义。
这些数据可以用于后续的大数据分析,比如统计每天涨停板数目与大盘指数的相关性,以及涨停打开次数与当日人气强弱的关系。
 

Screenshot_from_2018-04-23_20-31-13.png
点击查看大图

python代码:
# -*- coding=utf-8 -*-
import datetime

__author__ = 'Rocky'
'''
http://30daydo.com
Contact: weigesysu@qq.com
'''
# 每天的涨跌停
import urllib2, re, time, xlrd, xlwt, sys, os
import setting
import pandas as pd
import tushare as ts
from setting import LLogger
# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# the process-wide default string encoding can be switched to GBK.
reload(sys)
sys.setdefaultencoding('gbk')

# Module-level logger shared by GetZDT and the __main__ driver; LLogger is
# imported from the project-local `setting` module and built with 'zdt.log'.
logger = LLogger('zdt.log')
class GetZDT:
    """Download one trading day's limit-up statistics from jrj.com.cn and store them.

    The fetched rows are written both to an Excel file under ``<module dir>/data``
    and to a SQL table obtained through ``setting.get_engine('db_zdt')``.
    """

    def __init__(self, current):
        """Prepare URLs, HTTP headers and column names for the given day.

        :param current: trading day as a ``YYYYMMDD`` string; it is embedded in
            the download URL, the Excel file name and the SQL table name.
        """
        self.user_agent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/64.0.3282.167 Chrome/64.0.3282.167 Safari/537.36"
        self.today = current
        self.path = os.path.join(os.path.dirname(__file__), 'data')
        self.zdt_url = 'http://home.flashdata2.jrj.com.cn/limitStatistic/ztForce/' + self.today + ".js"
        self.zrzt_url = 'http://hqdata.jrj.com.cn/zrztjrbx/limitup.js'

        self.host = "home.flashdata2.jrj.com.cn"
        # NOTE(review): the published snippet truncated this literal
        # ("http://stock.jrj.com.cn/tzzs/z ... ") and left it unterminated.
        # The value below is a reconstruction -- confirm it against the site.
        self.reference = "http://stock.jrj.com.cn/tzzs/zdtwdj/zdforce.shtml"

        self.header_zdt = {
            "User-Agent": self.user_agent,
            "Host": self.host,
            "Referer": self.reference,
        }

        # Column labels for the limit-up table (kept in Chinese: they become
        # the DataFrame / Excel / SQL column names).
        self.zdt_indexx = [
            u'代码', u'名称', u'最新价格', u'涨跌幅', u'封成比', u'封流比',
            u'封单金额', u'最后一次涨停时间', u'第一次涨停时间', u'打开次数',
            u'振幅',
            u'涨停强度',
        ]

        # Column labels for the "yesterday's limit-up" table.
        self.zrzt_indexx = [
            u'序号', u'代码', u'名称', u'昨日涨停时间', u'最新价格', u'今日涨幅',
            u'最大涨幅', u'最大跌幅', u'是否连板', u'连续涨停次数',
            u'昨日涨停强度', u'今日涨停强度', u'是否停牌', u'昨天的日期',
            u'昨日涨停价', u'今日开盘价格', u'今日开盘涨幅',
        ]
        self.header_zrzt = {
            "User-Agent": self.user_agent,
            "Host": "hqdata.jrj.com.cn",
            "Referer": "http://stock.jrj.com.cn/tzzs/zrztjrbx.shtml",
        }

    def getdata(self, url, headers, retry=5):
        """Fetch ``url`` and return its body, retrying on failure.

        A response is accepted only when it is non-empty and contains the
        marker ``summary`` or ``lasttradedate``. After every failed attempt
        (bad body or exception) the method waits 60 seconds.

        :param url: address to download.
        :param headers: dict of HTTP request headers.
        :param retry: maximum number of attempts.
        :return: the response body, or ``None`` when every attempt failed.
        """
        req = urllib2.Request(url=url, headers=headers)
        for i in range(retry):
            try:
                resp = urllib2.urlopen(req, timeout=20)
                content = resp.read()
                if content and re.search('summary|lasttradedate', content):
                    return content
                time.sleep(60)
                logger.log('failed to get content, retry: {}'.format(i))
            except Exception as e:
                # Best effort: any network/HTTP error just triggers another try.
                logger.log(e)
                time.sleep(60)
        return None

    def convert_json(self, content):
        """Extract the ``"Data"`` array from the downloaded JS payload.

        :param content: raw response text; the rows are expected between
            ``"Data":`` and the closing ``};``.
        :return: list of rows, or ``None`` when ``content`` is empty, the
            marker is absent, or the payload cannot be evaluated.
        """
        if not content:
            # Previously this terminated the whole process, which aborted a
            # multi-day batch on the first empty response.
            logger.log('Content\'s length is 0')
            return None
        result = re.findall(r'"Data":(.*)};', content, re.S)
        if not result:
            return None
        try:
            # SECURITY: eval() executes whatever the remote server returns.
            # Kept for compatibility with the payload format; consider a
            # real parser (json / ast.literal_eval) if the format allows it.
            return list(eval(result[0]))
        except Exception as e:
            logger.log(e)
            return None

    def save_to_dataframe(self, data, indexx, choice, post_fix):
        """Persist rows to ``<data dir>/<today>_<post_fix>.xls`` and to SQL.

        :param data: list of row lists; nothing is written when it is empty
            or ``None``.
        :param indexx: column labels for the DataFrame.
        :param choice: when ``1``, column 1 of every row is decoded from GBK
            and a ``今天的日期`` column holding ``self.today`` is appended.
        :param post_fix: suffix used in the file name and the table name.
        """
        if not data:
            # Skip this day instead of exiting the interpreter so callers
            # looping over many dates can continue.
            logger.log('no data to save for {} {}'.format(self.today, post_fix))
            return
        # Only touch the database once there is something to store.
        engine = setting.get_engine('db_zdt')
        if choice == 1:
            for row in data:
                row[choice] = row[choice].decode('gbk')

        df = pd.DataFrame(data, columns=indexx)
        if choice == 1:
            df[u'今天的日期'] = self.today

        # The data directory is not guaranteed to exist on a fresh checkout.
        if not os.path.isdir(self.path):
            os.makedirs(self.path)
        filename = os.path.join(self.path, self.today + "_" + post_fix + ".xls")
        df.to_excel(filename, encoding='gbk')
        try:
            # if_exists='fail' raises when the table already exists; that is
            # logged rather than propagated so a re-run does not crash.
            df.to_sql(self.today + post_fix, engine, if_exists='fail')
        except Exception as e:
            logger.log(e)

    def storedata(self):
        """Download, parse and store the limit-up data for ``self.today``."""
        zdt_content = self.getdata(self.zdt_url, headers=self.header_zdt)
        if zdt_content is None:
            # getdata() exhausted its retries; concatenating None into the
            # log message below would raise TypeError.
            logger.log('failed to fetch zdt content for {}'.format(self.today))
            return
        logger.log('zdt Content' + zdt_content)
        zdt_js = self.convert_json(zdt_content)
        self.save_to_dataframe(zdt_js, self.zdt_indexx, 1, 'zdt')
        # Short pause between days to avoid hammering the server.
        time.sleep(5)

if __name__ == '__main__':
    # Walk every calendar day in the range and fetch the limit-up data for
    # each day that tushare does not report as a holiday.
    for day in pd.date_range('20170401', '20171231'):
        today = day.strftime('%Y%m%d')
        # ts.is_holiday is called with a 'YYYY-MM-DD' string, as before.
        if not ts.is_holiday(day.strftime('%Y-%m-%d')):
            print(today)
            obj = GetZDT(today)
            obj.storedata()
        else:
            logger.log('Holiday')
[/i][/i]

原创。
转载请注明出处。

0 个评论

要回复文章请先登录注册