python预测下一期双色球号码【机器学习】
Task:
1. 在福彩官网抓取所有的历史双色球数据。
2. 使用历史数据进行机器学习。
Part1 数据抓取
main.py
from mylog import MyLog as mylog
from save2excel import SavaBallDate
这两个模块是用来显示 log 和把数据保存为 excel 文件的。
运行脚本后,会在本地生成一个 excel 文件,保存为以下的格式:
第一步完成。
Part2 机器学习进行预测
2018-02-19
待续,后面继续更新
原文链接:
http://30daydo.com/article/277
1. 在福彩官网抓取所有的历史双色球数据。
2. 使用历史数据进行机器学习。
Part1 数据抓取
main.py
import re
from bs4 import BeautifulSoup
import urllib2
from mylog import MyLog as mylog
from save2excel import SavaBallDate
class DoubleColorBallItem(object):
    """Plain record for one double-color-ball draw.

    Every field defaults to ``None`` at class level; the spider creates an
    empty item and assigns the scraped text to each attribute.
    """

    date = None         # draw date
    order = None        # issue number of the draw
    red1 = None         # red balls 1-6, in the order they appear on the page
    red2 = None
    red3 = None
    red4 = None
    red5 = None
    red6 = None
    blue = None         # blue ball
    money = None        # sales amount
    firstPrize = None   # first-prize figure
    secondPrize = None  # second-prize figure
class GetDoubleColorBallNumber(object):
    """Fetch the double-color-ball winning numbers and write them to a txt file.

    Instantiating the class runs the whole pipeline: collect the list-page
    URLs, parse every page into ``DoubleColorBallItem`` records, dump them to
    a text file and finally hand them to ``SavaBallDate`` for the Excel export.
    """

    def __init__(self):
        self.urls = []
        self.log = mylog()
        self.getUrls()
        self.items = self.spider(self.urls)
        self.pipelines(self.items)
        self.log.info('beging save data to excel \r\n')
        SavaBallDate(self.items)
        self.log.info('save data to excel end ...\r\n')

    def getUrls(self):
        """Fill ``self.urls`` with one URL per result list page.

        The page count is read from the first list page. If that page cannot
        be downloaded, ``self.urls`` is left empty (the failure has already
        been logged by ``getResponseContent``).
        """
        URL = r'http://kaijiang.zhcw.com/zhcw/html/ssq/list_1.html'
        htmlContent = self.getResponseContent(URL)
        if htmlContent is None:
            return
        soup = BeautifulSoup(htmlContent, 'lxml')
        # The pattern matches every tag whose name contains "p"; the last
        # match is used as the pager element.
        # NOTE(review): presumably the final <p> holds the page count - confirm
        # against the live page markup.
        tag = soup.find_all(re.compile('p'))[-1]
        pages = tag.strong.get_text()
        for i in range(1, int(pages) + 1):
            url = r'http://kaijiang.zhcw.com/zhcw/html/ssq/list_' + str(i) + '.html'
            self.urls.append(url)
            self.log.info(u'添加URL:%s 到URLS \r\n' % url)

    def getResponseContent(self, url):
        """Download *url* and return the raw response body.

        Kept as a separate method so that proxy and header support can be
        added later in one place.

        Returns:
            The response body, or ``None`` when the request fails (the
            failure is logged).
        """
        try:
            response = urllib2.urlopen(url.encode('utf8'))
            try:
                content = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
        except Exception:
            # Unlike a bare ``except:`` this lets KeyboardInterrupt and
            # SystemExit propagate.
            self.log.error(u'Python 返回URL:%s 数据失败 \r\n' % url)
            return None
        self.log.info(u'Python 返回URUL:%s 数据成功 \r\n' % url)
        return content

    def spider(self, urls):
        """Parse the winning-number rows out of every page in *urls*.

        Pages that fail to download are skipped.

        Returns:
            A list of ``DoubleColorBallItem`` objects, in page order.
        """
        items = []
        for url in urls:
            htmlContent = self.getResponseContent(url)
            if htmlContent is None:
                continue
            soup = BeautifulSoup(htmlContent, 'lxml')
            for tag in soup.find_all('tr', attrs={}):
                # Only rows containing <em> tags carry ball numbers.
                if not tag.find('em'):
                    continue
                item = DoubleColorBallItem()
                tagTd = tag.find_all('td')
                item.date = tagTd[0].get_text()
                item.order = tagTd[1].get_text()
                tagEm = tagTd[2].find_all('em')
                item.red1 = tagEm[0].get_text()
                item.red2 = tagEm[1].get_text()
                item.red3 = tagEm[2].get_text()
                item.red4 = tagEm[3].get_text()
                item.red5 = tagEm[4].get_text()
                item.red6 = tagEm[5].get_text()
                item.blue = tagEm[6].get_text()
                item.money = tagTd[3].find('strong').get_text()
                item.firstPrize = tagTd[4].find('strong').get_text()
                item.secondPrize = tagTd[5].find('strong').get_text()
                items.append(item)
                self.log.info(u'获取日期为:%s 的数据成功' % (item.date))
        return items

    def pipelines(self, items):
        """Write one whitespace-separated line per item to the text file."""
        fileName = u'双色球.txt'.encode('GBK')
        with open(fileName, 'w') as fp:
            for item in items:
                fp.write('%s %s \t %s %s %s %s %s %s %s \t %s \t %s %s \n'
                         % (item.date, item.order, item.red1, item.red2,
                            item.red3, item.red4, item.red5, item.red6,
                            item.blue, item.money, item.firstPrize,
                            item.secondPrize))
                self.log.info(u'将日期为:%s 的数据存入"%s"...' % (item.date, fileName.decode('GBK')))
if __name__ == '__main__':
    # Instantiating the class runs the whole scrape-and-export pipeline.
    GDCBN = GetDoubleColorBallNumber()
from mylog import MyLog as mylog
from save2excel import SavaBallDate
这两个模块是用来显示 log 和把数据保存为 excel 文件的。
import logging
import getpass
import sys
#### 定义MyLog类
class MyLog(object):
    """Small logging helper that writes every record to a file and the console.

    The underlying logger is named after the current user; the log file sits
    next to the running script, with the script's extension replaced by
    ``.log``.
    """

    def __init__(self):
        import os  # local import: only needed to derive the log file name

        self.user = getpass.getuser()
        self.logger = logging.getLogger(self.user)
        self.logger.setLevel(logging.DEBUG)

        # Log file name. splitext() removes whatever extension the script
        # has instead of blindly cutting the last three characters.
        self.logFile = os.path.splitext(sys.argv[0])[0] + '.log'
        self.formatter = logging.Formatter('%(asctime)-12s %(levelname)-8s %(name)-10s %(message)-12s\r\n')

        # logging.getLogger() returns the same logger object for the same
        # name, so handlers are attached only once; a second MyLog() reuses
        # them instead of duplicating every log line.
        self.logHand = self._existing_handler(logging.FileHandler)
        if self.logHand is None:
            self.logHand = logging.FileHandler(self.logFile, encoding='utf8')
            self.logHand.setFormatter(self.formatter)
            self.logHand.setLevel(logging.DEBUG)
            self.logger.addHandler(self.logHand)

        self.logHandSt = self._existing_handler(logging.StreamHandler)
        if self.logHandSt is None:
            self.logHandSt = logging.StreamHandler()
            self.logHandSt.setFormatter(self.formatter)
            self.logHandSt.setLevel(logging.DEBUG)
            self.logger.addHandler(self.logHandSt)

    def _existing_handler(self, handler_type):
        """Return the handler of exactly *handler_type* already attached, or None."""
        for handler in self.logger.handlers:
            # Exact type match on purpose: FileHandler subclasses
            # StreamHandler, so isinstance() would confuse the two.
            if type(handler) is handler_type:
                return handler
        return None

    # The five log levels map onto the five methods below.
    def debug(self, msg):
        """Log *msg* at DEBUG level."""
        self.logger.debug(msg)

    def info(self, msg):
        """Log *msg* at INFO level."""
        self.logger.info(msg)

    def warn(self, msg):
        """Log *msg* at WARNING level."""
        # Logger.warn is a deprecated alias of Logger.warning.
        self.logger.warning(msg)

    def error(self, msg):
        """Log *msg* at ERROR level."""
        self.logger.error(msg)

    def critical(self, msg):
        """Log *msg* at CRITICAL level."""
        self.logger.critical(msg)
if __name__ == '__main__':
    # Manual smoke test: emit one record per level, including non-ASCII text.
    mylog = MyLog()
    mylog.debug(u"I'm debug 测试中文")
    mylog.info("I'm info")
    mylog.warn("I'm warn")
    mylog.error(u"I'm error 测试中文")
    mylog.critical("I'm critical")
import xlwt
class SavaBallDate(object):
    """Export draw records to an ``.xls`` workbook with xlwt.

    Constructing an instance immediately writes the workbook.
    """

    # (column header, item attribute) pairs in sheet column order.
    _COLUMNS = (
        (u'开奖日期', 'date'),
        (u'期号', 'order'),
        (u'红1', 'red1'),
        (u'红2', 'red2'),
        (u'红3', 'red3'),
        (u'红4', 'red4'),
        (u'红5', 'red5'),
        (u'红6', 'red6'),
        (u'蓝', 'blue'),
        (u'销售金额', 'money'),
        (u'一等奖', 'firstPrize'),
        (u'二等奖', 'secondPrize'),
    )

    def __init__(self, items):
        self.items = items
        self.run(self.items)

    def run(self, items):
        """Write a header row plus one row per item, then save the workbook.

        Args:
            items: sequence of objects exposing the attributes named in
                ``_COLUMNS``.
        """
        fileName = u'双色球.xls'.encode('GBK')
        book = xlwt.Workbook(encoding='utf8')
        sheet = book.add_sheet('ball', cell_overwrite_ok=True)

        # Row 0 holds the column headers.
        for col, (title, _attr) in enumerate(self._COLUMNS):
            sheet.write(0, col, title.encode('utf8'))

        # Data rows start at row 1.
        for row, item in enumerate(items, 1):
            for col, (_title, attr) in enumerate(self._COLUMNS):
                sheet.write(row, col, getattr(item, attr))

        book.save(fileName)
if __name__ == '__main__':
    # Nothing to run standalone; main.py imports and calls SavaBallDate.
    pass
运行脚本后,会在本地生成一个 excel 文件,保存为以下的格式:
第一步完成。
Part2 机器学习进行预测
2018-02-19
待续,后面继续更新
原文链接:
http://30daydo.com/article/277