30天尝试新事情

python多重继承的super调用父类的兄弟类

python • 李魔佛发表了文章 • 0 个评论 • 2595 次浏览 • 2020-11-26 19:04 • 来自相关话题

先看一段代码：class A:
def __init__(self):
print('A init')
print(self)

class B:
def __init__(self):
print('B init')
print(self)

class C:
def __init__(self):
print('C init')
print(self)

class D(C, B, A):
def __init__(self):
super(A, self).__init__()
super(C, self).__init__()
super(B, self).__init__()
print('D ')

def main():
d = D()
看输出的是什么：B init
<__main__.D object at 0x00000000026365B0>
A init
<__main__.D object at 0x00000000026365B0>
D init-
为什么不输出C init ？

那么这个药从super的函数实现说起：def super(class, obj):
mro_list = obj.__class__.mro()
next_parent_class = mro_list[mro_list.index(class)+1]
return next_parent_class
super函数中，mro_list得到的是类的mro列表，mro列表就是类的继承有序关系图
比如上面代码中
C, B, A 在D中的mro是下面这样的。[<class '__main__.D'>, <class '__main__.C'>, <class '__main__.B'>, <class '__main__.A'>, <class 'object'>]

你提供的类名class传入来后，比如是C，那么找到C所在的index，然后加1后的class，也就是B，所以super（C,self）.__init__()
实际调用的是B.__init__()

另外，如果在多重继承中，要调用父类的父类的父类。。。。，
可以直接用改类的类名就可以# 多重继承

class A:
def __init__(self):
print('A init')
print(self)

def fun(self):
print('A',self)

class B(A):
def __init__(self):
print('B init')
print(self)

def fun(self):
print('B', self)

class C(B):

def __init__(self):
print('C init')
print(self)

def fun(self):
print('C', self)

class X:

def fun(self):
print('X',self)

class D(C):
def __init__(self):
# super(B, self).__init__() # super(D) -> 指向了C
print('D init')

def fun(self):
C.fun(self)
B.fun(self)
A.fun(self)
X.fun(self)

def main():
d = D()
print(d.__class__.mro())
d.fun()

if __name__ == '__main__':
main()

结果：D init
[<class '__main__.D'>, <class '__main__.C'>, <class '__main__.B'>, <class '__main__.A'>, <class 'object'>]
C <__main__.D object at 0x00000000025F6700>
B <__main__.D object at 0x00000000025F6700>
A <__main__.D object at 0x00000000025F6700>
X <__main__.D object at 0x00000000025F6700>
其实super和父类子类没什么关系， super关系的是mro里面的顺序。

原文链接：http://30daydo.com/article/44107
转载请注明出处
查看全部

先看一段代码：

class A:
    """First stand-alone demo base class; its constructor only reports itself."""

    def __init__(self):
        # Marker line first, then the instance, each on its own line.
        print('A init', self, sep='\n')





class B:
    """Second stand-alone demo base class; its constructor only reports itself."""

    def __init__(self):
        # A single write: the marker, a newline, then the instance.
        print(f'B init\n{self}')





class C:
    """Third stand-alone demo base class; its constructor only reports itself."""

    def __init__(self):
        # Print the marker and then the instance, one item per line.
        for item in ('C init', self):
            print(item)





class D(C, B, A):
    """Demo subclass whose MRO is D -> C -> B -> A -> object.

    Every ``super(X, self)`` call below starts the method lookup at the class
    that comes *after* ``X`` in that MRO, not at ``X`` itself.
    """

    def __init__(self):
        # Only object follows A in the MRO, so this runs object.__init__
        # and prints nothing.
        super(A, self).__init__()
        # B follows C, so this runs B.__init__.
        super(C, self).__init__()
        # A follows B, so this runs A.__init__.
        super(B, self).__init__()
        # No call starts the lookup before C, so C.__init__ never runs.
        print('D ')





def main():
    """Instantiate D once so that its constructor output is printed."""
    D()

看输出的是什么：

B init

<__main__.D object at 0x00000000026365B0>

A init

<__main__.D object at 0x00000000026365B0>

D 

为什么不输出C init ？

那么这个要从super的函数实现说起：

def super(cls, obj):
    """Simplified model of how the built-in ``super`` chooses its target class.

    The first parameter is spelled ``cls`` because ``class`` is a reserved
    word and cannot be used as a parameter name.

    Args:
        cls: The class after which the lookup should start.
        obj: An instance whose class supplies the MRO to search.

    Returns:
        The class that directly follows ``cls`` in the MRO of ``obj``'s class.
        This model returns that class itself rather than a proxy object.

    Raises:
        ValueError: If ``cls`` does not occur in the MRO of ``obj``'s class.
        IndexError: If ``cls`` is the last entry of that MRO.
    """
    mro_list = obj.__class__.mro()
    # The entry right after ``cls`` is where the method lookup begins.
    next_parent_class = mro_list[mro_list.index(cls) + 1]
    return next_parent_class

super函数中，mro_list得到的是类的mro列表，mro列表就是类的继承有序关系图
比如上面代码中
C, B, A 在D中的mro是下面这样的。

[<class '__main__.D'>, <class '__main__.C'>, <class '__main__.B'>, <class '__main__.A'>, <class 'object'>]

你提供的类名class传进来后，比如是C，那么先找到C在mro列表中的index，再取index加1位置上的class，也就是B，所以super(C, self).__init__()
实际调用的是B.__init__()

另外，如果在多重继承中，要调用父类的父类的父类。。。。，
可以直接用该类的类名就可以

# 多重继承





class A:
    """Root of the single-inheritance demo chain; reports construction and ``fun`` calls."""

    def fun(self):
        # Tag and instance separated by one space, exactly as print's default.
        print(f'A {self}')

    def __init__(self):
        # Marker line first, then the instance, each on its own line.
        print('A init', self, sep='\n')



class B(A):
    """Subclass of A that overrides both methods without delegating to A."""

    def __init__(self):
        # A single write: the marker, a newline, then the instance.
        print(f'B init\n{self}')

    def fun(self):
        label = 'B'
        print(label, self)





class C(B):
    """Subclass of B that overrides both methods without delegating to B."""

    def fun(self):
        # Tag and instance separated by one space.
        print(f'C {self}')

    def __init__(self):
        # Print the marker and then the instance, one item per line.
        for item in ('C init', self):
            print(item)



class X:
    """Unrelated helper class; ``fun`` prints the tag 'X' followed by the object it is given."""

    def fun(self):
        print(' '.join(('X', str(self))))



class D(C):
    """Leaf of the chain; calls ancestor methods by class name instead of ``super``."""

    def __init__(self):
        # Deliberately does not chain to C.__init__; only the D marker is printed.
        # super(B, self).__init__() # super(D) -> resolves to C
        print('D init')

    def fun(self):
        # Naming the class directly bypasses the MRO: each call runs exactly
        # the function defined on that class, with this instance as ``self``.
        C.fun(self)

        B.fun(self)

        A.fun(self)

        # X is not an ancestor of D; the call still works because X.fun is a
        # plain function that receives ``self`` explicitly.
        X.fun(self)







def main():
    """Create a D, print its MRO, then run the explicit per-class ``fun`` calls."""
    instance = D()
    print(type(instance).mro())
    instance.fun()


if __name__ == '__main__':
    main()

结果：

D init

[<class '__main__.D'>, <class '__main__.C'>, <class '__main__.B'>, <class '__main__.A'>, <class 'object'>]

C <__main__.D object at 0x00000000025F6700>

B <__main__.D object at 0x00000000025F6700>

A <__main__.D object at 0x00000000025F6700>

X <__main__.D object at 0x00000000025F6700>

其实super和父类子类没什么关系，super关心的是mro里面的顺序。

原文链接：http://30daydo.com/article/44107
转载请注明出处

python pathspec 库的作用

python • 李魔佛发表了文章 • 0 个评论 • 2402 次浏览 • 2020-11-25 13:28 • 来自相关话题

作为路径匹配用的。

看以下实例：
def get_ignore_matches():
# 排除文件
global ignore_matches
ignore_file = os.path.join(os.path.abspath(os.curdir), '.gitignore')
if not os.path.exists(ignore_file):
return None
if ignore_matches is not None:
return ignore_matches
with open(ignore_file, 'r') as fh:
spec = pathspec.PathSpec.from_lines('gitwildmatch', fh)
ignore_matches = spec
return ignore_matches

def is_ignored(file_name: str) -> bool:
# 匹配就ignore
matches = get_ignore_matches()
if matches is None:
return False
return matches.match_file(file_name)

gitignore文件里面的内容就会被匹配到
.idea/
build/
dist/
venv/
*.pyc
__pycache__/
*.egg-info/
tmp/ 查看全部

作为路径匹配用的。

看以下实例：

def get_ignore_matches():
    """Return the cached ``PathSpec`` built from ``.gitignore`` in the working directory.

    The file is parsed on the first successful call and the result is kept in
    the module-level name ``ignore_matches``; later calls reuse it.

    Returns:
        The compiled ``pathspec.PathSpec``, or ``None`` when the current
        working directory has no ``.gitignore`` file.
    """
    # Files to exclude are described by .gitignore.
    global ignore_matches
    ignore_file = os.path.join(os.path.abspath(os.curdir), '.gitignore')
    if not os.path.exists(ignore_file):
        return None

    # Look the cache up through globals() so the very first call works even
    # when the module never pre-assigned ``ignore_matches`` (a direct read
    # would raise NameError in that case).
    cached = globals().get('ignore_matches')
    if cached is not None:
        return cached

    with open(ignore_file, 'r') as fh:
        ignore_matches = pathspec.PathSpec.from_lines('gitwildmatch', fh)
    return ignore_matches





def is_ignored(file_name: str) -> bool:
    """Report whether ``file_name`` matches the loaded ignore patterns.

    Returns ``False`` when ``get_ignore_matches()`` yields no spec; otherwise
    returns whatever the spec's ``match_file`` returns for ``file_name``.
    """
    # A match means the file is ignored.
    spec = get_ignore_matches()
    return False if spec is None else spec.match_file(file_name)

gitignore文件里面的内容就会被匹配到

.idea/

build/

dist/

venv/

*.pyc

__pycache__/

*.egg-info/

tmp/

asyncio 异步爬取vs requests同步爬取性能对比

python爬虫 • 李魔佛发表了文章 • 0 个评论 • 2575 次浏览 • 2020-11-25 11:21 • 来自相关话题

首先是异步爬取：
import sys
sys.path.append('..')
import asyncio
import datetime
import aiohttp
import re
import time
from parsel import Selector
from configure.settings import DBSelector
from common.BaseService import BaseService

SLEEP = 2

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2'}

URL_MAP = {'home_page': 'https://holdle.com/stocks/industry', 'base': 'https://holdle.com'}

class AsyncMongo():
def __init__(self):
self.DB = DBSelector()
self.client = self.DB.mongo(location_type='qq', async_type=True)
self.db = self.client['db_stock']

async def update(self, table,data):
self.doc= self.db[table]
await self.doc.insert_many(data)

class Holdle(BaseService):

def __init__(self):
super(Holdle, self).__init__()
self.data_processor = AsyncMongo()
self.tables_list =['ROE','Cash_Ratio','Gross_Margin','Operation_Margin','Net_Profit_Ratio','Dividend_ratio']

async def home_page(self):
start = time.time()
async with aiohttp.ClientSession() as session:
async with session.get(url=URL_MAP['home_page'], headers=headers) as response:
html = await response.text() # 这个阻塞
resp = Selector(text=html)
industries = resp.xpath('//ul[@class="list-unstyled"]/a')
task_list = []
for industry in industries:
json_data = {}
industry_url = industry.xpath('.//@href').extract_first()
industry_name = industry.xpath('.//li/text()').extract_first()
industry_name = industry_name.replace('-', '').strip()
json_data['industry_url'] = industry_url
json_data['industry_name'] = industry_name

task = asyncio.ensure_future(self.detail_list(session, industry_url, json_data))
task_list.append(task)

await asyncio.gather(*task_list)
end = time.time()

print(f'time used {end - start}')

async def detail_list(self, session, url, json_data):

async with session.get(URL_MAP['base'] + url, headers=headers) as response:
response = await response.text()
await self.parse_detail(response, json_data)

async def parse_detail(self, html, json_data=None):
resp = Selector(text=html)
industry=json_data['industry_name']
tables = resp.xpath('//table[@class="table table-bordered"]')
if len(tables)!=6:
raise ValueError

for index,table in enumerate(self.tables_list):
rows = tables[index].xpath('.//tr')
result = []
for row in rows[1:]:
stock_name = row.xpath('.//td[1]/text()').extract_first()
value = row.xpath('.//td[2]/text()').extract_first()
value = float(value)
d={'industry':industry,'name':stock_name,'value':value,'crawltime':datetime.datetime.now()}
result.append(d)
await self.data_processor.update(table,result)

app = Holdle()
loop = asyncio.get_event_loop()
loop.run_until_complete(app.home_page())
爬完并且入库，用时大约为35s

使用requests爬取
# -*- coding: utf-8 -*-
# @Time : 2020/11/24 21:42
# @File : sync_spider.py
# @Author : Rocky C@www.30daydo.com
import requests
import sys
sys.path.append('..')
import asyncio
import datetime
import aiohttp
import re
import time
from parsel import Selector
from configure.settings import DBSelector
from common.BaseService import BaseService

SLEEP = 2

headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2'}

URL_MAP = {'home_page': 'https://holdle.com/stocks/industry', 'base': 'https://holdle.com'}

class Holdle(BaseService):

def __init__(self):
super(Holdle, self).__init__()

self.DB = DBSelector()
self.client = self.DB.mongo(location_type='qq', async_type=True)
self.session = requests.Session()

def run(self):
start = time.time()

response = self.session.get(url=URL_MAP['home_page'], headers=headers)
html = response.text # 这个阻塞
resp = Selector(text=html)
industries = resp.xpath('//ul[@class="list-unstyled"]/a')
for industry in industries:
json_data = {}
industry_url = industry.xpath('.//@href').extract_first()
industry_name = industry.xpath('.//li/text()').extract_first()
json_data['industry_url'] = industry_url
json_data['industry_name'] = industry_name
self.detail_list(industry_url, json_data)

end = time.time()
print(f'time used {end-start}')

def detail_list(self, url, json_data):

response = self.session.get(URL_MAP['base']+url, headers=headers)
response =response.text
self.parse_detail(response, json_data)

def parse_detail(self, html, json_data=None):
resp = Selector(text=html)
title =resp.xpath('//title/text()').extract_first()
print(title)

app = Holdle()
app.run()
用时约160s，而且这里还省略了mongo入库的时间。上面异步爬取里面包含了异步存入mongo。

所以单从网络IO性能上来说，异步是比纯同步要快很多。
但是，async的生态做得不是太好，第三方的异步框架做得也不够完善。

因为如果系统中引入了异步，很多耗时的地方也是需要使用异步的写法和框架，不然会导致系统的控制权没有被正确转移。

水文一篇。
完毕
查看全部

首先是异步爬取：

import sys

sys.path.append('..')

import asyncio

import datetime

import aiohttp

import re

import time

from parsel import Selector

from configure.settings import DBSelector

from common.BaseService import BaseService



SLEEP = 2



headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',

           'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2'}



URL_MAP = {'home_page': 'https://holdle.com/stocks/industry', 'base': 'https://holdle.com'}





class AsyncMongo():
    """Small async writer that stores crawl results in the ``db_stock`` database."""

    def __init__(self):
        self.DB = DBSelector()
        # NOTE(review): async_type=True presumably selects an awaitable
        # driver, since update() awaits insert_many — confirm in DBSelector.
        self.client = self.DB.mongo(location_type='qq', async_type=True)
        self.db = self.client['db_stock']

    async def update(self, table, data):
        """Insert the documents in ``data`` into the collection named ``table``.

        Args:
            table: Name of the target collection inside ``db_stock``.
            data: List of documents to insert. An empty (or ``None``) batch
                is skipped, because PyMongo-style ``insert_many`` rejects an
                empty document list.
        """
        # Kept as an attribute so existing readers of ``self.doc`` still see
        # the most recently addressed collection.
        self.doc = self.db[table]
        if not data:
            return
        await self.doc.insert_many(data)





class Holdle(BaseService):
    """Asynchronous crawler for the per-industry ranking tables on holdle.com.

    ``home_page`` collects the industry links, schedules one ``detail_list``
    task per industry and waits for all of them; each detail page is parsed
    by ``parse_detail`` and written through ``AsyncMongo``.
    """

    def __init__(self):
        super(Holdle, self).__init__()
        self.data_processor = AsyncMongo()
        # Collection names; parse_detail pairs them by position with the
        # tables found on a detail page.
        self.tables_list = ['ROE', 'Cash_Ratio', 'Gross_Margin', 'Operation_Margin', 'Net_Profit_Ratio', 'Dividend_ratio']

    async def home_page(self):
        """Crawl the industry index, then every industry page concurrently.

        Prints the elapsed wall-clock time once all detail tasks have finished.
        """
        start = time.time()
        async with aiohttp.ClientSession() as session:
            async with session.get(url=URL_MAP['home_page'], headers=headers) as response:
                # Awaiting the body suspends this coroutine until it has arrived.
                html = await response.text()

            # The index response is released above; only the session has to
            # stay open while the detail tasks run.
            industries = Selector(text=html).xpath('//ul[@class="list-unstyled"]/a')
            task_list = []
            for industry in industries:
                industry_url = industry.xpath('.//@href').extract_first()
                if industry_url is None:
                    # An anchor without href cannot be followed.
                    continue
                # extract_first() returns None when the <li> has no text.
                industry_name = industry.xpath('.//li/text()').extract_first() or ''
                json_data = {
                    'industry_url': industry_url,
                    'industry_name': industry_name.replace('-', '').strip(),
                }
                task_list.append(
                    asyncio.ensure_future(self.detail_list(session, industry_url, json_data))
                )

            await asyncio.gather(*task_list)
            end = time.time()

            print(f'time used {end - start}')

    async def detail_list(self, session, url, json_data):
        """Download one industry page and hand its HTML to ``parse_detail``.

        Args:
            session: The open ``aiohttp.ClientSession`` shared by all tasks.
            url: Site-relative path of the industry page.
            json_data: Mapping with the industry's url and name.
        """
        async with session.get(URL_MAP['base'] + url, headers=headers) as response:
            html = await response.text()
        # Parse and store only after the connection has been released.
        await self.parse_detail(html, json_data)

    async def parse_detail(self, html, json_data=None):
        """Extract the ranking tables from ``html`` and store them.

        Args:
            html: Markup of one industry page.
            json_data: Mapping that carries ``industry_name``; ``None`` is
                treated as an empty mapping.

        Raises:
            ValueError: If the page does not contain exactly one table per
                entry of ``self.tables_list``.
        """
        industry = (json_data or {}).get('industry_name')
        tables = Selector(text=html).xpath('//table[@class="table table-bordered"]')
        expected = len(self.tables_list)
        if len(tables) != expected:
            raise ValueError(
                f'expected {expected} tables for industry {industry!r}, found {len(tables)}'
            )

        for table, table_node in zip(self.tables_list, tables):
            result = []
            # The first <tr> is skipped — presumably the header row.
            for row in table_node.xpath('.//tr')[1:]:
                stock_name = row.xpath('.//td[1]/text()').extract_first()
                raw_value = row.xpath('.//td[2]/text()').extract_first()
                try:
                    value = float(raw_value)
                except (TypeError, ValueError):
                    # Missing or non-numeric cell: drop this row rather than
                    # aborting the whole crawl.
                    continue
                result.append({
                    'industry': industry,
                    'name': stock_name,
                    'value': value,
                    'crawltime': datetime.datetime.now(),
                })
            if result:
                # Nothing is written for a table that yielded no usable rows.
                await self.data_processor.update(table, result)





# Build the crawler, then run home_page() to completion on the default loop.
app = Holdle()

# NOTE(review): Holdle() creates its database client before the loop runs;
# confirm the client is not bound to this loop before moving to asyncio.run().
loop = asyncio.get_event_loop()

loop.run_until_complete(app.home_page())

爬完并且入库，用时大约为35s

使用requests爬取

# -*- coding: utf-8 -*-

# @Time : 2020/11/24 21:42

# @File : sync_spider.py

# @Author : Rocky C@www.30daydo.com

import requests

import sys

sys.path.append('..')

import asyncio

import datetime

import aiohttp

import re

import time

from parsel import Selector

from configure.settings import DBSelector

from common.BaseService import BaseService



SLEEP = 2



headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',

           'Accept-Language': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2'}



URL_MAP = {'home_page': 'https://holdle.com/stocks/industry', 'base': 'https://holdle.com'}





class Holdle(BaseService):
    """Synchronous crawler that walks the industry pages one after another.

    Used as the baseline for the timing comparison: every page is fetched
    with a blocking ``requests`` call and only its title is printed.
    """

    # Seconds to wait for connect/read before requests gives up; without a
    # timeout a stalled server would block the crawl indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        super(Holdle, self).__init__()

        self.DB = DBSelector()
        # NOTE(review): nothing in this class uses the client, and it is
        # requested with async_type=True in a synchronous crawler — confirm
        # whether it is still needed.
        self.client = self.DB.mongo(location_type='qq', async_type=True)
        self.session = requests.Session()

    def run(self):
        """Fetch the industry index, then each industry page in sequence.

        Prints the elapsed wall-clock time at the end.
        """
        start = time.time()

        response = self.session.get(
            url=URL_MAP['home_page'], headers=headers, timeout=self.REQUEST_TIMEOUT
        )
        # Blocking call: the whole body is read before execution continues.
        html = response.text
        industries = Selector(text=html).xpath('//ul[@class="list-unstyled"]/a')
        for industry in industries:
            industry_url = industry.xpath('.//@href').extract_first()
            if industry_url is None:
                # An anchor without href cannot be followed.
                continue
            json_data = {
                'industry_url': industry_url,
                'industry_name': industry.xpath('.//li/text()').extract_first(),
            }
            self.detail_list(industry_url, json_data)

        end = time.time()
        print(f'time used {end-start}')

    def detail_list(self, url, json_data):
        """Download one industry page and pass its HTML to ``parse_detail``.

        Args:
            url: Site-relative path of the industry page.
            json_data: Mapping with the industry's url and name.
        """
        response = self.session.get(
            URL_MAP['base'] + url, headers=headers, timeout=self.REQUEST_TIMEOUT
        )
        self.parse_detail(response.text, json_data)

    def parse_detail(self, html, json_data=None):
        """Print the ``<title>`` text of ``html``; ``json_data`` is not used."""
        title = Selector(text=html).xpath('//title/text()').extract_first()
        print(title)





# Build the synchronous crawler and run it; run() prints the elapsed time.
app = Holdle()

app.run()

用时约160s，而且这里还省略了mongo入库的时间。上面异步爬取里面包含了异步存入mongo。

所以单从网络IO性能上来说，异步是比纯同步要快很多。
但是，async的生态做得不是太好，第三方的异步框架做得也不够完善。

因为如果系统中引入了异步，很多耗时的地方也是需要使用异步的写法和框架，不然会导致系统的控制权没有被正确转移。

水文一篇。
完毕

【转载】现在都 2020 年了，普通人想好好维个权，太难了。

闲聊 • 绫波丽发表了文章 • 0 个评论 • 2098 次浏览 • 2020-11-24 12:50 • 来自相关话题

原文：
https://www.v2ex.com/t/728599#reply0
这几天在 v2 里看到租房子被商机割韭菜的，健身房跑路的，还有近期发生在自己身上的一些事情，发现维权真的太难了。当真有事情发生你自己在头上时，你连去哪里投诉解决这个事情的渠道都没有。

为了以后会有更多人上当受骗，我把自己案例写上来，希望大家别重蹈覆辙。哪怕增加一个 SEO 也 OK 了。

这家机构叫：环球网校公司主体北京环球兴学科技发展有限公司地址北京海淀区中关村南大街甲 18 号

我 11 月在这家机构缴费健康管理师，说报名 4 月考试，说的时候就告知这个是报名费，没有告知是课程费。

第二天我打客服要求退书费，我问客服 4 月份我是不是已经报上名了，客服说这个不是报名费是课程的费用，我想着 4 月份还不一定能报上，也没时间学习就跟销售申请退费了，销售说帮我申请了，后来我打客服，客服这边根本没有退费申请，现在给我开通的课程已经给我关闭了，我的课程也无法正常上课的，也不给退费，如果需要上课就需要签署协议，对于甲方来说是非常不利的协议，我不同意要求退费，这家公司就是不愿意退款，这家公司的销售就是最大的骗子，两个工号都不给的，问他工号，还问你上午中午下午，难道上中下的工号还是不一样的，最后还是不给工号。这家公司就是骗子公司，千万不要报名。

我现在的状态就是课程无法进去，一定要我签署协议，我不同意协议，客服和销售就说反正钱已经交了，你要上课协议就点同意，不想上钱也退不了。找客服，找销售，都是敷衍你。

第一承诺 4 月的名已经报上了，后来说这个是课程费，不是报名费。
第二，销售人员连工号都不给，无法保障消费者的利益。
第三，班主任现在也找不到人。第四多次联系客服，客服人员的说法不一致。

第五综合以上所有问题，我被坑了，无法信任次公司，绝对要求退款。
如果报了名的赶紧打客服要求退费，在打 010-12345 市长反应。

转载程序员的一个贴。
查看全部

原文：
https://www.v2ex.com/t/728599#reply0
这几天在 v2 里看到租房子被商家割韭菜的，健身房跑路的，还有近期发生在自己身上的一些事情，发现维权真的太难了。当真有事情发生在你自己头上时，你连去哪里投诉解决这个事情的渠道都没有。

为了以后不会有更多人上当受骗，我把自己的案例写上来，希望大家别重蹈覆辙。哪怕增加一个 SEO 也 OK 了。

这家机构叫：环球网校公司主体北京环球兴学科技发展有限公司地址北京海淀区中关村南大街甲 18 号

我 11 月在这家机构缴费健康管理师，说报名 4 月考试，说的时候就告知这个是报名费，没有告知是课程费。

第二天我打客服要求退书费，我问客服 4 月份我是不是已经报上名了，客服说这个不是报名费是课程的费用，我想着 4 月份还不一定能报上，也没时间学习就跟销售申请退费了，销售说帮我申请了，后来我打客服，客服这边根本没有退费申请，现在给我开通的课程已经给我关闭了，我的课程也无法正常上课的，也不给退费，如果需要上课就需要签署协议，对于甲方来说是非常不利的协议，我不同意要求退费，这家公司就是不愿意退款，这家公司的销售就是最大的骗子，两个工号都不给的，问他工号，还问你上午中午下午，难道上中下的工号还是不一样的，最后还是不给工号。这家公司就是骗子公司，千万不要报名。

我现在的状态就是课程无法进去，一定要我签署协议，我不同意协议，客服和销售就说反正钱已经交了，你要上课协议就点同意，不想上钱也退不了。找客服，找销售，都是敷衍你。

第一承诺 4 月的名已经报上了，后来说这个是课程费，不是报名费。
第二，销售人员连工号都不给，无法保障消费者的利益。
第三，班主任现在也找不到人。第四多次联系客服，客服人员的说法不一致。

第五，综合以上所有问题，我被坑了，无法信任此公司，坚决要求退款。
如果报了名的赶紧打客服要求退费，再打 010-12345 市长热线反映。

转载程序员的一个贴。

网站恢复，图片要等dns缓存一段时间才会正常出来

闲聊 • 李魔佛发表了文章 • 0 个评论 • 1989 次浏览 • 2020-11-23 17:29 • 来自相关话题

被人插入广告，导致网站停止运行1天。现在恢复正常。

待会逆向一下看看是哪位。

最近用appium写自动化撸羊毛撸得有点多

Android • 李魔佛发表了文章 • 0 个评论 • 2439 次浏览 • 2020-11-22 02:03 • 来自相关话题

还是用python写代码方便。
前阵子用autojs写，用的js开发语言，写完在手机上运行，无论稳定性，还是业务逻辑，还和在python上开发差太远，无论是功能，还是代码。

撸支付宝基金红包，就挂着等红包吧。

查看全部

还是用python写代码方便。
前阵子用autojs写，用的js开发语言，写完在手机上运行，无论稳定性，还是业务逻辑，还和在python上开发差太远，无论是功能，还是代码。

撸支付宝基金红包，就挂着等红包吧。

vimium 配合chrome 真的好用，尤其用惯vim的用户

Linux • 李魔佛发表了文章 • 0 个评论 • 1952 次浏览 • 2020-11-22 01:59 • 来自相关话题

chrome最好用的插件，没有之一哈。

上面是vimium的快捷键用法

chrome最好用的插件，没有之一哈。

上面是vimium的快捷键用法

vs code流畅是流畅，只是面对pycharm的调试与代码提示

闲聊 • 李魔佛发表了文章 • 0 个评论 • 2080 次浏览 • 2020-11-21 20:14 • 来自相关话题

还是用回了pycharm。
vs code只能是一个用来写简单应用的文本编辑器。

appium xpath获取属性clickable=true的控件

Android • 李魔佛发表了文章 • 0 个评论 • 2004 次浏览 • 2020-11-21 19:51 • 来自相关话题

推荐一下这种写法：
在找不到id，text等情况下，刚好有2个textview可以点击的，那么我们就选择这两个按钮
answer_list = self.driver.find_elements_by_xpath('//android.view.View[@clickable="true"]') 查看全部

推荐一下这种写法：
在找不到id，text等情况下，刚好有2个textview可以点击的，那么我们就选择这两个按钮

answer_list = self.driver.find_elements_by_xpath('//android.view.View[@clickable="true"]')

阿里系纯粹是自己做死不注重用户体验的结果

闲聊 • 李魔佛发表了文章 • 0 个评论 • 1976 次浏览 • 2020-11-21 12:08 • 来自相关话题

手机里除了支付宝，阿里系的软件基本不会再装。

大概几年前吧，装了淘宝，闲鱼，支付宝，天猫等app，启动其中一个app后就在后期偷偷启动其他几个阿里系的app。这个在后台app或者在日志（adb logcat -v time）里面可以看到。

正常启动也就算了关键这几个app实在太占用内存，基本占据了手机内存排行榜的前几年，所以只好把上面的app全部卸载了，留一个平时要的支付宝。

最近几年的双十一，大伙玩的叠猫猫，我基本都没打开过，支付宝老让提示下载天猫，淘宝，说送多少多少红包，呵呵，懒得鸟了。而且，支付宝永久了，存储空间会达到1.5GB，所以也要定期把app卸载干净，再重新装一次。

双十一每天就是刷猴，身边的老用户知道套路，基本也没人玩，直接pdd下单，又快又便宜。反正平时买的日用的日用品，用来收纳的，假货也不影响使用。

查看全部

手机里除了支付宝，阿里系的软件基本不会再装。

大概几年前吧，装了淘宝，闲鱼，支付宝，天猫等app，启动其中一个app后就会在后台偷偷启动其他几个阿里系的app。这个在后台app或者在日志（adb logcat -v time）里面可以看到。

正常启动也就算了，关键这几个app实在太占用内存，基本占据了手机内存排行榜的前几名，所以只好把上面的app全部卸载了，留一个平时要用的支付宝。

最近几年的双十一，大伙玩的叠猫猫，我基本都没打开过，支付宝老是提示下载天猫，淘宝，说送多少多少红包，呵呵，懒得鸟了。而且，支付宝用久了，存储空间会达到1.5GB，所以也要定期把app卸载干净，再重新装一次。

双十一每天就是刷猴，身边的老用户知道套路，基本也没人玩，直接pdd下单，又快又便宜。反正平时买的日用的日用品，用来收纳的，假货也不影响使用。

怎么能爬取注册信息或者是访客信息？

贡献

网络 • 李魔佛回复了问题 • 1 人关注 • 1 个回复 • 2362 次浏览 • 2020-11-21 12:00 • 来自相关话题

夜深了，你们还在吗？

python • chenchen 发表了文章 • 0 个评论 • 1783 次浏览 • 2020-11-20 22:34 • 来自相关话题

夜深了，你们还在吗？？？？？？？？？？？？？？？？？？？

大家好啊，日常报道，关照关照

python • chenchen 发表了文章 • 0 个评论 • 1640 次浏览 • 2020-11-20 17:11 • 来自相关话题

大家好啊，日常报道，关照关照。。。。。。。。。。。。

异步asyncio加锁的正确用法

python • 李魔佛发表了文章 • 0 个评论 • 4619 次浏览 • 2020-11-15 10:19 • 来自相关话题

对于全局变量count进行统计加锁
import aiohttp
import asyncio
import execjs
import threading
global pages
global count

headers = {
"Accept": "*/*",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "en-US,en;q=0.9",
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"Host": "dcfm.eastmoney.com",
"Pragma": "no-cache",
"Referer": "http://data.eastmoney.com/xg/xg/default.html",
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/69.0.3497.81 Chrome/69.0.3497.81 Safari/537.36",
}

home_url = 'http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?type=XGSG_LB&token=70f12f2f4f091e459a279469fe49eca5&st=purchasedate,securitycode&sr=-1&p={}&ps=50&js=var%20hsEnHLwG={{pages:(tp),data:(x)}}&rt=53512217'

loop = asyncio.get_event_loop()
# lock = threading.Lock()
lock = asyncio.Lock()
def parse_json(content):
content += ';function getV(){return hsEnHLwG;}'
ctx = execjs.compile(content)
result = ctx.call('getV')
return result

async def fetch(session,page):
global pages
global count
async with session.get(home_url.format(page),headers=headers) as resp:
# print(f'here：： {page}')
content = await resp.text()

try:
js_content = parse_json(content)
for stock_info in js_content['data']:
securityshortname = stock_info['securityshortname']
# print(securityshortname)
except Exception as e:
print(e)

async with lock:
count=count+1

print(f'count:{count}')
if count == pages:
print('End of loop')
loop.stop()

async def main():
global pages
global count
count=0
async with aiohttp.ClientSession() as session:
async with session.get(home_url.format(1), headers=headers) as resp:

content = await resp.text()
js_data = parse_json(content)
pages = js_data['pages']
print(f'pages: {pages}')
for page in range(1,pages+1):
task = asyncio.ensure_future(fetch(session,page))

await asyncio.sleep(1)

asyncio.ensure_future(main())
loop.run_forever()
1. 如果不加入锁，每次运行的结果可能不一样。
2. 不能用多线程的threading 锁，得到的每次运行结果也有可能不一样
3. 用asyncio的锁要加关键字 async
查看全部

对于全局变量count进行统计加锁

import aiohttp

import asyncio

import execjs

import threading

# Shared counters: main() stores the page total reported by the API in
# ``pages`` and fetch() adds one to ``count`` per finished page.  A ``global``
# statement at module level is a no-op, so bind real initial values instead.
pages = 0
count = 0



headers = {

    "Accept": "*/*",

    "Accept-Encoding": "gzip, deflate",

    "Accept-Language": "en-US,en;q=0.9",

    "Cache-Control": "no-cache",

    "Connection": "keep-alive",

    "Host": "dcfm.eastmoney.com",

    "Pragma": "no-cache",

    "Referer": "http://data.eastmoney.com/xg/xg/default.html",

    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/69.0.3497.81 Chrome/69.0.3497.81 Safari/537.36",

}



home_url = 'http://dcfm.eastmoney.com/em_mutisvcexpandinterface/api/js/get?type=XGSG_LB&token=70f12f2f4f091e459a279469fe49eca5&st=purchasedate,securitycode&sr=-1&p={}&ps=50&js=var%20hsEnHLwG={{pages:(tp),data:(x)}}&rt=53512217'



# One explicit loop is shared by the whole script: fetch() stops it and the
# last statement of the file runs it.
loop = asyncio.get_event_loop()

# Earlier attempt with a thread lock, kept for comparison with the asyncio lock.
# lock = threading.Lock()

# Guards the increment of the shared ``count`` inside fetch().
lock = asyncio.Lock()

def parse_json(content):
    """Evaluate the JavaScript returned by the API and return the assigned value.

    An accessor named ``getV`` is appended to ``content`` so the variable
    ``hsEnHLwG`` declared by the response can be read back through execjs.

    NOTE(review): ``content`` comes from the network and is executed as
    JavaScript — confirm the endpoint is trusted.
    """
    script = content + ';function getV(){return hsEnHLwG;}'
    return execjs.compile(script).call('getV')





async def fetch(session, page):
    """Download one result page, walk its records and bump the shared counter.

    Parse errors are printed and otherwise ignored. When the counter reaches
    the total page count, the event loop is stopped.
    """
    global pages, count
    url = home_url.format(page)
    async with session.get(url, headers=headers) as resp:
        content = await resp.text()

        try:
            for stock_info in parse_json(content)['data']:
                # The short name is read but not used further.
                securityshortname = stock_info['securityshortname']
        except Exception as e:
            print(e)

        # The increment of the shared counter happens under the asyncio lock.
        async with lock:
            count += 1

        print(f'count:{count}')
        if count != pages:
            return
        print('End of loop')
        loop.stop()







async def main():
    """Read the page total from page 1, then fetch every page concurrently.

    Resets the shared ``count``, stores the total in ``pages``, schedules one
    ``fetch`` task per page and waits for all of them while the HTTP session
    is still open.
    """
    global pages
    global count
    count = 0
    async with aiohttp.ClientSession() as session:
        async with session.get(home_url.format(1), headers=headers) as resp:
            content = await resp.text()

        js_data = parse_json(content)
        pages = js_data['pages']
        print(f'pages: {pages}')

        # Keep references to the tasks: the loop itself only holds weak ones.
        tasks = [
            asyncio.ensure_future(fetch(session, page))
            for page in range(1, pages + 1)
        ]
        # Wait for the page tasks instead of sleeping a fixed second, so the
        # session is not closed while requests are still in flight.
        # return_exceptions=True lets the remaining pages finish if one fails.
        await asyncio.gather(*tasks, return_exceptions=True)

    # fetch() stops the loop only when every page succeeded; stop it here as
    # well so run_forever() cannot hang after a failure or with zero pages.
    loop.stop()





# Schedule main() on the loop, then run until something calls loop.stop().
asyncio.ensure_future(main())

loop.run_forever()

1. 如果不加入锁，每次运行的结果可能不一样。
2. 不能用多线程的threading 锁，得到的每次运行结果也有可能不一样
3. 用asyncio的锁要加关键字 async，即写成 `async with lock:`

attrs() got an unexpected keyword argument 'eq'

python • 李魔佛发表了文章 • 0 个评论 • 2676 次浏览 • 2020-11-12 22:42 • 来自相关话题

xda@xda-dt:~$ pip install attrs --upgrade
Collecting attrs
Downloading https://files.pythonhosted.org ... y.whl (49kB)
100% |████████████████████████████████| 51kB 79kB/s
Installing collected packages: attrs
Found existing installation: attrs 18.2.0
Uninstalling attrs-18.2.0:
Successfully uninstalled attrs-18.2.0
Successfully installed attrs-20.3.0 查看全部

xda@xda-dt:~$ pip install attrs --upgrade
Collecting attrs
Downloading https://files.pythonhosted.org ... y.whl (49kB)
100% |████████████████████████████████| 51kB 79kB/s
Installing collected packages: attrs
Found existing installation: attrs 18.2.0
Uninstalling attrs-18.2.0:
Successfully uninstalled attrs-18.2.0
Successfully installed attrs-20.3.0

office替代软件，不是WPS，而是openoffice，因为wps的广告受不了

闲聊 • 李魔佛发表了文章 • 0 个评论 • 2136 次浏览 • 2020-11-12 09:33 • 来自相关话题

曾经有台电脑装了wps，结果每天联网弹广告。然后下载一个360杀毒的可以拦截广告，然后电脑弹出的360广告更多了。

哈哈。个人不喜欢wps，但在linux下用的比较多是openoffice，而它也有windows版本，所以在windows平台下是一个不错的替代品。至少干净。

http://www.openoffice.org/download/

查看全部

曾经有台电脑装了wps，结果每天联网弹广告。然后下载一个360杀毒的可以拦截广告，然后电脑弹出的360广告更多了。

哈哈。个人不喜欢wps，但在linux下用的比较多是openoffice，而它也有windows版本，所以在windows平台下是一个不错的替代品。至少干净。

http://www.openoffice.org/download/

windows下的分区挂载到linux下面，然后docker在上面挂载-无法识别

Linux • 李魔佛发表了文章 • 0 个评论 • 2254 次浏览 • 2020-11-11 02:40 • 来自相关话题

坑：
windows下的分区挂载到linux下面，然后docker在上面挂载-无法识别。
情况是这样的：
双系统的环境，一个ubuntu，一个windows。 ubuntu上的有个docker，但是ubuntu的分区比较小，需要挂载windows的ntfs分区来扩展空间。
其他程序用起来没什么问题，可是，在用docker 挂在windows分区下的文件夹，总是报错，用的 -v xxxx:xxxx 方式。
同样的docker启动命令，在ubuntu下的目录就没有问题，所以就开始怀疑是权限问题。
但是经过一系列的mount操作，chmod也给足了了权限，可是还没无法启动docker。
后来没办法，只好把windows分区缩小容量，把多余的容量格式化为ext分区格式，然后重新用docker mount -v, 后面docker就可以正常启动了。查看全部

坑：
windows下的分区挂载到linux下面，然后docker在上面挂载-无法识别。
情况是这样的：
双系统的环境，一个ubuntu，一个windows。 ubuntu上的有个docker，但是ubuntu的分区比较小，需要挂载windows的ntfs分区来扩展空间。
其他程序用起来没什么问题，可是，在用docker 挂载windows分区下的文件夹时，总是报错，用的是 -v xxxx:xxxx 方式。
同样的docker启动命令，在ubuntu下的目录就没有问题，所以就开始怀疑是权限问题。
但是经过一系列的mount操作，chmod也给足了权限，可是还是无法启动docker。
后来没办法，只好把windows分区缩小容量，把多余的容量格式化为ext分区格式，然后重新用docker mount -v, 后面docker就可以正常启动了。

FTP被部分网络运营商屏蔽

Linux • 李魔佛发表了文章 • 0 个评论 • 2662 次浏览 • 2020-11-08 15:51 • 来自相关话题

事情神奇的很，用阿里云的windows server的IIS搭建的FTP服务器，在服务器本地用fillza ftp软件可以正常使用，在家里却一直无法显示文件和正常下载。换到我的腾讯云服务器上，也可以正常下载与显示。

开始以为是我的tomato固件的路由器的问题，后来尝试换了路由器，问题还是存在。所以我就怀疑是我的联通运营商的问题。

我的FTP无论使用主动式还是被动式连接，都会出问题。而我换成用手机热点，作为wifi让我的电脑连上去，这个时候就没有问题了。妥妥的网络问题，渣渣联通运营商。

查看全部

事情神奇的很，用阿里云的windows server的IIS搭建的FTP服务器，在服务器本地用FileZilla ftp软件可以正常使用，在家里却一直无法显示文件和正常下载。换到我的腾讯云服务器上，也可以正常下载与显示。

开始以为是我的tomato固件的路由器的问题，后来尝试换了路由器，问题还是存在。所以我就怀疑是我的联通运营商的问题。

我的FTP无论使用主动式还是被动式连接，都会出问题。而我换成用手机热点，作为wifi让我的电脑连上去，这个时候就没有问题了。妥妥的网络问题，渣渣联通运营商。

pyecharts绘图保存为图片适用于ssh无头浏览器运行

python • 李魔佛发表了文章 • 0 个评论 • 2450 次浏览 • 2020-11-04 22:27 • 来自相关话题

网上搜索到的答案是使用chrome driver实现的，但是本人的程序是运行在centos下的，centos下折腾chrome driver比较蛋疼，所以看了下pyecharts.render的源码，其实这个也支持使用无头phantomjs进行截图的，当然这个不是一般的直接截取屏幕，是通过JS代码把html里面的渲染图像下载下来，清晰度比普通截图要高很多很多。

make_snapshot(snapshot, bar.render(), f"data/{today}_cb.png", driver=driver)

在最后一行传入一个driver既可以了，这个driver使用phantomjs的实例。

import os
from pyecharts.render import make_snapshot
from snapshot_selenium import snapshot
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Bar
import sys
from selenium import webdriver
from pyecharts.commons.utils import JsCode

if sys.platform == 'win32':
SELENIUM_PATH = r'C:\OneDrive\Tool\phantomjs-2.1.1-windows\phantomjs-2.1.1-windows\bin\phantomjs.exe'
driver = None
else:
SELENIUM_PATH = './phantomjs'
driver = webdriver.PhantomJS(executable_path=SELENIUM_PATH)

bar = (
Bar()
.add_xaxis(list(result_dict .keys()))
.add_yaxis(f"{today}-可转债价格分布", y_list, category_gap=3)
.add_yaxis(f"{today}-正股价格分布", y_zg_list, category_gap=3)
.set_series_opts(
label_opts=opts.LabelOpts(is_show=True),
axispointer_opts=opts.AxisPointerOpts(is_show=True))
.set_global_opts(
title_opts=opts.TitleOpts(title="可转债价格分布"),
xaxis_opts=opts.AxisOpts(
name="涨跌幅",
is_show=True,
name_rotate=30,
),
graphic_opts=[
opts.GraphicGroup(
graphic_item=opts.GraphicItem(
left="70%",
top="20%",
),
children=[
opts.GraphicText(
graphic_item=opts.GraphicItem(
left="center",
top="middle",
z=100,
),
graphic_textstyle_opts=opts.GraphicTextStyleOpts(
text=JsCode(
f"['涨幅>=0：{bigger}',"
f"'涨幅<0：{smaller}',"
f"'平均涨幅：{avg}%',"
f"'波动方差：{std}',"
f"'',"
f"'最大：{max_name} {max_pct}%',"
f"'最小：{min_name} {min_pct}%',"
"''].join('\\n')"
),
font="14px Microsoft YaHei",
graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
fill="#333"
)
)
)
]
)
],
)

)

bar.render(os.path.join('data', f"{today}_cb.html"))
make_snapshot(snapshot, bar.render(), f"data/{today}_cb.png", driver=driver)

查看全部

网上搜索到的答案是使用chrome driver实现的，但是本人的程序是运行在centos下的，centos下折腾chrome driver比较蛋疼，所以看了下pyecharts.render的源码，其实这个也支持使用无头phantomjs进行截图的，当然这个不是一般的直接截取屏幕，是通过JS代码把html里面的渲染图像下载下来，清晰度比普通截图要高很多很多。

make_snapshot(snapshot, bar.render(), f"data/{today}_cb.png", driver=driver)

在最后一行传入一个driver就可以了，这个driver使用phantomjs的实例。

import os

from pyecharts.render import make_snapshot

from snapshot_selenium import snapshot

import pandas as pd

from pyecharts import options as opts

from pyecharts.charts import Bar

import sys

from selenium import webdriver

from pyecharts.commons.utils import JsCode



# Choose the selenium driver used for the PNG snapshot.
# On Windows no driver is created here and None is passed on to
# make_snapshot (presumably snapshot_selenium then starts its default
# browser -- confirm).  Everywhere else a headless PhantomJS instance is
# started from the binary next to the script.
if sys.platform == 'win32':
    SELENIUM_PATH = r'C:\OneDrive\Tool\phantomjs-2.1.1-windows\phantomjs-2.1.1-windows\bin\phantomjs.exe'
    driver = None
else:
    SELENIUM_PATH = './phantomjs'
    driver = webdriver.PhantomJS(executable_path=SELENIUM_PATH)

# Build the chart on every platform.  It used to sit inside the ``else``
# branch above, so on Windows nothing was rendered at all.
# result_dict, today, y_list, y_zg_list and the statistics interpolated
# below (bigger, smaller, avg, std, max_*/min_*) come from earlier code.
bar = (
    Bar()
    .add_xaxis(list(result_dict.keys()))
    .add_yaxis(f"{today}-可转债价格分布", y_list, category_gap=3)
    .add_yaxis(f"{today}-正股价格分布", y_zg_list, category_gap=3)
    .set_series_opts(
        label_opts=opts.LabelOpts(is_show=True),
        axispointer_opts=opts.AxisPointerOpts(is_show=True),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(title="可转债价格分布"),
        xaxis_opts=opts.AxisOpts(
            name="涨跌幅",
            is_show=True,
            name_rotate=30,
        ),
        # Free-floating text box (70% from the left, 20% from the top)
        # that lists the summary statistics.
        graphic_opts=[
            opts.GraphicGroup(
                graphic_item=opts.GraphicItem(
                    left="70%",
                    top="20%",
                ),
                children=[
                    opts.GraphicText(
                        graphic_item=opts.GraphicItem(
                            left="center",
                            top="middle",
                            z=100,
                        ),
                        graphic_textstyle_opts=opts.GraphicTextStyleOpts(
                            # The text is a JavaScript expression: an
                            # array of lines joined with "\n".
                            text=JsCode(
                                f"['涨幅>=0：{bigger}',"
                                f"'涨幅<0：{smaller}',"
                                f"'平均涨幅：{avg}%',"
                                f"'波动方差：{std}',"
                                f"'',"
                                f"'最大：{max_name}  {max_pct}%',"
                                f"'最小：{min_name}  {min_pct}%',"
                                "''].join('\\n')"
                            ),
                            font="14px Microsoft YaHei",
                            graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
                                fill="#333"
                            ),
                        ),
                    )
                ],
            )
        ],
    )
)

# Render once and reuse the returned path for the snapshot; calling
# bar.render() a second time wrote an extra render.html just to get a path.
html_path = bar.render(os.path.join('data', f"{today}_cb.html"))
make_snapshot(snapshot, html_path, f"data/{today}_cb.png", driver=driver)

使用sshtunnel SSHTunnelForwarder 作为跳板连接mysql后一直卡住不退出

python • 李魔佛发表了文章 • 0 个评论 • 5323 次浏览 • 2020-11-04 10:10 • 来自相关话题

代码如下：

server = SSHTunnelForwarder(
ssh_address_or_host=host,
ssh_port=port,
ssh_username=user,
ssh_password=password,
local_bind_address=('127.0.0.1', local_port),
remote_bind_address=(host, mysql_port)
)

# Open the SSH tunnel configured above.
server.start()
# Connect to MySQL through the local end of the tunnel (127.0.0.1:local_port).
conn = pymysql.connect(
host='127.0.0.1',
port=local_port,
user=user,
password=password,
db='db_stock'
)

# Run one query and print the fetched rows.
cursor = conn.cursor()
cursor.execute('select count(*) from tb_cb_index')
ret = cursor.fetchall()
print(ret)
# NOTE: conn is never closed before this call.  According to the write-up
# that accompanies this snippet, the open MySQL connection keeps
# server.stop() from finishing, so 'stop' is never printed; the fix is to
# call conn.close() first.
server.stop()
print('stop')

代码运行后并没有结束，也没有打印stop的字符。在程序里已经使用了server.stop（）关闭ssh的连接。

后面发现日志里面，mysql的连接没有断开，导致server没有被关闭，所以在上面的代码中加一句：
print(ret)
conn.close()
server.stop()
print('stop')
把mysql的连接关闭，然后就可以把ssh的连接关闭，然后打印stop字符了。
查看全部

代码如下：

# Query MySQL through an SSH tunnel.
# The tunnel forwards ('127.0.0.1', local_port) on this machine to
# (host, mysql_port) as seen from the SSH server.
server = SSHTunnelForwarder(

    ssh_address_or_host=host,

    ssh_port=port,

    ssh_username=user,

    ssh_password=password,

    local_bind_address=('127.0.0.1', local_port),

    remote_bind_address=(host, mysql_port)

)



# Open the tunnel, then connect to MySQL through its local end.
server.start()

conn = pymysql.connect(

    host='127.0.0.1',

    port=local_port,

    user=user,

    password=password,

    db='db_stock'

)



# Run one query and print the fetched rows.
cursor = conn.cursor()

cursor.execute('select count(*) from tb_cb_index')

ret = cursor.fetchall()

print(ret)

# NOTE: conn is never closed before this call.  According to the write-up
# that accompanies this snippet, the open MySQL connection keeps
# server.stop() from finishing, so 'stop' is never printed; the fix is to
# call conn.close() first.
server.stop()

print('stop')

代码运行后并没有结束，也没有打印stop的字符。在程序里已经使用了server.stop（）关闭ssh的连接。

后面发现日志里面，mysql的连接没有断开，导致server没有被关闭，所以在上面的代码中加一句：

print(ret)

conn.close()

server.stop()

print('stop')

把mysql的连接关闭，然后就可以把ssh的连接关闭，然后打印stop字符了。

什么是http代理ip

python爬虫 • wanbainip 发起了问题 • 1 人关注 • 0 个回复 • 2564 次浏览 • 2020-11-03 18:17 • 来自相关话题

pyecharts感觉这个库的设计有点业余，太过于反python了

python • 李魔佛发表了文章 • 0 个评论 • 2033 次浏览 • 2020-11-03 01:09 • 来自相关话题

难道不知道python的数据分析用的是numpy的数据类型么？

一些扩展的类型int64作为数据导入到绘图函数中，居然不兼容，而且也不报错。
经过逐个参数的排查，才发现是数据类型的问题。数据是直接在pandas里面抽取出来的，然后填充到绘图汇总，然后就显示一片空白，还一直以为是我哪个参数用错了。

也许是从前端设计者手里接过来的项目吧。按葫芦画瓢这样操作。

一个函数里面居然可以放置几十个，上百个参数，我滴神呀，而且参数里面还有其他类型的参数，尼玛！

当然最让人头疼是那往上不兼容的做法，现在的新版本完全已经修改了，不兼容旧版本，之前的写代码又是一通修改。查看全部

难道不知道python的数据分析用的是numpy的数据类型么？

一些扩展的类型int64作为数据导入到绘图函数中，居然不兼容，而且也不报错。
经过逐个参数的排查，才发现是数据类型的问题。数据是直接在pandas里面抽取出来的，然后填充到绘图汇总，然后就显示一片空白，还一直以为是我哪个参数用错了。

也许是从前端设计者手里接过来的项目吧。按葫芦画瓢这样操作。

一个函数里面居然可以放置几十个，上百个参数，我滴神呀，而且参数里面还有其他类型的参数，尼玛！

当然最让人头疼是那往上不兼容的做法，现在的新版本完全已经修改了，不兼容旧版本，之前的写代码又是一通修改。

免费代理ip与收费的代理ip

python爬虫 • wanbainip 发表了文章 • 0 个评论 • 2639 次浏览 • 2020-10-30 18:00 • 来自相关话题

在爬虫采集数据过程中，经常会遇到ip被限制，这也是常见的问题。因为网站都会根据ip访问的频率进行判断封锁ip,这是反爬虫机制的策略，如果拥有大量的ip资源，自然会突破ip限制的反爬虫策略。

曾经有尝试过使用免费的代理ip来搭建代理池，可是免费的代理ip不仅资源少，而且可用率、高匿性、速度等都极差，每次使用都需要借助第三方软件进行检查是否可用，严重影响效率，根本满足不了任务的需求。

收费的代理ip与免费的代理ip差距非常大，不仅拥有海量的ip资源，可用率、高匿性、速度都是极好。操作简单工作效率既然提高上去了。经过多家的测试，最终选择了性价比最高的万变ip。高质量的优质代理ip才可以真正用来防止爬虫被封锁，如果使用普通代理，爬虫的真实IP还是会暴露。新获取一批新IP 查看全部

在爬虫采集数据过程中，经常会遇到ip被限制，这也是常见的问题。因为网站都会根据ip访问的频率进行判断封锁ip,这是反爬虫机制的策略，如果拥有大量的ip资源，自然会突破ip限制的反爬虫策略。

曾经有尝试过使用免费的代理ip来搭建代理池，可是免费的代理ip不仅资源少，而且可用率、高匿性、速度等都极差，每次使用都需要借助第三方软件进行检查是否可用，严重影响效率，根本满足不了任务的需求。

收费的代理ip与免费的代理ip差距非常大，不仅拥有海量的ip资源，可用率、高匿性、速度都是极好。操作简单工作效率既然提高上去了。经过多家的测试，最终选择了性价比最高的万变ip。高质量的优质代理ip才可以真正用来防止爬虫被封锁，如果使用普通代理，爬虫的真实IP还是会暴露。新获取一批新IP

autojs基础教程入门篇

Android • 李魔佛发表了文章 • 0 个评论 • 9178 次浏览 • 2020-10-30 11:09 • 来自相关话题

1. 脚本存放路径：内部存储设置（内置存储卡）根目录 /storage/脚本

"/storage/emulated/0/脚本/"

2. 对于一些封装的模块的调用
像下面笔者自己封装的一个模块：
/**
 * Convenience wrapper around the Auto.js global automation functions
 * (click, swipe, shell, launch, ...) bound to one target application.
 *
 * @param {string} [packageName] Package name of the app to drive. When it
 *     is given but getAppName() returns nothing for it, a toast is shown
 *     and the script exits.
 */
function Helper(packageName) {
    // Directory prefix stripped from the script path in before()/after().
    var SCRIPT_DIR = "/storage/emulated/0/脚本/";

    this.packageName = packageName;
    if (packageName && !getAppName(packageName)) {
        toast("找不到此应用, 无法提供服务");
        // this.sleep is only assigned further down, so calling it here threw
        // a TypeError. Use the global sleep(), which this file calls with
        // milliseconds, to pause one second before exiting.
        sleep(1000);
        exit();
    }

    // Tap the screen at (x, y); returns whatever the global click() returns.
    this.click = function (x, y) {
        return click(x, y);
    };

    // Tap the center of a widget's bounds; false when no widget was given.
    this.clickCenter = function (widget) {
        if (!widget) {
            toast('找不到控件');
            return false;
        }
        let rect = widget.bounds();
        return click(rect.centerX(), rect.centerY());
    };

    // Resolve a selector with findOne(2000) and tap the center of the result.
    this.clickSelectorCenter = function (selector) {
        if (!selector) {
            toast('找不到控件');
            return false;
        }
        let widget = selector.findOne(2000);
        return this.clickCenter(widget);
    };

    // Tap every [x, y] pair in points, calling this.sleep(interval) after
    // each tap. interval is in seconds and defaults to 0 when omitted
    // (clickMultiCenter calls this without an interval).
    this.clickMulti = function (points, interval) {
        var pause = interval || 0;
        points.forEach(function (point) {
            this.click(point[0], point[1]);
            this.sleep(pause);
        }.bind(this));
    };

    // Tap the center of every widget in the given collection.
    this.clickMultiCenter = function (widgets) {
        if (!widgets || widgets.length == 0)
            return;

        // Was "var points = ;" -- a syntax error; the empty array literal
        // had been lost.
        var points = [];
        widgets.forEach(function (widget) {
            var rect = widget.bounds();
            points.push([rect.centerX(), rect.centerY()]);
        });
        this.clickMulti(points);
    };

    // Tap the center of the first widget with the given id.
    this.clickIdCenter = function (idStr) {
        if (!idStr) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(id(idStr));
    };

    // Tap the center of the first widget whose text equals str.
    this.clickTextCenter = function (str) {
        if (!str) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(text(str));
    };

    // Tap the center of the first widget whose text matches the pattern str.
    this.clickRegTextCenter = function (str) {
        if (!str) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(textMatches(str));
    };

    // Tap the center of the first widget whose description equals str.
    this.clickDescCenter = function (str) {
        if (!str) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(desc(str));
    };

    // Tap the center of the first widget whose description matches str.
    this.clickRegDescCenter = function (str) {
        if (!str) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(descMatches(str));
    };

    // Tap the center of the first widget with the given class name.
    this.clickClassCenter = function (classNameStr) {
        // Was "!className", which tested the global selector function
        // instead of the argument, so an empty argument was never caught.
        if (!classNameStr) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(className(classNameStr));
    };

    // Tap the center of the index-th widget (1-based) whose text equals str.
    // Returns false when str is empty or there is no such widget.
    this.clickNTextCenter = function (str, index) {
        if (!str)
            return false;
        let widgets = text(str).find();
        if (!widgets)
            return false;

        index--;
        for (let i = 0; i < widgets.length; i++) {
            if (i == index)
                return this.clickCenter(widgets[i]);
        }
        return false;
    };

    // Scroll up once, or counter times with this.sleep(time) after each
    // scroll when both arguments are given.
    this.pageUp = (counter, time) => {
        if (counter && time) {
            for (let i = 0; i < counter; i++) {
                scrollUp();
                this.sleep(time);
            }
        } else {
            scrollUp();
        }
    };

    // Scroll down once, or counter times with this.sleep(time) after each
    // scroll when both arguments are given.
    this.pageDown = (counter, time) => {
        if (counter && time) {
            for (let i = 0; i < counter; i++) {
                scrollDown();
                this.sleep(time);
            }
        } else {
            scrollDown();
        }
    };

    // Thin wrappers that forward to the global function of the same name.
    this.swipe = function (x1, y1, x2, y2, duration) {
        swipe(x1, y1, x2, y2, duration);
    };
    // Takes SECONDS; converted to the milliseconds the global sleep() gets.
    this.sleep = (second) => {
        sleep(second * 1000);
    };
    this.back = function () {
        back();
    };
    this.home = function () {
        home();
    };
    // Runs the command via shell(command, true).
    this.shell = function (command) {
        shell(command, true);
    };
    // Launch the app this helper was created for.
    this.launch = function () {
        launch(this.packageName);
    };
    this.waitForActivity = function (activityName) {
        waitForActivity(activityName);
    };
    // Start an activity of the target app with "am start" and wait for it.
    this.launchActivity = function (activityName) {
        shell("am start -n " + this.packageName + "/" + activityName, true);
        waitForActivity(activityName);
    };
    // Force-stop the target app.
    this.kill = function () {
        shell("am force-stop " + this.packageName, true);
    };

    // Call at the start of a script: announces the script name, sets the
    // screen metrics to portrait (short side = width) and, unless
    // ignoreSleep is truthy, sleeps random() * 10 seconds so that the
    // check-in time is not fixed.
    this.before = function (ignoreSleep) {
        let source = engines.myEngine().source.toString();
        source = source.replace(SCRIPT_DIR, "");
        toast("开始执行[" + source + "]...");

        const WIDTH = Math.min(device.width, device.height);
        const HEIGHT = Math.max(device.width, device.height);
        setScreenMetrics(WIDTH, HEIGHT);
        if (!ignoreSleep)
            this.sleep(random() * 10);
    };

    // Call at the end of a script: announces the script name and exits.
    // Strips the same prefix as before(); it used to omit the trailing
    // slash, which left a leading "/" in the toast.
    this.after = function () {
        let source = engines.myEngine().source.toString();
        source = source.replace(SCRIPT_DIR, "");
        toast("结束执行[" + source + "]...");
        exit();
    };

    // Capture the screen; logs and exits the script when no image comes back.
    this.getCaptureImg = () => {
        var img = captureScreen();
        // "undifined" was a typo, so the typeof comparison could never match.
        if (!img || typeof (img) == "undefined") {
            console.log("截图失败,退出脚本");
            exit();
        } else {
            return img;
        }
    };
}

module.exports = Helper;
可以把文件保存为Helper.js，然后要放到手机的 "脚本" 文件夹下

然后在其他文件中，如果要调用
var Helper = require("Helper.js");
var helper = new Helper("com.galaxy.stock");
然后后续可以用helper来正常操作，比如上面我要自动启动银河证券 helper.launch() 就可以了

华宝油气自动化申购脚本 AutoJS

更新待续。。。。
查看全部

1. 脚本存放路径：内部存储设置（内置存储卡）根目录 /storage/脚本

"/storage/emulated/0/脚本/"

2. 对于一些封装的模块的调用
像下面笔者自己封装的一个模块：

/**
 * Convenience wrapper around the Auto.js global automation functions
 * (click, swipe, shell, launch, ...) bound to one target application.
 *
 * @param {string} [packageName] Package name of the app to drive. When it
 *     is given but getAppName() returns nothing for it, a toast is shown
 *     and the script exits.
 */
function Helper(packageName) {
    // Directory prefix stripped from the script path in before()/after().
    var SCRIPT_DIR = "/storage/emulated/0/脚本/";

    this.packageName = packageName;
    if (packageName && !getAppName(packageName)) {
        toast("找不到此应用, 无法提供服务");
        // this.sleep is only assigned further down, so calling it here threw
        // a TypeError. Use the global sleep(), which this file calls with
        // milliseconds, to pause one second before exiting.
        sleep(1000);
        exit();
    }

    // Tap the screen at (x, y); returns whatever the global click() returns.
    this.click = function (x, y) {
        return click(x, y);
    };

    // Tap the center of a widget's bounds; false when no widget was given.
    this.clickCenter = function (widget) {
        if (!widget) {
            toast('找不到控件');
            return false;
        }
        let rect = widget.bounds();
        return click(rect.centerX(), rect.centerY());
    };

    // Resolve a selector with findOne(2000) and tap the center of the result.
    this.clickSelectorCenter = function (selector) {
        if (!selector) {
            toast('找不到控件');
            return false;
        }
        let widget = selector.findOne(2000);
        return this.clickCenter(widget);
    };

    // Tap every [x, y] pair in points, calling this.sleep(interval) after
    // each tap. interval is in seconds and defaults to 0 when omitted
    // (clickMultiCenter calls this without an interval).
    this.clickMulti = function (points, interval) {
        var pause = interval || 0;
        points.forEach(function (point) {
            this.click(point[0], point[1]);
            this.sleep(pause);
        }.bind(this));
    };

    // Tap the center of every widget in the given collection.
    this.clickMultiCenter = function (widgets) {
        if (!widgets || widgets.length == 0)
            return;

        // Was "var points = ;" -- a syntax error; the empty array literal
        // had been lost.
        var points = [];
        widgets.forEach(function (widget) {
            var rect = widget.bounds();
            points.push([rect.centerX(), rect.centerY()]);
        });
        this.clickMulti(points);
    };

    // Tap the center of the first widget with the given id.
    this.clickIdCenter = function (idStr) {
        if (!idStr) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(id(idStr));
    };

    // Tap the center of the first widget whose text equals str.
    this.clickTextCenter = function (str) {
        if (!str) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(text(str));
    };

    // Tap the center of the first widget whose text matches the pattern str.
    this.clickRegTextCenter = function (str) {
        if (!str) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(textMatches(str));
    };

    // Tap the center of the first widget whose description equals str.
    this.clickDescCenter = function (str) {
        if (!str) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(desc(str));
    };

    // Tap the center of the first widget whose description matches str.
    this.clickRegDescCenter = function (str) {
        if (!str) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(descMatches(str));
    };

    // Tap the center of the first widget with the given class name.
    this.clickClassCenter = function (classNameStr) {
        // Was "!className", which tested the global selector function
        // instead of the argument, so an empty argument was never caught.
        if (!classNameStr) {
            toast('找不到控件');
            return false;
        }
        return this.clickSelectorCenter(className(classNameStr));
    };

    // Tap the center of the index-th widget (1-based) whose text equals str.
    // Returns false when str is empty or there is no such widget.
    this.clickNTextCenter = function (str, index) {
        if (!str)
            return false;
        let widgets = text(str).find();
        if (!widgets)
            return false;

        index--;
        for (let i = 0; i < widgets.length; i++) {
            if (i == index)
                return this.clickCenter(widgets[i]);
        }
        return false;
    };

    // Scroll up once, or counter times with this.sleep(time) after each
    // scroll when both arguments are given.
    this.pageUp = (counter, time) => {
        if (counter && time) {
            for (let i = 0; i < counter; i++) {
                scrollUp();
                this.sleep(time);
            }
        } else {
            scrollUp();
        }
    };

    // Scroll down once, or counter times with this.sleep(time) after each
    // scroll when both arguments are given.
    this.pageDown = (counter, time) => {
        if (counter && time) {
            for (let i = 0; i < counter; i++) {
                scrollDown();
                this.sleep(time);
            }
        } else {
            scrollDown();
        }
    };

    // Thin wrappers that forward to the global function of the same name.
    this.swipe = function (x1, y1, x2, y2, duration) {
        swipe(x1, y1, x2, y2, duration);
    };
    // Takes SECONDS; converted to the milliseconds the global sleep() gets.
    this.sleep = (second) => {
        sleep(second * 1000);
    };
    this.back = function () {
        back();
    };
    this.home = function () {
        home();
    };
    // Runs the command via shell(command, true).
    this.shell = function (command) {
        shell(command, true);
    };
    // Launch the app this helper was created for.
    this.launch = function () {
        launch(this.packageName);
    };
    this.waitForActivity = function (activityName) {
        waitForActivity(activityName);
    };
    // Start an activity of the target app with "am start" and wait for it.
    this.launchActivity = function (activityName) {
        shell("am start -n " + this.packageName + "/" + activityName, true);
        waitForActivity(activityName);
    };
    // Force-stop the target app.
    this.kill = function () {
        shell("am force-stop " + this.packageName, true);
    };

    // Call at the start of a script: announces the script name, sets the
    // screen metrics to portrait (short side = width) and, unless
    // ignoreSleep is truthy, sleeps random() * 10 seconds so that the
    // check-in time is not fixed.
    this.before = function (ignoreSleep) {
        let source = engines.myEngine().source.toString();
        source = source.replace(SCRIPT_DIR, "");
        toast("开始执行[" + source + "]...");

        const WIDTH = Math.min(device.width, device.height);
        const HEIGHT = Math.max(device.width, device.height);
        setScreenMetrics(WIDTH, HEIGHT);
        if (!ignoreSleep)
            this.sleep(random() * 10);
    };

    // Call at the end of a script: announces the script name and exits.
    // Strips the same prefix as before(); it used to omit the trailing
    // slash, which left a leading "/" in the toast.
    this.after = function () {
        let source = engines.myEngine().source.toString();
        source = source.replace(SCRIPT_DIR, "");
        toast("结束执行[" + source + "]...");
        exit();
    };

    // Capture the screen; logs and exits the script when no image comes back.
    this.getCaptureImg = () => {
        var img = captureScreen();
        // "undifined" was a typo, so the typeof comparison could never match.
        if (!img || typeof (img) == "undefined") {
            console.log("截图失败,退出脚本");
            exit();
        } else {
            return img;
        }
    };
}

module.exports = Helper;

可以把文件保存为Helper.js，然后要放到手机的 "脚本" 文件夹下

然后在其他文件中，如果要调用

var Helper = require("Helper.js");

var helper = new Helper("com.galaxy.stock");

然后后续可以用helper来正常操作，比如上面我要自动启动银河证券

helper.launch()

就可以了

华宝油气自动化申购脚本 AutoJS

更新待续。。。。

pandas dataframe 切片与python列表切片的区别

量化交易-Ptrade-QMT • 李魔佛发表了文章 • 0 个评论 • 3042 次浏览 • 2020-10-29 23:34 • 来自相关话题

有一个Dataframe

df = pd.DataFrame(np.arange(16).reshape((8,2)),index=list('abcdefgh'),columns=['COL1','COL2'])

  COL1 COL2
a 0 1
b 2 3
c 4 5
d 6 7
e 8 9
f 10 11
g 12 13
h 14 15

那么如果我用df['a':'e'] 返回的结果是：

  COL1 COL2
a 0 1
b 2 3
c 4 5
d 6 7
e 8 9

是包含e尾部的，

而python的切片
a = [0,1,2,3,4,5]
a[1:5] 是不包含最后一个元素的。

原文链接：
http://30daydo.com/article/630
查看全部

有一个Dataframe

df = pd.DataFrame(np.arange(16).reshape((8,2)),index=list('abcdefgh'),columns=['COL1','COL2'])

那么如果我用df['a':'e'] 返回的结果是：

	COL1	COL2

a	0	1

b	2	3

c	4	5

d	6	7

e	8	9

是包含e尾部的，

而python的切片
a = [0,1,2,3,4,5]
a[1:5] 是不包含最后一个元素的。

原文链接：
http://30daydo.com/article/630

【python数据采集与分析】获取配售新股最多的基金 - 天天基金网套利

股票 • 李魔佛发表了文章 • 0 个评论 • 2243 次浏览 • 2020-10-28 23:37 • 来自相关话题

需求是这样的：

根据天天基金网的获得新股的基金数据，采集下来。然后再去新股获配详情里面，点击某一只新股，然后里面有每一个基金的拟申购股数与金额，通过这个数据取更新某个基金的实际获配金额，更为精确的知道基金中新股的占比。

已完成。

部分数据如下图：

点击查看大图

程序编译为exe可执行程序，兼容各个平台。
对数据或者程序有意者可以私信。查看全部

需求是这样的：

根据天天基金网的获得新股的基金数据，采集下来。然后再去新股获配详情里面，点击某一只新股，然后里面有每一个基金的拟申购股数与金额，通过这个数据取更新某个基金的实际获配金额，更为精确的知道基金中新股的占比。

已完成。

部分数据如下图：

点击查看大图

程序编译为exe可执行程序，兼容各个平台。
对数据或者程序有意者可以私信。

Python爬虫学习者需要注意什么？

python爬虫 • wanbainip 发表了文章 • 0 个评论 • 2412 次浏览 • 2020-10-28 17:14 • 来自相关话题

在这个大数据时代里，学习Python网络爬虫的人越来越多，在学习过程中难免会遇到一些问题，学习者最常见的问题就是遇到ip被限制，因为你在爬虫采集数据过程中，同一个ip频繁的对网站进行访问，会给对方服务器会造成压力，那么网站就根据ip访问的频率来限制你的ip，这也是反爬虫机制常见的一种判断。

最常见的解决方法就是使用大量的ip，就是借着代理ip保证IP被封时有替换IP可用，永远保持着续航能力。这里推荐51代理ip，作为一家提供代理IP的专业服务商，万变ip代理拥有强大的技术团队运营维护，全高匿系统所产生的高匿ip不仅安全稳定、而且速度快，以及与爬虫用户多年来合作的宝贵经验，是Python爬虫首选代理IP。

Python是一种全栈计算机程序设计语言，全栈，顾名思义，应用范围广。你可能听说过很多编程语言，例如C语言，Java语言等，众所周知，这些语言都非常难学，更别说精通使用了。而python不一样，比如完成一个Web服务，C语言要写1000行代码，Java要写100行，而python可能只要写20行。对！这就是差距！目前由于python“简单易懂”，已逐步成为网络爬虫主流语言。

在初学python爬虫时，很多程序员会被一些“小问题”阻碍脚步，为避免大家再次犯同样的错误，加快学习进程，在爬取网站信息时一定要使用大量代理IP。好用的代理IP服务商，

高效率的爬虫工作离不开ip代理的支持，这就是ip代理越来越受欢迎的原因！收藏举报投诉查看全部

在这个大数据时代里，学习Python网络爬虫的人越来越多，在学习过程中难免会遇到一些问题，学习者最常见的问题就是遇到ip被限制，因为你在爬虫采集数据过程中，同一个ip频繁的对网站进行访问，会给对方服务器会造成压力，那么网站就根据ip访问的频率来限制你的ip，这也是反爬虫机制常见的一种判断。

最常见的解决方法就是使用大量的ip，就是借着代理ip保证IP被封时有替换IP可用，永远保持着续航能力。这里推荐51代理ip，作为一家提供代理IP的专业服务商，万变ip代理拥有强大的技术团队运营维护，全高匿系统所产生的高匿ip不仅安全稳定、而且速度快，以及与爬虫用户多年来合作的宝贵经验，是Python爬虫首选代理IP。

Python是一种全栈计算机程序设计语言，全栈，顾名思义，应用范围广。你可能听说过很多编程语言，例如C语言，Java语言等，众所周知，这些语言都非常难学，更别说精通使用了。而python不一样，比如完成一个Web服务，C语言要写1000行代码，Java要写100行，而python可能只要写20行。对！这就是差距！目前由于python“简单易懂”，已逐步成为网络爬虫主流语言。

在初学python爬虫时，很多程序员会被一些“小问题”阻碍脚步，为避免大家再次犯同样的错误，加快学习进程，在爬取网站信息时一定要使用大量代理IP。好用的代理IP服务商，

高效率的爬虫工作离不开ip代理的支持，这就是ip代理越来越受欢迎的原因！收藏举报投诉

艾德暗盘交易时间

股票 • 绫波丽发表了文章 • 0 个评论 • 1882 次浏览 • 2020-10-28 16:09 • 来自相关话题

看了下APP，和富途等其他券商一样，都是在16:15分。

港股开户：

联系小助手，融资额度20倍，不用抢。

看了下APP，和富途等其他券商一样，都是在16:15分。

港股开户：

联系小助手，融资额度20倍，不用抢。

优矿由于您的Notebook运行内容占用内存过大或其他逻辑错误，微核已自动重启。

股票 • 李魔佛发表了文章 • 0 个评论 • 2113 次浏览 • 2020-10-28 01:12 • 来自相关话题

刚打开一个空白的notebook，就给我提示这个错误信息，看来优矿要收割用户了，还好我有本地数据。

Python爬虫虎牙平台主播的图片代码

python爬虫 • wanbainip 发表了文章 • 0 个评论 • 2467 次浏览 • 2020-10-27 17:55 • 来自相关话题

今天万变ip就为大家分享一下，Python爬虫是如何爬取虎牙平台的主播图片。这里我们主要爬取虎牙星秀的主播图片，并下载保存到本地。在爬取之前，我们可先打开目标页面，F12审查元素，查找我们需要的信息所在的位置。

代码如下：

import urllib.request

import re

import os

# 全局变量用来记录图片的编号

gl_z = 0

def down_img(url1):
    """Download one image and save it in the ``photo3`` directory.

    Args:
        url1: Image link as matched on the page, without a scheme (for
            example ``//huyaimg.msstatic.com/.../x.jpg?``).  Every ``?`` is
            removed and ``https:`` is prepended before the request is made.

    The module-level counter ``gl_z`` is appended to the file name and is
    incremented after the file has been written.
    """
    global gl_z

    # Build the request URL: strip "?" and add the https scheme.
    url = "https:" + url1.replace("?", "")
    print(url)

    # Fetch the image bytes; the response is closed even if read() raises.
    with urllib.request.urlopen(url) as response:
        data = response.read()

    # Everything after the last "/" is the image's own name.
    file_name = url[url.rfind('/') + 1:]

    # Create the target directory on first use instead of failing in open().
    os.makedirs("photo3", exist_ok=True)

    # The counter is used directly.  It used to index a freshly built
    # 10000-element list, which produced the same number but raised
    # IndexError from the 10001st image on.
    target = os.path.join("photo3", "img" + file_name + str(gl_z) + ".jpg")
    with open(target, "wb") as file:
        file.write(data)

    # Advance the counter for the next image.
    gl_z += 1

if __name__ == '__main__':
    # Page to collect the image links from.
    home = """http://www.huya.com/g/xingxiu"""

    # Request headers sent with the page request.
    headers = {
        "Host": "www.huya.com",
        "User-Agent": "agent信息"
    }

    # Request the page; the response body is its HTML.  The response is
    # closed as soon as the body has been read and decoded.
    request = urllib.request.Request(url=home, headers=headers)
    with urllib.request.urlopen(request) as response:
        html_data = response.read().decode()

    # Example of a link as it appears in the page:
    # huyaimg.msstatic.com/avatar/1054/db/6590aa9bcf98e12e5d809d371e46cc_180_135.jpg

    # Extract every image link.  The quantifier is non-greedy so that each
    # match stops at its own ".jpg?"; the greedy ".+" merged all links found
    # on one line into a single match.
    img_list = re.findall(r"//huyaimg\.msstatic\.com.+?\.jpg\?", html_data)
    print(img_list)

    # Download the images one by one.
    for img_url in img_list:
        print(img_url)
        down_img(img_url)

今天万变ip就为大家分享一下，Python爬虫是如何爬取虎牙平台的主播图片。这里我们主要爬取虎牙星秀的主播图片，并下载保存到本地。在爬取之前，我们可先打开目标页面，F12审查元素，查找我们需要的信息所在的位置。

代码如下：

import urllib.request

import re

import os

# 全局变量用来记录图片的编号

gl_z = 0

def down_img(url1):
    """Download one image and save it in the ``photo3`` directory.

    Args:
        url1: Image link as matched on the page, without a scheme (for
            example ``//huyaimg.msstatic.com/.../x.jpg?``).  Every ``?`` is
            removed and ``https:`` is prepended before the request is made.

    The module-level counter ``gl_z`` is appended to the file name and is
    incremented after the file has been written.
    """
    global gl_z

    # Build the request URL: strip "?" and add the https scheme.
    url = "https:" + url1.replace("?", "")
    print(url)

    # Fetch the image bytes; the response is closed even if read() raises.
    with urllib.request.urlopen(url) as response:
        data = response.read()

    # Everything after the last "/" is the image's own name.
    file_name = url[url.rfind('/') + 1:]

    # Create the target directory on first use instead of failing in open().
    os.makedirs("photo3", exist_ok=True)

    # The counter is used directly.  It used to index a freshly built
    # 10000-element list, which produced the same number but raised
    # IndexError from the 10001st image on.
    target = os.path.join("photo3", "img" + file_name + str(gl_z) + ".jpg")
    with open(target, "wb") as file:
        file.write(data)

    # Advance the counter for the next image.
    gl_z += 1

if __name__ == '__main__':
    # Page to collect the image links from.
    home = """http://www.huya.com/g/xingxiu"""

    # Request headers sent with the page request.
    headers = {
        "Host": "www.huya.com",
        "User-Agent": "agent信息"
    }

    # Request the page; the response body is its HTML.  The response is
    # closed as soon as the body has been read and decoded.
    request = urllib.request.Request(url=home, headers=headers)
    with urllib.request.urlopen(request) as response:
        html_data = response.read().decode()

    # Example of a link as it appears in the page:
    # huyaimg.msstatic.com/avatar/1054/db/6590aa9bcf98e12e5d809d371e46cc_180_135.jpg

    # Extract every image link.  The quantifier is non-greedy so that each
    # match stops at its own ".jpg?"; the greedy ".+" merged all links found
    # on one line into a single match.
    img_list = re.findall(r"//huyaimg\.msstatic\.com.+?\.jpg\?", html_data)
    print(img_list)

    # Download the images one by one.
    for img_url in img_list:
        print(img_url)
        down_img(img_url)

通知设置新通知

发现