yolo voc_label 源码分析

深度学习李魔佛 发表了文章 • 0 个评论 • 414 次浏览 • 2019-11-27 15:19 • 来自相关话题

作用读取每个xml文件,把坐标转化为相对坐标,对应文件名保存起来
 
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join




sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]

# 20类
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
"horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# size w,h
# box x-min,x-max,y-min,y-max
def convert(size, box):
dw = 1. / size[0]
dh = 1. / size[1]
x = (box[0] + box[1]) / 2.0 # 中心点位置
y = (box[2] + box[3]) / 2.0
w = box[1] - box[0]
h = box[3] - box[2]
x = x * dw
w = w * dw
y = y * dh
h = h * dh # 全部转化为相对坐标
return (x, y, w, h)


def convert_annotation(year, image_id):
# 找到2个同样的文件
in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')

tree = ET.parse(in_file)
root = tree.getroot()
size = root.find('size')
w = int(size.find('width').text)
h = int(size.find('height').text)

for obj in root.iter('object'):
difficult = obj.find('difficult').text
cls = obj.find('name').text
if cls not in classes or int(difficult) == 1: # difficult ==1 的不要了
continue
cls_id = classes.index(cls) # 排在第几位
xmlbox = obj.find('bndbox')
b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
float(xmlbox.find('ymax').text))
# 传入的是w,h 与框框的周边
bb = convert((w, h), b)
out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


wd = getcwd()

for year, image_set in sets:
# ('2012', 'train') 循环5次
# 创建目录 一次性
if not os.path.exists('VOCdevkit/VOC%s/labels/' % (year)):
os.makedirs('VOCdevkit/VOC%s/labels/' % (year))

# 图片的id数据
image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()

# 结果写入这个文件
list_file = open('%s_%s.txt' % (year, image_set), 'w')

for image_id in image_ids:
list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id)) # 补全路径
convert_annotation(year, image_id)
list_file.close()

  查看全部
作用读取每个xml文件,把坐标转化为相对坐标,对应文件名保存起来
 
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join




sets = [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]

# 20类
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
"horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

# size w,h
# box x-min,x-max,y-min,y-max
def convert(size, box):
dw = 1. / size[0]
dh = 1. / size[1]
x = (box[0] + box[1]) / 2.0 # 中心点位置
y = (box[2] + box[3]) / 2.0
w = box[1] - box[0]
h = box[3] - box[2]
x = x * dw
w = w * dw
y = y * dh
h = h * dh # 全部转化为相对坐标
return (x, y, w, h)


def convert_annotation(year, image_id):
# 找到2个同样的文件
in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')

tree = ET.parse(in_file)
root = tree.getroot()
size = root.find('size')
w = int(size.find('width').text)
h = int(size.find('height').text)

for obj in root.iter('object'):
difficult = obj.find('difficult').text
cls = obj.find('name').text
if cls not in classes or int(difficult) == 1: # difficult ==1 的不要了
continue
cls_id = classes.index(cls) # 排在第几位
xmlbox = obj.find('bndbox')
b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
float(xmlbox.find('ymax').text))
# 传入的是w,h 与框框的周边
bb = convert((w, h), b)
out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')


wd = getcwd()

for year, image_set in sets:
# ('2012', 'train') 循环5次
# 创建目录 一次性
if not os.path.exists('VOCdevkit/VOC%s/labels/' % (year)):
os.makedirs('VOCdevkit/VOC%s/labels/' % (year))

# 图片的id数据
image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()

# 结果写入这个文件
list_file = open('%s_%s.txt' % (year, image_set), 'w')

for image_id in image_ids:
list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id)) # 补全路径
convert_annotation(year, image_id)
list_file.close()

 

docker run 和 create 区别

Linux李魔佛 发表了文章 • 0 个评论 • 987 次浏览 • 2019-11-25 13:49 • 来自相关话题

Technically, docker run = docker create + docker start . 
docker create command creates a writeable container from the image and prepares it for running. 
docker run command creates the container (same as docker create ) and starts it.
 
  查看全部
Technically, docker run = docker create + docker start
docker create command creates a writeable container from the image and prepares it for running. 
docker run command creates the container (same as docker create ) and starts it.
 
 

用docker编译go代码

Linux李魔佛 发表了文章 • 0 个评论 • 464 次浏览 • 2019-11-25 13:45 • 来自相关话题

如果偶尔需要编译go代码,但是又不想要安装一堆乱七八糟的依赖和Go编译器,可以利用docker来实现。 应该是解决起来话费时间最小的。
1. 用文本编辑你的go代码,现在以hello world为例:
 package main import "fmt" func main() {
/* 这是我的第一个简单的程序 */
fmt.Println("Hello, World!")
}
 
2. 然后直接使用docker执行编译。docker首先会自动去下载go的编译器,顺便把所有的依赖给解决掉

docker run --rm -v "$(pwd)":/usr/src/hello -w /usr/src/hello golang:1.3 go build -v
 
最后会在本地生成一个编译好的hello静态文件。
上述docker命令的具体含义就是把当前路径挂在到docker容器里头,然后切换到改到改路径下,然后进行编译。 查看全部

如果偶尔需要编译go代码,但是又不想要安装一堆乱七八糟的依赖和Go编译器,可以利用docker来实现。 应该是解决起来话费时间最小的。
1. 用文本编辑你的go代码,现在以hello world为例:
 
package main import "fmt" func main() {
/* 这是我的第一个简单的程序 */
fmt.Println("Hello, World!")
}

 
2. 然后直接使用docker执行编译。docker首先会自动去下载go的编译器,顺便把所有的依赖给解决掉

docker run --rm -v "$(pwd)":/usr/src/hello -w /usr/src/hello golang:1.3 go build -v
 
最后会在本地生成一个编译好的hello静态文件。
上述docker命令的具体含义就是把当前路径挂在到docker容器里头,然后切换到改到改路径下,然后进行编译。

docker实战 代码勘误

Linux李魔佛 发表了文章 • 0 个评论 • 339 次浏览 • 2019-11-25 11:24 • 来自相关话题

作者在官网刊登的勘误记录:
 
看这本书的作者一定要看,不然坑挺多的。
一路采坑过来的哭着说。Last updated August 21, 2016
In an effort to offer continued support beyond publication, we have listed many updates to code due to version updates.

[code - omission] Page 18

The command to start the "mailer" is missing a line. Where the book reads:
docker run -d \
--name mailer \
the proper command should read:
docker run -d \
--name mailer \
dockerinaction/ch2_mailer

[code - regression] Page 68

Newer versions of Docker have changed the structure of the JSON returned by the docker inspect subcommand. If the following command does not work then use the replacement. Original:
docker inspect --format "{{json .Volumes}}" reader
Replacement:
docker inspect --format "{{json .Mounts}}" reader

[code - regression] Page 69

Newer versions of Docker have changed the structure of the JSON returned by the docker inspect subcommand. If the following command does not work then use the replacement. Original:
docker inspect --format "{{json .Volumes}}" student
Replacement:
docker inspect --format "{{json .Mounts}}" student

[code - regression] Page 74

The alpine image entrypoint has changed since original publication and has been unset. The last command on the page should now read:
docker run --rm \
--volumes-from tools \
--entrypoint /bin/sh \
alpine:latest \
-c 'ls /operations/*'
[code - regression] Page 75

The docker exec example on the top of page 75 was printed with the wrong tool name. The correct command is:
docker exec important_application /operations/tools/diagnostics

[code - regression] Page 86

It appears that nslookup behavior in the alpine image has changed. To run the example use the busybox:1 image.
docker run --rm \
--hostname barker \
busybox:1 \
nslookup barker

[code - regression] Page 87 (top)

It appears that nslookup behavior in the alpine image has changed. To run the example use the busybox:1 image.
docker run --rm \
--dns 8.8.8.8 \
busybox:1 \
nslookup docker.com

[code - regression] Page 87 (bottom)

It appears that nslookup behavior in the alpine image has changed. To run the example use the busybox:1 image.
docker run --rm \
--dns-search docker.com \
busybox:1 \
nslookup registry.hub

[code - regression] Page 88 (bottom)

It appears that nslookup behavior in the alpine image has changed. To run the example use the busybox:1 image.
docker run --rm \
--add-host test:10.10.10.255 \
busybox:1 \
nslookup test

[code - regression] Page 106

There are a few new problems with this example. First, the named repository (dockerfile/mariadb) no longer exists. You can use mariadb:5.5 as replacement. However, the second problem is that containers created from the mariadb image perform certain initialization at startup. That initialization work requires certain capabilities and to be started with the default user. The system should instead drop permissions after the initialization work is complete. Note that the real value of this example is in demonstrating different resource isolation mechanisms. It is not so important that you get it working. You can start the database with the following command:
docker run -d --name ch6_mariadb \
--memory 256m \
--cpu-shares 1024 \
--cap-drop net_raw \
-e MYSQL_ROOT_PASSWORD=test \
mariadb:5.5

[code - regression] Page 107

Containers created from the wordpress:4.1 image perform certain initialization at startup and expect certain environment variables. That initialization work requires certain capabilities and to be started with the default user. The system should instead drop permissions after the initialization work is complete. Note that the real value of this example is in demonstrating different resource isolation mechanisms. It is not so important that you get it working. You can start wordpress with the following command:
docker run -d -P --name ch6_wordpress \
--memory 512m \
--cpu-shares 512 \
--cap-drop net_raw \
-e WORDPRESS_DB_PASSWORD=test \
mariadb:5.5

[code - typo] Page 109

The device access example is missing the "run" subcommand. The command listed as:
docker -it --rm \
--device /dev/video0:/dev/video0 \
ubuntu:latest ls -al /dev
should have been written as:
docker run -it --rm \
--device /dev/video0:/dev/video0 \
ubuntu:latest ls -al /dev

[code - typo] Page 110 - 111

Several commands are missing the "run" subcommand. In each case the command begins with
docker -d ...
and should have been written as:
docker run -d ...

[code - regression] Page 115 (bottom)

The busybox and alpine images have been updated to fix the problem described in the paragraph below. The 'su' command does not have the SUID bit set and will not provide any automatic privilege escalation.

[command correction] Page 116

Boot2Docker has been discontinued and rolled into a newer project called Docker Machine. Because a reader is unlikely to have the boot2docker command installed, the command at the top of this page should be changed from:
boot2docker ssh
to the Docker Machine equivalent:
docker-machine ssh default
where default is the name of the machine you created.

[code - regression] Page 119

The ifconfig command has since been removed from ubuntu:latest. Instead of using the ubuntu:latest image for these examples use ubuntu:trusty. The example using ifconfig should look like:
docker run --rm \
--privileged \
ubuntu:trusty ifconfig
[Illustration Mistake] Page 136
Image layer ced2 on the left side of the illustration is listed at c3d2 on the right side. These two layers should represent the same item.

[code - typo] Page 140

Containers need not be in a running state in order to export their file system. The first command on page 140 uses the "run" subcommand but the command listed will never be able to start. Replace "run" with "create." The command should appear as follows:
docker create --name export-test \
dockerinaction/ch7_packed:latest ./echo For Export

[code - missing line] Page 146

In the example Dockerfile near the top of the page the line with the RUN directive is missing part of the instruction. That line should read:
RUN apt-get update && apt-get install -y git

[code - evolution] Page 215

The registry:2 configuration file now requires the population of additional fields under "maintenance > uploadpurging." The example should currently look like:
# Filename: s3-config.yml
version: 0.1
log:
level: debug
fields:
service: registry
environment: development
storage:
cache:
layerinfo: inmemory
s3:
accesskey:
secretkey:
region:
bucket:
encrypt: true
secure: true
v4auth: true
chunksize: 5242880
rootdirectory: /s3/object/name/prefix
maintenance:
uploadpurging:
enabled: true
age: 168h
interval: 24h
dryrun: false
readonly:
enabled: false
http:
addr: :5000
secret: asecretforlocaldevelopment
debug:
addr: localhost:5001

[code - evolution] Page 216

The registry:2 configuration file now requires the population of additional fields under "maintenance > uploadpurging." The example should currently look like:
# Filename: rados-config.yml
version: 0.1
log:
level: debug
fields:
service: registry
environment: development
storage:
cache:
layerinfo: inmemory
rados:
poolname: radospool
username: radosuser
chunksize: 4194304
maintenance:
uploadpurging:
enabled: false
age: 168h
interval: 24h
dryrun: false
readonly:
enabled: false
http:
addr: :5000
secret: asecretforlocaldevelopment
debug:
addr: localhost:5001

[code - evolution] Page 218

The registry:2 configuration file now requires the population of additional fields under "maintenance > uploadpurging." The example should currently look like:
# Filename: redis-config.yml
version: 0.1
log:
level: debug
fields:
service: registry
environment: development
http:
addr: :5000
secret: asecretforlocaldevelopment
debug:
addr: localhost:5001
storage:
cache:
blobdescriptor: redis
s3:
accesskey:
secretkey:
region:
bucket:
encrypt: true
secure: true
v4auth: true
chunksize: 5242880
rootdirectory: /s3/object/name/prefix
maintenance:
uploadpurging:
enabled: true
age: 168h
interval: 24h
dryrun: false
readonly:
enabled: false
redis:
addr: redis-host:6379
password: asecret
dialtimeout: 10ms
readtimeout: 10ms
writetimeout: 10ms
pool:
maxidle: 16
maxactive: 64
idletimeout: 300s
[code - typo] Page 220

The name of the file shown should be scalable-config.yml as in previous examples. This example also requires the addition of the newer uploadpurging attributes. The mainenance section of the file should be as follows:
maintenance:
uploadpurging:
enabled: true
age: 168h
interval: 24h
dryrun: false
readonly:
enabled: false
[text - typo] Page 240

In the second paragraph the reader is instructed to, "Open ./coffee/api/api.py" this is not the correct location of the file. The correct file location is at, "./coffee/app/api.py."
[text - typo] Page 262

The refere nce to "flock.json" in the first sentence of the third paragraph should be "flock.yml."
[code - typo] Page 270

The git clone command uses the ssh protocol instead of https. The command should read as follows:
git clone https://github.com/dockerinact ... i.git 查看全部
作者在官网刊登的勘误记录:
 
看这本书的作者一定要看,不然坑挺多的。
一路采坑过来的哭着说。
Last updated August 21, 2016
In an effort to offer continued support beyond publication, we have listed many updates to code due to version updates.

[code - omission] Page 18

The command to start the "mailer" is missing a line. Where the book reads:
docker run -d \
--name mailer \
the proper command should read:
docker run -d \
--name mailer \
dockerinaction/ch2_mailer

[code - regression] Page 68

Newer versions of Docker have changed the structure of the JSON returned by the docker inspect subcommand. If the following command does not work then use the replacement. Original:
docker inspect --format "{{json .Volumes}}" reader
Replacement:
docker inspect --format "{{json .Mounts}}" reader

[code - regression] Page 69

Newer versions of Docker have changed the structure of the JSON returned by the docker inspect subcommand. If the following command does not work then use the replacement. Original:
docker inspect --format "{{json .Volumes}}" student
Replacement:
docker inspect --format "{{json .Mounts}}" student

[code - regression] Page 74

The alpine image entrypoint has changed since original publication and has been unset. The last command on the page should now read:
docker run --rm \
--volumes-from tools \
--entrypoint /bin/sh \
alpine:latest \
-c 'ls /operations/*'
[code - regression] Page 75

The docker exec example on the top of page 75 was printed with the wrong tool name. The correct command is:
docker exec important_application /operations/tools/diagnostics

[code - regression] Page 86

It appears that nslookup behavior in the alpine image has changed. To run the example use the busybox:1 image.
docker run --rm \
--hostname barker \
busybox:1 \
nslookup barker

[code - regression] Page 87 (top)

It appears that nslookup behavior in the alpine image has changed. To run the example use the busybox:1 image.
docker run --rm \
--dns 8.8.8.8 \
busybox:1 \
nslookup docker.com

[code - regression] Page 87 (bottom)

It appears that nslookup behavior in the alpine image has changed. To run the example use the busybox:1 image.
docker run --rm \
--dns-search docker.com \
busybox:1 \
nslookup registry.hub

[code - regression] Page 88 (bottom)

It appears that nslookup behavior in the alpine image has changed. To run the example use the busybox:1 image.
docker run --rm \
--add-host test:10.10.10.255 \
busybox:1 \
nslookup test

[code - regression] Page 106

There are a few new problems with this example. First, the named repository (dockerfile/mariadb) no longer exists. You can use mariadb:5.5 as replacement. However, the second problem is that containers created from the mariadb image perform certain initialization at startup. That initialization work requires certain capabilities and to be started with the default user. The system should instead drop permissions after the initialization work is complete. Note that the real value of this example is in demonstrating different resource isolation mechanisms. It is not so important that you get it working. You can start the database with the following command:
docker run -d --name ch6_mariadb \
--memory 256m \
--cpu-shares 1024 \
--cap-drop net_raw \
-e MYSQL_ROOT_PASSWORD=test \
mariadb:5.5

[code - regression] Page 107

Containers created from the wordpress:4.1 image perform certain initialization at startup and expect certain environment variables. That initialization work requires certain capabilities and to be started with the default user. The system should instead drop permissions after the initialization work is complete. Note that the real value of this example is in demonstrating different resource isolation mechanisms. It is not so important that you get it working. You can start wordpress with the following command:
docker run -d -P --name ch6_wordpress \
--memory 512m \
--cpu-shares 512 \
--cap-drop net_raw \
-e WORDPRESS_DB_PASSWORD=test \
mariadb:5.5

[code - typo] Page 109

The device access example is missing the "run" subcommand. The command listed as:
docker -it --rm \
--device /dev/video0:/dev/video0 \
ubuntu:latest ls -al /dev
should have been written as:
docker run -it --rm \
--device /dev/video0:/dev/video0 \
ubuntu:latest ls -al /dev

[code - typo] Page 110 - 111

Several commands are missing the "run" subcommand. In each case the command begins with
docker -d ...
and should have been written as:
docker run -d ...

[code - regression] Page 115 (bottom)

The busybox and alpine images have been updated to fix the problem described in the paragraph below. The 'su' command does not have the SUID bit set and will not provide any automatic privilege escalation.

[command correction] Page 116

Boot2Docker has been discontinued and rolled into a newer project called Docker Machine. Because a reader is unlikely to have the boot2docker command installed, the command at the top of this page should be changed from:
boot2docker ssh
to the Docker Machine equivalent:
docker-machine ssh default
where default is the name of the machine you created.

[code - regression] Page 119

The ifconfig command has since been removed from ubuntu:latest. Instead of using the ubuntu:latest image for these examples use ubuntu:trusty. The example using ifconfig should look like:
docker run --rm \
--privileged \
ubuntu:trusty ifconfig
[Illustration Mistake] Page 136
Image layer ced2 on the left side of the illustration is listed at c3d2 on the right side. These two layers should represent the same item.

[code - typo] Page 140

Containers need not be in a running state in order to export their file system. The first command on page 140 uses the "run" subcommand but the command listed will never be able to start. Replace "run" with "create." The command should appear as follows:
docker create --name export-test \
dockerinaction/ch7_packed:latest ./echo For Export

[code - missing line] Page 146

In the example Dockerfile near the top of the page the line with the RUN directive is missing part of the instruction. That line should read:
RUN apt-get update && apt-get install -y git

[code - evolution] Page 215

The registry:2 configuration file now requires the population of additional fields under "maintenance > uploadpurging." The example should currently look like:
# Filename: s3-config.yml
version: 0.1
log:
level: debug
fields:
service: registry
environment: development
storage:
cache:
layerinfo: inmemory
s3:
accesskey:
secretkey:
region:
bucket:
encrypt: true
secure: true
v4auth: true
chunksize: 5242880
rootdirectory: /s3/object/name/prefix
maintenance:
uploadpurging:
enabled: true
age: 168h
interval: 24h
dryrun: false
readonly:
enabled: false
http:
addr: :5000
secret: asecretforlocaldevelopment
debug:
addr: localhost:5001

[code - evolution] Page 216

The registry:2 configuration file now requires the population of additional fields under "maintenance > uploadpurging." The example should currently look like:
# Filename: rados-config.yml
version: 0.1
log:
level: debug
fields:
service: registry
environment: development
storage:
cache:
layerinfo: inmemory
rados:
poolname: radospool
username: radosuser
chunksize: 4194304
maintenance:
uploadpurging:
enabled: false
age: 168h
interval: 24h
dryrun: false
readonly:
enabled: false
http:
addr: :5000
secret: asecretforlocaldevelopment
debug:
addr: localhost:5001

[code - evolution] Page 218

The registry:2 configuration file now requires the population of additional fields under "maintenance > uploadpurging." The example should currently look like:
# Filename: redis-config.yml
version: 0.1
log:
level: debug
fields:
service: registry
environment: development
http:
addr: :5000
secret: asecretforlocaldevelopment
debug:
addr: localhost:5001
storage:
cache:
blobdescriptor: redis
s3:
accesskey:
secretkey:
region:
bucket:
encrypt: true
secure: true
v4auth: true
chunksize: 5242880
rootdirectory: /s3/object/name/prefix
maintenance:
uploadpurging:
enabled: true
age: 168h
interval: 24h
dryrun: false
readonly:
enabled: false
redis:
addr: redis-host:6379
password: asecret
dialtimeout: 10ms
readtimeout: 10ms
writetimeout: 10ms
pool:
maxidle: 16
maxactive: 64
idletimeout: 300s
[code - typo] Page 220

The name of the file shown should be scalable-config.yml as in previous examples. This example also requires the addition of the newer uploadpurging attributes. The mainenance section of the file should be as follows:
maintenance:
uploadpurging:
enabled: true
age: 168h
interval: 24h
dryrun: false
readonly:
enabled: false
[text - typo] Page 240

In the second paragraph the reader is instructed to, "Open ./coffee/api/api.py" this is not the correct location of the file. The correct file location is at, "./coffee/app/api.py."
[text - typo] Page 262

The refere nce to "flock.json" in the first sentence of the third paragraph should be "flock.yml."
[code - typo] Page 270

The git clone command uses the ssh protocol instead of https. The command should read as follows:
git clone https://github.com/dockerinact ... i.git

jieba.posseg TypeError: cannot unpack non-iterable pair object 词性分析报错

python李魔佛 发表了文章 • 0 个评论 • 601 次浏览 • 2019-11-23 10:12 • 来自相关话题

词性标注的例子出现错误 'pair' object is not iterable
 
例子:import jieba.posseg as pseg
seg_list = pseg.cut("我爱北京天安门")
for word,flag in seg_list:
print(word)
print(flag) 
 ---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-f105f6980f88> in <module>()
1 import jieba.posseg as pseg
2 seg_list = pseg.cut("我爱北京天安门")
----> 3 for word,flag in seg_list:
4 print(word)
5 print(flag)

TypeError: cannot unpack non-iterable pair object原因是新版本中seg_list是一个生成器,所以只能 for win seg_list然后从word中解包出来

print(w.word)

print(w.flag)

这样问题就解决了。 查看全部
词性标注的例子出现错误 'pair' object is not iterable
 
例子:
import jieba.posseg as pseg
seg_list = pseg.cut("我爱北京天安门")
for word,flag in seg_list:
print(word)
print(flag)
 
 
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-5-f105f6980f88> in <module>()
1 import jieba.posseg as pseg
2 seg_list = pseg.cut("我爱北京天安门")
----> 3 for word,flag in seg_list:
4 print(word)
5 print(flag)

TypeError: cannot unpack non-iterable pair object
原因是新版本中seg_list是一个生成器,所以只能 for win seg_list
然后从word中解包出来

print(w.word)

print(w.flag)

这样问题就解决了。

华为AI云市场的modelarts是要收费的<坑>

闲聊李魔佛 发表了文章 • 0 个评论 • 617 次浏览 • 2019-11-21 10:57 • 来自相关话题

点击查看大图

看了介绍说觉得不错,就注册了,然后还要求实名才能使用。想着华为大公司应该不会拿个人数据到处倒卖吧,就按教程下一步下一步注册了。
拿了几张样本上传到服务器,自己打标注,生成模型。然后测试一下效果,感觉产品做得不错。但整个过程没提示说要收费。
 
然后晚上就收到几条短信说华为的AI云处于欠费状态,晕,原来是秋后算账呀。




点击查看大图

还以为不部署到服务器的话就不需要缴费呢,怎么随便测试了下就扣费了呢。
  查看全部
huawei.PNG
点击查看大图

看了介绍说觉得不错,就注册了,然后还要求实名才能使用。想着华为大公司应该不会拿个人数据到处倒卖吧,就按教程下一步下一步注册了。
拿了几张样本上传到服务器,自己打标注,生成模型。然后测试一下效果,感觉产品做得不错。但整个过程没提示说要收费。
 
然后晚上就收到几条短信说华为的AI云处于欠费状态,晕,原来是秋后算账呀。

欠费.PNG
点击查看大图

还以为不部署到服务器的话就不需要缴费呢,怎么随便测试了下就扣费了呢。
 

scrapy在settings中定义变量不能包含小写!

python爬虫李魔佛 发表了文章 • 0 个评论 • 411 次浏览 • 2019-11-16 16:39 • 来自相关话题

如果变量名包含小写字母,那么你的变量会被过滤掉,在scrapy编码的其他地方都会无法被识别。
比如定义了一个叫 Redis_host = '192.168.1.1',的值
 
然后在spider中,如果你调用self.settings.get('Redis_host')
那么返回值是 None。
 
如果用REDIS_HOST定义,那么就可以正确返回它的值。
 
如果你一定要用小写,也有其他方法可正常调用。
先导入settings文件
fromt xxxx import setttings # xxx为项目名
 
host = settings.Redis_host # 直接导入一个文件的形式来调用是可以的 查看全部
如果变量名包含小写字母,那么你的变量会被过滤掉,在scrapy编码的其他地方都会无法被识别。
比如定义了一个叫 Redis_host = '192.168.1.1',的值
 
然后在spider中,如果你调用self.settings.get('Redis_host')
那么返回值是 None。
 
如果用REDIS_HOST定义,那么就可以正确返回它的值。
 
如果你一定要用小写,也有其他方法可正常调用。
先导入settings文件
fromt xxxx import setttings # xxx为项目名
 
host = settings.Redis_host # 直接导入一个文件的形式来调用是可以的

docker实战 勘误 (docker in action)

回复

书籍李魔佛 发起了问题 • 1 人关注 • 0 个回复 • 439 次浏览 • 2019-11-09 15:48 • 来自相关话题

etree.strip_tags的用法

python爬虫李魔佛 发表了文章 • 0 个评论 • 588 次浏览 • 2019-10-24 11:24 • 来自相关话题

直接从官方文档那里拿过来,发现这个函数功能还挺不错的。
它把参数中的标签从源htmlelement中删除,并且把里面的标签文本给合并进来。
 
举个例子:from lxml.html import etree
from lxml.html import fromstring, HtmlElement

test_html = '''<p><span>hello</span><span>world</span></p>'''
test_element = fromstring(test_html)
etree.strip_tags(test_element,'span') # 清除span标签
etree.tostring(test_element)
因为上述操作直接应用于test_element上的,所以test_element的值已经被修改了。
 
所以现在test_element 的值是 
b'<p>helloworld</p>'

原创文章,转载请注明出处
http://30daydo.com/article/553
  查看全部
直接从官方文档那里拿过来,发现这个函数功能还挺不错的。
它把参数中的标签从源htmlelement中删除,并且把里面的标签文本给合并进来。
 
举个例子:
from lxml.html import etree
from lxml.html import fromstring, HtmlElement

test_html = '''<p><span>hello</span><span>world</span></p>'''
test_element = fromstring(test_html)
etree.strip_tags(test_element,'span') # 清除span标签
etree.tostring(test_element)

因为上述操作直接应用于test_element上的,所以test_element的值已经被修改了。
 
所以现在test_element 的值是 
b'<p>helloworld</p>'

原创文章,转载请注明出处
http://30daydo.com/article/553
 

pycharm自带的版本控制软件挺好用的

闲聊李魔佛 发表了文章 • 0 个评论 • 422 次浏览 • 2019-10-24 09:02 • 来自相关话题

pycharm自带的git,svn版本控制工具已经很好用的了,所以以后可以直接不用sourcetree这种专业的GUI管理软件了

pycharm自带的git,svn版本控制工具已经很好用的了,所以以后可以直接不用sourcetree这种专业的GUI管理软件了

android monitor 系统找不到文件 lib\monitor-Could。

Android李魔佛 发表了文章 • 0 个评论 • 399 次浏览 • 2019-10-17 09:53 • 来自相关话题

最新的android studio删除了android monitor 
但我觉得这是一个很好用的监控日志工具。
 
在sdk的tool目录底下启动
 
monitor.bat
 
然后就可以看到报错:
 
系统找不到文件 lib\monitor-Could
 
解决办法:
使用android-studio的sdk管理工具下载一个android-19的API,最新的api少了部分jar文件。
  查看全部
最新的android studio删除了android monitor 
但我觉得这是一个很好用的监控日志工具。
 
在sdk的tool目录底下启动
 
monitor.bat
 
然后就可以看到报错:
 
系统找不到文件 lib\monitor-Could
 
解决办法:
使用android-studio的sdk管理工具下载一个android-19的API,最新的api少了部分jar文件。
 

mumu模拟器adb无法识别

python爬虫李魔佛 发表了文章 • 0 个评论 • 791 次浏览 • 2019-10-17 08:41 • 来自相关话题

因为端口号被mumu改了。
 
            <Forwarding name="ADB_PORT" proto="1" hostip="127.0.0.1" hostport="7555" guestport="5555"/>
 
在mumu浏览器里面可以看到这个配置信息。
 
adb connect 127.0.0.1:7555
然后adb shell 就可以了。
 
配置文件名是:myandrovm_vbox86.nemu 查看全部
因为端口号被mumu改了。
 
            <Forwarding name="ADB_PORT" proto="1" hostip="127.0.0.1" hostport="7555" guestport="5555"/>
 
在mumu浏览器里面可以看到这个配置信息。
 
adb connect 127.0.0.1:7555
然后adb shell 就可以了。
 
配置文件名是:myandrovm_vbox86.nemu

最近是忙的飞起,没有更新文章

闲聊李魔佛 发表了文章 • 0 个评论 • 426 次浏览 • 2019-09-24 11:12 • 来自相关话题

最近家里事多,加上换公司交接工作,新公司需要提供一些资料,跑银行,体检等。
真是希望时间能够停顿下来让我歇歇。
最近家里事多,加上换公司交接工作,新公司需要提供一些资料,跑银行,体检等。
真是希望时间能够停顿下来让我歇歇。

aiohttp异步下载图片

python爬虫李魔佛 发表了文章 • 0 个评论 • 597 次浏览 • 2019-09-16 17:14 • 来自相关话题

保存图片的时候不能用自带的open函数打开文件,需要用到异步io库 aiofiles来打开url = 'http://xyhz.huizhou.gov.cn/static/js/common/jigsaw/images/{}.jpg'
headers={'User-Agent':'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
async def getPage(num):

async with aiohttp.ClientSession() as session:
async with session.get(url.format(num),headers=headers) as resp:
if resp.status==200:
f= await aiofiles.open('{}.jpg'.format(num),mode='wb')
await f.write(await resp.read())
await f.close()

loop = asyncio.get_event_loop()
tasks = [getPage(i) for i in range(5)]
loop.run_until_complete(asyncio.wait(tasks))
原创文章,
转载请注明出处:
http://30daydo.com/article/537
  查看全部
保存图片的时候不能用自带的open函数打开文件,需要用到异步io库 aiofiles来打开
url = 'http://xyhz.huizhou.gov.cn/static/js/common/jigsaw/images/{}.jpg'
headers={'User-Agent':'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
async def getPage(num):

async with aiohttp.ClientSession() as session:
async with session.get(url.format(num),headers=headers) as resp:
if resp.status==200:
f= await aiofiles.open('{}.jpg'.format(num),mode='wb')
await f.write(await resp.read())
await f.close()

loop = asyncio.get_event_loop()
tasks = [getPage(i) for i in range(5)]
loop.run_until_complete(asyncio.wait(tasks))

原创文章,
转载请注明出处:
http://30daydo.com/article/537
 

基于文本及符号密度的网页正文提取方法 python实现

python李魔佛 发表了文章 • 0 个评论 • 1191 次浏览 • 2019-09-10 15:19 • 来自相关话题

基于文本及符号密度的网页正文提取方法 python实现
 项目路径https://github.com/Rockyzsu/CodePool/tree/master/GeneralNewsExtractor
完成后在本文详细介绍,
请密切关注。 查看全部
基于文本及符号密度的网页正文提取方法 python实现
 项目路径https://github.com/Rockyzsu/CodePool/tree/master/GeneralNewsExtractor
完成后在本文详细介绍,
请密切关注。