mongodb

mongodb

mongodb python同步两个数据库数据

python李魔佛 发表了文章 • 0 个评论 • 2143 次浏览 • 2022-04-07 02:44 • 来自相关话题

有时候需要做一些迁移工作,需要对mongodb进行迁移。默认的工具貌似也十分好用的。缺少像Navicat 之于mysql的这样神级的软件。
 
所以自己动手写代码完成:
 
# -*- coding: utf-8 -*-
# @Time : 2022/4/6 4:41
# @File : database_migrate.py
# @Author : Rocky C@www.30daydo.com
import time
from loguru import logger
import pymongo

ignore_db = ['admin', 'config', 'local',
] # 忽略更新的库

ignore_col = [('db_stock','dfcf_list_full')]

logger.add('mongo.log')

# 数据库同步
def get_client(user, password, host, port):
connect_uri = f'mongodb://{user}:{password}@{host}:{port}'
client = pymongo.MongoClient(connect_uri)
return client


def origin():
return get_client('admin', 'password', '127.0.0.1', '27017')


def target():
return get_client('root', 'password', '127.0.0.1', '27017')


def transfer():
origin_client = origin()
target_client = target()
dbs = get_db_name(origin_client)
for db in dbs:
for col in get_collection_name(origin_client, db):

if (db,col) in ignore_col:
continue
items = []
logger.info(f'正在更新{db} {col}')
for i in get_item(origin_client, db, col):
items.append(i)

insert_item(target_client, db, col, items)
logger.info(f'更新数据库 {db} {col}')
# time.sleep(0.5)

def get_item(client, db_name, col):
return client[db_name][col].find()



def insert_item(client, db_name, col, data):
batch = 1000
count = len(data)//batch + 1
for i in range(count):
item = data[i*batch:(i+1)*batch]

if len(item)==0:
continue

try:
client[db_name][col].insert_many(item)
except Exception as e:
logger.error(e)
logger.error(f'{db_name} {col} 插入出错')


def get_db_name(client):
db_name = client.list_database_names()
dbs = []
for db in db_name:
if db not in ignore_db:
dbs.append(db)
return dbs

def delete_col(client,db,col):
try:
client[db][col].delete_many({})
except Exception as e:
logger.error(e)
logger.error(db)
logger.error(col)
return False
else:
return True

def server_compare():
'''
比较2个数据库是否相同,只是单纯比较条数
'''
origin_client = origin()
target_client = target()
dbs = get_db_name(origin_client)

for db in dbs:
for col in get_collection_name(origin_client, db):
origin_count = origin_client[db][col].count_documents({})
target_count = target_client[db][col].count_documents({})
if origin_count!=target_count:
logger.info(f'collection {db} {col}有区别')
#
if delete_col(target_client,db,col):
items = []
logger.info(f'正在更新{db} {col}')
for i in get_item(origin_client, db, col):
items.append(i)

insert_item(target_client, db, col, items)
logger.info(f'更新数据库 {db} {col}')
time.sleep(1)




def get_collection_name(client, db_name):
collection_names = client[db_name].list_collection_names(session=None)
return collection_names

def main():
server_compare()

if __name__ == '__main__':
main()

 原理就是不断迭代,不同的数据库,里面的不同的collection。
对于同名collection,通过条数是否一致,来决定是否要把原数据复制过来。 
 
保存上面文件为main.py
 
执行 python main.py
 
就可以进行数据同步工作啦。 查看全部
有时候需要做一些迁移工作,需要对mongodb进行迁移。默认的工具貌似也十分好用的。缺少像Navicat 之于mysql的这样神级的软件。
 
所以自己动手写代码完成:
 
# -*- coding: utf-8 -*-
# @Time : 2022/4/6 4:41
# @File : database_migrate.py
# @Author : Rocky C@www.30daydo.com
import time
from loguru import logger
import pymongo

ignore_db = ['admin', 'config', 'local',
] # 忽略更新的库

ignore_col = [('db_stock','dfcf_list_full')]

logger.add('mongo.log')

# 数据库同步
def get_client(user, password, host, port):
connect_uri = f'mongodb://{user}:{password}@{host}:{port}'
client = pymongo.MongoClient(connect_uri)
return client


def origin():
return get_client('admin', 'password', '127.0.0.1', '27017')


def target():
return get_client('root', 'password', '127.0.0.1', '27017')


def transfer():
origin_client = origin()
target_client = target()
dbs = get_db_name(origin_client)
for db in dbs:
for col in get_collection_name(origin_client, db):

if (db,col) in ignore_col:
continue
items = []
logger.info(f'正在更新{db} {col}')
for i in get_item(origin_client, db, col):
items.append(i)

insert_item(target_client, db, col, items)
logger.info(f'更新数据库 {db} {col}')
# time.sleep(0.5)

def get_item(client, db_name, col):
return client[db_name][col].find()



def insert_item(client, db_name, col, data):
batch = 1000
count = len(data)//batch + 1
for i in range(count):
item = data[i*batch:(i+1)*batch]

if len(item)==0:
continue

try:
client[db_name][col].insert_many(item)
except Exception as e:
logger.error(e)
logger.error(f'{db_name} {col} 插入出错')


def get_db_name(client):
db_name = client.list_database_names()
dbs = []
for db in db_name:
if db not in ignore_db:
dbs.append(db)
return dbs

def delete_col(client,db,col):
try:
client[db][col].delete_many({})
except Exception as e:
logger.error(e)
logger.error(db)
logger.error(col)
return False
else:
return True

def server_compare():
'''
比较2个数据库是否相同,只是单纯比较条数
'''
origin_client = origin()
target_client = target()
dbs = get_db_name(origin_client)

for db in dbs:
for col in get_collection_name(origin_client, db):
origin_count = origin_client[db][col].count_documents({})
target_count = target_client[db][col].count_documents({})
if origin_count!=target_count:
logger.info(f'collection {db} {col}有区别')
#
if delete_col(target_client,db,col):
items = []
logger.info(f'正在更新{db} {col}')
for i in get_item(origin_client, db, col):
items.append(i)

insert_item(target_client, db, col, items)
logger.info(f'更新数据库 {db} {col}')
time.sleep(1)




def get_collection_name(client, db_name):
collection_names = client[db_name].list_collection_names(session=None)
return collection_names

def main():
server_compare()

if __name__ == '__main__':
main()

 原理就是不断迭代,不同的数据库,里面的不同的collection。
对于同名collection,通过条数是否一致,来决定是否要把原数据复制过来。 
 
保存上面文件为main.py
 
执行 python main.py
 
就可以进行数据同步工作啦。

mongodb 判断列表字段不为空

数据库李魔佛 发表了文章 • 0 个评论 • 8968 次浏览 • 2019-08-20 11:08 • 来自相关话题

首先插入一批数据:
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[1,2,3,4,5]})
db.test_tab.insert({array:[1,2,3,4,5,6]})
使用以下命令判断列表不为空:
db.getCollection("example").find({array:{$exists:true,$ne:[]}}); # 字段不为0 查看全部
首先插入一批数据:
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[1,2,3,4,5]})
db.test_tab.insert({array:[1,2,3,4,5,6]})

使用以下命令判断列表不为空:
db.getCollection("example").find({array:{$exists:true,$ne:[]}}); # 字段不为0

mongodb 修改嵌套字典字典的字段名

数据库李魔佛 发表了文章 • 0 个评论 • 6075 次浏览 • 2019-08-05 13:55 • 来自相关话题

对于mongodb,修改字段名称的语法是

db.test.update({},{$rename:{'旧字段':'新字段'}},true,true)

 
比如下面的例子:db.getCollection('example').update({},{$rename:{'corp':'企业'}})
上面就是把字段corp改为企业。
 
如果是嵌套字段呢?
比如  corp字典是一个字典,里面是 { 'address':'USA',    'phone':'12345678' }
 
那么要修改里面的address为地址:
 db.getCollection('example').update({},{$rename:{'corp.address':'corp.地址'}})
 原创文章,转载请注明出处
原文连接:http://30daydo.com/article/521
  查看全部
对于mongodb,修改字段名称的语法是


db.test.update({},{$rename:{'旧字段':'新字段'}},true,true)


 
比如下面的例子:
db.getCollection('example').update({},{$rename:{'corp':'企业'}})

上面就是把字段corp改为企业。
 
如果是嵌套字段呢?
比如  corp字典是一个字典,里面是 { 'address':'USA',    'phone':'12345678' }
 
那么要修改里面的address为地址:
 
db.getCollection('example').update({},{$rename:{'corp.address':'corp.地址'}})

 原创文章,转载请注明出处
原文连接:http://30daydo.com/article/521
 

mongodb motor 异步操作比同步操作的时间要慢?

数据库量化投机者 回复了问题 • 2 人关注 • 1 个回复 • 5418 次浏览 • 2019-08-03 09:01 • 来自相关话题

mongodb find得到的数据顺序每次都是一样的

数据库李魔佛 发表了文章 • 0 个评论 • 3396 次浏览 • 2019-07-26 09:00 • 来自相关话题

只要用的find内容不变,那么返回的内容顺序也就都一样的。
只要用的find内容不变,那么返回的内容顺序也就都一样的。

使用pymongo中的find_one_and_update出错:需要分片键

数据库李魔佛 发表了文章 • 0 个评论 • 5830 次浏览 • 2019-06-10 17:13 • 来自相关话题

错误信息如下: File "C:\ProgramData\Anaconda3\lib\site-packages\pymongo\helpers.py", line 155, in _check_command_response
raise OperationFailure(msg % errmsg, code, response)
pymongo.errors.OperationFailure: Query for sharded findAndModify must contain the shard key
2019-06-10 16:14:32 [scrapy.core.engine] INFO: Closing spider (finished)
2019-06-10 16:14:32 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
需要在查询语句中把分片键也添加进去。
因为findOneModify只会找一个记录,但是到底在哪个分片的记录呢? 因为不确定,所以才需要把shard加上去。
 
 
参考官方:
Targeted Operations vs. Broadcast Operations
Generally, the fastest queries in a sharded environment are those that mongos route to a single shard, using the shard key and the cluster meta data from the config server. These targeted operations use the shard key value to locate the shard or subset of shards that satisfy the query document.
For queries that don’t include the shard key, mongos must query all shards, wait for their responses and then return the result to the application. These “scatter/gather” queries can be long running operations.
Broadcast Operations
mongos instances broadcast queries to all shards for the collection unless the mongos can determine which shard or subset of shards stores this data.

After the mongos receives responses from all shards, it merges the data and returns the result document. The performance of a broadcast operation depends on the overall load of the cluster, as well as variables like network latency, individual shard load, and number of documents returned per shard. Whenever possible, favor operations that result in targeted operation over those that result in a broadcast operation.
Multi-update operations are always broadcast operations.
The updateMany() and deleteMany() methods are broadcast operations, unless the query document specifies the shard key in full.
Targeted Operations
mongos can route queries that include the shard key or the prefix of a compound shard key a specific shard or set of shards. mongos uses the shard key value to locate the chunk whose range includes the shard key value and directs the query at the shard containing that chunk.

For example, if the shard key is:
copy
{ a: 1, b: 1, c: 1 }

The mongos program can route queries that include the full shard key or either of the following shard key prefixes at a specific shard or set of shards:
copy
{ a: 1 }
{ a: 1, b: 1 }

All insertOne() operations target to one shard. Each document in the insertMany() array targets to a single shard, but there is no guarantee all documents in the array insert into a single shard.
All updateOne(), replaceOne() and deleteOne() operations must include the shard key or _id in the query document. MongoDB returns an error if these methods are used without the shard key or _id.
Depending on the distribution of data in the cluster and the selectivity of the query, mongos may still perform a broadcast operation to fulfill these queries.
Index Use
If the query does not include the shard key, the mongos must send the query to all shards as a “scatter/gather” operation. Each shard will, in turn, use either the shard key index or another more efficient index to fulfill the query.
If the query includes multiple sub-expressions that reference the fields indexed by the shard key and the secondary index, the mongos can route the queries to a specific shard and the shard will use the index that will allow it to fulfill most efficiently.
Sharded Cluster Security
Use Internal Authentication to enforce intra-cluster security and prevent unauthorized cluster components from accessing the cluster. You must start each mongod or mongos in the cluster with the appropriate security settings in order to enforce internal authentication.
See Deploy Sharded Cluster with Keyfile Access Control for a tutorial on deploying a secured shardedcluster.
Cluster Users
Sharded clusters support Role-Based Access Control (RBAC) for restricting unauthorized access to cluster data and operations. You must start each mongod in the cluster, including the config servers, with the --auth option in order to enforce RBAC. Alternatively, enforcing Internal Authentication for inter-cluster security also enables user access controls via RBAC.
With RBAC enforced, clients must specify a --username, --password, and --authenticationDatabase when connecting to the mongos in order to access cluster resources.
Each cluster has its own cluster users. These users cannot be used to access individual shards.
See Enable Access Control for a tutorial on enabling adding users to an RBAC-enabled MongoDB deployment. 查看全部
错误信息如下:
  File "C:\ProgramData\Anaconda3\lib\site-packages\pymongo\helpers.py", line 155, in _check_command_response
raise OperationFailure(msg % errmsg, code, response)
pymongo.errors.OperationFailure: Query for sharded findAndModify must contain the shard key
2019-06-10 16:14:32 [scrapy.core.engine] INFO: Closing spider (finished)
2019-06-10 16:14:32 [scrapy.statscollectors] INFO: Dumping Scrapy stats:

需要在查询语句中把分片键也添加进去。
因为findOneModify只会找一个记录,但是到底在哪个分片的记录呢? 因为不确定,所以才需要把shard加上去。
 
 
参考官方:
Targeted Operations vs. Broadcast Operations
Generally, the fastest queries in a sharded environment are those that mongos route to a single shard, using the shard key and the cluster meta data from the config server. These targeted operations use the shard key value to locate the shard or subset of shards that satisfy the query document.
For queries that don’t include the shard key, mongos must query all shards, wait for their responses and then return the result to the application. These “scatter/gather” queries can be long running operations.
Broadcast Operations
mongos instances broadcast queries to all shards for the collection unless the mongos can determine which shard or subset of shards stores this data.

After the mongos receives responses from all shards, it merges the data and returns the result document. The performance of a broadcast operation depends on the overall load of the cluster, as well as variables like network latency, individual shard load, and number of documents returned per shard. Whenever possible, favor operations that result in targeted operation over those that result in a broadcast operation.
Multi-update operations are always broadcast operations.
The updateMany() and deleteMany() methods are broadcast operations, unless the query document specifies the shard key in full.
Targeted Operations
mongos can route queries that include the shard key or the prefix of a compound shard key a specific shard or set of shards. mongos uses the shard key value to locate the chunk whose range includes the shard key value and directs the query at the shard containing that chunk.

For example, if the shard key is:
copy
{ a: 1, b: 1, c: 1 }

The mongos program can route queries that include the full shard key or either of the following shard key prefixes at a specific shard or set of shards:
copy
{ a: 1 }
{ a: 1, b: 1 }

All insertOne() operations target to one shard. Each document in the insertMany() array targets to a single shard, but there is no guarantee all documents in the array insert into a single shard.
All updateOne(), replaceOne() and deleteOne() operations must include the shard key or _id in the query document. MongoDB returns an error if these methods are used without the shard key or _id.
Depending on the distribution of data in the cluster and the selectivity of the query, mongos may still perform a broadcast operation to fulfill these queries.
Index Use
If the query does not include the shard key, the mongos must send the query to all shards as a “scatter/gather” operation. Each shard will, in turn, use either the shard key index or another more efficient index to fulfill the query.
If the query includes multiple sub-expressions that reference the fields indexed by the shard key and the secondary index, the mongos can route the queries to a specific shard and the shard will use the index that will allow it to fulfill most efficiently.
Sharded Cluster Security
Use Internal Authentication to enforce intra-cluster security and prevent unauthorized cluster components from accessing the cluster. You must start each mongod or mongos in the cluster with the appropriate security settings in order to enforce internal authentication.
See Deploy Sharded Cluster with Keyfile Access Control for a tutorial on deploying a secured shardedcluster.
Cluster Users
Sharded clusters support Role-Based Access Control (RBAC) for restricting unauthorized access to cluster data and operations. You must start each mongod in the cluster, including the config servers, with the --auth option in order to enforce RBAC. Alternatively, enforcing Internal Authentication for inter-cluster security also enables user access controls via RBAC.
With RBAC enforced, clients must specify a --username, --password, and --authenticationDatabase when connecting to the mongos in order to access cluster resources.
Each cluster has its own cluster users. These users cannot be used to access individual shards.
See Enable Access Control for a tutorial on enabling adding users to an RBAC-enabled MongoDB deployment.

Warning: unable to run listCollections, attempting to approximate collection

数据库李魔佛 发表了文章 • 0 个评论 • 19717 次浏览 • 2019-06-07 17:35 • 来自相关话题

在mongodb中参数查看数据库中的表是报错:

Warning: unable to run listCollections, attempting to approximate collection names by parsing connectionStatus

那是因为设置了密码,但是没有进行认证导致的错误。这个错误为啥不直接说明原因呢。汗
 
直接: db.auth('admin','密码')
认证成功返回1, 然后重新执行show tables就可以看到所有的表了。 查看全部
在mongodb中参数查看数据库中的表是报错:

Warning: unable to run listCollections, attempting to approximate collection names by parsing connectionStatus

那是因为设置了密码,但是没有进行认证导致的错误。这个错误为啥不直接说明原因呢。汗
 
直接: db.auth('admin','密码')
认证成功返回1, 然后重新执行show tables就可以看到所有的表了。

python连接mongodb集群 cluster

数据库李魔佛 发表了文章 • 0 个评论 • 4831 次浏览 • 2019-06-03 15:55 • 来自相关话题

网上资料比较少,自己测试了下。
连接方法如下:import pymongo
db = pymongo.MongoClient('mongodb://10.18.6.46,10.18.6.26,10.18.6.102')上面默认的端口do都是27017,如果是其他端口,需要这样修改:db = pymongo.MongoClient('mongodb://10.18.6.46:8888,10.18.6.26:9999,10.18.6.102:7777')
然后就可以正常读写数据库:
 
读:coll=db['testdb']['testcollection'].find()
for i in coll:
print(i)输出内容:{'_id': ObjectId('5cf4c7981ee9edff72e5c503'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7991ee9edff72e5c504'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7991ee9edff72e5c505'), 'username': 'hello'}
{'_id': ObjectId('5cf4c79a1ee9edff72e5c506'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7b21ee9edff72e5c507'), 'username': 'hello world'}



 
写:collection = db['testdb']['testcollection']

for i in range(10):
collection.insert({'username':'huston{}'.format(i)})
 
原创文章,转载请注明出处:
http://30daydo.com/article/494
  查看全部
网上资料比较少,自己测试了下。
连接方法如下:
import pymongo
db = pymongo.MongoClient('mongodb://10.18.6.46,10.18.6.26,10.18.6.102')
上面默认的端口do都是27017,如果是其他端口,需要这样修改:
db = pymongo.MongoClient('mongodb://10.18.6.46:8888,10.18.6.26:9999,10.18.6.102:7777')

然后就可以正常读写数据库:
 
读:
coll=db['testdb']['testcollection'].find()
for i in coll:
print(i)
输出内容:
{'_id': ObjectId('5cf4c7981ee9edff72e5c503'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7991ee9edff72e5c504'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7991ee9edff72e5c505'), 'username': 'hello'}
{'_id': ObjectId('5cf4c79a1ee9edff72e5c506'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7b21ee9edff72e5c507'), 'username': 'hello world'}



 
写:
collection = db['testdb']['testcollection']

for i in range(10):
collection.insert({'username':'huston{}'.format(i)})

 
原创文章,转载请注明出处:
http://30daydo.com/article/494
 

【python】pymongo find_one_and_update的用法

python爬虫李魔佛 发表了文章 • 0 个评论 • 14528 次浏览 • 2019-04-04 11:31 • 来自相关话题

原生的mongo语句是这样的:db.collection.findOneAndUpdate(
<filter>,
<update>,
{
projection: <document>,
sort: <document>,
maxTimeMS: <number>,
upsert: <boolean>,
returnNewDocument: <boolean>,
collation: <document>,
arrayFilters: [ <filterdocument1>, ... ]
}
)
转换成python pymongo是这样的:>>> db.example.find_one_and_update(
... {'_id': 'userid'},
... {'$inc': {'seq': 1}},
... projection={'seq': True, '_id': False},
... return_document=ReturnDocument.AFTER)
上面的语句的意思是:
找到_id 为userid的值得文档,然后把该文档中的seq的值+1,然后返回seq的数据,不显示_id列
最后返回的数据是这样的:

{'seq': 2}
 
注意
findOneAndUpdate
是获取mongo文档中第一条满足条件的数据并做修改。该函数是线程安全的。意思就是在多个线程中操作,不会对同一条数据进行获取修改。
也就是该操作是原子操作。
 
ReturnDocument 引用的库
 
class pymongo.collection.ReturnDocument
 
在开头 from pymongo.collection import ReturnDocument
 
原创文章
转载请注明出处:
http://30daydo.com/article/445 查看全部
原生的mongo语句是这样的:
db.collection.findOneAndUpdate(
<filter>,
<update>,
{
projection: <document>,
sort: <document>,
maxTimeMS: <number>,
upsert: <boolean>,
returnNewDocument: <boolean>,
collation: <document>,
arrayFilters: [ <filterdocument1>, ... ]
}
)

转换成python pymongo是这样的:
>>> db.example.find_one_and_update(
... {'_id': 'userid'},
... {'$inc': {'seq': 1}},
... projection={'seq': True, '_id': False},
... return_document=ReturnDocument.AFTER)

上面的语句的意思是:
找到_id 为userid的值得文档,然后把该文档中的seq的值+1,然后返回seq的数据,不显示_id列
最后返回的数据是这样的:

{'seq': 2}
 
注意
findOneAndUpdate
是获取mongo文档中第一条满足条件的数据并做修改。该函数是线程安全的。意思就是在多个线程中操作,不会对同一条数据进行获取修改。
也就是该操作是原子操作。
 
ReturnDocument 引用的库
 
class pymongo.collection.ReturnDocument
 
在开头 from pymongo.collection import ReturnDocument
 
原创文章
转载请注明出处:
http://30daydo.com/article/445

python 代码获取mongodb数据库下所有的collection 文档名字

python李魔佛 发表了文章 • 0 个评论 • 5606 次浏览 • 2018-11-27 11:41 • 来自相关话题

获取一个数据库下所有的collection 文档db['db_pledge'].collection_names()
db['db_pledge'].list_collection_names()
获取一个数据库下所有的collection 文档
db['db_pledge'].collection_names()
db['db_pledge'].list_collection_names()

python爬虫集思录所有用户的帖子 scrapy写入mongodb数据库

python爬虫李魔佛 发表了文章 • 0 个评论 • 6660 次浏览 • 2018-09-02 21:52 • 来自相关话题

好久没更新了,把之前做的一些爬虫分享一下。不然都没有用户来了。-. -
 
项目采用scrapy的框架,数据写入到mongodb的数据库。 整个站点爬下来大概用了半小时,数据有12w条。
 
项目中的主要代码如下:
 
主spider# -*- coding: utf-8 -*-
import re
import scrapy
from scrapy import Request, FormRequest
from jsl.items import JslItem
from jsl import config
import logging

class AllcontentSpider(scrapy.Spider):
name = 'allcontent'

headers = {
'Host': 'www.jisilu.cn', 'Connection': 'keep-alive', 'Pragma': 'no-cache',
'Cache-Control': 'no-cache', 'Accept': 'application/json,text/javascript,*/*;q=0.01',
'Origin': 'https://www.jisilu.cn', 'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/67.0.3396.99Safari/537.36',
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
'Referer': 'https://www.jisilu.cn/login/',
'Accept-Encoding': 'gzip,deflate,br',
'Accept-Language': 'zh,en;q=0.9,en-US;q=0.8'
}

def start_requests(self):
login_url = 'https://www.jisilu.cn/login/'
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip,deflate,br', 'Accept-Language': 'zh,en;q=0.9,en-US;q=0.8',
'Cache-Control': 'no-cache', 'Connection': 'keep-alive',
'Host': 'www.jisilu.cn', 'Pragma': 'no-cache', 'Referer': 'https://www.jisilu.cn/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/67.0.3396.99Safari/537.36'}

yield Request(url=login_url, headers=headers, callback=self.login,dont_filter=True)

def login(self, response):
url = 'https://www.jisilu.cn/account/ajax/login_process/'
data = {
'return_url': 'https://www.jisilu.cn/',
'user_name': config.username,
'password': config.password,
'net_auto_login': '1',
'_post_type': 'ajax',
}

yield FormRequest(
url=url,
headers=self.headers,
formdata=data,
callback=self.parse,
dont_filter=True
)

def parse(self, response):
for i in range(1,3726):
focus_url = 'https://www.jisilu.cn/home/explore/sort_type-new__day-0__page-{}'.format(i)
yield Request(url=focus_url, headers=self.headers, callback=self.parse_page,dont_filter=True)

def parse_page(self, response):
nodes = response.xpath('//div[@class="aw-question-list"]/div')
for node in nodes:
each_url=node.xpath('.//h4/a/@href').extract_first()
yield Request(url=each_url,headers=self.headers,callback=self.parse_item,dont_filter=True)

def parse_item(self,response):
item = JslItem()
title = response.xpath('//div[@class="aw-mod-head"]/h1/text()').extract_first()
s = response.xpath('//div[@class="aw-question-detail-txt markitup-box"]').xpath('string(.)').extract_first()
ret = re.findall('(.*?)\.donate_user_avatar', s, re.S)

try:
content = ret[0].strip()
except:
content = None

createTime = response.xpath('//div[@class="aw-question-detail-meta"]/span/text()').extract_first()

resp_no = response.xpath('//div[@class="aw-mod aw-question-detail-box"]//ul/h2/text()').re_first('\d+')

url = response.url
item['title'] = title.strip()
item['content'] = content
try:
item['resp_no']=int(resp_no)
except Exception as e:
logging.warning('e')
item['resp_no']=None

item['createTime'] = createTime
item['url'] = url.strip()
resp =
for index,reply in enumerate(response.xpath('//div[@class="aw-mod-body aw-dynamic-topic"]/div[@class="aw-item"]')):
replay_user = reply.xpath('.//div[@class="pull-left aw-dynamic-topic-content"]//p/a/text()').extract_first()
rep_content = reply.xpath(
'.//div[@class="pull-left aw-dynamic-topic-content"]//div[@class="markitup-box"]/text()').extract_first()
# print rep_content
agree=reply.xpath('.//em[@class="aw-border-radius-5 aw-vote-count pull-left"]/text()').extract_first()
resp.append({replay_user.strip()+'_{}'.format(index): [int(agree),rep_content.strip()]})

item['resp'] = resp
yield item




login函数是模拟登录集思录,通过抓包就可以知道一些上传的data。
然后就是分页去抓取。逻辑很简单。
 
然后pipeline里面写入mongodb。import pymongo
from collections import OrderedDict
class JslPipeline(object):
def __init__(self):
self.db = pymongo.MongoClient(host='10.18.6.1',port=27017)
# self.user = u'neo牛3' # 修改为指定的用户名 如 毛之川 ,然后找到用户的id,在用户也的源码哪里可以找到 比如持有封基是8132
self.collection = self.db['db_parker']['jsl']
def process_item(self, item, spider):
self.collection.insert(OrderedDict(item))
return item
抓取到的数据入库mongodb:





 点击查看大图

原创文章
转载请注明出处:http://30daydo.com/publish/article/351
 
  查看全部
好久没更新了,把之前做的一些爬虫分享一下。不然都没有用户来了。-. -
 
项目采用scrapy的框架,数据写入到mongodb的数据库。 整个站点爬下来大概用了半小时,数据有12w条。
 
项目中的主要代码如下:
 
主spider
# -*- coding: utf-8 -*-
import re
import scrapy
from scrapy import Request, FormRequest
from jsl.items import JslItem
from jsl import config
import logging

class AllcontentSpider(scrapy.Spider):
name = 'allcontent'

headers = {
'Host': 'www.jisilu.cn', 'Connection': 'keep-alive', 'Pragma': 'no-cache',
'Cache-Control': 'no-cache', 'Accept': 'application/json,text/javascript,*/*;q=0.01',
'Origin': 'https://www.jisilu.cn', 'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/67.0.3396.99Safari/537.36',
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
'Referer': 'https://www.jisilu.cn/login/',
'Accept-Encoding': 'gzip,deflate,br',
'Accept-Language': 'zh,en;q=0.9,en-US;q=0.8'
}

def start_requests(self):
login_url = 'https://www.jisilu.cn/login/'
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip,deflate,br', 'Accept-Language': 'zh,en;q=0.9,en-US;q=0.8',
'Cache-Control': 'no-cache', 'Connection': 'keep-alive',
'Host': 'www.jisilu.cn', 'Pragma': 'no-cache', 'Referer': 'https://www.jisilu.cn/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/67.0.3396.99Safari/537.36'}

yield Request(url=login_url, headers=headers, callback=self.login,dont_filter=True)

def login(self, response):
url = 'https://www.jisilu.cn/account/ajax/login_process/'
data = {
'return_url': 'https://www.jisilu.cn/',
'user_name': config.username,
'password': config.password,
'net_auto_login': '1',
'_post_type': 'ajax',
}

yield FormRequest(
url=url,
headers=self.headers,
formdata=data,
callback=self.parse,
dont_filter=True
)

def parse(self, response):
for i in range(1,3726):
focus_url = 'https://www.jisilu.cn/home/explore/sort_type-new__day-0__page-{}'.format(i)
yield Request(url=focus_url, headers=self.headers, callback=self.parse_page,dont_filter=True)

def parse_page(self, response):
nodes = response.xpath('//div[@class="aw-question-list"]/div')
for node in nodes:
each_url=node.xpath('.//h4/a/@href').extract_first()
yield Request(url=each_url,headers=self.headers,callback=self.parse_item,dont_filter=True)

def parse_item(self,response):
item = JslItem()
title = response.xpath('//div[@class="aw-mod-head"]/h1/text()').extract_first()
s = response.xpath('//div[@class="aw-question-detail-txt markitup-box"]').xpath('string(.)').extract_first()
ret = re.findall('(.*?)\.donate_user_avatar', s, re.S)

try:
content = ret[0].strip()
except:
content = None

createTime = response.xpath('//div[@class="aw-question-detail-meta"]/span/text()').extract_first()

resp_no = response.xpath('//div[@class="aw-mod aw-question-detail-box"]//ul/h2/text()').re_first('\d+')

url = response.url
item['title'] = title.strip()
item['content'] = content
try:
item['resp_no']=int(resp_no)
except Exception as e:
logging.warning('e')
item['resp_no']=None

item['createTime'] = createTime
item['url'] = url.strip()
resp =
for index,reply in enumerate(response.xpath('//div[@class="aw-mod-body aw-dynamic-topic"]/div[@class="aw-item"]')):
replay_user = reply.xpath('.//div[@class="pull-left aw-dynamic-topic-content"]//p/a/text()').extract_first()
rep_content = reply.xpath(
'.//div[@class="pull-left aw-dynamic-topic-content"]//div[@class="markitup-box"]/text()').extract_first()
# print rep_content
agree=reply.xpath('.//em[@class="aw-border-radius-5 aw-vote-count pull-left"]/text()').extract_first()
resp.append({replay_user.strip()+'_{}'.format(index): [int(agree),rep_content.strip()]})

item['resp'] = resp
yield item




login函数是模拟登录集思录,通过抓包就可以知道一些上传的data。
然后就是分页去抓取。逻辑很简单。
 
然后pipeline里面写入mongodb。
import pymongo
from collections import OrderedDict
class JslPipeline(object):
def __init__(self):
self.db = pymongo.MongoClient(host='10.18.6.1',port=27017)
# self.user = u'neo牛3' # 修改为指定的用户名 如 毛之川 ,然后找到用户的id,在用户也的源码哪里可以找到 比如持有封基是8132
self.collection = self.db['db_parker']['jsl']
def process_item(self, item, spider):
self.collection.insert(OrderedDict(item))
return item

抓取到的数据入库mongodb:

记实录.PNG

 点击查看大图

原创文章
转载请注明出处:http://30daydo.com/publish/article/351
 
 

docker里运行mongodb,保存的数据在外部使用mongoexport不能导出:提示错误Unrecognized field 'snapshot'

python李魔佛 发表了文章 • 0 个评论 • 10747 次浏览 • 2018-08-31 14:21 • 来自相关话题

## 2019-03-19更新 问题已解决
 很无语。 目前还找不到原因。
 
docker里面运行的mongodb, mongodb的数据挂载到宿主机。 开放了27017端口。
在windows下使用mongoexport工具导出数据:
 
错误信息:C:\Program Files\MongoDB\Server\3.4\bin>mongoexport.exe /h 10.18.6.102 /d stock
/c company /o company.json /type json
2018-08-31T14:13:47.841+0800 connected to: 10.18.6.102
2018-08-31T14:13:47.854+0800 Failed: Failed to parse: { find: "company", filt
er: {}, sort: {}, skip: 0, snapshot: true, $readPreference: { mode: "secondaryPr
eferred" }, $db: "stock" }. Unrecognized field 'snapshot'.

C:\Program Files\MongoDB\Server\3.4\bin> 
目前这个问题已经解决:
需要进去docker容器里面,然后在容器里面操作,把数据导出来到挂载的目录下,然后可以直接获取到数据了。 查看全部
## 2019-03-19更新 问题已解决
 很无语。 目前还找不到原因。
 
docker里面运行的mongodb, mongodb的数据挂载到宿主机。 开放了27017端口。
在windows下使用mongoexport工具导出数据:
 
错误信息:
C:\Program Files\MongoDB\Server\3.4\bin>mongoexport.exe /h 10.18.6.102 /d stock
/c company /o company.json /type json
2018-08-31T14:13:47.841+0800 connected to: 10.18.6.102
2018-08-31T14:13:47.854+0800 Failed: Failed to parse: { find: "company", filt
er: {}, sort: {}, skip: 0, snapshot: true, $readPreference: { mode: "secondaryPr
eferred" }, $db: "stock" }. Unrecognized field 'snapshot'.

C:\Program Files\MongoDB\Server\3.4\bin>
 
目前这个问题已经解决:
需要进去docker容器里面,然后在容器里面操作,把数据导出来到挂载的目录下,然后可以直接获取到数据了。

python mongodb大数据(>3GB)转移Mysql数据库

python李魔佛 发表了文章 • 0 个评论 • 5332 次浏览 • 2018-08-20 15:44 • 来自相关话题

数据约为5GB左右,如果直接用for i in doc.find({})进行逐行遍历的话,游标就会超时,而且越到后面速度越慢.
 
 于是使用了分段遍历的方法.# -*-coding=utf-8-*-
import pandas as pd
import json
import pymongo
from sqlalchemy import create_engine

# 将mongo数据转移到mysql

client = pymongo.MongoClient('xxx')
doc = client['spider']['meituan']
engine = create_engine('mysql+pymysql://xxx:xxx@xxx:/xxx?charset=utf8')


def classic_method():
temp =
start = 0
# 数据太大还是会爆内存,或者游标丢失
for i in doc.find().batch_size(500):
start += 1
del i['_id']
temp.append(i)
print(start)

print('start to save to mysql')
df = pd.read_json(json.dumps(temp))
df = df.set_index('poiid', drop=True)
df.to_sql('meituan', con=engine, if_exists='replace')
print('done')


def chunksize_move():
block = 10000
total = doc.find({}).count()
iter_number = total // block

for i in range(iter_number + 1):
small_part = doc.find({}).limit(block).skip(i * block)
list_data =

for item in small_part:
del item['_id']
del item['crawl_time']
item['poiid'] = int(item['poiid'])
for k, v in item.items():
if isinstance(v, dict) or isinstance(v, list):

item[k] = json.dumps(v, ensure_ascii=False)

list_data.append(item)

df = pd.DataFrame(list_data)
df = df.set_index('poiid', drop=True)

try:
df.to_sql('meituan', con=engine, if_exists='append')
print('to sql {}'.format(i))
except Exception as e:
print(e)

chunksize_move()

 





速度比一次批量的要快不少. 查看全部
数据约为5GB左右,如果直接用
for i in doc.find({})
进行逐行遍历的话,游标就会超时,而且越到后面速度越慢.
 
 于是使用了分段遍历的方法.
# -*-coding=utf-8-*-
import pandas as pd
import json
import pymongo
from sqlalchemy import create_engine

# 将mongo数据转移到mysql

client = pymongo.MongoClient('xxx')
doc = client['spider']['meituan']
engine = create_engine('mysql+pymysql://xxx:xxx@xxx:/xxx?charset=utf8')


def classic_method():
temp =
start = 0
# 数据太大还是会爆内存,或者游标丢失
for i in doc.find().batch_size(500):
start += 1
del i['_id']
temp.append(i)
print(start)

print('start to save to mysql')
df = pd.read_json(json.dumps(temp))
df = df.set_index('poiid', drop=True)
df.to_sql('meituan', con=engine, if_exists='replace')
print('done')


def chunksize_move():
block = 10000
total = doc.find({}).count()
iter_number = total // block

for i in range(iter_number + 1):
small_part = doc.find({}).limit(block).skip(i * block)
list_data =

for item in small_part:
del item['_id']
del item['crawl_time']
item['poiid'] = int(item['poiid'])
for k, v in item.items():
if isinstance(v, dict) or isinstance(v, list):

item[k] = json.dumps(v, ensure_ascii=False)

list_data.append(item)

df = pd.DataFrame(list_data)
df = df.set_index('poiid', drop=True)

try:
df.to_sql('meituan', con=engine, if_exists='append')
print('to sql {}'.format(i))
except Exception as e:
print(e)

chunksize_move()

 

to_sql.PNG

速度比一次批量的要快不少.

python 把mongodb的数据迁移到mysql

python李魔佛 发表了文章 • 0 个评论 • 4842 次浏览 • 2018-08-20 11:02 • 来自相关话题

代码如下: 很简短.
import pymongo
from setting import get_engine

# 将mongo数据转移到mysql

client = pymongo.MongoClient('10.18.6.101')
doc = client['spider']['meituan']
engine = create_engine('mysql+pymysql://localhost:1234@10.18.4.211/spider?charset=utf8')
temp=[]

for i in doc.find({}):
del i['_id']
temp.append(i)
print('start to save to mysql')
df = pd.read_json(json.dumps(temp))
df = df.set_index('poiid',drop=True)
df.to_sql('meituan',con=engine,if_exists='replace')
print('done')





 
居然CPU飙到了90%
  查看全部
代码如下: 很简短.
import pymongo
from setting import get_engine

# 将mongo数据转移到mysql

client = pymongo.MongoClient('10.18.6.101')
doc = client['spider']['meituan']
engine = create_engine('mysql+pymysql://localhost:1234@10.18.4.211/spider?charset=utf8')
temp=[]

for i in doc.find({}):
del i['_id']
temp.append(i)
print('start to save to mysql')
df = pd.read_json(json.dumps(temp))
df = df.set_index('poiid',drop=True)
df.to_sql('meituan',con=engine,if_exists='replace')
print('done')


cpu.PNG

 
居然CPU飙到了90%
 

mongodb sort: Executor error during find command: OperationFailed: Sort operation used more than

网络李魔佛 发表了文章 • 0 个评论 • 7903 次浏览 • 2018-07-09 10:31 • 来自相关话题

mongodb 排序出现内存溢出:
 Error: error: {
"ok" : 0,
"errmsg" : "Executor error during find command: OperationFailed: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit.",
"code" : 96,
"codeName" : "OperationFailed"
}
使用limit函数限制其输出就可以了:
 
db.getCollection('老布').find({}).sort({'created_at':-1}).limit(1000) 查看全部
mongodb 排序出现内存溢出:
 
Error: error: {
"ok" : 0,
"errmsg" : "Executor error during find command: OperationFailed: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit.",
"code" : 96,
"codeName" : "OperationFailed"
}

使用limit函数限制其输出就可以了:
 
db.getCollection('老布').find({}).sort({'created_at':-1}).limit(1000)

mongo服务器因为mongod.lock 被锁定无法正常运行

网络安全李魔佛 发表了文章 • 0 个评论 • 8494 次浏览 • 2018-05-20 18:33 • 来自相关话题

看log文件:
Sun May 20 18:26:04.630 [initandlisten] MongoDB starting : pid=2343 port=27017 dbpath=/home/pi/mongo/db/ 32-bit host=raspberrypi
Sun May 20 18:26:04.631 [initandlisten]
Sun May 20 18:26:04.631 [initandlisten] ** NOTE: This is a 32 bit MongoDB binary.
Sun May 20 18:26:04.631 [initandlisten] ** 32 bit builds are limited to less than 2GB of data (or less with --journal).
Sun May 20 18:26:04.631 [initandlisten] ** See http://dochub.mongodb.org/core/32bit
Sun May 20 18:26:04.631 [initandlisten]
Sun May 20 18:26:04.631 [initandlisten] db version v2.4.10
Sun May 20 18:26:04.632 [initandlisten] git version: nogitversion
Sun May 20 18:26:04.632 [initandlisten] build info: Linux bm-wb-04 3.19.0-trunk-armmp #1 SMP Debian 3.19.1-1~exp1+plugwash1 (2015-03-28) armv7l BOOST_LIB_VERSION=1_55
Sun May 20 18:26:04.632 [initandlisten] allocator: system
Sun May 20 18:26:04.632 [initandlisten] options: { dbpath: "/home/pi/mongo/db/", journal: true, logpath: "/home/pi/mongo/mongod.log" }
Sun May 20 18:26:05.956 [initandlisten] journal dir=/home/pi/mongo/db/journal
Sun May 20 18:26:05.957 [initandlisten] recover : no journal files present, no recovery needed
Sun May 20 18:26:06.023 [initandlisten] ERROR: mmap private failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64
Sun May 20 18:26:06.023 [initandlisten] Assertion: 13636:file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
0x662fa8 0x63527c 0x6196c0 0x6197bc 0x409d5c 0x4414fc 0x2ea634 0x2eaa3c 0x2eb55c 0x2ebd98 0x26f998 0x26fb94 0x175a20 0x177bf4 0x152bf0 0x7660e294
mongod(_ZN5mongo15printStackTraceERSo+0x1c) [0x662fa8]
mongod(_ZN5mongo10logContextEPKc+0x110) [0x63527c]
mongod(_ZN5mongo11msgassertedEiPKc+0xc0) [0x6196c0]
mongod(_ZN5mongo18msgassertedNoTraceEiPKc+0) [0x6197bc]
mongod(_ZN5mongo8MongoMMF13finishOpeningEv+0x308) [0x409d5c]
mongod(_ZN5mongo13MongoDataFile12openExistingEPKc+0x9c) [0x4414fc]
mongod(_ZN5mongo8Database16openExistingFileEi+0x23c) [0x2ea634]
mongod(_ZN5mongo8Database12openAllFilesEv+0x24) [0x2eaa3c]
mongod(_ZN5mongo8DatabaseC2EPKcRbRKSs+0x158) [0x2eb55c]
mongod(_ZN5mongo14DatabaseHolder11getOrCreateERKSsS2_Rb+0x500) [0x2ebd98]
mongod(_ZN5mongo6Client7Context11_finishInitEv+0x34) [0x26f998]
mongod(_ZN5mongo6Client7ContextC1ERKSsS3_b+0x78) [0x26fb94]
mongod(_ZN5mongo14_initAndListenEi+0xb00) [0x175a20]
mongod(_ZN5mongo13initAndListenEi+0x14) [0x177bf4]
mongod(main+0x2b8) [0x152bf0]
/lib/arm-linux-gnueabihf/libc.so.6(__libc_start_main+0x114) [0x7660e294]
Sun May 20 18:26:06.035 [initandlisten] warning database /home/pi/mongo/db/ xueqiu could not be opened
Sun May 20 18:26:06.035 [initandlisten] DBException 13636: file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
Sun May 20 18:26:06.036 [initandlisten] exception in initAndListen: 13636 file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information), terminating
Sun May 20 18:26:06.036 dbexit:
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to close listening sockets...
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to flush diaglog...
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to close sockets...
Sun May 20 18:26:06.036 [initandlisten] shutdown: waiting for fs preallocator...
Sun May 20 18:26:06.036 [initandlisten] shutdown: lock for final commit...
Sun May 20 18:26:06.036 [initandlisten] shutdown: final commit...
Sun May 20 18:26:06.036 [initandlisten] shutdown: closing all files...
Sun May 20 18:26:06.037 [initandlisten] closeAllFiles() finished
Sun May 20 18:26:06.037 [initandlisten] journalCleanup...
Sun May 20 18:26:06.037 [initandlisten] removeJournalFiles
Sun May 20 18:26:06.050 [initandlisten] shutdown: removing fs lock...
Sun May 20 18:26:06.050 dbexit: really exiting now

应该是之前没有正常被关闭,导致文件本锁住了。
 
解决办法:
 
1. 先把数据文件备份, --dbpath 的路径整个备份一下, 不然接下来的操作误操作了数据就丢失了
 
2. 运行修复命令:

mongod --dbpath /data/db --repair
 
替换上面的db为您自己的本地路径 查看全部
看log文件:
Sun May 20 18:26:04.630 [initandlisten] MongoDB starting : pid=2343 port=27017 dbpath=/home/pi/mongo/db/ 32-bit host=raspberrypi
Sun May 20 18:26:04.631 [initandlisten]
Sun May 20 18:26:04.631 [initandlisten] ** NOTE: This is a 32 bit MongoDB binary.
Sun May 20 18:26:04.631 [initandlisten] ** 32 bit builds are limited to less than 2GB of data (or less with --journal).
Sun May 20 18:26:04.631 [initandlisten] ** See http://dochub.mongodb.org/core/32bit
Sun May 20 18:26:04.631 [initandlisten]
Sun May 20 18:26:04.631 [initandlisten] db version v2.4.10
Sun May 20 18:26:04.632 [initandlisten] git version: nogitversion
Sun May 20 18:26:04.632 [initandlisten] build info: Linux bm-wb-04 3.19.0-trunk-armmp #1 SMP Debian 3.19.1-1~exp1+plugwash1 (2015-03-28) armv7l BOOST_LIB_VERSION=1_55
Sun May 20 18:26:04.632 [initandlisten] allocator: system
Sun May 20 18:26:04.632 [initandlisten] options: { dbpath: "/home/pi/mongo/db/", journal: true, logpath: "/home/pi/mongo/mongod.log" }
Sun May 20 18:26:05.956 [initandlisten] journal dir=/home/pi/mongo/db/journal
Sun May 20 18:26:05.957 [initandlisten] recover : no journal files present, no recovery needed
Sun May 20 18:26:06.023 [initandlisten] ERROR: mmap private failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64
Sun May 20 18:26:06.023 [initandlisten] Assertion: 13636:file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
0x662fa8 0x63527c 0x6196c0 0x6197bc 0x409d5c 0x4414fc 0x2ea634 0x2eaa3c 0x2eb55c 0x2ebd98 0x26f998 0x26fb94 0x175a20 0x177bf4 0x152bf0 0x7660e294
mongod(_ZN5mongo15printStackTraceERSo+0x1c) [0x662fa8]
mongod(_ZN5mongo10logContextEPKc+0x110) [0x63527c]
mongod(_ZN5mongo11msgassertedEiPKc+0xc0) [0x6196c0]
mongod(_ZN5mongo18msgassertedNoTraceEiPKc+0) [0x6197bc]
mongod(_ZN5mongo8MongoMMF13finishOpeningEv+0x308) [0x409d5c]
mongod(_ZN5mongo13MongoDataFile12openExistingEPKc+0x9c) [0x4414fc]
mongod(_ZN5mongo8Database16openExistingFileEi+0x23c) [0x2ea634]
mongod(_ZN5mongo8Database12openAllFilesEv+0x24) [0x2eaa3c]
mongod(_ZN5mongo8DatabaseC2EPKcRbRKSs+0x158) [0x2eb55c]
mongod(_ZN5mongo14DatabaseHolder11getOrCreateERKSsS2_Rb+0x500) [0x2ebd98]
mongod(_ZN5mongo6Client7Context11_finishInitEv+0x34) [0x26f998]
mongod(_ZN5mongo6Client7ContextC1ERKSsS3_b+0x78) [0x26fb94]
mongod(_ZN5mongo14_initAndListenEi+0xb00) [0x175a20]
mongod(_ZN5mongo13initAndListenEi+0x14) [0x177bf4]
mongod(main+0x2b8) [0x152bf0]
/lib/arm-linux-gnueabihf/libc.so.6(__libc_start_main+0x114) [0x7660e294]
Sun May 20 18:26:06.035 [initandlisten] warning database /home/pi/mongo/db/ xueqiu could not be opened
Sun May 20 18:26:06.035 [initandlisten] DBException 13636: file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
Sun May 20 18:26:06.036 [initandlisten] exception in initAndListen: 13636 file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information), terminating
Sun May 20 18:26:06.036 dbexit:
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to close listening sockets...
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to flush diaglog...
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to close sockets...
Sun May 20 18:26:06.036 [initandlisten] shutdown: waiting for fs preallocator...
Sun May 20 18:26:06.036 [initandlisten] shutdown: lock for final commit...
Sun May 20 18:26:06.036 [initandlisten] shutdown: final commit...
Sun May 20 18:26:06.036 [initandlisten] shutdown: closing all files...
Sun May 20 18:26:06.037 [initandlisten] closeAllFiles() finished
Sun May 20 18:26:06.037 [initandlisten] journalCleanup...
Sun May 20 18:26:06.037 [initandlisten] removeJournalFiles
Sun May 20 18:26:06.050 [initandlisten] shutdown: removing fs lock...
Sun May 20 18:26:06.050 dbexit: really exiting now

应该是之前没有正常被关闭,导致文件本锁住了。
 
解决办法:
 
1. 先把数据文件备份, --dbpath 的路径整个备份一下, 不然接下来的操作误操作了数据就丢失了
 
2. 运行修复命令:

mongod --dbpath /data/db --repair
 
替换上面的db为您自己的本地路径

mongo服务启动失败: ERROR: mmap private failed with out of memory

树莓派李魔佛 发表了文章 • 0 个评论 • 4755 次浏览 • 2018-05-13 12:23 • 来自相关话题

平时在树莓派上开机自动执行以下命令,启动mongo服务
sudo mongod --fork --dbpath /home/pi/mongo/db/ --smallfiles --journal --logpath /home/pi/mongo/log.txt
突然今天发现mongo的服务连不上,看log发现mongo在启动后马上关闭了,提示的错误是在加载一个大的数据文件的时候提示内存不足(坑爹的,树莓派自身内存才1GB,无法扩容)。 
 
错误日志:
Sun May 13 12:08:11.185 [initandlisten] MongoDB starting : pid=1929 port=27017 dbpath=/home/pi/mongo/db/ 32-bit host=raspberrypi
Sun May 13 12:08:11.186 [initandlisten]
Sun May 13 12:08:11.186 [initandlisten] ** NOTE: This is a 32 bit MongoDB binary.
Sun May 13 12:08:11.186 [initandlisten] ** 32 bit builds are limited to less than 2GB of data (or less with --journal).
Sun May 13 12:08:11.186 [initandlisten] ** See http://dochub.mongodb.org/core/32bit
Sun May 13 12:08:11.187 [initandlisten]
Sun May 13 12:08:11.187 [initandlisten] db version v2.4.10
Sun May 13 12:08:11.187 [initandlisten] git version: nogitversion
Sun May 13 12:08:11.187 [initandlisten] build info: Linux bm-wb-04 3.19.0-trunk-armmp #1 SMP Debian 3.19.1-1~exp1+plugwash1 (2015-03-28) armv7l BOOST_LIB_VERSION=1_55
Sun May 13 12:08:11.187 [initandlisten] allocator: system
Sun May 13 12:08:11.187 [initandlisten] options: { dbpath: "/home/pi/mongo/db/", fork: true, journal: true, logpath: "/home/pi/mongo/mongod.log" }
Sun May 13 12:08:11.198 [initandlisten] journal dir=/home/pi/mongo/db/journal
Sun May 13 12:08:11.198 [initandlisten] recover : no journal files present, no recovery needed
Sun May 13 12:08:11.238 [initandlisten] ERROR: mmap private failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64
Sun May 13 12:08:11.239 [initandlisten] Assertion: 13636:file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
0x662fa8 0x63527c 0x6196c0 0x6197bc 0x409d5c 0x4414fc 0x2ea634 0x2eaa3c 0x2eb55c 0x2ebd98 0x26f998 0x26fb94 0x175a20 0x177bf4 0x152bf0 0x76556294
mongod(_ZN5mongo15printStackTraceERSo+0x1c) [0x662fa8]
mongod(_ZN5mongo10logContextEPKc+0x110) [0x63527c]
mongod(_ZN5mongo11msgassertedEiPKc+0xc0) [0x6196c0]
mongod(_ZN5mongo18msgassertedNoTraceEiPKc+0) [0x6197bc]
mongod(_ZN5mongo8MongoMMF13finishOpeningEv+0x308) [0x409d5c]
mongod(_ZN5mongo13MongoDataFile12openExistingEPKc+0x9c) [0x4414fc]
mongod(_ZN5mongo8Database16openExistingFileEi+0x23c) [0x2ea634]
mongod(_ZN5mongo8Database12openAllFilesEv+0x24) [0x2eaa3c]
mongod(_ZN5mongo8DatabaseC2EPKcRbRKSs+0x158) [0x2eb55c]
mongod(_ZN5mongo14DatabaseHolder11getOrCreateERKSsS2_Rb+0x500) [0x2ebd98]
mongod(_ZN5mongo6Client7Context11_finishInitEv+0x34) [0x26f998]
mongod(_ZN5mongo6Client7ContextC1ERKSsS3_b+0x78) [0x26fb94]
mongod(_ZN5mongo14_initAndListenEi+0xb00) [0x175a20]
mongod(_ZN5mongo13initAndListenEi+0x14) [0x177bf4]
mongod(main+0x2b8) [0x152bf0]
/lib/arm-linux-gnueabihf/libc.so.6(__libc_start_main+0x114) [0x76556294]
Sun May 13 12:08:11.250 [initandlisten] warning database /home/pi/mongo/db/ xueqiu could not be opened
Sun May 13 12:08:11.251 [initandlisten] DBException 13636: file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
Sun May 13 12:08:11.251 [initandlisten] exception in initAndListen: 13636 file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information), terminating
Sun May 13 12:08:11.251 dbexit:
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to close listening sockets...
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to flush diaglog...
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to close sockets...
Sun May 13 12:08:11.252 [initandlisten] shutdown: waiting for fs preallocator...
Sun May 13 12:08:11.252 [initandlisten] shutdown: lock for final commit...
Sun May 13 12:08:11.252 [initandlisten] shutdown: final commit...
Sun May 13 12:08:11.252 [initandlisten] shutdown: closing all files...
Sun May 13 12:08:11.252 [initandlisten] closeAllFiles() finished
Sun May 13 12:08:11.252 [initandlisten] journalCleanup...
Sun May 13 12:08:11.253 [initandlisten] removeJournalFiles
Sun May 13 12:08:11.263 [initandlisten] shutdown: removing fs lock...
Sun May 13 12:08:11.264 dbexit: really exiting now
看了下mongod的用法,尝试把参数 --journal,去掉,重新运行,然后就可以了。
 
sudo mongod --fork --dbpath /home/pi/mongo/db/ --smallfiles --logpath /home/pi/mongo/log.txt 查看全部
平时在树莓派上开机自动执行以下命令,启动mongo服务
sudo mongod --fork --dbpath /home/pi/mongo/db/ --smallfiles --journal --logpath /home/pi/mongo/log.txt

突然今天发现mongo的服务连不上,看log发现mongo在启动后马上关闭了,提示的错误是在加载一个大的数据文件的时候提示内存不足(坑爹的,树莓派自身内存才1GB,无法扩容)。 
 
错误日志:
Sun May 13 12:08:11.185 [initandlisten] MongoDB starting : pid=1929 port=27017 dbpath=/home/pi/mongo/db/ 32-bit host=raspberrypi
Sun May 13 12:08:11.186 [initandlisten]
Sun May 13 12:08:11.186 [initandlisten] ** NOTE: This is a 32 bit MongoDB binary.
Sun May 13 12:08:11.186 [initandlisten] ** 32 bit builds are limited to less than 2GB of data (or less with --journal).
Sun May 13 12:08:11.186 [initandlisten] ** See http://dochub.mongodb.org/core/32bit
Sun May 13 12:08:11.187 [initandlisten]
Sun May 13 12:08:11.187 [initandlisten] db version v2.4.10
Sun May 13 12:08:11.187 [initandlisten] git version: nogitversion
Sun May 13 12:08:11.187 [initandlisten] build info: Linux bm-wb-04 3.19.0-trunk-armmp #1 SMP Debian 3.19.1-1~exp1+plugwash1 (2015-03-28) armv7l BOOST_LIB_VERSION=1_55
Sun May 13 12:08:11.187 [initandlisten] allocator: system
Sun May 13 12:08:11.187 [initandlisten] options: { dbpath: "/home/pi/mongo/db/", fork: true, journal: true, logpath: "/home/pi/mongo/mongod.log" }
Sun May 13 12:08:11.198 [initandlisten] journal dir=/home/pi/mongo/db/journal
Sun May 13 12:08:11.198 [initandlisten] recover : no journal files present, no recovery needed
Sun May 13 12:08:11.238 [initandlisten] ERROR: mmap private failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64
Sun May 13 12:08:11.239 [initandlisten] Assertion: 13636:file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
0x662fa8 0x63527c 0x6196c0 0x6197bc 0x409d5c 0x4414fc 0x2ea634 0x2eaa3c 0x2eb55c 0x2ebd98 0x26f998 0x26fb94 0x175a20 0x177bf4 0x152bf0 0x76556294
mongod(_ZN5mongo15printStackTraceERSo+0x1c) [0x662fa8]
mongod(_ZN5mongo10logContextEPKc+0x110) [0x63527c]
mongod(_ZN5mongo11msgassertedEiPKc+0xc0) [0x6196c0]
mongod(_ZN5mongo18msgassertedNoTraceEiPKc+0) [0x6197bc]
mongod(_ZN5mongo8MongoMMF13finishOpeningEv+0x308) [0x409d5c]
mongod(_ZN5mongo13MongoDataFile12openExistingEPKc+0x9c) [0x4414fc]
mongod(_ZN5mongo8Database16openExistingFileEi+0x23c) [0x2ea634]
mongod(_ZN5mongo8Database12openAllFilesEv+0x24) [0x2eaa3c]
mongod(_ZN5mongo8DatabaseC2EPKcRbRKSs+0x158) [0x2eb55c]
mongod(_ZN5mongo14DatabaseHolder11getOrCreateERKSsS2_Rb+0x500) [0x2ebd98]
mongod(_ZN5mongo6Client7Context11_finishInitEv+0x34) [0x26f998]
mongod(_ZN5mongo6Client7ContextC1ERKSsS3_b+0x78) [0x26fb94]
mongod(_ZN5mongo14_initAndListenEi+0xb00) [0x175a20]
mongod(_ZN5mongo13initAndListenEi+0x14) [0x177bf4]
mongod(main+0x2b8) [0x152bf0]
/lib/arm-linux-gnueabihf/libc.so.6(__libc_start_main+0x114) [0x76556294]
Sun May 13 12:08:11.250 [initandlisten] warning database /home/pi/mongo/db/ xueqiu could not be opened
Sun May 13 12:08:11.251 [initandlisten] DBException 13636: file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
Sun May 13 12:08:11.251 [initandlisten] exception in initAndListen: 13636 file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information), terminating
Sun May 13 12:08:11.251 dbexit:
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to close listening sockets...
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to flush diaglog...
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to close sockets...
Sun May 13 12:08:11.252 [initandlisten] shutdown: waiting for fs preallocator...
Sun May 13 12:08:11.252 [initandlisten] shutdown: lock for final commit...
Sun May 13 12:08:11.252 [initandlisten] shutdown: final commit...
Sun May 13 12:08:11.252 [initandlisten] shutdown: closing all files...
Sun May 13 12:08:11.252 [initandlisten] closeAllFiles() finished
Sun May 13 12:08:11.252 [initandlisten] journalCleanup...
Sun May 13 12:08:11.253 [initandlisten] removeJournalFiles
Sun May 13 12:08:11.263 [initandlisten] shutdown: removing fs lock...
Sun May 13 12:08:11.264 dbexit: really exiting now

看了下mongod的用法,尝试把参数 --journal,去掉,重新运行,然后就可以了。
 
sudo mongod --fork --dbpath /home/pi/mongo/db/ --smallfiles --logpath /home/pi/mongo/log.txt

树莓派安装mongodb服务器

网络李魔佛 发表了文章 • 0 个评论 • 7986 次浏览 • 2017-12-18 16:57 • 来自相关话题

树莓派的自带的源就自带了mongodb-server的安装包,所以只需要使用命令:sudo apt-get install mongodb-server 就可以安装了。
 
不过安装的是32bit 的mongodb,数据库的大小会被限制在2GB。
树莓派启动mongodb
修改/etc/mongodb.config,
把里面的bind=127.0.0.1 注释掉,前面加一个#即可,因为这样其他主机也可以访问这台树莓派的mongodb服务器。
修改dbpath和dblog的路径,因为默认的路径你需要root权限

然后运行 mongod --config /etc/mongodb.config , 然后远程使用mongo ip地址就可以远程连接了。 查看全部
树莓派的自带的源就自带了mongodb-server的安装包,所以只需要使用命令:sudo apt-get install mongodb-server 就可以安装了。
 
不过安装的是32bit 的mongodb,数据库的大小会被限制在2GB。
树莓派启动mongodb
修改/etc/mongodb.config,
把里面的bind=127.0.0.1 注释掉,前面加一个#即可,因为这样其他主机也可以访问这台树莓派的mongodb服务器。
修改dbpath和dblog的路径,因为默认的路径你需要root权限

然后运行 mongod --config /etc/mongodb.config , 然后远程使用mongo ip地址就可以远程连接了。

使用官网下载的mongodb,如何设置远程连接mongodb服务器

网络安全李魔佛 发表了文章 • 0 个评论 • 5608 次浏览 • 2017-12-17 23:10 • 来自相关话题

在linux到官网下载mongodb,选择64位版本。
解压后在mongo的bin目录下运行 mongod --dbpath ~/mongo/db
可以看到mongodb服务被正常启动了。
 
在局域网其他电脑上使用mongodb客户端尝试连接这个mongo服务器,发现无法连接上。
因为官网下载的mongo问价解压后并没有mongo.conf配置文件。
 
在本机运行命令: mongo
可以看到输出:
 Server has startup warnings:
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten]
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten] ** WARNING: Using the XFS filesystem is strongly recommended with the WiredTiger storage engine
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten] ** See http://dochub.mongodb.org/core ... ystem
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** WARNING: Access control is not enabled for the database.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Read and write access to data and configuration is unrestricted.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** WARNING: This server is bound to localhost.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Remote systems will be unable to connect to this server.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Start the server with --bind_ip <address> to specify which IP
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** addresses it should serve responses from, or with --bind_ip_all to
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** bind to all interfaces. If this behavior is desired, start the
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** server with --bind_ip 127.0.0.1 to disable this warning.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten] ** WARNING: /sys/kernel/mm/transparent_hugepage/defrag is 'always'.
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten] ** We suggest setting it to 'never'
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten]
从上面的信息可以看到,如果需要远程的机子连接到本机,需要添加一个选项: --bind_ip_all
运行下面命令后:
mongod --dbpath ~/mongo/db --bind_ip_all
 
远程的机子就能够连上mongo服务器了。
 
原文地址:http://30daydo.com/article/247
转载请注明出处 查看全部
在linux到官网下载mongodb,选择64位版本。
解压后在mongo的bin目录下运行 mongod --dbpath ~/mongo/db
可以看到mongodb服务被正常启动了。
 
在局域网其他电脑上使用mongodb客户端尝试连接这个mongo服务器,发现无法连接上。
因为官网下载的mongo问价解压后并没有mongo.conf配置文件。
 
在本机运行命令: mongo
可以看到输出:
 
Server has startup warnings: 
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten]
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten] ** WARNING: Using the XFS filesystem is strongly recommended with the WiredTiger storage engine
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten] ** See http://dochub.mongodb.org/core ... ystem
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** WARNING: Access control is not enabled for the database.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Read and write access to data and configuration is unrestricted.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** WARNING: This server is bound to localhost.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Remote systems will be unable to connect to this server.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Start the server with --bind_ip <address> to specify which IP
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** addresses it should serve responses from, or with --bind_ip_all to
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** bind to all interfaces. If this behavior is desired, start the
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** server with --bind_ip 127.0.0.1 to disable this warning.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten] ** WARNING: /sys/kernel/mm/transparent_hugepage/defrag is 'always'.
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten] ** We suggest setting it to 'never'
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten]

从上面的信息可以看到,如果需要远程的机子连接到本机,需要添加一个选项: --bind_ip_all
运行下面命令后:
mongod --dbpath ~/mongo/db --bind_ip_all
 
远程的机子就能够连上mongo服务器了。
 
原文地址:http://30daydo.com/article/247
转载请注明出处

mongodb中$sum:1 后面的1是什么意思

python李魔佛 发表了文章 • 0 个评论 • 10685 次浏览 • 2017-09-05 23:32 • 来自相关话题

源数据:
{
"_id" : "GuqXmAkkARqhBDqhy",
"beatmapset_id" : "342537",
"version" : "MX",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "3.5552737712860107"
}
{
"_id" : "oHLT7KqsB7bztBGvu",
"beatmapset_id" : "342537",
"version" : "HD",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "2.7515676021575928"
}
{
"_id" : "GbotZfrPEwW69FkGD",
"beatmapset_id" : "342537",
"version" : "NM",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "0"
}
 然后运行以下的命令:
 
db.getCollection('dup_case').aggregate(
[
{$group:{
_id:{diff_approach:'$diff_approach'},
count:{$sum:2}
}},
{$match:{count:{$gt:1}}}
]

 
返回的count是6
 
所以
$sum:1 的含义:
如果前面的情况出现一次,就加1, 如果后面$sum:2 那么每次前面条件满足一次就加2
  查看全部
源数据:
{
"_id" : "GuqXmAkkARqhBDqhy",
"beatmapset_id" : "342537",
"version" : "MX",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "3.5552737712860107"
}
{
"_id" : "oHLT7KqsB7bztBGvu",
"beatmapset_id" : "342537",
"version" : "HD",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "2.7515676021575928"
}
{
"_id" : "GbotZfrPEwW69FkGD",
"beatmapset_id" : "342537",
"version" : "NM",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "0"
}

 然后运行以下的命令:
 
db.getCollection('dup_case').aggregate(
[
{$group:{
_id:{diff_approach:'$diff_approach'},
count:{$sum:2}
}},
{$match:{count:{$gt:1}}}
]
)
 
 
返回的count是6
 
所以
$sum:1 的含义:
如果前面的情况出现一次,就加1, 如果后面$sum:2 那么每次前面条件满足一次就加2
 

mongodb motor 异步操作比同步操作的时间要慢?

回复

数据库量化投机者 回复了问题 • 2 人关注 • 1 个回复 • 5418 次浏览 • 2019-08-03 09:01 • 来自相关话题

mongodb python同步两个数据库数据

python李魔佛 发表了文章 • 0 个评论 • 2143 次浏览 • 2022-04-07 02:44 • 来自相关话题

有时候需要做一些迁移工作,需要对mongodb进行迁移。默认的工具貌似也十分好用的。缺少像Navicat 之于mysql的这样神级的软件。
 
所以自己动手写代码完成:
 
# -*- coding: utf-8 -*-
# @Time : 2022/4/6 4:41
# @File : database_migrate.py
# @Author : Rocky C@www.30daydo.com
import time
from loguru import logger
import pymongo

ignore_db = ['admin', 'config', 'local',
] # 忽略更新的库

ignore_col = [('db_stock','dfcf_list_full')]

logger.add('mongo.log')

# 数据库同步
def get_client(user, password, host, port):
connect_uri = f'mongodb://{user}:{password}@{host}:{port}'
client = pymongo.MongoClient(connect_uri)
return client


def origin():
return get_client('admin', 'password', '127.0.0.1', '27017')


def target():
return get_client('root', 'password', '127.0.0.1', '27017')


def transfer():
origin_client = origin()
target_client = target()
dbs = get_db_name(origin_client)
for db in dbs:
for col in get_collection_name(origin_client, db):

if (db,col) in ignore_col:
continue
items = []
logger.info(f'正在更新{db} {col}')
for i in get_item(origin_client, db, col):
items.append(i)

insert_item(target_client, db, col, items)
logger.info(f'更新数据库 {db} {col}')
# time.sleep(0.5)

def get_item(client, db_name, col):
return client[db_name][col].find()



def insert_item(client, db_name, col, data):
batch = 1000
count = len(data)//batch + 1
for i in range(count):
item = data[i*batch:(i+1)*batch]

if len(item)==0:
continue

try:
client[db_name][col].insert_many(item)
except Exception as e:
logger.error(e)
logger.error(f'{db_name} {col} 插入出错')


def get_db_name(client):
db_name = client.list_database_names()
dbs = []
for db in db_name:
if db not in ignore_db:
dbs.append(db)
return dbs

def delete_col(client,db,col):
try:
client[db][col].delete_many({})
except Exception as e:
logger.error(e)
logger.error(db)
logger.error(col)
return False
else:
return True

def server_compare():
'''
比较2个数据库是否相同,只是单纯比较条数
'''
origin_client = origin()
target_client = target()
dbs = get_db_name(origin_client)

for db in dbs:
for col in get_collection_name(origin_client, db):
origin_count = origin_client[db][col].count_documents({})
target_count = target_client[db][col].count_documents({})
if origin_count!=target_count:
logger.info(f'collection {db} {col}有区别')
#
if delete_col(target_client,db,col):
items = []
logger.info(f'正在更新{db} {col}')
for i in get_item(origin_client, db, col):
items.append(i)

insert_item(target_client, db, col, items)
logger.info(f'更新数据库 {db} {col}')
time.sleep(1)




def get_collection_name(client, db_name):
collection_names = client[db_name].list_collection_names(session=None)
return collection_names

def main():
server_compare()

if __name__ == '__main__':
main()

 原理就是不断迭代,不同的数据库,里面的不同的collection。
对于同名collection,通过条数是否一致,来决定是否要把原数据复制过来。 
 
保存上面文件为main.py
 
执行 python main.py
 
就可以进行数据同步工作啦。 查看全部
有时候需要做一些迁移工作,需要对mongodb进行迁移。默认的工具貌似也十分好用的。缺少像Navicat 之于mysql的这样神级的软件。
 
所以自己动手写代码完成:
 
# -*- coding: utf-8 -*-
# @Time : 2022/4/6 4:41
# @File : database_migrate.py
# @Author : Rocky C@www.30daydo.com
import time
from loguru import logger
import pymongo

ignore_db = ['admin', 'config', 'local',
] # 忽略更新的库

ignore_col = [('db_stock','dfcf_list_full')]

logger.add('mongo.log')

# 数据库同步
def get_client(user, password, host, port):
connect_uri = f'mongodb://{user}:{password}@{host}:{port}'
client = pymongo.MongoClient(connect_uri)
return client


def origin():
return get_client('admin', 'password', '127.0.0.1', '27017')


def target():
return get_client('root', 'password', '127.0.0.1', '27017')


def transfer():
origin_client = origin()
target_client = target()
dbs = get_db_name(origin_client)
for db in dbs:
for col in get_collection_name(origin_client, db):

if (db,col) in ignore_col:
continue
items = []
logger.info(f'正在更新{db} {col}')
for i in get_item(origin_client, db, col):
items.append(i)

insert_item(target_client, db, col, items)
logger.info(f'更新数据库 {db} {col}')
# time.sleep(0.5)

def get_item(client, db_name, col):
return client[db_name][col].find()



def insert_item(client, db_name, col, data):
batch = 1000
count = len(data)//batch + 1
for i in range(count):
item = data[i*batch:(i+1)*batch]

if len(item)==0:
continue

try:
client[db_name][col].insert_many(item)
except Exception as e:
logger.error(e)
logger.error(f'{db_name} {col} 插入出错')


def get_db_name(client):
db_name = client.list_database_names()
dbs = []
for db in db_name:
if db not in ignore_db:
dbs.append(db)
return dbs

def delete_col(client,db,col):
try:
client[db][col].delete_many({})
except Exception as e:
logger.error(e)
logger.error(db)
logger.error(col)
return False
else:
return True

def server_compare():
'''
比较2个数据库是否相同,只是单纯比较条数
'''
origin_client = origin()
target_client = target()
dbs = get_db_name(origin_client)

for db in dbs:
for col in get_collection_name(origin_client, db):
origin_count = origin_client[db][col].count_documents({})
target_count = target_client[db][col].count_documents({})
if origin_count!=target_count:
logger.info(f'collection {db} {col}有区别')
#
if delete_col(target_client,db,col):
items = []
logger.info(f'正在更新{db} {col}')
for i in get_item(origin_client, db, col):
items.append(i)

insert_item(target_client, db, col, items)
logger.info(f'更新数据库 {db} {col}')
time.sleep(1)




def get_collection_name(client, db_name):
collection_names = client[db_name].list_collection_names(session=None)
return collection_names

def main():
server_compare()

if __name__ == '__main__':
main()

 原理就是不断迭代,不同的数据库,里面的不同的collection。
对于同名collection,通过条数是否一致,来决定是否要把原数据复制过来。 
 
保存上面文件为main.py
 
执行 python main.py
 
就可以进行数据同步工作啦。

mongodb 判断列表字段不为空

数据库李魔佛 发表了文章 • 0 个评论 • 8968 次浏览 • 2019-08-20 11:08 • 来自相关话题

首先插入一批数据:
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[1,2,3,4,5]})
db.test_tab.insert({array:[1,2,3,4,5,6]})
使用以下命令判断列表不为空:
db.getCollection("example").find({array:{$exists:true,$ne:[]}}); # 字段不为0 查看全部
首先插入一批数据:
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[]})
db.test_tab.insert({array:[1,2,3,4,5]})
db.test_tab.insert({array:[1,2,3,4,5,6]})

使用以下命令判断列表不为空:
db.getCollection("example").find({array:{$exists:true,$ne:[]}}); # 字段不为0

mongodb 修改嵌套字典字典的字段名

数据库李魔佛 发表了文章 • 0 个评论 • 6075 次浏览 • 2019-08-05 13:55 • 来自相关话题

对于mongodb,修改字段名称的语法是

db.test.update({},{$rename:{'旧字段':'新字段'}},true,true)

 
比如下面的例子:db.getCollection('example').update({},{$rename:{'corp':'企业'}})
上面就是把字段corp改为企业。
 
如果是嵌套字段呢?
比如  corp字典是一个字典,里面是 { 'address':'USA',    'phone':'12345678' }
 
那么要修改里面的address为地址:
 db.getCollection('example').update({},{$rename:{'corp.address':'corp.地址'}})
 原创文章,转载请注明出处
原文连接:http://30daydo.com/article/521
  查看全部
对于mongodb,修改字段名称的语法是


db.test.update({},{$rename:{'旧字段':'新字段'}},true,true)


 
比如下面的例子:
db.getCollection('example').update({},{$rename:{'corp':'企业'}})

上面就是把字段corp改为企业。
 
如果是嵌套字段呢?
比如  corp字典是一个字典,里面是 { 'address':'USA',    'phone':'12345678' }
 
那么要修改里面的address为地址:
 
db.getCollection('example').update({},{$rename:{'corp.address':'corp.地址'}})

 原创文章,转载请注明出处
原文连接:http://30daydo.com/article/521
 

mongodb find得到的数据顺序每次都是一样的

数据库李魔佛 发表了文章 • 0 个评论 • 3396 次浏览 • 2019-07-26 09:00 • 来自相关话题

只要用的find内容不变,那么返回的内容顺序也就都一样的。
只要用的find内容不变,那么返回的内容顺序也就都一样的。

使用pymongo中的find_one_and_update出错:需要分片键

数据库李魔佛 发表了文章 • 0 个评论 • 5830 次浏览 • 2019-06-10 17:13 • 来自相关话题

错误信息如下: File "C:\ProgramData\Anaconda3\lib\site-packages\pymongo\helpers.py", line 155, in _check_command_response
raise OperationFailure(msg % errmsg, code, response)
pymongo.errors.OperationFailure: Query for sharded findAndModify must contain the shard key
2019-06-10 16:14:32 [scrapy.core.engine] INFO: Closing spider (finished)
2019-06-10 16:14:32 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
需要在查询语句中把分片键也添加进去。
因为findOneModify只会找一个记录,但是到底在哪个分片的记录呢? 因为不确定,所以才需要把shard加上去。
 
 
参考官方:
Targeted Operations vs. Broadcast Operations
Generally, the fastest queries in a sharded environment are those that mongos route to a single shard, using the shard key and the cluster meta data from the config server. These targeted operations use the shard key value to locate the shard or subset of shards that satisfy the query document.
For queries that don’t include the shard key, mongos must query all shards, wait for their responses and then return the result to the application. These “scatter/gather” queries can be long running operations.
Broadcast Operations
mongos instances broadcast queries to all shards for the collection unless the mongos can determine which shard or subset of shards stores this data.

After the mongos receives responses from all shards, it merges the data and returns the result document. The performance of a broadcast operation depends on the overall load of the cluster, as well as variables like network latency, individual shard load, and number of documents returned per shard. Whenever possible, favor operations that result in targeted operation over those that result in a broadcast operation.
Multi-update operations are always broadcast operations.
The updateMany() and deleteMany() methods are broadcast operations, unless the query document specifies the shard key in full.
Targeted Operations
mongos can route queries that include the shard key or the prefix of a compound shard key a specific shard or set of shards. mongos uses the shard key value to locate the chunk whose range includes the shard key value and directs the query at the shard containing that chunk.

For example, if the shard key is:
copy
{ a: 1, b: 1, c: 1 }

The mongos program can route queries that include the full shard key or either of the following shard key prefixes at a specific shard or set of shards:
copy
{ a: 1 }
{ a: 1, b: 1 }

All insertOne() operations target to one shard. Each document in the insertMany() array targets to a single shard, but there is no guarantee all documents in the array insert into a single shard.
All updateOne(), replaceOne() and deleteOne() operations must include the shard key or _id in the query document. MongoDB returns an error if these methods are used without the shard key or _id.
Depending on the distribution of data in the cluster and the selectivity of the query, mongos may still perform a broadcast operation to fulfill these queries.
Index Use
If the query does not include the shard key, the mongos must send the query to all shards as a “scatter/gather” operation. Each shard will, in turn, use either the shard key index or another more efficient index to fulfill the query.
If the query includes multiple sub-expressions that reference the fields indexed by the shard key and the secondary index, the mongos can route the queries to a specific shard and the shard will use the index that will allow it to fulfill most efficiently.
Sharded Cluster Security
Use Internal Authentication to enforce intra-cluster security and prevent unauthorized cluster components from accessing the cluster. You must start each mongod or mongos in the cluster with the appropriate security settings in order to enforce internal authentication.
See Deploy Sharded Cluster with Keyfile Access Control for a tutorial on deploying a secured shardedcluster.
Cluster Users
Sharded clusters support Role-Based Access Control (RBAC) for restricting unauthorized access to cluster data and operations. You must start each mongod in the cluster, including the config servers, with the --auth option in order to enforce RBAC. Alternatively, enforcing Internal Authentication for inter-cluster security also enables user access controls via RBAC.
With RBAC enforced, clients must specify a --username, --password, and --authenticationDatabase when connecting to the mongos in order to access cluster resources.
Each cluster has its own cluster users. These users cannot be used to access individual shards.
See Enable Access Control for a tutorial on enabling adding users to an RBAC-enabled MongoDB deployment. 查看全部
错误信息如下:
  File "C:\ProgramData\Anaconda3\lib\site-packages\pymongo\helpers.py", line 155, in _check_command_response
raise OperationFailure(msg % errmsg, code, response)
pymongo.errors.OperationFailure: Query for sharded findAndModify must contain the shard key
2019-06-10 16:14:32 [scrapy.core.engine] INFO: Closing spider (finished)
2019-06-10 16:14:32 [scrapy.statscollectors] INFO: Dumping Scrapy stats:

需要在查询语句中把分片键也添加进去。
因为findOneModify只会找一个记录,但是到底在哪个分片的记录呢? 因为不确定,所以才需要把shard加上去。
 
 
参考官方:
Targeted Operations vs. Broadcast Operations
Generally, the fastest queries in a sharded environment are those that mongos route to a single shard, using the shard key and the cluster meta data from the config server. These targeted operations use the shard key value to locate the shard or subset of shards that satisfy the query document.
For queries that don’t include the shard key, mongos must query all shards, wait for their responses and then return the result to the application. These “scatter/gather” queries can be long running operations.
Broadcast Operations
mongos instances broadcast queries to all shards for the collection unless the mongos can determine which shard or subset of shards stores this data.

After the mongos receives responses from all shards, it merges the data and returns the result document. The performance of a broadcast operation depends on the overall load of the cluster, as well as variables like network latency, individual shard load, and number of documents returned per shard. Whenever possible, favor operations that result in targeted operation over those that result in a broadcast operation.
Multi-update operations are always broadcast operations.
The updateMany() and deleteMany() methods are broadcast operations, unless the query document specifies the shard key in full.
Targeted Operations
mongos can route queries that include the shard key or the prefix of a compound shard key a specific shard or set of shards. mongos uses the shard key value to locate the chunk whose range includes the shard key value and directs the query at the shard containing that chunk.

For example, if the shard key is:
copy
{ a: 1, b: 1, c: 1 }

The mongos program can route queries that include the full shard key or either of the following shard key prefixes at a specific shard or set of shards:
copy
{ a: 1 }
{ a: 1, b: 1 }

All insertOne() operations target to one shard. Each document in the insertMany() array targets to a single shard, but there is no guarantee all documents in the array insert into a single shard.
All updateOne(), replaceOne() and deleteOne() operations must include the shard key or _id in the query document. MongoDB returns an error if these methods are used without the shard key or _id.
Depending on the distribution of data in the cluster and the selectivity of the query, mongos may still perform a broadcast operation to fulfill these queries.
Index Use
If the query does not include the shard key, the mongos must send the query to all shards as a “scatter/gather” operation. Each shard will, in turn, use either the shard key index or another more efficient index to fulfill the query.
If the query includes multiple sub-expressions that reference the fields indexed by the shard key and the secondary index, the mongos can route the queries to a specific shard and the shard will use the index that will allow it to fulfill most efficiently.
Sharded Cluster Security
Use Internal Authentication to enforce intra-cluster security and prevent unauthorized cluster components from accessing the cluster. You must start each mongod or mongos in the cluster with the appropriate security settings in order to enforce internal authentication.
See Deploy Sharded Cluster with Keyfile Access Control for a tutorial on deploying a secured shardedcluster.
Cluster Users
Sharded clusters support Role-Based Access Control (RBAC) for restricting unauthorized access to cluster data and operations. You must start each mongod in the cluster, including the config servers, with the --auth option in order to enforce RBAC. Alternatively, enforcing Internal Authentication for inter-cluster security also enables user access controls via RBAC.
With RBAC enforced, clients must specify a --username, --password, and --authenticationDatabase when connecting to the mongos in order to access cluster resources.
Each cluster has its own cluster users. These users cannot be used to access individual shards.
See Enable Access Control for a tutorial on enabling adding users to an RBAC-enabled MongoDB deployment.

Warning: unable to run listCollections, attempting to approximate collection

数据库李魔佛 发表了文章 • 0 个评论 • 19717 次浏览 • 2019-06-07 17:35 • 来自相关话题

在mongodb中参数查看数据库中的表是报错:

Warning: unable to run listCollections, attempting to approximate collection names by parsing connectionStatus

那是因为设置了密码,但是没有进行认证导致的错误。这个错误为啥不直接说明原因呢。汗
 
直接: db.auth('admin','密码')
认证成功返回1, 然后重新执行show tables就可以看到所有的表了。 查看全部
在mongodb中参数查看数据库中的表是报错:

Warning: unable to run listCollections, attempting to approximate collection names by parsing connectionStatus

那是因为设置了密码,但是没有进行认证导致的错误。这个错误为啥不直接说明原因呢。汗
 
直接: db.auth('admin','密码')
认证成功返回1, 然后重新执行show tables就可以看到所有的表了。

python连接mongodb集群 cluster

数据库李魔佛 发表了文章 • 0 个评论 • 4831 次浏览 • 2019-06-03 15:55 • 来自相关话题

网上资料比较少,自己测试了下。
连接方法如下:import pymongo
db = pymongo.MongoClient('mongodb://10.18.6.46,10.18.6.26,10.18.6.102')上面默认的端口do都是27017,如果是其他端口,需要这样修改:db = pymongo.MongoClient('mongodb://10.18.6.46:8888,10.18.6.26:9999,10.18.6.102:7777')
然后就可以正常读写数据库:
 
读:coll=db['testdb']['testcollection'].find()
for i in coll:
print(i)输出内容:{'_id': ObjectId('5cf4c7981ee9edff72e5c503'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7991ee9edff72e5c504'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7991ee9edff72e5c505'), 'username': 'hello'}
{'_id': ObjectId('5cf4c79a1ee9edff72e5c506'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7b21ee9edff72e5c507'), 'username': 'hello world'}



 
写:collection = db['testdb']['testcollection']

for i in range(10):
collection.insert({'username':'huston{}'.format(i)})
 
原创文章,转载请注明出处:
http://30daydo.com/article/494
  查看全部
网上资料比较少,自己测试了下。
连接方法如下:
import pymongo
db = pymongo.MongoClient('mongodb://10.18.6.46,10.18.6.26,10.18.6.102')
上面默认的端口do都是27017,如果是其他端口,需要这样修改:
db = pymongo.MongoClient('mongodb://10.18.6.46:8888,10.18.6.26:9999,10.18.6.102:7777')

然后就可以正常读写数据库:
 
读:
coll=db['testdb']['testcollection'].find()
for i in coll:
print(i)
输出内容:
{'_id': ObjectId('5cf4c7981ee9edff72e5c503'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7991ee9edff72e5c504'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7991ee9edff72e5c505'), 'username': 'hello'}
{'_id': ObjectId('5cf4c79a1ee9edff72e5c506'), 'username': 'hello'}
{'_id': ObjectId('5cf4c7b21ee9edff72e5c507'), 'username': 'hello world'}



 
写:
collection = db['testdb']['testcollection']

for i in range(10):
collection.insert({'username':'huston{}'.format(i)})

 
原创文章,转载请注明出处:
http://30daydo.com/article/494
 

【python】pymongo find_one_and_update的用法

python爬虫李魔佛 发表了文章 • 0 个评论 • 14528 次浏览 • 2019-04-04 11:31 • 来自相关话题

原生的mongo语句是这样的:db.collection.findOneAndUpdate(
<filter>,
<update>,
{
projection: <document>,
sort: <document>,
maxTimeMS: <number>,
upsert: <boolean>,
returnNewDocument: <boolean>,
collation: <document>,
arrayFilters: [ <filterdocument1>, ... ]
}
)
转换成python pymongo是这样的:>>> db.example.find_one_and_update(
... {'_id': 'userid'},
... {'$inc': {'seq': 1}},
... projection={'seq': True, '_id': False},
... return_document=ReturnDocument.AFTER)
上面的语句的意思是:
找到_id 为userid的值得文档,然后把该文档中的seq的值+1,然后返回seq的数据,不显示_id列
最后返回的数据是这样的:

{'seq': 2}
 
注意
findOneAndUpdate
是获取mongo文档中第一条满足条件的数据并做修改。该函数是线程安全的。意思就是在多个线程中操作,不会对同一条数据进行获取修改。
也就是该操作是原子操作。
 
ReturnDocument 引用的库
 
class pymongo.collection.ReturnDocument
 
在开头 from pymongo.collection import ReturnDocument
 
原创文章
转载请注明出处:
http://30daydo.com/article/445 查看全部
原生的mongo语句是这样的:
db.collection.findOneAndUpdate(
<filter>,
<update>,
{
projection: <document>,
sort: <document>,
maxTimeMS: <number>,
upsert: <boolean>,
returnNewDocument: <boolean>,
collation: <document>,
arrayFilters: [ <filterdocument1>, ... ]
}
)

转换成python pymongo是这样的:
>>> db.example.find_one_and_update(
... {'_id': 'userid'},
... {'$inc': {'seq': 1}},
... projection={'seq': True, '_id': False},
... return_document=ReturnDocument.AFTER)

上面的语句的意思是:
找到_id 为userid的值得文档,然后把该文档中的seq的值+1,然后返回seq的数据,不显示_id列
最后返回的数据是这样的:

{'seq': 2}
 
注意
findOneAndUpdate
是获取mongo文档中第一条满足条件的数据并做修改。该函数是线程安全的。意思就是在多个线程中操作,不会对同一条数据进行获取修改。
也就是该操作是原子操作。
 
ReturnDocument 引用的库
 
class pymongo.collection.ReturnDocument
 
在开头 from pymongo.collection import ReturnDocument
 
原创文章
转载请注明出处:
http://30daydo.com/article/445

python 代码获取mongodb数据库下所有的collection 文档名字

python李魔佛 发表了文章 • 0 个评论 • 5606 次浏览 • 2018-11-27 11:41 • 来自相关话题

获取一个数据库下所有的collection 文档db['db_pledge'].collection_names()
db['db_pledge'].list_collection_names()
获取一个数据库下所有的collection 文档
db['db_pledge'].collection_names()
db['db_pledge'].list_collection_names()

python爬虫集思录所有用户的帖子 scrapy写入mongodb数据库

python爬虫李魔佛 发表了文章 • 0 个评论 • 6660 次浏览 • 2018-09-02 21:52 • 来自相关话题

好久没更新了,把之前做的一些爬虫分享一下。不然都没有用户来了。-. -
 
项目采用scrapy的框架,数据写入到mongodb的数据库。 整个站点爬下来大概用了半小时,数据有12w条。
 
项目中的主要代码如下:
 
主spider# -*- coding: utf-8 -*-
import re
import scrapy
from scrapy import Request, FormRequest
from jsl.items import JslItem
from jsl import config
import logging

class AllcontentSpider(scrapy.Spider):
name = 'allcontent'

headers = {
'Host': 'www.jisilu.cn', 'Connection': 'keep-alive', 'Pragma': 'no-cache',
'Cache-Control': 'no-cache', 'Accept': 'application/json,text/javascript,*/*;q=0.01',
'Origin': 'https://www.jisilu.cn', 'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/67.0.3396.99Safari/537.36',
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
'Referer': 'https://www.jisilu.cn/login/',
'Accept-Encoding': 'gzip,deflate,br',
'Accept-Language': 'zh,en;q=0.9,en-US;q=0.8'
}

def start_requests(self):
login_url = 'https://www.jisilu.cn/login/'
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip,deflate,br', 'Accept-Language': 'zh,en;q=0.9,en-US;q=0.8',
'Cache-Control': 'no-cache', 'Connection': 'keep-alive',
'Host': 'www.jisilu.cn', 'Pragma': 'no-cache', 'Referer': 'https://www.jisilu.cn/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/67.0.3396.99Safari/537.36'}

yield Request(url=login_url, headers=headers, callback=self.login,dont_filter=True)

def login(self, response):
url = 'https://www.jisilu.cn/account/ajax/login_process/'
data = {
'return_url': 'https://www.jisilu.cn/',
'user_name': config.username,
'password': config.password,
'net_auto_login': '1',
'_post_type': 'ajax',
}

yield FormRequest(
url=url,
headers=self.headers,
formdata=data,
callback=self.parse,
dont_filter=True
)

def parse(self, response):
for i in range(1,3726):
focus_url = 'https://www.jisilu.cn/home/explore/sort_type-new__day-0__page-{}'.format(i)
yield Request(url=focus_url, headers=self.headers, callback=self.parse_page,dont_filter=True)

def parse_page(self, response):
nodes = response.xpath('//div[@class="aw-question-list"]/div')
for node in nodes:
each_url=node.xpath('.//h4/a/@href').extract_first()
yield Request(url=each_url,headers=self.headers,callback=self.parse_item,dont_filter=True)

def parse_item(self,response):
item = JslItem()
title = response.xpath('//div[@class="aw-mod-head"]/h1/text()').extract_first()
s = response.xpath('//div[@class="aw-question-detail-txt markitup-box"]').xpath('string(.)').extract_first()
ret = re.findall('(.*?)\.donate_user_avatar', s, re.S)

try:
content = ret[0].strip()
except:
content = None

createTime = response.xpath('//div[@class="aw-question-detail-meta"]/span/text()').extract_first()

resp_no = response.xpath('//div[@class="aw-mod aw-question-detail-box"]//ul/h2/text()').re_first('\d+')

url = response.url
item['title'] = title.strip()
item['content'] = content
try:
item['resp_no']=int(resp_no)
except Exception as e:
logging.warning('e')
item['resp_no']=None

item['createTime'] = createTime
item['url'] = url.strip()
resp =
for index,reply in enumerate(response.xpath('//div[@class="aw-mod-body aw-dynamic-topic"]/div[@class="aw-item"]')):
replay_user = reply.xpath('.//div[@class="pull-left aw-dynamic-topic-content"]//p/a/text()').extract_first()
rep_content = reply.xpath(
'.//div[@class="pull-left aw-dynamic-topic-content"]//div[@class="markitup-box"]/text()').extract_first()
# print rep_content
agree=reply.xpath('.//em[@class="aw-border-radius-5 aw-vote-count pull-left"]/text()').extract_first()
resp.append({replay_user.strip()+'_{}'.format(index): [int(agree),rep_content.strip()]})

item['resp'] = resp
yield item




login函数是模拟登录集思录,通过抓包就可以知道一些上传的data。
然后就是分页去抓取。逻辑很简单。
 
然后pipeline里面写入mongodb。import pymongo
from collections import OrderedDict
class JslPipeline(object):
def __init__(self):
self.db = pymongo.MongoClient(host='10.18.6.1',port=27017)
# self.user = u'neo牛3' # 修改为指定的用户名 如 毛之川 ,然后找到用户的id,在用户也的源码哪里可以找到 比如持有封基是8132
self.collection = self.db['db_parker']['jsl']
def process_item(self, item, spider):
self.collection.insert(OrderedDict(item))
return item
抓取到的数据入库mongodb:





 点击查看大图

原创文章
转载请注明出处:http://30daydo.com/publish/article/351
 
  查看全部
好久没更新了,把之前做的一些爬虫分享一下。不然都没有用户来了。-. -
 
项目采用scrapy的框架,数据写入到mongodb的数据库。 整个站点爬下来大概用了半小时,数据有12w条。
 
项目中的主要代码如下:
 
主spider
# -*- coding: utf-8 -*-
import re
import scrapy
from scrapy import Request, FormRequest
from jsl.items import JslItem
from jsl import config
import logging

class AllcontentSpider(scrapy.Spider):
name = 'allcontent'

headers = {
'Host': 'www.jisilu.cn', 'Connection': 'keep-alive', 'Pragma': 'no-cache',
'Cache-Control': 'no-cache', 'Accept': 'application/json,text/javascript,*/*;q=0.01',
'Origin': 'https://www.jisilu.cn', 'X-Requested-With': 'XMLHttpRequest',
'User-Agent': 'Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/67.0.3396.99Safari/537.36',
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
'Referer': 'https://www.jisilu.cn/login/',
'Accept-Encoding': 'gzip,deflate,br',
'Accept-Language': 'zh,en;q=0.9,en-US;q=0.8'
}

def start_requests(self):
login_url = 'https://www.jisilu.cn/login/'
headers = {
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Accept-Encoding': 'gzip,deflate,br', 'Accept-Language': 'zh,en;q=0.9,en-US;q=0.8',
'Cache-Control': 'no-cache', 'Connection': 'keep-alive',
'Host': 'www.jisilu.cn', 'Pragma': 'no-cache', 'Referer': 'https://www.jisilu.cn/',
'Upgrade-Insecure-Requests': '1',
'User-Agent': 'Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.36(KHTML,likeGecko)Chrome/67.0.3396.99Safari/537.36'}

yield Request(url=login_url, headers=headers, callback=self.login,dont_filter=True)

def login(self, response):
url = 'https://www.jisilu.cn/account/ajax/login_process/'
data = {
'return_url': 'https://www.jisilu.cn/',
'user_name': config.username,
'password': config.password,
'net_auto_login': '1',
'_post_type': 'ajax',
}

yield FormRequest(
url=url,
headers=self.headers,
formdata=data,
callback=self.parse,
dont_filter=True
)

def parse(self, response):
for i in range(1,3726):
focus_url = 'https://www.jisilu.cn/home/explore/sort_type-new__day-0__page-{}'.format(i)
yield Request(url=focus_url, headers=self.headers, callback=self.parse_page,dont_filter=True)

def parse_page(self, response):
nodes = response.xpath('//div[@class="aw-question-list"]/div')
for node in nodes:
each_url=node.xpath('.//h4/a/@href').extract_first()
yield Request(url=each_url,headers=self.headers,callback=self.parse_item,dont_filter=True)

def parse_item(self,response):
item = JslItem()
title = response.xpath('//div[@class="aw-mod-head"]/h1/text()').extract_first()
s = response.xpath('//div[@class="aw-question-detail-txt markitup-box"]').xpath('string(.)').extract_first()
ret = re.findall('(.*?)\.donate_user_avatar', s, re.S)

try:
content = ret[0].strip()
except:
content = None

createTime = response.xpath('//div[@class="aw-question-detail-meta"]/span/text()').extract_first()

resp_no = response.xpath('//div[@class="aw-mod aw-question-detail-box"]//ul/h2/text()').re_first('\d+')

url = response.url
item['title'] = title.strip()
item['content'] = content
try:
item['resp_no']=int(resp_no)
except Exception as e:
logging.warning('e')
item['resp_no']=None

item['createTime'] = createTime
item['url'] = url.strip()
resp =
for index,reply in enumerate(response.xpath('//div[@class="aw-mod-body aw-dynamic-topic"]/div[@class="aw-item"]')):
replay_user = reply.xpath('.//div[@class="pull-left aw-dynamic-topic-content"]//p/a/text()').extract_first()
rep_content = reply.xpath(
'.//div[@class="pull-left aw-dynamic-topic-content"]//div[@class="markitup-box"]/text()').extract_first()
# print rep_content
agree=reply.xpath('.//em[@class="aw-border-radius-5 aw-vote-count pull-left"]/text()').extract_first()
resp.append({replay_user.strip()+'_{}'.format(index): [int(agree),rep_content.strip()]})

item['resp'] = resp
yield item




login函数是模拟登录集思录,通过抓包就可以知道一些上传的data。
然后就是分页去抓取。逻辑很简单。
 
然后pipeline里面写入mongodb。
import pymongo
from collections import OrderedDict
class JslPipeline(object):
def __init__(self):
self.db = pymongo.MongoClient(host='10.18.6.1',port=27017)
# self.user = u'neo牛3' # 修改为指定的用户名 如 毛之川 ,然后找到用户的id,在用户也的源码哪里可以找到 比如持有封基是8132
self.collection = self.db['db_parker']['jsl']
def process_item(self, item, spider):
self.collection.insert(OrderedDict(item))
return item

抓取到的数据入库mongodb:

记实录.PNG

 点击查看大图

原创文章
转载请注明出处:http://30daydo.com/publish/article/351
 
 

docker里运行mongodb,保存的数据在外部使用mongoexport不能导出:提示错误Unrecognized field 'snapshot'

python李魔佛 发表了文章 • 0 个评论 • 10747 次浏览 • 2018-08-31 14:21 • 来自相关话题

## 2019-03-19更新 问题已解决
 很无语。 目前还找不到原因。
 
docker里面运行的mongodb, mongodb的数据挂载到宿主机。 开放了27017端口。
在windows下使用mongoexport工具导出数据:
 
错误信息:C:\Program Files\MongoDB\Server\3.4\bin>mongoexport.exe /h 10.18.6.102 /d stock
/c company /o company.json /type json
2018-08-31T14:13:47.841+0800 connected to: 10.18.6.102
2018-08-31T14:13:47.854+0800 Failed: Failed to parse: { find: "company", filt
er: {}, sort: {}, skip: 0, snapshot: true, $readPreference: { mode: "secondaryPr
eferred" }, $db: "stock" }. Unrecognized field 'snapshot'.

C:\Program Files\MongoDB\Server\3.4\bin> 
目前这个问题已经解决:
需要进去docker容器里面,然后在容器里面操作,把数据导出来到挂载的目录下,然后可以直接获取到数据了。 查看全部
## 2019-03-19更新 问题已解决
 很无语。 目前还找不到原因。
 
docker里面运行的mongodb, mongodb的数据挂载到宿主机。 开放了27017端口。
在windows下使用mongoexport工具导出数据:
 
错误信息:
C:\Program Files\MongoDB\Server\3.4\bin>mongoexport.exe /h 10.18.6.102 /d stock
/c company /o company.json /type json
2018-08-31T14:13:47.841+0800 connected to: 10.18.6.102
2018-08-31T14:13:47.854+0800 Failed: Failed to parse: { find: "company", filt
er: {}, sort: {}, skip: 0, snapshot: true, $readPreference: { mode: "secondaryPr
eferred" }, $db: "stock" }. Unrecognized field 'snapshot'.

C:\Program Files\MongoDB\Server\3.4\bin>
 
目前这个问题已经解决:
需要进去docker容器里面,然后在容器里面操作,把数据导出来到挂载的目录下,然后可以直接获取到数据了。

python mongodb大数据(>3GB)转移Mysql数据库

python李魔佛 发表了文章 • 0 个评论 • 5332 次浏览 • 2018-08-20 15:44 • 来自相关话题

数据约为5GB左右,如果直接用for i in doc.find({})进行逐行遍历的话,游标就会超时,而且越到后面速度越慢.
 
 于是使用了分段遍历的方法.# -*-coding=utf-8-*-
import pandas as pd
import json
import pymongo
from sqlalchemy import create_engine

# 将mongo数据转移到mysql

client = pymongo.MongoClient('xxx')
doc = client['spider']['meituan']
engine = create_engine('mysql+pymysql://xxx:xxx@xxx:/xxx?charset=utf8')


def classic_method():
temp =
start = 0
# 数据太大还是会爆内存,或者游标丢失
for i in doc.find().batch_size(500):
start += 1
del i['_id']
temp.append(i)
print(start)

print('start to save to mysql')
df = pd.read_json(json.dumps(temp))
df = df.set_index('poiid', drop=True)
df.to_sql('meituan', con=engine, if_exists='replace')
print('done')


def chunksize_move():
block = 10000
total = doc.find({}).count()
iter_number = total // block

for i in range(iter_number + 1):
small_part = doc.find({}).limit(block).skip(i * block)
list_data =

for item in small_part:
del item['_id']
del item['crawl_time']
item['poiid'] = int(item['poiid'])
for k, v in item.items():
if isinstance(v, dict) or isinstance(v, list):

item[k] = json.dumps(v, ensure_ascii=False)

list_data.append(item)

df = pd.DataFrame(list_data)
df = df.set_index('poiid', drop=True)

try:
df.to_sql('meituan', con=engine, if_exists='append')
print('to sql {}'.format(i))
except Exception as e:
print(e)

chunksize_move()

 





速度比一次批量的要快不少. 查看全部
数据约为5GB左右,如果直接用
for i in doc.find({})
进行逐行遍历的话,游标就会超时,而且越到后面速度越慢.
 
 于是使用了分段遍历的方法.
# -*-coding=utf-8-*-
import pandas as pd
import json
import pymongo
from sqlalchemy import create_engine

# 将mongo数据转移到mysql

client = pymongo.MongoClient('xxx')
doc = client['spider']['meituan']
engine = create_engine('mysql+pymysql://xxx:xxx@xxx:/xxx?charset=utf8')


def classic_method():
temp =
start = 0
# 数据太大还是会爆内存,或者游标丢失
for i in doc.find().batch_size(500):
start += 1
del i['_id']
temp.append(i)
print(start)

print('start to save to mysql')
df = pd.read_json(json.dumps(temp))
df = df.set_index('poiid', drop=True)
df.to_sql('meituan', con=engine, if_exists='replace')
print('done')


def chunksize_move():
block = 10000
total = doc.find({}).count()
iter_number = total // block

for i in range(iter_number + 1):
small_part = doc.find({}).limit(block).skip(i * block)
list_data =

for item in small_part:
del item['_id']
del item['crawl_time']
item['poiid'] = int(item['poiid'])
for k, v in item.items():
if isinstance(v, dict) or isinstance(v, list):

item[k] = json.dumps(v, ensure_ascii=False)

list_data.append(item)

df = pd.DataFrame(list_data)
df = df.set_index('poiid', drop=True)

try:
df.to_sql('meituan', con=engine, if_exists='append')
print('to sql {}'.format(i))
except Exception as e:
print(e)

chunksize_move()

 

to_sql.PNG

速度比一次批量的要快不少.

python 把mongodb的数据迁移到mysql

python李魔佛 发表了文章 • 0 个评论 • 4842 次浏览 • 2018-08-20 11:02 • 来自相关话题

代码如下: 很简短.
import pymongo
from setting import get_engine

# 将mongo数据转移到mysql

client = pymongo.MongoClient('10.18.6.101')
doc = client['spider']['meituan']
engine = create_engine('mysql+pymysql://localhost:1234@10.18.4.211/spider?charset=utf8')
temp=[]

for i in doc.find({}):
del i['_id']
temp.append(i)
print('start to save to mysql')
df = pd.read_json(json.dumps(temp))
df = df.set_index('poiid',drop=True)
df.to_sql('meituan',con=engine,if_exists='replace')
print('done')





 
居然CPU飙到了90%
  查看全部
代码如下: 很简短.
import pymongo
from setting import get_engine

# 将mongo数据转移到mysql

client = pymongo.MongoClient('10.18.6.101')
doc = client['spider']['meituan']
engine = create_engine('mysql+pymysql://localhost:1234@10.18.4.211/spider?charset=utf8')
temp=[]

for i in doc.find({}):
del i['_id']
temp.append(i)
print('start to save to mysql')
df = pd.read_json(json.dumps(temp))
df = df.set_index('poiid',drop=True)
df.to_sql('meituan',con=engine,if_exists='replace')
print('done')


cpu.PNG

 
居然CPU飙到了90%
 

mongodb sort: Executor error during find command: OperationFailed: Sort operation used more than

网络李魔佛 发表了文章 • 0 个评论 • 7903 次浏览 • 2018-07-09 10:31 • 来自相关话题

mongodb 排序出现内存溢出:
 Error: error: {
"ok" : 0,
"errmsg" : "Executor error during find command: OperationFailed: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit.",
"code" : 96,
"codeName" : "OperationFailed"
}
使用limit函数限制其输出就可以了:
 
db.getCollection('老布').find({}).sort({'created_at':-1}).limit(1000) 查看全部
mongodb 排序出现内存溢出:
 
Error: error: {
"ok" : 0,
"errmsg" : "Executor error during find command: OperationFailed: Sort operation used more than the maximum 33554432 bytes of RAM. Add an index, or specify a smaller limit.",
"code" : 96,
"codeName" : "OperationFailed"
}

使用limit函数限制其输出就可以了:
 
db.getCollection('老布').find({}).sort({'created_at':-1}).limit(1000)

mongo服务器因为mongod.lock 被锁定无法正常运行

网络安全李魔佛 发表了文章 • 0 个评论 • 8494 次浏览 • 2018-05-20 18:33 • 来自相关话题

看log文件:
Sun May 20 18:26:04.630 [initandlisten] MongoDB starting : pid=2343 port=27017 dbpath=/home/pi/mongo/db/ 32-bit host=raspberrypi
Sun May 20 18:26:04.631 [initandlisten]
Sun May 20 18:26:04.631 [initandlisten] ** NOTE: This is a 32 bit MongoDB binary.
Sun May 20 18:26:04.631 [initandlisten] ** 32 bit builds are limited to less than 2GB of data (or less with --journal).
Sun May 20 18:26:04.631 [initandlisten] ** See http://dochub.mongodb.org/core/32bit
Sun May 20 18:26:04.631 [initandlisten]
Sun May 20 18:26:04.631 [initandlisten] db version v2.4.10
Sun May 20 18:26:04.632 [initandlisten] git version: nogitversion
Sun May 20 18:26:04.632 [initandlisten] build info: Linux bm-wb-04 3.19.0-trunk-armmp #1 SMP Debian 3.19.1-1~exp1+plugwash1 (2015-03-28) armv7l BOOST_LIB_VERSION=1_55
Sun May 20 18:26:04.632 [initandlisten] allocator: system
Sun May 20 18:26:04.632 [initandlisten] options: { dbpath: "/home/pi/mongo/db/", journal: true, logpath: "/home/pi/mongo/mongod.log" }
Sun May 20 18:26:05.956 [initandlisten] journal dir=/home/pi/mongo/db/journal
Sun May 20 18:26:05.957 [initandlisten] recover : no journal files present, no recovery needed
Sun May 20 18:26:06.023 [initandlisten] ERROR: mmap private failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64
Sun May 20 18:26:06.023 [initandlisten] Assertion: 13636:file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
0x662fa8 0x63527c 0x6196c0 0x6197bc 0x409d5c 0x4414fc 0x2ea634 0x2eaa3c 0x2eb55c 0x2ebd98 0x26f998 0x26fb94 0x175a20 0x177bf4 0x152bf0 0x7660e294
mongod(_ZN5mongo15printStackTraceERSo+0x1c) [0x662fa8]
mongod(_ZN5mongo10logContextEPKc+0x110) [0x63527c]
mongod(_ZN5mongo11msgassertedEiPKc+0xc0) [0x6196c0]
mongod(_ZN5mongo18msgassertedNoTraceEiPKc+0) [0x6197bc]
mongod(_ZN5mongo8MongoMMF13finishOpeningEv+0x308) [0x409d5c]
mongod(_ZN5mongo13MongoDataFile12openExistingEPKc+0x9c) [0x4414fc]
mongod(_ZN5mongo8Database16openExistingFileEi+0x23c) [0x2ea634]
mongod(_ZN5mongo8Database12openAllFilesEv+0x24) [0x2eaa3c]
mongod(_ZN5mongo8DatabaseC2EPKcRbRKSs+0x158) [0x2eb55c]
mongod(_ZN5mongo14DatabaseHolder11getOrCreateERKSsS2_Rb+0x500) [0x2ebd98]
mongod(_ZN5mongo6Client7Context11_finishInitEv+0x34) [0x26f998]
mongod(_ZN5mongo6Client7ContextC1ERKSsS3_b+0x78) [0x26fb94]
mongod(_ZN5mongo14_initAndListenEi+0xb00) [0x175a20]
mongod(_ZN5mongo13initAndListenEi+0x14) [0x177bf4]
mongod(main+0x2b8) [0x152bf0]
/lib/arm-linux-gnueabihf/libc.so.6(__libc_start_main+0x114) [0x7660e294]
Sun May 20 18:26:06.035 [initandlisten] warning database /home/pi/mongo/db/ xueqiu could not be opened
Sun May 20 18:26:06.035 [initandlisten] DBException 13636: file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
Sun May 20 18:26:06.036 [initandlisten] exception in initAndListen: 13636 file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information), terminating
Sun May 20 18:26:06.036 dbexit:
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to close listening sockets...
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to flush diaglog...
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to close sockets...
Sun May 20 18:26:06.036 [initandlisten] shutdown: waiting for fs preallocator...
Sun May 20 18:26:06.036 [initandlisten] shutdown: lock for final commit...
Sun May 20 18:26:06.036 [initandlisten] shutdown: final commit...
Sun May 20 18:26:06.036 [initandlisten] shutdown: closing all files...
Sun May 20 18:26:06.037 [initandlisten] closeAllFiles() finished
Sun May 20 18:26:06.037 [initandlisten] journalCleanup...
Sun May 20 18:26:06.037 [initandlisten] removeJournalFiles
Sun May 20 18:26:06.050 [initandlisten] shutdown: removing fs lock...
Sun May 20 18:26:06.050 dbexit: really exiting now

应该是之前没有正常被关闭,导致文件本锁住了。
 
解决办法:
 
1. 先把数据文件备份, --dbpath 的路径整个备份一下, 不然接下来的操作误操作了数据就丢失了
 
2. 运行修复命令:

mongod --dbpath /data/db --repair
 
替换上面的db为您自己的本地路径 查看全部
看log文件:
Sun May 20 18:26:04.630 [initandlisten] MongoDB starting : pid=2343 port=27017 dbpath=/home/pi/mongo/db/ 32-bit host=raspberrypi
Sun May 20 18:26:04.631 [initandlisten]
Sun May 20 18:26:04.631 [initandlisten] ** NOTE: This is a 32 bit MongoDB binary.
Sun May 20 18:26:04.631 [initandlisten] ** 32 bit builds are limited to less than 2GB of data (or less with --journal).
Sun May 20 18:26:04.631 [initandlisten] ** See http://dochub.mongodb.org/core/32bit
Sun May 20 18:26:04.631 [initandlisten]
Sun May 20 18:26:04.631 [initandlisten] db version v2.4.10
Sun May 20 18:26:04.632 [initandlisten] git version: nogitversion
Sun May 20 18:26:04.632 [initandlisten] build info: Linux bm-wb-04 3.19.0-trunk-armmp #1 SMP Debian 3.19.1-1~exp1+plugwash1 (2015-03-28) armv7l BOOST_LIB_VERSION=1_55
Sun May 20 18:26:04.632 [initandlisten] allocator: system
Sun May 20 18:26:04.632 [initandlisten] options: { dbpath: "/home/pi/mongo/db/", journal: true, logpath: "/home/pi/mongo/mongod.log" }
Sun May 20 18:26:05.956 [initandlisten] journal dir=/home/pi/mongo/db/journal
Sun May 20 18:26:05.957 [initandlisten] recover : no journal files present, no recovery needed
Sun May 20 18:26:06.023 [initandlisten] ERROR: mmap private failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64
Sun May 20 18:26:06.023 [initandlisten] Assertion: 13636:file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
0x662fa8 0x63527c 0x6196c0 0x6197bc 0x409d5c 0x4414fc 0x2ea634 0x2eaa3c 0x2eb55c 0x2ebd98 0x26f998 0x26fb94 0x175a20 0x177bf4 0x152bf0 0x7660e294
mongod(_ZN5mongo15printStackTraceERSo+0x1c) [0x662fa8]
mongod(_ZN5mongo10logContextEPKc+0x110) [0x63527c]
mongod(_ZN5mongo11msgassertedEiPKc+0xc0) [0x6196c0]
mongod(_ZN5mongo18msgassertedNoTraceEiPKc+0) [0x6197bc]
mongod(_ZN5mongo8MongoMMF13finishOpeningEv+0x308) [0x409d5c]
mongod(_ZN5mongo13MongoDataFile12openExistingEPKc+0x9c) [0x4414fc]
mongod(_ZN5mongo8Database16openExistingFileEi+0x23c) [0x2ea634]
mongod(_ZN5mongo8Database12openAllFilesEv+0x24) [0x2eaa3c]
mongod(_ZN5mongo8DatabaseC2EPKcRbRKSs+0x158) [0x2eb55c]
mongod(_ZN5mongo14DatabaseHolder11getOrCreateERKSsS2_Rb+0x500) [0x2ebd98]
mongod(_ZN5mongo6Client7Context11_finishInitEv+0x34) [0x26f998]
mongod(_ZN5mongo6Client7ContextC1ERKSsS3_b+0x78) [0x26fb94]
mongod(_ZN5mongo14_initAndListenEi+0xb00) [0x175a20]
mongod(_ZN5mongo13initAndListenEi+0x14) [0x177bf4]
mongod(main+0x2b8) [0x152bf0]
/lib/arm-linux-gnueabihf/libc.so.6(__libc_start_main+0x114) [0x7660e294]
Sun May 20 18:26:06.035 [initandlisten] warning database /home/pi/mongo/db/ xueqiu could not be opened
Sun May 20 18:26:06.035 [initandlisten] DBException 13636: file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
Sun May 20 18:26:06.036 [initandlisten] exception in initAndListen: 13636 file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information), terminating
Sun May 20 18:26:06.036 dbexit:
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to close listening sockets...
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to flush diaglog...
Sun May 20 18:26:06.036 [initandlisten] shutdown: going to close sockets...
Sun May 20 18:26:06.036 [initandlisten] shutdown: waiting for fs preallocator...
Sun May 20 18:26:06.036 [initandlisten] shutdown: lock for final commit...
Sun May 20 18:26:06.036 [initandlisten] shutdown: final commit...
Sun May 20 18:26:06.036 [initandlisten] shutdown: closing all files...
Sun May 20 18:26:06.037 [initandlisten] closeAllFiles() finished
Sun May 20 18:26:06.037 [initandlisten] journalCleanup...
Sun May 20 18:26:06.037 [initandlisten] removeJournalFiles
Sun May 20 18:26:06.050 [initandlisten] shutdown: removing fs lock...
Sun May 20 18:26:06.050 dbexit: really exiting now

应该是之前没有正常被关闭,导致文件本锁住了。
 
解决办法:
 
1. 先把数据文件备份, --dbpath 的路径整个备份一下, 不然接下来的操作误操作了数据就丢失了
 
2. 运行修复命令:

mongod --dbpath /data/db --repair
 
替换上面的db为您自己的本地路径

mongo服务启动失败: ERROR: mmap private failed with out of memory

树莓派李魔佛 发表了文章 • 0 个评论 • 4755 次浏览 • 2018-05-13 12:23 • 来自相关话题

平时在树莓派上开机自动执行以下命令,启动mongo服务
sudo mongod --fork --dbpath /home/pi/mongo/db/ --smallfiles --journal --logpath /home/pi/mongo/log.txt
突然今天发现mongo的服务连不上,看log发现mongo在启动后马上关闭了,提示的错误是在加载一个大的数据文件的时候提示内存不足(坑爹的,树莓派自身内存才1GB,无法扩容)。 
 
错误日志:
Sun May 13 12:08:11.185 [initandlisten] MongoDB starting : pid=1929 port=27017 dbpath=/home/pi/mongo/db/ 32-bit host=raspberrypi
Sun May 13 12:08:11.186 [initandlisten]
Sun May 13 12:08:11.186 [initandlisten] ** NOTE: This is a 32 bit MongoDB binary.
Sun May 13 12:08:11.186 [initandlisten] ** 32 bit builds are limited to less than 2GB of data (or less with --journal).
Sun May 13 12:08:11.186 [initandlisten] ** See http://dochub.mongodb.org/core/32bit
Sun May 13 12:08:11.187 [initandlisten]
Sun May 13 12:08:11.187 [initandlisten] db version v2.4.10
Sun May 13 12:08:11.187 [initandlisten] git version: nogitversion
Sun May 13 12:08:11.187 [initandlisten] build info: Linux bm-wb-04 3.19.0-trunk-armmp #1 SMP Debian 3.19.1-1~exp1+plugwash1 (2015-03-28) armv7l BOOST_LIB_VERSION=1_55
Sun May 13 12:08:11.187 [initandlisten] allocator: system
Sun May 13 12:08:11.187 [initandlisten] options: { dbpath: "/home/pi/mongo/db/", fork: true, journal: true, logpath: "/home/pi/mongo/mongod.log" }
Sun May 13 12:08:11.198 [initandlisten] journal dir=/home/pi/mongo/db/journal
Sun May 13 12:08:11.198 [initandlisten] recover : no journal files present, no recovery needed
Sun May 13 12:08:11.238 [initandlisten] ERROR: mmap private failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64
Sun May 13 12:08:11.239 [initandlisten] Assertion: 13636:file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
0x662fa8 0x63527c 0x6196c0 0x6197bc 0x409d5c 0x4414fc 0x2ea634 0x2eaa3c 0x2eb55c 0x2ebd98 0x26f998 0x26fb94 0x175a20 0x177bf4 0x152bf0 0x76556294
mongod(_ZN5mongo15printStackTraceERSo+0x1c) [0x662fa8]
mongod(_ZN5mongo10logContextEPKc+0x110) [0x63527c]
mongod(_ZN5mongo11msgassertedEiPKc+0xc0) [0x6196c0]
mongod(_ZN5mongo18msgassertedNoTraceEiPKc+0) [0x6197bc]
mongod(_ZN5mongo8MongoMMF13finishOpeningEv+0x308) [0x409d5c]
mongod(_ZN5mongo13MongoDataFile12openExistingEPKc+0x9c) [0x4414fc]
mongod(_ZN5mongo8Database16openExistingFileEi+0x23c) [0x2ea634]
mongod(_ZN5mongo8Database12openAllFilesEv+0x24) [0x2eaa3c]
mongod(_ZN5mongo8DatabaseC2EPKcRbRKSs+0x158) [0x2eb55c]
mongod(_ZN5mongo14DatabaseHolder11getOrCreateERKSsS2_Rb+0x500) [0x2ebd98]
mongod(_ZN5mongo6Client7Context11_finishInitEv+0x34) [0x26f998]
mongod(_ZN5mongo6Client7ContextC1ERKSsS3_b+0x78) [0x26fb94]
mongod(_ZN5mongo14_initAndListenEi+0xb00) [0x175a20]
mongod(_ZN5mongo13initAndListenEi+0x14) [0x177bf4]
mongod(main+0x2b8) [0x152bf0]
/lib/arm-linux-gnueabihf/libc.so.6(__libc_start_main+0x114) [0x76556294]
Sun May 13 12:08:11.250 [initandlisten] warning database /home/pi/mongo/db/ xueqiu could not be opened
Sun May 13 12:08:11.251 [initandlisten] DBException 13636: file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
Sun May 13 12:08:11.251 [initandlisten] exception in initAndListen: 13636 file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information), terminating
Sun May 13 12:08:11.251 dbexit:
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to close listening sockets...
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to flush diaglog...
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to close sockets...
Sun May 13 12:08:11.252 [initandlisten] shutdown: waiting for fs preallocator...
Sun May 13 12:08:11.252 [initandlisten] shutdown: lock for final commit...
Sun May 13 12:08:11.252 [initandlisten] shutdown: final commit...
Sun May 13 12:08:11.252 [initandlisten] shutdown: closing all files...
Sun May 13 12:08:11.252 [initandlisten] closeAllFiles() finished
Sun May 13 12:08:11.252 [initandlisten] journalCleanup...
Sun May 13 12:08:11.253 [initandlisten] removeJournalFiles
Sun May 13 12:08:11.263 [initandlisten] shutdown: removing fs lock...
Sun May 13 12:08:11.264 dbexit: really exiting now
看了下mongod的用法,尝试把参数 --journal,去掉,重新运行,然后就可以了。
 
sudo mongod --fork --dbpath /home/pi/mongo/db/ --smallfiles --logpath /home/pi/mongo/log.txt 查看全部
平时在树莓派上开机自动执行以下命令,启动mongo服务
sudo mongod --fork --dbpath /home/pi/mongo/db/ --smallfiles --journal --logpath /home/pi/mongo/log.txt

突然今天发现mongo的服务连不上,看log发现mongo在启动后马上关闭了,提示的错误是在加载一个大的数据文件的时候提示内存不足(坑爹的,树莓派自身内存才1GB,无法扩容)。 
 
错误日志:
Sun May 13 12:08:11.185 [initandlisten] MongoDB starting : pid=1929 port=27017 dbpath=/home/pi/mongo/db/ 32-bit host=raspberrypi
Sun May 13 12:08:11.186 [initandlisten]
Sun May 13 12:08:11.186 [initandlisten] ** NOTE: This is a 32 bit MongoDB binary.
Sun May 13 12:08:11.186 [initandlisten] ** 32 bit builds are limited to less than 2GB of data (or less with --journal).
Sun May 13 12:08:11.186 [initandlisten] ** See http://dochub.mongodb.org/core/32bit
Sun May 13 12:08:11.187 [initandlisten]
Sun May 13 12:08:11.187 [initandlisten] db version v2.4.10
Sun May 13 12:08:11.187 [initandlisten] git version: nogitversion
Sun May 13 12:08:11.187 [initandlisten] build info: Linux bm-wb-04 3.19.0-trunk-armmp #1 SMP Debian 3.19.1-1~exp1+plugwash1 (2015-03-28) armv7l BOOST_LIB_VERSION=1_55
Sun May 13 12:08:11.187 [initandlisten] allocator: system
Sun May 13 12:08:11.187 [initandlisten] options: { dbpath: "/home/pi/mongo/db/", fork: true, journal: true, logpath: "/home/pi/mongo/mongod.log" }
Sun May 13 12:08:11.198 [initandlisten] journal dir=/home/pi/mongo/db/journal
Sun May 13 12:08:11.198 [initandlisten] recover : no journal files present, no recovery needed
Sun May 13 12:08:11.238 [initandlisten] ERROR: mmap private failed with out of memory. You are using a 32-bit build and probably need to upgrade to 64
Sun May 13 12:08:11.239 [initandlisten] Assertion: 13636:file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
0x662fa8 0x63527c 0x6196c0 0x6197bc 0x409d5c 0x4414fc 0x2ea634 0x2eaa3c 0x2eb55c 0x2ebd98 0x26f998 0x26fb94 0x175a20 0x177bf4 0x152bf0 0x76556294
mongod(_ZN5mongo15printStackTraceERSo+0x1c) [0x662fa8]
mongod(_ZN5mongo10logContextEPKc+0x110) [0x63527c]
mongod(_ZN5mongo11msgassertedEiPKc+0xc0) [0x6196c0]
mongod(_ZN5mongo18msgassertedNoTraceEiPKc+0) [0x6197bc]
mongod(_ZN5mongo8MongoMMF13finishOpeningEv+0x308) [0x409d5c]
mongod(_ZN5mongo13MongoDataFile12openExistingEPKc+0x9c) [0x4414fc]
mongod(_ZN5mongo8Database16openExistingFileEi+0x23c) [0x2ea634]
mongod(_ZN5mongo8Database12openAllFilesEv+0x24) [0x2eaa3c]
mongod(_ZN5mongo8DatabaseC2EPKcRbRKSs+0x158) [0x2eb55c]
mongod(_ZN5mongo14DatabaseHolder11getOrCreateERKSsS2_Rb+0x500) [0x2ebd98]
mongod(_ZN5mongo6Client7Context11_finishInitEv+0x34) [0x26f998]
mongod(_ZN5mongo6Client7ContextC1ERKSsS3_b+0x78) [0x26fb94]
mongod(_ZN5mongo14_initAndListenEi+0xb00) [0x175a20]
mongod(_ZN5mongo13initAndListenEi+0x14) [0x177bf4]
mongod(main+0x2b8) [0x152bf0]
/lib/arm-linux-gnueabihf/libc.so.6(__libc_start_main+0x114) [0x76556294]
Sun May 13 12:08:11.250 [initandlisten] warning database /home/pi/mongo/db/ xueqiu could not be opened
Sun May 13 12:08:11.251 [initandlisten] DBException 13636: file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information)
Sun May 13 12:08:11.251 [initandlisten] exception in initAndListen: 13636 file /home/pi/mongo/db/xueqiu.5 open/create failed in createPrivateMap (look in log for more information), terminating
Sun May 13 12:08:11.251 dbexit:
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to close listening sockets...
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to flush diaglog...
Sun May 13 12:08:11.252 [initandlisten] shutdown: going to close sockets...
Sun May 13 12:08:11.252 [initandlisten] shutdown: waiting for fs preallocator...
Sun May 13 12:08:11.252 [initandlisten] shutdown: lock for final commit...
Sun May 13 12:08:11.252 [initandlisten] shutdown: final commit...
Sun May 13 12:08:11.252 [initandlisten] shutdown: closing all files...
Sun May 13 12:08:11.252 [initandlisten] closeAllFiles() finished
Sun May 13 12:08:11.252 [initandlisten] journalCleanup...
Sun May 13 12:08:11.253 [initandlisten] removeJournalFiles
Sun May 13 12:08:11.263 [initandlisten] shutdown: removing fs lock...
Sun May 13 12:08:11.264 dbexit: really exiting now

看了下mongod的用法,尝试把参数 --journal,去掉,重新运行,然后就可以了。
 
sudo mongod --fork --dbpath /home/pi/mongo/db/ --smallfiles --logpath /home/pi/mongo/log.txt

树莓派安装mongodb服务器

网络李魔佛 发表了文章 • 0 个评论 • 7986 次浏览 • 2017-12-18 16:57 • 来自相关话题

树莓派的自带的源就自带了mongodb-server的安装包,所以只需要使用命令:sudo apt-get install mongodb-server 就可以安装了。
 
不过安装的是32bit 的mongodb,数据库的大小会被限制在2GB。
树莓派启动mongodb
修改/etc/mongodb.config,
把里面的bind=127.0.0.1 注释掉,前面加一个#即可,因为这样其他主机也可以访问这台树莓派的mongodb服务器。
修改dbpath和dblog的路径,因为默认的路径你需要root权限

然后运行 mongod --config /etc/mongodb.config , 然后远程使用mongo ip地址就可以远程连接了。 查看全部
树莓派的自带的源就自带了mongodb-server的安装包,所以只需要使用命令:sudo apt-get install mongodb-server 就可以安装了。
 
不过安装的是32bit 的mongodb,数据库的大小会被限制在2GB。
树莓派启动mongodb
修改/etc/mongodb.config,
把里面的bind=127.0.0.1 注释掉,前面加一个#即可,因为这样其他主机也可以访问这台树莓派的mongodb服务器。
修改dbpath和dblog的路径,因为默认的路径你需要root权限

然后运行 mongod --config /etc/mongodb.config , 然后远程使用mongo ip地址就可以远程连接了。

使用官网下载的mongodb,如何设置远程连接mongodb服务器

网络安全李魔佛 发表了文章 • 0 个评论 • 5608 次浏览 • 2017-12-17 23:10 • 来自相关话题

在linux到官网下载mongodb,选择64位版本。
解压后在mongo的bin目录下运行 mongod --dbpath ~/mongo/db
可以看到mongodb服务被正常启动了。
 
在局域网其他电脑上使用mongodb客户端尝试连接这个mongo服务器,发现无法连接上。
因为官网下载的mongo问价解压后并没有mongo.conf配置文件。
 
在本机运行命令: mongo
可以看到输出:
 Server has startup warnings:
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten]
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten] ** WARNING: Using the XFS filesystem is strongly recommended with the WiredTiger storage engine
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten] ** See http://dochub.mongodb.org/core ... ystem
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** WARNING: Access control is not enabled for the database.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Read and write access to data and configuration is unrestricted.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** WARNING: This server is bound to localhost.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Remote systems will be unable to connect to this server.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Start the server with --bind_ip <address> to specify which IP
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** addresses it should serve responses from, or with --bind_ip_all to
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** bind to all interfaces. If this behavior is desired, start the
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** server with --bind_ip 127.0.0.1 to disable this warning.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten] ** WARNING: /sys/kernel/mm/transparent_hugepage/defrag is 'always'.
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten] ** We suggest setting it to 'never'
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten]
从上面的信息可以看到,如果需要远程的机子连接到本机,需要添加一个选项: --bind_ip_all
运行下面命令后:
mongod --dbpath ~/mongo/db --bind_ip_all
 
远程的机子就能够连上mongo服务器了。
 
原文地址:http://30daydo.com/article/247
转载请注明出处 查看全部
在linux到官网下载mongodb,选择64位版本。
解压后在mongo的bin目录下运行 mongod --dbpath ~/mongo/db
可以看到mongodb服务被正常启动了。
 
在局域网其他电脑上使用mongodb客户端尝试连接这个mongo服务器,发现无法连接上。
因为官网下载的mongo问价解压后并没有mongo.conf配置文件。
 
在本机运行命令: mongo
可以看到输出:
 
Server has startup warnings: 
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten]
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten] ** WARNING: Using the XFS filesystem is strongly recommended with the WiredTiger storage engine
2017-12-17T22:56:19.702+0800 I STORAGE [initandlisten] ** See http://dochub.mongodb.org/core ... ystem
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** WARNING: Access control is not enabled for the database.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Read and write access to data and configuration is unrestricted.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** WARNING: This server is bound to localhost.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Remote systems will be unable to connect to this server.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** Start the server with --bind_ip <address> to specify which IP
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** addresses it should serve responses from, or with --bind_ip_all to
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** bind to all interfaces. If this behavior is desired, start the
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten] ** server with --bind_ip 127.0.0.1 to disable this warning.
2017-12-17T22:56:20.600+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten]
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten] ** WARNING: /sys/kernel/mm/transparent_hugepage/defrag is 'always'.
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten] ** We suggest setting it to 'never'
2017-12-17T22:56:20.601+0800 I CONTROL [initandlisten]

从上面的信息可以看到,如果需要远程的机子连接到本机,需要添加一个选项: --bind_ip_all
运行下面命令后:
mongod --dbpath ~/mongo/db --bind_ip_all
 
远程的机子就能够连上mongo服务器了。
 
原文地址:http://30daydo.com/article/247
转载请注明出处

mongodb中$sum:1 后面的1是什么意思

python李魔佛 发表了文章 • 0 个评论 • 10685 次浏览 • 2017-09-05 23:32 • 来自相关话题

源数据:
{
"_id" : "GuqXmAkkARqhBDqhy",
"beatmapset_id" : "342537",
"version" : "MX",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "3.5552737712860107"
}
{
"_id" : "oHLT7KqsB7bztBGvu",
"beatmapset_id" : "342537",
"version" : "HD",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "2.7515676021575928"
}
{
"_id" : "GbotZfrPEwW69FkGD",
"beatmapset_id" : "342537",
"version" : "NM",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "0"
}
 然后运行以下的命令:
 
db.getCollection('dup_case').aggregate(
[
{$group:{
_id:{diff_approach:'$diff_approach'},
count:{$sum:2}
}},
{$match:{count:{$gt:1}}}
]

 
返回的count是6
 
所以
$sum:1 的含义:
如果前面的情况出现一次,就加1, 如果后面$sum:2 那么每次前面条件满足一次就加2
  查看全部
源数据:
{
"_id" : "GuqXmAkkARqhBDqhy",
"beatmapset_id" : "342537",
"version" : "MX",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "3.5552737712860107"
}
{
"_id" : "oHLT7KqsB7bztBGvu",
"beatmapset_id" : "342537",
"version" : "HD",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "2.7515676021575928"
}
{
"_id" : "GbotZfrPEwW69FkGD",
"beatmapset_id" : "342537",
"version" : "NM",
"diff_approach" : "5",
"artist" : "Yousei Teikoku",
"title" : "Kokou no Sousei",
"difficultyrating" : "0"
}

 然后运行以下的命令:
 
db.getCollection('dup_case').aggregate(
[
{$group:{
_id:{diff_approach:'$diff_approach'},
count:{$sum:2}
}},
{$match:{count:{$gt:1}}}
]
)
 
 
返回的count是6
 
所以
$sum:1 的含义:
如果前面的情况出现一次,就加1, 如果后面$sum:2 那么每次前面条件满足一次就加2