例：把 article 集合的 pmid 字段由字符串（string）类型转为 32 位整型（int）
1 find + forEach逐条修改
集合中文档数量较多时会很慢（每条文档都要单独向服务器发送一次写请求）
# 更改全部数据
// Convert pmid to a 32-bit integer on every document in db.article,
// one document at a time.
//
// Only _id and pmid are fetched, and only the pmid field is written back
// with $set. The previous save() call replaced the whole document, which
// could overwrite fields changed concurrently by other writers, and save()
// is deprecated in current shells. updateOne() requires MongoDB >= 3.2.
//
// NOTE(review): how NumberInt() treats missing or non-numeric pmid values
// is not handled here — verify against your data before running.
db.article.find({}, { pmid: 1 }).forEach(function (doc) {
  db.article.updateOne(
    { _id: doc._id },
    { $set: { pmid: NumberInt(doc.pmid) } }
  );
});
# 只更改类型为string的数据
// Convert pmid to a 32-bit integer, but only on documents where pmid is
// currently stored as a string (BSON type code 2).
//
// Only _id and pmid are fetched, and only the pmid field is written back
// with $set. The previous save() call replaced the whole document, which
// could overwrite fields changed concurrently by other writers, and save()
// is deprecated in current shells. updateOne() requires MongoDB >= 3.2.
//
// NOTE(review): how NumberInt() treats non-numeric strings is not handled
// here — verify against your data before running.
db.article.find({ pmid: { $type: 2 } }, { pmid: 1 }).forEach(function (doc) {
  db.article.updateOne(
    { _id: doc._id },
    { $set: { pmid: NumberInt(doc.pmid) } }
  );
});
2 批量修改（Bulk API / bulkWrite）
MongoDB 版本 >= 2.6 且 < 3.2（使用 Bulk API）
// Bulk API variant: convert string pmid values to 32-bit integers, sending
// the updates to the server in ordered batches of 1000.
//
// The filter uses the numeric BSON type code 2 (string). String aliases such
// as "string" are only accepted by $type from MongoDB 3.2 onward, so they do
// not work on the server versions this Bulk API variant is meant for.
// `var` is kept on purpose: the shells of that era run old JS engines.
var col = db.article;
var bulk = col.initializeOrderedBulkOp();
var counter = 0;

col.find({ pmid: { $type: 2 } }).forEach(function (doc) {
  // Queue a single-field update for this document.
  bulk.find({ "_id": doc._id }).update({
    "$set": { "pmid": NumberInt(doc.pmid) }
  });
  counter++;

  // Drain and re-initialize every 1000 queued updates.
  if (counter % 1000 === 0) {
    bulk.execute();
    bulk = col.initializeOrderedBulkOp();
  }
});

// Send the remaining partial batch. Skipped when the queue is empty
// (no matches, or an exact multiple of 1000), because executing an
// empty bulk operation raises an error.
if (counter % 1000 !== 0) {
  bulk.execute();
}
MongoDB 版本 >= 3.2（使用 bulkWrite）
// bulkWrite variant: convert string pmid values (BSON type code 2) to
// 32-bit integers.
//
// Operations are flushed every 1000 entries so the pending queue never has
// to hold one entry per matching document, and the final flush is skipped
// when nothing is pending because bulkWrite() rejects an empty operations
// array (e.g. when no document matches the filter).
var bulkOps = [];

db.article.find({ pmid: { $type: 2 } }, { pmid: 1 }).forEach(function (doc) {
  bulkOps.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      "update": { "$set": { "pmid": NumberInt(doc.pmid) } }
    }
  });

  // Flush a full batch and start a new queue.
  if (bulkOps.length === 1000) {
    db.article.bulkWrite(bulkOps, { "ordered": true });
    bulkOps = [];
  }
});

// Flush the remaining partial batch, if any.
if (bulkOps.length > 0) {
  db.article.bulkWrite(bulkOps, { "ordered": true });
}
# ============================================================
use pubmed;
// Batched bulkWrite with progress output: convert string pmid values
// (BSON type code 2) to 32-bit integers, 100000 updates per batch.
//
// `bulk` is declared explicitly instead of being created as an implicit
// global, and progress is printed only after the batch has actually been
// written, so the message is never shown for a batch that failed.
var counter = 0;
var bulk = [];

db.article.find({ pmid: { $type: 2 } }, { pmid: 1 }).forEach(function (doc) {
  bulk.push({
    updateOne: {
      filter: { _id: doc._id },
      update: { $set: { pmid: NumberInt(doc.pmid) } }
    }
  });
  counter++;

  // Flush a full batch, report progress, and start a new queue.
  if (counter % 100000 === 0) {
    db.article.bulkWrite(bulk, { ordered: true });
    print(counter, 'records updated');
    bulk = [];
  }
});

// Flush the remaining partial batch; bulkWrite() rejects an empty array,
// so this is skipped when nothing is pending.
if (bulk.length > 0) {
  db.article.bulkWrite(bulk, { ordered: true });
  print(counter, 'records updated');
}
3 update方法
MongoDB 版本 >= 4.2（update 支持使用聚合管道）
// Server-side conversion (MongoDB >= 4.2): a single update whose update
// document is an aggregation pipeline, so no documents are pulled into the
// shell. Targets the same case as the other snippets in these notes:
// article.pmid stored as a string, converted to a 32-bit integer.
//
// $convert has no onError/onNull here, so a pmid string that cannot be
// parsed as an integer makes the operation fail rather than silently
// writing a fallback value. Add onError to $convert if you prefer a default.
db.article.updateMany(
  { pmid: { $type: "string" } },
  [{ $set: { pmid: { $convert: { input: "$pmid", to: "int" } } } }]
);
参考: