|
@@ -53,8 +53,21 @@ def add_paper(file_path):
|
|
|
inserted_ids = 0
|
|
|
|
|
|
try:
|
|
|
- result = papers.insert_many(data_list, ordered=False)
|
|
|
- inserted_ids = len(result.inserted_ids)
|
|
|
+ sub_list = []
|
|
|
+ for line in data_list:
|
|
|
+ sub_list.append(line)
|
|
|
+
|
|
|
+ if len(sub_list) == 2000:
|
|
|
+ result = papers.insert_many(sub_list, ordered=False)
|
|
|
+ inserted_ids += len(result.inserted_ids)
|
|
|
+ sub_list = []
|
|
|
+
|
|
|
+ if sub_list:
|
|
|
+ result = papers.insert_many(sub_list, ordered=False)
|
|
|
+ inserted_ids += len(result.inserted_ids)
|
|
|
+ sub_list = []
|
|
|
+
|
|
|
+ print('-------process', inserted_ids, '/', len(data_list))
|
|
|
except pymongo.errors.BulkWriteError as e:
|
|
|
inserted_ids = e.details['nInserted']
|
|
|
finally:
|