Revision 313037313236 () - Diff

Link to this snippet: https://friendpaste.com/3ACjKssyNXMhFju9irTdJg
Embed:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
diff --git a/etc/couchdb/default.ini.tpl.in b/etc/couchdb/default.ini.tpl.in
index 8207d6e..e05ddd2 100644
--- a/etc/couchdb/default.ini.tpl.in
+++ b/etc/couchdb/default.ini.tpl.in
@@ -12,6 +12,10 @@ max_dbs_open = 100
delayed_commits = true ; set this to false to ensure an fsync before 201 Created is returned
uri_file = %localstaterundir%/couch.uri
+[database_compaction]
+doc_buffer_size = 1048576 ; value in bytes
+checkpoint_after = 10485760 ; checkpoint each time N bytes have been written
+
[httpd]
port = 5984
bind_address = 127.0.0.1
diff --git a/src/couchdb/couch_db_updater.erl b/src/couchdb/couch_db_updater.erl
index fef4032..4a626b0 100644
--- a/src/couchdb/couch_db_updater.erl
+++ b/src/couchdb/couch_db_updater.erl
@@ -858,32 +858,48 @@ copy_compact(Db, NewDb0, Retry) ->
FsyncOptions = [Op || Op <- NewDb0#db.fsync_options, Op == before_header],
NewDb = NewDb0#db{fsync_options=FsyncOptions},
TotalChanges = couch_db:count_changes_since(Db, NewDb#db.update_seq),
+ BufferSize = list_to_integer(
+ couch_config:get("database_compaction", "doc_buffer_size", "1048576")),
+ CheckpointAfter = list_to_integer(
+ couch_config:get("database_compaction", "checkpoint_after", "10485760")),
+
EnumBySeqFun =
- fun(#doc_info{high_seq=Seq}=DocInfo, _Offset, {AccNewDb, AccUncopied, TotalCopied}) ->
- couch_task_status:update("Copied ~p of ~p changes (~p%)",
- [TotalCopied, TotalChanges, (TotalCopied*100) div TotalChanges]),
- if TotalCopied rem 1000 =:= 0 ->
- NewDb2 = copy_docs(Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry),
- if TotalCopied rem 10000 =:= 0 ->
- {ok, {commit_data(NewDb2#db{update_seq=Seq}), [], TotalCopied + 1}};
+ fun(#doc_info{high_seq=Seq}=DocInfo, _Offset,
+ {AccNewDb, AccUncopied, AccUncopiedSize, AccCopiedSize, TotalCopied}) ->
+
+ AccUncopiedSize2 = AccUncopiedSize + byte_size(term_to_binary(DocInfo)),
+ if AccUncopiedSize2 >= BufferSize ->
+ NewDb2 = copy_docs(
+ Db, AccNewDb, lists:reverse([DocInfo | AccUncopied]), Retry),
+ TotalCopied2 = TotalCopied + 1 + length(AccUncopied),
+ couch_task_status:update("Copied ~p of ~p changes (~p%)",
+ [TotalCopied2, TotalChanges, (TotalCopied2 * 100) div TotalChanges]),
+ AccCopiedSize2 = AccCopiedSize + AccUncopiedSize2,
+ if AccCopiedSize2 >= CheckpointAfter ->
+ {ok, {commit_data(NewDb2#db{update_seq = Seq}), [],
+ 0, 0, TotalCopied2}};
true ->
- {ok, {NewDb2#db{update_seq=Seq}, [], TotalCopied + 1}}
+ {ok, {NewDb2#db{update_seq = Seq}, [],
+ 0, AccCopiedSize2, TotalCopied2}}
end;
true ->
- {ok, {AccNewDb, [DocInfo | AccUncopied], TotalCopied + 1}}
+ {ok, {AccNewDb, [DocInfo | AccUncopied], AccUncopiedSize2,
+ AccCopiedSize, TotalCopied}}
end
end,
couch_task_status:set_update_frequency(500),
+ couch_task_status:update("Copied 0 of ~p changes (0%)", [TotalChanges]),
- {ok, _, {NewDb2, Uncopied, TotalChanges}} =
+ {ok, _, {NewDb2, Uncopied, _, _, ChangesDone}} =
couch_btree:foldl(Db#db.docinfo_by_seq_btree, EnumBySeqFun,
- {NewDb, [], 0},
+ {NewDb, [], 0, 0, 0},
[{start_key, NewDb#db.update_seq + 1}]),
couch_task_status:update("Flushing"),
NewDb3 = copy_docs(Db, NewDb2, lists:reverse(Uncopied), Retry),
+ TotalChanges = ChangesDone + length(Uncopied),
% copy misc header values
if NewDb3#db.security /= Db#db.security ->