Changeset 316663353936
Parent: None
--- Revision None
+++ Revision 316663353936
@@ -0,0 +1,117 @@
+#!/usr/bin/perl
+
+# Takes one argument: the URL of a JSON document describing the
+# replications to ensure are running, the indexes that need rebuilding,
+# and how often that rebuilding should be done.
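+#
+# A sketch of the shape this script expects (the key names are the ones
+# the code below reads; hosts, database and index names are made up):
+#
+# {
+#   "uri": "http://couch.example.com:5984/",
+#   "databases": {
+#     "mydb": {
+#       "index": {
+#         "myindex": { "every": "+5 minutes" }
+#       },
+#       "replicate": [ "http://replica.example.com:5984/mydb" ]
+#     }
+#   }
+# }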
+
+use strict;
+use warnings;
+use JSON::Any;
+use LWP::UserAgent;
+use Storable;
+use CouchDB::Client;
+use Time::ParseDate qw(parsedate); # from libtime-modules-perl
+
+my ($url) = @ARGV;
+die "First and only argument must be a URL" unless $url;
+
+# Get our persistent state (or a blank one)
+my $state = {};
+eval {
+        # We can keep it in the temp folder because it doesn't matter much if we lose it; we just start counting from scratch
+        $state = retrieve("/tmp/couch_maint.state");
+};
+
+# Since we've been passed a URL use LWP to grab it (rather than CouchDB::Client)
+my $ua = LWP::UserAgent->new();
+my $response = $ua->get($url);
+die "Failed to download document" unless $response->is_success();
+my $info = JSON::Any->jsonToObj($response->content);
+
+die "Document doesn't contain a 'url' key so don't know how to contact the couch service\n" unless $info->{uri};
+my $couch_client = CouchDB::Client->new( uri => $info->{uri} );
+$couch_client->testConnection or die "Can't talk to the couch database on $info->{uri}";
+
+die "No databases field in the document" unless $info->{databases};
+# For each database
+while(my ($database, $db_info) = each %{$info->{databases}}) {
+        my $db = $couch_client->newDB($database);
+        # Trigger the indexes
+        if ($db_info->{index}) {
+                while (my ($index, $index_info) = each %{$db_info->{index}}) {
+                        if (run_now($database, $index)) {
+                                my $design_doc = $db->newDesignDoc("_design/$index");
+                                $design_doc->retrieve();
+                                my @views = $design_doc->listViews();
+                                # Eval the view query because it might time out if the view is a long way behind.
+                                # That's OK: at least we triggered it, and by evaling we still get to trigger the
+                                # other views (admittedly much slower than we might like, but better than nothing).
+                                # The only real danger is that each trigger that times out eats a handle on the
+                                # couch server, which is only returned when the view eventually finishes building,
+                                # so we can run it out of handles. Not a lot we can do about that here, though.
+                                eval {
+                                        # In theory triggering the first view
+                                        # will update the whole lot, but let's
+                                        # just trigger all of them to be safe
+                                        for (@views) {
+                                                $design_doc->queryView($_, limit => 0);
+                                        }
+                                };
+                                warn $@ if $@;
+                                calc_next($index_info, $database, $index);
+                        }
+                }
+        }
+        # Set up replication; we only do push replication (you can do this over and over and couch is OK with that)
+        if ($db_info->{replicate}) {
+                foreach my $target (@{$db_info->{replicate}}) {
+                        $db->replicate(target => $target, continuous => 1);
+                }
+        }
+}
+store($state,"/tmp/couch_maint.state");
+
+sub run_now {
+        my ($db_name, $index_name) = @_;
+        return 1 unless defined($state->{$db_name}->{$index_name}); # If we know nothing we want to run it now
+        return time() > $state->{$db_name}->{$index_name}->{next_run};
+}
+
+sub calc_next {
+        my ($info, $db_name, $index_name) = @_;
+        my ($next_time, $error) = parsedate($info->{every});
+        if ($next_time) {
+                # We subtract 10 seconds from the next run time so that
+                # under normal running (when we can index very quickly)
+                # doing it every minute from cron actually causes
+                # it to happen every minute. Without this, if we are
+                # invoked once a minute there is a chance we will decide
+                # not to index because it was only 59 seconds since we last
+                # did it, and therefore we won't reindex until almost
+                # 2 minutes have passed.
+                $state->{$db_name}->{$index_name}->{next_run} = $next_time - 10;
+        }
+        else {
+                die "Error parsing time reference '$info->{every}': $error\n";
+        }
+}
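+
+# This script is designed to be run regularly, e.g. once a minute from
+# cron (see the comment in calc_next). A hypothetical crontab entry,
+# with made-up install path and config URL:
+#   * * * * * /usr/local/bin/couch_maint.pl http://config.example.com/couch_maint.json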