#!/usr/bin/perl
use strict;
use warnings;

# Takes one argument which is the URL of the JSON document to use.
# The JSON document describes the various replications to ensure are
# running, what indexes need rebuilding, and how often that should be done.

use JSON::Any;
use LWP::UserAgent;
use Storable;
use CouchDB::Client;
use Time::ParseDate qw(parsedate); # from libtime-modules-perl

my ($url) = @ARGV;
die "First and only argument must be a URL" unless $url;

# Get our persistent state (or a blank one if retrieve fails).
my $state = {};
eval {
    # We can keep it in the temp folder because it doesn't matter much if
    # we lose it, we just count from scratch.
    $state = retrieve("/tmp/couch_maint.state");
};

# Since we've been passed a URL use LWP to grab it (rather than CouchDB::Client).
my $ua       = LWP::UserAgent->new();
my $response = $ua->get($url);
die "Failed to download document" unless $response->is_success();
my $info = JSON::Any->jsonToObj($response->content);

# NOTE: the key we actually consult is 'uri', so say so in the error message.
die "Document doesn't contain a 'uri' key so don't know how to contact the couch service\n" unless $info->{uri};
my $couch_client = CouchDB::Client->new( uri => $info->{uri} );
$couch_client->testConnection or die "Can't talk to the couch database on $info->{uri}";

die "No databases field in the document" unless $info->{databases};
# For each database described in the config document...
while (my ($database, $db_info) = each %{$info->{databases}}) {
    my $db = $couch_client->newDB($database);
    # Trigger the indexes (design documents) that are due to run.
    if ($db_info->{index}) {
        while (my ($index, $index_info) = each %{$db_info->{index}}) {
            if (run_now($database, $index)) {
                my $design_doc = $db->newDesignDoc("_design/$index");
                $design_doc->retrieve();
                my @views = $design_doc->listViews();
                # Eval the view query because it might timeout if the view is a long way behind.
                # This is ok, at least we triggered it and by evaling we still get to trigger the
                # other views (admittedly significantly slower than we might like but better than nothing).
                # The only real danger is that each time we trigger but timeout we eat a handle off the
                # couch server which will only be returned when the view eventually finishes building and
                # hence we can run it out of handles. Not a lot we can do about that here though really.
                eval {
                    # In theory triggering the first view will update the
                    # whole lot but let's just trigger all of them to be safe.
                    for (@views) {
                        $design_doc->queryView($_, limit => 0);
                    }
                };
                warn $@ if $@;
                calc_next($index_info, $database, $index);
            }
        }
    }
    # Set up replication; we only do push replication (you can do this over
    # and over again and couch is ok with that).
    if ($db_info->{replicate}) {
        foreach my $target (@{$db_info->{replicate}}) {
            $db->replicate(target => $target, continuous => 1);
        }
    }
}
# Persist the next-run schedule for the following invocation.
store($state, "/tmp/couch_maint.state");
|---|
# Decide whether the index for a given database/design-doc pair is due.
# Returns true when we have no recorded state for it, or when the stored
# next_run timestamp has passed.
sub run_now {
    my ($db_name, $index_name) = @_;
    my $entry = $state->{$db_name}->{$index_name};
    # If we know nothing about this pair we want to run it now.
    return 1 if !defined $entry;
    return time() > $entry->{next_run};
}
|---|
| 0 | + | |
|---|
# Record when the given database/index pair should next be rebuilt, based
# on the human-readable interval in $info->{every} (e.g. "1 hour"). Dies if
# the interval cannot be parsed.
sub calc_next {
    my ($info, $db_name, $index_name) = @_;
    my ($next_time, $error) = parsedate($info->{every});
    die "Error parsing time reference '$info->{every}': $error\n"
        unless $next_time;
    # We subtract 10 seconds from the next run time so that under normal
    # running (when we can index very quickly) doing it every minute from
    # cron might actually cause it to happen every minute. Without this,
    # if we are invoked once a minute there is a chance we will decide not
    # to index because it was only 59 seconds since we last did it, and
    # therefore we won't reindex until almost 2 minutes have passed.
    $state->{$db_name}->{$index_name}->{next_run} = $next_time - 10;
}
|---|
| ... | |
|---|