This script will grind through your web site's "access.log" file (which must be in the "combined" log format). It'll pick out the top 100 Google searches found in the referer field, re-run those searches, and determine which ones are giving your website all the linky Google love -- in other words, the searches that your site 'wins' on.

I'm sure some of you know how to work with Perl. If not, then... well, I can't be bothered writing a tutorial. Google it or something.

Code:

#!/usr/bin/perl
#
# goog-love.pl - find out where your site's google juice comes from.
#
# Command-line arguments (both required, in this order):
#   1. sitehost        - text looked for in the URLs of the search results
#   2. google-api-key  - key passed to the Google search web service
# The access log itself is read from standard input.

# Strictures come first so that the argument handling below is checked too.
use strict;
use warnings;

use SOAP::Lite;
use CGI;                # to decode the CGI-formatted query string

my $site = shift @ARGV;
my $google_api_key = shift @ARGV;

# The API key is the last required argument, so its absence means that the
# command line was incomplete: print the usage text and stop.
if (!$google_api_key) {
    die q{
  goog-love.pl - find out where your site's google juice comes from

  This script will grind through your web site's "access.log" file (which
  must be in the "combined" log format). It'll pick out the top 100 Google
  searches found in the referer field, re-run those searches, and determine
  which ones are giving your website all the linky Google love -- in other
  words, the searches that your site 'wins' on.

  The output is in plain text and a chunk of HTML.

  usage: goog-love.pl sitehost google-api-key < access.log > out.html

  e.g.
    cat /var/www/logs/taint.org.* | \
      goog-love.pl taint.org kjkdf909403g0fg0f90dfgdf0g09gfd | \
      tee out.html

  NOTE: this script requires the "SOAP::Lite" module be installed. Install
  it using "apt-get install libsoap-lite-perl" or "cpan SOAP::Lite". It
  also requires a Google API key.

};
}

# version: Feb 7 2006 jm
# ---------------------------------------------------------------------------

# cf http://www.oreillynet.com/pub/a/network/excerpt/ggl_hcks/index.html
# for the general Google-API idea.
# Net::Google doesn't work anymore, it seems :(
# NOTE(review): this WSDL belongs to Google's old SOAP Search API -- confirm
# that the endpoint is still reachable before relying on this script.
my $google = SOAP::Lite->service("http://api.google.com/GoogleSearch.wsdl");
# $ua = LWP::UserAgent->new;
# $ua->env_proxy;

# Raw (still URL-encoded) query string => {
#     paths      => { request path => 1, ... },   # set by add_query()
#     count      => number of log lines seen,     # set by add_query()
#     rawqstring, qstring, posn, url              # set by google()
# }
my %queries;

# Position recorded for a query when the site was not found in its results.
my $NOT_FOUND_POSN = 999999;     # ie. "crappy"

$| = 1;                          # unbuffered, so progress lines show up at once

readlogs();
google();
summarise();
exit;

# ---------------------------------------------------------------------------

# Example referers:
# "http://www.google.it/search?q=%2B%22Bob+Menschel%22+%2Bspamassassin&hl=it&lr=&client=firefox-a&rls=org.mozilla:en-US:official&start=10&sa=N"
# "http://www.google.com/search?q=%22History+of+CD-ROM%22&hl=en&lr=&rls=GGLD,GGLD:2005-09,GGLD:en&start=50&sa=N"
# etc.
#
# readlogs(): read "combined"-format access log lines from <> (STDIN, since
# the command-line arguments have already been shifted off @ARGV) and record
# every GET request whose referer mentions "google" and carries a q=
# parameter.  Lines that do not match the expected layout are skipped.
sub readlogs {
    LINE: while (my $line = <>) {
        # Fields: host ident user [date tz] "GET path HTTP/x.y" status bytes
        # "referer" "agent...  Any HTTP version and any client host token are
        # accepted, not only HTTP/1.0 requests from numeric addresses.
        my ($path, $referer) = $line =~
            m{^\S+ \S+ \S+ \S+ \S+ "GET (.*?) HTTP/\d+(?:\.\d+)?" \S+ \S+ "(.+?)" "}
            or next LINE;

        next LINE unless $referer =~ /google/i;

        # The capture is never empty, so a match always yields a usable query
        # (including the literal query "0").
        if ($referer =~ /[\?\&]q=([^\&]+)(?:\&|$)/) {
            add_query($1, $path);
        }
    }
    return;
}

# add_query($query, $path): count one more hit for the raw query string
# $query and remember that it led to the request path $path.
sub add_query {
    my ($query, $path) = @_;

    $queries{$query} ||= { paths => { }, count => 0 };
    $queries{$query}{paths}{$path} = 1;
    $queries{$query}{count}++;
    return;
}

# google(): re-run the most frequent searches (at most $maxqueries of them)
# through the search service and record, for each, the position and URL of
# the first result whose URL contains $site.  Queries for which the site is
# not among the returned results keep the $NOT_FOUND_POSN position.
sub google {
    my $maxqueries = 100;
    my $qcount = 0;

    print "progress: found ".(scalar keys %queries)." searches. ".
          "checking the top $maxqueries against google...\n";

    # $site is plain text from the command line, not a pattern: quote it so
    # that e.g. the dot in "taint.org" only matches a literal dot.
    my $site_re = qr/\Q$site\E/i;

    my @most_frequent_first =
        sort { $queries{$b}{count} <=> $queries{$a}{count} } keys %queries;

    QUERY: foreach my $query (@most_frequent_first) {
        $qcount++;
        last QUERY if $qcount > $maxqueries;

        # Let CGI decode the URL-encoded query text.
        my $cgi = CGI->new('q='.$query);
        my $qstring = $cgi->param('q');
        my $count = $queries{$query}{count};

        $queries{$query}{rawqstring} = $query;
        $queries{$query}{qstring} = $qstring;
        $queries{$query}{posn} = $NOT_FOUND_POSN;

        # print "googling: [$qstring] ($count hits)\n";
        my $results = $google->doGoogleSearch(
                $google_api_key, $qstring, 0, 10, "false", "", "false",
                "", "latin1", "latin1"
            );

        # A missing response or an empty result list means there is nothing
        # to rank for this query; do not dereference undef under strict.
        my $elements = $results ? $results->{'resultElements'} : undef;
        next QUERY unless $elements && @{$elements};

        my $posn = 0;
        RESULT: foreach my $result (@{$elements}) {
            $posn++;
            # print $result->{URL}, $result->{snippet},"\n";
            next RESULT unless defined $result->{URL};

            if ($result->{URL} =~ $site_re) {
                $queries{$query}{posn} = $posn;
                $queries{$query}{url} = $result->{URL};
                print "progress: \@$qcount - #$posn for [$qstring]: $result->{URL}\n";
                last RESULT;
            }
        }
    }
    return;
}

# summarise(): print the searches on which the site was found, best position
# first (shorter query text first among equal positions): once as plain text
# inside an HTML comment, and once as an HTML list of links.
sub summarise {
    my $otext = '';
    my $ohtml = '';

    my @found = grep {
            $queries{$_}{posn}
                && ($queries{$_}{posn} < $NOT_FOUND_POSN)
                && $queries{$_}{qstring}
                && $queries{$_}{url}
        } keys %queries;

    my @ranked = sort {
            $queries{$a}{posn} <=> $queries{$b}{posn}
                or length($queries{$a}{qstring}) <=> length($queries{$b}{qstring})
        } @found;

    foreach my $query (@ranked) {
        my $url = $queries{$query}{url};
        my $posn = $queries{$query}{posn};
        my $qstring = $queries{$query}{qstring};
        my $rawqstring = $queries{$query}{rawqstring};
        my $qhref = 'http://www.google.com/search?num=10&q='.$rawqstring;

        $otext .= "#$posn for [$qstring]: $url\n";

        # Everything below comes from the log or from search results, so it
        # is escaped before being placed into markup.
        my $qhref_html   = CGI::escapeHTML($qhref);
        my $qstring_html = CGI::escapeHTML($qstring);
        my $url_html     = CGI::escapeHTML($url);

        $ohtml .= qq{
  <li>#$posn for <a href="$qhref_html">$qstring_html</a>: <a href="$url_html">$url_html</a></li>
};
    }

    # "--" may not appear inside an HTML comment (and "-->" would end it
    # early), so break up any such run in the copy placed in the comment.
    (my $comment_text = $otext) =~ s/--/- -/g;

    print qq{
<!--

Final, sorted list, as text:

$comment_text
As HTML:

-->

<ul>$ohtml</ul>

};
    return;
}