Lines 12 and 13 show an example. $files should contain in each line either a local file name or a URL.
1 | #!/usr/bin/perl
2 | # lnkcheck.pl -- This scripts tests pages locally or given by URL
3 | # for correctness of absoulte links.
4 | # by Boris 'pi' Piwinger <3.14@piology.org>
5 | use LWP::UserAgent;
6 | $| = 1;
7 | undef($/); # Read files in one step
8 | my$ua = new LWP::UserAgent;
9 | $ua->agent("lnkcheck/3.14\@piology.org");
10 | $ua->timeout(300);
11 |
12 | my$files= `find /home/httpd/html \\( -path '/home/httpd/html/manual' -prune \\) -or -type f -iname \"*html\" -print \\) | sort`;
13 | $files .= "http://piology.org/perl/\n";
14 |
15 | my($fail,%hostname,%urls);
16 | my$temperrors="time(?:d )?out|Connection reset by peer|No route to host|unreachable";
17 |
18 | # Run thru the entries in $files
19 | while ($files =~ /([^\n]+)/g) {
20 |
21 | # Fetch page from web or locally
22 | my$file = $1;
23 | my$page;
24 | if ($file =~ m!^http://!) {
25 | undef($ua->max_size(0)); # We have to get the full page here
26 | my$response = $ua->request(HTTP::Request->new("GET",$file));
27 | next unless ($response->is_success);
28 | $page = $response->content;
29 | } else {
30 | if (open PAGE, "<$file") {
31 | $page = <PAGE>;
32 | close PAGE;
33 | } else {
34 | print "Cannot read $file.\n";
35 | next;
36 | }
37 | }
38 |
39 | # Reduce server load
40 | $ua->max_size(0);
41 |
42 | # Scan page for links and verify 'em
43 | $fail = "$file\n";
44 | while ($page =~ /href[\s="]+(https?:[^">\s]+)/gi) {
45 | my$url = $1;
46 | $url =~ s/&/&/g;
47 | $url =~ s/&/&/g;
48 | $fail .= " $url\n see above\n" if ($urls{$url} eq "a-oh");
49 | next if (defined($urls{$url}));
50 | $urls{$url}="OK";
51 | my$error = " $url\n";
52 | my$response = $ua->simple_request(HTTP::Request->new("HEAD",$url));
53 | unless ($response->is_success) {
54 | unless ($response->message =~ /$temperrors/i || $response->code == 401 || $response->code == 405 || $response->code == 302) {
55 | $error .= " HEAD: " . $response->code . " " . $response->message;
56 | $error =~ s/\n*$/\n/s;
57 | }
58 | if ($error =~ /\(Bad hostname '([^']+)/) {
59 | $hostname{$1}="a-oh";
60 | $fail.=$error;
61 | $urls{$url}="a-oh";
62 | } else {
63 | $response = $ua->simple_request(HTTP::Request->new("GET",$url));
64 | unless ($response->is_success) {
65 | unless ($response->message =~ /$temperrors/i || $response->code == 401 || $response->code == 302) {
66 | $error .= " GET: " . $response->code . " " . $response->message;
67 | $error =~ s/\n*$/\n/s;
68 | }
69 | if ($error =~ /GET:/) {
70 | $fail .= $error;
71 | $urls{$url}="a-oh";
72 | }
73 | }
74 | }
75 | }
76 | }
77 | print $fail if ($fail =~ /GET|see above|Bad hostname/);
78 |
79 | }
80 |
81 | # Now lookup all "bad hostnames" from authoratative name servers
82 | @_=(sort keys %hostname);
83 | print "\nanslookup.pl @_\n";
84 | print `anslookup.pl @_`;