find.pl
#!/usr/bin/perl
# find.pl - CGI script that searches a list of web pages for a Perl regular
# expression.
#
# GET request:   prints the search form.
# Other request: reads the form fields `search`, `pages`, `yes`, `no` and
#                `error`, fetches every URL found in `pages` (one child
#                process per URL) and prints, per URL, whether the expression
#                matched ("yes"), did not match ("no") or the request failed
#                ("error").  Each of the three kinds of line is only printed
#                when the form field of the same name is set.
use strict;
use warnings;
use LWP::UserAgent;

my $ua = LWP::UserAgent->new;
$ua->agent("Mozilla/3.14\@piology.org");

# Unbuffered STDOUT: the page header must be on the wire before fork(), or
# every child would inherit and re-emit the parent's pending buffer.
$| = 1;

my %cgivars = getcgivars();

# REQUEST_METHOD is unset when the script is not run by a web server.
my $method = defined $ENV{'REQUEST_METHOD'} ? $ENV{'REQUEST_METHOD'} : '';

if ($method eq "GET") {
    print <<PAGE;
Content-Type: text/html; charset=iso-8859-1

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<HTML><HEAD><TITLE>Suche nach Regulären Ausdrücken in Webseiten</TITLE></HEAD>

<BODY>
<FORM METHOD=POST ACTION="find.pl">
<P>Suchausdruck (Perl-RegExp):<BR>
<INPUT NAME=search SIZE=60></P>
<P>Anzeige von <INPUT TYPE=CHECKBOX NAME=yes CHECKED> Treffern (yes)
<INPUT TYPE=CHECKBOX NAME=no CHECKED> Nicht-Treffern (no)
<INPUT TYPE=CHECKBOX NAME=error CHECKED> Fehlern (error)</P>
<P>Zu durchsuchende Webseiten:<BR>
<TEXTAREA NAME=pages ROWS=10 COLS=60></TEXTAREA><BR>
<INPUT TYPE=SUBMIT VALUE=Suche></P>
</FORM>
</BODY></HTML>
PAGE
}
else {
    print <<PAGE;
Content-Type: text/html

<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<HTML><HEAD><TITLE>Suchergebnisse</TITLE></HEAD>
<BODY>
<H1>Suchergebnisse</H1>
PAGE

    # Compile the user-supplied expression once, before forking.  A syntax
    # error is then reported on the page instead of silently killing every
    # child.  The capture group makes the matched text available as $1.
    my $search    = defined $cgivars{'search'} ? $cgivars{'search'} : '';
    my $search_re = eval { qr/($search)/ };

    if (!defined $search_re) {
        print "<P>Ung&uuml;ltiger Suchausdruck: " . html_escape($@) . "</P>\n";
    }
    else {
        my $pages = defined $cgivars{'pages'} ? $cgivars{'pages'} : '';

        # NOTE(review): one child is forked per URL with no upper bound;
        # consider capping the number of URLs or of concurrent children.
        URL:
        while ($pages =~ /(http\S*)/g) {
            my $url = $1;

            # The URL comes from the form, so it is passed through
            # html_escape before being echoed into the result page.
            my $shown = html_escape($url);
            my $link  = "<A HREF=\"$shown\">$shown</A>";

            my $pid = fork();
            if (!defined $pid) {
                # fork failed: report it and keep going rather than letting
                # the parent fall into the child branch and exit early.
                print "$link error (fork: " . html_escape("$!") . ")<BR>\n"
                    if $cgivars{'error'};
                next URL;
            }
            next URL if $pid;    # parent: go on spawning

            # --- child process: fetch one page, print one line, exit ---
            # simple_request does not follow redirects, hence 301/302 are
            # accepted below and the redirect response body is searched.
            my $response = $ua->simple_request(HTTP::Request->new(GET => $url));
            my $fetched  = $response->is_success
                        || $response->code == 301
                        || $response->code == 302;

            if (!$fetched) {
                print "$link error (" . $response->code . " "
                    . html_escape($response->message) . ")<BR>\n"
                    if $cgivars{'error'};
            }
            elsif ($response->content =~ $search_re) {
                print "$link yes: " . html_escape($1) . "<BR>\n"
                    if $cgivars{'yes'};
            }
            else {
                print "$link no<BR>\n" if $cgivars{'no'};
            }
            exit;
        }
    }

    # Reap every child before closing the document.  This belongs to the
    # results page only; the form page above already closes itself.
    1 until wait == -1;
    print "</BODY></HTML>\n";
}
55 |
56 | #######################################################################################
57 |
# getcgivars()
#
# Reads an application/x-www-form-urlencoded request body from STDIN and
# returns it as a flat list of name/value pairs, suitable for assignment to a
# hash.
#
# - $ENV{CONTENT_LENGTH} bytes are read; when it is missing, non-numeric or
#   zero (e.g. a GET request) nothing is read and an empty list is returned.
# - '+' is turned into a space and %XX sequences are decoded in both names
#   and values.
# - A field without '=' gets the empty string as its value.
# - Empty pairs (as in "a=1&&b=2") are skipped.
# - When a name occurs more than once, the last value wins.
sub getcgivars {
    my %value_of;

    my $length = $ENV{'CONTENT_LENGTH'};
    return %value_of
        unless defined $length && $length =~ /\A\d+\z/ && $length > 0;

    my $body = '';
    read(STDIN, $body, $length);
    return %value_of unless defined $body;

    for my $pair (split /&/, $body) {
        next if $pair eq '';

        # '+' must become a space before %XX decoding, otherwise an encoded
        # plus sign (%2B) would be turned into a space as well.
        $pair =~ tr/+/ /;

        my ($name, $value) = split /=/, $pair, 2;
        $name  = '' unless defined $name;
        $value = '' unless defined $value;

        for my $part ($name, $value) {
            $part =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr(hex($1))/ge;
        }
        $value_of{$name} = $value;
    }
    return %value_of;
}
71 |
# html_escape($text)
#
# Returns a copy of $text that is safe to embed in HTML element content and
# in double-quoted attribute values: & < > " are replaced by their character
# entities.  An undefined argument yields the empty string.  The argument
# itself is not modified.
sub html_escape {
    # Copy first: callers pass $1, which the substitutions below overwrite.
    my $text = defined $_[0] ? $_[0] : '';

    # '&' has to be handled first, otherwise the ampersands introduced by
    # the following replacements would be escaped a second time.
    $text =~ s/&/&amp;/g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    return $text;
}

© Boris 'pi' Piwinger,
May 21, 2003