find.pl

    1 | #!/usr/bin/perl
    2 | use LWP::UserAgent;
    3 | my$ua=LWP::UserAgent->new;
    4 | $ua->agent("Mozilla/3.14\@piology.org");
    5 | $|=1;
    6 | my%cgivars = &getcgivars;
    7 | 
    8 | if ($ENV{'REQUEST_METHOD'} eq "GET") {
    9 | print <<PAGE;
   10 | Content-Type: text/html; charset=iso-8859-1
   11 | 
   12 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
   13 | <HTML><HEAD><TITLE>Suche nach Regul&auml;ren Ausdr&uuml;cken in Webseiten</TITLE></HEAD>
   14 | 
   15 | <BODY>
   16 |   <FORM METHOD=POST ACTION="find.pl">
   17 |      <P>Suchausdruck (Perl-RegExp):<BR>
   18 |         <INPUT NAME=search SIZE=60></P>
   19 |      <P>Anzeige von <INPUT TYPE=CHECKBOX NAME=yes CHECKED> Treffern (yes)
   20 |         <INPUT TYPE=CHECKBOX NAME=no CHECKED> Nicht-Treffern (no)
   21 |         <INPUT TYPE=CHECKBOX NAME=error CHECKED> Fehlern (error)</P>
   22 |      <P>Zu durchsuchende Webseiten:<BR>
   23 |         <TEXTAREA NAME=pages ROWS=10 COLS=60></TEXTAREA><BR>
   24 |         <INPUT TYPE=SUBMIT VALUE=Suche></P>
   25 |   </FORM>
   26 | </BODY></HTML>
   27 | PAGE
   28 | } else {
   29 |   print <<PAGE;
   30 | Content-Type: text/html
   31 | 
   32 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
   33 | <HTML><HEAD><TITLE>Suchergebnisse</TITLE></HEAD>
   34 | <BODY>
   35 | <H1>Suchergebnisse</H1>
   36 | PAGE
   37 |   while ($cgivars{'pages'} =~ /(http\S*)/g) {
   38 |     my$url = $1; 
   39 |     if (!fork()) {
   40 |       my$response = $ua->simple_request(HTTP::Request->new(GET=>$url));
   41 |       $url=~s/&/\&amp;/g;
   42 |       unless ($response->is_success || $response->code == 301 || $response->code == 302)
   43 |         {print "<A HREF=\"$url\">$url</A> error (".$response->code." ".$response->message.")<BR>\n"
   44 |            if $cgivars{'error'}}
   45 |       elsif ($response->content =~ /($cgivars{'search'})/)
   46 |         {print "<A HREF=\"$url\">$url</A> yes: ".&html_escape($1)."<BR>\n" if $cgivars{'yes'}}
   47 |       else
   48 |         {print "<A HREF=\"$url\">$url</A> no<BR>\n" if $cgivars{'no'}}
   49 |       exit;
   50 |     }
   51 |   }
   52 | }
   53 | 1 until (wait == -1);
   54 | print "</BODY></HTML>\n";
   55 | 
   56 | #######################################################################################
   57 | 
   58 | sub getcgivars {
   59 |   my($in,%in);
   60 |   my($name,$value);
   61 |   read(STDIN, $in, $ENV{'CONTENT_LENGTH'}) ;
   62 |   foreach (split('&', $in)) {
   63 |     tr/+/ /;
   64 |     ($name, $value) = split('=', $_, 2);
   65 |     $name =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr(hex($1))/ge;
   66 |     $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/chr(hex($1))/ge;
   67 |     $in{$name} = $value;
   68 |   }
   69 |   return %in;
   70 | }
   71 | 
   72 | sub html_escape {
   73 |   my$html=$_[0];
   74 |   $html =~ s/&/&amp;/g;
   75 |   $html =~ s/</&lt;/g;
   76 |   $html =~ s/>/&gt;/g;
   77 |   $html =~ s/"/&quot;/g;
   78 |   return $html;
   79 | }

Valid CSS! Valid HTML 4.01!
© Boris 'pi' Piwinger, May 21, 2003