id-or-mail.pl

    1 | #!/usr/bin/perl
    2 | #
    3 | # This script looks for message-ids/e-mail addresses (one per line) in STDIN.
    4 | # For each it tries to decide if it is a message-id or an e-mail address.
    5 | #
    6 | # Special thanks to Urs Jan▀en for his extensive help and numerous suggestions.
    7 | # And thanks to all those other people who offered their knowledge.
    8 | #
    9 | # Another solution:
   10 | # http://www.enyo.de/fw/scripts/
   11 | 
   12 | my$ip='(?:[01]?\d\d?|2[0-4]\d|25[0-5])'; # one byte of an IP-address
   13 | $ip="(?:$ip\.){3}$ip";                   # complete IP-address
   14 | 
   15 | while (<STDIN>) {
   16 |   print "$1 -> ".&analyze($1)."\n" if (/(<?[^\s<>@]+@[^\s<>@]+>?)/);
   17 | }
   18 | 
   19 | sub analyze {
   20 |   my$test=$_[0];
   21 |   my($l,$lp,$fqdn,$r)=$test=~/(<?)([^\s<>@]+)@([^\s<>@]+)(>?)/;
   22 |   $fqdn=lc($fqdn);
   23 |   my$len=length($lp);
   24 |   return "mail" unless ($l && $r);             # Message-IDs MUST have angle brackets
   25 |   return "m-id" if ($fqdn eq "4ax.com");       # This domain is for Forte Agent's IDs only
   26 |   return "m-id" if ($fqdn eq "localhost");     # Crippleware
   27 |   return "m-id" if ($fqdn=~/^\[?$ip\]?$/);     # People don't use IP-addresses as local parts
   28 |   return "mail" if ($len<8);                   # Short local parts are unlikely in Message-IDs
   29 |   return "m-id" if ($fqdn=~/^[^.]+$/);         # Crippleware (risk: local address and few people)
   30 |   return "m-id" if ($lp=~/[\$%!\/=~*'{}^?|]/); # Symbols very unlikely in e-mail addresses
   31 |   return "m-id" if ($lp=~/[\da-z]{8,}\.fsf/);  # Gnus
   32 |   return "m-id" if ($lp=~/^[A-Z]{28}\./);      # unidentified (user-id at the end)
   33 |   return "m-id" if ($lp=~/^[a-z]{9,24}\.[\da-z]{7}\.pminews$/);         # PMINews
   34 |   return "mail" if ($lp=~/^\d+\.\d+$/ && $fqdn eq "compuserve.com");    # CompuServe
   35 |   return "mail" if ($lp=~/^[^\d]+$/);                                   # IDs are likely to have digits ...
   36 |   return "mail" if ($lp=~/^\d+$/                                        # ... but not only ...
   37 |                     && $fqdn!~'aol\.com$'                               # ... ... except for AOL ...
   38 |                     && $fqdn!~'virtualsight\.com$'                      # ... ... and virtualsight ...
   39 |                     && $fqdn!~'zetnet\.'                                # ... ... and zetnet ...
   40 |                     && $fqdn!~'(?:^|\.)fido');                          # ... ... and Fido ...
   41 |   return "mail" if ($lp=~/^\d+[-#]\d*$/);                               # ... or blocked.
   42 |                                                                         
   43 |   return "m-id" if ($lp=~/^[\dA-F]{4,}\.[\dA-F]{4,}/);                  # various                    #692227
   44 |   return "m-id" if ($lp=~/^[\da-f]{4,}\.[\da-f]{4,}/);                  # unknown                    #501211
   45 |   return "m-id" if ($lp=~/^[\da-f]{9}/);                                # Pluto et al.               #241513
   46 |   return "m-id" if ($lp=~/^[\da-v]{6,7}\.[\da-v]{1,7}\.\d+$/);          # Hamster                    #59222
   47 |   return "m-id" if ($lp=~/^[a-z](?=[\da-z]*\d[a-z]+\d)[\da-z]{9,11}[\da-f]{2}$/); # supernews (WAG)  #58783
   48 |   return "m-id" if ($lp=~/^slrn[\da-v.]+\.[\da-v]+\.[\da-zA-Z.+_\-]+$/);# slrn                       #55603
   49 |   return "m-id" if ($lp=~/^MPG\.[\da-f]{12,24}$/);                      # Gravity                    #43462
   50 |   return "m-id" if ($lp=~/^[\da-z]{6}\.[\da-z]{2,6}\.ln$/);             # Leafnode                   #35780
   51 |   return "m-id" if ($lp=~/^[\da-f]{8}_\d\d?$/);                         # DNews                      #29771
   52 | # Dnews actually looks different, but $-part -- if exists -- matched already:
   53 | # return "m-id\n" if ($lp=~/^[\da-f]{8}(?:\$\d\d?)?_\d\d?$/);           # DNews
   54 |   return "m-id" if ($lp=~/[-.][\dA-F]{6}\.\d{14}$/);                    # MT-NewsWatcher             #26422
   55 |   return "m-id" if ($lp=~/^Pine\.[\dA-Z-]{3,4}\.\d\.\d[\dA-Za-z_-]+(?:\..+)?\.\d{12,14}\.[-\da-zA-Z]+$/);# Pine                                                                                                    #22114
   56 |   return "m-id" if ($lp=~/^(?=.*[\d-])[\da-zA-Z-]{10}B($|\.)/);         # XP                         #18862
   57 |   return "m-id" if ($lp=~/^\d[a-z](?=.*\d)[\da-z]{7,9}$/);              # newsguy.com                #17554
   58 |   return "m-id" if ($lp=~/^.{11}8E.{3}$/);                              # Turnpike                   #11017
   59 |   return "m-id" if ($lp=~/^Xns[\dA-F]{8}/);                             # XNews                      #9061
   60 |   return "m-id" if ($lp=~/[PUV]R[\da-zA-Z_-]{0,3}_B[DNOS][\dA-Z](?:$|\.)/); # clari.net (WAG)        #8855
   61 |   return "m-id" if ($lp=~/^[\da-z]{34}$/);                              # unknown                    #8005
   62 |   return "m-id" if ($lp=~/^[\da-z]{7}\.[\da-zA-Z]{8,}N$/);              # MacSoup                    #7939
   63 |   return "m-id" if ($lp=~/^pan\.[12]\d\d\d\.[01]\d\.[0-3]\d\.[0-2]\d\.[0-5]\d\.[0-5]\d\.\d+\.\d+$/); # pan                                                                                                         #5388
   64 |   return "m-id" if ($lp=~/^(?:\d+Z){3}\d+Y\d+X\d+$/);                   # usenet.iol.it              #4360
   65 |   return "m-id" if ($lp=~/^VA\.[\da-f]{8}\.[\da-f]{8}$/);               # Virtual Access             #3839
   66 |   return "m-id" if ($lp=~/^[\da-zA-Z#]{8}[AB]HA\.\d{2,4}$/);            # Microsoft (unclear)        #3640
   67 |   return "m-id" if ($lp=~/^(?=.*(?:\d[A-Z]+\d|[A-Z]\d+[A-Z]))[\dA-Z]{12,14}$/);# unknown (WAG)       #3322
   68 |   return "m-id" if ($lp=~/^[A-Z][\da-zA-Z]{19}[\da-f]{7}(?:$|\.)/);     # unknown                    #3253
   69 |   return "m-id" if ($lp=~/^[\da-zA-Z]{12}-pn2-[\da-zA-Z]{12}$/);        # ProNews                    #3247
   70 |   return "m-id" if ($lp=~/^\d\dHW\.[\dA-F]{24}$/);                      # Hogwasher                  #3118
   71 |   return "m-id" if ($lp=~/^M\.\d{10}\.[ABC]\.\d{1,3}$/);                # bbs.nsysu.edu.tw           #2557
   72 |   return "m-id" if ($lp=~/^mailman\.\d{10}\.\d{3,5}\./);                # Mailman                    #2375
   73 |   return "m-id" if ($lp=~/^[\da-f]{1,3}\.[\da-f]{5,8}\.[\da-f]{7,8}$/); # AOL                        #1892
   74 |   return "m-id" if ($lp=~/^ee[\da-f]{5}\.-?\d{1,2}$/);                  # WebX (WAG)                 #1493
   75 |   return "m-id" if ($lp=~/^memo\.\d{14}\.[\da-zA-Z]{3,6}$/);            # unknown                    #1380
   76 |   return "m-id" if ($lp=~/^[\dA-Z]{13}\.\d{9,10}$/);                    # some anon remailers        #1296
   77 |   return "m-id" if ($lp=~/^na\.[\da-f]{10}\.a\d\d/);                    # NewsAgent                  #1170
   78 |   return "m-id" if ($lp=~/^ant\d{6}.{7}$/);                             # ANT                        #1037
   79 |   return "m-id" if ($lp=~/^(?:cancel|[sS]upersedes)/);                  # guess what                 #1023
   80 |   return "m-id" if ($lp=~/^[\da-fA-F]{8}\.MD-\d\.\d\.\d\d\./);          # MicroDot-II                #642
   81 |   return "m-id" if ($lp=~/^[^a-zA-Z0-9_.+-]/);                          # usual address start        #610
   82 |   return "m-id" if ($lp=~/^(?:[\da-f]+[tin]){3}[\da-f]+$/);             # tin                        #454
   83 | # tin actually looks different, but last part -- if exists -- matched already:
   84 | # return "m-id\n" if ($lp=~/^(?:[\da-f]+[tin]){3}[\da-f]+(?:%[\da-z.+_\-]+)?$/);
   85 |   return "m-id" if ($lp=~/^[\dA-F]{8}yf$/);                             # Yanoff Conduit             #264
   86 |   return "m-id" if ($lp=~/^XFMail\.\d{12,14}\./);                       # XFMail                     #225
   87 |   return "m-id" if ($lp=~/^Y[aA][mM]2NN\.(?:AmigaOS\.|NewsNet\.)?[\dA-F]{7,8}\.[\dA-F]{4,8}$/);# Yam2NN #142
   88 | 
   89 | # case-independent from here on ###################################################################
   90 |   $lp=lc($lp);
   91 |   return "m-id" if ($lp=~/^[\da-z]{6}\.[\da-z]{3}$/);            # CNews
   92 |   return "m-id" if ($lp=~/^e[\da-z]{6}-[\da-z]{6}-[\da-z]{2}$/); # exim
   93 |   return "m-id" if ($lp=~/^.{5}md[\da-z]{7,8}$/);                # MicroDot
   94 |   return "m-id" if ($lp=~/^[12]\d\d\d[01]\d[0-3]\d[0-2]\d[0-5]\d(?:[0-5]\d)?\.[a-z]{1,3}\d+(?:\.[\da-z.+_\-]+)?$/); # mutt (variations)
   95 |   return "m-id" if ($lp=~/^[12]\d\d\d[01]\d[0-3]\d[0-2]\d[0-5]\d\.[\da-z][\dab][\da-u][\da-n][\da-z]{3}\d{5,6}$/); # sendmail
   96 |   return "m-id" if ($lp=~/^[12]\d\d\d[01]\d[0-3]\d[0-2]\d[0-5]\d\.[a-x]?[a-z~][a-z]\d{5}$/); # sendmail
   97 | # return "m-id" if ($lp=~/^[\da-f]{8}\.[\da-f]{1,8}$/);          # Netscape, matched above
   98 | 
   99 | # no we can only guess ############################################################################
  100 |   return "mail" if ($len==8 && $lp=~/^[a-f]+\d+$/);                           # just guessing
  101 |   return "m-id" if ($lp=~/^[\da-f]{8}$/);                                     # just guessing
  102 |   return "m-id" if ($lp=~/^(?![\d#]+$).*[\da-f]{15,}/);                       # just guessing
  103 |   return "m-id" if ($lp=~/^\d{2,}-[\da-f]{8,}-\d+$/);                         # just guessing (webtv.net)
  104 |   return "m-id" if ($lp=~/([.-])\d{4,}\1\d{3,}$/);                            # just guessing
  105 |   return "m-id" if ($lp=~/^[\da-f]{8,}[._]/);                                 # just guessing
  106 |   return "m-id" if ($lp=~/^\d{7,8}\.[\da-z]{10}/);                            # just guessing
  107 |   return "m-id" if ($lp=~/^[\da-f]{11,}$/);                                   # just guessing
  108 |   return "m-id" if ($lp=~/^\d{2,4}[.-]\d\d[.-]\d{2,4}.*\d\d[.-]\d\d[.-]\d\d/);# just guessing
  109 |   return "mmmm";                                                              # When in doubt, say mail.
  110 | }

Valid CSS!Valid HTML 4.01!
© Boris 'pi' Piwinger, February 18, 2002