diff options
| -rw-r--r-- | longify/longify-urls.pl | 226 | 
1 files changed, 226 insertions, 0 deletions
| diff --git a/longify/longify-urls.pl b/longify/longify-urls.pl new file mode 100644 index 0000000..208e468 --- /dev/null +++ b/longify/longify-urls.pl @@ -0,0 +1,226 @@ +=pod + +=head1 NAME + +longify-urls.pl + +=head1 DESCRIPTION + +Checks channel messages for 'shortened' links, and expands them to their +final target address. + +=head1 INSTALLATION + +Copy into your F<~/.irssi/scripts/> directory and load with +C</SCRIPT LOAD F<filename>>. + +=head1 USAGE + +Load it. + +B<Note:> The lookup to check if a link is shortened runs in the background, so it +won't affect the running of Irssi, but the message containing the link is queued +until either a response comes back, or the timeout (~2 seconds) is hit. + +=head1 AUTHORS + +Copyright E<copy> 2011 Tom Feist C<E<lt>shabble+irssi@metavore.orgE<gt>> + +=head1 LICENCE + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +=head1 BUGS + + +=head1 TODO + +=over + +=item * Not tested with simultaneous lookups + +=item * User-configurable timeout + +=item * some sort of list of shorteners? (saves having to look up every single url) + +=back + +=cut + +use strict; +use warnings; + +use Irssi; +use Irssi::Irc; +use Irssi::TextUI; + +use Data::Dumper; + +use IrssiX::Async qw(fork_off); +use LWP::UserAgent; + +our $VERSION = '0.1'; +our %IRSSI = ( +              authors     => 'shabble', +              contact     => 'shabble+irssi@metavore.org', +              name        => 'longify-urls', +              description => 'checks to see if links mentioned in public' +                     . 'channels are shortened, and, if so, expands them', +              license     => 'MIT', +              updated     => '8/7/2011' +             ); + +my $pending_msg_params = {}; +my $lookup_in_progress; +my $flushing_message; + +sub sig_public_message { +    my ($server, $msg, @rest) = @_; + +    if ($flushing_message) { # don't interrupt it a second time. +        delete $pending_msg_params->{$flushing_message}; +        $flushing_message = ''; +        return; +    } + +    my $url = match_uri($msg); + +    return unless $url; + + + +    $pending_msg_params->{$url} = [@_]; +    $lookup_in_progress = 1; +    expand_url($url); + +    Irssi::signal_stop; +} + +sub sig_private_message { +    my ($server, $msg, $nick, $addr, $target) = @_; + +} + + +sub expand_url { +    my ($url) = @_; +    fork_off $url, \&expand_url_request, \&expand_url_callback; +} + +sub expand_url_request { +    my $url = <STDIN>; +    chomp $url; + +    my $user_agent = LWP::UserAgent->new; +    $user_agent->agent("irssi-longify-urls/0.1 "); +    $user_agent->timeout(2); # TODO: make this a setting. + +    my $request = HTTP::Request->new(HEAD => $url); +    my $result = $user_agent->request($request); + +    print "$url\n"; + +    if ($result->is_error) { +        print "ERROR: " . $result->as_string . "\n"; +        return; +    } + +    my @redirects = $result->redirects; +    if (@redirects) { +        print $redirects[-1]->header('Location') . "\n"; +    } +} + +sub expand_url_callback { +    my ($result) = @_; + +    chomp $result; +    my ($orig_url, $long_url) = split /\n/, $result; +    $long_url = '' unless $long_url; +    $long_url =~ s/\s*(\S*)\s*/$1/; + + +    my $pending_message_data = $pending_msg_params->{$orig_url}; +    my @new_signal = @$pending_message_data; + +    Irssi::print("Result: orignal: $orig_url, new: $long_url"); + +    if ($long_url && $long_url !~ /^ERROR/ && $long_url ne $orig_url) { +        $new_signal[1] =~ s/\Q$orig_url\E/$long_url [was: $orig_url]/; +        print "Printing with: " . Dumper(@new_signal[1..$#new_signal]); +    } elsif ($long_url && $long_url =~ /^ERROR/) { +        $new_signal[1] =~ s/\Q$orig_url\E/$long_url while expanding "$orig_url"/; +    } + +    $flushing_message = $orig_url; +    Irssi::signal_emit 'message public', @new_signal; + +} + +sub match_uri { +    my $text = shift; +    # url matching regex taken +    # from http://daringfireball.net/2010/07/improved_regex_for_matching_urls +    my $regex = qr((?xi) +\b +(                           # Capture 1: entire matched URL +  (?: +    [a-z][\w-]+:                # URL protocol and colon +    (?: +      /{1,3}                        # 1-3 slashes +      |                             #   or +      [a-z0-9%]                     # Single letter or digit or '%' +                                    # (Trying not to match e.g. "URI::Escape") +    ) +    |                           #   or +    www\d{0,3}[.]               # "www.", "www1.", "www2." … "www999." +    |                           #   or +    [a-z0-9.\-]+[.][a-z]{2,4}/  # looks like domain name followed by a slash +  ) +  (?:                           # One or more: +    [^\s()<>]+                      # Run of non-space, non-()<> +    |                               #   or +    \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels +  )+ +  (?:                           # End with: +    \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels +    |                                   #   or +    [^\s`!()\[\]{};:'".,<>?«»“”‘’]        # not a space or one of these punct chars +  ) +)); + + +    if ($text =~ $regex) { +        my $uri = $1; +        # shorten needs the http prefix or it'll treat it as a relative link. +        $uri = 'http://' . $uri if $uri !~ m(http://); +        return $uri; +    } else { +        # no match +        return undef; +    } +} + +sub init { +    Irssi::signal_add_first 'message public',  \&sig_public_message; +    Irssi::signal_add_first 'message private', \&sig_private_message; +} + + +init(); | 
