From c7f0d951db38d81f93c7564611f7e4794b3c1656 Mon Sep 17 00:00:00 2001 From: Tom Feist Date: Fri, 8 Jul 2011 10:19:32 +0100 Subject: added longify-urls script, to unshorten things automagically. --- longify/longify-urls.pl | 226 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 longify/longify-urls.pl (limited to 'longify') diff --git a/longify/longify-urls.pl b/longify/longify-urls.pl new file mode 100644 index 0000000..208e468 --- /dev/null +++ b/longify/longify-urls.pl @@ -0,0 +1,226 @@ +=pod + +=head1 NAME + +longify-urls.pl + +=head1 DESCRIPTION + +Checks channel messages for 'shortened' links, and expands them to their +final target address. + +=head1 INSTALLATION + +Copy into your F<~/.irssi/scripts/> directory and load with +C</SCRIPT LOAD F<filename>>. + +=head1 USAGE + +Load it. + +B<Note:> The lookup to check if a link is shortened runs in the background, so it +won't affect the running of Irssi, but the message containing the link is queued +until either a response comes back, or the timeout (~2 seconds) is hit. + +=head1 AUTHORS + +Copyright E<copy> 2011 Tom Feist C<E<lt>shabble+irssi@metavore.orgE<gt>> + +=head1 LICENCE + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+=head1 BUGS
+
+
+=head1 TODO
+
+=over
+
+=item * Not tested with simultaneous lookups
+
+=item * User-configurable timeout
+
+=item * some sort of list of shorteners? (saves having to look up every single url)
+
+=back
+
+=cut
+
+use strict;
+use warnings;
+
+use Irssi;
+use Irssi::Irc;
+use Irssi::TextUI;
+
+use Data::Dumper;
+
+use IrssiX::Async qw(fork_off);
+use LWP::UserAgent;
+
+our $VERSION = '0.1';
+our %IRSSI = (
+    authors     => 'shabble',
+    contact     => 'shabble+irssi@metavore.org',
+    name        => 'longify-urls',
+    description => 'checks to see if links mentioned in public '
+                 . 'channels are shortened, and, if so, expands them',
+    license     => 'MIT',
+    updated     => '8/7/2011'
+   );
+
+# Original 'message public' arguments of held-back messages, keyed by the URL
+# that is being looked up for them.
+my $pending_msg_params = {};
+# Set when a lookup is started; nothing in this file reads it yet.
+my $lookup_in_progress;
+# URL of the message expand_url_callback() is re-emitting right now, or a
+# false value when no re-emit is in flight.
+my $flushing_message;
+
+# Handler for 'message public'.
+#
+# A message whose text contains a URL is held back (the signal is stopped) and
+# a background lookup is started; expand_url_callback() re-emits it once the
+# lookup has finished. The re-emitted message is recognised through
+# $flushing_message and passed through untouched.
+sub sig_public_message {
+    my ($server, $msg, @rest) = @_;
+
+    if ($flushing_message) { # don't interrupt it a second time.
+        delete $pending_msg_params->{$flushing_message};
+        $flushing_message = '';
+        return;
+    }
+
+    my $url = match_uri($msg);
+
+    return unless $url;
+
+    # NOTE: the table is keyed by URL only, so a second message carrying the
+    # same URL before the first lookup returns replaces the stored arguments.
+    $pending_msg_params->{$url} = [@_];
+    $lookup_in_progress = 1;
+    expand_url($url);
+
+    Irssi::signal_stop;
+}
+
+# Placeholder: private messages are not processed.
+sub sig_private_message {
+    my ($server, $msg, $nick, $addr, $target) = @_;
+
+}
+
+
+# Hands $url to fork_off together with the request and callback routines.
+sub expand_url {
+    my ($url) = @_;
+    fork_off $url, \&expand_url_request, \&expand_url_callback;
+}
+
+# Lookup routine passed to fork_off.
+#
+# Reads the URL from STDIN and prints the URL on the first output line. A
+# second line follows only when there is something to report: either
+# "ERROR: <status line>" or the address the redirects ended at.
+sub expand_url_request {
+    my $url = <STDIN>;
+    return unless defined $url;
+    chomp $url;
+
+    my $user_agent = LWP::UserAgent->new;
+    $user_agent->agent("irssi-longify-urls/0.1 ");
+    $user_agent->timeout(2); # TODO: make this a setting.
+
+    my $request = HTTP::Request->new(HEAD => $url);
+    my $result = $user_agent->request($request);
+
+    print "$url\n";
+
+    if ($result->is_error) {
+        # The callback reads exactly one line after the URL, so report the
+        # one-line status rather than the whole multi-line response.
+        print "ERROR: " . $result->status_line . "\n";
+        return;
+    }
+
+    my @redirects = $result->redirects;
+    if (@redirects) {
+        # The request attached to the final response holds the absolute URI
+        # that was fetched last, even if a Location header was relative.
+        print $result->request->uri . "\n";
+    }
+}
+
+# Completion routine passed to fork_off; receives what expand_url_request()
+# printed.
+#
+# Rewrites the URL inside the held-back message when the lookup produced a
+# different address or an error, then re-emits the message.
+sub expand_url_callback {
+    my ($result) = @_;
+
+    return unless defined $result;
+    chomp $result;
+
+    my ($orig_url, $long_url) = split /\n/, $result;
+    return unless defined $orig_url;
+
+    $long_url = '' unless $long_url;
+    $long_url =~ s/\A\s+|\s+\z//g;   # trim both ends, keep inner spaces
+
+    # Without the stored arguments there is nothing to re-emit.
+    my $pending_message_data = $pending_msg_params->{$orig_url};
+    return unless $pending_message_data;
+    my @new_signal = @$pending_message_data;
+
+    Irssi::print("Result: original: $orig_url, new: $long_url");
+
+    # match_uri() may have prepended "http://"; fall back to the bare form so
+    # the substitution finds the link as it was written in the message.
+    my $shown_url = $orig_url;
+    if (index($new_signal[1], $shown_url) < 0) {
+        $shown_url =~ s{\Ahttp://}{};
+    }
+
+    if ($long_url && $long_url !~ /^ERROR/ && $long_url ne $orig_url) {
+        $new_signal[1] =~ s/\Q$shown_url\E/$long_url [was: $shown_url]/;
+        print "Printing with: " . Dumper(@new_signal[1..$#new_signal]);
+    } elsif ($long_url && $long_url =~ /^ERROR/) {
+        $new_signal[1] =~ s/\Q$shown_url\E/$long_url while expanding "$shown_url"/;
+    }
+
+    $flushing_message = $orig_url;
+    Irssi::signal_emit 'message public', @new_signal;
+
+    # sig_public_message() normally clears both of these while the signal is
+    # being emitted. Repeat it here so that a re-emit which never reached the
+    # handler cannot leave the flag set and swallow the next real message.
+    $flushing_message = '';
+    delete $pending_msg_params->{$orig_url};
+}
+
+# Returns the first URL found in $text, or nothing when there is none.
+# "http://" is prepended when the match does not start with a "scheme://".
+sub match_uri {
+    my $text = shift;
+    return unless defined $text;
+
+    # url matching regex taken
+    # from http://daringfireball.net/2010/07/improved_regex_for_matching_urls
+    my $regex = qr((?xi)
+\b
+(                           # Capture 1: entire matched URL
+  (?:
+    [a-z][\w-]+:                # URL protocol and colon
+    (?:
+      /{1,3}                        # 1-3 slashes
+      |                             #   or
+      [a-z0-9%]                     # Single letter or digit or '%'
+                                    # (Trying not to match e.g. "URI::Escape")
+    )
+    |                           #   or
+    www\d{0,3}[.]               # "www.", "www1.", "www2." … "www999."
+    |                           #   or
+    [a-z0-9.\-]+[.][a-z]{2,4}/  # looks like domain name followed by a slash
+  )
+  (?:                           # One or more:
+    [^\s()<>]+                      # Run of non-space, non-()<>
+    |                               #   or
+    \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels
+  )+
+  (?:                           # End with:
+    \(([^\s()<>]+|(\([^\s()<>]+\)))*\)  # balanced parens, up to 2 levels
+    |                                   #   or
+    [^\s`!()\[\]{};:'".,<>?«»“”‘’]        # not a space or one of these punct chars
+  )
+));
+
+
+    if ($text =~ $regex) {
+        my $uri = $1;
+        # shorten needs the http prefix or it'll treat it as a relative link.
+        # Only add it when no scheme is present, so "https://..." does not
+        # become "http://https://...".
+        $uri = 'http://' . $uri if $uri !~ m{\A[a-z][\w-]+://}i;
+        return $uri;
+    }
+
+    # no match
+    return;
+}
+
+# Registers both message handlers with signal_add_first.
+sub init {
+    Irssi::signal_add_first 'message public', \&sig_public_message;
+    Irssi::signal_add_first 'message private', \&sig_private_message;
+}
+
+
+init();
--
cgit v1.2.3

From 4221ecd6c932617167b810c7ca0ec9655788ae2a Mon Sep 17 00:00:00 2001
From: Tom Feist
Date: Fri, 8 Jul 2011 10:29:08 +0100
Subject: update install instructions to mention IrssiX modules, and generate a readme.

---
 longify/README.pod | 77 +++++++++++++++++++++++++++++++++++++++++++++++++
 longify/longify-urls.pl | 12 +++++++-
 2 files changed, 88 insertions(+), 1 deletion(-)
 create mode 100644 longify/README.pod

(limited to 'longify')

diff --git a/longify/README.pod b/longify/README.pod
new file mode 100644
index 0000000..25a7f8e
--- /dev/null
+++ b/longify/README.pod
@@ -0,0 +1,77 @@
+=pod
+
+=head1 NAME
+
+longify-urls.pl
+
+=head1 DESCRIPTION
+
+Checks channel messages for 'shortened' links, and expands them to their
+final target address.
+
+=head1 INSTALLATION
+
+=over
+
+=item Download the modules from L
+and place them in a directory known to Perl (One of the default system locations
+for perl modules, or somewhere that is listed in the C<$PERL5LIB> environment variable).
+They should be placed in a subdirectory named C in whichever module directory
+you choose.
+
+=item Copy this script into your F<~/.irssi/scripts/> directory and load with
+C>.
+
+=back
+
+=head1 USAGE
+
+Load it.
+
+B The lookup to check if a link is shortened runs in the background, so it
+won't affect the running of Irssi, but the message containing the link is queued
+until either a response comes back, or the timeout (~2 seconds) is hit.
+ +=head1 AUTHORS + +Copyright E<copy> 2011 Tom Feist C<E<lt>shabble+irssi@metavore.orgE<gt>> + +=head1 LICENCE + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +=head1 BUGS + + +=head1 TODO + +=over + +=item * Not tested with simultaneous lookups + +=item * User-configurable timeout + +=item * some sort of list of shorteners? (saves having to look up every single url) + +=back + + + +=cut + diff --git a/longify/longify-urls.pl b/longify/longify-urls.pl index 208e468..7d352dc 100644 --- a/longify/longify-urls.pl +++ b/longify/longify-urls.pl @@ -11,9 +11,19 @@ final target address. =head1 INSTALLATION -Copy into your F<~/.irssi/scripts/> directory and load with +=over + +=item Download the modules from L +and place them in a directory known to Perl (One of the default system locations +for perl modules, or somewhere that is listed in the C<$PERL5LIB> environment variable). +They should be placed in a subdirectory named C in whichever module directory +you choose. 
+ +=item Copy this script into your F<~/.irssi/scripts/> directory and load with C>. +=back + =head1 USAGE Load it. -- cgit v1.2.3 From 60d269f91370c60befc394a741b59aaffb7acede Mon Sep 17 00:00:00 2001 From: Tom Feist Date: Fri, 8 Jul 2011 10:35:35 +0100 Subject: fix formatting error in POD, hopefully. --- longify/README.pod | 6 +++--- longify/longify-urls.pl | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'longify') diff --git a/longify/README.pod b/longify/README.pod index 25a7f8e..eb49086 100644 --- a/longify/README.pod +++ b/longify/README.pod @@ -13,13 +13,13 @@ final target address. =over -=item Download the modules from L +=item * Download the modules from L and place them in a directory known to Perl (One of the default system locations for perl modules, or somewhere that is listed in the C<$PERL5LIB> environment variable). -They should be placed in a subdirectory named C in whichever module directory +They should be placed in a subdirectory named C in whichever module directory you choose. -=item Copy this script into your F<~/.irssi/scripts/> directory and load with +=item * Copy this script into your F<~/.irssi/scripts/> directory and load with C>. =back diff --git a/longify/longify-urls.pl b/longify/longify-urls.pl index 7d352dc..b294918 100644 --- a/longify/longify-urls.pl +++ b/longify/longify-urls.pl @@ -13,13 +13,13 @@ final target address. =over -=item Download the modules from L +=item * Download the modules from L and place them in a directory known to Perl (One of the default system locations for perl modules, or somewhere that is listed in the C<$PERL5LIB> environment variable). -They should be placed in a subdirectory named C in whichever module directory +They should be placed in a subdirectory named C in whichever module directory you choose. -=item Copy this script into your F<~/.irssi/scripts/> directory and load with +=item * Copy this script into your F<~/.irssi/scripts/> directory and load with C>. 
=back -- cgit v1.2.3 From 4ad6f4629a30dea934138bfec5f409bf58c07f9d Mon Sep 17 00:00:00 2001 From: Tom Feist Date: Fri, 8 Jul 2011 11:02:24 +0100 Subject: added list of shortener domains scraped from http://bazaar.launchpad.net/~seanmurphy/longurl/trunk/view/head:/libraries/known_services.php --- longify/longify-urls.list | 227 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 227 insertions(+) create mode 100644 longify/longify-urls.list (limited to 'longify') diff --git a/longify/longify-urls.list b/longify/longify-urls.list new file mode 100644 index 0000000..4ac705c --- /dev/null +++ b/longify/longify-urls.list @@ -0,0 +1,227 @@ +tinyurl.com +is.gd +ur1.ca +ping.fm +snipurl.com +snurl.com +snipr.com +sn.im +bit.ly +j.mp +tr.im +xrl.us +twurl.nl +x.se +poprl.com +url.ie +6url.com +yep.it +ln-s.net +piurl.com +yatuc.com +g8l.us +icanhaz.com +urlkiss.com +minilien.com +tinylink.com +urlcut.com +doiop.com +smurl.com +tighturl.com +2tu.us +myurl.in +memurl.com +redirx.com +easyurl.net +qurlyq.com +dwarfurl.com +shrinkurl.us +starturl.com +urlhawk.com +canurl.com +surl.co.uk +lnkurl.com +urlbrief.com +urlborg.com +ub0.cc +urlvi.be +sn.vc +lurl.no +hurl.ws +twiturl.de +spedr.com +parv.us +decenturl.com +shorterlink.com +shortlinks.co.uk +budurl.com +shw.me +buk.me +zz.gd +0rz.tw +lin.cr +shink.de +s3nt.com +fff.to +wipi.es +ri.ms +b23.ru +zi.ma +srs.li +liip.to +post.ly +tcrn.ch +cli.gs +ptiturl.com +snadr.it +shrinkify.com +s7y.us +tiny.cc +idek.net +ff.im +onsaas.info +tubeurl.com +tgr.me +ow.ly +a2n.eu +azqq.com +liltext.com +b65.com +cuturls.com +goshrink.com +atu.ca +redirects.ca +fhurl.com +flingk.com +fly2.ws +nutshellurl.com +notlong.com +nanoref.com +moourl.com +shurl.net +shrinkr.com +shortenurl.com +rubyurl.com +url.co.uk +xil.in +wapurl.co.uk +u76.org +urlcutter.com +fwdurl.net +shrtnd.com +urlpire.com +smallr.com +shredurl.com +linkgap.com +plumurl.com +offur.com +liurl.cn +adjix.com +ad.vu +togoto.us +a.gg +a.nf +go.9nl.com +lru.jp +kl.am 
+nn.nf +idek.net +hex.io +to.ly +digg.com +cliccami.info +➡.ws +➨.ws +➯.ws +➔.ws +➞.ws +➽.ws +➹.ws +✩.ws +✿.ws +❥.ws +›.ws +ta.gd +cort.as +u.nu +r.im +alturl.com +chilp.it +ewerl.com +flq.us +hugeurl.com +jijr.com +kissa.be +l9k.net +o-x.fr +omf.gd +plurl.me +rb6.me +rickroll.it +shoturl.us +srnk.net +tiny.pl +u.mavrev.com +updating.me +xrl.in +xurl.jp +yfrog.com +307.to +bacn.me +bloat.me +clipurl.us +href.in +korta.nu +merky.de +nanourl.se +peaurl.com +pnt.me +rde.me +reallytinyurl.com +redir.ec +short.ie +short.to +tra.kz +twurl.cc +url.az +urlx.ie +zurl.ws +qlnk.net +twitclicks.com +htxt.it +fwd4.me +clop.in +ln-s.ru +sp2.ro +eepurl.com +twitterurl.net +clck.ru +atu.ca +netnet.me +adf.ly +zzang.kr +ni.to +lu.to +xrl.in +virl.com +gl.am +klck.me +trunc.it +migre.me +xzb.cc +ulu.lu +shar.es +3.ly +urli.nl +su.pr +goo.gl +om.ly +hiderefer.com +flic.kr +w33.us +fwib.net +zud.me +7.ly +shrt.fr +smsh.me +bon.no -- cgit v1.2.3 From 8e8d2111bb23dcc1aa4998e4577950a1ef327196 Mon Sep 17 00:00:00 2001 From: Tom Feist Date: Fri, 8 Jul 2011 11:12:13 +0100 Subject: load the shorteners list, and check it against the host part of any URIs before attempting a query. Also /longify-reload command to reload list from file. 
---
 longify/longify-urls.pl | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

(limited to 'longify')

diff --git a/longify/longify-urls.pl b/longify/longify-urls.pl
index b294918..69d7f94 100644
--- a/longify/longify-urls.pl
+++ b/longify/longify-urls.pl
@@ -84,6 +84,7 @@ use Data::Dumper;
 
 use IrssiX::Async qw(fork_off);
 use LWP::UserAgent;
+use URI;
 
 our $VERSION = '0.1';
 our %IRSSI = (
@@ -99,6 +100,8 @@ our %IRSSI = (
 my $pending_msg_params = {};
 my $lookup_in_progress;
 my $flushing_message;
+# Known shortener domains (lower-cased host name => 1), filled by cmd_reload().
+my $domains;
+
 
 sub sig_public_message {
     my ($server, $msg, @rest) = @_;
@@ -113,7 +116,9 @@ sub sig_public_message {
 
     return unless $url;
 
+    # Only look up hosts on the shortener list. Not every URI class has a
+    # host method, so check before calling it.
+    my $uri_obj = URI->new($url);
+    return unless $uri_obj->can('host') && exists $domains->{lc($uri_obj->host || '')};
 
 
     $pending_msg_params->{$url} = [@_];
     $lookup_in_progress = 1;
@@ -140,8 +145,8 @@ sub expand_url_request {
     my $user_agent = LWP::UserAgent->new;
     $user_agent->agent("irssi-longify-urls/0.1 ");
     $user_agent->timeout(2); # TODO: make this a setting.
-
-    my $request = HTTP::Request->new(HEAD => $url);
+    $user_agent->max_size(0);
+    my $request = HTTP::Request->new(GET => $url);
     my $result = $user_agent->request($request);
 
     print "$url\n";
@@ -169,11 +174,11 @@ sub expand_url_callback {
     my $pending_message_data = $pending_msg_params->{$orig_url};
     my @new_signal = @$pending_message_data;
 
-    Irssi::print("Result: orignal: $orig_url, new: $long_url");
+    #Irssi::print("Result: orignal: $orig_url, new: $long_url");
 
     if ($long_url && $long_url !~ /^ERROR/ && $long_url ne $orig_url) {
         $new_signal[1] =~ s/\Q$orig_url\E/$long_url [was: $orig_url]/;
-        print "Printing with: " . Dumper(@new_signal[1..$#new_signal]);
+        #print "Printing with: " . Dumper(@new_signal[1..$#new_signal]);
     } elsif ($long_url && $long_url =~ /^ERROR/) {
         $new_signal[1] =~ s/\Q$orig_url\E/$long_url while expanding "$orig_url"/;
     }
@@ -227,10 +232,30 @@ sub match_uri {
     }
 }
 
+# (Re)loads the list of shortener domains, one per line, into $domains.
+#
+# Bound to /longify-reload and also called once from init(). The first
+# argument, when non-empty, names the file to read; otherwise
+# longify-urls.list in the Irssi directory is used. Dies if the file cannot
+# be opened, in which case the previously loaded list is kept.
+sub cmd_reload {
+    my $filename = shift || Irssi::get_irssi_dir . '/longify-urls.list';
+
+    open my $fh, '<', $filename
+      or die "Couldn't open file containing shorteners list $filename: $!";
+
+    # Fill a fresh table and swap it in at the end, so the old list stays in
+    # use until the new one has been read completely.
+    my %loaded;
+    while (my $line = <$fh>) {
+        $line =~ s/\A\s+|\s+\z//g;   # drops the newline and stray whitespace
+        next if $line eq '';         # a blank line is not a domain
+        $loaded{lc $line} = 1;
+    }
+    close $fh;
+
+    $domains = \%loaded;
+    Irssi::active_win->print('%_Longify:%_ List of domains has been reloaded.');
+}
+
 sub init {
     Irssi::signal_add_first 'message public', \&sig_public_message;
     Irssi::signal_add_first 'message private', \&sig_private_message;
+    Irssi::signal_add 'setup changed', \&sig_setup_changed;
+    Irssi::command_bind 'longify-reload', \&cmd_reload;
+
+    # Load the domain list once at start-up.
+    cmd_reload();
 }
 
+# Handler for 'setup changed'; currently does nothing.
+sub sig_setup_changed {
+    # TODO: settings updating stuff goes here.
+}
 
 init();
--
cgit v1.2.3

From 31c754b728b886eadfd728f6dbdefb28f8623f80 Mon Sep 17 00:00:00 2001
From: Tom Feist
Date: Fri, 8 Jul 2011 11:14:00 +0100
Subject: update docs to mention urls file

---
 longify/longify-urls.pl | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'longify')

diff --git a/longify/longify-urls.pl b/longify/longify-urls.pl
index 69d7f94..8b2691f 100644
--- a/longify/longify-urls.pl
+++ b/longify/longify-urls.pl
@@ -19,6 +19,8 @@ for perl modules, or somewhere that is listed in the C<$PERL5LIB> environment va
 They should be placed in a subdirectory named C in whichever module directory
 you choose.
 
+=item * Copy the F file into your F<~/.irssi/> directory.
+
 =item * Copy this script into your F<~/.irssi/scripts/> directory and load with
 C>.
 
@@ -67,7 +69,7 @@ THE SOFTWARE.
 
 =item * User-configurable timeout
 
-=item * some sort of list of shorteners? (saves having to look up every single url)
+=item * deal with utf-8 (that stupid arrow site).
 =back
--
cgit v1.2.3

From 6aab115aa9f9374b3ceb599de6bc34f62d9cbbab Mon Sep 17 00:00:00 2001
From: Tom Feist
Date: Fri, 8 Jul 2011 11:14:28 +0100
Subject: rebuilt README

---
 longify/README.pod | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'longify')

diff --git a/longify/README.pod b/longify/README.pod
index eb49086..e161985 100644
--- a/longify/README.pod
+++ b/longify/README.pod
@@ -19,6 +19,8 @@ for perl modules, or somewhere that is listed in the C<$PERL5LIB> environment va
 They should be placed in a subdirectory named C in whichever module directory
 you choose.
 
+=item * Copy the F file into your F<~/.irssi/> directory.
+
 =item * Copy this script into your F<~/.irssi/scripts/> directory and load with
 C>.
 
@@ -67,7 +69,7 @@ THE SOFTWARE.
 
 =item * User-configurable timeout
 
-=item * some sort of list of shorteners? (saves having to look up every single url)
+=item * deal with utf-8 (that stupid arrow site).
 
 =back
 
--
cgit v1.2.3

From adffe12768b78f2b69305a5532c883576d9756c0 Mon Sep 17 00:00:00 2001
From: Tom Feist
Date: Sat, 16 Jul 2011 04:57:28 +0100
Subject: updated to add PM support, and build filenames more betterly

---
 longify/longify-urls.pl | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

(limited to 'longify')

diff --git a/longify/longify-urls.pl b/longify/longify-urls.pl
index 8b2691f..38e8ba8 100644
--- a/longify/longify-urls.pl
+++ b/longify/longify-urls.pl
@@ -87,6 +87,7 @@ use Data::Dumper;
 use IrssiX::Async qw(fork_off);
 use LWP::UserAgent;
 use URI;
+use File::Spec;
 
 our $VERSION = '0.1';
 our %IRSSI = (
@@ -106,7 +107,16 @@ my $domains;
 
 
 sub sig_public_message {
-    my ($server, $msg, @rest) = @_;
+    _handle_messages(@_);
+}
+
+# Private messages now go through the same handling as public ones.
+sub sig_private_message {
+    _handle_messages(@_);
+}
+
+# Common handler behind both message signals. It reads only the message text,
+# which is the second signal argument for public and private messages alike.
+# NOTE(review): the expand_url_callback shown earlier in this series re-emits
+# the held-back message with 'message public' — confirm that a private
+# message is re-emitted under the right signal.
+sub _handle_messages {
+
+    my $msg = $_[1];
 
     if ($flushing_message) { # don't interrupt it a second time.
         delete $pending_msg_params->{$flushing_message};
@@ -129,12 +139,6 @@ sub sig_public_message {
     Irssi::signal_stop;
 }
 
-sub sig_private_message {
-    my ($server, $msg, $nick, $addr, $target) = @_;
-
-}
-
-
 sub expand_url {
     my ($url) = @_;
     fork_off $url, \&expand_url_request, \&expand_url_callback;
@@ -235,7 +239,8 @@ sub match_uri {
 }
 
 sub cmd_reload {
-    my $filename = shift || Irssi::get_irssi_dir . '/longify-urls.list';
+    # Default list location: longify-urls.list inside the Irssi directory.
+    my $filename = shift
+      || File::Spec->catfile(Irssi::get_irssi_dir, 'longify-urls.list');
     $domains = {};
     open my $fh, '<', $filename
       or die "Couldn't open file containing shorteners list $filename: $!";
--
cgit v1.2.3