#!/home/grinnz/projects/cpandoc-browser/perls/5.38.2/bin/perl =head1 NAME lwp-download - Fetch large files from the web =head1 SYNOPSIS lwp-download [-a] [-s] [] Options: -a save the file in ASCII mode -s use HTTP headers to guess output filename =head1 DESCRIPTION The B program will save the file at I to a local file. If I is not specified, then the current directory is assumed. If I is a directory, then the last segment of the path of the I is appended to form a local filename. If the I path ends with slash the name "index" is used. With the B<-s> option pick up the last segment of the filename from server provided sources like the Content-Disposition header or any redirect URLs. A file extension to match the server reported Content-Type might also be appended. If a file with the produced filename already exists, then B will prompt before it overwrites and will fail if its standard input is not a terminal. This form of invocation will also fail is no acceptable filename can be derived from the sources mentioned above. If I is not a directory, then it is simply used as the path to save into. If the file already exists it's overwritten. The I program is implemented using the I library. It is better suited to down load big files than the I program because it does not store the file in memory. Another benefit is that it will keep you updated about its progress and that you don't have much options to worry about. Use the C<-a> option to save the file in text (ASCII) mode. Might make a difference on DOSish systems. =head1 EXAMPLE Fetch the newest and greatest perl version: $ lwp-download http://www.perl.com/CPAN/src/latest.tar.gz Saving to 'latest.tar.gz'... 11.4 MB received in 8 seconds (1.43 MB/sec) =head1 AUTHOR Gisle Aas =cut #' get emacs out of quote mode use strict; use warnings; use LWP::UserAgent (); use LWP::MediaTypes qw(guess_media_type media_suffix); use URI (); use HTTP::Date (); use Encode; use Encode::Locale; use Getopt::Long qw(HelpMessage :config gnu_getopt no_ignore_case auto_help); my $progname = $0; $progname =~ s,.*/,,; # only basename left in progname $progname =~ s,.*\\,, if $^O eq "MSWin32"; $progname =~ s/\.\w*$//; # strip extension if any my %opt; GetOptions( 'a' => \$opt{a}, 's' => \$opt{s} ) or HelpMessage(); my $url = URI->new(decode(locale => shift) || HelpMessage()); my $argfile = encode(locale_fs => decode(locale => shift)); HelpMessage() if defined($argfile) && !length($argfile); my $ua = LWP::UserAgent->new( agent => "lwp-download/$LWP::UserAgent::VERSION ", keep_alive => 1, env_proxy => 1, ); my $file; # name of file we download into my $length; # total number of bytes to download my $flength; # formatted length my $size = 0; # number of bytes received my $start_t; # start time of download my $last_dur; # time of last callback my $shown = 0; # have we called the show() function yet $SIG{INT} = sub { die "Interrupted\n"; }; $| = 1; # autoflush my $res = $ua->request( HTTP::Request->new(GET => $url), sub { unless (defined $file) { my $res = $_[1]; my $directory; if (defined $argfile && -d $argfile) { ($directory, $argfile) = ($argfile, undef); } unless (defined $argfile) { # find a suitable name to use $file = $opt{s} && $res->filename; # if this fails we try to make something from the URL unless ($file) { $file = ($url->path_segments)[-1]; if (!defined($file) || !length($file)) { $file = "index"; my $suffix = media_suffix($res->content_type); $file .= ".$suffix" if $suffix; } elsif ($url->scheme eq 'ftp' || $file =~ /\.t[bg]z$/ || $file =~ /\.tar(\.(Z|gz|bz2?))?$/) { # leave the filename as it was } else { my $ct = guess_media_type($file); unless ($ct eq $res->content_type) { # need a better suffix for this type my $suffix = media_suffix($res->content_type); $file .= ".$suffix" if $suffix; } } } # validate that we don't have a harmful filename now. The server # might try to trick us into doing something bad. if (!length($file) || $file =~ s/([^a-zA-Z0-9_\.\-\+\~])/sprintf "\\x%02x", ord($1)/ge || $file =~ /^\./) { die "Will not save <$url> as \"$file\".\nPlease override file name on the command line.\n"; } if (defined $directory) { require File::Spec; $file = File::Spec->catfile($directory, $file); } # Check if the file is already present if (-l $file) { die "Will not save <$url> to link \"$file\".\nPlease override file name on the command line.\n"; } elsif (-f _) { die "Will not save <$url> as \"$file\" without verification.\nEither run from terminal or override file name on the command line.\n" unless -t; $shown = 1; print "Overwrite $file? [y] "; my $ans = ; unless (defined($ans) && $ans =~ /^y?\n/) { if (defined $ans) { print "Ok, aborting.\n"; } else { print "\nAborting.\n"; } exit 1; } $shown = 0; } elsif (-e _) { die "Will not save <$url> as \"$file\". Path exists.\n"; } else { print "Saving to '$file'...\n"; use Fcntl qw(O_WRONLY O_EXCL O_CREAT); sysopen(FILE, $file, O_WRONLY | O_EXCL | O_CREAT) || die "Can't open $file: $!"; } } else { $file = $argfile; } unless (fileno(FILE)) { open(FILE, ">", $file) || die "Can't open $file: $!\n"; } binmode FILE unless $opt{a}; $length = $res->content_length; $flength = fbytes($length) if defined $length; $start_t = time; $last_dur = 0; } print FILE $_[0] or die "Can't write to $file: $!\n"; $size += length($_[0]); if (defined $length) { my $dur = time - $start_t; if ($dur != $last_dur) { # don't update too often $last_dur = $dur; my $perc = $size / $length; my $speed; $speed = fbytes($size / $dur) . "/sec" if $dur > 3; my $secs_left = fduration($dur / $perc - $dur); $perc = int($perc * 100); my $show = "$perc% of $flength"; $show .= " (at $speed, $secs_left remaining)" if $speed; show($show, 1); } } else { show(fbytes($size) . " received"); } } ); if (fileno(FILE)) { close(FILE) || die "Can't write to $file: $!\n"; show(""); # clear text print "\r"; print fbytes($size); print " of ", fbytes($length) if defined($length) && $length != $size; print " received"; my $dur = time - $start_t; if ($dur) { my $speed = fbytes($size / $dur) . "/sec"; print " in ", fduration($dur), " ($speed)"; } print "\n"; if (my $mtime = $res->last_modified) { utime time, $mtime, $file; } if ($res->header("X-Died") || !$res->is_success) { if (my $died = $res->header("X-Died")) { print "$died\n"; } if (-t) { print "Transfer aborted. Delete $file? [n] "; my $ans = ; if (defined($ans) && $ans =~ /^y\n/) { unlink($file) && print "Deleted.\n"; } elsif ($length > $size) { print "Truncated file kept: ", fbytes($length - $size), " missing\n"; } else { print "File kept.\n"; } exit 1; } else { print "Transfer aborted, $file kept\n"; } } exit 0; } # Did not manage to create any file print "\n" if $shown; if (my $xdied = $res->header("X-Died")) { print "$progname: Aborted\n$xdied\n"; } else { print "$progname: ", $res->status_line, "\n"; } exit 1; sub fbytes { my $n = int(shift); if ($n >= 1024 * 1024) { return sprintf "%.3g MB", $n / (1024.0 * 1024); } elsif ($n >= 1024) { return sprintf "%.3g KB", $n / 1024.0; } else { return "$n bytes"; } } sub fduration { use integer; my $secs = int(shift); my $hours = $secs / (60 * 60); $secs -= $hours * 60 * 60; my $mins = $secs / 60; $secs %= 60; if ($hours) { return "$hours hours $mins minutes"; } elsif ($mins >= 2) { return "$mins minutes"; } else { $secs += $mins * 60; return "$secs seconds"; } } BEGIN { my @ani = qw(- \ | /); my $ani = 0; sub show { my ($mess, $show_ani) = @_; print "\r$mess" . (" " x (75 - length $mess)); my $msg = $show_ani ? $ani[$ani++]. "\b" : ' '; print $msg; $ani %= @ani; $shown++; } }