package Digest::MD5::File;
use strict;
use warnings;
use Carp;
use Digest::MD5;
eval { require Encode; };
use LWP::UserAgent;
require Exporter;
our @ISA = qw(Exporter Digest::MD5);
our @EXPORT_OK = qw(dir_md5 dir_md5_hex dir_md5_base64
file_md5 file_md5_hex file_md5_base64
url_md5 url_md5_hex url_md5_base64);
our $BINMODE = 1;
our $UTF8 = 0;
our $NOFATALS = 0;
sub import {
my $me = shift;
my %imp;
@imp{ @_ } = ();
for(@EXPORT_OK) {
delete $imp{$_} if exists($imp{$_});
}
$BINMODE = 0 if exists $imp{-nobin};
$UTF8 = 1 if exists $imp{-utf8};
$NOFATALS = 1 if exists $imp{-nofatals};
for(keys %imp) {
s/^-//;
$imp{$_}='' unless $_ =~ m/^(no)?(bin|utf8|fatals)$/;
push @EXPORT_OK, $_ unless $_ =~ m/^(no)?(bin|utf8|fatals)$/;
delete $imp{"-$_"} if exists $imp{"-$_"};
}
$me->export_to_level(1, $me, grep(!/^-/, @_));
Digest::MD5->import(keys %imp);
}
our $VERSION = '0.08';
my $getfh = sub {
my $file = shift;
croak "$file: Does not exist" if !-e $file && !$NOFATALS;
croak "$file: Is a directory" if -d $file && !$NOFATALS;
if(-e $file && !-d $file) {
open my $fh, $file or return;
binmode $fh if $BINMODE;
return $fh;
}
else { return undef; }
};
my $getur = sub {
my $res = LWP::UserAgent->new->get(shift());
return $res->is_success ? $res->content : undef;
};
sub Digest::MD5::adddir {
my $md5 = shift;
my $base = shift;
for my $key ( sort keys %{ _dir($base, undef, undef, 3) }) {
next if !$key;
my $file = File::Spec->catfile($base, $key);
$md5->addpath($file) or carp "addpath $file failed: $!" if !-d $file;
}
return 1;
}
sub _dir {
my($dir, $hr, $base, $type, $cc) = @_;
require File::Spec; # only load it if its needed
$cc = {} if ref $cc ne 'HASH';
$hr = {} if ref $hr ne 'HASH';
$base = $dir if !defined $base;
$type = 0 if ! defined $type;
my $_md5func = \&file_md5;
$_md5func = \&file_md5_hex if $type eq '1';
$_md5func = \&file_md5_base64 if $type eq '2';
opendir(DIR, $dir) or return;
my @dircont = sort grep { $_ ne '.' && $_ ne '..' } readdir(DIR);
closedir DIR;
for my $file( @dircont ) {
my $_dirver = File::Spec->catdir($dir, $file);
my $full = -d $_dirver ? $_dirver
: File::Spec->catfile($dir, $file);
my $short = File::Spec->abs2rel( $full, $base );
if(-l $full) {
my $target = readlink $full;
$full = $target if -d $target;
}
if(exists $hr->{$full}) {
carp "$full seen already, you may have circular links";
$cc->{$full}++;
croak "$full is in a circular link, bailing out."
if $cc->{$full} > 4;
}
if(-d $full) {
$hr->{ $short } = '';
_dir($full, $hr, $base, $type, $cc) or return;
}
else {
$hr->{ $short } = '';
$hr->{ $short } = $_md5func->( $full ) or return if $type ne '3';
}
}
return $hr;
}
sub dir_md5 {
push @_, undef if @_ < 3;
push @_, undef if @_ < 3;
_dir(@_, 0);
}
sub dir_md5_hex {
push @_, undef if @_ < 3;
push @_, undef if @_ < 3;
_dir(@_, 1);
}
sub dir_md5_base64 {
push @_, undef if @_ < 3;
push @_, undef if @_ < 3;
_dir(@_, 2);
}
sub file_md5 {
my ($file,$bn,$ut) = @_;
local $BINMODE = $bn if defined $bn;
local $UTF8 = $ut if defined $ut;
my $fh = $getfh->($file) or return;
my $md5 = Digest::MD5->new();
my $buf;
while(my $l = read($fh, $buf, 1024)) {
$md5->add( $UTF8 ? Encode::encode_utf8($buf) : $buf );
}
return $md5->digest;
}
sub file_md5_hex {
my ($file,$bn,$ut) = @_;
local $BINMODE = $bn if defined $bn;
local $UTF8 = $ut if defined $ut;
my $fh = $getfh->($file) or return;
my $md5 = Digest::MD5->new();
my $buf;
while(my $l = read($fh, $buf, 1024)) {
$md5->add( $UTF8 ? Encode::encode_utf8($buf) : $buf );
}
return $md5->hexdigest;
}
sub file_md5_base64 {
my ($file,$bn,$ut) = @_;
local $BINMODE = $bn if defined $bn;
local $UTF8 = $ut if defined $ut;
my $fh = $getfh->($file) or return;
my $md5 = Digest::MD5->new();
my $buf;
while(my $l = read($fh, $buf, 1024)) {
$md5->add( $UTF8 ? Encode::encode_utf8($buf) : $buf );
}
return $md5->b64digest;
}
sub url_md5 {
my $cn = $getur->(shift()) or return;
my ($ut) = shift;
local $UTF8 = $ut if defined $ut;
return Digest::MD5::md5($cn) if !$UTF8;
return Digest::MD5::md5(Encode::encode_utf8($cn));
}
sub url_md5_hex {
my $cn = $getur->(shift()) or return;
my ($ut) = shift;
local $UTF8 = $ut if defined $ut;
return Digest::MD5::md5_hex($cn) if !$UTF8;
return Digest::MD5::md5_hex(Encode::encode_utf8($cn));
}
sub url_md5_base64 {
my $cn = $getur->(shift()) or return;
my ($ut) = shift;
local $UTF8 = $ut if defined $ut;
return Digest::MD5::md5_base64($cn) if !$UTF8;
return Digest::MD5::md5_base64(Encode::encode_utf8($cn));
}
sub Digest::MD5::addpath {
my $md5 = shift;
my ($fl,$bn,$ut) = @_;
local $BINMODE = $bn if defined $bn;
local $UTF8 = $ut if defined $ut;
if(ref $fl eq 'ARRAY') {
for my $pth (@{ $fl }) {
$md5->addpath($pth, $bn, $ut) or return;
}
}
else {
my $fh = $getfh->($fl) or return;
my $buf;
while(my $l = read($fh, $buf, 1024)) {
!$UTF8 ? $md5->add($buf) : $md5->add(Encode::encode_utf8($buf));
}
}
return 1;
}
sub Digest::MD5::addurl {
my $md5 = shift;
my $cn = $getur->(shift()) or return;
my $ut = shift;
local $UTF8 = $ut if defined $ut;
!$UTF8 ? $md5->add($cn) : $md5->add(Encode::encode_utf8($cn));
}
1;
__END__
=head1 NAME
Digest::MD5::File - Perl extension for getting MD5 sums for files and urls.
=head1 SYNOPSIS
use Digest::MD5::File qw(dir_md5_hex file_md5_hex url_md5_hex);
my $md5 = Digest::MD5->new;
$md5->addpath('/path/to/file');
my $digest = $md5->hexdigest;
my $digest = file_md5($file);
my $digest = file_md5_hex($file);
my $digest = file_md5_base64($file);
my $md5 = Digest::MD5->new;
$md5->addurl('http://www.tmbg.com/tour.html');
my $digest = $md5->hexdigest;
my $digest = url_md5($url);
my $digest = url_md5_hex($url);
my $digest = url_md5_base64($url);
my $md5 = Digest::MD5->new;
$md5->adddir('/directory');
my $digest = $md5->hexdigest;
my $dir_hashref = dir_md5($dir);
my $dir_hashref = dir_md5_hex($dir);
my $dir_hashref = dir_md5_base64($dir);
=head1 DESCRIPTION
Get MD5 sums for files of a given path or content of a given url.
=head1 EXPORT
None by default.
You can export any file_* dir_*, or url_* function and anything L<Digest::MD5> can export.
use Digest::MD5::File qw(md5 md5_hex md5_base64); # 3 Digest::MD5 functions
print md5_hex('abc123'), "\n";
print md5_base64('abc123'), "\n";
=head1 OBJECT METHODS
=head2 addpath()
my $md5 = Digest::MD5->new;
$md5->addpath('/path/to/file.txt')
or die "file.txt is not where you said: $!";
or you can add multiple files by specifying an array ref of files:
$md5->addpath(\@files);
=head2 adddir()
addpath()s each file in a directory recursively. Follows the same rules as the dir_* functions.
my $md5 = Digest::MD5->new;
$md5->adddir('/home/tmbg/')
or die "See warning above to see why I bailed: $!";
=head2 addurl()
my $md5 = Digest::MD5->new;
$md5->addurl('http://www.tmbg.com/tour.html')
or die "They Must Be not on tour";
=head1 file_* functions
Get the digest in variouse formats of $file.
If file does not exist or is a directory it croaks (See NOFATALS for more info)
my $digest = file_md5($file) or warn "$file failed: $!";
my $digest = file_md5_hex($file) or warn "$file failed: $!";
my $digest = file_md5_base64($file) or warn "$file failed: $!";
=head1 dir_* functions
Returns a hashref whose keys are files relative to the given path and the values are the MD5 sum of the file or and empty string if a directory.
It recurses through the entire depth of the directory.
Symlinks to files are just addpath()d and symlinks to directories are followed.
my $dir_hashref = dir_md5($dir) or warn "$dir failed: $!";
my $dir_hashref = dir_md5_hex($dir) or warn "$dir failed: $!";
my $dir_hashref = dir_md5_base64($dir) or warn "$dir failed: $!";
=head1 url_* functions
Get the digest in various formats of the content at $url (Including, if $url points to directory, the directory listing content).
Returns undef if url fails (IE if L<LWP::UserAgent>'s $res->is_success is false)
my $digest = url_md5($url) or warn "$url failed";
my $digest = url_md5_hex($url) or warn "$url failed";
my $digest = url_md5_base64($url) or warn "$url failed";
=head1 SPECIAL SETTINGS
=head2 BINMODE
By default files are opened in binmode. If you do not want to do this you can unset it a variety of ways:
use Digest::MD5::File qw(-nobin);
or
$Digest::MD5::File::BINMODE = 0;
or at the function/method level by specifying its value as the second argument:
$md5->addpath($file,0);
my $digest = file_md5_hex($file,0);
=head2 UTF8
In some cases you may want to have your data utf8 encoded, you can do this the following ways:
use Digest::MD5::File qw(-utf8);
or
$Digest::MD5::File::UTF8 = 1;
or at the function/method level by specifying its value as the third argument for files and second for urls:
$md5->addpath($file,$binmode,1);
my $digest = file_md5_hex($file,$binmode,1);
$md5->addurl($url,1);
url_md5_hex($url,1);
It use's L<Encode>'s encode_utf8() function to do the encoding. So if you do not have Encode (pre 5.7.3) this won't work :)
=head2 NOFATALS
Instead of croaking it will return undef if you set NOFATALS to true.
You can do this two ways:
$Digest::MD5::File::NOFATALS = 1;
or the -nofatals flag:
use Digest::MD5::File qw(-nofatals);
my $digest = file_md5_hex($file) or die "$file failed";
$! is not set so its not really helpful if you die().
=head1 SEE ALSO
L<Digest::MD5>, L<Encode>, L<LWP::UserAgent>
=head1 AUTHOR
Daniel Muey, L<http://drmuey.com/cpan_contact.pl>
=head1 COPYRIGHT AND LICENSE
Copyright 2005 by Daniel Muey
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=cut