package WWWDict; use warnings; use strict; use WWW::Mechanize; use HTML::Strip; =head1 NAME WWW::Dictionary - Interface with www.dictionary.com =head1 VERSION Version 0.01 =cut our $VERSION = '0.01'; our @unwanted; BEGIN { our @unwanted = ( 'CancerWEB\'s On-line Medical Dictionary', 'Download Now or Buy the Book', 'in Acronym Finder', ); } =head1 SYNOPSIS use WWW::Dictionary; my $dictionary = WWW::Dictionary->new(); my $meaning = $dictionary->meaning( $word ); =head1 FUNCTIONS =head2 new Creates a new WWW::Dictionary object. If passed an expression, sets that expression to the current one. my $dictionary = WWW::Dictionary->new(); or my $dictionary = WWW::Dictionary->new('current expression'); =cut sub new { my $self = shift; my $expression = shift || ''; my %dictionary = ( 'current' => $expression, 'dictionary' => {}, ); bless \%dictionary => $self; } =head2 set_expression Sets the current expression to look for (doesn't look, merely sets the expression). $dictionary->set_expression('new expression'); Returns the same expression. =cut sub set_expression { my $self = shift; my $expression = shift; if ($expression) { $self->{'current'} = $expression; } return $expression; } =head2 get_expression Returns the current expression. my $expression = $dictionary->get_expression(); =cut sub get_expression { my $self = shift; return $self->{'current'}; } =head2 get_meaning Returns the meaning of the current expression by fetching from www.dictionary.com. If the expression has already been fetched (if it still has the information stored), returns what is already on memory. my $meaning = $dictionary->get_meaning(); You can also pass a new expression, which is set to be the current expression before fetching is made: my $meaning = $dictionary->get_meaning('some other expression'); =cut sub get_meaning { my $self = shift; my $expression = shift; if ($expression) { $self->set_expression($expression); } else { $expression = $self->get_expression(); } if (defined $self->{'dictionary'}->{$expression}) { return $self->{'dictionary'}->{$expression}; } else { # retrieve the webpage my $mech = WWW::Mechanize->new(); $mech->get( "http://dictionary.reference.com/search?q=$expression" ); my $cont = $mech->content; # if there's no meaning if ( $cont =~ /No entry found for $expression<\/i>./ ) { $self->set_meaning( $expression, "Entry not found"); } # if there's a meaning else { my $clean_text = parseWebpage($cont); # store the meaning $self->set_meaning( $expression, $clean_text); } return $self->{'dictionary'}->{$expression}; } } =head2 set_meaning Sets a meaning in the object dictionary. $dictionary->set_meaning( $word, $meaning ); From this point on (until a C is called), retrieving the meaning of $word will return whatever was on $meaning. =cut sub set_meaning { my $self = shift; my ($expression, $meaning) = @_; if ($expression) { $self->{'dictionary'}->{$expression} = $meaning; } else { return undef; } } =head2 get_dictionary Returns the current dictionary inside the object. my %dictionary = %{ $dictionary->get_dictionary }; =cut sub get_dictionary { my $self = shift; return $self->{'dictionary'}; } =head2 reset_dictionary Resets the current dictionary. $dictionary->reset_dictionary; =cut sub reset_dictionary { my $self = shift; for (keys %{$self->{'dictionary'}}) { delete $self->{'dictionary'}->{$_}; } } =head1 Internal Functions =head2 parseWebpage Parses the HTML into a pretty definition =cut sub parseWebpage { my ($text, $t, $definition, $p) = @_; utf8::decode($text); $p = HTML::TokeParser->new( \$text ); # Skip down to the begining of the definition while($t = $p->get_tag()) { last if ( ref($t->[1]) and $t->[1]->{'class'} and $t->[1]->{'class'} eq 'luna-Ent') } while($t = $p->get_tag()) { last if ( ref($t->[1]) and $t->[1]->{'class'} and $t->[1]->{'class'} eq 'pbk') } # Build the definition while($t = $p->get_tag()) { # The tail class indicates the definition is done last if ( ref($t->[1]) and $t->[1]->{'class'} and $t->[1]->{'class'} eq 'tail'); my $txt = $p->get_trimmed_text(); if ($txt =~ /.noun/ or $txt =~ /.verb/ or $txt =~ /.adjective/) { $definition .= "\n" if ($definition); $definition .= $txt; } elsif ($txt =~ /^\d+\.$/) { $definition .= "\n" if ($definition); $definition .= "$txt"; } else { $definition .= "$txt "; } } return $definition; } =head1 AUTHOR Jose Castro, C<< >> =head1 BUGS Please report any bugs or feature requests to C, or through the web interface at L. I will be notified, and then you'll automatically be notified of progress on your bug as I make changes. =head1 SUPPORT You can find documentation for this module with the perldoc command. perldoc WWW::Dictionary You can also look for information at: =over 4 =item * AnnoCPAN: Annotated CPAN documentation L =item * CPAN Ratings L =item * RT: CPAN's request tracker L =item * Search CPAN L =back =head1 COPYRIGHT & LICENSE Copyright 2005 Jose Castro, all rights reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut 1; # End of WWW::Dictionary