#!/usr/bin/perl -w
# $Id: sman,v 1.46 2005/08/29 16:10:07 joshr Exp $ 
# ranked, enhanced 'apropos' emulator
use strict; 
use warnings;

use Getopt::Long qw(:config no_ignore_case);
use Sman::Util;	# for $VERSION
use Sman::IndexVersion;
use FindBin qw($Bin); 
use Sman::Config;
use bytes; # NOTE: swish-e won't understand UTF8/multi-byte chars

my ($max,$rankshow,$fileshow,$cnt,$help,$configfile) = (undef,0,0,0,0,"");
my $numbershow;
my $begin = 0;
my $verbose;
my $debug=0;	# undocumented, and can only be set from cmd line, for testing.
my $versionshow;
my $index = "";
my $repeatshow = 0;
my $quote = "";	# no quoting
my $extractshow;	# do we show extract of text?
my $digestshow;		# do we show the digest?


GetOptions( "max=i"   => \$max, 
            "index=s" => \$index,
            "config=s"=> \$configfile,
            "number"  => \$numbershow,
            "repeats" => \$repeatshow,
            "begin=i" => \$begin,
            "rank"    => \$rankshow,
            "file"    => \$fileshow,
            "extract" => \$extractshow,
            "digest" => \$digestshow,
            "Debug" => \$debug,
            "verbose!"=> \$verbose,
            "help"    => \$help,
            "quote=s" => \$quote,
            "VERSION" => \$versionshow,
) or ($help = 1);

my ($width, $height) = (80, 24);
if (-t STDIN && -t STDOUT) {	# if we're connected to a terminal, as per Perl Cookbook p.518
	eval {
		require 'Term/Size.pm';		# we 'require' this so we don't require it :)
		($width, $height) = (Term::Size::chars(*STDOUT{"IO"}));	
		# Term::Size::chars doesn't seem to work on OS X
		print "sman: Got width, height of $width, $height\n" if $debug;
		if($width && $height) { 
			$height = MAX(1, MIN(20, $height-3)); 
		} else {
			($width, $height) = (80, 24); 
		}
	};
}


my $smanconfig = new Sman::Config();	
if ($configfile) {
	my $fileread = $smanconfig->ReadSingleConfigFile($configfile);
	if ($debug) { print "sman: Read config file $fileread.\n"; }
} else {
	my $fileread = $smanconfig->ReadDefaultConfigFile($verbose);
}
unless($max) {	
	# unless user set a max number of rows to show,
	# show 1/2 as many rows (because there's two lines for each hit)
	($max) = (($extractshow) ? ($height/2) : ($height));	
}

if (defined($verbose)) { $smanconfig->SetConfigData("VERBOSE", $verbose); }

if (!$index && $smanconfig->GetConfigData("SWISHE_IndexFile")) {
	$index = $smanconfig->GetConfigData("SWISHE_IndexFile");
}
unless($index) {
	$index = "sman.index";
}
unless (-f $index || $index =~ m!/!) {
	$index = $Bin . "/" . $index;
}
if ($help) {	# the search comes in through @ARGV
	print Usage();
	exit(0);
} 
Sman::Util::CheckSwisheVersion();

if ($versionshow) {	# move this to Sman/Util.pm ?
	$|++;
	my $str = Sman::Util::GetVersionString("sman",
		$smanconfig->GetConfigData("SWISHECMD") || 'swish-e');
	print "$str\n";
	print Sman::Util::GetIndexDescriptionString( $index || "/dev/null");
	exit(0);
}

my $index_versions = new Sman::IndexVersion( $smanconfig );
my $versions_hashref = $index_versions->get_versions();
#if ($debug) { print Data::Dumper::Dumper( $versions_hashref ); }
my ($rankscheme1_ok) = 
	(exists($versions_hashref->{SMAN_DATA_VERSION}) && 
		$versions_hashref->{SMAN_DATA_VERSION});
# will have values like 'VERSION' and 'SMAN_DATA_VERSION'

if ($verbose) {
	print Sman::Util::GetIndexDescriptionString( $index || "/dev/null" );
}

my ($mtime) =  int((-M $index || 0) / 30);
if ($mtime >= 1) {
	my ($month) = ($mtime > 1 ? "months" : "month");
	warn "sman: Index $index $mtime $month old. Re-run sman-update!\n";
}

my $query = join(" ", @ARGV); 
my $handle;
my $results;
eval { 
	require SWISH::API;	# defer to here so we can give a nicer error message
	$handle = SWISH::API->new($index); 
	check_for_swishe_error($handle);
	if ($rankscheme1_ok) {
		print "Setting rank scheme\n" if $debug;
		$handle->RankScheme(1);
		check_for_swishe_error($handle);
	}

	print "Setting query \n" if $debug; 
	$results = $handle->Query( $query );	
	# this aborts (uncatchably in 2.4.3) if rankscheme is 1,
    # unless index was built w/ IgnoreTotalWordCountWhenRanking set to 0  
	check_for_swishe_error($handle);

	print "Checking num hits\n" if $debug;
	if ( $results->Hits() <= 0 ) {
	   warn "sman: No Results for '$query'.\n"; 
	}
	check_for_swishe_error($handle);
};
if ($@) { 
		warn "sman: index not updated for new rankscheme\n" if $verbose; 
		$handle->RankScheme(0); 
}

my (%seen, %digests);
my $numrepeats = 0;
my @toshow = ();
die "sman: Error: can't get results from swish-e. Bad index?\n" unless $results;
while ( my $res = $results->NextResult() ) {
	check_for_swishe_error($handle);
	$cnt++;
	next if ($begin && $cnt - $numrepeats <= $begin);
	my $isskippedrepeat = 0;
	my ($title, $sec, $desc, $digest, $manpage) = (
		$res->ResultPropertyStr( "swishtitle" ), 
		$res->ResultPropertyStr( "sec" ),
		$res->ResultPropertyStr( "desc" ),
		$res->ResultPropertyStr( "digest" ),
		$extractshow ? $res->ResultPropertyStr( "manpage" ) : "" );
	if ($digest eq "(null)") { $digest = ""; }	# fixup in case of old sman data
	$desc = "" unless defined($desc);
	chomp($desc);	# this should be done at parse time: TODO
	next unless ($title || $sec || $desc);
	unless($repeatshow) {
		my $k = "$title/$sec/$desc"; 
		$numrepeats++, $isskippedrepeat++ if (defined($seen{$k}) || defined($digests{$digest}));
		$seen{$k}++;
		$digests{$digest}++ if $digest;
	}
	next if ($isskippedrepeat);
	push(@toshow, [$title, $sec, $desc, $digest, $manpage, $res]);
	last if (scalar(@toshow) >= $max);
}
for (my $i=0; $i < scalar(@toshow) && $i < $max; $i++) {
	my ($title, $sec, $desc, $digest, $manpage, $res) = @ { $toshow[$i] };
	my $line = "";
	$line .= sprintf ("#%d ", $i + $begin + 1) if $numbershow;
	$line .= sprintf ("x%s.. ", substr($digest,0,4)) if $digestshow;
	$line .= sprintf "%4d ", $res->ResultPropertyStr( "swishrank" )
      if $rankshow;

   $line .= sprintf "%-15s (%s) ", $quote.$title.$quote, $sec;
	my $sofarlen = length($line);
	my ($docpath) = ($fileshow ?  ($res->ResultPropertyStr( "swishdocpath" )) : ("") );
	my ($docpathlen) = ($docpath ? (length($docpath)+1) : (0) );
	my $descbytes = MAX(0, $width - $sofarlen - $docpathlen);
	if (length($desc) > $descbytes) {
		$desc = substr($desc, 0, $descbytes - 3 - 2*length($quote)) || "";
		$desc =~ s/\s+$//; 	# remove trailing ws
		$desc .= "..." if (length($desc) <= $descbytes - 3 - 2*length($quote));
	}
	$desc = $quote . $desc . $quote; 	# if quote is empty this makes no change
	my $extrabytes = $descbytes - length($desc);	# how many spaces to fill?
	$desc .= " " x $extrabytes if ($extrabytes > 0);
	if ($descbytes > 0) {
	   $line .= sprintf('%-' . $descbytes . 's', $desc);
	}
	$line .= sprintf(" %s", $docpath) if $fileshow; 
	print "$line\n";
	if ($extractshow) {
		my $manpage = $res->ResultPropertyStr("manpage") || "";
		if ($manpage eq "(null)") {
			$extractshow = 0;
		} else {
			$manpage =~ s/^(\s*)NAME:?\s*/$1/;	
				# many manpages begin with NAME blah blah blah, strip that off
				# because that data is in the other fields returned
			my $extract = Sman::Util::ExtractSummary($manpage, \@ARGV, " " x 20, $width - 3);
			print $extract . "\n" if $extract;
		}
	}
} 
if ($verbose) {
	printf "  (Total %d hits found", $results->Hits();
	print ", $numrepeats repeats not shown" if ($numrepeats);
	print ")\n";
}

sub check_for_swishe_error {
	my $handle = shift;
	if ( my $error = $handle->Error( ) ) {
		my $errstr = $handle->ErrorString();
		my $extra = "";
		if ($errstr =~ /is empty/) {
			$extra = "(perhaps you need to run sman-update?)";
		}
		die "sman: Error: ",  join(": ", "'" . $handle->ErrorString() . "'", $extra) .  "\n";
	} 
}

sub Usage {
	return "Usage: sman [--max=#] [--rank] [--number] [--index='index'] \n" . 
          "            [--file] [--help] [--repeats] [--begin=#'] [--config=file]\n" .
		  "            [--quote='\"'] [--verbose] [--VERSION] searchword [...]\n" . 
			 "Ranked freetext searches on manpages.\n" . 
			 "Options:\n" . 
			 "  --max=#:                limit number of results, default 20\n" .
			 "  --rank:                 show the rank of each hit\n" . 
			 "  --number:               show the number of each hit\n" . 
			 "  --index=index:          specify an index (overrides config)\n" . 
			 "  --file:                 show the source man file for each hit\n" . 
			 "  --help:                 this help information\n" .
			 "  --repeats:              show repeat manpages\n" . 
			 "  --begin=#:              start showing hits at number N\n" . 
			 "  --config=my-sman.conf:  a config file (specs an index file)\n" . 
			 "  --quote='\"':            specify a quoting char for output\n" . 
			 "  --extract:              show extraction of manpage for each hit\n" . 
			 "  --verbose:              show more output\n" .
			 "  --VERSION:              show version and exit\n"; 
}
sub MAX {
	my $max = shift;
	for (@_) { $max = $_ if $_ > $max; }
	return $max;
}
sub MIN {
	my $min = shift;
	for (@_) { $min = $_ if $_ < $min; }
	return $min;
}

__END__ 

=head1 NAME

sman - Perl tool for searching man pages

=head1 SYNOPSIS

See 'sman --help'

  % sman -m 10 --file --rank linux kernel
    # show first 10 hits about the linux kernel
    # with the manpage's Rank and Filename

  % sman '(linux and kernel and module) or (eepro100 and ipchains)'
    # a more complex query

=head1 DESCRIPTION 

Sman is a Searcher for Man pages. 

=head1 AUTHOR

Josh Rabinowitz <joshr>

=head1 SEE ALSO

L<sman>, L<sman-update>, L<sman.conf>

=cut

