#!/usr/bin/perl -w
# $Id: sman,v 1.24 2004/06/06 17:08:54 joshr Exp $ 
# ranked, enhanced 'apropos' emulator
use strict; 
use warnings;

use Getopt::Long qw(:config no_ignore_case);
use Sman::Util;	# for $VERSION
use SWISH::API;
use FindBin qw($Bin); 
use Sman::Config;
# Output width in columns; stays at 80 unless a terminal reports its size.
my $width = 80;
# Only probe for a size when both stdin and stdout are terminals
# (as per Perl Cookbook p.518).
if (-t STDIN && -t STDOUT) {
	# Term::Size is optional: if loading it or calling it dies, the eval
	# absorbs the error and $width keeps its default.
	eval {
		require Term::Size;
		my $columns = Term::Size::chars(*STDOUT{IO});
		$width = $columns || 80;
	};
}

use bytes; # NOTE: swish-e won't understand UTF8/multi-byte chars

# Option state. Each variable is filled in by GetOptions below; the values
# given here are what the rest of the script sees when an option is absent.
my $max;              # --max; undef means "not given"
my $rankshow   = 0;   # --rank
my $fileshow   = 0;   # --file
my $cnt        = 0;   # running count of results read from the index
my $help       = 0;   # --help; also switched on when option parsing fails
my $configfile = "";  # --config
my $numbershow;       # --number
my $begin      = 0;   # --begin
my $verbose;          # --verbose / --noverbose; undef when neither is given
my $debug      = 0;   # for testing; no entry in the option list sets it
my $versionshow;      # --VERSION
my $index      = "";  # --index
my $repeatshow = 0;   # --repeats
my $quote      = "";  # --quote; the empty string means no quoting
my $extractshow;      # --extract: do we show an extract of the text?
my $digestshow;       # --digest: do we show the digest?

# Parse the command line; any parse failure falls back to showing the help.
unless (
	GetOptions(
		"max=i"    => \$max,
		"index=s"  => \$index,
		"config=s" => \$configfile,
		"number"   => \$numbershow,
		"repeats"  => \$repeatshow,
		"begin=i"  => \$begin,
		"rank"     => \$rankshow,
		"file"     => \$fileshow,
		"extract"  => \$extractshow,
		"digest"   => \$digestshow,
		"verbose!" => \$verbose,
		"help"     => \$help,
		"quote=s"  => \$quote,
		"VERSION"  => \$versionshow,
	)
) {
	$help = 1;
}

# Load the configuration: the file named by --config if there is one,
# otherwise whatever ReadDefaultConfigFile() locates.
my $smanconfig = Sman::Config->new();
if ($configfile) {
	my $fileread = $smanconfig->ReadSingleConfigFile($configfile);
	print "Read config file $fileread.\n" if $verbose;
}
else {
	my $fileread = $smanconfig->ReadDefaultConfigFile($verbose);
}

# --VERSION: print the version string and stop.
if ($versionshow) {	# move this to Sman/Util.pm ?
	$|++;
	my $swishecmd = $smanconfig->GetConfigData("SWISHECMD") || 'swish-e';
	my $str = Sman::Util::GetVersionString("sman", $swishecmd);
	print "$str\n";
	exit(0);
}

# --help, a failed option parse, or no search words at all: show usage.
if ($help || !@ARGV) {
	print Usage();
	exit(0);
}

# Fewer hits by default when each one is followed by an extract.
if (!defined($max)) {
	$max = $extractshow ? 10 : 20;
}

# An explicit --verbose / --noverbose overrides the configured value.
$smanconfig->SetConfigData("VERBOSE", $verbose) if defined($verbose);

# Index to search: --index first, then the configured SWISHE_IndexFile,
# then "sman.index". Paths not starting with "/" are taken relative to
# the directory holding this script.
$index = $smanconfig->GetConfigData("SWISHE_IndexFile")
	if !$index && $smanconfig->GetConfigData("SWISHE_IndexFile");
$index = "sman.index" if !$index;
$index = $Bin . "/" . $index if $index !~ m!^/!;
print "Using index $index\n" if $verbose;

# All remaining command line words together form one swish-e query.
my $query = join(" ", @ARGV);
my $handle = SWISH::API->new($index);

# Errors are reported through the handle rather than by dying, so they have
# to be looked at before the handle or its results are used any further.
# A critical error (for example an index that could not be opened) leaves
# nothing usable behind: stop with the library's own description of it.
my $die_if_critical = sub {
	return unless $handle->Error() && $handle->CriticalError();
	die "Error: ", $handle->ErrorString(), " ", $handle->LastErrorMsg(), "\n";
};

$die_if_critical->();	# did the index open?
my $results = $handle->Query( $query );
$die_if_critical->();	# did the query run?

# Non-critical conditions are only reported; the result list is then simply
# empty and the rest of the script prints nothing.
if ( $results->Hits() <= 0 ) {
	warn "No Results for '$query'.\n";
}
if ( my $error = $handle->Error( ) ) {
	warn "Error: ",  $handle->ErrorString(), "\n";
}

# Walk the result list and collect up to $max rows to display, dropping
# repeated hits unless --repeats was given.
my (%seen, %digests);	# "title/sec/desc" keys and digests already met
my $numrepeats = 0;	# how many repeated hits were dropped
my @toshow = ();	# rows: [title, sec, desc, digest, manpage, result]
RESULT:
while ( my $res = $results->NextResult() ) {
	$cnt++;

	# --begin: pass over the leading hits.
	if ($begin && $cnt - $numrepeats <= $begin) {
		next RESULT;
	}

	# The manpage text is only fetched when an extract will be shown.
	my ($title, $sec, $desc, $digest, $manpage) = (
		$res->ResultPropertyStr("swishtitle"),
		$res->ResultPropertyStr("sec"),
		$res->ResultPropertyStr("desc"),
		$res->ResultPropertyStr("digest"),
		( $extractshow ? $res->ResultPropertyStr("manpage") : "" ),
	);
	$digest = "" if $digest eq "(null)";	# fixup in case of old sman data
	$desc = "" if !defined($desc);
	chomp($desc);	# TODO: this should be done at parse time

	# A hit with no title, section or description has nothing to show.
	next RESULT if !($title || $sec || $desc);

	if (!$repeatshow) {
		# A hit is a repeat when either its title/sec/desc key or its
		# digest has been met before; it is recorded either way.
		my $key = "$title/$sec/$desc";
		my $is_repeat = defined($seen{$key}) || defined($digests{$digest});
		$seen{$key}++;
		$digests{$digest}++ if $digest;
		if ($is_repeat) {
			$numrepeats++;
			next RESULT;
		}
	}

	push(@toshow, [$title, $sec, $desc, $digest, $manpage, $res]);
	last RESULT if @toshow >= $max;
}
# Print one formatted line per collected hit, optionally followed by an
# extract of the manpage text.
for my $i (0 .. $#toshow) {
	# The collection loop stores a row before it tests $max, so --max=0
	# still leaves one row behind; never print more than $max rows.
	last if $i >= $max;
	my ($title, $sec, $desc, $digest, $manpage, $res) = @{ $toshow[$i] };

	# Leading columns: optional hit number, digest prefix and rank,
	# followed by "title (section) ".
	my $line = "";
	$line .= sprintf("#%d ", $i + $begin + 1) if $numbershow;
	$line .= sprintf("x%s.. ", substr($digest, 0, 4)) if $digestshow;
	$line .= sprintf("%4d ", $res->ResultPropertyStr("swishrank")) if $rankshow;
	$line .= sprintf("%-15s (%s) ", $quote . $title . $quote, $sec);

	# The description gets whatever is left of the line once the leading
	# columns and (with --file) the document path plus its separating
	# space are accounted for.
	my ($docpath) = $fileshow ? $res->ResultPropertyStr("swishdocpath") : "";
	my $docpathlen = $docpath ? length($docpath) + 1 : 0;
	my $descbytes = MAX(0, $width - length($line) - $docpathlen);

	# Room for the description text itself: the two quote strings count
	# against the column as well.
	my $room = MAX(0, $descbytes - 2 * length($quote));
	if (length($desc) > $room) {
		# Leave space for the "..." marker. The length is clamped at
		# zero because substr() reads a negative length as "drop that
		# many characters from the end", which would keep nearly the
		# whole description instead of shortening it.
		my $keep = MAX(0, $room - 3);
		$desc = substr($desc, 0, $keep);
		$desc =~ s/\s+$//; 	# remove trailing ws
		$desc .= "..." if $room >= 3;
	}
	$desc = $quote . $desc . $quote;

	# Left-justify and pad to the full column so the path lines up.
	$line .= sprintf('%-' . $descbytes . 's', $desc) if $descbytes > 0;
	$line .= sprintf(" %s", $docpath) if $fileshow;
	print "$line\n";

	if ($extractshow) {
		# The manpage text was already fetched when the row was collected.
		my $text = $manpage || "";
		$text =~ s/^(\s*)NAME:?\s*/$1/;	# many manpages begin with NAME
		my $extract = Sman::Util::ExtractSummary($text, \@ARGV, " " x 20, $width - 5);
		print $extract . "\n" if $extract;
	}
}
if ($numrepeats && $verbose) {
	print "  ($numrepeats repeats not shown.)\n";
}

# Return the help text printed for --help, for an unparsable command line,
# and when no search words are given. Takes no arguments; the caller prints
# the returned string.
sub Usage {
	return "Usage: sman [--max=#] [--rank] [--number] [--index='index'] \n" .
	       "            [--file] [--help] searchwords\n" .
	       "Ranked freetext searches on manpages.\n" .
	       "Options:\n" .
	       "  --max=#:                limit number of results, default 20\n" .
	       "                          (10 with --extract)\n" .
	       "  --number:               show the number of each hit\n" .
	       "  --repeats:              show repeat manpages\n" .
	       "  --begin=#:              start showing hits at number N\n" .
	       "  --rank:                 show the rank of each hit\n" .
	       "  --file:                 show the source man file for each hit\n" .
	       "  --extract:              show extraction of manpage for each hit\n" .
	       "  --digest:               show the start of each hit's digest\n" .
	       "  --config=my-sman.conf:  a config file (specs an index file)\n" .
	       "  --index=index:          specify an index (overrides config)\n" .
	       "  --quote=\"'\":            specify a quoting char for output\n" .
	       "  --verbose:              show more output\n" .
	       "  --VERSION:              show version and exit\n" .
	       "  --help:                 this help information\n";
}
# Return the numerically smaller of two values; on a tie the second
# argument is the one returned.
sub MIN {
	my ($first, $second) = @_;
	return ($first < $second) ? $first : $second;
}
# Return the numerically larger of two values; on a tie the second
# argument is the one returned.
sub MAX {
	my ($first, $second) = @_;
	return ($first > $second) ? $first : $second;
}

__END__ 

=head1 NAME

  sman - Perl tool for searching man pages

=head1 SYNOPSIS

  % sman boot disk
    # searches for man pages about 'boot disk'

  % sman -m 10 -f --rank linux kernel
    # show first 10 hits about the linux kernel
    # with the manpage's Rank and Filename

  % sman '(linux and kernel and module) or (eepro100 and ipchains)'
    # a more complex query

  % sman swishtitle=linux and desc=kernel
    # where title contains 'linux' and description contains 'kernel'

=head1 DESCRIPTION 

Sman is the Searcher for Man pages. It depends on an index which is built by
sman-update and by default resides in /var/lib/sman/sman.index.
 
Both sman and sman-update search for the first configuration file named sman.conf in /etc, 
/usr/local/etc/, $HOME, or the directory with sman. If no sman.conf file is found 
(or specified through the --config option), then the default 
configuration in /usr/local/etc/sman-defaults.conf will be used.

NOTE: In all cases command line options take precedence over directives read from
configuration files.
	
=head1 AUTHOR

Josh Rabinowitz

=head1 SEE ALSO

the output of 'sman --help',
L<sman>, L<sman-update>, L<sman.conf>, L<SWISH-RUN>, 
and the unrelated but similar L<man -k>, L<apropos> and 
L<whatis>

=cut

