#!/usr/bin/perl
use strict;

# Attempts to guess the file type of an OLE container file and rename it
# accordingly.
# Depends on an ancient perl4 script from 1998 named laola.pl to parse the
# file. It would be better to use the OLE::Storage module from CPAN, but that
# module is broken at the time of writing.

# This is not a hash, because it is important that the extensions are tried in
# order of importance.
my @extensions = (
    WordDocument       => "doc",
    PowerPointDocument => "ppt",
    Workbook           => "xls", # New Excel versions
    StarWriterDocument => "sdw",
    StarCalcDocument   => "sdc",
    StarDrawDocument3  => "sdd", # Could also be .sda
    Book               => "xls", # Excel 5.0
    Quill              => "pub", # Microsoft Publisher
    PP40               => "pot", # PowerPoint Template, or...?
    WPG20              => "wordperfect_unknown", # What is this?
    PerfectOffice_MAIN => "wb3", # Are all such files from Quattro Pro? Some
                                 # may be .shw presentations.
    EquationNative     => "equation",
    StarBaseDocument   => "starbase",
    WorkspaceState     => "opt", # MS Visual Studio
    SentenceExceptList => "staroffice_dictionary",
    StarBASIC          => "starbasic",
    SIG1               => "staroffice_unknown", # What is this?
);
my %extensions = @extensions; # for quick lookup

# When invoked by magicrescue, laola.pl should be in the PATH
push @INC, grep /tools/, split /:/, $ENV{PATH};
require 'laola.pl';

my $file = $ARGV[0];
unless (@ARGV and -f $file) {
    die "Usage: ole_rename.pl FILENAME\n";
}

# LAOLA does not do much sanity checking, a corrupted file can send it into
# a memory-exhausting loop. Using alarm here is basically a hack for systems
# where magicrescue can't do setrlimit to set max memory usage.
$SIG{ALRM} = sub { die "Timed out" };
alarm 10;

my $extension = "";
my $status = laola_open_document($file);
$status eq "ok" or die "laola_open_document failed: $status\n";

foreach my $pps (laola_get_dirhandles(0)) {
    my $name = laola_pps_get_name($pps);
    $name =~ s/[^-\w]//g;

    next unless $extensions{$name};
    for (my $i = 0; $i < @extensions; $i += 2) {
	if ($name eq $extensions[$i]) {
	    $extension = $extensions[$i+1];
	    last;
	    # we keep looking even though we have found the format, because
	    # the real format always seems to be closest to the end.
	}
    }
}
laola_close_document();

alarm 0;

if ($extension) {
    print "RENAME $extension";
}

