Platon Technologies
not logged in Login Registration
EnglishSlovak
open source software development celebrating 10 years of open source development! Friday, April 19, 2024

File: [Platon] / scripts / perl / wwwoffle-extract / wwwoffle-extract (download)

Revision 1.2, Mon Aug 4 18:29:05 2003 UTC (20 years, 8 months ago) by nepto


Changes since 1.1: +54 -9 lines

Support for entering directory on command line.
Correct header added into source code.

#!/usr/bin/perl -w

#
# wwwoffle-extract
#
# Developed by Peter Rockai <yenar@platon.sk>
# Copyright (c) 2003 Platon SDG, http://platon.sk/
# Licensed under terms of GNU General Public License.
# All rights reserved.
#
# Changelog:
# 2002 - created
# 04/08/2003 - support for entering directory on command line
#              (by Ondrej Jombik)
#

# $Platon: $

if ($#ARGV != 0) {
    printf "Usage: %s <directory>\n", $0;
    exit 1;
}

sub in_array
{
    my $val = shift;
    for (my $i = 0; $i < scalar(@_); $i++) {
        return 1 if ($_[$i] eq $val);
    }
    return 0;
}

$dir  = $ARGV[0];
$dir  =~ s/\/*$//g;
$out  = $dir;
$out  =~ s/^.*\/([^\/]+)$/$1/g;
@ls   = split (/\n/, ` ls "$dir/"U* `);
@dirs = ();

for $id (@ls) {
    $id =~ s/^.*\/U([^\/]+)$/$1/g;
    $f  = ` cat "$dir/U$id" `;
    # strip "protocol://server/" part of URL
    $f =~ s!^(.*?//.*?/)!!;
    $server = $1;
    print "Extracting $f\n";
    # create directory tree
    $newdir = $f;
    $newdir =~ s/^(.*\/)[^\/]*$/$1/g;
    if (! in_array($newdir, @dirs)) {
        print "Creating $out/$newdir\n";
        push @dirs, $newdir if (! system("mkdir -p '$out/$newdir'"));
    }
    $data = ` cat "$dir/D$id" `;
    # strip HTTP header
    $data =~ s/^.*?\n\x0d?\n\x0d?//ms;
    $td   = "/$newdir/";
    1 while $td =~ s!(?<=/)[^.]+?(?=/)!..!g;
    $td =~ s/\n$//;
    $td =~ s!^/!!;
    $td =~ s/^\/$//;
    # convert URL's
    $data =~ s!$server!$td!g;
    # remove remote images
    $data =~ s!<[iI][mM][gG](.*?)[sS][rR][cC]="?.*?://"?.*?>!!g;
    # strip comments
    $data =~ s@<!--.*?-->@@gsm;
    #print "converting URL's ('$server' -> '$td')\n";
    # output data
    $f = "$out/$f";
    open (OUTF, ">$f");
    print OUTF $data;
    close OUTF;
}


Platon Group <platon@platon.org> http://platon.org/
Copyright © 2002-2006 Platon Group
Site powered by Metafox CMS
Go to Top