Root/scripts/cleanfile

#!/usr/bin/perl -w
#
# Clean a text file -- or directory of text files -- of stealth whitespace.
# WARNING: this can be a highly destructive operation. Use with caution.
#

use bytes;
use File::Basename;

# Default options
#
# Maximum visual line width; lines wider than this are reported on STDERR.
# The -width / -w command-line option overrides the value, and a value of 0
# disables the check.
$max_width = 79;

# Clean up space-tab sequences, either by removing spaces or
# replacing them with tabs.
#
# Spaces are not copied to the output straight away; they are only counted.
# If a tab follows, the held-back spaces are dropped and as many tabs are
# emitted as are needed to reach the tab stop the original text reached
# (tab stops are every 8 columns).  If anything else follows, the spaces are
# emitted unchanged.  "\n" and "\r" reset the column counter.
#
# Argument: the text to clean (one or more lines).
# Returns:  the cleaned text.
sub clean_space_tabs($)
{
    no bytes;   # Tab alignment depends on characters

    my ($text) = @_;
    my $out     = '';
    my $col     = 0;    # Column reached by the text emitted so far
    my $pending = 0;    # Spaces seen but not yet emitted

    for my $ch (split //, $text) {
        if ($ch eq "\t") {
            # Tab stop reached after the pending spaces plus this tab
            my $stop = ($col + $pending + 8) & ~7;
            $out .= "\t" x (($stop >> 3) - ($col >> 3));
            $col     = $stop;
            $pending = 0;
        } elsif ($ch eq ' ') {
            $pending++;
        } else {
            # No tab followed, so the held-back spaces are real content
            $out .= (' ' x $pending) . $ch;
            if ($ch eq "\n" || $ch eq "\r") {
                $col = 0;
            } else {
                $col += $pending + 1;
            }
            $pending = 0;
        }
    }

    # Spaces at the very end of the text are kept as they are
    $out .= ' ' x $pending;
    return $out;
}

# Compute the visual width of a string
#
# A tab advances to the next multiple of 8 columns, "\n" ends the current
# line, and every other character counts as one column.
#
# Argument: the text to measure (may contain several lines).
# Returns:  the width of the widest line in the text.
sub strwidth($) {
    no bytes;   # Tab alignment depends on characters

    my ($text) = @_;
    my $col    = 0;     # Column reached on the current line
    my $widest = 0;     # Widest completed line so far

    for my $ch (split //, $text) {
        if ($ch eq "\t") {
            $col = ($col + 8) & ~7;
        } elsif ($ch eq "\n") {
            $widest = $col if $col > $widest;
            $col = 0;
        } else {
            $col++;
        }
    }

    # The text may end without a newline; count that last line as well
    return $col > $widest ? $col : $widest;
}

#
# Main program: parse the command line, then clean each named file in place.
#
# strict/warnings are enabled from here on only; everything below uses
# lexical variables, apart from the $max_width option default set above.
use strict;
use warnings;

our $max_width;

my $name  = basename($0);
my @files = ();

while (defined(my $arg = shift(@ARGV))) {
    if ($arg =~ /^-/) {
        # -width needs a value; without one fall through to the usage text
        if (($arg eq '-width' || $arg eq '-w') && @ARGV) {
            $max_width = shift(@ARGV) + 0;
        } else {
            print STDERR "Usage: $name [-width #] files...\n";
            exit 1;
        }
    } else {
        push(@files, $arg);
    }
}

TARGET: foreach my $f (@files) {
    print STDERR "$name: $f\n";

    if (! -f $f) {
        print STDERR "$f: not a file\n";
        next TARGET;
    }

    my $fh;
    if (!open($fh, '+<', $f)) {
        print STDERR "$name: Cannot open file: $f: $!\n";
        next TARGET;
    }

    binmode $fh;

    # First, verify that it is not a binary file; consider any file
    # with a zero byte to be a binary file.  Is there any better, or
    # additional, heuristic that should be applied?
    my $is_binary   = 0;
    my $read_failed = 0;
    my $data;

    while (1) {
        my $got = read($fh, $data, 65536);
        if (!defined($got)) {
            print STDERR "$name: Cannot read file: $f: $!\n";
            $read_failed = 1;
            last;
        }
        last if $got == 0;      # End of file
        if ($data =~ /\0/) {
            $is_binary = 1;
            last;
        }
    }

    if ($read_failed) {
        close($fh);
        next TARGET;
    }

    if ($is_binary) {
        print STDERR "$name: $f: binary file\n";
        close($fh);
        next TARGET;
    }

    if (!seek($fh, 0, 0)) {
        print STDERR "$name: Cannot rewind file: $f: $!\n";
        close($fh);
        next TARGET;
    }

    my $in_bytes    = 0;
    my $out_bytes   = 0;
    my $blank_bytes = 0;
    my $modified    = 0;    # Set when cleaning changed any line

    my @blanks = ();        # Blank lines seen since the last non-blank line
    my @lines  = ();        # Cleaned lines to be written back
    my $lineno = 0;

    while (defined(my $line = <$fh>)) {
        $lineno++;
        $in_bytes += length($line);

        my $before = $line;
        $line =~ s/[ \t\r]*$//; # Remove trailing spaces
        $line = clean_space_tabs($line);

        # The byte count alone is not enough to detect a change: a single
        # space in column 7 followed by a tab becomes two tabs.
        $modified = 1 if $line ne $before;

        # An empty string is a whitespace-only final line with no newline;
        # treat it as blank so it does not anchor preceding blank lines.
        if ($line eq "\n" || $line eq '') {
            push(@blanks, $line);
            $blank_bytes += length($line);
        } else {
            # Blank lines are only kept once a non-blank line follows them
            push(@lines, @blanks);
            $out_bytes += $blank_bytes;
            push(@lines, $line);
            $out_bytes += length($line);
            @blanks = ();
            $blank_bytes = 0;
        }

        my $l_width = strwidth($line);
        if ($max_width && $l_width > $max_width) {
            print STDERR
                "$f:$lineno: line exceeds $max_width characters ($l_width)\n";
        }
    }

    # Any blanks at the end of the file are discarded

    my $wrote = 0;
    if ($modified || $in_bytes != $out_bytes) {
        # Only write to the file if changed
        if (!seek($fh, 0, 0)) {
            # Nothing has been written yet, so the file is still intact
            print STDERR "$name: Cannot rewind file: $f: $!\n";
            close($fh);
            next TARGET;
        }

        $wrote = 1;
        if (!(print {$fh} @lines)) {
            die "$name: Failed to write modified file: $f: $!\n";
        }

        my $where = tell($fh);
        if (!defined($where) || $where < 0 || !truncate($fh, $where)) {
            die "$name: Failed to truncate modified file: $f: $!\n";
        }
    }

    # Buffered write errors only show up when the handle is closed
    if (!close($fh) && $wrote) {
        die "$name: Failed to write modified file: $f: $!\n";
    }
}


Archive Download this file



interactive