Diff: %s

#!/bin/bash # # Synopsis: # Show changes between 2 internet-drafts using changebars or html # side-by-side diff. # # Usage: # rfcdiff [options] file1 file2 # # rfcdiff takes two RFCs or Internet-Drafts in text form as input, and # produces output which indicates the differences found in one of various # forms, controlled by the options listed below. In all cases, page # headers and page footers are stripped before looking for changes. # # --html Produce side-by-side .html diff (default) # # --chbars Produce changebar marked .txt output # # --diff Produce a regular diff output # # --wdiff Produce paged wdiff output # # --hwdiff Produce html-wrapped coloured wdiff output # # --browse Show html output in browser # # --keep Don't delete temporary workfiles # # --version Show version # # --help Show this help # # --info "Synopsis|Usage|Copyright|Description|Log" # Show various info # # --width N Set a maximum width of N characters for the # display of each half of the old/new html diff # # --linenum Show linenumbers for each line, not only at the # start of each change section # # --body Strip document preamble (title, boilerplate and # table of contents) and postamble (Intellectual # Property Statement, Disclaimer etc) # # --nostrip Don't strip headers and footers (or body) # # --ab-diff Before/After diff, suitable for rfc-editor # --abdiff # # --stdout Send output to stdout instead to a file # # # Copyright: # ----------------------------------------------------------------- # # Copyright 2002 Henrik Levkowetz # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # ----------------------------------------------------------------- # # Description: # # The purpose of this program is to compare two versions of an # internet-draft, and as output produce a diff in one of several # formats: # # - side-by-side html diff # - paged wdiff output in a text terminal # - a text file with changebars in the left margin # - a simple unified diff output # # In all cases, internet-draft headers and footers are stripped before # generating the diff, to produce a cleaner diff. # # It is called as # # rfcdiff first-file second-file # # The latest version is available from # http://tools.ietf.org/tools/rfcdiff/ # # Log: # 17 Jul 2007 v1.34 - Added quotes around file names to be able to # handle filenames containing whitespace, and some # other changes associated with files containing # whitespace and running on *BSD. # # 01 Jun 2006 v1.32 - changed 'para.' to 'paragraph' in abdiff. # Added new regexps for page headers in strip. # # 12 May 2006 v1.31 - Fixed a bug where the last diff in # --abdiff mode would not be output. Tweaked the # regexp which recognizes section headers in # abdiff(). # # 05 May 2006 v1.30 - Changed -U to -U to work on Macs # # 20 Dec 2005 v1.29 - Added --hwdiff switch, to give single- # columnt html output, as in Bill Fenner's htmlwdiff # # 20 Dec 2005 v1.28 - Changed font-size units from pt to em. # Made white-space ignoring somewhat less aggressive. # # 04 Aug 2005 v1.27 - Added -f option to some mv commands to # handle read-only input files. Added statistics # output. # # 29 Jul 2005 v1.26 - Removed spurious empty output line. # Added guard against files beginning with "-" # for mv. # # 11 Jul 2005 v1.25 - Tweaked the regexps for --bodystrip and # recognition of wdiff version, both thanks to # Bruce Lilly. Tweak to handle directory names # with whitespace, thanks to Thomas Morin. # # 16 Jun 2005 v1.24 - changed to use uppercase "-U" option to # diff, for MacOS X compatibility. (Thanks to # Lars Eggert.) # # 24 Jan 2005 v1.23 - Fixed a bug where html entity codes was # broken up by diff markup. # # 06 Jan 2005 v1.22 - Tweaked a style setting # # 10 Dec 2004 v1.21 - Added anchor for each diff, in order # for it to be possible to create URLs refering # to individual diffs. # # 09 Dec 2004 v1.20 - fixed the same bug again, a bit better # this time :-) # # 06 Dec 2004 v1.19 - fixed a bug introduced in v1.18, where # lines of diffs with linebreaks would not be # shown when using the --width option. # # 02 Dec 2004 v1.18 - On some systems, awk is the original # Aho, Weinberger, Kernighan awk. We need gawk # or at least nawk, so now we look for those. # If not found, we try to run with awk anyway, # but that will probably not work... # Nawk won't accept continuation lines inside # strings, so all multiline strings broken up. # Also changed the linebreaking and page # formatting somewhat. # # 16 Nov 2004 v1.17 - Minor bugfix for cygwin - "cd -" was # reported not to work; working around that. # # 10 Nov 2004 v1.16 - Minor bugfix - properly removing temp # outfile when using --stdout # # 20 Sep 2004 v1.15 - Improved the page header/footer # stripping to handle more cases of paragraphs # split over page breaks, and a greater variety # of whitespace in the page break. Added some # diagnostic information to the generated html # diff, in comments. Added --stdout option. # # 04 Sep 2004 v1.14 - Cleaned up html in a few places so it's # now clean again. # # 11 Jun 2004 v1.13 - Tweaked the regexp to match section # numbers slightly. Added -wd option to diff # also for the ab (rfc-editor before/after) diff. # # 07 Jun 2004 v1.12 - Added --abdiff option, to produce # OLD/NEW output suitable for the RFC-Editor # # 03 Apr 2004 v1.11 - Added --nostrip option. Fixed a bug # where a diff on the very first line would not # be shown. # # 17 Mar 2004 v1.10 - Minor tweaks to handle malformed drafts # better. Added firefox to browser list. # # 23 Feb 2004 v1.09 - Started work on an Old/New diff mode, # suitable for diff summaries to mailing lists, # issue trackers, reviewers & rfc-editor. Not # complete yet. # # 22 Feb 2004 v1.08 - Added --body option to exclude # boilerplate and table of contents changes. # # 21 Feb 2004 v1.07 - Added diagnostic message when wdiff not # found or wdiff version not recognised # # 01 Feb 2004 v1.06 - Added --linenum option to provide line numbers # on each line. Simplified linebreaking markup. # Some mild refactoring. # # 29 Jan 2004 v1.05 - Now providing page and line numbers for # both old and new document versions at the start # of each change section. # The line-breaking code is still buggy... # # 25 Jan 2004 v1.04 - Added line numbers for the case when no page # numbers are available # # 24 Jan 2004 v1.03 - Fixed a line coloring bug introduced in v1.02 # # 22 Jan 2004 v1.02 - Added line-breaking functionality through # the --width option. Experimental -- may be # buggy. # # 17 Dec 2003 v1.01 - Fixed a bug where diffs with no text # occurring after the last change would be shown # without the last change. Added some debug # functionality to be able to track this one down. # # 14 Dec 2003 v1.00 - Bumped version number to 1.00 # # 6 Dec 2003 v0.42 - Added html diff output for the # identical files case. # # 5 Dec 2003 v0.41 - Added --info option # # 25 Nov 2003 v0.40 - Added the use of wget (if available) to # pull down remote source files (http: or ftp:) # # 20 Nov 2003 v0.39 - Added 'End of changes' line at the end # of html diff. Added a test on wdiff producing # reasonable output with --version option, to # avoid old broken wdiff versions. # # 20 Nov 2003 v0.38 - Added --keep option, to keep temporary # files. # # 20 Nov 2003 v0.37 - Added --nowdiff option, to make --html # *not* use wdiff even if it is available. # # 20 Nov 2003 v0.36 - Added --browse option, to optionally # start a browser to show html diff output. # Refined how we look for a wdiff binary. # # 18 Nov 2003 v0.35 - minor tweaks to header/footer stripping # regexps. Changed color marking of differences. # Other minor tweaks and comment updates. # # 16 Nov 2003 v0.34 - removed listing of environment when no # files were given on the command line. Added # help text. Added the possibility of using wdiff to get # the changed words in a change block highlighted. # # 23 Oct 2003 v0.33 - using different dir's for the stripped # files, to be able to diff files with the same # basename. # # 2 Sep 2003 v0.32 - fixed spurious error message when using # --wdiff option # # 1 Sep 2003 v0.31 - not touching the original files, using # temporary directory for work files. # # 29 Aug 2003 v0.30 - Removed explicit font size for output. # Changed regexp for page start (now accepting space # in "Internet Draft". # # 16 Apr 2003 v0.29 - added wdiff support # # 6 Mar 2003 v0.28 - added --html, --chbars and --diff switches # # 3 Mar 2003 v0.27 - Changed page regexp to accept lowercase # 'p'. # # 2 Feb 2003 Expanded to provide side-by-side html diff, in # addition to changebars in .txt files # # End: # export version="1.34" export progdate="17 Jul 2007 15:42:59" export prelines="10" export basename=$(basename $0) export workdir="/tmp/$basename-$$" export pagecache1="$workdir/pagecache1" export pagecache2="$workdir/pagecache2" # ---------------------------------------------------------------------- # Utility to find an executable # ---------------------------------------------------------------------- lookfor() { for b in "$@"; do found=$(which "$b" 2>/dev/null) if [ -n "$found" ]; then if [ -x "$found" ]; then echo "$found" return fi fi done } AWK=$(lookfor gawk nawk awk) # ---------------------------------------------------------------------- # Strip headers footers and formfeeds from infile to stdout # ---------------------------------------------------------------------- strip() { $AWK ' { gsub(/\r/, ""); } { gsub(/[ \t]+$/, ""); } /\[?[Pp]age [0-9ivx]+\]?[ \t\f]*$/{ match($0, /\[?[Pp]age [0-9ivx]+\]?/); print substr($0, RSTART+6, RLENGTH-7), outline > ENVIRON["pagecache" ENVIRON["which"]] next; } /^[ \t]*\f/ { newpage=1; next; } /^ *Internet.Draft.+[12][0-9][0-9][0-9] *$/ { newpage=1; next; } /^ *INTERNET.DRAFT.+[12][0-9][0-9][0-9] *$/ { newpage=1; next; } /^ *Draft.+[12][0-9][0-9][0-9] *$/ { newpage=1; next; } /^RFC.+[0-9]+$/ { newpage=1; next; } /^draft-[-a-z0-9_.]+.*[0-9][0-9][0-9][0-9]$/ { newpage=1; next; } /(Jan|Feb|Mar|March|Apr|April|May|Jun|June|Jul|July|Aug|Sep|Oct|Nov|Dec) (19[89][0-9]|20[0-9][0-9]) *$/ && pagelength < 3 { newpage=1; next; } newpage && $0 ~ /^ *draft-[-a-z0-9_.]+ *$/ { newpage=1; next; } /^[^ \t]/ { sentence=1; } /[^ \t]/ { if (newpage) { if (sentence) { outline++; print ""; } } else { if (haveblank) { outline++; print ""; } } haveblank=0; sentence=0; newpage=0; } /[.:][ \t]*$/ { sentence=1; } /^[ \t]*$/ { haveblank=1; next; } { outline++; print; } ' "$1" } # ---------------------------------------------------------------------- # Strip preamble (title, boilerplate and table of contents) and # postamble (Intellectual Property Statement, Disclaimer etc) # ---------------------------------------------------------------------- bodystrip() { $AWK ' /^[ \t]*Acknowledgment/ { inbody = 0; } /^(Full )*Copyright Statement$/ { inbody = 0; } /^[ \t]*Disclaimer of Validid/ { inbody = 0; } /^[ \t]*Intellectual Property/ { inbody = 0; } /^Abstract$/ { inbody = 0; } /^Table of Contents$/ { inbody = 0; } /^1.[ \t]*Introduction$/ { inbody = 1; } inbody { print; } ' "$1" } # ---------------------------------------------------------------------- # From two words, find common prefix and differing part, join descriptively # ---------------------------------------------------------------------- worddiff() { $AWK ' BEGIN { w1 = ARGV[1] w2 = ARGV[2] format = ARGV[3] do { if (substr(w1,1,1) == substr(w2,1,1)) { w1 = substr(w1,2) w2 = substr(w2,2) } else { break; } prefixlen++; } while (length(w1) && length(w2)) prefix = substr(ARGV[1],1,prefixlen); do { l1 = length(w1); l2 = length(w2); if (substr(w1,l1,1) == substr(w2,l2,1)) { w1 = substr(w1,1,l1-1) w2 = substr(w2,1,l2-1) } else { break; } } while (l1 && l2) suffix = substr(ARGV[1], prefixlen+length(w1)) printf format, prefix, w1, w2, suffix; } ' "$1" "$2" "$3" } # ---------------------------------------------------------------------- # Generate a html page with side-by-side diff from a unified diff # ---------------------------------------------------------------------- htmldiff() { $AWK ' BEGIN { FS = "[ \t,]"; # Read pagecache1 maxpage[1] = 1 pageend[1,0] = 2; while ( getline < ENVIRON["pagecache1"] > 0) { pageend[1,$1] = $2; if ($1+0 > maxpage[1]) maxpage[1] = $1+0; } # Read pagecache2 maxpage[2] = 1 pageend[2,0] = 2; while ( getline < ENVIRON["pagecache2"] > 0) { pageend[2,$1] = $2; if ($1+0 > maxpage[2]) maxpage[2] = $1+0; } wdiff = ENVIRON["wdiffbin"] base1 = ENVIRON["base1"] base2 = ENVIRON["base2"] optwidth = ENVIRON["optwidth"] optnums = ENVIRON["optnums"] cmdline = ENVIRON["cmdline"] gsub("--", "- -", cmdline) ENVIRON["cmdline"] = cmdline header(base1, base2) difflines1 = 0 difflines2 = 0 } function header(file1, file2) { printf "" \ " \n" \ " \n" \ "\n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " Diff: %s - %s \n" \ " \n" \ " \n" \ " \n" \ " \n" \ " \n" \ "", ENVIRON["version"], ENVIRON["cmdline"], ENVIRON["uname"], ENVIRON["awkbin"], ENVIRON["awkver"], ENVIRON["diffbin"], ENVIRON["diffver"], ENVIRON["wdiffbin"], ENVIRON["wdiffver"], file1, file2, file1, file2; } function worddiff(w1, w2) { prefixlen = 0; word1 = w1; do { if (substr(w1,1,1) == substr(w2,1,1)) { w1 = substr(w1,2); w2 = substr(w2,2); } else { break; } prefixlen++; } while (length(w1) && length(w2)); prefix = substr(word1,1,prefixlen); do { l1 = length(w1); l2 = length(w2); if (substr(w1,l1,1) == substr(w2,l2,1)) { w1 = substr(w1,1,l1-1); w2 = substr(w2,1,l2-1); } else { break; } } while (l1 && l2); suffix = substr(word1, prefixlen+length(w1)+1); wordpart[0] = prefix; wordpart[1] = w1; wordpart[2] = w2; wordpart[3] = suffix; } function numdisplay(which, line) { if (optnums && (line != prevline[which])) { prevline[which] = line; return line-1; } return ""; } function fixesc(line) { # Making this a no-op for now -- the change in line-breaking # "
" => "\n" should make this less necessary. # line = gensub(/&(<[^>]*>)/, "\\1\\&", "g", line); # We still have to handle cases where we have a broken up "<" / ">" gsub(/&l<\/span>t;/, "\\<", line); gsub(/&g<\/span>t;/, "\\>", line); gsub(/&l<\/span>t;/, "\\<", line); gsub(/&g<\/span>t;/, "\\>", line); gsub(/&l<\/span>t;/, "\\<", line); gsub(/&g<\/span>t;/, "\\>", line); return line; } function chunkdiff(chunk) { if (difflines1 == 0 && difflines2 == 0) return; chunkfile1= sprintf("1/chunk%04d", chunk); chunkfile2= sprintf("2/chunk%04d", chunk); printf "" > chunkfile1; printf "" > chunkfile2; for (l = 0; l < difflines1; l++) { print stack1[l] >> chunkfile1; } for (l = 0; l < difflines2; l++) { print stack2[l] >> chunkfile2; } close(chunkfile1); close(chunkfile2); cmd1 = sprintf("%s -n -2 -w \"\" -x \"\" %s %s", wdiff, chunkfile1, chunkfile2); cmd2 = sprintf("%s -n -1 -y \"\" -z \"\" %s %s", wdiff, chunkfile1, chunkfile2); l=0; while (cmd1 | getline > 0) { stack1[l] = fixesc($0); l++; } difflines1 = l; l=0; while (cmd2 | getline > 0) { stack2[l] = fixesc($0); l++; } difflines2 = l; close(cmd1); close(cmd2); } function flush() { if (difflines1 || difflines2) { difftag++; multiline = (difflines1 > 1) || (difflines2 > 1); if (multiline && (wdiff != "")) chunkdiff(difftag); printf " \n", difftag; for (l = 0; l < difflines1 || l < difflines2; l++) { if (l in stack1) { line1 = stack1[l]; delete stack1[l]; linenum1++ if (line1 == "") if (optwidth > 0) { line1 = substr(" ",0,optwidth); } else { line1 = " "; } } else { line1 = ""; } if (l in stack2) { line2 = stack2[l]; delete stack2[l]; linenum2++; if (line2 == "") if (optwidth > 0) { line2 = substr(" ",0,optwidth); } else { line2 = " "; } } else { line2 = ""; } if (!multiline || (wdiff == "")) { worddiff(line1, line2); line1 = fixesc(sprintf("%s%s%s", wordpart[0], wordpart[1], wordpart[3])); line2 = fixesc(sprintf("%s%s%s", wordpart[0], wordpart[2], wordpart[3])); # Clean up; remove empty spans sub(/<\/span>/,"", line1); sub(/<\/span>/,"", line2); } left = sprintf("", numdisplay(1, linenum1), line1); right = sprintf("", line2, numdisplay(2, linenum2)); printf " %s%s\n", left, right; } } } function getpage(which, line) { line = line + ENVIRON["prelines"]; page = "?"; for (p=1; p <= maxpage[which]; p++) { if (pageend[which,p] == 0) continue; if (line <= pageend[which,p]) { page = p; break; } } return page; } function getpageline(which, line, page) { if (page == "?") { return line + ENVIRON["prelines"]; } else { if (pageend[which,page-1]+0 != 0) { return line + ENVIRON["prelines"] - pageend[which,page-1] + 3; # numbers of header lines stripped } else { return "?"; } } } function htmlesc(line) { gsub("&", "\\&", line); gsub("<", "\\<", line); gsub(">", "\\>", line); return line; } function expandtabs(line) { spaces = " "; while (pos = index(line, "\t")) { sub("\t", substr(spaces, 0, (8-pos%8)), line); } return line; } function maybebreakline(line, width) { width = optwidth; new = ""; if (width > 0) { line = expandtabs(line); while (length(line) > width) { new = new htmlesc(substr(line, 1, width)) "\n"; line = substr(line, width+1); } } line = new htmlesc(line) ; return line; } /^@@/ { linenum1 = 0 - $2; linenum2 = 0 + $4; diffnum ++; if (linenum1 > 1) { printf " \n"; page1 = getpage(1,linenum1); page2 = getpage(2,linenum2); if (page1 == "?") { posinfo1 = sprintf("skipping to change at line %s", diffnum, getpageline(1, linenum1, page1)); } else { posinfo1 = sprintf("skipping to change at page %s, line %s", diffnum, page1, getpageline(1, linenum1, page1)); } if (page2 == "?") { posinfo2 = sprintf("skipping to change at line %s", diffnum, getpageline(2, linenum2, page2)); } else { posinfo2 = sprintf("skipping to change at page %s, line %s", diffnum, page2, getpageline(2, linenum2, page2)); } printf " \n", posinfo1, posinfo2; } } /^---/ { next; } /^[+][+][+]/ { next; } /^[ ]/ { line = substr($0, 2); line = maybebreakline(line); flush(); linenum1++; linenum2++; printf " ", numdisplay(1, linenum1), line; printf "\n", line, numdisplay(2, linenum2); diffcount1 += difflines1 difflines1 = 0 diffcount2 += difflines2 difflines2 = 0 } /^-/ { line = substr($0, 2); line = maybebreakline(line); stack1[difflines1] = line; difflines1++; } /^[+]/ { line = substr($0, 2); line = maybebreakline(line); stack2[difflines2] = line; difflines2++; } END { flush(); printf("\n" \ " \n" \ " \n" \ " \n" \ " \n" \ "

	%s		%s

%s	%s	%s	%s


	%s		%s
%s	%s		%s	%s

%s. %s change blocks.
	%s lines changed or deleted		%s lines changed or added
This html diff was produced by rfcdiff %s. The latest version is available from http://tools.ietf.org/tools/rfcdiff/

"								>> "$tempout"
    wdiff -w "" -x "" -y "" -z "" 1/"$base1" 2/"$base2" >> "$tempout"
    echo "

" >> "$tempout" echo "" >> "$tempout" fi if [ $optstdout -gt 0 ]; then cat "$tempout" rm "$tempout" else cd "$origdir"; if [ -f $workdir/"$tempout" ]; then mv $workdir/"$tempout" "$outfile"; fi fi if [ $optshow -gt 0 ]; then browse "$outfile" fi if [ $optkeep -eq 0 ]; then if [ -f $pagecache1 ]; then rm $pagecache1; fi if [ -f $pagecache2 ]; then rm $pagecache2; fi rm -fr $workdir/1 rm -fr $workdir/2 if [ -f $workdir/diff ]; then rm $workdir/diff fi rmdir $workdir else cd /tmp tar czf $basename-$$.tgz $basename-$$ echo " Temporary workfiles have been left in $workdir/, and packed up in $workdir.tgz" fi