shithub: 9intro

ref: 2b99422480d596ebc26921c87c6bb81a07949f3e
dir: /idx/doclean/

View raw version
#!/bin/rc
# doclean: strip the leading "ix: " tag from every line of the named files
# (or of standard input when no files are named) and write the result to
# standard output.  exec replaces this rc process with sed, so nothing
# that follows this command in the script is ever executed.
exec sed -e 's/^ix: //' $*

# Unreachable as written: the exec earlier in this script replaces rc with
# sed, so control never arrives here.  Presumably the older awk version of
# doclean, left in place for reference.
awk ' # doclean: turn raw index records into "item (tab) page" lines

# Input, one record per line:   number (tab) IX string
#	107     IX self-reference #1186 -
#	281     TL APPENDIX A  AMPL Reference Manual   #26 -
# Output, one line per index record:   string (tab) number
#	runs of blanks inside the string are squeezed to a single blank
#	the two fields are swapped on purpose; later programs expect that order
#
# Some of the handling below is special pleading for the AMPL book.
#
# NOTE(review): a record that passes the "ix: " test still has that tag at
# the front of $1, so it is printed as part of the number -- confirm intended.
# NOTE(review): "1>&2" is Bourne-shell redirection syntax; confirm that the
# shell awk uses to run pipe commands accepts it.

BEGIN {
	FS = "\t"
	OFS = "\t"
}

{
	# expected noise (titles, headings, last-page marker): drop silently
	if ($0 ~ /\t(TL|H1|H2|H3|LASTPAGE)/)
		next

	# anything not tagged as an index line is reported on stderr and dropped
	if ($0 !~ /^ix: /) {
		print "doclean: non index line: " $0 | "cat 1>&2"
		next
	}

	item = $2
	sub(/IX +/, "", item)			# remove the "IX " marker
	sub(/ +#[0-9]+ .*$/, "", item)		# remove trailing blanks, slug, file
	gsub(/  +/, " ", item)			# squeeze runs of blanks
	print item, $1				# item (tab) page number
}
' $*