shithub: drawterm

Download patch

ref: 93a43be0ea85aca12826c9d53aec0ad2a0278198
parent: f493c9c544c7c3b4c7cf25b3af61d5791105cb2d
author: Russ Cox <[email protected]>
date: Fri Nov 4 11:05:20 EST 2005

use assembly

--- a/Make.unix
+++ b/Make.unix
@@ -1,5 +1,6 @@
 # Unix
 AR=ar
+AS=as
 CC=gcc
 RANLIB=ranlib
 CFLAGS=-I$(ROOT) -I$(ROOT)/include -I$(ROOT)/kern -c -ggdb -D_THREAD_SAFE -pthread # not ready for this yet: -Wall
--- a/Make.win32
+++ b/Make.win32
@@ -7,6 +7,7 @@
 #MING=
 AR=$(MING)ar
 CC=$(MING)gcc
+AS=$(MING)as
 RANLIB=$(MING)ranlib
 CFLAGS=-I$(ROOT)/include -I$(ROOT) -I$(ROOT)/kern -c -D_X86_ -DIS_32 -DWINDOWS
 O=o
@@ -28,7 +29,7 @@
 #OS=win32
 #GUI=win32
 
-all: $(TARG)
+all: default
 
 # for root
 libmachdep.a:
--- a/libsec/Makefile
+++ b/libsec/Makefile
@@ -33,7 +33,6 @@
 	hmac.$O\
 	md4.$O\
 	md5.$O\
-	md5block.$O\
 	md5pickle.$O\
 	nfastrand.$O\
 	prng.$O\
@@ -46,7 +45,6 @@
 	rsagen.$O\
 	rsaprivtopub.$O\
 	sha1.$O\
-	sha1block.$O\
 	sha1pickle.$O\
 	smallprimes.$O
 
--- a/posix-386/Makefile
+++ b/posix-386/Makefile
@@ -4,6 +4,8 @@
 
 OFILES=\
 	getcallerpc.$O\
+	md5block.$O\
+	sha1block.$O\
 	tas.$O
 
 default: $(LIB)
@@ -13,4 +15,11 @@
 
 %.$O: %.c
 	$(CC) $(CFLAGS) $*.c
+
+%.$O: %.s
+	$(AS) -o $*.$O $*.s
+
+%.s: %.spp
+	cpp $*.spp >$*.s
+
 
--- a/posix-386/md5block.s
+++ /dev/null
@@ -1,241 +1,0 @@
-/*
- *  rfc1321 requires that I include this.  The code is new.  The constants
- *  all come from the rfc (hence the copyright).  We trade a table for the
- *  macros in rfc.  The total size is a lot less. -- presotto
- *
- *	Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
- *	rights reserved.
- *
- *	License to copy and use this software is granted provided that it
- *	is identified as the "RSA Data Security, Inc. MD5 Message-Digest
- *	Algorithm" in all material mentioning or referencing this software
- *	or this function.
- *
- *	License is also granted to make and use derivative works provided
- *	that such works are identified as "derived from the RSA Data
- *	Security, Inc. MD5 Message-Digest Algorithm" in all material
- *	mentioning or referencing the derived work.
- *
- *	RSA Data Security, Inc. makes no representations concerning either
- *	the merchantability of this software or the suitability of this
- *	software forany particular purpose. It is provided "as is"
- *	without express or implied warranty of any kind.
- *	These notices must be retained in any copies of any part of this
- *	documentation and/or software.
- */
-#define S11 7
-#define S12 12
-#define S13 17
-#define S14 22
-
-#define S21 5
-#define S22 9
-#define S23 14
-#define S24 20
-
-#define S31 4
-#define S32 11
-#define S33 16
-#define S34 23
-
-#define S41 6
-#define S42 10
-#define S43 15
-#define S44 21
-
-#define PAYME(x) $##x
-
-/*
- * SI is data
- *	a += FN(B,C,D);
- *	a += x[sh] + t[sh];
- *	a = (a << S11) | (a >> (32 - S11));
- *	a += b;
- */
-
-#define BODY1(off,V,FN,SH,A,B,C,D)\
-	FN(B,C,D)\
-	leal V(A, %edi, 1), A;\
-	addl off(%ebp), A;\
-	roll PAYME(SH), A;\
-	addl B, A;\
-
-#define BODY(off,V,FN,SH,A,B,C,D)\
-	FN(B,C,D)\
-	leal V(A, %edi, 1), A;\
-	addl (off)(%ebp), A;\
-	roll PAYME(SH), A;\
-	addl B,A;\
-
-/*
- * fn1 = ((c ^ d) & b) ^ d
- */
-#define FN1(B,C,D)\
-	movl C, %edi;\
-	xorl D, %edi;\
-	andl B, %edi;\
-	xorl D, %edi;\
-
-/*
- * fn2 = ((b ^ c) & d) ^ c;
- */
-#define FN2(B,C,D)\
-	movl B, %edi;\
-	xorl C, %edi;\
-	andl D, %edi;\
-	xorl C, %edi;\
-
-/*
- * fn3 = b ^ c ^ d;
- */
-#define FN3(B,C,D)\
-	movl B, %edi;\
-	xorl C, %edi;\
-	xorl D, %edi;\
-
-/*
- * fn4 = c ^ (b | ~d);
- */
-#define FN4(B,C,D)\
-	movl D, %edi;\
-	xorl $-1, %edi;\
-	orl B, %edi;\
-	xorl C, %edi;\
-
-#define	DATA	8
-#define	LEN	12
-#define	STATE	16
-
-#define EDATA	(-4)
-#define OLDEBX	(-8)
-#define OLDESI	(-12)
-#define OLDEDI	(-16)
-
-	.text
-
-	.p2align 2,0x90
-	.globl _md5block
-		.type _md5block, @function
-	_md5block:
-
-	/* Prelude */
-	pushl %ebp
-	movl %ebx, OLDEBX(%esp)
-	movl %esi, OLDESI(%esp)
-	movl %edi, OLDEDI(%esp)
-
-	movl	DATA(%esp), %eax
-	addl	LEN(%esp), %eax
-	movl	%eax, EDATA(%esp)
-
-	movl DATA(%esp), %ebp
-
-mainloop:
-	movl STATE(%esp), %esi
-	movl (%esi), %eax
-	movl 4(%esi), %ebx
-	movl 8(%esi), %ecx
-	movl 12(%esi), %edx
-
-	BODY1( 0*4,0xd76aa478,FN1,S11,%eax,%ebx,%ecx,%edx)
-	BODY1( 1*4,0xe8c7b756,FN1,S12,%edx,%eax,%ebx,%ecx)
-	BODY1( 2*4,0x242070db,FN1,S13,%ecx,%edx,%eax,%ebx)
-	BODY1( 3*4,0xc1bdceee,FN1,S14,%ebx,%ecx,%edx,%eax)
-
-	BODY1( 4*4,0xf57c0faf,FN1,S11,%eax,%ebx,%ecx,%edx)
-	BODY1( 5*4,0x4787c62a,FN1,S12,%edx,%eax,%ebx,%ecx)
-	BODY1( 6*4,0xa8304613,FN1,S13,%ecx,%edx,%eax,%ebx)
-	BODY1( 7*4,0xfd469501,FN1,S14,%ebx,%ecx,%edx,%eax)
-
-	BODY1( 8*4,0x698098d8,FN1,S11,%eax,%ebx,%ecx,%edx)
-	BODY1( 9*4,0x8b44f7af,FN1,S12,%edx,%eax,%ebx,%ecx)
-	BODY1(10*4,0xffff5bb1,FN1,S13,%ecx,%edx,%eax,%ebx)
-	BODY1(11*4,0x895cd7be,FN1,S14,%ebx,%ecx,%edx,%eax)
-
-	BODY1(12*4,0x6b901122,FN1,S11,%eax,%ebx,%ecx,%edx)
-	BODY1(13*4,0xfd987193,FN1,S12,%edx,%eax,%ebx,%ecx)
-	BODY1(14*4,0xa679438e,FN1,S13,%ecx,%edx,%eax,%ebx)
-	BODY1(15*4,0x49b40821,FN1,S14,%ebx,%ecx,%edx,%eax)
-
-
-	BODY( 1*4,0xf61e2562,FN2,S21,%eax,%ebx,%ecx,%edx)
-	BODY( 6*4,0xc040b340,FN2,S22,%edx,%eax,%ebx,%ecx)
-	BODY(11*4,0x265e5a51,FN2,S23,%ecx,%edx,%eax,%ebx)
-	BODY( 0*4,0xe9b6c7aa,FN2,S24,%ebx,%ecx,%edx,%eax)
-
-	BODY( 5*4,0xd62f105d,FN2,S21,%eax,%ebx,%ecx,%edx)
-	BODY(10*4,0x02441453,FN2,S22,%edx,%eax,%ebx,%ecx)
-	BODY(15*4,0xd8a1e681,FN2,S23,%ecx,%edx,%eax,%ebx)
-	BODY( 4*4,0xe7d3fbc8,FN2,S24,%ebx,%ecx,%edx,%eax)
-
-	BODY( 9*4,0x21e1cde6,FN2,S21,%eax,%ebx,%ecx,%edx)
-	BODY(14*4,0xc33707d6,FN2,S22,%edx,%eax,%ebx,%ecx)
-	BODY( 3*4,0xf4d50d87,FN2,S23,%ecx,%edx,%eax,%ebx)
-	BODY( 8*4,0x455a14ed,FN2,S24,%ebx,%ecx,%edx,%eax)
-
-	BODY(13*4,0xa9e3e905,FN2,S21,%eax,%ebx,%ecx,%edx)
-	BODY( 2*4,0xfcefa3f8,FN2,S22,%edx,%eax,%ebx,%ecx)
-	BODY( 7*4,0x676f02d9,FN2,S23,%ecx,%edx,%eax,%ebx)
-	BODY(12*4,0x8d2a4c8a,FN2,S24,%ebx,%ecx,%edx,%eax)
-
-
-	BODY( 5*4,0xfffa3942,FN3,S31,%eax,%ebx,%ecx,%edx)
-	BODY( 8*4,0x8771f681,FN3,S32,%edx,%eax,%ebx,%ecx)
-	BODY(11*4,0x6d9d6122,FN3,S33,%ecx,%edx,%eax,%ebx)
-	BODY(14*4,0xfde5380c,FN3,S34,%ebx,%ecx,%edx,%eax)
-
-	BODY( 1*4,0xa4beea44,FN3,S31,%eax,%ebx,%ecx,%edx)
-	BODY( 4*4,0x4bdecfa9,FN3,S32,%edx,%eax,%ebx,%ecx)
-	BODY( 7*4,0xf6bb4b60,FN3,S33,%ecx,%edx,%eax,%ebx)
-	BODY(10*4,0xbebfbc70,FN3,S34,%ebx,%ecx,%edx,%eax)
-
-	BODY(13*4,0x289b7ec6,FN3,S31,%eax,%ebx,%ecx,%edx)
-	BODY( 0*4,0xeaa127fa,FN3,S32,%edx,%eax,%ebx,%ecx)
-	BODY( 3*4,0xd4ef3085,FN3,S33,%ecx,%edx,%eax,%ebx)
-	BODY( 6*4,0x04881d05,FN3,S34,%ebx,%ecx,%edx,%eax)
-
-	BODY( 9*4,0xd9d4d039,FN3,S31,%eax,%ebx,%ecx,%edx)
-	BODY(12*4,0xe6db99e5,FN3,S32,%edx,%eax,%ebx,%ecx)
-	BODY(15*4,0x1fa27cf8,FN3,S33,%ecx,%edx,%eax,%ebx)
-	BODY( 2*4,0xc4ac5665,FN3,S34,%ebx,%ecx,%edx,%eax)
-
-
-	BODY( 0*4,0xf4292244,FN4,S41,%eax,%ebx,%ecx,%edx)
-	BODY( 7*4,0x432aff97,FN4,S42,%edx,%eax,%ebx,%ecx)
-	BODY(14*4,0xab9423a7,FN4,S43,%ecx,%edx,%eax,%ebx)
-	BODY( 5*4,0xfc93a039,FN4,S44,%ebx,%ecx,%edx,%eax)
-
-	BODY(12*4,0x655b59c3,FN4,S41,%eax,%ebx,%ecx,%edx)
-	BODY( 3*4,0x8f0ccc92,FN4,S42,%edx,%eax,%ebx,%ecx)
-	BODY(10*4,0xffeff47d,FN4,S43,%ecx,%edx,%eax,%ebx)
-	BODY( 1*4,0x85845dd1,FN4,S44,%ebx,%ecx,%edx,%eax)
-
-	BODY( 8*4,0x6fa87e4f,FN4,S41,%eax,%ebx,%ecx,%edx)
-	BODY(15*4,0xfe2ce6e0,FN4,S42,%edx,%eax,%ebx,%ecx)
-	BODY( 6*4,0xa3014314,FN4,S43,%ecx,%edx,%eax,%ebx)
-	BODY(13*4,0x4e0811a1,FN4,S44,%ebx,%ecx,%edx,%eax)
-
-	BODY( 4*4,0xf7537e82,FN4,S41,%eax,%ebx,%ecx,%edx)
-	BODY(11*4,0xbd3af235,FN4,S42,%edx,%eax,%ebx,%ecx)
-	BODY( 2*4,0x2ad7d2bb,FN4,S43,%ecx,%edx,%eax,%ebx)
-	BODY( 9*4,0xeb86d391,FN4,S44,%ebx,%ecx,%edx,%eax)
-
-	addl $(16*4), %ebp
-	movl STATE(%esp), %edi
-	addl %eax,0(%edi)
-	addl %ebx,4(%edi)
-	addl %ecx,8(%edi)
-	addl %edx,12(%edi)
-
-	movl EDATA(%esp), %edi
-	cmpl %edi, %ebp
-	jb mainloop
-
-	/* Postlude */
-	movl OLDEBX(%esp), %ebx
-	movl OLDESI(%esp), %esi
-	movl OLDEDI(%esp), %edi
-	movl %esp, %ebp
-	leave
-	ret
-
--- /dev/null
+++ b/posix-386/md5block.spp
@@ -1,0 +1,244 @@
+/*
+ *  rfc1321 requires that I include this.  The code is new.  The constants
+ *  all come from the rfc (hence the copyright).  We trade a table for the
+ *  macros in rfc.  The total size is a lot less. -- presotto
+ *
+ *	Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+ *	rights reserved.
+ *
+ *	License to copy and use this software is granted provided that it
+ *	is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ *	Algorithm" in all material mentioning or referencing this software
+ *	or this function.
+ *
+ *	License is also granted to make and use derivative works provided
+ *	that such works are identified as "derived from the RSA Data
+ *	Security, Inc. MD5 Message-Digest Algorithm" in all material
+ *	mentioning or referencing the derived work.
+ *
+ *	RSA Data Security, Inc. makes no representations concerning either
+ *	the merchantability of this software or the suitability of this
+ *	software forany particular purpose. It is provided "as is"
+ *	without express or implied warranty of any kind.
+ *	These notices must be retained in any copies of any part of this
+ *	documentation and/or software.
+ */
+#define S11 7
+#define S12 12
+#define S13 17
+#define S14 22
+
+#define S21 5
+#define S22 9
+#define S23 14
+#define S24 20
+
+#define S31 4
+#define S32 11
+#define S33 16
+#define S34 23
+
+#define S41 6
+#define S42 10
+#define S43 15
+#define S44 21
+
+#define PAYME(x) $ ## x
+
+/*
+ * SI is data
+ *	a += FN(B,C,D);
+ *	a += x[sh] + t[sh];
+ *	a = (a << S11) | (a >> (32 - S11));
+ *	a += b;
+ */
+
+#define BODY1(off,V,FN,SH,A,B,C,D)\
+	FN(B,C,D)\
+	leal V(A, %edi, 1), A;\
+	addl off(%ebp), A;\
+	roll PAYME(SH), A;\
+	addl B, A;\
+
+#define BODY(off,V,FN,SH,A,B,C,D)\
+	FN(B,C,D)\
+	leal V(A, %edi, 1), A;\
+	addl (off)(%ebp), A;\
+	roll PAYME(SH), A;\
+	addl B,A;\
+
+/*
+ * fn1 = ((c ^ d) & b) ^ d
+ */
+#define FN1(B,C,D)\
+	movl C, %edi;\
+	xorl D, %edi;\
+	andl B, %edi;\
+	xorl D, %edi;\
+
+/*
+ * fn2 = ((b ^ c) & d) ^ c;
+ */
+#define FN2(B,C,D)\
+	movl B, %edi;\
+	xorl C, %edi;\
+	andl D, %edi;\
+	xorl C, %edi;\
+
+/*
+ * fn3 = b ^ c ^ d;
+ */
+#define FN3(B,C,D)\
+	movl B, %edi;\
+	xorl C, %edi;\
+	xorl D, %edi;\
+
+/*
+ * fn4 = c ^ (b | ~d);
+ */
+#define FN4(B,C,D)\
+	movl D, %edi;\
+	xorl $-1, %edi;\
+	orl B, %edi;\
+	xorl C, %edi;\
+
+#define	STACKSIZE	20
+
+#define	DATA	(STACKSIZE+8)
+#define	LEN	(STACKSIZE+12)
+#define	STATE	(STACKSIZE+16)
+
+#define EDATA	(STACKSIZE-4)
+#define OLDEBX	(STACKSIZE-8)
+#define OLDESI	(STACKSIZE-12)
+#define OLDEDI	(STACKSIZE-16)
+
+	.text
+
+	.p2align 2,0x90
+	.globl _md5block
+		.type _md5block, @function
+	_md5block:
+
+	/* Prelude */
+	pushl %ebp
+	subl $(STACKSIZE), %esp
+	movl %ebx, OLDEBX(%esp)
+	movl %esi, OLDESI(%esp)
+	movl %edi, OLDEDI(%esp)
+
+	movl	DATA(%esp), %eax
+	addl	LEN(%esp), %eax
+	movl	%eax, EDATA(%esp)
+
+	movl DATA(%esp), %ebp
+
+0:
+	movl STATE(%esp), %esi
+	movl (%esi), %eax
+	movl 4(%esi), %ebx
+	movl 8(%esi), %ecx
+	movl 12(%esi), %edx
+
+	BODY1( 0*4,0xd76aa478,FN1,S11,%eax,%ebx,%ecx,%edx)
+	BODY1( 1*4,0xe8c7b756,FN1,S12,%edx,%eax,%ebx,%ecx)
+	BODY1( 2*4,0x242070db,FN1,S13,%ecx,%edx,%eax,%ebx)
+	BODY1( 3*4,0xc1bdceee,FN1,S14,%ebx,%ecx,%edx,%eax)
+
+	BODY1( 4*4,0xf57c0faf,FN1,S11,%eax,%ebx,%ecx,%edx)
+	BODY1( 5*4,0x4787c62a,FN1,S12,%edx,%eax,%ebx,%ecx)
+	BODY1( 6*4,0xa8304613,FN1,S13,%ecx,%edx,%eax,%ebx)
+	BODY1( 7*4,0xfd469501,FN1,S14,%ebx,%ecx,%edx,%eax)
+
+	BODY1( 8*4,0x698098d8,FN1,S11,%eax,%ebx,%ecx,%edx)
+	BODY1( 9*4,0x8b44f7af,FN1,S12,%edx,%eax,%ebx,%ecx)
+	BODY1(10*4,0xffff5bb1,FN1,S13,%ecx,%edx,%eax,%ebx)
+	BODY1(11*4,0x895cd7be,FN1,S14,%ebx,%ecx,%edx,%eax)
+
+	BODY1(12*4,0x6b901122,FN1,S11,%eax,%ebx,%ecx,%edx)
+	BODY1(13*4,0xfd987193,FN1,S12,%edx,%eax,%ebx,%ecx)
+	BODY1(14*4,0xa679438e,FN1,S13,%ecx,%edx,%eax,%ebx)
+	BODY1(15*4,0x49b40821,FN1,S14,%ebx,%ecx,%edx,%eax)
+
+
+	BODY( 1*4,0xf61e2562,FN2,S21,%eax,%ebx,%ecx,%edx)
+	BODY( 6*4,0xc040b340,FN2,S22,%edx,%eax,%ebx,%ecx)
+	BODY(11*4,0x265e5a51,FN2,S23,%ecx,%edx,%eax,%ebx)
+	BODY( 0*4,0xe9b6c7aa,FN2,S24,%ebx,%ecx,%edx,%eax)
+
+	BODY( 5*4,0xd62f105d,FN2,S21,%eax,%ebx,%ecx,%edx)
+	BODY(10*4,0x02441453,FN2,S22,%edx,%eax,%ebx,%ecx)
+	BODY(15*4,0xd8a1e681,FN2,S23,%ecx,%edx,%eax,%ebx)
+	BODY( 4*4,0xe7d3fbc8,FN2,S24,%ebx,%ecx,%edx,%eax)
+
+	BODY( 9*4,0x21e1cde6,FN2,S21,%eax,%ebx,%ecx,%edx)
+	BODY(14*4,0xc33707d6,FN2,S22,%edx,%eax,%ebx,%ecx)
+	BODY( 3*4,0xf4d50d87,FN2,S23,%ecx,%edx,%eax,%ebx)
+	BODY( 8*4,0x455a14ed,FN2,S24,%ebx,%ecx,%edx,%eax)
+
+	BODY(13*4,0xa9e3e905,FN2,S21,%eax,%ebx,%ecx,%edx)
+	BODY( 2*4,0xfcefa3f8,FN2,S22,%edx,%eax,%ebx,%ecx)
+	BODY( 7*4,0x676f02d9,FN2,S23,%ecx,%edx,%eax,%ebx)
+	BODY(12*4,0x8d2a4c8a,FN2,S24,%ebx,%ecx,%edx,%eax)
+
+
+	BODY( 5*4,0xfffa3942,FN3,S31,%eax,%ebx,%ecx,%edx)
+	BODY( 8*4,0x8771f681,FN3,S32,%edx,%eax,%ebx,%ecx)
+	BODY(11*4,0x6d9d6122,FN3,S33,%ecx,%edx,%eax,%ebx)
+	BODY(14*4,0xfde5380c,FN3,S34,%ebx,%ecx,%edx,%eax)
+
+	BODY( 1*4,0xa4beea44,FN3,S31,%eax,%ebx,%ecx,%edx)
+	BODY( 4*4,0x4bdecfa9,FN3,S32,%edx,%eax,%ebx,%ecx)
+	BODY( 7*4,0xf6bb4b60,FN3,S33,%ecx,%edx,%eax,%ebx)
+	BODY(10*4,0xbebfbc70,FN3,S34,%ebx,%ecx,%edx,%eax)
+
+	BODY(13*4,0x289b7ec6,FN3,S31,%eax,%ebx,%ecx,%edx)
+	BODY( 0*4,0xeaa127fa,FN3,S32,%edx,%eax,%ebx,%ecx)
+	BODY( 3*4,0xd4ef3085,FN3,S33,%ecx,%edx,%eax,%ebx)
+	BODY( 6*4,0x04881d05,FN3,S34,%ebx,%ecx,%edx,%eax)
+
+	BODY( 9*4,0xd9d4d039,FN3,S31,%eax,%ebx,%ecx,%edx)
+	BODY(12*4,0xe6db99e5,FN3,S32,%edx,%eax,%ebx,%ecx)
+	BODY(15*4,0x1fa27cf8,FN3,S33,%ecx,%edx,%eax,%ebx)
+	BODY( 2*4,0xc4ac5665,FN3,S34,%ebx,%ecx,%edx,%eax)
+
+
+	BODY( 0*4,0xf4292244,FN4,S41,%eax,%ebx,%ecx,%edx)
+	BODY( 7*4,0x432aff97,FN4,S42,%edx,%eax,%ebx,%ecx)
+	BODY(14*4,0xab9423a7,FN4,S43,%ecx,%edx,%eax,%ebx)
+	BODY( 5*4,0xfc93a039,FN4,S44,%ebx,%ecx,%edx,%eax)
+
+	BODY(12*4,0x655b59c3,FN4,S41,%eax,%ebx,%ecx,%edx)
+	BODY( 3*4,0x8f0ccc92,FN4,S42,%edx,%eax,%ebx,%ecx)
+	BODY(10*4,0xffeff47d,FN4,S43,%ecx,%edx,%eax,%ebx)
+	BODY( 1*4,0x85845dd1,FN4,S44,%ebx,%ecx,%edx,%eax)
+
+	BODY( 8*4,0x6fa87e4f,FN4,S41,%eax,%ebx,%ecx,%edx)
+	BODY(15*4,0xfe2ce6e0,FN4,S42,%edx,%eax,%ebx,%ecx)
+	BODY( 6*4,0xa3014314,FN4,S43,%ecx,%edx,%eax,%ebx)
+	BODY(13*4,0x4e0811a1,FN4,S44,%ebx,%ecx,%edx,%eax)
+
+	BODY( 4*4,0xf7537e82,FN4,S41,%eax,%ebx,%ecx,%edx)
+	BODY(11*4,0xbd3af235,FN4,S42,%edx,%eax,%ebx,%ecx)
+	BODY( 2*4,0x2ad7d2bb,FN4,S43,%ecx,%edx,%eax,%ebx)
+	BODY( 9*4,0xeb86d391,FN4,S44,%ebx,%ecx,%edx,%eax)
+
+	addl $(16*4), %ebp
+	movl STATE(%esp), %edi
+	addl %eax,0(%edi)
+	addl %ebx,4(%edi)
+	addl %ecx,8(%edi)
+	addl %edx,12(%edi)
+
+	movl EDATA(%esp), %edi
+	cmpl %edi, %ebp
+	jb 0b
+
+	/* Postlude */
+	movl OLDEBX(%esp), %ebx
+	movl OLDESI(%esp), %esi
+	movl OLDEDI(%esp), %edi
+	addl $(STACKSIZE), %esp
+	popl %ebp
+	ret
+
--- a/posix-386/sha1block.s
+++ /dev/null
@@ -1,214 +1,0 @@
-.text
-
-.p2align 2,0x90
-.globl _sha1block
-	.type _sha1block, @function
-_sha1block:
-
-/* x = (wp[off-f] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1;
- * wp[off] = x;
- * x += A <<< 5;
- * E += 0xca62c1d6 + x;
- * x = FN(B,C,D);
- * E += x;
- * B >>> 2
- */
-#define BSWAPDI	BYTE $0x0f; BYTE $0xcf;
-
-#define BODY(off,FN,V,A,B,C,D,E)\
-	movl (off-64)(%ebp), %edi;\
-	xorl (off-56)(%ebp), %edi;\
-	xorl (off-32)(%ebp), %edi;\
-	xorl (off-12)(%ebp), %edi;\
-	roll $1, %edi;\
-	movl %edi, off(%ebp);\
-	leal V(%edi, E, 1), E;\
-	movl A, %edi;\
-	roll $5, %edi;\
-	addl %edi, E;\
-	FN(B,C,D)\
-	addl %edi, E;\
-	rorl $2, B;\
-
-#define BODY0(off,FN,V,A,B,C,D,E)\
-	movl off(%ebx), %edi;\
-	bswap %edi;\
-	movl %edi, off(%ebp);\
-	leal V(%edi,E,1), E;\
-	movl A, %edi;\
-	roll $5,%edi;\
-	addl %edi,E;\
-	FN(B,C,D)\
-	addl %edi,E;\
-	rorl $2,B;\
-
-/*
- * fn1 = (((C^D)&B)^D);
- */
-#define FN1(B,C,D)\
-	movl C, %edi;\
-	xorl D, %edi;\
-	andl B, %edi;\
-	xorl D, %edi;\
-
-/*
- * fn24 = B ^ C ^ D
- */
-#define FN24(B,C,D)\
-	movl B, %edi;\
-	xorl C, %edi;\
-	xorl D, %edi;\
-
-/*
- * fn3 = ((B ^ C) & (D ^= B)) ^ B
- * D ^= B to restore D
- */
-#define FN3(B,C,D)\
-	movl B, %edi;\
-	xorl C, %edi;\
-	xorl B, D;\
-	andl D, %edi;\
-	xorl B, %edi;\
-	xorl B, D;\
-
-/*
- * stack offsets
- * void sha1block(uchar *DATA, int LEN, ulong *STATE)
- */
-#define	DATA	8
-#define	LEN	12
-#define	STATE	16
-
-/*
- * stack offsets for locals
- * ulong w[80];
- * uchar *edata;
- * ulong *w15, *w40, *w60, *w80;
- * register local
- * ulong *wp = %ebp
- * ulong a = eax, b = ebx, c = ecx, d = edx, e = esi
- * ulong tmp = edi
- */
-#define WARRAY	(-4-(80*4))
-#define TMP1	(-8-(80*4))
-#define TMP2	(-12-(80*4))
-#define W15	(-16-(80*4))
-#define W40	(-20-(80*4))
-#define W60	(-24-(80*4))
-#define W80	(-28-(80*4))
-#define EDATA	(-32-(80*4))
-#define OLDEBX	(-36-(80*4))
-#define OLDESI	(-40-(80*4))
-#define OLDEDI	(-44-(80*4))
-
-	/* Prelude */
-	pushl %ebp
-	mov %ebx, OLDEBX(%esp)
-	mov %esi, OLDESI(%esp)
-	mov %edi, OLDEDI(%esp)
-
-	movl DATA(%esp), %eax
-	addl LEN(%esp), %eax
-	movl %eax, EDATA(%esp)
-
-	leal (WARRAY+15*4)(%esp), %edi	/* aw15 */
-	movl %edi, W15(%esp)
-	leal (WARRAY+40*4)(%esp), %edx	/* aw40 */
-	movl %edx, W40(%esp)
-	leal (WARRAY+60*4)(%esp), %ecx	/* aw60 */
-	movl %ecx, W60(%esp)
-	leal (WARRAY+80*4)(%esp), %edi	/* aw80 */
-	movl %edi, W80(%esp)
-
-mainloop:
-	leal WARRAY(%esp), %ebp		/* warray */
-
-	movl STATE(%esp), %edi		/* state */
-	movl (%edi),%eax
-	movl 4(%edi),%ebx
-	movl %ebx, TMP1(%esp)		/* tmp1 */
-	movl 8(%edi), %ecx
-	movl 12(%edi), %edx
-	movl 16(%edi), %esi
-
-	movl DATA(%esp), %ebx		/* data */
-
-loop1:
-	BODY0(0,FN1,0x5a827999,%eax,TMP1(%esp),%ecx,%edx,%esi)
-	movl %esi,TMP2(%esp)
-	BODY0(4,FN1,0x5a827999,%esi,%eax,TMP1(%esp),%ecx,%edx)
-	movl TMP1(%esp),%esi
-	BODY0(8,FN1,0x5a827999,%edx,TMP2(%esp),%eax,%esi,%ecx)
-	BODY0(12,FN1,0x5a827999,%ecx,%edx,TMP2(%esp),%eax,%esi)
-	movl %esi,TMP1(%esp)
-	BODY0(16,FN1,0x5a827999,%esi,%ecx,%edx,TMP2(%esp),%eax)
-	movl TMP2(%esp),%esi
-
-	addl $20, %ebx
-	addl $20, %ebp
-	cmpl W15(%esp), %ebp	/* w15 */
-	jb loop1
-
-	BODY0(0,FN1,0x5a827999,%eax,TMP1(%esp),%ecx,%edx,%esi)
-	addl $4, %ebx
-	MOVL %ebx, DATA(%esp)	/* data */
-	MOVL TMP1(%esp),%ebx
-
-	BODY(4,FN1,0x5a827999,%esi,%eax,%ebx,%ecx,%edx)
-	BODY(8,FN1,0x5a827999,%edx,%esi,%eax,%ebx,%ecx)
-	BODY(12,FN1,0x5a827999,%ecx,%edx,%esi,%eax,%ebx)
-	BODY(16,FN1,0x5a827999,%ebx,%ecx,%edx,%esi,%eax)
-
-	addl $20, %ebp
-
-loop2:
-	BODY(0,FN24,0x6ed9eba1,%eax,%ebx,%ecx,%edx,%esi)
-	BODY(4,FN24,0x6ed9eba1,%esi,%eax,%ebx,%ecx,%edx)
-	BODY(8,FN24,0x6ed9eba1,%edx,%esi,%eax,%ebx,%ecx)
-	BODY(12,FN24,0x6ed9eba1,%ecx,%edx,%esi,%eax,%ebx)
-	BODY(16,FN24,0x6ed9eba1,%ebx,%ecx,%edx,%esi,%eax)
-
-	addl $20,%ebp
-	cmpl W40(%esp), %ebp
-	jb loop2
-
-loop3:
-	BODY(0,FN3,0x8f1bbcdc,%eax,%ebx,%ecx,%edx,%esi)
-	BODY(4,FN3,0x8f1bbcdc,%esi,%eax,%ebx,%ecx,%edx)
-	BODY(8,FN3,0x8f1bbcdc,%edx,%esi,%eax,%ebx,%ecx)
-	BODY(12,FN3,0x8f1bbcdc,%ecx,%edx,%esi,%eax,%ebx)
-	BODY(16,FN3,0x8f1bbcdc,%ebx,%ecx,%edx,%esi,%eax)
-
-	addl $20, %ebp
-	cmpl W60(%esp), %ebp 	/* w60 */
-	jb loop3
-
-loop4:
-	BODY(0,FN24,0xca62c1d6,%eax,%ebx,%ecx,%edx,%esi)
-	BODY(4,FN24,0xca62c1d6,%esi,%eax,%ebx,%ecx,%edx)
-	BODY(8,FN24,0xca62c1d6,%edx,%esi,%eax,%ebx,%ecx)
-	BODY(12,FN24,0xca62c1d6,%ecx,%edx,%esi,%eax,%ebx)
-	BODY(16,FN24,0xca62c1d6,%ebx,%ecx,%edx,%esi,%eax)
-
-	addl $20, %ebp
-	cmpl W80(%esp), %ebp 	/* w80 */
-	jb loop4
-
-	movl STATE(%esp), %edi	/* state */
-	addl %eax, 0(%edi)
-	addl %ebx, 4(%edi)
-	addl %ecx, 8(%edi)
-	addl %edx, 12(%edi)
-	addl %esi, 16(%edi)
-
-	movl EDATA(%esp), %edi	/* edata */
-	cmpl %edi, DATA(%esp)	/* data */
-	jb mainloop
-
-	/* Postlude */
-	mov OLDEBX(%esp), %ebx
-	mov OLDESI(%esp), %esi
-	mov OLDEDI(%esp), %edi
-	movl %esp, %ebp
-	leave
-	ret
--- /dev/null
+++ b/posix-386/sha1block.spp
@@ -1,0 +1,217 @@
+.text
+
+.p2align 2,0x90
+.globl _sha1block
+	.type _sha1block, @function
+_sha1block:
+
+/* x = (wp[off-f] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1;
+ * wp[off] = x;
+ * x += A <<< 5;
+ * E += 0xca62c1d6 + x;
+ * x = FN(B,C,D);
+ * E += x;
+ * B >>> 2
+ */
+#define BSWAPDI	BYTE $0x0f; BYTE $0xcf;
+
+#define BODY(off,FN,V,A,B,C,D,E)\
+	movl (off-64)(%ebp), %edi;\
+	xorl (off-56)(%ebp), %edi;\
+	xorl (off-32)(%ebp), %edi;\
+	xorl (off-12)(%ebp), %edi;\
+	roll $1, %edi;\
+	movl %edi, off(%ebp);\
+	leal V(%edi, E, 1), E;\
+	movl A, %edi;\
+	roll $5, %edi;\
+	addl %edi, E;\
+	FN(B,C,D)\
+	addl %edi, E;\
+	rorl $2, B;\
+
+#define BODY0(off,FN,V,A,B,C,D,E)\
+	movl off(%ebx), %edi;\
+	bswap %edi;\
+	movl %edi, off(%ebp);\
+	leal V(%edi,E,1), E;\
+	movl A, %edi;\
+	roll $5,%edi;\
+	addl %edi,E;\
+	FN(B,C,D)\
+	addl %edi,E;\
+	rorl $2,B;\
+
+/*
+ * fn1 = (((C^D)&B)^D);
+ */
+#define FN1(B,C,D)\
+	movl C, %edi;\
+	xorl D, %edi;\
+	andl B, %edi;\
+	xorl D, %edi;\
+
+/*
+ * fn24 = B ^ C ^ D
+ */
+#define FN24(B,C,D)\
+	movl B, %edi;\
+	xorl C, %edi;\
+	xorl D, %edi;\
+
+/*
+ * fn3 = ((B ^ C) & (D ^= B)) ^ B
+ * D ^= B to restore D
+ */
+#define FN3(B,C,D)\
+	movl B, %edi;\
+	xorl C, %edi;\
+	xorl B, D;\
+	andl D, %edi;\
+	xorl B, %edi;\
+	xorl B, D;\
+
+/*
+ * stack offsets
+ * void sha1block(uchar *DATA, int LEN, ulong *STATE)
+ */
+#define STACKSIZE (48+80*4)
+#define	DATA	(STACKSIZE+8)
+#define	LEN	(STACKSIZE+12)
+#define	STATE	(STACKSIZE+16)
+
+/*
+ * stack offsets for locals
+ * ulong w[80];
+ * uchar *edata;
+ * ulong *w15, *w40, *w60, *w80;
+ * register local
+ * ulong *wp = %ebp
+ * ulong a = eax, b = ebx, c = ecx, d = edx, e = esi
+ * ulong tmp = edi
+ */
+#define WARRAY	(STACKSIZE-4-(80*4))
+#define TMP1	(STACKSIZE-8-(80*4))
+#define TMP2	(STACKSIZE-12-(80*4))
+#define W15	(STACKSIZE-16-(80*4))
+#define W40	(STACKSIZE-20-(80*4))
+#define W60	(STACKSIZE-24-(80*4))
+#define W80	(STACKSIZE-28-(80*4))
+#define EDATA	(STACKSIZE-32-(80*4))
+#define OLDEBX	(STACKSIZE-36-(80*4))
+#define OLDESI	(STACKSIZE-40-(80*4))
+#define OLDEDI	(STACKSIZE-44-(80*4))
+
+	/* Prelude */
+	pushl %ebp
+	subl $(STACKSIZE), %esp
+
+	mov %ebx, OLDEBX(%esp)
+	mov %esi, OLDESI(%esp)
+	mov %edi, OLDEDI(%esp)
+
+	movl DATA(%esp), %eax
+	addl LEN(%esp), %eax
+	movl %eax, EDATA(%esp)
+
+	leal (WARRAY+15*4)(%esp), %edi	/* aw15 */
+	movl %edi, W15(%esp)
+	leal (WARRAY+40*4)(%esp), %edx	/* aw40 */
+	movl %edx, W40(%esp)
+	leal (WARRAY+60*4)(%esp), %ecx	/* aw60 */
+	movl %ecx, W60(%esp)
+	leal (WARRAY+80*4)(%esp), %edi	/* aw80 */
+	movl %edi, W80(%esp)
+
+0:
+	leal WARRAY(%esp), %ebp		/* warray */
+
+	movl STATE(%esp), %edi		/* state */
+	movl (%edi),%eax
+	movl 4(%edi),%ebx
+	movl %ebx, TMP1(%esp)		/* tmp1 */
+	movl 8(%edi), %ecx
+	movl 12(%edi), %edx
+	movl 16(%edi), %esi
+
+	movl DATA(%esp), %ebx		/* data */
+
+1:
+	BODY0(0,FN1,0x5a827999,%eax,TMP1(%esp),%ecx,%edx,%esi)
+	movl %esi,TMP2(%esp)
+	BODY0(4,FN1,0x5a827999,%esi,%eax,TMP1(%esp),%ecx,%edx)
+	movl TMP1(%esp),%esi
+	BODY0(8,FN1,0x5a827999,%edx,TMP2(%esp),%eax,%esi,%ecx)
+	BODY0(12,FN1,0x5a827999,%ecx,%edx,TMP2(%esp),%eax,%esi)
+	movl %esi,TMP1(%esp)
+	BODY0(16,FN1,0x5a827999,%esi,%ecx,%edx,TMP2(%esp),%eax)
+	movl TMP2(%esp),%esi
+
+	addl $20, %ebx
+	addl $20, %ebp
+	cmpl W15(%esp), %ebp	/* w15 */
+	jb 1b
+
+	BODY0(0,FN1,0x5a827999,%eax,TMP1(%esp),%ecx,%edx,%esi)
+	addl $4, %ebx
+	MOVL %ebx, DATA(%esp)	/* data */
+	MOVL TMP1(%esp),%ebx
+
+	BODY(4,FN1,0x5a827999,%esi,%eax,%ebx,%ecx,%edx)
+	BODY(8,FN1,0x5a827999,%edx,%esi,%eax,%ebx,%ecx)
+	BODY(12,FN1,0x5a827999,%ecx,%edx,%esi,%eax,%ebx)
+	BODY(16,FN1,0x5a827999,%ebx,%ecx,%edx,%esi,%eax)
+
+	addl $20, %ebp
+
+2:
+	BODY(0,FN24,0x6ed9eba1,%eax,%ebx,%ecx,%edx,%esi)
+	BODY(4,FN24,0x6ed9eba1,%esi,%eax,%ebx,%ecx,%edx)
+	BODY(8,FN24,0x6ed9eba1,%edx,%esi,%eax,%ebx,%ecx)
+	BODY(12,FN24,0x6ed9eba1,%ecx,%edx,%esi,%eax,%ebx)
+	BODY(16,FN24,0x6ed9eba1,%ebx,%ecx,%edx,%esi,%eax)
+
+	addl $20,%ebp
+	cmpl W40(%esp), %ebp
+	jb 2b
+
+3:
+	BODY(0,FN3,0x8f1bbcdc,%eax,%ebx,%ecx,%edx,%esi)
+	BODY(4,FN3,0x8f1bbcdc,%esi,%eax,%ebx,%ecx,%edx)
+	BODY(8,FN3,0x8f1bbcdc,%edx,%esi,%eax,%ebx,%ecx)
+	BODY(12,FN3,0x8f1bbcdc,%ecx,%edx,%esi,%eax,%ebx)
+	BODY(16,FN3,0x8f1bbcdc,%ebx,%ecx,%edx,%esi,%eax)
+
+	addl $20, %ebp
+	cmpl W60(%esp), %ebp 	/* w60 */
+	jb 3b
+
+4:
+	BODY(0,FN24,0xca62c1d6,%eax,%ebx,%ecx,%edx,%esi)
+	BODY(4,FN24,0xca62c1d6,%esi,%eax,%ebx,%ecx,%edx)
+	BODY(8,FN24,0xca62c1d6,%edx,%esi,%eax,%ebx,%ecx)
+	BODY(12,FN24,0xca62c1d6,%ecx,%edx,%esi,%eax,%ebx)
+	BODY(16,FN24,0xca62c1d6,%ebx,%ecx,%edx,%esi,%eax)
+
+	addl $20, %ebp
+	cmpl W80(%esp), %ebp 	/* w80 */
+	jb 4b
+
+	movl STATE(%esp), %edi	/* state */
+	addl %eax, 0(%edi)
+	addl %ebx, 4(%edi)
+	addl %ecx, 8(%edi)
+	addl %edx, 12(%edi)
+	addl %esi, 16(%edi)
+
+	movl EDATA(%esp), %edi	/* edata */
+	cmpl %edi, DATA(%esp)	/* data */
+	jb 0b
+
+	/* Postlude */
+	mov OLDEBX(%esp), %ebx
+	mov OLDESI(%esp), %esi
+	mov OLDEDI(%esp), %edi
+	addl $(STACKSIZE), %esp
+	popl %ebp
+	ret
--- a/posix-power/Makefile
+++ b/posix-power/Makefile
@@ -4,6 +4,8 @@
 
 OFILES=\
 	getcallerpc.$O\
+	md5block.$O\
+	sha1block.$O\
 	tas.$O
 
 default: $(LIB)
@@ -13,4 +15,11 @@
 
 %.$O: %.c
 	$(CC) $(CFLAGS) $*.c
+
+%.$O: %.s
+	$(AS) -o $*.$O $*.s
+
+%.s: %.spp
+	cpp $*.spp >$*.s
+
 
--- /dev/null
+++ b/posix-power/md5block.c
@@ -1,0 +1,267 @@
+#include "os.h"
+#include <libsec.h>
+
+/*
+ *  rfc1321 requires that I include this.  The code is new.  The constants
+ *  all come from the rfc (hence the copyright).  We trade a table for the
+ *  macros in rfc.  The total size is a lot less. -- presotto
+ *
+ *	Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
+ *	rights reserved.
+ *
+ *	License to copy and use this software is granted provided that it
+ *	is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ *	Algorithm" in all material mentioning or referencing this software
+ *	or this function.
+ *
+ *	License is also granted to make and use derivative works provided
+ *	that such works are identified as "derived from the RSA Data
+ *	Security, Inc. MD5 Message-Digest Algorithm" in all material
+ *	mentioning or referencing the derived work.
+ *
+ *	RSA Data Security, Inc. makes no representations concerning either
+ *	the merchantability of this software or the suitability of this
+ *	software forany particular purpose. It is provided "as is"
+ *	without express or implied warranty of any kind.
+ *	These notices must be retained in any copies of any part of this
+ *	documentation and/or software.
+ */
+
+/*
+ *	Rotate ammounts used in the algorithm
+ */
+enum
+{
+	S11=	7,
+	S12=	12,
+	S13=	17,
+	S14=	22,
+
+	S21=	5,
+	S22=	9,
+	S23=	14,
+	S24=	20,
+
+	S31=	4,
+	S32=	11,
+	S33=	16,
+	S34=	23,
+
+	S41=	6,
+	S42=	10,
+	S43=	15,
+	S44=	21,
+};
+
+static u32int md5tab[] =
+{
+	/* round 1 */
+/*[0]*/	0xd76aa478,	
+	0xe8c7b756,	
+	0x242070db,	
+	0xc1bdceee,	
+	0xf57c0faf,	
+	0x4787c62a,	
+	0xa8304613,	
+	0xfd469501,	
+	0x698098d8,	
+	0x8b44f7af,	
+	0xffff5bb1,	
+	0x895cd7be,	
+	0x6b901122,	
+	0xfd987193,	
+	0xa679438e,	
+	0x49b40821,
+
+	/* round 2 */
+/*[16]*/0xf61e2562,	
+	0xc040b340,	
+	0x265e5a51,	
+	0xe9b6c7aa,	
+	0xd62f105d,	
+	 0x2441453,	
+	0xd8a1e681,	
+	0xe7d3fbc8,	
+	0x21e1cde6,	
+	0xc33707d6,	
+	0xf4d50d87,	
+	0x455a14ed,	
+	0xa9e3e905,	
+	0xfcefa3f8,	
+	0x676f02d9,	
+	0x8d2a4c8a,
+
+	/* round 3 */
+/*[32]*/0xfffa3942,	
+	0x8771f681,	
+	0x6d9d6122,	
+	0xfde5380c,	
+	0xa4beea44,	
+	0x4bdecfa9,	
+	0xf6bb4b60,	
+	0xbebfbc70,	
+	0x289b7ec6,	
+	0xeaa127fa,	
+	0xd4ef3085,	
+	 0x4881d05,	
+	0xd9d4d039,	
+	0xe6db99e5,	
+	0x1fa27cf8,	
+	0xc4ac5665,	
+
+	/* round 4 */
+/*[48]*/0xf4292244,	
+	0x432aff97,	
+	0xab9423a7,	
+	0xfc93a039,	
+	0x655b59c3,	
+	0x8f0ccc92,	
+	0xffeff47d,	
+	0x85845dd1,	
+	0x6fa87e4f,	
+	0xfe2ce6e0,	
+	0xa3014314,	
+	0x4e0811a1,	
+	0xf7537e82,	
+	0xbd3af235,	
+	0x2ad7d2bb,	
+	0xeb86d391,	
+};
+
+static void decode(u32int*, uchar*, ulong);
+extern void _md5block(uchar *p, ulong len, u32int *s);
+
+void
+_md5block(uchar *p, ulong len, u32int *s)
+{
+	u32int a, b, c, d, sh;
+	u32int *t;
+	uchar *end;
+	u32int x[16];
+
+	for(end = p+len; p < end; p += 64){
+		a = s[0];
+		b = s[1];
+		c = s[2];
+		d = s[3];
+
+		decode(x, p, 64);
+	
+		t = md5tab;
+		sh = 0;
+		for(; sh != 16; t += 4){
+			a += ((c ^ d) & b) ^ d;
+			a += x[sh] + t[0];
+			a = (a << S11) | (a >> (32 - S11));
+			a += b;
+
+			d += ((b ^ c) & a) ^ c;
+			d += x[sh + 1] + t[1];
+			d = (d << S12) | (d >> (32 - S12));
+			d += a;
+
+			c += ((a ^ b) & d) ^ b;
+			c += x[sh + 2] + t[2];
+			c = (c << S13) | (c >> (32 - S13));
+			c += d;
+
+			b += ((d ^ a) & c) ^ a;
+			b += x[sh + 3] + t[3];
+			b = (b << S14) | (b >> (32 - S14));
+			b += c;
+
+			sh += 4;
+		}
+		sh = 1;
+		for(; sh != 1+20*4; t += 4){
+			a += ((b ^ c) & d) ^ c;
+			a += x[sh & 0xf] + t[0];
+			a = (a << S21) | (a >> (32 - S21));
+			a += b;
+
+			d += ((a ^ b) & c) ^ b;
+			d += x[(sh + 5) & 0xf] + t[1];
+			d = (d << S22) | (d >> (32 - S22));
+			d += a;
+
+			c += ((d ^ a) & b) ^ a;
+			c += x[(sh + 10) & 0xf] + t[2];
+			c = (c << S23) | (c >> (32 - S23));
+			c += d;
+
+			b += ((c ^ d) & a) ^ d;
+			b += x[(sh + 15) & 0xf] + t[3];
+			b = (b << S24) | (b >> (32 - S24));
+			b += c;
+
+			sh += 20;
+		}
+		sh = 5;
+		for(; sh != 5+12*4; t += 4){
+			a += b ^ c ^ d;
+			a += x[sh & 0xf] + t[0];
+			a = (a << S31) | (a >> (32 - S31));
+			a += b;
+
+			d += a ^ b ^ c;
+			d += x[(sh + 3) & 0xf] + t[1];
+			d = (d << S32) | (d >> (32 - S32));
+			d += a;
+
+			c += d ^ a ^ b;
+			c += x[(sh + 6) & 0xf] + t[2];
+			c = (c << S33) | (c >> (32 - S33));
+			c += d;
+
+			b += c ^ d ^ a;
+			b += x[(sh + 9) & 0xf] + t[3];
+			b = (b << S34) | (b >> (32 - S34));
+			b += c;
+
+			sh += 12;
+		}
+		sh = 0;
+		for(; sh != 28*4; t += 4){
+			a += c ^ (b | ~d);
+			a += x[sh & 0xf] + t[0];
+			a = (a << S41) | (a >> (32 - S41));
+			a += b;
+
+			d += b ^ (a | ~c);
+			d += x[(sh + 7) & 0xf] + t[1];
+			d = (d << S42) | (d >> (32 - S42));
+			d += a;
+
+			c += a ^ (d | ~b);
+			c += x[(sh + 14) & 0xf] + t[2];
+			c = (c << S43) | (c >> (32 - S43));
+			c += d;
+
+			b += d ^ (c | ~a);
+			b += x[(sh + 21) & 0xf] + t[3];
+			b = (b << S44) | (b >> (32 - S44));
+			b += c;
+
+			sh += 28;
+		}
+
+		s[0] += a;
+		s[1] += b;
+		s[2] += c;
+		s[3] += d;
+	}
+}
+
+/*
+ *	decodes input (uchar) into output (u32int). Assumes len is
+ *	a multiple of 4.
+ */
+static void
+decode(u32int *output, uchar *input, ulong len)
+{
+	uchar *e;
+
+	for(e = input+len; input < e; input += 4)
+		*output++ = input[0] | (input[1] << 8) |
+			(input[2] << 16) | (input[3] << 24);
+}
--- /dev/null
+++ b/posix-power/sha1block.c
@@ -1,0 +1,187 @@
+#include "os.h"
+
+void
+_sha1block(uchar *p, ulong len, u32int *s)
+{
+	u32int a, b, c, d, e, x;
+	uchar *end;
+	u32int *wp, *wend;
+	u32int w[80];
+
+	/* at this point, we have a multiple of 64 bytes */
+	for(end = p+len; p < end;){
+		a = s[0];
+		b = s[1];
+		c = s[2];
+		d = s[3];
+		e = s[4];
+
+		wend = w + 15;
+		for(wp = w; wp < wend; wp += 5){
+			wp[0] = (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
+			e += ((a<<5) | (a>>27)) + wp[0];
+			e += 0x5a827999 + (((c^d)&b)^d);
+			b = (b<<30)|(b>>2);
+
+			wp[1] = (p[4]<<24) | (p[5]<<16) | (p[6]<<8) | p[7];
+			d += ((e<<5) | (e>>27)) + wp[1];
+			d += 0x5a827999 + (((b^c)&a)^c);
+			a = (a<<30)|(a>>2);
+
+			wp[2] = (p[8]<<24) | (p[9]<<16) | (p[10]<<8) | p[11];
+			c += ((d<<5) | (d>>27)) + wp[2];
+			c += 0x5a827999 + (((a^b)&e)^b);
+			e = (e<<30)|(e>>2);
+
+			wp[3] = (p[12]<<24) | (p[13]<<16) | (p[14]<<8) | p[15];
+			b += ((c<<5) | (c>>27)) + wp[3];
+			b += 0x5a827999 + (((e^a)&d)^a);
+			d = (d<<30)|(d>>2);
+
+			wp[4] = (p[16]<<24) | (p[17]<<16) | (p[18]<<8) | p[19];
+			a += ((b<<5) | (b>>27)) + wp[4];
+			a += 0x5a827999 + (((d^e)&c)^e);
+			c = (c<<30)|(c>>2);
+			
+			p += 20;
+		}
+
+		wp[0] = (p[0]<<24) | (p[1]<<16) | (p[2]<<8) | p[3];
+		e += ((a<<5) | (a>>27)) + wp[0];
+		e += 0x5a827999 + (((c^d)&b)^d);
+		b = (b<<30)|(b>>2);
+
+		x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
+		wp[1] = (x<<1) | (x>>31);
+		d += ((e<<5) | (e>>27)) + wp[1];
+		d += 0x5a827999 + (((b^c)&a)^c);
+		a = (a<<30)|(a>>2);
+
+		x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
+		wp[2] = (x<<1) | (x>>31);
+		c += ((d<<5) | (d>>27)) + wp[2];
+		c += 0x5a827999 + (((a^b)&e)^b);
+		e = (e<<30)|(e>>2);
+
+		x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
+		wp[3] = (x<<1) | (x>>31);
+		b += ((c<<5) | (c>>27)) + wp[3];
+		b += 0x5a827999 + (((e^a)&d)^a);
+		d = (d<<30)|(d>>2);
+
+		x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
+		wp[4] = (x<<1) | (x>>31);
+		a += ((b<<5) | (b>>27)) + wp[4];
+		a += 0x5a827999 + (((d^e)&c)^e);
+		c = (c<<30)|(c>>2);
+
+		wp += 5;
+		p += 4;
+
+		wend = w + 40;
+		for(; wp < wend; wp += 5){
+			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
+			wp[0] = (x<<1) | (x>>31);
+			e += ((a<<5) | (a>>27)) + wp[0];
+			e += 0x6ed9eba1 + (b^c^d);
+			b = (b<<30)|(b>>2);
+
+			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
+			wp[1] = (x<<1) | (x>>31);
+			d += ((e<<5) | (e>>27)) + wp[1];
+			d += 0x6ed9eba1 + (a^b^c);
+			a = (a<<30)|(a>>2);
+
+			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
+			wp[2] = (x<<1) | (x>>31);
+			c += ((d<<5) | (d>>27)) + wp[2];
+			c += 0x6ed9eba1 + (e^a^b);
+			e = (e<<30)|(e>>2);
+
+			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
+			wp[3] = (x<<1) | (x>>31);
+			b += ((c<<5) | (c>>27)) + wp[3];
+			b += 0x6ed9eba1 + (d^e^a);
+			d = (d<<30)|(d>>2);
+
+			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
+			wp[4] = (x<<1) | (x>>31);
+			a += ((b<<5) | (b>>27)) + wp[4];
+			a += 0x6ed9eba1 + (c^d^e);
+			c = (c<<30)|(c>>2);
+		}
+
+		wend = w + 60;
+		for(; wp < wend; wp += 5){
+			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
+			wp[0] = (x<<1) | (x>>31);
+			e += ((a<<5) | (a>>27)) + wp[0];
+			e += 0x8f1bbcdc + ((b&c)|((b|c)&d));
+			b = (b<<30)|(b>>2);
+
+			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
+			wp[1] = (x<<1) | (x>>31);
+			d += ((e<<5) | (e>>27)) + wp[1];
+			d += 0x8f1bbcdc + ((a&b)|((a|b)&c));
+			a = (a<<30)|(a>>2);
+
+			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
+			wp[2] = (x<<1) | (x>>31);
+			c += ((d<<5) | (d>>27)) + wp[2];
+			c += 0x8f1bbcdc + ((e&a)|((e|a)&b));
+			e = (e<<30)|(e>>2);
+
+			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
+			wp[3] = (x<<1) | (x>>31);
+			b += ((c<<5) | (c>>27)) + wp[3];
+			b += 0x8f1bbcdc + ((d&e)|((d|e)&a));
+			d = (d<<30)|(d>>2);
+
+			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
+			wp[4] = (x<<1) | (x>>31);
+			a += ((b<<5) | (b>>27)) + wp[4];
+			a += 0x8f1bbcdc + ((c&d)|((c|d)&e));
+			c = (c<<30)|(c>>2);
+		}
+
+		wend = w + 80;
+		for(; wp < wend; wp += 5){
+			x = wp[-3] ^ wp[-8] ^ wp[-14] ^ wp[-16];
+			wp[0] = (x<<1) | (x>>31);
+			e += ((a<<5) | (a>>27)) + wp[0];
+			e += 0xca62c1d6 + (b^c^d);
+			b = (b<<30)|(b>>2);
+
+			x = wp[-2] ^ wp[-7] ^ wp[-13] ^ wp[-15];
+			wp[1] = (x<<1) | (x>>31);
+			d += ((e<<5) | (e>>27)) + wp[1];
+			d += 0xca62c1d6 + (a^b^c);
+			a = (a<<30)|(a>>2);
+
+			x = wp[-1] ^ wp[-6] ^ wp[-12] ^ wp[-14];
+			wp[2] = (x<<1) | (x>>31);
+			c += ((d<<5) | (d>>27)) + wp[2];
+			c += 0xca62c1d6 + (e^a^b);
+			e = (e<<30)|(e>>2);
+
+			x = wp[0] ^ wp[-5] ^ wp[-11] ^ wp[-13];
+			wp[3] = (x<<1) | (x>>31);
+			b += ((c<<5) | (c>>27)) + wp[3];
+			b += 0xca62c1d6 + (d^e^a);
+			d = (d<<30)|(d>>2);
+
+			x = wp[1] ^ wp[-4] ^ wp[-10] ^ wp[-12];
+			wp[4] = (x<<1) | (x>>31);
+			a += ((b<<5) | (b>>27)) + wp[4];
+			a += 0xca62c1d6 + (c^d^e);
+			c = (c<<30)|(c>>2);
+		}
+
+		/* save state */
+		s[0] += a;
+		s[1] += b;
+		s[2] += c;
+		s[3] += d;
+		s[4] += e;
+	}
+}
--- a/win32-386/md5block.spp
+++ b/win32-386/md5block.spp
@@ -116,9 +116,20 @@
 	.text
 
 	.p2align 2,0x90
+	.globl ___md5block
+	___md5block:
+
+	.p2align 2,0x90
+	.globl __md5block
+	__md5block:
+
+	.p2align 2,0x90
 	.globl _md5block
-		.type _md5block, @function
 	_md5block:
+
+	.p2align 2,0x90
+	.globl md5block
+	md5block:
 
 	/* Prelude */
 	pushl %ebp
--- a/win32-386/sha1block.spp
+++ b/win32-386/sha1block.spp
@@ -1,9 +1,23 @@
 .text
 
 .p2align 2,0x90
+.globl ___sha1block
+___sha1block:
+	jmp sha1block
+	
+.p2align 2,0x90
+.globl __sha1block
+__sha1block:
+	jmp sha1block
+	
+.p2align 2,0x90
 .globl _sha1block
-	.type _sha1block, @function
 _sha1block:
+	jmp sha1block
+
+.p2align 2,0x90
+.globl sha1block
+sha1block:
 
 /* x = (wp[off-f] ^ wp[off-8] ^ wp[off-14] ^ wp[off-16]) <<< 1;
  * wp[off] = x;
--- /dev/null
+++ b/win32-386/tas.c
@@ -1,0 +1,23 @@
+#include "u.h"
+#include "libc.h"
+
+int
+tas(long *x)
+{
+	int     v;
+
+	__asm__(	"movl   $1, %%eax\n\t"
+			"xchgl  %%eax,(%%ecx)"
+			: "=a" (v)
+			: "c" (x)
+	);
+	switch(v) {
+	case 0:
+	case 1:
+		return v;
+	default:
+		print("canlock: corrupted 0x%lux\n", v);
+		return 1;
+	}
+}
+