| Let's assume (for convenience) that you are going to represent the
number in binary. You have a function which allows you to build the
"index" by emitting its bits in order (either from lowest to highest or
vice versa, it doesn't matter) as you discover them. You are going to
build the index by looking at each element of the permutation 1 through
m, in turn. Let k1 (=k) be the number of different objects which
appear at or above location 1 in the permuation, k2 (= either k or k-1)
be the number of different objects which appear at or above location
2 in the permuation, etc. up to km (=1) which appaer at or above
the last locatio of the permuation.
Go through the elements of the permutation from first to last. At
location i, there are ki possible elements. Assume for the moment
that all the ki are powers of 2. Then you could specify the correct
choice at each position by emiting the appropriate number of bits.
(Keeping track of which number represents which element can be handled
in a pretty straight forward way with some form of rebalancing binary
tree which can be accessed at step i at average cost of log(ki)).
What if a ki is not a power of 2? (In fact unless all of the ni-s are
2 or less it is almost guarenteed that one of the kis will not be a
power of 2). This is precisely what the technique called arithmetic
coding is for (arithmetic coding is usually used for compression, and
if you think about it, this is simply a problem in maximal compression
of a permuated sequence).
Arithmetic coding is complicated enough so that I won't describe it
here -- but there was a nice article about it in CACM not too many
years ago. I can probably find the reference if you really want it.
Topher
|
| C This program is a quick example to show the idea of maximally
C compressed coding of permutations of object types. This coding
C preserves lexicographical ordering although it can be made more
C efficient if this property is discarded. See comments within code
C for how to do this.
C
C The input is very specialized and there is no error checking.
C
INTEGER IPERM1(99),IPERM2(99),N(9)
INTEGER ENPERM
C
C M is the number of distinct types of objects
C N[I] is the number of objects of the Ith type, I=1..M
C IPERM1 is the user input permutation
C IPERM2 is the decoded permutation
C
M=3
N(1)=4
N(2)=4
N(3)=1
NPOS=0
DO 10 I=1,M
10 NPOS=NPOS+N(I)
TYPE 20
20 FORMAT(' Input permutation (4 ones, 4 twos, and 1 three): ',$)
ACCEPT 30,(IPERM1(I),I=1,NPOS)
30 FORMAT(80I1)
KODE=ENPERM(N,M,IPERM1)
CALL DEPERM(IPERM2,N,M,KODE)
TYPE 40,KODE
40 FORMAT(' Code: ',I9)
TYPE 50,(IPERM1(I),I=1,NPOS)
50 FORMAT(' Input: ',70I1)
TYPE 60,(IPERM2(I),I=1,NPOS)
60 FORMAT(' Decoded: ',70I1)
CALL EXIT
END
SUBROUTINE DEPERM(IARRAY,N,M,KODE)
INTEGER IARRAY(1),N(1)
INTEGER IMAP(99),LEFT(99)
C
C IARRAY is the decoded permutation
C N[I] is the number of objects of the Ith type, I=1..M
C M is the number of distinct types of objects
C KODE is the permutation code
C IMAP(I) is the Ith type of object. This changes as objects are
C used up
C LEFT(I) is the number of unused objects of type I
C NPOS is the number of positions in the permutation
C NTOT is the total number of permutations
C ITOP is the number of object types that aren't used up
C
NPOS=0
NTOT=1
NPOS=0
DO 20 I=1,M
DO 10 J=1,N(I)
NPOS=NPOS+1
10 NTOT=(NTOT*NPOS)/J
IMAP(I)=I
20 LEFT(I)=N(I)
ITOP=M
NUM=KODE
KKK=NTOT
DO 50 IPOS=NPOS,1,-1
IBOT=0
I=0
30 I=I+1
LLL=(KKK*LEFT(I))/IPOS
IBOT=IBOT+LLL
IF (NUM.GE.IBOT) GOTO 30
KKK=LLL
NUM=NUM-IBOT+KKK
IARRAY(NPOS-IPOS+1)=IMAP(I)
LEFT(I)=LEFT(I)-1
IF (LEFT(I).EQ.0) THEN
C
C The following loop can be replaced with this code if the code isn't
C required to preserve lexicographical order. But make sure that both
C ENPERM and DEPERM are consistent.
C
C LEFT(I)=LEFT(ITOP)
C IMAP(I)=IMAP(ITOP)
C
DO 40 J=I,ITOP-1
LEFT(J)=LEFT(J+1)
40 IMAP(J)=IMAP(J+1)
C
ITOP=ITOP-1
END IF
50 CONTINUE
RETURN
END
INTEGER FUNCTION ENPERM(N,M,IARRAY)
INTEGER N(1),IARRAY(1)
INTEGER MAP(9),LEFT(9)
C
C ENPERM is the permutation code (function return value)
C N[I] is the number of objects of the Ith type, I=1..M
C M is the number of distinct types of objects
C IARRAY is the permutation to encode
C IMAP(I) is the Ith type of object. This changes as objects are
C used up
C LEFT(I) is the number of unused objects of type I
C NPOS is the number of positions in the permutation
C NTOT is the total number of permutations
C ITOP is the number of object types that aren't used up
C
NPOS=0
NTOT=1
NPOS=0
DO 20 I=1,M
DO 10 J=1,N(I)
NPOS=NPOS+1
10 NTOT=(NTOT*NPOS)/J
MAP(I)=I
20 LEFT(I)=N(I)
ITOP=M
ENPERM=0
KKK=NTOT
DO 50 IPOS=NPOS,2,-1
J=IARRAY(NPOS-IPOS+1)
I=0
30 I=I+1
LLL=(KKK*LEFT(I))/IPOS
ENPERM=ENPERM+LLL
IF (MAP(I).NE.J) GOTO 30
ENPERM=ENPERM-LLL
KKK=LLL
LEFT(I)=LEFT(I)-1
IF (LEFT(I).EQ.0) THEN
C
C The following loop can be replaced with this code if the code isn't
C required to preserve lexicographical order. But make sure that both
C ENPERM and DEPERM are consistent.
C
C LEFT(I)=LEFT(ITOP)
C IMAP(I)=IMAP(ITOP)
C
DO 40 J=I,ITOP-1
LEFT(J)=LEFT(J+1)
40 MAP(J)=MAP(J+1)
C
ITOP=ITOP-1
END IF
50 CONTINUE
RETURN
END
|