1 // -*- mode: cpp; mode: fold -*-
3 // $Id: rsync-algo.cc,v 1.3 1999/12/26 06:59:01 jgg Exp $
4 /* ######################################################################
8 The RSync algorithim is attributed to Andrew Tridgell and is a means
9 for matching blocks between two streams. The algorithrim implemented
10 here differs slightly in its structure and is carefully optimized to be
11 able to operate on very large files effectively.
13 We rely on the RSync rolling weak checksum routine and the MD4 strong
14 checksum routine. This implementation requires a uniform block size
17 ##################################################################### */
19 // Include files /*{{{*/
21 #pragma implementation "dsync/rsync-algo.h"
24 #include <dsync/rsync-algo.h>
25 #include <dsync/error.h>
26 #include <dsync/slidingwindow.h>
27 #include <dsync/md5.h>
28 #include <dsync/md4.h>
32 #include <netinet/in.h>
35 // RollingChecksum - Compute the checksum perscribed by rsync /*{{{*/
36 // ---------------------------------------------------------------------
38 static inline unsigned long RollingChecksum(unsigned char *Start,
44 /* A = sum(X[i],j,k) B = sum((k-j+1)*X[i],j,k);
45 Which reduces to the recurrence, B = sum(A[I],j,k); */
46 for (; Start != End; Start++)
52 return (A & 0xFFFF) | (B << 16);
55 // GenerateRSync - Compute the rsync blocks for a file /*{{{*/
56 // ---------------------------------------------------------------------
57 /* This function generates the RSync checksums for each uniform block in
59 bool GenerateRSync(FileFd &Fd,dsFList::RSyncChecksum &Ck,
60 unsigned char OutMD5[16],
61 unsigned long BlockSize)
63 SlidingWindow Wind(Fd);
66 Ck.Tag = dsFList::tRSyncChecksum;
67 Ck.BlockSize = BlockSize;
68 Ck.FileSize = Fd.Size();
70 // Allocate sum storage space
72 Ck.Sums = new unsigned char[(Ck.FileSize + BlockSize-1)/BlockSize*20];
74 // Slide over the file
75 unsigned char *Start = 0;
76 unsigned char *End = 0;
77 unsigned char *Sum = Ck.Sums;
78 unsigned char *SumEnd = Sum + (Ck.FileSize + BlockSize-1)/BlockSize*20;
81 // Tail little bit of the file
82 if ((unsigned)(End - Start) < BlockSize)
84 unsigned char *OldEnd = End;
85 if (Wind.Extend(Start,End) == false)
88 // The file is very small, pretend this is the last block
89 if ((unsigned)(End - Start) < BlockSize && End != Start)
98 /* The last block is rather artifical but can be of use in some
99 cases. Just remember not to insert it into the hash
101 *(uint32_t *)Sum = htonl(0xDEADBEEF);
103 ComputeMD4Final(Sum+4,Start,OldEnd,OldEnd-Start);
104 MD5.Add(Start,OldEnd);
110 // Compute the checksums
111 MD5.Add(Start,Start+BlockSize);
112 *(uint32_t *)Sum = htonl(RollingChecksum(Start,Start+BlockSize));
114 ComputeMD4Final(Sum+4,Start,Start+BlockSize,BlockSize);
121 return _error->Error("Size Mismatch generating checksums");
123 MD5.Result().Value(OutMD5);
129 // RSyncMatch::RSyncMatch - Constructor /*{{{*/
130 // ---------------------------------------------------------------------
131 /* This generates the btree and hash table for looking up checksums */
132 RSyncMatch::RSyncMatch(dsFList::RSyncChecksum const &Ck) : Fast(1 << 16),
136 unsigned int Blocks = (Ck.FileSize + Ck.BlockSize-1)/Ck.BlockSize;
138 // Drop the last partial block from the hashing
143 // Setup the index table
144 Indexes = new uint32_t *[Blocks];
145 IndexesEnd = Indexes + Blocks;
147 // Ready the checksum pointers
148 unsigned char *Sum = Ck.Sums;
149 unsigned char *SumEnd = Sum + Blocks*20;
150 for (uint32_t **I = Indexes; Sum < SumEnd; Sum += 20)
152 *I++ = (uint32_t *)Sum;
156 qsort(Indexes,Blocks,sizeof(*Indexes),Sort);
158 // Generate the hash table
159 unsigned int Cur = 0;
160 Hashes[Cur] = Indexes;
161 for (uint32_t **I = Indexes; I != IndexesEnd; I++)
164 Fast.Set((**I) >> 16);
165 while (((**I) >> 24) > Cur)
169 Hashes[Cur++] = IndexesEnd;
171 for (unsigned int Cur = 1; Cur != 255; Cur++)
173 printf("%u %p %x\n",Hashes[Cur] - Hashes[Cur-1],Hashes[Cur],**Hashes[Cur] >> 24);
177 // RSyncMatch::~RSyncMatch - Destructor /*{{{*/
178 // ---------------------------------------------------------------------
180 RSyncMatch::~RSyncMatch()
185 // RSyncMatch::Sort - QSort function /*{{{*/
186 // ---------------------------------------------------------------------
188 int RSyncMatch::Sort(const void *L,const void *R)
190 if (**(uint32_t **)L == **(uint32_t **)R)
192 if (**(uint32_t **)L > **(uint32_t **)R)
197 bool RSyncMatch::Scan(FileFd &Fd)
199 for (unsigned int Cur = 1; Cur != 256; Cur++)
201 printf("%u %p\n",Hashes[Cur] - Hashes[Cur-1],Hashes[Cur]);