X-Git-Url: https://git.decadent.org.uk/gitweb/?a=blobdiff_plain;f=tools%2Fdsync-0.0%2Flibdsync%2Frsync-algo.cc;fp=tools%2Fdsync-0.0%2Flibdsync%2Frsync-algo.cc;h=7b513b3203831758436c95bd1df49b67489b3ec0;hb=824e90cf1466f3d62db08f27b865ef8e301a9ae9;hp=0000000000000000000000000000000000000000;hpb=f9d876eacbc4c4c5a57f862b8d3a46443aad5da7;p=dak.git diff --git a/tools/dsync-0.0/libdsync/rsync-algo.cc b/tools/dsync-0.0/libdsync/rsync-algo.cc new file mode 100644 index 00000000..7b513b32 --- /dev/null +++ b/tools/dsync-0.0/libdsync/rsync-algo.cc @@ -0,0 +1,205 @@ +// -*- mode: cpp; mode: fold -*- +// Description /*{{{*/ +// $Id: rsync-algo.cc,v 1.3 1999/12/26 06:59:01 jgg Exp $ +/* ###################################################################### + + RSync Algorithrim + + The RSync algorithim is attributed to Andrew Tridgell and is a means + for matching blocks between two streams. The algorithrim implemented + here differs slightly in its structure and is carefully optimized to be + able to operate on very large files effectively. + + We rely on the RSync rolling weak checksum routine and the MD4 strong + checksum routine. This implementation requires a uniform block size + for each run. + + ##################################################################### */ + /*}}}*/ +// Include files /*{{{*/ +#ifdef __GNUG__ +#pragma implementation "dsync/rsync-algo.h" +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include + /*}}}*/ + +// RollingChecksum - Compute the checksum perscribed by rsync /*{{{*/ +// --------------------------------------------------------------------- +/* */ +static inline unsigned long RollingChecksum(unsigned char *Start, + unsigned char *End) +{ + unsigned long A = 0; + unsigned long B = 0; + + /* A = sum(X[i],j,k) B = sum((k-j+1)*X[i],j,k); + Which reduces to the recurrence, B = sum(A[I],j,k); */ + for (; Start != End; Start++) + { + A += *Start; + B += A; + } + + return (A & 0xFFFF) | (B << 16); +} + /*}}}*/ +// GenerateRSync - Compute the rsync blocks for a file /*{{{*/ +// --------------------------------------------------------------------- +/* This function generates the RSync checksums for each uniform block in + the file. */ +bool GenerateRSync(FileFd &Fd,dsFList::RSyncChecksum &Ck, + unsigned char OutMD5[16], + unsigned long BlockSize) +{ + SlidingWindow Wind(Fd); + MD5Summation MD5; + + Ck.Tag = dsFList::tRSyncChecksum; + Ck.BlockSize = BlockSize; + Ck.FileSize = Fd.Size(); + + // Allocate sum storage space + delete [] Ck.Sums; + Ck.Sums = new unsigned char[(Ck.FileSize + BlockSize-1)/BlockSize*20]; + + // Slide over the file + unsigned char *Start = 0; + unsigned char *End = 0; + unsigned char *Sum = Ck.Sums; + unsigned char *SumEnd = Sum + (Ck.FileSize + BlockSize-1)/BlockSize*20; + while (Sum < SumEnd) + { + // Tail little bit of the file + if ((unsigned)(End - Start) < BlockSize) + { + unsigned char *OldEnd = End; + if (Wind.Extend(Start,End) == false) + return false; + + // The file is very small, pretend this is the last block + if ((unsigned)(End - Start) < BlockSize && End != Start) + { + OldEnd = End; + End = Start; + } + + // All Done + if (Start == End) + { + /* The last block is rather artifical but can be of use in some + cases. Just remember not to insert it into the hash + search table!! */ + *(uint32_t *)Sum = htonl(0xDEADBEEF); + InitMD4(Sum+4); + ComputeMD4Final(Sum+4,Start,OldEnd,OldEnd-Start); + MD5.Add(Start,OldEnd); + Sum += 20; + break; + } + } + + // Compute the checksums + MD5.Add(Start,Start+BlockSize); + *(uint32_t *)Sum = htonl(RollingChecksum(Start,Start+BlockSize)); + InitMD4(Sum+4); + ComputeMD4Final(Sum+4,Start,Start+BlockSize,BlockSize); + Sum += 20; + + Start += BlockSize; + } + + if (Sum != SumEnd) + return _error->Error("Size Mismatch generating checksums"); + + MD5.Result().Value(OutMD5); + + return true; +} + /*}}}*/ + +// RSyncMatch::RSyncMatch - Constructor /*{{{*/ +// --------------------------------------------------------------------- +/* This generates the btree and hash table for looking up checksums */ +RSyncMatch::RSyncMatch(dsFList::RSyncChecksum const &Ck) : Fast(1 << 16), + Ck(Ck) +{ + Indexes = 0; + unsigned int Blocks = (Ck.FileSize + Ck.BlockSize-1)/Ck.BlockSize; + + // Drop the last partial block from the hashing + if (Blocks < 3) + return; + Blocks--; + + // Setup the index table + Indexes = new uint32_t *[Blocks]; + IndexesEnd = Indexes + Blocks; + + // Ready the checksum pointers + unsigned char *Sum = Ck.Sums; + unsigned char *SumEnd = Sum + Blocks*20; + for (uint32_t **I = Indexes; Sum < SumEnd; Sum += 20) + { + *I++ = (uint32_t *)Sum; + } + + // Sort them + qsort(Indexes,Blocks,sizeof(*Indexes),Sort); + + // Generate the hash table + unsigned int Cur = 0; + Hashes[Cur] = Indexes; + for (uint32_t **I = Indexes; I != IndexesEnd; I++) + { + printf("%x\n",**I); + Fast.Set((**I) >> 16); + while (((**I) >> 24) > Cur) + Hashes[Cur++] = I; + } + while (Cur <= 256) + Hashes[Cur++] = IndexesEnd; + + for (unsigned int Cur = 1; Cur != 255; Cur++) + { + printf("%u %p %x\n",Hashes[Cur] - Hashes[Cur-1],Hashes[Cur],**Hashes[Cur] >> 24); + } +} + /*}}}*/ +// RSyncMatch::~RSyncMatch - Destructor /*{{{*/ +// --------------------------------------------------------------------- +/* */ +RSyncMatch::~RSyncMatch() +{ + delete [] Indexes; +} + /*}}}*/ +// RSyncMatch::Sort - QSort function /*{{{*/ +// --------------------------------------------------------------------- +/* */ +int RSyncMatch::Sort(const void *L,const void *R) +{ + if (**(uint32_t **)L == **(uint32_t **)R) + return 0; + if (**(uint32_t **)L > **(uint32_t **)R) + return 1; + return -1; +} + /*}}}*/ +bool RSyncMatch::Scan(FileFd &Fd) +{ + for (unsigned int Cur = 1; Cur != 256; Cur++) + { + printf("%u %p\n",Hashes[Cur] - Hashes[Cur-1],Hashes[Cur]); + } + + return true; +}