]> git.evergreen-ils.org Git - working/Evergreen.git/blob - Open-ILS/xul/staff_client/external/libmar/tool/bsdiff.c
ea384bbdc11f2383b9ef513f36fb01f14c39e828
[working/Evergreen.git] / Open-ILS / xul / staff_client / external / libmar / tool / bsdiff.c
1 /*-
2  * Copyright 2003-2005 Colin Percival
3  * All rights reserved
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted providing that the following conditions 
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
18  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26
27 /*
28  * Modified 18 March 2013 by Jason Stephenson <jason@sigio.com>.
29  *
30  * Modifications to make it compatible with the output of Mozilla's mbsdiff:
31  *
32  * Switch from 64 bit off_t to 32 bit int32_t.
33  * Don't bzip2 the output blocks.
34  * Use the modified header that Mozilla's bspatch expects.
35  * Add crc32 of the original file to the header.
36  * Use 32-bit ftell and fseek instead of ftello and fseeko.
37  */
38
39 #include <sys/types.h>
40 #include <err.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <netinet/in.h>
46 #include <unistd.h>
47
48 /* Calculate the crc32. */
49 extern uint32_t
50 crc32(const unsigned char *buf, uint32_t len);
51
52 #define MIN(x,y) (((x)<(y)) ? (x) : (y))
53
54 static void split(int32_t *I,int32_t *V,int32_t start,int32_t len,int32_t h)
55 {
56         int32_t i,j,k,x,tmp,jj,kk;
57
58         if(len<16) {
59                 for(k=start;k<start+len;k+=j) {
60                         j=1;x=V[I[k]+h];
61                         for(i=1;k+i<start+len;i++) {
62                                 if(V[I[k+i]+h]<x) {
63                                         x=V[I[k+i]+h];
64                                         j=0;
65                                 };
66                                 if(V[I[k+i]+h]==x) {
67                                         tmp=I[k+j];I[k+j]=I[k+i];I[k+i]=tmp;
68                                         j++;
69                                 };
70                         };
71                         for(i=0;i<j;i++) V[I[k+i]]=k+j-1;
72                         if(j==1) I[k]=-1;
73                 };
74                 return;
75         };
76
77         x=V[I[start+len/2]+h];
78         jj=0;kk=0;
79         for(i=start;i<start+len;i++) {
80                 if(V[I[i]+h]<x) jj++;
81                 if(V[I[i]+h]==x) kk++;
82         };
83         jj+=start;kk+=jj;
84
85         i=start;j=0;k=0;
86         while(i<jj) {
87                 if(V[I[i]+h]<x) {
88                         i++;
89                 } else if(V[I[i]+h]==x) {
90                         tmp=I[i];I[i]=I[jj+j];I[jj+j]=tmp;
91                         j++;
92                 } else {
93                         tmp=I[i];I[i]=I[kk+k];I[kk+k]=tmp;
94                         k++;
95                 };
96         };
97
98         while(jj+j<kk) {
99                 if(V[I[jj+j]+h]==x) {
100                         j++;
101                 } else {
102                         tmp=I[jj+j];I[jj+j]=I[kk+k];I[kk+k]=tmp;
103                         k++;
104                 };
105         };
106
107         if(jj>start) split(I,V,start,jj-start,h);
108
109         for(i=0;i<kk-jj;i++) V[I[jj+i]]=kk-1;
110         if(jj==kk-1) I[jj]=-1;
111
112         if(start+len>kk) split(I,V,kk,start+len-kk,h);
113 }
114
115 static void qsufsort(int32_t *I,int32_t *V,u_char *old,int32_t oldsize)
116 {
117         int32_t buckets[256];
118         int32_t i,h,len;
119
120         for(i=0;i<256;i++) buckets[i]=0;
121         for(i=0;i<oldsize;i++) buckets[old[i]]++;
122         for(i=1;i<256;i++) buckets[i]+=buckets[i-1];
123         for(i=255;i>0;i--) buckets[i]=buckets[i-1];
124         buckets[0]=0;
125
126         for(i=0;i<oldsize;i++) I[++buckets[old[i]]]=i;
127         I[0]=oldsize;
128         for(i=0;i<oldsize;i++) V[i]=buckets[old[i]];
129         V[oldsize]=0;
130         for(i=1;i<256;i++) if(buckets[i]==buckets[i-1]+1) I[buckets[i]]=-1;
131         I[0]=-1;
132
133         for(h=1;I[0]!=-(oldsize+1);h+=h) {
134                 len=0;
135                 for(i=0;i<oldsize+1;) {
136                         if(I[i]<0) {
137                                 len-=I[i];
138                                 i-=I[i];
139                         } else {
140                                 if(len) I[i-len]=-len;
141                                 len=V[I[i]]+1-i;
142                                 split(I,V,i,len,h);
143                                 i+=len;
144                                 len=0;
145                         };
146                 };
147                 if(len) I[i-len]=-len;
148         };
149
150         for(i=0;i<oldsize+1;i++) I[V[i]]=i;
151 }
152
153 static int32_t matchlen(u_char *old,int32_t oldsize,u_char *new,int32_t newsize)
154 {
155         int32_t i;
156
157         for(i=0;(i<oldsize)&&(i<newsize);i++)
158                 if(old[i]!=new[i]) break;
159
160         return i;
161 }
162
163 static int32_t search(int32_t *I,u_char *old,int32_t oldsize,
164                 u_char *new,int32_t newsize,int32_t st,int32_t en,int32_t *pos)
165 {
166         int32_t x,y;
167
168         if(en-st<2) {
169                 x=matchlen(old+I[st],oldsize-I[st],new,newsize);
170                 y=matchlen(old+I[en],oldsize-I[en],new,newsize);
171
172                 if(x>y) {
173                         *pos=I[st];
174                         return x;
175                 } else {
176                         *pos=I[en];
177                         return y;
178                 }
179         };
180
181         x=st+(en-st)/2;
182         if(memcmp(old+I[x],new,MIN(oldsize-I[x],newsize))<0) {
183                 return search(I,old,oldsize,new,newsize,x,en,pos);
184         } else {
185                 return search(I,old,oldsize,new,newsize,st,x,pos);
186         };
187 }
188
189 int main(int argc,char *argv[])
190 {
191         int fd;
192         u_char *old,*new;
193         int32_t oldsize,newsize;
194         int32_t *I,*V;
195         int32_t scan,pos,len;
196         int32_t lastscan,lastpos,lastoffset;
197         int32_t oldscore,scsc;
198         int32_t s,Sf,lenf,Sb,lenb;
199         int32_t overlap,Ss,lens;
200         int32_t i;
201         int32_t dblen,eblen;
202         u_char *db,*eb;
203         u_char header[32];
204         FILE * pf;
205         uint32_t crcVal;
206         int32_t outVal; /* place holder for result of htonl */
207
208         if(argc!=4) errx(1,"usage: %s oldfile newfile patchfile\n",argv[0]);
209
210         /* Allocate oldsize+1 bytes instead of oldsize bytes to ensure
211                 that we never try to malloc(0) and get a NULL pointer */
212         if(((fd=open(argv[1],O_RDONLY,0))<0) ||
213                 ((oldsize=lseek(fd,0,SEEK_END))==-1) ||
214                 ((old=malloc(oldsize+1))==NULL) ||
215                 (lseek(fd,0,SEEK_SET)!=0) ||
216                 (read(fd,old,oldsize)!=oldsize) ||
217                 (close(fd)==-1)) err(1,"%s",argv[1]);
218
219         if(((I=malloc((oldsize+1)*sizeof(int32_t)))==NULL) ||
220                 ((V=malloc((oldsize+1)*sizeof(int32_t)))==NULL)) err(1,NULL);
221
222         /* Calculate the old file's crc32 value. */
223         crcVal = crc32(old, oldsize);
224
225         qsufsort(I,V,old,oldsize);
226
227         free(V);
228
229         /* Allocate newsize+1 bytes instead of newsize bytes to ensure
230                 that we never try to malloc(0) and get a NULL pointer */
231         if(((fd=open(argv[2],O_RDONLY,0))<0) ||
232                 ((newsize=lseek(fd,0,SEEK_END))==-1) ||
233                 ((new=malloc(newsize+1))==NULL) ||
234                 (lseek(fd,0,SEEK_SET)!=0) ||
235                 (read(fd,new,newsize)!=newsize) ||
236                 (close(fd)==-1)) err(1,"%s",argv[2]);
237
238         if(((db=malloc(newsize+1))==NULL) ||
239                 ((eb=malloc(newsize+1))==NULL)) err(1,NULL);
240         dblen=0;
241         eblen=0;
242
243         /* Create the patch file */
244         if ((pf = fopen(argv[3], "w")) == NULL)
245                 err(1, "%s", argv[3]);
246
247         /* This is the modified header used by Mozilla's hacked bspatch. It
248          * doesn't actually bzip2 anything despite what the documentation on
249          * the wiki implies. */
250         /* Header is
251                 0       8        "MBDIFF10"
252     8 4  length of the file to be patched
253     12 4 crc32 of file to be patched
254     16 4 length of the new file
255                 20 4 length of ctrl block
256                 24 4 length of diff block
257     28 4 length of extra block
258         /* File is
259                 0       32      Header
260                 32      ??      ctrl block
261                 ??      ??      diff block
262                 ??      ??      extra block */
263         memcpy(header, "MBDIFF10", 8);
264         outVal = htonl(oldsize);
265         memcpy(header + 8, &outVal, 4);
266         outVal = htonl(crcVal);
267         memcpy(header + 12, &outVal, 4);
268         outVal = htonl(newsize);
269         memcpy(header + 16, &outVal, 4);
270         memcpy(header + 20, 0, 12);
271         if (fwrite(header, 32, 1, pf) != 1)
272                 err(1, "fwrite(%s)", argv[3]);
273
274         /* Compute the differences, writing ctrl as we go */
275         scan=0;len=0;
276         lastscan=0;lastpos=0;lastoffset=0;
277         while(scan<newsize) {
278                 oldscore=0;
279
280                 for(scsc=scan+=len;scan<newsize;scan++) {
281                         len=search(I,old,oldsize,new+scan,newsize-scan,
282                                         0,oldsize,&pos);
283
284                         for(;scsc<scan+len;scsc++)
285                         if((scsc+lastoffset<oldsize) &&
286                                 (old[scsc+lastoffset] == new[scsc]))
287                                 oldscore++;
288
289                         if(((len==oldscore) && (len!=0)) || 
290                                 (len>oldscore+8)) break;
291
292                         if((scan+lastoffset<oldsize) &&
293                                 (old[scan+lastoffset] == new[scan]))
294                                 oldscore--;
295                 };
296
297                 if((len!=oldscore) || (scan==newsize)) {
298                         s=0;Sf=0;lenf=0;
299                         for(i=0;(lastscan+i<scan)&&(lastpos+i<oldsize);) {
300                                 if(old[lastpos+i]==new[lastscan+i]) s++;
301                                 i++;
302                                 if(s*2-i>Sf*2-lenf) { Sf=s; lenf=i; };
303                         };
304
305                         lenb=0;
306                         if(scan<newsize) {
307                                 s=0;Sb=0;
308                                 for(i=1;(scan>=lastscan+i)&&(pos>=i);i++) {
309                                         if(old[pos-i]==new[scan-i]) s++;
310                                         if(s*2-i>Sb*2-lenb) { Sb=s; lenb=i; };
311                                 };
312                         };
313
314                         if(lastscan+lenf>scan-lenb) {
315                                 overlap=(lastscan+lenf)-(scan-lenb);
316                                 s=0;Ss=0;lens=0;
317                                 for(i=0;i<overlap;i++) {
318                                         if(new[lastscan+lenf-overlap+i]==
319                                            old[lastpos+lenf-overlap+i]) s++;
320                                         if(new[scan-lenb+i]==
321                                            old[pos-lenb+i]) s--;
322                                         if(s>Ss) { Ss=s; lens=i+1; };
323                                 };
324
325                                 lenf+=lens-overlap;
326                                 lenb-=lens;
327                         };
328
329                         for(i=0;i<lenf;i++)
330                                 db[dblen+i]=new[lastscan+i]-old[lastpos+i];
331                         for(i=0;i<(scan-lenb)-(lastscan+lenf);i++)
332                                 eb[eblen+i]=new[lastscan+lenf+i];
333
334                         dblen+=lenf;
335                         eblen+=(scan-lenb)-(lastscan+lenf);
336
337                         outVal = htonl(lenf);
338                         if (fwrite(&outVal, 4, 1, pf) != 1)
339                                 err(1, "fwrite(%s)", argv[3]);
340
341                         outVal = htonl((scan-lenb)-(lastscan+lenf));
342                         if (fwrite(&outVal, 4, 1, pf) != 1)
343                                 err(1, "fwrite(%s)", argv[3]);
344
345                         outVal = htonl((pos-lenb)-(lastpos+lenf));
346                         if (fwrite(&outVal, 4, 1, pf) != 1)
347                                 err(1, "fwrite(%s)", argv[3]);
348
349                         lastscan=scan-lenb;
350                         lastpos=pos-lenb;
351                         lastoffset=pos-scan;
352                 };
353         };
354
355         /* Compute size of ctrl data */
356         if ((len = ftell(pf)) == -1)
357                 err(1, "ftell");
358         outVal = htonl(len - 32);
359         memcpy(header + 20, &outVal, 4);
360
361         /* Write the diff data */
362         if (fwrite(db, 1, dblen, pf) != dblen)
363                 err(1, "fwrite(%s)", argv[3]);
364
365         /* Size of diff data */
366         outVal = htonl(dblen);
367         memcpy(header + 24, &outVal, 4);
368
369         /* Write extra data */
370         if (fwrite(eb, 1, eblen, pf) != eblen)
371                 err(1, "fwrite(%s)", argv[3]);
372
373         /* Size of extra data */
374         outVal = htonl(eblen);
375         memcpy(header + 28, &outVal, 4);
376
377         /* Seek to the beginning, write the header, and close the file */
378         if (fseek(pf, 0, SEEK_SET))
379                 err(1, "fseek");
380         if (fwrite(header, 32, 1, pf) != 1)
381                 err(1, "fwrite(%s)", argv[3]);
382         if (fclose(pf))
383                 err(1, "fclose");
384
385         /* Free the memory we used */
386         free(db);
387         free(eb);
388         free(I);
389         free(old);
390         free(new);
391
392         return 0;
393 }