1 // -*- mode: cpp; mode: fold -*-
3 // $Id: strutl.cc,v 1.4 1999/10/24 06:53:12 jgg Exp $
4 /* ######################################################################
6 String Util - Some useful string functions.
8 These have been collected from here and there to do all sorts of useful
9 things to strings. They are useful in file parsers, URI handlers and
10 especially in APT methods.
12 This source is placed in the Public Domain, do with it what you will
13 It was originally written by Jason Gunthorpe <jgg@gpu.srv.ualberta.ca>
15 ##################################################################### */
19 #pragma implementation "dsync/strutl.h"
22 #include <dsync/strutl.h>
23 #include <dsync/fileutl.h>
32 // strstrip - Remove white space from the front and back of a string /*{{{*/
33 // ---------------------------------------------------------------------
34 /* This is handy to use when parsing a file. It also removes \n's left
35 over from fgets and company */
// String must be a NUL terminated buffer (strlen is applied to it). Leading
// blanks are skipped by advancing the pointer, not by moving characters.
36 char *_strstrip(char *String)
// Step over leading spaces and tabs
38 for (;*String != 0 && (*String == ' ' || *String == '\t'); String++);
// Scan backwards from the last character over trailing space, tab, LF and CR.
// NOTE(review): String - 1 is used as the stop sentinel, which forms a
// pointer before the start of the buffer - confirm this is acceptable here.
43 char *End = String + strlen(String) - 1;
44 for (;End != String - 1 && (*End == ' ' || *End == '\t' || *End == '\n' ||
45 *End == '\r'); End--);
51 // strtabexpand - Converts tabs into 8 spaces /*{{{*/
52 // ---------------------------------------------------------------------
// Works in place on String. Len is compared against String + Len below, so
// it is presumably the capacity of the buffer - confirm against callers.
54 char *_strtabexpand(char *String,size_t Len)
// NOTE(review): the bound I != I + Len can only be false when Len is 0, so
// for any non-zero Len the scan is ended by the NUL test alone; String + Len
// looks like the intended limit.
56 for (char *I = String; I != I + Len && *I != 0; I++)
// Guard for an expansion that would not fit in the remaining buffer
60 if (I + 8 > String + Len)
66 /* Assume the start of the string is 0 and find the next 8 char
// NOTE(review): I never precedes String, so String - I is zero or negative;
// I - String looks like the intended column offset - confirm.
72 Len = 8 - ((String - I) % 8);
// Shift the text after the tab to open a gap, then fill the gap with spaces
// while moving I along
80 memmove(I + Len,I + 1,strlen(I) + 1);
81 for (char *J = I; J + Len != I; *I = ' ', I++);
86 // ParseQuoteWord - Parse a single word out of a string /*{{{*/
87 // ---------------------------------------------------------------------
88 /* This grabs a single word, converts any % escaped characters to their
89 proper values and advances the pointer. Double quotes are understood
90 and stripped out as well. This is for URI/URL parsing. */
// String is taken by reference so that the caller's cursor can be moved; Res
// is presumably the output parameter that receives the decoded word.
91 bool ParseQuoteWord(const char *&String,string &Res)
93 // Skip leading whitespace
// Only the space character is skipped here; the later scans use isspace
94 const char *C = String;
95 for (;*C != 0 && *C == ' '; C++);
99 // Jump to the next word
// NOTE(review): isspace is handed a plain char; a negative value (bytes above
// 0x7F where char is signed) is outside its defined domain.
100 for (;*C != 0 && isspace(*C) == 0; C++)
// Step over a quoted section up to, but not past, its closing quote
104 for (C++;*C != 0 && *C != '"'; C++);
110 // Now de-quote characters
113 const char *Start = String;
// Copy towards Buffer, stopping at the word end or when Buffer is full
115 for (I = Buffer; I < Buffer + sizeof(Buffer) && Start != C; I++)
// A % only counts as an escape when two more characters lie before C
117 if (*Start == '%' && Start + 2 < C)
// Tmp is parsed as a base 16 number and narrowed to a single char
122 *I = (char)strtol(Tmp,0,16);
135 // Skip ending white space
136 for (;*C != 0 && isspace(*C) != 0; C++);
141 // ParseCWord - Parses a string like a C "" expression /*{{{*/
142 // ---------------------------------------------------------------------
143 /* This expects a series of space separated strings enclosed in ""'s.
144 It concatenates the ""'s into a single string. */
145 bool ParseCWord(const char *String,string &Res)
147 // Skip leading whitespace
148 const char *C = String;
149 for (;*C != 0 && *C == ' '; C++);
// Input that could not fit in the fixed size Buffer is detected up front
155 if (strlen(String) >= sizeof(Buffer))
// Walk the inside of a quoted section until its closing quote or the NUL
162 for (C++; *C != 0 && *C != '"'; C++)
// Two whitespace characters in a row; the C != String test keeps C[-1]
// from reading before the start of the input
171 if (C != String && isspace(*C) != 0 && isspace(C[-1]) != 0)
// Anything that is not whitespace
173 if (isspace(*C) == 0)
// QuoteString - Convert a string into quoted form			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every character that appears in Bad, is
   not printable, or lies outside the range 0x21..0x7E is replaced by a
   %xx escape, where xx is the byte value as two lower case hex digits.
   All other characters are copied through unchanged. */
string QuoteString(string Str,const char *Bad)
{
   string Res;
   for (string::iterator I = Str.begin(); I != Str.end(); I++)
   {
      /* Work on the byte value. A plain char may be signed, in which case
         converting it straight to int sign-extends bytes above 0x7F (giving
         "%ffffff80" instead of "%80") and hands isprint a negative value,
         which is outside its defined domain. */
      const unsigned char C = static_cast<unsigned char>(*I);
      if (strchr(Bad,*I) != 0 || isprint(C) == 0 ||
          C <= 0x20 || C >= 0x7F)
      {
         // '%' + two hex digits + NUL
         char Buf[4];
         sprintf(Buf,"%%%02x",static_cast<unsigned int>(C));
         Res += Buf;
      }
      else
         Res += *I;
   }
   return Res;
}
									/*}}}*/
203 // DeQuoteString - Convert a string from quoted form /*{{{*/
204 // ---------------------------------------------------------------------
205 /* This undoes QuoteString */
// A % followed by at least two more characters is treated as an escape whose
// value is parsed with strtol in base 16; Res accumulates the output.
206 string DeQuoteString(string Str)
209 for (string::iterator I = Str.begin(); I != Str.end(); I++)
// Two characters must follow the % for it to be decoded
211 if (*I == '%' && I + 2 < Str.end())
// NOTE(review): strtol yields 0 when Tmp holds no hex digits, which would
// append a NUL character - confirm this is acceptable for malformed input.
217 Res += (char)strtol(Tmp,0,16);
228 // SizeToStr - Convert a long into a human readable size /*{{{*/
229 // ---------------------------------------------------------------------
// Formats Size followed by a one letter unit suffix taken from Ext.
// NOTE(review): Ext has nine entries; confirm that the scaling step cannot
// push the index I beyond the last one for very large values.
230 /* A max of 4 digits are shown before conversion to the next highest unit.
231 The max length of the string will be 5 chars unless the size is > 10
233 string SizeToStr(double Size)
242 /* bytes, KiloBytes, MegaBytes, GigaBytes, TeraBytes, PetaBytes,
243 ExaBytes, ZettaBytes, YottaBytes */
244 char Ext[] = {'\0','k','M','G','T','P','E','Z','Y'};
// Values below 100 in any unit other than plain bytes keep one decimal place
248 if (ASize < 100 && I != 0)
250 sprintf(S,"%.1f%c",ASize,Ext[I]);
// The second layout drops the decimals
256 sprintf(S,"%.0f%c",ASize,Ext[I]);
266 // TimeToStr - Convert the time into a string /*{{{*/
267 // ---------------------------------------------------------------------
268 /* Converts a number of seconds to a hms format */
// Four layouts are used, from days+hours+minutes+seconds down to bare
// seconds; each divides Sec by 60 (and 24) to obtain the larger units.
// NOTE(review): Sec is unsigned long but every conversion below is %li
// (signed long); %lu is the matching specifier.
269 string TimeToStr(unsigned long Sec)
277 sprintf(S,"%lid %lih%lim%lis",Sec/60/60/24,(Sec/60/60) % 24,(Sec/60) % 60,Sec % 60);
283 sprintf(S,"%lih%lim%lis",Sec/60/60,(Sec/60) % 60,Sec % 60);
289 sprintf(S,"%lim%lis",Sec/60,Sec % 60);
293 sprintf(S,"%lis",Sec);
// SubstVar - Substitute a string for another string			/*{{{*/
// ---------------------------------------------------------------------
/* Returns a copy of Str in which every occurrence of Subst has been replaced
   by Contents. Matches are found left to right and do not overlap; text
   inserted from Contents is not rescanned. Str is returned unchanged when
   Subst is empty or does not occur. */
string SubstVar(string Str,string Subst,string Contents)
{
   // An empty pattern matches everywhere and would never advance the scan
   if (Subst.empty() == true)
      return Str;

   string::size_type Pos = 0;
   string::size_type OldPos = 0;
   string Temp;

   while (OldPos < Str.length() &&
          (Pos = Str.find(Subst,OldPos)) != string::npos)
   {
      /* Copy the text between the previous match and this one. The third
         argument is a length, not an end position, so it has to be
         Pos - OldPos; passing Pos itself copies too much from the second
         match onwards. */
      Temp.append(Str,OldPos,Pos - OldPos);
      Temp += Contents;
      OldPos = Pos + Subst.length();
   }

   // Whatever follows the last match (the whole string if there was none)
   Temp.append(Str,OldPos,string::npos);
   return Temp;
}
									/*}}}*/
322 // URItoFileName - Convert the uri into a unique file name /*{{{*/
323 // ---------------------------------------------------------------------
324 /* This converts a URI into a safe filename. It quotes all unsafe characters
325 and converts / to _ and removes the scheme identifier. The resulting
326 file name should be unique and never occur again for a different file */
327 string URItoFileName(string URI)
329 // Nuke 'sensitive' items
// The password field of the parsed URI is blanked before anything is built
332 U.Password = string();
335 // "\x00-\x20{}|\\\\^\\[\\]<>\"\x7F-\xFF";
// U is handed to QuoteString, which takes a string, so it is converted back
// to text first; the characters listed here are then % escaped
336 URI = QuoteString(U,"\\|{}[]<>\"^~_=!@#$%^&*");
// Per character pass over the quoted result (presumably where / becomes _,
// as described in the header comment)
337 string::iterator J = URI.begin();
338 for (; J != URI.end(); J++)
// Base64Encode - Base64 Encoding routine for short strings		/*{{{*/
// ---------------------------------------------------------------------
/* Returns the base64 (RFC 2045) representation of S. Every group of three
   input bytes becomes four output characters; a final group of one or two
   bytes is completed with "==" or "=" respectively, so the result length is
   always a multiple of four. An empty input yields an empty string. No line
   breaks are inserted. */
string Base64Encode(string S)
{
   // Conversion table
   static const char tbl[64] = {'A','B','C','D','E','F','G','H',
                                'I','J','K','L','M','N','O','P',
                                'Q','R','S','T','U','V','W','X',
                                'Y','Z','a','b','c','d','e','f',
                                'g','h','i','j','k','l','m','n',
                                'o','p','q','r','s','t','u','v',
                                'w','x','y','z','0','1','2','3',
                                '4','5','6','7','8','9','+','/'};

   // Pre-allocate the exact output size
   string Final;
   Final.reserve(4*((S.length() + 2)/3));

   /* Transform the 3x8 bits to 4x6 bits, as required by base64. Indexes are
      used instead of iterator arithmetic so that nothing is ever advanced
      beyond the end of S. */
   const string::size_type Len = S.length();
   for (string::size_type I = 0; I < Len; I += 3)
   {
      const string::size_type Left = Len - I;

      /* The bytes are held unsigned: with a signed char the shifts below
         would yield negative table indexes for values above 0x7F. Missing
         trailing bytes stay zero. */
      unsigned char Bits[3] = {0,0,0};
      Bits[0] = static_cast<unsigned char>(S[I]);
      if (Left > 1)
         Bits[1] = static_cast<unsigned char>(S[I + 1]);
      if (Left > 2)
         Bits[2] = static_cast<unsigned char>(S[I + 2]);

      Final += tbl[Bits[0] >> 2];
      Final += tbl[((Bits[0] & 3) << 4) | (Bits[1] >> 4)];

      /* Apply the padding elements, this tells how many bytes the remote
         end should discard */
      if (Left == 1)
      {
         Final += "==";
         break;
      }
      Final += tbl[((Bits[1] & 0xf) << 2) | (Bits[2] >> 6)];

      if (Left == 2)
      {
         Final += '=';
         break;
      }
      Final += tbl[Bits[2] & 0x3f];
   }

   return Final;
}
									/*}}}*/
401 // stringcmp - Arbitrary string compare /*{{{*/
402 // ---------------------------------------------------------------------
// Compares the ranges that start at A and B and stop before AEnd and BEnd;
// neither range needs a terminating NUL.
403 /* This safely compares two non-null terminated strings of arbitary
405 int stringcmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
// Advance both cursors in step while neither range is exhausted
407 for (; A != AEnd && B != BEnd; A++, B++)
// Both ranges ran out at the same time
411 if (A == AEnd && B == BEnd)
422 // stringcasecmp - Arbitrary case insensitive string compare /*{{{*/
423 // ---------------------------------------------------------------------
// Range based compare in which both sides go through toupper first.
// NOTE(review): toupper receives a plain char; bytes above 0x7F are negative
// where char is signed, which is outside its defined domain.
425 int stringcasecmp(const char *A,const char *AEnd,const char *B,const char *BEnd)
427 for (; A != AEnd && B != BEnd; A++, B++)
// Detect the first position whose upper-cased characters differ
428 if (toupper(*A) != toupper(*B))
// Both ranges ran out at the same time
431 if (A == AEnd && B == BEnd)
// Ordering is decided by the upper-cased characters at the current position
437 if (toupper(*A) < toupper(*B))
442 // LookupTag - Lookup the value of a tag in a tagged string /*{{{*/
443 // ---------------------------------------------------------------------
444 /* The format is like those used in package files and the method
445 communication system */
// Scans Message for Tag immediately followed by a colon; the tag name is
// compared without regard to case.
446 string LookupTag(string Message,const char *Tag,const char *Default)
448 // Look for a matching tag.
449 int Length = strlen(Tag);
450 for (string::iterator I = Message.begin(); I + Length < Message.end(); I++)
// Raw pointer to the same position as I, for the pointer based compare
453 const char *i = Message.c_str() + (I - Message.begin());
454 if (I[Length] == ':' && stringcasecmp(i,i+Length,Tag) == 0)
456 // Find the end of line and strip the leading/trailing spaces
// NOTE(review): here and in the two newline scans further down, the
// character is read before the iterator is compared with Message.end().
459 for (; isspace(*I) != 0 && I < Message.end(); I++);
460 for (J = I; *J != '\n' && J < Message.end(); J++);
461 for (; J > I && isspace(J[-1]) != 0; J--);
// NOTE(review): the result starts at i while its length is measured from I;
// confirm that i still refers to the start of the value at this point.
463 return string(i,J-I);
// Skip ahead to the end of the current line
466 for (; *I != '\n' && I < Message.end(); I++);
469 // Failed to find a match
475 // StringToBool - Converts a string into a boolean /*{{{*/
476 // ---------------------------------------------------------------------
477 /* This inspects the string to see if it is true or if it is false and
478 then returns the result. Several variants on true/false are checked. */
479 int StringToBool(string Text,int Default)
// A leading number of 0 or 1 is recognised first; base 0 lets strtol pick
// the radix from the prefix
482 int Res = strtol(Text.c_str(),&End,0);
483 if (End != Text.c_str() && Res >= 0 && Res <= 1)
486 // Check for negatives
487 if (strcasecmp(Text.c_str(),"no") == 0 ||
488 strcasecmp(Text.c_str(),"false") == 0 ||
489 strcasecmp(Text.c_str(),"without") == 0 ||
490 strcasecmp(Text.c_str(),"off") == 0 ||
491 strcasecmp(Text.c_str(),"disable") == 0)
494 // Check for positives
495 if (strcasecmp(Text.c_str(),"yes") == 0 ||
496 strcasecmp(Text.c_str(),"true") == 0 ||
497 strcasecmp(Text.c_str(),"with") == 0 ||
498 strcasecmp(Text.c_str(),"on") == 0 ||
499 strcasecmp(Text.c_str(),"enable") == 0)
505 // TimeRFC1123 - Convert a time_t into RFC1123 format /*{{{*/
506 // ---------------------------------------------------------------------
507 /* This converts a time_t into a string time representation that is
508 year 2000 compliant and timezone neutral */
509 string TimeRFC1123(time_t Date)
// The broken down UTC time is copied out of the object gmtime points at.
// NOTE(review): gmtime may return a null pointer for a value it cannot
// represent, and that pointer is dereferenced here without a check.
511 struct tm Conv = *gmtime(&Date);
// Day and month names are fixed English abbreviations indexed by tm_wday
// and tm_mon
514 const char *Day[] = {"Sun","Mon","Tue","Wed","Thu","Fri","Sat"};
515 const char *Month[] = {"Jan","Feb","Mar","Apr","May","Jun","Jul",
516 "Aug","Sep","Oct","Nov","Dec"};
// tm_year counts from 1900, hence the offset
518 sprintf(Buf,"%s, %02i %s %i %02i:%02i:%02i GMT",Day[Conv.tm_wday],
519 Conv.tm_mday,Month[Conv.tm_mon],Conv.tm_year+1900,Conv.tm_hour,
520 Conv.tm_min,Conv.tm_sec);
524 // ReadMessages - Read messages from the FD /*{{{*/
525 // ---------------------------------------------------------------------
526 /* This pulls full messages from the input FD into the message buffer.
527 It assumes that messages will not pause during transit so no
528 fancy buffering is used. */
// Complete messages are appended to List in the order they are found.
529 bool ReadMessages(int Fd, vector<string> &List)
// Read into the free space after End, which marks the end of buffered data
536 int Res = read(Fd,End,sizeof(Buffer) - (End-Buffer));
// An interrupted read (EINTR) is singled out from other failures
537 if (Res < 0 && errno == EINTR)
540 // Process is dead, this is kind of bad..
550 // Look for the end of the message
// A message ends at two consecutive newlines
551 for (char *I = Buffer; I + 1 < End; I++)
553 if (I[0] != '\n' || I[1] != '\n')
556 // Pull the message out
// NOTE(review): if Buffer is a char array this three argument form first
// converts it to a temporary string up to its first NUL - confirm that the
// buffer is terminated at this point.
557 string Message(Buffer,0,I-Buffer);
// Swallow the newline(s) that separate this message from the next
560 for (; I < End && *I == '\n'; I++);
// Move the bytes that follow down to the front of Buffer
562 memmove(Buffer,I,End-Buffer);
565 List.push_back(Message);
// WaitFd is declared elsewhere; presumably it waits for more data on Fd
570 if (WaitFd(Fd) == false)
575 // MonthConv - Converts a month string into a number /*{{{*/
576 // ---------------------------------------------------------------------
577 /* This was lifted from the boa webserver which lifted it from 'wn-v1.07'
578 Made it a bit more robust with a few touppers though. */
// Dispatches on the upper-cased first letter and disambiguates with the
// second or third letter. Results are zero based.
// NOTE(review): Month[1] and Month[2] are read without a length check, so
// the caller must supply at least three characters.
579 static int MonthConv(char *Month)
581 switch (toupper(*Month))
// presumably the A case: Apr (3) versus Aug (7)
584 return toupper(Month[1]) == 'P'?3:7;
// presumably the J case: Jan is told apart by its second letter ...
590 if (toupper(Month[1]) == 'A')
// ... and Jun (5) from Jul (6) by the third
592 return toupper(Month[2]) == 'N'?5:6;
// presumably the M case: Mar (2) versus May (4)
594 return toupper(Month[2]) == 'R'?2:4;
602 // Pretend it is January..
608 // timegm - Internal timegm function if gnu is not available /*{{{*/
609 // ---------------------------------------------------------------------
610 /* Ripped this evil little function from wget - I prefer the use of
611 GNU timegm if possible as this technique will have interesting problems
612 with leap seconds, timezones and other.
614 Converts struct tm to time_t, assuming the data in tm is UTC rather
615 than local timezone (mktime assumes the latter).
617 Contributed by Roger Beeman <beeman@cisco.com>, with the help of
618 Mark Baushke <mdb@cisco.com> and the rest of the Gurus at CISCO. */
// Only compiled when __USE_MISC is not defined
619 #ifndef __USE_MISC // glib sets this
620 static time_t timegm(struct tm *t)
// Run tl through gmtime and back through mktime; the difference between tb
// and tl is the offset that mktime applies
627 tb = mktime (gmtime (&tl));
// NOTE(review): both arms evaluate to tl - (tb - tl); the split presumably
// keeps the intermediate difference non-negative - confirm.
628 return (tl <= tb ? (tl + (tl - tb)) : (tl - (tb - tl)));
632 // StrToTime - Converts a string into a time_t /*{{{*/
633 // ---------------------------------------------------------------------
634 /* This handles all 3 popular time formats including RFC 1123, RFC 1036
635 and the C library asctime format. It requires the GNU library function
636 'timegm' to convert a struct tm in UTC to a time_t. For some bizarre
637 reason the C library does not provide any such function :<*/
638 bool StrToTime(string Val,time_t &Result)
642 const char *I = Val.c_str();
644 // Skip the day of the week
645 for (;*I != 0 && *I != ' '; I++);
647 // Handle RFC 1123 time
// Each layout must convert all six fields to be accepted.
// NOTE(review): %3s stores up to three characters plus a NUL, so Month needs
// room for four bytes - confirm its declaration.
648 if (sscanf(I," %d %3s %d %d:%d:%d GMT",&Tm.tm_mday,Month,&Tm.tm_year,
649 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) != 6)
651 // Handle RFC 1036 time
652 if (sscanf(I," %d-%3s-%d %d:%d:%d GMT",&Tm.tm_mday,Month,
653 &Tm.tm_year,&Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec) == 6)
// asctime layout: month name first, year last
658 if (sscanf(I," %3s %d %d:%d:%d %d",Month,&Tm.tm_mday,
659 &Tm.tm_hour,&Tm.tm_min,&Tm.tm_sec,&Tm.tm_year) != 6)
// Translate the month name into the tm_mon value
665 Tm.tm_mon = MonthConv(Month);
668 // Convert to local time and then to GMT
669 Result = timegm(&Tm);
673 // StrToNum - Convert a fixed length string to a number /*{{{*/
674 // ---------------------------------------------------------------------
// Parses a field of Len characters from Str in the given Base; the value is
// delivered through Res.
675 /* This is used in decoding the crazy fixed length string headers in
677 bool StrToNum(const char *Str,unsigned long &Res,unsigned Len,unsigned Base)
// Len is checked against the size of the local buffer S (presumably Str is
// copied there and needs room for a terminating NUL)
680 if (Len >= sizeof(S))
685 // All spaces is a zero
// Count the leading spaces in S
688 for (I = 0; S[I] == ' '; I++);
// End reports where strtoul stopped parsing
693 Res = strtoul(S,&End,Base);
700 // HexDigit - Convert a hex character into an integer /*{{{*/
701 // ---------------------------------------------------------------------
702 /* Helper for Hex2Num */
// Classifies c into the three hex digit ranges: 0-9, a-f and A-F.
703 static int HexDigit(int c)
705 if (c >= '0' && c <= '9')
707 if (c >= 'a' && c <= 'f')
709 if (c >= 'A' && c <= 'F')
714 // Hex2Num - Convert a long hex number into a buffer /*{{{*/
715 // ---------------------------------------------------------------------
716 /* The length of the buffer must be exactly 1/2 the length of the string. */
717 bool Hex2Num(const char *Start,const char *End,unsigned char *Num,
// The text must contain exactly two characters per output byte
720 if (End - Start != (signed)(Length*2))
723 // Convert each digit. We store it in the same order as the string
725 for (const char *I = Start; I < End;J++, I += 2)
// Both characters of the pair must be hex digits.
// NOTE(review): isxdigit is handed a plain char; negative values are outside
// its defined domain.
727 if (isxdigit(*I) == 0 || isxdigit(I[1]) == 0)
// High nibble first, then the low nibble is added in
730 Num[J] = HexDigit(I[0]) << 4;
731 Num[J] += HexDigit(I[1]);
738 // URI::CopyFrom - Copy from an object /*{{{*/
739 // ---------------------------------------------------------------------
740 /* This parses the URI into all of its components */
// Fills Access, Path, Host, User, Password and Port from the text in U.
741 void URI::CopyFrom(string U)
743 string::const_iterator I = U.begin();
745 // Locate the first colon, this separates the scheme
746 for (; I < U.end() && *I != ':' ; I++);
747 string::const_iterator FirstColon = I;
749 /* Determine if this is a host type URI with a leading double //
750 and then search for the first single / */
751 string::const_iterator SingleSlash = I;
752 if (I + 3 < U.end() && I[1] == '/' && I[2] == '/')
754 for (; SingleSlash < U.end() && *SingleSlash != '/'; SingleSlash++);
// NOTE(review): the scan above stops at U.end(), so this clamp looks
// unreachable unless SingleSlash was advanced past the end beforehand.
755 if (SingleSlash > U.end())
756 SingleSlash = U.end();
758 // We can now write the access and path specifiers
759 Access = string(U,0,FirstColon - U.begin());
760 if (SingleSlash != U.end())
761 Path = string(U,SingleSlash - U.begin());
762 if (Path.empty() == true)
765 // Now we attempt to locate a user:pass@host fragment
// NOTE(review): FirstColon equals U.end() when U contains no colon, in which
// case these subscripts read past the end of the string - confirm callers.
766 if (FirstColon[1] == '/' && FirstColon[2] == '/')
770 if (FirstColon >= U.end())
773 if (FirstColon > SingleSlash)
774 FirstColon = SingleSlash;
// Optional colon that would separate the user name from the password
780 for (; I < SingleSlash && *I != ':'; I++);
781 string::const_iterator SecondColon = I;
783 // Search for the @ after the colon
784 for (; I < SingleSlash && *I != '@'; I++);
785 string::const_iterator At = I;
787 // Now write the host and user/pass
// No @ was found before the slash: the whole span is the host
788 if (At == SingleSlash)
790 if (FirstColon < SingleSlash)
791 Host = string(U,FirstColon - U.begin(),SingleSlash - FirstColon);
// With an @ present the host follows it and the credentials precede it
795 Host = string(U,At - U.begin() + 1,SingleSlash - At - 1);
796 User = string(U,FirstColon - U.begin(),SecondColon - FirstColon);
797 if (SecondColon < At)
798 Password = string(U,SecondColon - U.begin() + 1,At - SecondColon - 1);
801 // Now we parse off a port number from the hostname
// The last colon in Host, if any, introduces the port
803 string::size_type Pos = Host.rfind(':');
804 if (Pos == string::npos)
// NOTE(review): atoi gives 0 for non-numeric text and reports no error
807 Port = atoi(string(Host,Pos+1).c_str());
808 Host = string(Host,0,Pos);
811 // URI::operator string - Convert the URI to a string /*{{{*/
812 // ---------------------------------------------------------------------
814 URI::operator string()
818 if (Access.empty() == false)
821 if (Host.empty() == false)
823 if (Access.empty() == false)
826 if (User.empty() == false)
829 if (Password.empty() == false)
830 Res += ":" + Password;
838 sprintf(S,":%u",Port);
843 if (Path.empty() == false)