00001
00002 #ifdef VCL_NEEDS_PRAGMA_INTERFACE
00003 #pragma implementation
00004 #endif
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include "vul_url.h"
00015 #include <vcl_cstdio.h>
00016 #include <vcl_cstring.h>
00017 #include <vcl_cstdlib.h>
00018 #include <vcl_sstream.h>
00019 #include <vcl_cassert.h>
00020 #include <vcl_fstream.h>
00021 #include <vul/vul_file.h>
00022
00023 #if defined(unix) || defined(__unix) || defined(__unix__)
00024
00025 # include <unistd.h>
00026 # include <netdb.h>
00027 # include <sys/socket.h>
00028 # include <netinet/in.h>
00029 # ifdef __alpha
00030 # include <fp.h>
00031 # endif
00032 # define SOCKET int
00033
00034 #elif defined (VCL_WIN32) && !defined(__CYGWIN__)
00035
00036 # include <winsock2.h>
00037
00038 #endif // unix
00039
#if defined(VCL_WIN32) && !defined(__CYGWIN__)
// File-local flag for native Windows builds. vul_http_open() and
// vul_http_exists() consult it to decide whether WSAStartup() still has to
// be called before the first socket is created. Starts out as 0.
static int called_WSAStartup = 0;
#endif
00044
00045
00046 vcl_istream * vul_http_open(char const *url)
00047 {
00048
00049 vcl_string host;
00050 vcl_string path;
00051 vcl_string auth;
00052 int port = 80;
00053
00054
00055 assert (vcl_strncmp(url, "http://", 7) == 0);
00056
00057 char const *p = url + 7;
00058 while (*p && *p!='/')
00059 ++ p;
00060 host = vcl_string(url+7, p);
00061
00062
00063 if (*p)
00064 path = p+1;
00065 else
00066 path = "";
00067
00068
00069 for (unsigned int i=0; i<host.size(); ++i)
00070 if (host[i] == '@') {
00071 auth = vcl_string(host.c_str(), host.c_str()+i);
00072 host = vcl_string(host.c_str()+i+1, host.c_str() + host.size());
00073 break;
00074 }
00075
00076
00077 for (unsigned int i=host.size()-1; i>0; --i)
00078 if (host[i] == ':') {
00079 port = vcl_atoi(host.c_str() + i + 1);
00080 host = vcl_string(host.c_str(), host.c_str() + i);
00081 break;
00082 }
00083
00084
00085 unsigned k =0;
00086 while (k < path.size())
00087 {
00088 if (path[k] == ' ')
00089 path.replace(k, 1, "%20");
00090 else if (path[k] == '%')
00091 path.replace(k, 1, "%25");
00092 k++;
00093 }
00094
00095
00096 #ifdef DEBUG
00097 vcl_cerr << "auth = \'" << auth << "\'\n"
00098 << "host = \'" << host << "\'\n"
00099 << "path = \'" << path << "\'\n"
00100 << "port = " << port << vcl_endl;
00101 #endif
00102
00103 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00104 if (called_WSAStartup==0)
00105 {
00106 WORD wVersionRequested;
00107 WSADATA wsaData;
00108
00109 wVersionRequested = MAKEWORD( 2, 2 );
00110
00111 WSAStartup( wVersionRequested, &wsaData );
00112 }
00113 #endif
00114
00115
00116 SOCKET tcp_socket = socket(PF_INET,
00117 SOCK_STREAM,
00118
00119 PF_UNSPEC);
00120 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00121 if (tcp_socket == INVALID_SOCKET) {
00122 # ifndef NDEBUG
00123 vcl_cerr << __FILE__ "error code : " << WSAGetLastError() << '\n';
00124 # endif
00125 #else
00126 if (tcp_socket < 0) {
00127 #endif
00128 vcl_cerr << __FILE__ ": failed to create socket.\n";
00129 return 0;
00130 }
00131
00132 #ifdef DEBUG
00133 vcl_cerr << __FILE__ ": tcp_socket = " << tcp_socket << '\n';
00134 #endif
00135
00136
00137 hostent *hp = gethostbyname(host.c_str());
00138 if (! hp) {
00139 vcl_cerr << __FILE__ ": failed to lookup host\n";
00140
00141 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00142 closesocket(tcp_socket);
00143 #else
00144 close(tcp_socket);
00145 #endif
00146
00147 return 0;
00148 }
00149
00150
00151 sockaddr_in my_addr;
00152 my_addr.sin_family = AF_INET;
00153
00154 my_addr.sin_port = htons(port);
00155 vcl_memcpy(&my_addr.sin_addr, hp->h_addr_list[0], hp->h_length);
00156
00157
00158 if (connect(tcp_socket , (sockaddr *) &my_addr, sizeof my_addr) < 0) {
00159 vcl_cerr << __FILE__ ": failed to connect to host\n";
00160
00161
00162 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00163 closesocket(tcp_socket);
00164 #else
00165 close(tcp_socket);
00166 #endif
00167
00168 return 0;
00169 }
00170
00171
00172 char buffer[4096];
00173
00174
00175 vcl_snprintf(buffer, 4090-vcl_strlen(buffer),
00176 "GET %s HTTP/1.1\r\nUser-Agent: vul_url\r\nHost: %s\r\nAccept: */*\r\n",
00177 url, host.c_str());
00178
00179 if (auth != "")
00180 vcl_snprintf(buffer+vcl_strlen(buffer), 4090-vcl_strlen(buffer),
00181 "Authorization: Basic %s\r\n",
00182 vul_url::encode_base64(auth).c_str());
00183
00184 if (vcl_snprintf(buffer+vcl_strlen(buffer), 4090-vcl_strlen(buffer), "\r\n") < 0)
00185 {
00186 vcl_cerr << "ERROR: vul_http_open buffer overflow.";
00187 vcl_abort();
00188 }
00189
00190 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00191 if (send(tcp_socket, buffer, vcl_strlen(buffer), 0) < 0) {
00192 #else
00193 if (::write(tcp_socket, buffer, vcl_strlen(buffer)) < 0) {
00194 #endif
00195 vcl_cerr << __FILE__ ": error sending HTTP request\n";
00196
00197 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00198 closesocket(tcp_socket);
00199 #else
00200 close(tcp_socket);
00201 #endif
00202 return 0;
00203 }
00204
00205
00206
00207 vcl_string contents;
00208 {
00209 int n;
00210 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00211 while ((n = recv(tcp_socket, buffer, sizeof buffer,0 )) > 0) {
00212 #else
00213 while ((n = ::read(tcp_socket, buffer, sizeof buffer)) > 0) {
00214 #endif
00215 contents.append(buffer, n);
00216 #ifdef DEBUG
00217 vcl_cerr << n << " bytes\n";
00218 #endif
00219 }
00220 }
00221
00222
00223 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00224 closesocket(tcp_socket);
00225 #else
00226 close(tcp_socket);
00227 #endif
00228
00229 #ifdef DEBUG
00230 vcl_cerr << "HTTP server returned:\n" << contents << '\n';
00231 #endif
00232
00233 if (contents.find("HTTP/1.1 200") == contents.npos)
00234 {
00235 return 0;
00236 }
00237 vcl_string::size_type n = contents.find("\r\n\r\n");
00238 if (n == contents.npos)
00239 {
00240 return 0;
00241 }
00242
00243 contents.erase(0,n+4);
00244 #ifdef DEBUG
00245 vcl_cerr << "vul_url::vul_http_open() returns:\n" << contents << '\n';
00246 #endif
00247 return new vcl_istringstream(contents);
00248 }
00249
00250
00251
00252 bool vul_http_exists(char const *url)
00253 {
00254
00255 vcl_string host;
00256 vcl_string path;
00257 vcl_string auth;
00258 int port = 80;
00259 assert (vcl_strncmp(url, "http://", 7) == 0);
00260
00261 char const *p = url + 7;
00262 while (*p && *p!='/')
00263 ++ p;
00264 host = vcl_string(url+7, p);
00265
00266
00267 if (*p)
00268 path = p+1;
00269 else
00270 path = "";
00271
00272
00273 for (unsigned int i=0; i<host.size(); ++i)
00274 if (host[i] == '@') {
00275 auth = vcl_string(host.c_str(), host.c_str()+i);
00276 host = vcl_string(host.c_str()+i+1, host.c_str() + host.size());
00277 break;
00278 }
00279
00280
00281 for (unsigned int i=0; i<host.size(); ++i)
00282 if (host[i] == ':') {
00283 port = vcl_atoi(host.c_str() + i + 1);
00284 host = vcl_string(host.c_str(), host.c_str() + i);
00285 break;
00286 }
00287
00288
00289 unsigned k =0;
00290 while (k < path.size())
00291 {
00292 if (path[k] == ' ')
00293 path.replace(k, 1, "%20");
00294 else if (path[k] == '%')
00295 path.replace(k, 1, "%25");
00296 k++;
00297 }
00298
00299
00300 #ifdef DEBUG
00301 vcl_cerr << "auth = \'" << auth << "\'\n"
00302 << "host = \'" << host << "\'\n"
00303 << "path = \'" << path << "\'\n"
00304 << "port = " << port << vcl_endl;
00305 #endif
00306
00307 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00308 if (called_WSAStartup==0)
00309 {
00310 WORD wVersionRequested;
00311 WSADATA wsaData;
00312
00313 wVersionRequested = MAKEWORD( 2, 2 );
00314
00315 WSAStartup( wVersionRequested, &wsaData );
00316 }
00317 #endif
00318
00319
00320 SOCKET tcp_socket = socket(PF_INET,
00321 SOCK_STREAM,
00322
00323 PF_UNSPEC);
00324
00325 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00326 if (tcp_socket == INVALID_SOCKET) {
00327 # ifndef NDEBUG
00328 vcl_cerr << "error code : " << WSAGetLastError() << vcl_endl;
00329 # endif
00330 #else
00331 if (tcp_socket < 0) {
00332 #endif
00333 vcl_cerr << __FILE__ ": failed to create socket.\n";
00334 return false;
00335 }
00336
00337 #ifdef DEBUG
00338 vcl_cerr << __FILE__ ": tcp_socket = " << tcp_socket << vcl_endl;
00339 #endif
00340
00341
00342 hostent *hp = gethostbyname(host.c_str());
00343 if (! hp) {
00344 vcl_cerr << __FILE__ ": failed to lookup host\n";
00345 return false;
00346 }
00347
00348
00349 sockaddr_in my_addr;
00350 my_addr.sin_family = AF_INET;
00351
00352 my_addr.sin_port = htons(port);
00353 vcl_memcpy(&my_addr.sin_addr, hp->h_addr_list[0], hp->h_length);
00354
00355
00356 if (connect(tcp_socket , (sockaddr *) &my_addr, sizeof my_addr) < 0)
00357 {
00358 vcl_cerr << __FILE__ ": failed to connect to host\n";
00359
00360 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00361 closesocket(tcp_socket);
00362 #else
00363 close(tcp_socket);
00364 #endif
00365
00366 return false;
00367 }
00368
00369
00370 char buffer[4096];
00371
00372
00373 vcl_snprintf(buffer, 4090,
00374 "HEAD %s HTTP/1.1\r\nUser-Agent: vul_url\r\nHost: %s\r\nAccept: */*\r\n",
00375 url, host.c_str());
00376 if (auth != "")
00377 vcl_snprintf(buffer+vcl_strlen(buffer), 4090-vcl_strlen(buffer),
00378 "Authorization: Basic %s\r\n",
00379 vul_url::encode_base64(auth).c_str() );
00380
00381 if (vcl_snprintf(buffer+vcl_strlen(buffer), 4090-vcl_strlen(buffer), "\r\n") < 0)
00382 {
00383 vcl_cerr << "ERROR: vul_http_exists buffer overflow.";
00384 vcl_abort();
00385 }
00386
00387 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00388 if (send(tcp_socket, buffer, vcl_strlen(buffer), 0) < 0) {
00389 #else
00390 if (::write(tcp_socket, buffer, vcl_strlen(buffer)) < 0) {
00391 #endif
00392 vcl_cerr << __FILE__ ": error sending HTTP request\n";
00393
00394 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00395 closesocket(tcp_socket);
00396 #else
00397 close(tcp_socket);
00398 #endif
00399 return false;
00400 }
00401
00402
00403
00404 vcl_string contents;
00405 {
00406 int n;
00407 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00408 if ((n = recv(tcp_socket, buffer, sizeof buffer,0 )) > 0) {
00409 #else
00410 if ((n = ::read(tcp_socket, buffer, sizeof buffer)) > 0) {
00411 #endif
00412 contents.append(buffer, n);
00413
00414 }
00415 else
00416 {
00417 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00418 closesocket(tcp_socket);
00419 #else
00420 close(tcp_socket);
00421 #endif
00422 return false;
00423 }
00424 }
00425
00426
00427 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00428 closesocket(tcp_socket);
00429 #else
00430 close(tcp_socket);
00431 #endif
00432
00433 #ifdef DEBUG
00434 vcl_cerr << "HTTP server returned:\n" << contents << '\n';
00435 #endif
00436
00437 return contents.find("HTTP/1.1 200") != contents.npos;
00438 }
00439
00440
00441 vcl_istream * vul_url::open(const char * url, vcl_ios_openmode mode)
00442 {
00443
00444 if (!url || !*url)
00445 return 0;
00446 unsigned l = vcl_strlen(url);
00447
00448
00449 if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00450 return new vcl_ifstream(url+7,mode);
00451
00452
00453 if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00454 return vul_http_open(url);
00455
00456
00457 if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00458 {
00459 vcl_cerr << __LINE__ << "ERROR:\n vul_read_url(const char * url)\n"
00460 "Doesn't support FTP yet, url=" << url << vcl_endl;
00461 return 0;
00462 }
00463
00464
00465 return new vcl_ifstream(url, mode);
00466 }
00467
00468
00469
00470 bool vul_url::exists(const char * url)
00471 {
00472
00473 if (!url || !*url)
00474 return false;
00475 unsigned l = vcl_strlen(url);
00476
00477
00478 if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00479 return vul_file::exists(url+7);
00480
00481
00482 if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00483 return vul_http_exists(url);
00484
00485
00486 if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00487 {
00488 vcl_cerr << "ERROR: vul_read_url(const char * url)\n"
00489 "Doesn't support FTP yet, url=" << url << vcl_endl;
00490 return false;
00491 }
00492
00493
00494 return vul_file::exists(url);
00495 }
00496
00497
00498 bool vul_url::is_url(const char * url)
00499 {
00500
00501 if (!url || !*url)
00502 return false;
00503 unsigned l = vcl_strlen(url);
00504
00505
00506 if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00507 return true;
00508
00509
00510 if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00511 return true;
00512
00513
00514 if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00515 return true;
00516
00517 return false;
00518 }
00519
00520
00521
00522 bool vul_url::is_file(const char * fn)
00523 {
00524 if (vul_url::is_url(fn))
00525 return vul_url::exists(fn);
00526 else
00527 return vul_file::exists(fn) && ! vul_file::is_directory(fn);
00528 }
00529
00530
00531
// The base64 alphabet: entry v is the character that encodes 6-bit value v.
static const
char base64_encoding[]=
{
  'A','B','C','D','E','F','G','H',
  'I','J','K','L','M','N','O','P',
  'Q','R','S','T','U','V','W','X',
  'Y','Z','a','b','c','d','e','f',
  'g','h','i','j','k','l','m','n',
  'o','p','q','r','s','t','u','v',
  'w','x','y','z','0','1','2','3',
  '4','5','6','7','8','9','+','/'
};

// Shared result buffer of encode_triplet(); rewritten by every call and
// not NUL-terminated.
static char out_buf[4];

// Encode up to three input bytes as four base64 characters.
//
// data: input bytes; data[0] and data[1] are always read, data[2] only
//       when n is not 1.
// n:    number of meaningful bytes in data, 1..3 (asserted).
//
// Returns a pointer to the four-character static out_buf; the unused
// trailing positions are filled with '=' (two for n==1, one for n==2).
static const char * encode_triplet(char data[3], unsigned n)
{
  assert (n>0 && n <4);

  // Work on the raw 8-bit values so that the shifts below are sign-free.
  const unsigned first  = static_cast<unsigned char>(data[0]);
  const unsigned second = static_cast<unsigned char>(data[1]);

  // Top six bits of byte 0.
  out_buf[0] = base64_encoding[first >> 2];
  // Low two bits of byte 0 followed by the top four bits of byte 1.
  out_buf[1] = base64_encoding[((first & 0x3u) << 4) | (second >> 4)];

  // Assume padding and overwrite it when more input is available.
  out_buf[2] = '=';
  out_buf[3] = '=';

  if (n != 1)
  {
    const unsigned third = static_cast<unsigned char>(data[2]);

    // Low four bits of byte 1 followed by the top two bits of byte 2.
    out_buf[2] = base64_encoding[((second & 0xfu) << 2) | (third >> 6)];

    if (n != 2)
    {
      // Low six bits of byte 2.
      out_buf[3] = base64_encoding[third & 0x3fu];
    }
  }

  return out_buf;
}
00568
00569
00570
00571 vcl_string vul_url::encode_base64(const vcl_string& in)
00572 {
00573 vcl_string out;
00574 unsigned i = 0, line_octets = 0;
00575 const unsigned l = in.size();
00576 char data[3];
00577 while (i <= l)
00578 {
00579 if (i == l)
00580 {
00581 out.append("=");
00582 return out;
00583 }
00584
00585 data[0] = in[i++];
00586 data[1] = data[2] = 0;
00587
00588 if (i == l)
00589 {
00590 out.append(encode_triplet(data,1),4);
00591 return out;
00592 }
00593
00594 data[1] = in[i++];
00595
00596 if (i == l)
00597 {
00598 out.append(encode_triplet(data,2),4);
00599 return out;
00600 }
00601
00602 data[2] = in[i++];
00603
00604 out.append(encode_triplet(data,3),4);
00605
00606 if (line_octets >= 68/4)
00607 {
00608 out.append("\r\n",2);
00609 line_octets = 0;
00610 }
00611 else
00612 ++line_octets;
00613 }
00614
00615 return out;
00616 }
00617
00618
00619
00620 static int get_next_char(const vcl_string &in, unsigned int *i)
00621 {
00622 while (*i < in.size())
00623 {
00624 char c;
00625 c = in[(*i)++];
00626
00627 if (c == '+')
00628 return 62;
00629
00630 if (c == '/')
00631 return 63;
00632
00633 if (c >= 'A' && c <= 'Z')
00634 return 0 + (int)c - (int)'A';
00635
00636 if (c >= 'a' && c <= 'z')
00637 return 26 + (int)c - (int)'a';
00638
00639 if (c >= '0' && c <= '9')
00640 return 52 + (int)c - (int)'0';
00641
00642 if (c == '=')
00643 return 64;
00644 }
00645 return -1;
00646 }
00647
00648
00649
00650 vcl_string vul_url::decode_base64(const vcl_string& in)
00651 {
00652 int c;
00653 char data[3];
00654
00655 unsigned i=0;
00656 const unsigned l = in.size();
00657 vcl_string out;
00658 while (i < l)
00659 {
00660 data[0] = data[1] = data[2] = 0;
00661
00662
00663
00664 c = get_next_char(in , &i);
00665
00666
00667 if (c == 64)
00668 return out;
00669 if (c==-1)
00670 return "";
00671
00672 data[0] = char(((c & 0x3f) << 2) | (0x3 & data[0]));
00673
00674
00675
00676 c = get_next_char(in , &i);
00677
00678
00679 if (c == 64 || c==-1)
00680 return "";
00681
00682 data[0] = char(((c & 0x30) >> 4) | (0xfc & data[0]));
00683 data[1] = char(((c & 0x0f) << 4) | (0x0f & data[1]));
00684
00685
00686
00687
00688 c = get_next_char(in , &i);
00689
00690 if (c==-1)
00691 return "";
00692 if (c == 64)
00693 {
00694
00695 out.append(data,1);
00696 return out;
00697 }
00698
00699 data[1] = char(((c & 0x3c) >> 2) | (0xf0 & data[1]));
00700 data[2] = char(((c & 0x03) << 6) | (0x3f & data[2]));
00701
00702
00703
00704 c = get_next_char(in , &i);
00705
00706 if (c==-1)
00707 return "";
00708
00709 if (c == 64)
00710 {
00711 out.append(data,2);
00712 return out;
00713 }
00714
00715 data[2] = char((c & 0x3f) | (0xc0 & data[2]));
00716
00717 out.append(data,3);
00718 }
00719
00720 return out;
00721 }