00001
00002 #ifdef VCL_NEEDS_PRAGMA_INTERFACE
00003 #pragma implementation
00004 #endif
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #include "vul_url.h"
00015 #include <vcl_cstdio.h>
00016 #include <vcl_cstring.h>
00017 #include <vcl_cstdlib.h>
00018 #include <vcl_sstream.h>
00019 #include <vcl_cassert.h>
00020 #include <vcl_fstream.h>
00021 #include <vul/vul_file.h>
00022
00023 #if defined(unix) || defined(__unix)
00024
00025 # include <unistd.h>
00026 # include <netdb.h>
00027 # include <sys/socket.h>
00028 # include <netinet/in.h>
00029 # ifdef __alpha
00030 # include <fp.h>
00031 # endif
00032 # define SOCKET int
00033
00034 #elif defined (VCL_WIN32) && !defined(__CYGWIN__)
00035
00036 # include <winsock2.h>
00037
00038 #endif // unix
00039
00040 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00041
// Flag consulted by vul_http_open() and vul_http_exists() before they call
// WSAStartup(); zero means Winsock start-up has not been recorded yet.
00042 static int called_WSAStartup = 0;
00043 #endif
00044
00045
00046 vcl_istream * vul_http_open(char const *url)
00047 {
00048
00049 vcl_string host;
00050 vcl_string path;
00051 vcl_string auth;
00052 int port = 80;
00053
00054
00055 assert (vcl_strncmp(url, "http://", 7) == 0);
00056
00057 char const *p = url + 7;
00058 while (*p && *p!='/')
00059 ++ p;
00060 host = vcl_string(url+7, p);
00061
00062
00063 if (*p)
00064 path = p+1;
00065 else
00066 path = "";
00067
00068
00069 for (unsigned int i=0; i<host.size(); ++i)
00070 if (host[i] == '@') {
00071 auth = vcl_string(host.c_str(), host.c_str()+i);
00072 host = vcl_string(host.c_str()+i+1, host.c_str() + host.size());
00073 break;
00074 }
00075
00076
00077 for (unsigned int i=host.size()-1; i>0; --i)
00078 if (host[i] == ':') {
00079 port = vcl_atoi(host.c_str() + i + 1);
00080 host = vcl_string(host.c_str(), host.c_str() + i);
00081 break;
00082 }
00083
00084
00085 unsigned k =0;
00086 while (k < path.size())
00087 {
00088 if (path[k] == ' ')
00089 path.replace(k, 1, "%20");
00090 else if (path[k] == '%')
00091 path.replace(k, 1, "%25");
00092 k++;
00093 }
00094
00095
00096 #ifdef DEBUG
00097 vcl_cerr << "auth = \'" << auth << "\'\n"
00098 << "host = \'" << host << "\'\n"
00099 << "path = \'" << path << "\'\n"
00100 << "port = " << port << vcl_endl;
00101 #endif
00102
00103 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00104 if (called_WSAStartup==0)
00105 {
00106 WORD wVersionRequested;
00107 WSADATA wsaData;
00108
00109 wVersionRequested = MAKEWORD( 2, 2 );
00110
00111 WSAStartup( wVersionRequested, &wsaData );
00112 }
00113 #endif
00114
00115
00116 SOCKET tcp_socket = socket(PF_INET,
00117 SOCK_STREAM,
00118
00119 PF_UNSPEC);
00120 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00121 if (tcp_socket == INVALID_SOCKET) {
00122 # ifndef NDEBUG
00123 vcl_cerr << __FILE__ "error code : " << WSAGetLastError() << '\n';
00124 # endif
00125 #else
00126 if (tcp_socket < 0) {
00127 #endif
00128 vcl_cerr << __FILE__ ": failed to create socket.\n";
00129 return 0;
00130 }
00131
00132 #ifdef DEBUG
00133 vcl_cerr << __FILE__ ": tcp_socket = " << tcp_socket << '\n';
00134 #endif
00135
00136
00137 hostent *hp = gethostbyname(host.c_str());
00138 if (! hp) {
00139 vcl_cerr << __FILE__ ": failed to lookup host\n";
00140
00141 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00142 closesocket(tcp_socket);
00143 #else
00144 close(tcp_socket);
00145 #endif
00146
00147 return 0;
00148 }
00149
00150
00151 sockaddr_in my_addr;
00152 my_addr.sin_family = AF_INET;
00153
00154 my_addr.sin_port = htons(port);
00155 vcl_memcpy(&my_addr.sin_addr, hp->h_addr_list[0], hp->h_length);
00156
00157
00158 if (connect(tcp_socket , (sockaddr *) &my_addr, sizeof my_addr) < 0) {
00159 vcl_cerr << __FILE__ ": failed to connect to host\n";
00160
00161
00162 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00163 closesocket(tcp_socket);
00164 #else
00165 close(tcp_socket);
00166 #endif
00167
00168 return 0;
00169 }
00170
00171
00172 char buffer[4096];
00173
00174
00175 vcl_sprintf(buffer, "GET %s HTTP/1.1\r\nUser-Agent: vul_url\r\nHost: %s\r\nAccept: */*\r\n",
00176 url, host.c_str());
00177
00178 if (auth != "")
00179 vcl_sprintf(buffer+vcl_strlen(buffer),
00180 "Authorization: Basic %s\r\n",
00181 vul_url::encode_base64(auth).c_str());
00182
00183 vcl_sprintf(buffer+vcl_strlen(buffer), "\r\n");
00184
00185 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00186 if (send(tcp_socket, buffer, vcl_strlen(buffer), 0) < 0) {
00187 #else
00188 if (::write(tcp_socket, buffer, vcl_strlen(buffer)) < 0) {
00189 #endif
00190 vcl_cerr << __FILE__ ": error sending HTTP request\n";
00191
00192 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00193 closesocket(tcp_socket);
00194 #else
00195 close(tcp_socket);
00196 #endif
00197 return 0;
00198 }
00199
00200
00201 #if 1
00202 shutdown(tcp_socket, 1);
00203 #else
00204 for (int i=0; i<4096; ++i) ::write(tcp_socket, "\n\n\n\n", 4);
00205 #endif
00206
00207
00208 vcl_string contents;
00209 {
00210 int n;
00211 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00212 while ((n = recv(tcp_socket, buffer, sizeof buffer,0 )) > 0) {
00213 #else
00214 while ((n = ::read(tcp_socket, buffer, sizeof buffer)) > 0) {
00215 #endif
00216 contents.append(buffer, n);
00217 #ifdef DEBUG
00218 vcl_cerr << n << " bytes\n";
00219 #endif
00220 }
00221 }
00222
00223
00224 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00225 closesocket(tcp_socket);
00226 #else
00227 close(tcp_socket);
00228 #endif
00229
00230 #ifdef DEBUG
00231 vcl_cerr << "HTTP server returned:\n" << contents << '\n';
00232 #endif
00233
00234 if (contents.find("HTTP/1.1 200") == contents.npos)
00235 {
00236 return 0;
00237 }
00238 vcl_string::size_type n = contents.find("\r\n\r\n");
00239 if (n == contents.npos)
00240 {
00241 return 0;
00242 }
00243
00244 contents.erase(0,n+4);
00245 #ifdef DEBUG
00246 vcl_cerr << "vul_url::vul_http_open() returns:\n" << contents << '\n';
00247 #endif
00248 return new vcl_istringstream(contents);
00249 }
00250
00251
00252
00253 bool vul_http_exists(char const *url)
00254 {
00255
00256 vcl_string host;
00257 vcl_string path;
00258 vcl_string auth;
00259 int port = 80;
00260 assert (vcl_strncmp(url, "http://", 7) == 0);
00261
00262 char const *p = url + 7;
00263 while (*p && *p!='/')
00264 ++ p;
00265 host = vcl_string(url+7, p);
00266
00267
00268 if (*p)
00269 path = p+1;
00270 else
00271 path = "";
00272
00273
00274 for (unsigned int i=0; i<host.size(); ++i)
00275 if (host[i] == '@') {
00276 auth = vcl_string(host.c_str(), host.c_str()+i);
00277 host = vcl_string(host.c_str()+i+1, host.c_str() + host.size());
00278 break;
00279 }
00280
00281
00282 for (unsigned int i=0; i<host.size(); ++i)
00283 if (host[i] == ':') {
00284 port = vcl_atoi(host.c_str() + i + 1);
00285 host = vcl_string(host.c_str(), host.c_str() + i);
00286 break;
00287 }
00288
00289
00290 unsigned k =0;
00291 while (k < path.size())
00292 {
00293 if (path[k] == ' ')
00294 path.replace(k, 1, "%20");
00295 else if (path[k] == '%')
00296 path.replace(k, 1, "%25");
00297 k++;
00298 }
00299
00300
00301 #ifdef DEBUG
00302 vcl_cerr << "auth = \'" << auth << "\'\n"
00303 << "host = \'" << host << "\'\n"
00304 << "path = \'" << path << "\'\n"
00305 << "port = " << port << vcl_endl;
00306 #endif
00307
00308 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00309 if (called_WSAStartup==0)
00310 {
00311 WORD wVersionRequested;
00312 WSADATA wsaData;
00313
00314 wVersionRequested = MAKEWORD( 2, 2 );
00315
00316 WSAStartup( wVersionRequested, &wsaData );
00317 }
00318 #endif
00319
00320
00321 SOCKET tcp_socket = socket(PF_INET,
00322 SOCK_STREAM,
00323
00324 PF_UNSPEC);
00325
00326 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00327 if (tcp_socket == INVALID_SOCKET) {
00328 # ifndef NDEBUG
00329 vcl_cerr << "error code : " << WSAGetLastError() << vcl_endl;
00330 # endif
00331 #else
00332 if (tcp_socket < 0) {
00333 #endif
00334 vcl_cerr << __FILE__ ": failed to create socket.\n";
00335 return false;
00336 }
00337
00338 #ifdef DEBUG
00339 vcl_cerr << __FILE__ ": tcp_socket = " << tcp_socket << vcl_endl;
00340 #endif
00341
00342
00343 hostent *hp = gethostbyname(host.c_str());
00344 if (! hp) {
00345 vcl_cerr << __FILE__ ": failed to lookup host\n";
00346 return false;
00347 }
00348
00349
00350 sockaddr_in my_addr;
00351 my_addr.sin_family = AF_INET;
00352
00353 my_addr.sin_port = htons(port);
00354 vcl_memcpy(&my_addr.sin_addr, hp->h_addr_list[0], hp->h_length);
00355
00356
00357 if (connect(tcp_socket , (sockaddr *) &my_addr, sizeof my_addr) < 0)
00358 {
00359 vcl_cerr << __FILE__ ": failed to connect to host\n";
00360
00361 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00362 closesocket(tcp_socket);
00363 #else
00364 close(tcp_socket);
00365 #endif
00366
00367 return false;
00368 }
00369
00370
00371 char buffer[4096];
00372
00373
00374 vcl_sprintf(buffer, "HEAD %s HTTP/1.1\r\nUser-Agent: vul_url\r\nHost: %s\r\nAccept: */*\r\n",
00375 url, host.c_str());
00376 if (auth != "")
00377 vcl_sprintf(buffer+vcl_strlen(buffer), "Authorization: Basic %s\r\n",
00378 vul_url::encode_base64(auth).c_str());
00379 vcl_sprintf(buffer+vcl_strlen(buffer),"\r\n");
00380
00381 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00382 if (send(tcp_socket, buffer, vcl_strlen(buffer), 0) < 0) {
00383 #else
00384 if (::write(tcp_socket, buffer, vcl_strlen(buffer)) < 0) {
00385 #endif
00386 vcl_cerr << __FILE__ ": error sending HTTP request\n";
00387
00388 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00389 closesocket(tcp_socket);
00390 #else
00391 close(tcp_socket);
00392 #endif
00393 return false;
00394 }
00395
00396
00397 #if 1
00398 shutdown(tcp_socket, 1);
00399 #else
00400 for (int i=0; i<4096; ++i) ::write(tcp_socket, "\n\n\n\n", 4);
00401 #endif
00402
00403
00404 vcl_string contents;
00405 {
00406 int n;
00407 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00408 if ((n = recv(tcp_socket, buffer, sizeof buffer,0 )) > 0) {
00409 #else
00410 if ((n = ::read(tcp_socket, buffer, sizeof buffer)) > 0) {
00411 #endif
00412 contents.append(buffer, n);
00413
00414 }
00415 else
00416 {
00417 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00418 closesocket(tcp_socket);
00419 #else
00420 close(tcp_socket);
00421 #endif
00422 return false;
00423 }
00424 }
00425
00426
00427 #if defined(VCL_WIN32) && !defined(__CYGWIN__)
00428 closesocket(tcp_socket);
00429 #else
00430 close(tcp_socket);
00431 #endif
00432
00433 #ifdef DEBUG
00434 vcl_cerr << "HTTP server returned:\n" << contents << '\n';
00435 #endif
00436
00437 return contents.find("HTTP/1.1 200") != contents.npos;
00438 }
00439
00440
00441 vcl_istream * vul_url::open(const char * url, vcl_ios_openmode mode)
00442 {
00443
00444 if (!url || !*url)
00445 return 0;
00446 unsigned l = vcl_strlen(url);
00447
00448
00449 if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00450 return new vcl_ifstream(url+7,mode);
00451
00452
00453 if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00454 return vul_http_open(url);
00455
00456
00457 if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00458 {
00459 vcl_cerr << __LINE__ << "ERROR:\n vul_read_url(const char * url)\n"
00460 "Doesn't support FTP yet, url=" << url << vcl_endl;
00461 return 0;
00462 }
00463
00464
00465 return new vcl_ifstream(url, mode);
00466 }
00467
00468
00469
00470 bool vul_url::exists(const char * url)
00471 {
00472
00473 if (!url || !*url)
00474 return false;
00475 unsigned l = vcl_strlen(url);
00476
00477
00478 if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00479 return vul_file::exists(url+7);
00480
00481
00482 if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00483 return vul_http_exists(url);
00484
00485
00486 if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00487 {
00488 vcl_cerr << "ERROR: vul_read_url(const char * url)\n"
00489 "Doesn't support FTP yet, url=" << url << vcl_endl;
00490 return false;
00491 }
00492
00493
00494 return vul_file::exists(url);
00495 }
00496
00497
00498 bool vul_url::is_url(const char * url)
00499 {
00500
00501 if (!url || !*url)
00502 return false;
00503 unsigned l = vcl_strlen(url);
00504
00505
00506 if (l > 7 && vcl_strncmp(url, "file://", 7) == 0)
00507 return true;
00508
00509
00510 if (l > 7 && vcl_strncmp(url, "http://", 7) == 0)
00511 return true;
00512
00513
00514 if (l > 6 && vcl_strncmp(url, "ftp://", 6) == 0)
00515 return true;
00516
00517 return false;
00518 }
00519
00520
00521
00522 bool vul_url::is_file(const char * fn)
00523 {
00524 if (vul_url::is_url(fn))
00525 return vul_url::exists(fn);
00526 else
00527 return vul_file::exists(fn) && ! vul_file::is_directory(fn);
00528 }
00529
00530
00531
//: The 64 output characters of the base64 alphabet, indexed by 6-bit value.
static const
int base64_encoding[]=
{
  'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
  'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
  'g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v',
  'w','x','y','z','0','1','2','3','4','5','6','7','8','9','+','/'
};

//: Scratch storage for the result of encode_triplet (not NUL-terminated).
static char out_buf[4];

//: Encode up to three input bytes as four base64 characters.
// \param data  three bytes; the callers zero the entries beyond n.
// \param n     number of meaningful bytes in data (1, 2 or 3; asserted).
// \return pointer to the shared four-character buffer out_buf, which is
//         overwritten by the next call.  Missing input bytes are
//         represented by '=' padding.
static const char * encode_triplet(char data[3], unsigned n)
{
  assert (n>0 && n <4);

  const bool pad_third  = (n == 1);
  const bool pad_fourth = (n == 1 || n == 2);

  // First character: top six bits of byte 0.
  out_buf[0] = base64_encoding[(data[0] & 0xFC) >> 2];

  // Second character: low two bits of byte 0, high four bits of byte 1.
  out_buf[1] = base64_encoding[
    ((data[0] & 0x3) << 4) + ((data[1] & 0xf0)>>4)];

  // Third character: low four bits of byte 1, high two bits of byte 2.
  out_buf[2] = pad_third
             ? '='
             : base64_encoding[((data[1] & 0xf) << 2) + ((data[2] & 0xc0)>>6)];

  // Fourth character: low six bits of byte 2.
  out_buf[3] = pad_fourth
             ? '='
             : base64_encoding[ (data[2] & 0x3f) ];

  return out_buf;
}
00568
00569
00570
00571 vcl_string vul_url::encode_base64(const vcl_string& in)
00572 {
00573 vcl_string out;
00574 unsigned i = 0, line_octets = 0;
00575 const unsigned l = in.size();
00576 char data[3];
00577 while (i <= l)
00578 {
00579 if (i == l)
00580 {
00581 out.append("=");
00582 return out;
00583 }
00584
00585 data[0] = in[i++];
00586 data[1] = data[2] = 0;
00587
00588 if (i == l)
00589 {
00590 out.append(encode_triplet(data,1),4);
00591 return out;
00592 }
00593
00594 data[1] = in[i++];
00595
00596 if (i == l)
00597 {
00598 out.append(encode_triplet(data,2),4);
00599 return out;
00600 }
00601
00602 data[2] = in[i++];
00603
00604 out.append(encode_triplet(data,3),4);
00605
00606 if (line_octets >= 68/4)
00607 {
00608 out.append("\r\n",2);
00609 line_octets = 0;
00610 }
00611 else
00612 ++line_octets;
00613 }
00614
00615 return out;
00616 }
00617
00618
00619
00620 static int get_next_char(const vcl_string &in, unsigned int *i)
00621 {
00622 while (*i < in.size())
00623 {
00624 char c;
00625 c = in[(*i)++];
00626
00627 if (c == '+')
00628 return 62;
00629
00630 if (c == '/')
00631 return 63;
00632
00633 if (c >= 'A' && c <= 'Z')
00634 return 0 + (int)c - (int)'A';
00635
00636 if (c >= 'a' && c <= 'z')
00637 return 26 + (int)c - (int)'a';
00638
00639 if (c >= '0' && c <= '9')
00640 return 52 + (int)c - (int)'0';
00641
00642 if (c == '=')
00643 return 64;
00644 }
00645 return -1;
00646 }
00647
00648
00649
00650 vcl_string vul_url::decode_base64(const vcl_string& in)
00651 {
00652 int c;
00653 char data[3];
00654
00655 unsigned i=0;
00656 const unsigned l = in.size();
00657 vcl_string out;
00658 while (i < l)
00659 {
00660 data[0] = data[1] = data[2] = 0;
00661
00662
00663
00664 c = get_next_char(in , &i);
00665
00666
00667 if (c == 64)
00668 return out;
00669 if (c==-1)
00670 return "";
00671
00672 data[0] = ((c & 0x3f) << 2) | (0x3 & data[0]);
00673
00674
00675
00676 c = get_next_char(in , &i);
00677
00678
00679 if (c == 64 || c==-1)
00680 return "";
00681
00682 data[0] = ((c & 0x30) >> 4) | (0xfc & data[0]);
00683 data[1] = ((c & 0xf) << 4) | (0xf & data[1]);
00684
00685
00686
00687
00688
00689 c = get_next_char(in , &i);
00690
00691 if (c==-1)
00692 return "";
00693 if (c == 64)
00694 {
00695
00696 out.append(data,1);
00697 return out;
00698 }
00699
00700 data[1] = ((c & 0x3C) >> 2) | (0xf0 & data[1]);
00701 data[2] = ((c & 0x3) << 6) | (0x3f & data[2]);
00702
00703
00704
00705
00706 c = get_next_char(in , &i);
00707
00708 if (c==-1)
00709 return "";
00710
00711 if (c == 64)
00712 {
00713 out.append(data,2);
00714 return out;
00715 }
00716
00717 data[2] = (c & 0x3f) | (0xc0 & data[2]);
00718
00719 out.append(data,3);
00720 }
00721
00722 return out;
00723 }