I want to write a C program to generate a GET request without using any external libraries. Is this possible using only C libraries and sockets? I'm thinking of crafting an HTTP packet (using proper formatting) and sending it to the server. Is this the only possible way, or is there a better way?
4 Answers
Using BSD sockets or, if you're somewhat limited, say you have some RTOS, some simpler TCP stack, like lwIP, you can form the GET/POST request.
There are a number of open-source implementations. See "happyhttp" as a sample ( http://scumways.com/happyhttp/happyhttp.html ). I know, it is C++, not C, but the only "C++-dependent" part there is the string/array management, so it is easily ported to pure C.
Beware, there are no "packets", since HTTP is usually transferred over a TCP connection, so technically there is only a stream of symbols in the format defined by the RFC. Since HTTP requests are usually done in a connect-send-disconnect manner, one might actually call this a "packet".
Basically, once you have an open socket (sockfd) "all" you have to do is something like
char sendline[MAXLINE + 1], recvline[MAXLINE + 1]; char* ptr; size_t n; /// Form request snprintf(sendline, MAXSUB, "GET %s HTTP/1.0\r\n" // POST or GET, both tested and works. Both HTTP 1.0 HTTP 1.1 works, but sometimes "Host: %s\r\n" // but sometimes HTTP 1.0 works better in localhost type "Content-type: application/x-www-form-urlencoded\r\n" "Content-length: %d\r\n\r\n" "%s\r\n", page, host, (unsigned int)strlen(poststr), poststr); /// Write the request if (write(sockfd, sendline, strlen(sendline))>= 0) { /// Read the response while ((n = read(sockfd, recvline, MAXLINE)) > 0) { recvline[n] = '\0'; if(fputs(recvline, stdout) == EOF) { printf("fputs() error\n"); } /// Remove the trailing chars ptr = strstr(recvline, "\r\n\r\n"); // check len for OutResponse here ? snprintf(OutResponse, MAXRESPONSE,"%s", ptr); } } 5 Comments
POSIX 7 minimal runnable example
Let's fetch http://example.com.
wget.c
#define _XOPEN_SOURCE 700
#include <arpa/inet.h>
#include <assert.h>
#include <errno.h>
#include <netdb.h> /* getprotobyname, getaddrinfo */
#include <netinet/in.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/*
 * Write all `len` bytes of `buf` to `fd`, retrying after short writes.
 * Returns 0 on success, or -1 if write() fails (errno is left as set by write).
 */
static int write_all(int fd, const char *buf, size_t len)
{
    size_t done = 0;

    while (done < len) {
        ssize_t n = write(fd, buf + done, len - done);
        if (n == -1) {
            return -1;
        }
        done += (size_t)n;
    }
    return 0;
}

/*
 * Minimal HTTP client: sends "GET /" to a host and copies the raw response
 * (headers and body) to stdout.
 *
 * Usage: wget [hostname [port]]   (defaults: example.com, port 80)
 *
 * Exits with EXIT_FAILURE on any error, EXIT_SUCCESS once the peer closes
 * the connection.
 */
int main(int argc, char **argv)
{
    enum CONSTEXPR { MAX_REQUEST_LEN = 1024 };
    char buffer[BUFSIZ];
    char request[MAX_REQUEST_LEN];
    const char request_template[] = "GET / HTTP/1.1\r\nHost: %s\r\n\r\n";
    const char *hostname = "example.com";
    unsigned short server_port = 80;
    struct protoent *protoent;
    struct sockaddr_in sockaddr_in;
    int request_len;
    int socket_file_descriptor;
    ssize_t nbytes_last;

    if (argc > 1) {
        hostname = argv[1];
    }
    if (argc > 2) {
        /* Reject empty, non-numeric, trailing-garbage and out-of-range ports. */
        char *end;
        unsigned long port;

        errno = 0;
        port = strtoul(argv[2], &end, 10);
        if (errno != 0 || end == argv[2] || *end != '\0' || port == 0 || port > 65535) {
            fprintf(stderr, "error: invalid port \"%s\"\n", argv[2]);
            exit(EXIT_FAILURE);
        }
        server_port = (unsigned short)port;
    }

    /* snprintf returns a negative value on error and >= size on truncation. */
    request_len = snprintf(request, MAX_REQUEST_LEN, request_template, hostname);
    if (request_len < 0 || request_len >= MAX_REQUEST_LEN) {
        fprintf(stderr, "request length large: %d\n", request_len);
        exit(EXIT_FAILURE);
    }

    /* Build the socket. */
    protoent = getprotobyname("tcp");
    if (protoent == NULL) {
        perror("getprotobyname");
        exit(EXIT_FAILURE);
    }
    socket_file_descriptor = socket(AF_INET, SOCK_STREAM, protoent->p_proto);
    if (socket_file_descriptor == -1) {
        perror("socket");
        exit(EXIT_FAILURE);
    }

    /* Build the address in sockaddr_in.
     * Possibly does a DNS query to get the IP from a hostname.
     * getaddrinfo is used because gethostbyname is not part of POSIX.1-2008. */
    {
        struct addrinfo hints;
        struct addrinfo *result;
        char ip_string[INET_ADDRSTRLEN];
        int gai_error;

        memset(&hints, 0, sizeof(hints));
        hints.ai_family = AF_INET;       /* the socket above is IPv4 only */
        hints.ai_socktype = SOCK_STREAM;
        gai_error = getaddrinfo(hostname, NULL, &hints, &result);
        if (gai_error != 0) {
            fprintf(stderr, "error: getaddrinfo(\"%s\"): %s\n", hostname, gai_strerror(gai_error));
            exit(EXIT_FAILURE);
        }
        if (result->ai_addrlen != sizeof(sockaddr_in)) {
            fprintf(stderr, "error: unexpected address length for \"%s\"\n", hostname);
            exit(EXIT_FAILURE);
        }
        /* Use the first address returned; copy it so the list can be freed. */
        memcpy(&sockaddr_in, result->ai_addr, sizeof(sockaddr_in));
        freeaddrinfo(result);
        sockaddr_in.sin_port = htons(server_port);

        if (inet_ntop(AF_INET, &sockaddr_in.sin_addr, ip_string, sizeof(ip_string)) == NULL) {
            perror("inet_ntop");
            exit(EXIT_FAILURE);
        }
        fprintf(stderr, "debug: IP: %s\n", ip_string);
    }

    /* Actually connect. */
    if (connect(socket_file_descriptor, (struct sockaddr *)&sockaddr_in, sizeof(sockaddr_in)) == -1) {
        perror("connect");
        exit(EXIT_FAILURE);
    }

    /* Send HTTP request. */
    if (write_all(socket_file_descriptor, request, (size_t)request_len) == -1) {
        perror("write");
        exit(EXIT_FAILURE);
    }

    /* Read the response until the peer closes the connection (read returns 0)
     * or an error occurs (read returns -1). */
    fprintf(stderr, "debug: before first read\n");
    while ((nbytes_last = read(socket_file_descriptor, buffer, sizeof(buffer))) > 0) {
        fprintf(stderr, "debug: after a read\n");
        if (write_all(STDOUT_FILENO, buffer, (size_t)nbytes_last) == -1) {
            perror("write");
            exit(EXIT_FAILURE);
        }
    }
    fprintf(stderr, "debug: after last read\n");
    if (nbytes_last == -1) {
        perror("read");
        exit(EXIT_FAILURE);
    }

    close(socket_file_descriptor);
    exit(EXIT_SUCCESS);
}
/* Compile: */
gcc -ggdb3 -std=c99 -Wall -Wextra -o wget wget.c Get http://example.com and output to stdout:
./wget example.com We see something like:
debug: IP: 93.184.216.34 debug: before first read debug: after a read HTTP/1.1 200 OK Age: 540354 Cache-Control: max-age=604800 Content-Type: text/html; charset=UTF-8 Date: Tue, 02 Feb 2021 15:21:14 GMT Etag: "3147526947+ident" Expires: Tue, 09 Feb 2021 15:21:14 GMT Last-Modified: Thu, 17 Oct 2019 07:18:26 GMT Server: ECS (nyb/1D11) Vary: Accept-Encoding X-Cache: HIT Content-Length: 1256 <!doctype html> <html> ... </html> After printing the reply, this command hangs for most servers until timeout, and that is expected:
- either server or client must close the connection
- we (client) are not doing it
- most HTTP servers leave the connection open until a timeout expecting further requests, e.g. JavaScript, CSS and images following an HTML page
- we could parse the response, and close when Content-Length bytes are read, but we didn't for simplicity. What HTTP response headers are required says that if
Content-Length is not sent, the server can just close the connection to delimit the response length.
We could however make the host close the connection by sending the standard HTTP 1.1 header Connection: close to the server:
/* Variant of the request template that adds a "Connection: close" header after the Host header. */
char request_template[] = "GET / HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n"; The connection part also works with the IP:
host example.com gives:
example.com has address 93.184.216.34 example.com has IPv6 address 2606:2800:220:1:248:1893:25c8:1946 and so we do:
./wget 93.184.216.34 however, the reply is an error, because we are not setting the Host: properly in our program, and that is required in HTTP 1.1.
A major reason this is mandatory is that you can host multiple websites on the same machine, and therefore on the same IP, simultaneously, typically in a cloud provider. This is done to reduce server costs, at the price of potentially lower performance. With this setup, the server is only able to differentiate between the different websites based on the Host: header. This is known as "virtual hosting".
Tested on Ubuntu 18.04.
Why doesn't POSIX supply wget?
It is a great shame, considering that all main capabilities are in place! Is wget or similar programs always available on POSIX systems?
Server examples
- minimal POSIX C example: Send and Receive a file in socket programming in Linux with C/C++ (GCC/G++)
- minimal Android Java example: how to create Socket connection in Android?
4 Comments
read(socket_file_descriptor, buffer, BUFSIZ).Content-Length: in the case of HTTP, but I didn't want to parse HTTP in this simple example.Connection: close to your request header to automatically close the connection after the first request, else by default in http1.1 connections are kept-alive until either server or client closes.“Without any external libraries” strictly speaking would exclude libc as well, so you'd have to write all syscalls yourself. I doubt you mean it that strict, though. If you don't want to link to another library, and don't want to copy source code from another library into your application, then directly dealing with the TCP stream using the socket API is your best approach.
Creating the HTTP request and sending it over a TCP socket connection is easy, as is reading the answer. It's parsing the answer which is going to be really tricky, particularly if you aim to support a reasonably large portion of the standard. Things like error pages, redirects, content negotiation and so on can make your life quite hard if you're talking to arbitrary web servers. If on the other hand the server is known to be well-behaved, and a simple error message is all right for any unexpected server response, then that is reasonably simple as well.
Comments
Try Socket Programming, the below C++ code issues a simple GET Request to specified host and prints the response header and content
Tested in Windows 10
#include <winsock2.h> // must come before <windows.h>, which otherwise pulls in the conflicting winsock.h
#include <windows.h>
#include <string>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

using std::string;

const int bufSize = 512;

char *antenna(string host, string path);
SOCKET connectToServer(char *szServerName, WORD portNum);
int getHeaderLength(char *content);

// Returns a heap-allocated empty string, so that every result of antenna()
// can be printed with "%s" and released with delete[].
static char *emptyResult()
{
    char *empty = new char[1];
    empty[0] = '\0';
    return empty;
}

int main()
{
    WSADATA wsaData;
    if (WSAStartup(0x101, &wsaData) != 0) {
        printf("startup failure");
        return 1; // no socket call can succeed without Winsock
    }

    char *memBuffer = antenna("www.spreadsheets.google.com", "/feeds/list/{Published_Sheet_ID-1}/1/public/values?alt=json");
    printf("Response content:\n%s\n\n", memBuffer);
    delete[] memBuffer;

    memBuffer = antenna("www.spreadsheets.google.com", "/feeds/list/{Published_Sheet_ID-2}/1/public/values?alt=json");
    printf("Response content:\n%s", memBuffer);
    delete[] memBuffer;

    WSACleanup();
    return 0;
}

// Issues "GET <path>" to <host> on port 80, prints the request and the
// response header, and returns the response body.
//
// The returned buffer is NUL-terminated, allocated with new[] and owned by
// the caller (release with delete[]). On any failure an empty string is
// returned, never NULL.
char *antenna(string host, string path)
{
    SOCKET conn = connectToServer((char *)host.c_str(), 80);
    if (conn == 0) {
        printf("No Internet connection");
        return emptyResult();
    }

    // Built as a std::string so a long path or host cannot overflow a fixed buffer.
    // No space before the first CRLF: the request line must end right after the version.
    string request = "GET " + path + " HTTP/1.0\r\nHost: " + host + "\r\nConnection: close\r\n\r\n";

    // send() may accept fewer bytes than requested, so loop until all are sent.
    size_t sent = 0;
    while (sent < request.size()) {
        int n = send(conn, request.c_str() + sent, (int)(request.size() - sent), 0);
        if (n == SOCKET_ERROR) {
            printf("send failure");
            closesocket(conn);
            return emptyResult();
        }
        sent += (size_t)n;
    }
    printf("Request Format: \n%s", request.c_str());

    // Accumulate the whole response, keeping it NUL-terminated so that the
    // string search in getHeaderLength() stays inside the buffer.
    char readBuffer[bufSize];
    char *response = NULL;
    long totalBytesRead = 0;
    for (;;) {
        int thisReadSize = recv(conn, readBuffer, bufSize, 0);
        if (thisReadSize <= 0) {
            break; // 0: peer closed the connection, <0: error
        }
        char *grown = (char *)realloc(response, totalBytesRead + thisReadSize + 1);
        if (grown == NULL) {
            free(response);
            closesocket(conn);
            return emptyResult();
        }
        response = grown;
        memcpy(response + totalBytesRead, readBuffer, thisReadSize);
        totalBytesRead += thisReadSize;
        response[totalBytesRead] = '\0';
    }
    closesocket(conn);

    if (response == NULL) {
        return emptyResult(); // nothing was received
    }

    // If no header terminator is found, treat the whole response as content.
    int headerLen = getHeaderLength(response);
    if (headerLen < 0) {
        headerLen = 0;
    }

    long contentLen = totalBytesRead - headerLen;
    char *result = new char[contentLen + 1];
    memcpy(result, response + headerLen, contentLen);
    result[contentLen] = '\0';

    printf("Response Header: \n%.*s", headerLen, response);

    free(response); // allocated with realloc, so released with free
    return result;
}

// Opens a TCP connection to szServerName (dotted IPv4 address or host name)
// on portNum. Returns the connected socket, or 0 on failure.
SOCKET connectToServer(char *szServerName, WORD portNum)
{
    SOCKET sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (sock == INVALID_SOCKET) {
        return 0;
    }

    struct sockaddr_in server;
    memset(&server, 0, sizeof(server));
    server.sin_family = AF_INET;
    server.sin_port = htons(portNum);

    unsigned long addr = inet_addr(szServerName);
    if (addr != INADDR_NONE) {
        // Numeric address: use it directly, no reverse DNS lookup needed.
        server.sin_addr.s_addr = addr;
    } else {
        struct hostent *hp = gethostbyname(szServerName);
        if (hp == NULL || hp->h_addr_list[0] == NULL) {
            closesocket(sock);
            return 0;
        }
        memcpy(&server.sin_addr, hp->h_addr_list[0], sizeof(server.sin_addr));
    }

    if (connect(sock, (struct sockaddr *)&server, sizeof(server)) != 0) {
        closesocket(sock);
        return 0;
    }
    return sock;
}

// Returns the number of bytes up to and including the blank line that ends
// the HTTP header in the NUL-terminated string `content`, or -1 if
// `content` is NULL or contains no such separator.
int getHeaderLength(char *content)
{
    const char *srchStr1 = "\r\n\r\n", *srchStr2 = "\n\r\n\r";
    char *findPos;
    int ofset = -1;

    if (content == NULL) {
        return ofset;
    }

    findPos = strstr(content, srchStr1);
    if (findPos != NULL) {
        ofset = (int)(findPos - content);
        ofset += (int)strlen(srchStr1);
    } else {
        findPos = strstr(content, srchStr2);
        if (findPos != NULL) {
            ofset = (int)(findPos - content);
            ofset += (int)strlen(srchStr2);
        }
    }
    return ofset;
}
// To compile (using g++) :
g++ -static test.cpp -o test.exe -lws2_32 -lws2_32 tells the linker to link against the Winsock library (ws2_32)