2 * Copyright (c) 2008, 2009, 2010 Zmanda, Inc. All Rights Reserved.
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of the GNU General Public License version 2 as published
6 * by the Free Software Foundation.
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
10 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 * Contact information: Zmanda Inc., 465 S. Mathilda Ave., Suite 300
18 * Sunnyvale, CA 94085, USA, or: http://www.zmanda.com
22 * - collect speed statistics
27 /* use a relative path here to avoid conflicting with Perl's config.h. */
28 #include "../config/config.h"
36 #ifdef HAVE_SYS_TYPES_H
37 #include <sys/types.h>
39 #ifdef HAVE_SYS_STAT_H
58 #include <curl/curl.h>
60 /* Constant renamed after version 7.10.7 */
61 #ifndef CURLINFO_RESPONSE_CODE
62 #define CURLINFO_RESPONSE_CODE CURLINFO_HTTP_CODE
65 /* We don't need OpenSSL's kerberos support, and it's broken in
67 #define OPENSSL_NO_KRB5
69 #ifdef HAVE_OPENSSL_HMAC_H
70 # include <openssl/hmac.h>
72 # ifdef HAVE_CRYPTO_HMAC_H
73 # include <crypto/hmac.h>
81 #include <openssl/err.h>
82 #include <openssl/ssl.h>
83 #include <openssl/md5.h>
85 /* Maximum key length as specified in the S3 documentation
86 * (*excluding* null terminator) */
87 #define S3_MAX_KEY_LENGTH 1024
89 #define AMAZON_SECURITY_HEADER "x-amz-security-token"
90 #define AMAZON_BUCKET_CONF_TEMPLATE "\
91 <CreateBucketConfiguration>\n\
92 <LocationConstraint>%s</LocationConstraint>\n\
93 </CreateBucketConfiguration>"
95 #define AMAZON_STORAGE_CLASS_HEADER "x-amz-storage-class"
97 #define AMAZON_WILDCARD_LOCATION "*"
99 /* parameters for exponential backoff in the face of retriable errors */
/* Initial retry delay, in microseconds: one hundredth of a second.
 * The expansion is parenthesized so that the macro behaves as a single
 * value when it is used inside a larger expression (e.g. as a divisor). */
#define EXPONENTIAL_BACKOFF_START_USEC (G_USEC_PER_SEC/100)
103 /* double at each retry */
104 #define EXPONENTIAL_BACKOFF_BASE 2
105 /* retry 14 times (for a total of about 3 minutes spent waiting) */
106 #define EXPONENTIAL_BACKOFF_MAX_RETRIES 14
108 /* general "reasonable size" parameters */
109 #define MAX_ERROR_RESPONSE_LEN (100*1024)
111 /* Results which should always be retried */
112 #define RESULT_HANDLING_ALWAYS_RETRY \
113 { 400, S3_ERROR_RequestTimeout, 0, S3_RESULT_RETRY }, \
114 { 403, S3_ERROR_RequestTimeTooSkewed,0, S3_RESULT_RETRY }, \
115 { 409, S3_ERROR_OperationAborted, 0, S3_RESULT_RETRY }, \
116 { 412, S3_ERROR_PreconditionFailed, 0, S3_RESULT_RETRY }, \
117 { 500, S3_ERROR_InternalError, 0, S3_RESULT_RETRY }, \
118 { 501, S3_ERROR_NotImplemented, 0, S3_RESULT_RETRY }, \
119 { 0, 0, CURLE_COULDNT_CONNECT, S3_RESULT_RETRY }, \
120 { 0, 0, CURLE_COULDNT_RESOLVE_HOST, S3_RESULT_RETRY }, \
121 { 0, 0, CURLE_PARTIAL_FILE, S3_RESULT_RETRY }, \
122 { 0, 0, CURLE_OPERATION_TIMEOUTED, S3_RESULT_RETRY }, \
123 { 0, 0, CURLE_SEND_ERROR, S3_RESULT_RETRY }, \
124 { 0, 0, CURLE_RECV_ERROR, S3_RESULT_RETRY }, \
125 { 0, 0, CURLE_GOT_NOTHING, S3_RESULT_RETRY }
128 * Data structures and associated functions
132 /* (all strings in this struct are freed by s3_free()) */
138 /* attributes for new objects */
139 char *bucket_location;
143 gboolean use_subdomain;
151 guint64 max_send_speed;
152 guint64 max_recv_speed;
154 /* information from the last request */
156 guint last_response_code;
157 s3_error_code_t last_s3_error_code;
158 CURLcode last_curl_code;
159 guint last_num_retries;
160 void *last_response_body;
161 guint last_response_body_size;
164 time_t time_offset_with_s3;
169 s3_write_func write_func;
170 s3_reset_func reset_func;
173 gboolean headers_done;
174 gboolean int_write_done;
176 /* Points to current handle: Added to get hold of s3 offset */
177 struct S3Handle *hdl;
180 /* Callback function to examine headers one-at-a-time
182 * @note this is the same as CURLOPT_HEADERFUNCTION
184 * @param data: The pointer to read data from
185 * @param size: The size of each "element" of the data buffer in bytes
186 * @param nmemb: The number of elements in the data buffer.
187 * So, the buffer's size is size*nmemb bytes.
188 * @param stream: the header_data (an opaque pointer)
190 * @return The number of bytes written to the buffer or
191 * CURL_WRITEFUNC_PAUSE to pause.
192 * If it's the number of bytes written, it should match the buffer size
194 typedef size_t (*s3_header_func)(void *data, size_t size, size_t nmemb, void *stream);
200 /* (see preprocessor magic in s3.h) */
202 static char * s3_error_code_names[] = {
203 #define S3_ERROR(NAME) #NAME
208 /* Convert an s3 error name to an error code. This function
209 * matches strings case-insensitively, and is appropriate for use
210 * on data from the network.
212 * @param s3_error_name: the error name
213 * @returns: the error code (see constants in s3.h)
215 static s3_error_code_t
216 s3_error_code_from_name(char *s3_error_name);
218 /* Convert an s3 error code to a string
220 * @param s3_error_code: the error code to convert
221 * @returns: statically allocated string
224 s3_error_name_from_code(s3_error_code_t s3_error_code);
230 /* result handling is specified by a static array of result_handling structs,
231 * which match based on response_code (from HTTP) and S3 error code. The result
232 * given for the first match is used. 0 acts as a wildcard for both response_code
233 * and s3_error_code. The list is terminated with a struct containing 0 for both
234 * response_code and s3_error_code; the result for that struct is the default
237 * See RESULT_HANDLING_ALWAYS_RETRY for an example.
240 S3_RESULT_RETRY = -1,
245 typedef struct result_handling {
247 s3_error_code_t s3_error_code;
252 /* Lookup a result in C{result_handling}.
254 * @param result_handling: array of handling specifications
255 * @param response_code: response code from operation
256 * @param s3_error_code: s3 error code from operation, if any
257 * @param curl_code: the CURL error, if any
258 * @returns: the matching result
261 lookup_result(const result_handling_t *result_handling,
263 s3_error_code_t s3_error_code,
267 * Precompiled regular expressions */
268 static regex_t etag_regex, error_name_regex, message_regex, subdomain_regex,
269 location_con_regex, date_sync_regex;
276 /* Check if a string is non-empty
278 * @param str: string to check
279 * @returns: true iff str is non-NULL and not "\0"
281 static gboolean is_non_empty_string(const char *str);
283 /* Construct the URL for an Amazon S3 REST request.
285 * A new string is allocated and returned; freeing it is the responsibility of the caller.
287 * @param hdl: the S3Handle object
288 * @param verb: capitalized verb for this request ('PUT', 'GET', etc.)
289 * @param host: the host name to connect to, 's3.amazonaws.com'
290 * @param service_path: A path to add in the URL, or NULL for none.
291 * @param bucket: the bucket being accessed, or NULL for none
292 * @param key: the key being accessed, or NULL for none
293 * @param subresource: the sub-resource being accessed (e.g. "acl"), or NULL for none
294 * @param use_subdomain: if TRUE, a subdomain of 'host' will be used
295 * @param use_ssl: if TRUE, use 'https'
297 * !use_subdomain: http://host/service_path/bucket/key
298 * use_subdomain : http://bucket.host/service_path/key
304 const char *service_path,
307 const char *subresource,
309 gboolean use_subdomain,
312 /* Create proper authorization headers for an Amazon S3 REST
313 * request to C{headers}.
315 * @note: C{X-Amz} headers (in C{headers}) must
317 * - be in alphabetical order
318 * - have no spaces around the colon
319 * (don't yell at me -- see the Amazon Developer Guide)
321 * @param hdl: the S3Handle object
322 * @param verb: capitalized verb for this request ('PUT', 'GET', etc.)
323 * @param bucket: the bucket being accessed, or NULL for none
324 * @param key: the key being accessed, or NULL for none
325 * @param subresource: the sub-resource being accessed (e.g. "acl"), or NULL for none
326 * @param md5_hash: the MD5 hash of the request body, or NULL for none
328 static struct curl_slist *
329 authenticate_request(S3Handle *hdl,
333 const char *subresource,
334 const char *md5_hash);
338 /* Interpret the response to an S3 operation, assuming CURL completed its request
339 * successfully. This function fills in the relevant C{hdl->last*} members.
341 * @param hdl: The S3Handle object
342 * @param body: the response body
343 * @param body_len: the length of the response body
344 * @param etag: The response's ETag header
345 * @param content_md5: The hex-encoded MD5 hash of the request body,
346 * which will be checked against the response's ETag header.
347 * If NULL, the header is not checked.
348 * If non-NULL, then the body should have the response headers at its beginning.
349 * @returns: TRUE if the response should be retried (e.g., network error)
352 interpret_response(S3Handle *hdl,
354 char *curl_error_buffer,
358 const char *content_md5);
360 /* Perform an S3 operation. This function handles all of the details
361 * of retrying requests and so on.
363 * The concepts of bucket and keys are defined by the Amazon S3 API.
364 * See: "Components of Amazon S3" - API Version 2006-03-01 pg. 8
366 * Individual sub-resources are defined in several places. In the REST API,
367 * they are represented by a "flag" in the "query string".
368 * See: "Constructing the CanonicalizedResource Element" - API Version 2006-03-01 pg. 60
370 * @param hdl: the S3Handle object
371 * @param verb: the HTTP request method
372 * @param bucket: the bucket to access, or NULL for none
373 * @param key: the key to access, or NULL for none
374 * @param subresource: the "sub-resource" to request (e.g. "acl") or NULL for none
375 * @param query: the query string to send (not including the initial '?'),
377 * @param read_func: the callback for reading data
378 * Will use s3_empty_read_func if NULL is passed in.
379 * @param read_reset_func: the callback to reset reading data
380 * @param size_func: the callback to get the number of bytes to upload
381 * @param md5_func: the callback to get the MD5 hash of the data to upload
382 * @param read_data: pointer to pass to the above functions
383 * @param write_func: the callback for writing data.
384 * Will use s3_counter_write_func if NULL is passed in.
385 * @param write_reset_func: the callback to reset writing data
386 * @param write_data: pointer to pass to C{write_func}
387 * @param progress_func: the callback for progress information
388 * @param progress_data: pointer to pass to C{progress_func}
389 * @param result_handling: instructions for handling the results; see above.
390 * @returns: the result specified by result_handling; details of the response
391 * are then available in C{hdl->last*}
394 perform_request(S3Handle *hdl,
398 const char *subresource,
400 s3_read_func read_func,
401 s3_reset_func read_reset_func,
402 s3_size_func size_func,
403 s3_md5_func md5_func,
405 s3_write_func write_func,
406 s3_reset_func write_reset_func,
408 s3_progress_func progress_func,
409 gpointer progress_data,
410 const result_handling_t *result_handling);
413 * a CURLOPT_WRITEFUNCTION to save part of the response in memory and
414 * call an external function if one was provided.
417 s3_internal_write_func(void *ptr, size_t size, size_t nmemb, void * stream);
420 * a function to reset to our internal buffer
423 s3_internal_reset_func(void * stream);
426 * a CURLOPT_HEADERFUNCTION to save the ETag header only.
429 s3_internal_header_func(void *ptr, size_t size, size_t nmemb, void * stream);
432 compile_regexes(void);
435 * Static function implementations
/* Map an S3 error name, as received from the network, to an s3_error_code_t.
 * A NULL name yields S3_ERROR_Unknown. Otherwise the name is compared
 * case-insensitively against each entry of s3_error_code_names; when the loop
 * finishes without returning, the result is S3_ERROR_Unknown.
 * NOTE(review): the statement executed on a match is not visible in this
 * excerpt -- presumably it returns the matching index; confirm. */
437 static s3_error_code_t
438 s3_error_code_from_name(char *s3_error_name)
442 if (!s3_error_name) return S3_ERROR_Unknown;
444 /* do a brute-force search through the list, since it's not sorted */
445 for (i = 0; i < S3_ERROR_END; i++) {
446 if (g_ascii_strcasecmp(s3_error_name, s3_error_code_names[i]) == 0)
450 return S3_ERROR_Unknown;
/* Return the name for an S3 error code. Codes at or beyond S3_ERROR_END are
 * first replaced by S3_ERROR_Unknown, which keeps the table index in range.
 * The returned pointer refers to an entry of the static s3_error_code_names
 * table. */
454 s3_error_name_from_code(s3_error_code_t s3_error_code)
456 if (s3_error_code >= S3_ERROR_END)
457 s3_error_code = S3_ERROR_Unknown;
459 return s3_error_code_names[s3_error_code];
/* Report whether libcurl offers SSL. The answer is kept in a function-local
 * static whose initial value -1 means "not determined yet"; the visible code
 * queries curl_version_info() only while that value is still -1, and only when
 * the headers define CURL_VERSION_SSL.
 * NOTE(review): the assignments to `supported` and the return statement are
 * not visible in this excerpt. */
463 s3_curl_supports_ssl(void)
465 static int supported = -1;
466 if (supported == -1) {
467 #if defined(CURL_VERSION_SSL)
468 curl_version_info_data *info = curl_version_info(CURLVERSION_NOW);
469 if (info->features & CURL_VERSION_SSL)
/* Report whether libcurl's transfer-speed limits can be used. The headers
 * must be at least 7.15.5 (0x070f05) for the code to be compiled in, and the
 * library found at run time must report at least that version as well.
 * NOTE(review): the branch taken for older headers is not visible here. */
482 s3_curl_throttling_compat(void)
484 /* CURLOPT_MAX_SEND_SPEED_LARGE added in 7.15.5 */
485 #if LIBCURL_VERSION_NUM >= 0x070f05
486 curl_version_info_data *info;
488 /* check the runtime version too */
489 info = curl_version_info(CURLVERSION_NOW);
490 return info->version_num >= 0x070f05;
/* Find the handling rule that applies to a finished request.
 * The table is scanned in order until the terminator, i.e. the entry whose
 * response_code, s3_error_code and curl_code are all zero. Within an entry a
 * zero field matches anything; an entry is rejected as soon as one of its
 * non-zero fields differs from the corresponding argument. The result of the
 * first entry that is not rejected is returned; if none is found, the
 * terminator's result serves as the default. */
497 lookup_result(const result_handling_t *result_handling,
499 s3_error_code_t s3_error_code,
502 while (result_handling->response_code
503 || result_handling->s3_error_code
504 || result_handling->curl_code) {
505 if ((result_handling->response_code && result_handling->response_code != response_code)
506 || (result_handling->s3_error_code && result_handling->s3_error_code != s3_error_code)
507 || (result_handling->curl_code && result_handling->curl_code != curl_code)) {
512 return result_handling->result;
515 /* return the result for the terminator, as the default */
516 return result_handling->result;
/* True when str is neither NULL nor the empty string. The NULL test comes
 * first, so str[0] is only read for a valid pointer. */
520 is_non_empty_string(const char *str)
522 return str && str[0] != '\0';
/* Assemble the request URL in a GString: scheme ("http", plus "s" when SSL is
 * requested), "://", then either "bucket.host" (use_subdomain with a bucket)
 * or just the host, the service path, the URL-escaped bucket (path style
 * only), the URL-escaped key, and finally "?" followed by the subresource
 * and/or query joined with "&". The escaped copies are released with
 * curl_free() and the GString's character data is returned to the caller.
 *
 * NOTE(review): the first lines of this definition (function name and leading
 * parameters) and the cleanup label are not visible in this excerpt.
 * NOTE(review): the `goto cleanup` taken when curl_escape() fails appears to
 * reach the same final g_string_free(url, FALSE), which would hand back a
 * truncated URL instead of NULL -- confirm against the full source.
 * NOTE(review): libcurl documents curl_escape() as deprecated in favour of
 * curl_easy_escape(). */
528 const char *service_path,
531 const char *subresource,
533 gboolean use_subdomain,
537 char *esc_bucket = NULL, *esc_key = NULL;
540 url = g_string_new("http");
542 g_string_append(url, "s");
544 g_string_append(url, "://");
547 if (use_subdomain && bucket)
548 g_string_append_printf(url, "%s.%s", bucket, host);
550 g_string_append_printf(url, "%s", host);
553 g_string_append_printf(url, "%s/", service_path);
555 g_string_append(url, "/");
559 if (!use_subdomain && bucket) {
560 esc_bucket = curl_escape(bucket, 0);
561 if (!esc_bucket) goto cleanup;
562 g_string_append_printf(url, "%s", esc_bucket);
564 g_string_append(url, "/");
568 esc_key = curl_escape(key, 0);
569 if (!esc_key) goto cleanup;
570 g_string_append_printf(url, "%s", esc_key);
574 if (subresource || query)
575 g_string_append(url, "?");
578 g_string_append(url, subresource);
580 if (subresource && query)
581 g_string_append(url, "&");
584 g_string_append(url, query);
587 if (esc_bucket) curl_free(esc_bucket);
588 if (esc_key) curl_free(esc_key);
590 return g_string_free(url, FALSE);
/* Build the signed header list for one request.
 * The string to sign is assembled in auth_string: verb, Content-MD5, an empty
 * Content-Type, the date (current time shifted by hdl->time_offset_with_s3
 * and formatted as an RFC 1123 style GMT date), the x-amz headers for the
 * security token and storage class when those are set, and the canonicalized
 * resource (service path, bucket, key, "?subresource"). That string is signed
 * with HMAC-SHA1 using hdl->secret_key and base64-encoded. The returned
 * curl_slist then receives the x-amz headers, "Authorization: AWS
 * access_key:signature", Content-MD5 when an MD5 was supplied, and Date.
 * NOTE(review): several conditionals, the platform #ifdef around the two
 * gmtime calls, the g_free() calls and the return are not visible in this
 * excerpt. */
593 static struct curl_slist *
594 authenticate_request(S3Handle *hdl,
598 const char *subresource,
599 const char *md5_hash)
606 GByteArray *md = NULL;
607 char *auth_base64 = NULL;
608 struct curl_slist *headers = NULL;
609 char *esc_bucket = NULL, *esc_key = NULL;
610 GString *auth_string = NULL;
613 static const char *wkday[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"};
614 static const char *month[] = {"Jan", "Feb", "Mar", "Apr", "May", "Jun",
615 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"};
619 /* Build the string to sign, per the S3 spec.
620 * See: "Authenticating REST Requests" - API Version 2006-03-01 pg 58
624 auth_string = g_string_new(verb);
625 g_string_append(auth_string, "\n");
627 /* Content-MD5 header */
629 g_string_append(auth_string, md5_hash);
630 g_string_append(auth_string, "\n");
632 /* Content-Type is empty*/
633 g_string_append(auth_string, "\n");
636 /* calculate the date */
639 /* sync clock with amazon s3 */
640 t = t + hdl->time_offset_with_s3;
/* NOTE(review): Microsoft documents gmtime_s as returning zero on success, so
 * the test on the next line is true when the conversion SUCCEEDED; it looks
 * inverted. Both diagnostics also say "localtime" although the calls convert
 * to GMT, and neither failure path stops `tmp` from being used afterwards. */
643 if (!gmtime_s(&tmp, &t)) g_debug("localtime error");
645 if (!gmtime_r(&t, &tmp)) perror("localtime");
649 date = g_strdup_printf("%s, %02d %s %04d %02d:%02d:%02d GMT",
650 wkday[tmp.tm_wday], tmp.tm_mday, month[tmp.tm_mon], 1900+tmp.tm_year,
651 tmp.tm_hour, tmp.tm_min, tmp.tm_sec);
653 g_string_append(auth_string, date);
654 g_string_append(auth_string, "\n");
656 /* CanonicalizedAmzHeaders, sorted lexicographically */
657 if (is_non_empty_string(hdl->user_token)) {
658 g_string_append(auth_string, AMAZON_SECURITY_HEADER);
659 g_string_append(auth_string, ":");
660 g_string_append(auth_string, hdl->user_token);
661 g_string_append(auth_string, ",");
662 g_string_append(auth_string, STS_PRODUCT_TOKEN);
663 g_string_append(auth_string, "\n");
666 if (is_non_empty_string(hdl->storage_class)) {
667 g_string_append(auth_string, AMAZON_STORAGE_CLASS_HEADER);
668 g_string_append(auth_string, ":");
669 g_string_append(auth_string, hdl->storage_class);
670 g_string_append(auth_string, "\n");
673 /* CanonicalizedResource */
674 if (hdl->service_path) {
675 g_string_append(auth_string, hdl->service_path);
677 g_string_append(auth_string, "/");
679 if (hdl->use_subdomain)
680 g_string_append(auth_string, bucket);
682 esc_bucket = curl_escape(bucket, 0);
683 if (!esc_bucket) goto cleanup;
684 g_string_append(auth_string, esc_bucket);
688 if (bucket && (hdl->use_subdomain || key))
689 g_string_append(auth_string, "/");
692 esc_key = curl_escape(key, 0);
693 if (!esc_key) goto cleanup;
694 g_string_append(auth_string, esc_key);
698 g_string_append(auth_string, "?");
699 g_string_append(auth_string, subresource);
702 /* run HMAC-SHA1 on the canonicalized string */
703 md = g_byte_array_sized_new(EVP_MAX_MD_SIZE+1);
705 HMAC_Init_ex(&ctx, hdl->secret_key, (int) strlen(hdl->secret_key), EVP_sha1(), NULL);
706 HMAC_Update(&ctx, (unsigned char*) auth_string->str, auth_string->len);
707 HMAC_Final(&ctx, md->data, &md->len);
708 HMAC_CTX_cleanup(&ctx);
709 auth_base64 = s3_base64_encode(md);
710 /* append the new headers */
711 if (is_non_empty_string(hdl->user_token)) {
712 /* Devpay headers are included in hash. */
713 buf = g_strdup_printf(AMAZON_SECURITY_HEADER ": %s", hdl->user_token);
714 headers = curl_slist_append(headers, buf);
717 buf = g_strdup_printf(AMAZON_SECURITY_HEADER ": %s", STS_PRODUCT_TOKEN);
718 headers = curl_slist_append(headers, buf);
722 if (is_non_empty_string(hdl->storage_class)) {
723 buf = g_strdup_printf(AMAZON_STORAGE_CLASS_HEADER ": %s", hdl->storage_class);
724 headers = curl_slist_append(headers, buf);
729 buf = g_strdup_printf("Authorization: AWS %s:%s",
730 hdl->access_key, auth_base64);
731 headers = curl_slist_append(headers, buf);
734 if (md5_hash && '\0' != md5_hash[0]) {
735 buf = g_strdup_printf("Content-MD5: %s", md5_hash);
736 headers = curl_slist_append(headers, buf);
740 buf = g_strdup_printf("Date: %s", date);
741 headers = curl_slist_append(headers, buf);
747 g_byte_array_free(md, TRUE);
749 g_string_free(auth_string, TRUE);
/* Translate the outcome of one transfer into the hdl->last_* fields.
 * Steps visible here: discard the previous last_message; on a CURL failure
 * record the code and the text from curl_error_buffer; otherwise store the
 * HTTP status, compare the ETag with content_md5 on a 200 response, treat any
 * status in [200, 400) as "no S3 error", and for other statuses extract the
 * error name and message from the body with the precompiled regexes. Bodies
 * longer than MAX_ERROR_RESPONSE_LEN are not parsed; an empty body on an
 * error status returns TRUE so the caller retries.
 * NOTE(review): most return statements and the cleanup section are not
 * visible in this excerpt. */
755 interpret_response(S3Handle *hdl,
757 char *curl_error_buffer,
761 const char *content_md5)
763 long response_code = 0;
764 regmatch_t pmatch[2];
765 char *error_name = NULL, *message = NULL;
766 char *body_copy = NULL;
769 if (!hdl) return FALSE;
771 if (hdl->last_message) g_free(hdl->last_message);
772 hdl->last_message = NULL;
774 /* bail out from a CURL error */
775 if (curl_code != CURLE_OK) {
776 hdl->last_curl_code = curl_code;
777 hdl->last_message = g_strdup_printf("CURL error: %s", curl_error_buffer);
781 /* CURL seems to think things were OK, so get its response code */
782 curl_easy_getinfo(hdl->curl, CURLINFO_RESPONSE_CODE, &response_code);
783 hdl->last_response_code = response_code;
785 /* check ETag, if present */
786 if (etag && content_md5 && 200 == response_code) {
/* NOTE(review): `etag` was already tested by the enclosing condition, so the
 * `etag &&` below is redundant. */
787 if (etag && g_ascii_strcasecmp(etag, content_md5))
788 hdl->last_message = g_strdup("S3 Error: Possible data corruption (ETag returned by Amazon did not match the MD5 hash of the data sent)");
794 if (200 <= response_code && response_code < 400) {
795 /* 2xx and 3xx codes won't have a response body we care about */
796 hdl->last_s3_error_code = S3_ERROR_None;
800 /* Now look at the body to try to get the actual Amazon error message. Rather
801 * than parse out the XML, just use some regexes. */
803 /* impose a reasonable limit on body size */
804 if (body_len > MAX_ERROR_RESPONSE_LEN) {
805 hdl->last_message = g_strdup("S3 Error: Unknown (response body too large to parse)");
807 } else if (!body || body_len == 0) {
808 hdl->last_message = g_strdup("S3 Error: Unknown (empty response body)");
809 return TRUE; /* perhaps a network error; retry the request */
812 /* use strndup to get a zero-terminated string */
813 body_copy = g_strndup(body, body_len);
814 if (!body_copy) goto cleanup;
816 if (!s3_regexec_wrap(&error_name_regex, body_copy, 2, pmatch, 0))
817 error_name = find_regex_substring(body_copy, pmatch[1]);
819 if (!s3_regexec_wrap(&message_regex, body_copy, 2, pmatch, 0))
820 message = find_regex_substring(body_copy, pmatch[1]);
823 hdl->last_s3_error_code = s3_error_code_from_name(error_name);
827 hdl->last_message = message;
828 message = NULL; /* steal the reference to the string */
839 /* a CURLOPT_READFUNCTION to read data from a buffer. */
/* Copy the next chunk of a CurlBuffer into libcurl's upload buffer.
 * At most size*nmemb bytes are requested; the request is clamped to the bytes
 * remaining between buffer_pos and buffer_len, buffer_pos advances by the
 * amount copied, and that amount is returned (0 once the buffer is used up).
 * NOTE(review): in `(guint) size * nmemb` the cast binds to `size` alone; the
 * product is narrowed to guint only by the assignment. */
841 s3_buffer_read_func(void *ptr, size_t size, size_t nmemb, void * stream)
843 CurlBuffer *data = stream;
844 guint bytes_desired = (guint) size * nmemb;
846 /* check the number of bytes remaining, just to be safe */
847 if (bytes_desired > data->buffer_len - data->buffer_pos)
848 bytes_desired = data->buffer_len - data->buffer_pos;
850 memcpy((char *)ptr, data->buffer + data->buffer_pos, bytes_desired);
851 data->buffer_pos += bytes_desired;
853 return bytes_desired;
/* Size callback for CurlBuffer uploads: reports the buffer's buffer_len. */
857 s3_buffer_size_func(void *stream)
859 CurlBuffer *data = stream;
860 return data->buffer_len;
/* MD5 callback for CurlBuffer uploads. A temporary GByteArray on the stack is
 * pointed at the buffer's memory (no copy is made) and handed to
 * s3_compute_md5_hash(), whose result is returned. */
864 s3_buffer_md5_func(void *stream)
866 CurlBuffer *data = stream;
867 GByteArray req_body_gba = {(guint8 *)data->buffer, data->buffer_len};
869 return s3_compute_md5_hash(&req_body_gba);
/* Reset callback for CurlBuffer: moves buffer_pos back to 0 so the next read
 * or write starts at the beginning of the buffer. The contents and the
 * allocation are left untouched. */
873 s3_buffer_reset_func(void *stream)
875 CurlBuffer *data = stream;
876 data->buffer_pos = 0;
879 /* a CURLOPT_WRITEFUNCTION to write data to a buffer. */
/* Append size*nmemb bytes from libcurl to a CurlBuffer at buffer_pos.
 * A non-zero max_buffer_size acts as a hard limit on the total size. When the
 * data does not fit in the current allocation, the buffer is grown with
 * g_realloc to the larger of the needed size and twice the current size,
 * capped at max_buffer_size when that is set.
 * NOTE(review): the statement guarded by the size check, the condition in
 * front of `return 0`, and the final return are not visible in this excerpt. */
881 s3_buffer_write_func(void *ptr, size_t size, size_t nmemb, void *stream)
883 CurlBuffer * data = stream;
884 guint new_bytes = (guint) size * nmemb;
885 guint bytes_needed = data->buffer_pos + new_bytes;
887 /* error out if the new size is greater than the maximum allowed */
888 if (data->max_buffer_size && bytes_needed > data->max_buffer_size)
891 /* reallocate if necessary. We use exponential sizing to make this
892 * happen less often. */
893 if (bytes_needed > data->buffer_len) {
894 guint new_size = MAX(bytes_needed, data->buffer_len * 2);
895 if (data->max_buffer_size) {
896 new_size = MIN(new_size, data->max_buffer_size);
898 data->buffer = g_realloc(data->buffer, new_size);
899 data->buffer_len = new_size;
902 return 0; /* returning zero signals an error to libcurl */
904 /* actually copy the data to the buffer */
905 memcpy(data->buffer + data->buffer_pos, ptr, new_bytes);
906 data->buffer_pos += new_bytes;
908 /* signal success to curl */
912 /* a CURLOPT_READFUNCTION that writes nothing. */
/* Read callback for requests without a body; perform_request() installs it
 * when the caller passes no read_func. Every parameter is marked
 * G_GNUC_UNUSED. NOTE(review): the body is not visible in this excerpt. */
914 s3_empty_read_func(G_GNUC_UNUSED void *ptr, G_GNUC_UNUSED size_t size, G_GNUC_UNUSED size_t nmemb, G_GNUC_UNUSED void * stream)
/* Size callback paired with the empty read callback; its argument is unused.
 * NOTE(review): the body is not visible in this excerpt -- presumably it
 * reports a length of zero; confirm. */
920 s3_empty_size_func(G_GNUC_UNUSED void *stream)
/* MD5 callback for an empty body: hashes a static zero-length GByteArray with
 * s3_compute_md5_hash() and returns the result. */
926 s3_empty_md5_func(G_GNUC_UNUSED void *stream)
928 static const GByteArray empty = {(guint8 *) "", 0};
930 return s3_compute_md5_hash(&empty);
933 /* a CURLOPT_WRITEFUNCTION that discards the data and just counts its bytes.
934 * s3_write_data should be NULL or a pointer to a gint64.
/* Write callback that stores nothing: the incoming byte count (nmemb*size) is
 * added to the gint64 that `stream` points to, when `stream` is non-NULL.
 * NOTE(review): the return statement is not visible in this excerpt. */
937 s3_counter_write_func(G_GNUC_UNUSED void *ptr, size_t size, size_t nmemb, void *stream)
939 gint64 *count = (gint64*) stream, inc = nmemb*size;
941 if (count) *count += inc;
/* Reset callback for the byte counter: sets the gint64 that `stream` points
 * to back to 0; a NULL `stream` is ignored. */
946 s3_counter_reset_func(void *stream)
948 gint64 *count = (gint64*) stream;
950 if (count) *count = 0;
954 /* a CURLOPT_READFUNCTION to read data from a file. */
956 s3_file_read_func(void *ptr, size_t size, size_t nmemb, void * stream)
958 HANDLE *hFile = (HANDLE *) stream;
961 ReadFile(hFile, ptr, (DWORD) size*nmemb, &bytes_read, NULL);
/* Size callback for file uploads: asks GetFileSize() for the size of the file
 * behind `stream` and checks the result against INVALID_FILE_SIZE.
 * NOTE(review): the error branch and the return are not visible in this
 * excerpt.
 * NOTE(review): the second argument to GetFileSize() is NULL, so only the low
 * 32 bits of the size are obtained; files of 4 GiB or more would be
 * misreported (Microsoft suggests GetFileSizeEx for that case). */
966 s3_file_size_func(void *stream)
968 HANDLE *hFile = (HANDLE *) stream;
969 DWORD size = GetFileSize(hFile, NULL);
971 if (INVALID_FILE_SIZE == size) {
979 s3_file_md5_func(void *stream)
981 #define S3_MD5_BUF_SIZE (10*1024)
982 HANDLE *hFile = (HANDLE *) stream;
983 guint8 buf[S3_MD5_BUF_SIZE];
986 GByteArray *ret = NULL;
988 g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
990 ret = g_byte_array_sized_new(S3_MD5_HASH_BYTE_LEN);
991 g_byte_array_set_size(ret, S3_MD5_HASH_BYTE_LEN);
994 while (ReadFile(hFile, buf, S3_MD5_BUF_SIZE, &bytes_read, NULL)) {
995 MD5_Update(&md5_ctx, buf, bytes_read);
997 MD5_Final(ret->data, &md5_ctx);
999 g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
1001 #undef S3_MD5_BUF_SIZE
1005 s3_file_reset_func(void *stream)
1007 g_assert(INVALID_SET_FILE_POINTER != SetFilePointer(hFile, 0, NULL, FILE_BEGIN));
1010 /* a CURLOPT_WRITEFUNCTION to write data to a file. */
/* Write callback for downloads to a file: passes the size*nmemb bytes in ptr
 * to WriteFile() on the HANDLE carried in `stream` and returns the byte count
 * that WriteFile stored in bytes_written.
 * NOTE(review): the BOOL result of WriteFile() is not examined; the function
 * relies on the reported count alone. */
1012 s3_file_write_func(void *ptr, size_t size, size_t nmemb, void *stream)
1014 HANDLE *hFile = (HANDLE *) stream;
1015 DWORD bytes_written;
1017 WriteFile(hFile, ptr, (DWORD) size*nmemb, &bytes_written, NULL);
1018 return bytes_written;
/* Debug callback installed with CURLOPT_DEBUGFUNCTION. Header traffic is
 * labelled "Hdr In: " or "Hdr Out: "; the text is copied with g_strndup (the
 * input is length-delimited), split on newlines, and every non-blank line is
 * logged through g_debug with its label.
 * NOTE(review): the switch statement, the remaining cases and the cleanup of
 * `message` and `lines` are not visible in this excerpt. */
1023 curl_debug_message(CURL *curl G_GNUC_UNUSED,
1027 void *unused G_GNUC_UNUSED)
1031 char **lines, **line;
1038 case CURLINFO_HEADER_IN:
1039 lineprefix="Hdr In: ";
1042 case CURLINFO_HEADER_OUT:
1043 lineprefix="Hdr Out: ";
1047 /* ignore data in/out -- nobody wants to see that in the
1052 /* split the input into lines */
1053 message = g_strndup(s, (gsize) len);
1054 lines = g_strsplit(message, "\n", -1);
1057 for (line = lines; *line; line++) {
1058 if (**line == '\0') continue; /* skip blank lines */
1059 g_debug("%s%s", lineprefix, *line);
/* Execute one S3 REST request on hdl->curl, retrying retriable failures.
 * Flow visible in this excerpt: build the URL; derive the libcurl method
 * flags from `verb`; obtain the body size and MD5 (base64 and hex forms) from
 * the caller's callbacks; substitute the empty-read and counter-write
 * callbacks when none were given. Then, per attempt: free the previous
 * headers, reset the read and write state, build signed headers with
 * authenticate_request(), configure the CURL handle, run curl_easy_perform(),
 * and classify the outcome with interpret_response() and lookup_result().
 * The backoff value is multiplied by EXPONENTIAL_BACKOFF_BASE between
 * attempts, and the request fails with "Too many retries" once `retries`
 * reaches EXPONENTIAL_BACKOFF_MAX_RETRIES. On exit the headers and MD5
 * strings are freed, while the buffered response body is handed over to
 * hdl->last_response_body.
 * NOTE(review): the loop header, the `goto curl_error` statements after each
 * curl_easy_setopt() test, the sleep between retries and the return are not
 * visible in this excerpt. */
1067 perform_request(S3Handle *hdl,
1071 const char *subresource,
1073 s3_read_func read_func,
1074 s3_reset_func read_reset_func,
1075 s3_size_func size_func,
1076 s3_md5_func md5_func,
1078 s3_write_func write_func,
1079 s3_reset_func write_reset_func,
1080 gpointer write_data,
1081 s3_progress_func progress_func,
1082 gpointer progress_data,
1083 const result_handling_t *result_handling)
1086 s3_result_t result = S3_RESULT_FAIL; /* assume the worst.. */
1087 CURLcode curl_code = CURLE_OK;
1088 char curl_error_buffer[CURL_ERROR_SIZE] = "";
1089 struct curl_slist *headers = NULL;
1090 /* Set S3Internal Data */
1091 S3InternalData int_writedata = {{NULL, 0, 0, MAX_ERROR_RESPONSE_LEN}, NULL, NULL, NULL, FALSE, FALSE, NULL, hdl};
1092 gboolean should_retry;
1094 gulong backoff = EXPONENTIAL_BACKOFF_START_USEC;
1095 /* corresponds to PUT, HEAD, GET, and POST */
1096 int curlopt_upload = 0, curlopt_nobody = 0, curlopt_httpget = 0, curlopt_post = 0;
1097 /* do we want to examine the headers */
1098 const char *curlopt_customrequest = NULL;
1099 /* for MD5 calculation */
1100 GByteArray *md5_hash = NULL;
1101 gchar *md5_hash_hex = NULL, *md5_hash_b64 = NULL;
1102 size_t request_body_size = 0;
1104 g_assert(hdl != NULL && hdl->curl != NULL);
1108 url = build_url(hdl->host, hdl->service_path, bucket, key, subresource,
1109 query, hdl->use_subdomain, hdl->use_ssl);
1110 if (!url) goto cleanup;
1112 /* libcurl may behave strangely if these are not set correctly */
/* The lengths passed to strncmp below include the terminating NUL, so each
 * test is an exact match of the whole verb, not a prefix match. */
1113 if (!strncmp(verb, "PUT", 4)) {
1115 } else if (!strncmp(verb, "GET", 4)) {
1116 curlopt_httpget = 1;
1117 } else if (!strncmp(verb, "POST", 5)) {
1119 } else if (!strncmp(verb, "HEAD", 5)) {
1122 curlopt_customrequest = verb;
1126 request_body_size = size_func(read_data);
1130 md5_hash = md5_func(read_data);
1132 md5_hash_b64 = s3_base64_encode(md5_hash);
1133 md5_hash_hex = s3_hex_encode(md5_hash);
1134 g_byte_array_free(md5_hash, TRUE);
1138 /* Curl will use fread() otherwise */
1139 read_func = s3_empty_read_func;
1143 int_writedata.write_func = write_func;
1144 int_writedata.reset_func = write_reset_func;
1145 int_writedata.write_data = write_data;
1147 /* Curl will use fwrite() otherwise */
1148 int_writedata.write_func = s3_counter_write_func;
1149 int_writedata.reset_func = s3_counter_reset_func;
1150 int_writedata.write_data = NULL;
1156 curl_slist_free_all(headers);
1158 curl_error_buffer[0] = '\0';
1159 if (read_reset_func) {
1160 read_reset_func(read_data);
1162 /* calls write_reset_func */
1163 s3_internal_reset_func(&int_writedata);
1165 /* set up the request */
1166 headers = authenticate_request(hdl, verb, bucket, key, subresource,
1169 if (hdl->use_ssl && hdl->ca_info) {
1170 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_CAINFO, hdl->ca_info)))
1174 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_VERBOSE, hdl->verbose)))
1177 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_DEBUGFUNCTION,
1178 curl_debug_message)))
1181 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_ERRORBUFFER,
1182 curl_error_buffer)))
1184 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_NOPROGRESS, 1)))
1186 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_FOLLOWLOCATION, 1)))
1188 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_URL, url)))
1190 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HTTPHEADER,
1193 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_WRITEFUNCTION, s3_internal_write_func)))
1195 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_WRITEDATA, &int_writedata)))
1197 /* Note: we always have to set this apparently, for consistent "end of header" detection */
1198 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HEADERFUNCTION, s3_internal_header_func)))
1200 /* Note: if set, CURLOPT_HEADERDATA seems to also be used for CURLOPT_WRITEDATA ? */
1201 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HEADERDATA, &int_writedata)))
1203 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_PROGRESSFUNCTION, progress_func)))
1205 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_PROGRESSDATA, progress_data)))
1208 /* CURLOPT_INFILESIZE_LARGE added in 7.11.0 */
1209 #if LIBCURL_VERSION_NUM >= 0x070b00
1210 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)request_body_size)))
1213 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_INFILESIZE, (long)request_body_size)))
1216 /* CURLOPT_MAX_{RECV,SEND}_SPEED_LARGE added in 7.15.5 */
1217 #if LIBCURL_VERSION_NUM >= 0x070f05
1218 if (s3_curl_throttling_compat()) {
1219 if (hdl->max_send_speed)
1220 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_MAX_SEND_SPEED_LARGE, (curl_off_t)hdl->max_send_speed)))
/* NOTE(review): BUG -- the receive limit below is applied with
 * CURLOPT_MAX_SEND_SPEED_LARGE. That overwrites the send limit set just above
 * and leaves downloads unthrottled; libcurl's option for the download rate is
 * CURLOPT_MAX_RECV_SPEED_LARGE. */
1223 if (hdl->max_recv_speed)
1224 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_MAX_SEND_SPEED_LARGE, (curl_off_t)hdl->max_recv_speed)))
1229 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_HTTPGET, curlopt_httpget)))
1231 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_UPLOAD, curlopt_upload)))
1233 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_POST, curlopt_post)))
1235 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_NOBODY, curlopt_nobody)))
1237 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_CUSTOMREQUEST,
1238 curlopt_customrequest)))
1242 if (curlopt_upload) {
1243 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READFUNCTION, read_func)))
1245 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READDATA, read_data)))
1248 /* Clear request_body options. */
1249 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READFUNCTION,
1252 if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_READDATA,
1257 /* Perform the request */
1258 curl_code = curl_easy_perform(hdl->curl);
1261 /* interpret the response into hdl->last* */
1262 curl_error: /* (label for short-circuiting the curl_easy_perform call) */
1263 should_retry = interpret_response(hdl, curl_code, curl_error_buffer,
1264 int_writedata.resp_buf.buffer, int_writedata.resp_buf.buffer_pos, int_writedata.etag, md5_hash_hex);
1266 /* and, unless we know we need to retry, see what we're to do now */
1267 if (!should_retry) {
1268 result = lookup_result(result_handling, hdl->last_response_code,
1269 hdl->last_s3_error_code, hdl->last_curl_code);
1271 /* break out of the while(1) unless we're retrying */
1272 if (result != S3_RESULT_RETRY)
1276 if (retries >= EXPONENTIAL_BACKOFF_MAX_RETRIES) {
1277 /* we're out of retries, so annotate hdl->last_message appropriately and bail
1279 char *m = g_strdup_printf("Too many retries; last message was '%s'", hdl->last_message);
1280 if (hdl->last_message) g_free(hdl->last_message);
1281 hdl->last_message = m;
1282 result = S3_RESULT_FAIL;
1288 backoff *= EXPONENTIAL_BACKOFF_BASE;
1291 if (result != S3_RESULT_OK) {
1292 g_debug(_("%s %s failed with %d/%s"), verb, url,
1293 hdl->last_response_code,
1294 s3_error_name_from_code(hdl->last_s3_error_code));
1299 if (headers) curl_slist_free_all(headers);
1300 g_free(md5_hash_b64);
1301 g_free(md5_hash_hex);
1303 /* we don't deallocate the response body -- we keep it for later */
1304 hdl->last_response_body = int_writedata.resp_buf.buffer;
1305 hdl->last_response_body_size = int_writedata.resp_buf.buffer_pos;
1306 hdl->last_num_retries = retries;
/* Write callback taking (ptr, size, nmemb, stream); STREAM is the
 * S3InternalData for the request.  Body data is offered to the internal
 * response buffer (data->resp_buf) and, when a user write function is
 * configured, the same data is passed on to it with data->write_data. */
1313 s3_internal_write_func(void *ptr, size_t size, size_t nmemb, void * stream)
1315 S3InternalData *data = (S3InternalData *) stream;
/* headers_done is set by s3_internal_header_func once the final "\r\n" header line is seen */
1318 if (!data->headers_done)
1321 /* call write on internal buffer (if not full) */
1322 if (data->int_write_done) {
1325 bytes_saved = s3_buffer_write_func(ptr, size, nmemb, &data->resp_buf);
/* int_write_done is cleared again by s3_internal_reset_func */
1327 data->int_write_done = TRUE;
1330 /* call write on user buffer */
1331 if (data->write_func) {
/* the user callback's return value is returned to the caller unchanged */
1332 return data->write_func(ptr, size, nmemb, data->write_data);
/* Reset callback; STREAM is the S3InternalData for the request.  Resets the
 * internal response buffer, clears the headers_done and int_write_done flags,
 * and then invokes the user's reset function (if one is set) on write_data. */
1339 s3_internal_reset_func(void * stream)
1341 S3InternalData *data = (S3InternalData *) stream;
1343 s3_buffer_reset_func(&data->resp_buf);
1344 data->headers_done = FALSE;
1345 data->int_write_done = FALSE;
/* the user reset function is optional */
1347 if (data->reset_func) {
1348 data->reset_func(data->write_data);
/* Header callback taking (ptr, size, nmemb, stream); STREAM is the
 * S3InternalData for the request.  Each call works on a NUL-terminated copy
 * of the size*nmemb bytes at PTR and:
 *   - stores the quoted ETag value in data->etag when the header matches etag_regex;
 *   - sets data->headers_done when the header is exactly "\r\n";
 *   - on a Date header, stores (remote time - local time) in
 *     data->hdl->time_offset_with_s3, or 0 if the date cannot be converted. */
1353 s3_internal_header_func(void *ptr, size_t size, size_t nmemb, void * stream)
1355 static const char *final_header = "\r\n";
1356 time_t remote_time_in_sec,local_time;
1358 regmatch_t pmatch[2];
1359 S3InternalData *data = (S3InternalData *) stream;
/* g_strndup gives a NUL-terminated copy that the regex and strcmp calls below can use */
1361 header = g_strndup((gchar *) ptr, (gsize) size*nmemb);
/* a zero return from s3_regexec_wrap is treated as "matched" */
1363 if (!s3_regexec_wrap(&etag_regex, header, 2, pmatch, 0))
1364 data->etag = find_regex_substring(header, pmatch[1]);
/* a header consisting only of "\r\n" marks the end of the header section */
1365 if (!strcmp(final_header, header))
1366 data->headers_done = TRUE;
1368 /* If date header is found */
1369 if (!s3_regexec_wrap(&date_sync_regex, header, 2, pmatch, 0)){
1370 char *date = find_regex_substring(header, pmatch[1]);
1372 /* Remote time is always in GMT: RFC 2616 */
1373 /* both curl_getdate and time operate in UTC, so no timezone math is necessary */
/* a negative result from curl_getdate is handled as a conversion failure */
1374 if ( (remote_time_in_sec = curl_getdate(date, NULL)) < 0 ){
1375 g_debug("Error: Conversion of remote time to seconds failed.");
1376 data->hdl->time_offset_with_s3 = 0;
1378 local_time = time(NULL);
1380 data->hdl->time_offset_with_s3 = remote_time_in_sec - local_time;
1382 if (data->hdl->verbose)
1383 g_debug("Time Offset (remote - local) :%ld",(long)data->hdl->time_offset_with_s3);
/* Compile the regular expressions used by this file (error code, ETag,
 * error message, subdomain-compatible bucket name, location constraint and
 * Date header) into their file-level regex objects.  Two variants exist:
 * one using POSIX regcomp() and, after the "#else ! HAVE_REGEX_H", one using
 * GLib's g_regex_new().  A compilation failure is reported through g_error().
 *
 * NOTE(review): the subdomain pattern differs between the two variants -- the
 * GLib one additionally accepts ".x" groups -- confirm this is intentional. */
1394 compile_regexes(void)
1398 /* using POSIX regular expressions */
1399 struct {const char * str; int flags; regex_t *regex;} regexes[] = {
1400 {"<Code>[[:space:]]*([^<]*)[[:space:]]*</Code>", REG_EXTENDED | REG_ICASE, &error_name_regex},
1401 {"^ETag:[[:space:]]*\"([^\"]+)\"[[:space:]]*$", REG_EXTENDED | REG_ICASE | REG_NEWLINE, &etag_regex},
1402 {"<Message>[[:space:]]*([^<]*)[[:space:]]*</Message>", REG_EXTENDED | REG_ICASE, &message_regex},
1403 {"^[a-z0-9](-*[a-z0-9]){2,62}$", REG_EXTENDED | REG_NOSUB, &subdomain_regex},
1404 {"(/>)|(>([^<]*)</LocationConstraint>)", REG_EXTENDED | REG_ICASE, &location_con_regex},
1405 {"^Date:(.*)\r",REG_EXTENDED | REG_ICASE | REG_NEWLINE, &date_sync_regex},
1408 char regmessage[1024];
/* iteration stops at the first table entry whose str is NULL */
1412 for (i = 0; regexes[i].str; i++) {
1413 reg_result = regcomp(regexes[i].regex, regexes[i].str, regexes[i].flags);
1414 if (reg_result != 0) {
/* regerror renders the regcomp error code as text for the message below */
1415 size = regerror(reg_result, regexes[i].regex, regmessage, sizeof(regmessage));
1416 g_error(_("Regex error: %s"), regmessage);
1420 #else /* ! HAVE_REGEX_H */
1421 /* using PCRE via GLib */
1422 struct {const char * str; int flags; regex_t *regex;} regexes[] = {
1423 {"<Code>\\s*([^<]*)\\s*</Code>",
1424 G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
1426 {"^ETag:\\s*\"([^\"]+)\"\\s*$",
1427 G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
1429 {"<Message>\\s*([^<]*)\\s*</Message>",
1430 G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
1432 {"^[a-z0-9]((-*[a-z0-9])|(\\.[a-z0-9])){2,62}$",
1433 G_REGEX_OPTIMIZE | G_REGEX_NO_AUTO_CAPTURE,
1435 {"(/>)|(>([^<]*)</LocationConstraint>)",
1437 &location_con_regex},
1439 G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
/* iteration stops at the first table entry whose str is NULL */
1446 for (i = 0; regexes[i].str; i++) {
1447 *(regexes[i].regex) = g_regex_new(regexes[i].str, regexes[i].flags, 0, &err);
1449 g_error(_("Regex error: %s"), err->message);
1459 * Public function implementations
/* Module initialization.  While holding a function-local static mutex, the
 * regexes are compiled via compile_regexes() and the outcome is kept in the
 * static variable `ret`.
 * NOTE(review): the static `init` flag presumably makes the compilation run
 * only once across calls -- confirm against the full function body. */
1462 gboolean s3_init(void)
1464 static GStaticMutex mutex = G_STATIC_MUTEX_INIT;
1465 static gboolean init = FALSE, ret;
1467 /* n.b. curl_global_init is called in common-src/glib-util.c:glib_init() */
/* serialize concurrent callers around the regex compilation */
1469 g_static_mutex_lock (&mutex);
1471 ret = compile_regexes();
1474 g_static_mutex_unlock(&mutex);
/* Report whether the libcurl in use at run time has a version number
 * strictly greater than 0x070a02 (i.e. newer than 7.10.2), as obtained from
 * curl_version_info(). */
1479 s3_curl_location_compat(void)
1481 curl_version_info_data *info;
1483 info = curl_version_info(CURLVERSION_NOW);
/* version_num packs the version as 0xXXYYZZ (major, minor, patch) */
1484 return info->version_num > 0x070a02;
/* Report whether BUCKET matches subdomain_regex.  The result is non-zero
 * when s3_regexec_wrap() returns 0, which this file treats as a match. */
1488 s3_bucket_location_compat(const char *bucket)
1490 return !s3_regexec_wrap(&subdomain_regex, bucket, 0, NULL, 0);
/* Allocate a zero-initialized S3Handle and fill it from the arguments.
 *
 * - access_key and secret_key are required (asserted) and are copied.
 * - user_token, bucket_location, storage_class and ca_info are copied with
 *   g_strdup.
 * - host defaults to "s3.amazonaws.com" when it is not a non-empty string.
 * - use_subdomain is turned on either by the caller or automatically when
 *   the host is "s3.amazonaws.com" and a bucket location is configured.
 * - service_path is stored with a leading '/' (one is added if missing), or
 *   as NULL.
 * - a curl easy handle is created and stored in hdl->curl.
 *
 * Failures jump to the `error` label. */
1494 s3_open(const char *access_key,
1495 const char *secret_key,
1497 const char *service_path,
1498 const gboolean use_subdomain,
1499 const char *user_token,
1500 const char *bucket_location,
1501 const char *storage_class,
1507 hdl = g_new0(S3Handle, 1);
1508 if (!hdl) goto error;
1510 hdl->verbose = FALSE;
/* SSL is enabled by default whenever s3_curl_supports_ssl() reports support */
1511 hdl->use_ssl = s3_curl_supports_ssl();
1513 g_assert(access_key);
1514 hdl->access_key = g_strdup(access_key);
1515 g_assert(secret_key);
1516 hdl->secret_key = g_strdup(secret_key);
1518 hdl->user_token = g_strdup(user_token);
1521 hdl->bucket_location = g_strdup(bucket_location);
1524 hdl->storage_class = g_strdup(storage_class);
1527 hdl->ca_info = g_strdup(ca_info);
/* fall back to the Amazon endpoint when no host was supplied */
1529 if (!is_non_empty_string(host))
1530 host = "s3.amazonaws.com";
1531 hdl->host = g_strdup(host);
/* a configured bucket location on the Amazon endpoint forces subdomain-style access */
1532 hdl->use_subdomain = use_subdomain ||
1533 (strcmp(host, "s3.amazonaws.com") == 0 &&
1534 is_non_empty_string(hdl->bucket_location));
/* normalize service_path so that it always begins with '/' */
1536 if (service_path[0] != '/')
1537 hdl->service_path = g_strdup_printf("/%s", service_path);
1539 hdl->service_path = g_strdup(service_path);
1541 hdl->service_path = NULL;
1544 hdl->curl = curl_easy_init();
1545 if (!hdl->curl) goto error;
1555 s3_free(S3Handle *hdl)
1560 g_free(hdl->access_key);
1561 g_free(hdl->secret_key);
1562 if (hdl->user_token) g_free(hdl->user_token);
1563 if (hdl->bucket_location) g_free(hdl->bucket_location);
1564 if (hdl->storage_class) g_free(hdl->storage_class);
1565 if (hdl->host) g_free(hdl->host);
1566 if (hdl->service_path) g_free(hdl->service_path);
1567 if (hdl->curl) curl_easy_cleanup(hdl->curl);
/* Clear the per-request state kept on HDL: frees and NULLs last_message and
 * last_response_body, and zeroes the last response code, curl code, S3 error
 * code, retry count and response body size.  The curl handle itself is left
 * alone (see the comment below). */
1574 s3_reset(S3Handle *hdl)
1577 /* We don't call curl_easy_reset here, because doing that in curl
1578 * < 7.16 blanks the default CA certificate path, and there's no way
1579 * to get it back. */
1580 if (hdl->last_message) {
1581 g_free(hdl->last_message);
1582 hdl->last_message = NULL;
1585 hdl->last_response_code = 0;
1586 hdl->last_curl_code = 0;
1587 hdl->last_s3_error_code = 0;
1588 hdl->last_num_retries = 0;
/* the response body is handed to the handle by perform_request and owned by it until here */
1590 if (hdl->last_response_body) {
1591 g_free(hdl->last_response_body);
1592 hdl->last_response_body = NULL;
1595 hdl->last_response_body_size = 0;
/* Report the outcome of the most recent request on HDL.  Every output
 * pointer is optional; only non-NULL ones are written.
 *
 * With a handle, the values come from hdl->last_* (the error name is derived
 * from the last S3 error code).  *message points at the handle's own
 * last_message string; nothing is copied here.
 * Without a handle, fixed placeholder values are returned instead. */
1600 s3_error(S3Handle *hdl,
1601 const char **message,
1602 guint *response_code,
1603 s3_error_code_t *s3_error_code,
1604 const char **s3_error_name,
1605 CURLcode *curl_code,
1609 if (message) *message = hdl->last_message;
1610 if (response_code) *response_code = hdl->last_response_code;
1611 if (s3_error_code) *s3_error_code = hdl->last_s3_error_code;
1612 if (s3_error_name) *s3_error_name = s3_error_name_from_code(hdl->last_s3_error_code);
1613 if (curl_code) *curl_code = hdl->last_curl_code;
1614 if (num_retries) *num_retries = hdl->last_num_retries;
1616 /* no hdl? return something coherent, anyway */
1617 if (message) *message = "NULL S3Handle";
1618 if (response_code) *response_code = 0;
1619 if (s3_error_code) *s3_error_code = 0;
1620 if (s3_error_name) *s3_error_name = NULL;
1621 if (curl_code) *curl_code = 0;
1622 if (num_retries) *num_retries = 0;
/* Store the verbosity flag on HDL; s3_internal_header_func checks it before
 * emitting its time-offset debug message. */
1627 s3_verbose(S3Handle *hdl, gboolean verbose)
1629 hdl->verbose = verbose;
/* Record an upload speed limit on HDL.  The assignment is guarded by a
 * s3_curl_throttling_compat() check; perform_request later hands a non-zero
 * value to libcurl through CURLOPT_MAX_SEND_SPEED_LARGE. */
1633 s3_set_max_send_speed(S3Handle *hdl, guint64 max_send_speed)
1635 if (!s3_curl_throttling_compat())
1638 hdl->max_send_speed = max_send_speed;
/* Record a download speed limit on HDL.  The assignment is guarded by a
 * s3_curl_throttling_compat() check.
 * NOTE(review): perform_request currently passes hdl->max_recv_speed to
 * CURLOPT_MAX_SEND_SPEED_LARGE rather than CURLOPT_MAX_RECV_SPEED_LARGE,
 * which looks like a copy/paste slip -- the value stored here would then
 * throttle uploads instead of downloads. */
1644 s3_set_max_recv_speed(S3Handle *hdl, guint64 max_recv_speed)
1646 if (!s3_curl_throttling_compat())
1649 hdl->max_recv_speed = max_recv_speed;
1655 s3_use_ssl(S3Handle *hdl, gboolean use_ssl)
1657 gboolean ret = TRUE;
1658 if (use_ssl & !s3_curl_supports_ssl()) {
1661 hdl->use_ssl = use_ssl;
/* Build a one-line description of the last error on HDL.  The pieces are
 * obtained through s3_error() and concatenated as: message, S3 error name,
 * curl code, HTTP response code, retry count.  The result comes from
 * g_strdup_printf(), so it is newly allocated and must be released by the
 * caller with g_free().
 * NOTE(review): response_code and num_retries are unsigned (guint) but are
 * formatted with %d below. */
1667 s3_strerror(S3Handle *hdl)
1669 const char *message;
1670 guint response_code;
1671 const char *s3_error_name;
/* each optional fragment starts out empty, so an unset one adds nothing to the result */
1675 char s3_info[256] = "";
1676 char response_info[16] = "";
1677 char curl_info[32] = "";
1678 char retries_info[32] = "";
1680 s3_error(hdl, &message, &response_code, NULL, &s3_error_name, &curl_code, &num_retries);
1683 message = "Unknown S3 error";
1685 g_snprintf(s3_info, sizeof(s3_info), " (%s)", s3_error_name);
1687 g_snprintf(response_info, sizeof(response_info), " (HTTP %d)", response_code);
1689 g_snprintf(curl_info, sizeof(curl_info), " (CURLcode %d)", curl_code);
1691 g_snprintf(retries_info, sizeof(retries_info), " (after %d retries)", num_retries);
1693 return g_strdup_printf("%s%s%s%s%s", message, s3_info, curl_info, response_info, retries_info);
/* s3_upload: issue a PUT of KEY into BUCKET through perform_request().
 * hdl must not be NULL (asserted).  read_func, reset_func, size_func,
 * md5_func and read_data, as well as progress_func and progress_data, are
 * forwarded unchanged to perform_request().  Only an HTTP 200 is mapped to
 * success; the function returns whether the result was S3_RESULT_OK.
 * NOTE(review): the @param names in the older comment that follows (self,
 * buffer, buffer_len) do not match this function's actual parameters. */
1696 /* Perform an upload. When this function returns, KEY and
1697 * BUFFER remain the responsibility of the caller.
1699 * @param self: the s3 device
1700 * @param bucket: the bucket to which the upload should be made
1701 * @param key: the key to which the upload should be made
1702 * @param buffer: the data to be uploaded
1703 * @param buffer_len: the length of the data to upload
1704 * @returns: false if an error ocurred
1707 s3_upload(S3Handle *hdl,
1710 s3_read_func read_func,
1711 s3_reset_func reset_func,
1712 s3_size_func size_func,
1713 s3_md5_func md5_func,
1715 s3_progress_func progress_func,
1716 gpointer progress_data)
1718 s3_result_t result = S3_RESULT_FAIL;
1719 static result_handling_t result_handling[] = {
1720 { 200, 0, 0, S3_RESULT_OK },
1721 RESULT_HANDLING_ALWAYS_RETRY,
1722 { 0, 0, 0, /* default: */ S3_RESULT_FAIL }
1725 g_assert(hdl != NULL);
/* no write callbacks are supplied: the three NULLs sit where s3_read passes write_func, reset_func, write_data */
1727 result = perform_request(hdl, "PUT", bucket, key, NULL, NULL,
1728 read_func, reset_func, size_func, md5_func, read_data,
1729 NULL, NULL, NULL, progress_func, progress_data,
1732 return result == S3_RESULT_OK;
1736 /* Private structure for our "thunk", which tracks where the user is in the list
1738 struct list_keys_thunk {
1739 GSList *filename_list; /* all pending filenames */
1741 gboolean in_contents; /* look for "key" entities in here */
1742 gboolean in_common_prefixes; /* look for "prefix" entities in here */
1744 gboolean is_truncated; /* set by list_end_element when the "istruncated" text does not begin with "false" */
1754 /* Functions for a SAX parser to parse the XML from Amazon */
1757 list_start_element(GMarkupParseContext *context G_GNUC_UNUSED,
1758 const gchar *element_name,
1759 const gchar **attribute_names G_GNUC_UNUSED,
1760 const gchar **attribute_values G_GNUC_UNUSED,
1762 GError **error G_GNUC_UNUSED)
1764 struct list_keys_thunk *thunk = (struct list_keys_thunk *)user_data;
1766 thunk->want_text = 0;
1767 if (g_ascii_strcasecmp(element_name, "contents") == 0) {
1768 thunk->in_contents = 1;
1769 } else if (g_ascii_strcasecmp(element_name, "commonprefixes") == 0) {
1770 thunk->in_common_prefixes = 1;
1771 } else if (g_ascii_strcasecmp(element_name, "prefix") == 0 && thunk->in_common_prefixes) {
1772 thunk->want_text = 1;
1773 } else if (g_ascii_strcasecmp(element_name, "key") == 0 && thunk->in_contents) {
1774 thunk->want_text = 1;
1775 } else if (g_ascii_strcasecmp(element_name, "size") == 0 && thunk->in_contents) {
1776 thunk->want_text = 1;
1777 } else if (g_ascii_strcasecmp(element_name, "istruncated")) {
1778 thunk->want_text = 1;
1779 } else if (g_ascii_strcasecmp(element_name, "nextmarker")) {
1780 thunk->want_text = 1;
/* End-of-element callback for bucket listings; user_data is the
 * struct list_keys_thunk.  Element names are compared case-insensitively.
 *   - "contents" / "commonprefixes": leave the corresponding section.
 *   - "key" (inside contents) and "prefix" (inside common prefixes): the
 *     captured text is prepended to thunk->filename_list.
 *   - "size" (inside contents): the captured text is parsed as a base-10
 *     number and added to thunk->size.
 *   - "istruncated": is_truncated is set when the captured text does not
 *     begin with "false".
 *   - "nextmarker": the captured text replaces thunk->next_marker. */
1785 list_end_element(GMarkupParseContext *context G_GNUC_UNUSED,
1786 const gchar *element_name,
1788 GError **error G_GNUC_UNUSED)
1790 struct list_keys_thunk *thunk = (struct list_keys_thunk *)user_data;
1792 if (g_ascii_strcasecmp(element_name, "contents") == 0) {
1793 thunk->in_contents = 0;
1794 } else if (g_ascii_strcasecmp(element_name, "commonprefixes") == 0) {
1795 thunk->in_common_prefixes = 0;
1796 } else if (g_ascii_strcasecmp(element_name, "key") == 0 && thunk->in_contents) {
/* the list stores the thunk->text pointer itself, not a copy */
1797 thunk->filename_list = g_slist_prepend(thunk->filename_list, thunk->text);
1799 } else if (g_ascii_strcasecmp(element_name, "size") == 0 && thunk->in_contents) {
1800 thunk->size += g_ascii_strtoull (thunk->text, NULL, 10);
1802 } else if (g_ascii_strcasecmp(element_name, "prefix") == 0 && thunk->in_common_prefixes) {
1803 thunk->filename_list = g_slist_prepend(thunk->filename_list, thunk->text);
1805 } else if (g_ascii_strcasecmp(element_name, "istruncated") == 0) {
/* only the first five characters are compared, case-insensitively */
1806 if (thunk->text && g_ascii_strncasecmp(thunk->text, "false", 5) != 0)
1807 thunk->is_truncated = TRUE;
1808 } else if (g_ascii_strcasecmp(element_name, "nextmarker") == 0) {
/* drop any marker left over from a previous page before taking the new one */
1809 if (thunk->next_marker) g_free(thunk->next_marker);
1810 thunk->next_marker = thunk->text;
/* Text callback for bucket listings; user_data is the struct
 * list_keys_thunk.  When the current element's text is wanted
 * (thunk->want_text), any previously captured text is freed and replaced by
 * a NUL-terminated copy of the new text. */
1816 list_text(GMarkupParseContext *context G_GNUC_UNUSED,
1820 GError **error G_GNUC_UNUSED)
1822 struct list_keys_thunk *thunk = (struct list_keys_thunk *)user_data;
1824 if (thunk->want_text) {
1825 if (thunk->text) g_free(thunk->text);
/* the incoming text is length-delimited, hence g_strndup rather than g_strdup */
1826 thunk->text = g_strndup(text, text_len);
1830 /* Perform a fetch from S3; several fetches may be involved in a
1831 * single listing operation */
/* The query string is assembled from the entries of pos_parts whose value is
 * non-NULL, each as name=value with the value URL-escaped and entries joined
 * by "&".  The GET is issued on BUCKET with that query string; the response
 * is written to BUF via S3_BUFFER_WRITE_FUNCS.  Only an HTTP 200 is mapped
 * to S3_RESULT_OK. */
1833 list_fetch(S3Handle *hdl,
1836 const char *delimiter,
1838 const char *max_keys,
1841 s3_result_t result = S3_RESULT_FAIL;
1842 static result_handling_t result_handling[] = {
1843 { 200, 0, 0, S3_RESULT_OK },
1844 RESULT_HANDLING_ALWAYS_RETRY,
1845 { 0, 0, 0, /* default: */ S3_RESULT_FAIL }
/* table of { query parameter name, value }; the loop below stops at a NULL name */
1847 const char* pos_parts[][2] = {
1849 {"delimiter", delimiter},
1851 {"max-keys", max_keys},
1857 gboolean have_prev_part = FALSE;
1859 /* loop over possible parts to build query string */
1860 query = g_string_new("");
1861 for (i = 0; pos_parts[i][0]; i++) {
/* parameters whose value is NULL are left out of the query entirely */
1862 if (pos_parts[i][1]) {
1864 g_string_append(query, "&");
1866 have_prev_part = TRUE;
/* curl_escape allocates the escaped copy; it is released with curl_free below */
1867 esc_value = curl_escape(pos_parts[i][1], 0);
1868 g_string_append_printf(query, "%s=%s", pos_parts[i][0], esc_value);
1869 curl_free(esc_value);
1873 /* and perform the request on that URI */
1874 result = perform_request(hdl, "GET", bucket, NULL, NULL, query->str,
1875 NULL, NULL, NULL, NULL, NULL,
1876 S3_BUFFER_WRITE_FUNCS, buf, NULL, NULL,
1879 if (query) g_string_free(query, TRUE);
/* List the keys of BUCKET, optionally restricted by prefix and delimiter.
 * Pages of at most MAX_KEYS ("1000") entries are fetched with list_fetch()
 * and parsed with the GMarkup callbacks list_start_element,
 * list_end_element and list_text, which accumulate names in
 * thunk.filename_list and sizes in thunk.size.  Fetching repeats for as long
 * as the parsed response supplied a next marker.
 * On success the collected list is stored in *list and the summed size in
 * *total_size. */
1885 s3_list_keys(S3Handle *hdl,
1888 const char *delimiter,
1890 guint64 *total_size)
1893 * max len of XML variables:
1894 * bucket: 255 bytes (p12 API Version 2006-03-01)
1895 * key: 1024 bytes (p15 API Version 2006-03-01)
1896 * size per key: 5GB bytes (p6 API Version 2006-03-01)
1897 * size of size 10 bytes (i.e. 10 decimal digits)
1898 * etag: 44 (observed+assumed)
1899 * owner ID: 64 (observed+assumed)
1900 * owner DisplayName: 255 (assumed)
1901 * StorageClass: const (p18 API Version 2006-03-01)
1903 static const guint MAX_RESPONSE_LEN = 1000*2000;
1904 static const char *MAX_KEYS = "1000";
1905 struct list_keys_thunk thunk;
1906 GMarkupParseContext *ctxt = NULL;
1907 static GMarkupParser parser = { list_start_element, list_end_element, list_text, NULL, NULL };
1909 s3_result_t result = S3_RESULT_FAIL;
/* the response buffer is capped at MAX_RESPONSE_LEN bytes */
1910 CurlBuffer buf = {NULL, 0, 0, MAX_RESPONSE_LEN};
1914 thunk.filename_list = NULL;
1916 thunk.next_marker = NULL;
1919 /* Loop until S3 has given us the entire picture */
1921 s3_buffer_reset_func(&buf);
1922 /* get some data from S3 */
1923 result = list_fetch(hdl, bucket, prefix, delimiter, thunk.next_marker, MAX_KEYS, &buf);
1924 if (result != S3_RESULT_OK) goto cleanup;
1926 /* run the parser over it */
/* per-page parser state is reset; filename_list, size and next_marker carry over */
1927 thunk.in_contents = FALSE;
1928 thunk.in_common_prefixes = FALSE;
1929 thunk.is_truncated = FALSE;
1930 thunk.want_text = FALSE;
1932 ctxt = g_markup_parse_context_new(&parser, 0, (gpointer)&thunk, NULL);
1934 if (!g_markup_parse_context_parse(ctxt, buf.buffer, buf.buffer_pos, &err)) {
/* a parse failure is reported through hdl->last_message */
1935 if (hdl->last_message) g_free(hdl->last_message);
1936 hdl->last_message = g_strdup(err->message);
1937 result = S3_RESULT_FAIL;
1941 if (!g_markup_parse_context_end_parse(ctxt, &err)) {
1942 if (hdl->last_message) g_free(hdl->last_message);
1943 hdl->last_message = g_strdup(err->message);
1944 result = S3_RESULT_FAIL;
1948 g_markup_parse_context_free(ctxt);
/* another page is fetched only if the parser stored a next marker */
1950 } while (thunk.next_marker);
1953 if (err) g_error_free(err);
1954 if (thunk.text) g_free(thunk.text);
1955 if (thunk.next_marker) g_free(thunk.next_marker);
1956 if (ctxt) g_markup_parse_context_free(ctxt);
1957 if (buf.buffer) g_free(buf.buffer);
1959 if (result != S3_RESULT_OK) {
/* NOTE(review): g_slist_free releases only the list cells; the strings prepended by list_end_element appear to be leaked on this path -- confirm */
1960 g_slist_free(thunk.filename_list);
1963 *list = thunk.filename_list;
1965 *total_size = thunk.size;
/* Download KEY from BUCKET with a GET issued through perform_request().
 * hdl and write_func must not be NULL (asserted).  write_func, reset_func
 * and write_data, as well as progress_func and progress_data, are forwarded
 * unchanged.  Only an HTTP 200 is mapped to success; the function returns
 * whether the result was S3_RESULT_OK. */
1972 s3_read(S3Handle *hdl,
1975 s3_write_func write_func,
1976 s3_reset_func reset_func,
1977 gpointer write_data,
1978 s3_progress_func progress_func,
1979 gpointer progress_data)
1981 s3_result_t result = S3_RESULT_FAIL;
1982 static result_handling_t result_handling[] = {
1983 { 200, 0, 0, S3_RESULT_OK },
1984 RESULT_HANDLING_ALWAYS_RETRY,
1985 { 0, 0, 0, /* default: */ S3_RESULT_FAIL }
1988 g_assert(hdl != NULL);
1989 g_assert(write_func != NULL);
/* no read callbacks are supplied: the five NULLs sit where s3_upload passes read_func, reset_func, size_func, md5_func, read_data */
1991 result = perform_request(hdl, "GET", bucket, key, NULL, NULL,
1992 NULL, NULL, NULL, NULL, NULL, write_func, reset_func, write_data,
1993 progress_func, progress_data, result_handling);
1995 return result == S3_RESULT_OK;
/* Delete KEY from BUCKET with a DELETE issued through perform_request().
 * hdl must not be NULL (asserted).  An HTTP 204 counts as success, and so
 * does a 404 carrying the NoSuchBucket error code.  Returns whether the
 * result was S3_RESULT_OK. */
1999 s3_delete(S3Handle *hdl,
2003 s3_result_t result = S3_RESULT_FAIL;
2004 static result_handling_t result_handling[] = {
2005 { 204, 0, 0, S3_RESULT_OK },
2006 { 404, S3_ERROR_NoSuchBucket, 0, S3_RESULT_OK },
2007 RESULT_HANDLING_ALWAYS_RETRY,
2008 { 0, 0, 0, /* default: */ S3_RESULT_FAIL }
2011 g_assert(hdl != NULL);
/* neither read nor write nor progress callbacks are used for a delete */
2013 result = perform_request(hdl, "DELETE", bucket, key, NULL, NULL,
2014 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2017 return result == S3_RESULT_OK;
/* Create BUCKET with a PUT issued through perform_request().
 *
 * When hdl->bucket_location is non-empty and is not the wildcard "*", the
 * request body is a CreateBucketConfiguration document naming that location;
 * this requires a bucket name accepted by s3_bucket_location_compat(),
 * otherwise an explanatory hdl->last_message is set.
 *
 * After the PUT succeeds, or fails with BucketAlreadyOwnedByYou, a GET is
 * issued on the bucket (with the extra "location" argument when a location
 * is configured).  With a location configured, the response body is matched
 * against location_con_regex and compared with the configured value; a
 * mismatch or an unusable body leaves the result as failure with
 * hdl->last_message set.
 *
 * NOTE(review): the location comparison uses strncmp limited to
 * strlen(hdl->bucket_location), so a bucket constraint that merely starts
 * with the configured value is accepted as well.
 *
 * Returns whether the final result was S3_RESULT_OK. */
2021 s3_make_bucket(S3Handle *hdl,
2025 s3_result_t result = S3_RESULT_FAIL;
2026 static result_handling_t result_handling[] = {
2027 { 200, 0, 0, S3_RESULT_OK },
2028 { 404, S3_ERROR_NoSuchBucket, 0, S3_RESULT_RETRY },
2029 RESULT_HANDLING_ALWAYS_RETRY,
2030 { 0, 0, 0, /* default: */ S3_RESULT_FAIL }
2032 regmatch_t pmatch[4];
2033 char *loc_end_open, *loc_content;
2034 CurlBuffer buf = {NULL, 0, 0, 0}, *ptr = NULL;
2035 s3_read_func read_func = NULL;
2036 s3_reset_func reset_func = NULL;
2037 s3_md5_func md5_func = NULL;
2038 s3_size_func size_func = NULL;
2040 g_assert(hdl != NULL);
/* a request body is only built for a concrete (non-wildcard) location */
2042 if (is_non_empty_string(hdl->bucket_location) &&
2043 0 != strcmp(AMAZON_WILDCARD_LOCATION, hdl->bucket_location)) {
2044 if (s3_bucket_location_compat(bucket)) {
2046 buf.buffer = g_strdup_printf(AMAZON_BUCKET_CONF_TEMPLATE, hdl->bucket_location);
2047 buf.buffer_len = (guint) strlen(buf.buffer);
2049 buf.max_buffer_size = buf.buffer_len;
/* the in-memory buffer is uploaded through the s3_buffer_* callbacks */
2050 read_func = s3_buffer_read_func;
2051 reset_func = s3_buffer_reset_func;
2052 size_func = s3_buffer_size_func;
2053 md5_func = s3_buffer_md5_func;
2055 hdl->last_message = g_strdup_printf(_(
2056 "Location constraint given for Amazon S3 bucket, "
2057 "but the bucket name (%s) is not usable as a subdomain."), bucket);
2062 result = perform_request(hdl, "PUT", bucket, NULL, NULL, NULL,
2063 read_func, reset_func, size_func, md5_func, ptr,
2064 NULL, NULL, NULL, NULL, NULL, result_handling);
/* NOTE(review): the "result != S3_RESULT_OK &&" term below is redundant after the preceding "||" */
2066 if (result == S3_RESULT_OK ||
2067 (result != S3_RESULT_OK &&
2068 hdl->last_s3_error_code == S3_ERROR_BucketAlreadyOwnedByYou)) {
2069 /* verify the that the location constraint on the existing bucket matches
2070 * the one that's configured.
2072 if (is_non_empty_string(hdl->bucket_location)) {
2073 result = perform_request(hdl, "GET", bucket, NULL, "location", NULL,
2074 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2075 NULL, NULL, result_handling);
2077 result = perform_request(hdl, "GET", bucket, NULL, NULL, NULL,
2078 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
2079 NULL, NULL, result_handling);
2082 if (result == S3_RESULT_OK && is_non_empty_string(hdl->bucket_location)) {
2083 /* return to the default state of failure */
2084 result = S3_RESULT_FAIL;
2086 if (body) g_free(body);
2087 /* use strndup to get a null-terminated string */
2088 body = g_strndup(hdl->last_response_body, hdl->last_response_body_size);
2090 hdl->last_message = g_strdup(_("No body received for location request"));
2092 } else if ('\0' == body[0]) {
2093 hdl->last_message = g_strdup(_("Empty body received for location request"));
/* pmatch[1] is the self-closing "/>" alternative, pmatch[3] the text inside <LocationConstraint> */
2097 if (!s3_regexec_wrap(&location_con_regex, body, 4, pmatch, 0)) {
2098 loc_end_open = find_regex_substring(body, pmatch[1]);
2099 loc_content = find_regex_substring(body, pmatch[3]);
2101 /* The case of an empty string is special because XML allows
2102 * "self-closing" tags
2104 if (0 == strcmp(AMAZON_WILDCARD_LOCATION, hdl->bucket_location) &&
2105 '/' != loc_end_open[0])
2106 hdl->last_message = g_strdup(_("A wildcard location constraint is "
2107 "configured, but the bucket has a non-empty location constraint"));
2108 else if (strcmp(AMAZON_WILDCARD_LOCATION, hdl->bucket_location)?
2109 strncmp(loc_content, hdl->bucket_location, strlen(hdl->bucket_location)) :
2110 ('\0' != loc_content[0]))
2111 hdl->last_message = g_strdup(_("The location constraint configured "
2112 "does not match the constraint currently on the bucket"));
2114 result = S3_RESULT_OK;
2116 hdl->last_message = g_strdup(_("Unexpected location response from Amazon S3"));
2122 if (body) g_free(body);
2124 return result == S3_RESULT_OK;
/* Delete BUCKET itself: delegates to s3_delete() with a NULL key and returns
 * its result. */
2129 s3_delete_bucket(S3Handle *hdl,
2132 return s3_delete(hdl, bucket, NULL);