/*
- * Copyright (c) 2008,2009 Zmanda, Inc. All Rights Reserved.
+ * Copyright (c) 2008, 2009, 2010 Zmanda, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
<LocationConstraint>%s</LocationConstraint>\n\
</CreateBucketConfiguration>"
+#define AMAZON_STORAGE_CLASS_HEADER "x-amz-storage-class"
+
#define AMAZON_WILDCARD_LOCATION "*"
/* parameters for exponential backoff in the face of retriable errors */
/* Results which should always be retried */
#define RESULT_HANDLING_ALWAYS_RETRY \
{ 400, S3_ERROR_RequestTimeout, 0, S3_RESULT_RETRY }, \
+ { 403, S3_ERROR_RequestTimeTooSkewed,0, S3_RESULT_RETRY }, \
{ 409, S3_ERROR_OperationAborted, 0, S3_RESULT_RETRY }, \
{ 412, S3_ERROR_PreconditionFailed, 0, S3_RESULT_RETRY }, \
{ 500, S3_ERROR_InternalError, 0, S3_RESULT_RETRY }, \
char *secret_key;
char *user_token;
+ /* attributes for new objects */
char *bucket_location;
-
+ char *storage_class;
+ char *host;
+ char *service_path;
+ gboolean use_subdomain;
char *ca_info;
CURL *curl;
guint last_num_retries;
void *last_response_body;
guint last_response_body_size;
+
+ /* clock offset between this host and the S3 server (remote - local), in seconds */
+ time_t time_offset_with_s3;
};
typedef struct {
gboolean headers_done;
gboolean int_write_done;
char *etag;
+ /* back-pointer to the owning S3Handle, so the header callback can record the S3 clock offset */
+ struct S3Handle *hdl;
} S3InternalData;
/* Callback function to examine headers one-at-a-time
/*
* Precompiled regular expressions */
static regex_t etag_regex, error_name_regex, message_regex, subdomain_regex,
- location_con_regex;
+ location_con_regex, date_sync_regex;
+
/*
* Utility functions
*
* @param hdl: the S3Handle object
* @param verb: capitalized verb for this request ('PUT', 'GET', etc.)
+ * @param host: the host name to connect to, e.g. 's3.amazonaws.com'
+ * @param service_path: A path to add in the URL, or NULL for none.
* @param bucket: the bucket being accessed, or NULL for none
* @param key: the key being accessed, or NULL for none
* @param subresource: the sub-resource being accessed (e.g. "acl"), or NULL for none
- * @param use_subdomain: if TRUE, a subdomain of s3.amazonaws.com will be used
+ * @param use_subdomain: if TRUE, a subdomain of 'host' will be used
+ * @param use_ssl: if TRUE, use 'https'
+ *
+ * !use_subdomain: http://host/service_path/bucket/key
+ * use_subdomain : http://bucket.host/service_path/key
+ *
*/
static char *
-build_url(const char *bucket,
+build_url(
+ const char *host,
+ const char *service_path,
+ const char *bucket,
const char *key,
const char *subresource,
const char *query,
* @param key: the key being accessed, or NULL for none
* @param subresource: the sub-resource being accessed (e.g. "acl"), or NULL for none
* @param md5_hash: the MD5 hash of the request body, or NULL for none
- * @param use_subdomain: if TRUE, a subdomain of s3.amazonaws.com will be used
*/
static struct curl_slist *
authenticate_request(S3Handle *hdl,
const char *bucket,
const char *key,
const char *subresource,
- const char *md5_hash,
- gboolean use_subdomain);
+ const char *md5_hash);
/* do a brute-force search through the list, since it's not sorted */
for (i = 0; i < S3_ERROR_END; i++) {
- if (g_strcasecmp(s3_error_name, s3_error_code_names[i]) == 0)
+ if (g_ascii_strcasecmp(s3_error_name, s3_error_code_names[i]) == 0)
return i;
}
}
static char *
-build_url(const char *bucket,
+build_url(
+ const char *host,
+ const char *service_path,
+ const char *bucket,
const char *key,
const char *subresource,
const char *query,
/* domain */
if (use_subdomain && bucket)
- g_string_append_printf(url, "%s.s3.amazonaws.com/", bucket);
+ g_string_append_printf(url, "%s.%s", bucket, host);
else
- g_string_append(url, "s3.amazonaws.com/");
+ g_string_append_printf(url, "%s", host);
+
+ if (service_path) {
+ g_string_append_printf(url, "%s/", service_path);
+ } else {
+ g_string_append(url, "/");
+ }
/* path */
if (!use_subdomain && bucket) {
esc_bucket = curl_escape(bucket, 0);
- if (!esc_bucket) goto cleanup;
+ if (!esc_bucket) goto cleanup;
g_string_append_printf(url, "%s", esc_bucket);
if (key)
g_string_append(url, "/");
if (key) {
esc_key = curl_escape(key, 0);
- if (!esc_key) goto cleanup;
+ if (!esc_key) goto cleanup;
g_string_append_printf(url, "%s", esc_key);
}
const char *bucket,
const char *key,
const char *subresource,
- const char *md5_hash,
- gboolean use_subdomain)
+ const char *md5_hash)
{
time_t t;
struct tm tmp;
/* calculate the date */
t = time(NULL);
+
+ /* compensate for clock skew with Amazon S3, using the offset measured from the Date response header */
+ t = t + hdl->time_offset_with_s3;
+
#ifdef _WIN32
if (!gmtime_s(&tmp, &t)) g_debug("localtime error");
#else
if (!gmtime_r(&t, &tmp)) perror("localtime");
#endif
+
+
date = g_strdup_printf("%s, %02d %s %04d %02d:%02d:%02d GMT",
wkday[tmp.tm_wday], tmp.tm_mday, month[tmp.tm_mon], 1900+tmp.tm_year,
tmp.tm_hour, tmp.tm_min, tmp.tm_sec);
g_string_append(auth_string, date);
g_string_append(auth_string, "\n");
+ /* CanonicalizedAmzHeaders, sorted lexicographically */
if (is_non_empty_string(hdl->user_token)) {
g_string_append(auth_string, AMAZON_SECURITY_HEADER);
g_string_append(auth_string, ":");
g_string_append(auth_string, "\n");
}
+ if (is_non_empty_string(hdl->storage_class)) {
+ g_string_append(auth_string, AMAZON_STORAGE_CLASS_HEADER);
+ g_string_append(auth_string, ":");
+ g_string_append(auth_string, hdl->storage_class);
+ g_string_append(auth_string, "\n");
+ }
+
/* CanonicalizedResource */
+ if (hdl->service_path) {
+ g_string_append(auth_string, hdl->service_path);
+ }
g_string_append(auth_string, "/");
if (bucket) {
- if (use_subdomain)
+ if (hdl->use_subdomain)
g_string_append(auth_string, bucket);
else {
esc_bucket = curl_escape(bucket, 0);
}
}
- if (bucket && (use_subdomain || key))
+ if (bucket && (hdl->use_subdomain || key))
g_string_append(auth_string, "/");
if (key) {
HMAC_Final(&ctx, md->data, &md->len);
HMAC_CTX_cleanup(&ctx);
auth_base64 = s3_base64_encode(md);
-
/* append the new headers */
if (is_non_empty_string(hdl->user_token)) {
/* Devpay headers are included in hash. */
g_free(buf);
}
+ if (is_non_empty_string(hdl->storage_class)) {
+ buf = g_strdup_printf(AMAZON_STORAGE_CLASS_HEADER ": %s", hdl->storage_class);
+ headers = curl_slist_append(headers, buf);
+ g_free(buf);
+ }
+
+
buf = g_strdup_printf("Authorization: AWS %s:%s",
hdl->access_key, auth_base64);
headers = curl_slist_append(headers, buf);
/* check ETag, if present */
if (etag && content_md5 && 200 == response_code) {
- if (etag && g_strcasecmp(etag, content_md5))
+ if (etag && g_ascii_strcasecmp(etag, content_md5))
hdl->last_message = g_strdup("S3 Error: Possible data corruption (ETag returned by Amazon did not match the MD5 hash of the data sent)");
else
ret = FALSE;
s3_counter_write_func(G_GNUC_UNUSED void *ptr, size_t size, size_t nmemb, void *stream)
{
gint64 *count = (gint64*) stream, inc = nmemb*size;
-
+
if (count) *count += inc;
return inc;
}
gpointer progress_data,
const result_handling_t *result_handling)
{
- gboolean use_subdomain;
char *url = NULL;
s3_result_t result = S3_RESULT_FAIL; /* assume the worst.. */
CURLcode curl_code = CURLE_OK;
char curl_error_buffer[CURL_ERROR_SIZE] = "";
struct curl_slist *headers = NULL;
- S3InternalData int_writedata = {{NULL, 0, 0, MAX_ERROR_RESPONSE_LEN}, NULL, NULL, NULL, FALSE, FALSE, NULL};
+ /* Set S3Internal Data */
+ S3InternalData int_writedata = {{NULL, 0, 0, MAX_ERROR_RESPONSE_LEN}, NULL, NULL, NULL, FALSE, FALSE, NULL, hdl};
gboolean should_retry;
guint retries = 0;
gulong backoff = EXPONENTIAL_BACKOFF_START_USEC;
s3_reset(hdl);
- use_subdomain = is_non_empty_string(hdl->bucket_location);
- url = build_url(bucket, key, subresource, query, use_subdomain, hdl->use_ssl);
+ url = build_url(hdl->host, hdl->service_path, bucket, key, subresource,
+ query, hdl->use_subdomain, hdl->use_ssl);
if (!url) goto cleanup;
/* libcurl may behave strangely if these are not set correctly */
/* set up the request */
headers = authenticate_request(hdl, verb, bucket, key, subresource,
- md5_hash_b64, is_non_empty_string(hdl->bucket_location));
+ md5_hash_b64);
if (hdl->use_ssl && hdl->ca_info) {
if ((curl_code = curl_easy_setopt(hdl->curl, CURLOPT_CAINFO, hdl->ca_info)))
s3_internal_header_func(void *ptr, size_t size, size_t nmemb, void * stream)
{
static const char *final_header = "\r\n";
+ time_t remote_time_in_sec,local_time;
char *header;
regmatch_t pmatch[2];
S3InternalData *data = (S3InternalData *) stream;
header = g_strndup((gchar *) ptr, (gsize) size*nmemb);
+
if (!s3_regexec_wrap(&etag_regex, header, 2, pmatch, 0))
- data->etag = find_regex_substring(header, pmatch[1]);
+ data->etag = find_regex_substring(header, pmatch[1]);
if (!strcmp(final_header, header))
data->headers_done = TRUE;
+ /* If a Date response header is found, compute the clock offset against S3 */
+ if (!s3_regexec_wrap(&date_sync_regex, header, 2, pmatch, 0)){
+ char *date = find_regex_substring(header, pmatch[1]);
+
+ /* Remote time is always in GMT: RFC 2616 */
+ /* both curl_getdate and time operate in UTC, so no timezone math is necessary */
+ if ( (remote_time_in_sec = curl_getdate(date, NULL)) < 0 ){
+ g_debug("Error: Conversion of remote time to seconds failed.");
+ data->hdl->time_offset_with_s3 = 0;
+ }else{
+ local_time = time(NULL);
+ /* Offset time */
+ data->hdl->time_offset_with_s3 = remote_time_in_sec - local_time;
+
+ if (data->hdl->verbose)
+ g_debug("Time Offset (remote - local) :%ld",(long)data->hdl->time_offset_with_s3);
+ }
+
+ g_free(date);
+ }
+
+ g_free(header);
return size*nmemb;
}
{"<Message>[[:space:]]*([^<]*)[[:space:]]*</Message>", REG_EXTENDED | REG_ICASE, &message_regex},
{"^[a-z0-9](-*[a-z0-9]){2,62}$", REG_EXTENDED | REG_NOSUB, &subdomain_regex},
{"(/>)|(>([^<]*)</LocationConstraint>)", REG_EXTENDED | REG_ICASE, &location_con_regex},
+ {"^Date:(.*)\r",REG_EXTENDED | REG_ICASE | REG_NEWLINE, &date_sync_regex},
{NULL, 0, NULL}
};
char regmessage[1024];
{"(/>)|(>([^<]*)</LocationConstraint>)",
G_REGEX_CASELESS,
&location_con_regex},
+ {"^Date:(.*)\\r",
+ G_REGEX_OPTIMIZE | G_REGEX_CASELESS,
+ &date_sync_regex},
{NULL, 0, NULL}
};
int i;
S3Handle *
s3_open(const char *access_key,
const char *secret_key,
+ const char *host,
+ const char *service_path,
+ const gboolean use_subdomain,
const char *user_token,
const char *bucket_location,
+ const char *storage_class,
const char *ca_info
- ) {
+ )
+{
S3Handle *hdl;
hdl = g_new0(S3Handle, 1);
/* NULL is okay */
hdl->bucket_location = g_strdup(bucket_location);
+ /* NULL is okay */
+ hdl->storage_class = g_strdup(storage_class);
+
/* NULL is okay */
hdl->ca_info = g_strdup(ca_info);
+ if (!is_non_empty_string(host))
+ host = "s3.amazonaws.com";
+ hdl->host = g_strdup(host);
+ hdl->use_subdomain = use_subdomain ||
+ (strcmp(host, "s3.amazonaws.com") == 0 &&
+ is_non_empty_string(hdl->bucket_location));
+ if (service_path) {
+ if (service_path[0] != '/')
+ hdl->service_path = g_strdup_printf("/%s", service_path);
+ else
+ hdl->service_path = g_strdup(service_path);
+ } else {
+ hdl->service_path = NULL;
+ }
+
hdl->curl = curl_easy_init();
if (!hdl->curl) goto error;
g_free(hdl->secret_key);
if (hdl->user_token) g_free(hdl->user_token);
if (hdl->bucket_location) g_free(hdl->bucket_location);
+ if (hdl->storage_class) g_free(hdl->storage_class);
+ if (hdl->host) g_free(hdl->host);
+ if (hdl->service_path) g_free(hdl->service_path);
if (hdl->curl) curl_easy_cleanup(hdl->curl);
g_free(hdl);
gboolean is_truncated;
gchar *next_marker;
+ guint64 size;
gboolean want_text;
struct list_keys_thunk *thunk = (struct list_keys_thunk *)user_data;
thunk->want_text = 0;
- if (g_strcasecmp(element_name, "contents") == 0) {
+ if (g_ascii_strcasecmp(element_name, "contents") == 0) {
thunk->in_contents = 1;
- } else if (g_strcasecmp(element_name, "commonprefixes") == 0) {
+ } else if (g_ascii_strcasecmp(element_name, "commonprefixes") == 0) {
thunk->in_common_prefixes = 1;
- } else if (g_strcasecmp(element_name, "prefix") == 0 && thunk->in_common_prefixes) {
+ } else if (g_ascii_strcasecmp(element_name, "prefix") == 0 && thunk->in_common_prefixes) {
thunk->want_text = 1;
- } else if (g_strcasecmp(element_name, "key") == 0 && thunk->in_contents) {
+ } else if (g_ascii_strcasecmp(element_name, "key") == 0 && thunk->in_contents) {
thunk->want_text = 1;
- } else if (g_strcasecmp(element_name, "istruncated")) {
+ } else if (g_ascii_strcasecmp(element_name, "size") == 0 && thunk->in_contents) {
thunk->want_text = 1;
- } else if (g_strcasecmp(element_name, "nextmarker")) {
+ } else if (g_ascii_strcasecmp(element_name, "istruncated")) {
+ thunk->want_text = 1;
+ } else if (g_ascii_strcasecmp(element_name, "nextmarker")) {
thunk->want_text = 1;
}
}
{
struct list_keys_thunk *thunk = (struct list_keys_thunk *)user_data;
- if (g_strcasecmp(element_name, "contents") == 0) {
+ if (g_ascii_strcasecmp(element_name, "contents") == 0) {
thunk->in_contents = 0;
- } else if (g_strcasecmp(element_name, "commonprefixes") == 0) {
+ } else if (g_ascii_strcasecmp(element_name, "commonprefixes") == 0) {
thunk->in_common_prefixes = 0;
- } else if (g_strcasecmp(element_name, "key") == 0 && thunk->in_contents) {
+ } else if (g_ascii_strcasecmp(element_name, "key") == 0 && thunk->in_contents) {
thunk->filename_list = g_slist_prepend(thunk->filename_list, thunk->text);
thunk->text = NULL;
- } else if (g_strcasecmp(element_name, "prefix") == 0 && thunk->in_common_prefixes) {
+ } else if (g_ascii_strcasecmp(element_name, "size") == 0 && thunk->in_contents) {
+ thunk->size += g_ascii_strtoull (thunk->text, NULL, 10);
+ thunk->text = NULL;
+ } else if (g_ascii_strcasecmp(element_name, "prefix") == 0 && thunk->in_common_prefixes) {
thunk->filename_list = g_slist_prepend(thunk->filename_list, thunk->text);
thunk->text = NULL;
- } else if (g_strcasecmp(element_name, "istruncated") == 0) {
- if (thunk->text && g_strncasecmp(thunk->text, "false", 5) != 0)
+ } else if (g_ascii_strcasecmp(element_name, "istruncated") == 0) {
+ if (thunk->text && g_ascii_strncasecmp(thunk->text, "false", 5) != 0)
thunk->is_truncated = TRUE;
- } else if (g_strcasecmp(element_name, "nextmarker") == 0) {
+ } else if (g_ascii_strcasecmp(element_name, "nextmarker") == 0) {
if (thunk->next_marker) g_free(thunk->next_marker);
thunk->next_marker = thunk->text;
thunk->text = NULL;
const char *bucket,
const char *prefix,
const char *delimiter,
- GSList **list)
+ GSList **list,
+ guint64 *total_size)
{
/*
* max len of XML variables:
thunk.filename_list = NULL;
thunk.text = NULL;
thunk.next_marker = NULL;
+ thunk.size = 0;
/* Loop until S3 has given us the entire picture */
do {
return FALSE;
} else {
*list = thunk.filename_list;
+ if(total_size) {
+ *total_size = thunk.size;
+ }
return TRUE;
}
}
NULL, NULL, NULL, NULL, NULL, result_handling);
if (result == S3_RESULT_OK ||
- (is_non_empty_string(hdl->bucket_location) && result != S3_RESULT_OK
- && hdl->last_s3_error_code == S3_ERROR_BucketAlreadyOwnedByYou)) {
+ (result != S3_RESULT_OK &&
+ hdl->last_s3_error_code == S3_ERROR_BucketAlreadyOwnedByYou)) {
/* verify the that the location constraint on the existing bucket matches
* the one that's configured.
*/
- result = perform_request(hdl, "GET", bucket, NULL, "location", NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
- NULL, NULL, result_handling);
+ if (is_non_empty_string(hdl->bucket_location)) {
+ result = perform_request(hdl, "GET", bucket, NULL, "location", NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, result_handling);
+ } else {
+ result = perform_request(hdl, "GET", bucket, NULL, NULL, NULL,
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ NULL, NULL, result_handling);
+ }
- /* note that we can check only one of the three AND conditions above
- * and infer that the others are true
- */
if (result == S3_RESULT_OK && is_non_empty_string(hdl->bucket_location)) {
/* return to the default state of failure */
result = S3_RESULT_FAIL;