module AWS
  module S3
    # S3Objects represent the data you store on S3. They have a key (their name) and a value (their data). All objects belong to a
    # bucket.
    #
    # You can store an object on S3 by specifying a key, its data and the name of the bucket you want to put it in:
    #
    #   S3Object.store('me.jpg', open('headshot.jpg'), 'photos')
    #
    # The content type of the object will be inferred from its extension. If the appropriate content type
    # cannot be inferred, S3 defaults to <tt>binary/octet-stream</tt>.
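    #
    # For instance, continuing the example above, the stored object should report the inferred type
    # (a sketch; 'photos' is the hypothetical bucket from that example):
    #
    #   S3Object.about('me.jpg', 'photos')['content-type']
    #   # => "image/jpeg"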
    #
    # If you want to override this, you can explicitly indicate what content type the object should have with the <tt>:content_type</tt> option:
    # 
    #   file = 'black-flowers.m4a'
    #   S3Object.store(
    #     file,
    #     open(file),
    #     'jukebox',
    #     :content_type => 'audio/mp4a-latm'
    #   )
    #
    # You can read more about storing files on S3 in the documentation for S3Object.store.
    #
    # If you just want to fetch an object you've stored on S3, you specify its key and its bucket:
    #
    #   picture = S3Object.find 'headshot.jpg', 'photos'
    #
    # N.B. The actual data for the file is not downloaded in either case: not when the file appears in a bucket
    # listing, and not when it is fetched as above. You get the data for the file like this:
    # 
    #   picture.value
    #
    # You can fetch just the object's data directly:
    #
    #   S3Object.value 'headshot.jpg', 'photos'
    #
    # Or stream it by passing a block to <tt>stream</tt>:
    # 
    #   open('song.mp3', 'w') do |file|
    #     S3Object.stream('song.mp3', 'jukebox') do |chunk|
    #       file.write chunk
    #     end
    #   end
    #
    # The data of the file, once downloaded, is cached, so subsequent calls to <tt>value</tt> won't redownload the file unless you 
    # tell the object to reload its <tt>value</tt>:
    # 
    #   # Redownloads the file's data
    #   song.value(:reload) 
    #
    # Other functionality includes:
    #
    #   # Check if an object exists
    #   S3Object.exists? 'headshot.jpg', 'photos'
    #
    #   # Copying an object
    #   S3Object.copy 'headshot.jpg', 'headshot2.jpg', 'photos'
    #
    #   # Renaming an object
    #   S3Object.rename 'headshot.jpg', 'portrait.jpg', 'photos'
    #
    #   # Deleting an object
    #   S3Object.delete 'headshot.jpg', 'photos'
    # 
    # ==== More about objects and their metadata
    # 
    # You can find out the content type of your object with the <tt>content_type</tt> method:
    # 
    #   song.content_type
    #   # => "audio/mpeg"
    # 
    # You can change the content type as well if you like:
    # 
    #   song.content_type = 'application/pdf'
    #   song.store
    # 
    # (Keep in mind that due to limitations in S3's exposed API, the only way to change things like the content_type
    # is to PUT the object onto S3 again. In the case of large files, this will result in fully re-uploading the file.)
    # 
    # A bevy of information about an object can be had using the <tt>about</tt> method:
    # 
    #   pp song.about
    #   {"last-modified"    => "Sat, 28 Oct 2006 21:29:26 GMT",
    #    "content-type"     => "binary/octet-stream",
    #    "etag"             => "\"dc629038ffc674bee6f62eb64ff3a\"",
    #    "date"             => "Sat, 28 Oct 2006 21:30:41 GMT",
    #    "x-amz-request-id" => "B7BC68F55495B1C8",
    #    "server"           => "AmazonS3",
    #    "content-length"   => "3418766"}
    # 
    # You can get and set metadata for an object:
    # 
    #   song.metadata
    #   # => {}
    #   song.metadata[:album] = "A River Ain't Too Much To Love"
    #   # => "A River Ain't Too Much To Love"
    #   song.metadata[:released] = 2005
    #   pp song.metadata
    #   {"x-amz-meta-released" => 2005, 
    #     "x-amz-meta-album"   => "A River Ain't Too Much To Love"}
    #   song.store
    # 
    # That metadata will be saved in S3 and is henceforth available from that object:
    # 
    #   song = S3Object.find('black-flowers.mp3', 'jukebox')
    #   pp song.metadata
    #   {"x-amz-meta-released" => "2005", 
    #     "x-amz-meta-album"   => "A River Ain't Too Much To Love"}
    #   song.metadata[:released]
    #   # => "2005"
    #   song.metadata[:released] = 2006
    #   pp song.metadata
    #   {"x-amz-meta-released" => 2006, 
    #    "x-amz-meta-album"    => "A River Ain't Too Much To Love"}
    class S3Object < Base
      class << self        
        # Returns the value of the object with <tt>key</tt> in the specified bucket.
        #
        # === Conditional GET options
        #
        # * <tt>:if_modified_since</tt> - Return the object only if it has been modified since the specified time, 
        #   otherwise return a 304 (not modified).
        # * <tt>:if_unmodified_since</tt> - Return the object only if it has not been modified since the specified time, 
        #   otherwise raise PreconditionFailed.
        # * <tt>:if_match</tt> - Return the object only if its entity tag (ETag) is the same as the one specified, 
        #   otherwise raise PreconditionFailed.
        # * <tt>:if_none_match</tt> - Return the object only if its entity tag (ETag) is different from the one specified, 
        #   otherwise return a 304 (not modified).
        #
        # === Other options
        # * <tt>:range</tt> - Return only the bytes of the object in the specified range.
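        #
        # For example, a conditional fetch might look like this (a sketch; the bucket and key come from
        # the class docs and the HTTP-date string is illustrative):
        #
        #   S3Object.value 'headshot.jpg', 'photos',
        #                  :if_modified_since => 'Sat, 28 Oct 2006 21:29:26 GMT'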
        def value(key, bucket = nil, options = {}, &block)
          Value.new(get(path!(bucket, key, options), options, &block))
        end
        
        # Streams the data of the object with <tt>key</tt> in <tt>bucket</tt> to the given block in chunks,
        # rather than reading the entire response body into memory:
        #
        #   open('song.mp3', 'w') do |file|
        #     S3Object.stream('song.mp3', 'jukebox') {|chunk| file.write chunk }
        #   end
        def stream(key, bucket = nil, options = {}, &block)
          value(key, bucket, options) do |response|
            response.read_body(&block)
          end
        end
        
        # Returns the object whose key is <tt>key</tt> in the specified bucket. If the specified key does not
        # exist, a NoSuchKey exception will be raised.
        def find(key, bucket = nil)
          # N.B. This is arguably a hack. From what the current S3 API exposes, when you retrieve a bucket, it
          # provides a listing of all the files in that bucket (assuming you haven't limited the scope of what it returns).
          # Each file in the listing contains information about that file. It is from this information that an S3Object is built.
          #
          # If you know the specific file that you want, S3 allows you to make a get request for that specific file and it returns
          # the value of that file in its response body. This response body is used to build an S3Object::Value object. 
          # If you want information about that file, you can make a head request and the headers of the response will contain 
          # information about that file. There is no way, though, to ask for the representation of a single file as it 
          # would appear in a bucket listing.
          #
          # When fetching a bucket, you can provide options which narrow the scope of what files should be returned in that listing.
          # Of those options, one is <tt>marker</tt>, a string which instructs the bucket to return only objects whose keys come after
          # the specified marker according to alphabetic order. Another option is <tt>max-keys</tt> which defaults to 1000 but allows you
          # to dictate how many objects should be returned in the listing. With a combination of <tt>marker</tt> and <tt>max-keys</tt> you can
          # *almost* specify exactly which file you'd like it to return, but <tt>marker</tt> is not inclusive. In other words, if there is a bucket
          # which contains three objects whose keys are respectively 'a', 'b' and 'c', then fetching a bucket listing with marker set to 'b' will only
          # return 'c', not 'b'. 
          #
          # Given all that, my hack to fetch a bucket with only one specific file is to set the marker to the result of calling String#previous on
          # the desired object's key. This yields a string that sorts immediately before the desired key, so the desired object
          # is the first entry in the listing. This is a hack, but it should work around 99% of the time. I can't think of a scenario
          # where it would return something incorrect.
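          #
          # To illustrate with the class docs' bucket (a sketch): String#previous decrements the key's last
          # character, so 'headshot.jpg'.previous is 'headshot.jpf', and
          #
          #   Bucket.find('photos', :marker => 'headshot.jpg'.previous, :max_keys => 1)
          #
          # returns a listing whose first (and only) entry is 'headshot.jpg', provided the object exists.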
          
          # We need to ensure the key doesn't have extended characters (but not URI-escape it) before doing the lookup and 
          # comparison, since if the object exists, the key stored on S3 will have been normalized
          key    = key.remove_extended unless key.utf8?
          bucket = Bucket.find(bucket_name(bucket), :marker => key.previous, :max_keys => 1)
          # If our heuristic failed, trigger a NoSuchKey exception
          if (object = bucket.objects.first) && object.key == key
            object 
          else 
            raise NoSuchKey.new("No such key `#{key}'", bucket)
          end
        end
        
        # Makes a copy of the object with <tt>key</tt> under the new key <tt>copy_key</tt>, preserving the ACL of the source object if the <tt>:copy_acl</tt> option is true (default false). For example:
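        #
        #   # Copy within the 'photos' bucket from the class docs, carrying over the source object's ACL
        #   S3Object.copy 'headshot.jpg', 'headshot2.jpg', 'photos', :copy_acl => true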
        def copy(key, copy_key, bucket = nil, options = {})
          bucket          = bucket_name(bucket)
          source_key      = path!(bucket, key)
          default_options = {'x-amz-copy-source' => source_key}
          target_key      = path!(bucket, copy_key)
          returning put(target_key, default_options) do
            acl(copy_key, bucket, acl(key, bucket)) if options[:copy_acl]
          end
        end
        
        # Renames the object with key <tt>from</tt> so that it has key <tt>to</tt>:
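        #
        #   S3Object.rename 'headshot.jpg', 'portrait.jpg', 'photos'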
        def rename(from, to, bucket = nil, options = {})
          copy(from, to, bucket, options)
          delete(from, bucket)
        end
        
        # Fetch information about the object with <tt>key</tt> from <tt>bucket</tt>. Information includes content type, content length,
        # last modified time, and others.
        #
        # If the specified key does not exist, NoSuchKey is raised.
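        #
        #   # The returned hash is keyed by header name (values here are illustrative)
        #   S3Object.about('headshot.jpg', 'photos')['content-length']
        #   # => "3418766"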
        def about(key, bucket = nil, options = {})
          response = head(path!(bucket, key, options), options)
          raise NoSuchKey.new("No such key `#{key}'", bucket) if response.code == 404
          About.new(response.headers)
        end
        
        # Checks if the object with <tt>key</tt> in <tt>bucket</tt> exists.
        #
        #   S3Object.exists? 'kiss.jpg', 'marcel'
        #   # => true
        def exists?(key, bucket = nil)
          about(key, bucket)
          true
        rescue NoSuchKey
          false
        end
      
        # Delete object with <tt>key</tt> from <tt>bucket</tt>.
        def delete(key, bucket = nil, options = {})
          # A bit confusing. Calling super actually makes an HTTP DELETE request. The delete method is
          # defined in the Base class. It happens to have the same name.
          super(path!(bucket, key, options), options).success?
        end
        
        # When storing an object on the S3 servers using S3Object.store, the <tt>data</tt> argument can be a string or an I/O stream. 
        # If <tt>data</tt> is an I/O stream it will be read in segments and written to the socket incrementally. This approach 
        # may be desirable for very large files so they are not read into memory all at once.
        # 
        #   # Non streamed upload
        #   S3Object.store('greeting.txt', 'hello world!', 'marcel')
        #
        #   # Streamed upload
        #   S3Object.store('roots.mpeg', open('roots.mpeg'), 'marcel')
        def store(key, data, bucket = nil, options = {})
          validate_key!(key)
          # Must build path before inferring content type in case bucket is being used for options
          path = path!(bucket, key, options)
          infer_content_type!(key, options)
          
          put(path, options, data) # Don't call .success? on response. We want to get the etag.
        end
        alias_method :create, :store
        alias_method :save,   :store
        
        # All private objects are accessible via an authenticated GET request to the S3 servers. You can generate an 
        # authenticated url for an object like this:
        #
        #   S3Object.url_for('beluga_baby.jpg', 'marcel_molina')
        #
        # By default authenticated urls expire 5 minutes after they are generated.
        #
        # Expiration options can be specified either as an absolute time since the epoch with the <tt>:expires</tt> option,
        # or as a number of seconds relative to now with the <tt>:expires_in</tt> option:
        #
        #   # Absolute expiration date 
        #   # (Expires January 18th, 2038)
        #   doomsday = Time.mktime(2038, 1, 18).to_i
        #   S3Object.url_for('beluga_baby.jpg', 
        #                    'marcel', 
        #                    :expires => doomsday)
        #   
        #   # Expiration relative to now specified in seconds 
        #   # (Expires in 3 hours)
        #   S3Object.url_for('beluga_baby.jpg', 
        #                    'marcel', 
        #                    :expires_in => 60 * 60 * 3)
        #
        # You can specify whether the url should go over SSL with the <tt>:use_ssl</tt> option:
        #
        #   # Url will use https protocol
        #   S3Object.url_for('beluga_baby.jpg', 
        #                    'marcel', 
        #                    :use_ssl => true)
        #
        # By default, the ssl settings for the current connection will be used.
        #
        # If you have an object handy, you can use its <tt>url</tt> method with the same options:
        #
        #   song.url(:expires_in => 30)
        #
        # To get an unauthenticated url for the object, such as in the case
        # when the object is publicly readable, pass the
        # <tt>:authenticated</tt> option with a value of <tt>false</tt>.
        #
        #   S3Object.url_for('beluga_baby.jpg',
        #                    'marcel',
        #                    :authenticated => false)
        #   # => http://s3.amazonaws.com/marcel/beluga_baby.jpg
        def url_for(name, bucket = nil, options = {})
          connection.url_for(path!(bucket, name, options), options) # Do not normalize options
        end
        
        # Builds the request path for the object <tt>name</tt> in <tt>bucket</tt> (e.g. "/photos/me.jpg").
        def path!(bucket, name, options = {}) #:nodoc:
          # Callers may pass the options hash in the bucket slot, since the bucket argument is optional
          if bucket.is_a?(Hash)
            options.replace(bucket)
            bucket = nil
          end
          '/' << File.join(bucket_name(bucket), name)
        end
    
        private
          
          # S3 limits key names to 1024 bytes
          def validate_key!(key)
            raise InvalidKeyName.new(key) unless key && key.size <= 1024
          end
          
          # Infers the content type from the key's extension via MIME::Types (e.g. 'me.jpg' => "image/jpeg")
          # unless the caller has already supplied a :content_type option
          def infer_content_type!(key, options)
            return if options.has_key?(:content_type)
            if mime_type = MIME::Types.type_for(key).first
              options[:content_type] = mime_type.content_type
            end
          end
      end
      
      class Value < String #:nodoc:
        attr_reader :response
        def initialize(response)
          super(response.body)
          @response = response
        end
      end
      
      class About < Hash #:nodoc:
        def initialize(headers)
          super()
          replace(headers)
          metadata
        end
        
        def [](header)
          super(header.to_header)
        end
        
        def []=(header, value)
          super(header.to_header, value)
        end
        
        def to_headers
          self.merge(metadata.to_headers)
        end
          
        def metadata
          Metadata.new(self)
        end
        memoized :metadata
      end
      
      class Metadata < Hash #:nodoc:
        HEADER_PREFIX = 'x-amz-meta-'
        SIZE_LIMIT    = 2048 # 2 kilobytes
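
        # Metadata headers are stored under the x-amz-meta- prefix. Keys are normalized through
        # header_name, so metadata[:album], metadata['album'] and metadata['x-amz-meta-album'] all
        # address the same entry.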
        
        def initialize(headers)
          @headers = headers
          super()
          extract_metadata!
        end
        
        def []=(header, value)
          super(header_name(header.to_header), value)
        end
        
        def [](header)
          super(header_name(header.to_header))
        end
        
        def to_headers
          validate!
          self
        end
        
        private
          attr_reader :headers
          
          def extract_metadata!
            headers.keys.grep(Regexp.new(HEADER_PREFIX)).each do |metadata_header|
              self[metadata_header] = headers.delete(metadata_header)
            end
          end
          
          def header_name(name)
            name =~ Regexp.new(HEADER_PREFIX) ? name : [HEADER_PREFIX, name].join
          end
          
          def validate!
            invalid_headers = inject([]) do |invalid, (name, value)|
              invalid << name unless valid?(value)
              invalid
            end
            
            raise InvalidMetadataValue.new(invalid_headers) unless invalid_headers.empty?
          end
          
          def valid?(value)
            value && value.size < SIZE_LIMIT
          end
      end
      
      attr_writer :value #:nodoc:
      
      # Provides readers and writers for all valid header settings listed in <tt>valid_header_settings</tt>.
      # Subsequent saves to the object after setting any of the valid header settings will be reflected in 
      # information about the object.
      #
      #   some_s3_object.content_type
      #   # => nil
      #   some_s3_object.content_type = 'text/plain'
      #   # => "text/plain"
      #   some_s3_object.content_type
      #   # => "text/plain"
      #   some_s3_object.store
      #   S3Object.about(some_s3_object.key, some_s3_object.bucket.name)['content-type']
      #   # => "text/plain"
      include SelectiveAttributeProxy #:nodoc:
      
      proxy_to :about, :exclusively => false
      
      # Initializes a new S3Object.
      def initialize(attributes = {}, &block)
        super
        self.value  = attributes.delete(:value) 
        self.bucket = attributes.delete(:bucket)
        yield self if block_given?
      end
      
      # The current object's bucket. If no bucket has been set, a NoBucketSpecified exception will be raised. For
      # cases where you are not sure if the bucket has been set, you can use the belongs_to_bucket? method.
      def bucket
        @bucket or raise NoBucketSpecified
      end
      
      # Sets the bucket that the object belongs to.
      def bucket=(bucket)
        @bucket = bucket
        self
      end
      
      # Returns true if the current object has been assigned to a bucket yet. Objects must belong to a bucket before they
      # can be saved onto S3.
      def belongs_to_bucket?
        !@bucket.nil?
      end

      # An object is an orphan if it has not yet been assigned to a bucket.
      def orphan?
        !belongs_to_bucket?
      end
      
      # Returns the key of the object. If the key is not set, a NoKeySpecified exception will be raised. For cases
      # where you are not sure if the key has been set, you can use the key_set? method. Objects must have a key
      # set to be saved onto S3. Objects which have already been saved onto S3 will always have their key set.
      def key
        attributes['key'] or raise NoKeySpecified
      end
      
      # Sets the key for the current object.
      def key=(value)
        attributes['key'] = value
      end
      
      # Returns true if the current object has had its key set yet. Objects which have already been saved will
      # always return true. This method is useful for objects which have not been saved yet so you know if you
      # need to set the object's key since you cannot save an object unless its key has been set.
      #
      #   object.store if object.key_set? && object.belongs_to_bucket?
      def key_set?
        !attributes['key'].nil?
      end
      
      # Lazily loads object data. 
      #
      # Force a reload of the data by passing <tt>:reload</tt>.
      #
      #   object.value(:reload)
      #
      # When loading the data for the first time you can optionally pass a block, which allows you
      # to stream the data in segments.
      #
      #   object.value do |segment|
      #     send_data segment
      #   end 
      #
      # The full list of options is given in the documentation for its class method counterpart, S3Object::value.
      def value(options = {}, &block)
        if options.is_a?(Hash)
          reload = !options.empty?
        else
          # The caller passed :reload (or another truthy flag) rather than an options hash
          reload  = options
          options = {}
        end
        memoize(reload) do
          self.class.stream(key, bucket.name, options, &block)
        end
      end
      
      # Interface to information about the current object. Information is read only, though some of its data
      # can be modified through specific methods, such as content_type and content_type=.
      #
      #   pp some_object.about
      #   {"last-modified"    => "Sat, 28 Oct 2006 21:29:26 GMT",
      #    "x-amz-id-2"       => "LdcQRk5qLwxJQiZ8OH50HhoyKuqyWoJ67B6i+rOE5MxpjJTWh1kCkL+I0NQzbVQn",
      #    "content-type"     => "binary/octet-stream",
      #    "etag"             => "\"dc629038ffc674bee6f62eb68454ff3a\"",
      #    "date"             => "Sat, 28 Oct 2006 21:30:41 GMT",
      #    "x-amz-request-id" => "B7BC68F55495B1C8",
      #    "server"           => "AmazonS3",
      #    "content-length"   => "3418766"}
      #
      #   some_object.content_type
      #   # => "binary/octet-stream"
      #   some_object.content_type = 'audio/mpeg'
      #   some_object.content_type
      #   # => "audio/mpeg"
      #   some_object.store
      def about
        stored? ? self.class.about(key, bucket.name) : About.new
      end
      memoized :about
      
      # Interface to viewing and editing metadata for the current object. To be treated like a Hash.
      #
      #   some_object.metadata
      #   # => {}
      #   some_object.metadata[:author] = 'Dave Thomas'
      #   some_object.metadata
      #   # => {"x-amz-meta-author" => "Dave Thomas"}
      #   some_object.metadata[:author]
      #   # => "Dave Thomas"
      def metadata
        about.metadata
      end
      memoized :metadata
      
      # Saves the current object with the specified <tt>options</tt>. Valid options are listed in the documentation for S3Object::store.
      def store(options = {})
        raise DeletedObject if frozen?
        options  = about.to_headers.merge(options) if stored?
        response = self.class.store(key, value, bucket.name, options)
        bucket.update(:stored, self)
        response.success?
      end
      alias_method :create, :store
      alias_method :save,   :store
      
      # Deletes the current object. Trying to save an object after it has been deleted will
      # raise a DeletedObject exception.
      def delete
        bucket.update(:deleted, self)
        freeze
        self.class.delete(key, bucket.name)
      end
      
      # Copies the current object, giving it the name <tt>copy_name</tt>. The copy is performed server-side by S3
      # (via the <tt>x-amz-copy-source</tt> header), so the object's data is not retransmitted.
      def copy(copy_name, options = {})
        self.class.copy(key, copy_name, bucket.name, options)
      end
      
      # Renames the current object. Implemented as a server-side copy to the new key followed by a delete of
      # the old key, so the object's data is not retransmitted.
      def rename(to, options = {})
        self.class.rename(key, to, bucket.name, options)
      end
      
      # Returns the object's ETag with its surrounding quotes stripped. Pass <tt>true</tt> to refetch
      # the headers from S3 rather than relying on cached attributes.
      def etag(reload = false)
        return nil unless stored?
        memoize(reload) do
          reload ? about(reload)['etag'][1...-1] : attributes['e_tag'][1...-1]
        end
      end
      
      # Returns the owner of the current object.
      def owner 
        Owner.new(attributes['owner'])
      end
      memoized :owner
      
      # Generates an authenticated url for the current object. Accepts the same options as its class method
      # counterpart, S3Object.url_for.
      def url(options = {})
        self.class.url_for(key, bucket.name, options)
      end
      
      # Returns true if the current object has been stored on S3 yet.
      def stored?
        !attributes['e_tag'].nil?
      end
      
      def ==(s3object) #:nodoc:
        path == s3object.path
      end
      
      def path #:nodoc:
        self.class.path!(
          belongs_to_bucket? ? bucket.name : '(no bucket)', 
          key_set?           ? key         : '(no key)'
        )
      end
        
      # Don't dump binary data :)
      def inspect #:nodoc:
        "#<%s:0x%s '%s'>" % [self.class, object_id, path]
      end
      
      private
        def proxiable_attribute?(name)
          valid_header_settings.include?(name)
        end
        
        def valid_header_settings
          %w(cache_control content_type content_length content_md5 content_disposition content_encoding expires)
        end
    end
  end
end