# Storage

The storage abstraction provides a high-level interface to retrieve data that is being stored during execution.

# Storage

machinable.Storage(url=None)

Interface to manage storage data

The interface is read-only. machinable will never modify any of the collected data.

# Examples

import machinable as ml
storage = ml.Storage('~/data')
storage.find('t3s42Q')

# Arguments

  • url: String, URL of storage location.

# add

add(self, url:str) -> 'Storage'

Adds a URL to the search index

# Arguments

  • url: String, URL to add

Returns the number of experiments found

# find

find(self, experiment:Union[str, NoneType]=None)

Finds an experiment

# Arguments

  • experiment: String, experiment ID. If None, all available experiments will be returned.

# Returns

Instance or collection of machinable.storage.ExperimentStorage

# find_all

find_all(self)

Returns a collection of all available experiments in the storage

# Returns

Instance or collection of machinable.storage.ExperimentStorage

# find_by_directory

find_by_directory(self, directory)

Finds experiments in a directory

# Arguments

  • directory: String

# Returns

Instance or collection of machinable.storage.ExperimentStorage

# find_many

find_many(self, experiments:Union[List[str], Tuple[str]])

Finds many experiments

# Arguments

  • experiments: List of experiment IDs

# Returns

Instance or collection of machinable.storage.ExperimentStorage

# reset

reset(self) -> 'Storage'

Resets the interface and discards the cache

# ExperimentStorage

machinable.storage.experiment.ExperimentStorage(url:str)

# code_backup property

True if code backup is available

# code_version property

Returns information about the source code version as a dictionary

project:
  path: VCS url
  commit: Commit hash or None
  is_dirty: Whether there are changes that have not been committed to VCS
vendor: List of vendor project information with the same structure as above

# components property

List of components

# host property

Returns information on the experiment host

# id property

6-digit experiment ID, e.g. F4K3r6

# output property

Returns the captured output

# schedule property

Returns the experiment schedule

# seed property

Returns the global random seed used in the experiment

# started_at property

Start of execution

# timestamp property

Returns the timestamp of the experiment

# file

file(self, filepath, reload=False)

Returns the content of a file in the experiment storage

# Arguments

  • filepath: Relative filepath
  • reload: If True, cache will be ignored

# ComponentStorage

machinable.storage.component.ComponentStorage(url:str, experiment=None)

# config property

Returns the component config

# experiment property

The experiment of this observation

# finished_at property

Returns the finishing time

# flags property

Returns the component flags

# heartbeat_at property

Returns the last heartbeat time

# host property

Returns information of the host

# id property

Returns the component storage ID

# log property

Returns the content of the log file

# output property

Returns the captured output

# records property

Returns the record interface

# started_at property

Returns the starting time

# state property

Returns information of component state

# tuning property

True if experiment is a tuning experiment

# file

file(self, filepath, default=<sentinel>, reload=False)

Returns the content of a file in the component storage

# Arguments

  • filepath: Relative filepath
  • reload: If True, cache will be ignored

# get_records_writer

get_records_writer(self, scope=None)

Returns a record writer

# Arguments

  • scope: The name of the record writer

# has_records

has_records(self, scope='default')

Returns True if records of given scope exist

# is_alive

is_alive(self)

True if not finished and last heartbeat occurred less than 30 seconds ago

# is_finished

is_finished(self)

True if finishing time has been written

# store

store(self, name=None)

Retrieves an element from the store

This is the counterpart to the store.write method.

# Arguments

  • name: Key or filename of the object that is to be retrieved. If None, a list of available objects is returned

# Collection

machinable.storage.collections.Collection(items=None)

# items property

Items of the collection

# all

all(self)

Get all of the items in the collection.

Returns the underlying list represented by the collection

Collection([1, 2, 3]).all()

#### [1, 2, 3]

# append

append(self, value)

Add an item onto the end of the collection.

# Arguments

  • value: The value to push
collection = Collection([1, 2, 3, 4])

collection.append(5)

collection.all()

#### [1, 2, 3, 4, 5]

# as_json

as_json(self, **options)

Converts the collection into JSON

# Arguments

  • options: JSON encoding options
collection = Collection([{'name': 'Desk', 'price': 200}])

collection.as_json()

#### '[{"name": "Desk", "price": 200}]'

# as_numpy

as_numpy(self)

Converts the collection into a numpy array

# as_table

as_table(self, mode='html', headers=(), **kwargs)

Converts the collection into a table

# Arguments

  • mode: String 'html' or any other mode of the tabulate package
  • headers: Optional header row
  • **kwargs: Options to pass to tabulate

# avg

avg(self, key=None)

Get the average value of a given key.

# Arguments

  • key: The key to get the average for
Collection([1, 2, 3, 4, 5]).avg()
#### 3

If the collection contains nested objects or dictionaries, you must pass a key to use for determining which values to calculate the average:

collection = Collection([
    {'name': 'JavaScript: The Good Parts', 'pages': 176},
    {'name': 'JavaScript: The Definitive Guide', 'pages': 1096}
])
collection.avg('pages')
#### 636

# chunk

chunk(self, size)

Chunk the underlying collection.

The chunk method breaks the collection into multiple, smaller collections of a given size:

collection = Collection([1, 2, 3, 4, 5, 6, 7])

chunks = collection.chunk(4)

chunks.serialize()

#### [[1, 2, 3, 4], [5, 6, 7]]

# Arguments

  • size: The chunk size

# collapse

collapse(self)

Collapses a collection of lists into a flat collection

collection = Collection([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
collapsed = collection.collapse()
collapsed.all()
#### [1, 2, 3, 4, 5, 6, 7, 8, 9]

# contains

contains(self, key, value=None)

Determines if an element is in the collection

# Arguments

  • key: Integer|String|callable The element
  • value: The value of the element
collection = Collection(['foo', 'bar'])

collection.contains('foo')

#### True

You can also use the in keyword:

'foo' in collection

#### True

You can also pass a key / value pair to the contains method, which will determine if the given pair exists in the collection:

collection = Collection([
    {'name': 'John', 'id': 1},
    {'name': 'Jane', 'id': 2}
])

collection.contains('name', 'Simon')

#### False

Finally, you may also pass a callback to the contains method to perform your own truth test:

collection = Collection([1, 2, 3, 4, 5])

collection.contains(lambda item: item > 5)

#### False

# count

count(self)

Returns the total number of items in the collection:

collection = Collection([1, 2, 3, 4])

collection.count()

#### 4

The len function can also be used:

len(collection)

#### 4

# diff

diff(self, items)

Compares the collection against another collection, a list or a dict

# Arguments

  • items: The items to diff with
collection = Collection([1, 2, 3, 4, 5])
diff = collection.diff([2, 4, 6, 8])
diff.all()
#### [1, 3, 5]

# each

each(self, callback)

Iterates over the items in the collection and passes each item to a given callback

# Arguments

  • callback: callable The callback to execute
collection = Collection([1, 2, 3])
collection.each(lambda x: x + 3)

Return False from your callback to break out of the loop:

observations.each(lambda data: data.save() if data.name == 'mnist' else False)

TIP

It only applies the callback but does not modify the collection's items. Use the transform() method to modify the collection.

# empty

empty(self)

Returns True if the collection is empty; otherwise, False is returned

# every

every(self, step, offset=0)

Create a new collection consisting of every n-th element.

# Arguments

  • step: int The step size
  • offset: int The start offset
collection = Collection(['a', 'b', 'c', 'd', 'e', 'f'])

collection.every(4).all()

#### ['a', 'e']

You can optionally pass the offset as the second argument:

collection.every(4, 1).all()

#### ['b', 'f']

# filter

filter(self, callback=None)

Filters the collection by a given callback, keeping only those items that pass a given truth test

# Arguments

  • callback: callable|None The filter callback
collection = Collection([1, 2, 3, 4])

filtered = collection.filter(lambda item: item > 2)

filtered.all()

#### [3, 4]

# first

first(self, callback=None, default=None)

Returns the first element in the collection that passes a given truth test

# Arguments

  • callback: Optional callable truth condition to find first element
  • default: A default value
collection = Collection([1, 2, 3, 4])

collection.first(lambda item: item > 2)

#### 3

You can also call the first method with no arguments to get the first element in the collection. If the collection is empty, None is returned:

collection.first()

#### 1

# flatten

flatten(self)

Flattens a multi-dimensional collection into a single dimension

collection = Collection([1, 2, [3, 4, 5, {'foo': 'bar'}]])

flattened = collection.flatten()

flattened.all()

#### [1, 2, 3, 4, 5, 'bar']

# forget

forget(self, *keys)

Remove an item from the collection by key.

# Arguments

  • keys: The keys to remove
collection = Collection([1, 2, 3, 4, 5])
collection.forget(1)
collection.all()
#### [1, 3, 4, 5]

WARNING

Unlike most other collection methods, forget does not return a new modified collection; it modifies the collection it is called on.

# get

get(self, key, default=None)

Returns the item at a given key. If the key does not exist, None is returned

# Arguments

  • key: The index of the element
  • default: The default value to return
collection = Collection([1, 2, 3])
collection.get(3)
#### None

You can optionally pass a default value as the second argument:

collection = Collection([1, 2, 3])
collection.get(3, 'default-value')
#### default-value

# implode

implode(self, value, glue='')

Joins the items in a collection. Its arguments depend on the type of items in the collection.

# Arguments

  • value: The value
  • glue: The glue

If the collection contains dictionaries or objects, you must pass the key of the attributes you wish to join, and the "glue" string you wish to place between the values:

collection = Collection([
    {'account_id': 1, 'product': 'Desk'},
    {'account_id': 2, 'product': 'Chair'}
])

collection.implode('product', ', ')

#### Desk, Chair

If the collection contains simple strings, simply pass the "glue" as the only argument to the method:

collection = Collection(['foo', 'bar', 'baz'])

collection.implode('-')

#### foo-bar-baz

# last

last(self, callback=None, default=None)

Returns the last element in the collection that passes a given truth test

# Arguments

  • callback: Optional callable truth condition
  • default: The default value
collection = Collection([1, 2, 3, 4])

last = collection.last(lambda item: item < 3)

#### 2

You can also call the last method with no arguments to get the last element in the collection. If the collection is empty, None is returned:

collection.last()

#### 4

# map

map(self, callback)

Iterates through the collection and passes each value to the given callback. The callback is free to modify the item and return it, thus forming a new collection of modified items

# Arguments

  • callback: The map function
collection = Collection([1, 2, 3, 4])

multiplied = collection.map(lambda item: item * 2)

multiplied.all()

#### [2, 4, 6, 8]

WARNING

Like most other collection methods, map returns a new Collection instance; it does not modify the collection it is called on. If you want to transform the original collection, use the transform method.

# max

max(self, key=None)

Get the max value of a given key.

# Arguments

  • key: The key

# merge

merge(self, items)

Merges the given list into the collection

# Arguments

  • items: The items to merge
collection = Collection(['Desk', 'Chair'])
collection.merge(['Bookcase', 'Door'])
collection.all()
#### ['Desk', 'Chair', 'Bookcase', 'Door']

WARNING

Unlike most other collection methods, merge does not return a new modified collection; it modifies the collection it is called on.

# min

min(self, key=None)

Get the min value of a given key.

# Arguments

  • key: The key

# only

only(self, *keys)

Get the items with the specified keys.

# Arguments

  • keys: tuple The keys to keep

# pluck

pluck(self, value, key=None)

Retrieves all of the collection values for a given key

# Arguments

  • value: Value
  • key: Optional key
collection = Collection([
    {'product_id': 1, 'product': 'Desk'},
    {'product_id': 2, 'product': 'Chair'}
])

plucked = collection.pluck('product')

plucked.all()

#### ['Desk', 'Chair']

You can also specify how you wish the resulting collection to be keyed:

plucked = collection.pluck('product', 'product_id')

plucked

#### {1: 'Desk', 2: 'Chair'}

# pluck_or_nan

pluck_or_nan(self, value, key=None)

Pluck method that returns NaNs if key is not present

# Arguments

  • value: Value
  • key: Key

# pluck_or_none

pluck_or_none(self, value, key=None, none=None)

Pluck method that returns None if key is not present

# Arguments

  • value: Value
  • key: Key
  • none: Return value if key is not present

# pop

pop(self, key=None)

Removes and returns an item from the collection. If no index is specified, the last item is removed and returned.

# Arguments

  • key: The index of the item to return
collection = Collection([1, 2, 3, 4, 5])
collection.pop()
#### 5

collection.all()
#### [1, 2, 3, 4]

# prepend

prepend(self, value)

Adds an item to the beginning of the collection

# Arguments

  • value: The value to push
collection = Collection([1, 2, 3, 4])

collection.prepend(0)

collection.all()

#### [0, 1, 2, 3, 4]

# pull

pull(self, key, default=None)

Removes and returns an item from the collection by its key

# Arguments

  • key: The key
  • default: The default value

collection = Collection([1, 2, 3, 4])

collection.pull(1)

collection.all()

#### [1, 3, 4]

# put

put(self, key, value)

Sets the given key and value in the collection

# Arguments

  • key: The key
  • value: The value
collection = Collection([1, 2, 3, 4])
collection.put(1, 5)
collection.all()

#### [1, 5, 3, 4]

TIP

It is equivalent to collection[1] = 5

# reduce

reduce(self, callback, initial=None)

Reduces the collection to a single value, passing the result of each iteration into the subsequent iteration

# Arguments

  • callback: The callback
  • initial: The initial value
collection = Collection([1, 2, 3])

collection.reduce(lambda result, item: (result or 0) + item)

#### 6

The value for result on the first iteration is None; however, you can specify its initial value by passing a second argument to reduce:

collection.reduce(lambda result, item: result + item, 4)

#### 10

# reject

reject(self, callback)

Filters the collection using the given callback. The callback should return True for any items it wishes to remove from the resulting collection

# Arguments

  • callback: The truth test
collection = Collection([1, 2, 3, 4])

filtered = collection.reject(lambda item: item > 2)

filtered.all()

#### [1, 2]

For the inverse of reject, see the filter method.

# reverse

reverse(self)

Reverses the order of the collection's items

collection = Collection([1, 2, 3, 4, 5])
reverse = collection.reverse()
reverse.all()
#### [5, 4, 3, 2, 1]

# section

section(self, of, reduce=None)

Performs horizontal reduce through collection

# Arguments

  • of: String|Callable Selector of reduce values
  • reduce: Optional callable reduce method

# serialize

serialize(self)

Converts the collection into a list

collection = Collection([User.find(1)])
collection.serialize()
#### [{'id': 1, 'name': 'John'}]

WARNING

serialize also converts all of its nested objects. If you want to get the underlying items as is, use the all method instead.

# sort

sort(self, callback=None)

Sorts the collection

# Arguments

  • callback: Sort callable
collection = Collection([5, 3, 1, 2, 4])

sorted = collection.sort()

sorted.all()

#### [1, 2, 3, 4, 5]

# sum

sum(self, callback=None)

Returns the sum of all items in the collection

# Arguments

  • callback: The callback

Collection([1, 2, 3, 4, 5]).sum()

#### 15

If the collection contains dictionaries or objects, you must pass a key to use for determining which values to sum:

collection = Collection([
    {'name': 'JavaScript: The Good Parts', 'pages': 176},
    {'name': 'JavaScript: The Definitive Guide', 'pages': 1096}
])

collection.sum('pages')

#### 1272

In addition, you can pass your own callback to determine which values of the collection to sum:

collection = Collection([
    {'name': 'Chair', 'colors': ['Black']},
    {'name': 'Desk', 'colors': ['Black', 'Mahogany']},
    {'name': 'Bookcase', 'colors': ['Red', 'Beige', 'Brown']}
])

collection.sum(lambda product: len(product['colors']))

#### 6

# take

take(self, limit)

Take the first or last n items.

# Arguments

  • limit: The number of items to take
collection = Collection([0, 1, 2, 3, 4, 5])
chunk = collection.take(3)
chunk.all()
#### [0, 1, 2]

You can also pass a negative integer to take the specified amount of items from the end of the collection:

chunk = collection.take(-2)
chunk.all()
#### [4, 5]

# transform

transform(self, callback)

Transform each item in the collection using a callback.

Iterates over the collection and calls the given callback with each item in the collection. The items in the collection will be replaced by the values returned by the callback.

# Arguments

  • callback: The callback
collection = Collection([1, 2, 3, 4, 5])
collection.transform(lambda item: item * 2)
collection.all()

#### [2, 4, 6, 8, 10]

WARNING

Unlike most other collection methods, transform modifies the collection itself. If you wish to create a new collection instead, use the map method.

# unique

unique(self, key=None)

Returns all of the unique items in the collection

# Arguments

  • key: The key to check uniqueness on
collection = Collection([1, 1, 2, 2, 3, 4, 2])

unique = collection.unique()

unique.all()

#### [1, 2, 3, 4]

When dealing with dictionaries or objects, you can specify the key used to determine uniqueness:

collection = Collection([
    {'name': 'iPhone 6', 'brand': 'Apple', 'type': 'phone'},
    {'name': 'iPhone 5', 'brand': 'Apple', 'type': 'phone'},
    {'name': 'Apple Watch', 'brand': 'Apple', 'type': 'watch'},
    {'name': 'Galaxy S6', 'brand': 'Samsung', 'type': 'phone'},
    {'name': 'Galaxy Gear', 'brand': 'Samsung', 'type': 'watch'}
])

unique = collection.unique('brand')

unique.all()

#### [
####     {'name': 'iPhone 6', 'brand': 'Apple', 'type': 'phone'},
####     {'name': 'Galaxy S6', 'brand': 'Samsung', 'type': 'phone'}
#### ]

You can also pass your own callback to determine item uniqueness:

unique = collection.unique(lambda item: item['brand'] + item['type'])

unique.all()

#### [
####     {'name': 'iPhone 6', 'brand': 'Apple', 'type': 'phone'},
####     {'name': 'Apple Watch', 'brand': 'Apple', 'type': 'watch'},
####     {'name': 'Galaxy S6', 'brand': 'Samsung', 'type': 'phone'},
####     {'name': 'Galaxy Gear', 'brand': 'Samsung', 'type': 'watch'}
#### ]

# where

where(self, key, value)

Filter items by the given key value pair.

# Arguments

  • key: The key to filter by
  • value: The value to filter by
collection = Collection([
    {'name': 'Desk', 'price': 200},
    {'name': 'Chair', 'price': 100},
    {'name': 'Bookcase', 'price': 150},
    {'name': 'Door', 'price': 100},
])

filtered = collection.where('price', 100)

filtered.all()

#### [
####     {'name': 'Chair', 'price': 100},
####     {'name': 'Door', 'price': 100}
#### ]

# without

without(self, *keys)

Get all items except for those with the specified keys.

# Arguments

  • keys: tuple The keys to remove

# zip

zip(self, *items)

Merges together the values of the given list with the values of the collection at the corresponding index

# Arguments

  • *items: Zip items

collection = Collection(['Chair', 'Desk'])
zipped = collection.zip([100, 200])
zipped.all()
#### [('Chair', 100), ('Desk', 200)]