# Source code for acton.cli

"""Command-line interface for Acton."""

import logging
import struct
import sys
from typing import BinaryIO, Iterable, List

import acton.acton
import acton.predictors
import acton.proto.wrappers
import acton.recommenders
import click


def read_bytes_from_buffer(n: int, buffer: BinaryIO) -> bytes:
    """Reads n bytes from a buffer, blocking until all bytes are received.

    Parameters
    ----------
    n
        How many bytes to read. Must be non-negative.
    buffer
        Which buffer to read from.

    Returns
    -------
    bytes
        Exactly n bytes.

    Raises
    ------
    ValueError
        If n is negative.
    EOFError
        If the buffer is exhausted before n bytes have been read.
    """
    if n < 0:
        raise ValueError('Cannot read a negative number of bytes: {}'.format(n))

    # Collect the pieces and join once at the end rather than repeatedly
    # concatenating bytes objects.
    chunks = []
    remaining = n
    while remaining > 0:
        chunk = buffer.read(remaining)
        if not chunk:
            # An empty read means the stream has ended. Without this check the
            # loop would spin forever waiting for bytes that can never arrive.
            raise EOFError(
                'Expected {} bytes but the buffer ended after {} bytes.'.format(
                    n, n - remaining))
        chunks.append(chunk)
        remaining -= len(chunk)
    return b''.join(chunks)


def read_binary() -> bytes:
    """Reads length-prefixed binary data from stdin.

    Notes
    -----
    The first eight bytes are expected to be the length of the input data as a
    little-endian unsigned long long.

    Returns
    -------
    bytes
        Binary data (without the eight-byte length prefix).
    """
    stdin = sys.stdin.buffer

    logging.debug('Reading 8 bytes from stdin.')
    header = read_bytes_from_buffer(8, stdin)
    # struct.unpack always returns a tuple; '<Q' yields exactly one value.
    length, = struct.unpack('<Q', header)

    logging.debug('Reading %d bytes from stdin.', length)
    return read_bytes_from_buffer(length, stdin)


def write_binary(string: bytes) -> None:
    """Writes length-prefixed binary data to stdout.

    Parameters
    ----------
    string
        Bytes to write.

    Notes
    -----
    The output will be preceded by the length as a little-endian unsigned long
    long (eight bytes).
    """
    stdout = sys.stdout.buffer
    n_bytes = len(string)

    logging.debug('Writing 8 + %d bytes to stdout.', n_bytes)
    header = struct.pack('<Q', n_bytes)
    logging.debug('Writing length %s (%d).', header, n_bytes)

    stdout.write(header)
    stdout.write(string)
    # Flush so the message is not left sitting in the stdout buffer.
    stdout.flush()


# acton


@click.command()
@click.option('--data',
              type=click.Path(exists=True, dir_okay=False),
              help='Path to features/labels file',
              required=True)
@click.option('-l', '--label',
              type=str,
              help='Column name of labels',
              required=True)
@click.option('-o', '--output',
              type=click.Path(dir_okay=False),
              help='Path to output file',
              required=True)
@click.option('-f', '--feature',
              type=str,
              multiple=True,
              help='Column names of features')
@click.option('--epochs',
              type=int,
              help='Number of epochs to run active learning for',
              default=100)
@click.option('-i', '--id',
              type=str,
              help='Column name of IDs')
@click.option('--diversity',
              type=float,
              help='Diversity of recommendations',
              default=0.0)
@click.option('--recommendation-count',
              type=int,
              help='Number of recommendations to make',
              default=1)
@click.option('--labeller-accuracy',
              type=float,
              help='Accuracy of simulated labellers',
              default=1.0)
@click.option('--initial-count',
              type=int,
              help='Number of random instances to label initially',
              default=10)
@click.option('--predictor',
              type=click.Choice(list(acton.predictors.PREDICTORS)),
              default='LogisticRegression',
              help='Predictor to use')
@click.option('--recommender',
              type=click.Choice(list(acton.recommenders.RECOMMENDERS)),
              default='RandomRecommender',
              help='Recommender to use')
@click.option('--pandas-key',
              type=str,
              default='',
              help='Key for pandas HDF5')
@click.option('-v', '--verbose',
              is_flag=True,
              help='Verbose output')
def main(
        data: str,
        label: str,
        output: str,
        feature: Iterable[str],
        epochs: int,
        id: str,
        diversity: float,
        recommendation_count: int,
        labeller_accuracy: float,
        initial_count: int,
        predictor: str,
        recommender: str,
        verbose: bool,
        pandas_key: str,
):
    """Run active learning on a features/labels file."""
    # Each parameter is described by the help text of its click option above.
    # `feature` is declared with multiple=True, so click passes a sequence of
    # column names rather than a single string.
    #
    # `diversity`, `id` and `labeller_accuracy` are accepted but not forwarded
    # to acton.acton.main; the warning below tells the user so.
    logging.warning('Not implemented: diversity, id_col, labeller_accuracy')
    logging.captureWarnings(True)
    if verbose:
        logging.root.setLevel(logging.DEBUG)
    return acton.acton.main(
        data_path=data,
        feature_cols=feature,
        label_col=label,
        output_path=output,
        n_epochs=epochs,
        initial_count=initial_count,
        recommender=recommender,
        predictor=predictor,
        pandas_key=pandas_key,
        n_recommendations=recommendation_count)


# acton-predict


@click.command()
@click.option(
    '--predictor',
    default='LogisticRegression',
    help='Predictor to use',
    type=click.Choice(acton.predictors.PREDICTORS.keys()))
@click.option('-v', '--verbose', help='Verbose output', is_flag=True)
def predict(
        predictor: str,
        verbose: bool,
):
    # Reads a length-prefixed serialised LabelPool from stdin, passes it to
    # acton.acton.predict with the chosen predictor, and writes the serialised
    # result to stdout with the same length-prefixed framing.

    # Route warnings through logging; raise verbosity if requested.
    logging.captureWarnings(True)
    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Input: label pool protobuf from stdin.
    label_pool = acton.proto.wrappers.LabelPool.deserialise(read_binary())

    # Output: whatever acton.acton.predict returns, serialised to stdout.
    result = acton.acton.predict(labels=label_pool, predictor=predictor)
    serialised = result.proto.SerializeToString()
    write_binary(serialised)


# acton-recommend


@click.command()
@click.option(
    '--diversity',
    default=0.0,
    help='Diversity of recommendations',
    type=float)
@click.option(
    '--recommendation-count',
    default=1,
    help='Number of recommendations to make',
    type=int)
@click.option(
    '--recommender',
    default='RandomRecommender',
    help='Recommender to use',
    type=click.Choice(acton.recommenders.RECOMMENDERS.keys()))
@click.option('-v', '--verbose', help='Verbose output', is_flag=True)
def recommend(
        diversity: float,
        recommendation_count: int,
        recommender: str,
        verbose: bool,
):
    # Reads a length-prefixed serialised Predictions protobuf from stdin,
    # passes it to acton.acton.recommend, and writes the serialised result to
    # stdout with the same framing. `diversity` is accepted but unused, hence
    # the warning.
    logging.warning('Not implemented: diversity')
    logging.captureWarnings(True)
    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Input: predictions protobuf from stdin.
    parsed_predictions = acton.proto.wrappers.Predictions.deserialise(
        read_binary())

    # Output: recommendations protobuf to stdout.
    result = acton.acton.recommend(
        predictions=parsed_predictions,
        recommender=recommender,
        n_recommendations=recommendation_count)
    serialised = result.proto.SerializeToString()
    write_binary(serialised)


# acton-label


def lines_from_stdin() -> Iterable[str]:
    """Yields lines from stdin.

    Each line is stripped of leading and trailing whitespace; lines that are
    empty after stripping are skipped.
    """
    for raw_line in sys.stdin:
        line = raw_line.strip()
        logging.debug('Read line %r from stdin.', line)
        if line:
            yield line


@click.command()
@click.option(
    '--data',
    help='Path to labels file',
    required=False,
    type=click.Path(exists=True, dir_okay=False))
@click.option(
    '-l', '--label',
    help='Column name of labels',
    required=False,
    type=str)
@click.option(
    '-f', '--feature',
    help='Column names of features',
    multiple=True,
    type=str)
@click.option(
    '--labeller-accuracy',
    default=1.0,
    help='Accuracy of simulated labellers',
    type=float)
@click.option(
    '--pandas-key',
    default='',
    help='Key for pandas HDF5',
    type=str)
@click.option('-v', '--verbose', help='Verbose output', is_flag=True)
def label(
        data: str,
        feature: List[str],
        label: str,
        labeller_accuracy: float,
        verbose: bool,
        pandas_key: str,
):
    # Two input modes:
    #   * no database options given: stdin carries a length-prefixed serialised
    #     Recommendations protobuf;
    #   * --data/--label (and optionally --pandas-key) given: stdin carries one
    #     integer ID per line and the Recommendations wrapper is built from the
    #     database.
    # Either way the result of acton.acton.label is serialised to stdout.
    # `labeller_accuracy` is accepted but unused, hence the warning.
    logging.warning('Not implemented: labeller_accuracy')
    logging.captureWarnings(True)
    if verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    wants_database = bool(data or label or pandas_key)
    if not wants_database:
        # Protobuf mode: the recommendations arrive ready-made on stdin.
        recs = acton.proto.wrappers.Recommendations.deserialise(read_binary())
    else:
        # Database mode needs both the file and the label column.
        if not (data and label):
            raise ValueError('--data, --label, or --pandas-key specified, but '
                             'missing --data or --label.')

        # One integer ID per non-empty stdin line.
        requested_ids = [int(line) for line in lines_from_stdin()]

        DB, db_kwargs = acton.acton.get_DB(data, pandas_key=pandas_key)
        db_kwargs['label_col'] = label
        db_kwargs['feature_cols'] = feature
        with DB(data, **db_kwargs) as db:
            # No recommendations protobuf was supplied, so there are no
            # previously labelled instances to carry over.
            recs = acton.proto.wrappers.Recommendations.make(
                recommended_ids=requested_ids,
                labelled_ids=[],
                recommender='None',
                db=db)

    result = acton.acton.label(recs)
    write_binary(result.proto.SerializeToString())


# When this module is run as a script, invoke the `main` command and pass its
# return value to sys.exit.
if __name__ == '__main__':
    sys.exit(main())