bitsandbytes.optim package

bitsandbytes.optim.adam module

class bitsandbytes.optim.adam.Adam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False, optim_bits=32, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True)
class bitsandbytes.optim.adam.Adam32bit(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True)
class bitsandbytes.optim.adam.Adam8bit(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True)

bitsandbytes.optim.lamb module

class bitsandbytes.optim.lamb.LAMB(params, lr=0.001, bias_correction=True, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False, adam_w_mode=True, optim_bits=32, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=False, max_unorm=1.0)
class bitsandbytes.optim.lamb.LAMB32bit(params, lr=0.001, bias_correction=True, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False, adam_w_mode=True, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=False, max_unorm=1.0)
class bitsandbytes.optim.lamb.LAMB8bit(params, lr=0.001, bias_correction=True, betas=(0.9, 0.999), eps=1e-08, weight_decay=0, amsgrad=False, adam_w_mode=True, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=False, max_unorm=1.0)

bitsandbytes.optim.lars module

class bitsandbytes.optim.lars.LARS(params, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, optim_bits=32, args=None, min_8bit_size=4096, percentile_clipping=100, max_unorm=0.02)
class bitsandbytes.optim.lars.LARS32bit(params, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, args=None, min_8bit_size=4096, percentile_clipping=100, max_unorm=0.02)
class bitsandbytes.optim.lars.LARS8bit(params, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, args=None, min_8bit_size=4096, percentile_clipping=100, max_unorm=0.02)
class bitsandbytes.optim.lars.PytorchLARS(params, lr=0.01, momentum=0, dampening=0, weight_decay=0, nesterov=False, max_unorm=0.02)
step(closure=None)

Performs a single optimization step.

Parameters

closure (callable, optional) – A closure that reevaluates the model and returns the loss.

bitsandbytes.optim.optimizer module

class bitsandbytes.optim.optimizer.Optimizer1State(optimizer_name, params, lr=0.001, betas=(0.9, 0.0), eps=1e-08, weight_decay=0.0, optim_bits=32, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True, max_unorm=0.0)
class bitsandbytes.optim.optimizer.Optimizer2State(optimizer_name, params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, weight_decay=0.0, optim_bits=32, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True, max_unorm=0.0)
class bitsandbytes.optim.optimizer.Optimizer8bit(params, defaults, optim_bits=32)
load_state_dict(state_dict)

Loads the optimizer state.

Parameters

state_dict (dict) – optimizer state. Should be an object returned from a call to state_dict().

step(closure=None)

Performs a single optimization step.

Parameters

closure (callable, optional) – A closure that reevaluates the model and returns the loss.

bitsandbytes.optim.rmsprop module

class bitsandbytes.optim.rmsprop.RMSprop(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False, optim_bits=32, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True)
class bitsandbytes.optim.rmsprop.RMSprop32bit(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True)
class bitsandbytes.optim.rmsprop.RMSprop8bit(params, lr=0.01, alpha=0.99, eps=1e-08, weight_decay=0, momentum=0, centered=False, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True)

bitsandbytes.optim.sgd module

class bitsandbytes.optim.sgd.SGD(params, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, optim_bits=32, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True)
class bitsandbytes.optim.sgd.SGD32bit(params, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True)
class bitsandbytes.optim.sgd.SGD8bit(params, lr, momentum=0, dampening=0, weight_decay=0, nesterov=False, args=None, min_8bit_size=4096, percentile_clipping=100, block_wise=True)