maxtext.layers package

Contents

maxtext.layers package

Submodules

maxtext.layers.attention_mla module
- Indexer
- mla_as_linen()
- MLA
maxtext.layers.attention_op module
- validate_compute_axis_order()
- apply_mask_to_logits()
- validate_gpu_flash_attention()
- ChunkedCausalMask
  - ChunkedCausalMask.chunk_size
- attention_op_as_linen()
- AttentionOp
- LoadBalancedCausalMask
maxtext.layers.attentions module
- L2Norm
  - L2Norm.eps
  - L2Norm.rngs
- l2_norm_as_linen()
- attention_as_linen()
- Attention
maxtext.layers.decoders module
- DecoderLayer
- SequentialBlockDecoderLayers
- deepstack_process()
- Decoder
maxtext.layers.embeddings module
- embed_as_linen()
- Embed
  - Embed.attend()
- attend_on_embedding()
- rotary_embedding_as_linen()
- RotaryEmbedding
  - RotaryEmbedding.timescale
  - RotaryEmbedding.apply_rotary()
- llama_rotary_embedding_as_linen()
- partial_rotary_embedding_as_linen()
- PartialRotaryEmbedding
- Gemma4PartialRotaryEmbedding
  - Gemma4PartialRotaryEmbedding.timescale
- LLaMARotaryEmbedding
  - LLaMARotaryEmbedding.timescale
- yarn_rotary_embedding_as_linen()
- YarnRotaryEmbedding
  - YarnRotaryEmbedding.embedding_dims
  - YarnRotaryEmbedding.max_position_embeddings
  - YarnRotaryEmbedding.original_max_position_embeddings
  - YarnRotaryEmbedding.beta_fast
  - YarnRotaryEmbedding.beta_slow
  - YarnRotaryEmbedding.rope_theta
  - YarnRotaryEmbedding.rope_factor
  - YarnRotaryEmbedding.cast_as_fprop_dtype
  - YarnRotaryEmbedding.fprop_dtype
  - YarnRotaryEmbedding.rope_interleave
  - YarnRotaryEmbedding.rope_truncate
  - YarnRotaryEmbedding.rope_attention_scaling
  - YarnRotaryEmbedding.rngs
  - YarnRotaryEmbedding.freqs_cis
- positional_embedding_as_linen()
- PositionalEmbedding
  - PositionalEmbedding.embedding_dims
  - PositionalEmbedding.max_wavelength
  - PositionalEmbedding.cast_as_fprop_dtype
  - PositionalEmbedding.fprop_dtype
  - PositionalEmbedding.rngs
- llama_vision_rotary_embedding_as_linen()
- LlamaVisionRotaryEmbedding
  - LlamaVisionRotaryEmbedding.image_size
  - LlamaVisionRotaryEmbedding.patch_size
  - LlamaVisionRotaryEmbedding.hidden_size
  - LlamaVisionRotaryEmbedding.num_attention_heads
  - LlamaVisionRotaryEmbedding.rope_theta
  - LlamaVisionRotaryEmbedding.cast_as_fprop_dtype
  - LlamaVisionRotaryEmbedding.fprop_dtype
  - LlamaVisionRotaryEmbedding.rngs
  - LlamaVisionRotaryEmbedding.freqs_cis
- Qwen3OmniMoeVisionRotaryEmbedding
  - Qwen3OmniMoeVisionRotaryEmbedding.hidden_size
  - Qwen3OmniMoeVisionRotaryEmbedding.num_attention_heads
  - Qwen3OmniMoeVisionRotaryEmbedding.spatial_merge_size
  - Qwen3OmniMoeVisionRotaryEmbedding.rope_theta
  - Qwen3OmniMoeVisionRotaryEmbedding.cast_as_fprop_dtype
  - Qwen3OmniMoeVisionRotaryEmbedding.fprop_dtype
  - Qwen3OmniMoeVisionRotaryEmbedding.rngs
  - Qwen3OmniMoeVisionRotaryEmbedding.compute_cos_sin()
- qwen3omnimoe_vision_pos_embed_interpolate_as_linen()
- Qwen3OmniMoeVisionPosEmbedInterpolate
  - Qwen3OmniMoeVisionPosEmbedInterpolate.num_position_embeddings
  - Qwen3OmniMoeVisionPosEmbedInterpolate.hidden_size
  - Qwen3OmniMoeVisionPosEmbedInterpolate.spatial_merge_size
  - Qwen3OmniMoeVisionPosEmbedInterpolate.dtype
  - Qwen3OmniMoeVisionPosEmbedInterpolate.cast_as_fprop_dtype
  - Qwen3OmniMoeVisionPosEmbedInterpolate.fprop_dtype
  - Qwen3OmniMoeVisionPosEmbedInterpolate.rngs
- Qwen3OmniMoeThinkerTextRotaryEmbedding
- qwen3_omni_mrope_embedding_as_linen()
maxtext.layers.encoders module
- VisionEncoder
- AudioEncoder
- vision_encoder_as_linen()
- audio_encoder_as_linen()
maxtext.layers.engram module
- CompressedTokenizer
  - CompressedTokenizer.lookup_table
  - CompressedTokenizer.num_new_token
- NgramHashMapping
  - NgramHashMapping.get_vocab_sizes()
- StaticWrapper
- MultiHeadEmbedding
- ShortConv
- Engram
maxtext.layers.initializers module
- nd_dense_init()
- variable_to_logically_partitioned()
maxtext.layers.learn_to_init_layer module
- LearnToInitDecoderLayer
- LearnToInitDense
- calculate_attn_weight()
- apply_lti_model_update()
maxtext.layers.linears module
- normalize_axes()
- canonicalize_tuple()
- DenseGeneral
  - DenseGeneral.quant_dot_general
- dense_general()
- Dropout
- MlpBlock
  - MlpBlock.get_norm_layer()
- mlp_block()
maxtext.layers.mhc module
- get_functions()
- sinkhorn()
- ManifoldConstrainedHyperConnections
  - ManifoldConstrainedHyperConnections.res_mapping()
  - ManifoldConstrainedHyperConnections.mapping()
maxtext.layers.moe module
- get_batchsplit_init_kernel_axes()
- random_routing()
- calculate_load_balance_updates()
- GateLogit
  - GateLogit.quant_dot_general
- RoutedMoE
- RoutedAndSharedMoE
  - RoutedAndSharedMoE.routed_moe
- get_gate_logit()
- get_routed_moe()
- get_routed_and_shared_moe()
maxtext.layers.multi_token_prediction module
- mtp_losses
- mtp_acceptance
- roll_and_mask()
- MultiTokenPredictionLayer
- MultiTokenPredictionBlock
- calculate_mtp_loss()
- calculate_mtp_acceptance_rate()
- multi_token_prediction_block_as_linen()
maxtext.layers.nnx_decoders module
- NNXDecoderLayer
- deepstack_process()
- NNXDecoder
- decoder_as_linen()
maxtext.layers.nnx_wrappers module
- is_vanilla_variable()
- to_linen_var()
- get_col_name()
- to_nnx_var()
- linen_vars_to_nnx_attrs()
- nnx_attrs_to_linen_vars()
- lazy_init()
- current_linen_module()
- ToNNX
  - ToNNX.lazy_init()
- linen_rngs_dict()
- ToLinen
- to_linen()
- to_linen_class()
maxtext.layers.normalizations module
- RMSNorm
- GlobalRMSNorm
- Qwen3NextRMSNorm()
- Qwen3NextRMSNormGated
- rms_norm()
- l2norm()
maxtext.layers.pipeline module
- PipelineBase
- Pipeline
- CircularPipeline
- create_pipeline()
maxtext.layers.pipeline_deprecated module
- Pipeline
maxtext.layers.quantizations module
- Quantization
  - Quantization.dot_general_cls()
  - Quantization.einsum()
- AqtQuantization
  - AqtQuantization.quant_dg
  - AqtQuantization.quant_mode
  - AqtQuantization.replicate_scale
  - AqtQuantization.dot_general_cls()
  - AqtQuantization.einsum()
- QwixQuantization
  - QwixQuantization.quant_mode
  - QwixQuantization.act_calibration_method
  - QwixQuantization.weight_calibration_method
  - QwixQuantization.bwd_calibration_method
  - QwixQuantization.dot_general_cls()
  - QwixQuantization.einsum()
- QwixDotGeneral
  - QwixDotGeneral.config
  - QwixDotGeneral.name
  - QwixDotGeneral.parent
  - QwixDotGeneral.scope
- QwixEinsum
  - QwixEinsum.config
  - QwixEinsum.name
  - QwixEinsum.parent
  - QwixEinsum.scope
- Fp8Quantization
  - Fp8Quantization.quant_mode
  - Fp8Quantization.dot_general_cls()
  - Fp8Quantization.einsum()
- Fp8Einsum
  - Fp8Einsum.amax_history_length
  - Fp8Einsum.e4m3_dtype
  - Fp8Einsum.e5m2_dtype
  - Fp8Einsum.dtype
  - Fp8Einsum.setup()
  - Fp8Einsum.name
  - Fp8Einsum.parent
  - Fp8Einsum.scope
- NANOOFp8Quantization
  - NANOOFp8Quantization.quant_mode
  - NANOOFp8Quantization.dot_general_cls()
- ConstantBoundConfig
  - ConstantBoundConfig.fwd_lhs_bound
  - ConstantBoundConfig.fwd_rhs_bound
  - ConstantBoundConfig.dlhs_lhs_bound
  - ConstantBoundConfig.dlhs_rhs_bound
  - ConstantBoundConfig.drhs_lhs_bound
  - ConstantBoundConfig.drhs_rhs_bound
- PerTensorScales
  - PerTensorScales.fwd_lhs
  - PerTensorScales.fwd_rhs
  - PerTensorScales.dlhs_lhs
  - PerTensorScales.dlhs_rhs
  - PerTensorScales.drhs_lhs
  - PerTensorScales.drhs_rhs
- in_convert_mode()
- in_serve_mode()
- get_quant_mode()
- configure_quantization()
- match_aqt_and_unquantized_param()
- remove_quantized_params()
- configure_kv_quant()
- NvidaFp8Provider
  - NvidaFp8Provider.dot_general()
  - NvidaFp8Provider.einsum()
- NANOOFp8Provider
  - NANOOFp8Provider.dot_general()
- get_fp8_full_qwix_rule_w_sparsity()
- get_quantization_rule()
- get_qt_provider()
- maybe_quantize_model()
- manual_quantize()
- TransformerEngineQuantization
  - TransformerEngineQuantization.get_block_size()
  - TransformerEngineQuantization.dot_general_cls()
  - TransformerEngineQuantization.einsum()
maxtext.layers.train_state_nnx module
- TrainStateNNX
  - TrainStateNNX.apply_gradients()