Index A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P | Q | R | S | T | U | V | W | X | Y | Z A A (maxtext.layers.learn_to_init_layer.LearnToInitDense attribute) abort_on_inf_loss (maxtext.configs.types.TrainingLoop attribute) abort_on_nan_loss (maxtext.configs.types.TrainingLoop attribute) act_calibration_method (maxtext.layers.quantizations.QwixQuantization attribute) act_quantization_calibration_method (maxtext.configs.types.Quantization attribute) activation_dropout_for_audio (maxtext.configs.types.AudioEncoder attribute) activation_function_for_audio (maxtext.configs.types.AudioEncoder attribute) activations_in_float32 (maxtext.configs.types.DataTypes attribute) actual_completion (maxtext.input_pipeline.distillation_data_processing.InputRequest attribute) adam_b1 (maxtext.configs.types.AdamW attribute) adam_b2 (maxtext.configs.types.AdamW attribute) adam_eps (maxtext.configs.types.AdamW attribute) adam_eps_root (maxtext.configs.types.AdamW attribute) ADAM_PAX (maxtext.configs.types.OptimizerType attribute) adam_weight_decay (maxtext.configs.types.AdamW attribute) AdamW (class in maxtext.configs.types) ADAMW (maxtext.configs.types.OptimizerType attribute) adamw_mask (maxtext.configs.types.AdamW attribute) add_bos (maxtext.configs.types.Tokenizer attribute) add_eos (maxtext.configs.types.Tokenizer attribute) add_extra_tokens_for_images_gemma3() (in module maxtext.multimodal.processor_gemma3) add_extra_tokens_for_images_gemma4() (in module maxtext.multimodal.processor_gemma4) add_extra_tokens_for_images_llama4() (in module maxtext.multimodal.processor_llama4) add_extra_tokens_for_qwen3_omni() (in module maxtext.multimodal.processor_qwen3_omni) add_segmentation_and_position() (in module maxtext.input_pipeline.input_pipeline_utils) advance_circular_buffers() (maxtext.layers.pipeline.CircularPipeline method) all_gather_over_fsdp() (maxtext.layers.pipeline.Pipeline method) (maxtext.layers.pipeline_deprecated.Pipeline method) all_reduce_ws_grad_dcn() (in module maxtext.models.deepseek_batchsplit) allow_split_physical_axes (maxtext.configs.types.HardwareAndMesh attribute) amax_history_length (maxtext.layers.quantizations.Fp8Einsum attribute) amplitude_to_db() (in module maxtext.multimodal.utils) AnalysisAgent (class in maxtext.experimental.agent.ckpt_conversion_agent.analysis) analyze_model_structures() (maxtext.experimental.agent.ckpt_conversion_agent.analysis.AnalysisAgent method) AOT (class in maxtext.configs.types) apply() (maxtext.models.models.TransformerLinen method) (maxtext.models.models.TransformerLinenPure method) apply_attention() (maxtext.layers.attention_op.AttentionOp method) apply_attention_dot() (maxtext.layers.attention_op.AttentionOp method) apply_attention_with_norm() (maxtext.models.qwen2.AttentionWithNorm method) (maxtext.models.qwen3.AttentionWithNorm method) (maxtext.models.qwen3_custom.Qwen3CustomMoeDecoderLayer method) apply_chat_template() (in module maxtext.input_pipeline.input_pipeline_utils) apply_ffn_activation() (maxtext.layers.moe.RoutedMoE method) apply_gradients() (maxtext.layers.train_state_nnx.TrainStateNNX method) apply_hook_fns() (in module maxtext.checkpoint_conversion.utils.utils) apply_lti_model_update() (in module maxtext.layers.learn_to_init_layer) apply_mask_to_logits() (in module maxtext.layers.attention_op) apply_multidimensional_rope() (in module maxtext.models.gemma4_vision) apply_multiprocessing_and_prefetch() (in module maxtext.input_pipeline.data_processing_utils) apply_output_head() (maxtext.layers.decoders.Decoder method) (maxtext.layers.nnx_decoders.NNXDecoder method) apply_partial_rope() (maxtext.layers.attention_mla.Indexer method) apply_rotary() (maxtext.layers.embeddings.RotaryEmbedding method) apply_rotary_embedding() (maxtext.layers.attentions.Attention method) AqtQuantization (class in maxtext.layers.quantizations) ar_cache_axis_order (maxtext.configs.types.InferenceLayout attribute) args (maxtext.layers.nnx_wrappers.ToLinen attribute) aspect_ratios (maxtext.multimodal.processor_llama4.Llama4PreprocessorOutput attribute) (maxtext.multimodal.utils.PreprocessorOutput attribute) assemble_script() (in module maxtext.experimental.agent.ckpt_conversion_agent.utils.utils) assert_is_supported_dtype() (in module maxtext.kernels.megablox.common) async_checkpointing (maxtext.configs.types.Checkpointing attribute) async_scheduling (maxtext.configs.types.VLLM attribute) attend() (maxtext.layers.embeddings.Embed method) attend_on_embedding() (in module maxtext.layers.embeddings) Attention (class in maxtext.configs.types) (class in maxtext.layers.attentions) attention (maxtext.configs.types.Attention attribute) (maxtext.models.qwen3.Qwen3NextFullAttention attribute) attention_as_linen() (in module maxtext.layers.attentions) attention_bias (maxtext.configs.types.ModelArchitecture attribute) attention_dropout_for_audio (maxtext.configs.types.AudioEncoder attribute) attention_kernel (maxtext.layers.attentions.Attention attribute) attention_op() (maxtext.models.deepseek.DeepSeekGenericLayer method) attention_op_as_linen() (in module maxtext.layers.attention_op) attention_out (maxtext.configs.types.RematAndOffload attribute) attention_output_dim (maxtext.configs.types.ModelArchitecture attribute) attention_reference() (in module maxtext.kernels.attention.splash_attention_kernel) attention_reference_custom() (in module maxtext.kernels.attention.splash_attention_kernel) attention_sink (maxtext.configs.types.Attention attribute) attention_type (maxtext.configs.types.Attention attribute) (maxtext.layers.attentions.Attention attribute) AttentionIndexer (class in maxtext.configs.types) AttentionOp (class in maxtext.layers.attention_op) AttentionWithNorm (class in maxtext.models.qwen2) (class in maxtext.models.qwen3) attn (maxtext.models.qwen3.Qwen3OmniMoeVisionAttention attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionBlock attribute) attn_logits_soft_cap (maxtext.configs.types.Attention attribute) (maxtext.layers.attentions.Attention attribute) audio_encoder_as_linen() (in module maxtext.layers.encoders) audio_lengths (maxtext.multimodal.processor_qwen3_omni.Qwen3OmniPreprocessorOutput attribute) audio_mask (maxtext.multimodal.processor_qwen3_omni.Qwen3OmniPreprocessorOutput attribute) (maxtext.multimodal.utils.PreprocessorOutput attribute) audio_path (maxtext.configs.types.MultimodalGeneral attribute) audio_placeholder (maxtext.configs.types.MultimodalGeneral attribute) audio_values (maxtext.multimodal.processor_qwen3_omni.Qwen3OmniPreprocessorOutput attribute) (maxtext.multimodal.utils.PreprocessorOutput attribute) AudioEncoder (class in maxtext.configs.types) (class in maxtext.layers.encoders) autoregressive_decode_assert (maxtext.configs.types.InferenceGeneral attribute) avg_pool_by_positions() (in module maxtext.models.gemma4_vision) B B (maxtext.layers.learn_to_init_layer.LearnToInitDense attribute) base_config (maxtext.configs.types.RunInfo attribute) base_emb_dim (maxtext.configs.types.ModelArchitecture attribute) base_mlp_dim (maxtext.configs.types.ModelArchitecture attribute) base_moe_mlp_dim (maxtext.configs.types.MoEGeneral attribute) base_num_decoder_layers (maxtext.configs.types.ModelArchitecture attribute) base_num_kv_heads (maxtext.configs.types.ModelArchitecture attribute) base_num_query_heads (maxtext.configs.types.ModelArchitecture attribute) base_output_directory (maxtext.configs.types.RunInfo attribute) BaseAgent (class in maxtext.experimental.agent.ckpt_conversion_agent.base) BaseMaxTextToVLLMConverter (class in maxtext.integration.vllm.torchax_converter.base) batch_size (maxtext.configs.types.RLDataset attribute) batch_split_factor (maxtext.configs.types.DeepSeekMoE attribute) batch_split_schedule() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) batch_split_schedule_bwd() (in module maxtext.models.deepseek_batchsplit) beta_fast (maxtext.configs.types.YarnRope attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) beta_slow (maxtext.configs.types.YarnRope attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) BFLOAT16 (maxtext.configs.types.DType attribute) (maxtext.configs.types.MatmulPrecision attribute) bias (maxtext.layers.learn_to_init_layer.LearnToInitDense attribute) block_kv (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) block_kv_compute (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) block_kv_dkv (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) block_kv_dkv_compute (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) block_kv_dq (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) block_q (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) block_q_dkv (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) block_q_dq (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) blocks (maxtext.models.qwen3.Qwen3OmniMoeVisionEncoder attribute) BlockSizes (class in maxtext.kernels.attention.splash_attention_kernel) build_index() (in module maxtext.input_pipeline.olmo_data) build_tokenizer() (in module maxtext.input_pipeline.tokenizer) bwd_calibration_method (maxtext.layers.quantizations.QwixQuantization attribute) bwd_quantization_calibration_method (maxtext.configs.types.Quantization attribute) BytesList (class in maxtext.input_pipeline.protos.feature_pb2) C C (maxtext.layers.learn_to_init_layer.LearnToInitDense attribute) C4MLPERF (maxtext.configs.types.DatasetType attribute) calculate_attn_weight() (in module maxtext.layers.learn_to_init_layer) calculate_global_batch_sizes() (in module maxtext.configs.pyconfig_deprecated) calculate_indexer_loss() (maxtext.layers.attention_mla.MLA method) calculate_load_balance_updates() (in module maxtext.layers.moe) calculate_moba_gate_logic() (maxtext.layers.attention_op.AttentionOp method) calculate_mtp_acceptance_rate() (in module maxtext.layers.multi_token_prediction) calculate_mtp_loss() (in module maxtext.layers.multi_token_prediction) calculate_rampup_samples_and_steps() (in module maxtext.configs.pyconfig_deprecated) calculate_video_frame_range() (in module maxtext.multimodal.processor_qwen3_omni) canonicalize_tuple() (in module maxtext.layers.linears) capacity_factor (maxtext.configs.types.MoEGeneral attribute) cast_as_fprop_dtype (maxtext.layers.embeddings.LlamaVisionRotaryEmbedding attribute) (maxtext.layers.embeddings.PositionalEmbedding attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionPosEmbedInterpolate attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionRotaryEmbedding attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) cast_logits_to_fp32 (maxtext.configs.types.Logits attribute) chat_template (maxtext.configs.types.Tokenizer attribute) chat_template_path (maxtext.configs.types.Tokenizer attribute) check_arrays_match() (in module maxtext.checkpoint_conversion.utils.hf_utils) check_attention_inputs() (maxtext.layers.attention_op.AttentionOp method) check_predicted_tokens_match() (in module maxtext.checkpoint_conversion.utils.hf_utils) checkpoint_conversion_fn (maxtext.configs.types.Checkpointing attribute) checkpoint_dir (maxtext.configs.types.DerivedValues attribute) checkpoint_is_quantized (maxtext.configs.types.Checkpointing attribute) checkpoint_period (maxtext.configs.types.Checkpointing attribute) checkpoint_storage_concurrent_gb (maxtext.configs.types.OrbaxStorage attribute) checkpoint_storage_target_data_file_size_bytes (maxtext.configs.types.OrbaxStorage attribute) checkpoint_storage_use_ocdbt (maxtext.configs.types.OrbaxStorage attribute) checkpoint_storage_use_zarr3 (maxtext.configs.types.OrbaxStorage attribute) checkpoint_todelete_full_path (maxtext.configs.types.Checkpointing attribute) checkpoint_todelete_subdir (maxtext.configs.types.Checkpointing attribute) Checkpointing (class in maxtext.configs.types) chips_per_vm (maxtext.configs.types.RLHardware attribute) chunk_attn_window_size (maxtext.configs.types.Attention attribute) chunk_size (maxtext.layers.attention_op.ChunkedCausalMask attribute) ChunkedCausalMask (class in maxtext.layers.attention_op) CircularPipeline (class in maxtext.layers.pipeline) collect_stack_trace (maxtext.configs.types.StackTrace attribute) colocated_python_checkpointing (maxtext.configs.types.Checkpointing attribute) colocated_python_data_input (maxtext.configs.types.DatasetGeneral attribute) combine_columns() (in module maxtext.input_pipeline.input_pipeline_utils) compile_topology (maxtext.configs.types.AOT attribute) compile_topology_num_slices (maxtext.configs.types.AOT attribute) compile_xla_flags (maxtext.configs.types.LayoutAndSharding attribute) compiled_trainstep_file (maxtext.configs.types.AOT attribute) COMPOSITE (maxtext.configs.types.SamplingStrategy attribute) compressed_size() (in module maxtext.experimental.agent.ckpt_conversion_agent.evaluation) CompressedTokenizer (class in maxtext.layers.engram) compute() (in module maxtext.models.deepseek_batchsplit_fp8) compute_axis_order (maxtext.configs.types.InferenceLayout attribute) compute_cos_sin() (maxtext.layers.embeddings.Qwen3OmniMoeVisionRotaryEmbedding method) compute_fingerprint() (in module maxtext.input_pipeline.olmo_data) compute_gating() (in module maxtext.models.deepseek_batchsplit) compute_linear() (in module maxtext.models.deepseek_batchsplit) compute_local_attention() (maxtext.layers.attention_op.AttentionOp method) compute_logits() (maxtext.integration.vllm.maxtext_vllm_adapter.adapter.MaxTextForCausalLM method) ComputeQwen3OmniPositions (class in maxtext.input_pipeline.input_pipeline_utils) config (maxtext.layers.attentions.Attention attribute) (maxtext.layers.decoders.Decoder attribute) (maxtext.layers.decoders.DecoderLayer attribute) (maxtext.layers.decoders.SequentialBlockDecoderLayers attribute) (maxtext.layers.learn_to_init_layer.LearnToInitDecoderLayer attribute) (maxtext.layers.pipeline.PipelineBase attribute) (maxtext.layers.pipeline_deprecated.Pipeline attribute) (maxtext.layers.quantizations.QwixDotGeneral attribute) (maxtext.layers.quantizations.QwixEinsum attribute) (maxtext.models.gemma.GemmaDecoderLayer attribute) (maxtext.models.gemma2.Gemma2DecoderLayer attribute) (maxtext.models.gpt_oss.GptOssScannableBlock attribute) (maxtext.models.llama4.Llama4MultiModalProjector attribute) (maxtext.models.llama4.Llama4UnfoldConvolution attribute) (maxtext.models.llama4.Llama4VisionEncoder attribute) (maxtext.models.llama4.Llama4VisionMLP attribute) (maxtext.models.llama4.Llama4VisionMLP2 attribute) (maxtext.models.llama4.Llama4VisionModel attribute) (maxtext.models.llama4.Llama4VisionPixelShuffleMLP attribute) (maxtext.models.models.TransformerLinenPure attribute) (maxtext.models.olmo3.Olmo3ScannableBlock attribute) (maxtext.models.qwen3.Qwen3NextDecoderLayer attribute) (maxtext.models.qwen3.Qwen3NextFullAttention attribute) (maxtext.models.qwen3.Qwen3NextScannableBlock attribute) (maxtext.models.qwen3.Qwen3NextSparseMoeBlock attribute) (maxtext.models.qwen3.Qwen3OmniAudioEncoder attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionAttention attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionBlock attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionEncoder attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionMLP attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchEmbed attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchMerger attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionProjector attribute) (maxtext.models.qwen3_5.Qwen3_5DecoderLayer attribute) configure_kv_quant() (in module maxtext.layers.quantizations) configure_quantization() (in module maxtext.layers.quantizations) constant_bound_config (maxtext.configs.types.DevelopmentAndDebugging attribute) ConstantBoundConfig (class in maxtext.layers.quantizations) context (maxtext.configs.types.RematAndOffload attribute) context_parallel_load_balance (maxtext.configs.types.HardwareAndMesh attribute) context_parallel_reorder_strategy (maxtext.configs.types.HardwareAndMesh attribute) context_parallel_size (maxtext.configs.types.DerivedValues attribute) context_parallel_strategy (maxtext.configs.types.HardwareAndMesh attribute) context_sharding (maxtext.configs.types.LayoutAndSharding attribute) conv_chunksize_for_audio (maxtext.configs.types.AudioEncoder attribute) conv_stride_for_vit (maxtext.configs.types.VisionTower attribute) convert() (maxtext.integration.vllm.torchax_converter.base.BaseMaxTextToVLLMConverter method) (maxtext.integration.vllm.torchax_converter.gemma4_moe.Gemma4MaxTextToVLLMConverter method) convert_checkpoint_if_possible (maxtext.configs.types.DerivedValues attribute) convert_dense_general_inputs_shape() (maxtext.layers.attentions.Attention method) convert_hf_map_to_sharding_map() (maxtext.integration.tunix.utils.VllmWeightMapping method) convert_jax_weight_to_numpy() (in module maxtext.checkpoint_conversion.utils.utils) convert_jax_weight_to_torch() (in module maxtext.checkpoint_conversion.utils.hf_utils) convert_to_conversational_format() (in module maxtext.input_pipeline.instruction_data_processing) convert_to_RGB() (in module maxtext.multimodal.utils) COSINE (maxtext.configs.types.LearningRateScheduleType attribute) (maxtext.configs.types.WsdDecayStyle attribute) cost_estimate_flops_bwd (maxtext.configs.types.SplashAttention attribute) cost_estimate_flops_fwd (maxtext.configs.types.SplashAttention attribute) cp_size (maxtext.layers.attention_op.LoadBalancedCausalMask attribute) create_data_iterator() (in module maxtext.experimental.rl.grpo_input_pipeline) (in module maxtext.input_pipeline.input_pipeline_interface) create_huggingface_hub_repo_if_not_exist() (in module maxtext.checkpoint_conversion.utils.utils) create_new_logical_axis_rules() (in module maxtext.configs.pyconfig_deprecated) create_parallelisms_list() (in module maxtext.configs.pyconfig_deprecated) create_pipeline() (in module maxtext.layers.pipeline) create_process_specific_iterator() (in module maxtext.input_pipeline.input_pipeline_interface) create_projection_layer() (maxtext.models.gpt3.Gpt3MultiHeadAttention method) cudnn_flash_attention() (maxtext.layers.attention_op.AttentionOp method) cudnn_jax_flash_attention() (maxtext.layers.attention_op.AttentionOp method) current_linen_module() (in module maxtext.layers.nnx_wrappers) CUSTOM (maxtext.configs.types.RematPolicy attribute) custom_mesh (maxtext.configs.types.HardwareAndMesh attribute) custom_mesh_and_rule (maxtext.configs.types.HardwareAndMesh attribute) D d_model_for_audio (maxtext.configs.types.AudioEncoder attribute) data_generator (maxtext.input_pipeline.synthetic_data_processing.SyntheticDataIterator attribute) data_sharding (maxtext.configs.types.LayoutAndSharding attribute) data_shuffle_seed (maxtext.configs.types.TrainingLoop attribute) dataloader (maxtext.experimental.rl.grpo_input_pipeline.SingleHostDataLoader attribute) dataset_name (maxtext.configs.types.TfdsDataset attribute) dataset_path (maxtext.configs.types.TfdsDataset attribute) dataset_type (maxtext.configs.types.DatasetGeneral attribute) DatasetGeneral (class in maxtext.configs.types) DatasetType (class in maxtext.configs.types) DataTypes (class in maxtext.configs.types) dcn_autoregressive_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_context_autoregressive_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_context_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_data_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_diloco_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_expert_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_fsdp_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_fsdp_transpose_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_parallelism (maxtext.configs.types.DerivedValues attribute) dcn_pipeline_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_sequence_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_tensor_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_tensor_sequence_parallelism (maxtext.configs.types.DcnParallelism attribute) dcn_tensor_transpose_parallelism (maxtext.configs.types.DcnParallelism attribute) DcnParallelism (class in maxtext.configs.types) Debug (class in maxtext.configs.types) debug (maxtext.configs.types.MaxTextConfig attribute) debug_converter (maxtext.configs.types.VLLM attribute) debug_sharding (maxtext.configs.types.RunInfo attribute) decode() (maxtext.input_pipeline.tokenizer.HFTokenizer method) (maxtext.input_pipeline.tokenizer.SentencePieceTokenizer method) (maxtext.input_pipeline.tokenizer.TikTokenTokenizer method) decode_sampling_nucleus_p (maxtext.configs.types.Decoding attribute) decode_sampling_strategy (maxtext.configs.types.Decoding attribute) decode_sampling_temperature (maxtext.configs.types.Decoding attribute) decode_sampling_top_k (maxtext.configs.types.Decoding attribute) Decoder (class in maxtext.layers.decoders) decoder_as_linen() (in module maxtext.layers.nnx_decoders) decoder_block (maxtext.configs.types.ModelArchitecture attribute) decoder_layer (maxtext.layers.decoders.SequentialBlockDecoderLayers attribute) decoder_layer_input (maxtext.configs.types.RematAndOffload attribute) DecoderLayer (class in maxtext.layers.decoders) Decoding (class in maxtext.configs.types) deep_idx (maxtext.models.qwen3.Qwen3OmniMoeVisionEncoder attribute) DEEPSEEK_HF_WEIGHTS_TO_SHAPE() (in module maxtext.checkpoint_conversion.utils.hf_shape) DEEPSEEK_MAXTEXT_TO_HF_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) DEEPSEEK_MAXTEXT_TO_HF_PARAM_MAPPING() (in module maxtext.checkpoint_conversion.utils.param_mapping) DEEPSEEK_NNX_TO_VLLM_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) deepseek_routing() (maxtext.layers.moe.RoutedMoE method) deepseek_scale_weights() (maxtext.layers.moe.RoutedMoE method) DEEPSEEK_VLLM_MAPPING (class in maxtext.integration.tunix.weight_mapping.deepseek3) DeepSeekDenseLayer (class in maxtext.models.deepseek) DeepSeekGenericLayer (class in maxtext.models.deepseek) DeepSeekMoE (class in maxtext.configs.types) DeepSeekMoELayer (class in maxtext.models.deepseek) DeepseekV32Config (class in maxtext.checkpoint_conversion.utils.hf_model_configs) deepstack_process() (in module maxtext.layers.decoders) (in module maxtext.layers.nnx_decoders) deepstack_visual_indexes_for_vit (maxtext.configs.types.VisionTower attribute) DEFAULT (maxtext.configs.types.MatmulPrecision attribute) (maxtext.configs.types.RopeType attribute) degenerate_group_masking (maxtext.configs.types.RL attribute) dense_general() (in module maxtext.layers.linears) dense_init_scale (maxtext.configs.types.ModelArchitecture attribute) dense_matmul() (maxtext.layers.moe.RoutedMoE method) DenseGeneral (class in maxtext.layers.linears) DerivedValues (class in maxtext.configs.types) DESCRIPTOR (maxtext.input_pipeline.protos.example_pb2.Example attribute) (maxtext.input_pipeline.protos.example_pb2.SequenceExample attribute) (maxtext.input_pipeline.protos.feature_pb2.BytesList attribute) (maxtext.input_pipeline.protos.feature_pb2.Feature attribute) (maxtext.input_pipeline.protos.feature_pb2.FeatureList attribute) (maxtext.input_pipeline.protos.feature_pb2.FeatureLists attribute) (maxtext.input_pipeline.protos.feature_pb2.FeatureLists.FeatureListEntry attribute) (maxtext.input_pipeline.protos.feature_pb2.Features attribute) (maxtext.input_pipeline.protos.feature_pb2.Features.FeatureEntry attribute) (maxtext.input_pipeline.protos.feature_pb2.FloatList attribute) (maxtext.input_pipeline.protos.feature_pb2.Int64List attribute) detect_and_extract_checkpoint() (in module maxtext.checkpoint_conversion.utils.utils) determine_is_moe_layer() (in module maxtext.models.llama4) determine_is_nope_layer() (in module maxtext.models.llama4) DevelopmentAndDebugging (class in maxtext.configs.types) DEVICE (maxtext.configs.types.RematLocation attribute) diloco_outer_lr (maxtext.configs.types.DilocoParams attribute) diloco_outer_momentum (maxtext.configs.types.DilocoParams attribute) diloco_sync_period (maxtext.configs.types.DilocoParams attribute) DilocoParams (class in maxtext.configs.types) distill_alpha (maxtext.configs.types.Distillation attribute) distill_alpha_end (maxtext.configs.types.Distillation attribute) distill_alpha_schedule (maxtext.configs.types.Distillation attribute) distill_beta (maxtext.configs.types.Distillation attribute) distill_beta_end (maxtext.configs.types.Distillation attribute) distill_beta_schedule (maxtext.configs.types.Distillation attribute) distill_feature_loss_type (maxtext.configs.types.Distillation attribute) distill_layer_indices (maxtext.configs.types.Distillation attribute) distill_student_weights_share_map (maxtext.configs.types.Distillation attribute) distill_temperature (maxtext.configs.types.Distillation attribute) distill_temperature_end (maxtext.configs.types.Distillation attribute) distill_temperature_schedule (maxtext.configs.types.Distillation attribute) distill_weights_copy_map (maxtext.configs.types.Distillation attribute) Distillation (class in maxtext.configs.types) DKV (maxtext.configs.types.KvQuantAxis attribute) dlhs_lhs (maxtext.layers.quantizations.PerTensorScales attribute) dlhs_lhs_bound (maxtext.layers.quantizations.ConstantBoundConfig attribute) dlhs_rhs (maxtext.layers.quantizations.PerTensorScales attribute) dlhs_rhs_bound (maxtext.layers.quantizations.ConstantBoundConfig attribute) dot() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) dot_general() (maxtext.layers.quantizations.NANOOFp8Provider method) (maxtext.layers.quantizations.NvidaFp8Provider method) dot_general_cls() (maxtext.layers.quantizations.AqtQuantization method) (maxtext.layers.quantizations.Fp8Quantization method) (maxtext.layers.quantizations.NANOOFp8Quantization method) (maxtext.layers.quantizations.Quantization method) (maxtext.layers.quantizations.QwixQuantization method) (maxtext.layers.quantizations.TransformerEngineQuantization method) downsample_hidden_size_for_audio (maxtext.configs.types.AudioEncoder attribute) dpo_beta (maxtext.configs.types.FineTuning attribute) dpo_label_smoothing (maxtext.configs.types.FineTuning attribute) dpo_preprocessing_pipeline() (in module maxtext.input_pipeline.grain_data_processing) dq_reduction_steps (maxtext.configs.types.SplashAttention attribute) drhs_lhs (maxtext.layers.quantizations.PerTensorScales attribute) drhs_lhs_bound (maxtext.layers.quantizations.ConstantBoundConfig attribute) drhs_rhs (maxtext.layers.quantizations.PerTensorScales attribute) drhs_rhs_bound (maxtext.layers.quantizations.ConstantBoundConfig attribute) Dropout (class in maxtext.layers.linears) dropout_op() (maxtext.models.deepseek.DeepSeekGenericLayer method) dropout_rate (maxtext.configs.types.TrainingLoop attribute) (maxtext.layers.attentions.Attention attribute) (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) DSLAgent (class in maxtext.experimental.agent.ckpt_conversion_agent.dsl) DType (class in maxtext.configs.types) dtype (maxtext.configs.types.DataTypes attribute) (maxtext.input_pipeline.olmo_data.OlmoNpyIndex attribute) (maxtext.layers.attentions.Attention attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionPosEmbedInterpolate attribute) (maxtext.layers.normalizations.Qwen3NextRMSNormGated attribute) (maxtext.layers.quantizations.Fp8Einsum attribute) (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionMLP attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchEmbed attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchMerger attribute) dtype_mm (maxtext.configs.types.DataTypes attribute) dump_hlo (maxtext.configs.types.HloDump attribute) dump_hlo_delete_local_after (maxtext.configs.types.HloDump attribute) dump_hlo_gcs_dir (maxtext.configs.types.HloDump attribute) dump_hlo_local_dir (maxtext.configs.types.HloDump attribute) dump_hlo_local_module_name (maxtext.configs.types.HloDump attribute) dump_hlo_module_name (maxtext.configs.types.HloDump attribute) dump_hlo_upload_all (maxtext.configs.types.HloDump attribute) dump_hlo_xla_flags (maxtext.configs.types.HloDump attribute) dump_jaxpr (maxtext.configs.types.HloDump attribute) dump_jaxpr_delete_local_after (maxtext.configs.types.HloDump attribute) dump_jaxpr_gcs_dir (maxtext.configs.types.HloDump attribute) dump_jaxpr_local_dir (maxtext.configs.types.HloDump attribute) dump_step (maxtext.configs.types.HloDump attribute) E e4m3_dtype (maxtext.layers.quantizations.Fp8Einsum attribute) e5m2_dtype (maxtext.layers.quantizations.Fp8Einsum attribute) Einsum (class in maxtext.models.gemma3) einsum() (maxtext.layers.quantizations.AqtQuantization method) (maxtext.layers.quantizations.Fp8Quantization method) (maxtext.layers.quantizations.NvidaFp8Provider method) (maxtext.layers.quantizations.Quantization method) (maxtext.layers.quantizations.QwixQuantization method) (maxtext.layers.quantizations.TransformerEngineQuantization method) elastic_enabled (maxtext.configs.types.ElasticTraining attribute) elastic_max_retries (maxtext.configs.types.ElasticTraining attribute) elastic_min_slice_count (maxtext.configs.types.ElasticTraining attribute) elastic_timeout_seconds (maxtext.configs.types.ElasticTraining attribute) ElasticTraining (class in maxtext.configs.types) emb_dim (maxtext.configs.types.DerivedValues attribute) Embed (class in maxtext.layers.embeddings) embed_as_linen() (in module maxtext.layers.embeddings) embed_dim (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchEmbed attribute) embed_input_ids() (maxtext.integration.vllm.maxtext_vllm_adapter.adapter.MaxTextForCausalLM method) embedding_dims (maxtext.layers.embeddings.PositionalEmbedding attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) embedding_norm (maxtext.layers.multi_token_prediction.MultiTokenPredictionLayer property) EmergencyCheckpointing (class in maxtext.configs.types) enable_autocheckpoint (maxtext.configs.types.Checkpointing attribute) enable_checkpoint_cloud_logger (maxtext.configs.types.Metrics attribute) enable_checkpointing (maxtext.configs.types.Checkpointing attribute) enable_continuous_checkpointing (maxtext.configs.types.Checkpointing attribute) enable_data_shuffling (maxtext.configs.types.TrainingLoop attribute) enable_diloco (maxtext.configs.types.DilocoParams attribute) enable_dp_attention (maxtext.configs.types.VLLM attribute) enable_dropout (maxtext.configs.types.TrainingLoop attribute) enable_emergency_checkpoint (maxtext.configs.types.EmergencyCheckpointing attribute) enable_expert_parallel (maxtext.configs.types.VLLM attribute) enable_gcp_goodput_metrics (maxtext.configs.types.Goodput attribute) enable_gcp_step_deviation_metrics (maxtext.configs.types.Goodput attribute) enable_goodput_recording (maxtext.configs.types.Goodput attribute) enable_jax_profiler (maxtext.configs.types.Profiling attribute) enable_llm_inference_pool (maxtext.configs.types.InferenceGeneral attribute) enable_model_warmup (maxtext.configs.types.InferenceGeneral attribute) enable_multi_tier_checkpointing (maxtext.configs.types.EmergencyCheckpointing attribute) enable_nnx (maxtext.configs.types.HardwareAndMesh attribute) enable_orbax_v1 (maxtext.configs.types.Checkpointing attribute) enable_padding_causal_mask (maxtext.configs.types.Attention attribute) enable_pathways_goodput (maxtext.configs.types.Goodput attribute) enable_prefix_caching (maxtext.configs.types.PrefixCaching attribute) enable_rampup_batch_size (maxtext.configs.types.DatasetGeneral attribute) enable_single_controller (maxtext.configs.types.DevelopmentAndDebugging attribute) enable_single_replica_ckpt_restoring (maxtext.configs.types.Checkpointing attribute) enable_tensorboard (maxtext.configs.types.Tensorboard attribute) enable_tpu_profiling_options (maxtext.configs.types.Profiling attribute) enable_tunix_perf_metrics (maxtext.configs.types.Metrics attribute) encode() (maxtext.input_pipeline.tokenizer.HFTokenizer method) (maxtext.input_pipeline.tokenizer.SentencePieceTokenizer method) (maxtext.input_pipeline.tokenizer.TikTokenTokenizer method) Encoder (class in maxtext.models.gemma3) Encoder1DBlock (class in maxtext.models.gemma3) encoder_attention_heads_for_audio (maxtext.configs.types.AudioEncoder attribute) encoder_ffn_dim_for_audio (maxtext.configs.types.AudioEncoder attribute) encoder_layers_for_audio (maxtext.configs.types.AudioEncoder attribute) end (maxtext.input_pipeline.olmo_data.RepetitionTuple attribute) Engram (class in maxtext.configs.types) (class in maxtext.layers.engram) engram (maxtext.configs.types.RematAndOffload attribute) engram_head_dim (maxtext.configs.types.Engram attribute) engram_kernel_size (maxtext.configs.types.Engram attribute) engram_layers (maxtext.configs.types.Engram attribute) engram_max_ngram_size (maxtext.configs.types.Engram attribute) engram_num_heads (maxtext.configs.types.Engram attribute) engram_op() (maxtext.models.deepseek.DeepSeekGenericLayer method) engram_seed (maxtext.configs.types.Engram attribute) engram_vocab_bases (maxtext.configs.types.Engram attribute) eps (maxtext.layers.attentions.L2Norm attribute) (maxtext.layers.normalizations.Qwen3NextRMSNormGated attribute) epsilon_high (maxtext.configs.types.RL attribute) estimate_kolmogorov() (in module maxtext.experimental.agent.ckpt_conversion_agent.evaluation) eval_corr_lst (maxtext.configs.types.RLEvaluation attribute) eval_data_columns (maxtext.configs.types.DatasetGeneral attribute) eval_dataset_name (maxtext.configs.types.TfdsDataset attribute) eval_image_column (maxtext.configs.types.DatasetGeneral attribute) eval_interval (maxtext.configs.types.TrainingLoop attribute) eval_make_lst (maxtext.configs.types.RLEvaluation attribute) eval_mode (maxtext.configs.types.RLEvaluation attribute) eval_per_device_batch_size (maxtext.configs.types.DatasetGeneral attribute) eval_sampling_strategy (maxtext.configs.types.RLEvaluation attribute) eval_split (maxtext.configs.types.TfdsDataset attribute) eval_steps (maxtext.configs.types.TrainingLoop attribute) Example (class in maxtext.input_pipeline.protos.example_pb2) expansion_factor_real_data (maxtext.configs.types.DatasetGeneral attribute) expert_group_mask() (in module maxtext.models.deepseek_batchsplit) (maxtext.layers.moe.RoutedMoE method) expert_indices_and_weights() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) expert_selection() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) extract_content() (in module maxtext.input_pipeline.distillation_data_processing) extract_layer_weights() (in module maxtext.models.deepseek_batchsplit) extract_linen_weights() (in module maxtext.checkpoint_conversion.utils.utils) extract_nnx_weights() (in module maxtext.checkpoint_conversion.utils.utils) extract_reasoning_and_answer() (in module maxtext.input_pipeline.instruction_data_processing) extract_token_ids() (in module maxtext.input_pipeline.input_pipeline_utils) ExtractImagesAndMasks (class in maxtext.input_pipeline.input_pipeline_utils) F factorized_posemb() (in module maxtext.models.gemma4_vision) Feature (class in maxtext.input_pipeline.protos.feature_pb2) feature_names (maxtext.input_pipeline.grain_tokenizer.TokenizerTransformBase attribute) FeatureList (class in maxtext.input_pipeline.protos.feature_pb2) FeatureLists (class in maxtext.input_pipeline.protos.feature_pb2) FeatureLists.FeatureListEntry (class in maxtext.input_pipeline.protos.feature_pb2) Features (class in maxtext.input_pipeline.protos.feature_pb2) Features.FeatureEntry (class in maxtext.input_pipeline.protos.feature_pb2) fetch_active_stage_weights() (maxtext.layers.pipeline.CircularPipeline method) fetch_weights() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) files (maxtext.input_pipeline.olmo_data.OlmoNpyIndex attribute) filter_dataset() (in module maxtext.input_pipeline.distillation_data_processing) final_logits_soft_cap (maxtext.configs.types.Logits attribute) find_data_files() (in module maxtext.input_pipeline.grain_data_processing) find_periodic_sequences() (in module maxtext.input_pipeline.olmo_data) find_supported_resolutions() (in module maxtext.multimodal.processor_llama4) FineTuning (class in maxtext.configs.types) fingerprint (maxtext.input_pipeline.olmo_data.OlmoNpyIndex attribute) first_num_dense_layers (maxtext.configs.types.DeepSeekMoE attribute) flash_attention_block_masked() (in module maxtext.kernels.attention.jax_flash_attention) flash_attention_kernel() (in module maxtext.kernels.attention.splash_attention_kernel) flat_map() (maxtext.input_pipeline.grain_tokenizer.TokenizeAndChunk method) FLOAT16 (maxtext.configs.types.DType attribute) FLOAT32 (maxtext.configs.types.DType attribute) (maxtext.configs.types.MatmulPrecision attribute) float32_gate_logits (maxtext.configs.types.MoEGeneral attribute) float32_logits (maxtext.configs.types.Attention attribute) (maxtext.layers.attentions.Attention attribute) (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) float32_qk_product (maxtext.configs.types.Attention attribute) (maxtext.layers.attentions.Attention attribute) (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) float32_weight_sum (maxtext.configs.types.MoEGeneral attribute) FloatList (class in maxtext.input_pipeline.protos.feature_pb2) FoldImagesIntoBatch (class in maxtext.input_pipeline.input_pipeline_utils) force_q_layout (maxtext.configs.types.Attention attribute) force_unroll (maxtext.configs.types.Checkpointing attribute) format_and_batch() (in module maxtext.input_pipeline.data_processing_utils) format_fn() (in module maxtext.input_pipeline.tfds_data_processing_c4_mlperf) format_meter() (maxtext.checkpoint_conversion.utils.utils.MemoryMonitorTqdm method) format_version (maxtext.input_pipeline.olmo_data.OlmoNpyIndex attribute) formatting_func_kwargs (maxtext.configs.types.FineTuning attribute) formatting_func_path (maxtext.configs.types.FineTuning attribute) forward() (maxtext.integration.vllm.maxtext_vllm_adapter.adapter.MaxTextForCausalLM method) forward_serve_vllm() (maxtext.layers.attentions.Attention method) FP8 (maxtext.configs.types.QuantizationType attribute) FP8_FULL (maxtext.configs.types.QuantizationType attribute) FP8_GPU (maxtext.configs.types.QuantizationType attribute) FP8_NANO_V2 (maxtext.configs.types.QuantizationType attribute) Fp8Einsum (class in maxtext.layers.quantizations) Fp8Quantization (class in maxtext.layers.quantizations) fprop_dtype (maxtext.layers.embeddings.LlamaVisionRotaryEmbedding attribute) (maxtext.layers.embeddings.PositionalEmbedding attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionPosEmbedInterpolate attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionRotaryEmbedding attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) freeze_audio_encoder_params (maxtext.configs.types.MultimodalGeneral attribute) freeze_vision_encoder_params (maxtext.configs.types.MultimodalGeneral attribute) freqs_cis (maxtext.layers.embeddings.LlamaVisionRotaryEmbedding property) (maxtext.layers.embeddings.YarnRotaryEmbedding property) from_all_variables_to_repeat_weights() (maxtext.layers.pipeline.CircularPipeline method) from_head_minor() (in module maxtext.kernels.attention.splash_attention_kernel) from_repeat_weights_to_bsw() (maxtext.layers.pipeline.CircularPipeline method) FULL (maxtext.configs.types.RematPolicy attribute) fused_mlp (maxtext.configs.types.ModelArchitecture attribute) fused_moe_matmul() (maxtext.layers.moe.RoutedMoE method) fused_qkv (maxtext.configs.types.ModelArchitecture attribute) (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) fwd_lhs (maxtext.layers.quantizations.PerTensorScales attribute) fwd_lhs_bound (maxtext.layers.quantizations.ConstantBoundConfig attribute) fwd_rhs (maxtext.layers.quantizations.PerTensorScales attribute) fwd_rhs_bound (maxtext.layers.quantizations.ConstantBoundConfig attribute) G GateLogit (class in maxtext.layers.moe) gather_microbatch_inputs_vmap() (maxtext.layers.pipeline.CircularPipeline method) gather_weights() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) gather_weights_across_stages_vmap() (maxtext.layers.pipeline.CircularPipeline method) GcpMonitoring (class in maxtext.configs.types) gcs_debug_path (maxtext.configs.types.VLLM attribute) gcs_metrics (maxtext.configs.types.Metrics attribute) GCSTFRecordIterDataset (class in maxtext.input_pipeline.input_pipeline_utils) gdn_chunk_size (maxtext.configs.types.Qwen3Next attribute) gdn_conv_kernel_dim (maxtext.configs.types.Qwen3Next attribute) gdn_key_head_dim (maxtext.configs.types.Qwen3Next attribute) gdn_num_key_heads (maxtext.configs.types.Qwen3Next attribute) gdn_num_value_heads (maxtext.configs.types.Qwen3Next attribute) gdn_value_head_dim (maxtext.configs.types.Qwen3Next attribute) GEMMA2_HF_WEIGHTS_TO_SHAPE() (in module maxtext.checkpoint_conversion.utils.hf_shape) (in module maxtext.experimental.agent.ckpt_conversion_agent.baselines.context.hf_shape) GEMMA2_MAXTEXT_TO_HF_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) (in module maxtext.experimental.agent.ckpt_conversion_agent.baselines.context.param_mapping) GEMMA2_MAXTEXT_TO_HF_PARAM_MAPPING() (in module maxtext.checkpoint_conversion.utils.param_mapping) (in module maxtext.experimental.agent.ckpt_conversion_agent.baselines.context.param_mapping) Gemma2DecoderLayer (class in maxtext.models.gemma2) GEMMA3_HF_WEIGHTS_TO_SHAPE() (in module maxtext.checkpoint_conversion.utils.hf_shape) GEMMA3_MAXTEXT_TO_HF_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) (in module maxtext.experimental.agent.ckpt_conversion_agent.ground_truth.gemma3) GEMMA3_MAXTEXT_TO_HF_PARAM_MAPPING() (in module maxtext.checkpoint_conversion.utils.param_mapping) Gemma3DecoderLayer (class in maxtext.models.gemma3) Gemma3PreprocessorOutput (class in maxtext.multimodal.processor_gemma3) Gemma3ScannableBlock (class in maxtext.models.gemma3) gemma3visionencoder_as_linen() (in module maxtext.models.gemma3) Gemma3VisionEncoderLayer (class in maxtext.models.gemma3) GEMMA4_HF_WEIGHTS_TO_SHAPE() (in module maxtext.checkpoint_conversion.utils.hf_shape) GEMMA4_MAXTEXT_TO_HF_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) GEMMA4_MAXTEXT_TO_HF_PARAM_MAPPING() (in module maxtext.checkpoint_conversion.utils.param_mapping) gemma4_vision_encoder_as_linen() (in module maxtext.models.gemma4_vision) Gemma4Attention (class in maxtext.models.gemma4_vision) Gemma4DecoderLayer (class in maxtext.models.gemma4) Gemma4EncoderBlock (class in maxtext.models.gemma4_vision) Gemma4MaxTextToVLLMConverter (class in maxtext.integration.vllm.torchax_converter.gemma4_moe) Gemma4MoE (class in maxtext.models.gemma4) Gemma4PartialRotaryEmbedding (class in maxtext.layers.embeddings) Gemma4PreprocessorOutput (class in maxtext.multimodal.processor_gemma4) Gemma4ScannableBlock (class in maxtext.models.gemma4) Gemma4VisionEncoderLayer (class in maxtext.models.gemma4_vision) Gemma4VisionProjector (class in maxtext.models.gemma4_vision) Gemma4VisionRotaryEmbedding (class in maxtext.models.gemma4_vision) GemmaDecoderLayer (class in maxtext.models.gemma) generate_attention_mask() (maxtext.layers.attention_op.AttentionOp method) generate_hook_functions() (maxtext.experimental.agent.ckpt_conversion_agent.transformation.TransformationAgent method) generate_mask() (maxtext.layers.attention_mla.Indexer method) generate_masks() (maxtext.layers.moe.RoutedMoE method) generate_masks_subgroup() (maxtext.layers.moe.RoutedMoE method) generate_maxtext_config() (in module maxtext.integration.vllm.maxtext_vllm_adapter.adapter) generate_moba_mask_single_item() (maxtext.layers.attention_op.AttentionOp method) generate_padding_batch_eval (maxtext.configs.types.DatasetGeneral attribute) generate_padding_batch_train (maxtext.configs.types.DatasetGeneral attribute) generate_param_mapping() (maxtext.experimental.agent.ckpt_conversion_agent.mapping.MappingAgent method) generate_shape_mapping() (maxtext.experimental.agent.ckpt_conversion_agent.mapping.MappingAgent method) generate_slice (maxtext.configs.types.InferenceServer attribute) generate_text() (maxtext.experimental.agent.ckpt_conversion_agent.base.BaseAgent method) generation_configs (maxtext.configs.types.RLEvaluation attribute) get_all_to_all_params() (maxtext.layers.moe.RoutedMoE static method) get_attention_type() (in module maxtext.models.gemma3) (in module maxtext.models.gemma4) (in module maxtext.models.gpt_oss) (in module maxtext.models.olmo3) get_batchsplit_init_kernel_axes() (in module maxtext.layers.moe) get_best_resolution() (in module maxtext.multimodal.processor_llama4) get_bidirectional_mask_audio() (in module maxtext.multimodal.processor) get_bidirectional_mask_vision() (in module maxtext.multimodal.processor) get_block_size() (maxtext.layers.quantizations.TransformerEngineQuantization method) get_chunked_index() (in module maxtext.multimodal.processor_qwen3_omni) get_col_name() (in module maxtext.layers.nnx_wrappers) get_context_autoregressive_parallelism_size() (maxtext.layers.moe.RoutedMoE method) get_context_parallel_size() (in module maxtext.configs.pyconfig_deprecated) get_context_partition_and_sub_seq() (maxtext.layers.moe.RoutedMoE method) get_current_repeat_from_stages() (maxtext.layers.pipeline.Pipeline method) (maxtext.layers.pipeline_deprecated.Pipeline method) get_current_stage_weights() (maxtext.layers.pipeline.Pipeline method) (maxtext.layers.pipeline_deprecated.Pipeline method) get_current_weights_from_bsw() (maxtext.layers.pipeline.CircularPipeline method) get_dataset() (in module maxtext.input_pipeline.tfds_data_processing_c4_mlperf) get_datasets() (in module maxtext.input_pipeline.grain_data_processing) (in module maxtext.input_pipeline.tfds_data_processing) get_decoder_layers() (maxtext.layers.decoders.Decoder method) (maxtext.layers.nnx_decoders.NNXDecoder method) get_default() (maxtext.kernels.attention.splash_attention_kernel.BlockSizes class method) get_dummy_audio_shape_for_init() (in module maxtext.multimodal.processor) get_dummy_audio_shape_for_init_qwen3_omni() (in module maxtext.multimodal.processor_qwen3_omni) get_dummy_image_shape_for_init() (in module maxtext.multimodal.processor) get_dummy_image_shape_for_init_gemma3() (in module maxtext.multimodal.processor_gemma3) get_dummy_image_shape_for_init_gemma4() (in module maxtext.multimodal.processor_gemma4) get_dummy_image_shape_for_init_llama4() (in module maxtext.multimodal.processor_llama4) get_dummy_image_shape_for_init_qwen3_omni() (in module maxtext.multimodal.processor_qwen3_omni) get_einsum() (maxtext.layers.moe.RoutedMoE method) get_expert_parallelism_size() (maxtext.layers.moe.RoutedMoE method) get_factors() (in module maxtext.multimodal.processor_llama4) get_fp8_full_qwix_rule_w_sparsity() (in module maxtext.layers.quantizations) get_functions() (in module maxtext.layers.mhc) get_gate_logit() (in module maxtext.layers.moe) get_hf_model_state_dict() (in module maxtext.checkpoint_conversion.compare_hf_ckpt) get_image_offsets() (in module maxtext.multimodal.processor) get_image_offsets_gemma3() (in module maxtext.multimodal.processor_gemma3) get_image_offsets_gemma4() (in module maxtext.multimodal.processor_gemma4) get_image_offsets_llama4() (in module maxtext.multimodal.processor_llama4) get_individual_scales() (in module maxtext.configs.pyconfig_deprecated) (in module maxtext.configs.types) get_input_embeddings() (maxtext.integration.vllm.maxtext_vllm_adapter.adapter.MaxTextForCausalLM method) get_iteration_inputs() (maxtext.layers.pipeline.PipelineBase method) (maxtext.layers.pipeline_deprecated.Pipeline method) get_kernel_name() (in module maxtext.kernels.attention.splash_attention_kernel) get_key_value() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) get_keys() (maxtext.configs.pyconfig_deprecated.HyperParameters method) get_llm_pos_ids_for_vision() (in module maxtext.multimodal.processor_qwen3_omni) get_local_batch_size() (in module maxtext.input_pipeline.data_processing_utils) get_local_save_path_manager() (in module maxtext.checkpoint_conversion.utils.utils) get_logical_spec_repeats_removed() (maxtext.layers.pipeline.Pipeline static method) (maxtext.layers.pipeline_deprecated.Pipeline static method) get_logits_comparison_metrics() (in module maxtext.checkpoint_conversion.utils.hf_utils) get_main_vmap_func_for_iterations() (maxtext.layers.pipeline.PipelineBase method) (maxtext.layers.pipeline_deprecated.Pipeline method) get_maxtext_model_info() (in module maxtext.checkpoint_conversion.to_maxtext) get_mha_cost_estimate() (in module maxtext.kernels.attention.ragged_attention) get_microbatch_and_repeat_ids() (maxtext.layers.pipeline.PipelineBase method) (maxtext.layers.pipeline_deprecated.Pipeline method) get_mm_offsets_qwen3_omni() (in module maxtext.multimodal.processor_qwen3_omni) get_new_loop_state() (maxtext.layers.pipeline.Pipeline method) (maxtext.layers.pipeline_deprecated.Pipeline method) get_norm_layer() (maxtext.layers.decoders.Decoder method) (maxtext.layers.linears.MlpBlock method) (maxtext.layers.nnx_decoders.NNXDecoder method) get_num_target_devices() (in module maxtext.configs.pyconfig_deprecated) get_num_tokens_for_this_image() (in module maxtext.multimodal.processor_llama4) get_pipeline_remat_policy() (maxtext.layers.pipeline.PipelineBase method) (maxtext.layers.pipeline_deprecated.Pipeline method) get_pipeline_stage_module() (maxtext.layers.decoders.Decoder method) get_place_holder_synthetic_data() (maxtext.input_pipeline.synthetic_data_processing.PlaceHolderDataIterator static method) get_process_loading_real_data() (in module maxtext.input_pipeline.input_pipeline_interface) get_qt_provider() (in module maxtext.layers.quantizations) get_quant_mode() (in module maxtext.layers.quantizations) get_quantization_local_shard_count() (in module maxtext.configs.pyconfig_deprecated) get_quantization_rule() (in module maxtext.layers.quantizations) get_query_pre_attn_scalar() (in module maxtext.models.gemma3) get_ragged_buffer_size() (maxtext.layers.moe.RoutedMoE static method) get_remat_policy() (maxtext.layers.decoders.Decoder method) (maxtext.layers.nnx_decoders.NNXDecoder method) get_rope_index() (in module maxtext.multimodal.processor_qwen3_omni) get_routed_and_shared_moe() (in module maxtext.layers.moe) get_routed_moe() (in module maxtext.layers.moe) get_template_placeholders() (in module maxtext.input_pipeline.instruction_data_processing) get_tensor() (maxtext.checkpoint_conversion.to_maxtext.LazyHFLoader method) get_tensor_parallelism_size() (maxtext.layers.moe.RoutedMoE method) get_tensor_transpose_parallelism_size() (maxtext.layers.moe.RoutedMoE method) get_tokenizer() (in module maxtext.input_pipeline.input_pipeline_utils) get_tokenizer_and_pad_id() (in module maxtext.input_pipeline.data_processing_utils) get_tokens_for_this_image() (in module maxtext.multimodal.processor_llama4) get_topk() (maxtext.layers.moe.RoutedMoE method) get_vmap_func_for_init() (maxtext.layers.pipeline.PipelineBase method) (maxtext.layers.pipeline_deprecated.Pipeline method) get_vocab_sizes() (maxtext.layers.engram.NgramHashMapping method) get_weight_sharding() (maxtext.layers.pipeline.PipelineBase method) (maxtext.layers.pipeline_deprecated.Pipeline method) global_batch_size_to_eval_on (maxtext.configs.types.DerivedValues attribute) global_batch_size_to_load (maxtext.configs.types.DerivedValues attribute) global_batch_size_to_load_eval (maxtext.configs.types.DerivedValues attribute) global_batch_size_to_load_increment (maxtext.configs.types.DerivedValues attribute) global_batch_size_to_load_start (maxtext.configs.types.DerivedValues attribute) global_batch_size_to_train_on (maxtext.configs.types.DerivedValues attribute) global_head_dim (maxtext.configs.types.ModelArchitecture attribute) global_mesh (maxtext.experimental.rl.grpo_input_pipeline.SingleHostDataLoader attribute) global_num_kv_heads (maxtext.configs.types.Attention attribute) global_parameter_scale (maxtext.configs.types.ModelArchitecture attribute) global_rampup_samples (maxtext.configs.types.DatasetGeneral attribute) global_rope_max_timescale (maxtext.configs.types.Rope attribute) global_rope_proportion (maxtext.configs.types.Rope attribute) global_to_local() (in module maxtext.input_pipeline.olmo_data) GlobalRMSNorm (class in maxtext.layers.normalizations) gmm() (in module maxtext.kernels.megablox.backend) (in module maxtext.kernels.megablox.ops) (in module maxtext.models.deepseek_batchsplit) Goodput (class in maxtext.configs.types) goodput_upload_interval_seconds (maxtext.configs.types.Goodput attribute) gpt3_layer_norm() (in module maxtext.models.gpt3) Gpt3DecoderLayer (class in maxtext.models.gpt3) Gpt3LayerNorm (class in maxtext.models.gpt3) Gpt3MultiHeadAttention (class in maxtext.models.gpt3) GPT_OSS_HF_WEIGHTS_TO_SHAPE() (in module maxtext.checkpoint_conversion.utils.hf_shape) GPT_OSS_MAXTEXT_TO_HF_PARAM_MAPPING() (in module maxtext.checkpoint_conversion.utils.param_mapping) GPT_OSS_TO_HF_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) GPT_OSS_VLLM_MAPPING (class in maxtext.integration.tunix.weight_mapping.gpt_oss) GptOssDecoderLayer (class in maxtext.models.gpt_oss) GptOssScannableBlock (class in maxtext.models.gpt_oss) gpu_ragged_attention() (maxtext.layers.attention_op.AttentionOp method) grad_dtype (maxtext.configs.types.DataTypes attribute) gradient_accumulation_steps (maxtext.configs.types.Optimizer attribute) gradient_clipping_threshold (maxtext.configs.types.Optimizer attribute) GRAIN (maxtext.configs.types.DatasetType attribute) grain_data_source_max_workers (maxtext.configs.types.GrainDataset attribute) grain_eval_files (maxtext.configs.types.GrainDataset attribute) grain_file_type (maxtext.configs.types.GrainDataset attribute) grain_num_threads (maxtext.configs.types.GrainDataset attribute) grain_num_threads_eval (maxtext.configs.types.GrainDataset attribute) grain_packing_type (maxtext.configs.types.DatasetGeneral attribute) grain_per_worker_buffer_size (maxtext.configs.types.GrainDataset attribute) grain_per_worker_buffer_size_eval (maxtext.configs.types.GrainDataset attribute) grain_prefetch_buffer_size (maxtext.configs.types.GrainDataset attribute) grain_prefetch_buffer_size_eval (maxtext.configs.types.GrainDataset attribute) grain_ram_budget_mb (maxtext.configs.types.GrainDataset attribute) grain_shuffle_buffer_size (maxtext.configs.types.GrainDataset attribute) grain_train_files (maxtext.configs.types.GrainDataset attribute) grain_train_mixture_config_path (maxtext.configs.types.GrainDataset attribute) grain_use_elastic_iterator (maxtext.configs.types.GrainDataset attribute) grain_worker_count (maxtext.configs.types.GrainDataset attribute) grain_worker_count_eval (maxtext.configs.types.GrainDataset attribute) GrainDataset (class in maxtext.configs.types) GREEDY (maxtext.configs.types.SamplingStrategy attribute) grpo_beta (maxtext.configs.types.RL attribute) grpo_epsilon (maxtext.configs.types.RL attribute) H hardware (maxtext.configs.types.HardwareAndMesh attribute) HardwareAndMesh (class in maxtext.configs.types) has_backward_blocks (maxtext.kernels.attention.splash_attention_kernel.BlockSizes property) has_npy_magic() (in module maxtext.input_pipeline.olmo_data) hbm_utilization_vllm (maxtext.configs.types.VLLM attribute) head_dim (maxtext.configs.types.ModelArchitecture attribute) (maxtext.layers.attentions.Attention attribute) (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) HEAD_DIM_MINOR (maxtext.kernels.attention.splash_attention_kernel.QKVLayout attribute) HEADS_AND_DKV (maxtext.configs.types.KvQuantAxis attribute) heartbeat_reporting_interval_in_seconds (maxtext.configs.types.GcpMonitoring attribute) hertz_to_mel() (in module maxtext.multimodal.utils) HF (maxtext.configs.types.DatasetType attribute) hf_access_token (maxtext.configs.types.HfDataset attribute) hf_data_dir (maxtext.configs.types.HfDataset attribute) hf_eval_files (maxtext.configs.types.HfDataset attribute) hf_eval_split (maxtext.configs.types.HfDataset attribute) hf_name (maxtext.configs.types.HfDataset attribute) hf_path (maxtext.configs.types.HfDataset attribute) hf_train_files (maxtext.configs.types.HfDataset attribute) HfDataset (class in maxtext.configs.types) HFDataSource (class in maxtext.input_pipeline.input_pipeline_utils) HFNormalizeFeatures (class in maxtext.input_pipeline.input_pipeline_utils) HFTokenizer (class in maxtext.input_pipeline.tokenizer) hidden_size (maxtext.layers.embeddings.LlamaVisionRotaryEmbedding attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionPosEmbedInterpolate attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionRotaryEmbedding attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionMLP attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchMerger attribute) hidden_size_for_vit (maxtext.configs.types.VisionTower attribute) hidden_state_norm (maxtext.layers.multi_token_prediction.MultiTokenPredictionLayer property) hide_profiler_step_metric (maxtext.configs.types.Profiling attribute) HIGH (maxtext.configs.types.MatmulPrecision attribute) HIGHEST (maxtext.configs.types.MatmulPrecision attribute) HloDump (class in maxtext.configs.types) HUGGINGFACE (maxtext.configs.types.TokenizerType attribute) HyperParameters (class in maxtext.configs.pyconfig_deprecated) I ici_autoregressive_parallelism (maxtext.configs.types.IciParallelism attribute) ici_context_autoregressive_parallelism (maxtext.configs.types.IciParallelism attribute) ici_context_parallelism (maxtext.configs.types.IciParallelism attribute) ici_data_parallelism (maxtext.configs.types.IciParallelism attribute) ici_diloco_parallelism (maxtext.configs.types.IciParallelism attribute) ici_expert_parallelism (maxtext.configs.types.IciParallelism attribute) ici_fsdp_parallelism (maxtext.configs.types.IciParallelism attribute) ici_fsdp_transpose_parallelism (maxtext.configs.types.IciParallelism attribute) ici_parallelism (maxtext.configs.types.DerivedValues attribute) ici_pipeline_parallelism (maxtext.configs.types.IciParallelism attribute) ici_sequence_parallelism (maxtext.configs.types.IciParallelism attribute) ici_tensor_parallelism (maxtext.configs.types.IciParallelism attribute) ici_tensor_sequence_parallelism (maxtext.configs.types.IciParallelism attribute) ici_tensor_transpose_parallelism (maxtext.configs.types.IciParallelism attribute) IciParallelism (class in maxtext.configs.types) image_path (maxtext.configs.types.MultimodalGeneral attribute) image_placeholder (maxtext.configs.types.MultimodalGeneral attribute) image_size (maxtext.layers.embeddings.LlamaVisionRotaryEmbedding attribute) image_size_for_vit (maxtext.configs.types.MultimodalGeneral attribute) in_channels (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchEmbed attribute) in_convert_mode() (in module maxtext.layers.quantizations) in_serve_mode() (in module maxtext.layers.quantizations) Indexer (class in maxtext.layers.attention_mla) indexer_head_dim (maxtext.configs.types.AttentionIndexer attribute) indexer_loss_scaling_factor (maxtext.configs.types.AttentionIndexer attribute) indexer_n_heads (maxtext.configs.types.AttentionIndexer attribute) indexer_sparse_training (maxtext.configs.types.AttentionIndexer attribute) indexer_topk (maxtext.configs.types.AttentionIndexer attribute) inference_benchmark_test (maxtext.configs.types.InferenceBenchmark attribute) inference_metadata_file (maxtext.configs.types.InferenceBenchmark attribute) inference_microbenchmark_log_file_path (maxtext.configs.types.InferenceBenchmark attribute) inference_microbenchmark_loop_iters (maxtext.configs.types.InferenceBenchmark attribute) inference_microbenchmark_num_samples (maxtext.configs.types.InferenceBenchmark attribute) inference_microbenchmark_prefill_lengths (maxtext.configs.types.InferenceBenchmark attribute) inference_microbenchmark_stages (maxtext.configs.types.InferenceBenchmark attribute) inference_server (maxtext.configs.types.InferenceServer attribute) InferenceBenchmark (class in maxtext.configs.types) InferenceGeneral (class in maxtext.configs.types) InferenceLayout (class in maxtext.configs.types) InferenceServer (class in maxtext.configs.types) inhomogeneous_layer_cycle_interval (maxtext.configs.types.HardwareAndMesh attribute) init() (maxtext.models.models.TransformerLinen method) (maxtext.models.models.TransformerLinenPure method) init_cache() (maxtext.models.models.Transformer method) init_indexer_cache() (maxtext.layers.attention_mla.MLA method) init_kv_caches() (maxtext.layers.attentions.Attention method) init_kv_w() (maxtext.layers.attentions.Attention method) init_mla_kv_caches() (maxtext.layers.attention_mla.MLA method) init_out_w() (maxtext.layers.attentions.Attention method) (maxtext.models.qwen3_custom.Qwen3CustomAttention method) init_qkv_w() (maxtext.layers.attentions.Attention method) init_query_w() (maxtext.layers.attentions.Attention method) init_rotary_embedding() (maxtext.layers.attentions.Attention method) (maxtext.models.gemma4_vision.Gemma4Attention method) init_splash_kernel() (in module maxtext.models.deepseek_batchsplit) init_states() (maxtext.layers.pipeline.CircularPipeline method) (maxtext.layers.pipeline.Pipeline method) (maxtext.layers.pipeline_deprecated.Pipeline method) init_weights_seed (maxtext.configs.types.TrainingLoop attribute) initialize() (in module maxtext.configs.pyconfig) (in module maxtext.configs.pyconfig_deprecated) initialize_pydantic() (in module maxtext.configs.pyconfig) initialize_yarn_freqs() (in module maxtext.models.deepseek_batchsplit) initialize_yarn_mask() (in module maxtext.models.deepseek_batchsplit) input_data_sharding_logical_axes (maxtext.configs.types.LayoutAndSharding attribute) InputRequest (class in maxtext.input_pipeline.distillation_data_processing) inputs_kv_shape (maxtext.layers.attentions.Attention attribute) inputs_q_shape (maxtext.layers.attentions.Attention attribute) insert_layer_ws_grad() (in module maxtext.models.deepseek_batchsplit) insert_sequence() (in module maxtext.multimodal.processor_gemma3) (in module maxtext.multimodal.processor_gemma4) instance_offset (maxtext.input_pipeline.olmo_data.OlmoNpyFileEntry attribute) INT4 (maxtext.configs.types.QuantizationType attribute) Int64List (class in maxtext.input_pipeline.protos.feature_pb2) INT8 (maxtext.configs.types.QuantizationType attribute) interleave_moe_layer_step (maxtext.configs.types.MoEGeneral attribute) intermediate_size (maxtext.models.qwen3.Qwen3OmniMoeVisionMLP attribute) intermediate_size_for_vit (maxtext.configs.types.VisionTower attribute) internal_compile (maxtext.configs.types.LayoutAndSharding attribute) internal_compile_num_devices (maxtext.configs.types.LayoutAndSharding attribute) INTMP (maxtext.configs.types.QuantizationType attribute) is_clean_instance() (in module maxtext.input_pipeline.olmo_data) is_conversational() (in module maxtext.input_pipeline.input_pipeline_utils) is_partition_in_decode() (maxtext.layers.attention_op.AttentionOp method) is_tpu() (in module maxtext.kernels.megablox.common) is_vanilla_variable() (in module maxtext.layers.nnx_wrappers) itemsize (maxtext.checkpoint_conversion.to_maxtext.LazyTensor property) iterations_to_complete_first_microbatch() (maxtext.layers.pipeline.PipelineBase method) (maxtext.layers.pipeline_deprecated.Pipeline method) iterations_to_complete_first_microbatch_one_repeat() (maxtext.layers.pipeline.PipelineBase method) (maxtext.layers.pipeline_deprecated.Pipeline method) J jax_cache_dir (maxtext.configs.types.DevelopmentAndDebugging attribute) jax_chunk_gated_delta_rule() (in module maxtext.models.qwen3) jax_debug_log_modules (maxtext.configs.types.DevelopmentAndDebugging attribute) jax_distributed_initialization_timeout (maxtext.configs.types.DevelopmentAndDebugging attribute) jax_profiler_port (maxtext.configs.types.Profiling attribute) K k_layout (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) KeepFeatures (class in maxtext.input_pipeline.input_pipeline_utils) kernel_init (maxtext.layers.attentions.Attention attribute) (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionMLP attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchMerger attribute) key_proj (maxtext.configs.types.RematAndOffload attribute) kv (maxtext.kernels.attention.splash_attention_kernel.SegmentIds attribute) kv_cache_buffer (maxtext.configs.types.VLLM attribute) kv_lora_rank (maxtext.configs.types.MlaAttention attribute) kv_projection() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) (maxtext.layers.attentions.Attention method) kv_quant (maxtext.layers.attentions.Attention attribute) kv_quant_axis (maxtext.configs.types.Quantization attribute) kv_quant_dtype (maxtext.configs.types.Quantization attribute) kv_wa_proj (maxtext.configs.types.RematAndOffload attribute) KvQuantAxis (class in maxtext.configs.types) kwargs (maxtext.layers.nnx_wrappers.ToLinen attribute) L l2_norm_as_linen() (in module maxtext.layers.attentions) L2Norm (class in maxtext.layers.attentions) l2norm() (in module maxtext.layers.normalizations) label (maxtext.input_pipeline.olmo_data.OlmoNpyFileEntry attribute) layer_idx (maxtext.models.qwen3.Qwen3NextDecoderLayer attribute) (maxtext.models.qwen3.Qwen3NextFullAttention attribute) (maxtext.models.qwen3_5.Qwen3_5DecoderLayer attribute) layers (maxtext.layers.pipeline.PipelineBase attribute) (maxtext.layers.pipeline_deprecated.Pipeline attribute) LayoutAndSharding (class in maxtext.configs.types) lazy_init() (in module maxtext.layers.nnx_wrappers) (maxtext.layers.nnx_wrappers.ToNNX method) LazyHFLoader (class in maxtext.checkpoint_conversion.to_maxtext) LazyTensor (class in maxtext.checkpoint_conversion.to_maxtext) LazyTensorHandler (class in maxtext.checkpoint_conversion.to_maxtext) learn_to_init_mode (maxtext.configs.types.Distillation attribute) learn_to_init_wrapper (maxtext.layers.learn_to_init_layer.LearnToInitDecoderLayer attribute) learning_rate (maxtext.configs.types.Optimizer attribute) learning_rate_final_fraction (maxtext.configs.types.Optimizer attribute) learning_rate_schedule_steps (maxtext.configs.types.Optimizer attribute) LearningRateScheduleType (class in maxtext.configs.types) LearnToInitDecoderLayer (class in maxtext.layers.learn_to_init_layer) LearnToInitDense (class in maxtext.layers.learn_to_init_layer) LINEAR (maxtext.configs.types.WsdDecayStyle attribute) linear_fc1 (maxtext.models.qwen3.Qwen3OmniMoeVisionMLP attribute) linear_fc2 (maxtext.models.qwen3.Qwen3OmniMoeVisionMLP attribute) linen_rngs_dict() (in module maxtext.layers.nnx_wrappers) linen_vars_to_nnx_attrs() (in module maxtext.layers.nnx_wrappers) LLAMA31_HF_WEIGHTS_TO_SHAPE() (in module maxtext.checkpoint_conversion.utils.hf_shape) LLAMA31_MAXTEXT_TO_HF_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) (in module maxtext.experimental.agent.ckpt_conversion_agent.ground_truth.llama3) LLAMA31_MAXTEXT_TO_HF_PARAM_MAPPING() (in module maxtext.checkpoint_conversion.utils.param_mapping) LLAMA31_NNX_TO_VLLM_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) LLAMA3_1 (maxtext.configs.types.RopeType attribute) LLAMA3_VLLM_MAPPING (class in maxtext.integration.tunix.weight_mapping.llama3) Llama4Attention (class in maxtext.configs.types) Llama4DecoderLayer (class in maxtext.models.llama4) Llama4MultiModalProjector (class in maxtext.models.llama4) llama4multimodalprojector_as_linen() (in module maxtext.models.llama4) Llama4PreprocessorOutput (class in maxtext.multimodal.processor_llama4) Llama4ScannableBlock (class in maxtext.models.llama4) Llama4UnfoldConvolution (class in maxtext.models.llama4) Llama4VisionEncoder (class in maxtext.models.llama4) Llama4VisionEncoderLayer (class in maxtext.models.llama4) Llama4VisionMLP (class in maxtext.models.llama4) Llama4VisionMLP2 (class in maxtext.models.llama4) Llama4VisionModel (class in maxtext.models.llama4) llama4visionmodel_as_linen() (in module maxtext.models.llama4) Llama4VisionPixelShuffleMLP (class in maxtext.models.llama4) llama_rotary_embedding_as_linen() (in module maxtext.layers.embeddings) llama_vision_rotary_embedding_as_linen() (in module maxtext.layers.embeddings) LlamaDecoderLayer (class in maxtext.models.llama2) LlamaLTIDecoderLayer (class in maxtext.models.llama2) LLaMARotaryEmbedding (class in maxtext.layers.embeddings) LlamaVisionRotaryEmbedding (class in maxtext.layers.embeddings) ln1 (maxtext.models.qwen3.Qwen3OmniMoeVisionBlock attribute) ln2 (maxtext.models.qwen3.Qwen3OmniMoeVisionBlock attribute) ln_q (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchMerger attribute) load_audio() (in module maxtext.multimodal.utils) load_balance_loss() (maxtext.layers.moe.RoutedMoE method) load_balance_loss_weight (maxtext.configs.types.MoEGeneral attribute) load_chat_template_from_file() (in module maxtext.input_pipeline.instruction_data_processing) load_checkpoint_only_once (maxtext.configs.types.Checkpointing attribute) load_data_template_from_file() (in module maxtext.input_pipeline.instruction_data_processing) load_dataset() (in module maxtext.input_pipeline.distillation_data_processing) load_formatter() (in module maxtext.input_pipeline.instruction_data_processing) load_from_prefill_dir (maxtext.configs.types.InferenceGeneral attribute) load_full_state_path (maxtext.configs.types.Checkpointing attribute) load_hf_dict_from_safetensors() (in module maxtext.checkpoint_conversion.utils.utils) load_hf_dict_from_transformers() (in module maxtext.checkpoint_conversion.utils.utils) load_image_from_path() (in module maxtext.multimodal.utils) load_index() (in module maxtext.input_pipeline.olmo_data) load_json() (in module maxtext.experimental.agent.ckpt_conversion_agent.utils.utils) load_model_specific_defaults() (maxtext.configs.types.MaxTextConfig class method) load_orbax_checkpoint() (in module maxtext.checkpoint_conversion.utils.utils) load_parameters_path (maxtext.configs.types.Checkpointing attribute) load_prompt_template() (in module maxtext.experimental.agent.ckpt_conversion_agent.utils.utils) load_safetensors_generic() (in module maxtext.checkpoint_conversion.compare_hf_ckpt) load_text_file() (in module maxtext.experimental.agent.ckpt_conversion_agent.utils.utils) load_weights() (maxtext.integration.vllm.maxtext_vllm_adapter.adapter.MaxTextForCausalLM method) LoadBalancedCausalMask (class in maxtext.layers.attention_op) local_checkpoint_directory (maxtext.configs.types.EmergencyCheckpointing attribute) local_checkpoint_period (maxtext.configs.types.EmergencyCheckpointing attribute) local_iterator (maxtext.experimental.rl.grpo_input_pipeline.SingleHostDataLoader attribute) local_permute() (maxtext.layers.moe.RoutedMoE static method) local_rope_max_timescale (maxtext.configs.types.Rope attribute) local_rope_proportion (maxtext.configs.types.Rope attribute) log_config (maxtext.configs.types.RunInfo attribute) log_period (maxtext.configs.types.TrainingLoop attribute) logical_axis_names (maxtext.models.deepseek.DeepSeekGenericLayer property) logical_axis_rules (maxtext.configs.types.LayoutAndSharding attribute) Logits (class in maxtext.configs.types) logits_dot_in_fp32 (maxtext.configs.types.Logits attribute) logits_from_hidden_states() (maxtext.models.models.TransformerLinenPure method) logits_via_embedding (maxtext.configs.types.Logits attribute) lookup_table (maxtext.layers.engram.CompressedTokenizer attribute) lora_input_adapters_path (maxtext.configs.types.Checkpointing attribute) lora_to_hf_mappings() (maxtext.integration.tunix.tunix_adapter.TunixMaxTextAdapter method) (maxtext.integration.tunix.utils.VllmWeightMapping method) (maxtext.integration.tunix.weight_mapping.deepseek3.DEEPSEEK_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.gpt_oss.GPT_OSS_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.llama3.LLAMA3_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.qwen2.QWEN2_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.qwen3.QWEN3_VLLM_MAPPING static method) loss_algo (maxtext.configs.types.RL attribute) lr_schedule_type (maxtext.configs.types.Optimizer attribute) lti_use_general_linear_map (maxtext.configs.types.Distillation attribute) M main() (in module maxtext.checkpoint_conversion.compare_hf_ckpt) (in module maxtext.checkpoint_conversion.to_maxtext) (in module maxtext.experimental.agent.ckpt_conversion_agent.evaluation) make_attention_reference() (in module maxtext.kernels.attention.splash_attention_kernel) make_c4_mlperf_eval_iterator() (in module maxtext.input_pipeline.tfds_data_processing_c4_mlperf) make_c4_mlperf_train_iterator() (in module maxtext.input_pipeline.tfds_data_processing_c4_mlperf) make_grain_eval_iterator() (in module maxtext.input_pipeline.grain_data_processing) make_grain_train_iterator() (in module maxtext.input_pipeline.grain_data_processing) make_group_metadata() (in module maxtext.kernels.megablox.backend) make_hf_eval_iterator() (in module maxtext.input_pipeline.hf_data_processing) make_hf_train_iterator() (in module maxtext.experimental.rl.grpo_input_pipeline) (in module maxtext.input_pipeline.hf_data_processing) make_masked_mha_reference() (in module maxtext.kernels.attention.splash_attention_kernel) make_masked_mqa_reference() (in module maxtext.kernels.attention.splash_attention_kernel) make_olmo_grain_data_loader() (in module maxtext.input_pipeline.olmo_data_grain) make_olmo_grain_eval_iterator() (in module maxtext.input_pipeline.olmo_grain_data_processing) make_olmo_grain_train_iterator() (in module maxtext.input_pipeline.olmo_grain_data_processing) make_splash_mha() (in module maxtext.kernels.attention.splash_attention_kernel) make_splash_mha_single_device() (in module maxtext.kernels.attention.splash_attention_kernel) make_splash_mqa() (in module maxtext.kernels.attention.splash_attention_kernel) make_splash_mqa_single_device() (in module maxtext.kernels.attention.splash_attention_kernel) make_tfds_eval_iterator() (in module maxtext.input_pipeline.tfds_data_processing) make_tfds_train_iterator() (in module maxtext.input_pipeline.tfds_data_processing) make_tfrecord_iter_dataset() (in module maxtext.input_pipeline.input_pipeline_utils) managed_mldiagnostics (maxtext.configs.types.ManagedMLDiagnostics attribute) managed_mldiagnostics_dir (maxtext.configs.types.DerivedValues attribute) managed_mldiagnostics_run_group (maxtext.configs.types.ManagedMLDiagnostics attribute) ManagedMLDiagnostics (class in maxtext.configs.types) ManifoldConstrainedHyperConnections (class in maxtext.configs.types) (class in maxtext.layers.mhc) manual_bwd() (maxtext.kernels.attention.splash_attention_kernel.SplashAttentionKernel method) manual_fwd() (maxtext.kernels.attention.splash_attention_kernel.SplashAttentionKernel method) manual_quantize() (in module maxtext.layers.quantizations) manual_sharding_spec() (maxtext.kernels.attention.splash_attention_kernel.SplashAttentionKernel method) map() (maxtext.input_pipeline.grain_tokenizer.TokenizeAndTrim method) (maxtext.input_pipeline.input_pipeline_utils.ComputeQwen3OmniPositions method) (maxtext.input_pipeline.input_pipeline_utils.ExtractImagesAndMasks method) (maxtext.input_pipeline.input_pipeline_utils.FoldImagesIntoBatch method) (maxtext.input_pipeline.input_pipeline_utils.HFNormalizeFeatures method) (maxtext.input_pipeline.input_pipeline_utils.KeepFeatures method) (maxtext.input_pipeline.input_pipeline_utils.NormalizeFeatures method) (maxtext.input_pipeline.input_pipeline_utils.PadOrTrimToMaxLength method) (maxtext.input_pipeline.input_pipeline_utils.ParseFeatures method) (maxtext.input_pipeline.input_pipeline_utils.ReformatPacking method) (maxtext.input_pipeline.input_pipeline_utils.Rekey method) (maxtext.input_pipeline.input_pipeline_utils.SFTPromptMasking method) (maxtext.input_pipeline.input_pipeline_utils.SFTPromptMaskingVision method) (maxtext.input_pipeline.input_pipeline_utils.ShiftData method) (maxtext.input_pipeline.olmo_data_grain.NgramFilterTransform method) (maxtext.input_pipeline.olmo_data_grain.ShiftToInputsTargets method) map_to_prompt_completion() (in module maxtext.input_pipeline.distillation_data_processing) mapping() (maxtext.layers.mhc.ManifoldConstrainedHyperConnections method) MappingAgent (class in maxtext.experimental.agent.ckpt_conversion_agent.mapping) mask_blocker() (in module maxtext.kernels.attention.jax_flash_attention) match_aqt_and_unquantized_param() (in module maxtext.layers.quantizations) math_qa_formatting() (in module maxtext.input_pipeline.instruction_data_processing) math_verify_num_procs (maxtext.configs.types.Reward attribute) math_verify_timeout (maxtext.configs.types.Reward attribute) matmul_precision (maxtext.configs.types.DataTypes attribute) MatmulPrecision (class in maxtext.configs.types) max_checkify (maxtext.configs.types.DevelopmentAndDebugging attribute) max_concurrency (maxtext.configs.types.RL attribute) max_corpus_chars (maxtext.configs.types.DatasetGeneral attribute) max_fan_out (maxtext.input_pipeline.grain_tokenizer.TokenizeAndChunk attribute) max_num_batched_tokens (maxtext.configs.types.VLLM attribute) max_num_checkpoints_to_keep (maxtext.configs.types.Checkpointing attribute) max_num_images_per_example (maxtext.configs.types.MultimodalGeneral attribute) max_num_seqs (maxtext.configs.types.VLLM attribute) max_output_tokens (maxtext.input_pipeline.distillation_data_processing.InputRequest attribute) max_position_embeddings (maxtext.configs.types.YarnRope attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) max_prefill_predict_length (maxtext.configs.types.InferenceGeneral attribute) (maxtext.layers.attentions.Attention attribute) (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) max_sample_len_for_audio (maxtext.configs.types.AudioEncoder attribute) max_segments_per_seq (maxtext.configs.types.DatasetGeneral attribute) max_source_positions_for_audio (maxtext.configs.types.AudioEncoder attribute) max_target_length (maxtext.configs.types.InferenceGeneral attribute) (maxtext.layers.attentions.Attention attribute) (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) max_timescale_for_audio (maxtext.configs.types.AudioEncoder attribute) max_wavelength (maxtext.layers.embeddings.PositionalEmbedding attribute) maxtext module maxtext.checkpoint_conversion module maxtext.checkpoint_conversion.compare_hf_ckpt module maxtext.checkpoint_conversion.to_maxtext module maxtext.checkpoint_conversion.utils module maxtext.checkpoint_conversion.utils.hf_model_configs module maxtext.checkpoint_conversion.utils.hf_shape module maxtext.checkpoint_conversion.utils.hf_utils module maxtext.checkpoint_conversion.utils.param_mapping module maxtext.checkpoint_conversion.utils.utils module maxtext.configs module maxtext.configs.pyconfig module maxtext.configs.pyconfig_deprecated module maxtext.configs.types module maxtext.experimental module maxtext.experimental.agent module maxtext.experimental.agent.ckpt_conversion_agent module maxtext.experimental.agent.ckpt_conversion_agent.analysis module maxtext.experimental.agent.ckpt_conversion_agent.base module maxtext.experimental.agent.ckpt_conversion_agent.baselines module maxtext.experimental.agent.ckpt_conversion_agent.baselines.context module maxtext.experimental.agent.ckpt_conversion_agent.baselines.context.hf_shape module maxtext.experimental.agent.ckpt_conversion_agent.baselines.context.param_mapping module maxtext.experimental.agent.ckpt_conversion_agent.dsl module maxtext.experimental.agent.ckpt_conversion_agent.evaluation module maxtext.experimental.agent.ckpt_conversion_agent.ground_truth module maxtext.experimental.agent.ckpt_conversion_agent.ground_truth.gemma3 module maxtext.experimental.agent.ckpt_conversion_agent.ground_truth.llama3 module maxtext.experimental.agent.ckpt_conversion_agent.ground_truth.qwen3 module maxtext.experimental.agent.ckpt_conversion_agent.mapping module maxtext.experimental.agent.ckpt_conversion_agent.plan module maxtext.experimental.agent.ckpt_conversion_agent.prompt_chain module maxtext.experimental.agent.ckpt_conversion_agent.step1 module maxtext.experimental.agent.ckpt_conversion_agent.step2 module maxtext.experimental.agent.ckpt_conversion_agent.transformation module maxtext.experimental.agent.ckpt_conversion_agent.utils module maxtext.experimental.agent.ckpt_conversion_agent.utils.utils module maxtext.experimental.rl module maxtext.experimental.rl.grpo_input_pipeline module maxtext.input_pipeline module maxtext.input_pipeline.data_processing_utils module maxtext.input_pipeline.distillation_data_processing module maxtext.input_pipeline.grain_data_processing module maxtext.input_pipeline.grain_tokenizer module maxtext.input_pipeline.hf_data_processing module maxtext.input_pipeline.input_pipeline_interface module maxtext.input_pipeline.input_pipeline_utils module maxtext.input_pipeline.instruction_data_processing module maxtext.input_pipeline.multihost_dataloading module maxtext.input_pipeline.olmo_data module maxtext.input_pipeline.olmo_data_grain module maxtext.input_pipeline.olmo_grain_data_processing module maxtext.input_pipeline.packing module maxtext.input_pipeline.packing.sequence_packing module maxtext.input_pipeline.protos module maxtext.input_pipeline.protos.example_pb2 module maxtext.input_pipeline.protos.feature_pb2 module maxtext.input_pipeline.synthetic_data_processing module maxtext.input_pipeline.tfds_data_processing module maxtext.input_pipeline.tfds_data_processing_c4_mlperf module maxtext.input_pipeline.tokenizer module maxtext.integration module maxtext.integration.tunix module maxtext.integration.tunix.tunix_adapter module maxtext.integration.tunix.utils module maxtext.integration.tunix.weight_mapping module maxtext.integration.tunix.weight_mapping.deepseek3 module maxtext.integration.tunix.weight_mapping.gpt_oss module maxtext.integration.tunix.weight_mapping.llama3 module maxtext.integration.tunix.weight_mapping.qwen2 module maxtext.integration.tunix.weight_mapping.qwen3 module maxtext.integration.vllm module maxtext.integration.vllm.maxtext_vllm_adapter module maxtext.integration.vllm.maxtext_vllm_adapter.adapter module maxtext.integration.vllm.setup module maxtext.integration.vllm.torchax_converter module maxtext.integration.vllm.torchax_converter.base module maxtext.integration.vllm.torchax_converter.gemma4_moe module maxtext.integration.vllm.torchax_converter.qwen3_moe module maxtext.kernels module maxtext.kernels.attention module maxtext.kernels.attention.jax_flash_attention module maxtext.kernels.attention.ragged_attention module maxtext.kernels.attention.splash_attention_kernel module maxtext.kernels.gather_reduce_pallas module maxtext.kernels.gather_reduce_sc module maxtext.kernels.megablox module maxtext.kernels.megablox.backend module maxtext.kernels.megablox.common module maxtext.kernels.megablox.ops module maxtext.kernels.sort_activations module maxtext.layers module maxtext.layers.attention_mla module maxtext.layers.attention_op module maxtext.layers.attentions module maxtext.layers.decoders module maxtext.layers.embeddings module maxtext.layers.encoders module maxtext.layers.engram module maxtext.layers.initializers module maxtext.layers.learn_to_init_layer module maxtext.layers.linears module maxtext.layers.mhc module maxtext.layers.moe module maxtext.layers.multi_token_prediction module maxtext.layers.nnx_decoders module maxtext.layers.nnx_wrappers module maxtext.layers.normalizations module maxtext.layers.pipeline module maxtext.layers.pipeline_deprecated module maxtext.layers.quantizations module maxtext.layers.train_state_nnx module maxtext.models module maxtext.models.deepseek module maxtext.models.deepseek_batchsplit module maxtext.models.deepseek_batchsplit_fp8 module maxtext.models.gemma module maxtext.models.gemma2 module maxtext.models.gemma3 module maxtext.models.gemma4 module maxtext.models.gemma4_vision module maxtext.models.gpt3 module maxtext.models.gpt_oss module maxtext.models.llama2 module maxtext.models.llama4 module maxtext.models.mistral module maxtext.models.mixtral module maxtext.models.models module maxtext.models.olmo3 module maxtext.models.qwen2 module maxtext.models.qwen3 module maxtext.models.qwen3_5 module maxtext.models.qwen3_custom module maxtext.models.simple_layer module maxtext.multimodal module maxtext.multimodal.processor module maxtext.multimodal.processor_gemma3 module maxtext.multimodal.processor_gemma4 module maxtext.multimodal.processor_llama4 module maxtext.multimodal.processor_qwen3_omni module maxtext.multimodal.utils module MaxTextConfig (class in maxtext.configs.types) MaxTextForCausalLM (class in maxtext.integration.vllm.maxtext_vllm_adapter.adapter) maybe_all_gather_kernel_weight_in_expert_parallelism() (maxtext.layers.moe.RoutedMoE method) maybe_quantize_model() (in module maxtext.layers.quantizations) megablox (maxtext.configs.types.MoEKernels attribute) mel_filter_bank() (in module maxtext.multimodal.utils) mel_to_hertz() (in module maxtext.multimodal.utils) MemoryMonitorTqdm (class in maxtext.checkpoint_conversion.utils.utils) merge() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) merge_gating_gmm (maxtext.configs.types.MoEKernels attribute) merge_image_columns() (in module maxtext.input_pipeline.input_pipeline_utils) merge_mm_embeddings() (in module maxtext.multimodal.utils) merger (maxtext.models.qwen3.Qwen3OmniMoeVisionProjector attribute) merger_list (maxtext.models.qwen3.Qwen3OmniMoeVisionEncoder attribute) mesh (maxtext.layers.attentions.Attention attribute) (maxtext.layers.decoders.Decoder attribute) (maxtext.layers.decoders.DecoderLayer attribute) (maxtext.layers.decoders.SequentialBlockDecoderLayers attribute) (maxtext.layers.pipeline.PipelineBase attribute) (maxtext.layers.pipeline_deprecated.Pipeline attribute) (maxtext.models.gemma.GemmaDecoderLayer attribute) (maxtext.models.gemma2.Gemma2DecoderLayer attribute) (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) (maxtext.models.gpt_oss.GptOssScannableBlock attribute) (maxtext.models.llama4.Llama4VisionEncoder attribute) (maxtext.models.llama4.Llama4VisionModel attribute) (maxtext.models.models.TransformerLinenPure attribute) (maxtext.models.olmo3.Olmo3ScannableBlock attribute) (maxtext.models.qwen3.Qwen3NextDecoderLayer attribute) (maxtext.models.qwen3.Qwen3NextFullAttention attribute) (maxtext.models.qwen3.Qwen3NextScannableBlock attribute) (maxtext.models.qwen3.Qwen3NextSparseMoeBlock attribute) (maxtext.models.qwen3.Qwen3OmniAudioEncoder attribute) (maxtext.models.qwen3_5.Qwen3_5DecoderLayer attribute) mesh_axes (maxtext.configs.types.HardwareAndMesh attribute) metadata_fn() (maxtext.layers.nnx_wrappers.ToLinen method) Metrics (class in maxtext.configs.types) metrics_dir (maxtext.configs.types.DerivedValues attribute) metrics_file (maxtext.configs.types.Metrics attribute) mhc_expansion_rate (maxtext.configs.types.ManifoldConstrainedHyperConnections attribute) micro_batch_size_to_eval_on (maxtext.configs.types.DerivedValues attribute) micro_batch_size_to_train_on (maxtext.configs.types.DerivedValues attribute) MINIMAL (maxtext.configs.types.RematPolicy attribute) MINIMAL_OFFLOADED (maxtext.configs.types.RematPolicy attribute) minimal_policy() (maxtext.layers.decoders.Decoder method) (maxtext.layers.nnx_decoders.NNXDecoder method) MistralDecoderLayer (class in maxtext.models.mistral) MIXTRAL_HF_WEIGHTS_TO_SHAPE() (in module maxtext.checkpoint_conversion.utils.hf_shape) MIXTRAL_MAXTEXT_TO_HF_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) MIXTRAL_MAXTEXT_TO_HF_PARAM_MAPPING() (in module maxtext.checkpoint_conversion.utils.param_mapping) MixtralDecoderLayer (class in maxtext.models.mixtral) MLA (class in maxtext.layers.attention_mla) mla() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) mla_as_linen() (in module maxtext.layers.attention_mla) mla_bwd() (in module maxtext.models.deepseek_batchsplit) mla_get_key_value() (maxtext.layers.attention_mla.MLA method) mla_kv (maxtext.configs.types.RematAndOffload attribute) mla_kv_projection() (maxtext.layers.attention_mla.MLA method) mla_naive_kvcache (maxtext.configs.types.MlaAttention attribute) mla_q (maxtext.configs.types.RematAndOffload attribute) mla_query_projection() (maxtext.layers.attention_mla.MLA method) mla_remat() (in module maxtext.models.deepseek_batchsplit) mla_with_norms() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) mla_with_norms_bwd() (in module maxtext.models.deepseek_batchsplit) mla_with_norms_remat() (in module maxtext.models.deepseek_batchsplit) MlaAttention (class in maxtext.configs.types) mlp (maxtext.models.qwen3.Qwen3OmniMoeVisionBlock attribute) mlp_0 (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchMerger attribute) mlp_2 (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchMerger attribute) mlp_activations (maxtext.configs.types.ModelArchitecture attribute) mlp_activations_limit (maxtext.configs.types.ModelArchitecture attribute) mlp_bias (maxtext.configs.types.DeepSeekMoE attribute) mlp_block() (in module maxtext.layers.linears) mlp_dim (maxtext.configs.types.DerivedValues attribute) mlp_logical_axis_names (maxtext.models.deepseek.DeepSeekGenericLayer property) mlp_op() (maxtext.models.deepseek.DeepSeekDenseLayer method) (maxtext.models.deepseek.DeepSeekGenericLayer method) (maxtext.models.deepseek.DeepSeekMoELayer method) mlp_out (maxtext.models.qwen3.Qwen3OmniMoeVisionBlock attribute) MlpBlock (class in maxtext.layers.linears) MlpBlockViT (class in maxtext.models.gemma3) mlpwi (maxtext.configs.types.RematAndOffload attribute) mlpwi_0 (maxtext.configs.types.RematAndOffload attribute) mlpwi_1 (maxtext.configs.types.RematAndOffload attribute) mlpwo (maxtext.configs.types.RematAndOffload attribute) MoBa (class in maxtext.configs.types) moba (maxtext.configs.types.MoBa attribute) moba_chunk_size (maxtext.configs.types.MoBa attribute) moba_topk (maxtext.configs.types.MoBa attribute) model_call_mode (maxtext.configs.types.InferenceGeneral attribute) model_config (maxtext.configs.types.AdamW attribute) (maxtext.configs.types.AOT attribute) (maxtext.configs.types.Attention attribute) (maxtext.configs.types.AttentionIndexer attribute) (maxtext.configs.types.AudioEncoder attribute) (maxtext.configs.types.Checkpointing attribute) (maxtext.configs.types.DatasetGeneral attribute) (maxtext.configs.types.DataTypes attribute) (maxtext.configs.types.DcnParallelism attribute) (maxtext.configs.types.Debug attribute) (maxtext.configs.types.Decoding attribute) (maxtext.configs.types.DeepSeekMoE attribute) (maxtext.configs.types.DerivedValues attribute) (maxtext.configs.types.DevelopmentAndDebugging attribute) (maxtext.configs.types.DilocoParams attribute) (maxtext.configs.types.Distillation attribute) (maxtext.configs.types.ElasticTraining attribute) (maxtext.configs.types.EmergencyCheckpointing attribute) (maxtext.configs.types.Engram attribute) (maxtext.configs.types.FineTuning attribute) (maxtext.configs.types.GcpMonitoring attribute) (maxtext.configs.types.Goodput attribute) (maxtext.configs.types.GrainDataset attribute) (maxtext.configs.types.HardwareAndMesh attribute) (maxtext.configs.types.HfDataset attribute) (maxtext.configs.types.HloDump attribute) (maxtext.configs.types.IciParallelism attribute) (maxtext.configs.types.InferenceBenchmark attribute) (maxtext.configs.types.InferenceGeneral attribute) (maxtext.configs.types.InferenceLayout attribute) (maxtext.configs.types.InferenceServer attribute) (maxtext.configs.types.LayoutAndSharding attribute) (maxtext.configs.types.Llama4Attention attribute) (maxtext.configs.types.Logits attribute) (maxtext.configs.types.ManagedMLDiagnostics attribute) (maxtext.configs.types.ManifoldConstrainedHyperConnections attribute) (maxtext.configs.types.MaxTextConfig attribute) (maxtext.configs.types.Metrics attribute) (maxtext.configs.types.MlaAttention attribute) (maxtext.configs.types.MoBa attribute) (maxtext.configs.types.ModelArchitecture attribute) (maxtext.configs.types.MoEGeneral attribute) (maxtext.configs.types.MoEKernels attribute) (maxtext.configs.types.MTP attribute) (maxtext.configs.types.MultimodalGeneral attribute) (maxtext.configs.types.Muon attribute) (maxtext.configs.types.OlmoGrainDataset attribute) (maxtext.configs.types.Optimizer attribute) (maxtext.configs.types.OrbaxStorage attribute) (maxtext.configs.types.PagedAttention attribute) (maxtext.configs.types.PipelineParallelism attribute) (maxtext.configs.types.PositionalEmbedding attribute) (maxtext.configs.types.PrefixCaching attribute) (maxtext.configs.types.Profiling attribute) (maxtext.configs.types.Quantization attribute) (maxtext.configs.types.Qwen3Next attribute) (maxtext.configs.types.RematAndOffload attribute) (maxtext.configs.types.Reward attribute) (maxtext.configs.types.RL attribute) (maxtext.configs.types.RLDataset attribute) (maxtext.configs.types.RLEvaluation attribute) (maxtext.configs.types.RLHardware attribute) (maxtext.configs.types.Rope attribute) (maxtext.configs.types.RunInfo attribute) (maxtext.configs.types.SpecialTokens attribute) (maxtext.configs.types.SplashAttention attribute) (maxtext.configs.types.StackTrace attribute) (maxtext.configs.types.Tensorboard attribute) (maxtext.configs.types.TfdsDataset attribute) (maxtext.configs.types.Tokenizer attribute) (maxtext.configs.types.TrainingLoop attribute) (maxtext.configs.types.VisionProjector attribute) (maxtext.configs.types.VisionTower attribute) (maxtext.configs.types.VLLM attribute) (maxtext.configs.types.YarnRope attribute) model_mode (maxtext.layers.decoders.Decoder attribute) (maxtext.layers.decoders.DecoderLayer attribute) (maxtext.layers.decoders.SequentialBlockDecoderLayers attribute) (maxtext.models.gemma.GemmaDecoderLayer attribute) (maxtext.models.gemma2.Gemma2DecoderLayer attribute) (maxtext.models.models.TransformerLinenPure attribute) (maxtext.models.qwen3.Qwen3NextDecoderLayer attribute) (maxtext.models.qwen3.Qwen3NextFullAttention attribute) (maxtext.models.qwen3.Qwen3NextScannableBlock attribute) (maxtext.models.qwen3_5.Qwen3_5DecoderLayer attribute) model_name (maxtext.configs.types.RunInfo attribute) (maxtext.input_pipeline.input_pipeline_utils.FoldImagesIntoBatch attribute) model_type (maxtext.checkpoint_conversion.utils.hf_model_configs.DeepseekV32Config attribute) ModelArchitecture (class in maxtext.configs.types) module maxtext maxtext.checkpoint_conversion maxtext.checkpoint_conversion.compare_hf_ckpt maxtext.checkpoint_conversion.to_maxtext maxtext.checkpoint_conversion.utils maxtext.checkpoint_conversion.utils.hf_model_configs maxtext.checkpoint_conversion.utils.hf_shape maxtext.checkpoint_conversion.utils.hf_utils maxtext.checkpoint_conversion.utils.param_mapping maxtext.checkpoint_conversion.utils.utils maxtext.configs maxtext.configs.pyconfig maxtext.configs.pyconfig_deprecated maxtext.configs.types maxtext.experimental maxtext.experimental.agent maxtext.experimental.agent.ckpt_conversion_agent maxtext.experimental.agent.ckpt_conversion_agent.analysis maxtext.experimental.agent.ckpt_conversion_agent.base maxtext.experimental.agent.ckpt_conversion_agent.baselines maxtext.experimental.agent.ckpt_conversion_agent.baselines.context maxtext.experimental.agent.ckpt_conversion_agent.baselines.context.hf_shape maxtext.experimental.agent.ckpt_conversion_agent.baselines.context.param_mapping maxtext.experimental.agent.ckpt_conversion_agent.dsl maxtext.experimental.agent.ckpt_conversion_agent.evaluation maxtext.experimental.agent.ckpt_conversion_agent.ground_truth maxtext.experimental.agent.ckpt_conversion_agent.ground_truth.gemma3 maxtext.experimental.agent.ckpt_conversion_agent.ground_truth.llama3 maxtext.experimental.agent.ckpt_conversion_agent.ground_truth.qwen3 maxtext.experimental.agent.ckpt_conversion_agent.mapping maxtext.experimental.agent.ckpt_conversion_agent.plan maxtext.experimental.agent.ckpt_conversion_agent.prompt_chain maxtext.experimental.agent.ckpt_conversion_agent.step1 maxtext.experimental.agent.ckpt_conversion_agent.step2 maxtext.experimental.agent.ckpt_conversion_agent.transformation maxtext.experimental.agent.ckpt_conversion_agent.utils maxtext.experimental.agent.ckpt_conversion_agent.utils.utils maxtext.experimental.rl maxtext.experimental.rl.grpo_input_pipeline maxtext.input_pipeline maxtext.input_pipeline.data_processing_utils maxtext.input_pipeline.distillation_data_processing maxtext.input_pipeline.grain_data_processing maxtext.input_pipeline.grain_tokenizer maxtext.input_pipeline.hf_data_processing maxtext.input_pipeline.input_pipeline_interface maxtext.input_pipeline.input_pipeline_utils maxtext.input_pipeline.instruction_data_processing maxtext.input_pipeline.multihost_dataloading maxtext.input_pipeline.olmo_data maxtext.input_pipeline.olmo_data_grain maxtext.input_pipeline.olmo_grain_data_processing maxtext.input_pipeline.packing maxtext.input_pipeline.packing.sequence_packing maxtext.input_pipeline.protos maxtext.input_pipeline.protos.example_pb2 maxtext.input_pipeline.protos.feature_pb2 maxtext.input_pipeline.synthetic_data_processing maxtext.input_pipeline.tfds_data_processing maxtext.input_pipeline.tfds_data_processing_c4_mlperf maxtext.input_pipeline.tokenizer maxtext.integration maxtext.integration.tunix maxtext.integration.tunix.tunix_adapter maxtext.integration.tunix.utils maxtext.integration.tunix.weight_mapping maxtext.integration.tunix.weight_mapping.deepseek3 maxtext.integration.tunix.weight_mapping.gpt_oss maxtext.integration.tunix.weight_mapping.llama3 maxtext.integration.tunix.weight_mapping.qwen2 maxtext.integration.tunix.weight_mapping.qwen3 maxtext.integration.vllm maxtext.integration.vllm.maxtext_vllm_adapter maxtext.integration.vllm.maxtext_vllm_adapter.adapter maxtext.integration.vllm.setup maxtext.integration.vllm.torchax_converter maxtext.integration.vllm.torchax_converter.base maxtext.integration.vllm.torchax_converter.gemma4_moe maxtext.integration.vllm.torchax_converter.qwen3_moe maxtext.kernels maxtext.kernels.attention maxtext.kernels.attention.jax_flash_attention maxtext.kernels.attention.ragged_attention maxtext.kernels.attention.splash_attention_kernel maxtext.kernels.gather_reduce_pallas maxtext.kernels.gather_reduce_sc maxtext.kernels.megablox maxtext.kernels.megablox.backend maxtext.kernels.megablox.common maxtext.kernels.megablox.ops maxtext.kernels.sort_activations maxtext.layers maxtext.layers.attention_mla maxtext.layers.attention_op maxtext.layers.attentions maxtext.layers.decoders maxtext.layers.embeddings maxtext.layers.encoders maxtext.layers.engram maxtext.layers.initializers maxtext.layers.learn_to_init_layer maxtext.layers.linears maxtext.layers.mhc maxtext.layers.moe maxtext.layers.multi_token_prediction maxtext.layers.nnx_decoders maxtext.layers.nnx_wrappers maxtext.layers.normalizations maxtext.layers.pipeline maxtext.layers.pipeline_deprecated maxtext.layers.quantizations maxtext.layers.train_state_nnx maxtext.models maxtext.models.deepseek maxtext.models.deepseek_batchsplit maxtext.models.deepseek_batchsplit_fp8 maxtext.models.gemma maxtext.models.gemma2 maxtext.models.gemma3 maxtext.models.gemma4 maxtext.models.gemma4_vision maxtext.models.gpt3 maxtext.models.gpt_oss maxtext.models.llama2 maxtext.models.llama4 maxtext.models.mistral maxtext.models.mixtral maxtext.models.models maxtext.models.olmo3 maxtext.models.qwen2 maxtext.models.qwen3 maxtext.models.qwen3_5 maxtext.models.qwen3_custom maxtext.models.simple_layer maxtext.multimodal maxtext.multimodal.processor maxtext.multimodal.processor_gemma3 maxtext.multimodal.processor_gemma4 maxtext.multimodal.processor_llama4 maxtext.multimodal.processor_qwen3_omni maxtext.multimodal.utils moe() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) moe_block (maxtext.models.llama4.Llama4DecoderLayer property) moe_bwd() (in module maxtext.models.deepseek_batchsplit) moe_expert_input_dim (maxtext.configs.types.MoEGeneral attribute) moe_fsdp_use_two_stage_all_gather (maxtext.configs.types.MoEGeneral attribute) moe_mlp_dim (maxtext.configs.types.DerivedValues attribute) moe_mlpwi_0 (maxtext.configs.types.RematAndOffload attribute) moe_mlpwi_1 (maxtext.configs.types.RematAndOffload attribute) moe_mlpwo (maxtext.configs.types.RematAndOffload attribute) MoEGeneral (class in maxtext.configs.types) MoEKernels (class in maxtext.configs.types) monitor_goodput (maxtext.configs.types.Goodput attribute) monitor_step_time_deviation (maxtext.configs.types.Goodput attribute) mrope_section (maxtext.configs.types.MultimodalGeneral attribute) mscale (maxtext.configs.types.YarnRope attribute) mtc_data_parallelism (maxtext.configs.types.EmergencyCheckpointing attribute) MTP (class in maxtext.configs.types) mtp_acceptance (class in maxtext.layers.multi_token_prediction) mtp_eval_target_module (maxtext.configs.types.MTP attribute) mtp_loss_scaling_factor (maxtext.configs.types.MTP attribute) mtp_losses (class in maxtext.layers.multi_token_prediction) mtp_num_layers (maxtext.configs.types.MTP attribute) mu_dtype (maxtext.configs.types.AdamW attribute) multi_sampling (maxtext.configs.types.InferenceGeneral attribute) multi_tier_checkpointing_backup_interval_minutes (maxtext.configs.types.EmergencyCheckpointing attribute) multi_token_prediction_block_as_linen() (in module maxtext.layers.multi_token_prediction) MultiHeadEmbedding (class in maxtext.layers.engram) MultiHostDataLoadIterator (class in maxtext.input_pipeline.multihost_dataloading) MultimodalGeneral (class in maxtext.configs.types) MultiTokenPredictionBlock (class in maxtext.layers.multi_token_prediction) MultiTokenPredictionLayer (class in maxtext.layers.multi_token_prediction) Muon (class in maxtext.configs.types) MUON (maxtext.configs.types.OptimizerType attribute) muon_beta (maxtext.configs.types.Muon attribute) muon_consistent_rms (maxtext.configs.types.Muon attribute) muon_weight_decay (maxtext.configs.types.Muon attribute) N n_instances (maxtext.input_pipeline.olmo_data.OlmoNpyFileEntry attribute) n_routing_groups (maxtext.configs.types.DeepSeekMoE attribute) n_tokens (maxtext.input_pipeline.olmo_data.OlmoNpyFileEntry attribute) n_window_for_audio (maxtext.configs.types.AudioEncoder attribute) n_window_infer_for_audio (maxtext.configs.types.AudioEncoder attribute) naive_jax_chunk_gated_delta_rule() (in module maxtext.models.qwen3) name (maxtext.layers.decoders.Decoder attribute) (maxtext.layers.decoders.DecoderLayer attribute) (maxtext.layers.decoders.SequentialBlockDecoderLayers attribute) (maxtext.layers.nnx_wrappers.ToLinen attribute) (maxtext.layers.pipeline.CircularPipeline attribute) (maxtext.layers.pipeline.Pipeline attribute) (maxtext.layers.pipeline.PipelineBase attribute) (maxtext.layers.pipeline_deprecated.Pipeline attribute) (maxtext.layers.quantizations.Fp8Einsum attribute) (maxtext.layers.quantizations.QwixDotGeneral attribute) (maxtext.layers.quantizations.QwixEinsum attribute) (maxtext.models.models.TransformerLinen attribute) (maxtext.models.models.TransformerLinenPure attribute) NANOO_FP8 (maxtext.configs.types.QuantizationType attribute) NANOOFp8Provider (class in maxtext.layers.quantizations) NANOOFp8Quantization (class in maxtext.layers.quantizations) nbytes (maxtext.checkpoint_conversion.to_maxtext.LazyTensor property) nd_dense_init() (in module maxtext.layers.initializers) need_circ_storage() (maxtext.layers.pipeline.PipelineBase method) (maxtext.layers.pipeline_deprecated.Pipeline method) next_power_of_two() (in module maxtext.integration.vllm.maxtext_vllm_adapter.adapter) NgramFilterTransform (class in maxtext.input_pipeline.olmo_data_grain) NgramHashMapping (class in maxtext.layers.engram) nnx_attrs_to_linen_vars() (in module maxtext.layers.nnx_wrappers) nnx_class (maxtext.layers.nnx_wrappers.ToLinen attribute) NNXDecoder (class in maxtext.layers.nnx_decoders) NNXDecoderLayer (class in maxtext.layers.nnx_decoders) no_op() (maxtext.models.models.Transformer method) NONE (maxtext.configs.types.KvQuantAxis attribute) (maxtext.configs.types.ProfilerType attribute) (maxtext.configs.types.QuantizationType attribute) nope_layer_interval (maxtext.configs.types.PositionalEmbedding attribute) norm_topk_prob (maxtext.configs.types.MoEGeneral attribute) normalization_layer_epsilon (maxtext.configs.types.ModelArchitecture attribute) normalize_attention() (maxtext.layers.attention_op.AttentionOp method) normalize_axes() (in module maxtext.layers.linears) normalize_cudnn_attention() (maxtext.layers.attention_op.AttentionOp method) normalize_embedding_logits (maxtext.configs.types.Logits attribute) normalize_features() (in module maxtext.input_pipeline.input_pipeline_utils) normalize_images() (in module maxtext.multimodal.utils) NormalizeFeatures (class in maxtext.input_pipeline.input_pipeline_utils) NSYS (maxtext.configs.types.ProfilerType attribute) NUCLEUS (maxtext.configs.types.SamplingStrategy attribute) num_attention_heads (maxtext.layers.embeddings.LlamaVisionRotaryEmbedding attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionRotaryEmbedding attribute) num_attention_heads_for_vit (maxtext.configs.types.VisionTower attribute) num_audios (maxtext.multimodal.processor_qwen3_omni.Qwen3OmniPreprocessorOutput attribute) num_batches (maxtext.configs.types.RLDataset attribute) num_channels_for_vit (maxtext.configs.types.VisionTower attribute) num_conv_layers_for_audio (maxtext.configs.types.AudioEncoder attribute) num_decoder_layers (maxtext.configs.types.DerivedValues attribute) (maxtext.layers.decoders.SequentialBlockDecoderLayers attribute) num_diloco_replicas (maxtext.configs.types.DerivedValues attribute) num_epoch (maxtext.configs.types.DatasetGeneral attribute) num_eval_passes (maxtext.configs.types.RLEvaluation attribute) num_experts (maxtext.configs.types.MoEGeneral attribute) num_experts_per_tok (maxtext.configs.types.MoEGeneral attribute) num_features (maxtext.layers.normalizations.Qwen3NextRMSNormGated attribute) num_generations (maxtext.configs.types.RL attribute) num_heads (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) num_hidden_layers_for_vit (maxtext.configs.types.VisionTower attribute) num_images (maxtext.multimodal.processor_gemma3.Gemma3PreprocessorOutput attribute) (maxtext.multimodal.processor_gemma4.Gemma4PreprocessorOutput attribute) (maxtext.multimodal.processor_llama4.Llama4PreprocessorOutput attribute) (maxtext.multimodal.processor_qwen3_omni.Qwen3OmniPreprocessorOutput attribute) (maxtext.multimodal.utils.PreprocessorOutput attribute) num_instances (maxtext.input_pipeline.olmo_data_grain.OlmoIndexSampler property) num_iterations (maxtext.configs.types.RL attribute) num_kv_heads (maxtext.configs.types.DerivedValues attribute) (maxtext.layers.attentions.Attention attribute) num_layers_per_pipeline_stage (maxtext.configs.types.PipelineParallelism attribute) num_local_instances_per_epoch (maxtext.input_pipeline.olmo_data_grain.OlmoIndexSampler property) num_mel_bins_for_audio (maxtext.configs.types.AudioEncoder attribute) num_new_token (maxtext.layers.engram.CompressedTokenizer attribute) num_of_layers (maxtext.models.gpt_oss.GptOssScannableBlock attribute) (maxtext.models.olmo3.Olmo3ScannableBlock attribute) num_pipeline_microbatches (maxtext.configs.types.PipelineParallelism attribute) num_pipeline_repeats (maxtext.configs.types.PipelineParallelism attribute) num_position_embeddings (maxtext.layers.embeddings.Qwen3OmniMoeVisionPosEmbedInterpolate attribute) num_position_embeddings_for_vit (maxtext.configs.types.VisionTower attribute) num_query_heads (maxtext.configs.types.DerivedValues attribute) (maxtext.layers.attentions.Attention attribute) num_reserved_special_tokens (maxtext.input_pipeline.tokenizer.TikTokenTokenizer attribute) num_samplers_slices (maxtext.configs.types.RLHardware attribute) num_slices (maxtext.configs.types.HardwareAndMesh attribute) NUM_SLOTS (maxtext.integration.vllm.torchax_converter.gemma4_moe.Gemma4MaxTextToVLLMConverter attribute) num_target_devices (maxtext.configs.types.DerivedValues attribute) num_test_batches (maxtext.configs.types.RLDataset attribute) num_trainer_slices (maxtext.configs.types.RLHardware attribute) num_videos (maxtext.multimodal.processor_qwen3_omni.Qwen3OmniPreprocessorOutput attribute) num_vocab_tiling (maxtext.configs.types.Tokenizer attribute) NvidaFp8Provider (class in maxtext.layers.quantizations) O off_policy_steps (maxtext.configs.types.RL attribute) offline_data_dir (maxtext.configs.types.Distillation attribute) OFFLOAD (maxtext.configs.types.RematLocation attribute) offset (maxtext.layers.attention_op.LoadBalancedCausalMask attribute), [1] OLMO3_MAXTEXT_TO_HF_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) OLMO3_MAXTEXT_TO_HF_PARAM_MAPPING() (in module maxtext.checkpoint_conversion.utils.param_mapping) Olmo3DecoderLayer (class in maxtext.models.olmo3) Olmo3ScannableBlock (class in maxtext.models.olmo3) olmo_apply_ngram_filter (maxtext.configs.types.OlmoGrainDataset attribute) OLMO_GRAIN (maxtext.configs.types.DatasetType attribute) olmo_index_path (maxtext.configs.types.OlmoGrainDataset attribute) olmo_path_remap_from (maxtext.configs.types.OlmoGrainDataset attribute) olmo_path_remap_to (maxtext.configs.types.OlmoGrainDataset attribute) OlmoGrainDataset (class in maxtext.configs.types) OlmoIndexSampler (class in maxtext.input_pipeline.olmo_data_grain) OlmoNpyDataSource (class in maxtext.input_pipeline.olmo_data_grain) OlmoNpyFileEntry (class in maxtext.input_pipeline.olmo_data) OlmoNpyIndex (class in maxtext.input_pipeline.olmo_data) opt_type (maxtext.configs.types.Optimizer attribute) optimize_mesh_for_tpu_v6e (maxtext.configs.types.HardwareAndMesh attribute) Optimizer (class in maxtext.configs.types) optimizer_memory_host_offload (maxtext.configs.types.RematAndOffload attribute) OptimizerType (class in maxtext.configs.types) OrbaxStorage (class in maxtext.configs.types) original_max_position_embeddings (maxtext.configs.types.YarnRope attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) out_head_dim (maxtext.layers.attention_mla.MLA property) (maxtext.layers.attentions.Attention property) out_hidden_size_for_vit (maxtext.configs.types.VisionTower attribute) out_proj (maxtext.configs.types.RematAndOffload attribute) out_projection() (maxtext.layers.attentions.Attention method) output_dim_for_audio (maxtext.configs.types.AudioEncoder attribute) output_length (maxtext.models.gemma3.VisionExit attribute) override_logical_axis_rules (maxtext.configs.types.RunInfo attribute) override_model_config (maxtext.configs.types.RunInfo attribute) P pack_dataset() (in module maxtext.input_pipeline.packing.sequence_packing) packing (maxtext.configs.types.DatasetGeneral attribute) pad_to_best_fit_jax() (in module maxtext.multimodal.processor_llama4) pad_to_max_tiles() (in module maxtext.multimodal.processor_llama4) padded_base_moe_mlp_dim (maxtext.configs.types.MoEGeneral attribute) PadOrTrimToMaxLength (class in maxtext.input_pipeline.input_pipeline_utils) PagedAttention (class in maxtext.configs.types) pagedattn_head_dim_alignment (maxtext.configs.types.PagedAttention attribute) pagedattn_max_pages_per_group (maxtext.configs.types.PagedAttention attribute) pagedattn_num_pages (maxtext.configs.types.PagedAttention attribute) pagedattn_pages_per_compute_block (maxtext.configs.types.PagedAttention attribute) pagedattn_tokens_per_page (maxtext.configs.types.PagedAttention attribute) param_scan_axis (maxtext.configs.types.HardwareAndMesh attribute) parameter_memory_host_offload (maxtext.configs.types.RematAndOffload attribute) parent (maxtext.layers.decoders.Decoder attribute) (maxtext.layers.decoders.DecoderLayer attribute) (maxtext.layers.decoders.SequentialBlockDecoderLayers attribute) (maxtext.layers.nnx_wrappers.ToLinen attribute) (maxtext.layers.pipeline.CircularPipeline attribute) (maxtext.layers.pipeline.Pipeline attribute) (maxtext.layers.pipeline.PipelineBase attribute) (maxtext.layers.pipeline_deprecated.Pipeline attribute) (maxtext.layers.quantizations.Fp8Einsum attribute) (maxtext.layers.quantizations.QwixDotGeneral attribute) (maxtext.layers.quantizations.QwixEinsum attribute) (maxtext.models.models.TransformerLinen attribute) (maxtext.models.models.TransformerLinenPure attribute) parse_and_keep_features() (in module maxtext.input_pipeline.data_processing_utils) parse_npy_header() (in module maxtext.input_pipeline.olmo_data) ParseFeatures (class in maxtext.input_pipeline.input_pipeline_utils) partial_rotary_embedding_as_linen() (in module maxtext.layers.embeddings) partial_rotary_factor (maxtext.configs.types.Qwen3Next attribute) PartialRotaryEmbedding (class in maxtext.layers.embeddings) pat_str (maxtext.input_pipeline.tokenizer.TikTokenTokenizer attribute) patch_embed (maxtext.models.qwen3.Qwen3OmniMoeVisionEncoder attribute) patch_size (maxtext.layers.embeddings.LlamaVisionRotaryEmbedding attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchEmbed attribute) patch_size_for_vit (maxtext.configs.types.VisionTower attribute) patchify() (in module maxtext.models.gemma4_vision) path (maxtext.input_pipeline.olmo_data.OlmoNpyFileEntry attribute) penalty_incorrect_answer (maxtext.configs.types.Reward attribute) penalty_incorrect_format (maxtext.configs.types.Reward attribute) per_device_batch_size (maxtext.configs.types.DatasetGeneral attribute) per_device_batch_size_increment (maxtext.configs.types.DatasetGeneral attribute) per_device_batch_size_start (maxtext.configs.types.DatasetGeneral attribute) period (maxtext.input_pipeline.olmo_data.RepetitionTuple attribute) permute() (maxtext.layers.moe.RoutedMoE method) permute_output_micro_per_stage_dim() (maxtext.layers.pipeline.Pipeline method) (maxtext.layers.pipeline_deprecated.Pipeline method) PerTensorScales (class in maxtext.layers.quantizations) Pipeline (class in maxtext.layers.pipeline) (class in maxtext.layers.pipeline_deprecated) pipeline_delay_activation_forwarding (maxtext.configs.types.PipelineParallelism attribute) pipeline_fsdp_ag_once (maxtext.configs.types.PipelineParallelism attribute) pipeline_fsdp_ag_per_repeat (maxtext.configs.types.PipelineParallelism attribute) pipeline_parallel_layers (maxtext.configs.types.PipelineParallelism attribute) PipelineBase (class in maxtext.layers.pipeline) PipelineParallelism (class in maxtext.configs.types) pixel_grid_thw (maxtext.multimodal.processor_qwen3_omni.Qwen3OmniPreprocessorOutput attribute) pixel_mask (maxtext.multimodal.processor_gemma3.Gemma3PreprocessorOutput attribute) (maxtext.multimodal.processor_gemma4.Gemma4PreprocessorOutput attribute) (maxtext.multimodal.processor_llama4.Llama4PreprocessorOutput attribute) (maxtext.multimodal.utils.PreprocessorOutput attribute) pixel_shuffle() (in module maxtext.models.llama4) pixel_shuffle_ratio_for_vit (maxtext.configs.types.VisionProjector attribute) pixel_values (maxtext.multimodal.processor_gemma3.Gemma3PreprocessorOutput attribute) (maxtext.multimodal.processor_gemma4.Gemma4PreprocessorOutput attribute) (maxtext.multimodal.processor_llama4.Llama4PreprocessorOutput attribute) (maxtext.multimodal.processor_qwen3_omni.Qwen3OmniPreprocessorOutput attribute) (maxtext.multimodal.utils.PreprocessorOutput attribute) PlaceHolderDataIterator (class in maxtext.input_pipeline.synthetic_data_processing) plan_conversion() (maxtext.experimental.agent.ckpt_conversion_agent.plan.PlanAgent method) PlanAgent (class in maxtext.experimental.agent.ckpt_conversion_agent.plan) pos_embed_interpolate (maxtext.models.qwen3.Qwen3OmniMoeVisionEncoder attribute) posemb_type_for_vit (maxtext.configs.types.MultimodalGeneral attribute) position_id_per_seconds (maxtext.configs.types.MultimodalGeneral attribute) positional_embedding_as_linen() (in module maxtext.layers.embeddings) PositionalEmbedding (class in maxtext.configs.types) (class in maxtext.layers.embeddings) positions_xy (maxtext.multimodal.processor_gemma4.Gemma4PreprocessorOutput attribute) post_attention_norm_op() (maxtext.models.deepseek.DeepSeekGenericLayer method) post_process() (maxtext.models.deepseek.DeepSeekGenericLayer method) power_to_db() (in module maxtext.multimodal.utils) POWER_TRACE_NONE (maxtext.configs.types.XProfTPUPowerTraceMode attribute) POWER_TRACE_NORMAL (maxtext.configs.types.XProfTPUPowerTraceMode attribute) POWER_TRACE_SPI (maxtext.configs.types.XProfTPUPowerTraceMode attribute) pre_attention_norm_op() (maxtext.models.deepseek.DeepSeekGenericLayer method) pre_process_audio_qwen3_omni() (in module maxtext.multimodal.processor_qwen3_omni) pre_process_image_sft() (in module maxtext.input_pipeline.input_pipeline_utils) pre_process_qwen3_image() (in module maxtext.multimodal.processor_qwen3_omni) prefill_cache_axis_order (maxtext.configs.types.InferenceLayout attribute) prefill_cache_dir (maxtext.configs.types.InferenceGeneral attribute) prefill_chunk_size (maxtext.configs.types.InferenceGeneral attribute) prefill_slice (maxtext.configs.types.InferenceServer attribute) prefix_caching_dram_byte (maxtext.configs.types.PrefixCaching attribute) prefix_caching_hbm_byte (maxtext.configs.types.PrefixCaching attribute) PrefixCaching (class in maxtext.configs.types) prefuse_moe_weights (maxtext.configs.types.MoEGeneral attribute) prepare_text_for_image_fusion() (in module maxtext.input_pipeline.input_pipeline_utils) (in module maxtext.multimodal.processor) preprocess_eval_dataset() (in module maxtext.input_pipeline.tfds_data_processing_c4_mlperf) preprocess_image_for_training() (in module maxtext.multimodal.processor) preprocess_mm_data() (in module maxtext.multimodal.processor) preprocess_mm_data_gemma3() (in module maxtext.multimodal.processor_gemma3) preprocess_mm_data_gemma4() (in module maxtext.multimodal.processor_gemma4) preprocess_mm_data_llama4() (in module maxtext.multimodal.processor_llama4) preprocess_mm_data_qwen3_omni() (in module maxtext.multimodal.processor_qwen3_omni) preprocess_train_dataset() (in module maxtext.input_pipeline.tfds_data_processing_c4_mlperf) preprocess_video() (in module maxtext.multimodal.processor_qwen3_omni) preprocessing_pipeline() (in module maxtext.experimental.rl.grpo_input_pipeline) (in module maxtext.input_pipeline.hf_data_processing) (in module maxtext.input_pipeline.tfds_data_processing) PreprocessorOutput (class in maxtext.multimodal.utils) pretrain_preprocessing_pipeline() (in module maxtext.input_pipeline.grain_data_processing) print_peak_memory() (in module maxtext.checkpoint_conversion.utils.utils) print_ram_usage() (in module maxtext.checkpoint_conversion.utils.utils) process_activations() (in module maxtext.models.deepseek_batchsplit_fp8) process_dataset() (in module maxtext.input_pipeline.distillation_data_processing) process_maxtext_param() (in module maxtext.checkpoint_conversion.utils.utils) profile_cleanly (maxtext.configs.types.Profiling attribute) profile_periodically_period (maxtext.configs.types.Profiling attribute) profile_power_events (maxtext.configs.types.Profiling attribute) profiler (maxtext.configs.types.Profiling attribute) profiler_steps (maxtext.configs.types.Profiling attribute) ProfilerType (class in maxtext.configs.types) Profiling (class in maxtext.configs.types) proj (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchEmbed attribute) projection() (maxtext.models.gpt3.Gpt3MultiHeadAttention method) projection_layer (maxtext.layers.multi_token_prediction.MultiTokenPredictionLayer property) projector_dropout_for_vit (maxtext.configs.types.VisionProjector attribute) projector_input_dim_for_vit (maxtext.configs.types.VisionProjector attribute) projector_output_dim_for_vit (maxtext.configs.types.VisionProjector attribute) prometheus_port (maxtext.configs.types.Metrics attribute) prompt (maxtext.configs.types.InferenceGeneral attribute) (maxtext.input_pipeline.distillation_data_processing.InputRequest attribute) prompt_chaining_agent (class in maxtext.experimental.agent.ckpt_conversion_agent.prompt_chain) prompt_token_ids (maxtext.input_pipeline.distillation_data_processing.InputRequest attribute) pure_nnx (maxtext.configs.types.HardwareAndMesh attribute) pure_nnx_decoder (maxtext.configs.types.HardwareAndMesh attribute) Q q (maxtext.kernels.attention.splash_attention_kernel.SegmentIds attribute) q_layout (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) q_lora_rank (maxtext.configs.types.MlaAttention attribute) qk_clip_threshold (maxtext.configs.types.Attention attribute) qk_nope_head_dim (maxtext.configs.types.MlaAttention attribute) qk_norm_with_scale (maxtext.configs.types.ModelArchitecture attribute) qk_product() (maxtext.layers.attention_op.AttentionOp method) qk_rope_head_dim (maxtext.configs.types.MlaAttention attribute) qkv_proj (maxtext.configs.types.RematAndOffload attribute) QKV_PROJ_OFFLOADED (maxtext.configs.types.RematPolicy attribute) qkv_projection() (maxtext.layers.attentions.Attention method) (maxtext.models.gpt3.Gpt3MultiHeadAttention method) QKVLayout (class in maxtext.kernels.attention.splash_attention_kernel) quant (maxtext.layers.attentions.Attention attribute) (maxtext.layers.decoders.Decoder attribute) (maxtext.layers.decoders.DecoderLayer attribute) (maxtext.layers.decoders.SequentialBlockDecoderLayers attribute) (maxtext.models.gemma.GemmaDecoderLayer attribute) (maxtext.models.gemma2.Gemma2DecoderLayer attribute) (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) (maxtext.models.gpt_oss.GptOssScannableBlock attribute) (maxtext.models.models.TransformerLinenPure attribute) (maxtext.models.olmo3.Olmo3ScannableBlock attribute) (maxtext.models.qwen3.Qwen3NextDecoderLayer attribute) (maxtext.models.qwen3.Qwen3NextFullAttention attribute) (maxtext.models.qwen3.Qwen3NextScannableBlock attribute) (maxtext.models.qwen3.Qwen3NextSparseMoeBlock attribute) (maxtext.models.qwen3_5.Qwen3_5DecoderLayer attribute) quant_cfg_path (maxtext.configs.types.Quantization attribute) quant_dg (maxtext.layers.quantizations.AqtQuantization attribute) quant_dot_general (maxtext.layers.linears.DenseGeneral property) (maxtext.layers.moe.GateLogit property) quant_mode (maxtext.layers.quantizations.AqtQuantization attribute) (maxtext.layers.quantizations.Fp8Quantization attribute) (maxtext.layers.quantizations.NANOOFp8Quantization attribute) (maxtext.layers.quantizations.QwixQuantization attribute) Quantization (class in maxtext.configs.types) (class in maxtext.layers.quantizations) quantization (maxtext.configs.types.Quantization attribute) quantization_local_shard_count (maxtext.configs.types.Quantization attribute) QuantizationType (class in maxtext.configs.types) quantize_kvcache (maxtext.configs.types.Quantization attribute) query_proj (maxtext.configs.types.RematAndOffload attribute) query_projection() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) (maxtext.layers.attentions.Attention method) query_wa_proj (maxtext.configs.types.RematAndOffload attribute) QWEN2_VLLM_MAPPING (class in maxtext.integration.tunix.weight_mapping.qwen2) Qwen2DecoderLayer (class in maxtext.models.qwen2) Qwen3_5DecoderLayer (class in maxtext.models.qwen3_5) Qwen3_5FullAttention (class in maxtext.models.qwen3_5) Qwen3_5GatedDeltaNet (class in maxtext.models.qwen3_5) Qwen3_5ScannableBlock (class in maxtext.models.qwen3_5) Qwen3_5SparseMoEBlock (class in maxtext.models.qwen3_5) QWEN3_MAXTEXT_TO_HF_PARAM_HOOK_FN() (in module maxtext.experimental.agent.ckpt_conversion_agent.ground_truth.qwen3) QWEN3_NEXT_HF_WEIGHTS_TO_SHAPE() (in module maxtext.checkpoint_conversion.utils.hf_shape) QWEN3_NEXT_MAXTEXT_TO_HF_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) QWEN3_NEXT_MAXTEXT_TO_HF_PARAM_MAPPING() (in module maxtext.checkpoint_conversion.utils.param_mapping) QWEN3_NNX_TO_VLLM_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) QWEN3_OMNI_MOE_MAXTEXT_TO_HF_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) QWEN3_OMNI_MOE_MAXTEXT_TO_HF_PARAM_MAPPING() (in module maxtext.checkpoint_conversion.utils.param_mapping) qwen3_omni_mrope_embedding_as_linen() (in module maxtext.layers.embeddings) QWEN3_VLLM_MAPPING (class in maxtext.integration.tunix.weight_mapping.qwen3) Qwen3CustomAttention (class in maxtext.models.qwen3_custom) Qwen3CustomMoeDecoderLayer (class in maxtext.models.qwen3_custom) Qwen3DecoderLayer (class in maxtext.models.qwen3) Qwen3MaxTextToVLLMConverter (class in maxtext.integration.vllm.torchax_converter.qwen3_moe) Qwen3MoeDecoderLayer (class in maxtext.models.qwen3) Qwen3Next (class in maxtext.configs.types) Qwen3NextDecoderLayer (class in maxtext.models.qwen3) Qwen3NextFullAttention (class in maxtext.models.qwen3) Qwen3NextGatedDeltaNet (class in maxtext.models.qwen3) Qwen3NextRMSNorm() (in module maxtext.layers.normalizations) Qwen3NextRMSNormGated (class in maxtext.layers.normalizations) Qwen3NextScannableBlock (class in maxtext.models.qwen3) Qwen3NextSparseMoeBlock (class in maxtext.models.qwen3) qwen3omni_audioencoder_as_linen() (in module maxtext.models.qwen3) qwen3omni_audioprojector_as_linen() (in module maxtext.models.qwen3) qwen3omni_visionencoder_as_linen() (in module maxtext.models.qwen3) qwen3omni_visionprojector_as_linen() (in module maxtext.models.qwen3) Qwen3OmniAudioEncoder (class in maxtext.models.qwen3) Qwen3OmniAudioEncoderLayer (class in maxtext.models.qwen3) Qwen3OmniAudioProjector (class in maxtext.models.qwen3) qwen3omnimoe_vision_pos_embed_interpolate_as_linen() (in module maxtext.layers.embeddings) Qwen3OmniMoeThinkerTextRotaryEmbedding (class in maxtext.layers.embeddings) Qwen3OmniMoeVisionAttention (class in maxtext.models.qwen3) Qwen3OmniMoeVisionBlock (class in maxtext.models.qwen3) Qwen3OmniMoeVisionEncoder (class in maxtext.models.qwen3) Qwen3OmniMoeVisionMLP (class in maxtext.models.qwen3) Qwen3OmniMoeVisionPatchEmbed (class in maxtext.models.qwen3) Qwen3OmniMoeVisionPatchMerger (class in maxtext.models.qwen3) Qwen3OmniMoeVisionPosEmbedInterpolate (class in maxtext.layers.embeddings) Qwen3OmniMoeVisionProjector (class in maxtext.models.qwen3) Qwen3OmniMoeVisionRotaryEmbedding (class in maxtext.layers.embeddings) Qwen3OmniPreprocessorOutput (class in maxtext.multimodal.processor_qwen3_omni) QWEN_HF_WEIGHTS_TO_SHAPE() (in module maxtext.checkpoint_conversion.utils.hf_shape) QWEN_MAXTEXT_TO_HF_PARAM_HOOK_FN() (in module maxtext.checkpoint_conversion.utils.param_mapping) QWEN_MAXTEXT_TO_HF_PARAM_MAPPING() (in module maxtext.checkpoint_conversion.utils.param_mapping) QwixDotGeneral (class in maxtext.layers.quantizations) QwixEinsum (class in maxtext.layers.quantizations) QwixQuantization (class in maxtext.layers.quantizations) R ragged_block_size (maxtext.configs.types.Attention attribute) ragged_buffer_factor (maxtext.configs.types.MoEGeneral attribute) ragged_flash_attention_kernel() (in module maxtext.kernels.attention.ragged_attention) ragged_gqa() (in module maxtext.kernels.attention.ragged_attention) ragged_mha() (in module maxtext.kernels.attention.ragged_attention) ragged_mqa() (in module maxtext.kernels.attention.ragged_attention) rampup_end_step (maxtext.configs.types.DerivedValues attribute) rampup_samples_per_increment_to_load (maxtext.configs.types.DerivedValues attribute) random_routing() (in module maxtext.layers.moe) raw_generate_synthetic_data() (maxtext.input_pipeline.synthetic_data_processing.SyntheticDataIterator static method) read_npy_header_from_path() (in module maxtext.input_pipeline.olmo_data) read_raw_metadata_from_path() (in module maxtext.input_pipeline.olmo_data) realign_output_microbatches() (maxtext.layers.pipeline.CircularPipeline method) reasoning_end_token (maxtext.configs.types.SpecialTokens attribute) reasoning_start_token (maxtext.configs.types.SpecialTokens attribute) record_internal_nn_metrics (maxtext.configs.types.Metrics attribute) reduce_concat_tokens() (in module maxtext.input_pipeline.tfds_data_processing_c4_mlperf) reduce_scatter_ws_grad() (in module maxtext.models.deepseek_batchsplit) reference_gqa() (in module maxtext.kernels.attention.ragged_attention) reference_mha() (in module maxtext.kernels.attention.ragged_attention) reference_mqa() (in module maxtext.kernels.attention.ragged_attention) reformat_prompt() (in module maxtext.input_pipeline.input_pipeline_utils) (in module maxtext.multimodal.processor) reformat_prompt_gemma3() (in module maxtext.multimodal.processor_gemma3) reformat_prompt_gemma4() (in module maxtext.multimodal.processor_gemma4) reformat_prompt_llama4() (in module maxtext.multimodal.processor_llama4) reformat_prompt_qwen3_omni() (in module maxtext.multimodal.processor_qwen3_omni) reformat_response() (in module maxtext.input_pipeline.input_pipeline_utils) (in module maxtext.multimodal.processor) ReformatPacking (class in maxtext.input_pipeline.input_pipeline_utils) register() (in module maxtext.integration.vllm.maxtext_vllm_adapter) Rekey (class in maxtext.input_pipeline.input_pipeline_utils) rekey() (in module maxtext.input_pipeline.tfds_data_processing_c4_mlperf) REMAT (maxtext.configs.types.RematLocation attribute) remat_policy (maxtext.configs.types.RematAndOffload attribute) (maxtext.layers.pipeline.PipelineBase attribute) (maxtext.layers.pipeline_deprecated.Pipeline attribute) remat_policy_for_vit (maxtext.configs.types.RematAndOffload attribute) RematAndOffload (class in maxtext.configs.types) RematLocation (class in maxtext.configs.types) RematPolicy (class in maxtext.configs.types) RemoteIteratorWrapper (class in maxtext.input_pipeline.multihost_dataloading) remove_quantized_params() (in module maxtext.layers.quantizations) remove_size_one_mesh_axis_from_type (maxtext.configs.types.HardwareAndMesh attribute) RepetitionTuple (class in maxtext.input_pipeline.olmo_data) replicate_quant_scale (maxtext.configs.types.Quantization attribute) replicate_scale (maxtext.layers.quantizations.AqtQuantization attribute) replicator_backup_interval_minutes (maxtext.configs.types.EmergencyCheckpointing attribute) report_heartbeat_metric_for_gcp_monitoring (maxtext.configs.types.GcpMonitoring attribute) report_performance_metric_for_gcp_monitoring (maxtext.configs.types.GcpMonitoring attribute) res_mapping() (maxtext.layers.mhc.ManifoldConstrainedHyperConnections method) reset() (maxtext.experimental.rl.grpo_input_pipeline.SingleHostDataLoader method) (maxtext.input_pipeline.multihost_dataloading.MultiHostDataLoadIterator method) (maxtext.input_pipeline.multihost_dataloading.RemoteIteratorWrapper method) (maxtext.input_pipeline.synthetic_data_processing.PlaceHolderDataIterator method) (maxtext.input_pipeline.synthetic_data_processing.SyntheticDataIterator method) reshape_and_update_weights() (maxtext.layers.moe.RoutedMoE method) reshape_q (maxtext.configs.types.InferenceLayout attribute) reshard_chunk_size (maxtext.configs.types.RL attribute) resolve_config_path() (in module maxtext.configs.pyconfig_deprecated) restore_state() (maxtext.input_pipeline.multihost_dataloading.RemoteIteratorWrapper method) retrieve_quantized_weight() (maxtext.layers.moe.RoutedMoE method) return_log_prob (maxtext.configs.types.InferenceGeneral attribute) reuse_example_batch (maxtext.configs.types.DatasetGeneral attribute) reverse_transepose() (maxtext.layers.attention_op.AttentionOp method) Reward (class in maxtext.configs.types) reward_exact_answer (maxtext.configs.types.Reward attribute) reward_exact_format_match (maxtext.configs.types.Reward attribute) reward_partial_format_match (maxtext.configs.types.Reward attribute) reward_ratio_guess_to_answer_high (maxtext.configs.types.Reward attribute) reward_ratio_guess_to_answer_low (maxtext.configs.types.Reward attribute) reward_white_space_format_match (maxtext.configs.types.Reward attribute) RL (class in maxtext.configs.types) rl (maxtext.configs.types.Debug attribute) (maxtext.configs.types.MaxTextConfig attribute) RLDataset (class in maxtext.configs.types) RLEvaluation (class in maxtext.configs.types) RLHardware (class in maxtext.configs.types) rms_norm() (in module maxtext.layers.normalizations) (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) RMSNorm (class in maxtext.layers.normalizations) rngs (maxtext.layers.attentions.L2Norm attribute) (maxtext.layers.embeddings.LlamaVisionRotaryEmbedding attribute) (maxtext.layers.embeddings.PositionalEmbedding attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionPosEmbedInterpolate attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionRotaryEmbedding attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) (maxtext.layers.learn_to_init_layer.LearnToInitDecoderLayer attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionMLP attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchEmbed attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchMerger attribute) roll_and_mask() (in module maxtext.layers.multi_token_prediction) rollout_data_parallelism (maxtext.configs.types.RLHardware attribute) rollout_expert_parallelism (maxtext.configs.types.RLHardware attribute) rollout_micro_batch_size (maxtext.configs.types.RLDataset attribute) rollout_tensor_parallelism (maxtext.configs.types.RLHardware attribute) Rope (class in maxtext.configs.types) rope_attention_scaling (maxtext.configs.types.YarnRope attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) rope_factor (maxtext.configs.types.YarnRope attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) rope_interleave (maxtext.configs.types.YarnRope attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) rope_linear_scaling_factor (maxtext.configs.types.Rope attribute) rope_max_timescale (maxtext.configs.types.Rope attribute) rope_min_timescale (maxtext.configs.types.Rope attribute) rope_theta (maxtext.layers.embeddings.LlamaVisionRotaryEmbedding attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionRotaryEmbedding attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) rope_theta_for_vit (maxtext.configs.types.VisionTower attribute) rope_truncate (maxtext.configs.types.YarnRope attribute) (maxtext.layers.embeddings.YarnRotaryEmbedding attribute) rope_type (maxtext.configs.types.Rope attribute) rope_use_scale (maxtext.configs.types.Rope attribute) RopeType (class in maxtext.configs.types) rotary_embedding_as_linen() (in module maxtext.layers.embeddings) RotaryEmbedding (class in maxtext.layers.embeddings) route() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) route_compute_unroute() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) route_compute_unroute_bwd() (in module maxtext.models.deepseek_batchsplit) route_impl_bwd() (in module maxtext.models.deepseek_batchsplit) route_impl_fwd() (in module maxtext.models.deepseek_batchsplit) routed_bias (maxtext.configs.types.DeepSeekMoE attribute) routed_bias_update_rate (maxtext.configs.types.DeepSeekMoE attribute) routed_moe (maxtext.layers.moe.RoutedAndSharedMoE property) routed_scaling_factor (maxtext.configs.types.DeepSeekMoE attribute) routed_score_func (maxtext.configs.types.DeepSeekMoE attribute) RoutedAndSharedMoE (class in maxtext.layers.moe) RoutedMoE (class in maxtext.layers.moe) run_chain() (maxtext.experimental.agent.ckpt_conversion_agent.prompt_chain.prompt_chaining_agent method) run_name (maxtext.configs.types.RunInfo attribute) run_one_iteration() (maxtext.layers.pipeline.CircularPipeline method) (maxtext.layers.pipeline.Pipeline method) (maxtext.layers.pipeline_deprecated.Pipeline method) RunInfo (class in maxtext.configs.types) S sa_block_kv (maxtext.configs.types.SplashAttention attribute) sa_block_kv_compute (maxtext.configs.types.SplashAttention attribute) sa_block_kv_dkv (maxtext.configs.types.SplashAttention attribute) sa_block_kv_dkv_compute (maxtext.configs.types.SplashAttention attribute) sa_block_kv_dq (maxtext.configs.types.SplashAttention attribute) sa_block_q (maxtext.configs.types.SplashAttention attribute) sa_block_q_dkv (maxtext.configs.types.SplashAttention attribute) sa_block_q_dq (maxtext.configs.types.SplashAttention attribute) sa_k_layout (maxtext.configs.types.SplashAttention attribute) sa_q_layout (maxtext.configs.types.SplashAttention attribute) sa_use_fused_bwd_kernel (maxtext.configs.types.SplashAttention attribute) sa_v_layout (maxtext.configs.types.SplashAttention attribute) sampler_devices_fraction (maxtext.configs.types.RLHardware attribute) SamplingStrategy (class in maxtext.configs.types) save() (maxtext.input_pipeline.olmo_data.OlmoNpyIndex method) save_checkpoint_on_completion (maxtext.configs.types.Checkpointing attribute) save_config_file() (in module maxtext.checkpoint_conversion.utils.utils) save_config_to_gcs (maxtext.configs.types.Metrics attribute) SAVE_DOT_EXCEPT_MLP (maxtext.configs.types.RematPolicy attribute) SAVE_DOT_EXCEPT_MLPWI (maxtext.configs.types.RematPolicy attribute) SAVE_DOT_WITH_CONTEXT_EXCEPT_MLP (maxtext.configs.types.RematPolicy attribute) save_index_file() (in module maxtext.checkpoint_conversion.utils.utils) save_model_files() (in module maxtext.checkpoint_conversion.utils.utils) SAVE_OUT_PROJ (maxtext.configs.types.RematPolicy attribute) SAVE_QKV_PROJ (maxtext.configs.types.RematPolicy attribute) save_quantized_params_path (maxtext.configs.types.Checkpointing attribute) save_safetensor_file() (in module maxtext.checkpoint_conversion.utils.utils) save_state() (maxtext.input_pipeline.multihost_dataloading.RemoteIteratorWrapper method) save_weight_files() (in module maxtext.checkpoint_conversion.utils.utils) sc_gather_reduce() (in module maxtext.kernels.gather_reduce_pallas) (in module maxtext.kernels.gather_reduce_sc) scale_embedding_for_audio (maxtext.configs.types.AudioEncoder attribute) scan_batch_split_layers() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) scan_decoder_layers() (maxtext.layers.decoders.Decoder method) scan_layers (maxtext.configs.types.HardwareAndMesh attribute) scan_layers_per_stage (maxtext.configs.types.PipelineParallelism attribute) scan_pipeline_iterations (maxtext.configs.types.PipelineParallelism attribute) scan_pipeline_repeats (maxtext.configs.types.PipelineParallelism attribute) scheduling_group() (in module maxtext.models.deepseek_batchsplit) scope (maxtext.layers.decoders.Decoder attribute) (maxtext.layers.decoders.DecoderLayer attribute) (maxtext.layers.decoders.SequentialBlockDecoderLayers attribute) (maxtext.layers.nnx_wrappers.ToLinen attribute) (maxtext.layers.pipeline.CircularPipeline attribute) (maxtext.layers.pipeline.Pipeline attribute) (maxtext.layers.pipeline.PipelineBase attribute) (maxtext.layers.pipeline_deprecated.Pipeline attribute) (maxtext.layers.quantizations.Fp8Einsum attribute) (maxtext.layers.quantizations.QwixDotGeneral attribute) (maxtext.layers.quantizations.QwixEinsum attribute) (maxtext.models.models.TransformerLinen attribute) (maxtext.models.models.TransformerLinenPure attribute) SegmentIds (class in maxtext.kernels.attention.splash_attention_kernel) select_input_dtype() (in module maxtext.kernels.megablox.common) self_attention_module_name (maxtext.layers.learn_to_init_layer.LearnToInitDecoderLayer attribute) self_attention_with_norm_op() (maxtext.models.deepseek.DeepSeekGenericLayer method) SENTENCEPIECE (maxtext.configs.types.TokenizerType attribute) SentencePieceTokenizer (class in maxtext.input_pipeline.tokenizer) SEQ_MINOR (maxtext.kernels.attention.splash_attention_kernel.QKVLayout attribute) sequence_length (maxtext.input_pipeline.grain_tokenizer.TokenizerTransformBase attribute) (maxtext.input_pipeline.olmo_data.OlmoNpyIndex attribute) SequenceExample (class in maxtext.input_pipeline.protos.example_pb2) SequentialBlockDecoderLayers (class in maxtext.layers.decoders) serialize() (maxtext.checkpoint_conversion.to_maxtext.LazyTensorHandler method) set_and_validate_pipeline_config() (in module maxtext.configs.pyconfig_deprecated) set_derived_and_validate_values() (maxtext.configs.types.MaxTextConfig method) set_mu_dtype() (in module maxtext.configs.pyconfig_deprecated) set_remat_policy() (maxtext.layers.decoders.Decoder method) set_remat_policy_on_layers_per_stage (maxtext.configs.types.PipelineParallelism attribute) set_remat_policy_on_pipeline_iterations (maxtext.configs.types.PipelineParallelism attribute) setup() (maxtext.layers.decoders.Decoder method) (maxtext.layers.pipeline.PipelineBase method) (maxtext.layers.pipeline_deprecated.Pipeline method) (maxtext.layers.quantizations.Fp8Einsum method) (maxtext.models.models.TransformerLinenPure method) sft_preprocessing_pipeline() (in module maxtext.input_pipeline.grain_data_processing) sft_train_on_completion_only (maxtext.configs.types.FineTuning attribute) SFTPromptMasking (class in maxtext.input_pipeline.input_pipeline_utils) SFTPromptMaskingVision (class in maxtext.input_pipeline.input_pipeline_utils) SGD (maxtext.configs.types.OptimizerType attribute) shard_checkpoint() (in module maxtext.checkpoint_conversion.utils.utils) shard_dim_by_stages() (maxtext.layers.pipeline.Pipeline method) (maxtext.layers.pipeline_deprecated.Pipeline method) shard_exp_on_fsdp (maxtext.configs.types.MoEGeneral attribute) shard_indices() (maxtext.input_pipeline.olmo_data_grain.OlmoIndexSampler method) shard_mode (maxtext.configs.types.HardwareAndMesh attribute) shard_optimizer_over_data (maxtext.configs.types.LayoutAndSharding attribute) sharding_strategy (maxtext.configs.types.RunInfo attribute) sharding_tolerance (maxtext.configs.types.LayoutAndSharding attribute) shardy (maxtext.configs.types.HardwareAndMesh attribute) share_kv_projections (maxtext.configs.types.Attention attribute) shared_expert() (in module maxtext.models.deepseek_batchsplit) shared_expert_and_route() (in module maxtext.models.deepseek_batchsplit) shared_experts (maxtext.configs.types.DeepSeekMoE attribute) shift_and_refine() (in module maxtext.input_pipeline.input_pipeline_utils) shift_data_by_truncation() (in module maxtext.input_pipeline.input_pipeline_utils) shift_left() (in module maxtext.input_pipeline.input_pipeline_utils) shift_right() (in module maxtext.input_pipeline.input_pipeline_utils) ShiftData (class in maxtext.input_pipeline.input_pipeline_utils) ShiftToInputsTargets (class in maxtext.input_pipeline.olmo_data_grain) ShortConv (class in maxtext.layers.engram) should_update_load_balance() (maxtext.layers.moe.RoutedMoE method) shuffled_global_indices() (maxtext.input_pipeline.olmo_data_grain.OlmoIndexSampler method) SimpleDecoderLayer (class in maxtext.models.simple_layer) SimpleMlpDecoderLayer (class in maxtext.models.simple_layer) SingleHostDataLoader (class in maxtext.experimental.rl.grpo_input_pipeline) sinkhorn() (in module maxtext.layers.mhc) sinkhorn_iterations (maxtext.configs.types.ManifoldConstrainedHyperConnections attribute) size (maxtext.checkpoint_conversion.to_maxtext.LazyTensor property) skip_first_n_steps_for_profiler (maxtext.configs.types.Profiling attribute) skip_jax_distributed_system (maxtext.configs.types.DevelopmentAndDebugging attribute) skip_rng (maxtext.layers.nnx_wrappers.ToLinen attribute) skip_step_interval (maxtext.configs.types.Optimizer attribute) skip_step_on_spikes (maxtext.configs.types.Optimizer attribute) skip_step_scaling_factor (maxtext.configs.types.Optimizer attribute) sliding_window_size (maxtext.configs.types.Attention attribute) smart_nframes() (in module maxtext.multimodal.processor_qwen3_omni) smart_resize() (in module maxtext.multimodal.processor_qwen3_omni) solution_end_token (maxtext.configs.types.SpecialTokens attribute) solution_start_token (maxtext.configs.types.SpecialTokens attribute) source_checkpoint_layout (maxtext.configs.types.Checkpointing attribute) sparse_matmul (maxtext.configs.types.MoEKernels attribute) sparse_matmul() (maxtext.layers.moe.RoutedMoE method) spatial_merge_size (maxtext.layers.embeddings.Qwen3OmniMoeVisionPosEmbedInterpolate attribute) (maxtext.layers.embeddings.Qwen3OmniMoeVisionRotaryEmbedding attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionEncoder attribute) spatial_merge_size_for_vit (maxtext.configs.types.VisionTower attribute) special_tokens (maxtext.input_pipeline.tokenizer.TikTokenTokenizer attribute) SpecialTokens (class in maxtext.configs.types) spectrogram() (in module maxtext.multimodal.utils) SplashAttention (class in maxtext.configs.types) SplashAttentionKernel (class in maxtext.kernels.attention.splash_attention_kernel) split() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) split_to_tiles() (in module maxtext.multimodal.processor_llama4) split_tokens() (in module maxtext.input_pipeline.tfds_data_processing_c4_mlperf) split_tokens_to_targets_length() (in module maxtext.input_pipeline.tfds_data_processing_c4_mlperf) stack_prefill_result_cache (maxtext.configs.types.InferenceLayout attribute) stack_trace_interval_seconds (maxtext.configs.types.StackTrace attribute) stack_trace_to_cloud (maxtext.configs.types.StackTrace attribute) StackTrace (class in maxtext.configs.types) staggered_call() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) StandaloneVllmWeightMapping (class in maxtext.integration.tunix.weight_mapping) start (maxtext.input_pipeline.olmo_data.RepetitionTuple attribute) StaticWrapper (class in maxtext.layers.engram) step_deviation_interval_seconds (maxtext.configs.types.Goodput attribute) steps (maxtext.configs.types.TrainingLoop attribute) stop_strings (maxtext.configs.types.VLLM attribute) string_to_bool() (in module maxtext.configs.pyconfig_deprecated) student_overrides (maxtext.configs.types.Distillation attribute) student_params_to_update (maxtext.configs.types.Distillation attribute) subslice_shape (maxtext.configs.types.DevelopmentAndDebugging attribute) sum_grads() (in module maxtext.models.deepseek_batchsplit) supports_bfloat16_matmul() (in module maxtext.kernels.megablox.common) swap_space_vllm_gb (maxtext.configs.types.VLLM attribute) SYNTHETIC (maxtext.configs.types.DatasetType attribute) SyntheticDataIterator (class in maxtext.input_pipeline.synthetic_data_processing) system_prompt (maxtext.configs.types.RL attribute) T target_eval_loss (maxtext.configs.types.TrainingLoop attribute) TE_FP8_CS (maxtext.configs.types.QuantizationType attribute) TE_FP8_DS (maxtext.configs.types.QuantizationType attribute) TE_MXFP8 (maxtext.configs.types.QuantizationType attribute) TE_NVFP4 (maxtext.configs.types.QuantizationType attribute) TE_NVFP4_NO_RHT (maxtext.configs.types.QuantizationType attribute) teacher_overrides (maxtext.configs.types.Distillation attribute) temperature_tuning (maxtext.configs.types.Llama4Attention attribute) temporal_patch_size (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchEmbed attribute) temporal_patch_size_for_vit (maxtext.configs.types.VisionTower attribute) TENSOR_A (maxtext.layers.learn_to_init_layer.LearnToInitDense attribute) TENSOR_B (maxtext.layers.learn_to_init_layer.LearnToInitDense attribute) TENSOR_C (maxtext.layers.learn_to_init_layer.LearnToInitDense attribute) TENSOR_W (maxtext.layers.learn_to_init_layer.LearnToInitDense attribute) Tensorboard (class in maxtext.configs.types) tensorboard_dir (maxtext.configs.types.DerivedValues attribute) tensors_on_device (maxtext.configs.types.DerivedValues attribute) tensors_to_offload (maxtext.configs.types.DerivedValues attribute) test_batch_start_index (maxtext.configs.types.RLDataset attribute) TFDS (maxtext.configs.types.DatasetType attribute) TfdsDataset (class in maxtext.configs.types) tgmm() (in module maxtext.kernels.megablox.backend) TIKTOKEN (maxtext.configs.types.TokenizerType attribute) TikTokenTokenizer (class in maxtext.input_pipeline.tokenizer) tile_size_for_vit (maxtext.configs.types.VisionTower attribute) timer() (in module maxtext.integration.vllm.torchax_converter.base) times (maxtext.input_pipeline.olmo_data.RepetitionTuple attribute) timescale (maxtext.layers.embeddings.Gemma4PartialRotaryEmbedding property) (maxtext.layers.embeddings.LLaMARotaryEmbedding property) (maxtext.layers.embeddings.RotaryEmbedding property) to_hf_hook_fns() (maxtext.integration.tunix.tunix_adapter.TunixMaxTextAdapter method) (maxtext.integration.tunix.utils.VllmWeightMapping method) (maxtext.integration.tunix.weight_mapping.deepseek3.DEEPSEEK_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.gpt_oss.GPT_OSS_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.llama3.LLAMA3_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.qwen2.QWEN2_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.qwen3.QWEN3_VLLM_MAPPING static method) to_hf_mapping() (maxtext.integration.tunix.utils.VllmWeightMapping method) (maxtext.integration.tunix.weight_mapping.deepseek3.DEEPSEEK_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.gpt_oss.GPT_OSS_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.llama3.LLAMA3_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.qwen2.QWEN2_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.qwen3.QWEN3_VLLM_MAPPING static method) to_hf_mappings() (maxtext.integration.tunix.tunix_adapter.TunixMaxTextAdapter method) to_hf_transpose_keys() (maxtext.integration.tunix.tunix_adapter.TunixMaxTextAdapter method) (maxtext.integration.tunix.utils.VllmWeightMapping method) (maxtext.integration.tunix.weight_mapping.deepseek3.DEEPSEEK_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.gpt_oss.GPT_OSS_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.llama3.LLAMA3_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.qwen2.QWEN2_VLLM_MAPPING static method) (maxtext.integration.tunix.weight_mapping.qwen3.QWEN3_VLLM_MAPPING static method) to_json_dict() (maxtext.input_pipeline.olmo_data.OlmoNpyIndex method) to_linen() (in module maxtext.layers.nnx_wrappers) to_linen_class() (in module maxtext.layers.nnx_wrappers) to_linen_var() (in module maxtext.layers.nnx_wrappers) to_nnx_var() (in module maxtext.layers.nnx_wrappers) tokenization() (in module maxtext.input_pipeline.input_pipeline_utils) tokenize_eval_data (maxtext.configs.types.Tokenizer attribute) tokenize_train_data (maxtext.configs.types.Tokenizer attribute) TokenizeAndChunk (class in maxtext.input_pipeline.grain_tokenizer) TokenizeAndTrim (class in maxtext.input_pipeline.grain_tokenizer) TokenizeOp() (in module maxtext.input_pipeline.input_pipeline_utils) Tokenizer (class in maxtext.configs.types) tokenizer (maxtext.input_pipeline.grain_tokenizer.TokenizerTransformBase attribute) (maxtext.input_pipeline.olmo_data.OlmoNpyIndex attribute) tokenizer_path (maxtext.configs.types.Tokenizer attribute) tokenizer_type (maxtext.configs.types.Tokenizer attribute) TokenizerTransformBase (class in maxtext.input_pipeline.grain_tokenizer) TokenizerType (class in maxtext.configs.types) ToLinen (class in maxtext.layers.nnx_wrappers) ToNNX (class in maxtext.layers.nnx_wrappers) TOPK (maxtext.configs.types.SamplingStrategy attribute) topk_routing_group (maxtext.configs.types.DeepSeekMoE attribute) total_instances (maxtext.input_pipeline.olmo_data.OlmoNpyIndex attribute) total_tokens (maxtext.input_pipeline.olmo_data.OlmoNpyIndex attribute) tpu_flash_attention() (in module maxtext.models.deepseek_batchsplit) (maxtext.layers.attention_op.AttentionOp method) tpu_flash_attention_bwd() (in module maxtext.models.deepseek_batchsplit) tpu_generation() (in module maxtext.kernels.megablox.common) tpu_kind() (in module maxtext.kernels.megablox.common) tpu_num_chips_to_profile_per_task (maxtext.configs.types.Profiling attribute) tpu_num_sparse_core_tiles_to_trace (maxtext.configs.types.Profiling attribute) tpu_num_sparse_cores_to_trace (maxtext.configs.types.Profiling attribute) tpu_ragged_attention() (maxtext.layers.attention_op.AttentionOp method) train_data_columns (maxtext.configs.types.DatasetGeneral attribute) train_fraction (maxtext.configs.types.RLDataset attribute) train_image_column (maxtext.configs.types.DatasetGeneral attribute) train_micro_batch_size (maxtext.configs.types.RLDataset attribute) train_split (maxtext.configs.types.TfdsDataset attribute) trainable_parameters_mask (maxtext.configs.types.Optimizer attribute) trainable_position_size (maxtext.configs.types.PositionalEmbedding attribute) trainer_devices_fraction (maxtext.configs.types.RLHardware attribute) TrainingLoop (class in maxtext.configs.types) TrainStateNNX (class in maxtext.layers.train_state_nnx) transform_bias() (maxtext.layers.moe.RoutedMoE method) TransformationAgent (class in maxtext.experimental.agent.ckpt_conversion_agent.transformation) Transformer (class in maxtext.models.models) transformer_as_linen() (in module maxtext.models.models) transformer_layer (maxtext.layers.multi_token_prediction.MultiTokenPredictionLayer property) TransformerEngineQuantization (class in maxtext.layers.quantizations) TransformerLinen (class in maxtext.models.models) TransformerLinenPure (class in maxtext.models.models) tree_flatten() (maxtext.configs.pyconfig_deprecated.HyperParameters method) (maxtext.kernels.attention.splash_attention_kernel.SplashAttentionKernel method) tree_unflatten() (maxtext.configs.pyconfig_deprecated.HyperParameters class method) (maxtext.kernels.attention.splash_attention_kernel.SplashAttentionKernel class method) truncate_to_max_allowable_length() (in module maxtext.input_pipeline.input_pipeline_utils) TunixMaxTextAdapter (class in maxtext.integration.tunix.tunix_adapter) U unpermute() (maxtext.layers.moe.RoutedMoE method) unroute() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) unroute_impl_bwd() (in module maxtext.models.deepseek_batchsplit) unroute_impl_fwd() (in module maxtext.models.deepseek_batchsplit) unroute_ubatch_fn() (in module maxtext.models.deepseek_batchsplit) unroute_ubatch_fn_bwd() (in module maxtext.models.deepseek_batchsplit) unroute_ubatch_fn_remat() (in module maxtext.models.deepseek_batchsplit) unroute_ubatch_remat_and_bwd_shard_mapped() (in module maxtext.models.deepseek_batchsplit) unroute_ubatch_shard_mapped() (in module maxtext.models.deepseek_batchsplit) update_indexer_cache() (maxtext.layers.attention_mla.Indexer method) update_kv_caches() (maxtext.layers.attentions.Attention method) update_mla_kv_caches() (maxtext.layers.attention_mla.MLA method) update_model_keys() (in module maxtext.configs.pyconfig_deprecated) upload_all_profiler_results (maxtext.configs.types.Profiling attribute) upload_file_to_gcs() (in module maxtext.checkpoint_conversion.utils.utils) upload_folder_to_gcs() (in module maxtext.checkpoint_conversion.utils.utils) upload_state_dict_to_gcs() (in module maxtext.checkpoint_conversion.utils.utils) use_2d_fsdp_sharding (maxtext.configs.types.MoEGeneral attribute) use_agentic_rollout (maxtext.configs.types.RL attribute) use_audio (maxtext.configs.types.MultimodalGeneral attribute) use_audio_in_video (maxtext.configs.types.MultimodalGeneral attribute) use_batch_split_schedule (maxtext.configs.types.DeepSeekMoE attribute) use_bias (maxtext.models.gpt3.Gpt3MultiHeadAttention attribute) use_chat_template (maxtext.configs.types.Tokenizer attribute) use_chunked_prefill (maxtext.configs.types.InferenceGeneral attribute) use_custom_sort_vjp (maxtext.configs.types.MoEGeneral attribute) use_dpo (maxtext.configs.types.FineTuning attribute) use_fused_bwd_kernel (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) use_gather_mosaic_kernel (maxtext.configs.types.MoEGeneral attribute) use_grpo (maxtext.configs.types.FineTuning attribute) use_indexer (maxtext.configs.types.AttentionIndexer attribute) use_iota_embed (maxtext.configs.types.PositionalEmbedding attribute) use_jax_splash (maxtext.configs.types.Attention attribute) use_manual_quantization (maxtext.configs.types.Quantization attribute) use_max_logit_estimate (maxtext.configs.types.SplashAttention attribute) use_mrope (maxtext.configs.types.MultimodalGeneral attribute) use_multimodal (maxtext.configs.types.MultimodalGeneral attribute) use_pathways (maxtext.configs.types.RLHardware attribute) use_post_attn_norm (maxtext.configs.types.Attention attribute) use_post_ffw_norm (maxtext.configs.types.Attention attribute) use_postshuffle_norm (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchMerger attribute) use_qk_clip (maxtext.configs.types.Attention attribute) use_qk_norm (maxtext.configs.types.Llama4Attention attribute) use_qk_norm_in_gdn (maxtext.configs.types.Qwen3Next attribute) use_qwix_quantization (maxtext.configs.types.Quantization attribute) use_ragged_attention (maxtext.configs.types.Attention attribute) use_random_routing (maxtext.configs.types.MoEGeneral attribute) use_replicator_service (maxtext.configs.types.EmergencyCheckpointing attribute) use_ring_of_experts (maxtext.configs.types.MoEGeneral attribute) use_sft (maxtext.configs.types.FineTuning attribute) use_splash_scheduler (maxtext.configs.types.SplashAttention attribute) use_standalone_converter (maxtext.configs.types.VLLM attribute) use_tokamax_gmm (maxtext.configs.types.Attention attribute) use_tokamax_splash (maxtext.configs.types.Attention attribute) use_truncation (maxtext.configs.types.Tokenizer attribute) use_tunix_gradient_accumulation (maxtext.configs.types.Optimizer attribute) use_untrainable_positional_embedding (maxtext.configs.types.PositionalEmbedding attribute) use_vertex_tensorboard (maxtext.configs.types.Tensorboard attribute) using_expert_parallelism() (in module maxtext.configs.pyconfig_deprecated) using_fsdp_and_transpose_parallelism() (in module maxtext.configs.pyconfig_deprecated) using_pipeline_parallelism (maxtext.configs.types.DerivedValues attribute) using_pipeline_parallelism() (in module maxtext.configs.pyconfig_deprecated) using_sequence_parallelism() (in module maxtext.configs.pyconfig_deprecated) using_tensor_parallelism() (in module maxtext.configs.pyconfig_deprecated) V v_head_dim (maxtext.configs.types.MlaAttention attribute) v_layout (maxtext.kernels.attention.splash_attention_kernel.BlockSizes attribute) v_norm_with_scale (maxtext.configs.types.ModelArchitecture attribute) validate_and_assign_remat_tensors() (in module maxtext.configs.pyconfig_deprecated) validate_and_configure_sft_columns() (in module maxtext.input_pipeline.data_processing_utils) validate_and_filter_param_map_keys() (in module maxtext.checkpoint_conversion.utils.utils) validate_and_set_hlo_dump_defaults() (in module maxtext.configs.pyconfig_deprecated) validate_and_update_keys() (in module maxtext.configs.pyconfig_deprecated) validate_attention_kernel() (in module maxtext.configs.pyconfig_deprecated) validate_attention_type() (in module maxtext.configs.pyconfig_deprecated) validate_attention_window_params() (in module maxtext.configs.pyconfig_deprecated) validate_compute_axis_order() (in module maxtext.configs.pyconfig_deprecated) (in module maxtext.layers.attention_op) validate_constant_bound() (in module maxtext.configs.pyconfig_deprecated) validate_context_parallel_strategy_ring() (in module maxtext.configs.pyconfig_deprecated) validate_data_input() (in module maxtext.configs.pyconfig_deprecated) validate_deepseek_moe() (in module maxtext.configs.pyconfig_deprecated) validate_expert_shard_attention_option() (in module maxtext.configs.pyconfig_deprecated) validate_gpt_oss_moe() (in module maxtext.configs.pyconfig_deprecated) validate_gpu_flash_attention() (in module maxtext.layers.attention_op) validate_keys() (in module maxtext.configs.pyconfig_deprecated) validate_kv_quant_axis() (in module maxtext.configs.pyconfig_deprecated) validate_llama4_config() (in module maxtext.configs.pyconfig_deprecated) validate_mlp_dim() (in module maxtext.configs.pyconfig_deprecated) validate_moba_attention() (in module maxtext.configs.pyconfig_deprecated) validate_model_call_mode() (in module maxtext.configs.pyconfig_deprecated) validate_model_name() (in module maxtext.configs.pyconfig_deprecated) validate_multimodal_model_name() (in module maxtext.configs.pyconfig_deprecated) validate_multiple_slices() (in module maxtext.configs.pyconfig_deprecated) validate_no_keys_overwritten_twice() (in module maxtext.configs.pyconfig_deprecated) validate_optimizer_sharding_over_data() (in module maxtext.configs.pyconfig_deprecated) validate_periodic_profiler() (in module maxtext.configs.pyconfig_deprecated) validate_prefill_and_target_lengths() (in module maxtext.configs.pyconfig_deprecated) validate_profiler_type() (in module maxtext.configs.pyconfig_deprecated) validate_quantization_methods() (in module maxtext.configs.pyconfig_deprecated) validate_ragged_buffer_factor() (maxtext.configs.types.MaxTextConfig method) validate_ragged_dot() (in module maxtext.configs.pyconfig_deprecated) validate_rampup_batch_size() (in module maxtext.configs.pyconfig_deprecated) validate_ring_of_experts_parallelism() (in module maxtext.configs.pyconfig_deprecated) validate_rope_type() (in module maxtext.configs.pyconfig_deprecated) validate_shard_expert_on_fsdp() (in module maxtext.configs.pyconfig_deprecated) validate_shard_mode() (in module maxtext.configs.pyconfig_deprecated) validate_sparse_matmul_parallelism() (in module maxtext.configs.pyconfig_deprecated) validate_tokamax_usage() (in module maxtext.configs.pyconfig_deprecated) validate_tokenizer() (in module maxtext.configs.pyconfig_deprecated) validate_vocab_tiling() (in module maxtext.configs.pyconfig_deprecated) value_proj (maxtext.configs.types.RematAndOffload attribute) variable_to_logically_partitioned() (in module maxtext.layers.initializers) VectorTypeHelper (class in maxtext.kernels.gather_reduce_sc) verify_chat_template_generation_prompt_logic() (in module maxtext.input_pipeline.input_pipeline_utils) verify_dictionaries() (in module maxtext.checkpoint_conversion.compare_hf_ckpt) verify_dsl() (maxtext.experimental.agent.ckpt_conversion_agent.dsl.DSLAgent method) vertex_tensorboard_project (maxtext.configs.types.Tensorboard attribute) vertex_tensorboard_region (maxtext.configs.types.Tensorboard attribute) video_grid_thw (maxtext.multimodal.processor_qwen3_omni.Qwen3OmniPreprocessorOutput attribute) video_path (maxtext.configs.types.MultimodalGeneral attribute) video_placeholder (maxtext.configs.types.MultimodalGeneral attribute) video_second_per_grid (maxtext.multimodal.processor_qwen3_omni.Qwen3OmniPreprocessorOutput attribute) video_values (maxtext.multimodal.processor_qwen3_omni.Qwen3OmniPreprocessorOutput attribute) vision_encoder_as_linen() (in module maxtext.layers.encoders) vision_exit_as_linen() (in module maxtext.models.gemma3) vision_output_dim_for_vit (maxtext.configs.types.VisionTower attribute) vision_output_length (maxtext.configs.types.VisionTower attribute) vision_sft_preprocessing_pipeline() (in module maxtext.input_pipeline.hf_data_processing) VisionEmbedder (class in maxtext.models.gemma3) visionembedder_as_linen() (in module maxtext.models.gemma3) VisionEncoder (class in maxtext.layers.encoders) VisionEntry (class in maxtext.models.gemma4_vision) VisionExit (class in maxtext.models.gemma3) (class in maxtext.models.gemma4_vision) VisionProjector (class in maxtext.configs.types) VisionTower (class in maxtext.configs.types) VLLM (class in maxtext.configs.types) vllm_additional_config (maxtext.configs.types.VLLM attribute) vllm_hf_config_path (maxtext.configs.types.VLLM attribute) vllm_hf_overrides (maxtext.configs.types.VLLM attribute) vllm_load_format (maxtext.configs.types.VLLM attribute) VllmWeightMapping (class in maxtext.integration.tunix.utils) vmap_gather() (maxtext.layers.pipeline.Pipeline method) (maxtext.layers.pipeline_deprecated.Pipeline method) vmap_parallel_gather() (maxtext.layers.pipeline.Pipeline method) (maxtext.layers.pipeline_deprecated.Pipeline method) vocab_size (maxtext.configs.types.Tokenizer attribute) W W (maxtext.layers.learn_to_init_layer.LearnToInitDense attribute) warmup_steps_fraction (maxtext.configs.types.Optimizer attribute) weight_calibration_method (maxtext.layers.quantizations.QwixQuantization attribute) weight_dtype (maxtext.configs.types.DataTypes attribute) (maxtext.layers.attentions.Attention attribute) (maxtext.layers.normalizations.Qwen3NextRMSNormGated attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionMLP attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchEmbed attribute) (maxtext.models.qwen3.Qwen3OmniMoeVisionPatchMerger attribute) weight_prefetching() (maxtext.layers.pipeline.CircularPipeline method) weight_quantization_calibration_method (maxtext.configs.types.Quantization attribute) weight_sparsity_m (maxtext.configs.types.Quantization attribute) weight_sparsity_n (maxtext.configs.types.Quantization attribute) weight_sparsity_start_step (maxtext.configs.types.Quantization attribute) weight_sparsity_update_step (maxtext.configs.types.Quantization attribute) WEIGHTED (maxtext.configs.types.SamplingStrategy attribute) wi_tile_dlhs_batch_seq (maxtext.configs.types.MoEKernels attribute) wi_tile_dlhs_embed_dim (maxtext.configs.types.MoEKernels attribute) wi_tile_dlhs_mlp_dim (maxtext.configs.types.MoEKernels attribute) wi_tile_drhs_batch_seq (maxtext.configs.types.MoEKernels attribute) wi_tile_drhs_embed_dim (maxtext.configs.types.MoEKernels attribute) wi_tile_drhs_mlp_dim (maxtext.configs.types.MoEKernels attribute) wi_tile_fwd_batch_seq (maxtext.configs.types.MoEKernels attribute) wi_tile_fwd_embed_dim (maxtext.configs.types.MoEKernels attribute) wi_tile_fwd_mlp_dim (maxtext.configs.types.MoEKernels attribute) window_function() (in module maxtext.multimodal.utils) with_data_parallel_constraint() (in module maxtext.models.deepseek_batchsplit_fp8) with_logical_constraint() (maxtext.models.deepseek.DeepSeekGenericLayer method) wo_tile_dlhs_batch_seq (maxtext.configs.types.MoEKernels attribute) wo_tile_dlhs_embed_dim (maxtext.configs.types.MoEKernels attribute) wo_tile_dlhs_mlp_dim (maxtext.configs.types.MoEKernels attribute) wo_tile_drhs_batch_seq (maxtext.configs.types.MoEKernels attribute) wo_tile_drhs_embed_dim (maxtext.configs.types.MoEKernels attribute) wo_tile_drhs_mlp_dim (maxtext.configs.types.MoEKernels attribute) wo_tile_fwd_batch_seq (maxtext.configs.types.MoEKernels attribute) wo_tile_fwd_embed_dim (maxtext.configs.types.MoEKernels attribute) wo_tile_fwd_mlp_dim (maxtext.configs.types.MoEKernels attribute) WSD (maxtext.configs.types.LearningRateScheduleType attribute) wsd_decay_steps_fraction (maxtext.configs.types.Optimizer attribute) wsd_decay_style (maxtext.configs.types.Optimizer attribute) WsdDecayStyle (class in maxtext.configs.types) wv_product() (maxtext.layers.attention_op.AttentionOp method) X XPLANE (maxtext.configs.types.ProfilerType attribute) xprof_e2e_enable_fw_power_level_event (maxtext.configs.types.Profiling attribute) xprof_e2e_enable_fw_thermal_event (maxtext.configs.types.Profiling attribute) xprof_e2e_enable_fw_throttle_event (maxtext.configs.types.Profiling attribute) xprof_tpu_power_trace_level (maxtext.configs.types.Profiling attribute) XProfTPUPowerTraceMode (class in maxtext.configs.types) Y yaml_key_to_env_key() (in module maxtext.configs.pyconfig_deprecated) YARN (maxtext.configs.types.RopeType attribute) yarn() (in module maxtext.models.deepseek_batchsplit) (in module maxtext.models.deepseek_batchsplit_fp8) yarn_rotary_embedding_as_linen() (in module maxtext.layers.embeddings) YarnRope (class in maxtext.configs.types) YarnRotaryEmbedding (class in maxtext.layers.embeddings) Z z_loss_multiplier (maxtext.configs.types.Logits attribute)