---
# Quantization recipe: SmoothQuant activation smoothing followed by W8A8 GPTQ.
quant_stage:
  quant_modifiers:
    # Migrate activation outliers into the preceding weights before quantizing.
    SmoothQuantModifier:
      smoothing_strength: 0.9
      # Each entry pairs the projection layers to smooth with the norm/layer
      # whose output feeds them.
      mappings:
        - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
          - 're:.*input_layernorm'
        - - ['re:.*gate_proj', 're:.*up_proj']
          - 're:.*post_attention_layernorm'
        - - ['re:.*down_proj']
          - 're:.*up_proj'
      ignore: []
    # GPTQ weight quantization with dynamic per-token activation quantization.
    GPTQModifier:
      config_groups:
        group_0:
          targets: [Linear]
          # int8 symmetric per-channel weights, MSE-calibrated observer.
          weights:
            num_bits: 8
            type: int
            symmetric: true
            group_size: null
            strategy: channel
            block_structure: null
            dynamic: false
            actorder: null
            observer: mse
            observer_kwargs: {}
          # int8 symmetric per-token activations, computed dynamically at
          # runtime (no observer / calibration statistics needed).
          input_activations:
            num_bits: 8
            type: int
            symmetric: true
            group_size: null
            strategy: token
            block_structure: null
            dynamic: true
            actorder: null
            observer: null
            observer_kwargs: {}
          output_activations: null
          format: null
      targets: [Linear]
      # Keep the output head in full precision.
      ignore: [lm_head]
      sequential_update: true
      sequential_targets: [MistralDecoderLayer]
      block_size: 128
      # Original file carried the float round-trip artifact
      # 0.09999999999999999; the intended value is 0.1.
      dampening_frac: 0.1
      offload_hessians: false