diff --git a/.gitattributes b/.gitattributes index 2434acdec479bd27a032b356b9cd5acdb5cfffd1..d32dd68e65028149460ca8d87c7ca21e1bdad988 100644 --- a/.gitattributes +++ b/.gitattributes @@ -7824,3 +7824,36 @@ neuronxcc-2.23.6484.0+3b612583/MODULE_c0822879a9404f208f02+84337dd9/model.neff f neuronxcc-2.23.6484.0+3b612583/MODULE_c0822879a9404f208f02+84337dd9/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text neuronxcc-2.23.6484.0+3b612583/MODULE_b202b8e3d4558ac1c782+97496b47/model.neff filter=lfs diff=lfs merge=lfs -text neuronxcc-2.23.6484.0+3b612583/MODULE_2268c0503ccbf4859036+97496b47/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text +neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/0538515e2eab8cf1a81f.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/0538515e2eab8cf1a81f.json new file mode 100644 index 0000000000000000000000000000000000000000..8742379d6487566d9936ab114df8cbcd8af5fb03 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/0538515e2eab8cf1a81f.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/18421eedc4d9b7b7fb51.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/18421eedc4d9b7b7fb51.json new file mode 100644 index 0000000000000000000000000000000000000000..b074f1a7f0c45f977a77fabf517c1d16b10f864a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/18421eedc4d9b7b7fb51.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/5756ac9be9333f9c8d82.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/5756ac9be9333f9c8d82.json new file mode 100644 index 0000000000000000000000000000000000000000..2134526f2a42b1697204c65fda95c4675a9d4a12 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/5756ac9be9333f9c8d82.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/d3503138cf92a3bb6ba7.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/d3503138cf92a3bb6ba7.json new file mode 100644 index 0000000000000000000000000000000000000000..0a0a7171224a6954e6fb75c67cdc3a9712c4d411 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/d3503138cf92a3bb6ba7.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/f7dfefb13d09e6187d80.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/f7dfefb13d09e6187d80.json new file mode 100644 index 0000000000000000000000000000000000000000..6abc7a37a5f514a83116dea634cc0c2887da444a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/bd8b5aaeb4693c199f97f6eebd1bbca77255bea6e0ed042c0c592ee5adccec97/f7dfefb13d09e6187d80.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/00688423de1428d98e68.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/00688423de1428d98e68.json new file mode 100644 index 0000000000000000000000000000000000000000..784bde98726778605bbd930c15caf801b35223e2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/00688423de1428d98e68.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 1, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": false, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 1, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/87e6309b5c6fe024caaa.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/87e6309b5c6fe024caaa.json new file mode 100644 index 0000000000000000000000000000000000000000..c536a3a345cf752b3921f05dd5ff96c4b183259a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/87e6309b5c6fe024caaa.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/8eff71913f842fa8f404.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/8eff71913f842fa8f404.json new file mode 100644 index 0000000000000000000000000000000000000000..18825581fc8909b28bdfe582660101cfafc1875a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/8eff71913f842fa8f404.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 8, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 8, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/b8db5a05f548780ebfae.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/b8db5a05f548780ebfae.json new file mode 100644 index 0000000000000000000000000000000000000000..d810d715897b894c4a59d497c48df5f2faf3e17f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/b8db5a05f548780ebfae.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/dfcd91ed17670ed71f9d.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/dfcd91ed17670ed71f9d.json new file mode 100644 index 0000000000000000000000000000000000000000..a22a96adc3a50b5b29b60113f44aa92da4ec9443 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/dfcd91ed17670ed71f9d.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 16, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 16, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/ed4c5924c8e305088a85.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/ed4c5924c8e305088a85.json new file mode 100644 index 0000000000000000000000000000000000000000..ec26c0415d77e94862e05a00dce62a443f1168b9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/d02c07e1054a0d66fe47bc2bd8603a512b513ad501882b6d49d36b9747c64a47/ed4c5924c8e305088a85.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 4, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 4, + "max_context_length": 32768, + "max_topk": 256, + "n_active_tokens": 32768, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 32768, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-1b-it/b8db5a05f548780ebfae.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-1b-it/b8db5a05f548780ebfae.json new file mode 100644 index 0000000000000000000000000000000000000000..d810d715897b894c4a59d497c48df5f2faf3e17f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-1b-it/b8db5a05f548780ebfae.json @@ -0,0 +1,91 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-1b-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 1152, + "initializer_range": 0.02, + "intermediate_size": 6912, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-1b-it", + "checkpoint_revision": "dcc83ea841ab6100d6b47a070329e1ba4cf78752", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 26, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": 512, + "sliding_window_pattern": 6, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-270m-it/18421eedc4d9b7b7fb51.json b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-270m-it/18421eedc4d9b7b7fb51.json new file mode 100644 index 0000000000000000000000000000000000000000..b074f1a7f0c45f977a77fabf517c1d16b10f864a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/0_REGISTRY/0.4.6.dev4/gemma3_text/google/gemma-3-270m-it/18421eedc4d9b7b7fb51.json @@ -0,0 +1,81 @@ +{ + "_entry_class": "SingleModelCacheEntry", + "_model_id": "google/gemma-3-270m-it", + "_sliding_window_pattern": 6, + "_task": "text-generation", + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "dtype": "bfloat16", + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "neuron": { + "_serialized_key": "NxDNeuronConfig", + "batch_size": 32, + "capacity_factor": null, + "checkpoint_id": "google/gemma-3-270m-it", + "checkpoint_revision": "ac82b4e820549b854eebf28ce6dedaf9fdfa17b3", + "continuous_batching": true, + "ep_degree": 1, + "fused_qkv": true, + "glu_mlp": true, + "local_ranks_size": 2, + "max_batch_size": 32, + "max_context_length": 16384, + "max_topk": 256, + "n_active_tokens": 16384, + "neuronxcc_version": "2.21.33363.0+82129205", + "on_device_sampling": true, + "optimum_neuron_version": "0.4.6.dev4", + "output_logits": false, + "pp_degree": 1, + "prefill_chunk_size": 0, + "sequence_length": 16384, + "speculation_length": 0, + "start_rank_id": 0, + "target": "trn1", + "torch_dtype": "bfloat16", + "tp_degree": 2 + }, + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_local_base_freq": 10000.0, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": 512, + "use_bidirectional_attention": false, + "use_cache": true, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..63679a09567c314bb9c60413c1d3d88945055284 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03adffb1ac29009d5dc73c5a1cb3238a85f92058a525ad350f72cac0708fce18 +size 1779146 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..4853a7076094b2ca10c6f2d98033b2172d196259 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_083c75747563fca496d7+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:597f4f3cdcd094d9b61867edb5be9b2772bccfdc5109041406729cae7091bb0f +size 32287744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3ccf8abc32ecf51e263054ec6c5f7124504c4fd3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:570017663a6900b5493f75c47921e938e0e1744a44c1f04d5f4536bfa44d644c +size 1779218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..44bccb0ecee120cf84f9f3ed0cf1d0d1464a22c6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_134003a684b11c49131d+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9544338723eb399adde799a747e2c438ff69fdfd1deeec0e1ce187b38edab180 +size 83313664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..312254947785a14deb5ca279ee14723b0288a275 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cffabd2edca1fa4e310a911a7f1ee3949022a4d512d81ab71073566d4e75b82 +size 842788 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..072da2af0574c0d18af1ac6ca8b7e20e805d3870 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b397f20205d28dac763ae3e1b0b2d6cc82391a85343d2e59a61b09c83f0b41a2 +size 15289344 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..cd65345a6a903baf951afaa5f3a5048cc8f4ff22 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_1b9cad3a2eb3c406661d+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7789b9e434dece65641d8f5f98d2b54d005c9a140d377677724c8cef055f366 +size 15466469 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..73dd38c73accb898fec4b812a3e3b86ba2437846 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cb39e4ee43fef9e4ab34ba0c65d0e5d40a495386b6e2b33b16ade366f630493 +size 2549448 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..54d08f338e838089bfd618ca6a54199310616c8d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_33dcb1875eda19b3ebb5+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a41555756fc779961639134f6e2566d8faf2f2e441379455fa9d097f0232525 +size 7711744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c49bb8a2645fe583af0597c9812d28558d09a331 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86f1aa49dbddfc33c2f59d85ddec5a2e3d26274e2bd24dc9b44645a626a9acf0 +size 2549552 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..a1e2dc87d73d4c5f79beccda767518b29bd88a24 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4978e0dffee300919f4f+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8427cba9f0cc79b11f2a4905885b74a5ee87deac370e3845d8b65da6f6250a03 +size 17900544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..bc1ff09316246ba2ab9d8405e209686aa49e66cb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d0ec3e537d1edb6bd63452260cc0127e02213437bfd0e40bd7c37a039b2993 +size 1739896 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..17aacccb7cceeb4717d7db6140ef5c6a4b88443f --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_4dd21482986783a76fee+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6cf965cb2fdea318aa68997cd8cca53e7fb87c41a68cf7e0c7acca4bdcefa06 +size 82566144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..8e7da1759f18644fd58b12e947edc76849946cc0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a014b74ceda95eade982c79b8f8c1b429e66ab6a36b6a6a7fed94bb0e453bf6 +size 2549448 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..e21cdeae29e400ae88dbc76631b584b1195d9765 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_61998e972adde9b6f5d2+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e406af060c688d0d78087538230afc828390f3b763072b3a181d212d56a4ce59 +size 7711744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..48c4a0dd31696a0866142ce73baeacd9170436f1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:123441c0c01d741960d9edf16df88dad93f3426a0a1a940c2348c8e6d3add05a +size 891117 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..9f2f305786968566f79aadaa303be565f99ccf70 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:936df18dc8957bed20db8622f6d7b72bc72e61832b07efa4c8d925146b024023 +size 5530624 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..5f1de785a707d5dcc07d1ddaad3e63354e68534e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_7701466c22adfdd959a6+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa2ade0e387063ba57d08563e9714f731604a9747d8a4a39ef642f0dbed3402 +size 5707749 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..450d3f4e6af5daaf406fdb889011045fe0bf58e7 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fff76f4ff7318c5dba578d830c0143cfd69e2a85526d97cb316f603e37c991ed +size 2549552 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..610a3fb9b64190db9ad66d696aba5b1ec64d0d86 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8b7c51b3642e17a79f64+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:238000d2883e3847412559a856aa0e25b08ade4240632c2d276500f25b75cc06 +size 17900544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.neff index 8f597d9254f560486716a1df9eb61d75b8ba58f4..607eb141c64018328fd47205630261f8aade6245 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.neff +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/model.neff @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:898f2da95941f1d40fd67afbdc1be7f696a8740f0a9c000fdc5e31bf92bda2ae +oid sha256:936f7c19a4ff86454a5b83a26a3fb7b07cdcc62518970b6c78f0bf293cc21420 size 30598144 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo index b07edc3580ad9fad9bff561bd16a054cf4802997..e424f96aac1c54f0bed80e6ae76e81ef50f4d7a4 100644 --- a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff1216675c7144590c1+c4f887dc/wrapped_neff.hlo @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:640748ccdacf643dce82351dbace050c74bfa894e05443c3fb40e81eddb220fb +oid sha256:c5009b951afa0dace7b72350a1ffd6ed43bfb3919fd73d44c0de35f45a53cc56 size 30708309 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..56883efb446a30f65122354deaecbd9e07bd2127 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4041b29f7b9b399a0ce45b3d6cf353206c50e80704a931977b6150b712ca5584 +size 656602 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..92046cab8fb4f5f642704e6d15ecb7b241715c56 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d740f7f2ecea942ea86820849b04c793d85b1a27e3f2ca68378548f54f6ef0ef +size 19252224 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7e10876eb40f372b751d4ccd8ad45186eda0e284 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_8ff80109ec96db204c79+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc16916f41a2b29d4666512abcf5574e6e0b0084884874e61d77e39b3bd9d415 +size 19374868 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..3abb42aa0bc31f6048446e47c689445b14ddf0a6 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40de54a293a9d07390bdfc8efc7a66171274e2d19ea27d5af0f36db00e6970d9 +size 891924 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..fe3dc43772d8e4c1d6adcebfb46118712c5a6922 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ffce009447cbf266712d8b182140cf4b8ad02b5c1a807eca71e782ca1362162 +size 9903104 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..df947164d82e268eada82e36fd7693b8afeb41c1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_a36fd4fef78c745e1416+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:401c29f76367655299c8980aaed43243cd7a7d429084f4c8ed38cb08b6754323 +size 10080229 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..e9d61234b043e54a08b3e6eac0497b5da706a38e --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c002da7fb053a98cb443d5933c4adc1e8e771f8669d9090a199bc95ddfcfad42 +size 842788 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..64b9f761eac62127b7bb0d16e6ca48ee9642f54b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16d549a2b91d6589e765e56ed6551515979b03ee91709cb6ca6803071b5d126f +size 27761664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..071777164176481217bdd5a32e8ebce9a0454406 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ac3c7cc1cd56727823f0+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6561a091b91f028c18dab3ec2b98fe5ee1cc8fc6162a42334d88f430462456e8 +size 27938789 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..457b4bbf4f6972e3a1299e24c61e2f6090fb4c22 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f26bdb3a565bea60ea9289a3a632756326fd2d156224839610ee4b2f30fb1d2f +size 1779218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..96c5faab2852e9ec23fa33107e3940c1839aacc3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b1639d901305de0ccbb9+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa3d20f8b1395de354a91b601ba1d610518bc100bdb757601f6808bd98f69dd2 +size 83313664 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..0c3ccadd2394039a6891fc9e2c4fec493bf375cc --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4a7180c79df0d067e960030b46425c217430be95efbd931ad1b3310733f6f1 +size 656586 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..c6f07a8af43f1bc891eb1ab4e496ec561bbedb88 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0353221fc2c2895c0efa4c883c10183dbb0286a7cb7aa5e582f72ec274eeb16 +size 10957824 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7b19dd1944796d1ba629bbfc691e9f2a93d92a8a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b406585baf2c99ea743d+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf27d3d6de096aba6558fd96146eb143ddbb3df576e33083ac3f71690e5699d +size 11080468 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..b431b81c2179de62c0d0c98a41b34538a0859747 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5231dfd98b741f8a7925588041cdcd45f2f927acef9e412cb34e6f8c4e947b3e +size 2549552 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..f52d47b30234721947c3693cfe72478bda088bd3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_b96a1f42beeb09ee40ca+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0bce227347cd77ac4286ffd97388546988de59cf17e03dad12ffcba42640ea +size 17900544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..9ac80839a44d9dcd768fa7797df381b4537e6aae --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04da69c73d9f7a86fa19933dffa67a34ddc65ea57f6f8a296f151df8b2e2478c +size 656586 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..640cb86569bd9c81338457fae00ab05f23aca7bb --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc7e40c383be5f91a93de631c564de343d1d3c2bab14879acf444739e67fd46c +size 6472704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..7b1f48fac41b3dcd6a93d67c4b85d868571c83e5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_c49ef4b16e6a8513ee7c+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c9aa30c711919642e19e8959bfc40cb2300195d12a625f6a9d1823628ed7c0 +size 6595348 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..71275d7139e76687b0013549c76f07a477136fc5 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0002025273f0b98f953ba4142218f8f29a9bb772981ba47f8b130058d0018d6f +size 2492798 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..b455f7e8f4dae4bd26d62874f3e95e9b7af79576 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_d986d392e2b714a772e0+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee5f7363549a34d43bc58ead521933d456144c25b9999b64c48fd26b24a0b066 +size 18248704 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..836724f44545ce0dedda1521fd4c623a6ea8ec72 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..78a30ad51fc5ba7ff93979ff9016d0c8b4f090c9 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f15dda2e1fc2136c3ec65404054c304b554108de5dd8f26baa2a502e140a8e7 +size 1779218 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..33fb0f70363ab105c91473f2038dc6e79664dd54 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_da330f1b1aad44160016+24129607/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30543d8500b1061bd0abc83042b4034fe4f1d855463277f314671cee55134689 +size 80927744 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..c299a648ccd24eba7ae90bf6fa829ba720be7dc8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45cc1db247cb9b9ce9701e74f8b2d0b1cf76d082d5464d606cdfa72dd327502f +size 655761 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..8190798906486985a6519eeb67a640a6efc92af2 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e39c5570c9bfe45da79fb0f3d9c0fce1d6d557de0de99ae87d0c5142d8b46e +size 3564544 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..c4e4bdad508a28a624fb922ba87d2c01472031d3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_dba668a28cef64f8b275+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:192fead1b3e39db169c9b34b5a513dd5d409c71030ba6625a14caa90ce1dcb16 +size 3687188 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..a40bd2828c187293a24e2e3596941e589e7837a4 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_de01404cf46fd8e2f601+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f53252f239e5fe07901edc6150c25c17f3b31138be24368f153e4dd3ce016c5 +size 586812 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..65936e92e7fd446d616313450a91428e7fc8fc56 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce1604ae39ec335f4db3a45de7e302e01abe9ac192afecad610be83e95f09df6 +size 607450 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..ca3104c6676bbaf0a5c7718eaecd2c190d65c6d1 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfd7524f0b25e28fa72601d2d422011f24dfa7381382a9c39239216f087ae6c8 +size 18791424 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..a84091a4b87298007c01b2d634966246ee04422d --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_e21228aada91cc0cee76+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c347aeb5f1d2da3a6aef823616e0579e07c386f4816340b3214e83c1603626a2 +size 18914068 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..968f1f51c233ccb55252a3ed35eef202606829d3 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07046888a067e6ae8cf67d0469290a9cab91b6ac9b6ffcd5151e7bfe573f08fb +size 891924 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..5af573f47557ff01e7eeb8440759e7269e8ab458 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09714af0311f568e6b478aeb0785154a6e23c1e618565d57a35a743cb63f9f14 +size 16446464 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..b40d804b197e19993d54c2e25d041f4b8ae5cda8 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_eaa8316a0cd427639f95+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:723169d7e8104355b405afe258abd592b82261b7124151036304dd4791cc5c0c +size 16623589 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/compile_flags.json b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/compile_flags.json new file mode 100644 index 0000000000000000000000000000000000000000..5726abc7d1d8c52fa95bc7919439a23a23fe3b9a --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/compile_flags.json @@ -0,0 +1 @@ +["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"] \ No newline at end of file diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.done b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.done new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.hlo_module.pb b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.hlo_module.pb new file mode 100644 index 0000000000000000000000000000000000000000..47a758d0d94c616e0ebf4445ab33d0d95c9d361b --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.hlo_module.pb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5a74b43399dd59cd6aaed48f2fe004c4941eb0914caa26aa6d689db7231ad50 +size 891940 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.neff b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.neff new file mode 100644 index 0000000000000000000000000000000000000000..3b7347ccd114dae6dea1cb9d026a4e64eadab7e0 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/model.neff @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84cfa3a95da30bb1001806e374e8528fa1bcf68656ad7496f085f6aba3fdd336 +size 28283904 diff --git a/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/wrapped_neff.hlo b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/wrapped_neff.hlo new file mode 100644 index 0000000000000000000000000000000000000000..1e2139293a2f4d2a36e2ec9371e022a259c38222 --- /dev/null +++ b/neuronxcc-2.21.33363.0+82129205/MODULE_ee2fac66603ba635ef0e+a02c3a36/wrapped_neff.hlo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:712a1f408624afbea41b4a676e4400917b5d8b20131898a1b86683c3b16676cc +size 28461029