C4AI Command R+ is an open-weights research release of a 104-billion-parameter model with highly advanced capabilities, including retrieval-augmented generation (RAG) and tool use for automating complex tasks. Tool use in this model supports multi-step tool use, which allows the model to combine multiple tools across several steps to complete difficult tasks. C4AI Command R+ is a multilingual model, with performance evaluated in 10 languages: English, French, Spanish, Italian, German, Brazilian Portuguese, Japanese, Korean, Arabic, and Simplified Chinese. Command R+ is optimized for a variety of use cases, including reasoning, summarization, and question answering.
Model size: 104 billion parameters
Context length: 128K
Non-quantized version: CohereForAI/c4ai-command-r-plus
Quantized version: CohereForAI/c4ai-command-r-plus-4bit
You need to update to the latest transformers:
pip install 'git+https://github.com/huggingface/transformers.git' bitsandbytes accelerate
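After installing, a quick sanity check of the environment can be done as follows (a minimal sketch, not from the original post; the exact dev version string will vary depending on when the git install was done):

# Minimal sanity check: confirm the installed transformers is new enough
# to know about the "cohere" model type (>= 4.40).
import transformers

print(transformers.__version__)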
1. c4ai-command-r-plus-4bit
The code below was tested under WSL2 with a 6 GB GPU.
You need the latest transformers here as well. First, take a look at the contents of config.json:
{ "_name_or_path": "CohereForAI/c4ai-command-r-plus", "architectures": [ "CohereForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 5, "eos_token_id": 255001, "hidden_act": "silu", "hidden_size": 12288, "initializer_range": 0.02, "intermediate_size": 33792, "layer_norm_eps": 1e-05, "logit_scale": 0.8333333333333334, "max_position_embeddings": 8192, "model_max_length": 131072, "model_type": "cohere", "num_attention_heads": 96, "num_hidden_layers": 64, "num_key_value_heads": 8, "pad_token_id": 0, "quantization_config": { "_load_in_4bit": true, "_load_in_8bit": false, "bnb_4bit_compute_dtype": "float16", "bnb_4bit_quant_storage": "uint8", "bnb_4bit_quant_type": "fp4", "bnb_4bit_use_double_quant": false, "llm_int8_enable_fp32_cpu_offload": false, "llm_int8_has_fp16_weight": false, "llm_int8_skip_modules": null, "llm_int8_threshold": 6.0, "load_in_4bit": true, "load_in_8bit": false, "quant_method": "bitsandbytes" }, "rope_theta": 75000000.0, "torch_dtype": "float16", "transformers_version": "4.40.0.dev0", "use_cache": true, "use_qk_norm": true, "vocab_size": 256000 } |
From this we can see: "transformers_version": "4.40.0.dev0".
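Besides the version, the config also embeds a quantization_config block. Sketch only: an explicit BitsAndBytesConfig roughly equivalent to that block is shown below. The 4-bit checkpoint already ships with this configuration, so nothing needs to be passed when loading it; a config like this could instead be used to quantize the full-precision checkpoint while loading.

# Rough BitsAndBytesConfig equivalent of the quantization_config above (illustration only).
import torch
from transformers import BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="fp4",             # "bnb_4bit_quant_type": "fp4"
    bnb_4bit_compute_dtype=torch.float16,  # "bnb_4bit_compute_dtype": "float16"
    bnb_4bit_use_double_quant=False,       # "bnb_4bit_use_double_quant": false
)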
# test01.py: load the pre-quantized 4-bit checkpoint onto the CPU and print its module tree
from transformers import AutoModelForCausalLM

model_id = "CohereForAI/c4ai-command-r-plus-4bit"
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")
print(model)
Output:
python test01.py
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Loading checkpoint shards: 100%|█████████████████| 13/13 [00:07<00:00,  1.69it/s]
CohereForCausalLM(
  (model): CohereModel(
    (embed_tokens): Embedding(256000, 12288, padding_idx=0)
    (layers): ModuleList(
      (0-63): 64 x CohereDecoderLayer(
        (self_attn): CohereSdpaAttention(
          (q_norm): CohereLayerNorm()
          (k_norm): CohereLayerNorm()
          (q_proj): Linear4bit(in_features=12288, out_features=12288, bias=False)
          (k_proj): Linear4bit(in_features=12288, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=12288, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=12288, out_features=12288, bias=False)
          (rotary_emb): CohereRotaryEmbedding()
        )
        (mlp): CohereMLP(
          (gate_proj): Linear4bit(in_features=12288, out_features=33792, bias=False)
          (up_proj): Linear4bit(in_features=12288, out_features=33792, bias=False)
          (down_proj): Linear4bit(in_features=33792, out_features=12288, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): CohereLayerNorm()
      )
    )
    (norm): CohereLayerNorm()
  )
  (lm_head): Linear(in_features=12288, out_features=256000, bias=False)
)
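Beyond printing the module tree, the 4-bit checkpoint can also be used for actual generation. The following is a small sketch (not part of the original test scripts) following the standard transformers chat-template pattern; max_new_tokens and temperature are arbitrary example values, and device_map="auto" assumes accelerate is installed.

# Generation sketch for the 4-bit checkpoint (illustrative values).
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "CohereForAI/c4ai-command-r-plus-4bit"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

messages = [{"role": "user", "content": "Hello, how are you?"}]
input_ids = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
)

gen_tokens = model.generate(
    input_ids.to(model.device),
    max_new_tokens=100,
    do_sample=True,
    temperature=0.3,
)
print(tokenizer.decode(gen_tokens[0]))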
2. c4ai-command-r-plus
Check the config.json file; it also shows "transformers_version": "4.40.0.dev0":
{ "architectures": [ "CohereForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 5, "eos_token_id": 255001, "hidden_act": "silu", "hidden_size": 12288, "initializer_range": 0.02, "intermediate_size": 33792, "layer_norm_eps": 1e-05, "logit_scale": 0.8333333333333334, "max_position_embeddings": 8192, "model_max_length": 131072, "model_type": "cohere", "num_attention_heads": 96, "num_hidden_layers": 64, "num_key_value_heads": 8, "pad_token_id": 0, "rope_theta": 75000000.0, "torch_dtype": "float16", "transformers_version": "4.40.0.dev0", "use_cache": true, "use_qk_norm": true, "vocab_size": 256000 } |
The code below is essentially the same as test01.py above, but the loading behaves differently:
Quantized (4-bit) version: uses both GPU memory and CPU memory.
Non-quantized version: does not use GPU memory, only CPU memory.
# test02.py: load the full-precision checkpoint onto the CPU and print its module tree
from transformers import AutoModelForCausalLM

model_id = "CohereForAI/c4ai-command-r-plus"
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")
print(model)
Output:
python test02.py
Loading checkpoint shards: 100%|█████████████████| 44/44 [39:00<00:00, 53.20s/it]
CohereForCausalLM(
  (model): CohereModel(
    (embed_tokens): Embedding(256000, 12288, padding_idx=0)
    (layers): ModuleList(
      (0-63): 64 x CohereDecoderLayer(
        (self_attn): CohereSdpaAttention(
          (q_norm): CohereLayerNorm()
          (k_norm): CohereLayerNorm()
          (q_proj): Linear(in_features=12288, out_features=12288, bias=False)
          (k_proj): Linear(in_features=12288, out_features=1024, bias=False)
          (v_proj): Linear(in_features=12288, out_features=1024, bias=False)
          (o_proj): Linear(in_features=12288, out_features=12288, bias=False)
          (rotary_emb): CohereRotaryEmbedding()
        )
        (mlp): CohereMLP(
          (gate_proj): Linear(in_features=12288, out_features=33792, bias=False)
          (up_proj): Linear(in_features=12288, out_features=33792, bias=False)
          (down_proj): Linear(in_features=33792, out_features=12288, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): CohereLayerNorm()
      )
    )
    (norm): CohereLayerNorm()
  )
  (lm_head): Linear(in_features=12288, out_features=256000, bias=False)
)
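The memory-placement claims above can be checked directly by counting the device of every parameter after loading. A minimal sketch, assuming the same non-quantized CPU load as test02.py (the same loop works for the 4-bit checkpoint):

# Count parameter devices after loading; with device_map="cpu" every
# parameter here should report device 'cpu'.
from collections import Counter
from transformers import AutoModelForCausalLM

model_id = "CohereForAI/c4ai-command-r-plus"
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")

print(Counter(str(p.device) for p in model.parameters()))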