This version adds support for collecting user feedback and fine-tuning the model once enough high-quality feedback has accumulated. After every conversational turn, the code asks whether the user is satisfied with the model's reply. If not, the user can supply a better answer. Once enough feedback has been collected, fine-tuning is triggered automatically.
The code is as follows:
```python
import json
import os

import torch
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments

# Load the model and tokenizer
model_name = "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v3"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Configure LoRA for lightweight fine-tuning
lora_config = LoraConfig(
    r=8,                                  # Low-rank dimension
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"]   # Adjust based on model architecture
)
model = get_peft_model(model, lora_config)

# Initialize or load conversation context and feedback data
initial_messages = [
    {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant."}
]
messages = initial_messages.copy()

# Load saved conversation and LoRA weights if they exist
conversation_file = "conversation_history.json"
lora_weights_file = "lora_weights.pt"
feedback_data_file = "feedback_data.json"

if os.path.exists(conversation_file):
    with open(conversation_file, "r") as f:
        messages = json.load(f)
    print("Loaded previous conversation history.")

if os.path.exists(lora_weights_file):
    model.load_state_dict(torch.load(lora_weights_file), strict=False)
    print("Loaded previous LoRA weights.")

# Load feedback data if it exists
feedback_data = []
if os.path.exists(feedback_data_file):
    with open(feedback_data_file, "r") as f:
        feedback_data = json.load(f)
    print("Loaded previous feedback data.")

# Collect user feedback and store it for future fine-tuning
def collect_feedback(prompt, response):
    feedback = input("Is this response satisfactory? (yes/no): ").strip().lower()
    if feedback == "yes":
        feedback_data.append({"prompt": prompt, "response": response})
    elif feedback == "no":
        print("Feedback noted. Please provide a better response if possible.")
        better_response = input("Better response: ").strip()
        feedback_data.append({"prompt": prompt, "response": better_response})

# Fine-tune the model once enough feedback data has been collected
def fine_tune_with_feedback():
    if len(feedback_data) >= 10:  # Trigger fine-tuning at 10 feedback samples
        print("Starting fine-tuning with user feedback...")

        # Prepare the dataset. Prompt and response are concatenated so that
        # input_ids and labels have the same length, as the causal-LM loss
        # requires (plain concatenation here; a chat-template format would
        # be more faithful to how the model is prompted at inference time).
        train_data = []
        for feedback in feedback_data:
            ids = tokenizer(
                feedback["prompt"] + "\n" + feedback["response"],
                truncation=True,
                max_length=512,
                return_tensors="pt",
            ).input_ids.squeeze(0)
            train_data.append({"input_ids": ids, "labels": ids.clone()})

        # Define training arguments
        training_args = TrainingArguments(
            output_dir="./results",
            num_train_epochs=1,
            per_device_train_batch_size=1,
            save_steps=10,
            save_total_limit=2,
            logging_dir="./logs",
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_data,
        )
        trainer.train()
        print("Fine-tuning complete with user feedback.")
        feedback_data.clear()  # Clear feedback data after fine-tuning

# Enter the conversation loop
try:
    while True:
        # Get user input
        user_input = input("User: ").strip()

        # Exit and reset commands
        if user_input.lower() == "/exit":
            print("Exiting chat.")
            break
        if user_input.lower() == "/clean":
            messages = initial_messages.copy()
            print("Chat history cleared. Starting a new conversation.")
            continue
        if not user_input:
            print("Input cannot be empty. Please enter something.")
            continue

        # Add user input to the conversation
        messages.append({"role": "user", "content": user_input})

        # Build the chat template
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Tokenize the input and move it to the model's device
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        # Generate a response from the model
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=150
        )

        # Keep only the newly generated tokens, dropping the prompt
        generated_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]
        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

        # Add the model's response to the conversation
        messages.append({"role": "assistant", "content": response})

        # Print the model's response
        print(f"Qwen: {response}")

        # Collect user feedback
        collect_feedback(user_input, response)

        # Fine-tune the model if there is enough feedback data
        fine_tune_with_feedback()

# Save the conversation history, model weights, and feedback data on exit
finally:
    with open(conversation_file, "w") as f:
        json.dump(messages, f)
    torch.save(model.state_dict(), lora_weights_file)
    with open(feedback_data_file, "w") as f:
        json.dump(feedback_data, f)
    print("Conversation history, LoRA weights, and feedback data saved.")
```
Code explanation
- Feedback collection: the `collect_feedback` function asks the user after every turn whether the reply was satisfactory. A satisfactory reply is stored as-is; for an unsatisfactory one, the user can supply a better answer that replaces the generated content in the stored pair.
- Automatic fine-tuning trigger: the `fine_tune_with_feedback` function fine-tunes the model once the feedback buffer reaches a threshold (set to 10 samples here). After fine-tuning, the model responds better to similar inputs, guided by the high-quality answers the user provided.
- Saving and loading feedback data: on exit, the collected feedback is saved so that the next session can keep accumulating it and trigger fine-tuning again when needed.
- Saving the fine-tuned parameters: the latest LoRA weights are saved on every exit, so the next session can start from the previous fine-tuning result.
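Note that the script persists weights with `torch.save(model.state_dict(), ...)`, which writes the entire model rather than just the adapter. A minimal sketch of a lighter alternative, assuming the PEFT-wrapped `model` from the script: filter the state dict down to the LoRA parameters before saving. The script's existing `load_state_dict(..., strict=False)` call loads such a partial checkpoint unchanged.

```python
# Save only the LoRA parameters (a few MB instead of the full 7B model).
# Assumes `model` is the get_peft_model(...)-wrapped model from the script;
# PEFT names its adapter tensors with a "lora_" prefix (lora_A / lora_B).
lora_state = {k: v for k, v in model.state_dict().items() if "lora_" in k}
torch.save(lora_state, "lora_weights.pt")

# Restoring works with the script's existing loading code:
# model.load_state_dict(torch.load("lora_weights.pt"), strict=False)
```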
Does the model need to be reloaded after fine-tuning?
No. Fine-tuning runs directly on the model that is already loaded, so the updated model takes effect immediately in the current session. Concretely:
- Once fine-tuning finishes, the model parameters have been updated in place, and subsequent turns are generated by the fine-tuned model.
- Saving and loading weights: on exit, the fine-tuned weights are written to a file. If the session ends and is restarted, the code loads the weights from that file, so the effect of the previous fine-tuning carries over.
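For carrying the adapter across sessions, PEFT also offers a built-in route: `save_pretrained` on the wrapped model writes only the adapter, and `PeftModel.from_pretrained` reattaches it to a freshly loaded base model. A minimal sketch, assuming the same `model_name` as the script (the directory name `qwen_lora_adapter` is an arbitrary choice for illustration):

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

# On exit: save only the adapter (adapter_config.json + adapter weights)
model.save_pretrained("qwen_lora_adapter")

# On the next startup: load the base model, then reattach the adapter
base = AutoModelForCausalLM.from_pretrained(
    "huihui-ai/Qwen2.5-7B-Instruct-abliterated-v3",
    torch_dtype="auto",
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "qwen_lora_adapter")
```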
Notes
- Frequent fine-tuning: if fine-tuning is triggered often, use a lower learning rate and/or a cap on the number of update steps to keep the model from overfitting or becoming unstable; see the sketch after this list.
- Cumulative effect: over long-term use, the model gradually accumulates the user's preferences. Keeping the weights file preserves that fine-tuning effect across sessions.
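A minimal sketch of more conservative training arguments, as a drop-in replacement for the `TrainingArguments` in the script; the specific values here are illustrative assumptions, not tuned settings:

```python
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    num_train_epochs=1,
    learning_rate=1e-5,   # below the 5e-5 Trainer default, to limit drift per round
    max_steps=10,         # overrides num_train_epochs; hard cap on update steps
    save_steps=10,
    save_total_limit=2,
    logging_dir="./logs",
)
```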