Yuan2.0-M32 Mixture of Experts with Attention Router: Source Code Walkthrough

Yuan2.0-M32 replaces the conventional linear gating network with an attention-based router (ParallelAttention_router) that scores every token against its 32 experts and activates the top-2 per token. The two modules below implement the router and a single expert MLP.

import torch
import torch.nn as nn
from transformers.activations import ACT2FN  # used by YuanExpertMLP below


class ParallelAttention_router(nn.Module):
    def __init__(self, config):
        super(ParallelAttention_router, self).__init__()
        # Retained from the reference attention module; not used by the router math below.
        layer_number = 0
        self.layer_number = max(1, layer_number)
        self.flash_attn_drop = 0.01

        self.hidden_size = config.hidden_size
        # The router projects each token onto one scalar score per expert.
        self.projection_size = config.moe_config['moe_num_experts']

        self.query = nn.Linear(self.hidden_size, self.projection_size, bias=False)
        self.key = nn.Linear(self.hidden_size, self.projection_size, bias=False)
        self.value = nn.Linear(self.hidden_size, self.projection_size, bias=False)


    def forward(self, hidden_states, attention_mask=None, enc_position_ids=None,
                encoder_output=None, inference_params=None,
                rotary_pos_emb=None):
        # Per-token scalar query/key/value scores, one per expert.
        query_layer = self.query(hidden_states)
        key_layer = self.key(hidden_states)
        value_layer = self.value(hidden_states)

        b = query_layer.size(0)  # batch size
        s = query_layer.size(1)  # sequence length; b*s = token count
        z = query_layer.size(2)  # number of experts

        # Run the router in fp32 for numerical stability.
        query_layer = query_layer.float().view(b, s, z, 1)
        key_layer = key_layer.float().view(b, s, z, 1)
        value_layer = value_layer.float().view(b, s, z, 1)

        # [b, s, z, 1] x [b, s, 1, z] -> [b, s, z, z]:
        # attention among the per-expert scores of each token.
        attn_weights = torch.matmul(query_layer, key_layer.transpose(2, 3))
        attn_weights = nn.functional.softmax(attn_weights, dim=-1)

        # [b, s, z, z] x [b, s, z, 1] -> [b, s, z, 1]
        attn_output = torch.matmul(attn_weights, value_layer)

        # Flatten to one row of routing logits per token.
        router_output = attn_output.view(b * s, z)

        return router_output
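
A minimal usage sketch of the router. The SimpleNamespace stand-in for the model config and its toy hidden_size are illustrative assumptions, not the real Yuan2.0-M32 configuration; only the 32-expert count matches M32:

from types import SimpleNamespace

import torch

config = SimpleNamespace(
    hidden_size=64,                      # toy value; the real model is much larger
    moe_config={'moe_num_experts': 32},  # M32 routes over 32 experts
)
router = ParallelAttention_router(config)

hidden_states = torch.randn(2, 5, config.hidden_size)  # [batch, seq, hidden]
logits = router(hidden_states)
print(logits.shape)  # torch.Size([10, 32]) -> one row of expert logits per token

A top-k over the last dimension of these logits then selects the two experts each token is dispatched to.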

class YuanExpertMLP(nn.Module):
    def __init__(self, config):
        super(YuanExpertMLP, self).__init__()

        self.gated_linear_unit = config.moe_config['gated_linear_unit']
        self.ffn_hidden_size = config.moe_config['ffn_hidden_size']

        if self.gated_linear_unit:
            # The gated variant packs the gate and up projections into a single w1.
            self.w1 = nn.Linear(config.hidden_size, self.ffn_hidden_size * 2, bias=False)
        else:
            self.w1 = nn.Linear(config.hidden_size, self.ffn_hidden_size, bias=False)
        # The snippet was cut off here; the remainder follows the released
        # Yuan2.0-M32 modeling code (activation, w2 down-projection, forward pass).
        self.act_fn = ACT2FN[config.hidden_act]
        self.w2 = nn.Linear(self.ffn_hidden_size, config.hidden_size, bias=False)

    def forward(self, x):
        x = self.w1(x)
        if self.gated_linear_unit:
            # Split into gate and value halves: act(gate) * value.
            x = torch.chunk(x, 2, dim=-1)
            x = self.act_fn(x[0]) * x[1]
        else:
            x = self.act_fn(x)
        return self.w2(x)
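
A usage sketch for a single expert, under the same assumptions as above (toy SimpleNamespace config with illustrative sizes; 'silu' is chosen here just to pick a valid ACT2FN key):

from types import SimpleNamespace

import torch

config = SimpleNamespace(
    hidden_size=64,      # toy values for illustration only
    hidden_act='silu',
    moe_config={'gated_linear_unit': True, 'ffn_hidden_size': 128},
)
expert = YuanExpertMLP(config)

tokens = torch.randn(10, config.hidden_size)  # e.g. the tokens routed to this expert
out = expert(tokens)
print(out.shape)  # torch.Size([10, 64]) -> projected back to hidden_size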