import torch
import torch.nn as nn
import torch.nn.functional as F


class MultiheadAttentionRelative(nn.MultiheadAttention):
    """
    Multihead attention with relative positional encoding
    """

    def __init__(self, embed_dim, num_heads):
        super(MultiheadAttentionRelative, self).__init__(embed_dim, num_heads, dropout=0.0, bias=True,
                                                         add_bias_kv=False, add_zero_attn=False,
                                                         kdim=None, vdim=None)

    def forward(self, query, key, value, attn_mask=None, pos_enc=None, pos_indexes=None):
        """
        Multihead attention

        :param query: [W,HN,C]
        :param key: [W,HN,C]
        :param value: [W,HN,C]
        :param attn_mask: mask to invalidate attention, -inf is used for invalid attention, [W,W]
        :param pos_enc: [2W-1,C]
        :param pos_indexes: index to select relative encodings, flattened in transformer WW
        :return: output value vector, attention with softmax (for debugging) and raw attention (used for last layer)
        """
        w, bsz, embed_dim = query.size()
        head_dim = embed_dim // self.num_heads
        assert head_dim * self.num_heads == embed_dim, 'embed_dim must be divisible by num_heads'

        # project query/key/value with the shared in-projection
        if torch.equal(query, key) and torch.equal(key, value):
            # self-attention: one matmul, then split into q, k, v
            q, k, v = F.linear(query, self.in_proj_weight, self.in_proj_bias).chunk(3, dim=-1)
        elif torch.equal(key, value):
            # cross-attention: project query separately from key/value
            _b = self.in_proj_bias
            _start, _end = 0, embed_dim
            _w = self.in_proj_weight[_start:_end, :]
            if _b is not None:
                _b = _b[_start:_end]
            q = F.linear(query, _w, _b)

            if key is None:
                assert value is None
                k = None
                v = None
            else:
                _b = self.in_proj_bias
                _start = embed_dim
                _w = self.in_proj_weight[_start:, :]
                if _b is not None:
                    _b = _b[_start:]
                k, v = F.linear(key, _w, _b).chunk(2, dim=-1)

        # project relative positional encodings to get q_r, k_r
        if pos_enc is not None:
            # select the encoding for each (target, source) pair: [2W-1,C] -> [W*W',C] -> [W,W',C]
            pos_enc = torch.index_select(pos_enc, 0, pos_indexes).view(w, w, -1)
            _start, _end = 0, 2 * embed_dim
            _w = self.in_proj_weight[_start:_end, :]
            _b = self.in_proj_bias[_start:_end]
            q_r, k_r = F.linear(pos_enc, _w, _b).chunk(2, dim=-1)  # [W,W',C]
        else:
            q_r = None
            k_r = None

        # scale query
        scaling = float(head_dim) ** -0.5
        q = q * scaling
        if q_r is not None:
            q_r = q_r * scaling

        # reshape to expose the head dimension
        q = q.contiguous().view(w, bsz, self.num_heads, head_dim)  # [W,N,E,C]
        if k is not None:
            k = k.contiguous().view(-1, bsz, self.num_heads, head_dim)
        if v is not None:
            v = v.contiguous().view(-1, bsz, self.num_heads, head_dim)
        if q_r is not None:
            q_r = q_r.contiguous().view(w, w, self.num_heads, head_dim)  # [W,W',E,C]
        if k_r is not None:
            k_r = k_r.contiguous().view(w, w, self.num_heads, head_dim)

        # content-content attention term
        attn_feat = torch.einsum('wnec,vnec->newv', q, k)  # [N,E,W,W']

        if pos_enc is not None:
            # content-position and position-content terms
            attn_feat_pos = torch.einsum('wnec,wvec->newv', q, k_r)  # [N,E,W,W']
            attn_pos_feat = torch.einsum('vnec,wvec->newv', k, q_r)  # [N,E,W,W']
            attn = attn_feat + attn_feat_pos + attn_pos_feat
        else:
            attn = attn_feat

        assert list(attn.size()) == [bsz, self.num_heads, w, w]

        # apply attention mask
        if attn_mask is not None:
            attn_mask = attn_mask[None, None, ...]
            attn += attn_mask

        raw_attn = attn
        attn = F.softmax(attn, dim=-1)

        # compute output values; bmm instead of einsum so mixed precision (apex) works
        v_o = torch.bmm(attn.view(bsz * self.num_heads, w, w),
                        v.permute(1, 2, 0, 3).reshape(bsz * self.num_heads, w, head_dim))
        assert list(v_o.size()) == [bsz * self.num_heads, w, head_dim]
        v_o = v_o.reshape(bsz, self.num_heads, w, head_dim).permute(2, 0, 1, 3).reshape(w, bsz, embed_dim)
        v_o = F.linear(v_o, self.out_proj.weight, self.out_proj.bias)

        # average attention weights over heads
        attn = attn.sum(dim=1) / self.num_heads
        raw_attn = raw_attn.sum(dim=1)

        return v_o, attn, raw_attn
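

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module): it illustrates the
# input convention documented above, with image rows flattened into the batch
# axis so query/key/value are [W, H*N, C]. The sizes below and the way
# pos_enc / pos_indexes are constructed are hypothetical stand-ins for what
# the surrounding transformer would normally supply.
if __name__ == '__main__':
    W, HN, C, num_heads = 8, 2, 16, 4
    attn_layer = MultiheadAttentionRelative(embed_dim=C, num_heads=num_heads)

    feat = torch.rand(W, HN, C)  # [W, HN, C]

    # one C-dim encoding per relative offset in [-(W-1), W-1]
    pos_enc = torch.rand(2 * W - 1, C)
    # offset index (i - j), shifted to be non-negative, for every (i, j) pair
    offsets = torch.arange(W)[:, None] - torch.arange(W)[None, :] + (W - 1)
    pos_indexes = offsets.flatten()  # [W*W]

    out, attn, raw_attn = attn_layer(feat, feat, feat,
                                     pos_enc=pos_enc, pos_indexes=pos_indexes)
    print(out.shape, attn.shape, raw_attn.shape)  # [8,2,16], [2,8,8], [2,8,8]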