# Read the text and image embedding tensors from `outputs`, which must already
# be defined earlier in the session (it is not visible in this excerpt).
text_emb = outputs.text_embeds
image_emb = outputs.image_embeds

# The trailing comments are the values recorded in the original interactive
# transcript; both embeddings printed the same shape.
print(text_emb.shape)   # torch.Size([21, 512])
print(image_emb.shape)  # torch.Size([21, 512])