CLIP-Notebook.ipynb
# Grab both embedding tensors from `outputs` in a single step.
# NOTE(review): `outputs` is presumably the CLIP model's forward result
# (going by the notebook name) -- confirm against the cell that creates it.
text_emb, image_emb = (
    outputs.text_embeds,
    outputs.image_embeds,
)

>>> print(text_emb.shape)
torch.Size([21, 512])
>>> print(image_emb.shape)
torch.Size([21, 512])