{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyNsnqZmgWZoL+JNMch42SIN", "include_colab_link": true }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "source": [
"# Pinned install. %pip targets the running kernel's environment; the quotes keep the shell away from the extras brackets.\n",
"%pip install -q \"transformers[sentencepiece]~=4.33.0\"" ], "metadata": { "id": "c0aoIDAY1VJp" }, "execution_count": 1, "outputs": [] }, { "cell_type": "code", "source": [
"import IPython.display as ipd\n",
"import torch\n",
"\n",
"# Display the installed torch build as this cell's result.\n",
"torch.__version__" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "id": "YSFLOYQY1z13", "outputId": "1c62cac9-c51f-4c58-c89b-c1b6be19232e" }, "execution_count": 2, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "'2.0.1+cu118'" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 2 } ] }, { "cell_type": "code", "source": [
"from transformers import T5Tokenizer, T5EncoderModel, T5ForConditionalGeneration\n",
"\n",
"# Single checkpoint name shared by every from_pretrained call below.\n",
"MODEL_NAME = \"t5-base\"\n",
"\n",
"# Full encoder-decoder model.\n",
"model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)\n",
"# Separate encoder-only load of the same checkpoint (the last cell of this notebook goes through model.encoder instead).\n",
"encoder = T5EncoderModel.from_pretrained(MODEL_NAME)\n",
"# Padding/truncation are selected where the tokenizer is called, so no such options are passed at load time.\n",
"tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 290, "referenced_widgets": [ "39ba84f833884d9bb8767b0b63987d47", "2516a2d1ae6e48ff8e39e5894a4e282e", "2f50f385346945ec919f22941d66718c", "c447d5e5128a4c988ef3dcfda4ab58a4", "2e8ea0184da94facab2d07971e5929ea", "b1aa516cf74a43739616d98019622ef2", "4cb37c0740d64f808e936c25fe7337dd", "83e96093af9f477b91cce0047b2944b6", "1b571544751c419588b34baa9ed5aec8", "d98b80c2a911484f911bae3165487b7c", "ed624823a83b431ca206e694c1b685b3", "53a6731b0d4940bbb48a6a97cdb8118b", "9de2fbebc8904f0da388fdd3b9adde1f",
"22f8e3ff4dce44ee9277649103b43922", "13515dc1d70a449f81caed8029d99478", "4456c1a20584474583636f76aa85958d", "479be7e4be1a466eb044e1cc3f0deb6e", "9797657fd363405db5b92eb35fcd0d08", "fb09965475844f2cbe7ad4bdf1c005bc", "2576092fde614a90b26a396521e681e5", "1e77ff99cbef4fffa50ea75877fe6288", "2a66ad181ee14cdf88900912d3cf8aa6", "7e059dcb110d41b09482dc3a28cfb385", "d273f7f4379e4ca68d3851765cef4ca4", "a36f62aa17e14d51b4203fbf18f0ccaa", "abb31cb6915a421fb7feefb7661aa850", "7cef01670697453dafa19deb0972e2b6", "c1e0e8faf5354e2989f57030e1d53aa5", "2571dba8f99d4782a4985e333536cb79", "fc7efdbcd29a44f58b256070a862cf18", "e4f559eb7e9c4e4badb732ed8365f674", "7278523b4e3740a6be4d386c16f5a480", "307dc9637459428d93fdb4236bbf2ded", "3493e4bfba064bee8b968f3911bd9f9f", "e2b0d468bc3745e2b4b6d1aac2fd990c", "44c302a82198415eb0bceb662c2f952a", "1a85a1087675412381f6ccd68b5ad02e", "da07122cba6f45fb815478dd3ce94808", "aa71939788954ae38a0a4b5dfb0ffbcc", "900cf9c9824a40f4a1aba8ce23df2b9b", "a430e7ef40684cdbb86cd506c0babeeb", "41329a3067ea4ade91dac8da00c4d9cc", "4c3c2cf9b3fe4fe6a7686f0247539792", "e5b93db5985d4135854c35295a6286d4" ] }, "id": "TpRkQvY91P3s", "outputId": "4a812256-e859-46a6-c714-fb7dcfb680af" }, "execution_count": 3, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "39ba84f833884d9bb8767b0b63987d47", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading (…)lve/main/config.json: 0%| | 0.00/1.21k [00:00. If you see this, DO NOT PANIC! This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. 
This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n" ] } ] }, { "cell_type": "code", "source": [
"body = \"Sea turtles (superfamily Chelonioidea), sometimes called marine turtles,[3] are reptiles of the order Testudines and of the suborder Cryptodira. The seven existing species of sea turtles are the flatback, green, hawksbill, leatherback, loggerhead, Kemp's ridley, and olive ridley sea turtles.[4] All of the seven species listed above, except for the flatback, are present in US waters, and are listed as endangered and/or threatened under the Endangered Species Act.[5] The flatback itself exists in the waters of Australia, Papua New Guinea and Indonesia.[5] Sea turtles can be categorized as hard-shelled (cheloniid) or leathery-shelled (dermochelyid).[6] The only dermochelyid species of sea turtle is the leatherback.[6]\"\n",
"inputs = [f\"summarize: {body}\"]\n",
"\n",
"# Encode strings with T5.\n",
"encoding = tokenizer(inputs, return_tensors=\"pt\", padding=True)\n",
"# Inference only: run the encoder without recording an autograd graph.\n",
"with torch.no_grad():\n",
"    embeddings = model.encoder(**encoding)\n",
"\n",
"# Perturb embeddings a little bit.\n",
"# A dedicated seeded generator makes the noise repeatable across reruns without touching the global RNG state.\n",
"NOISE_STD = 1e-3\n",
"NOISE_SEED = 0\n",
"noise_rng = torch.Generator().manual_seed(NOISE_SEED)\n",
"noise = torch.normal(mean=0.0, std=NOISE_STD, size=embeddings.last_hidden_state.shape, generator=noise_rng)\n",
"embeddings.last_hidden_state = embeddings.last_hidden_state + noise\n",
"\n",
"# Decode same embeddings with T5 back to text.\n",
"# Forward the tokenizer's attention mask so padded positions (present once `inputs` holds texts of different length) are ignored by the decoder.\n",
"# NOTE(review): no length argument is passed, so generate() applies its default limit; the recorded result below looks cut off - consider max_new_tokens.\n",
"tokens = model.generate(encoder_outputs=embeddings, attention_mask=encoding.attention_mask)\n",
"tokenizer.batch_decode(tokens, skip_special_tokens=True)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "kOwyisf_XMSM", "outputId": "985fd651-ac62-4852-89a9-1d23cab8afc3" }, "execution_count": 6, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['the flatback, green, hawksbill, leatherback, loggerhead,']" ] }, "metadata": {}, "execution_count": 6 } ] } ] }