[{"data":1,"prerenderedAt":161},["ShallowReactive",2],{"tech-article":3},{"id":4,"title":5,"body":6,"date":148,"description":149,"extension":150,"meta":151,"navigation":152,"path":153,"seo":154,"stem":155,"tags":156,"__hash__":160},"tech/tech/build-llm-from-scratch.md","Google Colabで「自分だけのLLM」をゼロから作ってみた",{"type":7,"value":8,"toc":138},"minimark",[9,13,17,28,31,99,102,105,109],[10,11,12],"h2",{"id":12},"はじめに",[14,15,16],"p",{},"Google Colabの無料枠（T4 GPU）のみを使い、小規模な言語モデルをゼロから構築しました。データ準備・トークナイズ・モデル実装・学習・テキスト生成までの全工程を一気通貫で行っています。",[14,18,19,20,27],{},"本記事の詳細は ",[21,22,26],"a",{"href":23,"rel":24},"https://qiita.com/motoki_fujino6290/items/88247df20faacbb7d0af",[25],"nofollow","Qiitaの記事"," をご覧ください。",[10,29,30],{"id":30},"技術スタック",[32,33,34,47],"table",{},[35,36,37],"thead",{},[38,39,40,44],"tr",{},[41,42,43],"th",{},"項目",[41,45,46],{},"内容",[48,49,50,59,67,75,83,91],"tbody",{},[38,51,52,56],{},[53,54,55],"td",{},"フレームワーク",[53,57,58],{},"PyTorch",[38,60,61,64],{},[53,62,63],{},"モデルアーキテクチャ",[53,65,66],{},"Transformer（Causal Self-Attention、4層、256次元）",[38,68,69,72],{},[53,70,71],{},"トークナイザ",[53,73,74],{},"SentencePiece（BPE、語彙数4,000）",[38,76,77,80],{},[53,78,79],{},"実行環境",[53,81,82],{},"Google Colab（T4 GPU）",[38,84,85,88],{},[53,86,87],{},"学習データ",[53,89,90],{},"日本文学3作品（約49万文字）",[38,92,93,96],{},[53,94,95],{},"モデルサイズ",[53,97,98],{},"約524万パラメータ",[10,100,101],{"id":101},"概要",[14,103,104],{},"ChatGPTのようなLLMがなぜ動くのかを理解するため、小規模なTransformerモデルをゼロから実装しました。",[106,107,108],"h3",{"id":108},"ポイント",[110,111,112,120,126,132],"ul",{},[113,114,115,119],"li",{},[116,117,118],"strong",{},"青空文庫の日本文学をデータとして活用",": パブリックドメインの小説3作品からテキストを収集",[113,121,122,125],{},[116,123,124],{},"SentencePieceによるトークナイズ",": BPEアルゴリズムで日本語テキストをサブワード分割",[113,127,128,131],{},[116,129,130],{},"Transformerの自前実装",": Causal Self-Attentionを含む4層のTransformerを構築",[113,133,134,137],{},[116,135,136],{},"Temperature Samplingによるテキスト生成",": 確率的なテキスト生成の仕組みを実装",{"title":139,"searchDepth":140,"depth":140,"links":141},"",2,[142,143,144],{"id":12,"depth":140,"text":12},{"id":30,"depth":140,"text":30},{"id":101,"depth":140,"text":101,"children":145},[146],{"id":108,"depth":147,"text":108},3,"2026-03-22","無料のGoogle Colabで、データ準備からTransformerの実装・学習・テキスト生成まで一気通貫でLLMを構築しました","md",{},true,"/tech/build-llm-from-scratch",{"title":5,"description":149},"tech/build-llm-from-scratch",[157,58,158,159],"LLM","Transformer","Google Colab","Ebz30Rrs0LCICGEeG4JfgoPQVDeuW8Kbf2xTdgNiEuA",1774835869774]