Skip to content

Instantly share code, notes, and snippets.

@geohot
Last active May 1, 2024 13:41
Show Gist options
  • Save geohot/7c9f10f5770f058a1de6ef0598e4c9d8 to your computer and use it in GitHub Desktop.
Save geohot/7c9f10f5770f058a1de6ef0598e4c9d8 to your computer and use it in GitHub Desktop.
llm.c output generated by tinygrad
#include <stdlib.h>
#include <stdbool.h>
#include <tgmath.h>
/* Evaluates to the larger of x and y (y when they compare equal or unordered).
   Both arguments and the whole expansion are parenthesized so that compound
   argument expressions keep their own precedence. Each argument may be
   evaluated twice, so do not pass expressions with side effects. */
#define max(x,y) (((x)>(y))?(x):(y))
#define half __fp16
/* Adds one to the integer stored at data0[0], in place. */
void E_(int* data0) {
    const int current = data0[0];
    data0[0] = current + 1;
}
/* Writes the sequence 0, 1, ..., 63 into data0[0..63]. */
void r_64_64(int* data0) {
    int idx = 0;
    while (idx < 64) {
        /* idx + 1 + (-1) in the generated form is simply idx */
        data0[idx] = idx;
        ++idx;
    }
}
/* Builds a 64x64 boolean grid in data0 (row stride 64): an entry is true when
   the matching element of data1 (read with row stride 1024) equals 0.0f. */
void E_64_64(bool* data0, const float* data1) {
    for (int row = 0; row < 64; row++) {
        const float* src = data1 + (row * 1024);
        bool* dst = data0 + (row * 64);
        for (int col = 0; col < 64; col++) {
            dst[col] = (src[col] == 0.0f);
        }
    }
}
/* Writes the sequence 0, 1, ..., 50256 into data0[0..50256]. */
void r_50257_50257(int* data0) {
    int idx = 0;
    while (idx < 50257) {
        /* idx + 1 + (-1) in the generated form is simply idx */
        data0[idx] = idx;
        ++idx;
    }
}
/* Stores 1.0f minus data1[0] into data0[0]. */
void E_n1(float* data0, const float* data1) {
    const float input = data1[0];
    data0[0] = 1.0f - input;
}
/* Scalar kernel: reads a float from data1[0] and an int from data2[0] and
   writes one float to data0[0].
   NOTE(review): the formula looks like 1 - base^exponent, built from
   exp2/log2 of |base| plus a sin-based sign correction - confirm against the
   generator. Relies on the file-level max macro. */
void E_n2(float* data0, const float* data1, const int* data2) {
    const float base = data1[0];
    const int exponent = data2[0];
    /* positive part + negative part gives the magnitude of base */
    const float pos_part = max(base,0.0f);
    const float neg_part = max((-base),0.0f);
    const float magnitude = (pos_part+neg_part);
    const float exponent_f = (float)(exponent);
    /* base divided by (magnitude + tiny constant) */
    const float unit = (base/(magnitude+1e-12f));
    const float unit_pos = max(unit,0.0f);
    const float unit_neg = max((-unit),0.0f);
    const float blend = (((unit-1.0f)*(-0.5f))-(1.5f*(1.0f-(unit_pos+unit_neg))));
    const float powered = exp2((log2(magnitude)*0.6931471805599453f*exponent_f*1.4426950408889634f));
    const float wave = sin((1.5707963267948966f-(exponent_f*3.141592653589793f)));
    data0[0] = (1.0f-(powered*((wave*blend)+(1.0f-blend))));
}
/* For each of 64 rows, compares data1[row] against the 1024 entries of data2
   and accumulates the data3 rows (768 wide) whose data2 entry matches.
   The 64x768 result goes to data0. The comparison is applied as a 0/1 float
   multiplier, so every data3 element still enters the sum. */
void r_64_768_1024(float* data0, const int* data1, const int* data2, const float* data3) {
    for (int row = 0; row < 64; row++) {
        const int wanted = data1[row];
        float* out_row = data0 + (row * 768);
        for (int col = 0; col < 768; col++) {
            float total = 0.0f;
            for (int k = 0; k < 1024; k++) {
                const float hit = (float)(wanted == data2[k]);
                total = (hit * data3[col + (k * 768)]) + total;
            }
            out_row[col] = total;
        }
    }
}
/* For 256 entries: data0[i] becomes 1 when data1[i] differs from -1, else 0. */
void E_256(int* data0, const int* data1) {
    for (int i = 0; i < 256; i++) {
        data0[i] = (data1[i] != (-1)) ? 1 : 0;
    }
}
/* Counts how many of the 256 entries of data1 differ from -1 and stores the
   count in data0[0]. */
void r_256(int* data0, const int* data1) {
    int count = 0;
    for (int i = 0; i < 256; i++) {
        if (data1[i] != (-1)) {
            count = 1 + count;
        }
    }
    data0[0] = count;
}
/* For each of the 4x64 integers in data1, accumulates the 768-wide rows of
   data3 whose data2 entry (50257 of them) equals that integer, then adds the
   data4 element at [inner index * 768 + column]. Output layout in data0 is
   [4][64][768]. The comparison is applied as a 0/1 float multiplier. */
void r_4_64_768_50257(float* data0, const int* data1, const int* data2, const float* data3, const float* data4) {
    for (int outer = 0; outer < 4; outer++) {
        for (int inner = 0; inner < 64; inner++) {
            const int wanted = data1[(outer * 64) + inner];
            const int inner_off = inner * 768;
            for (int col = 0; col < 768; col++) {
                const float addend = data4[inner_off + col];
                float total = 0.0f;
                for (int v = 0; v < 50257; v++) {
                    const float hit = (float)(wanted == data2[v]);
                    total = (hit * data3[col + (v * 768)]) + total;
                }
                data0[(outer * 49152) + inner_off + col] = total + addend;
            }
        }
    }
}
/* Stores the reciprocal of the integer data1[0], as a float, in data0[0]. */
void E_n3(float* data0, const int* data1) {
    const float denom = (float)(data1[0]);
    data0[0] = 1.0f / denom;
}
/* For each of 256 rows of data1 (768 floats per row), sums the row and
   multiplies the sum by 0.0013020833333333333f (1/768); result in data0[row]. */
void r_256_768(float* data0, const float* data1) {
    for (int row = 0; row < 256; row++) {
        const float* values = data1 + (row * 768);
        float sum = 0.0f;
        for (int col = 0; col < 768; col++) {
            sum = values[col] + sum;
        }
        data0[row] = sum * 0.0013020833333333333f;
    }
}
/* For each of 256 rows: sums the squared differences between the 768 row
   elements of data1 and data2[row], scales by 1/768, adds 1e-05f, and stores
   the reciprocal in data0[row]. */
void r_256_768n1(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float center = data2[row];
        const float* values = data1 + (row * 768);
        float sq_sum = 0.0f;
        for (int col = 0; col < 768; col++) {
            const float delta = values[col] - center;
            sq_sum = (delta * delta) + sq_sum;
        }
        data0[row] = 1.0f / ((sq_sum * 0.0013020833333333333f) + 1e-05f);
    }
}
void E_256n1(float* data0, const float* data1) {
for (int ridx0 = 0; ridx0 < 256; ridx0++) {
float val0 = data1[ridx0];
data0[ridx0] = sqrt(val0);
}
}
/* 256x768 element-wise kernel:
   data0[r][c] = ((data1[r][c] - data2[r]) * data3[r] * data4[c]) + data5[c]. */
void E_256_768(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
    for (int row = 0; row < 256; row++) {
        const float shift = data2[row];
        const float row_scale = data3[row];
        const int base = row * 768;
        for (int col = 0; col < 768; col++) {
            const int pos = base + col;
            data0[pos] = ((data1[pos] - shift) * row_scale * data4[col]) + data5[col];
        }
    }
}
/* data0[r][c] (256x2304) = dot product of row r of data1 (768 wide) with
   row c of data2 (768 wide), plus data3[c]. */
void r_256_2304_768(float* data0, const float* data1, const float* data2, const float* data3) {
    for (int row = 0; row < 256; row++) {
        const float* lhs = data1 + (row * 768);
        float* out_row = data0 + (row * 2304);
        for (int col = 0; col < 2304; col++) {
            const float* rhs = data2 + (col * 768);
            const float addend = data3[col];
            float dot = 0.0f;
            for (int k = 0; k < 768; k++) {
                dot = (lhs[k] * rhs[k]) + dot;
            }
            out_row[col] = dot + addend;
        }
    }
}
void r_4_12_64_64_64(float* data0, const bool* data1, const float* data2) {
for (int ridx0 = 0; ridx0 < 4; ridx0++) {
for (int ridx1 = 0; ridx1 < 12; ridx1++) {
int alu0 = ((ridx0*147456)+(ridx1*64));
for (int ridx2 = 0; ridx2 < 64; ridx2++) {
int alu1 = (ridx2*64);
for (int ridx3 = 0; ridx3 < 64; ridx3++) {
float acc0 = 0.0f;
bool val0 = data1[alu1+ridx3];
for (int ridx4 = 0; ridx4 < 64; ridx4++) {
float val1 = data2[alu0+(ridx2*2304)+ridx4];
float val2 = data2[alu0+(ridx3*2304)+ridx4+768];
acc0 = ((val1*val2)+acc0);
}
data0[(ridx0*49152)+(ridx1*4096)+alu1+ridx3] = (val0?(-INFINITY):(acc0*0.125f));
}
}
}
}
}
/* For each of 3072 rows of data1 (64 floats per row), stores the largest
   element in data0[row], starting the scan from -INFINITY.
   Relies on the file-level max macro. */
void r_3072_64(float* data0, const float* data1) {
    for (int row = 0; row < 3072; row++) {
        const float* values = data1 + (row * 64);
        float best = -INFINITY;
        for (int col = 0; col < 64; col++) {
            best = max(values[col],best);
        }
        data0[row] = best;
    }
}
void r_3072_64n1(float* data0, const float* data1, const float* data2) {
for (int ridx0 = 0; ridx0 < 3072; ridx0++) {
float acc0 = 0.0f;
float val0 = data2[ridx0];
for (int ridx1 = 0; ridx1 < 64; ridx1++) {
float val1 = data1[(ridx0*64)+ridx1];
acc0 = (exp2(((val1-val0)*1.4426950408889634f))+acc0);
}
data0[ridx0] = acc0;
}
}
/* For each of 3072 rows: counts (as a float) how many of the 64 elements of
   data1 compare equal to data2[row]; the count goes to data0[row]. */
void r_3072_64n2(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 3072; row++) {
        const float reference = data2[row];
        const float* values = data1 + (row * 64);
        float matches = 0.0f;
        for (int col = 0; col < 64; col++) {
            matches = (float)(values[col] == reference) + matches;
        }
        data0[row] = matches;
    }
}
void E_3072_64(float* data0, const float* data1, const float* data2, const float* data3) {
for (int ridx0 = 0; ridx0 < 3072; ridx0++) {
float val0 = data2[ridx0];
float val1 = data3[ridx0];
for (int ridx1 = 0; ridx1 < 64; ridx1++) {
int alu0 = ((ridx0*64)+ridx1);
float val2 = data1[alu0];
data0[alu0] = (exp2(((val2-val0)*1.4426950408889634f))/val1);
}
}
}
/* For each (a < 4, b < 12, i < 64): multiplies row i of a 64x64 tile of data1
   (layout [4][12][64][64]) with a 64x64 slice of data2 read at offset +1536
   with row stride 2304. Output in data0 uses the same [4][12][64][64] layout
   as data1. */
void r_4_12_64_64_64n1(float* data0, const float* data1, const float* data2) {
    for (int a = 0; a < 4; a++) {
        for (int b = 0; b < 12; b++) {
            const int src_base = (a * 147456) + (b * 64) + 1536;
            for (int i = 0; i < 64; i++) {
                const int tile_row = (a * 49152) + (b * 4096) + (i * 64);
                const float* weights = data1 + tile_row;
                for (int j = 0; j < 64; j++) {
                    float dot = 0.0f;
                    for (int k = 0; k < 64; k++) {
                        dot = (weights[k] * data2[src_base + j + (k * 2304)]) + dot;
                    }
                    data0[tile_row + j] = dot;
                }
            }
        }
    }
}
/* data0[a][i][c] = data1[a][i][c] + dot + data4[c] for a < 4, i < 64, c < 768.
   dot is a 768-term product of data2 and row c of data3 (768 wide). data2 is
   gathered through the index a*49152 + i*64 + (k/64)*4096 + (k%64). */
void r_4_64_768_768(float* data0, const float* data1, const float* data2, const float* data3, const float* data4) {
    for (int a = 0; a < 4; a++) {
        const int a_off = a * 49152;
        for (int i = 0; i < 64; i++) {
            const int gather_base = a_off + (i * 64);
            for (int col = 0; col < 768; col++) {
                const int pos = a_off + (i * 768) + col;
                const float carried = data1[pos];
                const float addend = data4[col];
                const float* rhs = data3 + (col * 768);
                float dot = 0.0f;
                for (int k = 0; k < 768; k++) {
                    dot = (data2[gather_base + ((k / 64) * 4096) + (k % 64)] * rhs[k]) + dot;
                }
                data0[pos] = (carried + dot + addend);
            }
        }
    }
}
/* data0[r][c] (256x3072) = dot product of row r of data1 (768 wide) with
   row c of data2 (768 wide), plus data3[c]. */
void r_256_3072_768(float* data0, const float* data1, const float* data2, const float* data3) {
    for (int row = 0; row < 256; row++) {
        const float* lhs = data1 + (row * 768);
        float* out_row = data0 + (row * 3072);
        for (int col = 0; col < 3072; col++) {
            const float* rhs = data2 + (col * 768);
            const float addend = data3[col];
            float dot = 0.0f;
            for (int k = 0; k < 768; k++) {
                dot = (lhs[k] * rhs[k]) + dot;
            }
            out_row[col] = dot + addend;
        }
    }
}
void E_786432(float* data0, const float* data1) {
for (int ridx0 = 0; ridx0 < 786432; ridx0++) {
float val0 = data1[ridx0];
data0[ridx0] = (0.5f*val0*(1.0f+((2.0f*(1.0f/(1.0f+exp2((2.0f*val0*0.7978845608f*(1.0f+(0.044715f*val0*val0))*(-1.4426950408889634f))))))-1.0f)));
}
}
/* data0[r][c] (256x768) = data1[r][c] + dot + data4[c], where dot is the
   3072-term product of row r of data2 with row c of data3. */
void r_256_768_3072(float* data0, const float* data1, const float* data2, const float* data3, const float* data4) {
    for (int row = 0; row < 256; row++) {
        const float* lhs = data2 + (row * 3072);
        for (int col = 0; col < 768; col++) {
            const int pos = (row * 768) + col;
            const float carried = data1[pos];
            const float addend = data4[col];
            const float* rhs = data3 + (col * 3072);
            float dot = 0.0f;
            for (int k = 0; k < 3072; k++) {
                dot = (lhs[k] * rhs[k]) + dot;
            }
            data0[pos] = (carried + dot + addend);
        }
    }
}
/* data0[r][c] (256x50257) = dot product of row r of data1 with row c of
   data2, both 768 wide. */
void r_256_50257_768(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float* lhs = data1 + (row * 768);
        float* out_row = data0 + (row * 50257);
        for (int col = 0; col < 50257; col++) {
            const float* rhs = data2 + (col * 768);
            float dot = 0.0f;
            for (int k = 0; k < 768; k++) {
                dot = (lhs[k] * rhs[k]) + dot;
            }
            out_row[col] = dot;
        }
    }
}
/* For each of 256 rows of data1 (50257 floats per row), stores the largest
   element in data0[row], starting the scan from -INFINITY.
   Relies on the file-level max macro. */
void r_256_50257(float* data0, const float* data1) {
    for (int row = 0; row < 256; row++) {
        const float* values = data1 + (row * 50257);
        float best = -INFINITY;
        for (int col = 0; col < 50257; col++) {
            best = max(values[col],best);
        }
        data0[row] = best;
    }
}
/* For each of 256 rows: counts (as a float) how many of the 50257 elements of
   data1 compare equal to data2[row]; the count goes to data0[row]. */
void r_256_50257n1(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float reference = data2[row];
        const float* values = data1 + (row * 50257);
        float matches = 0.0f;
        for (int col = 0; col < 50257; col++) {
            matches = (float)(values[col] == reference) + matches;
        }
        data0[row] = matches;
    }
}
void r_256_50257n2(float* data0, const float* data1, const float* data2) {
for (int ridx0 = 0; ridx0 < 256; ridx0++) {
float acc0 = 0.0f;
float val0 = data2[ridx0];
for (int ridx1 = 0; ridx1 < 50257; ridx1++) {
float val1 = data1[(ridx0*50257)+ridx1];
acc0 = (exp2(((val1-val0)*1.4426950408889634f))+acc0);
}
data0[ridx0] = acc0;
}
}
void E_256n2(float* data0, const float* data1) {
for (int ridx0 = 0; ridx0 < 256; ridx0++) {
float val0 = data1[ridx0];
data0[ridx0] = (log2(val0)*0.6931471805599453f);
}
}
/* For each of 256 rows: scans the 50257 entries of data2; an entry equal to
   data1[row] contributes -(float)(-1 * data3[row]) * data4[0], any other
   entry contributes -(0 * data4[0]). The row total is divided by data5[row]
   and stored in data0[row]. */
void r_256_50257n3(float* data0, const int* data1, const int* data2, const int* data3, const float* data4, const float* data5) {
    const float factor = data4[0];
    for (int row = 0; row < 256; row++) {
        const int wanted = data1[row];
        const int weight = data3[row];
        const float denom = data5[row];
        float total = 0.0f;
        for (int col = 0; col < 50257; col++) {
            const int pick = (wanted == data2[col]) ? (-1) : 0;
            const float signed_weight = (float)(pick * weight);
            total = (-(signed_weight * factor)) + total;
        }
        data0[row] = (total / denom);
    }
}
/* Reduces a 256x50257 grid to one float in data0[0]. Each element of data1
   has data2[row] and then data3[row] subtracted, and is multiplied by
   (float)(pick * data6[row]), where pick is -1 when data4[row] equals
   data5[col] and 0 otherwise. The grand total is divided by (float)data7[0];
   the generated "+ 0.0f / count" term is kept as written. */
void r_256_50257n4(float* data0, const float* data1, const float* data2, const float* data3, const int* data4, const int* data5, const int* data6, const int* data7) {
    const float count_f = (float)(data7[0]);
    float total = 0.0f;
    for (int row = 0; row < 256; row++) {
        const float first_shift = data2[row];
        const float second_shift = data3[row];
        const int wanted = data4[row];
        const int weight = data6[row];
        const float* values = data1 + (row * 50257);
        for (int col = 0; col < 50257; col++) {
            const int pick = (wanted == data5[col]) ? (-1) : 0;
            const float signed_weight = (float)(pick * weight);
            total = (((values[col] - first_shift) - second_shift) * signed_weight) + total;
        }
    }
    data0[0] = ((total / count_f) + (0.0f / count_f));
}
void r_256_50257n5(float* data0, const int* data1, const int* data2, const int* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
float val0 = data4[0];
for (int ridx0 = 0; ridx0 < 256; ridx0++) {
float acc0 = 0.0f;
int val1 = data1[ridx0];
int val2 = data3[ridx0];
float val3 = data6[ridx0];
float val4 = data7[ridx0];
for (int ridx1 = 0; ridx1 < 50257; ridx1++) {
int val5 = data2[ridx1];
float val6 = data5[(ridx0*50257)+ridx1];
acc0 = ((-(((float)((((val1==val5)?(-1):0)*val2))*val0)+(exp2(((val6-val3)*1.4426950408889634f))*val4)))+acc0);
}
data0[ridx0] = acc0;
}
}
void E_256_50257(float* data0, const int* data1, const int* data2, const int* data3, const float* data4, const float* data5, const float* data6, const float* data7, const float* data8, const float* data9) {
float val0 = data4[0];
for (int ridx0 = 0; ridx0 < 256; ridx0++) {
int val1 = data1[ridx0];
int val2 = data3[ridx0];
float val3 = data6[ridx0];
float val4 = data7[ridx0];
float val5 = data8[ridx0];
float val6 = data9[ridx0];
for (int ridx1 = 0; ridx1 < 50257; ridx1++) {
int alu0 = ((ridx0*50257)+ridx1);
int val7 = data2[ridx1];
float val8 = data5[alu0];
data0[alu0] = (((float)((((val1==val7)?(-1):0)*val2))*val0)+(exp2(((val8-val3)*1.4426950408889634f))*val4)+(((float)((val8==val3))/val5)*val6));
}
}
}
/* data0[r][c] (256x768) = sum over k < 50257 of
   data1[k][c] (row stride 768) * data2[r][k] (row stride 50257). */
void r_256_768_50257(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float* rhs = data2 + (row * 50257);
        float* out_row = data0 + (row * 768);
        for (int col = 0; col < 768; col++) {
            float dot = 0.0f;
            for (int k = 0; k < 50257; k++) {
                dot = (data1[col + (k * 768)] * rhs[k]) + dot;
            }
            out_row[col] = dot;
        }
    }
}
/* For each of 768 columns: reduces over 256 rows the product
   (data4[r][c] - data5[r]) * data6[r] * data7[r][c] into g, then updates two
   buffers in place:
     data0[c] = data2[0] * data0[c] + data3[0] * g
     data1[c] = data8[0] * data1[c] + data9[0] * g * g
   NOTE(review): looks like first/second-moment accumulation of an optimizer -
   confirm against the generator. */
void r2_768_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7, const float* data8, const float* data9) {
    const float keep_a = data2[0];
    const float mix_a = data3[0];
    const float keep_b = data8[0];
    const float mix_b = data9[0];
    for (int col = 0; col < 768; col++) {
        const float prev_a = data0[col];
        const float prev_b = data1[col];
        float total = 0.0f;
        for (int row = 0; row < 256; row++) {
            const int pos = col + (row * 768);
            total = ((data4[pos] - data5[row]) * data6[row] * data7[pos]) + total;
        }
        data0[col] = ((keep_a * prev_a) + (mix_a * total));
        data1[col] = ((keep_b * prev_b) + (mix_b * total * total));
    }
}
/* For each of 768 columns: sums data4[r][c] over 256 rows into g, then
   updates two buffers in place:
     data0[c] = data2[0] * data0[c] + data3[0] * g
     data1[c] = data5[0] * data1[c] + data6[0] * g * g */
void r2_768_256n1(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6) {
    const float keep_a = data2[0];
    const float mix_a = data3[0];
    const float keep_b = data5[0];
    const float mix_b = data6[0];
    for (int col = 0; col < 768; col++) {
        const float prev_a = data0[col];
        const float prev_b = data1[col];
        float total = 0.0f;
        for (int row = 0; row < 256; row++) {
            total = data4[col + (row * 768)] + total;
        }
        data0[col] = ((keep_a * prev_a) + (mix_a * total));
        data1[col] = ((keep_b * prev_b) + (mix_b * total * total));
    }
}
/* For each of 256 rows: reduces (data1[r][c] - data2[r]) * data3[c] * data4[r][c]
   over 768 columns into t, then stores
   (-(t / (data5[r] * 2))) * data6[r] * data6[r] * (1/768) in data0[r]. */
void r_256_768n2(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6) {
    for (int row = 0; row < 256; row++) {
        const float shift = data2[row];
        const float halver = data5[row];
        const float squared = data6[row];
        const int base = row * 768;
        float total = 0.0f;
        for (int col = 0; col < 768; col++) {
            const int pos = base + col;
            total = ((data1[pos] - shift) * data3[col] * data4[pos]) + total;
        }
        data0[row] = ((-(total / (halver * 2.0f))) * squared * squared * 0.0013020833333333333f);
    }
}
void E_768(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
float val0 = data1[0];
float val1 = data3[0];
float val2 = data5[0];
for (int ridx0 = 0; ridx0 < 768; ridx0++) {
float val3 = data0[ridx0];
float val4 = data2[ridx0];
float val5 = data4[ridx0];
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f))));
}
}
/* For each of 256 rows: with d = (data4[r][c] - data5[r]) * data6[r],
   accumulates -((data1[r] * data2[c] * data3[r][c]) + d + d) over 768 columns
   and stores the total times 1/768 in data0[r]. */
void r_256_768n3(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6) {
    for (int row = 0; row < 256; row++) {
        const float row_gain = data1[row];
        const float shift = data5[row];
        const float row_scale = data6[row];
        const int base = row * 768;
        float total = 0.0f;
        for (int col = 0; col < 768; col++) {
            const int pos = base + col;
            const float col_gain = data2[col];
            const float spread = ((data4[pos] - shift) * row_scale);
            total = ((-((row_gain * col_gain * data3[pos]) + spread + spread)) + total);
        }
        data0[row] = (total * 0.0013020833333333333f);
    }
}
/* 256x768 element-wise kernel: with d = (data4[r][c] - data5[r]) * data6[r],
   data0[r][c] = (data1[r] * data2[c] * data3[r][c]) + d + d + data7[r]. */
void E_256_768n1(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
    for (int row = 0; row < 256; row++) {
        const float row_gain = data1[row];
        const float shift = data5[row];
        const float row_scale = data6[row];
        const float row_add = data7[row];
        const int base = row * 768;
        for (int col = 0; col < 768; col++) {
            const int pos = base + col;
            const float col_gain = data2[col];
            const float spread = ((data4[pos] - shift) * row_scale);
            data0[pos] = ((row_gain * col_gain * data3[pos]) + spread + spread + row_add);
        }
    }
}
/* For each (i < 768, j < 3072): g = sum over k < 256 of
   data4[k][j] (row stride 3072) * data5[k][i] (row stride 768); then in place
     data0[i][j] = data2[0] * data0[i][j] + data3[0] * g
     data1[i][j] = data6[0] * data1[i][j] + data7[0] * g * g */
void r2_768_3072_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
    const float keep_a = data2[0];
    const float mix_a = data3[0];
    const float keep_b = data6[0];
    const float mix_b = data7[0];
    for (int i = 0; i < 768; i++) {
        for (int j = 0; j < 3072; j++) {
            const int pos = (i * 3072) + j;
            const float prev_a = data0[pos];
            const float prev_b = data1[pos];
            float total = 0.0f;
            for (int k = 0; k < 256; k++) {
                total = (data4[j + (k * 3072)] * data5[i + (k * 768)]) + total;
            }
            data0[pos] = ((keep_a * prev_a) + (mix_a * total));
            data1[pos] = ((keep_b * prev_b) + (mix_b * total * total));
        }
    }
}
/* For each (r < 256, c < 3072): forms dot = sum over k < 768 of
   data2[k][c] (row stride 3072) * data3[r][k], then combines it with terms
   derived from x = data1[r][c] (a sigmoid of 2*0.7978845608*x*(1+0.044715*x*x)
   and its polynomial factors) and writes the result to data0[r][c].
   NOTE(review): looks like the backward pass of the tanh-approximation GELU
   fused with a matmul - confirm against the generator. */
void r_256_3072_768n1(float* data0, const float* data1, const float* data2, const float* data3) {
    for (int row = 0; row < 256; row++) {
        const float* lhs = data3 + (row * 768);
        for (int col = 0; col < 3072; col++) {
            const int pos = (row * 3072) + col;
            const float x = data1[pos];
            float dot = 0.0f;
            for (int k = 0; k < 768; k++) {
                dot = (data2[col + (k * 3072)] * lhs[k]) + dot;
            }
            const float cubic_coef = (0.044715f*x);
            const float linear = (x*0.7978845608f);
            const float poly = (1.0f+(cubic_coef*x));
            const float gate = (1.0f/(1.0f+exp2((2.0f*linear*poly*(-1.4426950408889634f)))));
            const float chain = (2.0f*gate*(1.0f-gate)*2.0f*0.5f*x*dot);
            const float linear_chain = (linear*chain);
            data0[pos] = ((cubic_coef*linear_chain)+(0.044715f*x*linear_chain)+(0.7978845608f*poly*chain)+(0.5f*(1.0f+((2.0f*gate)-1.0f))*dot));
        }
    }
}
/* For each (i < 3072, j < 768): g = sum over k < 256 of
   data4[k][j] (row stride 768) * data5[k][i] (row stride 3072); then in place
     data0[i][j] = data2[0] * data0[i][j] + data3[0] * g
     data1[i][j] = data6[0] * data1[i][j] + data7[0] * g * g */
void r2_3072_768_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
    const float keep_a = data2[0];
    const float mix_a = data3[0];
    const float keep_b = data6[0];
    const float mix_b = data7[0];
    for (int i = 0; i < 3072; i++) {
        for (int j = 0; j < 768; j++) {
            const int pos = (i * 768) + j;
            const float prev_a = data0[pos];
            const float prev_b = data1[pos];
            float total = 0.0f;
            for (int k = 0; k < 256; k++) {
                total = (data4[j + (k * 768)] * data5[i + (k * 3072)]) + total;
            }
            data0[pos] = ((keep_a * prev_a) + (mix_a * total));
            data1[pos] = ((keep_b * prev_b) + (mix_b * total * total));
        }
    }
}
/* For each of 3072 columns: sums data4[r][c] over 256 rows into g, then
   updates two buffers in place:
     data0[c] = data2[0] * data0[c] + data3[0] * g
     data1[c] = data5[0] * data1[c] + data6[0] * g * g */
void r2_3072_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6) {
    const float keep_a = data2[0];
    const float mix_a = data3[0];
    const float keep_b = data5[0];
    const float mix_b = data6[0];
    for (int col = 0; col < 3072; col++) {
        const float prev_a = data0[col];
        const float prev_b = data1[col];
        float total = 0.0f;
        for (int row = 0; row < 256; row++) {
            total = data4[col + (row * 3072)] + total;
        }
        data0[col] = ((keep_a * prev_a) + (mix_a * total));
        data1[col] = ((keep_b * prev_b) + (mix_b * total * total));
    }
}
void E_2359296(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
float val0 = data1[0];
float val1 = data3[0];
float val2 = data5[0];
for (int ridx0 = 0; ridx0 < 2359296; ridx0++) {
float val3 = data0[ridx0];
float val4 = data2[ridx0];
float val5 = data4[ridx0];
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f))));
}
}
/* data0[r][c] (256x768) = sum over k < 3072 of
   data1[k][c] (row stride 768) * data2[r][k] (row stride 3072). */
void r_256_768_3072n1(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float* rhs = data2 + (row * 3072);
        float* out_row = data0 + (row * 768);
        for (int col = 0; col < 768; col++) {
            float dot = 0.0f;
            for (int k = 0; k < 3072; k++) {
                dot = (data1[col + (k * 768)] * rhs[k]) + dot;
            }
            out_row[col] = dot;
        }
    }
}
void E_3072(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
float val0 = data1[0];
float val1 = data3[0];
float val2 = data5[0];
for (int ridx0 = 0; ridx0 < 3072; ridx0++) {
float val3 = data0[ridx0];
float val4 = data2[ridx0];
float val5 = data4[ridx0];
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f))));
}
}
void E_2359296n1(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
float val0 = data1[0];
float val1 = data3[0];
float val2 = data5[0];
for (int ridx0 = 0; ridx0 < 2359296; ridx0++) {
float val3 = data0[ridx0];
float val4 = data2[ridx0];
float val5 = data4[ridx0];
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f))));
}
}
/* 256x768 element-wise kernel: with d = (data5[r][c] - data6[r]) * data7[r],
   data0[r][c] = data1[r][c] + (data2[r] * data3[c] * data4[r][c]) + d + d + data8[r]. */
void E_256_768n2(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7, const float* data8) {
    for (int row = 0; row < 256; row++) {
        const float row_gain = data2[row];
        const float shift = data6[row];
        const float row_scale = data7[row];
        const float row_add = data8[row];
        const int base = row * 768;
        for (int col = 0; col < 768; col++) {
            const int pos = base + col;
            const float carried = data1[pos];
            const float col_gain = data3[col];
            const float spread = ((data5[pos] - shift) * row_scale);
            data0[pos] = (carried + (row_gain * col_gain * data4[pos]) + spread + spread + row_add);
        }
    }
}
/* For each (i < 768, j < 768): g = sum over (a < 4, t < 64) of
   data4[a*49152 + t*64 + (j/64)*4096 + (j%64)] * data5[i + a*49152 + t*768];
   then in place
     data0[i][j] = data2[0] * data0[i][j] + data3[0] * g
     data1[i][j] = data6[0] * data1[i][j] + data7[0] * g * g */
void r2_768_768_4_64(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
    const float keep_a = data2[0];
    const float mix_a = data3[0];
    const float keep_b = data6[0];
    const float mix_b = data7[0];
    for (int i = 0; i < 768; i++) {
        for (int j = 0; j < 768; j++) {
            const int pos = (i * 768) + j;
            const float prev_a = data0[pos];
            const float prev_b = data1[pos];
            const int gather_off = ((j / 64) * 4096) + (j % 64);
            float total = 0.0f;
            for (int a = 0; a < 4; a++) {
                const int a_off = a * 49152;
                for (int t = 0; t < 64; t++) {
                    total = (data4[a_off + (t * 64) + gather_off] * data5[i + a_off + (t * 768)]) + total;
                }
            }
            data0[pos] = ((keep_a * prev_a) + (mix_a * total));
            data1[pos] = ((keep_b * prev_b) + (mix_b * total * total));
        }
    }
}
/* data0[r][c] (256x768) = sum over k < 768 of
   data1[k][c] (row stride 768) * data2[r][k] (row stride 768). */
void r_256_768_768(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float* rhs = data2 + (row * 768);
        float* out_row = data0 + (row * 768);
        for (int col = 0; col < 768; col++) {
            float dot = 0.0f;
            for (int k = 0; k < 768; k++) {
                dot = (data1[col + (k * 768)] * rhs[k]) + dot;
            }
            out_row[col] = dot;
        }
    }
}
void E_589824(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
float val0 = data1[0];
float val1 = data3[0];
float val2 = data5[0];
for (int ridx0 = 0; ridx0 < 589824; ridx0++) {
float val3 = data0[ridx0];
float val4 = data2[ridx0];
float val5 = data4[ridx0];
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f))));
}
}
/* For each (a < 4, b < 12, i < 64, j < 64): sums over k < 64 the product
   data1[a*49152 + b*4096 + j + k*64] * data2[a*49152 + b*64 + i + k*768]
   and stores it at data0[a*49152 + b*4096 + i*64 + j]. */
void r_4_12_64_64_64n2(float* data0, const float* data1, const float* data2) {
    for (int a = 0; a < 4; a++) {
        const int a_off = a * 49152;
        for (int b = 0; b < 12; b++) {
            const int tile = a_off + (b * 4096);
            const float* right = data2 + a_off + (b * 64);
            for (int i = 0; i < 64; i++) {
                for (int j = 0; j < 64; j++) {
                    float dot = 0.0f;
                    for (int k = 0; k < 64; k++) {
                        dot = (data1[tile + j + (k * 64)] * right[i + (k * 768)]) + dot;
                    }
                    data0[tile + (i * 64) + j] = dot;
                }
            }
        }
    }
}
/* For each (a < 4, b < 12, i < 64, j < 64): sums over k < 64 the product
   data1[a*147456 + b*64 + j*2304 + k + 1536] * data2[a*49152 + b*64 + i*768 + k]
   and stores it at data0[a*49152 + b*4096 + i*64 + j]. */
void r_4_12_64_64_64n3(float* data0, const float* data1, const float* data2) {
    for (int a = 0; a < 4; a++) {
        const int a_off = a * 49152;
        for (int b = 0; b < 12; b++) {
            const int b_off = b * 64;
            const float* left_base = data1 + (a * 147456) + b_off + 1536;
            for (int i = 0; i < 64; i++) {
                const float* right = data2 + a_off + b_off + (i * 768);
                for (int j = 0; j < 64; j++) {
                    const float* left = left_base + (j * 2304);
                    float dot = 0.0f;
                    for (int k = 0; k < 64; k++) {
                        dot = (left[k] * right[k]) + dot;
                    }
                    data0[a_off + (b * 4096) + (i * 64) + j] = dot;
                }
            }
        }
    }
}
void r_3072_64n3(float* data0, const float* data1, const float* data2, const float* data3, const float* data4) {
for (int ridx0 = 0; ridx0 < 3072; ridx0++) {
float acc0 = 0.0f;
float val0 = data3[ridx0];
float val1 = data4[ridx0];
for (int ridx1 = 0; ridx1 < 64; ridx1++) {
int alu0 = ((ridx0*64)+ridx1);
float val2 = data1[alu0];
float val3 = data2[alu0];
acc0 = ((((-val2)*exp2(((val3-val0)*1.4426950408889634f)))/(val1*val1))+acc0);
}
data0[ridx0] = acc0;
}
}
void r_3072_64n4(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
for (int ridx0 = 0; ridx0 < 3072; ridx0++) {
float acc0 = 0.0f;
float val0 = data2[ridx0];
float val1 = data4[ridx0];
float val2 = data5[ridx0];
for (int ridx1 = 0; ridx1 < 64; ridx1++) {
int alu0 = ((ridx0*64)+ridx1);
float val3 = data1[alu0];
float val4 = data3[alu0];
acc0 = ((-(exp2(((val3-val0)*1.4426950408889634f))*((val4/val1)+val2)))+acc0);
}
data0[ridx0] = acc0;
}
}
void E_48_64_64(float* data0, const bool* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7, const float* data8) {
for (int ridx0 = 0; ridx0 < 48; ridx0++) {
for (int ridx1 = 0; ridx1 < 64; ridx1++) {
int alu0 = (ridx1*64);
int alu1 = ((ridx0*64)+ridx1);
float val0 = data3[alu1];
float val1 = data5[alu1];
float val2 = data6[alu1];
float val3 = data7[alu1];
float val4 = data8[alu1];
for (int ridx2 = 0; ridx2 < 64; ridx2++) {
int alu2 = ((ridx0*4096)+alu0+ridx2);
bool val5 = data1[alu0+ridx2];
float val6 = data2[alu2];
float val7 = data4[alu2];
data0[alu2] = (0.125f*(val5?0.0f:((exp2(((val6-val0)*1.4426950408889634f))*((val7/val1)+val2))+(((float)((val6==val0))/val3)*val4))));
}
}
}
}
/* For each (a < 4, b < 12, i < 64, j < 64): sums over k < 64 the product
   data1[a*147456 + b*64 + j + k*2304] * data2[a*49152 + b*4096 + i + k*64]
   and stores it at data0[a*49152 + b*4096 + i*64 + j]. */
void r_4_12_64_64_64n4(float* data0, const float* data1, const float* data2) {
    for (int a = 0; a < 4; a++) {
        for (int b = 0; b < 12; b++) {
            const int tile = (a * 49152) + (b * 4096);
            const float* left = data1 + (a * 147456) + (b * 64);
            for (int i = 0; i < 64; i++) {
                for (int j = 0; j < 64; j++) {
                    float dot = 0.0f;
                    for (int k = 0; k < 64; k++) {
                        dot = (left[j + (k * 2304)] * data2[tile + i + (k * 64)]) + dot;
                    }
                    data0[tile + (i * 64) + j] = dot;
                }
            }
        }
    }
}
/* For each (a < 4, b < 12, i < 64, j < 64): sums over k < 64 the product
   data1[a*147456 + b*64 + j + k*2304 + 768] * data2[a*49152 + b*4096 + i*64 + k]
   and stores it at data0[a*49152 + b*4096 + i*64 + j]. */
void r_4_12_64_64_64n5(float* data0, const float* data1, const float* data2) {
    for (int a = 0; a < 4; a++) {
        for (int b = 0; b < 12; b++) {
            const float* left = data1 + (a * 147456) + (b * 64) + 768;
            for (int i = 0; i < 64; i++) {
                const int tile_row = (a * 49152) + (b * 4096) + (i * 64);
                const float* right = data2 + tile_row;
                for (int j = 0; j < 64; j++) {
                    float dot = 0.0f;
                    for (int k = 0; k < 64; k++) {
                        dot = (left[j + (k * 2304)] * right[k]) + dot;
                    }
                    data0[tile_row + j] = dot;
                }
            }
        }
    }
}
/* Fills a [4][64][2304] output from three sources, selected by column range:
     columns 1536..2303 read data1,
     columns  768..1535 read data2,
     columns    0..767  read data3.
   Each output is the sum of the three parts; the two that are out of range
   contribute 0.0f. A source is only indexed when its range matches. */
void E_4_64_2304(float* data0, const float* data1, const float* data2, const float* data3) {
    for (int outer = 0; outer < 4; outer++) {
        for (int mid = 0; mid < 64; mid++) {
            float* out_row = data0 + (outer * 147456) + (mid * 2304);
            for (int col = 0; col < 2304; col++) {
                const int shifted = (mid + (col / 768));
                const int shifted63 = (shifted + 63);
                const int group_off = (((col / 64) % 12) * 4096);
                const int lane = (col % 64);
                float part_hi = 0.0f;
                float part_mid = 0.0f;
                float part_lo = 0.0f;
                if (col >= 1536) {
                    part_hi = data1[(outer * 49152) + mid + (col * 64) + (-98304)];
                }
                if ((col >= 768) && (col < 1536)) {
                    part_mid = data2[(((outer + (shifted63 / 64) + 3) % 4) * 49152) + ((shifted63 % 64) * 64) + group_off + lane];
                }
                if (col < 768) {
                    part_lo = data3[(((outer + (shifted / 64)) % 4) * 49152) + ((shifted % 64) * 64) + group_off + lane];
                }
                out_row[col] = (part_hi + part_mid + part_lo);
            }
        }
    }
}
/* For each (i < 2304, j < 768): g = sum over k < 256 of
   data4[k][j] (row stride 768) * data5[k][i] (row stride 2304); then in place
     data0[i][j] = data2[0] * data0[i][j] + data3[0] * g
     data1[i][j] = data6[0] * data1[i][j] + data7[0] * g * g */
void r2_2304_768_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6, const float* data7) {
    const float keep_a = data2[0];
    const float mix_a = data3[0];
    const float keep_b = data6[0];
    const float mix_b = data7[0];
    for (int i = 0; i < 2304; i++) {
        for (int j = 0; j < 768; j++) {
            const int pos = (i * 768) + j;
            const float prev_a = data0[pos];
            const float prev_b = data1[pos];
            float total = 0.0f;
            for (int k = 0; k < 256; k++) {
                total = (data4[j + (k * 768)] * data5[i + (k * 2304)]) + total;
            }
            data0[pos] = ((keep_a * prev_a) + (mix_a * total));
            data1[pos] = ((keep_b * prev_b) + (mix_b * total * total));
        }
    }
}
/* For each of 2304 columns: sums data4[r][c] over 256 rows into g, then
   updates two buffers in place:
     data0[c] = data2[0] * data0[c] + data3[0] * g
     data1[c] = data5[0] * data1[c] + data6[0] * g * g */
void r2_2304_256(float* data0, float* data1, const float* data2, const float* data3, const float* data4, const float* data5, const float* data6) {
    const float keep_a = data2[0];
    const float mix_a = data3[0];
    const float keep_b = data5[0];
    const float mix_b = data6[0];
    for (int col = 0; col < 2304; col++) {
        const float prev_a = data0[col];
        const float prev_b = data1[col];
        float total = 0.0f;
        for (int row = 0; row < 256; row++) {
            total = data4[col + (row * 2304)] + total;
        }
        data0[col] = ((keep_a * prev_a) + (mix_a * total));
        data1[col] = ((keep_b * prev_b) + (mix_b * total * total));
    }
}
/* data0[r][c] (256x768) = sum over k < 2304 of
   data1[k][c] (row stride 768) * data2[r][k] (row stride 2304). */
void r_256_768_2304(float* data0, const float* data1, const float* data2) {
    for (int row = 0; row < 256; row++) {
        const float* rhs = data2 + (row * 2304);
        float* out_row = data0 + (row * 768);
        for (int col = 0; col < 768; col++) {
            float dot = 0.0f;
            for (int k = 0; k < 2304; k++) {
                dot = (data1[col + (k * 768)] * rhs[k]) + dot;
            }
            out_row[col] = dot;
        }
    }
}
void E_2304(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
float val0 = data1[0];
float val1 = data3[0];
float val2 = data5[0];
for (int ridx0 = 0; ridx0 < 2304; ridx0++) {
float val3 = data0[ridx0];
float val4 = data2[ridx0];
float val5 = data4[ridx0];
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f))));
}
}
void E_1769472(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
float val0 = data1[0];
float val1 = data3[0];
float val2 = data5[0];
for (int ridx0 = 0; ridx0 < 1769472; ridx0++) {
float val3 = data0[ridx0];
float val4 = data2[ridx0];
float val5 = data4[ridx0];
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f))));
}
}
/* data0[i] (49152 entries) = sum of the four values data1[i + k*49152],
   k = 0..3, added in increasing k order. */
void r_49152_4(float* data0, const float* data1) {
    for (int i = 0; i < 49152; i++) {
        float total = 0.0f;
        for (int slab = 0; slab < 4; slab++) {
            total = data1[i + (slab * 49152)] + total;
        }
        data0[i] = total;
    }
}
/* For each of the 50257 entries of data2, accumulates the 768-wide rows of
   data3 (256 rows) whose data1 entry equals that data2 entry; the
   50257x768 result goes to data0. The comparison is applied as a 0/1 float
   multiplier, so every data3 element still enters the sum. */
void r_50257_768_256(float* data0, const int* data1, const int* data2, const float* data3) {
    for (int v = 0; v < 50257; v++) {
        const int wanted = data2[v];
        float* out_row = data0 + (v * 768);
        for (int col = 0; col < 768; col++) {
            float total = 0.0f;
            for (int k = 0; k < 256; k++) {
                const float hit = (float)(data1[k] == wanted);
                total = (hit * data3[col + (k * 768)]) + total;
            }
            out_row[col] = total;
        }
    }
}
/* For each (p < 1024, c < 768): g = sum over k < 64 of
   (float)(data4[k] == data5[p]) * data6[k][c] (row stride 768); then in place
     data0[p][c] = data2[0] * data0[p][c] + data3[0] * g
     data1[p][c] = data7[0] * data1[p][c] + data8[0] * g * g */
void r2_1024_768_64(float* data0, float* data1, const float* data2, const float* data3, const int* data4, const int* data5, const float* data6, const float* data7, const float* data8) {
    const float keep_a = data2[0];
    const float mix_a = data3[0];
    const float keep_b = data7[0];
    const float mix_b = data8[0];
    for (int p = 0; p < 1024; p++) {
        const int wanted = data5[p];
        for (int col = 0; col < 768; col++) {
            const int pos = (p * 768) + col;
            const float prev_a = data0[pos];
            const float prev_b = data1[pos];
            float total = 0.0f;
            for (int k = 0; k < 64; k++) {
                const float hit = (float)(data4[k] == wanted);
                total = (hit * data6[col + (k * 768)]) + total;
            }
            data0[pos] = ((keep_a * prev_a) + (mix_a * total));
            data1[pos] = ((keep_b * prev_b) + (mix_b * total * total));
        }
    }
}
/* data0[v][c] (50257x768) = data3[v][c] + sum over k < 256 of
   data1[k][c] (row stride 768) * data2[k][v] (row stride 50257). */
void r_50257_768_256n1(float* data0, const float* data1, const float* data2, const float* data3) {
    for (int v = 0; v < 50257; v++) {
        for (int col = 0; col < 768; col++) {
            const int pos = (v * 768) + col;
            const float addend = data3[pos];
            float dot = 0.0f;
            for (int k = 0; k < 256; k++) {
                dot = (data1[col + (k * 768)] * data2[v + (k * 50257)]) + dot;
            }
            data0[pos] = (dot + addend);
        }
    }
}
void E_786432n1(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
float val0 = data1[0];
float val1 = data3[0];
float val2 = data5[0];
for (int ridx0 = 0; ridx0 < 786432; ridx0++) {
float val3 = data0[ridx0];
float val4 = data2[ridx0];
float val5 = data4[ridx0];
data0[ridx0] = (val3-(val0*((val4/val1)/(sqrt((val5/val2))+1e-08f))));
}
}
/*
 * In-place element-wise blend over 38597376 floats:
 *   data0[i] = data1[0] * data0[i] + data2[0] * data3[i]
 * Both scalars are read once before the loop.
 * NOTE(review): has the shape of an exponential-moving-average update —
 * confirm against the call site.
 *
 * data0: updated in place.
 * data1: scalar applied to the existing value.
 * data2: scalar applied to the incoming value.
 * data3: incoming per-element values.
 */
void E_38597376(float* data0, const float* data1, const float* data2, const float* data3) {
  enum { COUNT = 38597376 };

  const float self_scale = data1[0];
  const float input_scale = data2[0];

  int i = 0;
  while (i < COUNT) {
    const float current = data0[i];
    const float incoming = data3[i];
    data0[i] = ((self_scale * current) + (input_scale * incoming));
    i += 1;
  }
}
/*
 * In-place element-wise blend with the square of the input, over 38597376 floats:
 *   data0[i] = data1[0] * data0[i] + (data2[0] * data3[i]) * data3[i]
 * Both scalars are read once before the loop.
 * NOTE(review): has the shape of a running average of squared values —
 * confirm against the call site.
 *
 * data0: updated in place.
 * data1: scalar applied to the existing value.
 * data2: scalar applied to the squared incoming value.
 * data3: incoming per-element values.
 */
void E_38597376n1(float* data0, const float* data1, const float* data2, const float* data3) {
  enum { COUNT = 38597376 };

  const float self_scale = data1[0];
  const float input_scale = data2[0];

  int i = 0;
  while (i < COUNT) {
    const float current = data0[i];
    const float incoming = data3[i];
    /* Left-to-right product: (input_scale * incoming) * incoming. */
    data0[i] = ((self_scale * current) + (input_scale * incoming * incoming));
    i += 1;
  }
}
/*
 * In-place element-wise update over 38597376 floats:
 *   data0[i] = data0[i]
 *              - data1[0] * ((data2[i] / data3[0]) / (sqrt(data4[i] / data5[0]) + 1e-08f))
 * The three scalars are read once before the loop.
 * NOTE(review): the formula has the shape of an Adam-style parameter step
 * (step size, bias-corrected moments, epsilon) — confirm against the call site.
 *
 * data0: updated in place.
 * data1: scalar multiplier of the whole step.
 * data2: per-element numerator; data3: scalar it is divided by.
 * data4: per-element value under the square root; data5: scalar it is divided by.
 */
void E_38597376n2(float* data0, const float* data1, const float* data2, const float* data3, const float* data4, const float* data5) {
  enum { COUNT = 38597376 };

  const float step_scale = data1[0];
  const float num_div = data3[0];
  const float den_div = data5[0];

  int i = 0;
  while (i < COUNT) {
    const float current = data0[i];
    const float num = (data2[i] / num_div);
    const float den = (data4[i] / den_div);
    /* sqrt is the type-generic call already used by this file. */
    data0[i] = (current - (step_scale * (num / (sqrt(den) + 1e-08f))));
    i += 1;
  }
}
int main() {
int* adam_t = (int*)malloc(4);
int* X = (int*)malloc(1024);
int* b2 = (int*)malloc(256);
bool* b3 = (bool*)malloc(4096);
float* h_0_attn_bias = (float*)malloc(4194304);
bool* b5 = (bool*)malloc(4096);
float* h_1_attn_bias = (float*)malloc(4194304);
bool* b7 = (bool*)malloc(4096);
float* h_2_attn_bias = (float*)malloc(4194304);
bool* b9 = (bool*)malloc(4096);
float* h_3_attn_bias = (float*)malloc(4194304);
bool* b11 = (bool*)malloc(4096);
float* h_4_attn_bias = (float*)malloc(4194304);
bool* b13 = (bool*)malloc(4096);
float* h_5_attn_bias = (float*)malloc(4194304);
bool* b15 = (bool*)malloc(4096);
float* h_6_attn_bias = (float*)malloc(4194304);
bool* b17 = (bool*)malloc(4096);
float* h_7_attn_bias = (float*)malloc(4194304);
bool* b19 = (bool*)malloc(4096);
float* h_8_attn_bias = (float*)malloc(4194304);
bool* b21 = (bool*)malloc(4096);
float* h_9_attn_bias = (float*)malloc(4194304);
bool* b23 = (bool*)malloc(4096);
float* h_10_attn_bias = (float*)malloc(4194304);
bool* b25 = (bool*)malloc(4096);
float* h_11_attn_bias = (float*)malloc(4194304);
int* Y = (int*)malloc(1024);
int* b28 = (int*)malloc(201028);
float* b29 = (float*)malloc(4);
float* adam_b1 = (float*)malloc(4);
float* b31 = (float*)malloc(4);
float* adam_b2 = (float*)malloc(4);
float* b33 = (float*)malloc(4);
float* b34 = (float*)malloc(4);
float* b35 = (float*)malloc(196608);
int* wpe_arange = (int*)malloc(4096);
float* wpe_weight = (float*)malloc(3145728);
int* b38 = (int*)malloc(1024);
int* b39 = (int*)malloc(4);
float* b40 = (float*)malloc(786432);
int* wte_arange = (int*)malloc(201028);
float* lm_head_weight = (float*)malloc(154389504);
float* b43 = (float*)malloc(4);
float* b44 = (float*)malloc(1024);
float* b45 = (float*)malloc(1024);
float* b46 = (float*)malloc(1024);
float* b47 = (float*)malloc(786432);
float* h_0_ln_1_weight = (float*)malloc(3072);
float* h_0_ln_1_bias = (float*)malloc(3072);
float* b50 = (float*)malloc(2359296);
float* h_0_attn_c_attn_weight = (float*)malloc(7077888);
float* h_0_attn_c_attn_bias = (float*)malloc(9216);
float* b53 = (float*)malloc(786432);
float* b54 = (float*)malloc(12288);
float* b55 = (float*)malloc(12288);
float* b56 = (float*)malloc(12288);
float* b57 = (float*)malloc(786432);
float* b58 = (float*)malloc(786432);
float* b59 = (float*)malloc(786432);
float* h_0_attn_c_proj_weight = (float*)malloc(2359296);
float* h_0_attn_c_proj_bias = (float*)malloc(3072);
float* b62 = (float*)malloc(1024);
float* b63 = (float*)malloc(1024);
float* b64 = (float*)malloc(1024);
float* b65 = (float*)malloc(786432);
float* h_0_ln_2_weight = (float*)malloc(3072);
float* h_0_ln_2_bias = (float*)malloc(3072);
float* b68 = (float*)malloc(3145728);
float* h_0_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_0_mlp_c_fc_bias = (float*)malloc(12288);
float* b71 = (float*)malloc(3145728);
float* b72 = (float*)malloc(786432);
float* h_0_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_0_mlp_c_proj_bias = (float*)malloc(3072);
float* b75 = (float*)malloc(1024);
float* b76 = (float*)malloc(1024);
float* b77 = (float*)malloc(1024);
float* b78 = (float*)malloc(786432);
float* h_1_ln_1_weight = (float*)malloc(3072);
float* h_1_ln_1_bias = (float*)malloc(3072);
float* b81 = (float*)malloc(2359296);
float* h_1_attn_c_attn_weight = (float*)malloc(7077888);
float* h_1_attn_c_attn_bias = (float*)malloc(9216);
float* b84 = (float*)malloc(786432);
float* b85 = (float*)malloc(12288);
float* b86 = (float*)malloc(12288);
float* b87 = (float*)malloc(12288);
float* b88 = (float*)malloc(786432);
float* b89 = (float*)malloc(786432);
float* b90 = (float*)malloc(786432);
float* h_1_attn_c_proj_weight = (float*)malloc(2359296);
float* h_1_attn_c_proj_bias = (float*)malloc(3072);
float* b93 = (float*)malloc(1024);
float* b94 = (float*)malloc(1024);
float* b95 = (float*)malloc(1024);
float* b96 = (float*)malloc(786432);
float* h_1_ln_2_weight = (float*)malloc(3072);
float* h_1_ln_2_bias = (float*)malloc(3072);
float* b99 = (float*)malloc(3145728);
float* h_1_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_1_mlp_c_fc_bias = (float*)malloc(12288);
float* b102 = (float*)malloc(3145728);
float* b103 = (float*)malloc(786432);
float* h_1_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_1_mlp_c_proj_bias = (float*)malloc(3072);
float* b106 = (float*)malloc(1024);
float* b107 = (float*)malloc(1024);
float* b108 = (float*)malloc(1024);
float* b109 = (float*)malloc(786432);
float* h_2_ln_1_weight = (float*)malloc(3072);
float* h_2_ln_1_bias = (float*)malloc(3072);
float* b112 = (float*)malloc(2359296);
float* h_2_attn_c_attn_weight = (float*)malloc(7077888);
float* h_2_attn_c_attn_bias = (float*)malloc(9216);
float* b115 = (float*)malloc(786432);
float* b116 = (float*)malloc(12288);
float* b117 = (float*)malloc(12288);
float* b118 = (float*)malloc(12288);
float* b119 = (float*)malloc(786432);
float* b120 = (float*)malloc(786432);
float* b121 = (float*)malloc(786432);
float* h_2_attn_c_proj_weight = (float*)malloc(2359296);
float* h_2_attn_c_proj_bias = (float*)malloc(3072);
float* b124 = (float*)malloc(1024);
float* b125 = (float*)malloc(1024);
float* b126 = (float*)malloc(1024);
float* b127 = (float*)malloc(786432);
float* h_2_ln_2_weight = (float*)malloc(3072);
float* h_2_ln_2_bias = (float*)malloc(3072);
float* b130 = (float*)malloc(3145728);
float* h_2_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_2_mlp_c_fc_bias = (float*)malloc(12288);
float* b133 = (float*)malloc(3145728);
float* b134 = (float*)malloc(786432);
float* h_2_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_2_mlp_c_proj_bias = (float*)malloc(3072);
float* b137 = (float*)malloc(1024);
float* b138 = (float*)malloc(1024);
float* b139 = (float*)malloc(1024);
float* b140 = (float*)malloc(786432);
float* h_3_ln_1_weight = (float*)malloc(3072);
float* h_3_ln_1_bias = (float*)malloc(3072);
float* b143 = (float*)malloc(2359296);
float* h_3_attn_c_attn_weight = (float*)malloc(7077888);
float* h_3_attn_c_attn_bias = (float*)malloc(9216);
float* b146 = (float*)malloc(786432);
float* b147 = (float*)malloc(12288);
float* b148 = (float*)malloc(12288);
float* b149 = (float*)malloc(12288);
float* b150 = (float*)malloc(786432);
float* b151 = (float*)malloc(786432);
float* b152 = (float*)malloc(786432);
float* h_3_attn_c_proj_weight = (float*)malloc(2359296);
float* h_3_attn_c_proj_bias = (float*)malloc(3072);
float* b155 = (float*)malloc(1024);
float* b156 = (float*)malloc(1024);
float* b157 = (float*)malloc(1024);
float* b158 = (float*)malloc(786432);
float* h_3_ln_2_weight = (float*)malloc(3072);
float* h_3_ln_2_bias = (float*)malloc(3072);
float* b161 = (float*)malloc(3145728);
float* h_3_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_3_mlp_c_fc_bias = (float*)malloc(12288);
float* b164 = (float*)malloc(3145728);
float* b165 = (float*)malloc(786432);
float* h_3_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_3_mlp_c_proj_bias = (float*)malloc(3072);
float* b168 = (float*)malloc(1024);
float* b169 = (float*)malloc(1024);
float* b170 = (float*)malloc(1024);
float* b171 = (float*)malloc(786432);
float* h_4_ln_1_weight = (float*)malloc(3072);
float* h_4_ln_1_bias = (float*)malloc(3072);
float* b174 = (float*)malloc(2359296);
float* h_4_attn_c_attn_weight = (float*)malloc(7077888);
float* h_4_attn_c_attn_bias = (float*)malloc(9216);
float* b177 = (float*)malloc(786432);
float* b178 = (float*)malloc(12288);
float* b179 = (float*)malloc(12288);
float* b180 = (float*)malloc(12288);
float* b181 = (float*)malloc(786432);
float* b182 = (float*)malloc(786432);
float* b183 = (float*)malloc(786432);
float* h_4_attn_c_proj_weight = (float*)malloc(2359296);
float* h_4_attn_c_proj_bias = (float*)malloc(3072);
float* b186 = (float*)malloc(1024);
float* b187 = (float*)malloc(1024);
float* b188 = (float*)malloc(1024);
float* b189 = (float*)malloc(786432);
float* h_4_ln_2_weight = (float*)malloc(3072);
float* h_4_ln_2_bias = (float*)malloc(3072);
float* b192 = (float*)malloc(3145728);
float* h_4_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_4_mlp_c_fc_bias = (float*)malloc(12288);
float* b195 = (float*)malloc(3145728);
float* b196 = (float*)malloc(786432);
float* h_4_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_4_mlp_c_proj_bias = (float*)malloc(3072);
float* b199 = (float*)malloc(1024);
float* b200 = (float*)malloc(1024);
float* b201 = (float*)malloc(1024);
float* b202 = (float*)malloc(786432);
float* h_5_ln_1_weight = (float*)malloc(3072);
float* h_5_ln_1_bias = (float*)malloc(3072);
float* b205 = (float*)malloc(2359296);
float* h_5_attn_c_attn_weight = (float*)malloc(7077888);
float* h_5_attn_c_attn_bias = (float*)malloc(9216);
float* b208 = (float*)malloc(786432);
float* b209 = (float*)malloc(12288);
float* b210 = (float*)malloc(12288);
float* b211 = (float*)malloc(12288);
float* b212 = (float*)malloc(786432);
float* b213 = (float*)malloc(786432);
float* b214 = (float*)malloc(786432);
float* h_5_attn_c_proj_weight = (float*)malloc(2359296);
float* h_5_attn_c_proj_bias = (float*)malloc(3072);
float* b217 = (float*)malloc(1024);
float* b218 = (float*)malloc(1024);
float* b219 = (float*)malloc(1024);
float* b220 = (float*)malloc(786432);
float* h_5_ln_2_weight = (float*)malloc(3072);
float* h_5_ln_2_bias = (float*)malloc(3072);
float* b223 = (float*)malloc(3145728);
float* h_5_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_5_mlp_c_fc_bias = (float*)malloc(12288);
float* b226 = (float*)malloc(3145728);
float* b227 = (float*)malloc(786432);
float* h_5_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_5_mlp_c_proj_bias = (float*)malloc(3072);
float* b230 = (float*)malloc(1024);
float* b231 = (float*)malloc(1024);
float* b232 = (float*)malloc(1024);
float* b233 = (float*)malloc(786432);
float* h_6_ln_1_weight = (float*)malloc(3072);
float* h_6_ln_1_bias = (float*)malloc(3072);
float* b236 = (float*)malloc(2359296);
float* h_6_attn_c_attn_weight = (float*)malloc(7077888);
float* h_6_attn_c_attn_bias = (float*)malloc(9216);
float* b239 = (float*)malloc(786432);
float* b240 = (float*)malloc(12288);
float* b241 = (float*)malloc(12288);
float* b242 = (float*)malloc(12288);
float* b243 = (float*)malloc(786432);
float* b244 = (float*)malloc(786432);
float* b245 = (float*)malloc(786432);
float* h_6_attn_c_proj_weight = (float*)malloc(2359296);
float* h_6_attn_c_proj_bias = (float*)malloc(3072);
float* b248 = (float*)malloc(1024);
float* b249 = (float*)malloc(1024);
float* b250 = (float*)malloc(1024);
float* b251 = (float*)malloc(786432);
float* h_6_ln_2_weight = (float*)malloc(3072);
float* h_6_ln_2_bias = (float*)malloc(3072);
float* b254 = (float*)malloc(3145728);
float* h_6_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_6_mlp_c_fc_bias = (float*)malloc(12288);
float* b257 = (float*)malloc(3145728);
float* b258 = (float*)malloc(786432);
float* h_6_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_6_mlp_c_proj_bias = (float*)malloc(3072);
float* b261 = (float*)malloc(1024);
float* b262 = (float*)malloc(1024);
float* b263 = (float*)malloc(1024);
float* b264 = (float*)malloc(786432);
float* h_7_ln_1_weight = (float*)malloc(3072);
float* h_7_ln_1_bias = (float*)malloc(3072);
float* b267 = (float*)malloc(2359296);
float* h_7_attn_c_attn_weight = (float*)malloc(7077888);
float* h_7_attn_c_attn_bias = (float*)malloc(9216);
float* b270 = (float*)malloc(786432);
float* b271 = (float*)malloc(12288);
float* b272 = (float*)malloc(12288);
float* b273 = (float*)malloc(12288);
float* b274 = (float*)malloc(786432);
float* b275 = (float*)malloc(786432);
float* b276 = (float*)malloc(786432);
float* h_7_attn_c_proj_weight = (float*)malloc(2359296);
float* h_7_attn_c_proj_bias = (float*)malloc(3072);
float* b279 = (float*)malloc(1024);
float* b280 = (float*)malloc(1024);
float* b281 = (float*)malloc(1024);
float* b282 = (float*)malloc(786432);
float* h_7_ln_2_weight = (float*)malloc(3072);
float* h_7_ln_2_bias = (float*)malloc(3072);
float* b285 = (float*)malloc(3145728);
float* h_7_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_7_mlp_c_fc_bias = (float*)malloc(12288);
float* b288 = (float*)malloc(3145728);
float* b289 = (float*)malloc(786432);
float* h_7_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_7_mlp_c_proj_bias = (float*)malloc(3072);
float* b292 = (float*)malloc(1024);
float* b293 = (float*)malloc(1024);
float* b294 = (float*)malloc(1024);
float* b295 = (float*)malloc(786432);
float* h_8_ln_1_weight = (float*)malloc(3072);
float* h_8_ln_1_bias = (float*)malloc(3072);
float* b298 = (float*)malloc(2359296);
float* h_8_attn_c_attn_weight = (float*)malloc(7077888);
float* h_8_attn_c_attn_bias = (float*)malloc(9216);
float* b301 = (float*)malloc(786432);
float* b302 = (float*)malloc(12288);
float* b303 = (float*)malloc(12288);
float* b304 = (float*)malloc(12288);
float* b305 = (float*)malloc(786432);
float* b306 = (float*)malloc(786432);
float* b307 = (float*)malloc(786432);
float* h_8_attn_c_proj_weight = (float*)malloc(2359296);
float* h_8_attn_c_proj_bias = (float*)malloc(3072);
float* b310 = (float*)malloc(1024);
float* b311 = (float*)malloc(1024);
float* b312 = (float*)malloc(1024);
float* b313 = (float*)malloc(786432);
float* h_8_ln_2_weight = (float*)malloc(3072);
float* h_8_ln_2_bias = (float*)malloc(3072);
float* b316 = (float*)malloc(3145728);
float* h_8_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_8_mlp_c_fc_bias = (float*)malloc(12288);
float* b319 = (float*)malloc(3145728);
float* b320 = (float*)malloc(786432);
float* h_8_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_8_mlp_c_proj_bias = (float*)malloc(3072);
float* b323 = (float*)malloc(1024);
float* b324 = (float*)malloc(1024);
float* b325 = (float*)malloc(1024);
float* b326 = (float*)malloc(786432);
float* h_9_ln_1_weight = (float*)malloc(3072);
float* h_9_ln_1_bias = (float*)malloc(3072);
float* b329 = (float*)malloc(2359296);
float* h_9_attn_c_attn_weight = (float*)malloc(7077888);
float* h_9_attn_c_attn_bias = (float*)malloc(9216);
float* b332 = (float*)malloc(786432);
float* b333 = (float*)malloc(12288);
float* b334 = (float*)malloc(12288);
float* b335 = (float*)malloc(12288);
float* b336 = (float*)malloc(786432);
float* b337 = (float*)malloc(786432);
float* b338 = (float*)malloc(786432);
float* h_9_attn_c_proj_weight = (float*)malloc(2359296);
float* h_9_attn_c_proj_bias = (float*)malloc(3072);
float* b341 = (float*)malloc(1024);
float* b342 = (float*)malloc(1024);
float* b343 = (float*)malloc(1024);
float* b344 = (float*)malloc(786432);
float* h_9_ln_2_weight = (float*)malloc(3072);
float* h_9_ln_2_bias = (float*)malloc(3072);
float* b347 = (float*)malloc(3145728);
float* h_9_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_9_mlp_c_fc_bias = (float*)malloc(12288);
float* b350 = (float*)malloc(3145728);
float* b351 = (float*)malloc(786432);
float* h_9_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_9_mlp_c_proj_bias = (float*)malloc(3072);
float* b354 = (float*)malloc(1024);
float* b355 = (float*)malloc(1024);
float* b356 = (float*)malloc(1024);
float* b357 = (float*)malloc(786432);
float* h_10_ln_1_weight = (float*)malloc(3072);
float* h_10_ln_1_bias = (float*)malloc(3072);
float* b360 = (float*)malloc(2359296);
float* h_10_attn_c_attn_weight = (float*)malloc(7077888);
float* h_10_attn_c_attn_bias = (float*)malloc(9216);
float* b363 = (float*)malloc(786432);
float* b364 = (float*)malloc(12288);
float* b365 = (float*)malloc(12288);
float* b366 = (float*)malloc(12288);
float* b367 = (float*)malloc(786432);
float* b368 = (float*)malloc(786432);
float* b369 = (float*)malloc(786432);
float* h_10_attn_c_proj_weight = (float*)malloc(2359296);
float* h_10_attn_c_proj_bias = (float*)malloc(3072);
float* b372 = (float*)malloc(1024);
float* b373 = (float*)malloc(1024);
float* b374 = (float*)malloc(1024);
float* b375 = (float*)malloc(786432);
float* h_10_ln_2_weight = (float*)malloc(3072);
float* h_10_ln_2_bias = (float*)malloc(3072);
float* b378 = (float*)malloc(3145728);
float* h_10_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_10_mlp_c_fc_bias = (float*)malloc(12288);
float* b381 = (float*)malloc(3145728);
float* b382 = (float*)malloc(786432);
float* h_10_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_10_mlp_c_proj_bias = (float*)malloc(3072);
float* b385 = (float*)malloc(1024);
float* b386 = (float*)malloc(1024);
float* b387 = (float*)malloc(1024);
float* b388 = (float*)malloc(786432);
float* h_11_ln_1_weight = (float*)malloc(3072);
float* h_11_ln_1_bias = (float*)malloc(3072);
float* b391 = (float*)malloc(2359296);
float* h_11_attn_c_attn_weight = (float*)malloc(7077888);
float* h_11_attn_c_attn_bias = (float*)malloc(9216);
float* b394 = (float*)malloc(786432);
float* b395 = (float*)malloc(12288);
float* b396 = (float*)malloc(12288);
float* b397 = (float*)malloc(12288);
float* b398 = (float*)malloc(786432);
float* b399 = (float*)malloc(786432);
float* b400 = (float*)malloc(786432);
float* h_11_attn_c_proj_weight = (float*)malloc(2359296);
float* h_11_attn_c_proj_bias = (float*)malloc(3072);
float* b403 = (float*)malloc(1024);
float* b404 = (float*)malloc(1024);
float* b405 = (float*)malloc(1024);
float* b406 = (float*)malloc(786432);
float* h_11_ln_2_weight = (float*)malloc(3072);
float* h_11_ln_2_bias = (float*)malloc(3072);
float* b409 = (float*)malloc(3145728);
float* h_11_mlp_c_fc_weight = (float*)malloc(9437184);
float* h_11_mlp_c_fc_bias = (float*)malloc(12288);
float* b412 = (float*)malloc(3145728);
float* b413 = (float*)malloc(786432);
float* h_11_mlp_c_proj_weight = (float*)malloc(9437184);
float* h_11_mlp_c_proj_bias = (float*)malloc(3072);
float* b416 = (float*)malloc(1024);
float* b417 = (float*)malloc(1024);
float* b418 = (float*)malloc(1024);
float* b419 = (float*)malloc(786432);
float* ln_f_weight = (float*)malloc(3072);
float* ln_f_bias = (float*)malloc(3072);
float* b422 = (float*)malloc(51463168);
float* b423 = (float*)malloc(1024);
float* b424 = (float*)malloc(1024);
float* b425 = (float*)malloc(1024);
float* b426 = (float*)malloc(1024);
float* b427 = (float*)malloc(1024);
float* loss = (float*)malloc(4);
float* b429 = (float*)malloc(51463168);
float* b430 = (float*)malloc(786432);
float* adam_m_ln_f_weight = (float*)malloc(3072);
float* adam_v_ln_f_weight = (float*)malloc(3072);
float* adam_m_ln_f_bias = (float*)malloc(3072);
float* adam_v_ln_f_bias = (float*)malloc(3072);
float* adam_lr = (float*)malloc(4);
float* b436 = (float*)malloc(786432);
float* adam_m_h_11_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_11_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_11_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_11_mlp_c_proj_bias = (float*)malloc(3072);
float* b441 = (float*)malloc(3145728);
float* adam_m_h_11_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_11_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_11_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_11_mlp_c_fc_bias = (float*)malloc(12288);
float* b446 = (float*)malloc(786432);
float* adam_m_h_11_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_11_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_11_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_11_ln_2_bias = (float*)malloc(3072);
float* b451 = (float*)malloc(786432);
float* adam_m_h_11_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_11_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_11_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_11_attn_c_proj_bias = (float*)malloc(3072);
float* b456 = (float*)malloc(786432);
float* b457 = (float*)malloc(786432);
float* b458 = (float*)malloc(12288);
float* b459 = (float*)malloc(12288);
float* b460 = (float*)malloc(2359296);
float* adam_m_h_11_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_11_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_11_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_11_attn_c_attn_bias = (float*)malloc(9216);
float* b465 = (float*)malloc(786432);
float* adam_m_h_11_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_11_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_11_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_11_ln_1_bias = (float*)malloc(3072);
float* b470 = (float*)malloc(786432);
float* adam_m_h_10_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_10_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_10_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_10_mlp_c_proj_bias = (float*)malloc(3072);
float* b475 = (float*)malloc(3145728);
float* adam_m_h_10_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_10_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_10_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_10_mlp_c_fc_bias = (float*)malloc(12288);
float* b480 = (float*)malloc(786432);
float* adam_m_h_10_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_10_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_10_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_10_ln_2_bias = (float*)malloc(3072);
float* b485 = (float*)malloc(786432);
float* adam_m_h_10_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_10_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_10_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_10_attn_c_proj_bias = (float*)malloc(3072);
float* b490 = (float*)malloc(2359296);
float* adam_m_h_10_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_10_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_10_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_10_attn_c_attn_bias = (float*)malloc(9216);
float* b495 = (float*)malloc(786432);
float* adam_m_h_10_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_10_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_10_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_10_ln_1_bias = (float*)malloc(3072);
float* b500 = (float*)malloc(786432);
float* adam_m_h_9_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_9_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_9_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_9_mlp_c_proj_bias = (float*)malloc(3072);
float* b505 = (float*)malloc(3145728);
float* adam_m_h_9_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_9_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_9_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_9_mlp_c_fc_bias = (float*)malloc(12288);
float* b510 = (float*)malloc(786432);
float* adam_m_h_9_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_9_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_9_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_9_ln_2_bias = (float*)malloc(3072);
float* b515 = (float*)malloc(786432);
float* adam_m_h_9_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_9_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_9_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_9_attn_c_proj_bias = (float*)malloc(3072);
float* b520 = (float*)malloc(2359296);
float* adam_m_h_9_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_9_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_9_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_9_attn_c_attn_bias = (float*)malloc(9216);
float* b525 = (float*)malloc(786432);
float* adam_m_h_9_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_9_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_9_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_9_ln_1_bias = (float*)malloc(3072);
float* b530 = (float*)malloc(786432);
float* adam_m_h_8_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_8_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_8_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_8_mlp_c_proj_bias = (float*)malloc(3072);
float* b535 = (float*)malloc(3145728);
float* adam_m_h_8_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_8_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_8_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_8_mlp_c_fc_bias = (float*)malloc(12288);
float* b540 = (float*)malloc(786432);
float* adam_m_h_8_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_8_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_8_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_8_ln_2_bias = (float*)malloc(3072);
float* b545 = (float*)malloc(786432);
float* adam_m_h_8_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_8_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_8_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_8_attn_c_proj_bias = (float*)malloc(3072);
float* b550 = (float*)malloc(2359296);
float* adam_m_h_8_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_8_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_8_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_8_attn_c_attn_bias = (float*)malloc(9216);
float* b555 = (float*)malloc(786432);
float* adam_m_h_8_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_8_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_8_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_8_ln_1_bias = (float*)malloc(3072);
float* b560 = (float*)malloc(786432);
float* adam_m_h_7_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_7_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_7_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_7_mlp_c_proj_bias = (float*)malloc(3072);
float* b565 = (float*)malloc(3145728);
float* adam_m_h_7_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_7_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_7_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_7_mlp_c_fc_bias = (float*)malloc(12288);
float* b570 = (float*)malloc(786432);
float* adam_m_h_7_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_7_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_7_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_7_ln_2_bias = (float*)malloc(3072);
float* b575 = (float*)malloc(786432);
float* adam_m_h_7_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_7_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_7_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_7_attn_c_proj_bias = (float*)malloc(3072);
float* b580 = (float*)malloc(2359296);
float* adam_m_h_7_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_7_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_7_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_7_attn_c_attn_bias = (float*)malloc(9216);
float* b585 = (float*)malloc(786432);
float* adam_m_h_7_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_7_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_7_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_7_ln_1_bias = (float*)malloc(3072);
float* b590 = (float*)malloc(786432);
float* adam_m_h_6_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_6_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_6_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_6_mlp_c_proj_bias = (float*)malloc(3072);
float* b595 = (float*)malloc(3145728);
float* adam_m_h_6_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_6_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_6_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_6_mlp_c_fc_bias = (float*)malloc(12288);
float* b600 = (float*)malloc(786432);
float* adam_m_h_6_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_6_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_6_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_6_ln_2_bias = (float*)malloc(3072);
float* b605 = (float*)malloc(786432);
float* adam_m_h_6_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_6_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_6_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_6_attn_c_proj_bias = (float*)malloc(3072);
float* b610 = (float*)malloc(2359296);
float* adam_m_h_6_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_6_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_6_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_6_attn_c_attn_bias = (float*)malloc(9216);
float* b615 = (float*)malloc(786432);
float* adam_m_h_6_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_6_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_6_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_6_ln_1_bias = (float*)malloc(3072);
float* b620 = (float*)malloc(786432);
float* adam_m_h_5_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_5_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_5_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_5_mlp_c_proj_bias = (float*)malloc(3072);
float* b625 = (float*)malloc(3145728);
float* adam_m_h_5_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_5_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_5_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_5_mlp_c_fc_bias = (float*)malloc(12288);
float* b630 = (float*)malloc(786432);
float* adam_m_h_5_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_5_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_5_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_5_ln_2_bias = (float*)malloc(3072);
float* b635 = (float*)malloc(786432);
float* adam_m_h_5_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_5_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_5_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_5_attn_c_proj_bias = (float*)malloc(3072);
float* b640 = (float*)malloc(2359296);
float* adam_m_h_5_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_5_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_5_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_5_attn_c_attn_bias = (float*)malloc(9216);
float* b645 = (float*)malloc(786432);
float* adam_m_h_5_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_5_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_5_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_5_ln_1_bias = (float*)malloc(3072);
float* b650 = (float*)malloc(786432);
float* adam_m_h_4_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_4_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_4_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_4_mlp_c_proj_bias = (float*)malloc(3072);
float* b655 = (float*)malloc(3145728);
float* adam_m_h_4_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_4_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_4_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_4_mlp_c_fc_bias = (float*)malloc(12288);
float* b660 = (float*)malloc(786432);
float* adam_m_h_4_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_4_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_4_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_4_ln_2_bias = (float*)malloc(3072);
float* b665 = (float*)malloc(786432);
float* adam_m_h_4_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_4_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_4_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_4_attn_c_proj_bias = (float*)malloc(3072);
float* b670 = (float*)malloc(2359296);
float* adam_m_h_4_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_4_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_4_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_4_attn_c_attn_bias = (float*)malloc(9216);
float* b675 = (float*)malloc(786432);
float* adam_m_h_4_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_4_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_4_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_4_ln_1_bias = (float*)malloc(3072);
float* b680 = (float*)malloc(786432);
float* adam_m_h_3_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_3_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_3_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_3_mlp_c_proj_bias = (float*)malloc(3072);
float* b685 = (float*)malloc(3145728);
float* adam_m_h_3_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_3_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_3_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_3_mlp_c_fc_bias = (float*)malloc(12288);
float* b690 = (float*)malloc(786432);
float* adam_m_h_3_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_3_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_3_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_3_ln_2_bias = (float*)malloc(3072);
float* b695 = (float*)malloc(786432);
float* adam_m_h_3_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_3_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_3_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_3_attn_c_proj_bias = (float*)malloc(3072);
float* b700 = (float*)malloc(2359296);
float* adam_m_h_3_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_3_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_3_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_3_attn_c_attn_bias = (float*)malloc(9216);
float* b705 = (float*)malloc(786432);
float* adam_m_h_3_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_3_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_3_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_3_ln_1_bias = (float*)malloc(3072);
float* b710 = (float*)malloc(786432);
float* adam_m_h_2_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_2_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_2_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_2_mlp_c_proj_bias = (float*)malloc(3072);
float* b715 = (float*)malloc(3145728);
float* adam_m_h_2_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_2_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_2_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_2_mlp_c_fc_bias = (float*)malloc(12288);
float* b720 = (float*)malloc(786432);
float* adam_m_h_2_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_2_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_2_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_2_ln_2_bias = (float*)malloc(3072);
float* b725 = (float*)malloc(786432);
float* adam_m_h_2_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_2_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_2_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_2_attn_c_proj_bias = (float*)malloc(3072);
float* b730 = (float*)malloc(2359296);
float* adam_m_h_2_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_2_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_2_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_2_attn_c_attn_bias = (float*)malloc(9216);
float* b735 = (float*)malloc(786432);
float* adam_m_h_2_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_2_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_2_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_2_ln_1_bias = (float*)malloc(3072);
float* b740 = (float*)malloc(786432);
float* adam_m_h_1_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_1_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_1_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_1_mlp_c_proj_bias = (float*)malloc(3072);
float* b745 = (float*)malloc(3145728);
float* adam_m_h_1_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_1_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_1_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_1_mlp_c_fc_bias = (float*)malloc(12288);
float* b750 = (float*)malloc(786432);
float* adam_m_h_1_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_1_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_1_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_1_ln_2_bias = (float*)malloc(3072);
float* b755 = (float*)malloc(786432);
float* adam_m_h_1_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_1_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_1_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_1_attn_c_proj_bias = (float*)malloc(3072);
float* b760 = (float*)malloc(2359296);
float* adam_m_h_1_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_1_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_1_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_1_attn_c_attn_bias = (float*)malloc(9216);
float* b765 = (float*)malloc(786432);
float* adam_m_h_1_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_1_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_1_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_1_ln_1_bias = (float*)malloc(3072);
float* b770 = (float*)malloc(786432);
float* adam_m_h_0_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_v_h_0_mlp_c_proj_weight = (float*)malloc(9437184);
float* adam_m_h_0_mlp_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_0_mlp_c_proj_bias = (float*)malloc(3072);
float* b775 = (float*)malloc(3145728);
float* adam_m_h_0_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_v_h_0_mlp_c_fc_weight = (float*)malloc(9437184);
float* adam_m_h_0_mlp_c_fc_bias = (float*)malloc(12288);
float* adam_v_h_0_mlp_c_fc_bias = (float*)malloc(12288);
float* b780 = (float*)malloc(786432);
float* adam_m_h_0_ln_2_weight = (float*)malloc(3072);
float* adam_v_h_0_ln_2_weight = (float*)malloc(3072);
float* adam_m_h_0_ln_2_bias = (float*)malloc(3072);
float* adam_v_h_0_ln_2_bias = (float*)malloc(3072);
float* b785 = (float*)malloc(786432);
float* adam_m_h_0_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_v_h_0_attn_c_proj_weight = (float*)malloc(2359296);
float* adam_m_h_0_attn_c_proj_bias = (float*)malloc(3072);
float* adam_v_h_0_attn_c_proj_bias = (float*)malloc(3072);
float* b790 = (float*)malloc(2359296);
float* adam_m_h_0_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_v_h_0_attn_c_attn_weight = (float*)malloc(7077888);
float* adam_m_h_0_attn_c_attn_bias = (float*)malloc(9216);
float* adam_v_h_0_attn_c_attn_bias = (float*)malloc(9216);
float* b795 = (float*)malloc(786432);
float* adam_m_h_0_ln_1_weight = (float*)malloc(3072);
float* adam_v_h_0_ln_1_weight = (float*)malloc(3072);
float* adam_m_h_0_ln_1_bias = (float*)malloc(3072);
float* adam_v_h_0_ln_1_bias = (float*)malloc(3072);
float* b800 = (float*)malloc(196608);
float* b801 = (float*)malloc(154389504);
float* adam_m_wpe_weight = (float*)malloc(3145728);
float* adam_v_wpe_weight = (float*)malloc(3145728);
float* grad_lm_head_weight = (float*)malloc(154389504);
float* adam_m_lm_head_weight = (float*)malloc(154389504);
float* adam_v_lm_head_weight = (float*)malloc(154389504);
E_(adam_t);
r_64_64(b2);
E_64_64(b3, h_0_attn_bias);
E_64_64(b5, h_1_attn_bias);
E_64_64(b7, h_2_attn_bias);
E_64_64(b9, h_3_attn_bias);
E_64_64(b11, h_4_attn_bias);
E_64_64(b13, h_5_attn_bias);
E_64_64(b15, h_6_attn_bias);
E_64_64(b17, h_7_attn_bias);
E_64_64(b19, h_8_attn_bias);
E_64_64(b21, h_9_attn_bias);
E_64_64(b23, h_10_attn_bias);
E_64_64(b25, h_11_attn_bias);
r_50257_50257(b28);
E_n1(b29, adam_b1);
E_n1(b31, adam_b2);
E_n2(b33, adam_b1, adam_t);
E_n2(b34, adam_b2, adam_t);
r_64_768_1024(b35, b2, wpe_arange, wpe_weight);
E_256(b38, Y);
r_256(b39, Y);
r_4_64_768_50257(b40, X, wte_arange, lm_head_weight, b35);
E_n3(b43, b39);
r_256_768(b44, b40);
r_256_768n1(b45, b40, b44);
E_256n1(b46, b45);
E_256_768(b47, b40, b44, b46, h_0_ln_1_weight, h_0_ln_1_bias);
r_256_2304_768(b50, b47, h_0_attn_c_attn_weight, h_0_attn_c_attn_bias);
r_4_12_64_64_64(b53, b3, b50);
r_3072_64(b54, b53);
r_3072_64n1(b55, b53, b54);
r_3072_64n2(b56, b53, b54);
E_3072_64(b57, b53, b54, b55);
r_4_12_64_64_64n1(b58, b57, b50);
r_4_64_768_768(b59, b40, b58, h_0_attn_c_proj_weight, h_0_attn_c_proj_bias);
r_256_768(b62, b59);
r_256_768n1(b63, b59, b62);
E_256n1(b64, b63);
E_256_768(b65, b59, b62, b64, h_0_ln_2_weight, h_0_ln_2_bias);
r_256_3072_768(b68, b65, h_0_mlp_c_fc_weight, h_0_mlp_c_fc_bias);
E_786432(b71, b68);
r_256_768_3072(b72, b59, b71, h_0_mlp_c_proj_weight, h_0_mlp_c_proj_bias);
r_256_768(b75, b72);
r_256_768n1(b76, b72, b75);
E_256n1(b77, b76);
E_256_768(b78, b72, b75, b77, h_1_ln_1_weight, h_1_ln_1_bias);
r_256_2304_768(b81, b78, h_1_attn_c_attn_weight, h_1_attn_c_attn_bias);
r_4_12_64_64_64(b84, b5, b81);
r_3072_64(b85, b84);
r_3072_64n1(b86, b84, b85);
r_3072_64n2(b87, b84, b85);
E_3072_64(b88, b84, b85, b86);
r_4_12_64_64_64n1(b89, b88, b81);
r_4_64_768_768(b90, b72, b89, h_1_attn_c_proj_weight, h_1_attn_c_proj_bias);
r_256_768(b93, b90);
r_256_768n1(b94, b90, b93);
E_256n1(b95, b94);
E_256_768(b96, b90, b93, b95, h_1_ln_2_weight, h_1_ln_2_bias);
r_256_3072_768(b99, b96, h_1_mlp_c_fc_weight, h_1_mlp_c_fc_bias);
E_786432(b102, b99);
r_256_768_3072(b103, b90, b102, h_1_mlp_c_proj_weight, h_1_mlp_c_proj_bias);
r_256_768(b106, b103);
r_256_768n1(b107, b103, b106);
E_256n1(b108, b107);
E_256_768(b109, b103, b106, b108, h_2_ln_1_weight, h_2_ln_1_bias);
r_256_2304_768(b112, b109, h_2_attn_c_attn_weight, h_2_attn_c_attn_bias);
r_4_12_64_64_64(b115, b7, b112);
r_3072_64(b116, b115);
r_3072_64n1(b117, b115, b116);
r_3072_64n2(b118, b115, b116);
E_3072_64(b119, b115, b116, b117);
r_4_12_64_64_64n1(b120, b119, b112);
r_4_64_768_768(b121, b103, b120, h_2_attn_c_proj_weight, h_2_attn_c_proj_bias);
r_256_768(b124, b121);
r_256_768n1(b125, b121, b124);
E_256n1(b126, b125);
E_256_768(b127, b121, b124, b126, h_2_ln_2_weight, h_2_ln_2_bias);
r_256_3072_768(b130, b127, h_2_mlp_c_fc_weight, h_2_mlp_c_fc_bias);
E_786432(b133, b130);
r_256_768_3072(b134, b121, b133, h_2_mlp_c_proj_weight, h_2_mlp_c_proj_bias);
r_256_768(b137, b134);
r_256_768n1(b138, b134, b137);
E_256n1(b139, b138);
E_256_768(b140, b134, b137, b139, h_3_ln_1_weight, h_3_ln_1_bias);
r_256_2304_768(b143, b140, h_3_attn_c_attn_weight, h_3_attn_c_attn_bias);
r_4_12_64_64_64(b146, b9, b143);
r_3072_64(b147, b146);
r_3072_64n1(b148, b146, b147);
r_3072_64n2(b149, b146, b147);
E_3072_64(b150, b146, b147, b148);
r_4_12_64_64_64n1(b151, b150, b143);
r_4_64_768_768(b152, b134, b151, h_3_attn_c_proj_weight, h_3_attn_c_proj_bias);
r_256_768(b155, b152);
r_256_768n1(b156, b152, b155);
E_256n1(b157, b156);
E_256_768(b158, b152, b155, b157, h_3_ln_2_weight, h_3_ln_2_bias);
r_256_3072_768(b161, b158, h_3_mlp_c_fc_weight, h_3_mlp_c_fc_bias);
E_786432(b164, b161);
r_256_768_3072(b165, b152, b164, h_3_mlp_c_proj_weight, h_3_mlp_c_proj_bias);
r_256_768(b168, b165);
r_256_768n1(b169, b165, b168);
E_256n1(b170, b169);
E_256_768(b171, b165, b168, b170, h_4_ln_1_weight, h_4_ln_1_bias);
r_256_2304_768(b174, b171, h_4_attn_c_attn_weight, h_4_attn_c_attn_bias);
r_4_12_64_64_64(b177, b11, b174);
r_3072_64(b178, b177);
r_3072_64n1(b179, b177, b178);
r_3072_64n2(b180, b177, b178);
E_3072_64(b181, b177, b178, b179);
r_4_12_64_64_64n1(b182, b181, b174);
r_4_64_768_768(b183, b165, b182, h_4_attn_c_proj_weight, h_4_attn_c_proj_bias);
r_256_768(b186, b183);
r_256_768n1(b187, b183, b186);
E_256n1(b188, b187);
E_256_768(b189, b183, b186, b188, h_4_ln_2_weight, h_4_ln_2_bias);
r_256_3072_768(b192, b189, h_4_mlp_c_fc_weight, h_4_mlp_c_fc_bias);
E_786432(b195, b192);
r_256_768_3072(b196, b183, b195, h_4_mlp_c_proj_weight, h_4_mlp_c_proj_bias);
r_256_768(b199, b196);
r_256_768n1(b200, b196, b199);
E_256n1(b201, b200);
E_256_768(b202, b196, b199, b201, h_5_ln_1_weight, h_5_ln_1_bias);
r_256_2304_768(b205, b202, h_5_attn_c_attn_weight, h_5_attn_c_attn_bias);
r_4_12_64_64_64(b208, b13, b205);
r_3072_64(b209, b208);
r_3072_64n1(b210, b208, b209);
r_3072_64n2(b211, b208, b209);
E_3072_64(b212, b208, b209, b210);
r_4_12_64_64_64n1(b213, b212, b205);
r_4_64_768_768(b214, b196, b213, h_5_attn_c_proj_weight, h_5_attn_c_proj_bias);
r_256_768(b217, b214);
r_256_768n1(b218, b214, b217);
E_256n1(b219, b218);
E_256_768(b220, b214, b217, b219, h_5_ln_2_weight, h_5_ln_2_bias);
r_256_3072_768(b223, b220, h_5_mlp_c_fc_weight, h_5_mlp_c_fc_bias);
E_786432(b226, b223);
r_256_768_3072(b227, b214, b226, h_5_mlp_c_proj_weight, h_5_mlp_c_proj_bias);
r_256_768(b230, b227);
r_256_768n1(b231, b227, b230);
E_256n1(b232, b231);
E_256_768(b233, b227, b230, b232, h_6_ln_1_weight, h_6_ln_1_bias);
r_256_2304_768(b236, b233, h_6_attn_c_attn_weight, h_6_attn_c_attn_bias);
r_4_12_64_64_64(b239, b15, b236);
r_3072_64(b240, b239);
r_3072_64n1(b241, b239, b240);
r_3072_64n2(b242, b239, b240);
E_3072_64(b243, b239, b240, b241);
r_4_12_64_64_64n1(b244, b243, b236);
r_4_64_768_768(b245, b227, b244, h_6_attn_c_proj_weight, h_6_attn_c_proj_bias);
r_256_768(b248, b245);
r_256_768n1(b249, b245, b248);
E_256n1(b250, b249);
E_256_768(b251, b245, b248, b250, h_6_ln_2_weight, h_6_ln_2_bias);
r_256_3072_768(b254, b251, h_6_mlp_c_fc_weight, h_6_mlp_c_fc_bias);
E_786432(b257, b254);
r_256_768_3072(b258, b245, b257, h_6_mlp_c_proj_weight, h_6_mlp_c_proj_bias);
r_256_768(b261, b258);
r_256_768n1(b262, b258, b261);
E_256n1(b263, b262);
E_256_768(b264, b258, b261, b263, h_7_ln_1_weight, h_7_ln_1_bias);
r_256_2304_768(b267, b264, h_7_attn_c_attn_weight, h_7_attn_c_attn_bias);
r_4_12_64_64_64(b270, b17, b267);
r_3072_64(b271, b270);
r_3072_64n1(b272, b270, b271);
r_3072_64n2(b273, b270, b271);
E_3072_64(b274, b270, b271, b272);
r_4_12_64_64_64n1(b275, b274, b267);
r_4_64_768_768(b276, b258, b275, h_7_attn_c_proj_weight, h_7_attn_c_proj_bias);
r_256_768(b279, b276);
r_256_768n1(b280, b276, b279);
E_256n1(b281, b280);
E_256_768(b282, b276, b279, b281, h_7_ln_2_weight, h_7_ln_2_bias);
r_256_3072_768(b285, b282, h_7_mlp_c_fc_weight, h_7_mlp_c_fc_bias);
E_786432(b288, b285);
r_256_768_3072(b289, b276, b288, h_7_mlp_c_proj_weight, h_7_mlp_c_proj_bias);
r_256_768(b292, b289);
r_256_768n1(b293, b289, b292);
E_256n1(b294, b293);
E_256_768(b295, b289, b292, b294, h_8_ln_1_weight, h_8_ln_1_bias);
r_256_2304_768(b298, b295, h_8_attn_c_attn_weight, h_8_attn_c_attn_bias);
r_4_12_64_64_64(b301, b19, b298);
r_3072_64(b302, b301);
r_3072_64n1(b303, b301, b302);
r_3072_64n2(b304, b301, b302);
E_3072_64(b305, b301, b302, b303);
r_4_12_64_64_64n1(b306, b305, b298);
r_4_64_768_768(b307, b289, b306, h_8_attn_c_proj_weight, h_8_attn_c_proj_bias);
r_256_768(b310, b307);
r_256_768n1(b311, b307, b310);
E_256n1(b312, b311);
E_256_768(b313, b307, b310, b312, h_8_ln_2_weight, h_8_ln_2_bias);
r_256_3072_768(b316, b313, h_8_mlp_c_fc_weight, h_8_mlp_c_fc_bias);
E_786432(b319, b316);
r_256_768_3072(b320, b307, b319, h_8_mlp_c_proj_weight, h_8_mlp_c_proj_bias);
r_256_768(b323, b320);
r_256_768n1(b324, b320, b323);
E_256n1(b325, b324);
E_256_768(b326, b320, b323, b325, h_9_ln_1_weight, h_9_ln_1_bias);
r_256_2304_768(b329, b326, h_9_attn_c_attn_weight, h_9_attn_c_attn_bias);
r_4_12_64_64_64(b332, b21, b329);
r_3072_64(b333, b332);
r_3072_64n1(b334, b332, b333);
r_3072_64n2(b335, b332, b333);
E_3072_64(b336, b332, b333, b334);
r_4_12_64_64_64n1(b337, b336, b329);
r_4_64_768_768(b338, b320, b337, h_9_attn_c_proj_weight, h_9_attn_c_proj_bias);
r_256_768(b341, b338);
r_256_768n1(b342, b338, b341);
E_256n1(b343, b342);
E_256_768(b344, b338, b341, b343, h_9_ln_2_weight, h_9_ln_2_bias);
r_256_3072_768(b347, b344, h_9_mlp_c_fc_weight, h_9_mlp_c_fc_bias);
E_786432(b350, b347);
r_256_768_3072(b351, b338, b350, h_9_mlp_c_proj_weight, h_9_mlp_c_proj_bias);
r_256_768(b354, b351);
r_256_768n1(b355, b351, b354);
E_256n1(b356, b355);
E_256_768(b357, b351, b354, b356, h_10_ln_1_weight, h_10_ln_1_bias);
r_256_2304_768(b360, b357, h_10_attn_c_attn_weight, h_10_attn_c_attn_bias);
r_4_12_64_64_64(b363, b23, b360);
r_3072_64(b364, b363);
r_3072_64n1(b365, b363, b364);
r_3072_64n2(b366, b363, b364);
E_3072_64(b367, b363, b364, b365);
r_4_12_64_64_64n1(b368, b367, b360);
r_4_64_768_768(b369, b351, b368, h_10_attn_c_proj_weight, h_10_attn_c_proj_bias);
r_256_768(b372, b369);
r_256_768n1(b373, b369, b372);
E_256n1(b374, b373);
E_256_768(b375, b369, b372, b374, h_10_ln_2_weight, h_10_ln_2_bias);
r_256_3072_768(b378, b375, h_10_mlp_c_fc_weight, h_10_mlp_c_fc_bias);
E_786432(b381, b378);
r_256_768_3072(b382, b369, b381, h_10_mlp_c_proj_weight, h_10_mlp_c_proj_bias);
r_256_768(b385, b382);
r_256_768n1(b386, b382, b385);
E_256n1(b387, b386);
E_256_768(b388, b382, b385, b387, h_11_ln_1_weight, h_11_ln_1_bias);
r_256_2304_768(b391, b388, h_11_attn_c_attn_weight, h_11_attn_c_attn_bias);
r_4_12_64_64_64(b394, b25, b391);
r_3072_64(b395, b394);
r_3072_64n1(b396, b394, b395);
r_3072_64n2(b397, b394, b395);
E_3072_64(b398, b394, b395, b396);
r_4_12_64_64_64n1(b399, b398, b391);
r_4_64_768_768(b400, b382, b399, h_11_attn_c_proj_weight, h_11_attn_c_proj_bias);
r_256_768(b403, b400);
r_256_768n1(b404, b400, b403);
E_256n1(b405, b404);
E_256_768(b406, b400, b403, b405, h_11_ln_2_weight, h_11_ln_2_bias);
r_256_3072_768(b409, b406, h_11_mlp_c_fc_weight, h_11_mlp_c_fc_bias);
E_786432(b412, b409);
r_256_768_3072(b413, b400, b412, h_11_mlp_c_proj_weight, h_11_mlp_c_proj_bias);
r_256_768(b416, b413);
r_256_768n1(b417, b413, b416);
E_256n1(b418, b417);
E_256_768(b419, b413, b416, b418, ln_f_weight, ln_f_bias);
r_256_50257_768(b422, b419, lm_head_weight);
r_256_50257(b423, b422);
r_256_50257n1(b424, b422, b423);
r_256_50257n2(b425, b422, b423);
E_256n2(b426, b425);
r_256_50257n3(b427, Y, b28, b38, b43, b425);
r_256_50257n4(loss, b422, b423, b426, Y, b28, b38, b39);
r_256_50257n5(b426, Y, b28, b38, b43, b422, b423, b427);
E_256_50257(b429, Y, b28, b38, b43, b422, b423, b427, b424, b426);
r_256_768_50257(b430, lm_head_weight, b429);
r2_768_256(adam_m_ln_f_weight, adam_v_ln_f_weight, adam_b1, b29, b413, b416, b418, b430, adam_b2, b31);
r2_768_256n1(adam_m_ln_f_bias, adam_v_ln_f_bias, adam_b1, b29, b430, adam_b2, b31);
r_256_768n2(b426, b413, b416, ln_f_weight, b430, b418, b417);
E_768(ln_f_bias, adam_lr, adam_m_ln_f_bias, b33, adam_v_ln_f_bias, b34);
r_256_768n3(b417, b418, ln_f_weight, b430, b413, b416, b426);
E_256_768n1(b436, b418, ln_f_weight, b430, b413, b416, b426, b417);
r2_768_3072_256(adam_m_h_11_mlp_c_proj_weight, adam_v_h_11_mlp_c_proj_weight, adam_b1, b29, b412, b436, adam_b2, b31);
r2_768_256n1(adam_m_h_11_mlp_c_proj_bias, adam_v_h_11_mlp_c_proj_bias, adam_b1, b29, b436, adam_b2, b31);
r_256_3072_768n1(b441, b409, h_11_mlp_c_proj_weight, b436);
E_768(ln_f_weight, adam_lr, adam_m_ln_f_weight, b33, adam_v_ln_f_weight, b34);
E_768(h_11_mlp_c_proj_bias, adam_lr, adam_m_h_11_mlp_c_proj_bias, b33, adam_v_h_11_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_11_mlp_c_fc_weight, adam_v_h_11_mlp_c_fc_weight, adam_b1, b29, b406, b441, adam_b2, b31);
r2_3072_256(adam_m_h_11_mlp_c_fc_bias, adam_v_h_11_mlp_c_fc_bias, adam_b1, b29, b441, adam_b2, b31);
E_2359296(h_11_mlp_c_proj_weight, adam_lr, adam_m_h_11_mlp_c_proj_weight, b33, adam_v_h_11_mlp_c_proj_weight, b34);
r_256_768_3072n1(b446, h_11_mlp_c_fc_weight, b441);
E_3072(h_11_mlp_c_fc_bias, adam_lr, adam_m_h_11_mlp_c_fc_bias, b33, adam_v_h_11_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_11_ln_2_weight, adam_v_h_11_ln_2_weight, adam_b1, b29, b400, b403, b405, b446, adam_b2, b31);
r2_768_256n1(adam_m_h_11_ln_2_bias, adam_v_h_11_ln_2_bias, adam_b1, b29, b446, adam_b2, b31);
r_256_768n2(b417, b400, b403, h_11_ln_2_weight, b446, b405, b404);
E_2359296n1(h_11_mlp_c_fc_weight, adam_lr, adam_m_h_11_mlp_c_fc_weight, b33, adam_v_h_11_mlp_c_fc_weight, b34);
E_768(h_11_ln_2_bias, adam_lr, adam_m_h_11_ln_2_bias, b33, adam_v_h_11_ln_2_bias, b34);
r_256_768n3(b404, b405, h_11_ln_2_weight, b446, b400, b403, b417);
E_256_768n2(b451, b436, b405, h_11_ln_2_weight, b446, b400, b403, b417, b404);
r2_768_768_4_64(adam_m_h_11_attn_c_proj_weight, adam_v_h_11_attn_c_proj_weight, adam_b1, b29, b399, b451, adam_b2, b31);
r2_768_256n1(adam_m_h_11_attn_c_proj_bias, adam_v_h_11_attn_c_proj_bias, adam_b1, b29, b451, adam_b2, b31);
r_256_768_768(b456, h_11_attn_c_proj_weight, b451);
E_768(h_11_ln_2_weight, adam_lr, adam_m_h_11_ln_2_weight, b33, adam_v_h_11_ln_2_weight, b34);
E_768(h_11_attn_c_proj_bias, adam_lr, adam_m_h_11_attn_c_proj_bias, b33, adam_v_h_11_attn_c_proj_bias, b34);
E_589824(h_11_attn_c_proj_weight, adam_lr, adam_m_h_11_attn_c_proj_weight, b33, adam_v_h_11_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b457, b398, b456);
r_4_12_64_64_64n3(b398, b391, b456);
r_3072_64n3(b458, b398, b394, b395, b396);
r_3072_64n4(b459, b394, b395, b398, b396, b458);
E_48_64_64(b456, b25, b394, b395, b398, b396, b458, b397, b459);
r_4_12_64_64_64n4(b398, b391, b456);
r_4_12_64_64_64n5(b394, b391, b456);
E_4_64_2304(b460, b457, b398, b394);
r2_2304_768_256(adam_m_h_11_attn_c_attn_weight, adam_v_h_11_attn_c_attn_weight, adam_b1, b29, b388, b460, adam_b2, b31);
r2_2304_256(adam_m_h_11_attn_c_attn_bias, adam_v_h_11_attn_c_attn_bias, adam_b1, b29, b460, adam_b2, b31);
r_256_768_2304(b465, h_11_attn_c_attn_weight, b460);
E_2304(h_11_attn_c_attn_bias, adam_lr, adam_m_h_11_attn_c_attn_bias, b33, adam_v_h_11_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_11_ln_1_weight, adam_v_h_11_ln_1_weight, adam_b1, b29, b382, b385, b387, b465, adam_b2, b31);
r2_768_256n1(adam_m_h_11_ln_1_bias, adam_v_h_11_ln_1_bias, adam_b1, b29, b465, adam_b2, b31);
r_256_768n2(b404, b382, b385, h_11_ln_1_weight, b465, b387, b386);
E_1769472(h_11_attn_c_attn_weight, adam_lr, adam_m_h_11_attn_c_attn_weight, b33, adam_v_h_11_attn_c_attn_weight, b34);
E_768(h_11_ln_1_bias, adam_lr, adam_m_h_11_ln_1_bias, b33, adam_v_h_11_ln_1_bias, b34);
r_256_768n3(b386, b387, h_11_ln_1_weight, b465, b382, b385, b404);
E_256_768n2(b470, b451, b387, h_11_ln_1_weight, b465, b382, b385, b404, b386);
r2_768_3072_256(adam_m_h_10_mlp_c_proj_weight, adam_v_h_10_mlp_c_proj_weight, adam_b1, b29, b381, b470, adam_b2, b31);
r2_768_256n1(adam_m_h_10_mlp_c_proj_bias, adam_v_h_10_mlp_c_proj_bias, adam_b1, b29, b470, adam_b2, b31);
r_256_3072_768n1(b475, b378, h_10_mlp_c_proj_weight, b470);
E_768(h_11_ln_1_weight, adam_lr, adam_m_h_11_ln_1_weight, b33, adam_v_h_11_ln_1_weight, b34);
E_768(h_10_mlp_c_proj_bias, adam_lr, adam_m_h_10_mlp_c_proj_bias, b33, adam_v_h_10_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_10_mlp_c_fc_weight, adam_v_h_10_mlp_c_fc_weight, adam_b1, b29, b375, b475, adam_b2, b31);
r2_3072_256(adam_m_h_10_mlp_c_fc_bias, adam_v_h_10_mlp_c_fc_bias, adam_b1, b29, b475, adam_b2, b31);
E_2359296(h_10_mlp_c_proj_weight, adam_lr, adam_m_h_10_mlp_c_proj_weight, b33, adam_v_h_10_mlp_c_proj_weight, b34);
r_256_768_3072n1(b480, h_10_mlp_c_fc_weight, b475);
E_3072(h_10_mlp_c_fc_bias, adam_lr, adam_m_h_10_mlp_c_fc_bias, b33, adam_v_h_10_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_10_ln_2_weight, adam_v_h_10_ln_2_weight, adam_b1, b29, b369, b372, b374, b480, adam_b2, b31);
r2_768_256n1(adam_m_h_10_ln_2_bias, adam_v_h_10_ln_2_bias, adam_b1, b29, b480, adam_b2, b31);
r_256_768n2(b386, b369, b372, h_10_ln_2_weight, b480, b374, b373);
E_2359296n1(h_10_mlp_c_fc_weight, adam_lr, adam_m_h_10_mlp_c_fc_weight, b33, adam_v_h_10_mlp_c_fc_weight, b34);
E_768(h_10_ln_2_bias, adam_lr, adam_m_h_10_ln_2_bias, b33, adam_v_h_10_ln_2_bias, b34);
r_256_768n3(b373, b374, h_10_ln_2_weight, b480, b369, b372, b386);
E_256_768n2(b485, b470, b374, h_10_ln_2_weight, b480, b369, b372, b386, b373);
r2_768_768_4_64(adam_m_h_10_attn_c_proj_weight, adam_v_h_10_attn_c_proj_weight, adam_b1, b29, b368, b485, adam_b2, b31);
r2_768_256n1(adam_m_h_10_attn_c_proj_bias, adam_v_h_10_attn_c_proj_bias, adam_b1, b29, b485, adam_b2, b31);
r_256_768_768(b394, h_10_attn_c_proj_weight, b485);
E_768(h_10_ln_2_weight, adam_lr, adam_m_h_10_ln_2_weight, b33, adam_v_h_10_ln_2_weight, b34);
E_768(h_10_attn_c_proj_bias, adam_lr, adam_m_h_10_attn_c_proj_bias, b33, adam_v_h_10_attn_c_proj_bias, b34);
E_589824(h_10_attn_c_proj_weight, adam_lr, adam_m_h_10_attn_c_proj_weight, b33, adam_v_h_10_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b398, b367, b394);
r_4_12_64_64_64n3(b367, b360, b394);
r_3072_64n3(b459, b367, b363, b364, b365);
r_3072_64n4(b397, b363, b364, b367, b365, b459);
E_48_64_64(b394, b23, b363, b364, b367, b365, b459, b366, b397);
r_4_12_64_64_64n4(b367, b360, b394);
r_4_12_64_64_64n5(b363, b360, b394);
E_4_64_2304(b490, b398, b367, b363);
r2_2304_768_256(adam_m_h_10_attn_c_attn_weight, adam_v_h_10_attn_c_attn_weight, adam_b1, b29, b357, b490, adam_b2, b31);
r2_2304_256(adam_m_h_10_attn_c_attn_bias, adam_v_h_10_attn_c_attn_bias, adam_b1, b29, b490, adam_b2, b31);
r_256_768_2304(b495, h_10_attn_c_attn_weight, b490);
E_2304(h_10_attn_c_attn_bias, adam_lr, adam_m_h_10_attn_c_attn_bias, b33, adam_v_h_10_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_10_ln_1_weight, adam_v_h_10_ln_1_weight, adam_b1, b29, b351, b354, b356, b495, adam_b2, b31);
r2_768_256n1(adam_m_h_10_ln_1_bias, adam_v_h_10_ln_1_bias, adam_b1, b29, b495, adam_b2, b31);
r_256_768n2(b373, b351, b354, h_10_ln_1_weight, b495, b356, b355);
E_1769472(h_10_attn_c_attn_weight, adam_lr, adam_m_h_10_attn_c_attn_weight, b33, adam_v_h_10_attn_c_attn_weight, b34);
E_768(h_10_ln_1_bias, adam_lr, adam_m_h_10_ln_1_bias, b33, adam_v_h_10_ln_1_bias, b34);
r_256_768n3(b355, b356, h_10_ln_1_weight, b495, b351, b354, b373);
E_256_768n2(b500, b485, b356, h_10_ln_1_weight, b495, b351, b354, b373, b355);
r2_768_3072_256(adam_m_h_9_mlp_c_proj_weight, adam_v_h_9_mlp_c_proj_weight, adam_b1, b29, b350, b500, adam_b2, b31);
r2_768_256n1(adam_m_h_9_mlp_c_proj_bias, adam_v_h_9_mlp_c_proj_bias, adam_b1, b29, b500, adam_b2, b31);
r_256_3072_768n1(b505, b347, h_9_mlp_c_proj_weight, b500);
E_768(h_10_ln_1_weight, adam_lr, adam_m_h_10_ln_1_weight, b33, adam_v_h_10_ln_1_weight, b34);
E_768(h_9_mlp_c_proj_bias, adam_lr, adam_m_h_9_mlp_c_proj_bias, b33, adam_v_h_9_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_9_mlp_c_fc_weight, adam_v_h_9_mlp_c_fc_weight, adam_b1, b29, b344, b505, adam_b2, b31);
r2_3072_256(adam_m_h_9_mlp_c_fc_bias, adam_v_h_9_mlp_c_fc_bias, adam_b1, b29, b505, adam_b2, b31);
E_2359296(h_9_mlp_c_proj_weight, adam_lr, adam_m_h_9_mlp_c_proj_weight, b33, adam_v_h_9_mlp_c_proj_weight, b34);
r_256_768_3072n1(b510, h_9_mlp_c_fc_weight, b505);
E_3072(h_9_mlp_c_fc_bias, adam_lr, adam_m_h_9_mlp_c_fc_bias, b33, adam_v_h_9_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_9_ln_2_weight, adam_v_h_9_ln_2_weight, adam_b1, b29, b338, b341, b343, b510, adam_b2, b31);
r2_768_256n1(adam_m_h_9_ln_2_bias, adam_v_h_9_ln_2_bias, adam_b1, b29, b510, adam_b2, b31);
r_256_768n2(b355, b338, b341, h_9_ln_2_weight, b510, b343, b342);
E_2359296n1(h_9_mlp_c_fc_weight, adam_lr, adam_m_h_9_mlp_c_fc_weight, b33, adam_v_h_9_mlp_c_fc_weight, b34);
E_768(h_9_ln_2_bias, adam_lr, adam_m_h_9_ln_2_bias, b33, adam_v_h_9_ln_2_bias, b34);
r_256_768n3(b342, b343, h_9_ln_2_weight, b510, b338, b341, b355);
E_256_768n2(b515, b500, b343, h_9_ln_2_weight, b510, b338, b341, b355, b342);
r2_768_768_4_64(adam_m_h_9_attn_c_proj_weight, adam_v_h_9_attn_c_proj_weight, adam_b1, b29, b337, b515, adam_b2, b31);
r2_768_256n1(adam_m_h_9_attn_c_proj_bias, adam_v_h_9_attn_c_proj_bias, adam_b1, b29, b515, adam_b2, b31);
r_256_768_768(b363, h_9_attn_c_proj_weight, b515);
E_768(h_9_ln_2_weight, adam_lr, adam_m_h_9_ln_2_weight, b33, adam_v_h_9_ln_2_weight, b34);
E_768(h_9_attn_c_proj_bias, adam_lr, adam_m_h_9_attn_c_proj_bias, b33, adam_v_h_9_attn_c_proj_bias, b34);
E_589824(h_9_attn_c_proj_weight, adam_lr, adam_m_h_9_attn_c_proj_weight, b33, adam_v_h_9_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b367, b336, b363);
r_4_12_64_64_64n3(b336, b329, b363);
r_3072_64n3(b397, b336, b332, b333, b334);
r_3072_64n4(b366, b332, b333, b336, b334, b397);
E_48_64_64(b363, b21, b332, b333, b336, b334, b397, b335, b366);
r_4_12_64_64_64n4(b336, b329, b363);
r_4_12_64_64_64n5(b332, b329, b363);
E_4_64_2304(b520, b367, b336, b332);
r2_2304_768_256(adam_m_h_9_attn_c_attn_weight, adam_v_h_9_attn_c_attn_weight, adam_b1, b29, b326, b520, adam_b2, b31);
r2_2304_256(adam_m_h_9_attn_c_attn_bias, adam_v_h_9_attn_c_attn_bias, adam_b1, b29, b520, adam_b2, b31);
r_256_768_2304(b525, h_9_attn_c_attn_weight, b520);
E_2304(h_9_attn_c_attn_bias, adam_lr, adam_m_h_9_attn_c_attn_bias, b33, adam_v_h_9_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_9_ln_1_weight, adam_v_h_9_ln_1_weight, adam_b1, b29, b320, b323, b325, b525, adam_b2, b31);
r2_768_256n1(adam_m_h_9_ln_1_bias, adam_v_h_9_ln_1_bias, adam_b1, b29, b525, adam_b2, b31);
r_256_768n2(b342, b320, b323, h_9_ln_1_weight, b525, b325, b324);
E_1769472(h_9_attn_c_attn_weight, adam_lr, adam_m_h_9_attn_c_attn_weight, b33, adam_v_h_9_attn_c_attn_weight, b34);
E_768(h_9_ln_1_bias, adam_lr, adam_m_h_9_ln_1_bias, b33, adam_v_h_9_ln_1_bias, b34);
r_256_768n3(b324, b325, h_9_ln_1_weight, b525, b320, b323, b342);
E_256_768n2(b530, b515, b325, h_9_ln_1_weight, b525, b320, b323, b342, b324);
r2_768_3072_256(adam_m_h_8_mlp_c_proj_weight, adam_v_h_8_mlp_c_proj_weight, adam_b1, b29, b319, b530, adam_b2, b31);
r2_768_256n1(adam_m_h_8_mlp_c_proj_bias, adam_v_h_8_mlp_c_proj_bias, adam_b1, b29, b530, adam_b2, b31);
r_256_3072_768n1(b535, b316, h_8_mlp_c_proj_weight, b530);
E_768(h_9_ln_1_weight, adam_lr, adam_m_h_9_ln_1_weight, b33, adam_v_h_9_ln_1_weight, b34);
E_768(h_8_mlp_c_proj_bias, adam_lr, adam_m_h_8_mlp_c_proj_bias, b33, adam_v_h_8_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_8_mlp_c_fc_weight, adam_v_h_8_mlp_c_fc_weight, adam_b1, b29, b313, b535, adam_b2, b31);
r2_3072_256(adam_m_h_8_mlp_c_fc_bias, adam_v_h_8_mlp_c_fc_bias, adam_b1, b29, b535, adam_b2, b31);
E_2359296(h_8_mlp_c_proj_weight, adam_lr, adam_m_h_8_mlp_c_proj_weight, b33, adam_v_h_8_mlp_c_proj_weight, b34);
r_256_768_3072n1(b540, h_8_mlp_c_fc_weight, b535);
E_3072(h_8_mlp_c_fc_bias, adam_lr, adam_m_h_8_mlp_c_fc_bias, b33, adam_v_h_8_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_8_ln_2_weight, adam_v_h_8_ln_2_weight, adam_b1, b29, b307, b310, b312, b540, adam_b2, b31);
r2_768_256n1(adam_m_h_8_ln_2_bias, adam_v_h_8_ln_2_bias, adam_b1, b29, b540, adam_b2, b31);
r_256_768n2(b324, b307, b310, h_8_ln_2_weight, b540, b312, b311);
E_2359296n1(h_8_mlp_c_fc_weight, adam_lr, adam_m_h_8_mlp_c_fc_weight, b33, adam_v_h_8_mlp_c_fc_weight, b34);
E_768(h_8_ln_2_bias, adam_lr, adam_m_h_8_ln_2_bias, b33, adam_v_h_8_ln_2_bias, b34);
r_256_768n3(b311, b312, h_8_ln_2_weight, b540, b307, b310, b324);
E_256_768n2(b545, b530, b312, h_8_ln_2_weight, b540, b307, b310, b324, b311);
r2_768_768_4_64(adam_m_h_8_attn_c_proj_weight, adam_v_h_8_attn_c_proj_weight, adam_b1, b29, b306, b545, adam_b2, b31);
r2_768_256n1(adam_m_h_8_attn_c_proj_bias, adam_v_h_8_attn_c_proj_bias, adam_b1, b29, b545, adam_b2, b31);
r_256_768_768(b332, h_8_attn_c_proj_weight, b545);
E_768(h_8_ln_2_weight, adam_lr, adam_m_h_8_ln_2_weight, b33, adam_v_h_8_ln_2_weight, b34);
E_768(h_8_attn_c_proj_bias, adam_lr, adam_m_h_8_attn_c_proj_bias, b33, adam_v_h_8_attn_c_proj_bias, b34);
E_589824(h_8_attn_c_proj_weight, adam_lr, adam_m_h_8_attn_c_proj_weight, b33, adam_v_h_8_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b336, b305, b332);
r_4_12_64_64_64n3(b305, b298, b332);
r_3072_64n3(b366, b305, b301, b302, b303);
r_3072_64n4(b335, b301, b302, b305, b303, b366);
E_48_64_64(b332, b19, b301, b302, b305, b303, b366, b304, b335);
r_4_12_64_64_64n4(b305, b298, b332);
r_4_12_64_64_64n5(b301, b298, b332);
E_4_64_2304(b550, b336, b305, b301);
r2_2304_768_256(adam_m_h_8_attn_c_attn_weight, adam_v_h_8_attn_c_attn_weight, adam_b1, b29, b295, b550, adam_b2, b31);
r2_2304_256(adam_m_h_8_attn_c_attn_bias, adam_v_h_8_attn_c_attn_bias, adam_b1, b29, b550, adam_b2, b31);
r_256_768_2304(b555, h_8_attn_c_attn_weight, b550);
E_2304(h_8_attn_c_attn_bias, adam_lr, adam_m_h_8_attn_c_attn_bias, b33, adam_v_h_8_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_8_ln_1_weight, adam_v_h_8_ln_1_weight, adam_b1, b29, b289, b292, b294, b555, adam_b2, b31);
r2_768_256n1(adam_m_h_8_ln_1_bias, adam_v_h_8_ln_1_bias, adam_b1, b29, b555, adam_b2, b31);
r_256_768n2(b311, b289, b292, h_8_ln_1_weight, b555, b294, b293);
E_1769472(h_8_attn_c_attn_weight, adam_lr, adam_m_h_8_attn_c_attn_weight, b33, adam_v_h_8_attn_c_attn_weight, b34);
E_768(h_8_ln_1_bias, adam_lr, adam_m_h_8_ln_1_bias, b33, adam_v_h_8_ln_1_bias, b34);
r_256_768n3(b293, b294, h_8_ln_1_weight, b555, b289, b292, b311);
E_256_768n2(b560, b545, b294, h_8_ln_1_weight, b555, b289, b292, b311, b293);
r2_768_3072_256(adam_m_h_7_mlp_c_proj_weight, adam_v_h_7_mlp_c_proj_weight, adam_b1, b29, b288, b560, adam_b2, b31);
r2_768_256n1(adam_m_h_7_mlp_c_proj_bias, adam_v_h_7_mlp_c_proj_bias, adam_b1, b29, b560, adam_b2, b31);
r_256_3072_768n1(b565, b285, h_7_mlp_c_proj_weight, b560);
E_768(h_8_ln_1_weight, adam_lr, adam_m_h_8_ln_1_weight, b33, adam_v_h_8_ln_1_weight, b34);
E_768(h_7_mlp_c_proj_bias, adam_lr, adam_m_h_7_mlp_c_proj_bias, b33, adam_v_h_7_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_7_mlp_c_fc_weight, adam_v_h_7_mlp_c_fc_weight, adam_b1, b29, b282, b565, adam_b2, b31);
r2_3072_256(adam_m_h_7_mlp_c_fc_bias, adam_v_h_7_mlp_c_fc_bias, adam_b1, b29, b565, adam_b2, b31);
E_2359296(h_7_mlp_c_proj_weight, adam_lr, adam_m_h_7_mlp_c_proj_weight, b33, adam_v_h_7_mlp_c_proj_weight, b34);
r_256_768_3072n1(b570, h_7_mlp_c_fc_weight, b565);
E_3072(h_7_mlp_c_fc_bias, adam_lr, adam_m_h_7_mlp_c_fc_bias, b33, adam_v_h_7_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_7_ln_2_weight, adam_v_h_7_ln_2_weight, adam_b1, b29, b276, b279, b281, b570, adam_b2, b31);
r2_768_256n1(adam_m_h_7_ln_2_bias, adam_v_h_7_ln_2_bias, adam_b1, b29, b570, adam_b2, b31);
r_256_768n2(b293, b276, b279, h_7_ln_2_weight, b570, b281, b280);
E_2359296n1(h_7_mlp_c_fc_weight, adam_lr, adam_m_h_7_mlp_c_fc_weight, b33, adam_v_h_7_mlp_c_fc_weight, b34);
E_768(h_7_ln_2_bias, adam_lr, adam_m_h_7_ln_2_bias, b33, adam_v_h_7_ln_2_bias, b34);
r_256_768n3(b280, b281, h_7_ln_2_weight, b570, b276, b279, b293);
E_256_768n2(b575, b560, b281, h_7_ln_2_weight, b570, b276, b279, b293, b280);
r2_768_768_4_64(adam_m_h_7_attn_c_proj_weight, adam_v_h_7_attn_c_proj_weight, adam_b1, b29, b275, b575, adam_b2, b31);
r2_768_256n1(adam_m_h_7_attn_c_proj_bias, adam_v_h_7_attn_c_proj_bias, adam_b1, b29, b575, adam_b2, b31);
r_256_768_768(b301, h_7_attn_c_proj_weight, b575);
E_768(h_7_ln_2_weight, adam_lr, adam_m_h_7_ln_2_weight, b33, adam_v_h_7_ln_2_weight, b34);
E_768(h_7_attn_c_proj_bias, adam_lr, adam_m_h_7_attn_c_proj_bias, b33, adam_v_h_7_attn_c_proj_bias, b34);
E_589824(h_7_attn_c_proj_weight, adam_lr, adam_m_h_7_attn_c_proj_weight, b33, adam_v_h_7_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b305, b274, b301);
r_4_12_64_64_64n3(b274, b267, b301);
r_3072_64n3(b335, b274, b270, b271, b272);
r_3072_64n4(b304, b270, b271, b274, b272, b335);
E_48_64_64(b301, b17, b270, b271, b274, b272, b335, b273, b304);
r_4_12_64_64_64n4(b274, b267, b301);
r_4_12_64_64_64n5(b270, b267, b301);
E_4_64_2304(b580, b305, b274, b270);
r2_2304_768_256(adam_m_h_7_attn_c_attn_weight, adam_v_h_7_attn_c_attn_weight, adam_b1, b29, b264, b580, adam_b2, b31);
r2_2304_256(adam_m_h_7_attn_c_attn_bias, adam_v_h_7_attn_c_attn_bias, adam_b1, b29, b580, adam_b2, b31);
r_256_768_2304(b585, h_7_attn_c_attn_weight, b580);
E_2304(h_7_attn_c_attn_bias, adam_lr, adam_m_h_7_attn_c_attn_bias, b33, adam_v_h_7_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_7_ln_1_weight, adam_v_h_7_ln_1_weight, adam_b1, b29, b258, b261, b263, b585, adam_b2, b31);
r2_768_256n1(adam_m_h_7_ln_1_bias, adam_v_h_7_ln_1_bias, adam_b1, b29, b585, adam_b2, b31);
r_256_768n2(b280, b258, b261, h_7_ln_1_weight, b585, b263, b262);
E_1769472(h_7_attn_c_attn_weight, adam_lr, adam_m_h_7_attn_c_attn_weight, b33, adam_v_h_7_attn_c_attn_weight, b34);
E_768(h_7_ln_1_bias, adam_lr, adam_m_h_7_ln_1_bias, b33, adam_v_h_7_ln_1_bias, b34);
r_256_768n3(b262, b263, h_7_ln_1_weight, b585, b258, b261, b280);
E_256_768n2(b590, b575, b263, h_7_ln_1_weight, b585, b258, b261, b280, b262);
r2_768_3072_256(adam_m_h_6_mlp_c_proj_weight, adam_v_h_6_mlp_c_proj_weight, adam_b1, b29, b257, b590, adam_b2, b31);
r2_768_256n1(adam_m_h_6_mlp_c_proj_bias, adam_v_h_6_mlp_c_proj_bias, adam_b1, b29, b590, adam_b2, b31);
r_256_3072_768n1(b595, b254, h_6_mlp_c_proj_weight, b590);
E_768(h_7_ln_1_weight, adam_lr, adam_m_h_7_ln_1_weight, b33, adam_v_h_7_ln_1_weight, b34);
E_768(h_6_mlp_c_proj_bias, adam_lr, adam_m_h_6_mlp_c_proj_bias, b33, adam_v_h_6_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_6_mlp_c_fc_weight, adam_v_h_6_mlp_c_fc_weight, adam_b1, b29, b251, b595, adam_b2, b31);
r2_3072_256(adam_m_h_6_mlp_c_fc_bias, adam_v_h_6_mlp_c_fc_bias, adam_b1, b29, b595, adam_b2, b31);
E_2359296(h_6_mlp_c_proj_weight, adam_lr, adam_m_h_6_mlp_c_proj_weight, b33, adam_v_h_6_mlp_c_proj_weight, b34);
r_256_768_3072n1(b600, h_6_mlp_c_fc_weight, b595);
E_3072(h_6_mlp_c_fc_bias, adam_lr, adam_m_h_6_mlp_c_fc_bias, b33, adam_v_h_6_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_6_ln_2_weight, adam_v_h_6_ln_2_weight, adam_b1, b29, b245, b248, b250, b600, adam_b2, b31);
r2_768_256n1(adam_m_h_6_ln_2_bias, adam_v_h_6_ln_2_bias, adam_b1, b29, b600, adam_b2, b31);
r_256_768n2(b262, b245, b248, h_6_ln_2_weight, b600, b250, b249);
E_2359296n1(h_6_mlp_c_fc_weight, adam_lr, adam_m_h_6_mlp_c_fc_weight, b33, adam_v_h_6_mlp_c_fc_weight, b34);
E_768(h_6_ln_2_bias, adam_lr, adam_m_h_6_ln_2_bias, b33, adam_v_h_6_ln_2_bias, b34);
r_256_768n3(b249, b250, h_6_ln_2_weight, b600, b245, b248, b262);
E_256_768n2(b605, b590, b250, h_6_ln_2_weight, b600, b245, b248, b262, b249);
r2_768_768_4_64(adam_m_h_6_attn_c_proj_weight, adam_v_h_6_attn_c_proj_weight, adam_b1, b29, b244, b605, adam_b2, b31);
r2_768_256n1(adam_m_h_6_attn_c_proj_bias, adam_v_h_6_attn_c_proj_bias, adam_b1, b29, b605, adam_b2, b31);
r_256_768_768(b270, h_6_attn_c_proj_weight, b605);
E_768(h_6_ln_2_weight, adam_lr, adam_m_h_6_ln_2_weight, b33, adam_v_h_6_ln_2_weight, b34);
E_768(h_6_attn_c_proj_bias, adam_lr, adam_m_h_6_attn_c_proj_bias, b33, adam_v_h_6_attn_c_proj_bias, b34);
E_589824(h_6_attn_c_proj_weight, adam_lr, adam_m_h_6_attn_c_proj_weight, b33, adam_v_h_6_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b274, b243, b270);
r_4_12_64_64_64n3(b243, b236, b270);
r_3072_64n3(b304, b243, b239, b240, b241);
r_3072_64n4(b273, b239, b240, b243, b241, b304);
E_48_64_64(b270, b15, b239, b240, b243, b241, b304, b242, b273);
r_4_12_64_64_64n4(b243, b236, b270);
r_4_12_64_64_64n5(b239, b236, b270);
E_4_64_2304(b610, b274, b243, b239);
r2_2304_768_256(adam_m_h_6_attn_c_attn_weight, adam_v_h_6_attn_c_attn_weight, adam_b1, b29, b233, b610, adam_b2, b31);
r2_2304_256(adam_m_h_6_attn_c_attn_bias, adam_v_h_6_attn_c_attn_bias, adam_b1, b29, b610, adam_b2, b31);
r_256_768_2304(b615, h_6_attn_c_attn_weight, b610);
E_2304(h_6_attn_c_attn_bias, adam_lr, adam_m_h_6_attn_c_attn_bias, b33, adam_v_h_6_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_6_ln_1_weight, adam_v_h_6_ln_1_weight, adam_b1, b29, b227, b230, b232, b615, adam_b2, b31);
r2_768_256n1(adam_m_h_6_ln_1_bias, adam_v_h_6_ln_1_bias, adam_b1, b29, b615, adam_b2, b31);
r_256_768n2(b249, b227, b230, h_6_ln_1_weight, b615, b232, b231);
E_1769472(h_6_attn_c_attn_weight, adam_lr, adam_m_h_6_attn_c_attn_weight, b33, adam_v_h_6_attn_c_attn_weight, b34);
E_768(h_6_ln_1_bias, adam_lr, adam_m_h_6_ln_1_bias, b33, adam_v_h_6_ln_1_bias, b34);
r_256_768n3(b231, b232, h_6_ln_1_weight, b615, b227, b230, b249);
E_256_768n2(b620, b605, b232, h_6_ln_1_weight, b615, b227, b230, b249, b231);
r2_768_3072_256(adam_m_h_5_mlp_c_proj_weight, adam_v_h_5_mlp_c_proj_weight, adam_b1, b29, b226, b620, adam_b2, b31);
r2_768_256n1(adam_m_h_5_mlp_c_proj_bias, adam_v_h_5_mlp_c_proj_bias, adam_b1, b29, b620, adam_b2, b31);
r_256_3072_768n1(b625, b223, h_5_mlp_c_proj_weight, b620);
E_768(h_6_ln_1_weight, adam_lr, adam_m_h_6_ln_1_weight, b33, adam_v_h_6_ln_1_weight, b34);
E_768(h_5_mlp_c_proj_bias, adam_lr, adam_m_h_5_mlp_c_proj_bias, b33, adam_v_h_5_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_5_mlp_c_fc_weight, adam_v_h_5_mlp_c_fc_weight, adam_b1, b29, b220, b625, adam_b2, b31);
r2_3072_256(adam_m_h_5_mlp_c_fc_bias, adam_v_h_5_mlp_c_fc_bias, adam_b1, b29, b625, adam_b2, b31);
E_2359296(h_5_mlp_c_proj_weight, adam_lr, adam_m_h_5_mlp_c_proj_weight, b33, adam_v_h_5_mlp_c_proj_weight, b34);
r_256_768_3072n1(b630, h_5_mlp_c_fc_weight, b625);
E_3072(h_5_mlp_c_fc_bias, adam_lr, adam_m_h_5_mlp_c_fc_bias, b33, adam_v_h_5_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_5_ln_2_weight, adam_v_h_5_ln_2_weight, adam_b1, b29, b214, b217, b219, b630, adam_b2, b31);
r2_768_256n1(adam_m_h_5_ln_2_bias, adam_v_h_5_ln_2_bias, adam_b1, b29, b630, adam_b2, b31);
r_256_768n2(b231, b214, b217, h_5_ln_2_weight, b630, b219, b218);
E_2359296n1(h_5_mlp_c_fc_weight, adam_lr, adam_m_h_5_mlp_c_fc_weight, b33, adam_v_h_5_mlp_c_fc_weight, b34);
E_768(h_5_ln_2_bias, adam_lr, adam_m_h_5_ln_2_bias, b33, adam_v_h_5_ln_2_bias, b34);
r_256_768n3(b218, b219, h_5_ln_2_weight, b630, b214, b217, b231);
E_256_768n2(b635, b620, b219, h_5_ln_2_weight, b630, b214, b217, b231, b218);
r2_768_768_4_64(adam_m_h_5_attn_c_proj_weight, adam_v_h_5_attn_c_proj_weight, adam_b1, b29, b213, b635, adam_b2, b31);
r2_768_256n1(adam_m_h_5_attn_c_proj_bias, adam_v_h_5_attn_c_proj_bias, adam_b1, b29, b635, adam_b2, b31);
r_256_768_768(b239, h_5_attn_c_proj_weight, b635);
E_768(h_5_ln_2_weight, adam_lr, adam_m_h_5_ln_2_weight, b33, adam_v_h_5_ln_2_weight, b34);
E_768(h_5_attn_c_proj_bias, adam_lr, adam_m_h_5_attn_c_proj_bias, b33, adam_v_h_5_attn_c_proj_bias, b34);
E_589824(h_5_attn_c_proj_weight, adam_lr, adam_m_h_5_attn_c_proj_weight, b33, adam_v_h_5_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b243, b212, b239);
r_4_12_64_64_64n3(b212, b205, b239);
r_3072_64n3(b273, b212, b208, b209, b210);
r_3072_64n4(b242, b208, b209, b212, b210, b273);
E_48_64_64(b239, b13, b208, b209, b212, b210, b273, b211, b242);
r_4_12_64_64_64n4(b212, b205, b239);
r_4_12_64_64_64n5(b208, b205, b239);
E_4_64_2304(b640, b243, b212, b208);
r2_2304_768_256(adam_m_h_5_attn_c_attn_weight, adam_v_h_5_attn_c_attn_weight, adam_b1, b29, b202, b640, adam_b2, b31);
r2_2304_256(adam_m_h_5_attn_c_attn_bias, adam_v_h_5_attn_c_attn_bias, adam_b1, b29, b640, adam_b2, b31);
r_256_768_2304(b645, h_5_attn_c_attn_weight, b640);
E_2304(h_5_attn_c_attn_bias, adam_lr, adam_m_h_5_attn_c_attn_bias, b33, adam_v_h_5_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_5_ln_1_weight, adam_v_h_5_ln_1_weight, adam_b1, b29, b196, b199, b201, b645, adam_b2, b31);
r2_768_256n1(adam_m_h_5_ln_1_bias, adam_v_h_5_ln_1_bias, adam_b1, b29, b645, adam_b2, b31);
r_256_768n2(b218, b196, b199, h_5_ln_1_weight, b645, b201, b200);
E_1769472(h_5_attn_c_attn_weight, adam_lr, adam_m_h_5_attn_c_attn_weight, b33, adam_v_h_5_attn_c_attn_weight, b34);
E_768(h_5_ln_1_bias, adam_lr, adam_m_h_5_ln_1_bias, b33, adam_v_h_5_ln_1_bias, b34);
r_256_768n3(b200, b201, h_5_ln_1_weight, b645, b196, b199, b218);
E_256_768n2(b650, b635, b201, h_5_ln_1_weight, b645, b196, b199, b218, b200);
r2_768_3072_256(adam_m_h_4_mlp_c_proj_weight, adam_v_h_4_mlp_c_proj_weight, adam_b1, b29, b195, b650, adam_b2, b31);
r2_768_256n1(adam_m_h_4_mlp_c_proj_bias, adam_v_h_4_mlp_c_proj_bias, adam_b1, b29, b650, adam_b2, b31);
r_256_3072_768n1(b655, b192, h_4_mlp_c_proj_weight, b650);
E_768(h_5_ln_1_weight, adam_lr, adam_m_h_5_ln_1_weight, b33, adam_v_h_5_ln_1_weight, b34);
E_768(h_4_mlp_c_proj_bias, adam_lr, adam_m_h_4_mlp_c_proj_bias, b33, adam_v_h_4_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_4_mlp_c_fc_weight, adam_v_h_4_mlp_c_fc_weight, adam_b1, b29, b189, b655, adam_b2, b31);
r2_3072_256(adam_m_h_4_mlp_c_fc_bias, adam_v_h_4_mlp_c_fc_bias, adam_b1, b29, b655, adam_b2, b31);
E_2359296(h_4_mlp_c_proj_weight, adam_lr, adam_m_h_4_mlp_c_proj_weight, b33, adam_v_h_4_mlp_c_proj_weight, b34);
r_256_768_3072n1(b660, h_4_mlp_c_fc_weight, b655);
E_3072(h_4_mlp_c_fc_bias, adam_lr, adam_m_h_4_mlp_c_fc_bias, b33, adam_v_h_4_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_4_ln_2_weight, adam_v_h_4_ln_2_weight, adam_b1, b29, b183, b186, b188, b660, adam_b2, b31);
r2_768_256n1(adam_m_h_4_ln_2_bias, adam_v_h_4_ln_2_bias, adam_b1, b29, b660, adam_b2, b31);
r_256_768n2(b200, b183, b186, h_4_ln_2_weight, b660, b188, b187);
E_2359296n1(h_4_mlp_c_fc_weight, adam_lr, adam_m_h_4_mlp_c_fc_weight, b33, adam_v_h_4_mlp_c_fc_weight, b34);
E_768(h_4_ln_2_bias, adam_lr, adam_m_h_4_ln_2_bias, b33, adam_v_h_4_ln_2_bias, b34);
r_256_768n3(b187, b188, h_4_ln_2_weight, b660, b183, b186, b200);
E_256_768n2(b665, b650, b188, h_4_ln_2_weight, b660, b183, b186, b200, b187);
r2_768_768_4_64(adam_m_h_4_attn_c_proj_weight, adam_v_h_4_attn_c_proj_weight, adam_b1, b29, b182, b665, adam_b2, b31);
r2_768_256n1(adam_m_h_4_attn_c_proj_bias, adam_v_h_4_attn_c_proj_bias, adam_b1, b29, b665, adam_b2, b31);
r_256_768_768(b208, h_4_attn_c_proj_weight, b665);
E_768(h_4_ln_2_weight, adam_lr, adam_m_h_4_ln_2_weight, b33, adam_v_h_4_ln_2_weight, b34);
E_768(h_4_attn_c_proj_bias, adam_lr, adam_m_h_4_attn_c_proj_bias, b33, adam_v_h_4_attn_c_proj_bias, b34);
E_589824(h_4_attn_c_proj_weight, adam_lr, adam_m_h_4_attn_c_proj_weight, b33, adam_v_h_4_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b212, b181, b208);
r_4_12_64_64_64n3(b181, b174, b208);
r_3072_64n3(b242, b181, b177, b178, b179);
r_3072_64n4(b211, b177, b178, b181, b179, b242);
E_48_64_64(b208, b11, b177, b178, b181, b179, b242, b180, b211);
r_4_12_64_64_64n4(b181, b174, b208);
r_4_12_64_64_64n5(b177, b174, b208);
E_4_64_2304(b670, b212, b181, b177);
r2_2304_768_256(adam_m_h_4_attn_c_attn_weight, adam_v_h_4_attn_c_attn_weight, adam_b1, b29, b171, b670, adam_b2, b31);
r2_2304_256(adam_m_h_4_attn_c_attn_bias, adam_v_h_4_attn_c_attn_bias, adam_b1, b29, b670, adam_b2, b31);
r_256_768_2304(b675, h_4_attn_c_attn_weight, b670);
E_2304(h_4_attn_c_attn_bias, adam_lr, adam_m_h_4_attn_c_attn_bias, b33, adam_v_h_4_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_4_ln_1_weight, adam_v_h_4_ln_1_weight, adam_b1, b29, b165, b168, b170, b675, adam_b2, b31);
r2_768_256n1(adam_m_h_4_ln_1_bias, adam_v_h_4_ln_1_bias, adam_b1, b29, b675, adam_b2, b31);
r_256_768n2(b187, b165, b168, h_4_ln_1_weight, b675, b170, b169);
E_1769472(h_4_attn_c_attn_weight, adam_lr, adam_m_h_4_attn_c_attn_weight, b33, adam_v_h_4_attn_c_attn_weight, b34);
E_768(h_4_ln_1_bias, adam_lr, adam_m_h_4_ln_1_bias, b33, adam_v_h_4_ln_1_bias, b34);
r_256_768n3(b169, b170, h_4_ln_1_weight, b675, b165, b168, b187);
E_256_768n2(b680, b665, b170, h_4_ln_1_weight, b675, b165, b168, b187, b169);
r2_768_3072_256(adam_m_h_3_mlp_c_proj_weight, adam_v_h_3_mlp_c_proj_weight, adam_b1, b29, b164, b680, adam_b2, b31);
r2_768_256n1(adam_m_h_3_mlp_c_proj_bias, adam_v_h_3_mlp_c_proj_bias, adam_b1, b29, b680, adam_b2, b31);
r_256_3072_768n1(b685, b161, h_3_mlp_c_proj_weight, b680);
E_768(h_4_ln_1_weight, adam_lr, adam_m_h_4_ln_1_weight, b33, adam_v_h_4_ln_1_weight, b34);
E_768(h_3_mlp_c_proj_bias, adam_lr, adam_m_h_3_mlp_c_proj_bias, b33, adam_v_h_3_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_3_mlp_c_fc_weight, adam_v_h_3_mlp_c_fc_weight, adam_b1, b29, b158, b685, adam_b2, b31);
r2_3072_256(adam_m_h_3_mlp_c_fc_bias, adam_v_h_3_mlp_c_fc_bias, adam_b1, b29, b685, adam_b2, b31);
E_2359296(h_3_mlp_c_proj_weight, adam_lr, adam_m_h_3_mlp_c_proj_weight, b33, adam_v_h_3_mlp_c_proj_weight, b34);
r_256_768_3072n1(b690, h_3_mlp_c_fc_weight, b685);
E_3072(h_3_mlp_c_fc_bias, adam_lr, adam_m_h_3_mlp_c_fc_bias, b33, adam_v_h_3_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_3_ln_2_weight, adam_v_h_3_ln_2_weight, adam_b1, b29, b152, b155, b157, b690, adam_b2, b31);
r2_768_256n1(adam_m_h_3_ln_2_bias, adam_v_h_3_ln_2_bias, adam_b1, b29, b690, adam_b2, b31);
r_256_768n2(b169, b152, b155, h_3_ln_2_weight, b690, b157, b156);
E_2359296n1(h_3_mlp_c_fc_weight, adam_lr, adam_m_h_3_mlp_c_fc_weight, b33, adam_v_h_3_mlp_c_fc_weight, b34);
E_768(h_3_ln_2_bias, adam_lr, adam_m_h_3_ln_2_bias, b33, adam_v_h_3_ln_2_bias, b34);
r_256_768n3(b156, b157, h_3_ln_2_weight, b690, b152, b155, b169);
E_256_768n2(b695, b680, b157, h_3_ln_2_weight, b690, b152, b155, b169, b156);
r2_768_768_4_64(adam_m_h_3_attn_c_proj_weight, adam_v_h_3_attn_c_proj_weight, adam_b1, b29, b151, b695, adam_b2, b31);
r2_768_256n1(adam_m_h_3_attn_c_proj_bias, adam_v_h_3_attn_c_proj_bias, adam_b1, b29, b695, adam_b2, b31);
r_256_768_768(b177, h_3_attn_c_proj_weight, b695);
E_768(h_3_ln_2_weight, adam_lr, adam_m_h_3_ln_2_weight, b33, adam_v_h_3_ln_2_weight, b34);
E_768(h_3_attn_c_proj_bias, adam_lr, adam_m_h_3_attn_c_proj_bias, b33, adam_v_h_3_attn_c_proj_bias, b34);
E_589824(h_3_attn_c_proj_weight, adam_lr, adam_m_h_3_attn_c_proj_weight, b33, adam_v_h_3_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b181, b150, b177);
r_4_12_64_64_64n3(b150, b143, b177);
r_3072_64n3(b211, b150, b146, b147, b148);
r_3072_64n4(b180, b146, b147, b150, b148, b211);
E_48_64_64(b177, b9, b146, b147, b150, b148, b211, b149, b180);
r_4_12_64_64_64n4(b150, b143, b177);
r_4_12_64_64_64n5(b146, b143, b177);
E_4_64_2304(b700, b181, b150, b146);
r2_2304_768_256(adam_m_h_3_attn_c_attn_weight, adam_v_h_3_attn_c_attn_weight, adam_b1, b29, b140, b700, adam_b2, b31);
r2_2304_256(adam_m_h_3_attn_c_attn_bias, adam_v_h_3_attn_c_attn_bias, adam_b1, b29, b700, adam_b2, b31);
r_256_768_2304(b705, h_3_attn_c_attn_weight, b700);
E_2304(h_3_attn_c_attn_bias, adam_lr, adam_m_h_3_attn_c_attn_bias, b33, adam_v_h_3_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_3_ln_1_weight, adam_v_h_3_ln_1_weight, adam_b1, b29, b134, b137, b139, b705, adam_b2, b31);
r2_768_256n1(adam_m_h_3_ln_1_bias, adam_v_h_3_ln_1_bias, adam_b1, b29, b705, adam_b2, b31);
r_256_768n2(b156, b134, b137, h_3_ln_1_weight, b705, b139, b138);
E_1769472(h_3_attn_c_attn_weight, adam_lr, adam_m_h_3_attn_c_attn_weight, b33, adam_v_h_3_attn_c_attn_weight, b34);
E_768(h_3_ln_1_bias, adam_lr, adam_m_h_3_ln_1_bias, b33, adam_v_h_3_ln_1_bias, b34);
r_256_768n3(b138, b139, h_3_ln_1_weight, b705, b134, b137, b156);
E_256_768n2(b710, b695, b139, h_3_ln_1_weight, b705, b134, b137, b156, b138);
r2_768_3072_256(adam_m_h_2_mlp_c_proj_weight, adam_v_h_2_mlp_c_proj_weight, adam_b1, b29, b133, b710, adam_b2, b31);
r2_768_256n1(adam_m_h_2_mlp_c_proj_bias, adam_v_h_2_mlp_c_proj_bias, adam_b1, b29, b710, adam_b2, b31);
r_256_3072_768n1(b715, b130, h_2_mlp_c_proj_weight, b710);
E_768(h_3_ln_1_weight, adam_lr, adam_m_h_3_ln_1_weight, b33, adam_v_h_3_ln_1_weight, b34);
E_768(h_2_mlp_c_proj_bias, adam_lr, adam_m_h_2_mlp_c_proj_bias, b33, adam_v_h_2_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_2_mlp_c_fc_weight, adam_v_h_2_mlp_c_fc_weight, adam_b1, b29, b127, b715, adam_b2, b31);
r2_3072_256(adam_m_h_2_mlp_c_fc_bias, adam_v_h_2_mlp_c_fc_bias, adam_b1, b29, b715, adam_b2, b31);
E_2359296(h_2_mlp_c_proj_weight, adam_lr, adam_m_h_2_mlp_c_proj_weight, b33, adam_v_h_2_mlp_c_proj_weight, b34);
r_256_768_3072n1(b720, h_2_mlp_c_fc_weight, b715);
E_3072(h_2_mlp_c_fc_bias, adam_lr, adam_m_h_2_mlp_c_fc_bias, b33, adam_v_h_2_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_2_ln_2_weight, adam_v_h_2_ln_2_weight, adam_b1, b29, b121, b124, b126, b720, adam_b2, b31);
r2_768_256n1(adam_m_h_2_ln_2_bias, adam_v_h_2_ln_2_bias, adam_b1, b29, b720, adam_b2, b31);
r_256_768n2(b138, b121, b124, h_2_ln_2_weight, b720, b126, b125);
E_2359296n1(h_2_mlp_c_fc_weight, adam_lr, adam_m_h_2_mlp_c_fc_weight, b33, adam_v_h_2_mlp_c_fc_weight, b34);
E_768(h_2_ln_2_bias, adam_lr, adam_m_h_2_ln_2_bias, b33, adam_v_h_2_ln_2_bias, b34);
r_256_768n3(b125, b126, h_2_ln_2_weight, b720, b121, b124, b138);
E_256_768n2(b725, b710, b126, h_2_ln_2_weight, b720, b121, b124, b138, b125);
r2_768_768_4_64(adam_m_h_2_attn_c_proj_weight, adam_v_h_2_attn_c_proj_weight, adam_b1, b29, b120, b725, adam_b2, b31);
r2_768_256n1(adam_m_h_2_attn_c_proj_bias, adam_v_h_2_attn_c_proj_bias, adam_b1, b29, b725, adam_b2, b31);
r_256_768_768(b146, h_2_attn_c_proj_weight, b725);
E_768(h_2_ln_2_weight, adam_lr, adam_m_h_2_ln_2_weight, b33, adam_v_h_2_ln_2_weight, b34);
E_768(h_2_attn_c_proj_bias, adam_lr, adam_m_h_2_attn_c_proj_bias, b33, adam_v_h_2_attn_c_proj_bias, b34);
E_589824(h_2_attn_c_proj_weight, adam_lr, adam_m_h_2_attn_c_proj_weight, b33, adam_v_h_2_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b150, b119, b146);
r_4_12_64_64_64n3(b119, b112, b146);
r_3072_64n3(b180, b119, b115, b116, b117);
r_3072_64n4(b149, b115, b116, b119, b117, b180);
E_48_64_64(b146, b7, b115, b116, b119, b117, b180, b118, b149);
r_4_12_64_64_64n4(b119, b112, b146);
r_4_12_64_64_64n5(b115, b112, b146);
E_4_64_2304(b730, b150, b119, b115);
r2_2304_768_256(adam_m_h_2_attn_c_attn_weight, adam_v_h_2_attn_c_attn_weight, adam_b1, b29, b109, b730, adam_b2, b31);
r2_2304_256(adam_m_h_2_attn_c_attn_bias, adam_v_h_2_attn_c_attn_bias, adam_b1, b29, b730, adam_b2, b31);
r_256_768_2304(b735, h_2_attn_c_attn_weight, b730);
E_2304(h_2_attn_c_attn_bias, adam_lr, adam_m_h_2_attn_c_attn_bias, b33, adam_v_h_2_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_2_ln_1_weight, adam_v_h_2_ln_1_weight, adam_b1, b29, b103, b106, b108, b735, adam_b2, b31);
r2_768_256n1(adam_m_h_2_ln_1_bias, adam_v_h_2_ln_1_bias, adam_b1, b29, b735, adam_b2, b31);
r_256_768n2(b125, b103, b106, h_2_ln_1_weight, b735, b108, b107);
E_1769472(h_2_attn_c_attn_weight, adam_lr, adam_m_h_2_attn_c_attn_weight, b33, adam_v_h_2_attn_c_attn_weight, b34);
E_768(h_2_ln_1_bias, adam_lr, adam_m_h_2_ln_1_bias, b33, adam_v_h_2_ln_1_bias, b34);
r_256_768n3(b107, b108, h_2_ln_1_weight, b735, b103, b106, b125);
E_256_768n2(b740, b725, b108, h_2_ln_1_weight, b735, b103, b106, b125, b107);
r2_768_3072_256(adam_m_h_1_mlp_c_proj_weight, adam_v_h_1_mlp_c_proj_weight, adam_b1, b29, b102, b740, adam_b2, b31);
r2_768_256n1(adam_m_h_1_mlp_c_proj_bias, adam_v_h_1_mlp_c_proj_bias, adam_b1, b29, b740, adam_b2, b31);
r_256_3072_768n1(b745, b99, h_1_mlp_c_proj_weight, b740);
E_768(h_2_ln_1_weight, adam_lr, adam_m_h_2_ln_1_weight, b33, adam_v_h_2_ln_1_weight, b34);
E_768(h_1_mlp_c_proj_bias, adam_lr, adam_m_h_1_mlp_c_proj_bias, b33, adam_v_h_1_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_1_mlp_c_fc_weight, adam_v_h_1_mlp_c_fc_weight, adam_b1, b29, b96, b745, adam_b2, b31);
r2_3072_256(adam_m_h_1_mlp_c_fc_bias, adam_v_h_1_mlp_c_fc_bias, adam_b1, b29, b745, adam_b2, b31);
E_2359296(h_1_mlp_c_proj_weight, adam_lr, adam_m_h_1_mlp_c_proj_weight, b33, adam_v_h_1_mlp_c_proj_weight, b34);
r_256_768_3072n1(b750, h_1_mlp_c_fc_weight, b745);
E_3072(h_1_mlp_c_fc_bias, adam_lr, adam_m_h_1_mlp_c_fc_bias, b33, adam_v_h_1_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_1_ln_2_weight, adam_v_h_1_ln_2_weight, adam_b1, b29, b90, b93, b95, b750, adam_b2, b31);
r2_768_256n1(adam_m_h_1_ln_2_bias, adam_v_h_1_ln_2_bias, adam_b1, b29, b750, adam_b2, b31);
r_256_768n2(b107, b90, b93, h_1_ln_2_weight, b750, b95, b94);
E_2359296n1(h_1_mlp_c_fc_weight, adam_lr, adam_m_h_1_mlp_c_fc_weight, b33, adam_v_h_1_mlp_c_fc_weight, b34);
E_768(h_1_ln_2_bias, adam_lr, adam_m_h_1_ln_2_bias, b33, adam_v_h_1_ln_2_bias, b34);
r_256_768n3(b94, b95, h_1_ln_2_weight, b750, b90, b93, b107);
E_256_768n2(b755, b740, b95, h_1_ln_2_weight, b750, b90, b93, b107, b94);
r2_768_768_4_64(adam_m_h_1_attn_c_proj_weight, adam_v_h_1_attn_c_proj_weight, adam_b1, b29, b89, b755, adam_b2, b31);
r2_768_256n1(adam_m_h_1_attn_c_proj_bias, adam_v_h_1_attn_c_proj_bias, adam_b1, b29, b755, adam_b2, b31);
r_256_768_768(b115, h_1_attn_c_proj_weight, b755);
E_768(h_1_ln_2_weight, adam_lr, adam_m_h_1_ln_2_weight, b33, adam_v_h_1_ln_2_weight, b34);
E_768(h_1_attn_c_proj_bias, adam_lr, adam_m_h_1_attn_c_proj_bias, b33, adam_v_h_1_attn_c_proj_bias, b34);
E_589824(h_1_attn_c_proj_weight, adam_lr, adam_m_h_1_attn_c_proj_weight, b33, adam_v_h_1_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b119, b88, b115);
r_4_12_64_64_64n3(b88, b81, b115);
r_3072_64n3(b149, b88, b84, b85, b86);
r_3072_64n4(b118, b84, b85, b88, b86, b149);
E_48_64_64(b115, b5, b84, b85, b88, b86, b149, b87, b118);
r_4_12_64_64_64n4(b88, b81, b115);
r_4_12_64_64_64n5(b84, b81, b115);
E_4_64_2304(b760, b119, b88, b84);
r2_2304_768_256(adam_m_h_1_attn_c_attn_weight, adam_v_h_1_attn_c_attn_weight, adam_b1, b29, b78, b760, adam_b2, b31);
r2_2304_256(adam_m_h_1_attn_c_attn_bias, adam_v_h_1_attn_c_attn_bias, adam_b1, b29, b760, adam_b2, b31);
r_256_768_2304(b765, h_1_attn_c_attn_weight, b760);
E_2304(h_1_attn_c_attn_bias, adam_lr, adam_m_h_1_attn_c_attn_bias, b33, adam_v_h_1_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_1_ln_1_weight, adam_v_h_1_ln_1_weight, adam_b1, b29, b72, b75, b77, b765, adam_b2, b31);
r2_768_256n1(adam_m_h_1_ln_1_bias, adam_v_h_1_ln_1_bias, adam_b1, b29, b765, adam_b2, b31);
r_256_768n2(b94, b72, b75, h_1_ln_1_weight, b765, b77, b76);
E_1769472(h_1_attn_c_attn_weight, adam_lr, adam_m_h_1_attn_c_attn_weight, b33, adam_v_h_1_attn_c_attn_weight, b34);
E_768(h_1_ln_1_bias, adam_lr, adam_m_h_1_ln_1_bias, b33, adam_v_h_1_ln_1_bias, b34);
r_256_768n3(b76, b77, h_1_ln_1_weight, b765, b72, b75, b94);
E_256_768n2(b770, b755, b77, h_1_ln_1_weight, b765, b72, b75, b94, b76);
r2_768_3072_256(adam_m_h_0_mlp_c_proj_weight, adam_v_h_0_mlp_c_proj_weight, adam_b1, b29, b71, b770, adam_b2, b31);
r2_768_256n1(adam_m_h_0_mlp_c_proj_bias, adam_v_h_0_mlp_c_proj_bias, adam_b1, b29, b770, adam_b2, b31);
r_256_3072_768n1(b775, b68, h_0_mlp_c_proj_weight, b770);
E_768(h_1_ln_1_weight, adam_lr, adam_m_h_1_ln_1_weight, b33, adam_v_h_1_ln_1_weight, b34);
E_768(h_0_mlp_c_proj_bias, adam_lr, adam_m_h_0_mlp_c_proj_bias, b33, adam_v_h_0_mlp_c_proj_bias, b34);
r2_3072_768_256(adam_m_h_0_mlp_c_fc_weight, adam_v_h_0_mlp_c_fc_weight, adam_b1, b29, b65, b775, adam_b2, b31);
r2_3072_256(adam_m_h_0_mlp_c_fc_bias, adam_v_h_0_mlp_c_fc_bias, adam_b1, b29, b775, adam_b2, b31);
E_2359296(h_0_mlp_c_proj_weight, adam_lr, adam_m_h_0_mlp_c_proj_weight, b33, adam_v_h_0_mlp_c_proj_weight, b34);
r_256_768_3072n1(b780, h_0_mlp_c_fc_weight, b775);
E_3072(h_0_mlp_c_fc_bias, adam_lr, adam_m_h_0_mlp_c_fc_bias, b33, adam_v_h_0_mlp_c_fc_bias, b34);
r2_768_256(adam_m_h_0_ln_2_weight, adam_v_h_0_ln_2_weight, adam_b1, b29, b59, b62, b64, b780, adam_b2, b31);
r2_768_256n1(adam_m_h_0_ln_2_bias, adam_v_h_0_ln_2_bias, adam_b1, b29, b780, adam_b2, b31);
r_256_768n2(b76, b59, b62, h_0_ln_2_weight, b780, b64, b63);
E_2359296n1(h_0_mlp_c_fc_weight, adam_lr, adam_m_h_0_mlp_c_fc_weight, b33, adam_v_h_0_mlp_c_fc_weight, b34);
E_768(h_0_ln_2_bias, adam_lr, adam_m_h_0_ln_2_bias, b33, adam_v_h_0_ln_2_bias, b34);
r_256_768n3(b63, b64, h_0_ln_2_weight, b780, b59, b62, b76);
E_256_768n2(b785, b770, b64, h_0_ln_2_weight, b780, b59, b62, b76, b63);
r2_768_768_4_64(adam_m_h_0_attn_c_proj_weight, adam_v_h_0_attn_c_proj_weight, adam_b1, b29, b58, b785, adam_b2, b31);
r2_768_256n1(adam_m_h_0_attn_c_proj_bias, adam_v_h_0_attn_c_proj_bias, adam_b1, b29, b785, adam_b2, b31);
r_256_768_768(b84, h_0_attn_c_proj_weight, b785);
E_768(h_0_ln_2_weight, adam_lr, adam_m_h_0_ln_2_weight, b33, adam_v_h_0_ln_2_weight, b34);
E_768(h_0_attn_c_proj_bias, adam_lr, adam_m_h_0_attn_c_proj_bias, b33, adam_v_h_0_attn_c_proj_bias, b34);
E_589824(h_0_attn_c_proj_weight, adam_lr, adam_m_h_0_attn_c_proj_weight, b33, adam_v_h_0_attn_c_proj_weight, b34);
r_4_12_64_64_64n2(b88, b57, b84);
r_4_12_64_64_64n3(b57, b50, b84);
r_3072_64n3(b118, b57, b53, b54, b55);
r_3072_64n4(b87, b53, b54, b57, b55, b118);
E_48_64_64(b84, b3, b53, b54, b57, b55, b118, b56, b87);
r_4_12_64_64_64n4(b57, b50, b84);
r_4_12_64_64_64n5(b53, b50, b84);
E_4_64_2304(b790, b88, b57, b53);
r2_2304_768_256(adam_m_h_0_attn_c_attn_weight, adam_v_h_0_attn_c_attn_weight, adam_b1, b29, b47, b790, adam_b2, b31);
r2_2304_256(adam_m_h_0_attn_c_attn_bias, adam_v_h_0_attn_c_attn_bias, adam_b1, b29, b790, adam_b2, b31);
r_256_768_2304(b795, h_0_attn_c_attn_weight, b790);
E_2304(h_0_attn_c_attn_bias, adam_lr, adam_m_h_0_attn_c_attn_bias, b33, adam_v_h_0_attn_c_attn_bias, b34);
r2_768_256(adam_m_h_0_ln_1_weight, adam_v_h_0_ln_1_weight, adam_b1, b29, b40, b44, b46, b795, adam_b2, b31);
r2_768_256n1(adam_m_h_0_ln_1_bias, adam_v_h_0_ln_1_bias, adam_b1, b29, b795, adam_b2, b31);
r_256_768n2(b63, b40, b44, h_0_ln_1_weight, b795, b46, b45);
E_1769472(h_0_attn_c_attn_weight, adam_lr, adam_m_h_0_attn_c_attn_weight, b33, adam_v_h_0_attn_c_attn_weight, b34);
E_768(h_0_ln_1_bias, adam_lr, adam_m_h_0_ln_1_bias, b33, adam_v_h_0_ln_1_bias, b34);
r_256_768n3(b45, b46, h_0_ln_1_weight, b795, b40, b44, b63);
E_256_768n2(b53, b785, b46, h_0_ln_1_weight, b795, b40, b44, b63, b45);
E_768(h_0_ln_1_weight, adam_lr, adam_m_h_0_ln_1_weight, b33, adam_v_h_0_ln_1_weight, b34);
r_49152_4(b800, b53);
r_50257_768_256(b801, X, wte_arange, b53);
r2_1024_768_64(adam_m_wpe_weight, adam_v_wpe_weight, adam_b1, b29, b2, wpe_arange, b800, adam_b2, b31);
r_50257_768_256n1(grad_lm_head_weight, b419, b429, b801);
E_786432n1(wpe_weight, adam_lr, adam_m_wpe_weight, b33, adam_v_wpe_weight, b34);
E_38597376(adam_m_lm_head_weight, adam_b1, b29, grad_lm_head_weight);
E_38597376n1(adam_v_lm_head_weight, adam_b2, b31, grad_lm_head_weight);
E_38597376n2(lm_head_weight, adam_lr, adam_m_lm_head_weight, b33, adam_v_lm_head_weight, b34);
}
@michaelskyba
Copy link

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment