{"version":"1.1.0","chapters":[{"startTime":32.0,"title":"Background"},{"startTime":56.0,"title":"Prompting LLMs with direct queries"},{"startTime":220.0,"title":"Prompting LLMs with flattery and dialogue"},{"startTime":305.0,"title":"Simulator Theory"},{"startTime":543.0,"title":"The limits of flattery"},{"startTime":736.0,"title":"Derrida — il n'y a pas de hors-texte"},{"startTime":917.0,"title":"The Waluigi Effect"},{"startTime":1020.0,"title":"(1) Rules are meant to be broken."},{"startTime":1185.0,"title":"(2) Traits are complex, valences are simple."},{"startTime":1323.0,"title":"(3) Structuralist narratology"},{"startTime":1542.0,"title":"Superpositions will typically collapse to waluigis"},{"startTime":1668.0,"title":"Evidence from Microsoft Sydney"},{"startTime":1722.0,"title":"Waluigis after RLHF"},{"startTime":1879.0,"title":"(2) Empirical evidence from Perez et al."},{"startTime":1991.0,"title":"(3) RLHF promotes mode-collapse"},{"startTime":2422.0,"title":"Conclusion"}]}