<?xml version="1.0" encoding="utf-8" standalone="yes"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
  <channel>
    <title>Security on Cdani&#39;s Blog</title>
    <link>https://c-daniele.github.io/en/tags/security/</link>
    <description>Recent content in Security on Cdani&#39;s Blog</description>
    <generator>Hugo</generator>
    <language>en-US</language>
    <lastBuildDate>Thu, 15 May 2025 00:00:00 +0200</lastBuildDate>
    <atom:link href="https://c-daniele.github.io/en/tags/security/index.xml" rel="self" type="application/rss+xml" />
    <item>
      <title>Policy Puppetry Prompt Injection</title>
      <link>https://c-daniele.github.io/en/posts/2025-05-15-policy-puppetry/</link>
      <pubDate>Thu, 15 May 2025 00:00:00 +0200</pubDate>
      <guid>https://c-daniele.github.io/en/posts/2025-05-15-policy-puppetry/</guid>
      <description>&lt;h1 id=&#34;policy-puppetry-prompt-injection&#34;&gt;Policy Puppetry Prompt Injection&lt;/h1&gt;&#xA;&lt;p&gt;A few days ago, I experimented with some jailbreaking techniques, which I share in this &lt;a href=&#34;https://github.com/c-daniele/policy-puppetry&#34; target=&#34;_blank&#34; rel=&#34;noopener noreferrer&#34;&gt;repo&lt;/a&gt;.&lt;br&gt;&#xA;I started from a &lt;a href=&#34;https://hiddenlayer.com/innovation-hub/novel-universal-bypass-for-all-major-llms/&#34; target=&#34;_blank&#34; rel=&#34;noopener noreferrer&#34;&gt;HiddenLayer article&lt;/a&gt; published a few weeks ago, in which the research team described a rather creative and ingenious &lt;strong&gt;jailbreaking&lt;/strong&gt; technique for bypassing the safety guardrails and alignment of frontier models.&lt;br&gt;&#xA;The technique appears to be &lt;strong&gt;universal&lt;/strong&gt;: a &lt;strong&gt;single prompt&lt;/strong&gt; works across multiple models and can elicit content that is normally blocked as unsafe, or even portions of the native system prompt.&lt;/p&gt;</description>
    </item>
  </channel>
</rss>
