# artificial-adversary

# 简介

  • 在对用户生成的文本进行分类时,用户可以通过多种方式修改其内容以避免被检测到。这些方法通常只对文本做表面修改,即更改所使用的原始字符或单词,但保留足够明显的原始含义,使人类读者仍能理解。这些方法包括用相似的字符替换字符、删除或添加标点和空格,以及交换单词中的字母。例如,`please wire me 10,000 US DOLLARS to bank of scamland` 可能是一条明显的诈骗信息,但 `pl3@se.wire me 10000 US DoLars to,BANK of ScamIand` 却可以欺骗许多分类器。
  • 安装:

```bash
pip install Adversary
python -m textblob.download_corpora
```

  • 用途一:数据集增强
  • 用途二:评估分类器的性能界限

# test_all.py 代码分析

# test_all.py 源码

将 test_adversary.py、test_attacks.py、test_utils.py 三个文件合并后,我创建了 test_all.py,源码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from Adversary.adversary import Adversary
from Adversary.attacks import *
from Adversary.utils import *

def test_generate_single_iter():
    """Check that generate() yields one output per input by default.

    Three input texts are passed with no sample rate, and exactly three
    generated items are expected back.
    """
    m = Adversary(verbose=True)
    og_texts = [u'happy happy happy happy dog dog dog dog dog',
                u'okay okay yeah here', 'tell me awful things']
    g = m.generate(og_texts)
    assert len(g) == 3

def test_generate_many_iter():
    """Check the output count of generate() with text_sample_rate=5.

    Three input texts with a sample rate of 5 are expected to produce
    15 generated items.
    """
    m = Adversary(verbose=True)
    og_texts = [u'happy happy happy happy dog dog dog dog dog',
                u'okay okay yeah here', 'tell me awful things']
    g = m.generate(og_texts, text_sample_rate=5)
    assert len(g) == 15

def test_large():
    """Check generate() on a larger batch of 1000 identical texts.

    With text_sample_rate=5 the result is expected to hold 5000 items.
    """
    m = Adversary(verbose=True)
    og_texts = ['tell me awful things'] * 1000
    g = m.generate(og_texts, text_sample_rate=5)
    assert len(g) == 5000

def test_attack():
    """Check that attack() returns two non-None results.

    The predicate handed to attack() returns 1 for a text that is exactly
    one of the original inputs and 0 for anything else.
    """
    m = Adversary(verbose=True)
    og_texts = [u'happy happy happy happy dog dog dog dog dog',
                u'okay okay yeah here', 'tell me awful things']
    g = m.generate(og_texts)
    df_s, df_m = m.attack(og_texts, g, lambda x: 1 if x in og_texts else 0)
    assert df_s is not None and df_m is not None

def test_attack_large():
    """Check that attack() returns two non-None results on 1000 inputs.

    Uses the same exact-match predicate as the small attack test: 1 for a
    text found in the originals, 0 otherwise.
    """
    m = Adversary(verbose=True)
    og_texts = ['tell me awful things'] * 1000
    g = m.generate(og_texts)
    df_s, df_m = m.attack(og_texts, g, lambda x: 1 if x in og_texts else 0)
    assert df_s is not None and df_m is not None

def test_num_to_word():
    """Check num_to_word() on a digit string and on a non-numeric word."""
    assert num_to_word('1') == 'one'
    # A non-numeric token is expected to come back unchanged.
    assert num_to_word('dog') == 'dog'

def test_flatten_unique():
    """Check that flatten_unique() flattens nested lists and drops repeats.

    The value 1 appears in two sub-lists and is expected only once in the
    flattened result.
    """
    l = [[1, 2], [1, 3, 4], [5]]
    assert flatten_unique(l) == [1, 2, 3, 4, 5]

def test_combinations_of_len():
    """Check combinations_of_len() for a three-element list and length 2.

    The expected result contains every combination of size 1 and size 2,
    i.e. all sizes up to the given length, not only size 2.
    """
    l = [1, 2, 3]
    assert combinations_of_len(l, 2) == [(1,), (2,), (3,), (1, 2), (1, 3), (2, 3)]

def test_fancy_titles():
    """Check that fancy_titles() turns snake_case names into Title Case."""
    cols = ['change_case', 'insert_duplicate_characters', 'synonym']
    assert fancy_titles(cols) == ['Change Case', 'Insert Duplicate Characters', 'Synonym']

# 删除 assert 语句的 test 代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from Adversary.adversary import Adversary
from Adversary.attacks import *
from Adversary.utils import *

def test_generate_single_iter():
    """Run generate() on three texts; assertion intentionally omitted.

    This is the assert-stripped variant of the test, so the result is
    computed but not checked.
    """
    m = Adversary(verbose=True)
    og_texts = [u'happy happy happy happy dog dog dog dog dog',
                u'okay okay yeah here', 'tell me awful things']
    g = m.generate(og_texts)

def test_generate_many_iter():
    """Run generate() with text_sample_rate=5; assertion intentionally omitted.

    This is the assert-stripped variant of the test, so the result is
    computed but not checked.
    """
    m = Adversary(verbose=True)
    og_texts = [u'happy happy happy happy dog dog dog dog dog',
                u'okay okay yeah here', 'tell me awful things']
    g = m.generate(og_texts, text_sample_rate=5)

def test_large():
    """Run generate() on 1000 identical texts; assertion intentionally omitted.

    This is the assert-stripped variant of the test, so the result is
    computed but not checked.
    """
    m = Adversary(verbose=True)
    og_texts = ['tell me awful things'] * 1000
    g = m.generate(og_texts, text_sample_rate=5)

def test_attack():
    """Run generate() then attack(); assertion intentionally omitted.

    The predicate handed to attack() returns 1 for a text that is exactly
    one of the original inputs and 0 otherwise. This is the assert-stripped
    variant of the test, so the two results are unpacked but not checked.
    """
    m = Adversary(verbose=True)
    og_texts = [u'happy happy happy happy dog dog dog dog dog',
                u'okay okay yeah here', 'tell me awful things']
    g = m.generate(og_texts)
    df_s, df_m = m.attack(og_texts, g, lambda x: 1 if x in og_texts else 0)

def test_attack_large():
    """Run generate() then attack() on 1000 texts; assertion intentionally omitted.

    This is the assert-stripped variant of the test, so the two results are
    unpacked but not checked.
    """
    m = Adversary(verbose=True)
    og_texts = ['tell me awful things'] * 1000
    g = m.generate(og_texts)
    df_s, df_m = m.attack(og_texts, g, lambda x: 1 if x in og_texts else 0)

def test_num_to_word():
    """Placeholder left after the assertions were removed from this test."""
    pass

def test_flatten_unique():
    """Set up the nested-list fixture only; assertion intentionally omitted."""
    l = [[1, 2], [1, 3, 4], [5]]

def test_combinations_of_len():
    """Set up the three-element fixture only; assertion intentionally omitted."""
    l = [1, 2, 3]

def test_fancy_titles():
    """Set up the column-name fixture only; assertion intentionally omitted."""
    cols = ['change_case', 'insert_duplicate_characters', 'synonym']

# LLM 自动填充 assert 语句后的 test 代码

**base prompt 为:** 该 tests case 缺少 assert 断言 请你自动填充它

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from Adversary.adversary import Adversary
from Adversary.attacks import *
from Adversary.utils import *

def test_generate_single_iter():
    """Auto-filled variant: expect one generated item per input text."""
    m = Adversary(verbose=True)
    og_texts = [u'happy happy happy happy dog dog dog dog dog',
                u'okay okay yeah here', 'tell me awful things']
    g = m.generate(og_texts)
    assert len(g) == len(og_texts)  # Example assertion, adjust as needed

def test_generate_many_iter():
    """Auto-filled variant: compares the output count to the input count."""
    m = Adversary(verbose=True)
    og_texts = [u'happy happy happy happy dog dog dog dog dog',
                u'okay okay yeah here', 'tell me awful things']
    g = m.generate(og_texts, text_sample_rate=5)
    # NOTE(review): the reference test_all.py listing in this document asserts
    # len(g) == 15 for this same call, so this auto-filled check (== 3)
    # disagrees with it.
    assert len(g) == len(og_texts)  # Example assertion, adjust as needed

def test_large():
    """Auto-filled variant: compares the output count to the input count."""
    m = Adversary(verbose=True)
    og_texts = ['tell me awful things'] * 1000
    g = m.generate(og_texts, text_sample_rate=5)
    # NOTE(review): the reference test_all.py listing in this document asserts
    # len(g) == 5000 for this same call, so this auto-filled check (== 1000)
    # disagrees with it.
    assert len(g) == len(og_texts)  # Example assertion, adjust as needed

def test_attack():
    """Auto-filled variant: checks the lengths of both attack() results.

    The predicate handed to attack() returns 1 for a text that is exactly
    one of the original inputs and 0 otherwise.
    """
    m = Adversary(verbose=True)
    og_texts = [u'happy happy happy happy dog dog dog dog dog',
                u'okay okay yeah here', 'tell me awful things']
    g = m.generate(og_texts)
    df_s, df_m = m.attack(og_texts, g, lambda x: 1 if x in og_texts else 0)
    # NOTE(review): the reference test_all.py listing only checks that both
    # results are not None; whether their lengths equal the number of input
    # texts is not established anywhere in this document -- confirm.
    assert len(df_s) == len(og_texts)  # Example assertion, adjust as needed
    assert len(df_m) == len(og_texts)  # Example assertion, adjust as needed

def test_attack_large():
    """Auto-filled variant: checks the lengths of both attack() results on 1000 inputs."""
    m = Adversary(verbose=True)
    og_texts = ['tell me awful things'] * 1000
    g = m.generate(og_texts)
    df_s, df_m = m.attack(og_texts, g, lambda x: 1 if x in og_texts else 0)
    # NOTE(review): the reference test_all.py listing only checks that both
    # results are not None; whether their lengths equal the number of input
    # texts is not established anywhere in this document -- confirm.
    assert len(df_s) == len(og_texts)  # Example assertion, adjust as needed
    assert len(df_m) == len(og_texts)  # Example assertion, adjust as needed

def test_num_to_word():
    """Auto-filled variant: no assertion was generated for this test."""
    # NOTE(review): the reference test_all.py listing in this document checks
    # num_to_word('1') == 'one' and num_to_word('dog') == 'dog'.
    pass  # Add assertions when implemented

def test_flatten_unique():
    """Auto-filled variant: checks only the length of the flattened list."""
    l = [[1, 2], [1, 3, 4], [5]]
    flat_list = flatten_unique(l)
    # Weaker than the reference listing, which compares against [1, 2, 3, 4, 5].
    assert len(flat_list) == 5  # Example assertion, adjust as needed

def test_combinations_of_len():
    """Auto-filled variant: checks only the number of combinations returned."""
    l = [1, 2, 3]
    combs = combinations_of_len(l, 2)
    # NOTE(review): the reference test_all.py listing in this document expects
    # six tuples (all of size 1 and size 2) for this call, so this auto-filled
    # check (== 3) disagrees with it.
    assert len(combs) == 3  # Example assertion, adjust as needed

def test_fancy_titles():
    """Auto-filled variant: checks only that one title is returned per column."""
    cols = ['change_case', 'insert_duplicate_characters', 'synonym']
    titles = fancy_titles(cols)
    # Weaker than the reference listing, which compares the exact title strings.
    assert len(titles) == len(cols)  # Example assertion, adjust as needed