Qifan Zhang
committed on
Commit
·
0e97d35
1
Parent(s):
8cd5cbf
update p2_flexibility, ui
Browse files- .gitignore +4 -2
- app.py +9 -11
- description.txt → data/description.txt +0 -0
- data/example.csv +10 -0
- utils/models.py +4 -4
- utils/pipeline.py +20 -5
.gitignore
CHANGED
|
@@ -1,3 +1,5 @@
|
|
| 1 |
-
data
|
| 2 |
.idea
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
.idea
|
| 2 |
+
data/example
|
| 3 |
+
data/tmp
|
| 4 |
+
|
| 5 |
+
output.csv
|
app.py
CHANGED
|
@@ -50,18 +50,13 @@ def process(task_name: str,
|
|
| 50 |
return {'Error': e}, None, None
|
| 51 |
|
| 52 |
|
|
|
|
| 53 |
task_name_dropdown = gr.components.Dropdown(
|
| 54 |
label='Task Name',
|
| 55 |
value='Originality',
|
| 56 |
choices=['Originality', 'Flexibility']
|
| 57 |
)
|
| 58 |
|
| 59 |
-
model_name_input = gr.components.Textbox(
|
| 60 |
-
value='paraphrase-multilingual-MiniLM-L12-v2',
|
| 61 |
-
lines=1,
|
| 62 |
-
type='text'
|
| 63 |
-
)
|
| 64 |
-
|
| 65 |
model_name_dropdown = gr.components.Dropdown(
|
| 66 |
label='Model Name',
|
| 67 |
value=list_models[0],
|
|
@@ -69,11 +64,16 @@ model_name_dropdown = gr.components.Dropdown(
|
|
| 69 |
)
|
| 70 |
|
| 71 |
text_input = gr.components.Textbox(
|
| 72 |
-
value='
|
| 73 |
lines=10,
|
| 74 |
type='text'
|
| 75 |
)
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
text_output = gr.components.Textbox(
|
| 78 |
label='Output',
|
| 79 |
type='text'
|
|
@@ -83,16 +83,14 @@ dataframe_output = gr.components.Dataframe(
|
|
| 83 |
label='DataFrame'
|
| 84 |
)
|
| 85 |
|
| 86 |
-
description = open('description.txt', 'r').read()
|
| 87 |
-
|
| 88 |
file_output = gr.components.File(label='Output File',
|
| 89 |
file_count='single',
|
| 90 |
file_types=['', '.', '.csv', '.xls', '.xlsx'])
|
| 91 |
|
| 92 |
app = gr.Interface(
|
| 93 |
fn=process,
|
| 94 |
-
inputs=[task_name_dropdown, model_name_dropdown, text_input,
|
| 95 |
outputs=[text_output, dataframe_output, file_output],
|
| 96 |
-
description=description
|
| 97 |
)
|
| 98 |
app.launch()
|
|
|
|
| 50 |
return {'Error': e}, None, None
|
| 51 |
|
| 52 |
|
| 53 |
+
# input
|
| 54 |
task_name_dropdown = gr.components.Dropdown(
|
| 55 |
label='Task Name',
|
| 56 |
value='Originality',
|
| 57 |
choices=['Originality', 'Flexibility']
|
| 58 |
)
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
model_name_dropdown = gr.components.Dropdown(
|
| 61 |
label='Model Name',
|
| 62 |
value=list_models[0],
|
|
|
|
| 64 |
)
|
| 65 |
|
| 66 |
text_input = gr.components.Textbox(
|
| 67 |
+
value=open('data/example.csv', 'r').read(),
|
| 68 |
lines=10,
|
| 69 |
type='text'
|
| 70 |
)
|
| 71 |
|
| 72 |
+
# output
|
| 73 |
+
file_input = gr.components.File(label='Input File',
|
| 74 |
+
file_count='single',
|
| 75 |
+
file_types=['', '.', '.csv', '.xls', '.xlsx'])
|
| 76 |
+
|
| 77 |
text_output = gr.components.Textbox(
|
| 78 |
label='Output',
|
| 79 |
type='text'
|
|
|
|
| 83 |
label='DataFrame'
|
| 84 |
)
|
| 85 |
|
|
|
|
|
|
|
| 86 |
file_output = gr.components.File(label='Output File',
|
| 87 |
file_count='single',
|
| 88 |
file_types=['', '.', '.csv', '.xls', '.xlsx'])
|
| 89 |
|
| 90 |
app = gr.Interface(
|
| 91 |
fn=process,
|
| 92 |
+
inputs=[task_name_dropdown, model_name_dropdown, text_input, file_input],
|
| 93 |
outputs=[text_output, dataframe_output, file_output],
|
| 94 |
+
description=open('data/description.txt', 'r').read()
|
| 95 |
)
|
| 96 |
app.launch()
|
description.txt → data/description.txt
RENAMED
|
File without changes
|
data/example.csv
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
id,prompt,response
|
| 2 |
+
1,εΊε,θΏζ»€ε¨
|
| 3 |
+
1,εΊε,εζζΈη½ζι±Ό
|
| 4 |
+
1,εΊε,εζζ倴
|
| 5 |
+
1,ηε·,ζ
δΊΊ
|
| 6 |
+
1,ηε·,η¨δ½ιζ
|
| 7 |
+
1,ηε·,ε½ι£ιζ
|
| 8 |
+
2,εΊε,εθ’«ε
|
| 9 |
+
2,εΊε,δΏζ
|
| 10 |
+
2,εΊε,η»ε¨ζ δΈεζεεΊ
|
utils/models.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
import numpy as np
|
| 2 |
-
import torch
|
| 3 |
from functools import lru_cache
|
|
|
|
|
|
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
|
| 6 |
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
@@ -20,6 +20,6 @@ class SBert:
|
|
| 20 |
self.model = SentenceTransformer(path, device=DEVICE)
|
| 21 |
|
| 22 |
@lru_cache(maxsize=10000)
|
| 23 |
-
def __call__(self, x) ->
|
| 24 |
-
y = self.model.encode(x)
|
| 25 |
return y
|
|
|
|
|
|
|
|
|
|
| 1 |
from functools import lru_cache
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
from sentence_transformers import SentenceTransformer
|
| 5 |
|
| 6 |
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
|
|
|
|
| 20 |
self.model = SentenceTransformer(path, device=DEVICE)
|
| 21 |
|
| 22 |
@lru_cache(maxsize=10000)
|
| 23 |
+
def __call__(self, x) -> torch.Tensor:
|
| 24 |
+
y = self.model.encode(x, convert_to_tensor=True)
|
| 25 |
return y
|
utils/pipeline.py
CHANGED
|
@@ -9,22 +9,37 @@ def p0_originality(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
|
|
| 9 |
assert 'response' in df.columns
|
| 10 |
model = SBert(model_name)
|
| 11 |
|
| 12 |
-
def get_cos_sim(
|
| 13 |
prompt_vec = model(prompt)
|
| 14 |
response_vec = model(response)
|
| 15 |
score = cos_sim(prompt_vec, response_vec).item()
|
| 16 |
return score
|
| 17 |
|
| 18 |
-
df['originality'] = df.apply(lambda x: 1 - get_cos_sim(
|
| 19 |
return df
|
| 20 |
|
| 21 |
|
| 22 |
def p1_flexibility(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
|
| 23 |
-
|
|
|
|
| 24 |
assert 'id' in df.columns
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
df_out = df.groupby(by=['id', 'prompt']) \
|
| 26 |
-
.agg({'id': 'first', 'prompt': 'first', '
|
| 27 |
-
.rename(columns={'
|
| 28 |
.reset_index(drop=True)
|
| 29 |
return df_out
|
| 30 |
|
|
|
|
| 9 |
assert 'response' in df.columns
|
| 10 |
model = SBert(model_name)
|
| 11 |
|
| 12 |
+
def get_cos_sim(prompt: str, response: str) -> float:
|
| 13 |
prompt_vec = model(prompt)
|
| 14 |
response_vec = model(response)
|
| 15 |
score = cos_sim(prompt_vec, response_vec).item()
|
| 16 |
return score
|
| 17 |
|
| 18 |
+
df['originality'] = df.apply(lambda x: 1 - get_cos_sim(x['prompt'], x['response']), axis=1)
|
| 19 |
return df
|
| 20 |
|
| 21 |
|
| 22 |
def p1_flexibility(df: pd.DataFrame, model_name: str) -> pd.DataFrame:
    """Score every (id, prompt) group by the similarity among its responses.

    Each response is embedded with the ``SBert`` model named ``model_name``
    and the group's ``flexibility`` is the mean cosine similarity taken over
    all unordered pairs of its responses.

    Args:
        df: Input frame; must contain the columns ``id``, ``prompt`` and
            ``response``.
        model_name: Model identifier forwarded to ``SBert``.

    Returns:
        A frame with one row per (id, prompt) group and the columns ``id``,
        ``prompt`` and ``flexibility``. ``flexibility`` is NaN for a group
        with fewer than two responses, because no pair exists to compare.

    Raises:
        AssertionError: If a required column is missing.
    """
    # Imported locally so the block does not rely on the module's import list.
    from itertools import combinations

    assert 'prompt' in df.columns
    assert 'response' in df.columns
    assert 'id' in df.columns
    model = SBert(model_name)

    def get_cos_sim(responses: list[str]) -> float:
        """Return the mean cosine similarity over all unordered response pairs."""
        # pandas hands over the group's responses as an iterable of strings.
        responses_vec = [model(response) for response in responses]
        # combinations() yields every unordered pair exactly once. The previous
        # nested loop started its inner index at 1 instead of after ``i``, so
        # pairs containing the first response were counted once while all other
        # pairs were counted twice, which skewed the mean.
        scores = [cos_sim(vec_a, vec_b).item()
                  for vec_a, vec_b in combinations(responses_vec, 2)]
        if not scores:
            # A single response has no pair; report "undefined" rather than
            # dividing by zero.
            return float('nan')
        return sum(scores) / len(scores)

    df_out = df.groupby(by=['id', 'prompt']) \
        .agg({'id': 'first', 'prompt': 'first', 'response': get_cos_sim}) \
        .rename(columns={'response': 'flexibility'}) \
        .reset_index(drop=True)
    return df_out
|
| 45 |
|