Qifan Zhang committed

Commit e691ea0
1 Parent(s): 8f29b1d
feat: add log
Browse files:
- app.py +11 -0
- requirements.txt +2 -0
- utils/models.py +4 -4
app.py CHANGED

@@ -7,6 +7,7 @@ import pandas as pd
 
 from utils import pipeline
 from utils.models import list_models
+from loguru import logger
 
 
 def read_data(filepath: str) -> Optional[pd.DataFrame]:
@@ -27,6 +28,7 @@ def process(
     file=None,
 ) -> (None, pd.DataFrame, str):
     try:
+        logger.info(f'Processing {task_name} with {model_name} and {pooling}')
         # load file
         if file:
             df = read_data(file.name)
@@ -51,6 +53,15 @@ def process(
         return None, df.iloc[:10], path
 
     except:
+        error = traceback.format_exc()
+        logger.warning({
+            'error': error,
+            'task_name': task_name,
+            'model_name': model_name,
+            'pooling': pooling,
+            'text': text,
+            'file': file,
+        })
         return {'Info': 'Something wrong', 'Error': traceback.format_exc()}, None, None
 
 
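For context, a minimal standalone sketch of the logging pattern this commit adds to process() (the function body and argument values below are stand-ins, not the app's real pipeline): loguru's logger needs no handler setup, writing to stderr by default, and it stringifies non-str messages, so passing a dict records the whole error context in one log entry.

import traceback

from loguru import logger


def process(task_name, model_name, pooling, text='', file=None):
    try:
        logger.info(f'Processing {task_name} with {model_name} and {pooling}')
        return 1 / 0  # stand-in for the real work; forces the except branch
    except:
        error = traceback.format_exc()
        # a dict message keeps the traceback together with the inputs that caused it
        logger.warning({
            'error': error,
            'task_name': task_name,
            'model_name': model_name,
            'pooling': pooling,
            'text': text,
            'file': file,
        })
        return {'Info': 'Something wrong', 'Error': error}, None, None


process('classification', 'some-model', 'mean', 'hello')

Note that the committed code calls traceback.format_exc() a second time in the return statement; reusing the captured error string, as in the sketch, would avoid the duplicate call.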
requirements.txt CHANGED

@@ -8,3 +8,5 @@ sentence-transformers
 openpyxl
 tabulate
 gradio
+loguru
+
utils/models.py CHANGED

@@ -1,6 +1,7 @@
 from functools import lru_cache
 
 import torch
+from loguru import logger
 from sentence_transformers import SentenceTransformer
 from transformers import AutoTokenizer, AutoModel
 
@@ -19,10 +20,8 @@ list_models = [
 
 class SBert:
     def __init__(self, path):
-        print(f'Loading model from {path} ...')
         self.model = SentenceTransformer(path, device=DEVICE)
-
-        # pprint(self.model.__dict__)
+        logger.info(f'Load {self.__class__} from {path} ...')
 
     @lru_cache(maxsize=10000)
     def __call__(self, x) -> torch.Tensor:
@@ -34,8 +33,9 @@ class ModelWithPooling:
     def __init__(self, path):
         self.tokenizer = AutoTokenizer.from_pretrained(path)
         self.model = AutoModel.from_pretrained(path)
+        logger.info(f'Load {self.__class__} from {path} ...')
 
-    @lru_cache(maxsize=
+    @lru_cache(maxsize=100)
    @torch.no_grad()
    def __call__(self, text: str, pooling='mean'):
        inputs = self.tokenizer(text, padding=True, truncation=True, return_tensors="pt")
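One detail worth noting in the new decorator stack on ModelWithPooling.__call__: functools.lru_cache keys on every argument, including self, so entries are keyed per instance and a repeated (text, pooling) pair skips the forward pass entirely. Placing @lru_cache outside @torch.no_grad() means a cache hit returns before the no-grad context is even entered. A self-contained sketch of this behavior (the Encoder class and random tensor are stand-ins for the real tokenizer, model, and pooling):

from functools import lru_cache

import torch


class Encoder:
    @lru_cache(maxsize=100)   # outermost: a cache hit returns immediately
    @torch.no_grad()          # innermost: disables autograd for the real forward pass
    def __call__(self, text: str, pooling: str = 'mean') -> torch.Tensor:
        print(f'computing embedding for {text!r}')
        return torch.randn(4)  # stand-in for tokenizer + model + pooling


enc = Encoder()
enc('hello')  # computes (prints)
enc('hello')  # cache hit: no print, same tensor object returned

One caveat of this pattern: the cache holds a strong reference to self, so cached instances stay alive for the lifetime of the cache.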