SQL
select date,content_id,rec_queue_name_,
-- w:word,到:结束,选第一个
regexp_extract(candidate_key_,'id=(\\w+):',1) as candidate_key,count(1) as exposure
from events
where date >= '2021-10-27' and date <= '2021-11-01'
and event = 'imp_detail_page'
and channel = 'ssss_popular'
and content_id = '7ebc54c538a745fcb898d08344dbf91f'
and (rec_queue_name_ = 'zili-recent-liked-eges-cold-start-exploit-queue'
or rec_queue_name_ = 'zili-recent-related-exploit-queue-v97')
group by date,content_id,rec_queue_name_,regexp_extract(candidate_key_,'id=(\\w+):',1)
python
A(.*?)B 表示截取 A/B 中间的字符串
import re
txt = 'inews_panipuri:queue:type=eges_explicit_second_coldstart_v3:when=hour:id=10e7e8fa6c5644b6b929a6b8a93f8bba:language=en:ordering=sim'
bs64_str = re.findall("id=(.*?):", txt)[0]
print(bs64_str)
def get_id(txt):
try:
bs64_str = re.findall("id=(.*?):", txt)[0]
except Exception as e:
print(e)
print(txt)
data['id'] = data['candidate_key'].apply(lambda x:get_id(x))
data.head()