完成单元测试 | 更新架构
This commit is contained in:
parent
c27bb27ec3
commit
b811246fba
7
.gitignore
vendored
7
.gitignore
vendored
@ -6,4 +6,9 @@ node_modules
|
||||
__pycache__
|
||||
*.pyc
|
||||
*.pyd
|
||||
*.pyi
|
||||
*.pyi
|
||||
scripts/*.ts
|
||||
audio
|
||||
images
|
||||
logs
|
||||
*.sklearn
|
42
README.md
42
README.md
@ -32,13 +32,47 @@ pip install -r requirements.txt
|
||||
## 架构
|
||||
|
||||
```mermaid
|
||||
graph LR
|
||||
a(拉格朗日 NTQQ server) <-->|http,ws| b(onebot layer)
|
||||
graph TB
|
||||
core(Lagrange.Core)
|
||||
onebot(Lagrange.onebot)
|
||||
vecdb(vecdb)
|
||||
llm(LLM)
|
||||
intent(intent tree)
|
||||
|
||||
c(vecdb) -->|http| b
|
||||
d(LLM) -->|http| b
|
||||
core(Lagrange.Core) --> onebot(Lagrange.onebot)
|
||||
|
||||
onebot -->|query| intent
|
||||
intent -->|intent| onebot
|
||||
|
||||
subgraph Intent Recognition
|
||||
intent -->|query| vecdb
|
||||
vecdb -->|ktop| intent
|
||||
intent -->|ktop,query| llm
|
||||
llm -->|intent| intent
|
||||
end
|
||||
|
||||
subgraph execution
|
||||
onebot --> command{intent}
|
||||
command --> query
|
||||
command --> upload
|
||||
command --> ...
|
||||
end
|
||||
|
||||
subgraph third party
|
||||
LLM
|
||||
Google
|
||||
server
|
||||
end
|
||||
|
||||
query --> LLM
|
||||
query --> Google
|
||||
upload --> server
|
||||
```
|
||||
|
||||
- `Lagrange.onebot` --> 📁bot
|
||||
- `vecdb` --> 📁rag
|
||||
- `intent tree` --> 📁prompt
|
||||
|
||||
---
|
||||
|
||||
## 接口规范
|
||||
|
@ -1,453 +0,0 @@
|
||||
/**
|
||||
* @author 锦恢
|
||||
* @email 1193466151@qq.com
|
||||
* @description Lagrange.Core 前端接口
|
||||
* @comment 接口调用详细参考文档
|
||||
* - https://github.com/botuniverse/onebot-11/blob/master/communication/ws.md
|
||||
*/
|
||||
|
||||
import * as Lagrange from '../type';
|
||||
|
||||
/**
 * Build the `send_private_msg` action payload (private chat message).
 *
 * @param user_id QQ number of the recipient
 * @param message content to send: a plain string or an array of `Lagrange.Send.Default` items
 * @param auto_escape send the content as plain text (CQ codes are not parsed);
 *                    only effective when `message` is a string
 * @returns the `{ action, params }` object; this function does not send anything itself
 */
export function sendPrivateMsg(user_id: number, message: string | Lagrange.Send.Default[], auto_escape: boolean = false) {
    const params = { user_id, message, auto_escape };
    return { action: 'send_private_msg', params };
}
|
||||
|
||||
/**
 * Build the `send_group_msg` action payload (group message).
 *
 * @param group_id group number
 * @param message content to send: a plain string or an array of `Lagrange.Send.Default` items
 * @param auto_escape send the content as plain text (CQ codes are not parsed);
 *                    only effective when `message` is a string
 * @returns the `{ action, params }` object; this function does not send anything itself
 */
export function sendGroupMsg(group_id: number, message: string | Lagrange.Send.Default[], auto_escape: boolean = false) {
    const params = { group_id, message, auto_escape };
    return { action: 'send_group_msg', params };
}
|
||||
|
||||
/**
 * Build the generic `send_msg` action payload.
 *
 * @param message_type message type; `private` and `group` are supported (private chat / group chat)
 * @param user_id QQ number of the recipient (needed when the type is `private`)
 * @param group_id group number (needed when the type is `group`)
 * @param message content to send: a plain string or an array of `Lagrange.Send.Default` items
 * @param auto_escape send the content as plain text (CQ codes are not parsed);
 *                    only effective when `message` is a string
 * @returns the `{ action, params }` object; this function does not send anything itself
 */
export function sendMsg(message_type: string, user_id: number, group_id: number, message: string | Lagrange.Send.Default[], auto_escape: boolean = false) {
    const params = { message_type, user_id, group_id, message, auto_escape };
    return { action: 'send_msg', params };
}
|
||||
|
||||
/**
 * Build the `delete_msg` action payload (recall a message).
 *
 * @param message_id ID of the message
 * @returns the `{ action, params }` object
 */
export function deleteMsg(message_id: number) {
    const params = { message_id };
    return { action: 'delete_msg', params };
}
|
||||
|
||||
/**
 * Build the `get_msg` action payload (fetch a message).
 *
 * @param message_id ID of the message
 * @returns the `{ action, params }` object
 */
export function getMsg(message_id: number) {
    const params = { message_id };
    return { action: 'get_msg', params };
}
|
||||
|
||||
/**
 * Build the `get_forward_msg` action payload (fetch a merged-forward message).
 *
 * @param id merged-forward ID
 * @returns the `{ action, params }` object
 */
export function getForwardMsg(id: string) {
    const params = { id };
    return { action: 'get_forward_msg', params };
}
|
||||
|
||||
/**
 * Build the `send_like` action payload (send profile likes to a friend).
 *
 * @param user_id QQ number of the target user
 * @param times number of likes; at most 10 per friend per day
 * @returns the `{ action, params }` object
 */
export function sendLike(user_id: number, times: number = 1) {
    const params = { user_id, times };
    return { action: 'send_like', params };
}
|
||||
|
||||
/**
 * Build the `set_group_kick` action payload (remove a member from a group).
 *
 * @param group_id group number
 * @param user_id QQ number of the member to kick
 * @param reject_add_request also reject this user's future join requests
 * @returns the `{ action, params }` object
 */
export function setGroupKick(group_id: number, user_id: number, reject_add_request: boolean = false) {
    const params = { group_id, user_id, reject_add_request };
    return { action: 'set_group_kick', params };
}
|
||||
|
||||
/**
 * Build the `set_group_ban` action payload (mute a single group member).
 *
 * @param group_id group number
 * @param user_id QQ number of the member to mute
 * @param duration mute duration in seconds (defaults to 30 minutes); 0 lifts the mute
 * @returns the `{ action, params }` object
 */
export function setGroupBan(group_id: number, user_id: number, duration: number = 30 * 60) {
    const params = { group_id, user_id, duration };
    return { action: 'set_group_ban', params };
}
|
||||
|
||||
/**
 * Build the `set_group_anonymous_ban` action payload (mute an anonymous group user).
 *
 * @param group_id group number
 * @param anonymous the anonymous-user object to mute (the `anonymous` field of a reported group message)
 * @param anonymous_flag flag of the anonymous user to mute (taken from the reported group message data)
 * @param duration mute duration in seconds (defaults to 30 minutes); a mute on an anonymous user cannot be lifted
 * @returns the `{ action, params }` object
 */
export function setGroupAnonymousBan(group_id: number, anonymous: object, anonymous_flag: string, duration: number = 30 * 60) {
    const params = { group_id, anonymous, anonymous_flag, duration };
    return { action: 'set_group_anonymous_ban', params };
}
|
||||
|
||||
/**
 * Build the `set_group_whole_ban` action payload (mute the whole group).
 *
 * @param group_id group number
 * @param enable whether to mute
 * @returns the `{ action, params }` object
 */
export function setGroupWholeBan(group_id: number, enable: boolean = true) {
    const params = { group_id, enable };
    return { action: 'set_group_whole_ban', params };
}
|
||||
|
||||
/**
 * Build the `set_group_admin` action payload (grant or revoke group admin).
 *
 * @param group_id group number
 * @param user_id QQ number of the member
 * @param enable `true` to grant, `false` to revoke
 * @returns the `{ action, params }` object
 */
export function setGroupAdmin(group_id: number, user_id: number, enable: boolean = true) {
    const params = { group_id, user_id, enable };
    return { action: 'set_group_admin', params };
}
|
||||
|
||||
/**
 * Build the `set_group_anonymous` action payload (toggle anonymous chat in a group).
 *
 * @param group_id group number
 * @param enable whether anonymous chat is allowed
 * @returns the `{ action, params }` object
 */
export function setGroupAnonymous(group_id: number, enable: boolean = true) {
    const params = { group_id, enable };
    return { action: 'set_group_anonymous', params };
}
|
||||
|
||||
/**
 * Build the `set_group_card` action payload (set a member's group card / group remark).
 *
 * @param group_id group number
 * @param user_id QQ number of the member
 * @param card card text; omitted or an empty string removes the card
 * @returns the `{ action, params }` object
 */
export function setGroupCard(group_id: number, user_id: number, card: string = "") {
    const params = { group_id, user_id, card };
    return { action: 'set_group_card', params };
}
|
||||
|
||||
/**
 * Build the `set_group_name` action payload (rename a group).
 *
 * @param group_id group number
 * @param group_name new group name
 * @returns the `{ action, params }` object
 */
export function setGroupName(group_id: number, group_name: string) {
    const params = { group_id, group_name };
    return { action: 'set_group_name', params };
}
|
||||
|
||||
/**
 * Build the `set_group_leave` action payload (leave a group).
 *
 * @param group_id group number
 * @param is_dismiss whether to dissolve the group; when the logged-in account is the
 *                   owner, the group is dissolved only if this is `true`
 * @returns the `{ action, params }` object
 */
export function setGroupLeave(group_id: number, is_dismiss: boolean = false) {
    const params = { group_id, is_dismiss };
    return { action: 'set_group_leave', params };
}
|
||||
|
||||
/**
 * Build the `set_group_special_title` action payload (set a member's special title).
 *
 * @param group_id group number
 * @param user_id QQ number of the member
 * @param special_title the title; omitted or an empty string removes the title
 * @param duration validity in seconds, -1 meaning permanent. The original author notes this
 *                 appears to have no effect (possibly only some specific durations work) and
 *                 still needs testing.
 * @returns the `{ action, params }` object
 */
export function setGroupSpecialTitle(group_id: number, user_id: number, special_title: string = "", duration: number = -1) {
    const params = { group_id, user_id, special_title, duration };
    return { action: 'set_group_special_title', params };
}
|
||||
|
||||
/**
 * Build the `set_friend_add_request` action payload (handle a friend request).
 *
 * @param flag flag of the friend request (taken from the reported event data)
 * @param approve whether to accept the request
 * @param remark remark for the new friend (only effective when accepting)
 * @returns the `{ action, params }` object
 */
export function setFriendAddRequest(flag: string, approve: boolean = true, remark: string = "") {
    const params = { flag, approve, remark };
    return { action: 'set_friend_add_request', params };
}
|
||||
|
||||
/**
 * Build the `set_group_add_request` action payload (handle a group join request or invitation).
 *
 * @param flag flag of the request (taken from the reported event data)
 * @param sub_type `add` or `invite`; must match the `sub_type` field of the reported event
 * @param approve whether to accept the request / invitation
 * @param reason reason for rejection (only effective when rejecting)
 * @returns the `{ action, params }` object
 */
export function setGroupAddRequest(flag: string, sub_type: string, approve: boolean = true, reason: string = "") {
    const params = { flag, sub_type, approve, reason };
    return { action: 'set_group_add_request', params };
}
|
||||
|
||||
/**
 * Build the `get_login_info` action payload (info about the logged-in account).
 *
 * @returns the `{ action, params }` object with empty params
 */
export function getLoginInfo() {
    const params = {};
    return { action: 'get_login_info', params };
}
|
||||
|
||||
/**
 * Build the `get_stranger_info` action payload (info about an arbitrary user).
 *
 * @param user_id QQ number
 * @param no_cache skip the cache (cached data may be stale but responds faster)
 * @returns the `{ action, params }` object
 */
export function getStrangerInfo(user_id: number, no_cache: boolean = false) {
    const params = { user_id, no_cache };
    return { action: 'get_stranger_info', params };
}
|
||||
|
||||
/**
 * Build the `get_friend_list` action payload.
 *
 * @returns the `{ action, params }` object with empty params
 */
export function getFriendList() {
    const params = {};
    return { action: 'get_friend_list', params };
}
|
||||
|
||||
/**
 * Build the `get_group_info` action payload.
 *
 * @param group_id group number
 * @param no_cache skip the cache (cached data may be stale but responds faster)
 * @returns the `{ action, params }` object
 */
export function getGroupInfo(group_id: number, no_cache: boolean = false) {
    const params = { group_id, no_cache };
    return { action: 'get_group_info', params };
}
|
||||
|
||||
/**
 * Build the `get_group_list` action payload.
 *
 * @returns the `{ action, params }` object with empty params
 */
export function getGroupList() {
    const params = {};
    return { action: 'get_group_list', params };
}
|
||||
|
||||
/**
 * Build the `get_group_member_info` action payload.
 *
 * @param group_id group number
 * @param user_id QQ number
 * @param no_cache skip the cache (cached data may be stale but responds faster)
 * @returns the `{ action, params }` object
 */
export function getGroupMemberInfo(group_id: number, user_id: number, no_cache: boolean = false) {
    const params = { group_id, user_id, no_cache };
    return { action: 'get_group_member_info', params };
}
|
||||
|
||||
/**
 * Build the `get_group_member_list` action payload.
 *
 * @param group_id group number
 * @returns the `{ action, params }` object
 */
export function getGroupMemberList(group_id: number) {
    const params = { group_id };
    return { action: 'get_group_member_list', params };
}
|
||||
|
||||
/**
 * Build the `get_group_honor_info` action payload.
 *
 * @param group_id group number
 * @param type honor type to fetch: one of `talkative`, `performer`, `legend`,
 *             `strong_newbie`, `emotion` for a single category, or `all` for everything
 * @returns the `{ action, params }` object
 */
export function getGroupHonorInfo(group_id: number, type: string) {
    const params = { group_id, type };
    return { action: 'get_group_honor_info', params };
}
|
||||
|
||||
/**
 * Build the `get_cookies` action payload.
 *
 * @param domain domain whose cookies are requested
 * @returns the `{ action, params }` object
 */
export function getCookies(domain: string = "") {
    const params = { domain };
    return { action: 'get_cookies', params };
}
|
||||
|
||||
/**
 * Build the `get_csrf_token` action payload.
 *
 * @returns the `{ action, params }` object with empty params
 */
export function getCsrfToken() {
    const params = {};
    return { action: 'get_csrf_token', params };
}
|
||||
|
||||
/**
 * Build the `get_credentials` action payload (QQ-related API credentials).
 *
 * OneBot 11 defines a `domain` parameter for this action; it is accepted here as an
 * optional argument. When omitted, `params` stays empty exactly as before.
 *
 * @param domain domain whose cookies are requested (optional)
 * @returns the `{ action, params }` object
 */
export function getCredentials(domain?: string) {
    const params: { domain?: string } = {};
    // Only attach the key when a value was given, so the zero-argument payload is unchanged.
    if (domain !== undefined) {
        params.domain = domain;
    }
    return { action: 'get_credentials', params };
}
|
||||
|
||||
/**
 * Build the `get_record` action payload (fetch a voice recording).
 *
 * OneBot 11 defines `file` and `out_format` parameters for this action; without them the
 * request cannot identify which recording to fetch. Both are accepted here as optional
 * arguments so existing zero-argument callers keep producing the same empty `params`.
 *
 * @param file file name of the received recording (the `file` field of the message segment)
 * @param out_format target format to convert to, e.g. `mp3`, `amr`, `wav`
 * @returns the `{ action, params }` object
 */
export function getRecord(file?: string, out_format?: string) {
    const params: { file?: string; out_format?: string } = {};
    // Attach only the keys that were provided; the zero-argument payload is unchanged.
    if (file !== undefined) {
        params.file = file;
    }
    if (out_format !== undefined) {
        params.out_format = out_format;
    }
    return { action: 'get_record', params };
}
|
||||
|
||||
/**
 * Build the `get_image` action payload.
 *
 * @param file file name of the received image (the `file` field of the message segment),
 *             e.g. `6B4DE3DFD1BD271E3297859D41C530F5.jpg`
 * @returns the `{ action, params }` object
 */
export function getImage(file: string) {
    const params = { file };
    return { action: 'get_image', params };
}
|
||||
|
||||
/**
 * Build the `can_send_image` action payload (check whether images can be sent).
 *
 * @returns the `{ action, params }` object with empty params
 */
export function canSendImage() {
    const params = {};
    return { action: 'can_send_image', params };
}
|
||||
|
||||
/**
 * Build the `can_send_record` action payload (check whether voice messages can be sent).
 *
 * @returns the `{ action, params }` object with empty params
 */
export function canSendRecord() {
    const params = {};
    return { action: 'can_send_record', params };
}
|
||||
|
||||
/**
 * Build the `get_status` action payload (running status).
 *
 * @returns the `{ action, params }` object with empty params
 */
export function getStatus() {
    const params = {};
    return { action: 'get_status', params };
}
|
||||
|
||||
/**
 * Build the `get_version_info` action payload.
 *
 * @returns the `{ action, params }` object with empty params
 */
export function getVersionInfo() {
    const params = {};
    return { action: 'get_version_info', params };
}
|
||||
|
||||
/**
 * Build the `set_restart` action payload (restart the OneBot implementation).
 *
 * OneBot 11 defines an optional `delay` parameter for this action; it is accepted here as
 * an optional argument. When omitted, `params` stays empty exactly as before.
 *
 * @param delay delay before restarting, in milliseconds (optional)
 * @returns the `{ action, params }` object
 */
export function setRestart(delay?: number) {
    const params: { delay?: number } = {};
    // Only attach the key when a value was given, so the zero-argument payload is unchanged.
    if (delay !== undefined) {
        params.delay = delay;
    }
    return { action: 'set_restart', params };
}
|
||||
|
||||
/**
 * Build the `clean_cache` action payload.
 *
 * @returns the `{ action, params }` object with empty params
 */
export function cleanCache() {
    const params = {};
    return { action: 'clean_cache', params };
}
|
||||
|
@ -13,6 +13,16 @@ export const apiQueryVecdb = (req: apiQueryVecdbRequest) => r<CommonResponse<api
|
||||
data: req
|
||||
});
|
||||
|
||||
|
||||
/**
 * POST the request to `/intent/get-intent-recogition` through the shared request helper `r`.
 *
 * @param req request body, sent as-is
 * @returns whatever `r` returns for this request
 */
export const apiGetIntentRecogition = (req: apiGetIntentRecogitionRequest) => {
    return r<CommonResponse<apiGetIntentRecogitionData>>({
        url: '/intent/get-intent-recogition',
        method: 'POST',
        data: req
    });
};
|
||||
|
||||
/**
 * POST to `/intent/retrain-embedding-mapping` through the shared request helper `r`.
 *
 * The request object is now forwarded as the body, matching the sibling API calls;
 * previously `req` was accepted but silently dropped.
 * NOTE(review): confirm the endpoint tolerates an (empty) JSON body.
 *
 * @param req request body (currently an empty object type)
 * @returns whatever `r` returns for this request
 */
export const apiIntentRetrain = (req: apiIntentRetrainRequest) => r<CommonResponse<apiIntentRetrainData>>({
    url: '/intent/retrain-embedding-mapping', method: 'POST',
    data: req
});
|
||||
|
||||
export interface apiQueryVecdbRequest {
|
||||
query: string,
|
||||
k?: number
|
||||
@ -28,4 +38,19 @@ export interface apiQueryVecdbDataItem {
|
||||
}
|
||||
}
|
||||
|
||||
export type apiQueryVecdbData = apiQueryVecdbDataItem[];
|
||||
export type apiQueryVecdbData = apiQueryVecdbDataItem[];
|
||||
|
||||
|
||||
/** Request body for `apiGetIntentRecogition`. */
export interface apiGetIntentRecogitionRequest {
    /** Query text sent to the intent endpoint. */
    query: string;
}
|
||||
|
||||
/** Response data of `apiGetIntentRecogition`: a numeric id and a name. */
export interface apiGetIntentRecogitionData {
    id: number;
    name: string;
}
|
||||
|
||||
/** Request body for `apiIntentRetrain`; declares no fields at present. */
export interface apiIntentRetrainRequest {}
|
||||
|
||||
/** Response data of `apiIntentRetrain`: a plain string. */
export type apiIntentRetrainData = string;
|
0
bot/digital-ide.ts
Normal file
0
bot/digital-ide.ts
Normal file
63
bot/event.ts
63
bot/event.ts
@ -1,63 +0,0 @@
|
||||
import lagrangeMapper from './lagrange-mapping';
|
||||
|
||||
import type * as Lagrange from './type';
|
||||
import type { LagrangeContext } from './context';
|
||||
|
||||
/**
 * Routes an incoming Lagrange event to the pipeline matching its `post_type`, and
 * routes `message` events on to the mapper by `message_type`.
 */
class Pipe {
    context: LagrangeContext | undefined;
    send: Lagrange.SendApi | undefined;

    /**
     * Store the context and keep a copy of its `send` bound to that context.
     * Must be called before message events can be delivered to handlers.
     */
    public injectContext(context: LagrangeContext) {
        this.context = context;
        this.send = context.send.bind(context);
    }

    /** Dispatch on `post_type`; unknown types are ignored. */
    public run(message: Lagrange.Message) {
        switch (message.post_type) {
            case 'message':
                this.messagePipe(message);
                break;
            case 'notice':
                this.noticePipe(message);
                break;
            case 'request':
                this.requestPipe(message);
                break;
            default:
                break;
        }
    }

    /**
     * Handle events whose `post_type` is `message`.
     *
     * If no context has been injected yet there is no `send` function to give to the
     * handlers, so the event is dropped with a warning instead of passing `undefined`
     * where a `SendApi` is expected.
     */
    public messagePipe(message: Lagrange.MessagePostType) {
        const send = this.send;
        if (send === undefined) {
            console.warn('Pipe.messagePipe: context not injected yet, message dropped');
            return;
        }

        switch (message.message_type) {
            case 'private':
                lagrangeMapper.resolvePrivateUser(message, send);
                break;
            case 'group':
                lagrangeMapper.resolveGroup(message, send);
                break;
            default:
                break;
        }
    }

    /** Handle events whose `post_type` is `notice`. Currently does nothing. */
    public noticePipe(message: Lagrange.NoticePostType) {

    }

    /** Handle events whose `post_type` is `request`. Currently does nothing. */
    public requestPipe(message: Lagrange.RequestPostType) {

    }
}
|
||||
|
||||
export const pipe = new Pipe();
|
||||
|
||||
/**
 * Handler for a raw incoming frame: decode it as UTF-8 JSON and hand every event
 * except `meta_event` to the shared `pipe`.
 *
 * A frame that is not valid JSON, or that does not decode to an object, is logged
 * and ignored rather than throwing out of the event handler.
 *
 * @param event raw frame bytes
 */
export function onMessage(event: Buffer) {
    const rawText = event.toString('utf-8');

    let parsed: unknown;
    try {
        parsed = JSON.parse(rawText);
    } catch (error: unknown) {
        console.error('onMessage: failed to parse incoming frame as JSON', error);
        return;
    }

    // JSON such as `null` or `42` parses fine but carries no `post_type`.
    if (typeof parsed !== 'object' || parsed === null) {
        console.error('onMessage: incoming frame is not a JSON object');
        return;
    }

    const messageJson = parsed as Lagrange.Message;
    // Skip system (meta_event) messages.
    if (messageJson.post_type !== 'meta_event') {
        console.log('进入 runPipe');
        pipe.run(messageJson);
    }
}
|
||||
|
||||
|
||||
/** Log that the server connection has been closed. */
export function onClose() {
    const notice = '服务器连接关闭';
    console.log(notice);
}
|
15
bot/impl.ts
15
bot/impl.ts
@ -1,15 +0,0 @@
|
||||
import lagrangeMapper from './lagrange-mapping';
|
||||
import { apiQueryVecdb } from './api/vecdb';
|
||||
|
||||
import type * as Lagrange from './type';
|
||||
|
||||
|
||||
/** Handler class whose methods are registered with the mapper through decorators. */
export class Impl {

    /** Private-message handler for user 1193466151: logs the raw message text. */
    @lagrangeMapper.onPrivateUser(1193466151)
    async handleJinhui(c: Lagrange.PrivateUserInvokeContext) {
        const rawText = c.message.raw_message;
        console.log('raw message:' + rawText);
    }

}
|
@ -1,95 +0,0 @@
|
||||
import assert from 'assert';
|
||||
|
||||
import type * as Lagrange from './type';
|
||||
|
||||
/** What a handler may produce: nothing, a string, or a single send item, possibly asynchronously. */
type InvokerResult = Lagrange.Thenable<undefined | void | string | Lagrange.Send.Default>;

/** Handler for a private-message context. */
type PrivateUserInvoker = (context: Lagrange.PrivateUserInvokeContext) => InvokerResult;

/** Handler for a group-message context. */
type GroupUserInvoker = (context: Lagrange.GroupUserInvokeContext) => InvokerResult;

/** Any handler the mapper can store. */
type MessageInvoker = PrivateUserInvoker | GroupUserInvoker;
|
||||
|
||||
/**
 * Property-descriptor shape handed to the mapper's method decorators, with `value`
 * typed as the handler kind `T`.
 */
interface CustomDescriptor<T extends MessageInvoker> {
    /** The decorated method, when the descriptor is a data descriptor. */
    value?: T;
    configurable?: boolean;
    enumerable?: boolean;
    writable?: boolean;
    get?(): any;
    set?(v: any): void;
}
|
||||
|
||||
/** One registry entry: the handler plus the optional config it was registered with. */
interface MessageInvokerStorage<T extends MessageInvoker> {
    invoker: T;
    config?: Partial<Lagrange.CommonMessage>;
}
|
||||
|
||||
/**
 * Registry that maps a private user (by QQ number) or a group (by group number) to a
 * handler registered through a method decorator, and invokes that handler when a
 * matching message arrives. One handler is kept per key; registering again overwrites.
 */
class LagrangeMapper {
    private _privateUserStorage: Map<number, MessageInvokerStorage<PrivateUserInvoker>>;
    private _groupStorage: Map<number, MessageInvokerStorage<GroupUserInvoker>>;

    constructor() {
        this._privateUserStorage = new Map<number, MessageInvokerStorage<PrivateUserInvoker>>();
        this._groupStorage = new Map<number, MessageInvokerStorage<GroupUserInvoker>>();
    }

    /** Handlers registered for private users, keyed by `user_id`. */
    get privateUserStorage() {
        return this._privateUserStorage;
    }

    /** Handlers registered for groups, keyed by `group_id`. */
    get groupStorage() {
        return this._groupStorage;
    }

    /**
     * Invoke the handler registered for the sender of a private message, if any.
     *
     * @param message the private message
     * @param send send function passed through to the handler
     */
    public resolvePrivateUser(message: Lagrange.PrivateMessage, send: Lagrange.SendApi) {
        const userStorage = this._privateUserStorage.get(message.user_id);
        if (userStorage) {
            userStorage.invoker({ message, send });
        }
    }

    /**
     * Invoke the handler registered for the group a message came from, if any.
     *
     * @param message the group message
     * @param send send function passed through to the handler
     */
    public resolveGroup(message: Lagrange.GroupMessage, send: Lagrange.SendApi) {
        const groupStorage = this._groupStorage.get(message.group_id);
        if (groupStorage) {
            groupStorage.invoker({ message, send });
        }
    }

    /**
     * Method decorator factory: register the decorated method as the handler for
     * private messages from `user_id`.
     *
     * @throws AssertionError when the decorated member has no `value` (i.e. it is not a
     *         plain method), so a broken registration fails immediately instead of
     *         crashing later when a message is dispatched
     */
    public onPrivateUser(user_id: number) {
        return (target: any, propertyKey: string, descriptor: CustomDescriptor<PrivateUserInvoker>) => {
            const invoker = descriptor.value;
            assert(invoker, `${propertyKey} -> onPrivateUser must decorate a method`);

            if (this._privateUserStorage.has(user_id)) {
                console.warn(`${propertyKey} -> 用户 ${user_id} 已经被注册过了,该操作将覆盖原本的!`);
            }
            this._privateUserStorage.set(user_id, { invoker });
        };
    }

    /**
     * Method decorator factory for a specific user inside a specific group.
     *
     * Only validates that `user_id` and `group_id` are present; the returned decorator
     * does not register anything yet.
     * TODO: not implemented - decide how (group, user) handlers are stored.
     */
    public onGroupUser(config: Partial<Lagrange.CommonMessage>) {
        assert(config.user_id, 'onGroupUser 中 user_id 不能为空');
        assert(config.group_id, 'onGroupUser 中 group_id 不能为空');
        return function(target: any, propertyKey: string, descriptor: CustomDescriptor<GroupUserInvoker>) {

        }
    }

    /**
     * Method decorator factory: register the decorated method as the handler for
     * messages from group `config.group_id`; `config` is stored alongside the handler.
     *
     * @throws AssertionError when `config.group_id` is missing, or when the decorated
     *         member has no `value`
     */
    public onGroup(config: Partial<Lagrange.CommonMessage>) {
        // Capture once so the narrowed (non-undefined) value is what the closure uses.
        const group_id = config.group_id;
        assert(group_id, 'onGroup 中 group_id 不能为空');

        return (target: any, propertyKey: string, descriptor: CustomDescriptor<GroupUserInvoker>) => {
            const invoker = descriptor.value;
            assert(invoker, `${propertyKey} -> onGroup must decorate a method`);

            if (this._groupStorage.has(group_id)) {
                console.warn(`${propertyKey} -> 群 ${group_id} 已经被注册过了,该操作将覆盖原本的!`);
            }
            this._groupStorage.set(group_id, { invoker, config });
        };
    }
}
|
||||
|
||||
const lagMapper = new LagrangeMapper();
|
||||
export default lagMapper;
|
18
bot/main.ts
18
bot/main.ts
@ -1,14 +1,24 @@
|
||||
import * as fs from 'fs';
|
||||
|
||||
import lagServer from './context';
|
||||
import './impl';
|
||||
import { server } from 'lagrange.onebot';
|
||||
import './test';
|
||||
import './digital-ide';
|
||||
|
||||
const buffer = fs.readFileSync('./app/publish/appsettings.json', 'utf-8');
|
||||
const config = JSON.parse(buffer);
|
||||
const impl = config.Implementations[0];
|
||||
|
||||
lagServer.run({
|
||||
server.onMounted(c => {
|
||||
c.sendPrivateMsg(1193466151, '成功上线');
|
||||
});
|
||||
|
||||
server.onUnmounted(c => {
|
||||
c.sendPrivateMsg(1193466151, '成功下线');
|
||||
});
|
||||
|
||||
server.run({
|
||||
host: impl.Host,
|
||||
port: impl.Port,
|
||||
path: impl.Suffix
|
||||
path: impl.Suffix,
|
||||
qq: 1542544558
|
||||
});
|
@ -1,32 +1,27 @@
|
||||
import * as Lagrange from './type';
|
||||
import { plugins, LagrangeContext, Message } from 'lagrange.onebot';
|
||||
|
||||
export class Impl {
|
||||
|
||||
class Plugins {
|
||||
registeredPlugins: Map<string, Function>;
|
||||
constructor() {
|
||||
|
||||
}
|
||||
|
||||
public register() {
|
||||
return function(target: any, propertyKey: string, descriptor: PropertyDecorator) {
|
||||
@plugins.register('wget-image')
|
||||
async wgetImage(c: LagrangeContext<Message>) {
|
||||
// 判断一下,只解析 message 类型的数据
|
||||
if (c.message.post_type === 'message') {
|
||||
const text = c.message.raw_message;
|
||||
if (text.startsWith('\\wget-image')) {
|
||||
const url = text.substring('\\wget-image'.length).trim();
|
||||
c.sendMessage([
|
||||
{
|
||||
type: 'image',
|
||||
data: {
|
||||
file: url,
|
||||
timeout: 10000
|
||||
}
|
||||
}
|
||||
]);
|
||||
|
||||
// 插件中使用 finishSession 会让被装饰的事务函数不再被执行,直接结束对话
|
||||
c.finishSession();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public use(name: string) {
|
||||
return function(target: any, propertyKey: string, descriptor: PropertyDecorator) {
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const plugins = new Plugins();
|
||||
export default plugins;
|
||||
|
||||
|
||||
class Impl {
|
||||
|
||||
echo(message: Lagrange.CommonMessage) {
|
||||
|
||||
}
|
||||
}
|
28
bot/test.ts
Normal file
28
bot/test.ts
Normal file
@ -0,0 +1,28 @@
|
||||
import './plugins';
|
||||
|
||||
import { mapper, plugins, LagrangeContext, PrivateMessage, GroupMessage, Send } from 'lagrange.onebot'
|
||||
|
||||
import { apiQueryVecdb } from './api/vecdb';
|
||||
|
||||
/** Test handlers: one for a single private user, one for a single group. */
export class Impl {
    /**
     * Private-message handler for user 1193466151 (with the `echo`, `pm` and
     * `wget-image` plugins applied): sends one fixed local image, then finishes the session.
     */
    @mapper.onPrivateUser(1193466151)
    @plugins.use('echo')
    @plugins.use('pm')
    @plugins.use('wget-image')
    async handleJinhui(c: LagrangeContext<PrivateMessage>) {
        const birdImage = {
            type: 'image' as const,
            data: {
                file: 'file:///data/zhelonghuang/project/rag-llm/images/bird.png',
                timeout: 10000
            }
        };
        c.sendMessage([birdImage])
        c.finishSession();
    }

    /** Group handler for group 956419963 (registered with `at: false`): logs the message in both forms. */
    @mapper.onGroup(956419963, { at: false })
    async handleTestGroup(c: LagrangeContext<GroupMessage>) {
        const incoming = c.message;
        console.log(incoming.message);
        console.log(incoming.raw_message);
    }
}
|
542
bot/type.ts
542
bot/type.ts
@ -1,542 +0,0 @@
|
||||
/**
|
||||
* @author 锦恢
|
||||
* @email 1193466151@qq.com
|
||||
* @description Lagrange.Core 前端接口
|
||||
* @comment 详细的接口信息来源
|
||||
* - https://github.com/botuniverse/onebot-11/blob/master/api/public.md
|
||||
* - https://docs.go-cqhttp.org/reference/data_struct.html
|
||||
*/
|
||||
|
||||
/** The `status` object carried by a heartbeat message: five boolean flags. */
export interface HeartBeatStatus {
    app_initialized: boolean;
    app_enabled: boolean;
    app_good: boolean;
    online: boolean;
    good: boolean;
}
|
||||
|
||||
/** The two `meta_event_type` values known to this file. */
export type MetaEventType = 'heartbeat' | 'lifecycle';
|
||||
|
||||
/** A heartbeat event: `post_type` is `meta_event` and `meta_event_type` is `heartbeat`. */
export interface HeartBeatMessage {
    interval: number;
    status: HeartBeatStatus;
    meta_event_type: 'heartbeat';
    time: number;
    self_id: number;
    post_type: 'meta_event';
}
|
||||
|
||||
/** Information about the sender of a message. */
export interface Sender {
    user_id: number;
    nickname: string;
    sex: 'unknown' | 'male' | 'female';
    card?: string;
    age?: number;
    area?: string;
    /** Group chat level; note that it is a string. */
    level?: string;
    role?: string;
    title?: string;
}
|
||||
|
||||
/**
 * Shapes of message segments as received.
 * Reference: https://github.com/botuniverse/onebot-11/blob/master/message/segment.md
 */
export namespace Receive {
    export interface Text {
        type: 'text';
        data: {
            text: string;
        };
    }

    export interface Face {
        type: 'face';
        data: {
            id: string;
        };
    }

    export interface Image {
        type: 'image';
        data: {
            file: string;
            url: string;
            /** Text shown in the compact preview window; for an image this is `[图片]`. */
            summary: string;
        };
    }

    export interface Audio {
        type: 'record';
        data: {
            file: string;
            magic: 0 | 1;
            url: string;
        };
    }

    export interface Video {
        type: 'video';
        data: {
            file: string;
            url: string;
        };
    }

    export interface At {
        type: 'at';
        data: {
            qq: string;
        };
    }

    /** Rock-paper-scissors magic emoji. */
    export interface FingerGuess {
        type: 'rps';
        data: {};
    }

    /** Dice-roll magic emoji. */
    export interface Dice {
        type: 'dice';
        data: {};
    }

    /** Window shake (the older "poke"). */
    export interface WindowJitter {
        type: 'shake';
        data: {};
    }

    /** Poke. */
    export interface Poke {
        type: 'poke';
        data: {
            type: string;
            id: string;
            name: string;
        };
    }

    export interface Link {
        type: 'share';
        data: {
            /** URL */
            url: string;
            /** Title */
            title: string;
            /** Optional when sending: description of the content */
            content?: string;
            /** Optional when sending: image URL */
            image?: string;
        };
    }

    export interface RecommendFriend {
        type: 'contact';
        data: {
            type: 'qq';
            /** QQ number of the recommended person */
            id: string;
        };
    }

    export interface RecommendGroup {
        type: 'contact';
        data: {
            type: 'group';
            /** Group number of the recommended group */
            id: string;
        };
    }

    export interface Location {
        type: 'location';
        data: {
            /** Latitude */
            lat: string;
            /** Longitude */
            lon: string;
            /** Optional when sending: title */
            title?: string;
            /** Optional when sending: description of the content */
            content?: string;
        };
    }

    export interface Reply {
        type: 'reply';
        data: {
            id: string;
        };
    }

    export interface Forward {
        type: 'forward';
        data: {
            id: string;
        };
    }

    export interface XML {
        type: 'xml';
        data: {
            /** XML content */
            data: string;
        };
    }

    export interface JSON {
        type: 'json';
        data: {
            data: string;
        };
    }

    /** Union of every received segment shape declared in this namespace. */
    export type Default =
        | Text | Face | Image | Audio | Video | At
        | FingerGuess | Dice | WindowJitter | Poke
        | Link | RecommendFriend | RecommendGroup | Location
        | Reply | Forward | XML | JSON;
}
|
||||
|
||||
/**
 * Shapes of message segments that can be sent.
 *
 * For `image`, `record` and `video`, the download-related fields (`cache`, `proxy`,
 * `timeout`) and `magic` have defaults and are therefore optional; only `file` is required.
 */
export namespace Send {
    export interface Text {
        type: 'text';
        data: {
            text: string;
        };
    }

    export interface Face {
        type: 'face';
        data: {
            id: string;
        };
    }

    export interface Image {
        type: 'image';
        data: {
            /**
             * Three accepted forms when sending:
             * 1. absolute path, e.g. file:///C:\\Users\Richard\Pictures\1.png
             * 2. network URL, e.g. http://i1.piimg.com/567571/fdd6e7b6d93f1ef0.jpg
             * 3. Base64 data, e.g. base64://iVBORw0KGgoAAAANSUhEUgAAABQAAAAVCAIAAADJt1n/...
             */
            file: string;

            /** Only for network URLs: whether to use an already cached file. Defaults to 1. */
            cache?: 0 | 1;

            /**
             * Only for network URLs: whether to download through a proxy (configured via
             * environment variables or a config file). Defaults to 1.
             */
            proxy?: 0 | 1;

            /** Only for network URLs: download timeout in seconds. No timeout by default. */
            timeout?: number;
        };
    }

    export interface Audio {
        type: 'record';
        data: {
            file: string;
            magic?: 0 | 1;
            cache?: 0 | 1;
            proxy?: 0 | 1;
            timeout?: number;
        };
    }

    export interface Video {
        type: 'video';
        data: {
            file: string;
            cache?: 0 | 1;
            proxy?: 0 | 1;
            timeout?: number;
        };
    }

    export interface At {
        type: 'at';
        data: {
            qq: string;
        };
    }

    export interface FingerGuess {
        type: 'rps';
        data: {};
    }

    export interface Dice {
        type: 'dice';
        data: {};
    }

    export interface WindowJitter {
        type: 'shake';
        data: {};
    }

    /** Poke. */
    export interface Poke {
        type: 'poke';
        data: {
            type: string;
            id: string;
        };
    }

    export interface Anonymous {
        type: 'anonymous';
        data: {};
    }

    export interface Link {
        type: 'share';
        data: {
            /** URL */
            url: string;
            /** Title */
            title: string;
            /** Optional when sending: description of the content */
            content?: string;
            /** Optional when sending: image URL */
            image?: string;
        };
    }

    export interface RecommendFriend {
        type: 'contact';
        data: {
            type: 'qq';
            /** QQ number of the recommended person */
            id: string;
        };
    }

    export interface RecommendGroup {
        type: 'contact';
        data: {
            type: 'group';
            /** Group number of the recommended group */
            id: string;
        };
    }

    export interface Location {
        type: 'location';
        data: {
            /** Latitude */
            lat: string;
            /** Longitude */
            lon: string;
            /** Optional when sending: title */
            title?: string;
            /** Optional when sending: description of the content */
            content?: string;
        };
    }

    export interface MusicShare {
        type: 'music';
        data: {
            /** QQ Music, NetEase Cloud Music, or Xiami Music respectively */
            type: 'qq' | '163' | 'xm';
            /** Song ID */
            id: string;
        };
    }

    export interface CustomMusicShare {
        type: 'music';
        data: {
            type: 'custom';
            url: string;
            audio: string;
            title: string;
            content: string;
            image: string;
        };
    }

    export interface Reply {
        type: 'reply';
        data: {
            id: string;
        };
    }

    export interface ForwardNode {
        type: 'node';
        data: {
            id: string;
        };
    }

    export interface XML {
        type: 'xml';
        data: {
            /** XML content */
            data: string;
        };
    }

    export interface JSON {
        type: 'json';
        data: {
            data: string;
        };
    }

    /** Union of every sendable segment shape declared in this namespace. */
    export type Default =
        | Text | Face | Image | Audio | Video | At
        | FingerGuess | Dice | WindowJitter | Poke | Anonymous
        | Link | RecommendFriend | RecommendGroup | Location
        | MusicShare | CustomMusicShare
        | Reply | ForwardNode | XML | JSON;
}
|
||||
|
||||
|
||||
|
||||
/** Description of a file attached to an event. */
export interface MsgFile {
    /** Usually '' */
    id: string;
    /** File name */
    name: string;
    /** File size in bytes */
    size: number;
    /** id */
    busid: number;
    /** Link (IPv4) */
    url: string;
}
|
||||
|
||||
/**
 * Event whose post_type is 'meta_event'. Every other string-keyed property is
 * accepted and untyped (`any`) through the index signature.
 */
export interface MetaMessage {
|
||||
post_type: 'meta_event',
|
||||
[msg: string]: any
|
||||
}
|
||||
|
||||
/**
 * Loosely-typed message event: post_type is 'message'; only user_id, time and
 * self_id are required, every other field is optional.
 */
export interface CommonMessage {
|
||||
// Event type
|
||||
post_type: 'message',
|
||||
// Whether the message came from a private chat or a group chat
|
||||
message_type?: 'private' | 'group',
|
||||
// Whether the sender is a friend or a group member/stranger
|
||||
sub_type?: 'friend' | 'normal',
|
||||
// Message ID
|
||||
message_id?: number,
|
||||
// Group number
|
||||
group_id?: number,
|
||||
// QQ number of the sender
|
||||
user_id: number,
|
||||
// Whether the message was sent anonymously; usually null
|
||||
anonymous?: null | boolean,
|
||||
// Message content (structured)
|
||||
message?: Receive.Default,
|
||||
// Message content (plain text)
|
||||
raw_message?: string,
|
||||
// Timestamp of when the message was sent
|
||||
time: number,
|
||||
// Own id
|
||||
self_id: number,
|
||||
// Sent file — NOTE(review): no field follows this comment; confirm whether one was intended
|
||||
// Default font size; usually 0
|
||||
font?: number
|
||||
}
|
||||
|
||||
/** Message event received from a private chat (message_type is 'private'). */
export interface PrivateMessage {
|
||||
// Event type
|
||||
post_type: 'message',
|
||||
// Whether the message came from a private chat or a group chat
|
||||
message_type: 'private',
|
||||
// Message ID
|
||||
message_id: number,
|
||||
// QQ number of the sender
|
||||
user_id: number,
|
||||
// Message content (structured)
|
||||
message: Receive.Default,
|
||||
// Message content (plain text)
|
||||
raw_message: string,
|
||||
// Timestamp of when the message was sent
|
||||
time: number,
|
||||
// Own id
|
||||
self_id: number,
|
||||
// Default font size; usually 0
|
||||
font?: number
|
||||
}
|
||||
|
||||
|
||||
/** Message event received from a group chat (message_type is 'group'). */
export interface GroupMessage {
|
||||
// Event type
|
||||
post_type: 'message',
|
||||
// Whether the message came from a private chat or a group chat
|
||||
message_type: 'group',
|
||||
// Whether the sender is a friend or a group member/stranger
|
||||
sub_type: 'friend' | 'normal',
|
||||
// Message ID
|
||||
message_id: number,
|
||||
// Group number
|
||||
group_id: number,
|
||||
// QQ number of the sender
|
||||
user_id: number,
|
||||
// Whether the message was sent anonymously; usually null
|
||||
anonymous: null | boolean,
|
||||
// Message content (structured)
|
||||
message: Receive.Default,
|
||||
// Message content (plain text)
|
||||
raw_message: string,
|
||||
// Timestamp of when the message was sent
|
||||
time: number,
|
||||
// Own id
|
||||
self_id: number,
|
||||
// Sent file — NOTE(review): no field follows this comment; confirm whether one was intended
|
||||
// Default font size; usually 0
|
||||
font?: number
|
||||
}
|
||||
|
||||
/**
 * Notice event (post_type 'notice') that carries a MsgFile.
 * notice_type is optional; when present it is 'offline_file'.
 */
export interface FileMessage {
|
||||
post_type: 'notice',
|
||||
user_id: number,
|
||||
file: MsgFile,
|
||||
notice_type?: 'offline_file',
|
||||
time: number,
|
||||
self_id: number
|
||||
}
|
||||
|
||||
// Request to join a group or to add a friend
|
||||
export interface AddMessage {
|
||||
post_type: 'request',
|
||||
sub_type: 'add',
|
||||
user_id: number,
|
||||
group_id: number,
|
||||
// Defaults to 0, meaning there is no inviter
|
||||
invitor_id: number,
|
||||
request_type: 'private' | 'group',
|
||||
// The group's question and the applicant's answer
|
||||
comment: string,
|
||||
flag: string,
|
||||
time: number,
|
||||
self_id: number,
|
||||
}
|
||||
|
||||
// Approval: notice event with sub_type 'approve' and notice_type 'group_increase'
|
||||
export interface ApproveMessage {
|
||||
post_type: 'notice',
|
||||
sub_type: 'approve',
|
||||
group_id: number,
|
||||
operator_id: number,
|
||||
user_id: number,
|
||||
notice_type: 'group_increase',
|
||||
time: number,
|
||||
self_id: number,
|
||||
}
|
||||
|
||||
// Any event shape declared in this file, across all four post_type values
export type Message = MetaMessage | PrivateMessage | GroupMessage | FileMessage | AddMessage | ApproveMessage;
|
||||
// Events whose post_type is 'message'
export type MessagePostType = PrivateMessage | GroupMessage;
|
||||
// Events whose post_type is 'notice'
export type NoticePostType = FileMessage | ApproveMessage;
|
||||
// Events whose post_type is 'request'
export type RequestPostType = AddMessage;
|
||||
|
||||
|
||||
/** Either a value of type T or a Promise of T, so both sync and async results are accepted. */
export type Thenable<T> = T | Promise<T>;
|
||||
|
||||
/**
 * Function that sends a message given either as a plain string or as an array
 * of Send.Default segments. Its result (direct or via a Promise) is void or an
 * Error value.
 * NOTE(review): presumably the Error is returned, not thrown, on failure — confirm.
 */
export type SendApi = (msg: string | Send.Default[]) => Thenable<void | Error>;
|
||||
|
||||
/**
 * Pairs an event with a SendApi function.
 * @typeParam M - type of the event held in `message`; defaults to Message.
 */
export interface InvokerContext<M = Message> {
|
||||
message: M,
|
||||
send: SendApi
|
||||
}
|
||||
|
||||
// InvokerContext specialised to private-chat message events
export type PrivateUserInvokeContext = InvokerContext<PrivateMessage>;
|
||||
// InvokerContext specialised to group-chat message events
export type GroupUserInvokeContext = InvokerContext<GroupMessage>;
|
@ -0,0 +1,2 @@
|
||||
addr: 127.0.0.1
|
||||
port: 8082
|
50
config/story.yml
Normal file
50
config/story.yml
Normal file
@ -0,0 +1,50 @@
|
||||
schema:
|
||||
root:
|
||||
name: root
|
||||
children:
|
||||
- name: usage
|
||||
description: 使用查询
|
||||
children:
|
||||
- name: bug
|
||||
description: bug 查询
|
||||
children:
|
||||
- name: command
|
||||
description: 指令
|
||||
children:
|
||||
- name: others
|
||||
description: 其他
|
||||
children:
|
||||
|
||||
stories:
|
||||
- message: 请问 property.json 如何配置?
|
||||
intent: usage
|
||||
- message: 我的自动补全无法使用,是不是有bug?
|
||||
intent: bug
|
||||
- message: 帮我上传一下这份数据
|
||||
intent: command
|
||||
- message: surface了解一下?
|
||||
intent: others
|
||||
- message: 大佬们,为啥我的digital ide启动之后所有功能都没启动捏?我配置了property文件,然后插件的vivado路经和modelsim路经都加上了
|
||||
intent: usage
|
||||
- message: 这群要被chisel夺舍了吗
|
||||
intent: others
|
||||
- message: Metals一开直接报错
|
||||
intent: others
|
||||
- message: 话说digital-ide打开大的verilog卡死了
|
||||
intent: bug
|
||||
- message: 请问一下,第一次点击对文件仿真可以出波形文件,再次点击的时候就会提示unknown module type了。是哪个配置没配置好?
|
||||
intent: usage
|
||||
- message: 怎么调整是哪个版本的vivado来构建工程呢
|
||||
intent: usage
|
||||
- message: 咱们这个插件win7的vscode是不是只能用很早之前的版本
|
||||
intent: usage
|
||||
- message: 帮我将这份数据保存到服务器上
|
||||
intent: command
|
||||
- message: 他这个意思是 单个功耗很低 但是功耗低那肯定性能就寄 频率肯定不高 靠人多
|
||||
intent: others
|
||||
|
||||
|
||||
rejects:
|
||||
- metal
|
||||
- metals
|
||||
- idea
|
281
notebook/experiment.ipynb
Normal file
281
notebook/experiment.ipynb
Normal file
File diff suppressed because one or more lines are too long
331
notebook/github-issue.ipynb
Normal file
331
notebook/github-issue.ipynb
Normal file
@ -0,0 +1,331 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import requests as r\n",
|
||||
"from bs4 import BeautifulSoup, Tag\n",
|
||||
"import json\n",
|
||||
"import os\n",
|
||||
"from threading import Thread\n",
|
||||
"from urllib.parse import urlparse"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def download_worker(media_url, save_path):\n",
|
||||
" res = r.get(media_url)\n",
|
||||
" with open(save_path, 'wb') as fp:\n",
|
||||
" fp.write(res.content)\n",
|
||||
" print('[crawler] 图像已经保存至', save_path)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def crawler_issue(issue_id: str):\n",
|
||||
" folder = 'issue-' + str(issue_id)\n",
|
||||
" os.makedirs('../docs/digital-issue/' + folder, exist_ok=True)\n",
|
||||
" res = r.get('https://github.com/Digital-EDA/Digital-IDE/issues/' + str(issue_id))\n",
|
||||
" soup = BeautifulSoup(res.text, 'html.parser')\n",
|
||||
"\n",
|
||||
" texts = []\n",
|
||||
" for td in soup.find_all('td', attrs={ 'class': 'js-comment-body' }):\n",
|
||||
" if isinstance(td, Tag):\n",
|
||||
" for p in td.find_all('p'):\n",
|
||||
" if isinstance(p, Tag):\n",
|
||||
" text = p.text\n",
|
||||
" texts.append(text)\n",
|
||||
" for a in p.find_all('a'):\n",
|
||||
" href = a.attrs['href']\n",
|
||||
" if href and '.png' in href:\n",
|
||||
" urlp = urlparse(href)\n",
|
||||
" name = urlp.path.split('/')[-1]\n",
|
||||
" save_path = '../docs/digital-issue/' + folder + '/' + name\n",
|
||||
" t = Thread(target=download_worker, args=(href, save_path))\n",
|
||||
" t.start()\n",
|
||||
"\n",
|
||||
" text = '\\n'.join(texts)\n",
|
||||
" with open('../docs/digital-issue/{}/issue.md'.format(folder), 'w', encoding='utf-8') as fp:\n",
|
||||
" fp.write(text) "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-67/331660157-920d7143-f262-42d5-af57-a817bf3aee01.png\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"crawler_issue(67)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def crawler_issue_page(page_url: str):\n",
|
||||
" res = r.get(page_url)\n",
|
||||
" if res.status_code != 200:\n",
|
||||
" print('page url {} return 404'.format(page_url))\n",
|
||||
" return\n",
|
||||
" html = res.text\n",
|
||||
" soup = BeautifulSoup(html, 'html.parser')\n",
|
||||
" issue_container = soup.find('div', { 'class': 'js-navigation-container js-active-navigation-container' })\n",
|
||||
" issue_ids = []\n",
|
||||
" for div in issue_container.children:\n",
|
||||
" if isinstance(div, Tag):\n",
|
||||
" id = div.attrs['id'].split('_')[-1]\n",
|
||||
" issue_ids.append(int(id))\n",
|
||||
" \n",
|
||||
" for issue_id in issue_ids:\n",
|
||||
" print('爬取 issue-{} 中 ...'.format(issue_id))\n",
|
||||
" crawler_issue(issue_id)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"爬取 issue-71 中 ...\n",
|
||||
"爬取 issue-70 中 ...\n",
|
||||
"爬取 issue-69 中 ...\n",
|
||||
"爬取 issue-68 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220146-9deeccbf-cc0e-4810-bdd9-80e11d083c15.png\n",
|
||||
"爬取 issue-67 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220242-9abe8b7b-5985-4c1a-ac0f-30aba75ef8d2.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331219978-d22a5a5d-da00-430c-b966-68517ab264c0.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220080-b0c5f0af-e38c-4819-9efa-7491650ddb92.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220203-3bb8aefd-e04a-4eb7-ae87-ca48f1daa120.png\n",
|
||||
"爬取 issue-66 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-67/331660157-920d7143-f262-42d5-af57-a817bf3aee01.png\n",
|
||||
"爬取 issue-65 中 ...\n",
|
||||
"爬取 issue-64 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-66/330112198-fb783018-b217-4cab-afef-32d339c4047a.png\n",
|
||||
"爬取 issue-63 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-65/328184842-2e13483e-4ece-4eb6-8c8a-3d9c92a97651.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-65/328185054-e08c66b2-7e87-4238-88cb-e0672b2de530.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-65/328185191-305e8b86-a9de-434b-a1cf-80c441c51df2.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-64/327033289-dc79968e-8279-43aa-b6a1-a6f1acd4155f.png\n",
|
||||
"爬取 issue-62 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-64/327033187-1b9134e0-387e-491d-a478-3ea6438728a4.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-63/326496750-53a0c779-8a4b-418d-b21c-4ea2151edd92.png\n",
|
||||
"爬取 issue-61 中 ...\n",
|
||||
"爬取 issue-60 中 ...\n",
|
||||
"爬取 issue-55 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-61/325593801-bd5c4229-f47a-4d6e-99a1-0cc912378f0e.png\n",
|
||||
"爬取 issue-54 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-55/316435757-14c497a5-6ecc-4f97-850b-7e13988ec7aa.png\n",
|
||||
"爬取 issue-53 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-54/306546248-83a57abe-63ff-43ae-8140-5e2b284193f0.png\n",
|
||||
"爬取 issue-52 中 ...\n",
|
||||
"爬取 issue-51 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-53/305870587-69d44e21-edfc-4fc0-9ad7-daaec393caac.png\n",
|
||||
"爬取 issue-50 中 ...\n",
|
||||
"爬取 issue-49 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-51/302262263-3e5581e0-4e36-463b-9379-43d1f9e366b8.png\n",
|
||||
"爬取 issue-48 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-49/300495384-94077fee-624f-48cc-98fd-d6e6fe16251b.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-49/300495541-23556932-5526-4428-b1c2-25c840352422.png\n",
|
||||
"爬取 issue-47 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-49/300495316-f8c98f42-a320-4c4e-84f7-2aaab4fb39f1.png\n",
|
||||
"爬取 issue-46 中 ...\n",
|
||||
"爬取 issue-45 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-46/294358431-92b4f252-91a9-4326-ae14-9d21037d3478.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-46/294358432-2bbec170-1400-49c3-a30e-a0acc4bf3f66.png\n",
|
||||
"爬取 issue-44 中 ...\n",
|
||||
"爬取 issue-43 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335602119-bd06d356-3356-45a8-8556-b9b60fdb337b.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665721-e4730448-1588-424c-9a98-c661dfb5237d.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665609-f6bf03cd-857b-4156-8795-6e41416d96e4.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335599172-7ea754a3-1dea-428f-baf4-e04c400e2744.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665655-3341f355-0ca4-4757-9814-5702515922e7.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665566-dc8dd944-f464-4606-8900-4562cdf404c7.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335601886-3731689f-7ddb-4d4b-9e73-9e1a631e403f.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335602049-bade0794-1653-42f7-8816-d3f3484009e3.png\n",
|
||||
"爬取 issue-42 中 ...\n",
|
||||
"爬取 issue-41 中 ...\n",
|
||||
"爬取 issue-40 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-42/292425880-bc7f4792-6c68-45e0-862f-23b7b1232dce.png\n",
|
||||
"爬取 issue-39 中 ...\n",
|
||||
"爬取 issue-38 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-39/291777353-268ce49b-78c4-46a8-b543-542db0ef6dcc.png\n",
|
||||
"爬取 issue-37 中 ...\n",
|
||||
"爬取 issue-36 中 ...\n",
|
||||
"爬取 issue-35 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-37/291228777-6d65928c-5ffd-4aab-af19-03291f31473a.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291267690-67d01501-ab54-4274-8425-e876b7035391.png\n",
|
||||
"爬取 issue-34 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291267797-b00bdf44-c648-47d3-9bd9-eb1e68b12193.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268221-1c380a09-76b5-45a6-aff3-d8a873868402.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268654-d8a5686c-c109-461b-b68a-ff00bcd9f462.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268318-27f79c67-8e8d-419a-a0b7-e744b416b704.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268113-20044b19-d508-47ea-9f07-f675bc72a2cb.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291267929-d697e859-8ad0-4cc3-aa15-e50d0a26dc53.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268011-69f0a3ef-d509-47c9-b949-36d280edc4f8.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268429-e0517629-7b5a-4751-a431-330f04d8c1ee.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268509-e9c687a4-2b28-46f1-8670-827359df792e.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291225163-78241e4c-400a-4d75-a008-3c34ca26ae4a.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291225347-63167ad5-6896-4afe-a6d7-197532a23f8f.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-35/291224673-a3f70f16-271c-4905-b1f0-1c011b56d3bf.png\n",
|
||||
"爬取 issue-33 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-34/291140726-cba47a2d-dd99-49e5-b302-733e266d0c44.png\n",
|
||||
"爬取 issue-32 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-34/291310957-8b8f17a0-ec66-4009-9657-2433d51319c8.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-34/291140726-cba47a2d-dd99-49e5-b302-733e266d0c44.png\n",
|
||||
"爬取 issue-31 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-32/290024353-47c9297c-6160-402a-b3bf-e08bd9c923ea.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-32/290024305-41e868d5-e737-4f1d-93af-558db2abba91.png\n",
|
||||
"爬取 issue-30 中 ...\n",
|
||||
"爬取 issue-29 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-31/290022938-62774f96-82ac-46f4-9599-818a6a430cd9.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-30/290022151-dc253b33-fbfe-4f9c-8023-e00e180015d6.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-30/290022070-0e449a39-5360-474e-bc18-1c1729071f66.png\n",
|
||||
"爬取 issue-28 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290018445-3d43470c-5b8b-4466-b8c2-800694771143.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290020867-53b8d764-f8f8-41ab-8262-0ac17702fa42.png\n",
|
||||
"爬取 issue-27 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290020867-53b8d764-f8f8-41ab-8262-0ac17702fa42.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290018445-3d43470c-5b8b-4466-b8c2-800694771143.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-28/288112763-1dfaa7f2-f52b-42b5-ba40-c47c16205265.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-28/288112141-a1c08800-b6dd-4215-9ecf-288759cc0174.png\n",
|
||||
"爬取 issue-26 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-27/287923928-5e817c41-c54d-409c-be36-576efb0a299a.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-27/287924096-490bef45-fc31-4ffc-a3a5-d077c350ff88.png\n",
|
||||
"爬取 issue-25 中 ...\n",
|
||||
"爬取 issue-24 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-25/282802849-27fc9f7a-f7f5-4b8d-84e5-1060166b0ad7.png\n",
|
||||
"爬取 issue-23 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-25/282800215-946fad7c-54ae-46c9-be9a-d5a69b4fbf7b.png\n",
|
||||
"爬取 issue-22 中 ...\n",
|
||||
"爬取 issue-21 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-22/275505102-406e3256-7a3e-4deb-9456-2a49b41ca85d.png\n",
|
||||
"爬取 issue-20 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-21/279390992-4a04af63-c176-49a8-a60e-5c3e95c07f8b.png\n",
|
||||
"爬取 issue-19 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-21/275300475-444cf824-5489-461d-9678-440901554f68.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-20/268173022-80623e60-fba6-4f4c-85eb-5fb542ba8170.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-20/268173043-927f355a-37b5-45f5-bd88-78317549bf54.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-20/268173050-b186e855-4ad3-44c0-8708-59b11c5506a7.png\n",
|
||||
"爬取 issue-17 中 ...\n",
|
||||
"爬取 issue-16 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-17/264944498-f609f333-53a1-40f3-8bd5-a320b21398df.png\n",
|
||||
"爬取 issue-15 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-16/264304291-1fbb69c3-02fa-4d50-9dec-cc6da46c1dd2.png\n",
|
||||
"爬取 issue-14 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-15/264302335-b7b9d42e-aa16-474d-8c49-5573e397c374.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-15/264302196-e355b398-1ba7-4b7e-aa0b-b1d67646182a.png\n",
|
||||
"爬取 issue-13 中 ...\n",
|
||||
"爬取 issue-12 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-14/263748729-d0d2d005-019b-404f-a720-8f75b19a52ba.png\n",
|
||||
"爬取 issue-11 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-12/263475138-92d989d2-2b5e-432c-bfde-8bd8f3524b6e.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-12/263475180-595e8d91-2645-47bf-a4db-24aad89d12ae.png\n",
|
||||
"爬取 issue-10 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-12/289706401-d79d32f8-5738-4088-bc92-74e19da24885.png\n",
|
||||
"爬取 issue-9 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-11/263142498-1af4cb41-c431-4de0-9d26-65729d3dfe65.png\n",
|
||||
"爬取 issue-8 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-10/263140095-8d3beafa-ad35-405d-bcf7-3964853174b2.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-9/261163518-42173b79-b7b3-41c3-8860-1007f140fe86.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-9/261163571-1caa7264-3702-4467-9986-49e0557b0edc.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-9/261163490-45ab4e0e-7175-4a65-9a70-4e51b4c1366a.png\n",
|
||||
"爬取 issue-7 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-8/260685640-0c9db5e7-ae1e-4558-b3f7-72ebb4f67043.png\n",
|
||||
"爬取 issue-6 中 ...\n",
|
||||
"爬取 issue-5 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-7/260630437-8d34c4a0-cc48-44b5-bbb8-94742c2e0776.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-7/260630481-0cbc73cf-f516-4b3d-92f5-17598f089297.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/256976429-8ad21695-2397-4a79-8fab-43fa01da5e24.png\n",
|
||||
"爬取 issue-4 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/260248721-a56ad686-d1ac-4fa4-9fe7-fb9007f7a1e3.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/256976463-9b61e743-536e-4d53-af74-f8015b104a36.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/256976410-22019e06-df93-48b1-93a6-05901197b277.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/260716833-f3d89a67-7b4f-4daa-9a0a-8313dcf9caaa.png\n",
|
||||
"爬取 issue-3 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-5/255643011-49b2efa2-09f4-463e-908b-4510d2110429.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-5/260307745-b545d146-a49b-4ebf-af88-ce3982a2e0ff.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-5/255646125-b5035137-6df5-4189-95c2-199970dfbe8d.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386114-73e4b52e-3eee-4652-971e-4bf123d6c9aa.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386123-e5e990fa-af1f-439b-bb74-aa20af50366c.png\n",
|
||||
"爬取 issue-2 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386053-b536d9d7-fe3b-4c83-a581-0884e3cf04f6.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386339-56413f1c-bb32-49c0-aa85-dceeceb8594a.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/255608587-cd487ee5-95be-47a5-90d4-5f02e0a94cc2.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/255603702-8379ccc3-4d7a-407f-8777-aba9666e7c58.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/260107703-9e01db47-6c15-4d41-b823-a1896be68af7.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/260107481-5c0127b4-2273-4a14-b996-2d109a947a5e.png\n",
|
||||
"爬取 issue-1 中 ...\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/255342825-dae63d0c-05b8-4965-b2e0-19df84778a5e.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/253884964-cbaf42fa-fa7d-48ed-8353-184dd0895a12.png\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/255247553-45d154cc-37d1-459d-80d3-adad6324de4c.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/256974788-7c2e9bf0-d239-4022-9aac-f8f160afad6f.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/256974788-7c2e9bf0-d239-4022-9aac-f8f160afad6f.png\n",
|
||||
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-1/253879145-d8f82699-aca6-44aa-bb1c-57066cf39f66.png\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"page_urls = [\n",
|
||||
" 'https://github.com/Digital-EDA/Digital-IDE/issues?page=1&q=',\n",
|
||||
" 'https://github.com/Digital-EDA/Digital-IDE/issues?page=2&q=',\n",
|
||||
" 'https://github.com/Digital-EDA/Digital-IDE/issues?page=3&q='\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"for url in page_urls:\n",
|
||||
" crawler_issue_page(url)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "base",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -2,7 +2,7 @@
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
@ -19,15 +19,15 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"整理得到 238 个文档\n",
|
||||
"分块得到 1206 个文档\n",
|
||||
"整理得到 304 个文档\n",
|
||||
"分块得到 1273 个文档\n",
|
||||
"数据库已存储到 blog-vecdb 中\n"
|
||||
]
|
||||
}
|
||||
@ -41,7 +41,7 @@
|
||||
" db = FAISS.load_local(db_persistent_dir, embedding, allow_dangerous_deserialization=True)\n",
|
||||
" print('成功从 {} 中提取数据'.format(db_persistent_dir))\n",
|
||||
"else:\n",
|
||||
" loader = DirectoryLoader('./docs', glob='**/*.md')\n",
|
||||
" loader = DirectoryLoader('../docs', glob='**/*.md')\n",
|
||||
" docs = loader.load()\n",
|
||||
" print('整理得到 {} 个文档'.format(len(docs)))\n",
|
||||
"\n",
|
||||
@ -60,21 +60,21 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(Document(page_content='完整的 VCD 语法,可以参考中科大资源网站上的 IEEE 1364 标准 第 18 章的内容。\\n\\n基本信息\\n\\n一个 vcd 文件会描述波形产生的基本信息,包括,日期,版本,单位时间,注释等等。\\n\\n日期 date\\n\\n$date\\n Sat Apr 20 20:06:14 2024\\n$end\\n\\n与 verilog 类似,$end$ 是一切 scope 的结束符号,你也可以写成这样:\\n\\n$date Sat Apr 20 20:06:14 2024 $end\\n\\n版本 version\\n\\n$version\\n Icarus Verilog\\n$end\\n\\n时间单位 timescale\\n\\n$timescale\\n 1ns\\n$end\\n\\n注释 comment\\n\\n$comment\\n Show the parameter values.\\n$end\\n\\n变量申明\\n\\nvcd 中通过如下的方式申明一个 module 内的变量,,每条的格式为 $var type bitwidth id name,比如:\\n\\n$scope module ID_EX $end\\n$var wire 2 ! AluOp [1:0] $end\\n$var wire 1 \" AluSrc $end\\n$var wire 1 # MemRead $end\\n$upscope $end\\n\\n以上语句申明了一个模块 ID_EX 和内部的三个信号 AluOp,AluSrc 和 MemRead。这三个变量在接下来的 vcd 描述中会被 !,\",# 替代,这也被称为 vcd 描述中,变量的 id.\\n\\n单个 module 的变量申明通过 $upscope $end 结束。\\n\\n所有 module 的变量申明通过 $enddefinitions $end 结束。\\n\\nparameter 赋值', metadata={'source': 'docs/224.md', 'start_index': 0}),\n",
|
||||
"[(Document(page_content='完整的 VCD 语法,可以参考中科大资源网站上的 IEEE 1364 标准 第 18 章的内容。\\n\\n基本信息\\n\\n一个 vcd 文件会描述波形产生的基本信息,包括,日期,版本,单位时间,注释等等。\\n\\n日期 date\\n\\n$date\\n Sat Apr 20 20:06:14 2024\\n$end\\n\\n与 verilog 类似,$end$ 是一切 scope 的结束符号,你也可以写成这样:\\n\\n$date Sat Apr 20 20:06:14 2024 $end\\n\\n版本 version\\n\\n$version\\n Icarus Verilog\\n$end\\n\\n时间单位 timescale\\n\\n$timescale\\n 1ns\\n$end\\n\\n注释 comment\\n\\n$comment\\n Show the parameter values.\\n$end\\n\\n变量申明\\n\\nvcd 中通过如下的方式申明一个 module 内的变量,,每条的格式为 $var type bitwidth id name,比如:\\n\\n$scope module ID_EX $end\\n$var wire 2 ! AluOp [1:0] $end\\n$var wire 1 \" AluSrc $end\\n$var wire 1 # MemRead $end\\n$upscope $end\\n\\n以上语句申明了一个模块 ID_EX 和内部的三个信号 AluOp,AluSrc 和 MemRead。这三个变量在接下来的 vcd 描述中会被 !,\",# 替代,这也被称为 vcd 描述中,变量的 id.\\n\\n单个 module 的变量申明通过 $upscope $end 结束。\\n\\n所有 module 的变量申明通过 $enddefinitions $end 结束。\\n\\nparameter 赋值', metadata={'source': '../docs/kirigaya.cn/224.md', 'start_index': 0}),\n",
|
||||
" 0.4351002),\n",
|
||||
" (Document(page_content='TODO\\n\\n找到所有 vcd item 的 type 类型,可参考:https://pyvcd.readthedocs.io/en/latest/vcd.common.html\\n\\nbug\\n\\n详见飞书文档:https://nc-ai-lab.feishu.cn/wiki/Z4AxwU1SdilATAk7GuvcYkIDnwh\\n\\n流程\\n\\n目前需要为Digital-IDE设计一个render用于显示VCD文件。\\n\\nIEEE 1364定义VCD(value change dump)文件是含已选变量(信号)的值变化信息存储文件。\\n\\nwavedrom在GitHub上开发过vcd和hdl的wavedrom脚本格式,可以用于进行DIDE的vcd可视化的二次开发,目前发现,为了实现vcd文件,基本的仓库主要是下面这两个:\\n\\nJS层与一个简单的前端:https://github.com/wavedrom/vcdrom\\n\\nwasm 解析:https://github.com/wavedrom/vcd\\n\\n打算基于这两个仓库二次开发一个好用的vcd渲染模块,再加入DIDE中。\\n\\n开发思路\\n\\nvcd 的渲染器分为如下几步进行开发。\\n\\nmermaid\\ngraph LR\\na(读取)-->b(渲染)-->c(解析)\\n\\nVCD 读取和解析并不难,难在如何快速安全地读取,对于一些长时间的模拟和仿真而言, vcd 文件可能会非常大。因此,vcd 需要分块读取,分块解析,为了避免这些不必要的麻烦,我使用了 https://github.com/wavedrom/vcdrom 这个项目的后端进行修改。\\n\\nwasm 解析器\\n\\n原项目写得过于紧凑,并不适合进行修改,因此需要修改一部分代码。\\n\\n整合项目在 : https://github.com/Digital-EDA/digital-vcd-parser', metadata={'source': 'docs/72.md', 'start_index': 0}),\n",
|
||||
" (Document(page_content='TODO\\n\\n找到所有 vcd item 的 type 类型,可参考:https://pyvcd.readthedocs.io/en/latest/vcd.common.html\\n\\nbug\\n\\n详见飞书文档:https://nc-ai-lab.feishu.cn/wiki/Z4AxwU1SdilATAk7GuvcYkIDnwh\\n\\n流程\\n\\n目前需要为Digital-IDE设计一个render用于显示VCD文件。\\n\\nIEEE 1364定义VCD(value change dump)文件是含已选变量(信号)的值变化信息存储文件。\\n\\nwavedrom在GitHub上开发过vcd和hdl的wavedrom脚本格式,可以用于进行DIDE的vcd可视化的二次开发,目前发现,为了实现vcd文件,基本的仓库主要是下面这两个:\\n\\nJS层与一个简单的前端:https://github.com/wavedrom/vcdrom\\n\\nwasm 解析:https://github.com/wavedrom/vcd\\n\\n打算基于这两个仓库二次开发一个好用的vcd渲染模块,再加入DIDE中。\\n\\n开发思路\\n\\nvcd 的渲染器分为如下几步进行开发。\\n\\nmermaid\\ngraph LR\\na(读取)-->b(渲染)-->c(解析)\\n\\nVCD 读取和解析并不难,难在如何快速安全地读取,对于一些长时间的模拟和仿真而言, vcd 文件可能会非常大。因此,vcd 需要分块读取,分块解析,为了避免这些不必要的麻烦,我使用了 https://github.com/wavedrom/vcdrom 这个项目的后端进行修改。\\n\\nwasm 解析器\\n\\n原项目写得过于紧凑,并不适合进行修改,因此需要修改一部分代码。\\n\\n整合项目在 : https://github.com/Digital-EDA/digital-vcd-parser', metadata={'source': '../docs/kirigaya.cn/72.md', 'start_index': 0}),\n",
|
||||
" 0.55531096),\n",
|
||||
" (Document(page_content='cap.release()\\nout.release()\\ncv2.destroyAllWindows()\\n```\\n\\n其中FORMAT代表目标视频的编码格式,目前我还遇到了很多bug,已经确定的FORMAT和编码关系的对照表如下:\\n\\n目标视频编码格式 FORMAT 取值 mp4 \"mp4v\" aiv \"MJPG\"\\n\\n参考\\n\\n[1] python opencv写视频——cv2.VideoWriter()_翟羽嚄的博客-CSDN博客_cv2.videowriter', metadata={'source': 'docs/21.md', 'start_index': 711}),\n",
|
||||
" (Document(page_content='cap.release()\\nout.release()\\ncv2.destroyAllWindows()\\n```\\n\\n其中FORMAT代表目标视频的编码格式,目前我还遇到了很多bug,已经确定的FORMAT和编码关系的对照表如下:\\n\\n目标视频编码格式 FORMAT 取值 mp4 \"mp4v\" aiv \"MJPG\"\\n\\n参考\\n\\n[1] python opencv写视频——cv2.VideoWriter()_翟羽嚄的博客-CSDN博客_cv2.videowriter', metadata={'source': '../docs/kirigaya.cn/21.md', 'start_index': 711}),\n",
|
||||
" 0.71963197)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -88,10 +88,31 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[(Document(page_content='除了b站的两个教程外,建议多出文档以及示例工程,一个刚接触FPGA但有不熟悉vivado的萌新就指望这个加快学习进度了\\n请问B站教程标题是啥?我搜digital-ide啥也搜不到\\n请问B站教程标题是啥?我搜digital-ide啥也搜不到\\n《Digital-IDE使用教程(一个DDS的实现)》\\n《Digital-IDE应用之FM调制解调》\\n请问B站教程标题是啥?我搜digital-ide啥也搜不到\\n《Digital-IDE使用教程(一个DDS的实现)》 《Digital-IDE应用之FM调制解调》\\n好的谢谢\\n那个教程是好早之前我出的了,新的版本一直不稳定,今年稳定之后会出后续教程。\\nDigital IDE 的使用教程可以看\\n官方文档为 https://sterben.nitcloud.cn/zh/ 但是目前文档不是很完善。', metadata={'source': '../docs/digital-issue/issue-47/issue.md', 'start_index': 0}),\n",
|
||||
" 0.64211607),\n",
|
||||
" (Document(page_content='home: true\\nheroImage: /icon.png\\ndescription: Vscode 平台上的 ASIC & FPGA 开发扩展\\nactionText: 快速开始 🐳\\nactionLink: /zh/guide/introduction\\nfeatures:\\n- title: ✨ HDL 语言支持\\n details: 支持 verilog, vhdl, systemverilog, tcl 脚本等\\n- title: 🎯 项目管理\\n details: 在你的项目中查看结构化的 HDL 文件\\n- title: 🛠️ 额外的工具\\n details: FSM, Netlist, 一键仿真, 文档化,让你的编程体验更加舒坦。\\n\\n::: slot footer\\nMIT Licensed | Copyright © 2018-present Digital-EDA\\n:::', metadata={'source': '../docs/digital-document/index.md', 'start_index': 0}),\n",
|
||||
" 0.7582667),\n",
|
||||
" (Document(page_content='TODO\\n\\n找到所有 vcd item 的 type 类型,可参考:https://pyvcd.readthedocs.io/en/latest/vcd.common.html\\n\\nbug\\n\\n详见飞书文档:https://nc-ai-lab.feishu.cn/wiki/Z4AxwU1SdilATAk7GuvcYkIDnwh\\n\\n流程\\n\\n目前需要为Digital-IDE设计一个render用于显示VCD文件。\\n\\nIEEE 1364定义VCD(value change dump)文件是含已选变量(信号)的值变化信息存储文件。\\n\\nwavedrom在GitHub上开发过vcd和hdl的wavedrom脚本格式,可以用于进行DIDE的vcd可视化的二次开发,目前发现,为了实现vcd文件,基本的仓库主要是下面这两个:\\n\\nJS层与一个简单的前端:https://github.com/wavedrom/vcdrom\\n\\nwasm 解析:https://github.com/wavedrom/vcd\\n\\n打算基于这两个仓库二次开发一个好用的vcd渲染模块,再加入DIDE中。\\n\\n开发思路\\n\\nvcd 的渲染器分为如下几步进行开发。\\n\\nmermaid\\ngraph LR\\na(读取)-->b(渲染)-->c(解析)\\n\\nVCD 读取和解析并不难,难在如何快速安全地读取,对于一些长时间的模拟和仿真而言, vcd 文件可能会非常大。因此,vcd 需要分块读取,分块解析,为了避免这些不必要的麻烦,我使用了 https://github.com/wavedrom/vcdrom 这个项目的后端进行修改。\\n\\nwasm 解析器\\n\\n原项目写得过于紧凑,并不适合进行修改,因此需要修改一部分代码。\\n\\n整合项目在 : https://github.com/Digital-EDA/digital-vcd-parser', metadata={'source': '../docs/kirigaya.cn/72.md', 'start_index': 0}),\n",
|
||||
" 0.83628875)]"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"db.similarity_search_with_score(\n",
|
||||
" query='digital ide 有什么教程吗',\n",
|
||||
" k=3\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
3454
package-lock.json
generated
Normal file
3454
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
17
package.json
17
package.json
@ -10,17 +10,30 @@
|
||||
"scripts": {
|
||||
"build": "tsc",
|
||||
"start": "serve dist",
|
||||
"serve": "tsc -w & serve dist"
|
||||
"serve": "tsc -w & serve dist",
|
||||
"test": "mocha"
|
||||
},
|
||||
"dependencies": {
|
||||
"@ptkdev/logger": "^1.8.0",
|
||||
"@types/node": "^20.12.12",
|
||||
"axios": "^1.7.2",
|
||||
"fs": "^0.0.1-security",
|
||||
"lagrange.onebot": "^1.0.0",
|
||||
"ws": "^8.17.0",
|
||||
"yaml": "^2.4.2"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/ws": "^8.5.10",
|
||||
"chai": "4.3.4",
|
||||
"serve": "^14.2.3",
|
||||
"typescript": "^5.4.5"
|
||||
"typescript": "^5.4.5",
|
||||
"mocha": "^10.4.0",
|
||||
"require-uncached": "^2.0.0",
|
||||
"shelljs": "^0.8.5",
|
||||
"should": "^13.2.3",
|
||||
"sinon": "^18.0.0",
|
||||
"jsverify": "^0.8.4",
|
||||
"knuth-shuffle": "^1.0.8",
|
||||
"@sinonjs/referee-sinon": "^12.0.0"
|
||||
}
|
||||
}
|
||||
|
1
prompt/__init__.py
Normal file
1
prompt/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
from prompt.core import PromptEngine
|
261
prompt/core.py
Normal file
261
prompt/core.py
Normal file
@ -0,0 +1,261 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
import warnings
|
||||
import random
|
||||
import math
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
import yaml
|
||||
import json5
|
||||
from loguru import logger
|
||||
|
||||
# File sink for the prompt package: DEBUG and above go to ./logs/prompt.log,
# the file is rotated at midnight, archives are zipped and kept for 7 days.
# enqueue=True routes records through a queue before they are written.
_PROMPT_LOG_SINK = {
    'sink': './logs/prompt.log',
    'level': 'DEBUG',
    'rotation': '00:00',
    'retention': '7 days',
    'compression': 'zip',
    'encoding': 'utf-8',
    'enqueue': True,
    'format': "{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
}
logger.add(**_PROMPT_LOG_SINK)
|
||||
|
||||
@dataclass(frozen=True)
class IntentNode:
    """One node of the intent tree declared under ``schema`` in the story file.

    The dataclass is frozen, so fields cannot be rebound after construction;
    the ``children`` and ``stories`` lists are nevertheless filled in place
    while the tree and the stories are being loaded.
    """
    # Intent label; used as the lookup key for the node.
    name: str
    # Optional free-text description taken from the schema entry.
    description: str | None
    # Direct sub-intents of this node.
    children: list[IntentNode]
    # Enclosing intent, or None for the root node.
    parent: IntentNode | None
    # Example messages labelled with this intent.
    stories: list[Story]
|
||||
|
||||
@dataclass(frozen=True)
class Story:
    """A labelled example: one user message together with its intent name."""
    # Raw text of the user message.
    message: str
    # Name of the intent the message belongs to.
    intent: str
|
||||
|
||||
class PromptEngine:
    """Loads an intent "story" YAML file and builds few-shot prompts from it.

    The YAML document is read once in ``__init__`` and must contain the keys
    ``schema`` (an intent tree below a ``root`` entry), ``stories`` (a list of
    ``{message, intent}`` examples) and ``rejects`` (a list of strings).

    Attributes:
        path: Location of the YAML file.
        config: The parsed YAML document.
        schema: Root of the intent tree, or None when the schema has no root.
        stories: Every story whose intent is declared in the schema.
        rejects: Copy of the ``rejects`` list of the file.
        intent2id / id2intent: Intent name <-> integer id. Ids are handed out
            in breadth-first order, so the root gets id 0.
        name2node: Intent name -> tree node.
    """
    path: str
    schema: IntentNode | None
    stories: list[Story]
    rejects: list[str]
    intent2id: dict[str, int]
    id2intent: dict[int, str]
    name2node: dict[str, IntentNode]

    def __init__(self, path: str) -> None:
        """Parse the YAML file at ``path`` and build the tree, stories and rejects."""
        self.path = path
        # NOTE(review): yaml.Loader can construct arbitrary Python objects; this
        # is only acceptable while the story file is trusted project content.
        with open(path, 'r', encoding='utf-8') as fp:
            self.config = yaml.load(fp, yaml.Loader)
        self.intent2id = {}
        self.id2intent = {}
        self.name2node = {}
        # The schema has to be handled first: stories are validated against it.
        self.schema = self.handle_schema(self.config['schema'])
        self.stories = self.handle_stories(self.config['stories'])
        self.rejects = self.handle_rejects(self.config['rejects'])

    def handle_schema(self, raw_schema: dict) -> IntentNode | None:
        """Build the intent tree and the name/id lookup tables.

        Args:
            raw_schema: The ``schema`` mapping of the YAML file.

        Returns:
            The root node, or None (after a warning) when ``root`` is missing.

        Raises:
            NameError: If a schema entry has no ``name``.
        """
        raw_root = raw_schema.get('root', None)
        if raw_root is None:
            warnings.warn('schema must have a root node as the beginning, otherwise intent recogition will not work')
            return None

        root: IntentNode | None = None
        layer: list[tuple[dict, IntentNode | None]] = [(raw_root, None)]

        # Level-order traversal: each pass consumes one depth of the tree and
        # collects the raw children (paired with their parent) for the next.
        while layer:
            next_layer: list[tuple[dict, IntentNode | None]] = []
            for raw_node, parent in layer:
                name = raw_node.get('name', None)
                if name is None:
                    raise NameError('you must specify a name in schema item, current item : {}'.format(raw_node))
                description = raw_node.get('description', None)
                children = raw_node.get('children', None) or []

                if name not in self.intent2id:
                    assign_id = len(self.intent2id)
                    self.intent2id[name] = assign_id
                    self.id2intent[assign_id] = name
                else:
                    warnings.warn('intent name {} is declared more than once in schema, the later node replaces the earlier one'.format(name))

                node = IntentNode(name, description, [], parent, [])
                self.name2node[name] = node
                if parent is None:
                    root = node
                else:
                    parent.children.append(node)
                next_layer.extend((raw_child, node) for raw_child in children)
            layer = next_layer
        return root

    def handle_stories(self, raw_stories: list[dict]) -> list[Story]:
        """Convert raw ``{message, intent}`` pairs into Story objects.

        Pairs whose intent is not declared in the schema are skipped with a
        warning; pairs without a message are skipped silently. Each accepted
        story is also attached to the tree node of its intent.
        """
        stories: list[Story] = []
        for pair in raw_stories or []:
            message = pair.get('message', None)
            intent = pair.get('intent', None)
            if intent not in self.intent2id:
                warnings.warn('{} is not the intent you declare in schema, so this pair will be ignored'.format(intent))
                continue
            if message and intent:
                story = Story(message, intent)
                self.name2node[intent].stories.append(story)
                stories.append(story)
        return stories

    def handle_rejects(self, raw_rejects: list[str]) -> list[str]:
        """Return a shallow copy of the ``rejects`` list (empty when it is None)."""
        return list(raw_rejects or [])

    def generate_chunk(self, stories: list[Story]) -> tuple[str, str]:
        """Render one few-shot chunk as a (user, assistant) content pair.

        All stories but the last appear in the user content as solved
        ``Message`` / ``Intent`` examples; the last one is left open and its
        answer becomes the assistant content.

        Raises:
            ValueError: If ``stories`` is empty.
        """
        if len(stories) == 0:
            raise ValueError('generate_chunk needs at least one story')

        prompts = []
        intent_id = None
        for story in stories:
            prompts.append('Message: ' + story.message.strip())
            intent_id = self.intent2id.get(story.intent)
            prompts.append('Intent: { id: %s }' % (intent_id))

        # Drop the answer of the last story: it is what the assistant replies.
        prompts.pop()

        user_content = '\n'.join(prompts) + '\n' + 'Intent: '
        assistant_content = '{id : %s}' % (intent_id)
        return user_content, assistant_content

    def generate_llm_message(self, question: str, intent: IntentNode = None, chunk_size: int = 5, max_chunk_num: int = 10) -> list[dict]:
        """Build the chat history that asks the LLM to classify ``question``.

        Args:
            question: The user message to classify.
            intent: Node whose direct children are the candidate intents;
                defaults to the schema root.
            chunk_size: Number of stories per few-shot user/assistant pair.
            max_chunk_num: The loop stops once this many pairs were emitted
                (at least one pair is always emitted when stories exist).

        Returns:
            Alternating user/assistant messages followed by a final user
            message holding the question. The instruction preset and the list
            of candidate ids are prepended to the first message.

        Raises:
            ValueError: If ``chunk_size`` is below 1 or no intent node is available.
        """
        if chunk_size < 1:
            raise ValueError('chunk_size must be at least 1, got {}'.format(chunk_size))
        if intent is None:
            intent = self.schema
        if intent is None:
            raise ValueError('no intent node to start from, the schema has no root')

        story_cache = []
        for node in intent.children:
            story_cache.extend(node.stories)

        # Shuffled on every call, so the examples differ between retries.
        random.shuffle(story_cache)
        chunk_num = math.ceil(len(story_cache) / chunk_size)
        message = []
        for chunk_id in range(chunk_num):
            start = chunk_id * chunk_size
            chunk = story_cache[start: start + chunk_size]
            user_content, assistant_content = self.generate_chunk(chunk)
            message.append({
                'role': 'user',
                'content': user_content
            })
            message.append({
                'role': 'assistant',
                'content': assistant_content
            })

            if len(message) / 2 >= max_chunk_num:
                break

        # NOTE(review): unlike the few-shot examples, the question is sent
        # without the 'Message: ' prefix - confirm this is intended.
        message.append({
            'role': 'user',
            'content': question + '\nIntent: '
        })

        # Instruction header, placed in front of the very first message.
        preset = 'Label a users message from a conversation with an intent. Reply ONLY with the name of the intent.'
        intent_preset = ['The intent should be one of the following:']
        for node in intent.children:
            intent_preset.append('- {}'.format(self.intent2id.get(node.name)))
        intent_preset = '\n'.join(intent_preset)
        message[0]['content'] = preset + '\n' + intent_preset + '\n' + message[0]['content']
        return message
|
||||
|
||||
|
||||
class TreeIntent(ABC):
    """Walks the intent tree of a PromptEngine by asking an LLM at each level.

    Subclasses provide the transport by implementing ``call_llm``.
    """
    path: str
    engine: PromptEngine

    def __init__(self, path: str) -> None:
        """Create the PromptEngine for the story file at ``path``."""
        self.path = path
        self.engine = PromptEngine(path)

    @abstractmethod
    def call_llm(self, message: list[dict]) -> str:
        """Send a chat history to the model and return its raw text reply.

        ``message`` is a list of ``{"role", "content"}`` dicts with alternating
        "user" / "assistant" roles, for example::

            [
                {"role": "user", "content": "Message: <example text>\\nIntent: "},
                {"role": "assistant", "content": "{ id: 0 }"},
                {"role": "user", "content": "<question>\\nIntent: "}
            ]

        The reply is expected to contain a JSON-like object with an ``id`` key.
        """
        pass

    def purify_json(self, json_string: str):
        """Return the first balanced ``{...}`` span of ``json_string``.

        A closing brace that appears before any opening brace is ignored.
        When no balanced span exists the input is returned unchanged.
        """
        depth = 0
        start_index = None
        for i, ch in enumerate(json_string):
            if ch == '{':
                if depth == 0:
                    start_index = i
                depth += 1
            elif ch == '}':
                if depth == 0:
                    # Stray closing brace in front of the object: skip it.
                    continue
                depth -= 1
                if depth == 0:
                    return json_string[start_index: i + 1]
        return json_string

    def try_generate_intent_id(self, question: str, intent: IntentNode = None, chunk_size: int = 5, max_chunk_num: int = 10, retry: int = 3) -> int | None:
        """Ask the LLM for an intent id, retrying on any failure.

        Each attempt builds a fresh prompt, calls ``call_llm`` and parses the
        ``id`` field of the first JSON object in the reply.

        Returns:
            The id as int, or None when all ``retry`` attempts failed.
        """
        engine = self.engine
        for attempt in range(retry):
            try:
                message = engine.generate_llm_message(question, intent, chunk_size, max_chunk_num)
                result = self.call_llm(message)
                if result is None:
                    raise ValueError('call_llm returned no content')
                result = json5.loads(self.purify_json(result))
                return int(result['id'])
            except Exception as e:
                # Best effort: record why the attempt failed, then try again.
                logger.debug('attempt {}/{} to infer intent id failed: {}'.format(attempt + 1, retry, e))
                continue
        return None

    def inference(self, question: str, chunk_size: int = 5, max_chunk_num: int = 10) -> list[IntentNode] | None:
        """Classify ``question`` level by level, starting at the schema root.

        At each visited node the LLM picks an intent id; the picked node is
        recorded and visited next when it has at least two children.

        Returns:
            The non-empty list of picked nodes from top to bottom, or None when
            the schema has no root, the LLM gave no usable id, the id is
            unknown, or no node could be picked.
        """
        engine = self.engine
        root_node = engine.schema
        if root_node is None:
            logger.warning('intent schema has no root node, inference is skipped')
            return None

        results: list[IntentNode] = []
        stack: list[IntentNode] = [root_node]
        while len(stack) > 0:
            node = stack.pop()
            intent_id = self.try_generate_intent_id(question, node, chunk_size, max_chunk_num)
            if intent_id is None:
                logger.warning('fail to generate intent id from message, check log file for details')
                logger.debug(json5.dumps({ 'question': question, 'node.name': node.name }, ensure_ascii=False))
                return None
            if intent_id not in engine.id2intent:
                logger.warning('inferred intent id {} not in the list of engine.id2intent {}'.format(intent_id, list(engine.id2intent.keys())))
                logger.debug(json5.dumps({ 'question': question, 'node.name': node.name, 'intent_id': intent_id }, ensure_ascii=False))
                return None

            intent_name = engine.id2intent[intent_id]
            intent_node = engine.name2node[intent_name]

            # The model may answer with the node being asked about or with one
            # picked earlier; following it again would never terminate.
            if intent_node is node or any(seen is intent_node for seen in results):
                logger.warning('inferred intent {} was already visited, stop descending'.format(intent_name))
                break

            results.append(intent_node)
            if len(intent_node.children) >= 2:
                stack.append(intent_node)

        return results if len(results) > 0 else None
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: print the prompt generated for one sample question.
    demo_engine = PromptEngine('./story.yml')
    print(demo_engine.generate_llm_message('如何解决 digital ide 无法载入配置文件的问题?'))
|
83
prompt/erine.py
Normal file
83
prompt/erine.py
Normal file
@ -0,0 +1,83 @@
|
||||
import os
|
||||
import json
|
||||
|
||||
import requests as r
|
||||
|
||||
from core import TreeIntent, logger
|
||||
|
||||
|
||||
class ErineIntent(TreeIntent):
    """TreeIntent backend that sends the prompt to Baidu's ernie-lite-8k chat endpoint."""
    api_key: str
    secret_key: str
    access_token: str
    # Seconds to wait for an HTTP response before the request is abandoned.
    request_timeout = 30

    def __init__(self, path: str, api_key: str = None, secret_key: str = None) -> None:
        """Load the story file and fetch an access token.

        Args:
            path: Story YAML file, passed on to TreeIntent.
            api_key: Baidu API key; falls back to the BAIDU_API_KEY environment variable.
            secret_key: Baidu secret key; falls back to BAIDU_SECRET_KEY.

        Raises:
            KeyError: If a key is neither passed nor present in the environment.
            ValueError: If the access token cannot be obtained.
        """
        super().__init__(path)
        self.api_key = api_key or os.environ['BAIDU_API_KEY']
        self.secret_key = secret_key or os.environ['BAIDU_SECRET_KEY']

        try:
            self.access_token = self.get_access_token()
        except Exception as e:
            # Keep the original failure attached so it shows up in tracebacks.
            raise ValueError('fail to get access token in initialization') from e

    def get_access_token(self):
        """Request a new OAuth access token with the client-credentials grant.

        Raises:
            AssertionError: If the response carries no string ``access_token``.
        """
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }
        api_key = self.api_key
        secret_key = self.secret_key
        url = f'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={api_key}&client_secret={secret_key}'
        payload = json.dumps("")

        res = r.post(
            url=url,
            data=payload,
            headers=headers,
            timeout=self.request_timeout
        )

        resJson = res.json()
        access_token = resJson.get('access_token')
        # Raised explicitly so the check also runs when assertions are disabled.
        if not isinstance(access_token, str):
            raise AssertionError('access_token 获取失败,详细信息' + str(resJson))
        return access_token

    def post_message(self, message: list[dict]):
        """POST the chat history to the ernie-lite-8k endpoint and return the raw response."""
        headers = {
            'Content-Type': 'application/json'
        }
        payload = json.dumps({
            'messages': message,
            'penalty_score': 2.0
        })
        url = 'https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-lite-8k?access_token=' + self.access_token
        return r.post(url, headers=headers, data=payload, timeout=self.request_timeout)

    def _post_and_decode(self, message: list[dict]):
        """Post ``message`` and return the decoded JSON body, or None if it is not JSON."""
        res = self.post_message(message)
        try:
            return res.json()
        except ValueError as e:
            logger.error('get error when parse response of wenxinyiyan: ' + str(e))
            # Log the raw text: decoding again would only raise a second time.
            logger.debug(res.text)
            return None

    def call_llm(self, message: list[dict]) -> str:
        """Return the ``result`` text of the model reply, or None on failure.

        The access token is refreshed and the request repeated once when the
        first request raises, or when the service answers with a token error.
        """
        try:
            body = self._post_and_decode(message)
        except Exception:
            self.access_token = self.get_access_token()
            body = self._post_and_decode(message)

        # NOTE(review): 110 / 111 are taken to be Baidu's "access token invalid"
        # and "access token expired" codes - confirm against the platform docs.
        if isinstance(body, dict) and body.get('error_code') in (110, 111):
            self.access_token = self.get_access_token()
            body = self._post_and_decode(message)

        if not isinstance(body, dict) or 'result' not in body:
            logger.error('get error when parse response of wenxinyiyan: no result field in response')
            logger.debug(body)
            return None
        return body['result']
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual stability check: classify the same sentence 20 times and print
    # how often each top-level intent was chosen.
    from collections import Counter

    classifier = ErineIntent('./config/story.yml')
    tally = []
    for _ in range(20):
        picked = classifier.inference('那不就是rv芯片往上堆扩展吗')
        if picked is not None:
            top_name = picked[0].name
            tally.append(top_name)
            print(top_name)
            continue
        print('none -> ohters')

    print(Counter(tally))
|
68
prompt/log.py
Normal file
68
prompt/log.py
Normal file
@ -0,0 +1,68 @@
|
||||
from __future__ import annotations
|
||||
from typing import Callable
|
||||
import asyncio
|
||||
import sys
|
||||
from weakref import WeakSet
|
||||
|
||||
import json
|
||||
|
||||
|
||||
class AsyncWorker:
    """Schedules ``worker_fn`` coroutines as asyncio tasks and keeps track of them.

    Attributes:
        worker_fn: Coroutine function called with the arguments of ``dispatch``.
        task_pool: Weak set of every task created and still alive.
        loop: Event loop to schedule on; when None the current loop is looked up.
        cb: Optional callable invoked with the finished task when a task ends.
    """
    worker_fn: Callable
    task_pool: WeakSet
    loop: asyncio.AbstractEventLoop | None
    cb: Callable | None

    def __init__(self, worker_fn: Callable, cb: Callable = None) -> None:
        self.worker_fn = worker_fn
        self.loop = None
        self.cb = cb
        self.task_pool = WeakSet()
        # Strong references to unfinished tasks. The event loop itself only
        # holds weak references, so without these a running task could be
        # garbage-collected before it completes.
        self._pending = set()

    def dispatch(self, *args):
        """Create a task running ``worker_fn(*args)``.

        Does nothing when no event loop can be obtained.
        """
        try:
            loop = self.loop or asyncio.get_event_loop()
        except RuntimeError:
            return

        coro = self.worker_fn(*args)
        task = loop.create_task(coro)
        self._pending.add(task)

        def coor_cb(future: asyncio.Future):
            # Release the strong reference, then notify the user callback.
            self._pending.discard(future)
            if self.cb is not None:
                self.cb(future)

        task.add_done_callback(coor_cb)
        self.task_pool.add(task)

    def stop(self):
        """Request cancellation of every tracked task."""
        # Snapshot first: the weak set may shrink while it is being iterated.
        for task in list(self.task_pool):
            task.cancel()

    def complete_all_tasks(self):
        """Return one ``complete_task`` coroutine per tracked task.

        The coroutines are not started here; the caller has to await them,
        for example with ``asyncio.gather``.
        """
        return [self.complete_task(task) for task in list(self.task_pool)]

    async def complete_task(self, task: asyncio.Task):
        """Wait for ``task``, ignoring exceptions raised by it.

        Tasks that belong to a different event loop are not awaited.
        """
        loop = asyncio.get_running_loop()
        if task.get_loop() is not loop:
            return
        try:
            await task
        except Exception:
            pass
|
||||
|
||||
|
||||
async def worker(n, m):
    """Demo coroutine: run n * m empty iterations, then print 'finish'."""
    for _outer in range(n):
        for _inner in range(m):
            pass

    print('finish')
|
||||
|
||||
async_worker = AsyncWorker(worker)


async def _run_demo():
    """Dispatch one demo job on the running loop and wait until it has finished."""
    async_worker.dispatch(1000, 1000)
    await asyncio.gather(*async_worker.complete_all_tasks())


# Run the demo only when the file is executed directly. Tasks make progress
# only while an event loop is running, so the loop is started here instead of
# blocking the importing module in an endless sleep loop.
if __name__ == '__main__':
    asyncio.run(_run_demo())
|
15
rag/admin.py
Normal file
15
rag/admin.py
Normal file
@ -0,0 +1,15 @@
|
||||
from flask import Flask, request, jsonify
|
||||
from loguru import logger
|
||||
|
||||
# File sink for the RAG service: DEBUG and above go to ./logs/rag.log, the
# file is rotated at midnight, archives are zipped and kept for 7 days.
logger.add(
    sink='./logs/rag.log',
    level='DEBUG',
    rotation='00:00',
    retention='7 days',
    compression='zip',
    encoding='utf-8',
    enqueue=True,
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)

# Shared Flask application; the route modules import it to register handlers.
# Flask expects the import name of the module here, not a file path.
app = Flask(__name__)
|
8
rag/configs.py
Normal file
8
rag/configs.py
Normal file
@ -0,0 +1,8 @@
|
||||
|
||||
# Resource name -> path of a file the service needs. The paths start with
# './', i.e. they are resolved against the current working directory.
necessary_files = dict([
    # configuration files
    ('vecdb-config', './config/vecdb.yml'),
    ('intent-story', './config/story.yml'),
    # files of the blog vector store
    ('blog-vecdb-data', './blog-vecdb/index.faiss'),
    ('blog-vecdb-model', './blog-vecdb/index.pkl'),
    # persisted intent classifier
    ('intent-classifier', './model/embedding_mapping.sklearn'),
])
|
@ -6,6 +6,7 @@ class StatusCode(Enum):
|
||||
server_error = 4002
|
||||
resource_not_found = 4003
|
||||
timeout = 4004
|
||||
process_error = 4005
|
||||
|
||||
class MsgCode(Enum):
|
||||
success = '请求处理成功'
|
||||
|
97
rag/intent.py
Normal file
97
rag/intent.py
Normal file
@ -0,0 +1,97 @@
|
||||
from flask import Flask, request, jsonify
|
||||
import numpy as np
|
||||
import joblib
|
||||
import json
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
from embedding import embedding
|
||||
from constant import StatusCode, MsgCode
|
||||
from admin import app
|
||||
from configs import necessary_files
|
||||
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.abspath('.'))
|
||||
|
||||
from prompt import PromptEngine
|
||||
|
||||
class IntentRecogition:
    """Intent classifier working on sentence embeddings.

    Holds a model loaded with joblib and a PromptEngine that maps the
    predicted id back to an intent name.
    """

    def __init__(self) -> None:
        model_path = necessary_files['intent-classifier']
        story_path = necessary_files['intent-story']
        self.embed_intent_classificator = joblib.load(model_path)
        self.engine = PromptEngine(story_path)

    def get_intent_recogition(self, query: str) -> dict:
        """Embed ``query``, predict its intent and return ``{'id', 'name'}``."""
        vectors = embedding.embed_documents([query])
        predicted = self.embed_intent_classificator.predict(vectors)
        result_id = int(predicted[0])
        intent_name = self.engine.id2intent[result_id]
        return {'id': result_id, 'name': intent_name}


# Module-level instance used by the route handlers.
intent_recogition = IntentRecogition()
|
||||
|
||||
|
||||
@app.route('/intent/retrain-embedding-mapping', methods=['post'])
def retrain_embedding_mapping():
    """Retrain the embedding -> intent classifier from the story file.

    The stories are reloaded, embedded and used to fit a new
    LogisticRegression. The model is written to disk first and only then
    swapped into the live ``intent_recogition`` instance, so a failed save
    leaves the running service on the previous model.

    Returns:
        A JSON response with ``code`` / ``data`` / ``msg``. On failure ``code``
        is ``StatusCode.process_error`` and ``data`` holds the error text.
    """
    try:
        engine = PromptEngine(necessary_files['intent-story'])
        model = LogisticRegression()
        sentences = [story.message for story in engine.stories]
        labels = np.array([engine.intent2id[story.intent] for story in engine.stories])

        embed = embedding.embed_documents(sentences)
        model.fit(embed, labels)
        joblib.dump(model, necessary_files['intent-classifier'])
    except Exception as e:
        response = jsonify({
            'code': StatusCode.process_error.value,
            'data': str(e),
            'msg': 'fail to retrain embedding mapping'
        })
        response.status_code = StatusCode.success.value
        return response

    # Training and saving succeeded: switch the live classifier over.
    intent_recogition.engine = engine
    intent_recogition.embed_intent_classificator = model

    response = jsonify({
        'code': StatusCode.success.value,
        'data': 'save data to ' + necessary_files['intent-classifier'],
        'msg': StatusCode.success.value
    })
    response.status_code = StatusCode.success.value
    return response
|
||||
|
||||
|
||||
|
||||
@app.route('/intent/get-intent-recogition', methods=['post'])
def get_intent_recogition():
    """Classify the ``query`` field of the JSON request body.

    Returns:
        A JSON response with ``code`` / ``data`` / ``msg``. ``data`` is the
        ``{'id', 'name'}`` dict of the classifier on success. A body that is
        not a JSON object, or a missing / non-string / blank ``query``, yields
        ``StatusCode.user_error``; a classifier failure yields
        ``StatusCode.process_error``.
    """
    # force=True parses the body whatever its content type is; silent=True
    # turns malformed JSON into None instead of an exception.
    params = request.get_json(force=True, silent=True)
    if not isinstance(params, dict):
        params = {}
    result_data = {}

    query = params.get('query', None)
    if not isinstance(query, str) or len(query.strip()) == 0:
        response = jsonify({
            'code': StatusCode.user_error.value,
            'data': result_data,
            'msg': MsgCode.query_not_empty.value
        })
        response.status_code = StatusCode.success.value
        return response

    try:
        result = intent_recogition.get_intent_recogition(query)
    except Exception as e:
        response = jsonify({
            'code': StatusCode.process_error.value,
            'data': str(e),
            'msg': 'fail to recognize intent'
        })
        response.status_code = StatusCode.success.value
        return response

    response = jsonify({
        'code': StatusCode.success.value,
        'data': result,
        'msg': StatusCode.success.value
    })
    response.status_code = StatusCode.success.value
    return response
|
56
rag/main.py
56
rag/main.py
@ -1,51 +1,15 @@
|
||||
from flask import Flask, request, jsonify
|
||||
import json
|
||||
import os
|
||||
|
||||
from embedding import db
|
||||
from url_mapping import urlmapping
|
||||
from constant import StatusCode, MsgCode
|
||||
from admin import app, logger
|
||||
from intent import *
|
||||
from vecdb import *
|
||||
from configs import necessary_files
|
||||
|
||||
app = Flask(__file__)
|
||||
def assert_resource(path: str):
    """Raise AssertionError naming ``path`` when it does not exist on disk."""
    # Raised explicitly so the check also runs when assertions are disabled,
    # and formatted with the parameter rather than a global loop variable.
    if not os.path.exists(path):
        raise AssertionError('{} 不存在'.format(path))
|
||||
|
||||
@app.route('/vecdb/similarity_search_with_score', methods=['post'])
|
||||
def post_similarity_search_with_score():
|
||||
params = request.data.decode('utf-8')
|
||||
params: dict = json.loads(params)
|
||||
result_data = []
|
||||
|
||||
query = params.get('query', None)
|
||||
if query is None:
|
||||
response = jsonify({
|
||||
'code': StatusCode.user_error.value,
|
||||
'data': result_data,
|
||||
'msg': MsgCode.query_not_empty.value
|
||||
})
|
||||
response.status_code = StatusCode.success.value
|
||||
return response
|
||||
|
||||
k = int(params.get('k', 3))
|
||||
results = db.similarity_search_with_score(query=query, k=k)
|
||||
|
||||
for doc, score in results:
|
||||
page_content = doc.page_content
|
||||
meta = doc.metadata
|
||||
source = meta.get('source', '')
|
||||
if len(source) > 0:
|
||||
source = urlmapping.url_from_mapping(source)
|
||||
result_data.append({
|
||||
'content': page_content.strip(),
|
||||
'meta': meta,
|
||||
'source': source,
|
||||
'score': float(score)
|
||||
})
|
||||
|
||||
response = jsonify({
|
||||
'code': StatusCode.success.value,
|
||||
'data': result_data,
|
||||
'msg': StatusCode.success.value
|
||||
})
|
||||
response.status_code = StatusCode.success.value
|
||||
return response
|
||||
# Import-time check: stop right away when a required resource is missing.
for file in necessary_files.values():
    assert_resource(file)
|
||||
|
||||
if __name__ == '__main__':
|
||||
from gevent import pywsgi
|
||||
@ -53,6 +17,6 @@ if __name__ == '__main__':
|
||||
config: dict = yaml.load(open('./config/vecdb.yml'), Loader=yaml.Loader)
|
||||
addr = config.get('addr', '127.0.0.1')
|
||||
port = int(config.get('port', 8081))
|
||||
|
||||
server = pywsgi.WSGIServer((addr, port), app)
|
||||
logger.info('RAG 系统运行在 http://{}:{}'.format(addr, port))
|
||||
server.serve_forever()
|
14
rag/test.py
14
rag/test.py
@ -1,14 +0,0 @@
|
||||
import requests as r
|
||||
import json
|
||||
|
||||
payload = json.dumps({
|
||||
'query': '一键生成 requirements.txt ',
|
||||
'k': 3
|
||||
})
|
||||
|
||||
res = r.post('http://localhost:8081/vecdb/similarity_search_with_score', data=payload)
|
||||
|
||||
print(res.status_code)
|
||||
|
||||
if res.status_code == 200:
|
||||
print(res.json())
|
47
rag/vecdb.py
Normal file
47
rag/vecdb.py
Normal file
@ -0,0 +1,47 @@
|
||||
from flask import Flask, request, jsonify
|
||||
import json
|
||||
|
||||
from embedding import db
|
||||
from constant import StatusCode, MsgCode
|
||||
from url_mapping import urlmapping
|
||||
from admin import app
|
||||
|
||||
@app.route('/vecdb/similarity_search_with_score', methods=['post'])
def post_similarity_search_with_score():
    """Return the ``k`` documents most similar to ``query`` with their scores.

    The JSON request body carries ``query`` (required string) and ``k``
    (optional integer, default 3).

    Returns:
        A JSON response with ``code`` / ``data`` / ``msg``. ``data`` is a list
        of ``{'content', 'meta', 'source', 'score'}`` dicts. A body that is not
        a JSON object, a missing / non-string / blank ``query`` or a
        non-integer ``k`` yields ``StatusCode.user_error``.
    """
    # force=True parses the body whatever its content type is; silent=True
    # turns malformed JSON into None instead of an exception.
    params = request.get_json(force=True, silent=True)
    if not isinstance(params, dict):
        params = {}
    result_data = []

    query = params.get('query', None)
    if not isinstance(query, str) or len(query.strip()) == 0:
        response = jsonify({
            'code': StatusCode.user_error.value,
            'data': result_data,
            'msg': MsgCode.query_not_empty.value
        })
        response.status_code = StatusCode.success.value
        return response

    try:
        k = int(params.get('k', 3))
    except (TypeError, ValueError):
        response = jsonify({
            'code': StatusCode.user_error.value,
            'data': result_data,
            'msg': 'k must be an integer'
        })
        response.status_code = StatusCode.success.value
        return response

    results = db.similarity_search_with_score(query=query, k=k)

    for doc, score in results:
        meta = doc.metadata
        source = meta.get('source', '')
        if len(source) > 0:
            # Translate the stored source path through the url mapping.
            source = urlmapping.url_from_mapping(source)
        result_data.append({
            'content': doc.page_content.strip(),
            'meta': meta,
            'source': source,
            'score': float(score)
        })

    response = jsonify({
        'code': StatusCode.success.value,
        'data': result_data,
        'msg': StatusCode.success.value
    })
    response.status_code = StatusCode.success.value
    return response
|
110
scripts/ernie.py
Normal file
110
scripts/ernie.py
Normal file
@ -0,0 +1,110 @@
|
||||
import requests as r
|
||||
import json
|
||||
|
||||
import os

# Credentials are read from the environment instead of being committed to the
# repository. The values that used to be hard-coded here are exposed in the
# history and should be revoked. An unset variable yields an empty string, in
# which case the token request below fails and reports the service's answer.
api_key = os.environ.get('BAIDU_API_KEY', '')
secret_key = os.environ.get('BAIDU_SECRET_KEY', '')
|
||||
|
||||
def get_access_token():
    """Exchange the module-level api_key / secret_key for an access token.

    Raises AssertionError (carrying the decoded response) when the reply has
    no string ``access_token`` field.
    """
    token_url = f'https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id={api_key}&client_secret={secret_key}'
    reply = r.post(
        url=token_url,
        data=json.dumps(""),
        headers={
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }
    )

    resJson = reply.json()
    token = resJson.get('access_token')
    assert isinstance(token, str), 'access_token 获取失败,详细信息' + str(resJson)
    return token
|
||||
|
||||
access_token = get_access_token()

# Prompt fixtures for few-shot experiments. They are read with context
# managers so the file handles are closed right away.
with open('./template.txt', 'r', encoding='utf-8') as fp:
    text = fp.read()
with open('./t2.txt', 'r', encoding='utf-8') as fp:
    t2 = fp.read()

# Conversation sent to the model. For few-shot prompting, insert alternating
# user / assistant turns in front of the final user turn, for example:
#   {'role': 'user', 'content': text},
#   {'role': 'assistant', 'content': '{ id: 3 }'},
# Every user turn ends with '\nIntent: ' and every assistant turn is the
# expected '{ id: N }' answer.
messages = [
    {
        "role": "user",
        "content": "如何解决 digital ide 无法载入配置文件的问题?\nIntent: "
    }
]

payload = json.dumps({
    'messages': messages
})

headers = {
    'Content-Type': 'application/json'
}

url = 'https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-lite-8k?access_token=' + access_token
res = r.post(url, headers=headers, data=payload)
print(res.json())
|
||||
# print(res.json()['result'])
|
||||
|
||||
# cache = []
|
||||
|
||||
# for line in res.iter_lines():
|
||||
# line_text: str = line.decode('UTF-8')
|
||||
# if line_text.startswith('data:'):
|
||||
# iter_json = json.loads(line_text.lstrip('data: '))
|
||||
# result: str = iter_json['result']
|
||||
# cache.append(result)
|
||||
# if result.endswith('。') or result.endswith('.'):
|
||||
# sentence = ''.join(cache).strip()
|
||||
# print(sentence)
|
||||
# cache.clear()
|
||||
|
||||
# if len(cache) > 0:
|
||||
# print(''.join(cache).strip())
|
123
scripts/make_ts_context_api.py
Normal file
123
scripts/make_ts_context_api.py
Normal file
@ -0,0 +1,123 @@
|
||||
import markdown
|
||||
import re
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
|
||||
"""/**
|
||||
* @description 发送私聊消息
|
||||
* @param user_id 对方 QQ 号
|
||||
* @param message 要发送的内容
|
||||
* @param auto_escape 消息内容是否作为纯文本发送(即不解析 CQ 码),只在 message 字段是字符串时有效
|
||||
*/
|
||||
function send_private_msg(user_id: number, message: Lagrange.Message, auto_escape: boolean = false) {
|
||||
return {
|
||||
action: 'send_private_msg',
|
||||
params: { user_id, message, auto_escape }
|
||||
};
|
||||
}"""
|
||||
|
||||
template = """/**
|
||||
* @description %s%s */
|
||||
public %s(%s) {
|
||||
return this.send({
|
||||
action: '%s',
|
||||
params: { %s }
|
||||
});
|
||||
}
|
||||
|
||||
"""
|
||||
|
||||
# Read the OneBot API document, render the markdown to HTML and parse it so
# the headings can be walked below. The context manager closes the file.
with open('./scripts/onebot.md', 'r', encoding='utf-8') as fp:
    onebot_document = fp.read()
html = markdown.markdown(onebot_document)
soup = BeautifulSoup(html, 'html.parser')
|
||||
|
||||
def snake_to_camel(s, capitalize_first_letter=False):
    """Convert a snake_case name to camelCase (or PascalCase).

    Args:
        s: Name with '_' separated components.
        capitalize_first_letter: Keep the first letter upper-case when True.

    Returns:
        The converted name; an empty string when ``s`` has no letters to keep.
    """
    camel = ''.join(part.capitalize() for part in s.split('_'))
    # Guard against '' (or a name made of underscores only) before indexing.
    if camel and not capitalize_first_letter:
        camel = camel[0].lower() + camel[1:]
    return camel
|
||||
|
||||
def next_node(el: Tag):
    """Return the next sibling of ``el`` whose text is not blank.

    Returns None when no such sibling exists.
    """
    p = el.next_sibling
    # Stop at the end of the sibling chain instead of dereferencing None.
    while p is not None and len(p.text.strip()) == 0:
        p = p.next_sibling
    return p
|
||||
|
||||
tss = """/**
|
||||
* @author 锦恢
|
||||
* @email 1193466151@qq.com
|
||||
* @description Lagrange.Core 前端接口
|
||||
* @comment 接口调用详细参考文档
|
||||
* - https://github.com/botuniverse/onebot-11/blob/master/communication/ws.md
|
||||
*/
|
||||
|
||||
import * as Lagrange from '../type';
|
||||
|
||||
"""
|
||||
|
||||
# Walk every <h2> heading of the parsed document. A heading that contains both
# a <code> child (the action name) and a plain-text child (its description) is
# rendered into one TypeScript method via `template`.
for el in soup.find_all('h2'):
    el: Tag

    function_name = None
    function_desc = None
    for child in el.children:
        if child.name == 'code':
            function_name = child.text
        elif child.name is None:
            # A child without a tag name is a bare text node.
            function_desc = child.text

    if function_name and function_desc:
        ts_func_name = snake_to_camel(function_name)

        # The parameter table is the second non-blank node after the heading.
        title2 = next_node(el)
        table = next_node(title2)

        params = []
        for count, line in enumerate(table.text.strip().split('\n'), start=1):
            # The first two lines are skipped (presumably the markdown table
            # header and its separator row).
            if count < 3:
                continue

            splits = [cell for cell in line.split('|') if len(cell.strip()) > 0]
            print(splits)
            if len(splits) == 4:
                # name | type | default | description
                param = {
                    'name': splits[0].strip(),
                    'type': splits[1].strip().split()[0],
                    'default': splits[2].strip(),
                    'desc': splits[3].strip()
                }
            elif len(splits) == 3:
                # name | type | description; '-' marks "no default value".
                param = {
                    'name': splits[0].strip(),
                    'type': splits[1].strip().split()[0],
                    'default': '-',
                    'desc': splits[2].strip()
                }
            else:
                # Not a parameter row. Skip it rather than re-appending the
                # previous row's `param` (or hitting a NameError on the
                # first row).
                continue

            if param['type'] == 'message':
                param['type'] = 'string | Lagrange.Send.Default[]'

            params.append(param)

        t1 = function_desc.strip()
        t2 = '\n' + ''.join(
            ' * @param {} {}\n'.format(p['name'], p['desc']) for p in params
        )
        t3 = ts_func_name
        # Parameters without a default come first so the generated signature
        # never places a required parameter after an optional one.
        required = ['{}: {}'.format(p['name'], p['type'])
                    for p in params if p['default'] == '-']
        optional = ['{}: {} = {}'.format(p['name'], p['type'], p['default'])
                    for p in params if p['default'] != '-']
        t4 = ', '.join(required + optional)
        t5 = function_name
        t6 = ', '.join(p['name'] for p in params)
        ts_code = template % (t1, t2, t3, t4, t5, t6)

        tss += ts_code

# Write the accumulated TypeScript source; `with` closes (and flushes) the file.
with open('./scripts/onebot.ts', 'w', encoding='utf-8') as ts_file:
    ts_file.write(tss)
|
176
scripts/prompt.py
Normal file
176
scripts/prompt.py
Normal file
@ -0,0 +1,176 @@
|
||||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
import warnings
|
||||
import random
|
||||
import math
|
||||
|
||||
import yaml
|
||||
|
||||
# eq=False keeps identity-based __eq__/__hash__. The dataclass-generated
# versions would hash the list fields (TypeError: unhashable type) and compare
# nodes field-by-field through the parent <-> children links, which can recurse
# without end for two structurally similar trees.
@dataclass(frozen=True, eq=False)
class IntentNode:
    """One node of the intent tree declared under ``schema`` in the story file.

    The instance is frozen (attributes cannot be rebound), but the ``children``
    and ``stories`` lists are mutated in place while the tree is being built.
    """
    # Intent name; used as the key of the intent lookup tables.
    name: str
    # Optional human-readable description of the intent.
    description: str | None
    # Direct sub-intents of this node.
    children: list[IntentNode]
    # Parent node, or None for the root.
    parent: IntentNode | None
    # Example stories labelled with this intent.
    stories: list[Story]


@dataclass(frozen=True)
class Story:
    """A labelled example: a user message and the name of its intent."""
    # The example user message.
    message: str
    # Name of the intent the message is labelled with.
    intent: str
|
||||
|
||||
class PromptEngine:
    """Builds few-shot intent-classification chat messages from a YAML file.

    The YAML file is read for three top-level keys: ``schema`` (a tree of
    intents under ``root``), ``stories`` (message/intent example pairs) and
    ``rejects`` (a list of strings).
    """
    path: str
    schema: IntentNode | None
    stories: list[Story]
    rejects: list[str]
    intent2id: dict[str, int]
    id2intent: dict[int, str]
    name2node: dict[str, IntentNode]

    def __init__(self, path: str) -> None:
        """Load the YAML file at ``path`` and build the intent tables.

        Raises:
            KeyError: if the file has no ``schema`` key.
            NameError: if a schema node has no ``name``.
        """
        self.path = path
        # NOTE(review): yaml.Loader can construct arbitrary Python objects;
        # only load trusted files, or consider yaml.safe_load.
        with open(path, 'r', encoding='utf-8') as config_file:
            self.config = yaml.load(config_file, yaml.Loader)
        self.intent2id = {}
        self.id2intent = {}
        self.name2node = {}
        self.schema = self.handle_schema(self.config['schema'])
        # `stories` / `rejects` may be absent or empty in the YAML file.
        self.stories = self.handle_stories(self.config.get('stories'))
        self.rejects = self.handle_rejects(self.config.get('rejects'))

    def handle_schema(self, raw_schema: dict) -> IntentNode | None:
        """Build the intent tree from the raw ``schema`` mapping.

        Nodes are visited level by level; each new name receives the next
        free integer id in ``intent2id`` / ``id2intent`` and is registered in
        ``name2node``.

        Returns:
            The root node, or ``None`` (with a warning) when ``raw_schema``
            has no ``root`` entry.

        Raises:
            NameError: if a node has no ``name``.
        """
        raw_root = raw_schema.get('root', None)
        if raw_root is None:
            warnings.warn('schema must have a root node as the beginning, otherwise intent recogition will not work')
            return None

        current_layers: list[tuple[dict, IntentNode | None]] = [(raw_root, None)]
        nodes: list[IntentNode] = []

        # Level-order traversal: each entry pairs a raw node with its
        # already-built parent.
        while len(current_layers) > 0:
            next_layers: list[tuple[dict, IntentNode | None]] = []
            for raw_node, parent_node in current_layers:
                name = raw_node.get('name', None)
                description = raw_node.get('description', None)
                # `children:` with no value parses as None.
                raw_children = raw_node.get('children', None) or []
                if name is None:
                    raise NameError('you must specify a name in schema node, current node : {}'.format(raw_node))

                if name not in self.intent2id:
                    assign_id = len(self.intent2id)
                    self.intent2id[name] = assign_id
                    self.id2intent[assign_id] = name

                node = IntentNode(name, description, [], parent_node, [])
                self.name2node[name] = node

                nodes.append(node)
                if parent_node is not None:
                    parent_node.children.append(node)
                for raw_child in raw_children:
                    next_layers.append((raw_child, node))
            current_layers = next_layers
        return nodes[0]

    def handle_stories(self, raw_stories: list[dict] | None) -> list[Story]:
        """Convert raw message/intent pairs into ``Story`` objects.

        Pairs whose intent was not declared in the schema are skipped with a
        warning; pairs without a message are skipped silently. Each accepted
        story is also attached to its intent node. ``None`` is treated as an
        empty list.
        """
        stories: list[Story] = []
        for pair in raw_stories or []:
            message = pair.get('message', None)
            intent = pair.get('intent', None)
            if intent not in self.intent2id:
                warnings.warn('{} is not the intent you declare in schema, so this pair will be ignored'.format(intent))
                continue
            if message and intent:
                story = Story(message, intent)
                self.name2node[intent].stories.append(story)
                stories.append(story)
        return stories

    def handle_rejects(self, raw_rejects: list[str] | None) -> list[str]:
        """Return a copy of the reject list; ``None`` yields an empty list."""
        return list(raw_rejects or [])

    def generate_chunk(self, stories: list[Story]) -> tuple[str, str]:
        """Render one few-shot chunk as a (user, assistant) content pair.

        Every story but the last is rendered with its answer inside the user
        content; the last story's answer is left open in the user content and
        returned as the assistant content.

        Raises:
            ValueError: if ``stories`` is empty.
        """
        if not stories:
            raise ValueError('generate_chunk requires at least one story')

        prompts = []
        for story in stories:
            prompts.append('Message: ' + story.message.strip())
            intent_id = self.intent2id.get(story.intent)
            prompts.append('Intent: { id: %s }' % (intent_id))

        # Drop the last answer line: it becomes the assistant reply below.
        prompts.pop()

        user_content = '\n'.join(prompts) + '\n' + 'Intent: '
        # NOTE(review): this answer format ('{id : N}') differs from the
        # in-chunk examples ('{ id: N }'); confirm which one the response
        # parser expects before unifying.
        assistant_content = '{id : %s}' % (intent_id)
        return user_content, assistant_content

    def generate_llm_message(self, question: str, intent: IntentNode | None = None, chunk_size: int = 5, max_chunk_num: int = 10):
        """Build the chat message list used to classify ``question``.

        Args:
            question: The user message to classify.
            intent: The node whose children are the candidate intents;
                defaults to the schema root.
            chunk_size: Number of example stories per user/assistant pair.
            max_chunk_num: Stop adding example pairs once this many have
                been emitted (the check runs after each pair is added).

        Returns:
            A list of ``{'role', 'content'}`` dicts: the example pairs
            followed by the question. The instruction preset is prepended to
            the first message's content.

        Raises:
            ValueError: if no ``intent`` is given and the schema has no root.
        """
        if intent is None:
            intent = self.schema
        if intent is None:
            raise ValueError('no intent node given and the schema has no root node')

        story_cache = []
        for node in intent.children:
            story_cache.extend(node.stories)

        # Examples are shuffled, so chunk contents differ between calls.
        random.shuffle(story_cache)
        chunk_num = math.ceil(len(story_cache) / chunk_size)
        message = []
        for chunk_id in range(chunk_num):
            start = chunk_id * chunk_size
            chunk = story_cache[start: start + chunk_size]
            user_content, assistant_content = self.generate_chunk(chunk)
            message.append({
                'role': 'user',
                'content': user_content
            })
            message.append({
                'role': 'assistant',
                'content': assistant_content
            })

            # Each chunk contributes two messages.
            if len(message) / 2 >= max_chunk_num:
                break

        message.append({
            'role': 'user',
            'content': question + '\nIntent: '
        })

        # Build the instruction preset that opens the conversation.
        preset = 'Label a users message from a conversation with an intent. Reply ONLY with the name of the intent.'
        intent_preset = ['The intent should be one of the following:']
        for node in intent.children:
            intent_id = self.intent2id.get(node.name)
            intent_preset.append('- {}'.format(intent_id))
        intent_preset = '\n'.join(intent_preset)
        message[0]['content'] = preset + '\n' + intent_preset + '\n' + message[0]['content']
        return message
|
||||
|
||||
|
||||
class KIntent:
    """Intent recogniser wrapping a ``PromptEngine`` built from a story file."""
    path: str
    engine: PromptEngine

    def __init__(self, path: str) -> None:
        """Create the prompt engine from the story file at ``path``."""
        self.path = path
        self.engine = PromptEngine(path)

    def inference(self, question: str, chunk_size: int = 5, max_chunk_num: int = 10) -> list[IntentNode]:
        """Infer the intent nodes matching ``question``.

        TODO: the traversal is not implemented yet. The loop only drains the
        stack, so the result is currently always an empty list; ``question``,
        ``chunk_size`` and ``max_chunk_num`` are not used.

        Returns:
            The recognised intent nodes (currently always ``[]``), so the
            declared return type holds instead of an implicit ``None``.
        """
        root_node = self.engine.schema
        results: list[IntentNode] = []
        stack = [root_node]
        while len(stack) > 0:
            node = stack.pop()

        return results
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: build the chat messages for one sample question from
    # ./story.yml (resolved against the current working directory) and print
    # them.
    demo_question = '如何解决 digital ide 无法载入配置文件的问题?'
    print(PromptEngine('./story.yml').generate_llm_message(demo_question))
|
41
scripts/story.yml
Normal file
41
scripts/story.yml
Normal file
@ -0,0 +1,41 @@
|
||||
schema:
|
||||
root:
|
||||
name: root
|
||||
children:
|
||||
- name: usage
|
||||
description: 使用查询
|
||||
children:
|
||||
- name: bug
|
||||
description: bug 查询
|
||||
children:
|
||||
- name: command
|
||||
description: 指令
|
||||
children:
|
||||
- name: others
|
||||
description: 其他
|
||||
children:
|
||||
|
||||
stories:
|
||||
- message: 请问 property.json 如何配置?
|
||||
intent: usage
|
||||
- message: 我的自动补全无法使用,是不是有bug?
|
||||
intent: bug
|
||||
- message: 帮我上传一下这份数据
|
||||
intent: command
|
||||
- message: surface了解一下?
|
||||
intent: others
|
||||
- message: 大佬们,为啥我的digital ide启动之后所有功能都没启动捏?我配置了property文件,然后插件的vivado路经和modelsim路经都加上了
|
||||
intent: usage
|
||||
- message: 这群要被chisel夺舍了吗
|
||||
intent: others
|
||||
- message: Metals一开直接报错
|
||||
intent: others
|
||||
- message: 话说digital-ide打开大的verilog卡死了
|
||||
intent: bug
|
||||
- message: 请问一下,第一次点击对文件仿真可以出波形文件,再次点击的时候就会提示unknown module type了。是哪个配置没配置好?
|
||||
intent: usage
|
||||
|
||||
rejects:
|
||||
- metal
|
||||
- metals
|
||||
- idea
|
18
scripts/t2.txt
Normal file
18
scripts/t2.txt
Normal file
@ -0,0 +1,18 @@
|
||||
Label a user's message from a conversation with an intent. Reply ONLY with the name of the intent.
|
||||
|
||||
The intent should be one of the following:
|
||||
- 0
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
|
||||
Message: 大佬们,为啥我的digital ide启动之后所有功能都没启动捏?我配置了property文件,然后插件的vivado路经和modelsim路经都加上了
|
||||
Intent: { id: 0 }
|
||||
Message: 这群要被chisel夺舍了吗
|
||||
Intent: { id: 3 }
|
||||
Message: Metals一开直接报错
|
||||
Intent: { id: 3 }
|
||||
Message: 话说digital-ide打开大的verilog卡死了
|
||||
Intent: { id: 1 }
|
||||
Message: 请问一下,第一次点击对文件仿真可以出波形文件,再次点击的时候就会提示unknown module type了。是哪个配置没配置好?
|
||||
Intent: { id: <answer here> }
|
16
scripts/template.txt
Normal file
16
scripts/template.txt
Normal file
@ -0,0 +1,16 @@
|
||||
Label a user's message from a conversation with an intent. Reply ONLY with the name of the intent.
|
||||
The intent should be one of the following:
|
||||
- 0
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
Message: 请问 property.json 如何配置?
|
||||
Intent: { id: 0 }
|
||||
Message: 我的自动补全无法使用,是不是有bug?
|
||||
Intent: { id: 1 }
|
||||
Message: 帮我上传一下这份数据
|
||||
Intent: { id: 2 }
|
||||
Message: 为数不多我觉得很好的动漫了
|
||||
Intent: { id: 3 }
|
||||
Message: M1系列可以跑iPhone和iPad游戏
|
||||
Intent: { id: <answer here> }
|
6
scripts/test.js
Normal file
6
scripts/test.js
Normal file
@ -0,0 +1,6 @@
|
||||
|
||||
/**
 * Logs the given value to the console.
 * @param a value to print
 */
function hello(a) {
    console.log(a);
}

// Invoke through Function.prototype.call, passing the current `this` as the
// receiver and 'hello' as the argument.
hello.call(this, 'hello');
|
13
scripts/text.txt
Normal file
13
scripts/text.txt
Normal file
@ -0,0 +1,13 @@
|
||||
Label a user's message from a
|
||||
conversation with an intent. Reply ONLY with the name of the intent.
|
||||
|
||||
The intent should be one of the following:
|
||||
{% for intent in intents %}
|
||||
- {{intent}}
|
||||
{% endfor %}
|
||||
{% for example in examples %}
|
||||
Message: {{example['text']}}
|
||||
Intent: {{example['intent']}}
|
||||
{% endfor %}
|
||||
Message: {{message}}
|
||||
Intent:
|
41
test/index.js
Normal file
41
test/index.js
Normal file
@ -0,0 +1,41 @@
|
||||
const path = require('path');
|
||||
const Mocha = require('mocha');
|
||||
const glob = require('glob');
|
||||
|
||||
/**
 * Collects every `*.test.js` file below the project root and runs them with
 * Mocha (TDD interface).
 *
 * @returns {Promise<void>} resolves when all tests pass; rejects with the
 *   glob error, a synchronous Mocha error, or an Error reporting the number
 *   of failed tests.
 */
function run() {
    // Create the mocha test
    const mocha = new Mocha({
        ui: 'tdd',
        color: true
    });

    // The directory above this file.
    const testsRoot = path.resolve(__dirname, '..');

    return new Promise((c, e) => {
        // The search starts one level above this file, so dependency
        // folders must be excluded or their bundled *.test.js files would
        // be collected too.
        const globOptions = { cwd: testsRoot, ignore: '**/node_modules/**' };

        glob('**/**.test.js', globOptions, (err, files) => {
            if (err) {
                return e(err);
            }

            files.forEach(f => mocha.addFile(path.resolve(testsRoot, f)));

            try {
                // Per-test timeout in milliseconds.
                mocha.timeout(60000);
                mocha.run(failures => {
                    if (failures > 0) {
                        e(new Error(`${failures} tests failed.`));
                    } else {
                        c();
                    }
                });
            } catch (err) {
                console.error(err);
                e(err);
            }
        });
    });
}
|
||||
|
||||
// Handle the rejection explicitly so a failed run is reported and the process
// exits with a non-zero code instead of leaving an unhandled promise rejection.
run().catch(err => {
    console.error(err);
    process.exitCode = 1;
});
|
45
test/suite/rag.test.js
Normal file
45
test/suite/rag.test.js
Normal file
@ -0,0 +1,45 @@
|
||||
const fs = require('fs');
|
||||
const yaml = require('yaml');
|
||||
const assert = require('assert');
|
||||
const axios = require('axios');
|
||||
|
||||
// Read the vector-DB service address and port from the YAML config. The path
// is relative to the current working directory.
const vecdbBuffer = fs.readFileSync('./config/vecdb.yml', 'utf-8');
const vecdbConfig = yaml.parse(vecdbBuffer);
const vecdbBaseURL = `http://${vecdbConfig['addr']}:${vecdbConfig['port']}`;

// Shared axios instance for the vecdb service; requests time out after 5000 ms.
const vecdbRequests = axios.create({
    baseURL: vecdbBaseURL,
    timeout: 5000
});

/**
 * POSTs `req` to the intent-recognition endpoint of the vecdb service.
 * @param req request body; the tests below pass `{ query }`
 * @returns the axios response promise
 */
const apiGetIntentRecogition = (req) => vecdbRequests({
    url: '/intent/get-intent-recogition', method: 'POST',
    data: req
});
|
||||
|
||||
|
||||
// Sends each sample message to the intent-recognition endpoint and checks
// that the returned intent name is one of the accepted answers.
suite('test intent recogition', () => {

    // Test cases could also be prepared in advance, either inline here or
    // loaded from a static file. `expect` is a comma-separated list of
    // acceptable intent names.
    const intent_suites = [
        { input: '如何使用 digital ide 这个插件?', expect: 'usage' },
        { input: '我今天打开 vscode,发现 自动补全失效了,我是哪里没有配置好吗?', expect: 'usage,bug' },
        { input: 'path top.v is not a hdlFile 请问报这个错误大概是啥原因啊', expect: 'usage,bug' },
        { input: '我同学在学习强国看到小麦收割了,然后就买相应的股就赚了', expect: 'others' },
        { input: '我平时写代码就喜欢喝茶', expect: 'others' },
    ];

    for (const s of intent_suites) {
        const input = s.input;
        const expects = s.expect.split(',');

        test(`Message: ${input} => Intent: ${expects.join(',')}`, async () => {
            const axiosRes = await apiGetIntentRecogition({ query: input });
            const res = axiosRes.data;
            const payload = res && res.data;

            // Fail with a readable message instead of a TypeError when the
            // response carries no payload.
            assert(payload, `intent service returned no payload: ${JSON.stringify(res)}`);
            const intentName = payload.name;

            assert(expects.includes(intentName), `infer intent "${intentName}" not in expect "${expects}"`);
        });
    }
});
|
@ -4,7 +4,10 @@
|
||||
"target": "ES2020",
|
||||
"outDir": "dist",
|
||||
"esModuleInterop": true,
|
||||
"experimentalDecorators": true
|
||||
"experimentalDecorators": true,
|
||||
"declaration": true,
|
||||
"declarationDir": "dist",
|
||||
"typeRoots": ["./types"]
|
||||
},
|
||||
"include": [
|
||||
"bot/**/*"
|
||||
|
Loading…
x
Reference in New Issue
Block a user