完成单元测试 | 更新架构

This commit is contained in:
锦恢 2024-06-06 14:05:57 +08:00
parent c27bb27ec3
commit b811246fba
42 changed files with 7374 additions and 1393 deletions

7
.gitignore vendored
View File

@ -6,4 +6,9 @@ node_modules
__pycache__ __pycache__
*.pyc *.pyc
*.pyd *.pyd
*.pyi *.pyi
scripts/*.ts
audio
images
logs
*.sklearn

View File

@ -32,13 +32,47 @@ pip install -r requirements.txt
## 架构 ## 架构
```mermaid ```mermaid
graph LR graph TB
a(拉格朗日 NTQQ server) <-->|http,ws| b(onebot layer) core(Lagrange.Core)
onebot(Lagrange.onebot)
vecdb(vecdb)
llm(LLM)
intent(intent tree)
c(vecdb) -->|http| b core(Lagrange.Core) --> onebot(Lagrange.onebot)
d(LLM) -->|http| b
onebot -->|query| intent
intent -->|intent| onebot
subgraph Intent Recognition
intent -->|query| vecdb
vecdb -->|ktop| intent
intent -->|ktop,query| llm
llm -->|intent| intent
end
subgraph execution
onebot --> command{intent}
command --> query
command --> upload
command --> ...
end
subgraph third party
LLM
Google
server
end
query --> LLM
query --> Google
upload --> server
``` ```
- `Lagrange.onebot` --> 📁bot
- `vecdb` --> 📁rag
- `intent tree` --> 📁prompt
--- ---
## 接口规范 ## 接口规范

View File

@ -1,453 +0,0 @@
/**
* @author
* @email 1193466151@qq.com
* @description Lagrange.Core
* @comment
* - https://github.com/botuniverse/onebot-11/blob/master/communication/ws.md
*/
import * as Lagrange from '../type';
/**
 * Builds the request payload for the OneBot `send_private_msg` action.
 * Nothing is sent here; the caller is responsible for transmitting the object.
 * @param user_id QQ number stored in `params.user_id`.
 * @param message Message content: a plain string or an array of send segments.
 * @param auto_escape Forwarded as `params.auto_escape` (the original note mentions CQ codes); defaults to `false`.
 * @returns An `{ action, params }` object.
 */
export function sendPrivateMsg(user_id: number, message: string | Lagrange.Send.Default[], auto_escape = false) {
    const params = { user_id, message, auto_escape };
    return { action: 'send_private_msg', params };
}
/**
 * Builds the request payload for the OneBot `send_group_msg` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param message Message content: a plain string or an array of send segments.
 * @param auto_escape Forwarded as `params.auto_escape` (the original note mentions CQ codes); defaults to `false`.
 * @returns An `{ action, params }` object.
 */
export function sendGroupMsg(group_id: number, message: string | Lagrange.Send.Default[], auto_escape = false) {
    const params = { group_id, message, auto_escape };
    return { action: 'send_group_msg', params };
}
/**
 * Builds the request payload for the OneBot `send_msg` action.
 * @param message_type Message kind; the original note lists `private` and `group`.
 * @param user_id QQ number (per the original note, used for private messages).
 * @param group_id Group number (per the original note, used for group messages).
 * @param message Message content: a plain string or an array of send segments.
 * @param auto_escape Forwarded as `params.auto_escape`; defaults to `false`.
 * @returns An `{ action, params }` object.
 */
export function sendMsg(message_type: string, user_id: number, group_id: number, message: string | Lagrange.Send.Default[], auto_escape = false) {
    const params = { message_type, user_id, group_id, message, auto_escape };
    return { action: 'send_msg', params };
}
/**
 * Builds the request payload for the OneBot `delete_msg` action.
 * @param message_id ID of the message, stored in `params.message_id`.
 * @returns An `{ action, params }` object.
 */
export function deleteMsg(message_id: number) {
    const params = { message_id };
    return { action: 'delete_msg', params };
}
/**
 * Builds the request payload for the OneBot `get_msg` action.
 * @param message_id ID of the message, stored in `params.message_id`.
 * @returns An `{ action, params }` object.
 */
export function getMsg(message_id: number) {
    const params = { message_id };
    return { action: 'get_msg', params };
}
/**
 * Builds the request payload for the OneBot `get_forward_msg` action.
 * @param id ID of the forwarded message, stored in `params.id`.
 * @returns An `{ action, params }` object.
 */
export function getForwardMsg(id: string) {
    const params = { id };
    return { action: 'get_forward_msg', params };
}
/**
 * Builds the request payload for the OneBot `send_like` action.
 * @param user_id QQ number stored in `params.user_id`.
 * @param times Number of likes; defaults to 1.
 *              NOTE(review): the original note mentions 10 — presumably a daily cap; confirm.
 * @returns An `{ action, params }` object.
 */
export function sendLike(user_id: number, times = 1) {
    const params = { user_id, times };
    return { action: 'send_like', params };
}
/**
 * Builds the request payload for the OneBot `set_group_kick` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param user_id QQ number stored in `params.user_id`.
 * @param reject_add_request Forwarded as `params.reject_add_request`; defaults to `false`.
 * @returns An `{ action, params }` object.
 */
export function setGroupKick(group_id: number, user_id: number, reject_add_request = false) {
    const params = { group_id, user_id, reject_add_request };
    return { action: 'set_group_kick', params };
}
/**
 * Builds the request payload for the OneBot `set_group_ban` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param user_id QQ number stored in `params.user_id`.
 * @param duration Ban duration; defaults to `30 * 60`.
 *                 NOTE(review): the original note mentions 0 — presumably 0 lifts the ban; confirm.
 * @returns An `{ action, params }` object.
 */
export function setGroupBan(group_id: number, user_id: number, duration = 30 * 60) {
    const params = { group_id, user_id, duration };
    return { action: 'set_group_ban', params };
}
/**
 * Builds the request payload for the OneBot `set_group_anonymous_ban` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param anonymous The `anonymous` object to forward unchanged.
 * @param anonymous_flag The anonymous user's flag string.
 * @param duration Ban duration; defaults to `30 * 60`.
 * @returns An `{ action, params }` object.
 */
export function setGroupAnonymousBan(group_id: number, anonymous: object, anonymous_flag: string, duration = 30 * 60) {
    const params = { group_id, anonymous, anonymous_flag, duration };
    return { action: 'set_group_anonymous_ban', params };
}
/**
 * Builds the request payload for the OneBot `set_group_whole_ban` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param enable Forwarded as `params.enable`; defaults to `true`.
 * @returns An `{ action, params }` object.
 */
export function setGroupWholeBan(group_id: number, enable = true) {
    const params = { group_id, enable };
    return { action: 'set_group_whole_ban', params };
}
/**
 * Builds the request payload for the OneBot `set_group_admin` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param user_id QQ number stored in `params.user_id`.
 * @param enable Forwarded as `params.enable`; defaults to `true`.
 * @returns An `{ action, params }` object.
 */
export function setGroupAdmin(group_id: number, user_id: number, enable = true) {
    const params = { group_id, user_id, enable };
    return { action: 'set_group_admin', params };
}
/**
 * Builds the request payload for the OneBot `set_group_anonymous` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param enable Forwarded as `params.enable`; defaults to `true`.
 * @returns An `{ action, params }` object.
 */
export function setGroupAnonymous(group_id: number, enable = true) {
    const params = { group_id, enable };
    return { action: 'set_group_anonymous', params };
}
/**
 * Builds the request payload for the OneBot `set_group_card` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param user_id QQ number stored in `params.user_id`.
 * @param card Group card text; defaults to the empty string.
 * @returns An `{ action, params }` object.
 */
export function setGroupCard(group_id: number, user_id: number, card = '') {
    const params = { group_id, user_id, card };
    return { action: 'set_group_card', params };
}
/**
 * Builds the request payload for the OneBot `set_group_name` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param group_name New group name stored in `params.group_name`.
 * @returns An `{ action, params }` object.
 */
export function setGroupName(group_id: number, group_name: string) {
    const params = { group_id, group_name };
    return { action: 'set_group_name', params };
}
/**
 * Builds the request payload for the OneBot `set_group_leave` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param is_dismiss Forwarded as `params.is_dismiss`; defaults to `false`.
 * @returns An `{ action, params }` object.
 */
export function setGroupLeave(group_id: number, is_dismiss = false) {
    const params = { group_id, is_dismiss };
    return { action: 'set_group_leave', params };
}
/**
 * Builds the request payload for the OneBot `set_group_special_title` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param user_id QQ number stored in `params.user_id`.
 * @param special_title Title text; defaults to the empty string.
 * @param duration Title duration; defaults to `-1`.
 * @returns An `{ action, params }` object.
 */
export function setGroupSpecialTitle(group_id: number, user_id: number, special_title = '', duration = -1) {
    const params = { group_id, user_id, special_title, duration };
    return { action: 'set_group_special_title', params };
}
/**
 * Builds the request payload for the OneBot `set_friend_add_request` action.
 * @param flag Flag string identifying the request.
 * @param approve Forwarded as `params.approve`; defaults to `true`.
 * @param remark Forwarded as `params.remark`; defaults to the empty string.
 * @returns An `{ action, params }` object.
 */
export function setFriendAddRequest(flag: string, approve = true, remark = '') {
    const params = { flag, approve, remark };
    return { action: 'set_friend_add_request', params };
}
/**
 * Builds the request payload for the OneBot `set_group_add_request` action.
 * @param flag Flag string identifying the request.
 * @param sub_type Request sub-type; the original note lists `add` and `invite`.
 * @param approve Forwarded as `params.approve`; defaults to `true`.
 * @param reason Forwarded as `params.reason`; defaults to the empty string.
 * @returns An `{ action, params }` object.
 */
export function setGroupAddRequest(flag: string, sub_type: string, approve = true, reason = '') {
    const params = { flag, sub_type, approve, reason };
    return { action: 'set_group_add_request', params };
}
/**
 * Builds the request payload for the OneBot `get_login_info` action (no parameters).
 * @returns An `{ action, params }` object with empty `params`.
 */
export function getLoginInfo() {
    const params = {};
    return { action: 'get_login_info', params };
}
/**
 * Builds the request payload for the OneBot `get_stranger_info` action.
 * @param user_id QQ number stored in `params.user_id`.
 * @param no_cache Forwarded as `params.no_cache`; defaults to `false`.
 * @returns An `{ action, params }` object.
 */
export function getStrangerInfo(user_id: number, no_cache = false) {
    const params = { user_id, no_cache };
    return { action: 'get_stranger_info', params };
}
/**
 * Builds the request payload for the OneBot `get_friend_list` action (no parameters).
 * @returns An `{ action, params }` object with empty `params`.
 */
export function getFriendList() {
    const params = {};
    return { action: 'get_friend_list', params };
}
/**
 * Builds the request payload for the OneBot `get_group_info` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param no_cache Forwarded as `params.no_cache`; defaults to `false`.
 * @returns An `{ action, params }` object.
 */
export function getGroupInfo(group_id: number, no_cache = false) {
    const params = { group_id, no_cache };
    return { action: 'get_group_info', params };
}
/**
 * Builds the request payload for the OneBot `get_group_list` action (no parameters).
 * @returns An `{ action, params }` object with empty `params`.
 */
export function getGroupList() {
    const params = {};
    return { action: 'get_group_list', params };
}
/**
 * Builds the request payload for the OneBot `get_group_member_info` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param user_id QQ number stored in `params.user_id`.
 * @param no_cache Forwarded as `params.no_cache`; defaults to `false`.
 * @returns An `{ action, params }` object.
 */
export function getGroupMemberInfo(group_id: number, user_id: number, no_cache = false) {
    const params = { group_id, user_id, no_cache };
    return { action: 'get_group_member_info', params };
}
/**
 * Builds the request payload for the OneBot `get_group_member_list` action.
 * @param group_id Group number stored in `params.group_id`.
 * @returns An `{ action, params }` object.
 */
export function getGroupMemberList(group_id: number) {
    const params = { group_id };
    return { action: 'get_group_member_list', params };
}
/**
 * Builds the request payload for the OneBot `get_group_honor_info` action.
 * @param group_id Group number stored in `params.group_id`.
 * @param type Honor category; the original note lists `talkative`, `performer`,
 *             `legend`, `strong_newbie`, `emotion` and `all`.
 * @returns An `{ action, params }` object.
 */
export function getGroupHonorInfo(group_id: number, type: string) {
    const params = { group_id, type };
    return { action: 'get_group_honor_info', params };
}
/**
 * Builds the request payload for the OneBot `get_cookies` action.
 * @param domain Domain whose cookies are requested; defaults to the empty string.
 * @returns An `{ action, params }` object.
 */
export function getCookies(domain = '') {
    const params = { domain };
    return { action: 'get_cookies', params };
}
/**
 * Builds the request payload for the OneBot `get_csrf_token` action (no parameters).
 * @returns An `{ action, params }` object with empty `params`.
 */
export function getCsrfToken() {
    const params = {};
    return { action: 'get_csrf_token', params };
}
/**
 * Builds the request payload for the OneBot `get_credentials` action.
 *
 * OneBot 11 lets this action carry a `domain`, just like `get_cookies`; the
 * previous version could not pass it. The parameter is optional, and when it
 * is omitted `params` stays empty, so existing no-argument callers produce
 * exactly the same payload as before.
 * @param domain Optional domain to request credentials for.
 * @returns An `{ action, params }` object.
 */
export function getCredentials(domain?: string) {
    const params: { domain?: string } = {};
    // Only emit the key when the caller supplied it, to keep the legacy payload shape.
    if (domain !== undefined) {
        params.domain = domain;
    }
    return { action: 'get_credentials', params };
}
/**
 * Builds the request payload for the OneBot `get_record` action.
 *
 * OneBot 11 defines two parameters for this action — the received voice file
 * name and the desired output format — which the previous version had no way
 * to pass. Both are optional here, and omitted values are left out of
 * `params`, so a no-argument call still yields the same payload as before.
 * @param file Optional voice file name as received in a `record` segment.
 * @param out_format Optional target audio format (for example `mp3`).
 * @returns An `{ action, params }` object.
 */
export function getRecord(file?: string, out_format?: string) {
    const params: { file?: string; out_format?: string } = {};
    // Only emit keys the caller supplied, to keep the legacy payload shape.
    if (file !== undefined) {
        params.file = file;
    }
    if (out_format !== undefined) {
        params.out_format = out_format;
    }
    return { action: 'get_record', params };
}
/**
 * Builds the request payload for the OneBot `get_image` action.
 * @param file Image file name, e.g. `6B4DE3DFD1BD271E3297859D41C530F5.jpg`.
 * @returns An `{ action, params }` object.
 */
export function getImage(file: string) {
    const params = { file };
    return { action: 'get_image', params };
}
/**
 * Builds the request payload for the OneBot `can_send_image` action (no parameters).
 * @returns An `{ action, params }` object with empty `params`.
 */
export function canSendImage() {
    const params = {};
    return { action: 'can_send_image', params };
}
/**
 * Builds the request payload for the OneBot `can_send_record` action (no parameters).
 * @returns An `{ action, params }` object with empty `params`.
 */
export function canSendRecord() {
    const params = {};
    return { action: 'can_send_record', params };
}
/**
 * Builds the request payload for the OneBot `get_status` action (no parameters).
 * @returns An `{ action, params }` object with empty `params`.
 */
export function getStatus() {
    const params = {};
    return { action: 'get_status', params };
}
/**
 * Builds the request payload for the OneBot `get_version_info` action (no parameters).
 * @returns An `{ action, params }` object with empty `params`.
 */
export function getVersionInfo() {
    const params = {};
    return { action: 'get_version_info', params };
}
/**
 * Builds the request payload for the OneBot `set_restart` action.
 *
 * OneBot 11 allows an optional `delay` for this action, which the previous
 * version could not pass. When `delay` is omitted `params` stays empty, so
 * existing no-argument callers produce exactly the same payload as before.
 * @param delay Optional delay before restarting (milliseconds per the OneBot 11 spec).
 * @returns An `{ action, params }` object.
 */
export function setRestart(delay?: number) {
    const params: { delay?: number } = {};
    // Only emit the key when the caller supplied it, to keep the legacy payload shape.
    if (delay !== undefined) {
        params.delay = delay;
    }
    return { action: 'set_restart', params };
}
/**
 * Builds the request payload for the OneBot `clean_cache` action (no parameters).
 * @returns An `{ action, params }` object with empty `params`.
 */
export function cleanCache() {
    const params = {};
    return { action: 'clean_cache', params };
}

View File

@ -13,6 +13,16 @@ export const apiQueryVecdb = (req: apiQueryVecdbRequest) => r<CommonResponse<api
data: req data: req
}); });
/**
 * POSTs `req` as the request body to `/intent/get-intent-recogition` through the
 * shared request helper `r` and returns whatever `r` returns.
 * NOTE(review): "Recogition" is misspelled in both the identifier and the URL;
 * renaming either has to be coordinated with the server route and with callers.
 */
export const apiGetIntentRecogition = (req: apiGetIntentRecogitionRequest) => r<CommonResponse<apiGetIntentRecogitionData>>({
url: '/intent/get-intent-recogition', method: 'POST',
data: req
});
/**
 * POSTs to `/intent/retrain-embedding-mapping` through the shared request helper `r`.
 * NOTE(review): `req` is accepted but never forwarded (there is no `data: req`).
 * The request type is currently empty so nothing is lost, but any field added to
 * it later will be silently dropped — confirm whether the body should be sent.
 */
export const apiIntentRetrain = (req: apiIntentRetrainRequest) => r<CommonResponse<apiIntentRetrainData>>({
url: '/intent/retrain-embedding-mapping', method: 'POST',
});
export interface apiQueryVecdbRequest { export interface apiQueryVecdbRequest {
query: string, query: string,
k?: number k?: number
@ -28,4 +38,19 @@ export interface apiQueryVecdbDataItem {
} }
} }
export type apiQueryVecdbData = apiQueryVecdbDataItem[]; export type apiQueryVecdbData = apiQueryVecdbDataItem[];
/** Request body for `apiGetIntentRecogition`: the text whose intent should be recognised. */
export interface apiGetIntentRecogitionRequest {
query: string
}
/** Payload type parameter of the `apiGetIntentRecogition` response: an intent `id` and `name`. */
export interface apiGetIntentRecogitionData {
id: number,
name: string
}
/** Request type for `apiIntentRetrain`; it currently declares no fields. */
export interface apiIntentRetrainRequest {
}
/** Payload type parameter of the `apiIntentRetrain` response. */
export type apiIntentRetrainData = string;

0
bot/digital-ide.ts Normal file
View File

View File

@ -1,63 +0,0 @@
import lagrangeMapper from './lagrange-mapping';
import type * as Lagrange from './type';
import type { LagrangeContext } from './context';
/**
 * Dispatches incoming Lagrange events by `post_type`, and message events
 * further by `message_type`, to the handlers held by `lagrangeMapper`.
 */
class Pipe {
    context: LagrangeContext | undefined;
    send: Lagrange.SendApi | undefined;

    /** Remembers the context and keeps a copy of its `send` method bound to it. */
    public injectContext(context: LagrangeContext) {
        this.context = context;
        this.send = context.send.bind(context);
    }

    /** Entry point: forwards the event to the pipe matching its `post_type`; other kinds are ignored. */
    public run(message: Lagrange.Message) {
        if (message.post_type === 'message') {
            this.messagePipe(message);
        } else if (message.post_type === 'notice') {
            this.noticePipe(message);
        } else if (message.post_type === 'request') {
            this.requestPipe(message);
        }
    }

    /** Handles events whose `post_type` is `message`. */
    public messagePipe(message: Lagrange.MessagePostType) {
        if (message.message_type === 'private') {
            lagrangeMapper.resolvePrivateUser(message, this.send);
            return;
        }
        if (message.message_type === 'group') {
            lagrangeMapper.resolveGroup(message, this.send);
        }
    }

    /** Handles events whose `post_type` is `notice`; currently a no-op. */
    public noticePipe(message: Lagrange.NoticePostType) {
    }

    /** Handles events whose `post_type` is `request`; currently a no-op. */
    public requestPipe(message: Lagrange.RequestPostType) {
    }
}
export const pipe = new Pipe();
/**
 * Handler for a raw incoming frame: decodes it as UTF-8 JSON and, unless it is
 * a `meta_event`, hands it to `pipe.run`.
 *
 * A frame that is not valid JSON, or whose JSON value is not an object, is
 * logged and dropped instead of throwing out of the event handler.
 * @param event Raw frame bytes.
 */
export function onMessage(event: Buffer) {
    const messageBuffer = event.toString('utf-8');

    let parsed: unknown;
    try {
        parsed = JSON.parse(messageBuffer);
    } catch (error: unknown) {
        console.error('onMessage: dropping frame that is not valid JSON', error);
        return;
    }

    // JSON.parse can legitimately yield null or a primitive; reading `.post_type` from those would throw.
    if (typeof parsed !== 'object' || parsed === null) {
        console.error('onMessage: dropping frame whose JSON payload is not an object');
        return;
    }

    const messageJson = parsed as Lagrange.Message;
    // Ignore system (meta) messages.
    if (messageJson.post_type !== 'meta_event') {
        console.log('进入 runPipe');
        pipe.run(messageJson);
    }
}
/** Close handler: logs that the server connection has been closed. */
export function onClose(): void {
    const notice = '服务器连接关闭';
    console.log(notice);
}

View File

@ -1,15 +0,0 @@
import lagrangeMapper from './lagrange-mapping';
import { apiQueryVecdb } from './api/vecdb';
import type * as Lagrange from './type';
/** Handler class whose methods are registered on `lagrangeMapper` through decorators. */
export class Impl {
    /** Registered for private messages from QQ user 1193466151; logs the raw message text. */
    @lagrangeMapper.onPrivateUser(1193466151)
    async handleJinhui(c: Lagrange.PrivateUserInvokeContext) {
        const rawText = c.message.raw_message;
        console.log(`raw message:${rawText}`);
    }
}

View File

@ -1,95 +0,0 @@
import assert from 'assert';
import type * as Lagrange from './type';
// Handler for a private message: receives the invoke context and may return (or resolve to) nothing, a string, or a send segment.
type PrivateUserInvoker = (context: Lagrange.PrivateUserInvokeContext) => Lagrange.Thenable<undefined | void | string | Lagrange.Send.Default>;
// Handler for a group message; same return contract as PrivateUserInvoker.
type GroupUserInvoker = (context: Lagrange.GroupUserInvokeContext) => Lagrange.Thenable<undefined | void | string | Lagrange.Send.Default>;
// Either kind of handler.
type MessageInvoker = PrivateUserInvoker | GroupUserInvoker;
/**
 * Property-descriptor shape whose `value` is typed as a message handler.
 * Note that `value` is optional, so code reading it must handle `undefined`.
 */
interface CustomDescriptor<T extends MessageInvoker> {
value?: T;
configurable?: boolean;
enumerable?: boolean;
writable?: boolean;
get?(): any;
set?(v: any): void;
}
/** A registered handler together with the optional config it was registered with. */
interface MessageInvokerStorage<T extends MessageInvoker> {
invoker: T;
config?: Partial<Lagrange.CommonMessage>
}
/**
 * Registry that maps a QQ user id or a group id to a single message handler.
 * Handlers are registered with the `on*` method decorators and invoked by the
 * `resolve*` methods when a matching message arrives.
 */
class LagrangeMapper {
    private _privateUserStorage: Map<number, MessageInvokerStorage<PrivateUserInvoker>>;
    private _groupStorage: Map<number, MessageInvokerStorage<GroupUserInvoker>>;

    constructor() {
        this._privateUserStorage = new Map<number, MessageInvokerStorage<PrivateUserInvoker>>();
        this._groupStorage = new Map<number, MessageInvokerStorage<GroupUserInvoker>>();
    }

    /** Handlers keyed by private user id. */
    get privateUserStorage() {
        return this._privateUserStorage;
    }

    /** Handlers keyed by group id. */
    get groupStorage() {
        return this._groupStorage;
    }

    /**
     * Calls `invoker` and attaches a rejection handler to whatever it returns, so
     * a failing async handler is logged instead of becoming an unhandled
     * rejection. A synchronous throw still propagates to the caller as before.
     */
    private _invoke<C>(invoker: (context: C) => unknown, context: C) {
        const result = invoker(context);
        void Promise.resolve(result).catch((error: unknown) => {
            console.error('message handler rejected:', error);
        });
    }

    /** Invokes the handler registered for `message.user_id`, if there is one. */
    public resolvePrivateUser(message: Lagrange.PrivateMessage, send: Lagrange.SendApi) {
        const userStorage = this._privateUserStorage.get(message.user_id);
        if (userStorage) {
            this._invoke(userStorage.invoker, { message, send });
        }
    }

    /** Invokes the handler registered for `message.group_id`, if there is one. */
    public resolveGroup(message: Lagrange.GroupMessage, send: Lagrange.SendApi) {
        const groupStorage = this._groupStorage.get(message.group_id);
        if (groupStorage) {
            this._invoke(groupStorage.invoker, { message, send });
        }
    }

    /**
     * Method decorator factory: registers the decorated method as the handler
     * for private messages from `user_id`. A later registration for the same
     * user replaces the earlier one (with a warning).
     */
    public onPrivateUser(user_id: number) {
        return (target: any, propertyKey: string, descriptor: CustomDescriptor<PrivateUserInvoker>) => {
            const invoker = descriptor.value;
            // `value` is absent for accessors; storing undefined would only crash later at dispatch time.
            if (invoker === undefined) {
                console.warn(`${propertyKey} -> onPrivateUser(${user_id}) ignored: the decorated member is not a method`);
                return;
            }
            if (this._privateUserStorage.has(user_id)) {
                console.warn(`${propertyKey} -> 用户 ${user_id} 已经被注册过了,该操作将覆盖原本的!`);
            }
            this._privateUserStorage.set(user_id, { invoker });
        };
    }

    /**
     * Method decorator factory intended for a specific user inside a specific
     * group. Validates that both ids are present, but registration itself is not
     * implemented: the decorated handler is never stored, which is now reported
     * with a warning instead of passing silently.
     */
    public onGroupUser(config: Partial<Lagrange.CommonMessage>) {
        assert(config.user_id, 'onGroupUser 中 user_id 不能为空');
        assert(config.group_id, 'onGroupUser 中 group_id 不能为空');

        return (target: any, propertyKey: string, descriptor: CustomDescriptor<GroupUserInvoker>) => {
            console.warn(`${propertyKey} -> onGroupUser is not implemented; this handler will never be invoked`);
        };
    }

    /**
     * Method decorator factory: registers the decorated method as the handler
     * for messages from `config.group_id`. A later registration for the same
     * group replaces the earlier one (with a warning).
     */
    public onGroup(config: Partial<Lagrange.CommonMessage>) {
        // Capture the id before the closure so the validated value is what gets used as the map key.
        const group_id = config.group_id;
        assert(group_id, 'onGroup 中 group_id 不能为空');

        return (target: any, propertyKey: string, descriptor: CustomDescriptor<GroupUserInvoker>) => {
            const invoker = descriptor.value;
            if (invoker === undefined) {
                console.warn(`${propertyKey} -> onGroup(${group_id}) ignored: the decorated member is not a method`);
                return;
            }
            if (this._groupStorage.has(group_id)) {
                console.warn(`${propertyKey} -> 群 ${group_id} 已经被注册过了,该操作将覆盖原本的!`);
            }
            this._groupStorage.set(group_id, { invoker, config });
        };
    }
}
const lagMapper = new LagrangeMapper();
export default lagMapper;

View File

@ -1,14 +1,24 @@
import * as fs from 'fs'; import * as fs from 'fs';
import lagServer from './context'; import { server } from 'lagrange.onebot';
import './impl'; import './test';
import './digital-ide';
const buffer = fs.readFileSync('./app/publish/appsettings.json', 'utf-8'); const buffer = fs.readFileSync('./app/publish/appsettings.json', 'utf-8');
const config = JSON.parse(buffer); const config = JSON.parse(buffer);
const impl = config.Implementations[0]; const impl = config.Implementations[0];
lagServer.run({ server.onMounted(c => {
c.sendPrivateMsg(1193466151, '成功上线');
});
server.onUnmounted(c => {
c.sendPrivateMsg(1193466151, '成功下线');
});
server.run({
host: impl.Host, host: impl.Host,
port: impl.Port, port: impl.Port,
path: impl.Suffix path: impl.Suffix,
qq: 1542544558
}); });

View File

@ -1,32 +1,27 @@
import * as Lagrange from './type'; import { plugins, LagrangeContext, Message } from 'lagrange.onebot';
export class Impl {
class Plugins { @plugins.register('wget-image')
registeredPlugins: Map<string, Function>; async wgetImage(c: LagrangeContext<Message>) {
constructor() { // 判断一下,只解析 message 类型的数据
if (c.message.post_type === 'message') {
} const text = c.message.raw_message;
if (text.startsWith('\\wget-image')) {
public register() { const url = text.substring('\\wget-image'.length).trim();
return function(target: any, propertyKey: string, descriptor: PropertyDecorator) { c.sendMessage([
{
type: 'image',
data: {
file: url,
timeout: 10000
}
}
]);
// 插件中使用 finishSession 会让被装饰的事务函数不再被执行,直接结束对话
c.finishSession();
}
} }
} }
public use(name: string) {
return function(target: any, propertyKey: string, descriptor: PropertyDecorator) {
}
}
}
const plugins = new Plugins();
export default plugins;
class Impl {
echo(message: Lagrange.CommonMessage) {
}
} }

28
bot/test.ts Normal file
View File

@ -0,0 +1,28 @@
import './plugins';
import { mapper, plugins, LagrangeContext, PrivateMessage, GroupMessage, Send } from 'lagrange.onebot'
import { apiQueryVecdb } from './api/vecdb';
/**
 * Sample handlers registered on the `lagrange.onebot` mapper through decorators.
 */
export class Impl {
// Private-chat handler for QQ user 1193466151, wrapped by the 'echo', 'pm' and
// 'wget-image' plugins. It replies with one image segment that points at a fixed
// local file and then calls finishSession().
// NOTE(review): confirm that plugins named 'echo' and 'pm' are registered somewhere.
// NOTE(review): the image path is an absolute path on one developer machine.
// NOTE(review): OneBot 11 documents the image `timeout` in seconds; 10000 may have
// been intended as milliseconds — confirm against lagrange.onebot.
@mapper.onPrivateUser(1193466151)
@plugins.use('echo')
@plugins.use('pm')
@plugins.use('wget-image')
async handleJinhui(c: LagrangeContext<PrivateMessage>) {
c.sendMessage([{
type: 'image',
data: {
file: 'file:///data/zhelonghuang/project/rag-llm/images/bird.png',
timeout: 10000
}
}])
c.finishSession();
}
// Group handler for group 956419963, registered with `{ at: false }`; it only logs
// the structured and the raw form of each message.
@mapper.onGroup(956419963, { at: false })
async handleTestGroup(c: LagrangeContext<GroupMessage>) {
console.log(c.message.message);
console.log(c.message.raw_message);
}
}

View File

@ -1,542 +0,0 @@
/**
* @author
* @email 1193466151@qq.com
* @description Lagrange.Core
* @comment
* - https://github.com/botuniverse/onebot-11/blob/master/api/public.md
* - https://docs.go-cqhttp.org/reference/data_struct.html
*/
/** Health flags carried in the `status` field of a heartbeat meta event. */
export interface HeartBeatStatus {
app_initialized: boolean,
app_enabled: boolean,
app_good: boolean,
online: boolean,
good: boolean
}
/** Possible values of `meta_event_type`. */
export type MetaEventType = 'heartbeat' | 'lifecycle';
/** Heartbeat meta event (`post_type: 'meta_event'`, `meta_event_type: 'heartbeat'`). */
export interface HeartBeatMessage {
interval: number,
status: HeartBeatStatus,
meta_event_type: 'heartbeat',
time: number,
self_id: number,
post_type: 'meta_event'
}
/** Information about the sender of a message; only `user_id`, `nickname` and `sex` are required. */
export interface Sender {
user_id: number,
nickname: string,
sex: 'unknown' | 'male' | 'female',
card?: string,
age?: number,
area?: string,
level?: string, // group chat level, delivered as a string
role?: string,
title?: string
}
// Reference: https://github.com/botuniverse/onebot-11/blob/master/message/segment.md
/** Message segment shapes as they appear in received messages; `Default` is the union of all of them. */
export namespace Receive {
export interface Text {
type: 'text',
data: {
text: string
}
}
export interface Face {
type: 'face',
data: {
id: string
}
}
export interface Image {
type: 'image',
data: {
file: string,
url: string,
// Text shown in the collapsed preview; for an image this is the placeholder "[图片]" ("[Image]")
summary: string
}
}
export interface Audio {
type: 'record',
data: {
file: string,
magic: 0 | 1,
url: string
}
}
export interface Video {
type: 'video',
data: {
file: string,
url: string
}
}
export interface At {
type: 'at',
data: {
qq: string
}
}
// Rock-paper-scissors magic emoji
export interface FingerGuess {
type: 'rps',
data: {}
}
// Dice-roll magic emoji
export interface Dice {
type: 'dice',
data: {}
}
// Window shake (nudge)
export interface WindowJitter {
type: 'shake',
data: {}
}
// Poke
export interface Poke {
type: 'poke',
data: {
type: string,
id: string,
name: string
}
}
export interface Link {
type: 'share',
data: {
// URL
url: string,
// Title
title: string,
// Optional when sending: content description
content?: string,
// Optional when sending: image URL
image?: string
}
}
export interface RecommendFriend {
type: 'contact',
data: {
type: 'qq',
// QQ number of the recommended user
id: string
}
}
export interface RecommendGroup {
type: 'contact',
data: {
type: 'group',
// Group number of the recommended group
id: string
}
}
export interface Location {
type: 'location',
data: {
// Latitude
lat: string,
// Longitude
lon: string,
// Optional when sending: title
title?: string,
// Optional when sending: content description
content?: string
}
}
export interface Reply {
type: 'reply',
data: {
id: string
}
}
export interface Forward {
type: 'forward',
data: {
id: string
}
}
export interface XML {
type: 'xml',
data: {
// XML content
data: string
}
}
// NOTE: inside this namespace the name `JSON` refers to this interface as a type.
export interface JSON {
type: 'json',
data: {
data: string
}
}
export type Default = Text | Face | Image | Audio | Video | At | FingerGuess | Dice | WindowJitter | Poke | Link | RecommendFriend | RecommendGroup | Location | Reply | Forward | XML | JSON;
}
/** Message segment shapes used when sending messages; `Default` is the union of all of them. */
export namespace Send {
export interface Text {
type: 'text',
data: {
text: string
}
}
export interface Face {
type: 'face',
data: {
id: string
}
}
export interface Image {
type: 'image',
data: {
/**
* `file` — example forms:
* 1. Local file URI, e.g. file:///C:\\Users\Richard\Pictures\1.png
* 2. Network URL, e.g. http://i1.piimg.com/567571/fdd6e7b6d93f1ef0.jpg
* 3. Base64 data, e.g. base64://iVBORw0KGgoAAAANSUhEUgAAABQAAAAVCAIAAADJt1n/AAAAKElEQVQ4EWPk5+RmIBcwkasRpG9UM4mhNxpgowFGMARGEwnBIEJVAAAdBgBNAZf+QAAAAABJRU5ErkJggg==
*/
file: string,
// Only effective when sending via a network URL: whether to use an already cached file; default 1
cache: 0 | 1,
// Only effective when sending via a network URL: whether to download through a proxy (the proxy must be configured via environment variables or a config file); default 1
proxy: 0 | 1,
// Only effective when sending via a network URL: download timeout in seconds; no timeout by default
timeout: number
}
}
export interface Audio {
type: 'record',
data: {
file: string,
magic: 0 | 1,
cache: 0 | 1,
proxy: 0 | 1,
timeout: number
}
}
export interface Video {
type: 'video',
data: {
file: string,
cache: 0 | 1,
proxy: 0 | 1,
timeout: number
}
}
export interface At {
type: 'at',
data: {
qq: string
}
}
export interface FingerGuess {
type: 'rps',
data: {}
}
export interface Dice {
type: 'dice',
data: {}
}
export interface WindowJitter {
type: 'shake',
data: {}
}
// Poke
export interface Poke {
type: 'poke',
data: {
type: string,
id: string,
}
}
export interface Anonymous {
type: 'anonymous',
data: {}
}
export interface Link {
type: 'share',
data: {
// URL
url: string,
// Title
title: string,
// Optional when sending: content description
content?: string,
// Optional when sending: image URL
image?: string
}
}
export interface RecommendFriend {
type: 'contact',
data: {
type: 'qq',
// QQ number of the recommended user
id: string
}
}
export interface RecommendGroup {
type: 'contact',
data: {
type: 'group',
// Group number of the recommended group
id: string
}
}
export interface Location {
type: 'location',
data: {
// Latitude
lat: string,
// Longitude
lon: string,
// Optional when sending: title
title?: string,
// Optional when sending: content description
content?: string
}
}
export interface MusicShare {
type: 'music',
data: {
// Music source: QQ Music, NetEase Cloud Music, or Xiami Music respectively
type: 'qq' | '163' | 'xm',
// Song ID
id: string
}
}
export interface CustomMusicShare {
type: 'music',
data: {
type: 'custom',
url: string,
audio: string,
title: string,
content: string,
image: string
}
}
export interface Reply {
type: 'reply',
data: {
id: string
}
}
export interface ForwardNode {
type: 'node',
data: {
id: string
}
}
export interface XML {
type: 'xml',
data: {
// XML content
data: string
}
}
// NOTE: inside this namespace the name `JSON` refers to this interface as a type.
export interface JSON {
type: 'json',
data: {
data: string
}
}
export type Default = Text | Face | Image | Audio | Video | At | FingerGuess | Dice | WindowJitter | Poke | Anonymous | Link | RecommendFriend | RecommendGroup | Location | MusicShare | CustomMusicShare | Reply | ForwardNode | XML | JSON;
}
/** Description of a file attached to a notice event. */
export interface MsgFile {
// Usually ''
id: string,
// File name
name: string,
// File size in bytes
size: number,
// id
busid: number,
// Download link (IPv4)
url: string
}
/** Meta event; every field other than `post_type` is untyped. */
export interface MetaMessage {
post_type: 'meta_event',
[msg: string]: any
}
/**
 * Loosely typed message event in which most fields are optional.
 * NOTE(review): `message` is typed as a single segment rather than an array of
 * segments — confirm against the payloads actually received.
 */
export interface CommonMessage {
// Event type
post_type: 'message',
// Whether the message comes from a private chat or a group chat
message_type?: 'private' | 'group',
// Whether the sender is a friend or a group member / stranger
sub_type?: 'friend' | 'normal',
// Message ID
message_id?: number,
// Group number
group_id?: number,
// QQ number of the sender
user_id: number,
// Whether the message was sent anonymously; usually null
anonymous?: null | boolean,
// Message content (structured)
message?: Receive.Default,
// Message content (plain text)
raw_message?: string,
// Timestamp of sending
time: number,
// The bot's own id
self_id: number,
// Sent file
// Default font size; usually 0
font?: number
}
/**
 * Message event from a private chat (`message_type: 'private'`).
 * NOTE(review): `message` is typed as a single segment rather than an array of
 * segments — confirm against the payloads actually received.
 */
export interface PrivateMessage {
// Event type
post_type: 'message',
// Whether the message comes from a private chat or a group chat
message_type: 'private',
// Message ID
message_id: number,
// QQ number of the sender
user_id: number,
// Message content (structured)
message: Receive.Default,
// Message content (plain text)
raw_message: string,
// Timestamp of sending
time: number,
// The bot's own id
self_id: number,
// Default font size; usually 0
font?: number
}
/**
 * Message event from a group chat (`message_type: 'group'`).
 * NOTE(review): `message` is typed as a single segment rather than an array of
 * segments — confirm against the payloads actually received.
 */
export interface GroupMessage {
// Event type
post_type: 'message',
// Whether the message comes from a private chat or a group chat
message_type: 'group',
// Whether the sender is a friend or a group member / stranger
sub_type: 'friend' | 'normal',
// Message ID
message_id: number,
// Group number
group_id: number,
// QQ number of the sender
user_id: number,
// Whether the message was sent anonymously; usually null
anonymous: null | boolean,
// Message content (structured)
message: Receive.Default,
// Message content (plain text)
raw_message: string,
// Timestamp of sending
time: number,
// The bot's own id
self_id: number,
// Sent file
// Default font size; usually 0
font?: number
}
/** Notice event that carries a file (`notice_type` is `'offline_file'` when present). */
export interface FileMessage {
post_type: 'notice',
user_id: number,
file: MsgFile,
notice_type?: 'offline_file',
time: number,
self_id: number
}
// Request to join a group or to add a friend
export interface AddMessage {
post_type: 'request',
sub_type: 'add',
user_id: number,
group_id: number,
// Defaults to 0, meaning there is no inviter
invitor_id: number,
request_type: 'private' | 'group',
// The group's question and the applicant's answer
comment: string,
flag: string,
time: number,
self_id: number,
}
// Approval notice (`notice_type: 'group_increase'`, `sub_type: 'approve'`)
export interface ApproveMessage {
post_type: 'notice',
sub_type: 'approve',
group_id: number,
operator_id: number,
user_id: number,
notice_type: 'group_increase',
time: number,
self_id: number,
}
// Union of every event shape declared in this file.
export type Message = MetaMessage | PrivateMessage | GroupMessage | FileMessage | AddMessage | ApproveMessage;
// Events whose post_type is 'message'.
export type MessagePostType = PrivateMessage | GroupMessage;
// Events whose post_type is 'notice'.
export type NoticePostType = FileMessage | ApproveMessage;
// Events whose post_type is 'request'.
export type RequestPostType = AddMessage;
// A value that is available either directly or through a Promise.
export type Thenable<T> = T | Promise<T>;
// Function used to send a reply: accepts plain text or an array of send segments.
export type SendApi = (msg: string | Send.Default[]) => Thenable<void | Error>;
/** What a handler receives: the triggering message plus a `send` function. */
export interface InvokerContext<M = Message> {
message: M,
send: SendApi
}
// Handler contexts specialised for private and group messages.
export type PrivateUserInvokeContext = InvokerContext<PrivateMessage>;
export type GroupUserInvokeContext = InvokerContext<GroupMessage>;

View File

@ -0,0 +1,2 @@
addr: 127.0.0.1
port: 8082

50
config/story.yml Normal file
View File

@ -0,0 +1,50 @@
schema:
root:
name: root
children:
- name: usage
description: 使用查询
children:
- name: bug
description: bug 查询
children:
- name: command
description: 指令
children:
- name: others
description: 其他
children:
stories:
- message: 请问 property.json 如何配置?
intent: usage
- message: 我的自动补全无法使用是不是有bug
intent: bug
- message: 帮我上传一下这份数据
intent: command
- message: surface了解一下
intent: others
- message: 大佬们为啥我的digital ide启动之后所有功能都没启动捏我配置了property文件然后插件的vivado路经和modelsim路经都加上了
intent: usage
- message: 这群要被chisel夺舍了吗
intent: others
- message: Metals一开直接报错
intent: others
- message: 话说digital-ide打开大的verilog卡死了
intent: bug
- message: 请问一下第一次点击对文件仿真可以出波形文件再次点击的时候就会提示unknown module type了。是哪个配置没配置好
intent: usage
- message: 怎么调整是哪个版本的vivado来构建工程呢
intent: usage
- message: 咱们这个插件win7的vscode是不是只能用很早之前的版本
intent: usage
- message: 帮我将这份数据保存到服务器上
intent: command
- message: 他这个意思是 单个功耗很低 但是功耗低那肯定性能就寄 频率肯定不高 靠人多
intent: others
rejects:
- metal
- metals
- idea

281
notebook/experiment.ipynb Normal file

File diff suppressed because one or more lines are too long

331
notebook/github-issue.ipynb Normal file
View File

@ -0,0 +1,331 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import requests as r\n",
"from bs4 import BeautifulSoup, Tag\n",
"import json\n",
"import os\n",
"from threading import Thread\n",
"from urllib.parse import urlparse"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def download_worker(media_url, save_path):\n",
" res = r.get(media_url)\n",
" with open(save_path, 'wb') as fp:\n",
" fp.write(res.content)\n",
" print('[crawler] 图像已经保存至', save_path)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def crawler_issue(issue_id: str):\n",
" folder = 'issue-' + str(issue_id)\n",
" os.makedirs('../docs/digital-issue/' + folder, exist_ok=True)\n",
" res = r.get('https://github.com/Digital-EDA/Digital-IDE/issues/' + str(issue_id))\n",
" soup = BeautifulSoup(res.text, 'html.parser')\n",
"\n",
" texts = []\n",
" for td in soup.find_all('td', attrs={ 'class': 'js-comment-body' }):\n",
" if isinstance(td, Tag):\n",
" for p in td.find_all('p'):\n",
" if isinstance(p, Tag):\n",
" text = p.text\n",
" texts.append(text)\n",
" for a in p.find_all('a'):\n",
" href = a.attrs['href']\n",
" if href and '.png' in href:\n",
" urlp = urlparse(href)\n",
" name = urlp.path.split('/')[-1]\n",
" save_path = '../docs/digital-issue/' + folder + '/' + name\n",
" t = Thread(target=download_worker, args=(href, save_path))\n",
" t.start()\n",
"\n",
" text = '\\n'.join(texts)\n",
" with open('../docs/digital-issue/{}/issue.md'.format(folder), 'w', encoding='utf-8') as fp:\n",
" fp.write(text) "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-67/331660157-920d7143-f262-42d5-af57-a817bf3aee01.png\n"
]
}
],
"source": [
"crawler_issue(67)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def crawler_issue_page(page_url: str):\n",
" res = r.get(page_url)\n",
" if res.status_code != 200:\n",
" print('page url {} return 404'.format(page_url))\n",
" return\n",
" html = res.text\n",
" soup = BeautifulSoup(html, 'html.parser')\n",
" issue_container = soup.find('div', { 'class': 'js-navigation-container js-active-navigation-container' })\n",
" issue_ids = []\n",
" for div in issue_container.children:\n",
" if isinstance(div, Tag):\n",
" id = div.attrs['id'].split('_')[-1]\n",
" issue_ids.append(int(id))\n",
" \n",
" for issue_id in issue_ids:\n",
" print('爬取 issue-{} 中 ...'.format(issue_id))\n",
" crawler_issue(issue_id)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"爬取 issue-71 中 ...\n",
"爬取 issue-70 中 ...\n",
"爬取 issue-69 中 ...\n",
"爬取 issue-68 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220146-9deeccbf-cc0e-4810-bdd9-80e11d083c15.png\n",
"爬取 issue-67 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220242-9abe8b7b-5985-4c1a-ac0f-30aba75ef8d2.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331219978-d22a5a5d-da00-430c-b966-68517ab264c0.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220080-b0c5f0af-e38c-4819-9efa-7491650ddb92.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-69/331220203-3bb8aefd-e04a-4eb7-ae87-ca48f1daa120.png\n",
"爬取 issue-66 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-67/331660157-920d7143-f262-42d5-af57-a817bf3aee01.png\n",
"爬取 issue-65 中 ...\n",
"爬取 issue-64 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-66/330112198-fb783018-b217-4cab-afef-32d339c4047a.png\n",
"爬取 issue-63 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-65/328184842-2e13483e-4ece-4eb6-8c8a-3d9c92a97651.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-65/328185054-e08c66b2-7e87-4238-88cb-e0672b2de530.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-65/328185191-305e8b86-a9de-434b-a1cf-80c441c51df2.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-64/327033289-dc79968e-8279-43aa-b6a1-a6f1acd4155f.png\n",
"爬取 issue-62 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-64/327033187-1b9134e0-387e-491d-a478-3ea6438728a4.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-63/326496750-53a0c779-8a4b-418d-b21c-4ea2151edd92.png\n",
"爬取 issue-61 中 ...\n",
"爬取 issue-60 中 ...\n",
"爬取 issue-55 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-61/325593801-bd5c4229-f47a-4d6e-99a1-0cc912378f0e.png\n",
"爬取 issue-54 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-55/316435757-14c497a5-6ecc-4f97-850b-7e13988ec7aa.png\n",
"爬取 issue-53 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-54/306546248-83a57abe-63ff-43ae-8140-5e2b284193f0.png\n",
"爬取 issue-52 中 ...\n",
"爬取 issue-51 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-53/305870587-69d44e21-edfc-4fc0-9ad7-daaec393caac.png\n",
"爬取 issue-50 中 ...\n",
"爬取 issue-49 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-51/302262263-3e5581e0-4e36-463b-9379-43d1f9e366b8.png\n",
"爬取 issue-48 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-49/300495384-94077fee-624f-48cc-98fd-d6e6fe16251b.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-49/300495541-23556932-5526-4428-b1c2-25c840352422.png\n",
"爬取 issue-47 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-49/300495316-f8c98f42-a320-4c4e-84f7-2aaab4fb39f1.png\n",
"爬取 issue-46 中 ...\n",
"爬取 issue-45 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-46/294358431-92b4f252-91a9-4326-ae14-9d21037d3478.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-46/294358432-2bbec170-1400-49c3-a30e-a0acc4bf3f66.png\n",
"爬取 issue-44 中 ...\n",
"爬取 issue-43 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335602119-bd06d356-3356-45a8-8556-b9b60fdb337b.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665721-e4730448-1588-424c-9a98-c661dfb5237d.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665609-f6bf03cd-857b-4156-8795-6e41416d96e4.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335599172-7ea754a3-1dea-428f-baf4-e04c400e2744.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665655-3341f355-0ca4-4757-9814-5702515922e7.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/292665566-dc8dd944-f464-4606-8900-4562cdf404c7.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335601886-3731689f-7ddb-4d4b-9e73-9e1a631e403f.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-43/335602049-bade0794-1653-42f7-8816-d3f3484009e3.png\n",
"爬取 issue-42 中 ...\n",
"爬取 issue-41 中 ...\n",
"爬取 issue-40 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-42/292425880-bc7f4792-6c68-45e0-862f-23b7b1232dce.png\n",
"爬取 issue-39 中 ...\n",
"爬取 issue-38 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-39/291777353-268ce49b-78c4-46a8-b543-542db0ef6dcc.png\n",
"爬取 issue-37 中 ...\n",
"爬取 issue-36 中 ...\n",
"爬取 issue-35 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-37/291228777-6d65928c-5ffd-4aab-af19-03291f31473a.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291267690-67d01501-ab54-4274-8425-e876b7035391.png\n",
"爬取 issue-34 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291267797-b00bdf44-c648-47d3-9bd9-eb1e68b12193.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268221-1c380a09-76b5-45a6-aff3-d8a873868402.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268654-d8a5686c-c109-461b-b68a-ff00bcd9f462.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268318-27f79c67-8e8d-419a-a0b7-e744b416b704.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268113-20044b19-d508-47ea-9f07-f675bc72a2cb.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291267929-d697e859-8ad0-4cc3-aa15-e50d0a26dc53.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268011-69f0a3ef-d509-47c9-b949-36d280edc4f8.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268429-e0517629-7b5a-4751-a431-330f04d8c1ee.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291268509-e9c687a4-2b28-46f1-8670-827359df792e.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291225163-78241e4c-400a-4d75-a008-3c34ca26ae4a.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-36/291225347-63167ad5-6896-4afe-a6d7-197532a23f8f.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-35/291224673-a3f70f16-271c-4905-b1f0-1c011b56d3bf.png\n",
"爬取 issue-33 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-34/291140726-cba47a2d-dd99-49e5-b302-733e266d0c44.png\n",
"爬取 issue-32 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-34/291310957-8b8f17a0-ec66-4009-9657-2433d51319c8.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-34/291140726-cba47a2d-dd99-49e5-b302-733e266d0c44.png\n",
"爬取 issue-31 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-32/290024353-47c9297c-6160-402a-b3bf-e08bd9c923ea.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-32/290024305-41e868d5-e737-4f1d-93af-558db2abba91.png\n",
"爬取 issue-30 中 ...\n",
"爬取 issue-29 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-31/290022938-62774f96-82ac-46f4-9599-818a6a430cd9.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-30/290022151-dc253b33-fbfe-4f9c-8023-e00e180015d6.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-30/290022070-0e449a39-5360-474e-bc18-1c1729071f66.png\n",
"爬取 issue-28 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290018445-3d43470c-5b8b-4466-b8c2-800694771143.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290020867-53b8d764-f8f8-41ab-8262-0ac17702fa42.png\n",
"爬取 issue-27 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290020867-53b8d764-f8f8-41ab-8262-0ac17702fa42.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-29/290018445-3d43470c-5b8b-4466-b8c2-800694771143.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-28/288112763-1dfaa7f2-f52b-42b5-ba40-c47c16205265.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-28/288112141-a1c08800-b6dd-4215-9ecf-288759cc0174.png\n",
"爬取 issue-26 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-27/287923928-5e817c41-c54d-409c-be36-576efb0a299a.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-27/287924096-490bef45-fc31-4ffc-a3a5-d077c350ff88.png\n",
"爬取 issue-25 中 ...\n",
"爬取 issue-24 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-25/282802849-27fc9f7a-f7f5-4b8d-84e5-1060166b0ad7.png\n",
"爬取 issue-23 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-25/282800215-946fad7c-54ae-46c9-be9a-d5a69b4fbf7b.png\n",
"爬取 issue-22 中 ...\n",
"爬取 issue-21 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-22/275505102-406e3256-7a3e-4deb-9456-2a49b41ca85d.png\n",
"爬取 issue-20 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-21/279390992-4a04af63-c176-49a8-a60e-5c3e95c07f8b.png\n",
"爬取 issue-19 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-21/275300475-444cf824-5489-461d-9678-440901554f68.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-20/268173022-80623e60-fba6-4f4c-85eb-5fb542ba8170.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-20/268173043-927f355a-37b5-45f5-bd88-78317549bf54.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-20/268173050-b186e855-4ad3-44c0-8708-59b11c5506a7.png\n",
"爬取 issue-17 中 ...\n",
"爬取 issue-16 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-17/264944498-f609f333-53a1-40f3-8bd5-a320b21398df.png\n",
"爬取 issue-15 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-16/264304291-1fbb69c3-02fa-4d50-9dec-cc6da46c1dd2.png\n",
"爬取 issue-14 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-15/264302335-b7b9d42e-aa16-474d-8c49-5573e397c374.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-15/264302196-e355b398-1ba7-4b7e-aa0b-b1d67646182a.png\n",
"爬取 issue-13 中 ...\n",
"爬取 issue-12 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-14/263748729-d0d2d005-019b-404f-a720-8f75b19a52ba.png\n",
"爬取 issue-11 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-12/263475138-92d989d2-2b5e-432c-bfde-8bd8f3524b6e.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-12/263475180-595e8d91-2645-47bf-a4db-24aad89d12ae.png\n",
"爬取 issue-10 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-12/289706401-d79d32f8-5738-4088-bc92-74e19da24885.png\n",
"爬取 issue-9 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-11/263142498-1af4cb41-c431-4de0-9d26-65729d3dfe65.png\n",
"爬取 issue-8 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-10/263140095-8d3beafa-ad35-405d-bcf7-3964853174b2.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-9/261163518-42173b79-b7b3-41c3-8860-1007f140fe86.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-9/261163571-1caa7264-3702-4467-9986-49e0557b0edc.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-9/261163490-45ab4e0e-7175-4a65-9a70-4e51b4c1366a.png\n",
"爬取 issue-7 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-8/260685640-0c9db5e7-ae1e-4558-b3f7-72ebb4f67043.png\n",
"爬取 issue-6 中 ...\n",
"爬取 issue-5 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-7/260630437-8d34c4a0-cc48-44b5-bbb8-94742c2e0776.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-7/260630481-0cbc73cf-f516-4b3d-92f5-17598f089297.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/256976429-8ad21695-2397-4a79-8fab-43fa01da5e24.png\n",
"爬取 issue-4 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/260248721-a56ad686-d1ac-4fa4-9fe7-fb9007f7a1e3.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/256976463-9b61e743-536e-4d53-af74-f8015b104a36.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/256976410-22019e06-df93-48b1-93a6-05901197b277.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-6/260716833-f3d89a67-7b4f-4daa-9a0a-8313dcf9caaa.png\n",
"爬取 issue-3 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-5/255643011-49b2efa2-09f4-463e-908b-4510d2110429.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-5/260307745-b545d146-a49b-4ebf-af88-ce3982a2e0ff.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-5/255646125-b5035137-6df5-4189-95c2-199970dfbe8d.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386114-73e4b52e-3eee-4652-971e-4bf123d6c9aa.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386123-e5e990fa-af1f-439b-bb74-aa20af50366c.png\n",
"爬取 issue-2 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386053-b536d9d7-fe3b-4c83-a581-0884e3cf04f6.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-4/255386339-56413f1c-bb32-49c0-aa85-dceeceb8594a.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/255608587-cd487ee5-95be-47a5-90d4-5f02e0a94cc2.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/255603702-8379ccc3-4d7a-407f-8777-aba9666e7c58.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/260107703-9e01db47-6c15-4d41-b823-a1896be68af7.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/260107481-5c0127b4-2273-4a14-b996-2d109a947a5e.png\n",
"爬取 issue-1 中 ...\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-3/255342825-dae63d0c-05b8-4965-b2e0-19df84778a5e.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/253884964-cbaf42fa-fa7d-48ed-8353-184dd0895a12.png\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/255247553-45d154cc-37d1-459d-80d3-adad6324de4c.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/256974788-7c2e9bf0-d239-4022-9aac-f8f160afad6f.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-2/256974788-7c2e9bf0-d239-4022-9aac-f8f160afad6f.png\n",
"[crawler] 图像已经保存至 ../docs/digital-issue/issue-1/253879145-d8f82699-aca6-44aa-bb1c-57066cf39f66.png\n"
]
}
],
"source": [
"page_urls = [\n",
" 'https://github.com/Digital-EDA/Digital-IDE/issues?page=1&q=',\n",
" 'https://github.com/Digital-EDA/Digital-IDE/issues?page=2&q=',\n",
" 'https://github.com/Digital-EDA/Digital-IDE/issues?page=3&q='\n",
"]\n",
"\n",
"for url in page_urls:\n",
" crawler_issue_page(url)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -19,15 +19,15 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 29, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"整理得到 238 个文档\n", "整理得到 304 个文档\n",
"分块得到 1206 个文档\n", "分块得到 1273 个文档\n",
"数据库已存储到 blog-vecdb 中\n" "数据库已存储到 blog-vecdb 中\n"
] ]
} }
@ -41,7 +41,7 @@
" db = FAISS.load_local(db_persistent_dir, embedding, allow_dangerous_deserialization=True)\n", " db = FAISS.load_local(db_persistent_dir, embedding, allow_dangerous_deserialization=True)\n",
" print('成功从 {} 中提取数据'.format(db_persistent_dir))\n", " print('成功从 {} 中提取数据'.format(db_persistent_dir))\n",
"else:\n", "else:\n",
" loader = DirectoryLoader('./docs', glob='**/*.md')\n", " loader = DirectoryLoader('../docs', glob='**/*.md')\n",
" docs = loader.load()\n", " docs = loader.load()\n",
" print('整理得到 {} 个文档'.format(len(docs)))\n", " print('整理得到 {} 个文档'.format(len(docs)))\n",
"\n", "\n",
@ -60,21 +60,21 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[(Document(page_content='完整的 VCD 语法,可以参考中科大资源网站上的 IEEE 1364 标准 第 18 章的内容。\\n\\n基本信息\\n\\n一个 vcd 文件会描述波形产生的基本信息,包括,日期,版本,单位时间,注释等等。\\n\\n日期 date\\n\\n$date\\n Sat Apr 20 20:06:14 2024\\n$end\\n\\n与 verilog 类似,$end$ 是一切 scope 的结束符号,你也可以写成这样:\\n\\n$date Sat Apr 20 20:06:14 2024 $end\\n\\n版本 version\\n\\n$version\\n Icarus Verilog\\n$end\\n\\n时间单位 timescale\\n\\n$timescale\\n 1ns\\n$end\\n\\n注释 comment\\n\\n$comment\\n Show the parameter values.\\n$end\\n\\n变量申明\\n\\nvcd 中通过如下的方式申明一个 module 内的变量,,每条的格式为 $var type bitwidth id name比如\\n\\n$scope module ID_EX $end\\n$var wire 2 ! AluOp [1:0] $end\\n$var wire 1 \" AluSrc $end\\n$var wire 1 # MemRead $end\\n$upscope $end\\n\\n以上语句申明了一个模块 ID_EX 和内部的三个信号 AluOpAluSrc 和 MemRead。这三个变量在接下来的 vcd 描述中会被 !\"# 替代,这也被称为 vcd 描述中,变量的 id.\\n\\n单个 module 的变量申明通过 $upscope $end 结束。\\n\\n所有 module 的变量申明通过 $enddefinitions $end 结束。\\n\\nparameter 赋值', metadata={'source': 'docs/224.md', 'start_index': 0}),\n", "[(Document(page_content='完整的 VCD 语法,可以参考中科大资源网站上的 IEEE 1364 标准 第 18 章的内容。\\n\\n基本信息\\n\\n一个 vcd 文件会描述波形产生的基本信息,包括,日期,版本,单位时间,注释等等。\\n\\n日期 date\\n\\n$date\\n Sat Apr 20 20:06:14 2024\\n$end\\n\\n与 verilog 类似,$end$ 是一切 scope 的结束符号,你也可以写成这样:\\n\\n$date Sat Apr 20 20:06:14 2024 $end\\n\\n版本 version\\n\\n$version\\n Icarus Verilog\\n$end\\n\\n时间单位 timescale\\n\\n$timescale\\n 1ns\\n$end\\n\\n注释 comment\\n\\n$comment\\n Show the parameter values.\\n$end\\n\\n变量申明\\n\\nvcd 中通过如下的方式申明一个 module 内的变量,,每条的格式为 $var type bitwidth id name比如\\n\\n$scope module ID_EX $end\\n$var wire 2 ! AluOp [1:0] $end\\n$var wire 1 \" AluSrc $end\\n$var wire 1 # MemRead $end\\n$upscope $end\\n\\n以上语句申明了一个模块 ID_EX 和内部的三个信号 AluOpAluSrc 和 MemRead。这三个变量在接下来的 vcd 描述中会被 !\"# 替代,这也被称为 vcd 描述中,变量的 id.\\n\\n单个 module 的变量申明通过 $upscope $end 结束。\\n\\n所有 module 的变量申明通过 $enddefinitions $end 结束。\\n\\nparameter 赋值', metadata={'source': '../docs/kirigaya.cn/224.md', 'start_index': 0}),\n",
" 0.4351002),\n", " 0.4351002),\n",
" (Document(page_content='TODO\\n\\n找到所有 vcd item 的 type 类型可参考https://pyvcd.readthedocs.io/en/latest/vcd.common.html\\n\\nbug\\n\\n详见飞书文档https://nc-ai-lab.feishu.cn/wiki/Z4AxwU1SdilATAk7GuvcYkIDnwh\\n\\n流程\\n\\n目前需要为Digital-IDE设计一个render用于显示VCD文件。\\n\\nIEEE 1364定义VCD(value change dump)文件是含已选变量(信号)的值变化信息存储文件。\\n\\nwavedrom在GitHub上开发过vcd和hdl的wavedrom脚本格式可以用于进行DIDE的vcd可视化的二次开发目前发现为了实现vcd文件基本的仓库主要是下面这两个\\n\\nJS层与一个简单的前端https://github.com/wavedrom/vcdrom\\n\\nwasm 解析https://github.com/wavedrom/vcd\\n\\n打算基于这两个仓库二次开发一个好用的vcd渲染模块再加入DIDE中。\\n\\n开发思路\\n\\nvcd 的渲染器分为如下几步进行开发。\\n\\nmermaid\\ngraph LR\\na(读取)-->b(渲染)-->c(解析)\\n\\nVCD 读取和解析并不难,难在如何快速安全地读取,对于一些长时间的模拟和仿真而言, vcd 文件可能会非常大。因此vcd 需要分块读取,分块解析,为了避免这些不必要的麻烦,我使用了 https://github.com/wavedrom/vcdrom 这个项目的后端进行修改。\\n\\nwasm 解析器\\n\\n原项目写得过于紧凑并不适合进行修改因此需要修改一部分代码。\\n\\n整合项目在 https://github.com/Digital-EDA/digital-vcd-parser', metadata={'source': 'docs/72.md', 'start_index': 0}),\n", " (Document(page_content='TODO\\n\\n找到所有 vcd item 的 type 类型可参考https://pyvcd.readthedocs.io/en/latest/vcd.common.html\\n\\nbug\\n\\n详见飞书文档https://nc-ai-lab.feishu.cn/wiki/Z4AxwU1SdilATAk7GuvcYkIDnwh\\n\\n流程\\n\\n目前需要为Digital-IDE设计一个render用于显示VCD文件。\\n\\nIEEE 1364定义VCD(value change dump)文件是含已选变量(信号)的值变化信息存储文件。\\n\\nwavedrom在GitHub上开发过vcd和hdl的wavedrom脚本格式可以用于进行DIDE的vcd可视化的二次开发目前发现为了实现vcd文件基本的仓库主要是下面这两个\\n\\nJS层与一个简单的前端https://github.com/wavedrom/vcdrom\\n\\nwasm 解析https://github.com/wavedrom/vcd\\n\\n打算基于这两个仓库二次开发一个好用的vcd渲染模块再加入DIDE中。\\n\\n开发思路\\n\\nvcd 的渲染器分为如下几步进行开发。\\n\\nmermaid\\ngraph LR\\na(读取)-->b(渲染)-->c(解析)\\n\\nVCD 读取和解析并不难,难在如何快速安全地读取,对于一些长时间的模拟和仿真而言, vcd 文件可能会非常大。因此vcd 需要分块读取,分块解析,为了避免这些不必要的麻烦,我使用了 https://github.com/wavedrom/vcdrom 这个项目的后端进行修改。\\n\\nwasm 解析器\\n\\n原项目写得过于紧凑并不适合进行修改因此需要修改一部分代码。\\n\\n整合项目在 https://github.com/Digital-EDA/digital-vcd-parser', metadata={'source': '../docs/kirigaya.cn/72.md', 'start_index': 0}),\n",
" 0.55531096),\n", " 0.55531096),\n",
" (Document(page_content='cap.release()\\nout.release()\\ncv2.destroyAllWindows()\\n```\\n\\n其中FORMAT代表目标视频的编码格式目前我还遇到了很多bug已经确定的FORMAT和编码关系的对照表如下\\n\\n目标视频编码格式 FORMAT 取值 mp4 \"mp4v\" aiv \"MJPG\"\\n\\n参考\\n\\n[1] python opencv写视频——cv2.VideoWriter()_翟羽嚄的博客-CSDN博客_cv2.videowriter', metadata={'source': 'docs/21.md', 'start_index': 711}),\n", " (Document(page_content='cap.release()\\nout.release()\\ncv2.destroyAllWindows()\\n```\\n\\n其中FORMAT代表目标视频的编码格式目前我还遇到了很多bug已经确定的FORMAT和编码关系的对照表如下\\n\\n目标视频编码格式 FORMAT 取值 mp4 \"mp4v\" aiv \"MJPG\"\\n\\n参考\\n\\n[1] python opencv写视频——cv2.VideoWriter()_翟羽嚄的博客-CSDN博客_cv2.videowriter', metadata={'source': '../docs/kirigaya.cn/21.md', 'start_index': 711}),\n",
" 0.71963197)]" " 0.71963197)]"
] ]
}, },
"execution_count": 11, "execution_count": 6,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -88,10 +88,31 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
"source": [] {
"data": {
"text/plain": [
"[(Document(page_content='除了b站的两个教程外建议多出文档以及示例工程一个刚接触FPGA但有不熟悉vivado的萌新就指望这个加快学习进度了\\n请问B站教程标题是啥我搜digital-ide啥也搜不到\\n请问B站教程标题是啥我搜digital-ide啥也搜不到\\n《Digital-IDE使用教程(一个DDS的实现)》\\n《Digital-IDE应用之FM调制解调》\\n请问B站教程标题是啥我搜digital-ide啥也搜不到\\n《Digital-IDE使用教程(一个DDS的实现)》 《Digital-IDE应用之FM调制解调》\\n好的谢谢\\n那个教程是好早之前我出的了新的版本一直不稳定今年稳定之后会出后续教程。\\nDigital IDE 的使用教程可以看\\n官方文档为 https://sterben.nitcloud.cn/zh/ 但是目前文档不是很完善。', metadata={'source': '../docs/digital-issue/issue-47/issue.md', 'start_index': 0}),\n",
" 0.64211607),\n",
" (Document(page_content='home: true\\nheroImage: /icon.png\\ndescription: Vscode 平台上的 ASIC & FPGA 开发扩展\\nactionText: 快速开始 🐳\\nactionLink: /zh/guide/introduction\\nfeatures:\\n- title: ✨ HDL 语言支持\\n details: 支持 verilog, vhdl, systemverilog, tcl 脚本等\\n- title: 🎯 项目管理\\n details: 在你的项目中查看结构化的 HDL 文件\\n- title: 🛠️ 额外的工具\\n details: FSM, Netlist, 一键仿真, 文档化,让你的编程体验更加舒坦。\\n\\n::: slot footer\\nMIT Licensed | Copyright © 2018-present Digital-EDA\\n:::', metadata={'source': '../docs/digital-document/index.md', 'start_index': 0}),\n",
" 0.7582667),\n",
" (Document(page_content='TODO\\n\\n找到所有 vcd item 的 type 类型可参考https://pyvcd.readthedocs.io/en/latest/vcd.common.html\\n\\nbug\\n\\n详见飞书文档https://nc-ai-lab.feishu.cn/wiki/Z4AxwU1SdilATAk7GuvcYkIDnwh\\n\\n流程\\n\\n目前需要为Digital-IDE设计一个render用于显示VCD文件。\\n\\nIEEE 1364定义VCD(value change dump)文件是含已选变量(信号)的值变化信息存储文件。\\n\\nwavedrom在GitHub上开发过vcd和hdl的wavedrom脚本格式可以用于进行DIDE的vcd可视化的二次开发目前发现为了实现vcd文件基本的仓库主要是下面这两个\\n\\nJS层与一个简单的前端https://github.com/wavedrom/vcdrom\\n\\nwasm 解析https://github.com/wavedrom/vcd\\n\\n打算基于这两个仓库二次开发一个好用的vcd渲染模块再加入DIDE中。\\n\\n开发思路\\n\\nvcd 的渲染器分为如下几步进行开发。\\n\\nmermaid\\ngraph LR\\na(读取)-->b(渲染)-->c(解析)\\n\\nVCD 读取和解析并不难,难在如何快速安全地读取,对于一些长时间的模拟和仿真而言, vcd 文件可能会非常大。因此vcd 需要分块读取,分块解析,为了避免这些不必要的麻烦,我使用了 https://github.com/wavedrom/vcdrom 这个项目的后端进行修改。\\n\\nwasm 解析器\\n\\n原项目写得过于紧凑并不适合进行修改因此需要修改一部分代码。\\n\\n整合项目在 https://github.com/Digital-EDA/digital-vcd-parser', metadata={'source': '../docs/kirigaya.cn/72.md', 'start_index': 0}),\n",
" 0.83628875)]"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"db.similarity_search_with_score(\n",
" query='digital ide 有什么教程吗',\n",
" k=3\n",
")"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",

3454
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -10,17 +10,30 @@
"scripts": { "scripts": {
"build": "tsc", "build": "tsc",
"start": "serve dist", "start": "serve dist",
"serve": "tsc -w & serve dist" "serve": "tsc -w & serve dist",
"test": "mocha"
}, },
"dependencies": { "dependencies": {
"@ptkdev/logger": "^1.8.0",
"@types/node": "^20.12.12", "@types/node": "^20.12.12",
"axios": "^1.7.2", "axios": "^1.7.2",
"fs": "^0.0.1-security",
"lagrange.onebot": "^1.0.0",
"ws": "^8.17.0", "ws": "^8.17.0",
"yaml": "^2.4.2" "yaml": "^2.4.2"
}, },
"devDependencies": { "devDependencies": {
"@types/ws": "^8.5.10", "@types/ws": "^8.5.10",
"chai": "4.3.4",
"serve": "^14.2.3", "serve": "^14.2.3",
"typescript": "^5.4.5" "typescript": "^5.4.5",
"mocha": "^10.4.0",
"require-uncached": "^2.0.0",
"shelljs": "^0.8.5",
"should": "^13.2.3",
"sinon": "^18.0.0",
"jsverify": "^0.8.4",
"knuth-shuffle": "^1.0.8",
"@sinonjs/referee-sinon": "^12.0.0"
} }
} }

1
prompt/__init__.py Normal file
View File

@ -0,0 +1 @@
from prompt.core import PromptEngine

261
prompt/core.py Normal file
View File

@ -0,0 +1,261 @@
from __future__ import annotations

import math
import random
import warnings
from abc import ABC, abstractmethod
from dataclasses import dataclass, field

import json5
import yaml
from loguru import logger
# Register a file sink on the shared loguru logger. This runs once, at import time.
logger.add(
    './logs/prompt.log',
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}",
    level='DEBUG',
    encoding='utf-8',
    enqueue=True,
    # File lifecycle options: when to rotate, how long to keep, how to archive.
    rotation='00:00',
    retention='7 days',
    compression='zip',
)
@dataclass(frozen=True)
class IntentNode:
    """One node of the intent tree.

    The instance is frozen (attributes cannot be rebound), but the ``children``
    and ``stories`` lists are mutable and are appended to while the tree and
    the stories are loaded.

    ``parent`` is excluded from equality and from ``repr``. Children point back
    at their parent, so including it made the generated ``__eq__`` bounce
    between parent and child forever (``RecursionError``) whenever two distinct
    but structurally equal trees were compared. Equality is now purely
    top-down: name, description, children, stories.

    Instances remain unhashable because the list fields take part in the
    generated ``__hash__``.
    """

    # Unique intent name; used as the lookup key by the engine.
    name: str
    # Optional human-readable description taken from the schema.
    description: str | None
    # Direct sub-intents, in schema order.
    children: list[IntentNode]
    # Owning node, or None for the root. Back-reference only.
    parent: IntentNode | None = field(compare=False, repr=False)
    # Labelled examples attached to this intent.
    stories: list[Story]
@dataclass(frozen=True)
class Story:
    """Immutable labelled example: a user message and the intent name it illustrates."""

    # Raw text of the example message.
    message: str
    # Name of the intent node this example belongs to.
    intent: str
class PromptEngine:
    """Build few-shot intent-classification prompts from a YAML story file.

    The file is expected to hold three top-level sections:

    * ``schema``  - the intent tree, starting at ``schema.root``;
    * ``stories`` - ``{message, intent}`` pairs used as few-shot examples;
    * ``rejects`` - a plain list of strings, stored verbatim.

    A missing or empty section is treated as empty instead of raising.
    """

    path: str
    schema: IntentNode | None
    stories: list[Story]
    rejects: list[str]
    intent2id: dict[str, int]
    id2intent: dict[int, str]
    name2node: dict[str, IntentNode]

    def __init__(self, path: str) -> None:
        """Load the story file at ``path`` and index its intents and stories."""
        self.path = path
        # NOTE(review): yaml.Loader can construct arbitrary Python objects. That is
        # acceptable for a trusted project file; switch to yaml.safe_load if this
        # path can ever point at untrusted input.
        with open(path, 'r', encoding='utf-8') as fp:
            self.config = yaml.load(fp, yaml.Loader) or {}

        self.intent2id = {}
        self.id2intent = {}
        self.name2node = {}

        # The schema must be handled first: stories are attached to its nodes.
        self.schema = self.handle_schema(self.config.get('schema') or {})
        self.stories = self.handle_stories(self.config.get('stories') or [])
        self.rejects = self.handle_rejects(self.config.get('rejects') or [])

    def handle_schema(self, raw_schema: dict) -> IntentNode | None:
        """Turn the raw ``schema`` mapping into a tree of ``IntentNode``.

        Nodes are visited breadth-first. Every name seen for the first time is
        given the next integer id (recorded in ``intent2id`` / ``id2intent``)
        and the node is registered in ``name2node``.

        Returns:
            The root node, or ``None`` (after a warning) if there is no ``root``.

        Raises:
            NameError: if a node has no ``name``.
        """
        raw_root = (raw_schema or {}).get('root', None)
        if raw_root is None:
            warnings.warn('schema must have a root node as the beginning, otherwise intent recogition will not work')
            return None

        root: IntentNode | None = None
        current_layer: list[tuple[dict, IntentNode | None]] = [(raw_root, None)]

        # Level-order traversal: each entry pairs a raw node with its built parent.
        while current_layer:
            next_layer: list[tuple[dict, IntentNode | None]] = []
            for raw_node, parent in current_layer:
                name = raw_node.get('name', None)
                if name is None:
                    raise NameError('you must specify a name in story item, current item : {}'.format(raw_node))
                description = raw_node.get('description', None)
                # An empty `children:` key parses as None.
                raw_children = raw_node.get('children', None) or []

                if name not in self.intent2id:
                    assign_id = len(self.intent2id)
                    self.intent2id[name] = assign_id
                    self.id2intent[assign_id] = name

                node = IntentNode(name, description, [], parent, [])
                self.name2node[name] = node
                if root is None:
                    root = node
                if parent is not None:
                    parent.children.append(node)

                for raw_child in raw_children:
                    next_layer.append((raw_child, node))
            current_layer = next_layer

        return root

    def handle_stories(self, raw_stories: list[dict]) -> list[Story]:
        """Build ``Story`` objects and attach each one to its intent node.

        Entries whose intent is not declared in the schema are skipped with a
        warning. Entries with an empty message are skipped silently.
        """
        stories: list[Story] = []
        for pair in raw_stories or []:
            message = pair.get('message', None)
            intent = pair.get('intent', None)
            if intent not in self.intent2id:
                warnings.warn('{} is not the intent you declare in schema, so this pair will be ignored'.format(intent))
                continue
            if message and intent:
                story = Story(message, intent)
                self.name2node[intent].stories.append(story)
                stories.append(story)
        return stories

    def handle_rejects(self, raw_rejects: list[str]) -> list[str]:
        """Return a fresh list holding the reject entries (``None`` yields ``[]``)."""
        return list(raw_rejects or [])

    def generate_chunk(self, stories: list[Story]) -> tuple[str, str]:
        """Render a group of stories as one user/assistant exchange.

        Every story except the last appears in the user text as a solved
        example (``Message: ...`` followed by ``Intent: { id: N }``). The last
        story is left open (``Intent: ``) and its id becomes the assistant reply.

        Raises:
            ValueError: if ``stories`` is empty.
        """
        if not stories:
            raise ValueError('generate_chunk requires at least one story')

        *solved, pending = stories
        lines = []
        for story in solved:
            lines.append('Message: ' + story.message.strip())
            lines.append('Intent: { id: %s }' % (self.intent2id.get(story.intent)))
        lines.append('Message: ' + pending.message.strip())

        user_content = '\n'.join(lines) + '\n' + 'Intent: '
        assistant_content = '{id : %s}' % (self.intent2id.get(pending.intent))
        return user_content, assistant_content

    def generate_llm_message(self, question: str, intent: IntentNode | None = None, chunk_size: int = 5, max_chunk_num: int = 10) -> list[dict]:
        """Assemble the chat history that asks the LLM to classify ``question``.

        Args:
            question: The user message to classify.
            intent: Node whose children are the candidate intents; defaults to
                the schema root.
            chunk_size: Number of stories rendered per user/assistant exchange.
            max_chunk_num: Stop adding exchanges once this many have been emitted.

        Returns:
            A list of ``{'role', 'content'}`` dicts ending with the question.
            The first entry is prefixed with the instruction preset and the
            list of candidate intent ids.

        Raises:
            ValueError: if no ``intent`` is given and the schema has no root.
        """
        if intent is None:
            intent = self.schema
        if intent is None:
            raise ValueError('no intent node available: the schema has no root node')

        # Few-shot material is the stories of the direct children, in random order.
        story_cache = []
        for node in intent.children:
            story_cache.extend(node.stories)
        random.shuffle(story_cache)

        message = []
        for start in range(0, len(story_cache), chunk_size):
            chunk = story_cache[start: start + chunk_size]
            user_content, assistant_content = self.generate_chunk(chunk)
            message.append({
                'role': 'user',
                'content': user_content
            })
            message.append({
                'role': 'assistant',
                'content': assistant_content
            })
            if len(message) / 2 >= max_chunk_num:
                break

        # NOTE(review): unlike the few-shot turns, the final question carries no
        # 'Message: ' prefix, and the preset asks for the intent *name* while the
        # examples answer with ids. Left as-is because it changes the prompt; confirm.
        message.append({
            'role': 'user',
            'content': question + '\nIntent: '
        })

        # Instruction preset, prepended to the very first message.
        preset = 'Label a users message from a conversation with an intent. Reply ONLY with the name of the intent.'
        intent_preset = ['The intent should be one of the following:']
        for node in intent.children:
            intent_preset.append('- {}'.format(self.intent2id.get(node.name)))
        intent_preset = '\n'.join(intent_preset)
        message[0]['content'] = preset + '\n' + intent_preset + '\n' + message[0]['content']
        return message
class TreeIntent(ABC):
    """Walk the intent tree level by level, asking an LLM to pick a child each time.

    Subclasses only provide ``call_llm``. Prompt construction is delegated to
    ``PromptEngine``.
    """

    path: str
    engine: PromptEngine

    def __init__(self, path: str) -> None:
        """Create the prompt engine from the story file at ``path``."""
        self.path = path
        self.engine = PromptEngine(path)

    @abstractmethod
    def call_llm(self, message: list[dict]) -> str:
        """Send the chat history to the model and return its raw text reply.

        ``message`` is a list of alternating turns, for example::

            [
                {"role": "user", "content": "Message: <example text>\\nIntent: "},
                {"role": "assistant", "content": "{ id: 0 }"},
                {"role": "user", "content": "<question>\\nIntent: "}
            ]

        The reply is expected to contain an object with an ``id`` key.
        """
        pass

    def purify_json(self, json_string: str):
        """Return the first balanced ``{...}`` span found in ``json_string``.

        Closing braces that appear before any opening brace are ignored
        (previously they raised ``IndexError``). If no balanced span exists,
        the input is returned unchanged.
        """
        depth = 0
        start_index = None
        for i, ch in enumerate(json_string):
            if ch == '{':
                if depth == 0:
                    start_index = i
                depth += 1
            elif ch == '}':
                if depth == 0:
                    # Stray '}' with nothing open: skip it.
                    continue
                depth -= 1
                if depth == 0:
                    return json_string[start_index: i + 1]
        return json_string

    def try_generate_intent_id(self, question: str, intent: IntentNode | None = None, chunk_size: int = 5, max_chunk_num: int = 10, retry: int = 3) -> int | None:
        """Ask the LLM for an intent id, retrying up to ``retry`` times.

        Each attempt builds a fresh prompt, calls ``call_llm``, extracts the
        first JSON-like object from the reply and reads its ``id``.

        Returns:
            The id as an ``int``, or ``None`` if every attempt failed.
        """
        engine = self.engine
        for attempt in range(retry):
            try:
                message = engine.generate_llm_message(question, intent, chunk_size, max_chunk_num)
                reply = self.call_llm(message)
                parsed = json5.loads(self.purify_json(reply))
                return int(parsed['id'])
            except Exception as e:
                # Best-effort retry loop: record why the attempt failed instead of
                # discarding the error, then try again.
                logger.debug('intent id attempt {}/{} failed: {!r}'.format(attempt + 1, retry, e))
                continue
        return None

    def inference(self, question: str, chunk_size: int = 5, max_chunk_num: int = 10) -> list[IntentNode] | None:
        """Classify ``question`` by descending the intent tree from the root.

        At each level the LLM picks one child of the current node. Descent
        continues only while the chosen node has at least two children.

        Args:
            question: The user message to classify.
            chunk_size: Forwarded to prompt generation.
            max_chunk_num: Forwarded to prompt generation.

        Returns:
            The chosen nodes from the top level downwards, or ``None`` when the
            schema has no root, the LLM gives no usable id, the id is unknown,
            or the id does not name a child of the node being expanded.
        """
        engine = self.engine
        root_node = engine.schema
        if root_node is None:
            logger.warning('schema has no root node, intent inference is skipped')
            return None

        results: list[IntentNode] = []
        stack: list[IntentNode] = [root_node]

        while len(stack) > 0:
            node = stack.pop()
            intent_id = self.try_generate_intent_id(question, node, chunk_size, max_chunk_num)
            if intent_id is None:
                logger.warning('fail to generate intent id from message, check log file for details')
                logger.debug(json5.dumps({ 'question': question, 'node.name': node.name }, ensure_ascii=False))
                return None
            if intent_id not in engine.id2intent:
                logger.warning('inferred intent id {} not in the list of engine.id2intent {}'.format(intent_id, list(engine.id2intent.keys())))
                logger.debug(json5.dumps({ 'question': question, 'node.name': node.name, 'intent_id': intent_id }, ensure_ascii=False))
                return None

            intent_name = engine.id2intent[intent_id]
            # Only a direct child is a valid answer. Accepting the node itself or
            # an ancestor would push it back on the stack and loop forever.
            if not any(child.name == intent_name for child in node.children):
                logger.warning('inferred intent {} is not a child of node {}'.format(intent_name, node.name))
                logger.debug(json5.dumps({ 'question': question, 'node.name': node.name, 'intent_id': intent_id }, ensure_ascii=False))
                return None

            intent_node = engine.name2node[intent_name]
            results.append(intent_node)
            if len(intent_node.children) >= 2:
                stack.append(intent_node)
        return results
if __name__ == '__main__':
    # Manual smoke test: print the prompt built for one sample question.
    demo_engine = PromptEngine('./story.yml')
    print(demo_engine.generate_llm_message('如何解决 digital ide 无法载入配置文件的问题?'))

83
prompt/erine.py Normal file
View File

@ -0,0 +1,83 @@
import os
import json
import requests as r
from core import TreeIntent, logger
class ErineIntent(TreeIntent):
    """``TreeIntent`` backed by Baidu's ``ernie-lite-8k`` chat endpoint."""
    api_key: str
    secret_key: str
    access_token: str
    # Seconds to wait on each HTTP call; without a timeout a stalled
    # connection would block the caller forever.
    request_timeout: float = 60
    # Error codes Baidu documents for an invalid (110) or expired (111) token.
    _TOKEN_ERROR_CODES = (110, 111)

    def __init__(self, path: str, api_key: str = None, secret_key: str = None) -> None:
        """Load the story file and fetch an access token.

        Credentials fall back to the ``BAIDU_API_KEY`` / ``BAIDU_SECRET_KEY``
        environment variables.

        Raises:
            KeyError: a credential is neither passed nor set in the environment.
            ValueError: the access token could not be obtained.
        """
        super().__init__(path)
        self.api_key = api_key or os.environ['BAIDU_API_KEY']
        self.secret_key = secret_key or os.environ['BAIDU_SECRET_KEY']
        try:
            self.access_token = self.get_access_token()
        except Exception as e:
            # Chain the cause so the real reason (network, bad key, ...) is visible.
            raise ValueError('fail to get access token in initialization') from e

    def get_access_token(self):
        """Exchange the API key pair for an OAuth access token.

        Raises:
            AssertionError: the response carries no string ``access_token``.
        """
        headers = {
            'Content-Type': 'application/json',
            'Accept': 'application/json'
        }
        # Let requests build the query string so the credentials are URL-encoded.
        res = r.post(
            url='https://aip.baidubce.com/oauth/2.0/token',
            params={
                'grant_type': 'client_credentials',
                'client_id': self.api_key,
                'client_secret': self.secret_key
            },
            data=json.dumps(""),
            headers=headers,
            timeout=self.request_timeout
        )
        resJson = res.json()
        access_token = resJson.get('access_token')
        assert isinstance(access_token, str), 'access_token 获取失败,详细信息' + str(resJson)
        return access_token

    def post_message(self, message: list[dict]):
        """POST the chat transcript to the model endpoint and return the raw response."""
        headers = {
            'Content-Type': 'application/json'
        }
        payload = json.dumps({
            'messages': message,
            'penalty_score': 2.0
        })
        url = 'https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-lite-8k?access_token=' + self.access_token
        return r.post(url, headers=headers, data=payload, timeout=self.request_timeout)

    def _parse_body(self, res):
        """Decode a response as JSON; log and return ``None`` when it is not JSON."""
        try:
            return res.json()
        except Exception as e:
            logger.error('get error when parse response of wenxinyiyan: ' + str(e))
            # Log the raw text: calling res.json() again here would raise once more.
            logger.debug(res.text)
            return None

    def call_llm(self, message: list[dict]) -> str:
        """Return the model's ``result`` text, or ``None`` when no result is available.

        The token is refreshed and the request retried once, either when the
        transport fails or when the service reports an invalid/expired token.
        """
        try:
            res = self.post_message(message)
        except Exception:
            self.access_token = self.get_access_token()
            res = self.post_message(message)

        body = self._parse_body(res)
        # A stale token is reported inside a normal HTTP response rather than
        # as a transport error, so it must be detected from the body.
        if isinstance(body, dict) and body.get('error_code') in self._TOKEN_ERROR_CODES:
            self.access_token = self.get_access_token()
            body = self._parse_body(self.post_message(message))

        if not isinstance(body, dict) or 'result' not in body:
            logger.error('get error when parse response of wenxinyiyan: missing "result" field')
            logger.debug(body)
            return None
        return body['result']
if __name__ == '__main__':
    # Manual stability check: classify the same message repeatedly and tally
    # the top-level intent returned on each run.
    from collections import Counter

    erine = ErineIntent('./config/story.yml')
    result = []
    for i in range(20):
        nodes = erine.inference('那不就是rv芯片往上堆扩展吗')
        if nodes is None:
            # Fixed typo in the printed label ("ohters").
            print('none -> others')
        else:
            node = nodes[0]
            result.append(node.name)
            print(node.name)

    print(Counter(result))

68
prompt/log.py Normal file
View File

@ -0,0 +1,68 @@
from __future__ import annotations
from typing import Callable
import asyncio
import sys
from weakref import WeakSet
import json
class AsyncWorker:
    """Schedules ``worker_fn`` coroutines as asyncio tasks and tracks them.

    NOTE(review): ``cb`` is stored but never invoked by this class; its
    expected signature is not visible here.
    """
    worker_fn: Callable
    task_pool: set
    loop: asyncio.AbstractEventLoop | None
    cb: Callable | None

    def __init__(self, worker_fn: Callable, cb: Callable = None) -> None:
        self.worker_fn = worker_fn
        self.loop = None
        self.cb = cb
        # Strong references: the event loop itself only keeps weak references
        # to tasks, so a task held solely in a WeakSet may be garbage-collected
        # before it finishes.
        self.task_pool = set()

    def dispatch(self, *args):
        """Schedule ``worker_fn(*args)`` as a task; a no-op when no event loop is available."""
        try:
            loop = self.loop or asyncio.get_event_loop()
        except RuntimeError:
            return

        coro = self.worker_fn(*args)
        task = loop.create_task(coro)
        self.task_pool.add(task)
        # Drop the reference once the task is done so the pool does not grow forever.
        task.add_done_callback(self.task_pool.discard)

    def stop(self):
        """Request cancellation of every tracked task."""
        # Iterate over a snapshot: done-callbacks mutate the pool.
        for task in list(self.task_pool):
            task.cancel()

    def complete_all_tasks(self):
        """Return one awaitable per tracked task; the caller must await them (e.g. with ``asyncio.gather``)."""
        return [self.complete_task(task) for task in list(self.task_pool)]

    async def complete_task(self, task: asyncio.Task):
        """Wait for ``task`` if it belongs to the current loop; its exceptions are ignored."""
        loop = asyncio.get_event_loop()
        if task.get_loop() != loop:
            return
        try:
            await task
        except Exception:
            # Deliberate best-effort: a failing worker must not break the drain.
            pass
async def worker(n, m):
    """Demo coroutine: spin through ``n`` x ``m`` empty iterations, then print ``finish``."""
    for _outer in range(n):
        for _inner in range(m):
            continue
    print('finish')
if __name__ == '__main__':
    # Manual demo. It is guarded so that importing this module no longer blocks
    # in the endless loop below, and it is driven by a real event loop so the
    # dispatched task actually runs.
    import time

    async def _demo():
        async_worker = AsyncWorker(worker)
        async_worker.dispatch(1000, 1000)
        await asyncio.gather(*async_worker.complete_all_tasks())

    asyncio.run(_demo())

    while True:
        time.sleep(1)
        print('load')

15
rag/admin.py Normal file
View File

@ -0,0 +1,15 @@
from flask import Flask, request, jsonify
from loguru import logger
# File sink for the RAG service log, configured through loguru's rotation,
# retention and compression options. enqueue=True routes records through a queue.
logger.add(
    sink='./logs/rag.log',
    level='DEBUG',
    rotation='00:00',
    retention='7 days',
    compression='zip',
    encoding='utf-8',
    enqueue=True,
    format="{time:YYYY-MM-DD HH:mm:ss} | {level} | {message}"
)

# Flask expects the module's import name here, not a file path. With a path it
# cannot locate the module and falls back to the working directory as its root.
app = Flask(__name__)

8
rag/configs.py Normal file
View File

@ -0,0 +1,8 @@
# Paths (relative to the working directory) of the resources the RAG service
# needs, keyed by a symbolic name. The service entry point asserts that every
# path listed here exists before it starts.
necessary_files = {
'vecdb-config': './config/vecdb.yml',
# Story/schema file parsed by PromptEngine.
'intent-story': './config/story.yml',
'blog-vecdb-data': './blog-vecdb/index.faiss',
'blog-vecdb-model': './blog-vecdb/index.pkl',
# Classifier loaded and saved with joblib by the intent routes.
'intent-classifier': './model/embedding_mapping.sklearn'
}

View File

@ -6,6 +6,7 @@ class StatusCode(Enum):
server_error = 4002 server_error = 4002
resource_not_found = 4003 resource_not_found = 4003
timeout = 4004 timeout = 4004
process_error = 4005
class MsgCode(Enum): class MsgCode(Enum):
success = '请求处理成功' success = '请求处理成功'

97
rag/intent.py Normal file
View File

@ -0,0 +1,97 @@
from flask import Flask, request, jsonify
import numpy as np
import joblib
import json
from sklearn.linear_model import LogisticRegression
from embedding import embedding
from constant import StatusCode, MsgCode
from admin import app
from configs import necessary_files
import sys
import os
sys.path.append(os.path.abspath('.'))
from prompt import PromptEngine
class IntentRecogition:
    """Embedding-based intent classifier: a persisted estimator plus the id-to-name mapping."""

    def __init__(self) -> None:
        # Estimator persisted by the retrain route.
        self.embed_intent_classificator = joblib.load(necessary_files['intent-classifier'])
        # Supplies the id -> intent-name mapping.
        self.engine = PromptEngine(necessary_files['intent-story'])

    def get_intent_recogition(self, query: str) -> dict:
        """Embed ``query``, predict its intent id and return ``{'id', 'name'}``."""
        vectors = embedding.embed_documents([query])
        predicted = self.embed_intent_classificator.predict(vectors)
        label = int(predicted[0])
        return {
            'id': label,
            'name': self.engine.id2intent[label]
        }


# Module-level singleton shared by the Flask routes below.
intent_recogition = IntentRecogition()
@app.route('/intent/retrain-embedding-mapping', methods=['post'])
def retrain_embedding_mapping():
    """Refit the embedding -> intent classifier from the story file and persist it.

    The HTTP status is always the success status; the outcome is reported in
    the ``code`` field of the JSON body (``process_error`` on failure).
    """
    try:
        engine = PromptEngine(necessary_files['intent-story'])
        sentences = [story.message for story in engine.stories]
        labels = np.array([engine.intent2id[story.intent] for story in engine.stories])

        embed = embedding.embed_documents(sentences)
        model = LogisticRegression()
        model.fit(embed, labels)

        # Persist first and only then swap the live objects, so a failed dump
        # cannot leave memory and disk out of sync.
        joblib.dump(model, necessary_files['intent-classifier'])
        intent_recogition.engine = engine
        intent_recogition.embed_intent_classificator = model
    except Exception as e:
        response = jsonify({
            'code': StatusCode.process_error.value,
            'data': str(e),
            # TODO: add a dedicated MsgCode member. The previous value reused
            # the unrelated "query must not be empty" message.
            'msg': 'fail to retrain embedding mapping'
        })
        response.status_code = StatusCode.success.value
        return response

    response = jsonify({
        'code': StatusCode.success.value,
        'data': 'save data to ' + necessary_files['intent-classifier'],
        # Human-readable message, not the numeric status code.
        'msg': MsgCode.success.value
    })
    response.status_code = StatusCode.success.value
    return response
@app.route('/intent/get-intent-recogition', methods=['post'])
def get_intent_recogition():
    """Classify the ``query`` field of the JSON request body.

    The HTTP status is always the success status; the outcome is reported in
    the ``code`` field of the JSON body.
    """
    # force=True keeps the old behaviour of parsing the body regardless of
    # Content-Type. silent=True turns malformed JSON into None instead of a 500.
    params = request.get_json(force=True, silent=True)
    query = params.get('query', None) if isinstance(params, dict) else None

    if query is None:
        response = jsonify({
            'code': StatusCode.user_error.value,
            'data': {},
            'msg': MsgCode.query_not_empty.value
        })
        response.status_code = StatusCode.success.value
        return response

    try:
        result = intent_recogition.get_intent_recogition(query)
    except Exception as e:
        response = jsonify({
            'code': StatusCode.process_error.value,
            'data': str(e),
            'msg': 'fail to recognize intent'
        })
        response.status_code = StatusCode.success.value
        return response

    response = jsonify({
        'code': StatusCode.success.value,
        'data': result,
        # Human-readable message, not the numeric status code.
        'msg': MsgCode.success.value
    })
    response.status_code = StatusCode.success.value
    return response

View File

@ -1,51 +1,15 @@
from flask import Flask, request, jsonify import os
import json
from embedding import db from admin import app, logger
from url_mapping import urlmapping from intent import *
from constant import StatusCode, MsgCode from vecdb import *
from configs import necessary_files
app = Flask(__file__) def assert_resource(path: str):
assert os.path.exists(path), '{} 不存在'.format(file)
@app.route('/vecdb/similarity_search_with_score', methods=['post']) for file in necessary_files.values():
def post_similarity_search_with_score(): assert_resource(file)
params = request.data.decode('utf-8')
params: dict = json.loads(params)
result_data = []
query = params.get('query', None)
if query is None:
response = jsonify({
'code': StatusCode.user_error.value,
'data': result_data,
'msg': MsgCode.query_not_empty.value
})
response.status_code = StatusCode.success.value
return response
k = int(params.get('k', 3))
results = db.similarity_search_with_score(query=query, k=k)
for doc, score in results:
page_content = doc.page_content
meta = doc.metadata
source = meta.get('source', '')
if len(source) > 0:
source = urlmapping.url_from_mapping(source)
result_data.append({
'content': page_content.strip(),
'meta': meta,
'source': source,
'score': float(score)
})
response = jsonify({
'code': StatusCode.success.value,
'data': result_data,
'msg': StatusCode.success.value
})
response.status_code = StatusCode.success.value
return response
if __name__ == '__main__': if __name__ == '__main__':
from gevent import pywsgi from gevent import pywsgi
@ -53,6 +17,6 @@ if __name__ == '__main__':
config: dict = yaml.load(open('./config/vecdb.yml'), Loader=yaml.Loader) config: dict = yaml.load(open('./config/vecdb.yml'), Loader=yaml.Loader)
addr = config.get('addr', '127.0.0.1') addr = config.get('addr', '127.0.0.1')
port = int(config.get('port', 8081)) port = int(config.get('port', 8081))
server = pywsgi.WSGIServer((addr, port), app) server = pywsgi.WSGIServer((addr, port), app)
logger.info('RAG 系统运行在 http://{}:{}'.format(addr, port))
server.serve_forever() server.serve_forever()

View File

@ -1,14 +0,0 @@
import requests as r
import json
payload = json.dumps({
'query': '一键生成 requirements.txt ',
'k': 3
})
res = r.post('http://localhost:8081/vecdb/similarity_search_with_score', data=payload)
print(res.status_code)
if res.status_code == 200:
print(res.json())

47
rag/vecdb.py Normal file
View File

@ -0,0 +1,47 @@
from flask import Flask, request, jsonify
import json
from embedding import db
from constant import StatusCode, MsgCode
from url_mapping import urlmapping
from admin import app
@app.route('/vecdb/similarity_search_with_score', methods=['post'])
def post_similarity_search_with_score():
    """Return the ``k`` documents most similar to ``query`` with their scores.

    Request body (JSON): ``query`` (required) and ``k`` (optional, default 3).
    The HTTP status is always the success status; the outcome is reported in
    the ``code`` field of the JSON body.
    """
    def user_error(msg):
        response = jsonify({
            'code': StatusCode.user_error.value,
            'data': [],
            'msg': msg
        })
        response.status_code = StatusCode.success.value
        return response

    # force=True keeps the old behaviour of parsing the body regardless of
    # Content-Type. silent=True turns malformed JSON into None instead of a 500.
    params = request.get_json(force=True, silent=True)
    query = params.get('query', None) if isinstance(params, dict) else None
    if query is None:
        return user_error(MsgCode.query_not_empty.value)

    try:
        k = int(params.get('k', 3))
    except (TypeError, ValueError):
        k = 0
    if k < 1:
        return user_error('k must be a positive integer')

    result_data = []
    results = db.similarity_search_with_score(query=query, k=k)
    for doc, score in results:
        meta = doc.metadata
        source = meta.get('source', '')
        if len(source) > 0:
            source = urlmapping.url_from_mapping(source)
        result_data.append({
            'content': doc.page_content.strip(),
            'meta': meta,
            'source': source,
            'score': float(score)
        })

    response = jsonify({
        'code': StatusCode.success.value,
        'data': result_data,
        # Human-readable message, not the numeric status code.
        'msg': MsgCode.success.value
    })
    response.status_code = StatusCode.success.value
    return response

110
scripts/ernie.py Normal file
View File

@ -0,0 +1,110 @@
import requests as r
import json
import os

# SECURITY: credentials must not live in source control. The key pair that was
# hard-coded here has been committed and should be treated as leaked: revoke it
# and issue a new one. The variable names match those used by ErineIntent.
api_key = os.environ.get('BAIDU_API_KEY', '')
secret_key = os.environ.get('BAIDU_SECRET_KEY', '')
def get_access_token():
    """Exchange the module-level ``api_key`` / ``secret_key`` for an OAuth access token.

    Raises:
        AssertionError: the response carries no string ``access_token``.
    """
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    # Let requests build the query string so the credentials are URL-encoded,
    # and bound the wait so a stalled connection cannot hang the script.
    res = r.post(
        url='https://aip.baidubce.com/oauth/2.0/token',
        params={
            'grant_type': 'client_credentials',
            'client_id': api_key,
            'client_secret': secret_key
        },
        data=json.dumps(""),
        headers=headers,
        timeout=30
    )
    resJson = res.json()
    access_token = resJson.get('access_token')
    assert isinstance(access_token, str), 'access_token 获取失败,详细信息' + str(resJson)
    return access_token
access_token = get_access_token()

# Read the prompt fixtures through context managers so the file handles are
# closed deterministically.
with open('./template.txt', 'r', encoding='utf-8') as _fp:
    text = _fp.read()
with open('./t2.txt', 'r', encoding='utf-8') as _fp:
    t2 = _fp.read()
# Request body for the chat endpoint. Only the last, uncommented 'messages'
# list is sent: a single user turn ending in "Intent: ". The commented-out
# variants are earlier few-shot experiments kept for reference.
payload = json.dumps({
# "messages": [
# {
# "role": "user",
# "content": text
# },
# {
# "role": "assistant",
# "content": "{ id: 3 }"
# },
# {
# "role": "user",
# "content": "Message: 大佬们为啥我的digital ide启动之后所有功能都没启动捏我配置了property文件然后插件的vivado路经和modelsim路经都加上了\nIntent: "
# },
# {
# "role": "assistant",
# "content": "{ id: 0 }"
# },
# {
# "role": "user",
# "content": "话说digital-ide打开大的verilog卡死了\nIntent: "
# },
# {
# "role": "assistant",
# "content": "{ id: 1 }"
# },
# {
# 'role': 'user',
# "content": "请问一下第一次点击对文件仿真可以出波形文件再次点击的时候就会提示unknown module type了。是哪个配置没配置好\nIntent: "
# },
# ]
'messages': [
# {
# 'role': 'user',
# 'content': 'Label a users message from a conversation with an intent. Reply ONLY with the name of the intent.\nThe intent should be one of the following:\n- 1\n- 2\n- 3\n- 4\nMessage: surface了解一下\nIntent: { id: 4 }\nMessage: Metals一开直接报错\nIntent: { id: 4 }\nMessage: 大佬们为啥我的digital ide启动之后所有功能都没启动捏我配置了property文件然后插件的vivado路经和modelsim路经都加上了\nIntent: { id: 1 }\nMessage: 请问 property.json 如何配置?\nIntent: { id: 1 }\nMessage: 请问一下第一次点击对文件仿真可以出波形文件再次点击的时候就会提示unknown module type了。是哪个配置没配置好\nIntent: '
# },
# {
# 'role': 'assistant',
# 'content': '{id : 1}'
# },
# {
# 'role': 'user',
# 'content': 'Message: 话说digital-ide打开大的verilog卡死了\nIntent: { id: 2 }\nMessage: 帮我上传一下这份数据\nIntent: { id: 3 }\nMessage: 我的自动补全无法使用是不是有bug\nIntent: { id: 2 }\nMessage: 这群要被chisel夺舍了吗\nIntent: '
# },
# {
# 'role': 'assistant',
# 'content': '{id : 4}'
# },
{
"role": "user",
"content": "如何解决 digital ide 无法载入配置文件的问题?\nIntent: "
}
]
})
# Send the prepared payload to the ernie-lite-8k chat endpoint and dump the
# decoded JSON reply.
res = r.post(
    'https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/ernie-lite-8k?access_token=' + access_token,
    headers={'Content-Type': 'application/json'},
    data=payload
)
print(res.json())
# print(res.json()['result'])
# cache = []
# for line in res.iter_lines():
# line_text: str = line.decode('UTF-8')
# if line_text.startswith('data:'):
# iter_json = json.loads(line_text.lstrip('data: '))
# result: str = iter_json['result']
# cache.append(result)
# if result.endswith('。') or result.endswith('.'):
# sentence = ''.join(cache).strip()
# print(sentence)
# cache.clear()
# if len(cache) > 0:
# print(''.join(cache).strip())

View File

@ -0,0 +1,123 @@
import markdown
import re
from bs4 import BeautifulSoup, Tag
"""/**
* @description 发送私聊消息
* @param user_id 对方 QQ
* @param message 要发送的内容
* @param auto_escape 消息内容是否作为纯文本发送即不解析 CQ 只在 message 字段是字符串时有效
*/
function send_private_msg(user_id: number, message: Lagrange.Message, auto_escape: boolean = false) {
return {
action: 'send_private_msg',
params: { user_id, message, auto_escape }
};
}"""
# %-format template for one generated TypeScript method. The six placeholders
# are filled, in order, with: the description, the "@param" doc lines, the
# camelCase method name, the typed parameter list, the OneBot action name and
# the comma-separated parameter names.
template = """/**
* @description %s%s */
public %s(%s) {
return this.send({
action: '%s',
params: { %s }
});
}
"""
# Render the OneBot markdown document to HTML so it can be walked with BeautifulSoup.
onebot_document = open('./scripts/onebot.md', 'r', encoding='utf-8').read()
html = markdown.markdown(onebot_document)
soup = BeautifulSoup(html, 'html.parser')
def snake_to_camel(s, capitalize_first_letter=False):
    """Convert ``snake_case`` to ``camelCase`` (or ``PascalCase``).

    Args:
        s: underscore-separated identifier.
        capitalize_first_letter: keep the first letter upper-case when true.

    Returns:
        The converted identifier; an empty string when ``s`` has no letters.
    """
    components = s.split('_')
    camel = ''.join(x.capitalize() for x in components)
    # Guard the index: '' or '_' yields an empty result, and camel[0] would raise.
    if not capitalize_first_letter and camel:
        camel = camel[0].lower() + camel[1:]
    return camel
def next_node(el: Tag):
    """Return the next sibling of ``el`` whose text is not blank.

    Whitespace-only siblings are skipped. Returns ``None`` when no such sibling
    exists, instead of failing on the end of the sibling chain.
    """
    p = el.next_sibling
    while p is not None and len(p.text.strip()) == 0:
        p = p.next_sibling
    return p
# Accumulator for the generated TypeScript source. It starts with the file
# header comment and the type import; one rendered method is appended per
# documented API, and the result is written to ./scripts/onebot.ts.
tss = """/**
* @author 锦恢
* @email 1193466151@qq.com
* @description Lagrange.Core 前端接口
* @comment 接口调用详细参考文档
* - https://github.com/botuniverse/onebot-11/blob/master/communication/ws.md
*/
import * as Lagrange from '../type';
"""
# Each <h2> in the document is expected to hold the API name inside <code> and
# its description as plain text. The parameter table is the second non-blank
# sibling after the heading.
for el in soup.find_all('h2'):
    el: Tag
    function_name = None
    function_desc = None

    for child in el.children:
        if child.name == 'code':
            function_name = child.text
        elif child.name is None:
            function_desc = child.text

    if not (function_name and function_desc):
        continue

    ts_func_name = snake_to_camel(function_name)
    title2 = next_node(el)
    table = next_node(title2) if title2 is not None else None
    if table is None:
        # Heading without a parameter table: nothing to generate.
        continue

    params = []
    # The first two lines are the table header and its separator row.
    for line in table.text.strip().split('\n')[2:]:
        splits = [l for l in line.split('|') if len(l.strip()) > 0]
        print(splits)
        if len(splits) == 4:
            param = {
                'name': splits[0].strip(),
                'type': splits[1].strip().split()[0],
                'default': splits[2].strip(),
                'desc': splits[3].strip()
            }
        elif len(splits) == 3:
            param = {
                'name': splits[0].strip(),
                'type': splits[1].strip().split()[0],
                'default': '-',
                'desc': splits[2].strip()
            }
        else:
            # Malformed row: skip it rather than re-appending the previous
            # row's param (or failing on the first row).
            continue

        if param['type'] == 'message':
            param['type'] = 'string | Lagrange.Send.Default[]'
        params.append(param)

    t1 = function_desc.strip()
    t2 = '\n' + ''.join(' * @param {} {}\n'.format(param['name'], param['desc']) for param in params)
    t3 = ts_func_name

    # Required parameters first, then those with a default value, so the
    # generated TypeScript signature is valid.
    required = ['{}: {}'.format(param['name'], param['type']) for param in params if param['default'] == '-']
    optional = ['{}: {} = {}'.format(param['name'], param['type'], param['default']) for param in params if param['default'] != '-']
    t4 = ', '.join(required + optional)

    t5 = function_name
    t6 = ', '.join(param['name'] for param in params)

    tss += template % (t1, t2, t3, t4, t5, t6)

# Write through a context manager so the output is flushed and closed.
with open('./scripts/onebot.ts', 'w', encoding='utf-8') as _out:
    _out.write(tss)

176
scripts/prompt.py Normal file
View File

@ -0,0 +1,176 @@
from __future__ import annotations
from dataclasses import dataclass
import warnings
import random
import math
import yaml
@dataclass(frozen=True)
class IntentNode:
    """One node of the intent tree declared under ``schema`` in the story file.

    The dataclass is frozen, so fields cannot be rebound, but the ``children``
    and ``stories`` lists are filled in place while the file is loaded.
    """
    # Intent name; also the key used in the engine's name/id lookup tables.
    name: str
    # Optional free-text description taken from the schema entry.
    description: str | None
    # Direct sub-intents, appended while the schema is traversed.
    children: list[IntentNode]
    # Parent node; None for the root.
    parent: IntentNode | None
    # Example stories whose intent equals this node's name.
    stories: list[Story]
@dataclass(frozen=True)
class Story:
    """A labelled example: a user message and the name of its intent."""
    # Example user utterance.
    message: str
    # Intent name; must be one declared in the schema.
    intent: str
class PromptEngine:
    """Loads an intent schema and example stories from a YAML file and turns
    them into few-shot chat prompts for an LLM intent classifier.
    """
    path: str
    schema: IntentNode | None
    stories: list[Story]
    rejects: list[str]
    intent2id: dict[str, int]
    id2intent: dict[int, str]
    name2node: dict[str, IntentNode]

    def __init__(self, path: str) -> None:
        """Parse the story file at ``path`` and build the lookup tables."""
        self.path = path
        # NOTE(security): yaml.Loader can construct arbitrary Python objects.
        # Only load trusted files, or switch to yaml.SafeLoader if the story
        # files need nothing beyond plain mappings, lists and scalars.
        with open(path, 'r', encoding='utf-8') as fp:
            self.config = yaml.load(fp, yaml.Loader)
        self.intent2id = {}
        self.id2intent = {}
        self.name2node = {}
        # A key with no value (e.g. "rejects:") parses to None; treat it like
        # a missing section.
        self.schema = self.handle_schema(self.config.get('schema') or {})
        self.stories = self.handle_stories(self.config.get('stories') or [])
        self.rejects = self.handle_rejects(self.config.get('rejects') or [])

    def handle_schema(self, raw_schema: dict) -> IntentNode:
        """Build the intent tree and assign an integer id to each distinct name.

        Returns the root node, or ``None`` (with a warning) when the schema
        has no ``root`` entry.

        Raises:
            NameError: a schema item has no ``name``.
        """
        raw_root = (raw_schema or {}).get('root', None)
        if raw_root is None:
            warnings.warn('schema must have a root node as the beginning, otherwise intent recogition will not work')
            return None

        current_layers: list[tuple[dict, IntentNode | None]] = [(raw_root, None)]
        nodes: list[IntentNode] = []

        # Level-order traversal: ids are assigned in the order names are first seen.
        while len(current_layers) > 0:
            new_current_layers: list[tuple[dict, IntentNode | None]] = []
            for raw_node, intent_node in current_layers:
                name = raw_node.get('name', None)
                children = raw_node.get('children', None)
                description = raw_node.get('description', None)
                if name is None:
                    raise NameError('you must specify a name in story item, current item : {}'.format(raw_node))
                if children is None:
                    children = []

                if name not in self.intent2id:
                    assign_id = len(self.intent2id)
                    self.intent2id[name] = assign_id
                    self.id2intent[assign_id] = name

                node = IntentNode(name, description, [], intent_node, [])
                self.name2node[name] = node
                nodes.append(node)
                if intent_node:
                    intent_node.children.append(node)

                for raw_child in children:
                    new_current_layers.append((raw_child, node))

            current_layers.clear()
            current_layers.extend(new_current_layers)

        return nodes[0]

    def handle_stories(self, raw_stories: list[dict]) -> list[Story]:
        """Turn raw ``{message, intent}`` pairs into ``Story`` objects.

        Each story is also attached to its intent node. Pairs whose intent is
        not declared in the schema are skipped with a warning.
        """
        stories: list[Story] = []
        for pair in raw_stories or []:
            message = pair.get('message', None)
            intent = pair.get('intent', None)
            if intent not in self.intent2id:
                warnings.warn('{} is not the intent you declare in schema, so this pair will be ignored'.format(intent))
                continue

            if message and intent:
                story = Story(message, intent)
                node = self.name2node.get(intent)
                node.stories.append(story)
                stories.append(story)
        return stories

    def handle_rejects(self, raw_rejects: list[str]) -> list[str]:
        """Return a copy of the reject list (empty when the section is absent)."""
        return list(raw_rejects or [])

    def generate_chunk(self, stories: list[Story]) -> tuple[str, str]:
        """Render one few-shot turn from a non-empty list of stories.

        Every story but the last appears with its answer in the user content;
        the last story's answer becomes the assistant content.
        """
        prompts = []
        for story in stories:
            prompts.append('Message: ' + story.message.strip())
            intent_id = self.intent2id.get(story.intent)
            prompts.append('Intent: { id: %s }' % (intent_id))
        # Drop the last answer: it is what the assistant turn supplies.
        prompts.pop()

        user_content = '\n'.join(prompts) + '\n' + 'Intent: '
        assistant_content = '{id : %s}' % (intent_id)
        return user_content, assistant_content

    def generate_llm_message(self, question: str, intent: IntentNode = None, chunk_size: int = 5, max_chunk_num: int = 10) -> list[dict]:
        """Build the chat transcript that asks the LLM to label ``question``.

        Args:
            question: the user message to classify.
            intent: node whose children are the candidates (default: the root).
            chunk_size: example stories per user turn.
            max_chunk_num: stop after this many example turn pairs.

        Raises:
            ValueError: no node was given and the schema has no root.
        """
        if intent is None:
            intent = self.schema
        if intent is None:
            raise ValueError('schema has no root node, cannot build an intent prompt')

        story_cache = []
        for node in intent.children:
            story_cache.extend(node.stories)
        # Shuffle so each call shows the examples in a different order.
        random.shuffle(story_cache)

        chunk_num = math.ceil(len(story_cache) / chunk_size)
        message = []
        for chunk_id in range(chunk_num):
            start = chunk_id * chunk_size
            chunk = story_cache[start: start + chunk_size]
            user_content, assistant_content = self.generate_chunk(chunk)
            message.append({
                'role': 'user',
                'content': user_content
            })
            message.append({
                'role': 'assistant',
                'content': assistant_content
            })
            if len(message) / 2 >= max_chunk_num:
                break

        message.append({
            'role': 'user',
            'content': question + '\nIntent: '
        })

        # Opening preset: task description plus the candidate ids.
        preset = 'Label a users message from a conversation with an intent. Reply ONLY with the name of the intent.'
        intent_preset = ['The intent should be one of the following:']
        for node in intent.children:
            intent_id = self.intent2id.get(node.name)
            intent_preset.append('- {}'.format(intent_id))
        intent_preset = '\n'.join(intent_preset)
        message[0]['content'] = preset + '\n' + intent_preset + '\n' + message[0]['content']
        return message
class KIntent:
    """Work-in-progress intent classifier built on top of ``PromptEngine``."""
    path: str
    engine: PromptEngine

    def __init__(self, path: str) -> None:
        self.path = path
        self.engine = PromptEngine(path)

    def inference(self, question: str, chunk_size: int = 5, max_chunk_num: int = 10) -> list[IntentNode]:
        """Walk the intent tree starting from the schema root.

        NOTE(review): unfinished. The loop pops each node without querying a
        model or collecting results, nothing is returned (implicitly ``None``
        despite the annotation), and ``question``, ``chunk_size`` and
        ``max_chunk_num`` are unused.
        """
        root_node = self.engine.schema
        results: list[IntentNode] = []
        stack = [root_node]
        while len(stack) > 0:
            node = stack.pop()
if __name__ == '__main__':
    # Manual smoke test: print the prompt built for one sample question.
    demo_engine = PromptEngine('./story.yml')
    print(demo_engine.generate_llm_message('如何解决 digital ide 无法载入配置文件的问题?'))

41
scripts/story.yml Normal file
View File

@ -0,0 +1,41 @@
schema:
root:
name: root
children:
- name: usage
description: 使用查询
children:
- name: bug
description: bug 查询
children:
- name: command
description: 指令
children:
- name: others
description: 其他
children:
stories:
- message: 请问 property.json 如何配置?
intent: usage
- message: 我的自动补全无法使用是不是有bug
intent: bug
- message: 帮我上传一下这份数据
intent: command
- message: surface了解一下
intent: others
- message: 大佬们为啥我的digital ide启动之后所有功能都没启动捏我配置了property文件然后插件的vivado路经和modelsim路经都加上了
intent: usage
- message: 这群要被chisel夺舍了吗
intent: others
- message: Metals一开直接报错
intent: others
- message: 话说digital-ide打开大的verilog卡死了
intent: bug
- message: 请问一下第一次点击对文件仿真可以出波形文件再次点击的时候就会提示unknown module type了。是哪个配置没配置好
intent: usage
rejects:
- metal
- metals
- idea

18
scripts/t2.txt Normal file
View File

@ -0,0 +1,18 @@
Label a users message from a conversation with an intent. Reply ONLY with the name of the intent.
The intent should be one of the following:
- 0
- 1
- 2
- 3
Message: 大佬们为啥我的digital ide启动之后所有功能都没启动捏我配置了property文件然后插件的vivado路经和modelsim路经都加上了
Intent: { id: 0 }
Message: 这群要被chisel夺舍了吗
Intent: { id: 3 }
Message: Metals一开直接报错
Intent: { id: 3 }
Message: 话说digital-ide打开大的verilog卡死了
Intent: { id: 1 }
Message: 请问一下第一次点击对文件仿真可以出波形文件再次点击的时候就会提示unknown module type了。是哪个配置没配置好
Intent: { id: <answer here> }

16
scripts/template.txt Normal file
View File

@ -0,0 +1,16 @@
Label a users message from a conversation with an intent. Reply ONLY with the name of the intent.
The intent should be one of the following:
- 0
- 1
- 2
- 3
Message: 请问 property.json 如何配置?
Intent: { id: 0 }
Message: 我的自动补全无法使用是不是有bug
Intent: { id: 1 }
Message: 帮我上传一下这份数据
Intent: { id: 2 }
Message: 为数不多我觉得很好的动漫了
Intent: { id: 3 }
Message: M1系列可以跑iPhone和iPad游戏
Intent: { id: <answer here> }

6
scripts/test.js Normal file
View File

@ -0,0 +1,6 @@
/**
 * Print the given value with console.log.
 * @param a value to print
 */
function hello(a) {
    console.log(a);
}
// Invoke hello through Function.prototype.call, passing the top-level `this`
// as the receiver and 'hello' as the argument.
hello.call(this, 'hello');

13
scripts/text.txt Normal file
View File

@ -0,0 +1,13 @@
Label a users message from a
conversation with an intent. Reply ONLY with the name of the intent.
The intent should be one of the following:
{% for intent in intents %}
- {{intent}}
{% endfor %}
{% for example in examples %}
Message: {{example['text']}}
Intent: {{example['intent']}}
{% endfor %}
Message: {{message}}
Intent:

41
test/index.js Normal file
View File

@ -0,0 +1,41 @@
const path = require('path');
const Mocha = require('mocha');
const glob = require('glob');
/**
 * Collect every `*.test.js` file under the project root and run them with Mocha.
 * @returns {Promise<void>} resolves when all tests pass; rejects on a glob
 *   error, a Mocha setup error, or when at least one test fails
 */
function run() {
    // Create the mocha test
    const mocha = new Mocha({
        ui: 'tdd',
        color: true
    });

    const testsRoot = path.resolve(__dirname, '..');

    return new Promise((c, e) => {
        // The search starts at the project root, so exclude node_modules:
        // otherwise test files shipped inside dependencies would run too.
        glob('**/**.test.js', { cwd: testsRoot, ignore: 'node_modules/**' }, (err, files) => {
            if (err) {
                return e(err);
            }

            files.forEach(f => mocha.addFile(path.resolve(testsRoot, f)));

            try {
                // Per-test timeout.
                mocha.timeout(60000);
                mocha.run(failures => {
                    if (failures > 0) {
                        e(new Error(`${failures} tests failed.`));
                    } else {
                        c();
                    }
                });
            } catch (err) {
                e(err);
            }
        });
    });
}

// Report the failure and exit non-zero instead of leaving an unhandled rejection.
run().catch(err => {
    console.error(err);
    process.exitCode = 1;
});

45
test/suite/rag.test.js Normal file
View File

@ -0,0 +1,45 @@
const fs = require('fs');
const yaml = require('yaml');
const assert = require('assert');
const axios = require('axios');
// Read the RAG server address from the shared config file (path is relative
// to the working directory).
const vecdbBuffer = fs.readFileSync('./config/vecdb.yml', 'utf-8');
const vecdbConfig = yaml.parse(vecdbBuffer);
const vecdbBaseURL = 'http://' + vecdbConfig['addr'] + ':' + vecdbConfig['port'];

// Axios client bound to the server's base URL with a 5 second timeout.
const vecdbRequests = axios.create({ baseURL: vecdbBaseURL, timeout: 5000 });

/**
 * POST a request body to the intent-recognition endpoint.
 * @param req request body, e.g. `{ query: '...' }`
 * @returns the axios response promise
 */
const apiGetIntentRecogition = (req) => {
    return vecdbRequests.request({
        url: '/intent/get-intent-recogition',
        method: 'POST',
        data: req
    });
};
suite('test intent recogition', () => {
    // Test cases could also be prepared in advance, either inline here or
    // loaded from a static file. `expect` is a comma-separated list of
    // acceptable intent names.
    const intent_suites = [
        { input: '如何使用 digital ide 这个插件?', expect: 'usage' },
        { input: '我今天打开 vscode发现 自动补全失效了,我是哪里没有配置好吗?', expect: 'usage,bug' },
        { input: 'path top.v is not a hdlFile 请问报这个错误大概是啥原因啊', expect: 'usage,bug' },
        { input: '我同学在学习强国看到小麦收割了,然后就买相应的股就赚了', expect: 'others' },
        { input: '我平时写代码就喜欢喝茶', expect: 'others' },
    ];

    for (const s of intent_suites) {
        const input = s.input;
        const expects = s.expect.split(',');
        // Title fixed: the original had a stray ')' after the message.
        test(`Message: ${input} => Intent: ${expects.join(',')}`, async () => {
            const axiosRes = await apiGetIntentRecogition({ query: input });
            const res = axiosRes.data;
            const payload = res.data;
            // Fail with a readable message when the server returned an error
            // body instead of a `{ id, name }` payload.
            assert(payload && typeof payload === 'object', `unexpected response: ${JSON.stringify(res)}`);
            const intentName = payload.name;
            assert(expects.includes(intentName), `infer intent "${intentName}" not in expect "${expects}"`);
        });
    }
});

View File

@ -4,7 +4,10 @@
"target": "ES2020", "target": "ES2020",
"outDir": "dist", "outDir": "dist",
"esModuleInterop": true, "esModuleInterop": true,
"experimentalDecorators": true "experimentalDecorators": true,
"declaration": true,
"declarationDir": "dist",
"typeRoots": ["./types"]
}, },
"include": [ "include": [
"bot/**/*" "bot/**/*"

2003
yarn.lock

File diff suppressed because it is too large Load Diff