文章目录
- 前言
- java实战-Milvus 2.5.x版本向量库-通过集合字段变更示例学习相关api demo
- 1. Milvus版本
- 2. 示例逻辑分析
- 3. 集合字段变更示例demo
- 4. 测试
前言
如果您觉得有用的话,记得给博主点个赞,评论,收藏一键三连啊,写作不易啊^ _ ^。
而且听说点赞的人每天的运气都不会太差,实在白嫖的话,那欢迎常来啊!!!
java实战-Milvus 2.5.x版本向量库-通过集合字段变更示例学习相关api demo
注意:
关于demo中的Milvus 连接池与key的管理参考下面这篇文章。
java-Milvus 向量库(2.5.x版本)-连接池(多key)与自定义端点监听设计
1. Milvus版本
2. 示例逻辑分析
Milvus 不支持直接修改集合 schema(比如新增字段),所以你只能:
- 创建一个新集合(含你想新增的字段);
- 把旧集合的数据迁移过去;
- 继续在新集合中做后续操作。
需要注意:数据迁移建议使用 queryIterator(即示例代码中的 client.queryIterator(QueryIteratorReq)),而不是 searchIteratorV2,两者的区别如下:
功能 | queryIterator | searchIteratorV2 |
---|---|---|
用途 | 查询结构化数据(按条件筛选、返回字段) | 向量搜索,查找相似向量 |
是否依赖向量字段 | ❌ 不依赖 | ✅ 必须提供向量(FloatVector) |
用途场景 | 数据导出、分页查看、迁移、统计等 | 向量相似度匹配(推荐、检索) |
是否支持 filter | ✅ 支持 expr / filter | ✅ 支持 filter,但只能配合向量使用 |
返回内容 | 任意字段(非向量也可以) | topK 相似度结果(带 score) |
3. 集合字段变更示例demo
package org.example.controller;import io.swagger.annotations.Api;
import io.swagger.annotations.ApiOperation;
import lombok.extern.slf4j.Slf4j;
import org.example.annotation.CommonLog;
import org.example.exception.model.ResponseResult;
import org.example.service.MilvusService;
import org.springframework.web.bind.annotation.CrossOrigin;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RestController;import javax.annotation.Resource;/**
* @author 杨镇宇
* @date 2025/7/8 08:46
* @version 1.0
*/
@Api(value = " milvus 练习", tags = {" milvus 练习"})
@Slf4j
@RestController
@RequestMapping(value="api/milvus")
public class MilvusController {

    /** Service that executes the Milvus demo scenario exposed by this controller. */
    @Resource
    private MilvusService milvusService;

    /**
     * Runs the "collection field change (data migration)" demo.
     *
     * <p>Delegates to {@link MilvusService#updateMigrateData()} and, once that call
     * returns, answers with a fixed success payload. Any exception thrown by the
     * service propagates to the caller unchanged.
     *
     * @return the result of {@code ResponseResult.ok("测试完成")}
     */
    @ApiOperation(value = "集合字段变更(数据迁移)测试", notes = "集合字段变更(数据迁移)测试")
    @CrossOrigin(origins = "*")
    @CommonLog(methodName = "集合字段变更(数据迁移)测试",className = "MilvusController#updateMigrateData",url = "api/milvus/updateMigrateData")
    @RequestMapping(value = "/updateMigrateData", method = RequestMethod.POST)
    public ResponseResult updateMigrateData() {
        milvusService.updateMigrateData();
        return ResponseResult.ok("测试完成");
    }
}
package org.example.service.impl;import com.google.gson.Gson;
import com.google.gson.JsonObject;
import io.milvus.orm.iterator.QueryIterator;
import io.milvus.orm.iterator.SearchIteratorV2;
import io.milvus.response.QueryResultsWrapper;
import io.milvus.v2.client.MilvusClientV2;
import io.milvus.v2.common.ConsistencyLevel;
import io.milvus.v2.common.DataType;
import io.milvus.v2.common.IndexParam;
import io.milvus.v2.service.collection.request.*;
import io.milvus.v2.service.collection.response.DescribeCollectionResp;
import io.milvus.v2.service.vector.request.InsertReq;
import io.milvus.v2.service.vector.request.QueryIteratorReq;
import io.milvus.v2.service.vector.request.SearchIteratorReqV2;
import io.milvus.v2.service.vector.request.data.FloatVec;
import io.milvus.v2.service.vector.response.SearchResp;
import lombok.extern.slf4j.Slf4j;
import org.example.milvus.config.OldMilvusServiceClient;
import org.example.milvus.model.MilvusAdminClient;
import org.example.milvus.model.MilvusInsertClient;
import org.example.milvus.model.MilvusSearchClient;
import org.example.service.MilvusService;
import org.springframework.stereotype.Service;import javax.annotation.Resource;
import java.util.*;/**
* @author 杨镇宇
* @date 2025/7/8 09:02
* @version 1.0
*/
@Slf4j
@Service
public class MilvusServiceImpl implements MilvusService {/*** key:search-module*/@Resourceprivate MilvusSearchClient searchClient;/*** key:insert-module*/@Resourceprivate MilvusInsertClient insertClient;/*** key:admin-module*/@Resourceprivate MilvusAdminClient adminClient;/*** milvusServiceClient旧客户端*/@Resourceprivate OldMilvusServiceClient oldMilvusServiceClient;// 归一化函数(单位向量)public List<Float> normalizeVector(List<Float> vector) {double norm = 0.0;for (Float v : vector) {norm += v * v;}norm = Math.sqrt(norm);List<Float> normalized = new ArrayList<>();for (Float v : vector) {normalized.add((float) (v / norm));}return normalized;}/*** java实战-Milvus 向量库 集合字段变更*/@Overridepublic void updateMigrateData() {MilvusClientV2 client = adminClient.getClient();int dim = 4;// 维度String collectionName = "java_test";String description = "测试表";CreateCollectionReq.CollectionSchema collectionSchema = client.createSchema();collectionSchema.addField(AddFieldReq.builder().fieldName("id").dataType(DataType.Int64).isPrimaryKey(Boolean.TRUE).autoID(Boolean.FALSE).description("id").build());collectionSchema.addField(AddFieldReq.builder().fieldName("vector").dataType(DataType.FloatVector).dimension(dim).description("向量字段").build());collectionSchema.addField(AddFieldReq.builder().fieldName("user").dataType(DataType.VarChar).maxLength(100).description("用户").build());collectionSchema.addField(AddFieldReq.builder().fieldName("timestamp").dataType(DataType.Int64).description("时间").build());log.info("=============创建测试表提供测试===================");createTable(dim,collectionSchema,client,collectionName,description);log.info("--------------插入1200条数据用作测试--------------");MilvusClientV2 insert = insertClient.getClient();batchInsert(insert,collectionName);log.info("--------------查询前10条数据验证--------------");List<Float> queryVector = Arrays.asList(1.0f, 2.0f, 3.0f, 4.0f); // 示例向量query(client,queryVector,collectionName);// 新增字段AddFieldReq addBuild = 
AddFieldReq.builder().fieldName("age").dataType(DataType.VarChar).maxLength(100).description("年龄").isNullable(true) // ✅ 允许为空.build();collectionSchema.addField(addBuild);log.info("=============创建新表,进行数据迁移===================");String newCollection = migrateData(client, collectionName, collectionSchema, dim, description);log.info("--------------查询前10条数据验证--------------");query(client,queryVector,newCollection);log.info("删除旧表");DropCollectionReq dropCollectionReq = DropCollectionReq.builder().collectionName(collectionName).build();client.dropCollection(dropCollectionReq);}public void query(MilvusClientV2 client,List<Float> queryVector,String collectionName){queryVector = normalizeVector(queryVector); // ✨ 添加归一化SearchIteratorReqV2 searchReq = SearchIteratorReqV2.builder().collectionName(collectionName) // 替换为你的集合名.outputFields(Arrays.asList("id", "user", "timestamp","vector")) // 可加你想验证的字段.batchSize(10L).vectorFieldName("vector").vectors(Collections.singletonList(new FloatVec(queryVector))).filter("id > 0") // 可选条件,确保有返回.topK(10) // 查询前 10 个最相似的.metricType(IndexParam.MetricType.COSINE).consistencyLevel(ConsistencyLevel.BOUNDED).build();SearchIteratorV2 searchIterator = client.searchIteratorV2(searchReq);log.info("🔍 表【{}】:前 10 条查询结果:",collectionName);while (true) {List<SearchResp.SearchResult> res = searchIterator.next();if (res.isEmpty()) {log.info("🔍 查询结束");searchIterator.close();break;}for (SearchResp.SearchResult record : res) {Map<String, Object> entity = record.getEntity();log.info("🔍 >>>>> id:{},user:{},timestamp:{},vector:{}",entity.get("id"),entity.get("user"),entity.get("timestamp"),entity.get("vector"));}}}/*** 插入1200条数据* @param client*/public void batchInsert(MilvusClientV2 client,String collectionName){Gson gson = new Gson();List<JsonObject> dataList = new ArrayList<>();for (long i = 1; i <= 1200; i++) {JsonObject row = new JsonObject();// 构造向量:这里用随机值或自定义值填充List<Float> vector = Arrays.asList(1.0f * i, 2.0f * i, 3.0f * i, 4.0f * i); // dim = 
4row.add("vector", gson.toJsonTree(normalizeVector(vector)));row.addProperty("id", i); // 手动指定主键row.addProperty("user", "user_" + i); // 示例字段row.addProperty("timestamp", System.currentTimeMillis()); // 注意 Int16 用 shortdataList.add(row);// 每 200 条插入一次,避免过大 payloadif (dataList.size() == 200 || i == 1200) {InsertReq insertReq = InsertReq.builder().collectionName(collectionName) // 替换为你的集合名.data(dataList).build();client.insert(insertReq);log.info("✅ 已插入 {} 条数据", i);dataList.clear();}}}/*** 新增字段,数据迁移*/public String migrateData(MilvusClientV2 client, String collectionName,CreateCollectionReq.CollectionSchema collectionSchema,int dim,String description){String newCollection = collectionName + "_new";// 获取旧集合结构信息DescribeCollectionResp oldSchemaResp = client.describeCollection(DescribeCollectionReq.builder().collectionName(collectionName).build());List<String> oldFieldNames = oldSchemaResp.getFieldNames();// 创建新集合createTable(dim,collectionSchema,client,newCollection,description);// 获取新集合结构信息DescribeCollectionResp newSchemaResp = client.describeCollection(DescribeCollectionReq.builder().collectionName(newCollection).build());List<String> newFieldNames = newSchemaResp.getFieldNames();Set<String> oldFieldSet = new HashSet<>(oldFieldNames);Gson gson = new Gson(); // 如果没有引入 gson,请确保引入:com.google.code.gson:gsonlong offset = 0;long limit = 200;while (true) {QueryIteratorReq queryReq = QueryIteratorReq.builder().collectionName(collectionName).expr("id > 0").outputFields(oldFieldNames).batchSize(limit).offset(offset).limit(limit).consistencyLevel(ConsistencyLevel.BOUNDED).build();QueryIterator queryIterator = client.queryIterator(queryReq);List<QueryResultsWrapper.RowRecord> rows = queryIterator.next();queryIterator.close();if (rows.isEmpty()) {log.info("✅ 数据迁移完成,总记录数: {}", offset);break;}List<JsonObject> insertRows = new ArrayList<>();for (QueryResultsWrapper.RowRecord row : rows) {JsonObject json = new JsonObject();for (String field : newFieldNames) {if 
(oldFieldSet.contains(field)) {Object value = row.get(field);if (value instanceof Number) {json.addProperty(field, (Number) value);} else if (value instanceof String) {json.addProperty(field, (String) value);} else if (value instanceof List<?>) {json.add(field, gson.toJsonTree(value));} else {log.warn("⚠ 未处理的字段类型: {} -> {}", field, value);}} else {// 🔧 新增字段统一设置为 null 或默认值json.addProperty(field, "");}}insertRows.add(json);}InsertReq insertReq = InsertReq.builder().collectionName(newCollection).data(insertRows).build();client.insert(insertReq);log.info("✅ 插入第 {} ~ {} 条数据", offset + 1, offset + rows.size());offset += rows.size();}log.info("✅ 集合 `{}` 迁移完成", newCollection);return newCollection;}/*** 创建集合 + 为所有向量字段建索引*/public static void createTable(int dim,CreateCollectionReq.CollectionSchema collectionSchema,MilvusClientV2 client, String collectionName, String description) {// 先判断是否存在,存在先删除HasCollectionReq name = HasCollectionReq.builder().collectionName(collectionName).build();if (client.hasCollection(name)) {log.info("Collection `{}` 已存在,准备删除", collectionName);client.dropCollection(DropCollectionReq.builder().collectionName(collectionName)// 集合名称.timeout(60000L)//进程的超时时长。指定时长到期后,进程将终止, 默认为60000L.build());log.info("Collection `{}` 删除成功", collectionName);}IndexParam indexParam = IndexParam.builder().fieldName("vector").metricType(IndexParam.MetricType.COSINE) // 检索时计算向量之间的相似度方式使用 COSINE(余弦相似度),COSINE 适用于归一化向量的相似度匹配,等价于夹角越小越相似。.build();// 创建 CollectionSchema 和 CollectionCreateCollectionReq createCollectionReq = CreateCollectionReq.builder().collectionSchema(collectionSchema) // 留空表示此集合将使用默认设置创建。要设置具有自定义架构的集合,您需要创建一个CollectionSchema对象并在此处引用它。.autoID(false)// 当数据插入到该集合时,主字段是否自动递增 ,TRUE自动递增.collectionName(collectionName)//表名.description(description)//描述.dimension(dim) // 保存向量嵌入的集合字段的维数.metricType("COSINE")//此集合使用的算法用于测量向量嵌入之间的相似性。 该值默认为IP。可能的值为L2、IP和COSINE。有关这些指标类型的详细信息.indexParams(Collections.singletonList(indexParam)).build();// 
创建集合client.createCollection(createCollectionReq);log.info("Collection `{}` 创建完成", collectionName);DescribeCollectionResp resp = client.describeCollection(DescribeCollectionReq.builder().collectionName(collectionName).build());List<CreateCollectionReq.FieldSchema> fieldSchemaList = resp.getCollectionSchema().getFieldSchemaList();log.info("-------------------------【{}】集合结构-----------------------",collectionName);for (CreateCollectionReq.FieldSchema fieldSchema:fieldSchemaList){String field = fieldSchema.getName();String fieldDescription = fieldSchema.getDescription();DataType dataType = fieldSchema.getDataType();log.info("字段名:{},类型:{},备注:{}",field,dataType,fieldDescription);}}}
4. 测试
测试执行:
效果: