@@ -6384,3 +6384,156 @@ v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); }
 // CHECK-NEXT: ret void
 //
 v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); }
+// CHECK-LABEL: @cast_128_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.cast.128.s(<4 x float> [[TMP0]])
+// CHECK-NEXT: store <8 x float> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 cast_128_s(v4f32 _1) { return __lasx_cast_128_s(_1); }
+// CHECK-LABEL: @cast_128_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.cast.128.d(<2 x double> [[TMP0]])
+// CHECK-NEXT: store <4 x double> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 cast_128_d(v2f64 _1) { return __lasx_cast_128_d(_1); }
+// CHECK-LABEL: @cast_128(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.cast.128(<2 x i64> [[TMP0]])
+// CHECK-NEXT: store <4 x i64> [[TMP1]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 cast_128(v2i64 _1) { return __lasx_cast_128(_1); }
+// CHECK-LABEL: @concat_128_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.concat.128.s(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 concat_128_s(v4f32 _1, v4f32 _2) { return __lasx_concat_128_s(_1, _2); }
+// CHECK-LABEL: @concat_128_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.concat.128.d(<2 x double> [[TMP0]], <2 x double> [[TMP1]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 concat_128_d(v2f64 _1, v2f64 _2) { return __lasx_concat_128_d(_1, _2); }
+// CHECK-LABEL: @concat_128(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.concat.128(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 concat_128(v2i64 _1, v2i64 _2) { return __lasx_concat_128(_1, _2); }
+// CHECK-LABEL: @extract_128_lo_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.lo.s(<8 x float> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 extract_128_lo_s(v8f32 _1) { return __lasx_extract_128_lo_s(_1); }
+// CHECK-LABEL: @extract_128_lo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.lo.d(<4 x double> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 extract_128_lo_d(v4f64 _1) { return __lasx_extract_128_lo_d(_1); }
+// CHECK-LABEL: @extract_128_lo(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lasx.extract.128.lo(<4 x i64> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 extract_128_lo(v4i64 _1) { return __lasx_extract_128_lo(_1); }
+// CHECK-LABEL: @extract_128_hi_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.loongarch.lasx.extract.128.hi.s(<8 x float> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v4f32 extract_128_hi_s(v8f32 _1) { return __lasx_extract_128_hi_s(_1); }
+// CHECK-LABEL: @extract_128_hi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.loongarch.lasx.extract.128.hi.d(<4 x double> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2f64 extract_128_hi_d(v4f64 _1) { return __lasx_extract_128_hi_d(_1); }
+// CHECK-LABEL: @extract_128_hi(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.loongarch.lasx.extract.128.hi(<4 x i64> [[_1]])
+// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+// CHECK-NEXT: ret i128 [[TMP2]]
+//
+v2i64 extract_128_hi(v4i64 _1) { return __lasx_extract_128_hi(_1); }
+// CHECK-LABEL: @insert_128_lo_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.lo.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 insert_128_lo_s(v8f32 _1, v4f32 _2) { return __lasx_insert_128_lo_s(_1, _2); }
+// CHECK-LABEL: @insert_128_lo_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.lo.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 insert_128_lo_d(v4f64 _1, v2f64 _2) { return __lasx_insert_128_lo_d(_1, _2); }
+// CHECK-LABEL: @insert_128_lo(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.insert.128.lo(<4 x i64> [[_1]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 insert_128_lo(v4i64 _1, v2i64 _2) { return __lasx_insert_128_lo(_1, _2); }
+// CHECK-LABEL: @insert_128_hi_s(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <8 x float>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x float>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.loongarch.lasx.insert.128.hi.s(<8 x float> [[_1]], <4 x float> [[TMP1]])
+// CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v8f32 insert_128_hi_s(v8f32 _1, v4f32 _2) { return __lasx_insert_128_hi_s(_1, _2); }
+// CHECK-LABEL: @insert_128_hi_d(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x double>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x double>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x double> @llvm.loongarch.lasx.insert.128.hi.d(<4 x double> [[_1]], <2 x double> [[TMP1]])
+// CHECK-NEXT: store <4 x double> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4f64 insert_128_hi_d(v4f64 _1, v2f64 _2) { return __lasx_insert_128_hi_d(_1, _2); }
+// CHECK-LABEL: @insert_128_hi(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[_1:%.*]] = load <4 x i64>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.insert.128.hi(<4 x i64> [[_1]], <2 x i64> [[TMP1]])
+// CHECK-NEXT: store <4 x i64> [[TMP2]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA2]]
+// CHECK-NEXT: ret void
+//
+v4i64 insert_128_hi(v4i64 _1, v2i64 _2) { return __lasx_insert_128_hi(_1, _2); }