# Add new SentenceTransformer model

Changed files:

- README.md (+337 / -130)
- config.json (+2 / -2)
- config_sentence_transformers.json (+1 / -1)
- model.safetensors (+1 / -1)
- sentence_bert_config.json (+1 / -1)
- tokenizer.json (+1 / -1)
- tokenizer_config.json (+1 / -1)

## README.md (changed)
@@ -7,109 +7,113 @@ tags:
- feature-extraction
- dense
- generated_from_trainer
- dataset_size:6331245
- loss:AnglELoss
- loss:CoSENTLoss
- loss:CachedMultipleNegativesRankingLoss
base_model: jhu-clsp/ettin-encoder-32m
widget:
- source_sentence: what is paediatric clinical psychology
  sentences:
  - Pediatric neuropsychology (paediatric in the UK) is a sub-speciality within the
    field of clinical neuropsychology that studies the relationship between brain
    health and behaviour in children. Many pediatric neuropsychologists are involved
    in teaching, research, supervision, and training of undergraduate and graduate
    students in the field. In the United States undergraduate and graduate psychology
    programs generally do not offer a track in pediatric neuropsychology, per se.
  - '"Real" hummus should contain about 175 calories, out of which 70-80 calories
    are contributed by fat. The average Israeli eats 8-10 kilograms (18-22 pounds)
    of hummus every year, so we''re talking about extra 15,000 calories which can
    make him gain about 2.5kg of body weight each year. So you can see how excessive
    consumption of the packaged product might be fattening over the years. The common
    serving size of hummus (real hummus, that is), which is around one cup (220-240g)
    may contain 400-450 calories. And every pita ("pita bread") contains another
    270, so it''s not really "dietary".'
  - 'Pediatrics (also spelled paediatrics or pædiatrics) is the branch of medicine
    that involves the medical care of infants, children, and adolescents. The American
    Academy of Pediatrics recommends people be under pediatric care up to the age
    of 21. A medical practitioner who specializes in this area is known as a pediatrician,
    or paediatrician. The word pediatrics and its cognates mean healer of children;
    they derive from two Greek words: παῖς (pais, child) and ἰατρός (iatros, doctor,
    healer).'
- source_sentence: These ancient rites are rarely performed in contemporary Sri Lanka,
    but the conserved songs are still performed by folk musicians.
  sentences:
  - In 1971, a main campus was completed in 33 MacDonnell Road for the new school.
  - These ancient rites are still performed in contemporary Sri Lanka, but the preserved
    songs are rarely performed by folk musicians.
  - After May 4, 2012, Gordon M. Snow was replaced by Joseph M. Demarest and then
    Michael S. Welch with limited formal announcement.
- source_sentence: A woman is playing the flute.
  sentences:
  - A boy is playing the trumpet.
  - A man tries to read the paper.
  - A man is playing the guitar.
- source_sentence: Interference now on all our scans.
  sentences:
  - Would you permit me to explain this Polly?
  - All Ourscans are jammed.
  - The aircraft family was first introduced at the Paris Air Show in 1999.
- source_sentence: why has chs invested in da?
  sentences:
  - In order to renew the strategic road map to CHS's growth, CHS partnered with
    DA to improve outcomes rather than increasing its size. Most of DA's capacity
    was used to provide tools in order to support CHS-affiliated hospitals in delivering
    best-in-class healthcare to patients.
  - You can in theory add every enchantment that is compatible with a tool/weapon/armor
    onto the same item. The bow can have these 7 enchantments, though mending and
    infinity are mutually exclusive. So you can have up to 6 different enchantments
    on a bow using an anvil.
  - 'Clean up is a phrasal verb which means: to make (a room or space) clean and
    orderly. ... Clean out is a phrasal verb which means something such as a cupboard,
    room, or container, you take everything out of it and clean the inside of it
    thoroughly. Secondly, "clean" is a simple word which is often used in our daily
    life.'
datasets:
- google-research-datasets/paws
- nyu-mll/glue
- mwong/fever-evidence-related
- tasksource/parade
- tasksource/apt
- tasksource/sts-companion
- tasksource/zero-shot-label-nli
pipeline_tag: sentence-similarity
library_name: sentence-transformers
---

# SentenceTransformer based on jhu-clsp/ettin-encoder-32m

This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [jhu-clsp/ettin-encoder-32m](https://huggingface.co/jhu-clsp/ettin-encoder-32m) on 19 datasets. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

## Model Details

### Model Description
- **Model Type:** Sentence Transformer
- **Base model:** [jhu-clsp/ettin-encoder-32m](https://huggingface.co/jhu-clsp/ettin-encoder-32m) <!-- at revision 1b8ba06455dd44f80fc9c1ca9e22806157a57379 -->
- **Maximum Sequence Length:** 1024 tokens
- **Output Dimensionality:** 384 dimensions
- **Similarity Function:** Cosine Similarity
- **Training Datasets:**
    - [paws/labeled_final](https://huggingface.co/datasets/paws)
    - [glue/mrpc](https://huggingface.co/datasets/glue)
    - [fever-evidence-related](https://huggingface.co/datasets/mwong/fever-evidence-related)
    - [parade](https://huggingface.co/datasets/tasksource/parade)
    - [apt](https://huggingface.co/datasets/tasksource/apt)
    - [glue/stsb](https://huggingface.co/datasets/glue)
    - sick/relatedness
    - [sts-companion](https://huggingface.co/datasets/tasksource/sts-companion)
    - [zero-shot-label-nli](https://huggingface.co/datasets/tasksource/zero-shot-label-nli)
    - tomaarsen/natural-questions-hard-negatives
    - tomaarsen/gooaq-hard-negatives
    - bclavie/msmarco-500k-triplets
    - sentence-transformers/msmarco-co-condenser-margin-mse-sym-mnrl-mean-v1
    - sentence-transformers/gooaq
    - sentence-transformers/natural-questions
    - sentence-transformers/quora-duplicates
    - sentence-transformers/s2orc
    - sentence-transformers/codesearchnet
    - sentence-transformers/stackexchange-duplicates
- **Language:** en
<!-- - **License:** Unknown -->

@@ -123,7 +127,7 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [j

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 1024, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
)
```
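
As a reading aid (not part of the generated card), here is a minimal sketch of what the `Pooling` and `Normalize` modules above compute, assuming a token-embedding tensor and attention mask produced by the `Transformer` module:

```python
import torch
import torch.nn.functional as F

def mean_pool_and_normalize(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """token_embeddings: (batch, seq_len, 384); attention_mask: (batch, seq_len) of 0/1."""
    mask = attention_mask.unsqueeze(-1).float()     # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)   # sum only over real (non-padding) tokens
    counts = mask.sum(dim=1).clamp(min=1e-9)        # number of real tokens per sentence
    mean_pooled = summed / counts                   # 'pooling_mode_mean_tokens': True
    return F.normalize(mean_pooled, p=2, dim=1)     # Normalize(): unit-length embeddings
```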

@@ -147,12 +151,12 @@ from sentence_transformers import SentenceTransformer

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("tasksource/ettin-32m-embed")
# Run inference
queries = [
    "why has chs invested in da?",
]
documents = [
    "In order to renew the strategic road map to CHS's growth, CHS partnered with DA to improve outcomes rather than increasing its size. Most of DA's capacity was used to provide tools in order to support CHS-affiliated hospitals in delivering best-in-class healthcare to patients.",
    'You can in theory add every enchantment that is compatible with a tool/weapon/armor onto the same item. The bow can have these 7 enchantments, though mending and infinity are mutually exclusive. So you can have up to 6 different enchantments on a bow using an anvil.',
    'Clean up is a phrasal verb which means: to make (a room or space) clean and orderly. ... Clean out is a phrasal verb which means something such as a cupboard, room, or container, you take everything out of it and clean the inside of it thoroughly. Secondly, "clean" is a simple word which is often used in our daily life.',
]
query_embeddings = model.encode_query(queries)
document_embeddings = model.encode_document(documents)
print(query_embeddings.shape, document_embeddings.shape)

# Get the similarity scores for the embeddings
similarities = model.similarity(query_embeddings, document_embeddings)
print(similarities)
# tensor([[ 0.6237, -0.0022, -0.1018]])
```
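
Because the final `Normalize()` module makes every embedding unit-length, cosine similarity reduces to a dot product. A small follow-up sketch reusing the variables from the snippet above (the scores come from the card's own output; the retrieval step is illustrative):

```python
import numpy as np

# encode_query / encode_document return NumPy arrays by default, so the
# cosine scores can also be computed as a plain matrix product:
scores = query_embeddings @ document_embeddings.T  # same values as model.similarity(...)
best = int(np.argmax(scores[0]))                   # index 0, the CHS/DA passage
print(best, documents[best][:60])
```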

@@ -264,10 +268,10 @@ You can finetune this model on your own dataset.

* Size: 403,218 training samples
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1 | sentence2 | label |
  |:--------|:----------|:----------|:------|
  | type    | string | string | int |
  | details | <ul><li>min: 6 tokens</li><li>mean: 13.92 tokens</li><li>max: 48 tokens</li></ul> | <ul><li>min: 33 tokens</li><li>mean: 316.81 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>0: ~29.20%</li><li>1: ~70.80%</li></ul> |
* Samples:
  | sentence1 | sentence2 | label |
  |:----------|:----------|:------|

@@ -282,6 +286,58 @@ You can finetune this model on your own dataset.

  }
  ```
</details>
<details><summary>parade</summary>

#### parade

* Dataset: [parade](https://huggingface.co/datasets/tasksource/parade) at [466978f](https://huggingface.co/datasets/tasksource/parade/tree/466978f31aebf4d052287f32ea3ae393f178f386)
* Size: 7,550 training samples
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1 | sentence2 | label |
  |:--------|:----------|:----------|:------|
  | type    | string | string | int |
  | details | <ul><li>min: 6 tokens</li><li>mean: 21.97 tokens</li><li>max: 61 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 21.81 tokens</li><li>max: 49 tokens</li></ul> | <ul><li>0: ~57.10%</li><li>1: ~42.90%</li></ul> |
* Samples:
  | sentence1 | sentence2 | label |
  |:----------|:----------|:------|
  | <code>predictive models are involved with predicting a value based on other values in the dataset. the process of training a predictive model is known as supervised learning.</code> | <code>predict a value based on other values in the dataset. process of training a pred model is supervised learning.</code> | <code>1</code> |
  | <code>predict a value based on other values in the dataset. process of training a pred model is supervised learning.</code> | <code>involved with predicting a value based on other values in the dataset; process of training this type of model is known as supervised learning</code> | <code>1</code> |
  | <code>predicting one value (the target variable) using other values</code> | <code>predictive models are involved with predicting a value based on other values in the dataset.</code> | <code>1</code> |
* Loss: [<code>AnglELoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#angleloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "pairwise_angle_sim"
  }
  ```
</details>
<details><summary>apt</summary>

#### apt

* Dataset: [apt](https://huggingface.co/datasets/tasksource/apt) at [f6c07f6](https://huggingface.co/datasets/tasksource/apt/tree/f6c07f66d3eccebd36418885ce10aff295d436dd)
* Size: 3,349 training samples
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
* Approximate statistics based on the first 1000 samples:
  |         | sentence1 | sentence2 | label |
  |:--------|:----------|:----------|:------|
  | type    | string | string | int |
  | details | <ul><li>min: 4 tokens</li><li>mean: 17.28 tokens</li><li>max: 124 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 16.99 tokens</li><li>max: 121 tokens</li></ul> | <ul><li>0: ~35.90%</li><li>1: ~64.10%</li></ul> |
* Samples:
  | sentence1 | sentence2 | label |
  |:----------|:----------|:------|
  | <code>Come on.</code> | <code>Come on</code> | <code>1</code> |
  | <code>In Washington, the federal government remained closed for a second day.</code> | <code>The federal government in Washington was closed for a second day running.</code> | <code>1</code> |
  | <code>The findings appear in next Friday's Physical Review Letters.</code> | <code>Results published next Friday</code> | <code>0</code> |
* Loss: [<code>AnglELoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#angleloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "pairwise_angle_sim"
  }
  ```
</details>
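
As a rough illustration (not from the card), pair datasets such as parade and apt, with the `sentence1`/`sentence2`/`label` columns shown above, could be fed to `AnglELoss` along these lines; the dataset split name and the trainer wiring here are assumptions:

```python
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
from sentence_transformers.losses import AnglELoss

model = SentenceTransformer("jhu-clsp/ettin-encoder-32m")
# Assumed split and column names, matching the statistics tables above
train_dataset = load_dataset("tasksource/parade", split="train").select_columns(
    ["sentence1", "sentence2", "label"]
)
loss = AnglELoss(model, scale=20.0)  # optimizes pairwise_angle_sim, as in the JSON above

trainer = SentenceTransformerTrainer(model=model, train_dataset=train_dataset, loss=loss)
trainer.train()
```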
<details><summary>glue/stsb</summary>

#### glue/stsb

@@ -368,10 +424,10 @@ You can finetune this model on your own dataset.

* Size: 800,000 training samples
* Columns: <code>label</code>, <code>sentence1</code>, and <code>sentence2</code>
* Approximate statistics based on the first 1000 samples:
  |         | label | sentence1 | sentence2 |
  |:--------|:------|:----------|:----------|
  | type    | int | string | string |
  | details | <ul><li>0: ~51.20%</li><li>1: ~48.80%</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 62.72 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 8.01 tokens</li><li>max: 16 tokens</li></ul> |
* Samples:
  | label | sentence1 | sentence2 |
  |:------|:----------|:----------|

@@ -390,14 +446,14 @@ You can finetune this model on your own dataset.

#### tomaarsen/natural-questions-hard-negatives

* Dataset: tomaarsen/natural-questions-hard-negatives
* Size: 96,658 training samples
* Columns: <code>query</code>, <code>answer</code>, <code>negative_1</code>, <code>negative_2</code>, <code>negative_3</code>, <code>negative_4</code>, and <code>negative_5</code>
* Approximate statistics based on the first 1000 samples:
  |         | query | answer | negative_1 | negative_2 | negative_3 | negative_4 | negative_5 |
  |:--------|:------|:-------|:-----------|:-----------|:-----------|:-----------|:-----------|
  | type    | string | string | string | string | string | string | string |
  | details | <ul><li>min: 10 tokens</li><li>mean: 12.52 tokens</li><li>max: 26 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 137.85 tokens</li><li>max: 556 tokens</li></ul> | <ul><li>min: 23 tokens</li><li>mean: 144.1 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 13 tokens</li><li>mean: 142.73 tokens</li><li>max: 832 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 146.37 tokens</li><li>max: 649 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 145.79 tokens</li><li>max: 549 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 142.01 tokens</li><li>max: 574 tokens</li></ul> |
* Samples:
  | query | answer | negative_1 | negative_2 | negative_3 | negative_4 | negative_5 |
  |:------|:-------|:-----------|:-----------|:-----------|:-----------|:-----------|

@@ -418,7 +474,7 @@ You can finetune this model on your own dataset.

#### tomaarsen/gooaq-hard-negatives

* Dataset: tomaarsen/gooaq-hard-negatives
* Size: 800,000 training samples
* Columns: <code>question</code>, <code>answer</code>, <code>negative_1</code>, <code>negative_2</code>, <code>negative_3</code>, <code>negative_4</code>, and <code>negative_5</code>
* Approximate statistics based on the first 1000 samples:

@@ -446,7 +502,7 @@ You can finetune this model on your own dataset.

#### bclavie/msmarco-500k-triplets

* Dataset: bclavie/msmarco-500k-triplets
* Size: 500,000 training samples
* Columns: <code>query</code>, <code>positive</code>, and <code>negative</code>
* Approximate statistics based on the first 1000 samples:

@@ -474,7 +530,7 @@ You can finetune this model on your own dataset.

#### sentence-transformers/msmarco-co-condenser-margin-mse-sym-mnrl-mean-v1

* Dataset: sentence-transformers/msmarco-co-condenser-margin-mse-sym-mnrl-mean-v1
* Size: 800,000 training samples
* Columns: <code>query</code>, <code>positive</code>, and <code>negative</code>
* Approximate statistics based on the first 1000 samples:

@@ -502,7 +558,7 @@ You can finetune this model on your own dataset.

#### sentence-transformers/gooaq

* Dataset: sentence-transformers/gooaq
* Size: 800,000 training samples
* Columns: <code>question</code> and <code>answer</code>
* Approximate statistics based on the first 1000 samples:

@@ -530,14 +586,14 @@ You can finetune this model on your own dataset.

#### sentence-transformers/natural-questions

* Dataset: sentence-transformers/natural-questions
* Size: 100,231 training samples
* Columns: <code>query</code> and <code>answer</code>
* Approximate statistics based on the first 1000 samples:
  |         | query | answer |
  |:--------|:------|:-------|
  | type    | string | string |
  | details | <ul><li>min: 10 tokens</li><li>mean: 12.47 tokens</li><li>max: 23 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 138.32 tokens</li><li>max: 556 tokens</li></ul> |
* Samples:
  | query | answer |
  |:------|:-------|

@@ -558,7 +614,7 @@ You can finetune this model on your own dataset.

#### sentence-transformers/quora-duplicates

* Dataset: sentence-transformers/quora-duplicates
* Size: 101,762 training samples
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
* Approximate statistics based on the first 1000 samples:

@@ -582,11 +638,96 @@ You can finetune this model on your own dataset.

  }
  ```
</details>
<details><summary>sentence-transformers/s2orc</summary>

#### sentence-transformers/s2orc

* Dataset: sentence-transformers/s2orc
* Size: 800,000 training samples
* Columns: <code>title</code> and <code>abstract</code>
* Approximate statistics based on the first 1000 samples:
  |         | title | abstract |
  |:--------|:------|:---------|
  | type    | string | string |
  | details | <ul><li>min: 6 tokens</li><li>mean: 20.08 tokens</li><li>max: 83 tokens</li></ul> | <ul><li>min: 18 tokens</li><li>mean: 131.03 tokens</li><li>max: 332 tokens</li></ul> |
* Samples:
  | title | abstract |
  |:------|:---------|
  | <code>Syntheses, Structures and Properties of Two Transition Metal-Flexible Ligand Coordination Polymers</code> | <code>Two coordination polymers based on 3,5-bis(4-carboxyphenylmethyloxy) benzoic acid (H3L), [M(HL)]·2H2O M = Mn(1), Co(2), have been synthesized under hydrothermal conditions. Their structures have been determined by single-crystal X-ray diffraction and further characterized by elemental analysis, IR spectra and TGA. The two complexes possess 3D framework with diamond channels resulting from the trans-configuration of the flexible ligand and three coordination modes, 3(η2, η1), 2(η1, η1), η1, of carboxyl groups in the ligand. The framework can be represented with Schlafli symbol of (48·66)(47·66). The wall of the channel consists of left- or right-handed helical polymeric chains. UV–visible–NIR and photoluminescence spectra, magnetic properties of 1 and 2 have also been discussed.</code> |
  | <code>Discussion on the Influence and Development of Technical Aesthetics in Modern Landscape Design</code> | <code>The source of technical aesthetics was introduced and its meaning was explained.The relations between technical aesthetics and modern landscpae design were discussed.The embodiment of technical aesthetics in landscpae design was discussed in the aspects of new material,new technology,new structureand new apparatus.It was put forward that the the development direction of technical aesthetics were tending to sensibility, native land and zoology.</code> |
  | <code>GRIN optics for dual-band IR sensors (Conference Presentation)</code> | <code>Graded index (GRIN) optics offer potential for both weight savings and increased performance but have until recently been limited to visible and NIR bands (wavelengths shorter than about 0.9 µm). NRL has developed glass-based IR-GRIN lenses compatible with SWIR-LWIR wavebands. Recent designs show the potential for significant SWaP reduction benefits and improved performance using IR-GRIN lens elements in dual-band, MWIR-LWIR sensors. The SWaP and performance advantages of IR-GRIN lenses in platform-relevant dual-band imagers will be presented.</code> |
* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim",
      "mini_batch_size": 32,
      "gather_across_devices": false
  }
  ```
</details>
<details><summary>sentence-transformers/codesearchnet</summary>

#### sentence-transformers/codesearchnet

* Dataset: sentence-transformers/codesearchnet
* Size: 800,000 training samples
* Columns: <code>comment</code> and <code>code</code>
* Approximate statistics based on the first 1000 samples:
  |         | comment | code |
  |:--------|:--------|:-----|
  | type    | string | string |
  | details | <ul><li>min: 3 tokens</li><li>mean: 28.98 tokens</li><li>max: 142 tokens</li></ul> | <ul><li>min: 30 tokens</li><li>mean: 166.72 tokens</li><li>max: 1024 tokens</li></ul> |
* Samples:
  | comment | code |
  |:--------|:-----|
  | <code>Computes the new parent id for the node being moved.<br><br>@return int</code> | <code>protected function parentId()<br> {<br> switch ( $this->position )<br> {<br> case 'root':<br> return null;<br><br> case 'child':<br> return $this->target->getKey();<br><br> default:<br> return $this->target->getParentId();<br> }<br> }</code> |
  | <code>// SetWinSize overwrites the playlist's window size.</code> | <code>func (p *MediaPlaylist) SetWinSize(winsize uint) error {<br> if winsize > p.capacity {<br> return errors.New("capacity must be greater than winsize or equal")<br> }<br> p.winsize = winsize<br> return nil<br>}</code> |
  | <code>Show the sidebar and squish the container to make room for the sidebar.<br>If hideOthers is true, hide other open sidebars.</code> | <code>function() {<br> var options = this.options;<br><br> if (options.hideOthers) {<br> this.secondary.each(function() {<br> var sidebar = $(this);<br><br> if (sidebar.hasClass('is-expanded')) {<br> sidebar.toolkit('offCanvas', 'hide');<br> }<br> });<br> }<br><br> this.fireEvent('showing');<br><br> this.container.addClass('move-' + this.opposite);<br><br> this.element<br> .reveal()<br> .addClass('is-expanded')<br> .aria('expanded', true);<br><br> if (options.stopScroll) {<br> $('body').addClass('no-scroll');<br> }<br><br> this.fireEvent('shown');<br> }</code> |
* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim",
      "mini_batch_size": 32,
      "gather_across_devices": false
  }
  ```
</details>
<details><summary>sentence-transformers/stackexchange-duplicates</summary>

#### sentence-transformers/stackexchange-duplicates

* Dataset: sentence-transformers/stackexchange-duplicates
* Size: 250,460 training samples
* Columns: <code>body1</code> and <code>body2</code>
* Approximate statistics based on the first 1000 samples:
  |         | body1 | body2 |
  |:--------|:------|:------|
  | type    | string | string |
  | details | <ul><li>min: 13 tokens</li><li>mean: 174.01 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 156.88 tokens</li><li>max: 1024 tokens</li></ul> |
* Samples:
  | body1 | body2 |
  |:------|:------|
</details>
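
For intuition (this sketch is not from the card), `CachedMultipleNegativesRankingLoss` is a memory-efficient version of the standard in-batch-negatives objective, which can be written as a cross-entropy over scaled cosine similarities:

```python
import torch
import torch.nn.functional as F

def in_batch_negatives_loss(anchors: torch.Tensor, positives: torch.Tensor, scale: float = 20.0) -> torch.Tensor:
    """anchors, positives: (batch, dim) L2-normalized embeddings; positives[i] pairs with anchors[i]."""
    scores = scale * anchors @ positives.T  # cosine similarity of every anchor against every positive
    labels = torch.arange(scores.size(0), device=scores.device)
    # The diagonal holds the true pairs; every other positive in the batch acts as a negative.
    return F.cross_entropy(scores, labels)
```

The cached variant computes the same loss while streaming the batch through the encoder in chunks of 32 (the `mini_batch_size` above), which is what makes large effective batch sizes affordable.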

### Training Hyperparameters
#### Non-Default Hyperparameters

- `per_device_train_batch_size`:
- `weight_decay`: 1e-06
- `num_train_epochs`: 2
- `warmup_ratio`: 0.1

@@ -600,14 +741,14 @@ You can finetune this model on your own dataset.

- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`:
- `per_device_eval_batch_size`: 8
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`:
- `weight_decay`: 1e-06
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999

@@ -633,7 +774,6 @@ You can finetune this model on your own dataset.

- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False
- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1

@@ -660,6 +800,7 @@ You can finetune this model on your own dataset.

- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch

@@ -667,6 +808,8 @@ You can finetune this model on your own dataset.

- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False

@@ -699,7 +842,7 @@ You can finetune this model on your own dataset.

- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`:
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False

@@ -707,7 +850,7 @@ You can finetune this model on your own dataset.

- `use_liger_kernel`: False
- `liger_kernel_config`: None
- `eval_use_gather_object`: False
- `average_tokens_across_devices`:
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional
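
For readers reproducing a similar run, a hedged sketch of how the non-default values above map onto `SentenceTransformerTrainingArguments`; the batch size and learning rate are truncated in this card, so those two values below are placeholders, not the actual ones:

```python
from sentence_transformers import SentenceTransformerTrainingArguments

args = SentenceTransformerTrainingArguments(
    output_dir="ettin-32m-embed",    # hypothetical output path
    per_device_train_batch_size=64,  # placeholder: the actual value is truncated above
    learning_rate=2e-5,              # placeholder: the actual value is truncated above
    weight_decay=1e-06,
    num_train_epochs=2,
    warmup_ratio=0.1,
    fp16=True,
    multi_dataset_batch_sampler="proportional",
)
```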

@@ -719,50 +862,114 @@ You can finetune this model on your own dataset.

### Training Logs

| Epoch | Step | Training Loss |
|:------|:-----|:--------------|

### Framework Versions
- Python: 3.12.10
- Sentence Transformers: 5.1.2
- Transformers: 4.
- PyTorch: 2.7.1+cu126
- Accelerate: 1.7.0
- Datasets: 3.6.0
- Tokenizers: 0.

## Citation
| 7 |
- feature-extraction
|
| 8 |
- dense
|
| 9 |
- generated_from_trainer
|
| 10 |
+
- dataset_size:6331245
|
| 11 |
- loss:AnglELoss
|
| 12 |
- loss:CoSENTLoss
|
| 13 |
- loss:CachedMultipleNegativesRankingLoss
|
| 14 |
base_model: jhu-clsp/ettin-encoder-32m
|
| 15 |
widget:
|
| 16 |
+
- source_sentence: what is paediatric clinical psychology
|
| 17 |
sentences:
|
| 18 |
+
- Pediatric neuropsychology (paediatric in the UK) is a sub-speciality within the
|
| 19 |
+
field of clinical neuropsychology that studies the relationship between brain
|
| 20 |
+
health and behaviour in children.any pediatric neuropsychologists are involved
|
| 21 |
+
in teaching, research, supervision, and training of undergraduate and graduate
|
| 22 |
+
students in the field. In the United States undergraduate and graduate psychology
|
| 23 |
+
programs generally do not offer a track in pediatric neuropsychology, per se.
|
| 24 |
+
- "â\x80\x9CRealâ\x80\x9D hummus, should contain about 175 calories, out of which\
|
| 25 |
+
\ 70-80 calories are contributed by fat. The average Israeli eats 8-10 kilograms\
|
| 26 |
+
\ (18-22 pounds) of hummus every year, so weâ\x80\x99re talking about extra 15,000\
|
| 27 |
+
\ calories which can make him gain about 2.5kg of body weight each year. So you\
|
| 28 |
+
\ can see how excessive consumption of the packaged product might be fattening\
|
| 29 |
+
\ over the years. The common serving size of hummus (real hummus, that is), which\
|
| 30 |
+
\ is around one cup (220-240g) may contain 400-450 calories. And every pita (â\x80\
|
| 31 |
+
\x9Cpita breadâ\x80\x9D) contains another 270, so itâ\x80\x99s not really â\x80\
|
| 32 |
+
\x9Cdietaryâ\x80\x9D."
|
| 33 |
+
- "Pediatrics (also spelled paediatrics or pædiatrics) is the branch of medicine\
|
| 34 |
+
\ that involves the medical care of infants, children, and adolescents. The American\
|
| 35 |
+
\ Academy of Pediatrics recommends people be under pediatric care up to the age\
|
| 36 |
+
\ of 21.[1] A medical practitioner who specializes in this area is known as a\
|
| 37 |
+
\ pediatrician, or paediatrician. The word pediatrics and its cognates mean healer\
|
| 38 |
+
\ of children; they derive from two Greek words: Ï\x80αá¿\x96Ï\x82 (pais child)\
|
| 39 |
+
\ and ἰαÏ\x84Ï\x81Ï\x8CÏ\x82 (iatros doctor, healer)."
|
| 40 |
+
- source_sentence: These ancient rites are rarely performed in contemporary Sri Lanka
|
| 41 |
+
, but the conserved songs are still performed by folk musicians .
|
| 42 |
sentences:
|
| 43 |
+
- In 1971 , a main campus was completed in 33 MacDonnell Road for the new school
|
| 44 |
+
.
|
| 45 |
+
- These ancient rites are still performed in contemporary Sri Lanka , but the preserved
|
| 46 |
+
songs are rarely performed by folk musicians .
|
| 47 |
+
- After May 4 , 2012 , Gordon M. Snow was replaced by Joseph M. Demarest and then
|
| 48 |
+
Michael S. Welch with limited formal announcement .
|
| 49 |
+
- source_sentence: A woman is playing the flute.
|
| 50 |
sentences:
|
| 51 |
+
- A boy is playing the trumpet.
|
| 52 |
+
- A man tries to read the paper.
|
| 53 |
+
- A man is playing the guitar.
|
| 54 |
+
- source_sentence: Interference now on all our scans.
|
| 55 |
sentences:
|
| 56 |
+
- Would you permit me to explain this Polly?
|
| 57 |
+
- All Ourscans are jammed.
|
| 58 |
+
- The aircraft family was first introduced at the Paris Air Show in 1999.
|
| 59 |
+
- source_sentence: why has chs invested in da?
|
| 60 |
sentences:
|
| 61 |
+
- In order to renew the strategic road map to CHS's growth, CHS partnered with DA
|
| 62 |
+
to improve outcomes rather than increasing its size. Most of DA's capacity was
|
| 63 |
+
used to provide tools in order to support CHS-affiliated hospitals in delivering
|
| 64 |
+
best-in-class healthcare to patients.
|
| 65 |
+
- You can in theory add every enchantment that is compatible with a tool/weapon/armor
|
| 66 |
+
onto the same item. The bow can have these 7 enchantments, though mending and
|
| 67 |
+
infinity are mutually exclusive. So you can have up to 6 different enchantments
|
| 68 |
+
on a bow using an anvil.
|
| 69 |
+
- 'Clean up is a phrasal verb which means: to make (a room or space) clean and orderly.
|
| 70 |
+
... Clean out is a phrasal verb which means something such as a cupboard, room,
|
| 71 |
+
or container, you take everything out of it and clean the inside of it thoroughly.
|
| 72 |
+
Secondly, "clean"is a simple word which is often used in our daily life.'
|
| 73 |
datasets:
|
| 74 |
- google-research-datasets/paws
|
| 75 |
- nyu-mll/glue
|
| 76 |
- mwong/fever-evidence-related
|
| 77 |
+
- tasksource/parade
|
| 78 |
+
- tasksource/apt
|
| 79 |
- tasksource/sts-companion
|
| 80 |
- tasksource/zero-shot-label-nli
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
pipeline_tag: sentence-similarity
|
| 82 |
library_name: sentence-transformers
|
| 83 |
---
|
| 84 |
|
| 85 |
# SentenceTransformer based on jhu-clsp/ettin-encoder-32m
|
| 86 |
|
| 87 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [jhu-clsp/ettin-encoder-32m](https://huggingface.co/jhu-clsp/ettin-encoder-32m) on 19 datasets. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
| 88 |
|
| 89 |
## Model Details
|
| 90 |
|
| 91 |
### Model Description
|
| 92 |
- **Model Type:** Sentence Transformer
|
| 93 |
- **Base model:** [jhu-clsp/ettin-encoder-32m](https://huggingface.co/jhu-clsp/ettin-encoder-32m) <!-- at revision 1b8ba06455dd44f80fc9c1ca9e22806157a57379 -->
|
| 94 |
+
- **Maximum Sequence Length:** 1024 tokens
|
| 95 |
- **Output Dimensionality:** 384 dimensions
|
| 96 |
- **Similarity Function:** Cosine Similarity
|
| 97 |
- **Training Datasets:**
|
| 98 |
- [paws/labeled_final](https://huggingface.co/datasets/paws)
|
| 99 |
- [glue/mrpc](https://huggingface.co/datasets/glue)
|
| 100 |
- [fever-evidence-related](https://huggingface.co/datasets/mwong/fever-evidence-related)
|
| 101 |
+
- [parade](https://huggingface.co/datasets/tasksource/parade)
|
| 102 |
+
- [apt](https://huggingface.co/datasets/tasksource/apt)
|
| 103 |
- [glue/stsb](https://huggingface.co/datasets/glue)
|
| 104 |
- sick/relatedness
|
| 105 |
- [sts-companion](https://huggingface.co/datasets/tasksource/sts-companion)
|
| 106 |
- [zero-shot-label-nli](https://huggingface.co/datasets/tasksource/zero-shot-label-nli)
|
| 107 |
+
- tomaarsen/natural-questions-hard-negatives
|
| 108 |
+
- tomaarsen/gooaq-hard-negatives
|
| 109 |
+
- bclavie/msmarco-500k-triplets
|
| 110 |
+
- sentence-transformers/msmarco-co-condenser-margin-mse-sym-mnrl-mean-v1
|
| 111 |
+
- sentence-transformers/gooaq
|
| 112 |
+
- sentence-transformers/natural-questions
|
| 113 |
+
- sentence-transformers/quora-duplicates
|
| 114 |
+
- sentence-transformers/s2orc
|
| 115 |
+
- sentence-transformers/codesearchnet
|
| 116 |
+
- sentence-transformers/stackexchange-duplicates
|
| 117 |
- **Language:** en
|
| 118 |
<!-- - **License:** Unknown -->
|
| 119 |
|
|
|
|
| 127 |
|
| 128 |
```
|
| 129 |
SentenceTransformer(
|
| 130 |
+
(0): Transformer({'max_seq_length': 1024, 'do_lower_case': False, 'architecture': 'ModernBertModel'})
|
| 131 |
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
| 132 |
(2): Normalize()
|
| 133 |
)
|
|
|
|
| 151 |
model = SentenceTransformer("tasksource/ettin-32m-embed")
|
| 152 |
# Run inference
|
| 153 |
queries = [
|
| 154 |
+
"why has chs invested in da?",
|
| 155 |
]
|
| 156 |
documents = [
|
| 157 |
+
"In order to renew the strategic road map to CHS's growth, CHS partnered with DA to improve outcomes rather than increasing its size. Most of DA's capacity was used to provide tools in order to support CHS-affiliated hospitals in delivering best-in-class healthcare to patients.",
|
| 158 |
+
'You can in theory add every enchantment that is compatible with a tool/weapon/armor onto the same item. The bow can have these 7 enchantments, though mending and infinity are mutually exclusive. So you can have up to 6 different enchantments on a bow using an anvil.',
|
| 159 |
+
'Clean up is a phrasal verb which means: to make (a room or space) clean and orderly. ... Clean out is a phrasal verb which means something such as a cupboard, room, or container, you take everything out of it and clean the inside of it thoroughly. Secondly, "clean"is a simple word which is often used in our daily life.',
|
| 160 |
]
|
| 161 |
query_embeddings = model.encode_query(queries)
|
| 162 |
document_embeddings = model.encode_document(documents)
|
|
|
|
| 166 |
# Get the similarity scores for the embeddings
|
| 167 |
similarities = model.similarity(query_embeddings, document_embeddings)
|
| 168 |
print(similarities)
|
| 169 |
+
# tensor([[ 0.6237, -0.0022, -0.1018]])
|
| 170 |
```
|
| 171 |
|
| 172 |
<!--
|
|
|
|
| 268 |
* Size: 403,218 training samples
|
| 269 |
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
|
| 270 |
* Approximate statistics based on the first 1000 samples:
|
| 271 |
+
| | sentence1 | sentence2 | label |
|
| 272 |
+
|:--------|:----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|:------------------------------------------------|
|
| 273 |
+
| type | string | string | int |
|
| 274 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 13.92 tokens</li><li>max: 48 tokens</li></ul> | <ul><li>min: 33 tokens</li><li>mean: 316.81 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>0: ~29.20%</li><li>1: ~70.80%</li></ul> |
|
| 275 |
* Samples:
|
| 276 |
| sentence1 | sentence2 | label |
|
| 277 |
|:-----------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
|
|
|
|
| 286 |
}
|
| 287 |
```
|
| 288 |
</details>
|
| 289 |
+
<details><summary>parade</summary>
|
| 290 |
+
|
| 291 |
+
#### parade
|
| 292 |
+
|
| 293 |
+
* Dataset: [parade](https://huggingface.co/datasets/tasksource/parade) at [466978f](https://huggingface.co/datasets/tasksource/parade/tree/466978f31aebf4d052287f32ea3ae393f178f386)
|
| 294 |
+
* Size: 7,550 training samples
|
| 295 |
+
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
|
| 296 |
+
* Approximate statistics based on the first 1000 samples:
|
| 297 |
+
| | sentence1 | sentence2 | label |
|
| 298 |
+
|:--------|:----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
|
| 299 |
+
| type | string | string | int |
|
| 300 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 21.97 tokens</li><li>max: 61 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 21.81 tokens</li><li>max: 49 tokens</li></ul> | <ul><li>0: ~57.10%</li><li>1: ~42.90%</li></ul> |
|
| 301 |
+
* Samples:
|
| 302 |
+
| sentence1 | sentence2 | label |
|
| 303 |
+
|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------|
|
| 304 |
+
| <code>predictive models are involved with predicting a value based on other values in the dataset. the process of training a predictive model is known as supervised learning.</code> | <code>predict a value based on other values in the dataset. process of training a pred model is supervised learning.</code> | <code>1</code> |
|
| 305 |
+
| <code>predict a value based on other values in the dataset. process of training a pred model is supervised learning.</code> | <code>involved with predicting a value based on other values in the dataset; process of training this type of model is known as supervised learning</code> | <code>1</code> |
|
| 306 |
+
| <code>predicting one value (the target variable) using other values</code> | <code>predictive models are involved with predicting a value based on other values in the dataset.</code> | <code>1</code> |
|
| 307 |
+
* Loss: [<code>AnglELoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#angleloss) with these parameters:
|
| 308 |
+
```json
|
| 309 |
+
{
|
| 310 |
+
"scale": 20.0,
|
| 311 |
+
"similarity_fct": "pairwise_angle_sim"
|
| 312 |
+
}
|
| 313 |
+
```
|
| 314 |
+
</details>
|
| 315 |
+
<details><summary>apt</summary>
|
| 316 |
+
|
| 317 |
+
#### apt
|
| 318 |
+
|
| 319 |
+
* Dataset: [apt](https://huggingface.co/datasets/tasksource/apt) at [f6c07f6](https://huggingface.co/datasets/tasksource/apt/tree/f6c07f66d3eccebd36418885ce10aff295d436dd)
|
| 320 |
+
* Size: 3,349 training samples
|
| 321 |
+
* Columns: <code>sentence1</code>, <code>sentence2</code>, and <code>label</code>
|
| 322 |
+
* Approximate statistics based on the first 1000 samples:
|
| 323 |
+
| | sentence1 | sentence2 | label |
|
| 324 |
+
|:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:------------------------------------------------|
|
| 325 |
+
| type | string | string | int |
|
| 326 |
+
| details | <ul><li>min: 4 tokens</li><li>mean: 17.28 tokens</li><li>max: 124 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 16.99 tokens</li><li>max: 121 tokens</li></ul> | <ul><li>0: ~35.90%</li><li>1: ~64.10%</li></ul> |
|
| 327 |
+
* Samples:
|
| 328 |
+
| sentence1 | sentence2 | label |
|
| 329 |
+
|:-------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------|:---------------|
|
| 330 |
+
| <code>Come on.</code> | <code>Come on</code> | <code>1</code> |
|
| 331 |
+
| <code>In Washington, the federal government remained closed for a second day.</code> | <code>The federal government in Washington was closed for a second day running.</code> | <code>1</code> |
|
| 332 |
+
| <code>The findings appear in next Friday's Physical Review Letters.</code> | <code>Results published next Friday</code> | <code>0</code> |
|
| 333 |
+
* Loss: [<code>AnglELoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#angleloss) with these parameters:
|
| 334 |
+
```json
|
| 335 |
+
{
|
| 336 |
+
"scale": 20.0,
|
| 337 |
+
"similarity_fct": "pairwise_angle_sim"
|
| 338 |
+
}
|
| 339 |
+
```
|
| 340 |
+
</details>
|
| 341 |
<details><summary>glue/stsb</summary>
|
| 342 |
|
| 343 |
#### glue/stsb
|
|
|
|
| 424 |
* Size: 800,000 training samples
|
| 425 |
* Columns: <code>label</code>, <code>sentence1</code>, and <code>sentence2</code>
|
| 426 |
* Approximate statistics based on the first 1000 samples:
|
| 427 |
+
| | label | sentence1 | sentence2 |
|
| 428 |
+
|:--------|:------------------------------------------------|:------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------|
|
| 429 |
+
| type | int | string | string |
|
| 430 |
+
| details | <ul><li>0: ~51.20%</li><li>1: ~48.80%</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 62.72 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 8.01 tokens</li><li>max: 16 tokens</li></ul> |
|
| 431 |
* Samples:
|
| 432 |
| label | sentence1 | sentence2 |
|
| 433 |
|:---------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------|
|
|
|
|
| 446 |
|
| 447 |
#### tomaarsen/natural-questions-hard-negatives
|
| 448 |
|
| 449 |
+
* Dataset: tomaarsen/natural-questions-hard-negatives
|
| 450 |
* Size: 96,658 training samples
|
| 451 |
* Columns: <code>query</code>, <code>answer</code>, <code>negative_1</code>, <code>negative_2</code>, <code>negative_3</code>, <code>negative_4</code>, and <code>negative_5</code>
|
| 452 |
* Approximate statistics based on the first 1000 samples:
|
| 453 |
| | query | answer | negative_1 | negative_2 | negative_3 | negative_4 | negative_5 |
|
| 454 |
|:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
| 455 |
| type | string | string | string | string | string | string | string |
|
| 456 |
+
| details | <ul><li>min: 10 tokens</li><li>mean: 12.52 tokens</li><li>max: 26 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 137.85 tokens</li><li>max: 556 tokens</li></ul> | <ul><li>min: 23 tokens</li><li>mean: 144.1 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 13 tokens</li><li>mean: 142.73 tokens</li><li>max: 832 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 146.37 tokens</li><li>max: 649 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 145.79 tokens</li><li>max: 549 tokens</li></ul> | <ul><li>min: 19 tokens</li><li>mean: 142.01 tokens</li><li>max: 574 tokens</li></ul> |
|
| 457 |
* Samples:
|
| 458 |
| query | answer | negative_1 | negative_2 | negative_3 | negative_4 | negative_5 |
|
| 459 |
|:----------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|
|
|
| 474 |
|
| 475 |
#### tomaarsen/gooaq-hard-negatives
|
| 476 |
|
| 477 |
+
* Dataset: tomaarsen/gooaq-hard-negatives
|
| 478 |
* Size: 800,000 training samples
|
| 479 |
* Columns: <code>question</code>, <code>answer</code>, <code>negative_1</code>, <code>negative_2</code>, <code>negative_3</code>, <code>negative_4</code>, and <code>negative_5</code>
|
| 480 |
* Approximate statistics based on the first 1000 samples:
|
|
|
|
| 502 |
|
| 503 |
#### bclavie/msmarco-500k-triplets
|
| 504 |
|
| 505 |
+
* Dataset: bclavie/msmarco-500k-triplets
|
| 506 |
* Size: 500,000 training samples
|
| 507 |
* Columns: <code>query</code>, <code>positive</code>, and <code>negative</code>
|
| 508 |
* Approximate statistics based on the first 1000 samples:
|
|
|
|
| 530 |
|
| 531 |
#### sentence-transformers/msmarco-co-condenser-margin-mse-sym-mnrl-mean-v1
|
| 532 |
|
| 533 |
+
* Dataset: sentence-transformers/msmarco-co-condenser-margin-mse-sym-mnrl-mean-v1
|
| 534 |
* Size: 800,000 training samples
|
| 535 |
* Columns: <code>query</code>, <code>positive</code>, and <code>negative</code>
|
| 536 |
* Approximate statistics based on the first 1000 samples:
|
|
|
|
| 558 |
|
| 559 |
#### sentence-transformers/gooaq
|
| 560 |
|
| 561 |
+
* Dataset: sentence-transformers/gooaq
|
| 562 |
* Size: 800,000 training samples
|
| 563 |
* Columns: <code>question</code> and <code>answer</code>
|
| 564 |
* Approximate statistics based on the first 1000 samples:
|
|
|
|
| 586 |
|
| 587 |
#### sentence-transformers/natural-questions
|
| 588 |
|
| 589 |
+
* Dataset: sentence-transformers/natural-questions
|
| 590 |
* Size: 100,231 training samples
|
| 591 |
* Columns: <code>query</code> and <code>answer</code>
|
| 592 |
* Approximate statistics based on the first 1000 samples:
|
| 593 |
| | query | answer |
|
| 594 |
|:--------|:-----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
| 595 |
| type | string | string |
|
| 596 |
+
| details | <ul><li>min: 10 tokens</li><li>mean: 12.47 tokens</li><li>max: 23 tokens</li></ul> | <ul><li>min: 17 tokens</li><li>mean: 138.32 tokens</li><li>max: 556 tokens</li></ul> |
|
| 597 |
* Samples:
|
| 598 |
| query | answer |
|
| 599 |
|:----------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|
|
|
| 614 |
|
| 615 |
#### sentence-transformers/quora-duplicates
|
| 616 |
|
| 617 |
+
* Dataset: sentence-transformers/quora-duplicates
|
| 618 |
* Size: 101,762 training samples
|
| 619 |
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
| 620 |
* Approximate statistics based on the first 1000 samples:
|
|
|
|
| 638 |
}
|
| 639 |
```
|
| 640 |
</details>
|
| 641 |
+
<details><summary>sentence-transformers/s2orc</summary>
|
| 642 |
+
|
| 643 |
+
#### sentence-transformers/s2orc
|
| 644 |
+
|
| 645 |
+
* Dataset: sentence-transformers/s2orc
|
| 646 |
+
* Size: 800,000 training samples
|
| 647 |
+
* Columns: <code>title</code> and <code>abstract</code>
|
| 648 |
+
* Approximate statistics based on the first 1000 samples:
|
| 649 |
+
| | title | abstract |
|
| 650 |
+
|:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
| 651 |
+
| type | string | string |
|
| 652 |
+
| details | <ul><li>min: 6 tokens</li><li>mean: 20.08 tokens</li><li>max: 83 tokens</li></ul> | <ul><li>min: 18 tokens</li><li>mean: 131.03 tokens</li><li>max: 332 tokens</li></ul> |
|
| 653 |
+
* Samples:
|
| 654 |
+
| title | abstract |
|
| 655 |
+
|:----------------------------------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
| 656 |
+
| <code>Syntheses, Structures and Properties of Two Transition Metal-Flexible Ligand Coordination Polymers</code> | <code>Two coordination polymers based on 3,5-bis(4-carboxyphenylmethyloxy) benzoic acid (H3L), [M(HL)]·2H2O M = Mn(1), Co(2), have been synthesized under hydrothermal conditions. Their structures have been determined by single-crystal X-ray diffraction and further characterized by elemental analysis, IR spectra and TGA. The two complexes possess 3D framework with diamond channels resulting from the trans-configuration of the flexible ligand and three coordination modes, 3(η2, η1), 2(η1, η1), η1, of carboxyl groups in the ligand. The framework can be represented with Schlafli symbol of (48·66)(47·66). The wall of the channel consists of left- or right-handed helical polymeric chains. UV–visible–NIR and photoluminescence spectra, magnetic properties of 1 and 2 have also been discussed.</code> |
|
| 657 |
+
| <code>Discussion on the Influence and Development of Technical Aesthetics in Modern Landscape Design</code> | <code>The source of technical aesthetics was introduced and its meaning was explained.The relations between technical aesthetics and modern landscpae design were discussed.The embodiment of technical aesthetics in landscpae design was discussed in the aspects of new material,new technology,new structureand new apparatus.It was put forward that the the development direction of technical aesthetics were tending to sensibility, native land and zoology.</code> |
|
| 658 |
+
| <code>GRIN optics for dual-band IR sensors (Conference Presentation)</code> | <code>Graded index (GRIN) optics offer potential for both weight savings and increased performance but have until recently been limited to visible and NIR bands (wavelengths shorter than about 0.9 µm). NRL has developed glass-based IR-GRIN lenses compatible with SWIR-LWIR wavebands. Recent designs show the potential for significant SWaP reduction benefits and improved performance using IR-GRIN lens elements in dual-band, MWIR-LWIR sensors. The SWaP and performance advantages of IR-GRIN lenses in platform-relevant dual-band imagers will be presented.</code> |
|
| 659 |
+
* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
|
| 660 |
+
```json
|
| 661 |
+
{
|
| 662 |
+
"scale": 20.0,
|
| 663 |
+
"similarity_fct": "cos_sim",
|
| 664 |
+
"mini_batch_size": 32,
|
| 665 |
+
"gather_across_devices": false
|
| 666 |
+
}
|
| 667 |
+
```
|
| 668 |
+
</details>
|
| 669 |
+
<details><summary>sentence-transformers/codesearchnet</summary>
|
| 670 |
+
|
| 671 |
+
#### sentence-transformers/codesearchnet
|
| 672 |
+
|
| 673 |
+
* Dataset: sentence-transformers/codesearchnet
|
| 674 |
+
* Size: 800,000 training samples
|
| 675 |
+
* Columns: <code>comment</code> and <code>code</code>
|
| 676 |
+
* Approximate statistics based on the first 1000 samples:
|
| 677 |
+
| | comment | code |
|
| 678 |
+
|:--------|:-----------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|
|
| 679 |
+
| type | string | string |
|
| 680 |
+
| details | <ul><li>min: 3 tokens</li><li>mean: 28.98 tokens</li><li>max: 142 tokens</li></ul> | <ul><li>min: 30 tokens</li><li>mean: 166.72 tokens</li><li>max: 1024 tokens</li></ul> |
|
| 681 |
+
* Samples:
|
| 682 |
+
| comment | code |
|
| 683 |
+
|:-----------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| <code>Computes the new parent id for the node being moved.<br><br>@return int</code> | <code>protected function parentId()<br> {<br> switch ( $this->position )<br> {<br> case 'root':<br> return null;<br><br> case 'child':<br> return $this->target->getKey();<br><br> default:<br> return $this->target->getParentId();<br> }<br> }</code> |
| <code>// SetWinSize overwrites the playlist's window size.</code> | <code>func (p *MediaPlaylist) SetWinSize(winsize uint) error {<br> if winsize > p.capacity {<br> return errors.New("capacity must be greater than winsize or equal")<br> }<br> p.winsize = winsize<br> return nil<br>}</code> |
| <code>Show the sidebar and squish the container to make room for the sidebar.<br>If hideOthers is true, hide other open sidebars.</code> | <code>function() {<br> var options = this.options;<br><br> if (options.hideOthers) {<br> this.secondary.each(function() {<br> var sidebar = $(this);<br><br> if (sidebar.hasClass('is-expanded')) {<br> sidebar.toolkit('offCanvas', 'hide');<br> }<br> });<br> }<br><br> this.fireEvent('showing');<br><br> this.container.addClass('move-' + this.opposite);<br><br> this.element<br> .reveal()<br> .addClass('is-expanded')<br> .aria('expanded', true);<br><br> if (options.stopScroll) {<br> $('body').addClass('no-scroll');<br> }<br><br> this.fireEvent('shown');<br> }</code> |
* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim",
      "mini_batch_size": 32,
      "gather_across_devices": false
  }
  ```
</details>
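
To inspect these (comment, code) pairs yourself, here is a minimal sketch assuming the public `sentence-transformers/codesearchnet` dataset listed above (the split name is an assumption, not taken from this run):

```python
from datasets import load_dataset

# Each row pairs a docstring-style comment with the code it documents.
ds = load_dataset("sentence-transformers/codesearchnet", split="train")
print(ds[0]["comment"])
print(ds[0]["code"])
```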
<details><summary>sentence-transformers/stackexchange-duplicates</summary>
#### sentence-transformers/stackexchange-duplicates
* Dataset: sentence-transformers/stackexchange-duplicates
* Size: 250,460 training samples
* Columns: <code>body1</code> and <code>body2</code>
* Approximate statistics based on the first 1000 samples:
| | body1 | body2 |
|:--------|:--------------------------------------------------------------------------------------|:--------------------------------------------------------------------------------------|
| type | string | string |
| details | <ul><li>min: 13 tokens</li><li>mean: 174.01 tokens</li><li>max: 1024 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 156.88 tokens</li><li>max: 1024 tokens</li></ul> |
* Samples:
| body1 | body2 |
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| <code>I've been wondering about this for years. It seems like a pretty obvious question, so I'm surprised not to have found it addressed among the other Tolkien minutiae on this site. Hopefully I haven't missed it, but anyway, here goes... In Tolkien's Middle-Earth writings, Evil cannot create things, only twist and warp what already exists. Thus, Orcs are twisted Elves, Trolls are twisted Ents, etc. So then, what's the original source for Dragons? They look pretty original to me! The only template that seems even remotely possible is the Eagles, as they're both powerful fliers, but the connection seems very remote indeed. Also, as twisted copies Orcs and Trolls are markedly inferior to Elves and Ents respectively, but I'm not aware of any text describing Dragons as inferior to Eagles.</code> | <code>All that I know of Smaug is that he (she?) came out of nowhere to attack and conquer Erebor. Where exactly did he come from? In fact, what are the origins of dragons? Did Ilúvatar create them or did they come from somewhere else?</code> |
| <code>Hi i have some data which coming out from database in form of table like this, first i match some data with searching and then display it on page now i need to download it as csv file format please help me check my code and i'm new in php. please check image too for the reference and please please help me //import.php // echo "<pre>"; //print_r($_POST);die(); $keyword = $_POST['keyword']; $csvname = $_POST['csv_file']; ?> <table border ="1"> <thead> <tr> <th>id</th> <th>title</th> <th>count</th> </tr> </thead> <?php $row = 0; if (($handle = fopen("idata.csv", "r",)) !== FALSE) { while (($data = fgetcsv($handle, 1000, ",")) !== FALSE) { $num = count($data); // echo "<p> $num fields in line $row: <br /></p>\n"; $row++; for ($c=0; $c < $num; $c++) { // echo $data[$c] . "<br...</code> | <code>What is the most efficient way to convert a MySQL query to CSV in PHP please? It would be best to avoid temp files as this reduces portability (dir paths and setting file-system permissions required). The CSV should also include one top line of field names.</code> |
| <code>Following along in tutorials I see the blur filter being used. I am using Blender 2.69 and I can't locate it visually or even with a search. Actually, there is no "Filters" category at all. Do I have to download something to get it?</code> | <code>I have been following tutorial until I started adding nodes. The problem is that he has completely different nodes than I have. Even nodes that are created at start are different (I have Material and Output and he has Render Layers and Composite). Have I missed something or should I use different nodes than he?</code> |
* Loss: [<code>CachedMultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedmultiplenegativesrankingloss) with these parameters:
  ```json
  {
      "scale": 20.0,
      "similarity_fct": "cos_sim",
      "mini_batch_size": 32,
      "gather_across_devices": false
  }
  ```
</details>
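
Because several datasets are combined in this run, training goes through the multi-dataset interface of `SentenceTransformerTrainer`: a dictionary of named datasets paired with a dictionary of losses. Below is a minimal sketch with two of the datasets above (the subset and split names are assumptions, not taken from this run):

```python
from datasets import load_dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    util,
)
from sentence_transformers.losses import CachedMultipleNegativesRankingLoss

model = SentenceTransformer("jhu-clsp/ettin-encoder-32m")
loss = CachedMultipleNegativesRankingLoss(
    model, scale=20.0, similarity_fct=util.cos_sim, mini_batch_size=32
)

# One entry per training dataset; the trainer samples batches across them.
train_dataset = {
    "codesearchnet": load_dataset(
        "sentence-transformers/codesearchnet", split="train"
    ),
    "stackexchange-duplicates": load_dataset(
        "sentence-transformers/stackexchange-duplicates",
        "body-body-pair",  # assumed subset matching the body1/body2 columns
        split="train",
    ),
}
losses = {name: loss for name in train_dataset}

trainer = SentenceTransformerTrainer(
    model=model, train_dataset=train_dataset, loss=losses
)
trainer.train()
```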
### Training Hyperparameters

#### Non-Default Hyperparameters

- `per_device_train_batch_size`: 256
- `learning_rate`: 8e-05
- `weight_decay`: 1e-06
- `num_train_epochs`: 2
- `warmup_ratio`: 0.1

- `do_predict`: False
- `eval_strategy`: no
- `prediction_loss_only`: True
- `per_device_train_batch_size`: 256
- `per_device_eval_batch_size`: 8
- `per_gpu_train_batch_size`: None
- `per_gpu_eval_batch_size`: None
- `gradient_accumulation_steps`: 1
- `eval_accumulation_steps`: None
- `torch_empty_cache_steps`: None
- `learning_rate`: 8e-05
- `weight_decay`: 1e-06
- `adam_beta1`: 0.9
- `adam_beta2`: 0.999

- `seed`: 42
- `data_seed`: None
- `jit_mode_eval`: False

- `bf16`: False
- `fp16`: True
- `fp16_opt_level`: O1

- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- `fsdp_transformer_layer_cls_to_wrap`: None
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
- `parallelism_config`: None
- `deepspeed`: None
- `label_smoothing_factor`: 0.0
- `optim`: adamw_torch

- `adafactor`: False
- `group_by_length`: False
- `length_column_name`: length
- `project`: huggingface
- `trackio_space_id`: trackio
- `ddp_find_unused_parameters`: None
- `ddp_bucket_cap_mb`: None
- `ddp_broadcast_buffers`: False

- `torch_compile_backend`: None
- `torch_compile_mode`: None
- `include_tokens_per_second`: False
- `include_num_input_tokens_seen`: no
- `neftune_noise_alpha`: None
- `optim_target_modules`: None
- `batch_eval_metrics`: False

- `use_liger_kernel`: False
- `liger_kernel_config`: None
- `eval_use_gather_object`: False
- `average_tokens_across_devices`: True
- `prompts`: None
- `batch_sampler`: batch_sampler
- `multi_dataset_batch_sampler`: proportional
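
For reference, a minimal sketch of passing the non-default hyperparameters above to `SentenceTransformerTrainingArguments` (the `output_dir` is a placeholder; all other values are taken from the list):

```python
from sentence_transformers.training_args import (
    MultiDatasetBatchSamplers,
    SentenceTransformerTrainingArguments,
)

args = SentenceTransformerTrainingArguments(
    output_dir="outputs",  # placeholder, not from this run
    per_device_train_batch_size=256,
    learning_rate=8e-5,
    weight_decay=1e-6,
    num_train_epochs=2,
    warmup_ratio=0.1,
    fp16=True,
    # Batches are drawn from each dataset in proportion to its size.
    multi_dataset_batch_sampler=MultiDatasetBatchSamplers.PROPORTIONAL,
)
```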
### Training Logs

| Epoch | Step | Training Loss |
|:------:|:-----:|:-------------:|
| 0.0202 | 500 | 4.5778 |
| 0.0404 | 1000 | 3.5556 |
| 0.0606 | 1500 | 2.5948 |
| 0.0808 | 2000 | 2.3723 |
| 0.1011 | 2500 | 2.1149 |
| 0.1213 | 3000 | 2.3977 |
| 0.1415 | 3500 | 2.3535 |
| 0.1617 | 4000 | 1.9057 |
| 0.1819 | 4500 | 2.1313 |
| 0.2021 | 5000 | 2.1719 |
| 0.2223 | 5500 | 1.887 |
| 0.2425 | 6000 | 2.1792 |
| 0.2627 | 6500 | 2.3001 |
| 0.2830 | 7000 | 2.0002 |
| 0.3032 | 7500 | 1.9358 |
| 0.3234 | 8000 | 1.9074 |
| 0.3436 | 8500 | 1.9204 |
| 0.3638 | 9000 | 1.8991 |
| 0.3840 | 9500 | 2.0086 |
| 0.4042 | 10000 | 1.8229 |
| 0.4244 | 10500 | 1.7437 |
| 0.4446 | 11000 | 2.2012 |
| 0.4649 | 11500 | 1.6898 |
| 0.4851 | 12000 | 2.1212 |
| 0.5053 | 12500 | 1.8014 |
| 0.5255 | 13000 | 2.1112 |
| 0.5457 | 13500 | 1.885 |
| 0.5659 | 14000 | 1.6889 |
| 0.5861 | 14500 | 1.6377 |
| 0.6063 | 15000 | 1.8526 |
| 0.6265 | 15500 | 1.8912 |
| 0.6468 | 16000 | 1.8621 |
| 0.6670 | 16500 | 1.743 |
| 0.6872 | 17000 | 1.5893 |
| 0.7074 | 17500 | 1.9079 |
| 0.7276 | 18000 | 1.5885 |
| 0.7478 | 18500 | 1.9128 |
| 0.7680 | 19000 | 1.6654 |
| 0.7882 | 19500 | 1.7099 |
| 0.8084 | 20000 | 1.4688 |
| 0.8287 | 20500 | 1.3844 |
| 0.8489 | 21000 | 1.7908 |
| 0.8691 | 21500 | 1.7075 |
| 0.8893 | 22000 | 1.8114 |
| 0.9095 | 22500 | 1.5198 |
| 0.9297 | 23000 | 1.8605 |
| 0.9499 | 23500 | 1.6604 |
| 0.9701 | 24000 | 1.5891 |
| 0.9903 | 24500 | 1.5906 |
| 1.0106 | 25000 | 1.5027 |
| 1.0308 | 25500 | 1.7599 |
| 1.0510 | 26000 | 1.4124 |
| 1.0712 | 26500 | 1.5636 |
| 1.0914 | 27000 | 1.6126 |
| 1.1116 | 27500 | 1.4625 |
| 1.1318 | 28000 | 1.4467 |
| 1.1520 | 28500 | 1.6898 |
| 1.1722 | 29000 | 1.5088 |
| 1.1924 | 29500 | 1.5158 |
| 1.2127 | 30000 | 1.5266 |
| 1.2329 | 30500 | 1.465 |
| 1.2531 | 31000 | 1.5687 |
| 1.2733 | 31500 | 1.4397 |
| 1.2935 | 32000 | 1.7929 |
| 1.3137 | 32500 | 1.5893 |
| 1.3339 | 33000 | 1.4727 |
| 1.3541 | 33500 | 1.6007 |
| 1.3743 | 34000 | 1.2833 |
| 1.3946 | 34500 | 1.5541 |
| 1.4148 | 35000 | 1.3354 |
| 1.4350 | 35500 | 1.4509 |
| 1.4552 | 36000 | 1.6065 |
| 1.4754 | 36500 | 1.6393 |
| 1.4956 | 37000 | 1.3914 |
| 1.5158 | 37500 | 1.3584 |
| 1.5360 | 38000 | 1.5504 |
| 1.5562 | 38500 | 1.2169 |
| 1.5765 | 39000 | 1.4081 |
| 1.5967 | 39500 | 1.5506 |
| 1.6169 | 40000 | 1.473 |
| 1.6371 | 40500 | 1.2517 |
| 1.6573 | 41000 | 1.7644 |
| 1.6775 | 41500 | 1.4237 |
| 1.6977 | 42000 | 1.295 |
| 1.7179 | 42500 | 1.4951 |
| 1.7381 | 43000 | 1.4389 |
| 1.7584 | 43500 | 1.5742 |
| 1.7786 | 44000 | 1.4843 |
| 1.7988 | 44500 | 1.4806 |
| 1.8190 | 45000 | 1.3674 |
| 1.8392 | 45500 | 1.329 |
| 1.8594 | 46000 | 1.7644 |
| 1.8796 | 46500 | 1.36 |
| 1.8998 | 47000 | 1.2003 |
| 1.9200 | 47500 | 1.233 |
| 1.9403 | 48000 | 1.5147 |
| 1.9605 | 48500 | 1.3838 |
| 1.9807 | 49000 | 1.4928 |
### Framework Versions
- Python: 3.12.10
- Sentence Transformers: 5.1.2
- Transformers: 4.57.3
- PyTorch: 2.7.1+cu126
- Accelerate: 1.7.0
- Datasets: 3.6.0
- Tokenizers: 0.22.1

## Citation
config.json
CHANGED
@@ -13,6 +13,7 @@
   "cls_token_id": 50281,
   "decoder_bias": true,
   "deterministic_flash_attn": false,
+  "dtype": "float32",
   "embedding_dropout": 0.0,
   "eos_token_id": 50282,
   "global_attn_every_n_layers": 3,
@@ -41,7 +42,6 @@
   "sep_token_id": 50282,
   "sparse_pred_ignore_index": -100,
   "sparse_prediction": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.53.2",
+  "transformers_version": "4.57.3",
   "vocab_size": 50368
 }
|
config_sentence_transformers.json
CHANGED
@@ -2,7 +2,7 @@
   "model_type": "SentenceTransformer",
   "__version__": {
     "sentence_transformers": "5.1.2",
-    "transformers": "4.53.2",
+    "transformers": "4.57.3",
     "pytorch": "2.7.1+cu126"
   },
   "prompts": {
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:895b49d6283aa8bc1a1bcf30e93046f410c8c32d946f0ee02e688c55f602024c
 size 127538496
sentence_bert_config.json
CHANGED
@@ -1,4 +1,4 @@
 {
-  "max_seq_length":
+  "max_seq_length": 1024,
   "do_lower_case": false
 }
tokenizer.json
CHANGED
@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length":
+    "max_length": 1024,
     "strategy": "LongestFirst",
     "stride": 0
   },
tokenizer_config.json
CHANGED
@@ -937,7 +937,7 @@
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length":
+  "model_max_length": 1024,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "tokenizer_class": "PreTrainedTokenizerFast",