Changes
On August 9, 2022 at 3:57:05 PM UTC, Alp Öktem:
-
Added resource Ladino - English to Synthetic parallel corpora LAD-EN, TR, ES
f | 1 | { | f | 1 | { |
2 | "author": "", | 2 | "author": "", | ||
3 | "author_email": "", | 3 | "author_email": "", | ||
4 | "creator_user_id": "add784a5-9327-4063-a0e3-08eb34a96d52", | 4 | "creator_user_id": "add784a5-9327-4063-a0e3-08eb34a96d52", | ||
5 | "extras": [ | 5 | "extras": [ | ||
6 | { | 6 | { | ||
7 | "key": "image_url", | 7 | "key": "image_url", | ||
8 | "value": "" | 8 | "value": "" | ||
9 | } | 9 | } | ||
10 | ], | 10 | ], | ||
11 | "groups": [], | 11 | "groups": [], | ||
12 | "id": "5801010b-181a-45f2-b3d2-05861953ac63", | 12 | "id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||
13 | "isopen": true, | 13 | "isopen": true, | ||
14 | "license_id": "cc-by", | 14 | "license_id": "cc-by", | ||
15 | "license_title": "Creative Commons Attribution", | 15 | "license_title": "Creative Commons Attribution", | ||
16 | "license_url": "http://www.opendefinition.org/licenses/cc-by", | 16 | "license_url": "http://www.opendefinition.org/licenses/cc-by", | ||
17 | "maintainer": "", | 17 | "maintainer": "", | ||
18 | "maintainer_email": "", | 18 | "maintainer_email": "", | ||
19 | "metadata_created": "2022-08-09T15:56:40.059317", | 19 | "metadata_created": "2022-08-09T15:56:40.059317", | ||
n | 20 | "metadata_modified": "2022-08-09T15:56:40.059323", | n | 20 | "metadata_modified": "2022-08-09T15:57:05.908168", |
21 | "name": "synthetic-parallel-data", | 21 | "name": "synthetic-parallel-data", | ||
22 | "notes": "Synthetically produced parallel data using rule-based | 22 | "notes": "Synthetically produced parallel data using rule-based | ||
23 | Spanish-Ladino translation.\r\n\r\nSizes:\r\n\r\nLadino-Spanish: | 23 | Spanish-Ladino translation.\r\n\r\nSizes:\r\n\r\nLadino-Spanish: | ||
24 | 10,322,033 sentences\r\n\r\nLadino-Turkish: 4,574,021 | 24 | 10,322,033 sentences\r\n\r\nLadino-Turkish: 4,574,021 | ||
25 | sentences\r\n\r\nLadino-English: 5,748,012 sentences\r\n\r\nPaper: | 25 | sentences\r\n\r\nLadino-English: 5,748,012 sentences\r\n\r\nPaper: | ||
26 | https://arxiv.org/abs/2205.15599\r\n\r\nThis dataset is created as | 26 | https://arxiv.org/abs/2205.15599\r\n\r\nThis dataset is created as | ||
27 | part of project \"Judeo-Spanish: Connecting the two ends of the | 27 | part of project \"Judeo-Spanish: Connecting the two ends of the | ||
28 | Mediterranean\" carried out by Col\u00b7lectivaT and Sephardic Center | 28 | Mediterranean\" carried out by Col\u00b7lectivaT and Sephardic Center | ||
29 | of Istanbul within the framework of the \u201cGrant Scheme for Common | 29 | of Istanbul within the framework of the \u201cGrant Scheme for Common | ||
30 | Cultural Heritage: Preservation and Dialogue between Turkey and the | 30 | Cultural Heritage: Preservation and Dialogue between Turkey and the | ||
31 | EU\u2013II (CCH-II)\u201d implemented by the Ministry of Culture and | 31 | EU\u2013II (CCH-II)\u201d implemented by the Ministry of Culture and | ||
32 | Tourism of the Republic of Turkey with the financial support of the | 32 | Tourism of the Republic of Turkey with the financial support of the | ||
33 | European Union. The content of this website is the sole responsibility | 33 | European Union. The content of this website is the sole responsibility | ||
34 | of Col\u00b7lectivaT and does not necessarily reflect the views of the | 34 | of Col\u00b7lectivaT and does not necessarily reflect the views of the | ||
35 | European Union.", | 35 | European Union.", | ||
n | 36 | "num_resources": 0, | n | 36 | "num_resources": 1, |
37 | "num_tags": 1, | 37 | "num_tags": 1, | ||
38 | "organization": { | 38 | "organization": { | ||
39 | "approval_status": "approved", | 39 | "approval_status": "approved", | ||
40 | "created": "2022-08-09T08:55:19.044966", | 40 | "created": "2022-08-09T08:55:19.044966", | ||
41 | "description": "Col\u00b7lectivaT is a non-profit cooperative | 41 | "description": "Col\u00b7lectivaT is a non-profit cooperative | ||
42 | formed by knowledge workers that provides all-around services of | 42 | formed by knowledge workers that provides all-around services of | ||
43 | cultural translation, research and technological services for | 43 | cultural translation, research and technological services for | ||
44 | collaborative and linguistic work.", | 44 | collaborative and linguistic work.", | ||
45 | "id": "0510debc-4a80-4144-b198-def8e40d28d9", | 45 | "id": "0510debc-4a80-4144-b198-def8e40d28d9", | ||
46 | "image_url": "https://collectivat.cat/img/logo_sm.png", | 46 | "image_url": "https://collectivat.cat/img/logo_sm.png", | ||
47 | "is_organization": true, | 47 | "is_organization": true, | ||
48 | "name": "col-lectivat", | 48 | "name": "col-lectivat", | ||
49 | "state": "active", | 49 | "state": "active", | ||
50 | "title": "Col\u00b7lectivaT", | 50 | "title": "Col\u00b7lectivaT", | ||
51 | "type": "organization" | 51 | "type": "organization" | ||
52 | }, | 52 | }, | ||
53 | "owner_org": "0510debc-4a80-4144-b198-def8e40d28d9", | 53 | "owner_org": "0510debc-4a80-4144-b198-def8e40d28d9", | ||
54 | "private": false, | 54 | "private": false, | ||
55 | "relationships_as_object": [], | 55 | "relationships_as_object": [], | ||
56 | "relationships_as_subject": [], | 56 | "relationships_as_subject": [], | ||
t | 57 | "resources": [], | t | 57 | "resources": [ |
58 | { | ||||
59 | "cache_last_updated": null, | ||||
60 | "cache_url": null, | ||||
61 | "created": "2022-08-09T15:57:05.916509", | ||||
62 | "description": "", | ||||
63 | "format": "ZIP", | ||||
64 | "hash": "", | ||||
65 | "id": "f267ee8d-6e12-48a0-9c6f-4ce41ee11255", | ||||
66 | "last_modified": null, | ||||
67 | "metadata_modified": "2022-08-09T15:57:05.911158", | ||||
68 | "mimetype": "application/zip", | ||||
69 | "mimetype_inner": null, | ||||
70 | "name": "Ladino - English", | ||||
71 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||||
72 | "position": 0, | ||||
73 | "resource_type": null, | ||||
74 | "size": null, | ||||
75 | "state": "active", | ||||
76 | "url": | ||||
77 | "https://collectivat.cat/share/ladino_synthetic_parallel_dataset_en.csv.zip", | ||||
78 | "url_type": null | ||||
79 | } | ||||
80 | ], | ||||
58 | "state": "draft", | 81 | "state": "draft", | ||
59 | "tags": [ | 82 | "tags": [ | ||
60 | { | 83 | { | ||
61 | "display_name": "parallel text", | 84 | "display_name": "parallel text", | ||
62 | "id": "896bbff4-e82d-4f13-9668-dd08a37eb81c", | 85 | "id": "896bbff4-e82d-4f13-9668-dd08a37eb81c", | ||
63 | "name": "parallel text", | 86 | "name": "parallel text", | ||
64 | "state": "active", | 87 | "state": "active", | ||
65 | "vocabulary_id": null | 88 | "vocabulary_id": null | ||
66 | } | 89 | } | ||
67 | ], | 90 | ], | ||
68 | "title": "Synthetic parallel corpora LAD-EN, TR, ES", | 91 | "title": "Synthetic parallel corpora LAD-EN, TR, ES", | ||
69 | "type": "dataset", | 92 | "type": "dataset", | ||
70 | "url": "", | 93 | "url": "", | ||
71 | "version": "" | 94 | "version": "" | ||
72 | } | 95 | } |