Changes
On February 22, 2023 at 4:12:40 PM UTC, a new file was uploaded to the resource "Ladino - English" in the dataset "Synthetic parallel corpora Ladino-English, Turkish, Spanish".
| f | 1 | { | f | 1 | { |
| 2 | "author": "", | 2 | "author": "", | ||
| 3 | "author_email": "", | 3 | "author_email": "", | ||
| 4 | "creator_user_id": "add784a5-9327-4063-a0e3-08eb34a96d52", | 4 | "creator_user_id": "add784a5-9327-4063-a0e3-08eb34a96d52", | ||
| 5 | "extras": [ | 5 | "extras": [ | ||
| 6 | { | 6 | { | ||
| 7 | "key": "image_url", | 7 | "key": "image_url", | ||
| 8 | "value": | 8 | "value": | ||
| 9 | a.sefarad.com.tr/uploads/group/2022-08-09-171438.401553ladinotext.jpg" | 9 | a.sefarad.com.tr/uploads/group/2022-08-09-171438.401553ladinotext.jpg" | ||
| 10 | } | 10 | } | ||
| 11 | ], | 11 | ], | ||
| 12 | "groups": [ | 12 | "groups": [ | ||
| 13 | { | 13 | { | ||
| 14 | "description": "Text corpora, parallel corpora", | 14 | "description": "Text corpora, parallel corpora", | ||
| 15 | "display_name": "Text dataset", | 15 | "display_name": "Text dataset", | ||
| 16 | "id": "e6125090-f7e9-4729-9a13-e4931090508a", | 16 | "id": "e6125090-f7e9-4729-9a13-e4931090508a", | ||
| 17 | "image_display_url": | 17 | "image_display_url": | ||
| 18 | .sefarad.com.tr/uploads/group/2022-08-09-171438.401553ladinotext.jpg", | 18 | .sefarad.com.tr/uploads/group/2022-08-09-171438.401553ladinotext.jpg", | ||
| 19 | "name": "text", | 19 | "name": "text", | ||
| 20 | "title": "Text dataset" | 20 | "title": "Text dataset" | ||
| 21 | } | 21 | } | ||
| 22 | ], | 22 | ], | ||
| 23 | "id": "5801010b-181a-45f2-b3d2-05861953ac63", | 23 | "id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||
| 24 | "isopen": true, | 24 | "isopen": true, | ||
| 25 | "license_id": "cc-by", | 25 | "license_id": "cc-by", | ||
| 26 | "license_title": "Creative Commons Attribution", | 26 | "license_title": "Creative Commons Attribution", | ||
| 27 | "license_url": "http://www.opendefinition.org/licenses/cc-by", | 27 | "license_url": "http://www.opendefinition.org/licenses/cc-by", | ||
| 28 | "maintainer": "", | 28 | "maintainer": "", | ||
| 29 | "maintainer_email": "", | 29 | "maintainer_email": "", | ||
| 30 | "metadata_created": "2022-08-09T15:56:40.059317", | 30 | "metadata_created": "2022-08-09T15:56:40.059317", | ||
| n | 31 | "metadata_modified": "2022-09-27T11:23:58.521920", | n | 31 | "metadata_modified": "2023-02-22T16:12:40.724617", |
| 32 | "name": "synthetic-parallel-data", | 32 | "name": "synthetic-parallel-data", | ||
| 33 | "notes": "Synthetically produced parallel data using rule-based | 33 | "notes": "Synthetically produced parallel data using rule-based | ||
| 34 | Spanish-Ladino translation.\r\n\r\nSizes:\r\n\r\nLadino-Turkish: | 34 | Spanish-Ladino translation.\r\n\r\nSizes:\r\n\r\nLadino-Turkish: | ||
| 35 | 4,574,021 sentences\r\n\r\nLadino-English: 5,748,012 | 35 | 4,574,021 sentences\r\n\r\nLadino-English: 5,748,012 | ||
| 36 | sentences\r\n\r\nTotal Ladino-Spanish: 10,322,033 sentences (This is | 36 | sentences\r\n\r\nTotal Ladino-Spanish: 10,322,033 sentences (This is | ||
| 37 | basically combination of the two corpora)\r\n\r\nPaper: | 37 | basically combination of the two corpora)\r\n\r\nPaper: | ||
| 38 | https://arxiv.org/abs/2205.15599\r\n\r\nLicense: CC-BY\r\n\r\nThis | 38 | https://arxiv.org/abs/2205.15599\r\n\r\nLicense: CC-BY\r\n\r\nThis | ||
| 39 | dataset is created as part of project \"Judeo-Spanish: Connecting the | 39 | dataset is created as part of project \"Judeo-Spanish: Connecting the | ||
| 40 | two ends of the Mediterranean\" carried out by Col\u00b7lectivaT and | 40 | two ends of the Mediterranean\" carried out by Col\u00b7lectivaT and | ||
| 41 | Sephardic Center of Istanbul within the framework of the \u201cGrant | 41 | Sephardic Center of Istanbul within the framework of the \u201cGrant | ||
| 42 | Scheme for Common Cultural Heritage: Preservation and Dialogue between | 42 | Scheme for Common Cultural Heritage: Preservation and Dialogue between | ||
| 43 | Turkey and the EU\u2013II (CCH-II)\u201d implemented by the Ministry | 43 | Turkey and the EU\u2013II (CCH-II)\u201d implemented by the Ministry | ||
| 44 | of Culture and Tourism of the Republic of Turkey with the financial | 44 | of Culture and Tourism of the Republic of Turkey with the financial | ||
| 45 | support of the European Union. The content of this website is the sole | 45 | support of the European Union. The content of this website is the sole | ||
| 46 | responsibility of Col\u00b7lectivaT and does not necessarily reflect | 46 | responsibility of Col\u00b7lectivaT and does not necessarily reflect | ||
| 47 | the views of the European Union.", | 47 | the views of the European Union.", | ||
| 48 | "num_resources": 3, | 48 | "num_resources": 3, | ||
| 49 | "num_tags": 1, | 49 | "num_tags": 1, | ||
| 50 | "organization": { | 50 | "organization": { | ||
| 51 | "approval_status": "approved", | 51 | "approval_status": "approved", | ||
| 52 | "created": "2022-08-09T08:55:19.044966", | 52 | "created": "2022-08-09T08:55:19.044966", | ||
| 53 | "description": "Col\u00b7lectivaT is a non-profit cooperative | 53 | "description": "Col\u00b7lectivaT is a non-profit cooperative | ||
| 54 | formed by knowledge workers that provides all-around services of | 54 | formed by knowledge workers that provides all-around services of | ||
| 55 | cultural translation, research and technological services for | 55 | cultural translation, research and technological services for | ||
| 56 | collaborative and linguistic work.", | 56 | collaborative and linguistic work.", | ||
| 57 | "id": "0510debc-4a80-4144-b198-def8e40d28d9", | 57 | "id": "0510debc-4a80-4144-b198-def8e40d28d9", | ||
| 58 | "image_url": "https://collectivat.cat/img/logo_sm.png", | 58 | "image_url": "https://collectivat.cat/img/logo_sm.png", | ||
| 59 | "is_organization": true, | 59 | "is_organization": true, | ||
| 60 | "name": "col-lectivat", | 60 | "name": "col-lectivat", | ||
| 61 | "state": "active", | 61 | "state": "active", | ||
| 62 | "title": "Col\u00b7lectivaT", | 62 | "title": "Col\u00b7lectivaT", | ||
| 63 | "type": "organization" | 63 | "type": "organization" | ||
| 64 | }, | 64 | }, | ||
| 65 | "owner_org": "0510debc-4a80-4144-b198-def8e40d28d9", | 65 | "owner_org": "0510debc-4a80-4144-b198-def8e40d28d9", | ||
| 66 | "private": false, | 66 | "private": false, | ||
| 67 | "relationships_as_object": [], | 67 | "relationships_as_object": [], | ||
| 68 | "relationships_as_subject": [], | 68 | "relationships_as_subject": [], | ||
| 69 | "resources": [ | 69 | "resources": [ | ||
| 70 | { | 70 | { | ||
| 71 | "cache_last_updated": null, | 71 | "cache_last_updated": null, | ||
| 72 | "cache_url": null, | 72 | "cache_url": null, | ||
| 73 | "created": "2022-08-09T15:57:05.916509", | 73 | "created": "2022-08-09T15:57:05.916509", | ||
| 74 | "description": "", | 74 | "description": "", | ||
| 75 | "format": "ZIP", | 75 | "format": "ZIP", | ||
| 76 | "hash": "", | 76 | "hash": "", | ||
| 77 | "id": "f267ee8d-6e12-48a0-9c6f-4ce41ee11255", | 77 | "id": "f267ee8d-6e12-48a0-9c6f-4ce41ee11255", | ||
| 78 | "last_modified": null, | 78 | "last_modified": null, | ||
| n | 79 | "metadata_modified": "2022-08-09T15:57:05.911158", | n | 79 | "metadata_modified": "2023-02-22T16:12:40.727322", |
| 80 | "mimetype": "application/zip", | 80 | "mimetype": "application/zip", | ||
| 81 | "mimetype_inner": null, | 81 | "mimetype_inner": null, | ||
| 82 | "name": "Ladino - English", | 82 | "name": "Ladino - English", | ||
| 83 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | 83 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||
| 84 | "position": 0, | 84 | "position": 0, | ||
| 85 | "resource_type": null, | 85 | "resource_type": null, | ||
| 86 | "size": null, | 86 | "size": null, | ||
| 87 | "state": "active", | 87 | "state": "active", | ||
| 88 | "url": | 88 | "url": | ||
| t | 89 | //collectivat.cat/share/ladino_synthetic_parallel_dataset_en.csv.zip", | t | 89 | .e2-2.dev/dataset-share/ladino_synthetic_parallel_dataset_en.csv.zip", |
| 90 | "url_type": null | 90 | "url_type": null | ||
| 91 | }, | 91 | }, | ||
| 92 | { | 92 | { | ||
| 93 | "cache_last_updated": null, | 93 | "cache_last_updated": null, | ||
| 94 | "cache_url": null, | 94 | "cache_url": null, | ||
| 95 | "created": "2022-08-09T15:57:38.631681", | 95 | "created": "2022-08-09T15:57:38.631681", | ||
| 96 | "description": "", | 96 | "description": "", | ||
| 97 | "format": "ZIP", | 97 | "format": "ZIP", | ||
| 98 | "hash": "", | 98 | "hash": "", | ||
| 99 | "id": "bac5f97b-9054-4ffe-9d8f-0022af74a508", | 99 | "id": "bac5f97b-9054-4ffe-9d8f-0022af74a508", | ||
| 100 | "last_modified": null, | 100 | "last_modified": null, | ||
| 101 | "metadata_modified": "2022-08-09T15:57:38.626254", | 101 | "metadata_modified": "2022-08-09T15:57:38.626254", | ||
| 102 | "mimetype": "application/zip", | 102 | "mimetype": "application/zip", | ||
| 103 | "mimetype_inner": null, | 103 | "mimetype_inner": null, | ||
| 104 | "name": "Ladino - Turkish", | 104 | "name": "Ladino - Turkish", | ||
| 105 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | 105 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||
| 106 | "position": 1, | 106 | "position": 1, | ||
| 107 | "resource_type": null, | 107 | "resource_type": null, | ||
| 108 | "size": null, | 108 | "size": null, | ||
| 109 | "state": "active", | 109 | "state": "active", | ||
| 110 | "url": | 110 | "url": | ||
| 111 | "http://collectivat.cat/share/ladino_synthetic_parallel_dataset_tr.csv.zip", | 111 | "http://collectivat.cat/share/ladino_synthetic_parallel_dataset_tr.csv.zip", | ||
| 112 | "url_type": null | 112 | "url_type": null | ||
| 113 | }, | 113 | }, | ||
| 114 | { | 114 | { | ||
| 115 | "cache_last_updated": null, | 115 | "cache_last_updated": null, | ||
| 116 | "cache_url": null, | 116 | "cache_url": null, | ||
| 117 | "created": "2022-09-27T11:19:23.761065", | 117 | "created": "2022-09-27T11:19:23.761065", | ||
| 118 | "description": "", | 118 | "description": "", | ||
| 119 | "format": "ZIP", | 119 | "format": "ZIP", | ||
| 120 | "hash": "", | 120 | "hash": "", | ||
| 121 | "id": "2f895bd6-c0f2-455b-9ae9-ff5678d8672f", | 121 | "id": "2f895bd6-c0f2-455b-9ae9-ff5678d8672f", | ||
| 122 | "last_modified": null, | 122 | "last_modified": null, | ||
| 123 | "metadata_modified": "2022-09-27T11:19:23.753453", | 123 | "metadata_modified": "2022-09-27T11:19:23.753453", | ||
| 124 | "mimetype": "application/zip", | 124 | "mimetype": "application/zip", | ||
| 125 | "mimetype_inner": null, | 125 | "mimetype_inner": null, | ||
| 126 | "name": "Ladino - Spanish", | 126 | "name": "Ladino - Spanish", | ||
| 127 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | 127 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||
| 128 | "position": 2, | 128 | "position": 2, | ||
| 129 | "resource_type": null, | 129 | "resource_type": null, | ||
| 130 | "size": null, | 130 | "size": null, | ||
| 131 | "state": "active", | 131 | "state": "active", | ||
| 132 | "url": "http://collectivat.cat/share/dataset_es-lad.csv.zip", | 132 | "url": "http://collectivat.cat/share/dataset_es-lad.csv.zip", | ||
| 133 | "url_type": null | 133 | "url_type": null | ||
| 134 | } | 134 | } | ||
| 135 | ], | 135 | ], | ||
| 136 | "state": "active", | 136 | "state": "active", | ||
| 137 | "tags": [ | 137 | "tags": [ | ||
| 138 | { | 138 | { | ||
| 139 | "display_name": "parallel text", | 139 | "display_name": "parallel text", | ||
| 140 | "id": "896bbff4-e82d-4f13-9668-dd08a37eb81c", | 140 | "id": "896bbff4-e82d-4f13-9668-dd08a37eb81c", | ||
| 141 | "name": "parallel text", | 141 | "name": "parallel text", | ||
| 142 | "state": "active", | 142 | "state": "active", | ||
| 143 | "vocabulary_id": null | 143 | "vocabulary_id": null | ||
| 144 | } | 144 | } | ||
| 145 | ], | 145 | ], | ||
| 146 | "title": "Synthetic parallel corpora Ladino-English, Turkish, | 146 | "title": "Synthetic parallel corpora Ladino-English, Turkish, | ||
| 147 | Spanish", | 147 | Spanish", | ||
| 148 | "type": "dataset", | 148 | "type": "dataset", | ||
| 149 | "url": "", | 149 | "url": "", | ||
| 150 | "version": "" | 150 | "version": "" | ||
| 151 | } | 151 | } |
This project is funded by the European Union.